diff -pruN 2022.01.0+ds.1-1/continuous_integration/environment-3.9.yaml 2022.02.0+ds.1-1/continuous_integration/environment-3.9.yaml
--- 2022.01.0+ds.1-1/continuous_integration/environment-3.9.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/continuous_integration/environment-3.9.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -23,6 +23,7 @@ dependencies:
   - lz4  # Only tested here
   - msgpack-python
   - netcdf4
+  - pillow<9.0  # https://github.com/pytorch/pytorch/issues/72293
   - paramiko
   - pre-commit
   - prometheus_client
diff -pruN 2022.01.0+ds.1-1/continuous_integration/gpuci/axis.yaml 2022.02.0+ds.1-1/continuous_integration/gpuci/axis.yaml
--- 2022.01.0+ds.1-1/continuous_integration/gpuci/axis.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/continuous_integration/gpuci/axis.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -1,5 +1,5 @@
 PYTHON_VER:
-- "3.8"
+- "3.9"
 
 CUDA_VER:
 - "11.5"
@@ -8,6 +8,6 @@ LINUX_VER:
 - ubuntu18.04
 
 RAPIDS_VER:
-- "22.02"
+- "22.04"
 
 excludes:
diff -pruN 2022.01.0+ds.1-1/continuous_integration/recipes/dask/meta.yaml 2022.02.0+ds.1-1/continuous_integration/recipes/dask/meta.yaml
--- 2022.01.0+ds.1-1/continuous_integration/recipes/dask/meta.yaml	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/continuous_integration/recipes/dask/meta.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,56 @@
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev') + environ.get('VERSION_SUFFIX', '') %}
+{% set dask_version = environ.get('DASK_VERSION', '0.0.0.dev') %}
+{% set dask_build = environ.get('DASK_BUILD', '') %}
+{% set py_version=environ.get('CONDA_PY', 36) %}
+
+
+package:
+  name: dask
+  version: {{ version }}
+
+source:
+  git_url: ../../..
+
+build:
+  number: {{ GIT_DESCRIBE_NUMBER }}
+  string: py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  noarch: python
+
+requirements:
+  host:
+    - python >=3.7
+  run:
+    - python >=3.7
+    - dask-core {{ dask_version }}={{ dask_build }}
+    - distributed {{ version }}=py{{ py_version }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+    - cytoolz >=0.8.2
+    - numpy >=1.18
+    - pandas >=1.0
+    - bokeh >=2.1.1
+    - jinja2
+
+  run_constrained:
+    - openssl !=1.1.1e
+
+test:
+  imports:
+    - dask
+    - dask.array
+    - dask.bag
+    - dask.bytes
+    - dask.dataframe
+    - dask.dataframe.tseries
+    - dask.delayed
+    - dask.diagnostics
+    - dask.distributed
+    - distributed
+
+about:
+  home: https://dask.org/
+  summary: Parallel PyData with Task Scheduling
+  license: BSD-3-Clause
+  license_file: LICENSE.txt
+  description: |
+    Dask is a flexible parallel computing library for analytics.
+  doc_url: https://dask.org/
+  dev_url: https://github.com/dask/dask
diff -pruN 2022.01.0+ds.1-1/continuous_integration/recipes/distributed/conda_build_config.yaml 2022.02.0+ds.1-1/continuous_integration/recipes/distributed/conda_build_config.yaml
--- 2022.01.0+ds.1-1/continuous_integration/recipes/distributed/conda_build_config.yaml	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/continuous_integration/recipes/distributed/conda_build_config.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,4 @@
+python:
+    - 3.7
+    - 3.8
+    - 3.9
diff -pruN 2022.01.0+ds.1-1/continuous_integration/recipes/distributed/meta.yaml 2022.02.0+ds.1-1/continuous_integration/recipes/distributed/meta.yaml
--- 2022.01.0+ds.1-1/continuous_integration/recipes/distributed/meta.yaml	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/continuous_integration/recipes/distributed/meta.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,79 @@
+{% set version = environ.get('GIT_DESCRIBE_TAG', '0.0.0.dev') + environ.get('VERSION_SUFFIX', '') %}
+{% set dask_version = environ.get('DASK_VERSION', '0.0.0.dev') %}
+{% set dask_build = environ.get('DASK_BUILD', '') %}
+
+
+package:
+  name: distributed
+  version: {{ version }}
+
+source:
+  git_url: ../../..
+
+build:
+  number: {{ GIT_DESCRIBE_NUMBER }}
+  string: py{{ python | replace(".", "") }}_{{ GIT_DESCRIBE_HASH }}_{{ GIT_DESCRIBE_NUMBER }}
+  script: {{ PYTHON }} -m pip install . -vv
+  entry_points:
+    - dask-scheduler = distributed.cli.dask_scheduler:go
+    - dask-ssh = distributed.cli.dask_ssh:go
+    - dask-worker = distributed.cli.dask_worker:go
+
+requirements:
+  build:
+    - python                                 # [build_platform != target_platform]
+    - cross-python_{{ target_platform }}     # [build_platform != target_platform]
+  host:
+    - python
+    - pip
+
+  run:
+    - python
+    - click >=6.6
+    - cloudpickle >=1.5.0
+    - cytoolz >=0.8.2
+    - dask-core {{ dask_version }}={{ dask_build }}
+    - jinja2
+    - msgpack-python >=0.6.0
+    - psutil >=5.0
+    - pyyaml
+    - sortedcontainers !=2.0.0,!=2.0.1
+    - tblib >=1.6.0
+    - toolz >=0.8.2
+    - tornado >=5  # [py<38]
+    - tornado >=6.0.3  # [py>=38]
+    - zict >=0.1.3
+    - setuptools
+
+  run_constrained:
+    - openssl !=1.1.1e
+
+test:
+  imports:
+    - distributed
+    - distributed.cli
+    - distributed.comm
+    - distributed.deploy
+    - distributed.diagnostics
+    - distributed.protocol
+  commands:
+    - pip check
+    - dask-scheduler --help
+    - dask-ssh --help
+    - dask-worker --help
+  requires:
+    - pip
+
+about:
+  home: https://distributed.dask.org
+  summary: Distributed scheduler for Dask
+  license: BSD-3-Clause
+  license_family: BSD
+  license_file: LICENSE.txt
+  description: |
+    Distributed is a lightweight library for distributed computing in Python.
+    It extends both the concurrent.futures and dask APIs to moderate sized
+    clusters.
+  doc_url: https://distributed.dask.org
+  doc_source_url: https://github.com/dask/distributed/blob/main/docs/source/index.rst
+  dev_url: https://github.com/dask/distributed
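
For context on the two recipes above: conda-build injects GIT_DESCRIBE_TAG, GIT_DESCRIBE_HASH and GIT_DESCRIBE_NUMBER from `git describe`, and the Jinja expressions assemble the package version and build string from them. Below is a minimal Python sketch of that resolution using made-up values; the tag, hash and Python version are assumptions, not taken from a real build.

    # Sketch of how the recipe's version and build string resolve.
    env = {
        "GIT_DESCRIBE_TAG": "2022.02.0",   # assumed tag
        "GIT_DESCRIBE_NUMBER": "0",        # commits since the tag
        "GIT_DESCRIBE_HASH": "g1234abc",   # assumed abbreviated hash
        "VERSION_SUFFIX": "",
    }
    py_version = "39"  # CONDA_PY, i.e. the Python version with the dot stripped

    version = env.get("GIT_DESCRIBE_TAG", "0.0.0.dev") + env.get("VERSION_SUFFIX", "")
    build_string = f"py{py_version}_{env['GIT_DESCRIBE_HASH']}_{env['GIT_DESCRIBE_NUMBER']}"
    print(version, build_string)  # -> 2022.02.0 py39_g1234abc_0
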
diff -pruN 2022.01.0+ds.1-1/continuous_integration/scripts/test_report.py 2022.02.0+ds.1-1/continuous_integration/scripts/test_report.py
--- 2022.01.0+ds.1-1/continuous_integration/scripts/test_report.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/continuous_integration/scripts/test_report.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import html
 import io
 import os
 import re
@@ -12,8 +13,6 @@ import pandas
 import requests
 
 TOKEN = os.environ.get("GITHUB_TOKEN")
-if not TOKEN:
-    raise RuntimeError("Failed to find a GitHub Token")
 
 # Mapping between a symbol (pass, fail, skip) and a color
 COLORS = {
@@ -99,13 +98,13 @@ def download_and_parse_artifact(url):
     """
     Download the artifact at the url parse it.
     """
-    r = get_from_github(url)
-    f = zipfile.ZipFile(io.BytesIO(r.content))
     try:
+        r = get_from_github(url)
+        f = zipfile.ZipFile(io.BytesIO(r.content))
         run = junitparser.JUnitXml.fromstring(f.read(f.filelist[0].filename))
         return run
     except Exception:
-        print(f"Failed to parse {url}")
+        print(f"Failed to download/parse {url}")
         return None
 
 
@@ -139,7 +138,7 @@ def dataframe_from_jxml(run):
             else:
                 s = "x"
             status.append(s)
-            message.append(m)
+            message.append(html.escape(m))
     df = pandas.DataFrame(
         {"file": fname, "test": tname, "status": status, "message": message}
     )
@@ -162,34 +161,44 @@ def dataframe_from_jxml(run):
 
 
 if __name__ == "__main__":
+    if not TOKEN:
+        raise RuntimeError("Failed to find a GitHub Token")
     print("Getting all recent workflows...")
     workflows = get_workflow_listing()
 
-    # Filter the workflows listing to be in the last month,
+    # Filter the workflows listing to be in the retention period,
     # and only be test runs (i.e., no linting) that completed.
     workflows = [
         w
         for w in workflows
         if (
             pandas.to_datetime(w["created_at"])
-            > pandas.Timestamp.now(tz="UTC") - pandas.Timedelta(days=31)
+            > pandas.Timestamp.now(tz="UTC") - pandas.Timedelta(days=90)
             and w["conclusion"] != "cancelled"
             and w["name"].lower() == "tests"
         )
     ]
+    # Each workflow processed takes ~10-15 API requests. To avoid being
+    # rate limited by GitHub (1000 requests per hour) we choose just the
+    # most recent N runs. This also keeps the viz size from blowing up.
+    workflows = sorted(workflows, key=lambda w: w["created_at"])[-50:]
 
     print("Getting the artifact listing for each workflow...")
     for w in workflows:
         artifacts = get_artifacts_for_workflow(w["id"])
         # We also upload timeout reports as artifacts, but we don't want them here.
-        w["artifacts"] = [a for a in artifacts if "timeouts" not in a["name"]]
+        w["artifacts"] = [
+            a
+            for a in artifacts
+            if "timeouts" not in a["name"] and "cluster_dumps" not in a["name"]
+        ]
 
     print("Downloading and parsing artifacts...")
     for w in workflows:
         w["dfs"] = []
         for a in w["artifacts"]:
             xml = download_and_parse_artifact(a["archive_download_url"])
-            df = dataframe_from_jxml(xml)
+            df = dataframe_from_jxml(xml) if xml else None
             # Note: we assign a column with the workflow timestamp rather than the
             # artifact timestamp so that artifacts triggered under the same workflow
             # can be aligned according to the same trigger time.
@@ -198,28 +207,32 @@ if __name__ == "__main__":
                     name=a["name"],
                     suite=suite_from_name(a["name"]),
                     date=w["created_at"],
+                    url=w["html_url"],
                 )
                 w["dfs"].append(df)
 
-    # Compute the set of test suites which form the top-level grouping for the chart
-    # (e.g., ubuntu-latest-3.9, windows-latest-3.7)
-    suites = set()
+    # Make a top-level dict of dataframes, mapping test name to a dataframe
+    # of all check suites that ran that test.
+    # Note: we drop **all** tests which did not have at least one failure.
+    # This is because, as nice as a block of green tests can be, there are
+    # far too many tests to visualize at once, so we only want to look at
+    # flaky tests. If the test suite has been doing well, this chart should
+    # dwindle to nothing!
+    dfs = []
     for w in workflows:
-        for a in w["artifacts"]:
-            suites.add(suite_from_name(a["name"]))
-
-    # Make a top-level dict of dataframes, mapping test suite name to a long-form
-    # dataframe of all the tests run in that suite.
-    overall: dict[str, pandas.DataFrame] = {}
-    for s in sorted(suites):
-        overall
-        dfs = []
-        for w in workflows:
-            dfs.extend([df[df.suite == s] for df in w["dfs"]])
-        overall[s] = pandas.concat(dfs, axis=0)
+        dfs.extend([df for df in w["dfs"]])
+    total = pandas.concat(dfs, axis=0)
+    grouped = (
+        total.groupby(total.index)
+        .filter(lambda g: (g.status == "x").any())
+        .reset_index()
+        .assign(test=lambda df: df.file + "." + df.test)
+        .groupby("test")
+    )
+    overall = {name: grouped.get_group(name) for name in grouped.groups}
 
     # Get all of the workflow timestamps that we wound up with, which we can use
-    # below to align the different suites.
+    # below to align the different groups.
     times = set()
     for df in overall.values():
         times.update(df.date.unique())
@@ -228,18 +241,6 @@ if __name__ == "__main__":
     altair.data_transformers.disable_max_rows()
     charts = []
     for name, df in overall.items():
-        # Final reshaping for altair plotting.
-        # Note: we drop **all** tests which did not have at least one failure.
-        # This is because, as nice as a block of green tests can be, there are
-        # far too many tests to visualize at once, so we only want to look at
-        # flaky tests. If the test suite has been doing well, this chart should
-        # dwindle to nothing!
-        df = (
-            df.groupby(df.index)
-            .filter(lambda g: (g.status == "x").any())
-            .reset_index()
-            .assign(test=lambda df: df.file + "." + df.test)
-        )
         # Don't show this suite if it has passed all tests recently.
         if not len(df):
             continue
@@ -247,10 +248,10 @@ if __name__ == "__main__":
         # Create an aggregated form of the suite with overall pass rate
         # over the time in question.
         df_agg = (
-            df[df.status == "✓"]
-            .groupby("test")
+            df[df.status != "x"]
+            .groupby("suite")
             .size()
-            .truediv(df.groupby("test").size(), fill_value=0)
+            .truediv(df.groupby("suite").size(), fill_value=0)
             .to_frame(name="Pass Rate")
             .reset_index()
         )
@@ -261,7 +262,8 @@ if __name__ == "__main__":
             .mark_rect(stroke="gray")
             .encode(
                 x=altair.X("date:O", scale=altair.Scale(domain=sorted(list(times)))),
-                y=altair.Y("test:N", title=None),
+                y=altair.Y("suite:N", title=None),
+                href=altair.Href("url:N"),
                 color=altair.Color(
                     "status:N",
                     scale=altair.Scale(
@@ -269,13 +271,13 @@ if __name__ == "__main__":
                         range=list(COLORS.values()),
                     ),
                 ),
-                tooltip=["test:N", "date:O", "status:N", "message:N"],
+                tooltip=["suite:N", "date:O", "status:N", "message:N", "url:N"],
             )
             .properties(title=name)
             | altair.Chart(df_agg.assign(_="_"))
             .mark_rect(stroke="gray")
             .encode(
-                y=altair.Y("test:N", title=None, axis=altair.Axis(labels=False)),
+                y=altair.Y("suite:N", title=None, axis=altair.Axis(labels=False)),
                 x=altair.X("_:N", title=None),
                 color=altair.Color(
                     "Pass Rate:Q",
@@ -283,7 +285,7 @@ if __name__ == "__main__":
                         range=[COLORS["x"], COLORS["✓"]], domain=[0.0, 1.0]
                     ),
                 ),
-                tooltip=["test:N", "Pass Rate:Q"],
+                tooltip=["suite:N", "Pass Rate:Q"],
             )
         )
 
@@ -291,6 +293,14 @@ if __name__ == "__main__":
     chart = (
         altair.vconcat(*charts)
         .configure_axis(labelLimit=1000)  # test names are long
+        .configure_title(anchor="start")
         .resolve_scale(x="shared")  # enforce aligned x axes
     )
-    altair_saver.save(chart, "test_report.html", embed_options={"renderer": "svg"})
+    altair_saver.save(
+        chart,
+        "test_report.html",
+        embed_options={
+            "renderer": "svg",  # Makes the text searchable
+            "loader": {"target": "_blank"},  # Open hrefs in a new window
+        },
+    )
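
The reshaping change above leans on pandas' groupby/filter to keep only tests with at least one failure and then regroup by fully qualified test name. A standalone sketch of the same pattern, with a made-up two-test dataframe rather than the real artifact data:

    import pandas

    # Two tests, each observed twice; only test_one ever failed ("x").
    total = pandas.DataFrame(
        {
            "file": ["test_a.py", "test_a.py", "test_b.py", "test_b.py"],
            "test": ["test_one", "test_one", "test_two", "test_two"],
            "status": ["✓", "x", "✓", "✓"],
        },
        index=[0, 0, 1, 1],  # one index value per test, as in dataframe_from_jxml
    )

    grouped = (
        total.groupby(total.index)
        .filter(lambda g: (g.status == "x").any())  # drop tests that never failed
        .reset_index()
        .assign(test=lambda df: df.file + "." + df.test)
        .groupby("test")
    )
    overall = {name: grouped.get_group(name) for name in grouped.groups}
    # Only "test_a.py.test_one" survives, since it has at least one failure.
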
diff -pruN 2022.01.0+ds.1-1/.coveragerc 2022.02.0+ds.1-1/.coveragerc
--- 2022.01.0+ds.1-1/.coveragerc	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/.coveragerc	2022-02-11 16:21:14.000000000 +0000
@@ -3,17 +3,25 @@ source =
     distributed
 omit =
     distributed/tests/test*
-    distributed/hdfs.py
-    distributed/cluster.py
     distributed/*/tests/test*
-    distributed/compatibility.py
     distributed/cli/utils.py
-    distributed/utils_test.py
     distributed/deploy/ssh.py
     distributed/_ipython_utils.py
+    distributed/_version.py
+    distributed/pytest_resourceleaks.py
+    distributed/comm/ucx.py
 
 [report]
 show_missing = True
+exclude_lines =
+    # re-enable the standard pragma
+    pragma: nocover
+    pragma: no cover
+    # exclude nvml calls
+    [\s(.]nvml[\s(.]
+    [\s(.]pynvml[\s(.]
+    # exclude LOG_PDB
+    LOG_PDB
 
 [html]
 directory = coverage_html_report
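
The new exclude_lines entries are regular expressions that coverage.py matches against each source line; matching lines are left out of the coverage report. A hypothetical module showing which lines the added patterns would exclude (the function below is invented purely for illustration):

    def gpu_memory_info(handle):
        import pynvml  # excluded: matches [\s(.]pynvml[\s(.]
        return pynvml.nvmlDeviceGetMemoryInfo(handle)  # excluded for the same reason

    LOG_PDB = False  # excluded: matches the LOG_PDB pattern
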
diff -pruN 2022.01.0+ds.1-1/debian/changelog 2022.02.0+ds.1-1/debian/changelog
--- 2022.01.0+ds.1-1/debian/changelog	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/changelog	2022-08-04 18:10:15.000000000 +0000
@@ -1,4 +1,4 @@
-dask.distributed (2022.01.0+ds.1-1) unstable; urgency=medium
+dask.distributed (2022.02.0+ds.1-1) unstable; urgency=medium
 
   * New upstream release
   * Refreshed patches
@@ -6,8 +6,14 @@ dask.distributed (2022.01.0+ds.1-1) unst
   * Remove pass-host-to-local-cluster.patch applied upstream
   * Disable pytest-asyncio-0.14.patch, set-client-asyncronous-earlier.patch
     seem to be unneeded.
+  * Tie dask.distributed more closely to the corresponding dask version.
+  * Add test_spillbuffer_remove_sizetest.patch to skip a failing test that
+    upstream has since removed
+  * Skip two tests that were failing and have been removed upstream.
+  * Skip a block of tests that fail on the network-limited builder
+  * Copy over some updated pytest settings from upstream's CI configuration
 
- -- Diane Trout <diane@ghic.org>  Sat, 19 Feb 2022 16:23:23 -0800
+ -- Diane Trout <diane@ghic.org>  Thu, 04 Aug 2022 11:10:15 -0700
 
 dask.distributed (2021.09.1+ds.1-3) unstable; urgency=medium
 
diff -pruN 2022.01.0+ds.1-1/debian/clean 2022.02.0+ds.1-1/debian/clean
--- 2022.01.0+ds.1-1/debian/clean	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/clean	2022-08-04 17:56:48.000000000 +0000
@@ -0,0 +1,2 @@
+dask-worker-space/
+test_cluster_dump/
diff -pruN 2022.01.0+ds.1-1/debian/control 2022.02.0+ds.1-1/debian/control
--- 2022.01.0+ds.1-1/debian/control	2022-02-19 22:35:04.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/control	2022-08-04 17:56:48.000000000 +0000
@@ -9,7 +9,7 @@ Build-Depends: debhelper-compat (= 13),
                python3-click (>= 6.6),
                python3-cloudpickle (>= 1.5.0),
                python3-cryptography <!nocheck>,
-               python3-dask (>= 2021.09.1),
+               python3-dask (>= 2022.02.0),
                python3-doc <!nodoc>,
                python3-locket,
                python3-msgpack,
diff -pruN 2022.01.0+ds.1-1/debian/patches/fall-back-to-ipv6-localhost.patch 2022.02.0+ds.1-1/debian/patches/fall-back-to-ipv6-localhost.patch
--- 2022.01.0+ds.1-1/debian/patches/fall-back-to-ipv6-localhost.patch	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/fall-back-to-ipv6-localhost.patch	2022-08-04 17:56:48.000000000 +0000
@@ -9,7 +9,7 @@ Forwarded: not yet
 
 --- a/distributed/utils.py
 +++ b/distributed/utils.py
-@@ -138,10 +138,14 @@
+@@ -137,10 +137,14 @@
              "reaching %r, defaulting to hostname: %s" % (host, e),
              RuntimeWarning,
          )
diff -pruN 2022.01.0+ds.1-1/debian/patches/mark-tests-require-installation.patch 2022.02.0+ds.1-1/debian/patches/mark-tests-require-installation.patch
--- 2022.01.0+ds.1-1/debian/patches/mark-tests-require-installation.patch	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/mark-tests-require-installation.patch	2022-08-04 17:56:48.000000000 +0000
@@ -59,15 +59,15 @@ Forwarded: not-needed
          with popen(["dask-worker", "localhost:8786", "--no-dashboard"]) as a:
 --- a/distributed/cli/tests/test_dask_worker.py
 +++ b/distributed/cli/tests/test_dask_worker.py
-@@ -28,6 +28,7 @@
- )
+@@ -46,6 +46,7 @@
+         return await loop.run_in_executor(None, func_call)
  
  
 +@pytest.mark.isinstalled
  def test_nanny_worker_ports(loop):
      with popen(["dask-scheduler", "--port", "9359", "--no-dashboard"]):
          with popen(
-@@ -58,6 +59,7 @@
+@@ -76,6 +77,7 @@
                  )
  
  
@@ -75,7 +75,7 @@ Forwarded: not-needed
  @pytest.mark.slow
  def test_nanny_worker_port_range(loop):
      with popen(["dask-scheduler", "--port", "9359", "--no-dashboard"]) as sched:
-@@ -97,6 +99,7 @@
+@@ -115,6 +117,7 @@
                  assert set(nanny_ports.values()) == expected_nanny_ports
  
  
@@ -83,7 +83,7 @@ Forwarded: not-needed
  def test_nanny_worker_port_range_too_many_workers_raises(loop):
      with popen(["dask-scheduler", "--port", "9359", "--no-dashboard"]):
          with popen(
-@@ -119,6 +122,7 @@
+@@ -137,6 +140,7 @@
              )
  
  
@@ -91,7 +91,7 @@ Forwarded: not-needed
  def test_memory_limit(loop):
      with popen(["dask-scheduler", "--no-dashboard"]):
          with popen(
-@@ -139,6 +143,7 @@
+@@ -157,6 +161,7 @@
                  assert d["memory_limit"] == 2e9
  
  
@@ -99,23 +99,31 @@ Forwarded: not-needed
  def test_no_nanny(loop):
      with popen(["dask-scheduler", "--no-dashboard"]):
          with popen(
-@@ -147,6 +152,7 @@
+@@ -165,6 +170,7 @@
              assert any(b"Registered" in worker.stderr.readline() for i in range(15))
  
  
 +@pytest.mark.isinstalled
  @pytest.mark.slow
  @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
- def test_no_reconnect(nanny, loop):
-@@ -169,6 +175,7 @@
-             assert time() < start + 30
+ @pytest.mark.asyncio
+@@ -192,6 +198,7 @@
+             assert worker.returncode == 0
+ 
+ 
++@pytest.mark.isinstalled
+ @pytest.mark.slow
+ @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
+ @pytest.mark.asyncio
+@@ -223,6 +230,7 @@
+             assert worker.returncode == 0
  
  
 +@pytest.mark.isinstalled
  def test_resources(loop):
      with popen(["dask-scheduler", "--no-dashboard"]):
          with popen(
-@@ -188,6 +195,7 @@
+@@ -242,6 +250,7 @@
                  assert worker["resources"] == {"A": 1, "B": 2, "C": 3}
  
  
@@ -123,7 +131,7 @@ Forwarded: not-needed
  @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
  def test_local_directory(loop, nanny):
      with tmpfile() as fn:
-@@ -212,6 +220,7 @@
+@@ -266,6 +275,7 @@
                      assert worker["local_directory"].startswith(fn)
  
  
@@ -131,7 +139,7 @@ Forwarded: not-needed
  @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
  def test_scheduler_file(loop, nanny):
      with tmpfile() as fn:
-@@ -226,6 +235,7 @@
+@@ -280,6 +290,7 @@
                          assert time() < start + 10
  
  
@@ -139,39 +147,79 @@ Forwarded: not-needed
  def test_scheduler_address_env(loop, monkeypatch):
      monkeypatch.setenv("DASK_SCHEDULER_ADDRESS", "tcp://127.0.0.1:8786")
      with popen(["dask-scheduler", "--no-dashboard"]):
-@@ -237,6 +247,7 @@
+@@ -291,6 +302,7 @@
                      assert time() < start + 10
  
  
 +@pytest.mark.isinstalled
- def test_nprocs_requires_nanny(loop):
+ def test_nworkers_requires_nanny(loop):
      with popen(["dask-scheduler", "--no-dashboard"]):
          with popen(
-@@ -248,6 +259,7 @@
+@@ -302,6 +314,7 @@
              )
  
  
 +@pytest.mark.isinstalled
- def test_nprocs_negative(loop):
+ def test_nworkers_negative(loop):
      with popen(["dask-scheduler", "--no-dashboard"]):
-         with popen(["dask-worker", "127.0.0.1:8786", "--nprocs=-1"]):
-@@ -255,6 +267,7 @@
+         with popen(["dask-worker", "127.0.0.1:8786", "--nworkers=-1"]):
+@@ -309,6 +322,7 @@
                  c.wait_for_workers(cpu_count(), timeout="10 seconds")
  
  
 +@pytest.mark.isinstalled
- def test_nprocs_auto(loop):
+ def test_nworkers_auto(loop):
      with popen(["dask-scheduler", "--no-dashboard"]):
-         with popen(["dask-worker", "127.0.0.1:8786", "--nprocs=auto"]):
-@@ -263,6 +276,7 @@
+         with popen(["dask-worker", "127.0.0.1:8786", "--nworkers=auto"]):
+@@ -317,6 +331,7 @@
                  c.wait_for_workers(procs, timeout="10 seconds")
  
  
 +@pytest.mark.isinstalled
- def test_nprocs_expands_name(loop):
+ def test_nworkers_expands_name(loop):
+     with popen(["dask-scheduler", "--no-dashboard"]):
+         with popen(["dask-worker", "127.0.0.1:8786", "--nworkers", "2", "--name", "0"]):
+@@ -334,6 +349,7 @@
+                     assert len(set(names)) == 4
+ 
+ 
++@pytest.mark.isinstalled
+ def test_worker_cli_nprocs_renamed_to_nworkers(loop):
+     n_workers = 2
      with popen(["dask-scheduler", "--no-dashboard"]):
-         with popen(["dask-worker", "127.0.0.1:8786", "--nprocs", "2", "--name", "0"]):
-@@ -371,6 +385,7 @@
+@@ -347,6 +363,7 @@
+                 c.wait_for_workers(n_workers, timeout="30 seconds")
+ 
+ 
++@pytest.mark.isinstalled
+ def test_worker_cli_nworkers_with_nprocs_is_an_error():
+     with popen(["dask-scheduler", "--no-dashboard"]):
+         with popen(
+@@ -358,6 +375,7 @@
+             )
+ 
+ 
++@pytest.mark.isinstalled
+ @pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
+ @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
+ @pytest.mark.parametrize(
+@@ -392,6 +410,7 @@
+                 assert client.run(func) == {"tcp://127.0.0.2:39837": listen_address}
+ 
+ 
++@pytest.mark.isinstalled
+ @requires_ipv6
+ @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
+ @pytest.mark.parametrize("listen_address", ["tcp://:39838", "tcp://[::1]:39838"])
+@@ -426,6 +445,7 @@
+                 assert client.run(func) == {expected_name: expected_listen}
+ 
+ 
++@pytest.mark.isinstalled
+ @pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
+ @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
+ @pytest.mark.parametrize("host", ["127.0.0.2", "0.0.0.0"])
+@@ -449,6 +469,7 @@
                  assert all(host in v for v in listen_addresses.values())
  
  
@@ -179,7 +227,15 @@ Forwarded: not-needed
  def test_dashboard_non_standard_ports(loop):
      pytest.importorskip("bokeh")
      try:
-@@ -454,6 +469,7 @@
+@@ -523,6 +544,7 @@
+             pass
+ 
+ 
++@pytest.mark.isinstalled
+ @gen_cluster(nthreads=[])
+ async def test_integer_names(s):
+     with popen(["dask-worker", s.address, "--name", "123"]):
+@@ -532,6 +554,7 @@
          assert ws.name == 123
  
  
@@ -187,6 +243,14 @@ Forwarded: not-needed
  @pytest.mark.asyncio
  @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
  async def test_worker_class(cleanup, tmp_path, nanny):
+@@ -575,6 +598,7 @@
+                 assert all(name == "MyWorker" for name in worker_types.values())
+ 
+ 
++@pytest.mark.isinstalled
+ @gen_cluster(nthreads=[], client=True)
+ async def test_preload_config(c, s):
+     # Ensure dask-worker pulls the preload from the Dask config if
 --- a/distributed/tests/test_config.py
 +++ b/distributed/tests/test_config.py
 @@ -109,6 +109,7 @@
@@ -241,7 +305,7 @@ Forwarded: not-needed
      # worker in a separate Python process than the client
 --- a/distributed/tests/test_variable.py
 +++ b/distributed/tests/test_variable.py
-@@ -38,6 +38,7 @@
+@@ -37,6 +37,7 @@
          assert time() < start + 5
  
  
diff -pruN 2022.01.0+ds.1-1/debian/patches/override-worker-test.patch 2022.02.0+ds.1-1/debian/patches/override-worker-test.patch
--- 2022.01.0+ds.1-1/debian/patches/override-worker-test.patch	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/override-worker-test.patch	2022-08-04 17:56:48.000000000 +0000
@@ -9,7 +9,7 @@ Forwarded: not-needed
 
 --- a/distributed/tests/test_worker.py
 +++ b/distributed/tests/test_worker.py
-@@ -214,7 +214,10 @@
+@@ -222,7 +222,10 @@
  @gen_cluster(client=True)
  async def test_upload_egg(c, s, a, b):
      eggname = "testegg-1.0.0-py3.4.egg"
@@ -21,7 +21,7 @@ Forwarded: not-needed
      assert not os.path.exists(os.path.join(a.local_directory, eggname))
      assert not os.path.exists(os.path.join(b.local_directory, eggname))
      assert a.local_directory != b.local_directory
-@@ -243,6 +246,10 @@
+@@ -251,6 +254,10 @@
  @gen_cluster(client=True)
  async def test_upload_pyz(c, s, a, b):
      pyzname = "mytest.pyz"
diff -pruN 2022.01.0+ds.1-1/debian/patches/python-3.10-loop.patch 2022.02.0+ds.1-1/debian/patches/python-3.10-loop.patch
--- 2022.01.0+ds.1-1/debian/patches/python-3.10-loop.patch	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/python-3.10-loop.patch	2022-08-04 17:56:48.000000000 +0000
@@ -26,7 +26,7 @@ Subject: Python 3.10 hid the asyncio loo
                      x = await run_actor_function_on_worker()
 --- a/distributed/tests/test_client.py
 +++ b/distributed/tests/test_client.py
-@@ -6303,16 +6303,17 @@
+@@ -6464,16 +6464,17 @@
  @gen_cluster(nthreads=[])
  async def test_client_gather_semaphore_loop(s):
      async with Client(s.address, asynchronous=True) as c:
diff -pruN 2022.01.0+ds.1-1/debian/patches/series 2022.02.0+ds.1-1/debian/patches/series
--- 2022.01.0+ds.1-1/debian/patches/series	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/series	2022-08-04 17:56:48.000000000 +0000
@@ -11,3 +11,4 @@ skip-bokeh-tests.patch
 skip-directory-test-when-root.patch
 #set-client-asyncronous-earlier.patch
 python-3.10-loop.patch
+test_spillbuffer_remove_sizetest.patch
diff -pruN 2022.01.0+ds.1-1/debian/patches/skip-bokeh-tests.patch 2022.02.0+ds.1-1/debian/patches/skip-bokeh-tests.patch
--- 2022.01.0+ds.1-1/debian/patches/skip-bokeh-tests.patch	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/skip-bokeh-tests.patch	2022-08-04 17:56:48.000000000 +0000
@@ -4,7 +4,7 @@ Forward: no
 
 --- a/distributed/tests/test_client.py
 +++ b/distributed/tests/test_client.py
-@@ -6796,6 +6796,7 @@
+@@ -6810,6 +6810,7 @@
  @gen_cluster(client=True, nthreads=[("127.0.0.1", 0)])
  async def test_get_client_functions_spawn_clusters(c, s, a):
      # see gh4565
diff -pruN 2022.01.0+ds.1-1/debian/patches/skip-directory-test-when-root.patch 2022.02.0+ds.1-1/debian/patches/skip-directory-test-when-root.patch
--- 2022.01.0+ds.1-1/debian/patches/skip-directory-test-when-root.patch	2022-02-20 00:23:23.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/skip-directory-test-when-root.patch	2022-08-04 17:56:48.000000000 +0000
@@ -4,7 +4,7 @@ Forwarded: https://github.com/dask/distr
 
 --- a/distributed/tests/test_worker.py
 +++ b/distributed/tests/test_worker.py
-@@ -1724,6 +1724,8 @@
+@@ -1891,6 +1891,8 @@
      assert "Heartbeat to scheduler failed" in logger.getvalue()
  
  
diff -pruN 2022.01.0+ds.1-1/debian/patches/test_spillbuffer_remove_sizetest.patch 2022.02.0+ds.1-1/debian/patches/test_spillbuffer_remove_sizetest.patch
--- 2022.01.0+ds.1-1/debian/patches/test_spillbuffer_remove_sizetest.patch	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/patches/test_spillbuffer_remove_sizetest.patch	2022-08-04 17:56:48.000000000 +0000
@@ -0,0 +1,22 @@
+Author: Diane Trout <diane@ghic.org>
+Description: Remove some memory tests that were failing as upstream
+ had also removed them.
+Forwarded: no
+
+--- a/distributed/tests/test_spill.py
++++ b/distributed/tests/test_spill.py
+@@ -70,13 +70,12 @@
+     # Single key is larger than target and goes directly into slow
+     e = "e" * 500
+     slarge = sizeof(e)
++
+     buf["e"] = e
+     assert set(buf.disk) == {"d", "e"}
+     assert buf.spilled_by_key == {"d": s, "e": slarge}
+-    assert buf.spilled_total == s + slarge
+ 
+     # Updating a spilled key with another larger than target updates slow directly
+     buf["d"] = "d" * 500
+     assert set(buf.disk) == {"d", "e"}
+     assert buf.spilled_by_key == {"d": slarge, "e": slarge}
+-    assert buf.spilled_total == slarge * 2
diff -pruN 2022.01.0+ds.1-1/debian/rules 2022.02.0+ds.1-1/debian/rules
--- 2022.01.0+ds.1-1/debian/rules	2022-01-15 18:30:25.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/rules	2022-08-04 17:56:48.000000000 +0000
@@ -1,20 +1,41 @@
 #!/usr/bin/make -f
 
 
-SKIP=test_upload_file_egg or \
-     test_web_preload or \
-     test_dont_overlap_communications_to_same_worker or \
-     test_share_communication or \
-     test_robust_to_bad_sizeof_estimates or \
-     test_broken_worker_during_computation or \
-     test_profile_server or \
-     test_feed_large_bytestring or \
-     test_steal_expensive_data_slow_computation
+SKIP=test_upload_file_egg \
+     or test_web_preload \
+     or test_dont_overlap_communications_to_same_worker \
+     or test_share_communication \
+     or test_robust_to_bad_sizeof_estimates \
+     or test_broken_worker_during_computation \
+     or test_profile_server \
+     or test_feed_large_bytestring \
+     or test_steal_expensive_data_slow_computation \
+     or test_value_raises_during_spilling \
+     or test_fail_write_to_disk \
+
+# These tests failed when building, perhaps due to network issues?
+# They don't fail when run outside of the builder.
+SKIP+=or test_shutdown_localcluster \
+      or test_nanny_closed_by_keyboard_interrupt \
+      or test_preload_import_time \
+      or test_tls_scheduler \
+      or test_plugin_exception \
+      or test_plugin_multiple_exceptions \
+      or test_plugin_internal_exception \
+      or test_interface_async \
+      or test_host_uses_scheduler_protocol \
+      or test_worker_listens_on_same_interface_by_default \
+      or test_no_dangling_asyncio_tasks \
+
 
 export PYBUILD_NAME=distributed
 export PYBUILD_TEST_ARGS=$(CURDIR)/distributed/tests -v \
   --ignore=distributed/deploy/utils_test.py \
   --ignore=distributed/utils_test.py \
+  --ignore=continuous_integration \
+  --ignore=docs \
+  --ignore=.github \
+  --timeout-method=signal \
   --timeout=300 \
   -m "not (avoid_travis or isinstalled or slow or flaky)" -k "not ( $(SKIP) )"
 
diff -pruN 2022.01.0+ds.1-1/debian/tests/control 2022.02.0+ds.1-1/debian/tests/control
--- 2022.01.0+ds.1-1/debian/tests/control	2022-01-17 19:55:50.000000000 +0000
+++ 2022.02.0+ds.1-1/debian/tests/control	2022-08-04 17:56:48.000000000 +0000
@@ -7,7 +7,7 @@ Test-Command: set -e
  ; cp distributed/tests/testegg-1.0.0-py3.4.egg $AUTOPKGTEST_TMP
  ; chown $AUTOPKGTEST_NORMAL_USER $AUTOPKGTEST_TMP
  ; cp -r distributed/tests /usr/lib/python3/dist-packages/distributed/
- ; SKIP_TESTS=test_gather_failing_cnn_error
+ ; SKIP_TESTS="test_gather_failing_cnn_error or test_value_raises_during_spilling or test_fail_write_to_disk"
  ; for py in $(py3versions -r 2>/dev/null)
  ; do cd "$AUTOPKGTEST_TMP"
  ;   echo "Testing with $py:"
diff -pruN 2022.01.0+ds.1-1/distributed/active_memory_manager.py 2022.02.0+ds.1-1/distributed/active_memory_manager.py
--- 2022.01.0+ds.1-1/distributed/active_memory_manager.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/active_memory_manager.py	2022-02-11 16:21:14.000000000 +0000
@@ -312,8 +312,7 @@ class ActiveMemoryManagerExtension:
         # THEN temporarily keep the extra replica on the candidate with status=running.
         #
         # This prevents a ping-pong effect between ReduceReplicas (or any other policy
-        # that yields drop commands with multiple candidates) and RetireWorker
-        # (to be later introduced by https://github.com/dask/distributed/pull/5381):
+        # that yields drop commands with multiple candidates) and RetireWorker:
         # 1. RetireWorker replicates in-memory tasks from worker A (very busy and being
         #    retired) to worker B (idle)
         # 2. on the next AMM iteration 2 seconds later, ReduceReplicas drops the same
@@ -494,3 +493,151 @@ class ReduceReplicas(ActiveMemoryManager
                 ndrop,
                 nkeys,
             )
+
+
+class RetireWorker(ActiveMemoryManagerPolicy):
+    """Replicate somewhere else all unique in-memory tasks on a worker, preparing for
+    its shutdown.
+
+    At any given time, the AMM may have registered multiple instances of this policy,
+    one for each worker currently being retired - meaning that most of the time no
+    instances will be registered at all. For this reason, this policy doesn't figure in
+    the dask config (:file:`distributed.yaml`). Instances are added by
+    :meth:`distributed.Scheduler.retire_workers` and automatically remove themselves
+    once the worker has been retired. If the AMM is disabled in the dask config,
+    :meth:`~distributed.Scheduler.retire_workers` will start a temporary ad-hoc one.
+
+    **Failure condition**
+
+    There may not be any suitable workers to receive the tasks from the retiring worker.
+    This happens in two use cases:
+
+    1. This is the only worker in the cluster, or
+    2. All workers are either paused or being retired at the same time
+
+    In either case, this policy will fail to move out all keys and set the
+    ``no_recipients`` boolean to True. :meth:`~distributed.Scheduler.retire_workers`
+    will abort the retirement.
+
+    There is a third use case, where a task fails to be replicated away for whatever
+    reason, e.g. because its recipient is unresponsive but the Scheduler doesn't know
+    yet. In this case we'll just wait for the next AMM iteration and try again (possibly
+    with a different receiving worker, e.g. if the receiving worker was hung but not yet
+    declared dead).
+
+    **Retiring a worker with spilled tasks**
+
+    On its very first iteration, this policy suggests that other workers should fetch
+    all unique in-memory tasks of the retiring worker. Frequently, this means that in
+    the next few moments the retiring worker will be bombarded by
+    :meth:`distributed.worker.Worker.get_data` calls from the rest of the cluster. This
+    can be a problem if most of the managed memory of the worker has been spilled out,
+    as it could send the worker above its terminate threshold. Two measures are in place
+    in order to prevent this:
+
+    - At every iteration, this policy drops all tasks on the retiring worker that have
+      already been replicated somewhere else. This makes room for further tasks to be
+      moved out of the spill file in order to be replicated onto another worker.
+    - Once the worker passes the ``pause`` threshold,
+      :meth:`~distributed.worker.Worker.get_data` throttles the number of outgoing
+      connections to 1.
+
+    Parameters
+    ==========
+    address: str
+        URI of the worker to be retired
+    """
+
+    address: str
+    no_recipients: bool
+
+    def __init__(self, address: str):
+        self.address = address
+        self.no_recipients = False
+
+    def __repr__(self) -> str:
+        return f"RetireWorker({self.address!r})"
+
+    def run(self):
+        """"""
+        ws = self.manager.scheduler.workers.get(self.address)
+        if ws is None:
+            logger.debug("Removing policy %s: Worker no longer in cluster", self)
+            self.manager.policies.remove(self)
+            return
+
+        nrepl = 0
+        nno_rec = 0
+
+        logger.debug("Retiring %s", ws)
+        for ts in ws.has_what:
+            if len(ts.who_has) > 1:
+                # There are already replicas of this key on other workers.
+                # Suggest dropping the replica from this worker.
+                # Use cases:
+                # 1. The suggestion is accepted by the AMM and by the Worker.
+                #    The replica on this worker is dropped.
+                # 2. The suggestion is accepted by the AMM, but rejected by the Worker.
+                #    We'll try again at the next AMM iteration.
+                # 3. The suggestion is rejected by the AMM, because another policy
+                #    (e.g. ReduceReplicas) already suggested the same for this worker
+                # 4. The suggestion is rejected by the AMM, because the task has
+                #    dependents queued or running on the same worker.
+                #    We'll try again at the next AMM iteration.
+                # 5. The suggestion is rejected by the AMM, because all replicas of the
+                #    key are on workers being retired and the other RetireWorker
+                #    instances already made the same suggestion. We need to deal with
+                #    this case and create a replica elsewhere.
+                drop_ws = (yield "drop", ts, {ws})
+                if drop_ws:
+                    continue  # Use case 1 or 2
+                if ts.who_has & self.manager.scheduler.running:
+                    continue  # Use case 3 or 4
+                # Use case 5
+
+            # Either the worker holds the only replica or all replicas are being held
+            # by workers that are being retired
+            nrepl += 1
+            # Don't create an unnecessary additional replica if another policy already
+            # asked for one
+            try:
+                has_pending_repl = bool(self.manager.pending[ts][0])
+            except KeyError:
+                has_pending_repl = False
+
+            if not has_pending_repl:
+                rec_ws = (yield "replicate", ts, None)
+                if not rec_ws:
+                    # replication was rejected by the AMM (see _find_recipient)
+                    nno_rec += 1
+
+        if nno_rec:
+            # All workers are paused or closing_gracefully.
+            # Scheduler.retire_workers will read this flag and exit immediately.
+            # TODO after we implement the automatic transition of workers from paused
+            #      to closing_gracefully after a timeout expires, we should revisit this
+            #      code to wait for paused workers and only exit immediately if all
+            #      workers are in closing_gracefully status.
+            self.no_recipients = True
+            logger.warning(
+                f"Tried retiring worker {self.address}, but {nno_rec} tasks could not "
+                "be moved as there are no suitable workers to receive them. "
+                "The worker will not be retired."
+            )
+            self.manager.policies.remove(self)
+        elif nrepl:
+            logger.info(
+                f"Retiring worker {self.address}; {nrepl} keys are being moved away.",
+            )
+        else:
+            logger.info(
+                f"Retiring worker {self.address}; no unique keys need to be moved away."
+            )
+            self.manager.policies.remove(self)
+
+    def done(self) -> bool:
+        """Return True if it is safe to close the worker down; False otherwise"""
+        ws = self.manager.scheduler.workers.get(self.address)
+        if ws is None:
+            return True
+        return all(len(ts.who_has) > 1 for ts in ws.has_what)
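
RetireWorker above uses the same generator protocol as the other Active Memory Manager policies: run() yields ("drop", ts, candidates) or ("replicate", ts, candidates) suggestions and receives back the worker picked by the AMM, or None if the suggestion was rejected. A minimal hypothetical policy showing just that protocol (not part of upstream, purely illustrative):

    from distributed.active_memory_manager import ActiveMemoryManagerPolicy


    class DropFromWorker(ActiveMemoryManagerPolicy):
        """Hypothetical policy: suggest dropping redundant replicas from one worker."""

        def __init__(self, address: str):
            self.address = address

        def __repr__(self) -> str:
            return f"DropFromWorker({self.address!r})"

        def run(self):
            ws = self.manager.scheduler.workers.get(self.address)
            if ws is None:
                return  # the worker left the cluster; nothing to do
            for ts in ws.has_what:
                if len(ts.who_has) > 1:
                    # The AMM answers with the WorkerState the replica will be
                    # dropped from, or None if it rejected the suggestion; either
                    # way, unresolved keys are retried on the next iteration.
                    yield "drop", ts, {ws}
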
diff -pruN 2022.01.0+ds.1-1/distributed/batched.py 2022.02.0+ds.1-1/distributed/batched.py
--- 2022.01.0+ds.1-1/distributed/batched.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/batched.py	2022-02-11 16:21:14.000000000 +0000
@@ -8,6 +8,7 @@ import dask
 from dask.utils import parse_timedelta
 
 from .core import CommClosedError
+from .metrics import time
 
 logger = logging.getLogger(__name__)
 
@@ -83,12 +84,12 @@ class BatchedSend:
                 # Nothing to send
                 self.next_deadline = None
                 continue
-            if self.next_deadline is not None and self.loop.time() < self.next_deadline:
+            if self.next_deadline is not None and time() < self.next_deadline:
                 # Send interval not expired yet
                 continue
             payload, self.buffer = self.buffer, []
             self.batch_count += 1
-            self.next_deadline = self.loop.time() + self.interval
+            self.next_deadline = time() + self.interval
             try:
                 nbytes = yield self.comm.write(
                     payload, serializers=self.serializers, on_error="raise"
diff -pruN 2022.01.0+ds.1-1/distributed/cli/dask_ssh.py 2022.02.0+ds.1-1/distributed/cli/dask_ssh.py
--- 2022.01.0+ds.1-1/distributed/cli/dask_ssh.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/cli/dask_ssh.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,15 +1,24 @@
+import logging
+import sys
+import warnings
+from textwrap import dedent
+
 import click
 
 from distributed.cli.utils import check_python_3
 from distributed.deploy.old_ssh import SSHCluster
 
+logger = logging.getLogger("distributed.dask_ssh")
+
 
 @click.command(
-    help="""Launch a distributed cluster over SSH. A 'dask-scheduler' process will run on the
-                         first host specified in [HOSTNAMES] or in the hostfile (unless --scheduler is specified
-                         explicitly). One or more 'dask-worker' processes will be run each host in [HOSTNAMES] or
-                         in the hostfile. Use command line flags to adjust how many dask-worker process are run on
-                         each host (--nprocs) and how many cpus are used by each dask-worker process (--nthreads)."""
+    help=dedent(
+        """Launch a distributed cluster over SSH. A 'dask-scheduler' process will run on the
+        first host specified in [HOSTNAMES] or in the hostfile, unless --scheduler is specified
+        explicitly. One or more 'dask-worker' processes will be run on each host. Use the flag
+        --nworkers to adjust how many dask-worker processes are run on each host and the flag
+        --nthreads to adjust how many CPUs are used by each dask-worker process."""
+    )
 )
 @click.option(
     "--scheduler",
@@ -36,7 +45,15 @@ from distributed.deploy.old_ssh import S
 )
 @click.option(
     "--nprocs",
-    default=1,
+    default=None,
+    show_default=True,
+    type=int,
+    help="Deprecated. Use --nworkers instead. Number of worker processes per host.",
+)
+@click.option(
+    "--nworkers",
+    "n_workers",  # This sets the Python argument name
+    default=None,
     show_default=True,
     type=int,
     help="Number of worker processes per host.",
@@ -122,6 +139,7 @@ def main(
     hostfile,
     nthreads,
     nprocs,
+    n_workers,
     ssh_username,
     ssh_port,
     ssh_private_key,
@@ -148,12 +166,27 @@ def main(
         print(ctx.get_help())
         exit(1)
 
+    if nprocs is not None and n_workers is not None:
+        logger.error(
+            "Both --nprocs and --nworkers were specified. Use --nworkers only."
+        )
+        sys.exit(1)
+    elif nprocs is not None:
+        warnings.warn(
+            "The --nprocs flag will be removed in a future release. It has been "
+            "renamed to --nworkers.",
+            FutureWarning,
+        )
+        n_workers = nprocs
+    elif n_workers is None:
+        n_workers = 1
+
     c = SSHCluster(
         scheduler,
         scheduler_port,
         hostnames,
         nthreads,
-        nprocs,
+        n_workers,
         ssh_username,
         ssh_port,
         ssh_private_key,
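
Both CLIs handle the --nprocs to --nworkers rename the same way: error out if both flags are given, emit a FutureWarning and alias the value if only the deprecated flag is given, and otherwise fall back to the default. A reduced, self-contained sketch of that pattern with a toy click command (not part of distributed):

    import sys
    import warnings

    import click


    @click.command()
    @click.option("--nprocs", default=None, type=int,
                  help="Deprecated alias for --nworkers.")
    @click.option("--nworkers", "n_workers", default=None, type=int,
                  help="Number of worker processes per host.")
    def main(nprocs, n_workers):
        # Same resolution order as dask-ssh/dask-worker: reject conflicting
        # flags, warn on the deprecated one, then fall back to the default.
        if nprocs is not None and n_workers is not None:
            click.echo("Use --nworkers only; --nprocs is a deprecated alias.", err=True)
            sys.exit(1)
        elif nprocs is not None:
            warnings.warn("--nprocs has been renamed to --nworkers.", FutureWarning)
            n_workers = nprocs
        elif n_workers is None:
            n_workers = 1
        click.echo(f"n_workers={n_workers}")


    if __name__ == "__main__":
        main()
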
diff -pruN 2022.01.0+ds.1-1/distributed/cli/dask_worker.py 2022.02.0+ds.1-1/distributed/cli/dask_worker.py
--- 2022.01.0+ds.1-1/distributed/cli/dask_worker.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/cli/dask_worker.py	2022-02-11 16:21:14.000000000 +0000
@@ -56,7 +56,7 @@ pem_file_option_type = click.Path(exists
     "--worker-port",
     default=None,
     help="Serving computation port, defaults to random. "
-    "When creating multiple workers with --nprocs, a sequential range of "
+    "When creating multiple workers with --nworkers, a sequential range of "
     "worker ports may be used by specifying the first and last available "
     "ports like <first-port>:<last-port>. For example, --worker-port=3000:3026 "
     "will use ports 3000, 3001, ..., 3025, 3026.",
@@ -65,7 +65,7 @@ pem_file_option_type = click.Path(exists
     "--nanny-port",
     default=None,
     help="Serving nanny port, defaults to random. "
-    "When creating multiple nannies with --nprocs, a sequential range of "
+    "When creating multiple nannies with --nworkers, a sequential range of "
     "nanny ports may be used by specifying the first and last available "
     "ports like <first-port>:<last-port>. For example, --nanny-port=3000:3026 "
     "will use ports 3000, 3001, ..., 3025, 3026.",
@@ -127,18 +127,28 @@ pem_file_option_type = click.Path(exists
 @click.option(
     "--nprocs",
     type=str,
-    default=1,
+    default=None,
     show_default=True,
-    help="Number of worker processes to launch. "
-    "If negative, then (CPU_COUNT + 1 + nprocs) is used. "
+    help="Deprecated. Use '--nworkers' instead. Number of worker processes to "
+    "launch. If negative, then (CPU_COUNT + 1 + nprocs) is used. "
     "Set to 'auto' to set nprocs and nthreads dynamically based on CPU_COUNT",
 )
 @click.option(
+    "--nworkers",
+    "n_workers",  # This sets the Python argument name
+    type=str,
+    default=None,
+    show_default=True,
+    help="Number of worker processes to launch. "
+    "If negative, then (CPU_COUNT + 1 + nworkers) is used. "
+    "Set to 'auto' to set nworkers and nthreads dynamically based on CPU_COUNT",
+)
+@click.option(
     "--name",
     type=str,
     default=None,
     help="A unique name for this worker like 'worker-1'. "
-    "If used with --nprocs then the process number "
+    "If used with --nworkers then the process number "
     "will be appended like name-0, name-1, name-2, ...",
 )
 @click.option(
@@ -174,7 +184,7 @@ pem_file_option_type = click.Path(exists
     default=None,
     help='Resources for task constraints like "GPU=2 MEM=10e9". '
     "Resources are applied separately to each worker process "
-    "(only relevant when starting multiple worker processes with '--nprocs').",
+    "(only relevant when starting multiple worker processes with '--nworkers').",
 )
 @click.option(
     "--scheduler-file",
@@ -250,6 +260,7 @@ def main(
     nanny_port,
     nthreads,
     nprocs,
+    n_workers,
     nanny,
     name,
     pid_file,
@@ -295,23 +306,38 @@ def main(
         if v is not None
     }
 
-    if nprocs == "auto":
-        nprocs, nthreads = nprocesses_nthreads()
+    if nprocs is not None and n_workers is not None:
+        logger.error(
+            "Both --nprocs and --nworkers were specified. Use --nworkers only."
+        )
+        sys.exit(1)
+    elif nprocs is not None:
+        warnings.warn(
+            "The --nprocs flag will be removed in a future release. It has been "
+            "renamed to --nworkers.",
+            FutureWarning,
+        )
+        n_workers = nprocs
+
+    if n_workers == "auto":
+        n_workers, nthreads = nprocesses_nthreads()
+    elif n_workers is None:
+        n_workers = 1
     else:
-        nprocs = int(nprocs)
+        n_workers = int(n_workers)
 
-    if nprocs < 0:
-        nprocs = CPU_COUNT + 1 + nprocs
+    if n_workers < 0:
+        n_workers = CPU_COUNT + 1 + n_workers
 
-    if nprocs <= 0:
+    if n_workers <= 0:
         logger.error(
-            "Failed to launch worker. Must specify --nprocs so that there's at least one process."
+            "Failed to launch worker. Must specify --nworkers so that there's at least one process."
         )
         sys.exit(1)
 
-    if nprocs > 1 and not nanny:
+    if n_workers > 1 and not nanny:
         logger.error(
-            "Failed to launch worker.  You cannot use the --no-nanny argument when nprocs > 1."
+            "Failed to launch worker.  You cannot use the --no-nanny argument when n_workers > 1."
         )
         sys.exit(1)
 
@@ -322,10 +348,10 @@ def main(
         )
         sys.exit(1)
 
-    if nprocs > 1 and listen_address:
+    if n_workers > 1 and listen_address:
         logger.error(
             "Failed to launch worker. "
-            "You cannot specify --listen-address when nprocs > 1."
+            "You cannot specify --listen-address when n_workers > 1."
         )
         sys.exit(1)
 
@@ -359,7 +385,7 @@ def main(
         port = worker_port
 
     if not nthreads:
-        nthreads = CPU_COUNT // nprocs
+        nthreads = CPU_COUNT // n_workers
 
     if pid_file:
         with open(pid_file, "w") as f:
@@ -420,11 +446,11 @@ def main(
             dashboard=dashboard,
             dashboard_address=dashboard_address,
             name=name
-            if nprocs == 1 or name is None or name == ""
+            if n_workers == 1 or name is None or name == ""
             else str(name) + "-" + str(i),
             **kwargs,
         )
-        for i in range(nprocs)
+        for i in range(n_workers)
     ]
 
     async def close_all():
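
To make the --nworkers semantics concrete, here is the resolution logic from main() above pulled into a small standalone helper; the 8-CPU figures in the comments are an assumption for illustration:

    from dask.system import CPU_COUNT  # e.g. 8 on an 8-core machine
    from distributed.deploy.utils import nprocesses_nthreads


    def resolve_n_workers(n_workers, nthreads=None):
        # Mirrors the resolution in main() above, minus the error handling.
        if n_workers == "auto":
            n_workers, nthreads = nprocesses_nthreads()
        elif n_workers is None:
            n_workers = 1
        else:
            n_workers = int(n_workers)
        if n_workers < 0:
            n_workers = CPU_COUNT + 1 + n_workers  # --nworkers=-1 -> CPU_COUNT
        if not nthreads:
            nthreads = CPU_COUNT // n_workers
        return n_workers, nthreads

    # On an 8-CPU machine: resolve_n_workers(-1) -> (8, 1); "auto" delegates
    # to nprocesses_nthreads(), which balances processes against threads.
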
diff -pruN 2022.01.0+ds.1-1/distributed/cli/tests/test_dask_ssh.py 2022.02.0+ds.1-1/distributed/cli/tests/test_dask_ssh.py
--- 2022.01.0+ds.1-1/distributed/cli/tests/test_dask_ssh.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/cli/tests/test_dask_ssh.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,9 +1,40 @@
+import pytest
 from click.testing import CliRunner
 
+from distributed import Client
 from distributed.cli.dask_ssh import main
+from distributed.compatibility import MACOS, WINDOWS
+from distributed.utils_test import popen
+
+pytestmark = [
+    pytest.mark.xfail(MACOS, reason="very high flakiness; see distributed/issues/4543"),
+    pytest.mark.skipif(WINDOWS, reason="no CI support; see distributed/issues/4509"),
+]
 
 
 def test_version_option():
     runner = CliRunner()
     result = runner.invoke(main, ["--version"])
     assert result.exit_code == 0
+
+
+def test_ssh_cli_nprocs_renamed_to_nworkers(loop):
+    n_workers = 2
+    with popen(
+        ["dask-ssh", f"--nprocs={n_workers}", "--nohost", "localhost"]
+    ) as cluster:
+        with Client("tcp://127.0.0.1:8786", timeout="15 seconds", loop=loop) as c:
+            c.wait_for_workers(n_workers, timeout="15 seconds")
+        # This interrupt is necessary for the cluster to place output into the stdout
+        # and stderr pipes
+        cluster.send_signal(2)
+        _, stderr = cluster.communicate()
+
+    assert any(b"renamed to --nworkers" in l for l in stderr.splitlines())
+
+
+def test_ssh_cli_nworkers_with_nprocs_is_an_error():
+    with popen(["dask-ssh", "localhost", "--nprocs=2", "--nworkers=2"]) as c:
+        assert any(
+            b"Both --nprocs and --nworkers" in c.stderr.readline() for i in range(15)
+        )
diff -pruN 2022.01.0+ds.1-1/distributed/cli/tests/test_dask_worker.py 2022.02.0+ds.1-1/distributed/cli/tests/test_dask_worker.py
--- 2022.01.0+ds.1-1/distributed/cli/tests/test_dask_worker.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/cli/tests/test_dask_worker.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,4 +1,7 @@
 import asyncio
+import contextvars
+import functools
+import sys
 
 import pytest
 from click.testing import CliRunner
@@ -19,13 +22,28 @@ from distributed.compatibility import LI
 from distributed.deploy.utils import nprocesses_nthreads
 from distributed.metrics import time
 from distributed.utils import parse_ports, sync
-from distributed.utils_test import (
-    gen_cluster,
-    popen,
-    requires_ipv6,
-    terminate_process,
-    wait_for_port,
-)
+from distributed.utils_test import gen_cluster, popen, requires_ipv6
+
+if sys.version_info >= (3, 9):
+    from asyncio import to_thread
+else:
+
+    async def to_thread(*func_args, **kwargs):
+        """Asynchronously run function *func* in a separate thread.
+        Any *args and **kwargs supplied for this function are directly passed
+        to *func*. Also, the current :class:`contextvars.Context` is propagated,
+        allowing context variables from the main thread to be accessed in the
+        separate thread.
+        Return a coroutine that can be awaited to get the eventual result of *func*.
+
+        backport from
+        https://github.com/python/cpython/blob/3f1ea163ea54513e00e0e9d5442fee1b639825cc/Lib/asyncio/threads.py#L12-L25
+        """
+        func, *args = func_args
+        loop = asyncio.get_running_loop()
+        ctx = contextvars.copy_context()
+        func_call = functools.partial(ctx.run, func, *args, **kwargs)
+        return await loop.run_in_executor(None, func_call)
 
 
 def test_nanny_worker_ports(loop):
@@ -61,15 +79,15 @@ def test_nanny_worker_ports(loop):
 @pytest.mark.slow
 def test_nanny_worker_port_range(loop):
     with popen(["dask-scheduler", "--port", "9359", "--no-dashboard"]) as sched:
-        nprocs = 3
+        n_workers = 3
         worker_port = "9684:9686"
         nanny_port = "9688:9690"
         with popen(
             [
                 "dask-worker",
                 "127.0.0.1:9359",
-                "--nprocs",
-                f"{nprocs}",
+                "--nworkers",
+                f"{n_workers}",
                 "--host",
                 "127.0.0.1",
                 "--worker-port",
@@ -81,7 +99,7 @@ def test_nanny_worker_port_range(loop):
         ):
             with Client("127.0.0.1:9359", loop=loop) as c:
                 start = time()
-                while len(c.scheduler_info()["workers"]) < nprocs:
+                while len(c.scheduler_info()["workers"]) < n_workers:
                     sleep(0.1)
                     assert time() - start < 60
 
@@ -103,7 +121,7 @@ def test_nanny_worker_port_range_too_man
             [
                 "dask-worker",
                 "127.0.0.1:9359",
-                "--nprocs",
+                "--nworkers",
                 "3",
                 "--host",
                 "127.0.0.1",
@@ -149,24 +167,60 @@ def test_no_nanny(loop):
 
 @pytest.mark.slow
 @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
-def test_no_reconnect(nanny, loop):
-    with popen(["dask-scheduler", "--no-dashboard"]) as sched:
-        wait_for_port(("127.0.0.1", 8786))
+@pytest.mark.asyncio
+async def test_no_reconnect(nanny):
+    async with Scheduler(dashboard_address=":0") as s, Client(
+        s.address, asynchronous=True
+    ) as c:
         with popen(
             [
                 "dask-worker",
-                "tcp://127.0.0.1:8786",
+                s.address,
                 "--no-reconnect",
                 nanny,
                 "--no-dashboard",
             ]
         ) as worker:
-            sleep(2)
-            terminate_process(sched)
-        start = time()
-        while worker.poll() is None:
-            sleep(0.1)
-            assert time() < start + 30
+            # roundtrip works
+            assert await c.submit(lambda x: x + 1, 10) == 11
+
+            (comm,) = s.stream_comms.values()
+            comm.abort()
+
+            # worker terminates as soon as the connection is aborted
+            await to_thread(worker.communicate, timeout=5)
+            assert worker.returncode == 0
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
+@pytest.mark.asyncio
+async def test_reconnect(nanny):
+    async with Scheduler(dashboard_address=":0") as s, Client(
+        s.address, asynchronous=True
+    ) as c:
+        with popen(
+            [
+                "dask-worker",
+                s.address,
+                "--reconnect",
+                nanny,
+                "--no-dashboard",
+            ]
+        ) as worker:
+            # roundtrip works
+            assert await c.submit(lambda x: x + 1, 10) == 11
+
+            (comm,) = s.stream_comms.values()
+            comm.abort()
+
+            # roundtrip still works, which means the worker reconnected
+            assert await c.submit(lambda x: x + 1, 11) == 12
+
+            # closing the scheduler cleanly does terminate the worker
+            await s.close()
+            await to_thread(worker.communicate, timeout=5)
+            assert worker.returncode == 0
 
 
 def test_resources(loop):
@@ -237,10 +291,10 @@ def test_scheduler_address_env(loop, mon
                     assert time() < start + 10
 
 
-def test_nprocs_requires_nanny(loop):
+def test_nworkers_requires_nanny(loop):
     with popen(["dask-scheduler", "--no-dashboard"]):
         with popen(
-            ["dask-worker", "127.0.0.1:8786", "--nprocs=2", "--no-nanny"]
+            ["dask-worker", "127.0.0.1:8786", "--nworkers=2", "--no-nanny"]
         ) as worker:
             assert any(
                 b"Failed to launch worker" in worker.stderr.readline()
@@ -248,25 +302,25 @@ def test_nprocs_requires_nanny(loop):
             )
 
 
-def test_nprocs_negative(loop):
+def test_nworkers_negative(loop):
     with popen(["dask-scheduler", "--no-dashboard"]):
-        with popen(["dask-worker", "127.0.0.1:8786", "--nprocs=-1"]):
+        with popen(["dask-worker", "127.0.0.1:8786", "--nworkers=-1"]):
             with Client("tcp://127.0.0.1:8786", loop=loop) as c:
                 c.wait_for_workers(cpu_count(), timeout="10 seconds")
 
 
-def test_nprocs_auto(loop):
+def test_nworkers_auto(loop):
     with popen(["dask-scheduler", "--no-dashboard"]):
-        with popen(["dask-worker", "127.0.0.1:8786", "--nprocs=auto"]):
+        with popen(["dask-worker", "127.0.0.1:8786", "--nworkers=auto"]):
             with Client("tcp://127.0.0.1:8786", loop=loop) as c:
                 procs, _ = nprocesses_nthreads()
                 c.wait_for_workers(procs, timeout="10 seconds")
 
 
-def test_nprocs_expands_name(loop):
+def test_nworkers_expands_name(loop):
     with popen(["dask-scheduler", "--no-dashboard"]):
-        with popen(["dask-worker", "127.0.0.1:8786", "--nprocs", "2", "--name", "0"]):
-            with popen(["dask-worker", "127.0.0.1:8786", "--nprocs", "2"]):
+        with popen(["dask-worker", "127.0.0.1:8786", "--nworkers", "2", "--name", "0"]):
+            with popen(["dask-worker", "127.0.0.1:8786", "--nworkers", "2"]):
                 with Client("tcp://127.0.0.1:8786", loop=loop) as c:
                     start = time()
                     while len(c.scheduler_info()["workers"]) < 4:
@@ -280,6 +334,30 @@ def test_nprocs_expands_name(loop):
                     assert len(set(names)) == 4
 
 
+def test_worker_cli_nprocs_renamed_to_nworkers(loop):
+    n_workers = 2
+    with popen(["dask-scheduler", "--no-dashboard"]):
+        with popen(
+            ["dask-worker", "127.0.0.1:8786", f"--nprocs={n_workers}"]
+        ) as worker:
+            assert any(
+                b"renamed to --nworkers" in worker.stderr.readline() for i in range(15)
+            )
+            with Client("tcp://127.0.0.1:8786", loop=loop) as c:
+                c.wait_for_workers(n_workers, timeout="30 seconds")
+
+
+def test_worker_cli_nworkers_with_nprocs_is_an_error():
+    with popen(["dask-scheduler", "--no-dashboard"]):
+        with popen(
+            ["dask-worker", "127.0.0.1:8786", "--nprocs=2", "--nworkers=2"]
+        ) as worker:
+            assert any(
+                b"Both --nprocs and --nworkers" in worker.stderr.readline()
+                for i in range(15)
+            )
+
+
 @pytest.mark.skipif(not LINUX, reason="Need 127.0.0.2 to mean localhost")
 @pytest.mark.parametrize("nanny", ["--nanny", "--no-nanny"])
 @pytest.mark.parametrize(
diff -pruN 2022.01.0+ds.1-1/distributed/client.py 2022.02.0+ds.1-1/distributed/client.py
--- 2022.01.0+ds.1-1/distributed/client.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/client.py	2022-02-11 16:21:14.000000000 +0000
@@ -86,6 +86,7 @@ from .utils import (
     TimeoutError,
     format_dashboard_link,
     has_keyword,
+    import_term,
     log_errors,
     no_default,
     sync,
@@ -156,6 +157,8 @@ class Future(WrappedKey):
         Client that should own this future.  Defaults to _get_global_client()
     inform: bool
         Do we inform the scheduler that we need an update on this future
+    state: FutureState
+        The state of the future
 
     Examples
     --------
@@ -212,21 +215,55 @@ class Future(WrappedKey):
 
     @property
     def executor(self):
+        """Returns the executor, which is the client.
+
+        Returns
+        -------
+        Client
+            The executor
+        """
         return self.client
 
     @property
     def status(self):
+        """Returns the status
+
+        Returns
+        -------
+        str
+            The status
+        """
         return self._state.status
 
     def done(self):
-        """Is the computation complete?"""
+        """Returns whether or not the computation completed.
+
+        Returns
+        -------
+        bool
+            True if the computation is complete, otherwise False
+        """
         return self._state.done()
 
     def result(self, timeout=None):
         """Wait until computation completes, gather result to local process.
 
-        If *timeout* seconds are elapsed before returning, a
-        ``dask.distributed.TimeoutError`` is raised.
+        Parameters
+        ----------
+        timeout : number, optional
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
+
+        Raises
+        ------
+        dask.distributed.TimeoutError
+            If *timeout* seconds are elapsed before returning, a
+            ``dask.distributed.TimeoutError`` is raised.
+
+        Returns
+        -------
+        result
+            The result of the computation, or a coroutine if the client is
+            asynchronous.
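+
+        Examples
+        --------
+        A minimal illustration, assuming ``future`` was returned by
+        ``Client.submit``:
+
+        >>> future.result()  # doctest: +SKIP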
         """
         if self.client.asynchronous:
             return self.client.sync(self._result, callback_timeout=timeout)
@@ -270,8 +307,20 @@ class Future(WrappedKey):
     def exception(self, timeout=None, **kwargs):
         """Return the exception of a failed task
 
-        If *timeout* seconds are elapsed before returning, a
-        ``dask.distributed.TimeoutError`` is raised.
+        Parameters
+        ----------
+        timeout : number, optional
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
+        **kwargs : dict
+            Optional keyword arguments for the function
+
+        Returns
+        -------
+        Exception
+            The exception that was raised. If *timeout* seconds are elapsed
+            before returning, a ``dask.distributed.TimeoutError`` is raised
+            instead.
 
         See Also
         --------
@@ -287,6 +336,11 @@ class Future(WrappedKey):
         errs, or is cancelled
 
         The callback is executed in a separate thread.
+
+        Parameters
+        ----------
+        fn : callable
+            The method or function to be called
         """
         cls = Future
         if cls._cb_executor is None or cls._cb_executor_pid != os.getpid():
@@ -309,7 +363,7 @@ class Future(WrappedKey):
         )
 
     def cancel(self, **kwargs):
-        """Cancel request to run this future
+        """Cancel the request to run this future
 
         See Also
         --------
@@ -327,7 +381,13 @@ class Future(WrappedKey):
         return self.client.retry([self], **kwargs)
 
     def cancelled(self):
-        """Returns True if the future has been cancelled"""
+        """Returns True if the future has been cancelled
+
+        Returns
+        -------
+        bool
+            True if the future was 'cancelled', otherwise False
+        """
         return self._state.status == "cancelled"
 
     async def _traceback(self):
@@ -344,8 +404,13 @@ class Future(WrappedKey):
         ``traceback`` module.  Alternatively if you call ``future.result()``
         this traceback will accompany the raised exception.
 
-        If *timeout* seconds are elapsed before returning, a
-        ``dask.distributed.TimeoutError`` is raised.
+        Parameters
+        ----------
+        timeout : number, optional
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
 
         Examples
         --------
@@ -354,6 +419,11 @@ class Future(WrappedKey):
         >>> traceback.format_tb(tb)  # doctest: +SKIP
         [...]
 
+        Returns
+        -------
+        traceback
+            The traceback object, or a coroutine if the client is
+            asynchronous.
+
         See Also
         --------
         Future.exception
@@ -362,11 +432,16 @@ class Future(WrappedKey):
 
     @property
     def type(self):
+        """Returns the type"""
         return self._state.type
 
-    def release(self, _in_destructor=False):
-        # NOTE: this method can be called from different threads
-        # (see e.g. Client.get() or Future.__del__())
+    def release(self):
+        """
+        Notes
+        -----
+        This method can be called from different threads
+        (see e.g. Client.get() or Future.__del__())
+        """
         if not self._cleared and self.client.generation == self._generation:
             self._cleared = True
             try:
@@ -446,25 +521,47 @@ class FutureState:
         return event
 
     def cancel(self):
+        """Cancels the operation"""
         self.status = "cancelled"
         self.exception = CancelledError()
         self._get_event().set()
 
     def finish(self, type=None):
+        """Sets the status to 'finished' and sets the event
+
+        Parameters
+        ----------
+        type : any
+            The type
+        """
         self.status = "finished"
         self._get_event().set()
         if type is not None:
             self.type = type
 
     def lose(self):
+        """Sets the status to 'lost' and clears the event"""
         self.status = "lost"
         self._get_event().clear()
 
     def retry(self):
+        """Sets the status to 'pending' and clears the event"""
         self.status = "pending"
         self._get_event().clear()
 
     def set_error(self, exception, traceback):
+        """Sets the error data
+
+        Sets the status to 'error'. Sets the exception, the traceback,
+        and the event
+
+        Parameters
+        ----------
+        exception : Exception
+            The exception
+        traceback : traceback object
+            The traceback
+        """
         _, exception, traceback = clean_exception(exception, traceback)
 
         self.status = "error"
@@ -473,14 +570,24 @@ class FutureState:
         self._get_event().set()
 
     def done(self):
+        """Returns 'True' if the event is not None and the event is set"""
         return self._event is not None and self._event.is_set()
 
     def reset(self):
+        """Sets the status to 'pending' and clears the event"""
         self.status = "pending"
         if self._event is not None:
             self._event.clear()
 
     async def wait(self, timeout=None):
+        """Wait for the awaitable to complete with a timeout.
+
+        Parameters
+        ----------
+        timeout : number, optional
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
+        """
         await asyncio.wait_for(self._get_event().wait(), timeout)
 
     def __repr__(self):
@@ -488,7 +595,15 @@ class FutureState:
 
 
 async def done_callback(future, callback):
-    """Coroutine that waits on future, then calls callback"""
+    """Coroutine that waits on the future, then calls the callback
+
+    Parameters
+    ----------
+    future : asyncio.Future
+        The future
+    callback : callable
+        The callback
+    """
     while future.status == "pending":
         await future._state.wait()
     callback(future)
@@ -496,6 +611,13 @@ async def done_callback(future, callback
 
 @partial(normalize_token.register, Future)
 def normalize_future(f):
+    """Returns the key and the type as a list
+
+    Parameters
+    ----------
+    f : Future
+        The future
+
+    Returns
+    -------
+    list
+        The key and the type
+    """
     return [f.key, type(f)]
 
 
@@ -519,6 +641,21 @@ def _handle_warn(event):
         warnings.warn(msg)
 
 
+def _maybe_call_security_loader(address):
+    security_loader_term = dask.config.get("distributed.client.security-loader")
+    if security_loader_term:
+        try:
+            security_loader = import_term(security_loader_term)
+        except Exception as exc:
+            raise ImportError(
+                f"Failed to import `{security_loader_term}` configured at "
+                f"`distributed.client.security-loader` - is this module "
+                f"installed?"
+            ) from exc
+        return security_loader({"address": address})
+    return None
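+
+# A minimal sketch of a security loader, assuming it is exposed as
+# ``mymodule.load_security`` and configured at
+# ``distributed.client.security-loader``.  It receives ``{"address": address}``
+# and returns a ``Security`` (or ``None`` to fall back to the defaults):
+#
+#     def load_security(info):
+#         from distributed.security import Security
+#         # look up credentials for info["address"] here
+#         return Security(require_encryption=True)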
+
+
 class Client(SyncMethodMixin):
     """Connect to and submit computation to a Dask cluster
 
@@ -539,6 +676,8 @@ class Client(SyncMethodMixin):
     address: string, or Cluster
         This can be the address of a ``Scheduler`` server like a string
         ``'127.0.0.1:8786'`` or a cluster object like ``LocalCluster()``
+    loop
+        The event loop
     timeout: int
         Timeout duration for initial connection to the scheduler
     set_as_default: bool (True)
@@ -556,11 +695,22 @@ class Client(SyncMethodMixin):
     name: string (optional)
         Gives the client a name that will be included in logs generated on
         the scheduler for matters relating to this client
+    heartbeat_interval: int (optional)
+        Time in milliseconds between heartbeats to scheduler
+    serializers
+        Iterable of approaches to use when serializing the object.
+        See :ref:`serialization` for more.
+    deserializers
+        Iterable of approaches to use when deserializing the object.
+        See :ref:`serialization` for more.
+    extensions : list
+        The extensions
     direct_to_workers: bool (optional)
         Whether or not to connect directly to the workers, or to ask
         the scheduler to serve as intermediary.
-    heartbeat_interval: int
-        Time in milliseconds between heartbeats to scheduler
+    connection_limit : int
+        The number of open comms to maintain at once in the connection pool
+
     **kwargs:
         If you do not pass a scheduler address, Client will create a
         ``LocalCluster`` object, passing any extra keyword arguments.
@@ -630,7 +780,7 @@ class Client(SyncMethodMixin):
 
         self.futures = dict()
         self.refcount = defaultdict(lambda: 0)
-        self.coroutines = []
+        self._handle_report_task = None
         if name is None:
             name = dask.config.get("client-name", None)
         self.id = (
@@ -686,6 +836,11 @@ class Client(SyncMethodMixin):
                 )
             )
 
+        # If connecting to an address and no explicit security is configured, attempt
+        # to load security credentials with a security loader (if configured).
+        if security is None and isinstance(address, str):
+            security = _maybe_call_security_loader(address)
+
         if security is None:
             security = Security()
         elif isinstance(security, dict):
@@ -774,9 +929,9 @@ class Client(SyncMethodMixin):
 
     @contextmanager
     def as_current(self):
-        """Thread-local, Task-local context manager that causes the Client.current class
-        method to return self. Any Future objects deserialized inside this context
-        manager will be automatically attached to this Client.
+        """Thread-local, Task-local context manager that causes the Client.current
+        class method to return self. Any Future objects deserialized inside this
+        context manager will be automatically attached to this Client.
         """
         tok = _current_client.set(self)
         try:
@@ -789,8 +944,23 @@ class Client(SyncMethodMixin):
         """When running within the context of `as_client`, return the context-local
         current client. Otherwise, return the latest initialised Client.
         If no Client instances exist, raise ValueError.
-        If allow_global is set to False, raise ValueError if running outside of the
-        `as_client` context manager.
+        If allow_global is set to False, raise ValueError if running outside of
+        the `as_client` context manager.
+
+        Parameters
+        ----------
+        allow_global : bool
+            If True returns the default client
+
+        Returns
+        -------
+        Client
+            The current client
+
+        Raises
+        ------
+        ValueError
+            If there is no client set, a ValueError is raised
         """
         out = _current_client.get()
         if out:
@@ -1011,8 +1181,7 @@ class Client(SyncMethodMixin):
         for topic, handler in Client._default_event_handlers.items():
             self.subscribe_topic(topic, handler)
 
-        self._handle_scheduler_coroutine = asyncio.ensure_future(self._handle_report())
-        self.coroutines.append(self._handle_scheduler_coroutine)
+        self._handle_report_task = asyncio.create_task(self._handle_report())
 
         return self
 
@@ -1028,7 +1197,7 @@ class Client(SyncMethodMixin):
             self.futures.clear()
 
             timeout = self._timeout
-            deadline = self.loop.time() + timeout
+            deadline = time() + timeout
             while timeout > 0 and self.status == "connecting":
                 try:
                     await self._ensure_connected(timeout=timeout)
@@ -1036,7 +1205,7 @@ class Client(SyncMethodMixin):
                 except OSError:
                     # Wait a bit before retrying
                     await asyncio.sleep(0.1)
-                    timeout = deadline - self.loop.time()
+                    timeout = deadline - time()
                 except ImportError:
                     await self._close()
                     break
@@ -1118,6 +1287,7 @@ class Client(SyncMethodMixin):
 
     async def _wait_for_workers(self, n_workers=0, timeout=None):
         info = await self.scheduler.identity()
+        self._scheduler_identity = SchedulerInfo(info)
         if timeout:
             deadline = time() + parse_timedelta(timeout)
         else:
@@ -1130,9 +1300,19 @@ class Client(SyncMethodMixin):
                 )
             await asyncio.sleep(0.1)
             info = await self.scheduler.identity()
+            self._scheduler_identity = SchedulerInfo(info)
 
     def wait_for_workers(self, n_workers=0, timeout=None):
-        """Blocking call to wait for n workers before continuing"""
+        """Blocking call to wait for n workers before continuing
+
+        Parameters
+        ----------
+        n_workers : int
+            The number of workers
+        timeout : number, optional
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
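+
+        Examples
+        --------
+        A minimal illustration, assuming ``c`` is a connected client:
+
+        >>> c.wait_for_workers(n_workers=2)  # doctest: +SKIP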
+        """
         return self.sync(self._wait_for_workers, n_workers, timeout=timeout)
 
     def _heartbeat(self):
@@ -1304,12 +1484,16 @@ class Client(SyncMethodMixin):
                 self._send_to_scheduler({"op": "close-client"})
                 self._send_to_scheduler({"op": "close-stream"})
 
+            current_task = asyncio.current_task()
+            handle_report_task = self._handle_report_task
             # Give the scheduler 'stream-closed' message 100ms to come through
             # This makes the shutdown slightly smoother and quieter
-            with suppress(AttributeError, asyncio.CancelledError, TimeoutError):
-                await asyncio.wait_for(
-                    asyncio.shield(self._handle_scheduler_coroutine), 0.1
-                )
+            if (
+                handle_report_task is not None
+                and handle_report_task is not current_task
+            ):
+                with suppress(asyncio.CancelledError, TimeoutError):
+                    await asyncio.wait_for(asyncio.shield(handle_report_task), 0.1)
 
             if (
                 self.scheduler_comm
@@ -1332,19 +1516,12 @@ class Client(SyncMethodMixin):
             if _get_global_client() is self:
                 _set_global_client(None)
 
-            coroutines = set(self.coroutines)
-            for f in self.coroutines:
-                # cancel() works on asyncio futures (Tornado 5)
-                # but is a no-op on Tornado futures
-                with suppress(RuntimeError):
-                    f.cancel()
-                if f.cancelled():
-                    coroutines.remove(f)
-            del self.coroutines[:]
-
-            if not fast:
+            if (
+                handle_report_task is not None
+                and handle_report_task is not current_task
+            ):
                 with suppress(TimeoutError, asyncio.CancelledError):
-                    await asyncio.wait_for(asyncio.gather(*coroutines), 2)
+                    await asyncio.wait_for(handle_report_task, 0 if fast else 2)
 
             with suppress(AttributeError):
                 await self.scheduler.close_rpc()
@@ -1361,6 +1538,13 @@ class Client(SyncMethodMixin):
         If you started a client without arguments like ``Client()`` then this
         will also close the local cluster that was started at the same time.
 
+
+        Parameters
+        ----------
+        timeout : number
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
+
         See Also
         --------
         Client.restart
@@ -1424,7 +1608,8 @@ class Client(SyncMethodMixin):
 
     def get_executor(self, **kwargs):
         """
-        Return a concurrent.futures Executor for submitting tasks on this Client
+        Return a concurrent.futures Executor for submitting tasks on this
+        Client
 
         Parameters
         ----------
@@ -1434,8 +1619,9 @@ class Client(SyncMethodMixin):
 
         Returns
         -------
-        An Executor object that's fully compatible with the concurrent.futures
-        API.
+        ClientExecutor
+            An Executor object that's fully compatible with the
+            concurrent.futures API.
         """
         return ClientExecutor(self, **kwargs)
 
@@ -1465,20 +1651,18 @@ class Client(SyncMethodMixin):
             coroutine, it will be run on the main event loop of a worker. Otherwise
             ``func`` will be run in a worker's task executor pool (see
             ``Worker.executors`` for more information.)
-        *args
-        **kwargs
-        pure : bool (defaults to True)
-            Whether or not the function is pure.  Set ``pure=False`` for
-            impure functions like ``np.random.random``.
-            See :ref:`pure functions` for more details.
+        *args : tuple
+            Optional positional arguments
+        key : str
+            Unique identifier for the task.  Defaults to function-name and hash
         workers : string or iterable of strings
             A set of worker addresses or hostnames on which computations may be
             performed. Leave empty to default to all workers (common case)
-        key : str
-            Unique identifier for the task.  Defaults to function-name and hash
-        allow_other_workers : bool (defaults to False)
-            Used with ``workers``. Indicates whether or not the computations
-            may be performed on workers that are not in the `workers` set(s).
+        resources : dict (defaults to {})
+            Defines the ``resources`` each instance of this mapped task
+            requires on the worker; e.g. ``{'GPU': 2}``.
+            See :doc:`worker resources <resources>` for details on defining
+            resources.
         retries : int (default to 0)
             Number of allowed automatic retries if the task fails
         priority : Number
@@ -1486,16 +1670,19 @@ class Client(SyncMethodMixin):
             Higher priorities take precedence
         fifo_timeout : str timedelta (default '100ms')
             Allowed amount of time between calls to consider the same priority
-        resources : dict (defaults to {})
-            Defines the ``resources`` each instance of this mapped task requires
-            on the worker; e.g. ``{'GPU': 2}``.
-            See :doc:`worker resources <resources>` for details on defining
-            resources.
+        allow_other_workers : bool (defaults to False)
+            Used with ``workers``. Indicates whether or not the computations
+            may be performed on workers that are not in the `workers` set(s).
         actor : bool (default False)
             Whether this task should exist on the worker as a stateful actor.
             See :doc:`actors` for additional details.
         actors : bool (default False)
             Alias for `actor`
+        pure : bool (defaults to True)
+            Whether or not the function is pure.  Set ``pure=False`` for
+            impure functions like ``np.random.random``.
+            See :ref:`pure functions` for more details.
+        **kwargs
 
         Examples
         --------
@@ -1504,6 +1691,16 @@ class Client(SyncMethodMixin):
         Returns
         -------
         Future
+            If running in asynchronous mode, returns the future. Otherwise
+            returns the concrete value
+
+        Raises
+        ------
+        TypeError
+            If 'func' is not callable, a TypeError is raised
+        ValueError
+            If 'allow_other_workers' is True and 'workers' is None, a
+            ValueError is raised
 
         See Also
         --------
@@ -1591,40 +1788,41 @@ class Client(SyncMethodMixin):
             List-like objects to map over.  They should have the same length.
         key : str, list
             Prefix for task names if string.  Explicit names if list.
-        pure : bool (defaults to True)
-            Whether or not the function is pure.  Set ``pure=False`` for
-            impure functions like ``np.random.random``.
-            See :ref:`pure functions` for more details.
         workers : string or iterable of strings
             A set of worker hostnames on which computations may be performed.
             Leave empty to default to all workers (common case)
-        allow_other_workers : bool (defaults to False)
-            Used with `workers`. Indicates whether or not the computations
-            may be performed on workers that are not in the `workers` set(s).
         retries : int (default to 0)
             Number of allowed automatic retries if a task fails
-        priority : Number
-            Optional prioritization of task.  Zero is default.
-            Higher priorities take precedence
-        fifo_timeout : str timedelta (default '100ms')
-            Allowed amount of time between calls to consider the same priority
         resources : dict (defaults to {})
             Defines the `resources` each instance of this mapped task requires
             on the worker; e.g. ``{'GPU': 2}``.
             See :doc:`worker resources <resources>` for details on defining
             resources.
+        priority : Number
+            Optional prioritization of task.  Zero is default.
+            Higher priorities take precedence
+        allow_other_workers : bool (defaults to False)
+            Used with `workers`. Indicates whether or not the computations
+            may be performed on workers that are not in the `workers` set(s).
+        fifo_timeout : str timedelta (default '100ms')
+            Allowed amount of time between calls to consider the same priority
         actor : bool (default False)
             Whether these tasks should exist on the worker as stateful actors.
             See :doc:`actors` for additional details.
         actors : bool (default False)
             Alias for `actor`
+        pure : bool (defaults to True)
+            Whether or not the function is pure.  Set ``pure=False`` for
+            impure functions like ``np.random.random``.
+            See :ref:`pure functions` for more details.
         batch_size : int, optional
-            Submit tasks to the scheduler in batches of (at most) ``batch_size``.
+            Submit tasks to the scheduler in batches of (at most)
+            ``batch_size``.
             Larger batch sizes can be useful for very large ``iterables``,
             as the cluster can start processing tasks while later ones are
             submitted asynchronously.
         **kwargs : dict
-            Extra keywords to send to the function.
+            Extra keyword arguments to send to the function.
             Large values will be included explicitly in the task graph.
 
         Examples
@@ -1910,6 +2108,8 @@ class Client(SyncMethodMixin):
             Whether or not to connect directly to the workers, or to ask
             the scheduler to serve as intermediary.  This can also be set when
             creating the Client.
+        asynchronous: bool
+            If True the client is in asynchronous mode
 
         Returns
         -------
@@ -2085,7 +2285,8 @@ class Client(SyncMethodMixin):
             Data to scatter out to workers.  Output type matches input type.
         workers : list of tuples (optional)
             Optionally constrain locations of data.
-            Specify workers as hostname/port pairs, e.g. ``('127.0.0.1', 8787)``.
+            Specify workers as hostname/port pairs, e.g.
+            ``('127.0.0.1', 8787)``.
         broadcast : bool (defaults to False)
             Whether to send each data element to all workers.
             By default we round-robin based on number of cores.
@@ -2096,6 +2297,11 @@ class Client(SyncMethodMixin):
         hash : bool (optional)
             Whether or not to hash data to determine key.
             If False then this uses a random key
+        timeout : number, optional
+            Time in seconds after which to raise a
+            ``dask.distributed.TimeoutError``
+        asynchronous: bool
+            If True the client is in asynchronous mode
 
         Returns
         -------
@@ -2177,7 +2383,10 @@ class Client(SyncMethodMixin):
 
         Parameters
         ----------
-        futures : list of Futures
+        futures : List[Future]
+            The list of Futures
+        asynchronous: bool
+            If True the client is in asynchronous mode
         force : boolean (False)
             Cancel this future even if other clients desire it
         """
@@ -2197,6 +2406,9 @@ class Client(SyncMethodMixin):
         Parameters
         ----------
         futures : list of Futures
+            The list of Futures
+        asynchronous: bool
+            If True the client is in asynchronous mode
         """
         return self.sync(self._retry, futures, asynchronous=asynchronous)
 
@@ -2246,9 +2458,6 @@ class Client(SyncMethodMixin):
         Parameters
         ----------
         args : list of objects to publish as name
-        name : optional name of the dataset to publish
-        override : bool (optional, default False)
-            if true, override any already present dataset with the same name
         kwargs : dict
             named collections to publish on the scheduler
 
@@ -2286,6 +2495,11 @@ class Client(SyncMethodMixin):
         """
         Remove named datasets from scheduler
 
+        Parameters
+        ----------
+        name : str
+            The name of the dataset to unpublish
+
         Examples
         --------
         >>> c.list_datasets()  # doctest: +SKIP
@@ -2329,11 +2543,18 @@ class Client(SyncMethodMixin):
 
         Parameters
         ----------
-        name : name of the dataset to retrieve
-        default : optional, not set by default
-            If set, do not raise a KeyError if the name is not present but return this default
+        name : str
+            name of the dataset to retrieve
+        default : optional
+            If set, do not raise a KeyError if the name is not present but
+            return this default instead
         kwargs : dict
-            additional arguments to _get_dataset
+            additional keyword arguments to _get_dataset
+
+        Returns
+        -------
+        The dataset from the scheduler, if present
 
         See Also
         --------
@@ -2362,6 +2583,15 @@ class Client(SyncMethodMixin):
         keyword argument ``dask_scheduler=``, which will be given the scheduler
         object itself.
 
+        Parameters
+        ----------
+        function : callable
+            The function to run on the scheduler process
+        *args : tuple
+            Optional arguments for the function
+        **kwargs : dict
+            Optional keyword arguments for the function
+
         Examples
         --------
         >>> def get_number_of_tasks(dask_scheduler=None):
@@ -2452,7 +2682,7 @@ class Client(SyncMethodMixin):
         This calls a function on all currently known workers immediately,
         blocks until those results come back, and returns the results
         asynchronously as a dictionary keyed by worker address.  This method
-        if generally used for side effects, such and collecting diagnostic
+        is generally used for side effects such as collecting diagnostic
         information or installing libraries.
 
         If your function takes an input argument named ``dask_worker`` then
@@ -2461,10 +2691,14 @@ class Client(SyncMethodMixin):
         Parameters
         ----------
         function : callable
-        *args : arguments for remote function
-        **kwargs : keyword arguments for remote function
+            The function to run
+        *args : tuple
+            Optional arguments for the remote function
+        **kwargs : dict
+            Optional keyword arguments for the remote function
         workers : list
-            Workers on which to run the function. Defaults to all known workers.
+            Workers on which to run the function. Defaults to all known
+            workers.
         wait : boolean (optional)
             If the function is asynchronous whether or not to wait until that
             function finishes.
@@ -2540,13 +2774,10 @@ class Client(SyncMethodMixin):
         function : a coroutine function
             (typically a function wrapped in gen.coroutine or
              a Python 3.5+ async function)
-        *args : arguments for remote function
-        **kwargs : keyword arguments for remote function
-        wait : boolean (default True)
-            Whether to wait for coroutines to end.
-        workers : list
-            Workers on which to run the function. Defaults to all known workers.
-
+        *args : tuple
+            Optional arguments for the remote function
+        **kwargs : dict
+            Optional keyword arguments for the remote function
         """
         return self.run(function, *args, **kwargs)
 
@@ -2695,22 +2926,39 @@ class Client(SyncMethodMixin):
         allow_other_workers : bool (defaults to False)
             Used with ``workers``. Indicates whether or not the computations
             may be performed on workers that are not in the `workers` set(s).
-        retries : int (default to 0)
-            Number of allowed automatic retries if computing a result fails
-        priority : Number
-            Optional prioritization of task.  Zero is default.
-            Higher priorities take precedence
         resources : dict (defaults to {})
-            Defines the ``resources`` each instance of this mapped task requires
-            on the worker; e.g. ``{'GPU': 2}``.
+            Defines the ``resources`` each instance of this mapped task
+            requires on the worker; e.g. ``{'GPU': 2}``.
             See :doc:`worker resources <resources>` for details on defining
             resources.
         sync : bool (optional)
             Returns Futures if False or concrete values if True (default).
+        asynchronous: bool
+            If True the client is in asynchronous mode
         direct : bool
             Whether or not to connect directly to the workers, or to ask
             the scheduler to serve as intermediary.  This can also be set when
             creating the Client.
+        retries : int (default to 0)
+            Number of allowed automatic retries if computing a result fails
+        priority : Number
+            Optional prioritization of task.  Zero is default.
+            Higher priorities take precedence
+        fifo_timeout : timedelta str (defaults to '60s')
+            Allowed amount of time between calls to consider the same priority
+        actors : bool or dict (default None)
+            Whether these tasks should exist on the worker as stateful actors.
+            Specified on a global (True/False) or per-task (``{'x': True,
+            'y': False}``) basis. See :doc:`actors` for additional details.
+
+
+        Returns
+        -------
+        results
+            If 'sync' is True, returns the results. Otherwise, returns the
+            known data packed.
 
         Examples
         --------
@@ -2784,6 +3032,16 @@ class Client(SyncMethodMixin):
         known futures within the scheduler.  It returns a copy of the
         collection with a task graph that includes the overlapping futures.
 
+        Parameters
+        ----------
+        collection : dask object
+            Collection like dask.array or dataframe or dask.value objects
+
+        Returns
+        -------
+        collection : dask object
+            Collection with its tasks replaced with any existing futures.
+
         Examples
         --------
         >>> len(x.__dask_graph__())  # x is a dask collection with 100 tasks  # doctest: +SKIP
@@ -3118,6 +3376,8 @@ class Client(SyncMethodMixin):
         ----------
         filename : string
             Filename of .py, .egg or .zip file to send to workers
+        **kwargs : dict
+            Optional keyword arguments for the function
 
         Examples
         --------
@@ -3161,6 +3421,8 @@ class Client(SyncMethodMixin):
             A list of futures to balance, defaults all data
         workers : list, optional
             A list of workers on which to balance, defaults to all workers
+        **kwargs : dict
+            Optional keyword arguments for the function
         """
         return self.sync(self._rebalance, futures, workers, **kwargs)
 
@@ -3194,6 +3456,8 @@ class Client(SyncMethodMixin):
             Defaults to all.
         branching_factor : int, optional
             The number of workers that can copy data in each generation
+        **kwargs : dict
+            Optional keyword arguments for the remote function
 
         Examples
         --------
@@ -3225,6 +3489,8 @@ class Client(SyncMethodMixin):
         workers : list (optional)
             A list of workers that we care about specifically.
             Leave empty to receive information about all workers.
+        **kwargs : dict
+            Optional keyword arguments for the remote function
 
         Examples
         --------
@@ -3256,6 +3522,8 @@ class Client(SyncMethodMixin):
         ----------
         futures : list (optional)
             A list of futures, defaults to all data
+        **kwargs : dict
+            Optional keyword arguments for the remote function
 
         Examples
         --------
@@ -3296,6 +3564,8 @@ class Client(SyncMethodMixin):
         ----------
         workers : list (optional)
             A list of worker addresses, defaults to all
+        **kwargs : dict
+            Optional keyword arguments for the remote function
 
         Examples
         --------
@@ -3366,6 +3636,8 @@ class Client(SyncMethodMixin):
             A list of keys, defaults to all keys
         summary : boolean, (optional)
             Summarize keys into key types
+        **kwargs : dict
+            Optional keyword arguments for the remote function
 
         Examples
         --------
@@ -3516,6 +3788,11 @@ class Client(SyncMethodMixin):
     def scheduler_info(self, **kwargs):
         """Basic information about the workers in the cluster
 
+        Parameters
+        ----------
+        **kwargs : dict
+            Optional keyword arguments for the remote function
+
         Examples
         --------
         >>> c.scheduler_info()  # doctest: +SKIP
@@ -3733,9 +4010,9 @@ class Client(SyncMethodMixin):
         workers : iterable
             List of worker addresses to retrieve.  Gets all workers by default.
         nanny : bool, default False
-            Whether to get the logs from the workers (False) or the nannies (True). If
-            specified, the addresses in `workers` should still be the worker addresses,
-            not the nanny addresses.
+            Whether to get the logs from the workers (False) or the nannies
+            (True). If specified, the addresses in `workers` should still be
+            the worker addresses, not the nanny addresses.
 
         Returns
         -------
@@ -3828,10 +4105,19 @@ class Client(SyncMethodMixin):
         else:
             raise ValueError(f"No event handler known for topic {topic}.")
 
-    def retire_workers(self, workers=None, close_workers=True, **kwargs):
+    def retire_workers(
+        self, workers: list[str] | None = None, close_workers: bool = True, **kwargs
+    ):
         """Retire certain workers on the scheduler
 
-        See dask.distributed.Scheduler.retire_workers for the full docstring.
+        See :meth:`distributed.Scheduler.retire_workers` for the full docstring.
+
+        Parameters
+        ----------
+        workers : list[str], optional
+            Addresses of the workers to retire
+        close_workers : bool (defaults to True)
+            Whether to close the workers after retiring them
+        **kwargs : dict
+            Optional keyword arguments for the remote function
 
         Examples
         --------
@@ -3939,10 +4225,25 @@ class Client(SyncMethodMixin):
         return result
 
     def futures_of(self, futures):
+        """Wrapper method of futures_of
+
+        Parameters
+        ----------
+        futures : tuple
+            The futures
+        """
         return futures_of(futures, client=self)
 
     def start_ipython(self, *args, **kwargs):
-        """Deprecated - Method moved to start_ipython_workers"""
+        """Deprecated - Method moved to start_ipython_workers
+
+        Parameters
+        ----------
+        *args : tuple
+            Optional arguments for the function
+        **kwargs : dict
+            Optional keyword arguments for the function
+        """
         raise Exception("Method moved to start_ipython_workers")
 
     async def _start_ipython_workers(self, workers):
@@ -4234,15 +4535,25 @@ class Client(SyncMethodMixin):
         Parameters
         ----------
         plugin : SchedulerPlugin
-            Plugin class or object to pass to the scheduler.
+            SchedulerPlugin instance to pass to the scheduler.
         name : str
             Name for the plugin; if None, a name is taken from the
             plugin instance or automatically generated if not present.
         **kwargs : Any
-            Arguments passed to the Plugin class (if Plugin is an
+            deprecated; Arguments passed to the Plugin class (if Plugin is an
             instance kwargs are unused).
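+
+        Examples
+        --------
+        A minimal illustration, assuming ``plugin`` is an instantiated
+        ``SchedulerPlugin``:
+
+        >>> c.register_scheduler_plugin(plugin, name="my-plugin")  # doctest: +SKIP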
 
         """
+        if isinstance(plugin, type):
+            warnings.warn(
+                "Adding plugins by class is deprecated and will be disabled in a "
+                "future release. Please add plugins by instance instead.",
+                category=FutureWarning,
+            )
+            # note: plugin is constructed in async def _register_scheduler_plugin
+        elif kwargs:
+            raise ValueError("kwargs provided but plugin is already an instance")
+
         if name is None:
             name = _get_plugin_name(plugin)
 
@@ -4293,9 +4604,9 @@ class Client(SyncMethodMixin):
         Registers a lifecycle worker plugin for all current and future workers.
 
         This registers a new object to handle setup, task state transitions and
-        teardown for workers in this cluster. The plugin will instantiate itself
-        on all currently connected workers. It will also be run on any worker
-        that connects in the future.
+        teardown for workers in this cluster. The plugin will instantiate
+        itself on all currently connected workers. It will also be run on any
+        worker that connects in the future.
 
         The plugin may include methods ``setup``, ``teardown``, ``transition``,
         and ``release_key``.  See the
@@ -4313,7 +4624,7 @@ class Client(SyncMethodMixin):
         Parameters
         ----------
         plugin : WorkerPlugin or NannyPlugin
-            The plugin object to register.
+            WorkerPlugin or NannyPlugin instance to register.
         name : str, optional
             A name for the plugin.
             Registering a plugin with the same name will have no effect.
@@ -4321,8 +4632,9 @@ class Client(SyncMethodMixin):
         nanny : bool, optional
             Whether to register the plugin with workers or nannies.
         **kwargs : optional
-            If you pass a class as the plugin, instead of a class instance, then the
-            class will be instantiated with any extra keyword arguments.
+            Deprecated; If you pass a class as the plugin, instead of a class
+            instance, then the class will be instantiated with any extra
+            keyword arguments.
 
         Examples
         --------
@@ -4333,7 +4645,8 @@ class Client(SyncMethodMixin):
         ...         pass
         ...     def teardown(self, worker: dask.distributed.Worker):
         ...         pass
-        ...     def transition(self, key: str, start: str, finish: str, **kwargs):
+        ...     def transition(self, key: str, start: str, finish: str,
+        ...                    **kwargs):
         ...         pass
         ...     def release_key(self, key: str, state: str, cause: str | None, reason: None, report: bool):
         ...         pass
@@ -4357,7 +4670,14 @@ class Client(SyncMethodMixin):
         unregister_worker_plugin
         """
         if isinstance(plugin, type):
+            warnings.warn(
+                "Adding plugins by class is deprecated and will be disabled in a "
+                "future release. Please add plugins by instance instead.",
+                category=FutureWarning,
+            )
             plugin = plugin(**kwargs)
+        elif kwargs:
+            raise ValueError("kwargs provided but plugin is already an instance")
 
         if name is None:
             name = _get_plugin_name(plugin)
@@ -4493,9 +4813,10 @@ def wait(fs, timeout=None, return_when=A
 
     Parameters
     ----------
-    fs : list of futures
+    fs : List[Future]
     timeout : number, optional
-        Time in seconds after which to raise a ``dask.distributed.TimeoutError``
+        Time in seconds after which to raise a
+        ``dask.distributed.TimeoutError``
     return_when : str, optional
         One of `ALL_COMPLETED` or `FIRST_COMPLETED`
 
@@ -4556,8 +4877,8 @@ class as_completed:
         Whether to wait and include results of futures as well;
         in this case `as_completed` yields a tuple of (future, result)
     raise_errors: bool (True)
-        Whether we should raise when the result of a future raises an exception;
-        only affects behavior when `with_results=True`.
+        Whether we should raise when the result of a future raises an
+        exception; only affects behavior when `with_results=True`.
 
     Examples
     --------
@@ -4787,7 +5108,18 @@ def AsCompleted(*args, **kwargs):
 
 
 def default_client(c=None):
-    """Return a client if one has started"""
+    """Return a client if one has started
+
+    Parameters
+    ----------
+    c : Client
+        The client to return. If None, the default client is returned.
+
+    Returns
+    -------
+    c : Client
+        The client, if one has started
+    """
     c = c or _get_global_client()
     if c:
         return c
@@ -4801,12 +5133,36 @@ def default_client(c=None):
 
 
 def ensure_default_client(client):
-    """Ensures the client passed as argument is set as the default"""
+    """Ensures the client passed as argument is set as the default
+
+    Parameters
+    ----------
+    client : Client
+        The client
+    """
     dask.config.set(scheduler="dask.distributed")
     _set_global_client(client)
 
 
 def redict_collection(c, dsk):
+    """Change the dictionary in the collection
+
+    Parameters
+    ----------
+    c : collection
+        The collection
+    dsk : dict
+        The dictionary
+
+    Returns
+    -------
+    c : Delayed
+        If the collection is a 'Delayed' object the collection is returned
+    cc : collection
+        If the collection is not a 'Delayed' object a copy of the
+        collection with the new dictionary is returned
+
+    """
     from dask.delayed import Delayed
 
     if isinstance(c, Delayed):
@@ -4824,6 +5180,8 @@ def futures_of(o, client=None):
     ----------
     o : collection
         A possibly nested collection of Dask objects
+    client : Client, optional
+        The client
 
     Examples
     --------
@@ -4831,6 +5189,11 @@ def futures_of(o, client=None):
     [<Future: finished key: ...>,
      <Future: pending  key: ...>]
 
+    Raises
+    ------
+    CancelledError
+        If one of the futures is cancelled a CancelledError is raised
+
     Returns
     -------
     futures : List[Future]
@@ -5084,12 +5447,13 @@ def temp_default_client(c):
     """Set the default client for the duration of the context
 
     .. note::
-       This function should be used exclusively for unit testing the default client
-       functionality. In all other cases, please use ``Client.as_current`` instead.
+       This function should be used exclusively for unit testing the default
+       client functionality. In all other cases, please use
+       ``Client.as_current`` instead.
 
     .. note::
-       Unlike ``Client.as_current``, this context manager is neither thread-local nor
-       task-local.
+       Unlike ``Client.as_current``, this context manager is neither
+       thread-local nor task-local.
 
     Parameters
     ----------
diff -pruN 2022.01.0+ds.1-1/distributed/comm/core.py 2022.02.0+ds.1-1/distributed/comm/core.py
--- 2022.01.0+ds.1-1/distributed/comm/core.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/comm/core.py	2022-02-11 16:21:14.000000000 +0000
@@ -297,12 +297,14 @@ async def connect(
         except (asyncio.TimeoutError, OSError) as exc:
             active_exception = exc
 
-            # The intermediate capping is mostly relevant for the initial
-            # connect. Afterwards we should be more forgiving
-            intermediate_cap = intermediate_cap * 1.5
+            # As described above, the intermediate timeout is used to
+            # distribute initial, bulk connect attempts homogeneously. In
+            # particular, with the jitter upon retries we should not be
+            # worried about overloading any DNS servers any more.
+            intermediate_cap = timeout
             # FullJitter see https://aws.amazon.com/blogs/architecture/exponential-backoff-and-jitter/
 
-            upper_cap = min(time_left(), backoff_base * (2 ** attempt))
+            upper_cap = min(time_left(), backoff_base * (2**attempt))
             backoff = random.uniform(0, upper_cap)
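+            # For illustration: with backoff_base=0.01 and attempt=3 the cap
+            # is min(time_left(), 0.08) and the backoff is drawn uniformly
+            # from [0, cap].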
             attempt += 1
             logger.debug(
diff -pruN 2022.01.0+ds.1-1/distributed/comm/tcp.py 2022.02.0+ds.1-1/distributed/comm/tcp.py
--- 2022.01.0+ds.1-1/distributed/comm/tcp.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/comm/tcp.py	2022-02-11 16:21:14.000000000 +0000
@@ -7,6 +7,7 @@ import struct
 import sys
 import weakref
 from ssl import SSLCertVerificationError, SSLError
+from typing import ClassVar
 
 from tornado import gen
 
@@ -268,7 +269,7 @@ class TCP(Comm):
         frames_nbytes = [nbytes(header), *frames_nbytes]
         frames_nbytes_total += frames_nbytes[0]
 
-        if frames_nbytes_total < 2 ** 17:  # 128kiB
+        if frames_nbytes_total < 2**17:  # 128kiB
             # small enough, send in one go
             frames = [b"".join(frames)]
             frames_nbytes = [frames_nbytes_total]
@@ -384,7 +385,29 @@ class RequireEncryptionMixin:
 
 
 class BaseTCPConnector(Connector, RequireEncryptionMixin):
-    _executor = ThreadPoolExecutor(2, thread_name_prefix="TCP-Executor")
+    _executor: ClassVar[ThreadPoolExecutor] = ThreadPoolExecutor(
+        2, thread_name_prefix="TCP-Executor"
+    )
+    _client: ClassVar[TCPClient]
+
+    @classmethod
+    def warmup(cls) -> None:
+        """Pre-start threads and sockets to avoid catching them in checks for thread and
+        fd leaks
+        """
+        ex = cls._executor
+        while len(ex._threads) < ex._max_workers:
+            ex._adjust_thread_count()
+        cls._get_client()
+
+    @classmethod
+    def _get_client(cls):
+        if not hasattr(cls, "_client"):
+            resolver = netutil.ExecutorResolver(
+                close_executor=False, executor=cls._executor
+            )
+            cls._client = TCPClient(resolver=resolver)
+        return cls._client
 
     @property
     def client(self):
@@ -392,13 +415,7 @@ class BaseTCPConnector(Connector, Requir
         # excess `ThreadPoolExecutor`s. We delay creation until inside an async
         # function to avoid accessing an IOLoop from a context where a backing
         # event loop doesn't exist.
-        cls = type(self)
-        if not hasattr(type(self), "_client"):
-            resolver = netutil.ExecutorResolver(
-                close_executor=False, executor=cls._executor
-            )
-            cls._client = TCPClient(resolver=resolver)
-        return cls._client
+        return self._get_client()
 
     async def connect(self, address, deserialize=True, **connection_args):
         self._check_encryption(address, connection_args)
diff -pruN 2022.01.0+ds.1-1/distributed/comm/tests/test_comms.py 2022.02.0+ds.1-1/distributed/comm/tests/test_comms.py
--- 2022.01.0+ds.1-1/distributed/comm/tests/test_comms.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/comm/tests/test_comms.py	2022-02-11 16:21:14.000000000 +0000
@@ -1092,7 +1092,7 @@ async def check_deserialize(addr):
     # as a separate payload
     # TODO: currently bytestrings are not transferred as a separate payload
 
-    _uncompressible = os.urandom(1024 ** 2) * 4  # end size: 8 MB
+    _uncompressible = os.urandom(1024**2) * 4  # end size: 8 MB
 
     msg = {
         "op": "update",
@@ -1151,7 +1151,7 @@ async def check_deserialize_roundtrip(ad
     """
     # Test with long bytestrings, large enough to be transferred
     # as a separate payload
-    _uncompressible = os.urandom(1024 ** 2) * 4  # end size: 4 MB
+    _uncompressible = os.urandom(1024**2) * 4  # end size: 4 MB
 
     msg = {
         "op": "update",
diff -pruN 2022.01.0+ds.1-1/distributed/comm/tests/test_ucx_config.py 2022.02.0+ds.1-1/distributed/comm/tests/test_ucx_config.py
--- 2022.01.0+ds.1-1/distributed/comm/tests/test_ucx_config.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/comm/tests/test_ucx_config.py	2022-02-11 16:21:14.000000000 +0000
@@ -119,7 +119,7 @@ def test_ucx_config_w_env_var(cleanup, l
             ],
             env=env,
         ):
-            with Client(sched_addr, loop=loop, timeout=10) as c:
+            with Client(sched_addr, loop=loop, timeout=60) as c:
                 while not c.scheduler_info()["workers"]:
                     sleep(0.1)
 
diff -pruN 2022.01.0+ds.1-1/distributed/comm/ucx.py 2022.02.0+ds.1-1/distributed/comm/ucx.py
--- 2022.01.0+ds.1-1/distributed/comm/ucx.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/comm/ucx.py	2022-02-11 16:21:14.000000000 +0000
@@ -417,7 +417,7 @@ class UCXConnector(Connector):
             getattr(ucp.exceptions, "UCXConnectionReset", ()),
             getattr(ucp.exceptions, "UCXNotConnected", ()),
             getattr(ucp.exceptions, "UCXUnreachable", ()),
-        ):
+        ):  # type: ignore
             raise CommClosedError("Connection closed before handshake completed")
         return self.comm_class(
             ep,
diff -pruN 2022.01.0+ds.1-1/distributed/core.py 2022.02.0+ds.1-1/distributed/core.py
--- 2022.01.0+ds.1-1/distributed/core.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/core.py	2022-02-11 16:21:14.000000000 +0000
@@ -69,6 +69,11 @@ class Status(Enum):
 
 
 Status.lookup = {s.name: s for s in Status}  # type: ignore
+Status.ANY_RUNNING = {  # type: ignore
+    Status.running,
+    Status.paused,
+    Status.closing_gracefully,
+}
 
 
 class RPCClosed(IOError):
@@ -257,7 +262,7 @@ class Server:
         async def _():
             timeout = getattr(self, "death_timeout", 0)
             async with self._startup_lock:
-                if self.status in (Status.running, Status.paused):
+                if self.status in Status.ANY_RUNNING:
                     return self
                 if timeout:
                     try:
@@ -519,7 +524,7 @@ class Server:
                             self._ongoing_coroutines.add(result)
                             result = await result
                     except (CommClosedError, asyncio.CancelledError):
-                        if self.status in (Status.running, Status.paused):
+                        if self.status in Status.ANY_RUNNING:
                             logger.info("Lost connection to %r", address, exc_info=True)
                         break
                     except Exception as e:
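
A brief illustration (not part of the diff) of the new `Status.ANY_RUNNING` constant: it bundles the states in which a server is still serving comms, so call sites test set membership instead of repeating a tuple, and `closing_gracefully` now counts as running too:

    from distributed.core import Status


    def is_serving(status: Status) -> bool:
        # Equivalent to the old `status in (Status.running, Status.paused)` check,
        # extended with Status.closing_gracefully.
        return status in Status.ANY_RUNNING
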
diff -pruN 2022.01.0+ds.1-1/distributed/dashboard/components/scheduler.py 2022.02.0+ds.1-1/distributed/dashboard/components/scheduler.py
--- 2022.01.0+ds.1-1/distributed/dashboard/components/scheduler.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/dashboard/components/scheduler.py	2022-02-11 16:21:14.000000000 +0000
@@ -3,6 +3,7 @@ import math
 import operator
 import os
 from collections import OrderedDict, defaultdict
+from datetime import datetime
 from numbers import Number
 
 import numpy as np
@@ -3284,13 +3285,24 @@ class WorkerTable(DashboardComponent):
 
 
 class SchedulerLogs:
-    def __init__(self, scheduler):
-        logs = Log(
-            "\n".join(line for level, line in scheduler.get_logs())
-        )._repr_html_()
+    def __init__(self, scheduler, start=None):
+        logs = scheduler.get_logs(start=start, timestamps=True)
+
+        if not logs:
+            logs_html = (
+                '<p style="font-family: monospace; margin: 0;">No logs to report</p>'
+            )
+        else:
+            logs_html = Log(
+                "\n".join(
+                    "%s - %s"
+                    % (datetime.fromtimestamp(time).strftime("%H:%M:%S.%f"), line)
+                    for time, level, line in logs
+                )
+            )._repr_html_()
 
         self.root = Div(
-            text=logs,
+            text=logs_html,
             style={
                 "width": "100%",
                 "height": "100%",
diff -pruN 2022.01.0+ds.1-1/distributed/deploy/cluster.py 2022.02.0+ds.1-1/distributed/deploy/cluster.py
--- 2022.01.0+ds.1-1/distributed/deploy/cluster.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/deploy/cluster.py	2022-02-11 16:21:14.000000000 +0000
@@ -151,7 +151,7 @@ class Cluster(SyncMethodMixin):
                         exc_info=True,
                     )
             # Sleep, with error backoff
-            interval = min(max_interval, self._sync_interval * 1.5 ** err_count)
+            interval = min(max_interval, self._sync_interval * 1.5**err_count)
             await asyncio.sleep(interval)
 
     async def _close(self):
diff -pruN 2022.01.0+ds.1-1/distributed/deploy/old_ssh.py 2022.02.0+ds.1-1/distributed/deploy/old_ssh.py
--- 2022.01.0+ds.1-1/distributed/deploy/old_ssh.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/deploy/old_ssh.py	2022-02-11 16:21:14.000000000 +0000
@@ -2,14 +2,17 @@ import logging
 import os
 import socket
 import sys
-import time
 import traceback
+import warnings
 from queue import Queue
 from threading import Thread
+from time import sleep
 
 from tlz import merge
 from tornado import gen
 
+from ..metrics import time
+
 logger = logging.getLogger(__name__)
 
 
@@ -57,7 +60,7 @@ def async_ssh(cmd_dict):
             print(
                 "[ dask-ssh ] : "
                 + bcolors.FAIL
-                + "SSH connection error when connecting to {addr}:{port}"
+                + "SSH connection error when connecting to {addr}:{port} "
                 "to run '{cmd}'".format(
                     addr=cmd_dict["address"],
                     port=cmd_dict["ssh_port"],
@@ -99,7 +102,7 @@ def async_ssh(cmd_dict):
                 + bcolors.ENDC
             )
 
-            time.sleep(1)
+            sleep(1)
 
     # Execute the command, and grab file handles for stdout and stderr. Note
     # that we run the command using the user's default shell, but force it to
@@ -182,19 +185,19 @@ def async_ssh(cmd_dict):
     # thread to shut itself down.
     while cmd_dict["input_queue"].empty():
         # Kill some time so that this thread does not hog the CPU.
-        time.sleep(1.0)
+        sleep(1.0)
         # Send noise down the pipe to keep connection active
         transport.send_ignore()
         if communicate():
             break
 
     # Ctrl-C the executing command and wait a bit for command to end cleanly
-    start = time.time()
-    while time.time() < start + 5.0:
+    start = time()
+    while time() < start + 5.0:
         channel.send(b"\x03")  # Ctrl-C
         if communicate():
             break
-        time.sleep(1.0)
+        sleep(1.0)
 
     # Shutdown the channel, and close the SSH connection
     channel.close()
@@ -249,7 +252,7 @@ def start_worker(
     scheduler_port,
     worker_addr,
     nthreads,
-    nprocs,
+    n_workers,
     ssh_username,
     ssh_port,
     ssh_private_key,
@@ -265,7 +268,7 @@ def start_worker(
     cmd = (
         "{python} -m {remote_dask_worker} "
         "{scheduler_addr}:{scheduler_port} "
-        "--nthreads {nthreads}" + (" --nprocs {nprocs}" if nprocs != 1 else "")
+        "--nthreads {nthreads}" + (" --nworkers {n_workers}" if n_workers != 1 else "")
     )
 
     if not nohost:
@@ -287,7 +290,7 @@ def start_worker(
         scheduler_port=scheduler_port,
         worker_addr=worker_addr,
         nthreads=nthreads,
-        nprocs=nprocs,
+        n_workers=n_workers,
         memory_limit=memory_limit,
         worker_port=worker_port,
         nanny_port=nanny_port,
@@ -337,7 +340,7 @@ class SSHCluster:
         scheduler_port,
         worker_addrs,
         nthreads=0,
-        nprocs=1,
+        n_workers=None,
         ssh_username=None,
         ssh_port=22,
         ssh_private_key=None,
@@ -349,12 +352,32 @@ class SSHCluster:
         nanny_port=None,
         remote_dask_worker="distributed.cli.dask_worker",
         local_directory=None,
+        **kwargs,
     ):
 
         self.scheduler_addr = scheduler_addr
         self.scheduler_port = scheduler_port
         self.nthreads = nthreads
-        self.nprocs = nprocs
+        nprocs = kwargs.pop("nprocs", None)
+        if kwargs:
+            raise TypeError(
+                f"__init__() got an unexpected keyword argument {', '.join(kwargs.keys())}"
+            )
+        if nprocs is not None and n_workers is not None:
+            raise ValueError(
+                "Both nprocs and n_workers were specified. Use n_workers only."
+            )
+        elif nprocs is not None:
+            warnings.warn(
+                "The nprocs argument will be removed in a future release. It has been "
+                "renamed to n_workers.",
+                FutureWarning,
+            )
+            n_workers = nprocs
+        elif n_workers is None:
+            n_workers = 1
+
+        self.n_workers = n_workers
 
         self.ssh_username = ssh_username
         self.ssh_port = ssh_port
@@ -411,6 +434,24 @@ class SSHCluster:
         pass
 
     @property
+    def nprocs(self):
+        warnings.warn(
+            "The nprocs attribute will be removed in a future release. It has been "
+            "renamed to n_workers.",
+            FutureWarning,
+        )
+        return self.n_workers
+
+    @nprocs.setter
+    def nprocs(self, value):
+        warnings.warn(
+            "The nprocs attribute will be removed in a future release. It has been "
+            "renamed to n_workers.",
+            FutureWarning,
+        )
+        self.n_workers = value
+
+    @property
     def scheduler_address(self):
         return "%s:%d" % (self.scheduler_addr, self.scheduler_port)
 
@@ -427,7 +468,7 @@ class SSHCluster:
 
                 # Kill some time and free up CPU before starting the next sweep
                 # through the processes.
-                time.sleep(0.1)
+                sleep(0.1)
 
             # end while true
 
@@ -442,7 +483,7 @@ class SSHCluster:
                 self.scheduler_port,
                 address,
                 self.nthreads,
-                self.nprocs,
+                self.n_workers,
                 self.ssh_username,
                 self.ssh_port,
                 self.ssh_private_key,
diff -pruN 2022.01.0+ds.1-1/distributed/deploy/ssh.py 2022.02.0+ds.1-1/distributed/deploy/ssh.py
--- 2022.01.0+ds.1-1/distributed/deploy/ssh.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/deploy/ssh.py	2022-02-11 16:21:14.000000000 +0000
@@ -89,7 +89,37 @@ class Worker(Process):
         self.kwargs = copy.copy(kwargs)
         self.name = name
         self.remote_python = remote_python
-        self.nprocs = self.kwargs.pop("nprocs", 1)
+        if kwargs.get("nprocs") is not None and kwargs.get("n_workers") is not None:
+            raise ValueError(
+                "Both nprocs and n_workers were specified. Use n_workers only."
+            )
+        elif kwargs.get("nprocs") is not None:
+            warnings.warn(
+                "The nprocs argument will be removed in a future release. It has been "
+                "renamed to n_workers.",
+                FutureWarning,
+            )
+            self.n_workers = self.kwargs.pop("nprocs", 1)
+        else:
+            self.n_workers = self.kwargs.pop("n_workers", 1)
+
+    @property
+    def nprocs(self):
+        warnings.warn(
+            "The nprocs attribute will be removed in a future release. It has been "
+            "renamed to n_workers.",
+            FutureWarning,
+        )
+        return self.n_workers
+
+    @nprocs.setter
+    def nprocs(self, value):
+        warnings.warn(
+            "The nprocs attribute will be removed in a future release. It has been "
+            "renamed to n_workers.",
+            FutureWarning,
+        )
+        self.n_workers = value
 
     async def start(self):
         try:
@@ -138,7 +168,7 @@ class Worker(Process):
                                 **self.kwargs,
                             },
                         }
-                        for i in range(self.nprocs)
+                        for i in range(self.n_workers)
                     }
                 ),
             ]
@@ -148,7 +178,7 @@ class Worker(Process):
 
         # We watch stderr in order to get the address, then we return
         started_workers = 0
-        while started_workers < self.nprocs:
+        while started_workers < self.n_workers:
             line = await self.proc.stderr.readline()
             if not line.strip():
                 raise Exception("Worker failed to start")
@@ -248,6 +278,7 @@ old_cluster_kwargs = {
     "worker_addrs",
     "nthreads",
     "nprocs",
+    "n_workers",
     "ssh_username",
     "ssh_port",
     "ssh_private_key",
@@ -336,7 +367,7 @@ def SSHCluster(
     >>> cluster = SSHCluster(
     ...     ["localhost", "localhost", "localhost", "localhost"],
     ...     connect_options={"known_hosts": None},
-    ...     worker_options={"nthreads": 2, "nprocs": 2},
+    ...     worker_options={"nthreads": 2, "n_workers": 2},
     ...     scheduler_options={"port": 0, "dashboard_address": ":8797"}
     ... )
     >>> client = Client(cluster)
diff -pruN 2022.01.0+ds.1-1/distributed/deploy/tests/test_old_ssh.py 2022.02.0+ds.1-1/distributed/deploy/tests/test_old_ssh.py
--- 2022.01.0+ds.1-1/distributed/deploy/tests/test_old_ssh.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/deploy/tests/test_old_ssh.py	2022-02-11 16:21:14.000000000 +0000
@@ -28,3 +28,50 @@ def test_cluster(loop):
             while len(e.ncores()) != 3:
                 sleep(0.01)
                 assert time() < start + 5
+
+
+def test_old_ssh_nprocs_renamed_to_n_workers():
+    with pytest.warns(FutureWarning, match="renamed to n_workers"):
+        with SSHCluster(
+            scheduler_addr="127.0.0.1",
+            scheduler_port=8687,
+            worker_addrs=["127.0.0.1", "127.0.0.1"],
+            nprocs=2,
+        ) as c:
+            assert c.n_workers == 2
+
+
+def test_nprocs_attribute_is_deprecated():
+    with SSHCluster(
+        scheduler_addr="127.0.0.1",
+        scheduler_port=8687,
+        worker_addrs=["127.0.0.1", "127.0.0.1"],
+    ) as c:
+        assert c.n_workers == 1
+        with pytest.warns(FutureWarning, match="renamed to n_workers"):
+            assert c.nprocs == 1
+        with pytest.warns(FutureWarning, match="renamed to n_workers"):
+            c.nprocs = 3
+
+        assert c.n_workers == 3
+
+
+def test_old_ssh_n_workers_with_nprocs_is_an_error():
+    with pytest.raises(ValueError, match="Both nprocs and n_workers"):
+        SSHCluster(
+            scheduler_addr="127.0.0.1",
+            scheduler_port=8687,
+            worker_addrs=(),
+            nprocs=2,
+            n_workers=2,
+        )
+
+
+def test_extra_kwargs_is_an_error():
+    with pytest.raises(TypeError, match="unexpected keyword argument"):
+        SSHCluster(
+            scheduler_addr="127.0.0.1",
+            scheduler_port=8687,
+            worker_addrs=["127.0.0.1", "127.0.0.1"],
+            unknown_kwarg=None,
+        )
diff -pruN 2022.01.0+ds.1-1/distributed/deploy/tests/test_ssh.py 2022.02.0+ds.1-1/distributed/deploy/tests/test_ssh.py
--- 2022.01.0+ds.1-1/distributed/deploy/tests/test_ssh.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/deploy/tests/test_ssh.py	2022-02-11 16:21:14.000000000 +0000
@@ -59,13 +59,13 @@ async def test_basic():
 
 
 @pytest.mark.asyncio
-async def test_nprocs():
+async def test_n_workers():
     async with SSHCluster(
         ["127.0.0.1"] * 3,
         connect_options=dict(known_hosts=None),
         asynchronous=True,
         scheduler_options={"idle_timeout": "5s"},
-        worker_options={"death_timeout": "5s", "nprocs": 2},
+        worker_options={"death_timeout": "5s", "n_workers": 2},
     ) as cluster:
         assert len(cluster.workers) == 2
         async with Client(cluster, asynchronous=True) as client:
@@ -78,6 +78,54 @@ async def test_nprocs():
 
 
 @pytest.mark.asyncio
+async def test_nprocs_attribute_is_deprecated():
+    async with SSHCluster(
+        ["127.0.0.1"] * 2,
+        connect_options=dict(known_hosts=None),
+        asynchronous=True,
+        scheduler_options={"idle_timeout": "5s"},
+        worker_options={"death_timeout": "5s"},
+    ) as cluster:
+        assert len(cluster.workers) == 1
+        worker = cluster.workers[0]
+        assert worker.n_workers == 1
+        with pytest.warns(FutureWarning, match="renamed to n_workers"):
+            assert worker.nprocs == 1
+        with pytest.warns(FutureWarning, match="renamed to n_workers"):
+            worker.nprocs = 3
+
+        assert worker.n_workers == 3
+
+
+@pytest.mark.asyncio
+async def test_ssh_nprocs_renamed_to_n_workers():
+    with pytest.warns(FutureWarning, match="renamed to n_workers"):
+        async with SSHCluster(
+            ["127.0.0.1"] * 3,
+            connect_options=dict(known_hosts=None),
+            asynchronous=True,
+            scheduler_options={"idle_timeout": "5s"},
+            worker_options={"death_timeout": "5s", "nprocs": 2},
+        ) as cluster:
+            assert len(cluster.workers) == 2
+            async with Client(cluster, asynchronous=True) as client:
+                await client.wait_for_workers(4)
+
+
+@pytest.mark.asyncio
+async def test_ssh_n_workers_with_nprocs_is_an_error():
+    with pytest.raises(ValueError, match="Both nprocs and n_workers"):
+        async with SSHCluster(
+            ["127.0.0.1"] * 3,
+            connect_options=dict(known_hosts=None),
+            asynchronous=True,
+            scheduler_options={},
+            worker_options={"n_workers": 2, "nprocs": 2},
+        ) as cluster:
+            assert not cluster
+
+
+@pytest.mark.asyncio
 async def test_keywords():
     async with SSHCluster(
         ["127.0.0.1"] * 3,
diff -pruN 2022.01.0+ds.1-1/distributed/diagnostics/memory_sampler.py 2022.02.0+ds.1-1/distributed/diagnostics/memory_sampler.py
--- 2022.01.0+ds.1-1/distributed/diagnostics/memory_sampler.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/diagnostics/memory_sampler.py	2022-02-11 16:21:14.000000000 +0000
@@ -161,7 +161,7 @@ class MemorySampler:
         kwargs
             Passed verbatim to :meth:`pandas.DataFrame.plot`
         """
-        df = self.to_pandas(align=align) / 2 ** 30
+        df = self.to_pandas(align=align) / 2**30
         return df.plot(
             xlabel="time",
             ylabel="Cluster memory (GiB)",
diff -pruN 2022.01.0+ds.1-1/distributed/diagnostics/progress.py 2022.02.0+ds.1-1/distributed/diagnostics/progress.py
--- 2022.01.0+ds.1-1/distributed/diagnostics/progress.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/diagnostics/progress.py	2022-02-11 16:21:14.000000000 +0000
@@ -2,7 +2,6 @@ from __future__ import annotations
 
 import asyncio
 import logging
-import time
 from collections import defaultdict
 from timeit import default_timer
 
@@ -11,6 +10,7 @@ from tlz import groupby, valmap
 from dask.base import tokenize
 from dask.utils import stringify
 
+from ..metrics import time
 from ..utils import key_split
 from .plugin import SchedulerPlugin
 
@@ -312,7 +312,7 @@ class GroupTiming(SchedulerPlugin):
 
     def _init(self):
         """Shared initializatoin code between __init__ and restart"""
-        now = time.time()
+        now = time()
 
         # Timestamps for tracking compute durations by task group.
         # Start with length 2 so that we always can compute a valid dt later.
@@ -329,13 +329,14 @@ class GroupTiming(SchedulerPlugin):
         if start == "processing" and finish == "memory":
             startstops = kwargs.get("startstops")
             if not startstops:
-                logger.warn(
-                    f"Task {key} finished processing, but timing information seems to be missing"
+                logger.warning(
+                    f"Task {key} finished processing, but timing information seems to "
+                    "be missing"
                 )
                 return
 
             # Possibly extend the timeseries if another dt has passed
-            now = time.time()
+            now = time()
             self.time[-1] = now
             while self.time[-1] - self.time[-2] > self.dt:
                 self.time[-1] = self.time[-2] + self.dt
diff -pruN 2022.01.0+ds.1-1/distributed/diagnostics/tests/test_memory_sampler.py 2022.02.0+ds.1-1/distributed/diagnostics/tests/test_memory_sampler.py
--- 2022.01.0+ds.1-1/distributed/diagnostics/tests/test_memory_sampler.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/diagnostics/tests/test_memory_sampler.py	2022-02-11 16:21:14.000000000 +0000
@@ -52,9 +52,9 @@ async def test_multi_sample(c, s, a, b):
     s2 = ms.sample("process", interval=0.2)
     async with s1, s2:
         idle_mem = s.memory.process
-        f = c.submit(lambda: "x" * 100 * 2 ** 20)  # 100 MiB
+        f = c.submit(lambda: "x" * 100 * 2**20)  # 100 MiB
         await f
-        while s.memory.process < idle_mem + 80 * 2 ** 20:
+        while s.memory.process < idle_mem + 80 * 2**20:
             # Wait for heartbeat
             await asyncio.sleep(0.01)
         await asyncio.sleep(0.6)
@@ -63,10 +63,10 @@ async def test_multi_sample(c, s, a, b):
     p = ms.samples["process"]
     assert len(m) >= 2
     assert m[0][1] == 0
-    assert m[-1][1] >= 100 * 2 ** 20
+    assert m[-1][1] >= 100 * 2**20
     assert len(p) >= 2
-    assert p[0][1] > 2 ** 20  # Assume > 1 MiB for idle process
-    assert p[-1][1] > p[0][1] + 80 * 2 ** 20
+    assert p[0][1] > 2**20  # Assume > 1 MiB for idle process
+    assert p[-1][1] > p[0][1] + 80 * 2**20
     assert m[-1][1] < p[-1][1]
 
 
diff -pruN 2022.01.0+ds.1-1/distributed/diagnostics/tests/test_scheduler_plugin.py 2022.02.0+ds.1-1/distributed/diagnostics/tests/test_scheduler_plugin.py
--- 2022.01.0+ds.1-1/distributed/diagnostics/tests/test_scheduler_plugin.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/diagnostics/tests/test_scheduler_plugin.py	2022-02-11 16:21:14.000000000 +0000
@@ -170,11 +170,11 @@ async def test_register_scheduler_plugin
             scheduler.foo = "bar"
 
     assert not hasattr(s, "foo")
-    await c.register_scheduler_plugin(Dummy1)
+    await c.register_scheduler_plugin(Dummy1())
     assert s.foo == "bar"
 
     with pytest.warns(UserWarning) as w:
-        await c.register_scheduler_plugin(Dummy1)
+        await c.register_scheduler_plugin(Dummy1())
     assert "Scheduler already contains" in w[0].message.args[0]
 
     class Dummy2(SchedulerPlugin):
@@ -185,7 +185,7 @@ async def test_register_scheduler_plugin
 
     n_plugins = len(s.plugins)
     with pytest.raises(RuntimeError, match="raising in start method"):
-        await c.register_scheduler_plugin(Dummy2)
+        await c.register_scheduler_plugin(Dummy2())
     # total number of plugins should be unchanged
     assert n_plugins == len(s.plugins)
 
@@ -198,10 +198,30 @@ async def test_register_scheduler_plugin
 
     n_plugins = len(s.plugins)
     with pytest.raises(ValueError) as excinfo:
-        await c.register_scheduler_plugin(Dummy1)
+        await c.register_scheduler_plugin(Dummy1())
 
     msg = str(excinfo.value)
     assert "disallowed from deserializing" in msg
     assert "distributed.scheduler.pickle" in msg
 
     assert n_plugins == len(s.plugins)
+
+
+@gen_cluster(nthreads=[], client=True)
+async def test_plugin_class_warns(c, s):
+    class EmptyPlugin(SchedulerPlugin):
+        pass
+
+    with pytest.warns(FutureWarning, match=r"Adding plugins by class is deprecated"):
+        await c.register_scheduler_plugin(EmptyPlugin)
+
+
+@gen_cluster(nthreads=[], client=True)
+async def test_unused_kwargs_throws(c, s):
+    class EmptyPlugin(SchedulerPlugin):
+        pass
+
+    with pytest.raises(
+        ValueError, match=r"kwargs provided but plugin is already an instance"
+    ):
+        await c.register_scheduler_plugin(EmptyPlugin(), data=789)
diff -pruN 2022.01.0+ds.1-1/distributed/diagnostics/tests/test_worker_plugin.py 2022.02.0+ds.1-1/distributed/diagnostics/tests/test_worker_plugin.py
--- 2022.01.0+ds.1-1/distributed/diagnostics/tests/test_worker_plugin.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/diagnostics/tests/test_worker_plugin.py	2022-02-11 16:21:14.000000000 +0000
@@ -85,17 +85,39 @@ async def test_remove_with_client_raises
 
 @gen_cluster(client=True, nthreads=[])
 async def test_create_with_client_and_plugin_from_class(c, s):
-    await c.register_worker_plugin(MyPlugin, data=456)
+    with pytest.warns(FutureWarning, match=r"Adding plugins by class is deprecated"):
+        await c.register_worker_plugin(MyPlugin, data=456)
 
     worker = await Worker(s.address, loop=s.loop)
     assert worker._my_plugin_status == "setup"
     assert worker._my_plugin_data == 456
 
     # Give the plugin a new name so that it registers
-    await c.register_worker_plugin(MyPlugin, name="new", data=789)
+    with pytest.warns(FutureWarning, match=r"Adding plugins by class is deprecated"):
+        await c.register_worker_plugin(MyPlugin, data=789, name="new")
     assert worker._my_plugin_data == 789
 
 
+@gen_cluster(nthreads=[], client=True)
+async def test_plugin_class_warns(c, s):
+    class EmptyPlugin:
+        pass
+
+    with pytest.warns(FutureWarning, match=r"Adding plugins by class is deprecated"):
+        await c.register_worker_plugin(EmptyPlugin)
+
+
+@gen_cluster(nthreads=[], client=True)
+async def test_unused_kwargs_throws(c, s):
+    class EmptyPlugin:
+        pass
+
+    with pytest.raises(
+        ValueError, match=r"kwargs provided but plugin is already an instance"
+    ):
+        await c.register_worker_plugin(EmptyPlugin(), data=789)
+
+
 @gen_cluster(client=True, worker_kwargs={"plugins": [MyPlugin(5)]})
 async def test_create_on_construction(c, s, a, b):
     assert len(a.plugins) == len(b.plugins) == 1
@@ -207,7 +229,8 @@ async def test_default_name(c, s, w):
     assert next(iter(w.plugins)).startswith("MyCustomPlugin-")
 
 
-def test_release_key_deprecated():
+@gen_cluster(client=True, nthreads=[("", 1)])
+async def test_release_key_deprecated(c, s, a):
     class ReleaseKeyDeprecated(WorkerPlugin):
         def __init__(self):
             self._called = False
@@ -222,20 +245,18 @@ def test_release_key_deprecated():
             assert self._called
             return super().teardown(worker)
 
-    @gen_cluster(client=True, nthreads=[("", 1)])
-    async def test(c, s, a):
-
-        await c.register_worker_plugin(ReleaseKeyDeprecated())
-        fut = await c.submit(inc, 1, key="task")
-        assert fut == 2
+    await c.register_worker_plugin(ReleaseKeyDeprecated())
 
-    with pytest.deprecated_call(
-        match="The `WorkerPlugin.release_key` hook is depreacted"
+    with pytest.warns(
+        FutureWarning, match="The `WorkerPlugin.release_key` hook is deprecated"
     ):
-        test()
+        assert await c.submit(inc, 1, key="x") == 2
+        while "x" in a.tasks:
+            await asyncio.sleep(0.01)
 
 
-def test_assert_no_warning_no_overload():
+@gen_cluster(client=True, nthreads=[("", 1)])
+async def test_assert_no_warning_no_overload(c, s, a):
     """Assert we do not receive a deprecation warning if we do not overload any
     methods
     """
@@ -243,15 +264,11 @@ def test_assert_no_warning_no_overload()
     class Dummy(WorkerPlugin):
         pass
 
-    @gen_cluster(client=True, nthreads=[("", 1)])
-    async def test(c, s, a):
-
-        await c.register_worker_plugin(Dummy())
-        fut = await c.submit(inc, 1, key="task")
-        assert fut == 2
-
     with pytest.warns(None):
-        test()
+        await c.register_worker_plugin(Dummy())
+        assert await c.submit(inc, 1, key="x") == 2
+        while "x" in a.tasks:
+            await asyncio.sleep(0.01)
 
 
 @gen_cluster(nthreads=[("127.0.0.1", 1)], client=True)
@@ -269,7 +286,7 @@ async def test_WorkerPlugin_overwrite(c,
         def teardown(self, worker):
             del self.worker.foo
 
-    await c.register_worker_plugin(MyCustomPlugin)
+    await c.register_worker_plugin(MyCustomPlugin())
 
     assert w.foo == 0
 
@@ -292,7 +309,7 @@ async def test_WorkerPlugin_overwrite(c,
         def teardown(self, worker):
             del self.worker.bar
 
-    await c.register_worker_plugin(MyCustomPlugin)
+    await c.register_worker_plugin(MyCustomPlugin())
 
     assert not hasattr(w, "foo")
     assert w.bar == 0
diff -pruN 2022.01.0+ds.1-1/distributed/distributed-schema.yaml 2022.02.0+ds.1-1/distributed/distributed-schema.yaml
--- 2022.01.0+ds.1-1/distributed/distributed-schema.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/distributed-schema.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -286,7 +286,10 @@ properties:
               For a list of handlers see the `dask.distributed.Scheduler.handlers` attribute.
 
           multiprocessing-method:
-            type: string
+            enum:
+              - spawn
+              - fork
+              - forkserver
             description: |
               How we create new workers, one of "spawn", "forkserver", or "fork"
 
@@ -557,6 +560,17 @@ properties:
             type: string
             description: Interval between scheduler-info updates
 
+          security-loader:
+            type: [string, 'null']
+            description: |
+              A fully qualified name (e.g. ``module.submodule.function``) of
+              a callback to use for loading security credentials for the
+              client. If no security object is explicitly passed when creating
+              a ``Client``, this callback is called with a dict containing
+              client information (currently just ``address``), and should
+              return a ``Security`` object to use for this client, or ``None``
+              to fallback to the default security configuration.
+
       deploy:
         type: object
         description: Configuration settings for general Dask deployment
diff -pruN 2022.01.0+ds.1-1/distributed/distributed.yaml 2022.02.0+ds.1-1/distributed/distributed.yaml
--- 2022.01.0+ds.1-1/distributed/distributed.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/distributed.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -162,6 +162,7 @@ distributed:
   client:
     heartbeat: 5s  # Interval between client heartbeats
     scheduler-info-interval: 2s  # Interval between scheduler-info updates
+    security-loader: null  # A callable to load security credentials if none are provided explicitly
 
   deploy:
     lost-worker-timeout: 15s  # Interval after which to hard-close a lost worker job
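
To make the new `security-loader` option concrete, here is a hedged sketch of a loader callback; the module path `mycompany/security.py`, the certificate paths, and the particular `Security` keyword arguments are placeholders rather than anything prescribed by the diff:

    # mycompany/security.py -- hypothetical module referenced from the config
    from distributed.security import Security


    def load_credentials(info):
        """Return a Security object for this client, or None for the default.

        ``info`` is a dict of client information (currently just ``address``).
        """
        return Security(
            tls_ca_file="/etc/dask/ca.pem",
            tls_client_cert="/etc/dask/client-cert.pem",
            tls_client_key="/etc/dask/client-key.pem",
            require_encryption=True,
        )

With `security-loader: mycompany.security.load_credentials` set under `distributed.client`, a `Client()` created without an explicit `security=` argument would call this function and use the returned `Security` object.
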
diff -pruN 2022.01.0+ds.1-1/distributed/http/proxy.py 2022.02.0+ds.1-1/distributed/http/proxy.py
--- 2022.01.0+ds.1-1/distributed/http/proxy.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/http/proxy.py	2022-02-11 16:21:14.000000000 +0000
@@ -67,7 +67,6 @@ try:
             # returns ProxyHandler coroutine
             return super().proxy(self.host, port, proxied_path)
 
-
 except ImportError:
     logger.info(
         "To route to workers diagnostics web server "
diff -pruN 2022.01.0+ds.1-1/distributed/locket.py 2022.02.0+ds.1-1/distributed/locket.py
--- 2022.01.0+ds.1-1/distributed/locket.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/locket.py	2022-02-11 16:21:14.000000000 +0000
@@ -4,8 +4,10 @@
 from __future__ import annotations
 
 import threading
-import time
 import weakref
+from time import sleep
+
+from .metrics import time
 
 __all__ = ["lock_file"]
 
@@ -51,7 +53,6 @@ except ImportError:
         def _unlock_file(file_):
             _WinAPI_UnlockFile(msvcrt.get_osfhandle(file_.fileno()), 0, 0, 1, 0)
 
-
 else:
     _lock_file_blocking_available = True
 
@@ -97,15 +98,15 @@ def _acquire_non_blocking(acquire, timeo
     if retry_period is None:
         retry_period = 0.05
 
-    start_time = time.time()
+    start_time = time()
     while True:
         success = acquire()
         if success:
             return
-        elif timeout is not None and time.time() - start_time > timeout:
+        elif timeout is not None and time() - start_time > timeout:
             raise LockError(f"Couldn't lock {path}")
         else:
-            time.sleep(retry_period)
+            sleep(retry_period)
 
 
 class _LockSet:
diff -pruN 2022.01.0+ds.1-1/distributed/nanny.py 2022.02.0+ds.1-1/distributed/nanny.py
--- 2022.01.0+ds.1-1/distributed/nanny.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/nanny.py	2022-02-11 16:21:14.000000000 +0000
@@ -28,6 +28,7 @@ from .comm import get_address_host, unpa
 from .comm.addressing import address_from_user_args
 from .core import CommClosedError, RPCClosed, Status, coerce_to_address, error_message
 from .diagnostics.plugin import _get_plugin_name
+from .metrics import time
 from .node import ServerNode
 from .process import AsyncProcess
 from .proctitle import enable_proctitle_on_children
@@ -87,7 +88,6 @@ class Nanny(ServerNode):
         scheduler_file=None,
         worker_port=0,
         nthreads=None,
-        ncores=None,
         loop=None,
         local_dir=None,
         local_directory=None,
@@ -171,10 +171,6 @@ class Nanny(ServerNode):
             if len(protocol_address) == 2:
                 protocol = protocol_address[0]
 
-        if ncores is not None:
-            warnings.warn("the ncores= parameter has moved to nthreads=")
-            nthreads = ncores
-
         self._given_worker_port = worker_port
         self.nthreads = nthreads or CPU_COUNT
         self.reconnect = reconnect
@@ -356,8 +352,8 @@ class Nanny(ServerNode):
         if self.process is None:
             return "OK"
 
-        deadline = self.loop.time() + timeout
-        await self.process.kill(timeout=0.8 * (deadline - self.loop.time()))
+        deadline = time() + timeout
+        await self.process.kill(timeout=0.8 * (deadline - time()))
 
     async def instantiate(self, comm=None) -> Status:
         """Start a local worker process
@@ -756,13 +752,12 @@ class WorkerProcess:
             if self.on_exit is not None:
                 self.on_exit(r)
 
-    async def kill(self, timeout=2, executor_wait=True):
+    async def kill(self, timeout: float = 2, executor_wait: bool = True):
         """
         Ensure the worker process is stopped, waiting at most
         *timeout* seconds before terminating it abruptly.
         """
-        loop = IOLoop.current()
-        deadline = loop.time() + timeout
+        deadline = time() + timeout
 
         if self.status == Status.stopped:
             return
@@ -776,19 +771,19 @@ class WorkerProcess:
         self.child_stop_q.put(
             {
                 "op": "stop",
-                "timeout": max(0, deadline - loop.time()) * 0.8,
+                "timeout": max(0, deadline - time()) * 0.8,
                 "executor_wait": executor_wait,
             }
         )
         await asyncio.sleep(0)  # otherwise we get broken pipe errors
         self.child_stop_q.close()
 
-        while process.is_alive() and loop.time() < deadline:
+        while process.is_alive() and time() < deadline:
             await asyncio.sleep(0.05)
 
         if process.is_alive():
             logger.warning(
-                "Worker process still alive after %d seconds, killing", timeout
+                f"Worker process still alive after {timeout} seconds, killing"
             )
             try:
                 await process.terminate()
diff -pruN 2022.01.0+ds.1-1/distributed/node.py 2022.02.0+ds.1-1/distributed/node.py
--- 2022.01.0+ds.1-1/distributed/node.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/node.py	2022-02-11 16:21:14.000000000 +0000
@@ -87,14 +87,35 @@ class ServerNode(Server):
         logger.addHandler(self._deque_handler)
         weakref.finalize(self, logger.removeHandler, self._deque_handler)
 
-    def get_logs(self, comm=None, n=None):
+    def get_logs(self, comm=None, start=None, n=None, timestamps=False):
+        """
+        Fetch log entries for this node
+
+        Parameters
+        ----------
+        start : float, optional
+            A time (in seconds) to begin filtering log entries from
+        n : int, optional
+            Maximum number of log entries to return from filtered results
+        timestamps : bool, default False
+            Whether to include the time each log entry was generated
+
+        Returns
+        -------
+            List of tuples containing, for each filtered entry, the timestamp (if requested), the log level, and the message
+        """
         deque_handler = self._deque_handler
-        if n is None:
-            L = list(deque_handler.deque)
-        else:
-            L = deque_handler.deque
-            L = [L[-i] for i in range(min(n, len(L)))]
-        return [(msg.levelname, deque_handler.format(msg)) for msg in L]
+        if start is None:
+            start = -1
+        L = []
+        for count, msg in enumerate(deque_handler.deque):
+            if n and count >= n or msg.created < start:
+                break
+            if timestamps:
+                L.append((msg.created, msg.levelname, deque_handler.format(msg)))
+            else:
+                L.append((msg.levelname, deque_handler.format(msg)))
+        return L
 
     def start_http_server(
         self, routes, dashboard_address, default_port=0, ssl_options=None
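
A hedged usage sketch for the extended `get_logs()` signature documented above; `node` stands for any running `ServerNode` (for example a scheduler), and the 300-second window and 100-entry cap are arbitrary choices:

    from time import time


    def print_recent_logs(node, window=300, limit=100):
        # With timestamps=True each entry is (timestamp, level, message);
        # `start` filters out entries created before the given epoch time.
        for created, level, message in node.get_logs(
            start=time() - window, n=limit, timestamps=True
        ):
            print(f"{created:.3f} {level:<8} {message}")
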
diff -pruN 2022.01.0+ds.1-1/distributed/protocol/compression.py 2022.02.0+ds.1-1/distributed/protocol/compression.py
--- 2022.01.0+ds.1-1/distributed/protocol/compression.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/protocol/compression.py	2022-02-11 16:21:14.000000000 +0000
@@ -197,7 +197,7 @@ def maybe_compress(
         return None, payload
     if len(payload) < min_size:
         return None, payload
-    if len(payload) > 2 ** 31:  # Too large, compression libraries often fail
+    if len(payload) > 2**31:  # Too large, compression libraries often fail
         return None, payload
 
     min_size = int(min_size)
diff -pruN 2022.01.0+ds.1-1/distributed/protocol/tests/test_serialize.py 2022.02.0+ds.1-1/distributed/protocol/tests/test_serialize.py
--- 2022.01.0+ds.1-1/distributed/protocol/tests/test_serialize.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/protocol/tests/test_serialize.py	2022-02-11 16:21:14.000000000 +0000
@@ -242,8 +242,8 @@ def test_serialize_bytes(kwargs):
         "abc",
         np.arange(5),
         b"ab" * int(40e6),
-        int(2 ** 26) * b"ab",
-        (int(2 ** 25) * b"ab", int(2 ** 25) * b"ab"),
+        int(2**26) * b"ab",
+        (int(2**25) * b"ab", int(2**25) * b"ab"),
     ]:
         b = serialize_bytes(x, **kwargs)
         assert isinstance(b, bytes)
@@ -447,14 +447,14 @@ def test_different_compression_families(
         if not isinstance(obj, MyObjWithCompression):
             raise NotImplementedError()
         header = {"compression": [True]}
-        return header, [bytes(2 ** 20)]
+        return header, [bytes(2**20)]
 
     def my_dumps_no_compression(obj, context=None):
         if not isinstance(obj, MyObjWithNoCompression):
             raise NotImplementedError()
 
         header = {"compression": [False]}
-        return header, [bytes(2 ** 20)]
+        return header, [bytes(2**20)]
 
     def my_loads(header, frames):
         return pickle.loads(frames[0])
@@ -473,7 +473,7 @@ def test_different_compression_families(
 
 @gen_test()
 async def test_frame_split():
-    data = b"1234abcd" * (2 ** 20)  # 8 MiB
+    data = b"1234abcd" * (2**20)  # 8 MiB
     assert dask.sizeof.sizeof(data) == dask.utils.parse_bytes("8MiB")
 
     size = dask.utils.parse_bytes("3MiB")
diff -pruN 2022.01.0+ds.1-1/distributed/protocol/utils.py 2022.02.0+ds.1-1/distributed/protocol/utils.py
--- 2022.01.0+ds.1-1/distributed/protocol/utils.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/protocol/utils.py	2022-02-11 16:21:14.000000000 +0000
@@ -12,7 +12,7 @@ BIG_BYTES_SHARD_SIZE = dask.utils.parse_
 
 
 msgpack_opts = {
-    ("max_%s_len" % x): 2 ** 31 - 1 for x in ["str", "bin", "array", "map", "ext"]
+    ("max_%s_len" % x): 2**31 - 1 for x in ["str", "bin", "array", "map", "ext"]
 }
 msgpack_opts["strict_map_key"] = False
 msgpack_opts["raw"] = False
diff -pruN 2022.01.0+ds.1-1/distributed/pytest_resourceleaks.py 2022.02.0+ds.1-1/distributed/pytest_resourceleaks.py
--- 2022.01.0+ds.1-1/distributed/pytest_resourceleaks.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/pytest_resourceleaks.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,311 +1,393 @@
+"""A pytest plugin to trace resource leaks.
+
+Usage
+-----
+This plugin is enabled from the command line with -L / --leaks.
+See `pytest --help` for further configuration settings.
+
+You may mark individual tests as known to be leaking with the marker
+
+    @pytest.mark.leaking(check1, check2, ...)
+
+Where the valid checks are 'fds', 'memory', 'threads', 'processes', 'tracemalloc'.
+
+e.g.
+
+    @pytest.mark.leaking("threads")
+
+If you do, the specified checks won't report errors.
+
+Known issues
+------------
+- Tests that contain imports will be flagged as leaking RAM (memory and tracemalloc
+  checks) if it's the first time in the test suite that the import happens; e.g.
+
+      def test1():
+          pytest.importorskip("numpy")
+
+  Same issue happens with tests that populate global caches (e.g. linecache, re).
+  A previous version of this plugin had an option to silently retry a test once after a
+  failure; that version is no longer working as of the latest pytest. Reinstating the
+  flag would solve this issue. See pytest_rerunfailures code for inspiration.
+
+- The @gen_cluster fixture leaks 2 fds on the first test decorated with it within a test
+  suite; this is likely caused by an incomplete warmup routine of
+  distributed.comm.tcp.BaseTCPConnector.
+  This issue would also be fixed by rerunning failing tests.
+
+- The @pytest.mark.flaky decorator (pytest_rerunfailures) completely disables this
+  plugin for the decorated tests.
+
+- You cannot expect the process memory to go down immediately and deterministically as
+  soon as you garbage collect Python objects. This makes the 'memory' check very
+  unreliable. On Linux, this can be improved by reducing the MALLOC_TRIM glibc setting
+  (see distributed.yaml).
 """
-A pytest plugin to trace resource leaks.
-"""
-import collections
+from __future__ import annotations
+
 import gc
 import os
 import sys
 import threading
-import time
+import tracemalloc
+from collections import defaultdict
+from time import sleep
+from typing import Any, ClassVar
 
+import psutil
 import pytest
 
+from .comm.tcp import BaseTCPConnector
+from .compatibility import WINDOWS
+from .metrics import time
+
 
 def pytest_addoption(parser):
     group = parser.getgroup("resource leaks")
+    known_checkers = ", ".join(sorted("'%s'" % s for s in all_checkers))
     group.addoption(
         "-L",
         "--leaks",
-        action="store",
-        dest="leaks",
-        help="""\
-List of resources to monitor for leaks before and after each test.
-Can be 'all' or a comma-separated list of resource names
-(possible values: {known_checkers}).
-""".format(
-            known_checkers=", ".join(sorted("'%s'" % s for s in all_checkers))
-        ),
+        help="List of resources to monitor for leaks before and after each test. "
+        "Can be 'all' or a comma-separated list of resource names "
+        f"(possible values: {known_checkers}).",
     )
     group.addoption(
         "--leaks-timeout",
-        action="store",
         type=float,
-        dest="leaks_timeout",
         default=0.5,
-        help="""\
-Wait at most this number of seconds to mark a test leaking
-(default: %(default)s).
-""",
+        help="Wait at most these many seconds before marking a test as leaking "
+        "(default: %(default)s)",
     )
     group.addoption(
         "--leaks-fail",
         action="store_true",
-        dest="leaks_mark_failed",
-        default=False,
-        help="""Mark leaked tests failed.""",
-    )
-    group.addoption(
-        "--leak-retries",
-        action="store",
-        type=int,
-        dest="leak_retries",
-        default=1,
-        help="""\
-Max number of times to retry a test when it leaks, to ignore
-warmup-related issues (default: 1).
-""",
+        help="Mark leaked tests as failed",
     )
 
 
-def pytest_configure(config):
+def pytest_configure(config) -> None:
     leaks = config.getvalue("leaks")
-    if leaks:
-        if leaks == "all":
-            leaks = sorted(all_checkers)
-        else:
-            leaks = leaks.split(",")
-        unknown = sorted(set(leaks) - set(all_checkers))
-        if unknown:
-            raise ValueError(f"unknown resources: {unknown!r}")
-
-        checkers = [all_checkers[leak]() for leak in leaks]
-        checker = LeakChecker(
-            checkers=checkers,
-            grace_delay=config.getvalue("leaks_timeout"),
-            mark_failed=config.getvalue("leaks_mark_failed"),
-            max_retries=config.getvalue("leak_retries"),
-        )
-        config.pluginmanager.register(checker, "leaks_checker")
+    if not leaks:
+        return
+    if leaks == "all":
+        leaks = sorted(c for c in all_checkers if c != "demo")
+    else:
+        leaks = leaks.split(",")
+    unknown = sorted(set(leaks) - set(all_checkers))
+    if unknown:
+        raise ValueError(f"unknown resources: {unknown!r}")
+
+    checkers = [all_checkers[leak]() for leak in leaks]
+    checker = LeakChecker(
+        checkers=checkers,
+        grace_delay=config.getvalue("leaks_timeout"),
+        mark_failed=config.getvalue("leaks_fail"),
+    )
+    config.pluginmanager.register(checker, "leaks_checker")
 
 
-all_checkers = {}
+all_checkers: dict[str, type[ResourceChecker]] = {}
 
 
-def register_checker(name):
-    def decorate(cls):
-        assert issubclass(cls, ResourceChecker), cls
+class ResourceChecker:
+    name: ClassVar[str]
+
+    def __init_subclass__(cls, name: str):
         assert name not in all_checkers
+        cls.name = name
         all_checkers[name] = cls
-        return cls
 
-    return decorate
-
-
-class ResourceChecker:
-    def on_start_test(self):
+    def on_start_test(self) -> None:
         pass
 
-    def on_stop_test(self):
+    def on_stop_test(self) -> None:
         pass
 
-    def on_retry(self):
+    def on_retry(self) -> None:
         pass
 
-    def measure(self):
+    def measure(self) -> Any:
         raise NotImplementedError
 
-    def has_leak(self, before, after):
+    def has_leak(self, before: Any, after: Any) -> bool:
         raise NotImplementedError
 
-    def format(self, before, after):
+    def format(self, before: Any, after: Any) -> str:
         raise NotImplementedError
 
 
-@register_checker("fds")
-class FDChecker(ResourceChecker):
-    def measure(self):
-        if os.name == "posix":
-            import psutil
+class DemoChecker(ResourceChecker, name="demo"):
+    """Checker that always leaks. Used to test the core LeakChecker functionality."""
 
-            return psutil.Process().num_fds()
-        else:
+    i: int
+
+    def __init__(self):
+        self.i = 0
+
+    def measure(self) -> int:
+        self.i += 1
+        return self.i
+
+    def has_leak(self, before: int, after: int) -> bool:
+        return after > before
+
+    def format(self, before: int, after: int) -> str:
+        return f"counter increased from {before} to {after}"
+
+
+class FDChecker(ResourceChecker, name="fds"):
+    def __init__(self):
+        BaseTCPConnector.warmup()
+
+    def measure(self) -> int:
+        if WINDOWS:
+            # Don't use num_handles(); you'll get tens of thousands of reported leaks
             return 0
+        return psutil.Process().num_fds()
 
-    def has_leak(self, before, after):
+    def has_leak(self, before: int, after: int) -> bool:
         return after > before
 
-    def format(self, before, after):
-        return "leaked %d file descriptor(s)" % (after - before)
+    def format(self, before: int, after: int) -> str:
+        return f"leaked {after - before} file descriptor(s) ({before}->{after})"
 
 
-@register_checker("memory")
-class RSSMemoryChecker(ResourceChecker):
-    def measure(self):
-        import psutil
+class RSSMemoryChecker(ResourceChecker, name="memory"):
+    LEAK_THRESHOLD = 10 * 2**20
 
+    def measure(self) -> int:
         return psutil.Process().memory_info().rss
 
-    def has_leak(self, before, after):
-        return after > before + 1e7
+    def has_leak(self, before: int, after: int) -> bool:
+        return after > before + self.LEAK_THRESHOLD
 
-    def format(self, before, after):
-        return "leaked %d MB of RSS memory" % ((after - before) / 1e6)
+    def format(self, before: int, after: int) -> str:
+        return f"leaked {(after - before) / 2**20:.1f} MiB of RSS memory"
 
 
-@register_checker("threads")
-class ActiveThreadsChecker(ResourceChecker):
-    def measure(self):
+class ActiveThreadsChecker(ResourceChecker, name="threads"):
+    def __init__(self):
+        BaseTCPConnector.warmup()
+
+    def measure(self) -> set[threading.Thread]:
         return set(threading.enumerate())
 
-    def has_leak(self, before, after):
+    def has_leak(
+        self, before: set[threading.Thread], after: set[threading.Thread]
+    ) -> bool:
         return not after <= before
 
-    def format(self, before, after):
+    def format(
+        self, before: set[threading.Thread], after: set[threading.Thread]
+    ) -> str:
         leaked = after - before
         assert leaked
-        return "leaked %d Python threads: %s" % (len(leaked), sorted(leaked, key=str))
+        return f"leaked {len(leaked)} Python thread(s): {sorted(leaked, key=str)}"
+
+
+class ChildProcess:
+    """Child process info
+
+    We use pid and creation time as keys to disambiguate between processes (and protect
+    against pid reuse); other properties such as cmdline may change for a given process
+    """
+
+    pid: int
+    name: str
+    cmdline: list[str]
+    create_time: float
+
+    def __init__(self, p: psutil.Process):
+        self.pid = p.pid
+        self.name = p.name()
+        self.cmdline = p.cmdline()
+        self.create_time = p.create_time()
+
+    def __hash__(self) -> int:
+        return self.pid
+
+    def __eq__(self, other: object) -> bool:
+        return (
+            isinstance(other, ChildProcess)
+            and self.pid == other.pid
+            and self.create_time == other.create_time
+        )
+
+    def __lt__(self, other: object) -> bool:
+        if not isinstance(other, ChildProcess):
+            raise TypeError(other)
+        return self.pid < other.pid
 
 
-class _ChildProcess(
-    collections.namedtuple("_ChildProcess", ("pid", "name", "cmdline"))
-):
-    @classmethod
-    def from_process(cls, p):
-        return cls(p.pid, p.name(), p.cmdline())
-
-
-@register_checker("processes")
-class ChildProcessesChecker(ResourceChecker):
-    def measure(self):
-        import psutil
-
-        # We use pid and creation time as keys to disambiguate between
-        # processes (and protect against pid reuse)
-        # Other properties such as cmdline may change for a given process
-        children = {}
+class ChildProcessesChecker(ResourceChecker, name="processes"):
+    def measure(self) -> set[ChildProcess]:
+        children = set()
         p = psutil.Process()
         for c in p.children(recursive=True):
             try:
                 with c.oneshot():
-                    if c.ppid() == p.pid and os.path.samefile(c.exe(), sys.executable):
-                        cmdline = c.cmdline()
-                        if any(
+                    if (
+                        c.ppid() == p.pid
+                        and os.path.samefile(c.exe(), sys.executable)
+                        and any(
+                            # Skip multiprocessing resource tracker
                             a.startswith(
-                                "from multiprocessing.semaphore_tracker import main"
+                                "from multiprocessing.resource_tracker import main"
                             )
-                            for a in cmdline
-                        ):
-                            # Skip multiprocessing semaphore tracker
-                            continue
-                        if any(
-                            a.startswith("from multiprocessing.forkserver import main")
-                            for a in cmdline
-                        ):
-                            # Skip forkserver process, the forkserver's children
+                            # Skip forkserver process; the forkserver's children
                             # however will be recorded normally
-                            continue
-                    children[(c.pid, c.create_time())] = _ChildProcess.from_process(c)
+                            or a.startswith(
+                                "from multiprocessing.forkserver import main"
+                            )
+                            for a in c.cmdline()
+                        )
+                    ):
+                        continue
+
+                    children.add(ChildProcess(c))
             except psutil.NoSuchProcess:
                 pass
         return children
 
-    def has_leak(self, before, after):
-        return not set(after) <= set(before)
+    def has_leak(self, before: set[ChildProcess], after: set[ChildProcess]) -> bool:
+        return not after <= before
 
-    def format(self, before, after):
-        leaked = set(after) - set(before)
+    def format(self, before: set[ChildProcess], after: set[ChildProcess]) -> str:
+        leaked = sorted(after - before)
         assert leaked
-        formatted = []
-        for key in sorted(leaked):
-            p = after[key]
-            formatted.append(
-                "  - pid={p.pid}, name={p.name!r}, cmdline={p.cmdline!r}".format(p=p)
-            )
-        return "leaked %d processes:\n%s" % (len(leaked), "\n".join(formatted))
+        return f"leaked {len(leaked)} processes:\n" + "\n".join(
+            f"  - pid={p.pid}, name={p.name!r}, cmdline={p.cmdline!r}" for p in leaked
+        )
 
 
-@register_checker("tracemalloc")
-class TracemallocMemoryChecker(ResourceChecker):
-    def __init__(self):
-        global tracemalloc
-        import tracemalloc
+class TracemallocMemoryChecker(ResourceChecker, name="tracemalloc"):
+    # Report a leak if the traced memory increased by at least this many bytes
+    LEAK_THRESHOLD = 2**20
+    # Report at most this many leaks
+    NDIFF = 5
+    # Report less than NDIFF leaks if they amount to less than this many bytes
+    MIN_SIZE_DIFF = 200 * 1024
 
-    def on_start_test(self):
+    def on_start_test(self) -> None:
         tracemalloc.start(1)
 
-    def on_stop_test(self):
+    def on_stop_test(self) -> None:
         tracemalloc.stop()
 
-    def measure(self):
-        import tracemalloc
-
-        current, peak = tracemalloc.get_traced_memory()
+    def measure(self) -> tuple[int, tracemalloc.Snapshot]:
+        current, _ = tracemalloc.get_traced_memory()
         snap = tracemalloc.take_snapshot()
         return current, snap
 
-    def has_leak(self, before, after):
-        return after[0] > before[0] + 1e6
-
-    def format(self, before, after):
+    def has_leak(
+        self,
+        before: tuple[int, tracemalloc.Snapshot],
+        after: tuple[int, tracemalloc.Snapshot],
+    ):
+        return after[0] > before[0] + self.LEAK_THRESHOLD
+
+    def format(
+        self,
+        before: tuple[int, tracemalloc.Snapshot],
+        after: tuple[int, tracemalloc.Snapshot],
+    ) -> str:
         bytes_before, snap_before = before
         bytes_after, snap_after = after
         diff = snap_after.compare_to(snap_before, "traceback")
-        ndiff = 5
-        min_size_diff = 2e5
 
-        lines = []
-        lines += [
-            "leaked %.1f MB of traced Python memory"
-            % ((bytes_after - bytes_before) / 1e6)
+        lines = [
+            f"leaked {(bytes_after - bytes_before) / 2 ** 20:.1f} MiB "
+            "of traced Python memory"
         ]
-        for stat in diff[:ndiff]:
+        for stat in diff[: self.NDIFF]:
             size_diff = stat.size_diff or stat.size
-            if size_diff < min_size_diff:
+            if size_diff < self.MIN_SIZE_DIFF:
                 break
             count = stat.count_diff or stat.count
-            lines += ["  - leaked %.1f MB in %d calls at:" % (size_diff / 1e6, count)]
+            lines += [f"  - leaked {size_diff / 2**20:.1f} MiB in {count} calls at:"]
             lines += ["    " + line for line in stat.traceback.format()]
 
         return "\n".join(lines)
 
 
 class LeakChecker:
-    def __init__(self, checkers, grace_delay, mark_failed, max_retries):
+    checkers: list[ResourceChecker]
+    grace_delay: float
+    mark_failed: bool
+
+    # {nodeid: {checkers}}
+    skip_checkers: dict[str, set[ResourceChecker]]
+    # {nodeid: {checker: [(before, after)]}}
+    counters: dict[str, dict[ResourceChecker, list[tuple[Any, Any]]]]
+    # {nodeid: [(checker, before, after)]}
+    leaks: dict[str, list[tuple[ResourceChecker, Any, Any]]]
+    # {nodeid: {outcomes}}
+    outcomes: defaultdict[str, set[str]]
+
+    def __init__(
+        self,
+        checkers: list[ResourceChecker],
+        grace_delay: float,
+        mark_failed: bool,
+    ):
         self.checkers = checkers
         self.grace_delay = grace_delay
         self.mark_failed = mark_failed
-        self.max_retries = max_retries
 
-        # {nodeid: {checkers}}
         self.skip_checkers = {}
-        # {nodeid: {checker: [(before, after)]}}
         self.counters = {}
-        # {nodeid: [(checker, before, after)]}
         self.leaks = {}
-        # {nodeid: {outcomes}}
-        self.outcomes = collections.defaultdict(set)
+        self.outcomes = defaultdict(set)
 
-        # Reentrancy guard
-        self._retrying = False
-
-    def cleanup(self):
+    def cleanup(self) -> None:
         gc.collect()
 
-    def checks_for_item(self, nodeid):
+    def checks_for_item(self, nodeid: str) -> list[ResourceChecker]:
         return [c for c in self.checkers if c not in self.skip_checkers.get(nodeid, ())]
 
-    def measure(self, nodeid):
+    def measure(self, nodeid: str) -> list[tuple[ResourceChecker, Any]]:
         # Return items in order
         return [(c, c.measure()) for c in self.checks_for_item(nodeid)]
 
-    def measure_before_test(self, nodeid):
+    def measure_before_test(self, nodeid: str) -> None:
         for checker in self.checks_for_item(nodeid):
             checker.on_start_test()
         for checker, before in self.measure(nodeid):
             assert before is not None
             self.counters[nodeid][checker].append((before, None))
 
-    def measure_after_test(self, nodeid):
+    def measure_after_test(self, nodeid: str) -> None:
         outcomes = self.outcomes[nodeid]
-        assert outcomes
-        if outcomes != {"passed"}:
+        # pytest_rerunfailures (@pytest.mark.flaky) breaks this plugin and causes
+        # outcomes to be empty.
+        if "passed" not in outcomes:
             # Test failed or skipped
             return
 
-        def run_measurements():
+        def run_measurements() -> list[tuple[ResourceChecker, Any, Any]]:
             leaks = []
             for checker, after in self.measure(nodeid):
-                assert after is not None
                 c = self.counters[nodeid][checker]
                 before, _ = c[-1]
                 c[-1] = (before, after)
@@ -313,7 +395,7 @@ class LeakChecker:
                     leaks.append((checker, before, after))
             return leaks
 
-        t1 = time.time()
+        t1 = time()
         deadline = t1 + self.grace_delay
         leaks = run_measurements()
         if leaks:
@@ -322,8 +404,8 @@ class LeakChecker:
                 c.on_retry()
             leaks = run_measurements()
 
-        while leaks and time.time() < deadline:
-            time.sleep(0.1)
+        while leaks and time() < deadline:
+            sleep(0.1)
             self.cleanup()
             for c, _, _ in leaks:
                 c.on_retry()
@@ -337,34 +419,6 @@ class LeakChecker:
         for checker in self.checks_for_item(nodeid):
             checker.on_stop_test()
 
-    def maybe_retry(self, item, nextitem=None):
-        def run_test_again():
-            # This invokes our setup/teardown hooks again
-            # Inspired by https://pypi.python.org/pypi/pytest-rerunfailures
-            from _pytest.runner import runtestprotocol
-
-            item._initrequest()  # Re-init fixtures
-            runtestprotocol(item, nextitem=nextitem, log=False)
-
-        nodeid = item.nodeid
-        leaks = self.leaks.get(nodeid)
-        if leaks:
-            self._retrying = True
-            try:
-                for i in range(self.max_retries):
-                    run_test_again()
-            except Exception:
-                print("--- Exception when re-running test ---")
-                import traceback
-
-                traceback.print_exc()
-            else:
-                leaks = self.leaks.get(nodeid)
-            finally:
-                self._retrying = False
-
-        return leaks
-
     # Note on hook execution order:
     #   pytest_runtest_protocol
     #       pytest_runtest_setup
@@ -378,22 +432,25 @@ class LeakChecker:
 
     @pytest.hookimpl(hookwrapper=True)
     def pytest_runtest_protocol(self, item, nextitem):
-        if not self._retrying:
-            nodeid = item.nodeid
-            assert nodeid not in self.counters
-            self.counters[nodeid] = {c: [] for c in self.checkers}
-
-            leaking = item.get_marker("leaking")
-            if leaking is not None:
-                unknown = sorted(set(leaking.args) - set(all_checkers))
-                if unknown:
-                    raise ValueError(
-                        f"pytest.mark.leaking: unknown resources {unknown!r}"
-                    )
-                classes = tuple(all_checkers[a] for a in leaking.args)
-                self.skip_checkers[nodeid] = {
-                    c for c in self.checkers if isinstance(c, classes)
-                }
+        if not self.checkers:
+            return
+
+        nodeid = item.nodeid
+        assert nodeid not in self.counters
+        self.counters[nodeid] = {c: [] for c in self.checkers}
+
+        leaking_mark = item.get_closest_marker("leaking")
+        if leaking_mark:
+            unknown = sorted(set(leaking_mark.args) - set(all_checkers))
+            if unknown:
+                raise ValueError(
+                    f"pytest.mark.leaking: unknown resources {unknown}; "
+                    f"must be one of {list(all_checkers)}"
+                )
+            classes = tuple(all_checkers[a] for a in leaking_mark.args)
+            self.skip_checkers[nodeid] = {
+                c for c in self.checkers if isinstance(c, classes)
+            }
 
         yield
 
@@ -406,34 +463,31 @@ class LeakChecker:
     def pytest_runtest_teardown(self, item):
         yield
         self.measure_after_test(item.nodeid)
-        if not self._retrying:
-            leaks = self.maybe_retry(item)
-            if leaks and self.mark_failed:
-                # Trigger fail here to allow stopping with `-x`
-                pytest.fail()
+        leaks = self.leaks.get(item.nodeid)
+        if leaks and self.mark_failed:
+            # Trigger fail here to allow stopping with `-x`
+            pytest.fail()
 
     @pytest.hookimpl(hookwrapper=True, trylast=True)
     def pytest_report_teststatus(self, report):
         nodeid = report.nodeid
-        outcomes = self.outcomes[nodeid]
-        outcomes.add(report.outcome)
+        self.outcomes[nodeid].add(report.outcome)
         outcome = yield
-        if not self._retrying:
-            if report.when == "teardown":
-                leaks = self.leaks.get(report.nodeid)
-                if leaks:
-                    if self.mark_failed:
-                        outcome.force_result(("failed", "L", "LEAKED"))
-                        report.outcome = "failed"
-                        report.longrepr = "\n".join(
-                            [
-                                f"{nodeid} {checker.format(before, after)}"
-                                for checker, before, after in leaks
-                            ]
-                        )
-                    else:
-                        outcome.force_result(("leaked", "L", "LEAKED"))
-                # XXX should we log retried tests
+        if report.when == "teardown":
+            leaks = self.leaks.get(report.nodeid)
+            if leaks:
+                if self.mark_failed:
+                    outcome.force_result(("failed", "L", "LEAKED"))
+                    report.outcome = "failed"
+                    report.longrepr = "\n".join(
+                        [
+                            f"{nodeid} leaking {checker.name}: "
+                            f"{checker.format(before, after)}"
+                            for checker, before, after in leaks
+                        ]
+                    )
+                else:
+                    outcome.force_result(("leaked", "L", "LEAKED"))
 
     @pytest.hookimpl
     def pytest_terminal_summary(self, terminalreporter, exitstatus):
@@ -446,4 +500,7 @@ class LeakChecker:
             for rep in leaked:
                 nodeid = rep.nodeid
                 for checker, before, after in self.leaks[nodeid]:
-                    tr.line(f"{rep.nodeid} {checker.format(before, after)}")
+                    tr.line(
+                        f"{rep.nodeid} leaking {checker.name}: "
+                        f"{checker.format(before, after)}"
+                    )
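
For background, the tracemalloc checker above is a thin wrapper over the standard library's snapshot-diff recipe. A self-contained sketch of that recipe, reusing the 1 MiB threshold and the MiB formatting from the checker (the oversized string is a contrived leak for demonstration):

    import tracemalloc

    LEAK_THRESHOLD = 2**20  # 1 MiB, as in TracemallocMemoryChecker

    tracemalloc.start(1)  # record one stack frame per allocation
    bytes_before, _ = tracemalloc.get_traced_memory()
    snap_before = tracemalloc.take_snapshot()

    retained = ["x" * 2**21]  # deliberately keep ~2 MiB alive

    bytes_after, _ = tracemalloc.get_traced_memory()
    snap_after = tracemalloc.take_snapshot()
    tracemalloc.stop()

    if bytes_after > bytes_before + LEAK_THRESHOLD:
        print(f"leaked {(bytes_after - bytes_before) / 2**20:.1f} MiB of traced Python memory")
        for stat in snap_after.compare_to(snap_before, "traceback")[:5]:
            size_diff = stat.size_diff or stat.size
            count = stat.count_diff or stat.count
            print(f"  - leaked {size_diff / 2**20:.1f} MiB in {count} calls at:")
            print("\n".join("    " + line for line in stat.traceback.format()))
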
diff -pruN 2022.01.0+ds.1-1/distributed/scheduler.py 2022.02.0+ds.1-1/distributed/scheduler.py
--- 2022.01.0+ds.1-1/distributed/scheduler.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/scheduler.py	2022-02-11 16:21:14.000000000 +0000
@@ -56,7 +56,7 @@ from distributed.utils import recursive_
 
 from . import preloading, profile
 from . import versions as version_module
-from .active_memory_manager import ActiveMemoryManagerExtension
+from .active_memory_manager import ActiveMemoryManagerExtension, RetireWorker
 from .batched import BatchedSend
 from .comm import (
     Comm,
@@ -751,12 +751,12 @@ class WorkerState:
         return ws
 
     def __repr__(self):
-        return "<WorkerState %r, name: %s, status: %s, memory: %d, processing: %d>" % (
-            self._address,
-            self._name,
-            self._status.name,
-            len(self._has_what),
-            len(self._processing),
+        name = f", name: {self.name}" if self.name != self.address else ""
+        return (
+            f"<WorkerState {self._address!r}{name}, "
+            f"status: {self._status.name}, "
+            f"memory: {len(self._has_what)}, "
+            f"processing: {len(self._processing)}>"
         )
 
     def _repr_html_(self):
@@ -787,11 +787,6 @@ class WorkerState:
             **self._extra,
         }
 
-    @property
-    def ncores(self):
-        warnings.warn("WorkerState.ncores has moved to WorkerState.nthreads")
-        return self._nthreads
-
 
 @final
 @cclass
@@ -3984,11 +3979,11 @@ class Scheduler(SchedulerState, ServerNo
 
     def __repr__(self):
         parent: SchedulerState = cast(SchedulerState, self)
-        return '<Scheduler: "%s" workers: %d cores: %d, tasks: %d>' % (
-            self.address,
-            len(parent._workers_dv),
-            parent._total_nthreads,
-            len(parent._tasks),
+        return (
+            f"<Scheduler {self.address!r}, "
+            f"workers: {len(parent._workers_dv)}, "
+            f"cores: {parent._total_nthreads}, "
+            f"tasks: {len(parent._tasks)}>"
         )
 
     def _repr_html_(self):
@@ -4374,6 +4369,9 @@ class Scheduler(SchedulerState, ServerNo
                     await comm.write(msg)
                 return
 
+            self.log_event(address, {"action": "add-worker"})
+            self.log_event("all", {"action": "add-worker", "worker": address})
+
             ws: WorkerState
             parent._workers[address] = ws = WorkerState(
                 address=address,
@@ -4483,8 +4481,6 @@ class Scheduler(SchedulerState, ServerNo
 
             self.send_all(client_msgs, worker_msgs)
 
-            self.log_event(address, {"action": "add-worker"})
-            self.log_event("all", {"action": "add-worker", "worker": address})
             logger.info("Register worker %s", ws)
 
             msg = {
@@ -4986,13 +4982,14 @@ class Scheduler(SchedulerState, ServerNo
 
             ws: WorkerState = parent._workers_dv[address]
 
-            self.log_event(
-                ["all", address],
-                {
-                    "action": "remove-worker",
-                    "processing-tasks": dict(ws._processing),
-                },
-            )
+            event_msg = {
+                "action": "remove-worker",
+                "processing-tasks": dict(ws._processing),
+            }
+            self.log_event(address, event_msg.copy())
+            event_msg["worker"] = address
+            self.log_event("all", event_msg)
+
             logger.info("Remove worker %s", ws)
             if close:
                 with suppress(AttributeError, CommClosedError):
@@ -5481,6 +5478,22 @@ class Scheduler(SchedulerState, ServerNo
         self.send_all(client_msgs, worker_msgs)
 
     def handle_missing_data(self, key=None, errant_worker=None, **kwargs):
+        """Signal that `errant_worker` does not hold `key`
+
+        This may either indicate that `errant_worker` is dead or that we may
+        be working with stale data and need to remove `key` from the worker's
+        `has_what`.
+
+        If no replica of a task is available anymore, the task is transitioned
+        back to released and rescheduled, if possible.
+
+        Parameters
+        ----------
+        key : str, optional
+            Task key that could not be found, by default None
+        errant_worker : str, optional
+            Address of the worker supposed to hold a replica, by default None
+        """
         parent: SchedulerState = cast(SchedulerState, self)
         logger.debug("handle missing data key=%s worker=%s", key, errant_worker)
         self.log_event(errant_worker, {"action": "missing-data", "key": key})
@@ -5488,9 +5501,10 @@ class Scheduler(SchedulerState, ServerNo
         if ts is None:
             return
         ws: WorkerState = parent._workers_dv.get(errant_worker)
+
         if ws is not None and ws in ts._who_has:
             parent.remove_replica(ts, ws)
-        if not ts._who_has:
+        if ts.state == "memory" and not ts._who_has:
             if ts._run_spec:
                 self.transitions({key: "released"})
             else:
@@ -6805,29 +6819,31 @@ class Scheduler(SchedulerState, ServerNo
     async def retire_workers(
         self,
         comm=None,
-        workers=None,
-        remove=True,
-        close_workers=False,
-        names=None,
-        lock=True,
+        *,
+        workers: "list[str] | None" = None,
+        names: "list | None" = None,
+        close_workers: bool = False,
+        remove: bool = True,
         **kwargs,
     ) -> dict:
         """Gracefully retire workers from cluster
 
         Parameters
         ----------
-        workers: list (optional)
+        workers: list[str] (optional)
             List of worker addresses to retire.
-            If not provided we call ``workers_to_close`` which finds a good set
         names: list (optional)
             List of worker names to retire.
-        remove: bool (defaults to True)
-            Whether or not to remove the worker metadata immediately or else
-            wait for the worker to contact us
+            Mutually exclusive with ``workers``.
+            If neither ``workers`` nor ``names`` is provided, we call
+            ``workers_to_close``, which finds a good set.
         close_workers: bool (defaults to False)
             Whether or not to actually close the worker explicitly from here.
             Otherwise we expect some external job scheduler to finish off the
             worker.
+        remove: bool (defaults to True)
+            Whether or not to remove the worker metadata immediately or else
+            wait for the worker to contact us
         **kwargs: dict
             Extra options to pass to workers_to_close to determine which
             workers we should drop
@@ -6845,78 +6861,126 @@ class Scheduler(SchedulerState, ServerNo
         ws: WorkerState
         ts: TaskState
         with log_errors():
-            async with self._lock if lock else empty_context:
+            # This lock makes retire_workers, rebalance, and replicate mutually
+            # exclusive and will no longer be necessary once rebalance and replicate are
+            # migrated to the Active Memory Manager.
+            # Note that, incidentally, it also prevents multiple calls to retire_workers
+            # from running in parallel; this side effect is not actually required.
+            async with self._lock:
                 if names is not None:
                     if workers is not None:
                         raise TypeError("names and workers are mutually exclusive")
                     if names:
                         logger.info("Retire worker names %s", names)
-                    names = set(map(str, names))
-                    workers = {
-                        ws._address
+                    # Support cases where names are passed through a CLI and become
+                    # strings
+                    names_set = {str(name) for name in names}
+                    wss = {
+                        ws
                         for ws in parent._workers_dv.values()
-                        if str(ws._name) in names
+                        if str(ws._name) in names_set
                     }
-                elif workers is None:
-                    while True:
-                        try:
-                            workers = self.workers_to_close(**kwargs)
-                            if not workers:
-                                return {}
-                            return await self.retire_workers(
-                                workers=workers,
-                                remove=remove,
+                elif workers is not None:
+                    wss = {
+                        parent._workers_dv[address]
+                        for address in workers
+                        if address in parent._workers_dv
+                    }
+                else:
+                    wss = {
+                        parent._workers_dv[address]
+                        for address in self.workers_to_close(**kwargs)
+                    }
+                if not wss:
+                    return {}
+
+                stop_amm = False
+                amm: ActiveMemoryManagerExtension = self.extensions["amm"]
+                if not amm.running:
+                    amm = ActiveMemoryManagerExtension(
+                        self, policies=set(), register=False, start=True, interval=2.0
+                    )
+                    stop_amm = True
+
+                try:
+                    coros = []
+                    for ws in wss:
+                        logger.info("Retiring worker %s", ws._address)
+
+                        policy = RetireWorker(ws._address)
+                        amm.add_policy(policy)
+
+                        # Change Worker.status to closing_gracefully. Immediately set
+                        # the same on the scheduler to prevent race conditions.
+                        prev_status = ws.status
+                        ws.status = Status.closing_gracefully
+                        self.running.discard(ws)
+                        self.stream_comms[ws.address].send(
+                            {"op": "worker-status-change", "status": ws.status.name}
+                        )
+
+                        coros.append(
+                            self._track_retire_worker(
+                                ws,
+                                policy,
+                                prev_status=prev_status,
                                 close_workers=close_workers,
-                                lock=False,
+                                remove=remove,
                             )
-                        except KeyError:  # keys left during replicate
-                            pass
+                        )
 
-                workers = {
-                    parent._workers_dv[w] for w in workers if w in parent._workers_dv
-                }
-                if not workers:
-                    return {}
-                logger.info("Retire workers %s", workers)
+                    # Give the AMM a kick, in addition to its periodic running. This is
+                    # to avoid unnecessarily waiting for a potentially arbitrarily long
+                    # time (depending on interval settings)
+                    amm.run_once()
 
-                # Keys orphaned by retiring those workers
-                keys = {k for w in workers for k in w.has_what}
-                keys = {ts._key for ts in keys if ts._who_has.issubset(workers)}
-
-                if keys:
-                    other_workers = set(parent._workers_dv.values()) - workers
-                    if not other_workers:
-                        return {}
-                    logger.info("Moving %d keys to other workers", len(keys))
-                    await self.replicate(
-                        keys=keys,
-                        workers=[ws._address for ws in other_workers],
-                        n=1,
-                        delete=False,
-                        lock=False,
-                    )
+                    workers_info = dict(await asyncio.gather(*coros))
+                    workers_info.pop(None, None)
+                finally:
+                    if stop_amm:
+                        amm.stop()
 
-                worker_keys = {ws._address: ws.identity() for ws in workers}
-                if close_workers:
-                    await asyncio.gather(
-                        *[self.close_worker(worker=w, safe=True) for w in worker_keys]
-                    )
-                if remove:
-                    await asyncio.gather(
-                        *[self.remove_worker(address=w, safe=True) for w in worker_keys]
-                    )
+            self.log_event("all", {"action": "retire-workers", "workers": workers_info})
+            self.log_event(list(workers_info), {"action": "retired"})
 
-                self.log_event(
-                    "all",
-                    {
-                        "action": "retire-workers",
-                        "workers": worker_keys,
-                        "moved-keys": len(keys),
-                    },
+            return workers_info
+
+    async def _track_retire_worker(
+        self,
+        ws: WorkerState,
+        policy: RetireWorker,
+        prev_status: Status,
+        close_workers: bool,
+        remove: bool,
+    ) -> tuple:  # tuple[str | None, dict]
+        parent: SchedulerState = cast(SchedulerState, self)
+
+        while not policy.done():
+            if policy.no_recipients:
+                # Abort retirement. This time we don't need to worry about race
+                # conditions and we can wait for a scheduler->worker->scheduler
+                # round-trip.
+                self.stream_comms[ws.address].send(
+                    {"op": "worker-status-change", "status": prev_status.name}
                 )
-                self.log_event(list(worker_keys), {"action": "retired"})
+                return None, {}
+
+            # Sleep 0.01s when there are 4 tasks or less
+            # Sleep 0.5s when there are 200 or more
+            poll_interval = max(0.01, min(0.5, len(ws.has_what) / 400))
+            await asyncio.sleep(poll_interval)
+
+        logger.debug(
+            "All unique keys on worker %s have been replicated elsewhere", ws._address
+        )
+
+        if close_workers and ws._address in parent._workers_dv:
+            await self.close_worker(worker=ws._address, safe=True)
+        if remove:
+            await self.remove_worker(address=ws._address, safe=True)
 
-                return worker_keys
+        logger.info("Retired worker %s", ws._address)
+        return ws._address, ws.identity()
 
     def add_keys(self, comm=None, worker=None, keys=(), stimulus_id=None):
         """
@@ -7635,7 +7699,7 @@ class Scheduler(SchedulerState, ServerNo
         # Scheduler logs
         from distributed.dashboard.components.scheduler import SchedulerLogs
 
-        logs = SchedulerLogs(self)
+        logs = SchedulerLogs(self, start=start)
 
         from bokeh.models import Div, Panel, Tabs
 
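The polling loop in `_track_retire_worker` above scales its sleep with the number of keys still held by the retiring worker, clamped to the [0.01 s, 0.5 s] range. A quick standalone check of the endpoints quoted in its comment (the `poll_interval` helper is hypothetical, but the formula is the one used above):

    def poll_interval(nkeys: int) -> float:
        # 10 ms floor, 500 ms ceiling, linear (nkeys / 400 seconds) in between
        return max(0.01, min(0.5, nkeys / 400))

    assert poll_interval(0) == 0.01       # empty worker: poll as fast as allowed
    assert poll_interval(4) == 0.01       # "4 tasks or less"  -> 0.01 s
    assert poll_interval(100) == 0.25     # linear in between
    assert poll_interval(200) == 0.5      # "200 or more"      -> 0.5 s
    assert poll_interval(10_000) == 0.5   # never slower than 0.5 s
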
diff -pruN 2022.01.0+ds.1-1/distributed/shuffle/__init__.py 2022.02.0+ds.1-1/distributed/shuffle/__init__.py
--- 2022.01.0+ds.1-1/distributed/shuffle/__init__.py	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/shuffle/__init__.py	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,9 @@
+from .shuffle import rearrange_by_column_p2p
+from .shuffle_extension import ShuffleId, ShuffleMetadata, ShuffleWorkerExtension
+
+__all__ = [
+    "rearrange_by_column_p2p",
+    "ShuffleId",
+    "ShuffleMetadata",
+    "ShuffleWorkerExtension",
+]
diff -pruN 2022.01.0+ds.1-1/distributed/shuffle/shuffle_extension.py 2022.02.0+ds.1-1/distributed/shuffle/shuffle_extension.py
--- 2022.01.0+ds.1-1/distributed/shuffle/shuffle_extension.py	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/shuffle/shuffle_extension.py	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,331 @@
+from __future__ import annotations
+
+import asyncio
+import math
+from collections import defaultdict
+from dataclasses import dataclass
+from typing import TYPE_CHECKING, NewType
+
+from distributed.protocol import to_serialize
+from distributed.utils import sync
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+    from distributed.worker import Worker
+
+ShuffleId = NewType("ShuffleId", str)
+
+
+# NOTE: we use these dataclasses primarily for type-checking benefits.
+# They take the place of positional arguments to `shuffle_init`,
+# which the type-checker can't validate when it's called as an RPC.
+
+
+@dataclass(frozen=True, eq=False)
+class NewShuffleMetadata:
+    "Metadata to create a shuffle"
+    id: ShuffleId
+    empty: pd.DataFrame
+    column: str
+    npartitions: int
+
+
+@dataclass(frozen=True, eq=False)
+class ShuffleMetadata(NewShuffleMetadata):
+    """
+    Metadata every worker needs to share about a shuffle.
+
+    A `ShuffleMetadata` is created with a task and sent to all workers
+    over the `ShuffleWorkerExtension.shuffle_init` RPC.
+    """
+
+    workers: list[str]
+
+    def worker_for(self, output_partition: int) -> str:
+        "Get the address of the worker which should hold this output partition number"
+        assert output_partition >= 0, f"Negative output partition: {output_partition}"
+        if output_partition >= self.npartitions:
+            raise IndexError(
+                f"Output partition {output_partition} does not exist in a shuffle producing {self.npartitions} partitions"
+            )
+        i = len(self.workers) * output_partition // self.npartitions
+        return self.workers[i]
+
+    def _partition_range(self, worker: str) -> tuple[int, int]:
+        "Get the output partition numbers (inclusive) that a worker will hold"
+        i = self.workers.index(worker)
+        first = math.ceil(self.npartitions * i / len(self.workers))
+        last = math.ceil(self.npartitions * (i + 1) / len(self.workers)) - 1
+        return first, last
+
+    def npartitions_for(self, worker: str) -> int:
+        "Get the number of output partitions a worker will hold"
+        first, last = self._partition_range(worker)
+        return last - first + 1
+
+
+class Shuffle:
+    "State for a single active shuffle"
+
+    def __init__(self, metadata: ShuffleMetadata, worker: Worker) -> None:
+        self.metadata = metadata
+        self.worker = worker
+        self.output_partitions: defaultdict[int, list[pd.DataFrame]] = defaultdict(list)
+        self.output_partitions_left = metadata.npartitions_for(worker.address)
+        self.transferred = False
+
+    def receive(self, output_partition: int, data: pd.DataFrame) -> None:
+        assert not self.transferred, "`receive` called after barrier task"
+        self.output_partitions[output_partition].append(data)
+
+    async def add_partition(self, data: pd.DataFrame) -> None:
+        assert not self.transferred, "`add_partition` called after barrier task"
+        tasks = []
+        # NOTE: `groupby` blocks the event loop, but it also holds the GIL,
+        # so we don't bother offloading to a thread. See bpo-7946.
+        for output_partition, data in data.groupby(self.metadata.column):
+            # NOTE: `column` must refer to an integer column, which is the output partition number for the row.
+            # This is always `_partitions`, added by `dask/dataframe/shuffle.py::shuffle`.
+            addr = self.metadata.worker_for(int(output_partition))
+            task = asyncio.create_task(
+                self.worker.rpc(addr).shuffle_receive(
+                    shuffle_id=self.metadata.id,
+                    output_partition=output_partition,
+                    data=to_serialize(data),
+                )
+            )
+            tasks.append(task)
+
+        # TODO Once RerunGroup logic exists (https://github.com/dask/distributed/issues/5403),
+        # handle errors and cancellation here in a way that lets other workers cancel & clean up their shuffles.
+        # Without it, letting errors kill the task is all we can do.
+        await asyncio.gather(*tasks)
+
+    def get_output_partition(self, i: int) -> pd.DataFrame:
+        import pandas as pd
+
+        assert self.transferred, "`get_output_partition` called before barrier task"
+
+        assert self.metadata.worker_for(i) == self.worker.address, (
+            f"Output partition {i} belongs on {self.metadata.worker_for(i)}, "
+            f"not {self.worker.address}. {self.metadata!r}"
+        )
+        # ^ NOTE: this check isn't strictly necessary, just a nice validation to catch
+        # incorrect data in case something has gone very wrong
+
+        assert (
+            self.output_partitions_left > 0
+        ), f"No outputs remaining, but requested output partition {i} on {self.worker.address}."
+        self.output_partitions_left -= 1
+
+        try:
+            parts = self.output_partitions.pop(i)
+        except KeyError:
+            return self.metadata.empty
+
+        assert parts, f"Empty entry for output partition {i}"
+        return pd.concat(parts, copy=False)
+
+    def inputs_done(self) -> None:
+        assert not self.transferred, "`inputs_done` called multiple times"
+        self.transferred = True
+
+    def done(self) -> bool:
+        return self.transferred and self.output_partitions_left == 0
+
+
+class ShuffleWorkerExtension:
+    "Extend the Worker with routes and state for peer-to-peer shuffles"
+
+    def __init__(self, worker: Worker) -> None:
+        # Attach to worker
+        worker.handlers["shuffle_receive"] = self.shuffle_receive
+        worker.handlers["shuffle_init"] = self.shuffle_init
+        worker.handlers["shuffle_inputs_done"] = self.shuffle_inputs_done
+        worker.extensions["shuffle"] = self
+
+        # Initialize
+        self.worker: Worker = worker
+        self.shuffles: dict[ShuffleId, Shuffle] = {}
+
+    # Handlers
+    ##########
+    # NOTE: handlers are not threadsafe, but they're called from async comms, so that's okay
+
+    def shuffle_init(self, comm: object, metadata: ShuffleMetadata) -> None:
+        """
+        Handler: Register a new shuffle that is about to begin.
+        Using a shuffle with an already-known ID is an error.
+        """
+        if metadata.id in self.shuffles:
+            raise ValueError(
+                f"Shuffle {metadata.id!r} is already registered on worker {self.worker.address}"
+            )
+        self.shuffles[metadata.id] = Shuffle(metadata, self.worker)
+
+    def shuffle_receive(
+        self,
+        comm: object,
+        shuffle_id: ShuffleId,
+        output_partition: int,
+        data: pd.DataFrame,
+    ) -> None:
+        """
+        Handler: Receive an incoming shard of data from a peer worker.
+        Using an unknown ``shuffle_id`` is an error.
+        """
+        self._get_shuffle(shuffle_id).receive(output_partition, data)
+
+    def shuffle_inputs_done(self, comm: object, shuffle_id: ShuffleId) -> None:
+        """
+        Handler: Inform the extension that all input partitions have been handed off to extensions.
+        Using an unknown ``shuffle_id`` is an error.
+        """
+        shuffle = self._get_shuffle(shuffle_id)
+        shuffle.inputs_done()
+        if shuffle.done():
+            # If the shuffle has no output partitions, remove it now;
+            # `get_output_partition` will never be called.
+            # This happens when there are fewer output partitions than workers.
+            del self.shuffles[shuffle_id]
+
+    # Tasks
+    #######
+
+    def create_shuffle(self, new_metadata: NewShuffleMetadata) -> ShuffleMetadata:
+        return sync(self.worker.loop, self._create_shuffle, new_metadata)  # type: ignore
+
+    async def _create_shuffle(
+        self, new_metadata: NewShuffleMetadata
+    ) -> ShuffleMetadata:
+        """
+        Task: Create a new shuffle and broadcast it to all workers.
+        """
+        # TODO would be nice to not have to have the RPC in this method, and have shuffles started implicitly
+        # by the first `receive`/`add_partition`. To do that, shuffle metadata would be passed into
+        # every task, and from there into the extension (rather than stored within a `Shuffle`).
+        # However:
+        # 1. It makes scheduling much harder, since it's a widely-shared common dep
+        #    (https://github.com/dask/distributed/pull/5325)
+        # 2. Passing in metadata everywhere feels contrived when it would be so easy to store
+        # 3. The metadata may not be _that_ small (1000s of columns + 1000s of workers);
+        #    serializing and transferring it repeatedly adds overhead.
+        if new_metadata.id in self.shuffles:
+            raise ValueError(
+                f"Shuffle {new_metadata.id!r} is already registered on worker {self.worker.address}"
+            )
+
+        identity = await self.worker.scheduler.identity()
+
+        workers = list(identity["workers"])
+        metadata = ShuffleMetadata(
+            new_metadata.id,
+            new_metadata.empty,
+            new_metadata.column,
+            new_metadata.npartitions,
+            workers,
+        )
+
+        # Start the shuffle on all peers
+        # Note that this will call `shuffle_init` on our own worker as well
+        await asyncio.gather(
+            *(
+                self.worker.rpc(addr).shuffle_init(metadata=to_serialize(metadata))
+                for addr in metadata.workers
+            ),
+        )
+        # TODO handle errors from peers, and cancellation.
+        # If any peers can't start the shuffle, tell successful peers to cancel it.
+
+        return metadata  # NOTE: unused in tasks, just handy for tests
+
+    def add_partition(self, data: pd.DataFrame, shuffle_id: ShuffleId) -> None:
+        sync(self.worker.loop, self._add_partition, data, shuffle_id)
+
+    async def _add_partition(self, data: pd.DataFrame, shuffle_id: ShuffleId) -> None:
+        """
+        Task: Hand off an input partition to the ShuffleExtension.
+
+        This will block until the extension is ready to receive another input partition.
+
+        Using an unknown ``shuffle_id`` is an error.
+        """
+        await self._get_shuffle(shuffle_id).add_partition(data)
+
+    def barrier(self, shuffle_id: ShuffleId) -> None:
+        sync(self.worker.loop, self._barrier, shuffle_id)
+
+    async def _barrier(self, shuffle_id: ShuffleId) -> None:
+        """
+        Task: Note that the barrier task has been reached (`add_partition` called for all input partitions)
+
+        Using an unknown ``shuffle_id`` is an error. Calling this before all partitions have been
+        added is undefined.
+        """
+        # NOTE: in this basic shuffle implementation, doing things during the barrier
+        # is mostly unnecessary. We only need it to inform workers that don't receive
+        # any output partitions that they can clean up.
+        # (Otherwise, they'd have no way to know if they needed to keep the `Shuffle` around
+        # for more input partitions, which might come at some point. Workers that _do_ receive
+        # output partitions could infer this, since once `get_output_partition` gets called the
+        # first time, they can assume there are no more inputs.)
+        #
+        # Technically right now, we could call the `shuffle_inputs_done` RPC only on workers
+        # where `metadata.npartitions_for(worker) == 0`.
+        # However, when we have buffering, this barrier step will become important for
+        # all workers, since they'll use it to flush their buffers and send any leftover shards
+        # to their peers.
+
+        metadata = self._get_shuffle(shuffle_id).metadata
+
+        # Set worker restrictions for unpack tasks
+
+        # Could do this during `create_shuffle`, but we might as well overlap it with the time
+        # workers will be flushing buffers to each other.
+        name = "shuffle-unpack-" + metadata.id  # TODO single-source task name
+
+        # FIXME TODO XXX what about when culling means not all of the output tasks actually exist??!
+        # - these restrictions are invalid
+        # - get_output_partition won't be called enough times, so cleanup won't happen
+        # - also, we're transferring data we don't need to transfer
+        restrictions = {
+            f"('{name}', {i})": [metadata.worker_for(i)]
+            for i in range(metadata.npartitions)
+        }
+
+        # Tell all peers that we've reached the barrier
+
+        # Note that this will call `shuffle_inputs_done` on our own worker as well
+        await asyncio.gather(
+            *(
+                self.worker.rpc(worker).shuffle_inputs_done(shuffle_id=shuffle_id)
+                for worker in metadata.workers
+            ),
+            self.worker.scheduler.set_restrictions(worker=restrictions),
+        )
+        # TODO handle errors from workers and scheduler, and cancellation.
+
+    def get_output_partition(
+        self, shuffle_id: ShuffleId, output_partition: int
+    ) -> pd.DataFrame:
+        """
+        Task: Retrieve a shuffled output partition from the ShuffleExtension.
+
+        Calling this for a ``shuffle_id`` which is unknown or incomplete is an error.
+        """
+        shuffle = self._get_shuffle(shuffle_id)
+        output = shuffle.get_output_partition(output_partition)
+        if shuffle.done():
+            # key missing if another thread got to it first
+            self.shuffles.pop(shuffle_id, None)
+        return output
+
+    def _get_shuffle(self, shuffle_id: ShuffleId) -> Shuffle:
+        "Get a shuffle by ID; raise ValueError if it's not registered."
+        try:
+            return self.shuffles[shuffle_id]
+        except KeyError:
+            raise ValueError(
+                f"Shuffle {shuffle_id!r} is not registered on worker {self.worker.address}"
+            ) from None
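
The assignment arithmetic in `ShuffleMetadata.worker_for` and `_partition_range` above splits output partitions into contiguous, near-equal blocks per worker. The standalone sketch below reproduces both formulas for a small made-up example (3 workers, 10 partitions) and checks that they agree:

    import math

    workers = ["w1", "w2", "w3"]  # hypothetical worker addresses
    npartitions = 10

    def worker_for(output_partition: int) -> str:
        # Same floor-division mapping as ShuffleMetadata.worker_for
        return workers[len(workers) * output_partition // npartitions]

    def partition_range(worker: str) -> tuple[int, int]:
        # Same inclusive range as ShuffleMetadata._partition_range
        i = workers.index(worker)
        first = math.ceil(npartitions * i / len(workers))
        last = math.ceil(npartitions * (i + 1) / len(workers)) - 1
        return first, last

    assignments = [worker_for(i) for i in range(npartitions)]
    # w1 holds partitions 0-3, w2 holds 4-6, w3 holds 7-9
    assert assignments == ["w1"] * 4 + ["w2"] * 3 + ["w3"] * 3

    for w in workers:
        first, last = partition_range(w)
        held = [i for i, a in enumerate(assignments) if a == w]
        assert held == list(range(first, last + 1))
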
diff -pruN 2022.01.0+ds.1-1/distributed/shuffle/shuffle.py 2022.02.0+ds.1-1/distributed/shuffle/shuffle.py
--- 2022.01.0+ds.1-1/distributed/shuffle/shuffle.py	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/shuffle/shuffle.py	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,114 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from dask.base import tokenize
+from dask.delayed import Delayed, delayed
+from dask.highlevelgraph import HighLevelGraph
+
+from .shuffle_extension import NewShuffleMetadata, ShuffleId, ShuffleWorkerExtension
+
+if TYPE_CHECKING:
+    import pandas as pd
+
+    from dask.dataframe import DataFrame
+
+
+def get_ext() -> ShuffleWorkerExtension:
+    from distributed import get_worker
+
+    try:
+        worker = get_worker()
+    except ValueError as e:
+        raise RuntimeError(
+            "`shuffle='p2p'` requires Dask's distributed scheduler. This task is not running on a Worker; "
+            "please confirm that you've created a distributed Client and are submitting this computation through it."
+        ) from e
+    extension: ShuffleWorkerExtension | None = worker.extensions.get("shuffle")
+    if not extension:
+        raise RuntimeError(
+            f"The worker {worker.address} does not have a ShuffleExtension. "
+            "Is pandas installed on the worker?"
+        )
+    return extension
+
+
+def shuffle_setup(metadata: NewShuffleMetadata) -> None:
+    get_ext().create_shuffle(metadata)
+
+
+def shuffle_transfer(input: pd.DataFrame, id: ShuffleId, setup=None) -> None:
+    get_ext().add_partition(input, id)
+
+
+def shuffle_unpack(id: ShuffleId, output_partition: int, barrier=None) -> pd.DataFrame:
+    return get_ext().get_output_partition(id, output_partition)
+
+
+def shuffle_barrier(id: ShuffleId, transfers: list[None]) -> None:
+    get_ext().barrier(id)
+
+
+def rearrange_by_column_p2p(
+    df: DataFrame,
+    column: str,
+    npartitions: int | None = None,
+):
+    from dask.dataframe import DataFrame
+
+    npartitions = npartitions or df.npartitions
+    token = tokenize(df, column, npartitions)
+
+    setup = delayed(shuffle_setup, pure=True)(
+        NewShuffleMetadata(
+            ShuffleId(token),
+            df._meta,
+            column,
+            npartitions,
+        )
+    )
+
+    transferred = df.map_partitions(
+        shuffle_transfer,
+        token,
+        setup,
+        meta=df,
+        enforce_metadata=False,
+        transform_divisions=False,
+    )
+
+    barrier_key = "shuffle-barrier-" + token
+    barrier_dsk = {barrier_key: (shuffle_barrier, token, transferred.__dask_keys__())}
+    barrier = Delayed(
+        barrier_key,
+        HighLevelGraph.from_collections(
+            barrier_key, barrier_dsk, dependencies=[transferred]
+        ),
+    )
+
+    name = "shuffle-unpack-" + token
+    dsk = {
+        (name, i): (shuffle_unpack, token, i, barrier_key) for i in range(npartitions)
+    }
+    # TODO: update to use blockwise.
+    # Changes task names, so breaks setting worker restrictions at the moment.
+    # Also maybe would be nice if the `DataFrameIOLayer` interface supported this?
+    # dsk = blockwise(
+    #     shuffle_unpack,
+    #     name,
+    #     "i",
+    #     token,
+    #     None,
+    #     BlockwiseDepDict({(i,): i for i in range(npartitions)}),
+    #     "i",
+    #     barrier_key,
+    #     None,
+    #     numblocks={},
+    # )
+
+    return DataFrame(
+        HighLevelGraph.from_collections(name, dsk, [barrier]),
+        name,
+        df._meta,
+        [None] * (npartitions + 1),
+    )
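
The graph produced by `rearrange_by_column_p2p` above is a fan-in from all transfer tasks to a single barrier, followed by a fan-out to the unpack tasks; the shuffled rows themselves travel out of band through the worker extension rather than through the graph. A toy dict graph with stand-in functions (hypothetical names; only the shape mirrors the real layers) makes the dependency structure concrete:

    import dask

    npartitions = 3
    token = "abc123"  # stand-in for tokenize(df, column, npartitions)

    def transfer(i):                # stands in for shuffle_transfer
        return None

    def barrier(*transfers):        # stands in for shuffle_barrier
        return None

    def unpack(i, barrier_result):  # stands in for shuffle_unpack
        return f"partition-{i}"

    transfer_keys = [("shuffle-transfer-" + token, i) for i in range(npartitions)]
    barrier_key = "shuffle-barrier-" + token
    unpack_name = "shuffle-unpack-" + token

    dsk = {key: (transfer, i) for i, key in enumerate(transfer_keys)}
    dsk[barrier_key] = (barrier,) + tuple(transfer_keys)  # fan-in: barrier waits on every transfer
    dsk.update(
        {(unpack_name, i): (unpack, i, barrier_key) for i in range(npartitions)}  # fan-out
    )

    out = dask.get(dsk, [(unpack_name, i) for i in range(npartitions)])  # single-threaded scheduler
    assert list(out) == ["partition-0", "partition-1", "partition-2"]
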
diff -pruN 2022.01.0+ds.1-1/distributed/shuffle/tests/test_graph.py 2022.02.0+ds.1-1/distributed/shuffle/tests/test_graph.py
--- 2022.01.0+ds.1-1/distributed/shuffle/tests/test_graph.py	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/shuffle/tests/test_graph.py	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,93 @@
+from __future__ import annotations
+
+import asyncio
+from typing import TYPE_CHECKING
+
+import pytest
+
+pd = pytest.importorskip("pandas")
+pytest.importorskip("dask.dataframe")
+
+import dask
+import dask.dataframe as dd
+from dask.blockwise import Blockwise
+from dask.dataframe.shuffle import partitioning_index, rearrange_by_column_tasks
+from dask.utils_test import hlg_layer_topological
+
+from distributed.utils_test import gen_cluster
+
+from ..shuffle import rearrange_by_column_p2p
+from ..shuffle_extension import ShuffleWorkerExtension
+
+if TYPE_CHECKING:
+    from distributed import Client, Scheduler, Worker
+
+
+def shuffle(
+    df: dd.DataFrame, on: str, rearrange=rearrange_by_column_p2p
+) -> dd.DataFrame:
+    "Simple version of `DataFrame.shuffle`, so we don't need dask to know about 'p2p'"
+    return (
+        df.assign(
+            partition=lambda df: df[on].map_partitions(
+                partitioning_index, df.npartitions, transform_divisions=False
+            )
+        )
+        .pipe(rearrange, "partition")
+        .drop("partition", axis=1)
+    )
+
+
+def test_shuffle_helper(client: Client):
+    df = dd.demo.make_timeseries(freq="15D", partition_freq="30D")
+    shuffle_helper = shuffle(df, "id", rearrange=rearrange_by_column_tasks)
+    dask_shuffle = df.shuffle("id", shuffle="tasks")
+    dd.utils.assert_eq(shuffle_helper, dask_shuffle, scheduler=client)
+
+
+def test_basic(client: Client):
+    df = dd.demo.make_timeseries(freq="15D", partition_freq="30D")
+    shuffled = shuffle(df, "id")
+
+    (opt,) = dask.optimize(shuffled)
+    assert isinstance(hlg_layer_topological(opt.dask, 1), Blockwise)
+    # setup -> blockwise -> barrier -> unpack -> drop_by_shallow_copy
+    assert len(opt.dask.layers) == 5
+
+    dd.utils.assert_eq(shuffled, df.shuffle("id", shuffle="tasks"), scheduler=client)
+    # ^ NOTE: this works because `assert_eq` sorts the rows before comparing
+
+
+@gen_cluster([("", 2)] * 4, client=True)
+async def test_basic_state(c: Client, s: Scheduler, *workers: Worker):
+    df = dd.demo.make_timeseries(freq="15D", partition_freq="30D")
+    shuffled = shuffle(df, "id")
+
+    exts: list[ShuffleWorkerExtension] = [w.extensions["shuffle"] for w in workers]
+    for ext in exts:
+        assert not ext.shuffles
+
+    f = c.compute(shuffled)
+    # TODO this is a bad/pointless test. The `f.done()` check is necessary in case the shuffle is really fast.
+    # To test state more thoroughly, we'd need a way to 'stop the world' at various stages, e.g. have the
+    # scheduler pause everything when the barrier is reached. Not sure yet how to implement that.
+    while not all(len(ext.shuffles) == 1 for ext in exts) and not f.done():
+        await asyncio.sleep(0.1)
+
+    await f
+    assert all(not ext.shuffles for ext in exts)
+
+
+def test_multiple_linear(client: Client):
+    df = dd.demo.make_timeseries(freq="15D", partition_freq="30D")
+    s1 = shuffle(df, "id")
+    s1["x"] = s1["x"] + 1
+    s2 = shuffle(s1, "x")
+
+    # TODO eventually test for fusion between s1's unpacks, the `+1`, and s2's `transfer`s
+
+    dd.utils.assert_eq(
+        s2,
+        df.assign(x=lambda df: df.x + 1).shuffle("x", shuffle="tasks"),
+        scheduler=client,
+    )
diff -pruN 2022.01.0+ds.1-1/distributed/shuffle/tests/test_shuffle_extension.py 2022.02.0+ds.1-1/distributed/shuffle/tests/test_shuffle_extension.py
--- 2022.01.0+ds.1-1/distributed/shuffle/tests/test_shuffle_extension.py	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/shuffle/tests/test_shuffle_extension.py	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,288 @@
+from __future__ import annotations
+
+import asyncio
+import string
+from collections import Counter
+from typing import TYPE_CHECKING
+
+import pytest
+
+pd = pytest.importorskip("pandas")
+dd = pytest.importorskip("dask.dataframe")
+
+from distributed.utils_test import gen_cluster
+
+from ..shuffle_extension import (
+    NewShuffleMetadata,
+    ShuffleId,
+    ShuffleMetadata,
+    ShuffleWorkerExtension,
+)
+
+if TYPE_CHECKING:
+    from distributed import Client, Future, Scheduler, Worker
+
+
+@pytest.mark.parametrize("npartitions", [1, 2, 3, 5])
+@pytest.mark.parametrize("n_workers", [1, 2, 3, 5])
+def test_worker_for_distribution(npartitions: int, n_workers: int):
+    "Test that `worker_for` distributes evenly"
+    metadata = ShuffleMetadata(
+        ShuffleId("foo"),
+        pd.DataFrame({"A": []}),
+        "A",
+        npartitions,
+        list(string.ascii_lowercase[:n_workers]),
+    )
+
+    with pytest.raises(AssertionError, match="Negative"):
+        metadata.worker_for(-1)
+
+    assignments = [metadata.worker_for(i) for i in range(metadata.npartitions)]
+
+    # Test internal `_partition_range` method
+    for w in metadata.workers:
+        first, last = metadata._partition_range(w)
+        assert all(
+            [
+                first <= p_i <= last if a == w else p_i < first or p_i > last
+                for p_i, a in enumerate(assignments)
+            ]
+        )
+
+    counter = Counter(assignments)
+    assert len(counter) == min(npartitions, n_workers)
+
+    # Test `npartitions_for`
+    calculated_counter = {w: metadata.npartitions_for(w) for w in metadata.workers}
+    assert counter == {
+        w: count for w, count in calculated_counter.items() if count != 0
+    }
+    assert calculated_counter.keys() == set(metadata.workers)
+    # ^ this also checks that workers receiving 0 output partitions were calculated properly
+
+    # Test the distribution of worker assignments.
+    # All workers should be assigned the same number of partitions, or if
+    # there's an odd number, some workers will be assigned only one extra partition.
+    counts = set(counter.values())
+    assert len(counts) <= 2
+    if len(counts) == 2:
+        lo, hi = sorted(counts)
+        assert lo == hi - 1
+
+    with pytest.raises(IndexError, match="does not exist"):
+        metadata.worker_for(npartitions)
+
+
+@gen_cluster([("", 1)])
+async def test_installation(s: Scheduler, worker: Worker):
+    ext = worker.extensions["shuffle"]
+    assert isinstance(ext, ShuffleWorkerExtension)
+    assert worker.handlers["shuffle_receive"] == ext.shuffle_receive
+    assert worker.handlers["shuffle_init"] == ext.shuffle_init
+    assert worker.handlers["shuffle_inputs_done"] == ext.shuffle_inputs_done
+
+
+@gen_cluster([("", 1)])
+async def test_init(s: Scheduler, worker: Worker):
+    ext: ShuffleWorkerExtension = worker.extensions["shuffle"]
+    assert not ext.shuffles
+    metadata = ShuffleMetadata(
+        ShuffleId("foo"),
+        pd.DataFrame({"A": []}),
+        "A",
+        5,
+        [worker.address],
+    )
+
+    ext.shuffle_init(None, metadata)
+    assert list(ext.shuffles) == [metadata.id]
+
+    with pytest.raises(ValueError, match="already registered"):
+        ext.shuffle_init(None, metadata)
+
+    assert list(ext.shuffles) == [metadata.id]
+
+
+async def add_dummy_unpack_keys(
+    new_metadata: NewShuffleMetadata, client: Client
+) -> dict[str, Future]:
+    """
+    Add dummy keys to the scheduler, so setting worker restrictions during `barrier` succeeds.
+
+    Note: you must hang onto the Futures returned by this function, so they don't get released prematurely.
+    """
+    # NOTE: `scatter` is just used as an easy way to create keys on the scheduler that won't actually
+    # be scheduled. It would be reasonable if this stops working in the future, if some validation is
+    # added preventing worker restrictions on scattered data (since it makes no sense).
+    fs = await client.scatter(
+        {
+            str(("shuffle-unpack-" + new_metadata.id, i)): None
+            for i in range(new_metadata.npartitions)
+        }
+    )  # type: ignore
+    await asyncio.gather(*fs.values())
+    return fs
+
+
+@gen_cluster([("", 1)] * 4)
+async def test_create(s: Scheduler, *workers: Worker):
+    exts: list[ShuffleWorkerExtension] = [w.extensions["shuffle"] for w in workers]
+
+    new_metadata = NewShuffleMetadata(
+        ShuffleId("foo"),
+        pd.DataFrame({"A": []}),
+        "A",
+        5,
+    )
+
+    metadata = await exts[0]._create_shuffle(new_metadata)
+    assert sorted(metadata.workers) == sorted(w.address for w in workers)
+
+    # Check shuffle was created on all workers
+    for ext in exts:
+        assert len(ext.shuffles) == 1
+        shuffle = ext.shuffles[new_metadata.id]
+        assert shuffle.metadata.workers == metadata.workers
+
+    # TODO (resilience stage) what happens if some workers already have
+    # the ID registered, but others don't?
+
+    with pytest.raises(ValueError, match="already registered"):
+        await exts[0]._create_shuffle(new_metadata)
+
+
+@gen_cluster([("", 1)] * 4)
+async def test_add_partition(s: Scheduler, *workers: Worker):
+    exts: dict[str, ShuffleWorkerExtension] = {
+        w.address: w.extensions["shuffle"] for w in workers
+    }
+
+    new_metadata = NewShuffleMetadata(
+        ShuffleId("foo"),
+        pd.DataFrame({"A": [], "partition": []}),
+        "partition",
+        8,
+    )
+
+    ext = next(iter(exts.values()))
+    metadata = await ext._create_shuffle(new_metadata)
+    partition = pd.DataFrame(
+        {
+            "A": ["a", "b", "c", "d", "e", "f", "g", "h"],
+            "partition": [0, 1, 2, 3, 4, 5, 6, 7],
+        }
+    )
+    await ext._add_partition(partition, new_metadata.id)
+
+    with pytest.raises(ValueError, match="not registered"):
+        await ext._add_partition(partition, ShuffleId("bar"))
+
+    for i, data in partition.groupby(new_metadata.column):
+        addr = metadata.worker_for(int(i))
+        ext = exts[addr]
+        received = ext.shuffles[metadata.id].output_partitions[int(i)]
+        assert len(received) == 1
+        dd.utils.assert_eq(data, received[0])
+
+    # TODO (resilience stage) test failed sends
+
+
+@gen_cluster([("", 1)] * 4, client=True)
+async def test_barrier(c: Client, s: Scheduler, *workers: Worker):
+    exts: dict[str, ShuffleWorkerExtension] = {
+        w.address: w.extensions["shuffle"] for w in workers
+    }
+
+    new_metadata = NewShuffleMetadata(
+        ShuffleId("foo"),
+        pd.DataFrame({"A": [], "partition": []}),
+        "partition",
+        4,
+    )
+    fs = await add_dummy_unpack_keys(new_metadata, c)
+
+    ext = next(iter(exts.values()))
+    metadata = await ext._create_shuffle(new_metadata)
+    partition = pd.DataFrame(
+        {
+            "A": ["a", "b", "c"],
+            "partition": [0, 1, 2],
+        }
+    )
+    await ext._add_partition(partition, metadata.id)
+
+    await ext._barrier(metadata.id)
+
+    # Check scheduler restrictions were set for unpack tasks
+    for i, key in enumerate(fs):
+        assert s.tasks[key].worker_restrictions == {metadata.worker_for(i)}
+
+    # Check all workers have been informed of the barrier
+    for addr, ext in exts.items():
+        if metadata.npartitions_for(addr):
+            shuffle = ext.shuffles[metadata.id]
+            assert shuffle.transferred
+            assert not shuffle.done()
+        else:
+            # No output partitions on this worker; shuffle already cleaned up
+            assert not ext.shuffles
+
+
+@gen_cluster([("", 1)] * 4, client=True)
+async def test_get_partition(c: Client, s: Scheduler, *workers: Worker):
+    exts: dict[str, ShuffleWorkerExtension] = {
+        w.address: w.extensions["shuffle"] for w in workers
+    }
+
+    new_metadata = NewShuffleMetadata(
+        ShuffleId("foo"),
+        pd.DataFrame({"A": [], "partition": []}),
+        "partition",
+        8,
+    )
+    _ = await add_dummy_unpack_keys(new_metadata, c)
+
+    ext = next(iter(exts.values()))
+    metadata = await ext._create_shuffle(new_metadata)
+    p1 = pd.DataFrame(
+        {
+            "A": ["a", "b", "c", "d", "e", "f", "g", "h"],
+            "partition": [0, 1, 2, 3, 4, 5, 6, 6],
+        }
+    )
+    p2 = pd.DataFrame(
+        {
+            "A": ["a", "b", "c", "d", "e", "f", "g", "h"],
+            "partition": [0, 1, 2, 3, 0, 0, 2, 3],
+        }
+    )
+    await asyncio.gather(
+        ext._add_partition(p1, metadata.id), ext._add_partition(p2, metadata.id)
+    )
+    await ext._barrier(metadata.id)
+
+    for addr, ext in exts.items():
+        if metadata.worker_for(0) != addr:
+            with pytest.raises(AssertionError, match="belongs on"):
+                ext.get_output_partition(metadata.id, 0)
+
+    full = pd.concat([p1, p2])
+    expected_groups = full.groupby("partition")
+    for output_i in range(metadata.npartitions):
+        addr = metadata.worker_for(output_i)
+        ext = exts[addr]
+        result = ext.get_output_partition(metadata.id, output_i)
+        try:
+            expected = expected_groups.get_group(output_i)
+        except KeyError:
+            expected = metadata.empty
+        dd.utils.assert_eq(expected, result)
+        # ^ NOTE: use `assert_eq` instead of `pd.testing.assert_frame_equal` directly
+        # to ignore order of the rows (`assert_eq` pre-sorts its inputs).
+
+    # Once all partitions are retrieved, shuffles are cleaned up
+    for ext in exts.values():
+        assert not ext.shuffles
+        with pytest.raises(ValueError, match="not registered"):
+            ext.get_output_partition(metadata.id, 0)
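A minimal sketch of the routing contract the shuffle-extension tests above exercise, assuming (as a simplification, not the real ShuffleMetadata implementation) that worker_for() maps an output-partition index to a worker address round-robin:

    from __future__ import annotations

    import pandas as pd

    def worker_for(workers: list[str], output_partition: int) -> str:
        # Hypothetical stand-in for ShuffleMetadata.worker_for(): round-robin
        # assignment of output partitions to worker addresses.
        return workers[output_partition % len(workers)]

    workers = ["tcp://w1:1234", "tcp://w2:1234"]
    partition = pd.DataFrame({"A": ["a", "b", "c"], "partition": [0, 1, 2]})
    for i, group in partition.groupby("partition"):
        addr = worker_for(workers, int(i))
        # In the extension, `group` would be sent to `addr` and buffered in that
        # worker's output_partitions[i] until _barrier() is called.
        print(addr, len(group))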
diff -pruN 2022.01.0+ds.1-1/distributed/stealing.py 2022.02.0+ds.1-1/distributed/stealing.py
--- 2022.01.0+ds.1-1/distributed/stealing.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/stealing.py	2022-02-11 16:21:14.000000000 +0000
@@ -15,7 +15,7 @@ import dask
 from dask.utils import parse_timedelta
 
 from .comm.addressing import get_address_host
-from .core import CommClosedError
+from .core import CommClosedError, Status
 from .diagnostics.plugin import SchedulerPlugin
 from .utils import log_errors, recursive_to_dict
 
@@ -393,22 +393,23 @@ class WorkStealing(SchedulerPlugin):
 
         with log_errors():
             i = 0
-            idle = s.idle.values()
-            saturated = s.saturated
+            # Paused and closing workers must never become thieves
+            idle = [ws for ws in s.idle.values() if ws.status == Status.running]
             if not idle or len(idle) == len(s.workers):
                 return
 
             log = []
             start = time()
 
-            if not s.saturated:
+            saturated = s.saturated
+            if not saturated:
                 saturated = topk(10, s.workers.values(), key=combined_occupancy)
                 saturated = [
                     ws
                     for ws in saturated
                     if combined_occupancy(ws) > 0.2 and len(ws.processing) > ws.nthreads
                 ]
-            elif len(s.saturated) < 20:
+            elif len(saturated) < 20:
                 saturated = sorted(saturated, key=combined_occupancy, reverse=True)
             if len(idle) < 20:
                 idle = sorted(idle, key=combined_occupancy)
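The stealing change above filters candidate thieves by worker status before balancing; a minimal sketch of that guard in isolation, assuming the idle workers are scheduler WorkerState-like objects exposing a .status attribute:

    from distributed.core import Status

    def running_only(idle_workers):
        # Paused and closing workers must never become thieves.
        return [ws for ws in idle_workers if ws.status == Status.running]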
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_active_memory_manager.py 2022.02.0+ds.1-1/distributed/tests/test_active_memory_manager.py
--- 2022.01.0+ds.1-1/distributed/tests/test_active_memory_manager.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_active_memory_manager.py	2022-02-11 16:21:14.000000000 +0000
@@ -4,10 +4,11 @@ import asyncio
 import logging
 import random
 from contextlib import contextmanager
+from time import sleep
 
 import pytest
 
-from distributed import Nanny
+from distributed import Nanny, wait
 from distributed.active_memory_manager import (
     ActiveMemoryManagerExtension,
     ActiveMemoryManagerPolicy,
@@ -337,9 +338,9 @@ async def test_drop_from_worker_with_lea
     futures = await c.scatter({"x": 1}, broadcast=True)
     assert s.tasks["x"].who_has == {ws1, ws2, ws3, ws4}
     # Allocate enough RAM to be safely more than unmanaged memory
-    clog = c.submit(lambda: "x" * 2 ** 29, workers=[a3])  # 512 MiB
+    clog = c.submit(lambda: "x" * 2**29, workers=[a3])  # 512 MiB
     # await wait(clog) is not enough; we need to wait for the heartbeats
-    while ws3.memory.optimistic < 2 ** 29:
+    while ws3.memory.optimistic < 2**29:
         await asyncio.sleep(0.01)
     s.extensions["amm"].run_once()
 
@@ -403,7 +404,12 @@ async def test_drop_with_bad_candidates(
     assert s.tasks["x"].who_has == {ws0, ws1}
 
 
-@gen_cluster(client=True, nthreads=[("", 1)] * 10, config=demo_config("drop", n=1))
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 10,
+    config=demo_config("drop", n=1),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_drop_prefers_paused_workers(c, s, *workers):
     x = await c.scatter({"x": 1}, broadcast=True)
     ts = s.tasks["x"]
@@ -420,7 +426,11 @@ async def test_drop_prefers_paused_worke
 
 
 @pytest.mark.slow
-@gen_cluster(client=True, config=demo_config("drop"))
+@gen_cluster(
+    client=True,
+    config=demo_config("drop"),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_drop_with_paused_workers_with_running_tasks_1(c, s, a, b):
     """If there is exactly 1 worker that holds a replica of a task that isn't paused or
     retiring, and there are 1+ paused/retiring workers with the same task, don't drop
@@ -431,13 +441,13 @@ async def test_drop_with_paused_workers_
     b is running and has no dependent tasks
     """
     x = (await c.scatter({"x": 1}, broadcast=True))["x"]
-    y = c.submit(slowinc, x, delay=2, key="y", workers=[a.address])
+    y = c.submit(slowinc, x, delay=2.5, key="y", workers=[a.address])
+
     while "y" not in a.tasks or a.tasks["y"].state != "executing":
         await asyncio.sleep(0.01)
     a.memory_pause_fraction = 1e-15
     while s.workers[a.address].status != Status.paused:
         await asyncio.sleep(0.01)
-    assert s.tasks["y"].state == "processing"
     assert a.tasks["y"].state == "executing"
 
     s.extensions["amm"].run_once()
@@ -445,7 +455,11 @@ async def test_drop_with_paused_workers_
     assert len(s.tasks["x"].who_has) == 2
 
 
-@gen_cluster(client=True, config=demo_config("drop"))
+@gen_cluster(
+    client=True,
+    config=demo_config("drop"),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_drop_with_paused_workers_with_running_tasks_2(c, s, a, b):
     """If there is exactly 1 worker that holds a replica of a task that isn't paused or
     retiring, and there are 1+ paused/retiring workers with the same task, don't drop
@@ -467,7 +481,11 @@ async def test_drop_with_paused_workers_
 
 @pytest.mark.slow
 @pytest.mark.parametrize("pause", [True, False])
-@gen_cluster(client=True, config=demo_config("drop"))
+@gen_cluster(
+    client=True,
+    config=demo_config("drop"),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_drop_with_paused_workers_with_running_tasks_3_4(c, s, a, b, pause):
     """If there is exactly 1 worker that holds a replica of a task that isn't paused or
     retiring, and there are 1+ paused/retiring workers with the same task, don't drop
@@ -482,7 +500,7 @@ async def test_drop_with_paused_workers_
     b is running and has no dependent tasks
     """
     x = (await c.scatter({"x": 1}, broadcast=True))["x"]
-    y = c.submit(slowinc, x, delay=2, key="y", workers=[a.address])
+    y = c.submit(slowinc, x, delay=2.5, key="y", workers=[a.address])
     while "y" not in a.tasks or a.tasks["y"].state != "executing":
         await asyncio.sleep(0.01)
 
@@ -501,7 +519,12 @@ async def test_drop_with_paused_workers_
 
 
 @pytest.mark.slow
-@gen_cluster(client=True, nthreads=[("", 1)] * 3, config=demo_config("drop"))
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 3,
+    config=demo_config("drop"),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_drop_with_paused_workers_with_running_tasks_5(c, s, w1, w2, w3):
     """If there is exactly 1 worker that holds a replica of a task that isn't paused or
     retiring, and there are 1+ paused/retiring workers with the same task, don't drop
@@ -513,27 +536,28 @@ async def test_drop_with_paused_workers_
     w3 is running and with dependent tasks executing on it
     """
     x = (await c.scatter({"x": 1}, broadcast=True))["x"]
-    y1 = c.submit(slowinc, x, delay=2, key="y1", workers=[w1.address])
-    y2 = c.submit(slowinc, x, delay=2, key="y2", workers=[w3.address])
-    while (
-        "y1" not in w1.tasks
-        or w1.tasks["y1"].state != "executing"
-        or "y2" not in w3.tasks
-        or w3.tasks["y2"].state != "executing"
-    ):
+    y1 = c.submit(slowinc, x, delay=2.5, key="y1", workers=[w1.address])
+    y2 = c.submit(slowinc, x, delay=2.5, key="y2", workers=[w3.address])
+
+    def executing() -> bool:
+        return (
+            "y1" in w1.tasks
+            and w1.tasks["y1"].state == "executing"
+            and "y2" in w3.tasks
+            and w3.tasks["y2"].state == "executing"
+        )
+
+    while not executing():
         await asyncio.sleep(0.01)
     w1.memory_pause_fraction = 1e-15
     while s.workers[w1.address].status != Status.paused:
         await asyncio.sleep(0.01)
-    assert s.tasks["y1"].state == "processing"
-    assert s.tasks["y2"].state == "processing"
-    assert w1.tasks["y1"].state == "executing"
-    assert w3.tasks["y2"].state == "executing"
+    assert executing()
 
     s.extensions["amm"].run_once()
-    await y1
-    await y2
-    assert {ws.address for ws in s.tasks["x"].who_has} == {w1.address, w3.address}
+    while {ws.address for ws in s.tasks["x"].who_has} != {w1.address, w3.address}:
+        await asyncio.sleep(0.01)
+    assert executing()
 
 
 @gen_cluster(nthreads=[("", 1)] * 4, client=True, config=demo_config("replicate", n=2))
@@ -596,11 +620,11 @@ async def test_replicate_to_worker_with_
     futures = await c.scatter({"x": 1}, workers=[a1])
     assert s.tasks["x"].who_has == {ws1}
     # Allocate enough RAM to be safely more than unmanaged memory
-    clog2 = c.submit(lambda: "x" * 2 ** 29, workers=[a2])  # 512 MiB
-    clog4 = c.submit(lambda: "x" * 2 ** 29, workers=[a4])  # 512 MiB
+    clog2 = c.submit(lambda: "x" * 2**29, workers=[a2])  # 512 MiB
+    clog4 = c.submit(lambda: "x" * 2**29, workers=[a4])  # 512 MiB
     # await wait(clog) is not enough; we need to wait for the heartbeats
     for ws in (ws2, ws4):
-        while ws.memory.optimistic < 2 ** 29:
+        while ws.memory.optimistic < 2**29:
             await asyncio.sleep(0.01)
     s.extensions["amm"].run_once()
 
@@ -644,7 +668,12 @@ async def test_replicate_to_candidates_w
     assert s.tasks["x"].who_has == {ws0}
 
 
-@gen_cluster(client=True, nthreads=[("", 1)] * 3, config=demo_config("replicate"))
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 3,
+    config=demo_config("replicate"),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_replicate_avoids_paused_workers_1(c, s, w0, w1, w2):
     w1.memory_pause_fraction = 1e-15
     while s.workers[w1.address].status != Status.paused:
@@ -658,7 +687,11 @@ async def test_replicate_avoids_paused_w
     assert "x" not in w1.data
 
 
-@gen_cluster(client=True, config=demo_config("replicate"))
+@gen_cluster(
+    client=True,
+    config=demo_config("replicate"),
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_replicate_avoids_paused_workers_2(c, s, a, b):
     b.memory_pause_fraction = 1e-15
     while s.workers[b.address].status != Status.paused:
@@ -718,6 +751,218 @@ async def test_ReduceReplicas(c, s, *wor
         await asyncio.sleep(0.01)
 
 
+@pytest.mark.parametrize("start_amm", [False, True])
+@gen_cluster(client=True)
+async def test_RetireWorker_amm_on_off(c, s, a, b, start_amm):
+    """retire_workers must work both with and without the AMM started"""
+    if start_amm:
+        await c.amm.start()
+    else:
+        await c.amm.stop()
+
+    futures = await c.scatter({"x": 1}, workers=[a.address])
+    await c.retire_workers([a.address])
+    assert a.address not in s.workers
+    assert "x" in b.data
+
+
+@gen_cluster(
+    client=True,
+    config={
+        "distributed.scheduler.active-memory-manager.start": True,
+        "distributed.scheduler.active-memory-manager.interval": 0.1,
+        "distributed.scheduler.active-memory-manager.policies": [],
+    },
+)
+async def test_RetireWorker_no_remove(c, s, a, b):
+    """Test RetireWorker behaviour on retire_workers(..., remove=False)"""
+
+    x = await c.scatter({"x": "x"}, workers=[a.address])
+    await c.retire_workers([a.address], close_workers=False, remove=False)
+    # Wait 2 AMM iterations
+    # retire_workers may return before all keys have been dropped from a
+    while s.tasks["x"].who_has != {s.workers[b.address]}:
+        await asyncio.sleep(0.01)
+    assert a.address in s.workers
+    # Policy has been removed without waiting for worker to disappear from
+    # Scheduler.workers
+    assert not s.extensions["amm"].policies
+
+
+@pytest.mark.slow
+@pytest.mark.parametrize("use_ReduceReplicas", [False, True])
+@gen_cluster(
+    client=True,
+    Worker=Nanny,
+    config={
+        "distributed.scheduler.active-memory-manager.start": True,
+        "distributed.scheduler.active-memory-manager.interval": 0.1,
+        "distributed.scheduler.active-memory-manager.policies": [
+            {"class": "distributed.active_memory_manager.ReduceReplicas"},
+        ],
+    },
+)
+async def test_RetireWorker_with_ReduceReplicas(c, s, *nannies, use_ReduceReplicas):
+    """RetireWorker and ReduceReplicas work well with each other.
+
+    If ReduceReplicas is enabled,
+    1. On the first AMM iteration, either ReduceReplicas or RetireWorker (arbitrarily
+       depending on which comes first in the iteration of
+       ActiveMemoryManagerExtension.policies) deletes non-unique keys, choosing from
+       workers to be retired first. At the same time, RetireWorker replicates unique
+       keys.
+    2. On the second AMM iteration, either ReduceReplicas or RetireWorker deletes the
+       keys replicated at the previous round from the worker to be retired.
+
+    If ReduceReplicas is not enabled, all drops are performed by RetireWorker.
+
+    This test fundamentally relies on workers that are in the process of being retired
+    always being picked first by ActiveMemoryManagerExtension._find_dropper.
+    """
+    ws_a, ws_b = s.workers.values()
+    if not use_ReduceReplicas:
+        s.extensions["amm"].policies.clear()
+
+    x = c.submit(lambda: "x" * 2**26, key="x", workers=[ws_a.address])  # 64 MiB
+    y = c.submit(lambda: "y" * 2**26, key="y", workers=[ws_a.address])  # 64 MiB
+    z = c.submit(lambda x: None, x, key="z", workers=[ws_b.address])  # copy x to ws_b
+    # Make sure that the worker NOT being retired has the most RAM usage to test that
+    # it is not being picked first since there's a retiring worker.
+    w = c.submit(lambda: "w" * 2**28, key="w", workers=[ws_b.address])  # 256 MiB
+    await wait([x, y, z, w])
+
+    await c.retire_workers([ws_a.address], remove=False)
+    # retire_workers may return before all keys have been dropped from a
+    while ws_a.has_what:
+        await asyncio.sleep(0.01)
+    assert {ts.key for ts in ws_b.has_what} == {"x", "y", "z", "w"}
+
+
+@gen_cluster(client=True, nthreads=[("", 1)] * 3, config=NO_AMM_START)
+async def test_RetireWorker_all_replicas_are_being_retired(c, s, w1, w2, w3):
+    """There are multiple replicas of a key, but they all reside on workers that are
+    being retired
+    """
+    ws1 = s.workers[w1.address]
+    ws2 = s.workers[w2.address]
+    ws3 = s.workers[w3.address]
+    fut = await c.scatter({"x": "x"}, workers=[w1.address, w2.address], broadcast=True)
+    assert s.tasks["x"].who_has == {ws1, ws2}
+    await c.retire_workers([w1.address, w2.address])
+    assert s.tasks["x"].who_has == {ws3}
+
+
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 4,
+    config={
+        "distributed.scheduler.active-memory-manager.start": True,
+        # test that retire_workers performs a manual amm.run_once() "kick"
+        "distributed.scheduler.active-memory-manager.interval": 999,
+        "distributed.scheduler.active-memory-manager.policies": [],
+    },
+)
+async def test_RetireWorker_no_recipients(c, s, w1, w2, w3, w4):
+    """All workers are retired at once.
+
+    Test use cases:
+    1. (w1) worker contains no data -> it is retired
+    2. (w2) worker contains unique data -> it is not retired
+    3. (w3, w4) worker contains non-unique data, but all replicas are on workers that
+       are being retired -> all but one are retired
+    """
+    x = await c.scatter({"x": "x"}, workers=[w2.address])
+    y = await c.scatter({"y": "y"}, workers=[w3.address, w4.address], broadcast=True)
+
+    out = await c.retire_workers([w1.address, w2.address, w3.address, w4.address])
+
+    assert set(out) in ({w1.address, w3.address}, {w1.address, w4.address})
+    assert not s.extensions["amm"].policies
+    assert set(s.workers) in ({w2.address, w3.address}, {w2.address, w4.address})
+    # After a Scheduler -> Worker -> WorkerState roundtrip, workers that failed to
+    # retire went back from closing_gracefully to running and can run tasks
+    while any(ws.status != Status.running for ws in s.workers.values()):
+        await asyncio.sleep(0.01)
+    assert await c.submit(inc, 1) == 2
+
+
+@gen_cluster(
+    client=True,
+    config={
+        "distributed.scheduler.active-memory-manager.start": True,
+        "distributed.scheduler.active-memory-manager.interval": 999,
+        "distributed.scheduler.active-memory-manager.policies": [],
+    },
+)
+async def test_RetireWorker_all_recipients_are_paused(c, s, a, b):
+    ws_a = s.workers[a.address]
+    ws_b = s.workers[b.address]
+
+    b.memory_pause_fraction = 1e-15
+    while ws_b.status != Status.paused:
+        await asyncio.sleep(0.01)
+
+    x = await c.scatter("x", workers=[a.address])
+    out = await c.retire_workers([a.address])
+    assert out == {}
+    assert not s.extensions["amm"].policies
+    assert set(s.workers) == {a.address, b.address}
+
+    # After a Scheduler -> Worker -> WorkerState roundtrip, workers that failed to
+    # retire went back from closing_gracefully to running and can run tasks
+    while ws_a.status != Status.running:
+        await asyncio.sleep(0.01)
+    assert await c.submit(inc, 1) == 2
+
+
+# FIXME can't drop runtime of this test below 10s; see distributed#5585
+@pytest.mark.slow
+@gen_cluster(
+    client=True,
+    Worker=Nanny,
+    nthreads=[("", 1)] * 3,
+    config={
+        "distributed.scheduler.worker-ttl": "500ms",
+        "distributed.scheduler.active-memory-manager.start": True,
+        "distributed.scheduler.active-memory-manager.interval": 0.1,
+        "distributed.scheduler.active-memory-manager.policies": [],
+    },
+)
+async def test_RetireWorker_faulty_recipient(c, s, *nannies):
+    """RetireWorker requests to replicate a key onto a unresponsive worker.
+    The AMM will iterate multiple times, repeating the command, until eventually the
+    scheduler declares the worker dead and removes it from the pool; at that point the
+    AMM will choose another valid worker and complete the job.
+    """
+    # ws1 is being retired
+    # ws2 has the lowest RAM usage and is chosen as a recipient, but is unresponsive
+    ws1, ws2, ws3 = s.workers.values()
+    f = c.submit(lambda: "x", key="x", workers=[ws1.address])
+    await wait(f)
+    assert s.tasks["x"].who_has == {ws1}
+
+    # Fill ws3 with 200 MB of managed memory
+    # We're using plenty to make sure it's safely more than the unmanaged memory of ws2
+    clutter = c.map(lambda i: "x" * 4_000_000, range(50), workers=[ws3.address])
+    await wait([f] + clutter)
+    while ws3.memory.process < 200_000_000:
+        # Wait for heartbeat
+        await asyncio.sleep(0.01)
+    assert ws2.memory.process < ws3.memory.process
+
+    # Make ws2 unresponsive
+    clog_fut = asyncio.create_task(c.run(sleep, 3600, workers=[ws2.address]))
+    await asyncio.sleep(0.2)
+    assert ws2.address in s.workers
+
+    await c.retire_workers([ws1.address])
+    assert ws1.address not in s.workers
+    # The AMM tried over and over to send the data to ws2, until it was declared dead
+    assert ws2.address not in s.workers
+    assert s.tasks["x"].who_has == {ws3}
+    clog_fut.cancel()
+
+
 class DropEverything(ActiveMemoryManagerPolicy):
     """Inanely suggest to drop every single key in the cluster"""
 
@@ -792,3 +1037,48 @@ async def test_ReduceReplicas_stress(c,
     policy must not disrupt the computation too much.
     """
     await tensordot_stress(c)
+
+
+@pytest.mark.slow
+@pytest.mark.avoid_ci(reason="distributed#5371")
+@pytest.mark.parametrize("use_ReduceReplicas", [False, True])
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 10,
+    Worker=Nanny,
+    config={
+        "distributed.scheduler.active-memory-manager.start": True,
+        # If interval is too low, then the AMM will rerun while tasks have not yet had
+        # the time to migrate. This is OK if it happens occasionally, but if this
+        # setting is too aggressive the cluster will get flooded with repeated comm
+        # requests.
+        "distributed.scheduler.active-memory-manager.interval": 2.0,
+        "distributed.scheduler.active-memory-manager.policies": [
+            {"class": "distributed.active_memory_manager.ReduceReplicas"},
+        ],
+    },
+)
+async def test_RetireWorker_stress(c, s, *nannies, use_ReduceReplicas):
+    """It is safe to retire the best part of a cluster in the middle of a computation"""
+    if not use_ReduceReplicas:
+        s.extensions["amm"].policies.clear()
+
+    addrs = list(s.workers)
+    random.shuffle(addrs)
+    print(f"Removing all workers except {addrs[-1]}")
+
+    # Note: Scheduler._lock effectively prevents multiple calls to retire_workers from
+    # running at the same time. However, the lock only exists for the benefit of legacy
+    # (non-AMM) rebalance() and replicate() methods. Once the lock is removed, these
+    # calls will become parallel and the test *should* continue working.
+
+    tasks = [asyncio.create_task(tensordot_stress(c))]
+    await asyncio.sleep(1)
+    tasks.append(asyncio.create_task(c.retire_workers(addrs[0:2])))
+    await asyncio.sleep(1)
+    tasks.append(asyncio.create_task(c.retire_workers(addrs[2:5])))
+    await asyncio.sleep(1)
+    tasks.append(asyncio.create_task(c.retire_workers(addrs[5:9])))
+
+    await asyncio.gather(*tasks)
+    assert set(s.workers) == {addrs[9]}
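Throughout the tests above, the Active Memory Manager is enabled purely through configuration; a stand-alone sketch of the same settings (copied from the gen_cluster kwargs used above) applied via dask.config:

    import dask

    dask.config.set({
        "distributed.scheduler.active-memory-manager.start": True,
        "distributed.scheduler.active-memory-manager.interval": 0.1,
        "distributed.scheduler.active-memory-manager.policies": [
            {"class": "distributed.active_memory_manager.ReduceReplicas"},
        ],
    })
    # A Scheduler started after this point runs ReduceReplicas every 100ms;
    # Client.retire_workers() additionally registers a temporary RetireWorker
    # policy and triggers a manual amm.run_once() "kick", as exercised above.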
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_actor.py 2022.02.0+ds.1-1/distributed/tests/test_actor.py
--- 2022.01.0+ds.1-1/distributed/tests/test_actor.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_actor.py	2022-02-11 16:21:14.000000000 +0000
@@ -71,13 +71,11 @@ class ParameterServer:
 
 
 @pytest.mark.parametrize("direct_to_workers", [True, False])
-def test_client_actions(direct_to_workers):
-    @gen_cluster(client=True)
-    async def test(c, s, a, b):
-        c = await Client(
-            s.address, asynchronous=True, direct_to_workers=direct_to_workers
-        )
-
+@gen_cluster()
+async def test_client_actions(s, a, b, direct_to_workers):
+    async with Client(
+        s.address, asynchronous=True, direct_to_workers=direct_to_workers
+    ) as c:
         counter = c.submit(Counter, workers=[a.address], actor=True)
         assert isinstance(counter, Future)
         counter = await counter
@@ -86,8 +84,7 @@ def test_client_actions(direct_to_worker
         assert hasattr(counter, "add")
         assert hasattr(counter, "n")
 
-        n = await counter.n
-        assert n == 0
+        assert await counter.n == 0
 
         assert counter._address == a.address
 
@@ -96,45 +93,36 @@ def test_client_actions(direct_to_worker
 
         await asyncio.gather(counter.increment(), counter.increment())
 
-        n = await counter.n
-        assert n == 2
+        assert await counter.n == 2
 
         counter.add(10)
         while (await counter.n) != 10 + 2:
-            n = await counter.n
             await asyncio.sleep(0.01)
 
-        await c.close()
-
-    test()
-
 
 @pytest.mark.parametrize("separate_thread", [False, True])
-def test_worker_actions(separate_thread):
-    @gen_cluster(client=True)
-    async def test(c, s, a, b):
-        counter = c.submit(Counter, workers=[a.address], actor=True)
-        a_address = a.address
-
-        def f(counter):
-            start = counter.n
-
-            assert type(counter) is Actor
-            assert counter._address == a_address
-
-            future = counter.increment(separate_thread=separate_thread)
-            assert isinstance(future, ActorFuture)
-            assert "Future" in type(future).__name__
-            end = future.result(timeout=1)
-            assert end > start
-
-        futures = [c.submit(f, counter, pure=False) for _ in range(10)]
-        await c.gather(futures)
+@gen_cluster(client=True)
+async def test_worker_actions(c, s, a, b, separate_thread):
+    counter = c.submit(Counter, workers=[a.address], actor=True)
+    a_address = a.address
+
+    def f(counter):
+        start = counter.n
+
+        assert type(counter) is Actor
+        assert counter._address == a_address
+
+        future = counter.increment(separate_thread=separate_thread)
+        assert isinstance(future, ActorFuture)
+        assert "Future" in type(future).__name__
+        end = future.result(timeout=1)
+        assert end > start
 
-        counter = await counter
-        assert await counter.n == 10
+    futures = [c.submit(f, counter, pure=False) for _ in range(10)]
+    await c.gather(futures)
 
-    test()
+    counter = await counter
+    assert await counter.n == 10
 
 
 @gen_cluster(client=True)
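The actor tests above follow the usual Dask actor pattern: submit the class with actor=True, await (or .result()) the future to obtain an Actor handle, then call methods that return ActorFutures. A minimal synchronous sketch, assuming a Counter class like the one these tests use:

    from distributed import Client

    class Counter:
        def __init__(self):
            self.n = 0

        def increment(self):
            self.n += 1
            return self.n

    client = Client(processes=False)
    counter = client.submit(Counter, actor=True).result()  # Actor handle
    fut = counter.increment()   # method call returns an ActorFuture
    assert fut.result() == 1
    assert counter.n == 1       # attribute access round-trips to the worker
    client.close()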
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_asyncprocess.py 2022.02.0+ds.1-1/distributed/tests/test_asyncprocess.py
--- 2022.01.0+ds.1-1/distributed/tests/test_asyncprocess.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_asyncprocess.py	2022-02-11 16:21:14.000000000 +0000
@@ -243,7 +243,7 @@ async def test_exit_callback():
     assert not evt.is_set()
 
     to_child.put(None)
-    await evt.wait(timedelta(seconds=3))
+    await evt.wait(timedelta(seconds=5))
     assert evt.is_set()
     assert not proc.is_alive()
 
@@ -259,7 +259,7 @@ async def test_exit_callback():
     assert not evt.is_set()
 
     await proc.terminate()
-    await evt.wait(timedelta(seconds=3))
+    await evt.wait(timedelta(seconds=5))
     assert evt.is_set()
 
 
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_client_executor.py 2022.02.0+ds.1-1/distributed/tests/test_client_executor.py
--- 2022.01.0+ds.1-1/distributed/tests/test_client_executor.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_client_executor.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,5 +1,4 @@
 import random
-import time
 from concurrent.futures import (
     FIRST_COMPLETED,
     FIRST_EXCEPTION,
@@ -8,10 +7,12 @@ from concurrent.futures import (
     as_completed,
     wait,
 )
+from time import sleep
 
 import pytest
 from tlz import take
 
+from distributed.metrics import time
 from distributed.utils import CancelledError
 from distributed.utils_test import inc, slowadd, slowdec, slowinc, throws, varying
 
@@ -86,19 +87,19 @@ def test_wait(client):
 
 def test_cancellation(client):
     with client.get_executor(pure=False) as e:
-        fut = e.submit(time.sleep, 2.0)
-        start = time.time()
+        fut = e.submit(sleep, 2.0)
+        start = time()
         while number_of_processing_tasks(client) == 0:
-            assert time.time() < start + 30
-            time.sleep(0.01)
+            assert time() < start + 30
+            sleep(0.01)
         assert not fut.done()
 
         fut.cancel()
         assert fut.cancelled()
-        start = time.time()
+        start = time()
         while number_of_processing_tasks(client) != 0:
-            assert time.time() < start + 30
-            time.sleep(0.01)
+            assert time() < start + 30
+            sleep(0.01)
 
         with pytest.raises(CancelledError):
             fut.result()
@@ -155,7 +156,7 @@ def test_map(client):
         assert number_of_processing_tasks(client) > 0
         # Garbage collect the iterator => remaining tasks are cancelled
         del it
-        time.sleep(0.5)
+        sleep(0.5)
         assert number_of_processing_tasks(client) == 0
 
 
@@ -219,27 +220,27 @@ def test_retries(client):
 def test_shutdown_wait(client):
     # shutdown(wait=True) waits for pending tasks to finish
     e = client.get_executor()
-    start = time.time()
-    fut = e.submit(time.sleep, 1.0)
+    start = time()
+    fut = e.submit(sleep, 1.0)
     e.shutdown()
-    assert time.time() >= start + 1.0
-    time.sleep(0.1)  # wait for future outcome to propagate
+    assert time() >= start + 1.0
+    sleep(0.1)  # wait for future outcome to propagate
     assert fut.done()
     fut.result()  # doesn't raise
 
     with pytest.raises(RuntimeError):
-        e.submit(time.sleep, 1.0)
+        e.submit(sleep, 1.0)
 
 
 def test_shutdown_nowait(client):
     # shutdown(wait=False) cancels pending tasks
     e = client.get_executor()
-    start = time.time()
-    fut = e.submit(time.sleep, 5.0)
+    start = time()
+    fut = e.submit(sleep, 5.0)
     e.shutdown(wait=False)
-    assert time.time() < start + 2.0
-    time.sleep(0.1)  # wait for future outcome to propagate
+    assert time() < start + 2.0
+    sleep(0.1)  # wait for future outcome to propagate
     assert fut.cancelled()
 
     with pytest.raises(RuntimeError):
-        e.submit(time.sleep, 1.0)
+        e.submit(sleep, 1.0)
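The tests above exercise Client.get_executor(), which exposes the cluster through the concurrent.futures Executor interface; a minimal sketch, assuming an in-process client:

    from concurrent.futures import as_completed

    from distributed import Client

    client = Client(processes=False)
    with client.get_executor(pure=False) as e:
        futs = [e.submit(pow, 2, i) for i in range(5)]
        results = sorted(f.result() for f in as_completed(futs))
        assert results == [1, 2, 4, 8, 16]
    client.close()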
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_client.py 2022.02.0+ds.1-1/distributed/tests/test_client.py
--- 2022.01.0+ds.1-1/distributed/tests/test_client.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_client.py	2022-02-11 16:21:14.000000000 +0000
@@ -12,15 +12,18 @@ import subprocess
 import sys
 import threading
 import traceback
+import types
 import warnings
 import weakref
 import zipfile
 from collections import deque
-from contextlib import suppress
+from collections.abc import Generator
+from contextlib import contextmanager, suppress
 from functools import partial
 from operator import add
 from threading import Semaphore
 from time import sleep
+from typing import Any
 
 import psutil
 import pytest
@@ -86,6 +89,7 @@ from distributed.utils_test import (
     gen_cluster,
     gen_test,
     geninc,
+    get_cert,
     inc,
     map_varying,
     nodebug,
@@ -97,6 +101,7 @@ from distributed.utils_test import (
     slowdec,
     slowinc,
     throws,
+    tls_only_security,
     varying,
     wait_for,
 )
@@ -238,7 +243,7 @@ async def test_custom_key_with_batches(c
     """Test of <https://github.com/dask/distributed/issues/4588>"""
 
     futs = c.map(
-        lambda x: x ** 2,
+        lambda x: x**2,
         range(10),
         batch_size=5,
         key=[str(x) for x in range(10)],
@@ -2929,95 +2934,63 @@ async def test_badly_serialized_exceptio
         await x
 
 
-@gen_cluster(
-    client=True,
-    Worker=Nanny,
-    worker_kwargs={"memory_limit": "1 GiB"},
-    config={"distributed.worker.memory.rebalance.sender-min": 0.3},
-)
-async def test_rebalance(c, s, *_):
+# Set rebalance() to work predictably on small amounts of managed memory. By default, it
+# uses optimistic memory, which could only be tested by allocating amounts of managed
+# memory large enough to hide the variations in unmanaged memory.
+REBALANCE_MANAGED_CONFIG = {
+    "distributed.worker.memory.rebalance.measure": "managed",
+    "distributed.worker.memory.rebalance.sender-min": 0,
+    "distributed.worker.memory.rebalance.sender-recipient-gap": 0,
+}
+
+
+@gen_cluster(client=True, config=REBALANCE_MANAGED_CONFIG)
+async def test_rebalance(c, s, a, b):
     """Test Client.rebalance(). These are just to test the Client wrapper around
     Scheduler.rebalance(); for more thorough tests on the latter see test_scheduler.py.
     """
-    # We used nannies to have separate processes for each worker
-    a, b = s.workers
-
-    # Generate 10 buffers worth 512 MiB total on worker a. This sends its memory
-    # utilisation slightly above 50% (after counting unmanaged) which is above the
-    # distributed.worker.memory.rebalance.sender-min threshold.
-    futures = c.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
-    await wait(futures)
-    # Wait for heartbeats
-    while s.memory.process < 2 ** 29:
-        await asyncio.sleep(0.1)
-
-    assert await c.run(lambda dask_worker: len(dask_worker.data)) == {a: 10, b: 0}
-
+    futures = await c.scatter(range(100), workers=[a.address])
+    assert len(a.data) == 100
+    assert len(b.data) == 0
     await c.rebalance()
-
-    ndata = await c.run(lambda dask_worker: len(dask_worker.data))
-    # Allow for some uncertainty as the unmanaged memory is not stable
-    assert sum(ndata.values()) == 10
-    assert 3 <= ndata[a] <= 7
-    assert 3 <= ndata[b] <= 7
+    assert len(a.data) == 50
+    assert len(b.data) == 50
 
 
-@gen_cluster(
-    nthreads=[("127.0.0.1", 1)] * 3,
-    client=True,
-    Worker=Nanny,
-    worker_kwargs={"memory_limit": "1 GiB"},
-)
-async def test_rebalance_workers_and_keys(client, s, *_):
+@gen_cluster(nthreads=[("", 1)] * 3, client=True, config=REBALANCE_MANAGED_CONFIG)
+async def test_rebalance_workers_and_keys(client, s, a, b, c):
     """Test Client.rebalance(). These are just to test the Client wrapper around
     Scheduler.rebalance(); for more thorough tests on the latter see test_scheduler.py.
     """
-    a, b, c = s.workers
-    futures = client.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
-    await wait(futures)
-    # Wait for heartbeats
-    while s.memory.process < 2 ** 29:
-        await asyncio.sleep(0.1)
+    futures = await client.scatter(range(100), workers=[a.address])
+    assert (len(a.data), len(b.data), len(c.data)) == (100, 0, 0)
 
     # Passing empty iterables is not the same as omitting the arguments
     await client.rebalance([])
     await client.rebalance(workers=[])
-    assert await client.run(lambda dask_worker: len(dask_worker.data)) == {
-        a: 10,
-        b: 0,
-        c: 0,
-    }
+    assert (len(a.data), len(b.data), len(c.data)) == (100, 0, 0)
 
     # Limit rebalancing to two arbitrary keys and two arbitrary workers.
-    await client.rebalance([futures[3], futures[7]], [a, b])
-    assert await client.run(lambda dask_worker: len(dask_worker.data)) == {
-        a: 8,
-        b: 2,
-        c: 0,
-    }
+    await client.rebalance([futures[3], futures[7]], [a.address, b.address])
+    assert (len(a.data), len(b.data), len(c.data)) == (98, 2, 0)
 
     with pytest.raises(KeyError):
         await client.rebalance(workers=["notexist"])
 
 
 def test_rebalance_sync():
-    # can't use the 'c' fixture because we need workers to run in a separate process
-    with Client(n_workers=2, memory_limit="1 GiB", dashboard_address=":0") as c:
-        s = c.cluster.scheduler
-        a, b = (ws.address for ws in s.workers.values())
-        futures = c.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
-        wait(futures)
-        # Wait for heartbeat
-        while s.memory.process < 2 ** 29:
-            sleep(0.1)
-
-        assert c.run(lambda dask_worker: len(dask_worker.data)) == {a: 10, b: 0}
-        c.rebalance()
-        ndata = c.run(lambda dask_worker: len(dask_worker.data))
-        # Allow for some uncertainty as the unmanaged memory is not stable
-        assert sum(ndata.values()) == 10
-        assert 3 <= ndata[a] <= 7
-        assert 3 <= ndata[b] <= 7
+    with dask.config.set(REBALANCE_MANAGED_CONFIG):
+        with Client(n_workers=2, processes=False, dashboard_address=":0") as c:
+            s = c.cluster.scheduler
+            a = c.cluster.workers[0]
+            b = c.cluster.workers[1]
+            futures = c.scatter(range(100), workers=[a.address])
+
+            assert len(a.data) == 100
+            assert len(b.data) == 0
+            c.rebalance()
+            assert len(a.data) == 50
+            assert len(b.data) == 50
 
 
 @gen_cluster(client=True)
@@ -3782,9 +3755,35 @@ def test_reconnect(loop):
     c.close()
 
 
+class UnhandledException(Exception):
+    pass
+
+
+@contextmanager
+def catch_unhandled_exceptions() -> Generator[None, None, None]:
+    loop = asyncio.get_running_loop()
+    ctx: dict[str, Any] | None = None
+
+    old_handler = loop.get_exception_handler()
+
+    @loop.set_exception_handler
+    def _(loop: object, context: dict[str, Any]) -> None:
+        nonlocal ctx
+        ctx = context
+
+    try:
+        yield
+    finally:
+        loop.set_exception_handler(old_handler)
+    if ctx:
+        raise UnhandledException(ctx["message"]) from ctx.get("exception")
+
+
 @gen_cluster(client=True, nthreads=[], client_kwargs={"timeout": 0.5})
 async def test_reconnect_timeout(c, s):
-    with captured_logger(logging.getLogger("distributed.client")) as logger:
+    with catch_unhandled_exceptions(), captured_logger(
+        logging.getLogger("distributed.client")
+    ) as logger:
         await s.close()
         while c.status != "closed":
             await c._update_scheduler_info()
@@ -4365,7 +4364,7 @@ async def test_retire_workers_2(c, s, a,
     assert a.address not in s.workers
 
 
-@gen_cluster(client=True, nthreads=[("127.0.0.1", 1)] * 10)
+@gen_cluster(client=True, nthreads=[("", 1)] * 10)
 async def test_retire_many_workers(c, s, *workers):
     futures = await c.scatter(list(range(100)))
 
@@ -4381,8 +4380,16 @@ async def test_retire_many_workers(c, s,
 
     assert all(future.done() for future in futures)
     assert all(s.tasks[future.key].state == "memory" for future in futures)
-    for w, keys in s.has_what.items():
-        assert 15 < len(keys) < 50
+    assert await c.gather(futures) == list(range(100))
+
+    # Don't count how many tasks landed on each worker.
+    # Normally, tasks would be distributed evenly over the surviving workers. However,
+    # here all workers share the same process memory, so you'll get an unintuitive
+    # distribution of tasks if for any reason one transfer takes longer than 2 seconds
+    # and as a consequence the Active Memory Manager ends up running for two iterations.
+    # This is something that will happen more frequently on low-powered CI machines.
+    # See test_active_memory_manager.py for tests that robustly verify the statistical
+    # distribution of tasks after worker retirement.
 
 
 @gen_cluster(
@@ -4500,7 +4507,7 @@ async def test_normalize_collection_dask
 def test_normalize_collection_with_released_futures(c):
     da = pytest.importorskip("dask.array")
 
-    x = da.arange(2 ** 20, chunks=2 ** 10)
+    x = da.arange(2**20, chunks=2**10)
     y = x.persist()
     wait(y)
     sol = y.sum().compute()
@@ -4635,13 +4642,14 @@ async def test_client_timeout():
     """`await Client(...)` keeps retrying for 10 seconds if it can't find the Scheduler
     straight away
     """
-    c = Client("127.0.0.1:57484", asynchronous=True)
-    client_start_fut = asyncio.ensure_future(c)
-    await asyncio.sleep(4)
-    async with Scheduler(port=57484, dashboard_address=":0"):
-        await client_start_fut
-        assert await c.run_on_scheduler(lambda: 123) == 123
-        await c.close()
+    with dask.config.set({"distributed.comm.timeouts.connect": "10s"}):
+        c = Client("127.0.0.1:57484", asynchronous=True)
+        client_start_fut = asyncio.ensure_future(c)
+        await asyncio.sleep(2)
+        async with Scheduler(port=57484, dashboard_address=":0"):
+            await client_start_fut
+            assert await c.run_on_scheduler(lambda: 123) == 123
+            await c.close()
 
 
 @gen_cluster(client=True)
@@ -5146,7 +5154,7 @@ def test_get_client_no_cluster():
     Worker._instances.clear()
 
     msg = "No global client found and no address provided"
-    with pytest.raises(ValueError, match=fr"^{msg}$"):
+    with pytest.raises(ValueError, match=rf"^{msg}$"):
         get_client()
 
 
@@ -5747,43 +5755,37 @@ async def test_client_active_bad_port():
 
 
 @pytest.mark.parametrize("direct", [True, False])
-def test_turn_off_pickle(direct):
-    @gen_cluster()
-    async def test(s, a, b):
-        np = pytest.importorskip("numpy")
+@gen_cluster(client=True, client_kwargs={"serializers": ["dask", "msgpack"]})
+async def test_turn_off_pickle(c, s, a, b, direct):
+    np = pytest.importorskip("numpy")
 
-        async with Client(
-            s.address, asynchronous=True, serializers=["dask", "msgpack"]
-        ) as c:
-            assert (await c.submit(inc, 1)) == 2
-            await c.submit(np.ones, 5)
-            await c.scatter(1)
-
-            # Can't send complex data
-            with pytest.raises(TypeError):
-                future = await c.scatter(inc)
-
-            # can send complex tasks (this uses pickle regardless)
-            future = c.submit(lambda x: x, inc)
-            await wait(future)
-
-            # but can't receive complex results
-            with pytest.raises(TypeError):
-                await c.gather(future, direct=direct)
-
-            # Run works
-            result = await c.run(lambda: 1)
-            assert list(result.values()) == [1, 1]
-            result = await c.run_on_scheduler(lambda: 1)
-            assert result == 1
-
-            # But not with complex return values
-            with pytest.raises(TypeError):
-                await c.run(lambda: inc)
-            with pytest.raises(TypeError):
-                await c.run_on_scheduler(lambda: inc)
+    assert (await c.submit(inc, 1)) == 2
+    await c.submit(np.ones, 5)
+    await c.scatter(1)
+
+    # Can't send complex data
+    with pytest.raises(TypeError):
+        await c.scatter(inc)
 
-    test()
+    # can send complex tasks (this uses pickle regardless)
+    future = c.submit(lambda x: x, inc)
+    await wait(future)
+
+    # but can't receive complex results
+    with pytest.raises(TypeError):
+        await c.gather(future, direct=direct)
+
+    # Run works
+    result = await c.run(lambda: 1)
+    assert list(result.values()) == [1, 1]
+    result = await c.run_on_scheduler(lambda: 1)
+    assert result == 1
+
+    # But not with complex return values
+    with pytest.raises(TypeError):
+        await c.run(lambda: inc)
+    with pytest.raises(TypeError):
+        await c.run_on_scheduler(lambda: inc)
 
 
 @gen_cluster()
@@ -5872,7 +5874,11 @@ async def test_scatter_error_cancel(c, s
 @pytest.mark.parametrize("workers_arg", [False, True])
 @pytest.mark.parametrize("direct", [False, True])
 @pytest.mark.parametrize("broadcast", [False, True, 10])
-@gen_cluster(client=True, nthreads=[("", 1)] * 10)
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 10,
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_scatter_and_replicate_avoid_paused_workers(
     c, s, *workers, workers_arg, direct, broadcast
 ):
@@ -6434,7 +6440,7 @@ async def test_performance_report(c, s,
     assert "Dask Performance Report" in data
     assert "x = da.random" in data
     assert "Threads: 4" in data
-    assert "distributed.scheduler - INFO - Clear task state" in data
+    assert "No logs to report" in data
     assert dask.__version__ in data
 
     # stacklevel=2 captures code two frames back -- which in this case
@@ -6566,6 +6572,16 @@ async def test_get_task_metadata_multipl
 
 
 @gen_cluster(client=True)
+async def test_register_worker_plugin_exception(c, s, a, b):
+    class MyPlugin:
+        def setup(self, worker=None):
+            raise ValueError("Setup failed")
+
+    with pytest.raises(ValueError, match="Setup failed"):
+        await c.register_worker_plugin(MyPlugin())
+
+
+@gen_cluster(client=True)
 async def test_log_event(c, s, a, b):
 
     # Log an event from inside a task
@@ -6610,21 +6626,19 @@ async def test_annotations_task_state(c,
 
 
 @pytest.mark.parametrize("fn", ["compute", "persist"])
-def test_annotations_compute_time(fn):
+@gen_cluster(client=True)
+async def test_annotations_compute_time(c, s, a, b, fn):
     da = pytest.importorskip("dask.array")
+    x = da.ones(10, chunks=(5,))
 
-    @gen_cluster(client=True)
-    async def test(c, s, a, b):
-        x = da.ones(10, chunks=(5,))
-
-        with dask.annotate(foo="bar"):
-            # Turn off optimization to avoid rewriting layers and picking up annotations
-            # that way. Instead, we want `compute`/`persist` to be able to pick them up.
-            x = await getattr(c, fn)(x, optimize_graph=False)
-
-        assert all({"foo": "bar"} == ts.annotations for ts in s.tasks.values())
-
-    test()
+    with dask.annotate(foo="bar"):
+        # Turn off optimization to avoid rewriting layers and picking up annotations
+        # that way. Instead, we want `compute`/`persist` to be able to pick them up.
+        fut = getattr(c, fn)(x, optimize_graph=False)
+
+    await wait(fut)
+    assert s.tasks
+    assert all(ts.annotations == {"foo": "bar"} for ts in s.tasks.values())
 
 
 @pytest.mark.xfail(reason="https://github.com/dask/dask/issues/7036")
@@ -7139,6 +7153,28 @@ async def test_events_subscribe_topic(c,
 
 
 @gen_cluster(client=True, nthreads=[("", 1)])
+async def test_events_subscribe_topic_cancelled(c, s, a):
+    event_handler_started = asyncio.Event()
+    exc_info = None
+
+    async def user_event_handler(event):
+        nonlocal exc_info
+        c.unsubscribe_topic("test-topic")
+        event_handler_started.set()
+        with pytest.raises(asyncio.CancelledError) as exc_info:
+            await asyncio.sleep(0.5)
+
+    c.subscribe_topic("test-topic", user_event_handler)
+    while not s.event_subscriber["test-topic"]:
+        await asyncio.sleep(0.01)
+
+    a.log_event("test-topic", {})
+    await event_handler_started.wait()
+    await c._close(fast=True)
+    assert exc_info is not None
+
+
+@gen_cluster(client=True, nthreads=[("", 1)])
 async def test_events_all_servers_use_same_channel(c, s, a):
     """Ensure that logs from all server types (scheduler, worker, nanny)
     and the clients themselves arrive"""
@@ -7309,3 +7345,102 @@ async def test_dump_cluster_state_error(
     )
     assert isinstance(state["workers"][b.address], dict)
     assert state["versions"]["workers"].keys() == {b.address}
+
+
+class TestClientSecurityLoader:
+    @contextmanager
+    def config_loader(self, monkeypatch, loader):
+        module_name = "totally_fake_module_name_1"
+        module = types.ModuleType(module_name)
+        module.loader = loader
+        with monkeypatch.context() as m:
+            m.setitem(sys.modules, module_name, module)
+            with dask.config.set(
+                {"distributed.client.security-loader": f"{module_name}.loader"}
+            ):
+                yield
+
+    @pytest.mark.asyncio
+    async def test_security_loader(self, monkeypatch):
+        security = tls_only_security()
+
+        async with Scheduler(
+            dashboard_address=":0", protocol="tls", security=security
+        ) as scheduler:
+
+            def loader(info):
+                assert info == {"address": scheduler.address}
+                return security
+
+            with self.config_loader(monkeypatch, loader):
+                async with Client(scheduler.address, asynchronous=True) as client:
+                    assert client.security is security
+
+    @pytest.mark.asyncio
+    async def test_security_loader_ignored_if_explicit_security_provided(
+        self, monkeypatch
+    ):
+        security = tls_only_security()
+
+        def loader(info):
+            assert False
+
+        async with Scheduler(
+            dashboard_address=":0", protocol="tls", security=security
+        ) as scheduler:
+            with self.config_loader(monkeypatch, loader):
+                async with Client(
+                    scheduler.address, security=security, asynchronous=True
+                ) as client:
+                    assert client.security is security
+
+    @pytest.mark.asyncio
+    async def test_security_loader_ignored_if_returns_none(self, monkeypatch):
+        """Test that if a security loader is configured, but it returns `None`,
+        then the default security configuration is used"""
+        ca_file = get_cert("tls-ca-cert.pem")
+        keycert = get_cert("tls-key-cert.pem")
+
+        config = {
+            "distributed.comm.require-encryption": True,
+            "distributed.comm.tls.ca-file": ca_file,
+            "distributed.comm.tls.client.cert": keycert,
+            "distributed.comm.tls.scheduler.cert": keycert,
+            "distributed.comm.tls.worker.cert": keycert,
+        }
+
+        def loader(info):
+            loader.called = True
+            return None
+
+        with dask.config.set(config):
+            async with Scheduler(dashboard_address=":0", protocol="tls") as scheduler:
+                # Smoketest to make sure config was picked up (so we're actually testing something)
+                assert scheduler.security.tls_client_cert
+                assert scheduler.security.tls_scheduler_cert
+                with self.config_loader(monkeypatch, loader):
+                    async with Client(scheduler.address, asynchronous=True) as client:
+                        assert (
+                            client.security.tls_client_cert
+                            == scheduler.security.tls_client_cert
+                        )
+
+        assert loader.called
+
+    @pytest.mark.asyncio
+    async def test_security_loader_import_failed(self):
+        security = tls_only_security()
+
+        with dask.config.set(
+            {"distributed.client.security-loader": "totally_fake_module_name_2.loader"}
+        ):
+            with pytest.raises(ImportError, match="totally_fake_module_name_2.loader"):
+                async with Client("tls://bad-address:8888", asynchronous=True):
+                    pass
+
+
+@gen_cluster(client=True, nthreads=[])
+async def test_wait_for_workers_updates_info(c, s):
+    async with Worker(s.address):
+        await c.wait_for_workers(1)
+        assert c.scheduler_info()["workers"]
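The TestClientSecurityLoader cases above exercise the distributed.client.security-loader hook, which names a callable by dotted import path; the callable receives a dict with the scheduler address and returns either a Security object or None to fall back to the default configuration. A hedged sketch of such a loader (module name and certificate paths are hypothetical):

    # my_org/security.py
    from distributed.security import Security

    def loader(info):
        # `info` carries the address the client is about to connect to,
        # e.g. {"address": "tls://scheduler:8786"}.
        if info["address"].startswith("tls://"):
            return Security(
                tls_ca_file="/etc/certs/ca.pem",
                tls_client_cert="/etc/certs/client-cert.pem",
                tls_client_key="/etc/certs/client-key.pem",
                require_encryption=True,
            )
        return None  # use whatever the dask config provides

    # dask config (e.g. ~/.config/dask/distributed.yaml):
    #   distributed:
    #     client:
    #       security-loader: my_org.security.loader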
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_collections.py 2022.02.0+ds.1-1/distributed/tests/test_collections.py
--- 2022.01.0+ds.1-1/distributed/tests/test_collections.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_collections.py	2022-02-11 16:21:14.000000000 +0000
@@ -168,7 +168,7 @@ def test_dataframe_groupby_tasks(client)
 
     for ind in [lambda x: "A", lambda x: x.A]:
         a = df.groupby(ind(df)).apply(len)
-        b = ddf.groupby(ind(ddf)).apply(len, meta=int)
+        b = ddf.groupby(ind(ddf)).apply(len, meta=(None, int))
         assert_equal(a, b.compute(scheduler="sync").sort_index())
         assert not any("partd" in k[0] for k in b.dask)
 
@@ -181,7 +181,7 @@ def test_dataframe_groupby_tasks(client)
         ddf.groupby(ddf[["A", "B"]]).apply(len, meta=int)
 
     a = df.groupby(["A", "B"]).apply(len)
-    b = ddf.groupby(["A", "B"]).apply(len, meta=int)
+    b = ddf.groupby(["A", "B"]).apply(len, meta=(None, int))
 
     assert_equal(a, b.compute(scheduler="sync").sort_index())
 
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_diskutils.py 2022.02.0+ds.1-1/distributed/tests/test_diskutils.py
--- 2022.01.0+ds.1-1/distributed/tests/test_diskutils.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_diskutils.py	2022-02-11 16:21:14.000000000 +0000
@@ -12,6 +12,7 @@ import pytest
 
 import dask
 
+from distributed.compatibility import WINDOWS
 from distributed.diskutils import WorkSpace
 from distributed.metrics import time
 from distributed.utils import mp_context
@@ -214,8 +215,7 @@ def _workspace_concurrency(base_dir, pur
 
 
 @pytest.mark.slow
-@pytest.mark.parametrize("timeout,max_procs", [(5.0, 6), (10.0, 16)])
-def test_workspace_concurrency(tmpdir, timeout, max_procs):
+def test_workspace_concurrency(tmpdir):
     """WorkSpace concurrency test. We merely check that no exception or
     deadlock happens.
     """
@@ -228,6 +228,10 @@ def test_workspace_concurrency(tmpdir, t
     # Make sure purging only happens in the child processes
     ws._purge_leftovers = lambda: None
 
+    # Windows (or at least Windows GitHub CI) has been observed to be exceptionally
+    # slow. Don't stress it too much.
+    max_procs = 2 if WINDOWS else 16
+
     # Run a bunch of child processes that will try to purge concurrently
     barrier = mp_context.Barrier(parties=max_procs + 1)
     processes = [
@@ -244,14 +248,16 @@ def test_workspace_concurrency(tmpdir, t
     n_purged = 0
     t1 = time()
     try:
-        while time() - t1 < timeout:
-            # Add a bunch of locks, and simulate forgetting them.
+        # On Linux, you will typically end up with n_created > 10,000
+        # On Windows, it can take 60 seconds to create 50 locks!
+        while time() - t1 < 10:
+            # Add a bunch of locks and simulate forgetting them.
             # The concurrent processes should try to purge them.
-            for i in range(50):
+            for _ in range(100):
                 d = ws.new_work_dir(prefix="workspace-concurrency-")
                 d._finalizer.detach()
                 n_created += 1
-            sleep(0.01)
+
     finally:
         stop_evt.set()
         for p in processes:
@@ -270,6 +276,6 @@ def test_workspace_concurrency(tmpdir, t
             n_purged += purged_q.get_nowait()
     except queue.Empty:
         pass
-    assert n_created >= 100
+
     # We attempted to purge most directories at some point
     assert n_purged >= 0.5 * n_created > 0
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_failed_workers.py 2022.02.0+ds.1-1/distributed/tests/test_failed_workers.py
--- 2022.01.0+ds.1-1/distributed/tests/test_failed_workers.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_failed_workers.py	2022-02-11 16:21:14.000000000 +0000
@@ -29,6 +29,7 @@ from distributed.utils_test import (
 pytestmark = pytest.mark.ci1
 
 
+@pytest.mark.slow()
 def test_submit_after_failed_worker_sync(loop):
     with cluster() as (s, [a, b]):
         with Client(s["address"], loop=loop) as c:
@@ -39,6 +40,7 @@ def test_submit_after_failed_worker_sync
             assert total.result() == sum(map(inc, range(10)))
 
 
+@pytest.mark.slow()
 @gen_cluster(client=True, timeout=60, active_rpc_timeout=10)
 async def test_submit_after_failed_worker_async(c, s, a, b):
     n = await Nanny(s.address, nthreads=2, loop=s.loop)
@@ -100,43 +102,6 @@ async def test_gather_then_submit_after_
 
 
 @pytest.mark.xfail(COMPILED, reason="Fails with cythonized scheduler")
-@gen_cluster(Worker=Nanny, client=True, timeout=60)
-async def test_failed_worker_without_warning(c, s, a, b):
-    L = c.map(inc, range(10))
-    await wait(L)
-
-    original_pid = a.pid
-    with suppress(CommClosedError):
-        await c._run(os._exit, 1, workers=[a.worker_address])
-    start = time()
-    while a.pid == original_pid:
-        await asyncio.sleep(0.01)
-        assert time() - start < 10
-
-    await asyncio.sleep(0.5)
-
-    start = time()
-    while len(s.nthreads) < 2:
-        await asyncio.sleep(0.01)
-        assert time() - start < 10
-
-    await wait(L)
-
-    L2 = c.map(inc, range(10, 20))
-    await wait(L2)
-    assert all(len(keys) > 0 for keys in s.has_what.values())
-    nthreads2 = dict(s.nthreads)
-
-    await c.restart()
-
-    L = c.map(inc, range(10))
-    await wait(L)
-    assert all(len(keys) > 0 for keys in s.has_what.values())
-
-    assert not (set(nthreads2) & set(s.nthreads))  # no overlap
-
-
-@pytest.mark.xfail(COMPILED, reason="Fails with cythonized scheduler")
 @gen_cluster(Worker=Nanny, client=True, timeout=60)
 async def test_restart(c, s, a, b):
     assert s.nthreads == {a.worker_address: 1, b.worker_address: 2}
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_nanny.py 2022.02.0+ds.1-1/distributed/tests/test_nanny.py
--- 2022.01.0+ds.1-1/distributed/tests/test_nanny.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_nanny.py	2022-02-11 16:21:14.000000000 +0000
@@ -4,6 +4,7 @@ import logging
 import multiprocessing as mp
 import os
 import random
+import sys
 from contextlib import suppress
 from time import sleep
 from unittest import mock
@@ -187,8 +188,8 @@ async def test_nanny_death_timeout(s):
 @gen_cluster(client=True, Worker=Nanny)
 async def test_random_seed(c, s, a, b):
     async def check_func(func):
-        x = c.submit(func, 0, 2 ** 31, pure=False, workers=a.worker_address)
-        y = c.submit(func, 0, 2 ** 31, pure=False, workers=b.worker_address)
+        x = c.submit(func, 0, 2**31, pure=False, workers=a.worker_address)
+        y = c.submit(func, 0, 2**31, pure=False, workers=b.worker_address)
         assert x.key != y.key
         x = await x
         y = await y
@@ -610,3 +611,31 @@ async def test_environ_plugin(c, s, a, b
         assert results[a.worker_address] == "123"
         assert results[b.worker_address] == "123"
         assert results[n.worker_address] == "123"
+
+
+@pytest.mark.parametrize(
+    "modname",
+    [
+        pytest.param(
+            "numpy",
+            marks=pytest.mark.xfail(reason="distributed#5723, distributed#5729"),
+        ),
+        "scipy",
+        pytest.param("pandas", marks=pytest.mark.xfail(reason="distributed#5723")),
+    ],
+)
+@gen_cluster(client=True, Worker=Nanny, nthreads=[("", 1)])
+async def test_no_unnecessary_imports_on_worker(c, s, a, modname):
+    """
+    Regression test: unnecessary modules must not be accidentally imported at worker startup.
+
+    Importing modules like pandas slows down worker startup, especially if workers are
+    loading their software environment from NFS or other non-local filesystems.
+    It also slightly increases memory footprint.
+    """
+
+    def assert_no_import(dask_worker):
+        assert modname not in sys.modules
+
+    await c.wait_for_workers(1)
+    await c.run(assert_no_import)
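
The new test above checks import hygiene by running an assertion function inside each worker process via Client.run; sys.modules has to be inspected in the worker process, not in the test process. A minimal stand-alone sketch of the same pattern (the helper name and module list are illustrative, not part of the test suite):

import sys

from dask.distributed import Client


def heavy_modules_imported(dask_worker):
    # Runs inside each worker process; sys.modules reflects that process only.
    return sorted(m for m in ("numpy", "scipy", "pandas") if m in sys.modules)


if __name__ == "__main__":
    # Assumes a local cluster whose workers run in separate (nanny-backed) processes.
    with Client(n_workers=1, processes=True) as client:
        print(client.run(heavy_modules_imported))  # {worker_address: [...]}
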
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_preload.py 2022.02.0+ds.1-1/distributed/tests/test_preload.py
--- 2022.01.0+ds.1-1/distributed/tests/test_preload.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_preload.py	2022-02-11 16:21:14.000000000 +0000
@@ -3,9 +3,9 @@ import os
 import shutil
 import sys
 import tempfile
-import time
 import urllib.error
 import urllib.request
+from time import sleep
 
 import pytest
 import tornado
@@ -15,6 +15,7 @@ import dask
 
 from distributed import Client, Nanny, Scheduler, Worker
 from distributed.compatibility import MACOS
+from distributed.metrics import time
 from distributed.utils_test import captured_logger, cluster, gen_cluster, gen_test
 
 PY_VERSION = sys.version_info[:2]
@@ -179,13 +180,13 @@ def create_preload_application():
 def scheduler_preload():
     p = multiprocessing.Process(target=create_preload_application)
     p.start()
-    start = time.time()
+    start = time()
     while not p.is_alive():
-        if time.time() > start + 5:
+        if time() > start + 5:
             raise AssertionError("Process didn't come up")
-        time.sleep(0.5)
+        sleep(0.5)
     # Make sure we can query the server
-    start = time.time()
+    start = time()
     request = urllib.request.Request("http://127.0.0.1:12345/preload", method="GET")
     while True:
         try:
@@ -193,9 +194,9 @@ def scheduler_preload():
             if response.status == 200:
                 break
         except urllib.error.URLError as e:
-            if time.time() > start + 10:
+            if time() > start + 10:
                 raise AssertionError("Webserver didn't come up", e)
-            time.sleep(0.5)
+            sleep(0.5)
 
     yield
     p.kill()
@@ -256,23 +257,23 @@ def create_worker_preload_application():
 def worker_preload():
     p = multiprocessing.Process(target=create_worker_preload_application)
     p.start()
-    start = time.time()
+    start = time()
     while not p.is_alive():
-        if time.time() > start + 5:
+        if time() > start + 5:
             raise AssertionError("Process didn't come up")
-        time.sleep(0.5)
+        sleep(0.5)
     # Make sure we can query the server
     request = urllib.request.Request("http://127.0.0.1:12346/preload", method="GET")
-    start = time.time()
+    start = time()
     while True:
         try:
             response = urllib.request.urlopen(request)
             if response.status == 200:
                 break
         except urllib.error.URLError as e:
-            if time.time() > start + 10:
+            if time() > start + 10:
                 raise AssertionError("Webserver didn't come up", e)
-            time.sleep(0.5)
+            sleep(0.5)
 
     yield
     p.kill()
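
These fixtures now take their deadlines from distributed.metrics.time while keeping time.sleep for pauses. The poll-until-deadline loop they repeat can be factored into a small helper; this is a sketch only, and the name poll_until is invented here rather than taken from the test suite:

from time import sleep

from distributed.metrics import time


def poll_until(predicate, timeout=10.0, interval=0.5, msg="condition not met"):
    """Call ``predicate`` until it returns True or ``timeout`` seconds elapse."""
    deadline = time() + timeout
    while not predicate():
        if time() > deadline:
            raise AssertionError(msg)
        sleep(interval)


# e.g. poll_until(p.is_alive, timeout=5, msg="Process didn't come up")
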
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_profile.py 2022.02.0+ds.1-1/distributed/tests/test_profile.py
--- 2022.01.0+ds.1-1/distributed/tests/test_profile.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_profile.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,12 +1,12 @@
 import sys
 import threading
-import time
+from time import sleep
 
 import pytest
 from tlz import first
 
-from distributed import metrics
 from distributed.compatibility import WINDOWS
+from distributed.metrics import time
 from distributed.profile import (
     call_stack,
     create,
@@ -22,10 +22,10 @@ from distributed.profile import (
 
 def test_basic():
     def test_g():
-        time.sleep(0.01)
+        sleep(0.01)
 
     def test_h():
-        time.sleep(0.02)
+        sleep(0.02)
 
     def test_f():
         for i in range(100):
@@ -39,7 +39,7 @@ def test_basic():
     state = create()
 
     for i in range(100):
-        time.sleep(0.02)
+        sleep(0.02)
         frame = sys._current_frames()[thread.ident]
         process(frame, None, state)
 
@@ -70,7 +70,7 @@ def test_basic_low_level():
     state = create()
 
     for i in range(100):
-        time.sleep(0.02)
+        sleep(0.02)
         frame = sys._current_frames()[threading.get_ident()]
         llframes = {threading.get_ident(): ll_get_stack(threading.get_ident())}
         for f in llframes.values():
@@ -179,24 +179,24 @@ def test_identifier():
 
 
 def test_watch():
-    start = metrics.time()
+    start = time()
 
     def stop():
-        return metrics.time() > start + 0.500
+        return time() > start + 0.500
 
     start_threads = threading.active_count()
 
     log = watch(interval="10ms", cycle="50ms", stop=stop)
 
-    start = metrics.time()  # wait until thread starts up
+    start = time()  # wait until thread starts up
     while threading.active_count() <= start_threads:
-        assert metrics.time() < start + 2
-        time.sleep(0.01)
+        assert time() < start + 2
+        sleep(0.01)
 
-    time.sleep(0.5)
+    sleep(0.5)
     assert 1 < len(log) < 10
 
-    start = metrics.time()
+    start = time()
     while threading.active_count() > start_threads:
-        assert metrics.time() < start + 2
-        time.sleep(0.01)
+        assert time() < start + 2
+        sleep(0.01)
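
For reference, distributed.profile.watch (as called above) starts a background thread that samples call stacks every interval and appends an aggregated snapshot to the returned log every cycle, until the stop callback returns True. A hedged usage sketch; like the test, it only relies on the number of entries, not on their exact contents:

from time import sleep

from distributed.metrics import time
from distributed.profile import watch


def sample_for(seconds=0.5):
    deadline = time() + seconds
    log = watch(interval="10ms", cycle="50ms", stop=lambda: time() > deadline)
    sleep(seconds + 0.1)  # let the watcher thread run and then wind down
    return list(log)      # profile snapshots accumulated during the window


if __name__ == "__main__":
    snapshots = sample_for()
    print(f"collected {len(snapshots)} profile snapshots")
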
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_scheduler.py 2022.02.0+ds.1-1/distributed/tests/test_scheduler.py
--- 2022.01.0+ds.1-1/distributed/tests/test_scheduler.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_scheduler.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,5 +1,4 @@
 import asyncio
-import gc
 import json
 import logging
 import operator
@@ -140,14 +139,16 @@ def test_decide_worker_coschedule_order_
         nthreads=nthreads,
         config={"distributed.scheduler.work-stealing": False},
     )
-    async def test(c, s, *workers):
+    async def test_decide_worker_coschedule_order_neighbors_(c, s, *workers):
         r"""
-        Ensure that sibling root tasks are scheduled to the same node, reducing future data transfer.
+        Ensure that sibling root tasks are scheduled to the same node, reducing future
+        data transfer.
 
-        We generate a wide layer of "root" tasks (random NumPy arrays). All of those tasks share 0-5
-        trivial dependencies. The ``ndeps=0`` and ``ndeps=1`` cases are most common in real-world use
-        (``ndeps=1`` is basically ``da.from_array(..., inline_array=False)`` or ``da.from_zarr``).
-        The graph is structured like this (though the number of tasks and workers is different):
+        We generate a wide layer of "root" tasks (random NumPy arrays). All of those
+        tasks share 0-5 trivial dependencies. The ``ndeps=0`` and ``ndeps=1`` cases are
+        most common in real-world use (``ndeps=1`` is basically ``da.from_array(...,
+        inline_array=False)`` or ``da.from_zarr``). The graph is structured like this
+        (though the number of tasks and workers is different):
 
             |-W1-|  |-W2-| |-W3-|  |-W4-|   < ---- ideal task scheduling
 
@@ -159,9 +160,9 @@ def test_decide_worker_coschedule_order_
             \   \   \   |   |   /   /   /
                    TRIVIAL * 0..5
 
-        Neighboring `random-` tasks should be scheduled on the same worker. We test that generally,
-        only one worker holds each row of the array, that the `random-` tasks are never transferred,
-        and that there are few transfers overall.
+        Neighboring `random-` tasks should be scheduled on the same worker. We test that
+        generally, only one worker holds each row of the array, that the `random-` tasks
+        are never transferred, and that there are few transfers overall.
         """
         da = pytest.importorskip("dask.array")
         np = pytest.importorskip("numpy")
@@ -222,16 +223,18 @@ def test_decide_worker_coschedule_order_
                 keys = log["keys"]
                 # The root-ish tasks should never be transferred
                 assert not any(k.startswith("random") for k in keys), keys
-                # `object-` keys (the trivial deps of the root random tasks) should be transferred
+                # `object-` keys (the trivial deps of the root random tasks) should be
+                # transferred
                 if any(not k.startswith("object") for k in keys):
                     # But not many other things should be
                     unexpected_transfers.append(list(keys))
 
-        # A transfer at the very end to move aggregated results is fine (necessary with unbalanced workers in fact),
-        # but generally there should be very very few transfers.
+        # A transfer at the very end to move aggregated results is fine (necessary with
+        # unbalanced workers in fact), but generally there should be very very few
+        # transfers.
         assert len(unexpected_transfers) <= 3, unexpected_transfers
 
-    test()
+    test_decide_worker_coschedule_order_neighbors_()
 
 
 @gen_cluster(client=True, nthreads=[("127.0.0.1", 1)] * 3)
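
The reworded docstring above describes a wide layer of root tasks that share a few trivial dependencies, with the ndeps=1 case corresponding to da.from_array(..., inline_array=False). A tiny sketch of a graph with that shape, using illustrative sizes and chunking:

import numpy as np
import dask.array as da

# One trivial dependency shared by every root chunk task (the ndeps=1 case):
source = np.random.random((100, 100))
x = da.from_array(source, chunks=(10, 100), inline_array=False)

# Each row-chunk of ``x`` is a root task that depends only on the wrapped array;
# a reduction on top provides the aggregation layer sketched in the docstring.
result = x.sum(axis=1)
print(result.compute().shape)  # (100,)
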
@@ -2353,7 +2356,7 @@ async def test_unknown_task_duration_con
 @gen_cluster(client=True)
 async def test_retire_state_change(c, s, a, b):
     np = pytest.importorskip("numpy")
-    y = c.map(lambda x: x ** 2, range(10))
+    y = c.map(lambda x: x**2, range(10))
     await c.scatter(y)
     coros = []
     for x in range(2):
@@ -2454,134 +2457,176 @@ def test_memorystate_adds_up(process, un
     assert m.optimistic + m.unmanaged_recent == m.process
 
 
+_test_leak = []
+
+
 def leaking(out_mib, leak_mib, sleep_time):
-    if leak_mib:
-        global __test_leak
-        __test_leak = "x" * (leak_mib * 2 ** 20)
-    out = "x" * (out_mib * 2 ** 20)
+    out = "x" * (out_mib * 2**20)
+    _test_leak.append("x" * (leak_mib * 2**20))
     sleep(sleep_time)
     return out
 
 
 def clear_leak():
-    global __test_leak
-    del __test_leak
-    gc.collect()
+    _test_leak.clear()
 
 
-async def assert_memory(scheduler_or_workerstate, attr: str, min_, max_, timeout=10):
+async def assert_memory(
+    scheduler_or_workerstate,
+    attr: str,
+    min_mib: float,
+    max_mib: float,
+    *,
+    timeout: float = 10,
+) -> None:
     t0 = time()
     while True:
         minfo = scheduler_or_workerstate.memory
-        nmib = getattr(minfo, attr) / 2 ** 20
-        if min_ <= nmib <= max_:
+        nmib = getattr(minfo, attr) / 2**20
+        if min_mib <= nmib <= max_mib:
             return
         if time() - t0 > timeout:
-            raise TimeoutError(
-                f"Expected {min_} MiB <= {attr} <= {max_} MiB; got:\n{minfo!r}"
+            raise AssertionError(
+                f"Expected {min_mib} MiB <= {attr} <= {max_mib} MiB; got:\n{minfo!r}"
             )
-        await asyncio.sleep(0.1)
+        await asyncio.sleep(0.01)
 
 
-# ~31s runtime, or distributed.worker.memory.recent-to-old-time + 1s.
-# On Windows, it can take ~65s due to worker memory needing to stabilize first.
 @pytest.mark.slow
-@pytest.mark.flaky(condition=LINUX, reason="see comments", reruns=10, reruns_delay=5)
 @gen_cluster(
-    client=True, Worker=Nanny, worker_kwargs={"memory_limit": "500 MiB"}, timeout=120
+    client=True,
+    Worker=Nanny,
+    config={
+        "distributed.worker.memory.recent-to-old-time": "4s",
+        "distributed.worker.memory.spill": 0.7,
+    },
+    worker_kwargs={
+        "heartbeat_interval": "20ms",
+        "memory_limit": "700 MiB",
+    },
 )
-async def test_memory(c, s, *_):
+async def test_memory(c, s, *nannies):
     # WorkerState objects, as opposed to the Nanny objects passed by gen_cluster
     a, b = s.workers.values()
 
+    def print_memory_info(msg: str) -> None:
+        print(f"==== {msg} ====")
+        print(f"---- a ----\n{a.memory}")
+        print(f"---- b ----\n{b.memory}")
+        print(f"---- s ----\n{s.memory}")
+
     s_m0 = s.memory
     assert s_m0.process == a.memory.process + b.memory.process
     assert s_m0.managed == 0
     assert a.memory.managed == 0
     assert b.memory.managed == 0
 
-    # When a worker first goes online, its RAM is immediately counted as unmanaged_old.
-    # On Windows, however, there is somehow enough time between the worker start and
-    # this line for 2 heartbeats and the memory keeps growing substantially for a while.
-    # Sometimes there is a single heartbeat but on the consecutive test we observe
-    # a large unexplained increase in unmanaged_recent memory.
-    # Wait for the situation to stabilize.
-    if WINDOWS:
-        await asyncio.sleep(10)
-        initial_timeout = 40
-    else:
-        initial_timeout = 0
+    # Trigger potential imports inside WorkerPlugin.transition
+    await c.submit(inc, 0, workers=[a.address])
+    await c.submit(inc, 1, workers=[b.address])
+    # Wait for the memory readings to stabilize after workers go online
+    await asyncio.sleep(2)
+    await asyncio.gather(
+        assert_memory(a, "unmanaged_recent", 0, 5, timeout=10),
+        assert_memory(b, "unmanaged_recent", 0, 5, timeout=10),
+        assert_memory(s, "unmanaged_recent", 0, 10, timeout=10.1),
+    )
+
+    print()
+    print_memory_info("Starting memory")
+
+    # 50 MiB heap + 100 MiB leak
+    # Note that runtime=2s is less than recent-to-old-time=4s
+    f1 = c.submit(leaking, 50, 100, 2, key="f1", workers=[a.name])
+    f2 = c.submit(leaking, 50, 100, 2, key="f2", workers=[b.name])
 
-    await assert_memory(s, "unmanaged_recent", 0, 40, timeout=initial_timeout)
-    await assert_memory(a, "unmanaged_recent", 0, 20, timeout=initial_timeout)
-    await assert_memory(b, "unmanaged_recent", 0, 20, timeout=initial_timeout)
-
-    f1 = c.submit(leaking, 100, 50, 10, pure=False, workers=[a.name])
-    f2 = c.submit(leaking, 100, 50, 10, pure=False, workers=[b.name])
-    await assert_memory(s, "unmanaged_recent", 300, 380)
-    await assert_memory(a, "unmanaged_recent", 150, 190)
-    await assert_memory(b, "unmanaged_recent", 150, 190)
+    await asyncio.gather(
+        assert_memory(a, "unmanaged_recent", 150, 170, timeout=1.8),
+        assert_memory(b, "unmanaged_recent", 150, 170, timeout=1.8),
+        assert_memory(s, "unmanaged_recent", 300, 340, timeout=1.9),
+    )
     await wait([f1, f2])
 
-    # On each worker, we now have 100 MiB managed + 50 MiB fresh leak
-    await assert_memory(s, "managed_in_memory", 200, 201)
-    await assert_memory(a, "managed_in_memory", 100, 101)
-    await assert_memory(b, "managed_in_memory", 100, 101)
-    await assert_memory(s, "unmanaged_recent", 100, 180)
-    await assert_memory(a, "unmanaged_recent", 50, 90)
-    await assert_memory(b, "unmanaged_recent", 50, 90)
-
-    # Force the output of f1 and f2 to spill to disk.
-    # With spill=0.7 and memory_limit=500 MiB, we'll start spilling at 350 MiB process
-    # memory per worker, or up to 20 iterations of the below depending on how much RAM
-    # the interpreter is using.
-    more_futs = []
-    while not s.memory.managed_spilled:
-        if a.memory.process < 0.7 * 500 * 2 ** 20:
-            more_futs.append(c.submit(leaking, 10, 0, 0, pure=False, workers=[a.name]))
-        if b.memory.process < 0.7 * 500 * 2 ** 20:
-            more_futs.append(c.submit(leaking, 10, 0, 0, pure=False, workers=[b.name]))
-        await wait(more_futs)
-        await asyncio.sleep(1)
+    # On each worker, we now have 50 MiB managed + 100 MiB fresh leak
+    await asyncio.gather(
+        assert_memory(a, "managed_in_memory", 50, 51, timeout=0),
+        assert_memory(b, "managed_in_memory", 50, 51, timeout=0),
+        assert_memory(s, "managed_in_memory", 100, 101, timeout=0),
+        assert_memory(a, "unmanaged_recent", 100, 120, timeout=0),
+        assert_memory(b, "unmanaged_recent", 100, 120, timeout=0),
+        assert_memory(s, "unmanaged_recent", 200, 240, timeout=0),
+    )
+
+    # Force the output of f1 and f2 to spill to disk
+    print_memory_info("Before spill")
+    a_leak = round(700 * 0.7 - a.memory.process / 2**20)
+    b_leak = round(700 * 0.7 - b.memory.process / 2**20)
+    assert a_leak > 50 and b_leak > 50
+    a_leak += 10
+    b_leak += 10
+    print(f"Leaking additional memory: a_leak={a_leak}; b_leak={b_leak}")
+    await wait(
+        [
+            c.submit(leaking, 0, a_leak, 0, pure=False, workers=[a.name]),
+            c.submit(leaking, 0, b_leak, 0, pure=False, workers=[b.name]),
+        ]
+    )
+
+    # Timeout needs to be enough to spill 100 MiB to disk
+    await asyncio.gather(
+        assert_memory(a, "managed_spilled", 50, 51, timeout=10),
+        assert_memory(b, "managed_spilled", 50, 51, timeout=10),
+        assert_memory(s, "managed_spilled", 100, 101, timeout=10.1),
+    )
+    # FIXME on Windows and MacOS we occasionally observe managed_in_memory = 49 bytes
+    await asyncio.gather(
+        assert_memory(a, "managed_in_memory", 0, 0.1, timeout=0),
+        assert_memory(b, "managed_in_memory", 0, 0.1, timeout=0),
+        assert_memory(s, "managed_in_memory", 0, 0.1, timeout=0),
+    )
 
-    # Wait for the spilling to finish. Note that this does not make the test take
-    # longer as we're waiting for recent-to-old-time anyway.
-    await asyncio.sleep(10)
+    print_memory_info("After spill")
 
     # Delete spilled keys
-    prev = s.memory
     del f1
     del f2
-    await assert_memory(s, "managed_spilled", 0, prev.managed_spilled / 2 ** 20 - 19)
-
-    # Empty the cluster, with the exception of leaked memory
-    del more_futs
-    await assert_memory(s, "managed", 0, 0)
+    await asyncio.gather(
+        assert_memory(a, "managed_spilled", 0, 0, timeout=3),
+        assert_memory(b, "managed_spilled", 0, 0, timeout=3),
+        assert_memory(s, "managed_spilled", 0, 0, timeout=3.1),
+    )
 
-    orig_unmanaged = s_m0.unmanaged / 2 ** 20
-    orig_old = s_m0.unmanaged_old / 2 ** 20
+    print_memory_info("After clearing spilled keys")
 
-    # Wait until 30s have passed since the spill to observe unmanaged_recent
+    # Wait until 4s have passed since the spill to observe unmanaged_recent
     # transition into unmanaged_old
-    await c.run(gc.collect)
-    await assert_memory(s, "unmanaged_recent", 0, 90, timeout=40)
-    await assert_memory(s, "unmanaged_old", orig_old + 90, 9999, timeout=40)
+    await asyncio.gather(
+        assert_memory(a, "unmanaged_recent", 0, 5, timeout=4.5),
+        assert_memory(b, "unmanaged_recent", 0, 5, timeout=4.5),
+        assert_memory(s, "unmanaged_recent", 0, 10, timeout=4.6),
+    )
 
     # When the leaked memory is cleared, unmanaged and unmanaged_old drop.
     # On MacOS and Windows, the process memory of the Python interpreter does not shrink
     # as fast as on Linux. Note that this behaviour is heavily impacted by OS tweaks,
     # meaning that what you observe on your local host may behave differently on CI.
-    # Even on Linux, this occasionally glitches - hence why there is a flaky marker on
-    # this test.
     if not LINUX:
         return
 
-    orig_unmanaged = s.memory.unmanaged / 2 ** 20
-    orig_old = s.memory.unmanaged_old / 2 ** 20
+    print_memory_info("Before clearing memory leak")
+
+    prev_unmanaged_a = a.memory.unmanaged / 2**20
+    prev_unmanaged_b = b.memory.unmanaged / 2**20
     await c.run(clear_leak)
-    await assert_memory(s, "unmanaged", 0, orig_unmanaged - 60)
-    await assert_memory(s, "unmanaged_old", 0, orig_old - 60)
-    await assert_memory(s, "unmanaged_recent", 0, 90)
+
+    await asyncio.gather(
+        assert_memory(a, "unmanaged", 0, prev_unmanaged_a - 50, timeout=10),
+        assert_memory(b, "unmanaged", 0, prev_unmanaged_b - 50, timeout=10),
+    )
+    await asyncio.gather(
+        assert_memory(a, "unmanaged_recent", 0, 5, timeout=0),
+        assert_memory(b, "unmanaged_recent", 0, 5, timeout=0),
+    )
 
 
 @gen_cluster(client=True, worker_kwargs={"memory_limit": 0})
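
The rewritten test_memory leans on the MemoryState bookkeeping: scheduler-level figures are the sums of the per-worker figures, all attributes are reported in bytes, and (per the test_memorystate_adds_up hunk above) optimistic + unmanaged_recent equals process. A small inspection sketch against an in-process scheduler, using only attributes that appear in these tests:

import asyncio

from distributed import Scheduler, Worker


async def main():
    async with Scheduler(port=0) as s:
        async with Worker(s.address) as a, Worker(s.address) as b:
            ws_a, ws_b = s.workers.values()  # WorkerState objects, as in the test
            # Scheduler-level readings are the sum of the per-worker readings
            assert s.memory.process == ws_a.memory.process + ws_b.memory.process
            for attr in ("managed_in_memory", "managed_spilled", "unmanaged_recent"):
                print(attr, getattr(s.memory, attr) / 2**20, "MiB")


asyncio.run(main())
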
@@ -2594,7 +2639,7 @@ async def test_memory_no_zict(c, s, a, b
     assert isinstance(b.data, dict)
     f = c.submit(leaking, 10, 0, 0)
     await f
-    assert 10 * 2 ** 20 < s.memory.managed_in_memory < 11 * 2 ** 20
+    assert 10 * 2**20 < s.memory.managed_in_memory < 11 * 2**20
     assert s.memory.managed_spilled == 0
 
 
@@ -2671,52 +2716,66 @@ async def assert_ndata(client, by_addr,
     worker_kwargs={"memory_limit": "1 GiB"},
     config={"distributed.worker.memory.rebalance.sender-min": 0.3},
 )
-async def test_rebalance(c, s, *_):
+async def test_rebalance(c, s, a, b):
     # We used nannies to have separate processes for each worker
-    a, b = s.workers
-
-    # Generate 10 buffers worth 512 MiB total on worker a. This sends its memory
+    # Generate 500 buffers worth 512 MiB total on worker a. This sends its memory
     # utilisation slightly above 50% (after counting unmanaged) which is above the
     # distributed.worker.memory.rebalance.sender-min threshold.
-    futures = c.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
+    futures = c.map(
+        lambda _: "x" * (2**29 // 500), range(500), workers=[a.worker_address]
+    )
     await wait(futures)
     # Wait for heartbeats
     await assert_memory(s, "process", 512, 1024)
-    await assert_ndata(c, {a: 10, b: 0})
+    await assert_ndata(c, {a.worker_address: 500, b.worker_address: 0})
     await s.rebalance()
     # Allow for some uncertainty as the unmanaged memory is not stable
-    await assert_ndata(c, {a: (3, 7), b: (3, 7)}, total=10)
+    await assert_ndata(
+        c, {a.worker_address: (50, 450), b.worker_address: (50, 450)}, total=500
+    )
 
     # rebalance() when there is nothing to do
     await s.rebalance()
-    await assert_ndata(c, {a: (3, 7), b: (3, 7)}, total=10)
+    await assert_ndata(
+        c, {a.worker_address: (50, 450), b.worker_address: (50, 450)}, total=500
+    )
 
 
-@gen_cluster(
-    nthreads=[("127.0.0.1", 1)] * 3,
-    client=True,
-    Worker=Nanny,
-    worker_kwargs={"memory_limit": "1 GiB"},
-)
-async def test_rebalance_workers_and_keys(client, s, *_):
-    a, b, c = s.workers
-    futures = client.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
-    await wait(futures)
-    # Wait for heartbeats
-    await assert_memory(s, "process", 512, 1024)
+# Set rebalance() to work predictably on small amounts of managed memory. By default,
+# it uses optimistic memory, which could only be tested by allocating amounts of
+# managed memory large enough to hide variations in unmanaged memory.
+REBALANCE_MANAGED_CONFIG = {
+    "distributed.worker.memory.rebalance.measure": "managed",
+    "distributed.worker.memory.rebalance.sender-min": 0,
+    "distributed.worker.memory.rebalance.sender-recipient-gap": 0,
+}
+
+
+@gen_cluster(client=True, config=REBALANCE_MANAGED_CONFIG)
+async def test_rebalance_managed_memory(c, s, a, b):
+    futures = await c.scatter(range(100), workers=[a.address])
+    assert len(a.data) == 100
+    assert len(b.data) == 0
+    await s.rebalance()
+    assert len(a.data) == 50
+    assert len(b.data) == 50
+
+
+@gen_cluster(nthreads=[("", 1)] * 3, client=True, config=REBALANCE_MANAGED_CONFIG)
+async def test_rebalance_workers_and_keys(client, s, a, b, c):
+    futures = await client.scatter(range(100), workers=[a.address])
+    assert (len(a.data), len(b.data), len(c.data)) == (100, 0, 0)
 
     # Passing empty iterables is not the same as omitting the arguments
     await s.rebalance(keys=[])
-    await assert_ndata(client, {a: 10, b: 0, c: 0})
     await s.rebalance(workers=[])
-    await assert_ndata(client, {a: 10, b: 0, c: 0})
-    # Limit operation to workers that have nothing to do
-    await s.rebalance(workers=[b, c])
-    await assert_ndata(client, {a: 10, b: 0, c: 0})
-
-    # Limit rebalancing to two arbitrary keys and two arbitrary workers
-    await s.rebalance(keys=[futures[3].key, futures[7].key], workers=[a, b])
-    await assert_ndata(client, {a: 8, b: 2, c: 0}, total=10)
+    assert (len(a.data), len(b.data), len(c.data)) == (100, 0, 0)
+
+    # Limit rebalancing to two arbitrary keys and two arbitrary workers.
+    await s.rebalance(
+        keys=[futures[3].key, futures[7].key], workers=[a.address, b.address]
+    )
+    assert (len(a.data), len(b.data), len(c.data)) == (98, 2, 0)
 
     with pytest.raises(KeyError):
         await s.rebalance(workers=["notexist"])
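
REBALANCE_MANAGED_CONFIG makes rebalance() measure managed memory only and zeroes the sender thresholds, so the 50/50 split in test_rebalance_managed_memory is deterministic even for tiny payloads. Roughly the same behaviour can be reproduced outside gen_cluster by setting the same config keys before the cluster starts; a hedged sketch (cluster sizing and the scheduler_info lookup are illustrative):

import asyncio

import dask
from distributed import Client

REBALANCE_MANAGED_CONFIG = {
    "distributed.worker.memory.rebalance.measure": "managed",
    "distributed.worker.memory.rebalance.sender-min": 0,
    "distributed.worker.memory.rebalance.sender-recipient-gap": 0,
}


async def main():
    # The config must be in place before the scheduler starts, since rebalance()
    # runs on the scheduler.
    with dask.config.set(REBALANCE_MANAGED_CONFIG):
        async with Client(n_workers=2, asynchronous=True) as client:
            a, b = client.scheduler_info()["workers"]  # worker addresses
            # Keep a reference to the futures so the keys are not released
            futures = await client.scatter(range(100), workers=[a])
            await client.rebalance()
            ndata = await client.run(lambda dask_worker: len(dask_worker.data))
            print(ndata)  # expect roughly a 50/50 split between the two workers


asyncio.run(main())
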
@@ -2742,24 +2801,20 @@ async def test_rebalance_missing_data2(c
 
 
 @pytest.mark.parametrize("explicit", [False, True])
-@gen_cluster(client=True, Worker=Nanny, worker_kwargs={"memory_limit": "1 GiB"})
-async def test_rebalance_raises_missing_data3(c, s, *_, explicit):
+@gen_cluster(client=True, config=REBALANCE_MANAGED_CONFIG)
+async def test_rebalance_raises_missing_data3(c, s, a, b, explicit):
     """keys exist when the sync part of rebalance runs, but are gone by the time the
     actual data movement runs.
     There is an error message only if the keys are explicitly listed in the API call.
     """
-    a, _ = s.workers
-    futures = c.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
-    await wait(futures)
-    # Wait for heartbeats
-    await assert_memory(s, "process", 512, 1024)
+    futures = await c.scatter(range(100), workers=[a.address])
 
     if explicit:
         keys = [f.key for f in futures]
         del futures
         out = await s.rebalance(keys=keys)
         assert out["status"] == "partial-fail"
-        assert 1 <= len(out["keys"]) <= 10
+        assert 1 <= len(out["keys"]) <= 100
     else:
         del futures
         out = await s.rebalance()
@@ -2773,46 +2828,17 @@ async def test_rebalance_no_workers(s):
 
 @gen_cluster(
     client=True,
-    Worker=Nanny,
-    worker_kwargs={"memory_limit": "1000 MiB"},
-    config={
-        "distributed.worker.memory.rebalance.measure": "managed",
-        "distributed.worker.memory.rebalance.sender-min": 0.3,
-    },
-)
-async def test_rebalance_managed_memory(c, s, *_):
-    a, b = s.workers
-    # Generate 100 buffers worth 400 MiB total on worker a. This sends its memory
-    # utilisation to exactly 40%, ignoring unmanaged, which is above the
-    # distributed.worker.memory.rebalance.sender-min threshold.
-    futures = c.map(lambda _: "x" * (2 ** 22), range(100), workers=[a])
-    await wait(futures)
-    # Even if we're just using managed memory, which is instantaneously accounted for as
-    # soon as the tasks finish, MemoryState.managed is still capped by the process
-    # memory, so we need to wait for the heartbeat.
-    await assert_memory(s, "managed", 400, 401)
-    await assert_ndata(c, {a: 100, b: 0})
-    await s.rebalance()
-    # We can expect an exact, stable result because we are completely bypassing the
-    # unpredictability of unmanaged memory.
-    await assert_ndata(c, {a: 62, b: 38})
-
-
-@gen_cluster(
-    client=True,
     worker_kwargs={"memory_limit": 0},
     config={"distributed.worker.memory.rebalance.measure": "managed"},
 )
 async def test_rebalance_no_limit(c, s, a, b):
-    # See notes in test_rebalance_managed_memory
-    futures = c.map(lambda _: "x", range(100), workers=[a.address])
-    await wait(futures)
-    # No reason to wait for memory here as we're allocating hundreds of bytes, so
-    # there's plenty of unmanaged process memory to pad it out
-    await assert_ndata(c, {a.address: 100, b.address: 0})
+    futures = await c.scatter(range(100), workers=[a.address])
+    assert len(a.data) == 100
+    assert len(b.data) == 0
     await s.rebalance()
     # Disabling memory_limit made us ignore all % thresholds set in the config
-    await assert_ndata(c, {a.address: 50, b.address: 50})
+    assert len(a.data) == 50
+    assert len(b.data) == 50
 
 
 @gen_cluster(
@@ -2825,33 +2851,32 @@ async def test_rebalance_no_limit(c, s,
         "distributed.worker.memory.rebalance.recipient-max": 0.1,
     },
 )
-async def test_rebalance_no_recipients(c, s, *_):
+async def test_rebalance_no_recipients(c, s, a, b):
     """There are sender workers, but no recipient workers"""
-    a, b = s.workers
-    fut_a = c.map(lambda _: "x" * (2 ** 20), range(250), workers=[a])  # 25%
-    fut_b = c.map(lambda _: "x" * (2 ** 20), range(100), workers=[b])  # 10%
+    # Fill 25% of the memory of a and 10% of the memory of b
+    fut_a = c.map(lambda _: "x" * (2**20), range(250), workers=[a.worker_address])
+    fut_b = c.map(lambda _: "x" * (2**20), range(100), workers=[b.worker_address])
     await wait(fut_a + fut_b)
     await assert_memory(s, "managed", 350, 351)
-    await assert_ndata(c, {a: 250, b: 100})
+    await assert_ndata(c, {a.worker_address: 250, b.worker_address: 100})
     await s.rebalance()
-    await assert_ndata(c, {a: 250, b: 100})
+    await assert_ndata(c, {a.worker_address: 250, b.worker_address: 100})
 
 
 @gen_cluster(
-    nthreads=[("127.0.0.1", 1)] * 3,
+    nthreads=[("", 1)] * 3,
     client=True,
     worker_kwargs={"memory_limit": 0},
     config={"distributed.worker.memory.rebalance.measure": "managed"},
 )
 async def test_rebalance_skip_recipient(client, s, a, b, c):
     """A recipient is skipped because it already holds a copy of the key to be sent"""
-    futures = client.map(lambda _: "x", range(10), workers=[a.address])
-    await wait(futures)
+    futures = await client.scatter(range(10), workers=[a.address])
     await client.replicate(futures[0:2], workers=[a.address, b.address])
     await client.replicate(futures[2:4], workers=[a.address, c.address])
-    await assert_ndata(client, {a.address: 10, b.address: 2, c.address: 2})
+    assert (len(a.data), len(b.data), len(c.data)) == (10, 2, 2)
     await client.rebalance(futures[:2])
-    await assert_ndata(client, {a.address: 8, b.address: 2, c.address: 4})
+    assert (len(a.data), len(b.data), len(c.data)) == (8, 2, 4)
 
 
 @gen_cluster(
@@ -2861,12 +2886,12 @@ async def test_rebalance_skip_recipient(
 )
 async def test_rebalance_skip_all_recipients(c, s, a, b):
     """All recipients are skipped because they already hold copies"""
-    futures = c.map(lambda _: "x", range(10), workers=[a.address])
+    futures = await c.scatter(range(10), workers=[a.address])
     await wait(futures)
     await c.replicate([futures[0]])
-    await assert_ndata(c, {a.address: 10, b.address: 1})
+    assert (len(a.data), len(b.data)) == (10, 1)
     await c.rebalance(futures[:2])
-    await assert_ndata(c, {a.address: 9, b.address: 2})
+    assert (len(a.data), len(b.data)) == (9, 2)
 
 
 @gen_cluster(
@@ -2878,9 +2903,9 @@ async def test_rebalance_skip_all_recipi
 async def test_rebalance_sender_below_mean(c, s, *_):
     """A task remains on the sender because moving it would send it below the mean"""
     a, b = s.workers
-    f1 = c.submit(lambda: "x" * (400 * 2 ** 20), workers=[a])
+    f1 = c.submit(lambda: "x" * (400 * 2**20), workers=[a])
     await wait([f1])
-    f2 = c.submit(lambda: "x" * (10 * 2 ** 20), workers=[a])
+    f2 = c.submit(lambda: "x" * (10 * 2**20), workers=[a])
     await wait([f2])
     await assert_memory(s, "managed", 410, 411)
     await assert_ndata(c, {a: 2, b: 0})
@@ -2909,7 +2934,7 @@ async def test_rebalance_least_recently_
     await s.rebalance()
     await assert_ndata(c, {a: 10, b: 0})
 
-    large_future = c.submit(lambda: "x" * (300 * 2 ** 20), workers=[a])
+    large_future = c.submit(lambda: "x" * (300 * 2**20), workers=[a])
     await wait([large_future])
     await assert_memory(s, "managed", 300, 301)
     await assert_ndata(c, {a: 11, b: 0})
@@ -3233,17 +3258,28 @@ async def test_set_restrictions(c, s, a,
     await f
 
 
-@gen_cluster(client=True, nthreads=[("", 1)] * 3)
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 3,
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_avoid_paused_workers(c, s, w1, w2, w3):
     w2.memory_pause_fraction = 1e-15
     while s.workers[w2.address].status != Status.paused:
         await asyncio.sleep(0.01)
     futures = c.map(slowinc, range(8), delay=0.1)
-    while (len(w1.tasks), len(w2.tasks), len(w3.tasks)) != (4, 0, 4):
-        await asyncio.sleep(0.01)
+    await wait(futures)
+    assert w1.data
+    assert not w2.data
+    assert w3.data
+    assert len(w1.data) + len(w3.data) == 8
 
 
-@gen_cluster(client=True, nthreads=[("", 1)])
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)],
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
 async def test_unpause_schedules_unrannable_tasks(c, s, a):
     a.memory_pause_fraction = 1e-15
     while s.workers[a.address].status != Status.paused:
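
Both tests above pause a worker by shrinking memory_pause_fraction to a value that any real RSS exceeds, then wait for the scheduler to observe Status.paused. A stand-alone sketch of the same trick, assuming (as the tests do) that the attribute may be mutated on a live in-process Worker:

import asyncio

from distributed import Scheduler, Worker
from distributed.core import Status


async def pause_worker_demo():
    async with Scheduler(port=0) as s:
        async with Worker(s.address, memory_monitor_interval="20ms") as w:
            # Any nonzero RSS now exceeds the pause threshold
            w.memory_pause_fraction = 1e-15
            while s.workers[w.address].status != Status.paused:
                await asyncio.sleep(0.01)
            print("worker paused; the scheduler will not start new tasks on it")


asyncio.run(pause_worker_demo())
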
@@ -3346,3 +3382,29 @@ async def test_non_idempotent_plugins(s)
     await s.register_scheduler_plugin(plugin=dumps(second), idempotent=False)
     assert "nonidempotentplugin" in s.plugins
     assert s.plugins["nonidempotentplugin"].instance == "second"
+
+
+@gen_cluster(nthreads=[("", 1)])
+async def test_repr(s, a):
+    async with Worker(s.address, nthreads=2) as b:  # name = address by default
+        ws_a = s.workers[a.address]
+        ws_b = s.workers[b.address]
+        while ws_b.status != Status.running:
+            await asyncio.sleep(0.01)
+        assert repr(s) == f"<Scheduler {s.address!r}, workers: 2, cores: 3, tasks: 0>"
+        assert (
+            repr(a)
+            == f"<Worker {a.address!r}, name: 0, status: running, stored: 0, running: 0/1, ready: 0, comm: 0, waiting: 0>"
+        )
+        assert (
+            repr(b)
+            == f"<Worker {b.address!r}, status: running, stored: 0, running: 0/2, ready: 0, comm: 0, waiting: 0>"
+        )
+        assert (
+            repr(ws_a)
+            == f"<WorkerState {a.address!r}, name: 0, status: running, memory: 0, processing: 0>"
+        )
+        assert (
+            repr(ws_b)
+            == f"<WorkerState {b.address!r}, status: running, memory: 0, processing: 0>"
+        )
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_sizeof.py 2022.02.0+ds.1-1/distributed/tests/test_sizeof.py
--- 2022.01.0+ds.1-1/distributed/tests/test_sizeof.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_sizeof.py	2022-02-11 16:21:14.000000000 +0000
@@ -28,7 +28,7 @@ def test_safe_sizeof_logs_on_failure():
 
     # Can provide custom `default_size`
     with captured_logger(logging.getLogger("distributed.sizeof")) as logs:
-        default_size = 2 * (1024 ** 2)  # 2 MiB
+        default_size = 2 * (1024**2)  # 2 MiB
         assert safe_sizeof(foo, default_size=default_size) == default_size
 
     assert "Defaulting to 2.00 MiB" in logs.getvalue()
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_steal.py 2022.02.0+ds.1-1/distributed/tests/test_steal.py
--- 2022.01.0+ds.1-1/distributed/tests/test_steal.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_steal.py	2022-02-11 16:21:14.000000000 +0000
@@ -16,6 +16,7 @@ import dask
 from distributed import Lock, Nanny, Worker, wait, worker_client
 from distributed.compatibility import LINUX, WINDOWS
 from distributed.config import config
+from distributed.core import Status
 from distributed.metrics import time
 from distributed.scheduler import key_split
 from distributed.system import MEMORY_LIMIT
@@ -699,6 +700,7 @@ async def assert_balanced(inp, expected,
     raise Exception(f"Expected: {expected2}; got: {result2}")
 
 
+@pytest.mark.slow
 @pytest.mark.parametrize(
     "inp,expected",
     [
@@ -732,19 +734,15 @@ async def assert_balanced(inp, expected,
     ],
 )
 def test_balance(inp, expected):
-    async def test(*args, **kwargs):
+    async def test_balance_(*args, **kwargs):
         await assert_balanced(inp, expected, *args, **kwargs)
 
-    test = gen_cluster(
-        client=True,
-        nthreads=[("127.0.0.1", 1)] * len(inp),
-        config={
-            "distributed.scheduler.default-task-durations": {
-                str(i): 1 for i in range(10)
-            }
-        },
-    )(test)
-    test()
+    config = {
+        "distributed.scheduler.default-task-durations": {str(i): 1 for i in range(10)}
+    }
+    gen_cluster(client=True, nthreads=[("", 1)] * len(inp), config=config)(
+        test_balance_
+    )()
 
 
 @gen_cluster(client=True, nthreads=[("127.0.0.1", 1)] * 2, Worker=Nanny, timeout=60)
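
The test_balance rewrite keeps the pattern of applying gen_cluster as a plain function rather than a decorator, so that the cluster size can depend on the parametrised input. The same pattern in isolation, inside an ordinary pytest test (the test and coroutine names are illustrative):

import pytest

from distributed.utils_test import gen_cluster


@pytest.mark.parametrize("n", [2, 3])
def test_cluster_sized_at_runtime(n):
    async def check(c, s, *workers):
        assert len(workers) == n

    # Apply gen_cluster as a function so that nthreads can depend on the parameter
    gen_cluster(client=True, nthreads=[("", 1)] * n)(check)()
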
@@ -816,28 +814,51 @@ async def test_steal_twice(c, s, a, b):
 
     while len(s.tasks) < 100:  # tasks are all allocated
         await asyncio.sleep(0.01)
+    # Wait for b to start stealing tasks
+    while len(b.tasks) < 30:
+        await asyncio.sleep(0.01)
 
     # Army of new workers arrives to help
-    workers = await asyncio.gather(*(Worker(s.address, loop=s.loop) for _ in range(20)))
+    workers = await asyncio.gather(*(Worker(s.address) for _ in range(20)))
 
     await wait(futures)
 
-    has_what = dict(s.has_what)  # take snapshot
-    empty_workers = [w for w, keys in has_what.items() if not len(keys)]
-    if len(empty_workers) > 2:
-        pytest.fail(
-            "Too many workers without keys (%d out of %d)"
-            % (len(empty_workers), len(has_what))
-        )
-    assert max(map(len, has_what.values())) < 30
+    # Note: this includes a and b
+    empty_workers = [w for w, keys in s.has_what.items() if not keys]
+    assert (
+        len(empty_workers) < 3
+    ), f"Too many workers without keys ({len(empty_workers)} out of {len(s.workers)})"
+    # This also tests that some tasks were stolen from b
+    # (see `while len(b.tasks) < 30` above)
+    assert max(map(len, s.has_what.values())) < 30
 
     assert a.in_flight_tasks == 0
     assert b.in_flight_tasks == 0
 
-    await c._close()
     await asyncio.gather(*(w.close() for w in workers))
 
 
+@gen_cluster(
+    client=True,
+    nthreads=[("", 1)] * 3,
+    worker_kwargs={"memory_monitor_interval": "20ms"},
+)
+async def test_paused_workers_must_not_steal(c, s, w1, w2, w3):
+    w2.memory_pause_fraction = 1e-15
+    while s.workers[w2.address].status != Status.paused:
+        await asyncio.sleep(0.01)
+
+    x = c.submit(inc, 1, workers=w1.address)
+    await wait(x)
+
+    futures = [c.submit(slowadd, x, i, delay=0.1) for i in range(10)]
+    await wait(futures)
+
+    assert w1.data
+    assert not w2.data
+    assert w3.data
+
+
 @gen_cluster(client=True)
 async def test_dont_steal_already_released(c, s, a, b):
     future = c.submit(slowinc, 1, delay=0.05, workers=a.address)
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_stress.py 2022.02.0+ds.1-1/distributed/tests/test_stress.py
--- 2022.01.0+ds.1-1/distributed/tests/test_stress.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_stress.py	2022-02-11 16:21:14.000000000 +0000
@@ -33,7 +33,7 @@ teardown_module = nodebug_teardown_modul
 
 @gen_cluster(client=True)
 async def test_stress_1(c, s, a, b):
-    n = 2 ** 6
+    n = 2**6
 
     seq = c.map(inc, range(n))
     while len(seq) > 1:
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_tls_functional.py 2022.02.0+ds.1-1/distributed/tests/test_tls_functional.py
--- 2022.01.0+ds.1-1/distributed/tests/test_tls_functional.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_tls_functional.py	2022-02-11 16:21:14.000000000 +0000
@@ -99,34 +99,24 @@ async def test_nanny(c, s, a, b):
 
 @gen_tls_cluster(
     client=True,
-    Worker=Nanny,
-    worker_kwargs={"memory_limit": "1 GiB"},
-    config={"distributed.worker.memory.rebalance.sender-min": 0.3},
+    config={
+        "distributed.worker.memory.rebalance.measure": "managed",
+        "distributed.worker.memory.rebalance.sender-min": 0,
+        "distributed.worker.memory.rebalance.sender-recipient-gap": 0,
+    },
 )
-async def test_rebalance(c, s, *_):
-    # We used nannies to have separate processes for each worker
-    a, b = s.workers
-    assert a.startswith("tls://")
-
-    # Generate 10 buffers worth 512 MiB total on worker a. This sends its memory
-    # utilisation slightly above 50% (after counting unmanaged) which is above the
-    # distributed.worker.memory.rebalance.sender-min threshold.
-    futures = c.map(lambda _: "x" * (2 ** 29 // 10), range(10), workers=[a])
-    await wait(futures)
-
-    # Wait for heartbeats
-    while s.memory.process < 2 ** 29:
-        await asyncio.sleep(0.1)
-
-    assert await c.run(lambda dask_worker: len(dask_worker.data)) == {a: 10, b: 0}
-
+async def test_rebalance(c, s, a, b):
+    """Test Client.rebalance(). This test is just to test the TLS Client wrapper around
+    Scheduler.rebalance(); for more thorough tests on the latter see test_scheduler.py.
+    """
+    assert a.address.startswith("tls://")
+
+    futures = await c.scatter(range(100), workers=[a.address])
+    assert len(a.data) == 100
+    assert len(b.data) == 0
     await c.rebalance()
-
-    ndata = await c.run(lambda dask_worker: len(dask_worker.data))
-    # Allow for some uncertainty as the unmanaged memory is not stable
-    assert sum(ndata.values()) == 10
-    assert 3 <= ndata[a] <= 7
-    assert 3 <= ndata[b] <= 7
+    assert len(a.data) == 50
+    assert len(b.data) == 50
 
 
 @gen_tls_cluster(client=True, nthreads=[("tls://127.0.0.1", 2)] * 2)
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_utils_test.py 2022.02.0+ds.1-1/distributed/tests/test_utils_test.py
--- 2022.01.0+ds.1-1/distributed/tests/test_utils_test.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_utils_test.py	2022-02-11 16:21:14.000000000 +0000
@@ -1,6 +1,7 @@
 import asyncio
 import os
 import pathlib
+import signal
 import socket
 import threading
 from contextlib import contextmanager
@@ -10,14 +11,18 @@ import pytest
 import yaml
 from tornado import gen
 
+import dask.config
+
 from distributed import Client, Nanny, Scheduler, Worker, config, default_client
+from distributed.compatibility import WINDOWS
 from distributed.core import Server, rpc
 from distributed.metrics import time
-from distributed.utils import get_ip
+from distributed.utils import mp_context
 from distributed.utils_test import (
     _LockedCommPool,
     _UnhashableCallable,
     assert_worker_story,
+    check_process_leak,
     cluster,
     dump_cluster_state,
     gen_cluster,
@@ -25,7 +30,6 @@ from distributed.utils_test import (
     inc,
     new_config,
     tls_only_security,
-    wait_for_port,
 )
 
 
@@ -100,11 +104,8 @@ async def test_gen_cluster_parametrized_
 )
 async def test_gen_cluster_set_config_nanny(c, s, a, b):
     def assert_config():
-        import dask
-
         assert dask.config.get("distributed.comm.timeouts.connect") == "1s"
         assert dask.config.get("new.config.value") == "foo"
-        return dask.config
 
     await c.run(assert_config)
     await c.run_on_scheduler(assert_config)
@@ -233,26 +234,6 @@ def _listen(delay=0):
         t.join(5.0)
 
 
-def test_wait_for_port():
-    t1 = time()
-    with pytest.raises(RuntimeError):
-        wait_for_port((get_ip(), 9999), 0.5)
-    t2 = time()
-    assert t2 - t1 >= 0.5
-
-    with _listen(0) as s1:
-        t1 = time()
-        wait_for_port(s1.getsockname())
-        t2 = time()
-        assert t2 - t1 <= 1.0
-
-    with _listen(1) as s1:
-        t1 = time()
-        wait_for_port(s1.getsockname())
-        t2 = time()
-        assert t2 - t1 <= 2.0
-
-
 def test_new_config():
     c = config.copy()
     with new_config({"xyzzy": 5}):
@@ -535,12 +516,11 @@ async def test_dump_cluster_state_unresp
 @gen_cluster(
     client=True,
     Worker=Nanny,
-    config={"distributed.comm.timeouts.connect": "200ms"},
+    config={"distributed.comm.timeouts.connect": "600ms"},
 )
 async def test_dump_cluster_unresponsive_remote_worker(c, s, a, b, tmpdir):
-    addr1, addr2 = s.workers
     clog_fut = asyncio.create_task(
-        c.run(lambda dask_scheduler: dask_scheduler.stop(), workers=[addr1])
+        c.run(lambda dask_scheduler: dask_scheduler.stop(), workers=[a.worker_address])
     )
     await asyncio.sleep(0.2)
 
@@ -549,7 +529,64 @@ async def test_dump_cluster_unresponsive
         out = yaml.safe_load(fh)
 
     assert out.keys() == {"scheduler", "workers", "versions"}
-    assert isinstance(out["workers"][addr2], dict)
-    assert out["workers"][addr1].startswith("OSError('Timed out trying to connect to")
+    assert isinstance(out["workers"][b.worker_address], dict)
+    assert out["workers"][a.worker_address].startswith(
+        "OSError('Timed out trying to connect to"
+    )
 
     clog_fut.cancel()
+
+
+def garbage_process(barrier, ignore_sigterm: bool = False, t: float = 3600) -> None:
+    if ignore_sigterm:
+        for signum in (signal.SIGTERM, signal.SIGHUP, signal.SIGINT):
+            signal.signal(signum, signal.SIG_IGN)
+    barrier.wait()
+    sleep(t)
+
+
+def test_check_process_leak():
+    barrier = mp_context.Barrier(parties=2)
+    with pytest.raises(AssertionError):
+        with check_process_leak(check=True, check_timeout=0.01):
+            p = mp_context.Process(target=garbage_process, args=(barrier,))
+            p.start()
+            barrier.wait()
+    assert not p.is_alive()
+
+
+def test_check_process_leak_slow_cleanup():
+    """check_process_leak waits a bit for processes to terminate themselves"""
+    barrier = mp_context.Barrier(parties=2)
+    with check_process_leak(check=True):
+        p = mp_context.Process(target=garbage_process, args=(barrier, False, 0.2))
+        p.start()
+        barrier.wait()
+    assert not p.is_alive()
+
+
+@pytest.mark.parametrize(
+    "ignore_sigterm",
+    [False, pytest.param(True, marks=pytest.mark.skipif(WINDOWS, reason="no SIGKILL"))],
+)
+def test_check_process_leak_pre_cleanup(ignore_sigterm):
+    barrier = mp_context.Barrier(parties=2)
+    p = mp_context.Process(target=garbage_process, args=(barrier, ignore_sigterm))
+    p.start()
+    barrier.wait()
+
+    with check_process_leak(term_timeout=0.2):
+        assert not p.is_alive()
+
+
+@pytest.mark.parametrize(
+    "ignore_sigterm",
+    [False, pytest.param(True, marks=pytest.mark.skipif(WINDOWS, reason="no SIGKILL"))],
+)
+def test_check_process_leak_post_cleanup(ignore_sigterm):
+    barrier = mp_context.Barrier(parties=2)
+    with check_process_leak(check=False, term_timeout=0.2):
+        p = mp_context.Process(target=garbage_process, args=(barrier, ignore_sigterm))
+        p.start()
+        barrier.wait()
+    assert not p.is_alive()
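
check_process_leak, exercised above, is a context manager from distributed.utils_test that reaps stray child processes: with check=True it fails if children are still alive at exit (after a grace period), and it terminates, then kills, anything left after term_timeout. A hedged usage sketch mirroring the slow-cleanup case:

from time import sleep

from distributed.utils import mp_context
from distributed.utils_test import check_process_leak


def short_lived_child(t: float = 0.2) -> None:
    sleep(t)


def demo():
    # A child that exits on its own within the grace period does not trip the check
    with check_process_leak(check=True):
        p = mp_context.Process(target=short_lived_child)
        p.start()
    assert not p.is_alive()


if __name__ == "__main__":
    demo()
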
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_variable.py 2022.02.0+ds.1-1/distributed/tests/test_variable.py
--- 2022.01.0+ds.1-1/distributed/tests/test_variable.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_variable.py	2022-02-11 16:21:14.000000000 +0000
@@ -5,7 +5,6 @@ from datetime import timedelta
 from time import monotonic, sleep
 
 import pytest
-from tornado.ioloop import IOLoop
 
 from distributed import Client, Nanny, TimeoutError, Variable, wait, worker_client
 from distributed.compatibility import WINDOWS
@@ -130,10 +129,10 @@ async def test_timeout(c, s, a, b):
 
 def test_timeout_sync(client):
     v = Variable("v")
-    start = IOLoop.current().time()
+    start = time()
     with pytest.raises(TimeoutError):
         v.get(timeout=0.2)
-    stop = IOLoop.current().time()
+    stop = time()
 
     if WINDOWS:
         assert 0.1 < stop - start < 2.0
diff -pruN 2022.01.0+ds.1-1/distributed/tests/test_worker.py 2022.02.0+ds.1-1/distributed/tests/test_worker.py
--- 2022.01.0+ds.1-1/distributed/tests/test_worker.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/tests/test_worker.py	2022-02-11 16:21:14.000000000 +0000
@@ -39,6 +39,7 @@ from distributed.core import CommClosedE
 from distributed.diagnostics import nvml
 from distributed.diagnostics.plugin import PipInstall
 from distributed.metrics import time
+from distributed.protocol import pickle
 from distributed.scheduler import Scheduler
 from distributed.utils import TimeoutError
 from distributed.utils_test import (
@@ -56,7 +57,14 @@ from distributed.utils_test import (
     slowinc,
     slowsum,
 )
-from distributed.worker import Worker, error_message, logger, parse_memory_limit
+from distributed.worker import (
+    TaskState,
+    UniqueTaskHeap,
+    Worker,
+    error_message,
+    logger,
+    parse_memory_limit,
+)
 
 pytestmark = pytest.mark.ci1
 
@@ -380,6 +388,64 @@ async def test_chained_error_message(c,
         assert "Bar" in str(e.__cause__)
 
 
+@pytest.mark.asyncio
+async def test_plugin_exception(cleanup):
+    class MyPlugin:
+        def setup(self, worker=None):
+            raise ValueError("Setup failed")
+
+    async with Scheduler(port=0) as s:
+        with pytest.raises(ValueError, match="Setup failed"):
+            async with Worker(
+                s.address,
+                plugins={
+                    MyPlugin(),
+                },
+            ) as w:
+                pass
+
+
+@pytest.mark.asyncio
+async def test_plugin_multiple_exceptions(cleanup):
+    class MyPlugin1:
+        def setup(self, worker=None):
+            raise ValueError("MyPlugin1 Error")
+
+    class MyPlugin2:
+        def setup(self, worker=None):
+            raise RuntimeError("MyPlugin2 Error")
+
+    async with Scheduler(port=0) as s:
+        # There's no guarantee on the order of which exception is raised first
+        with pytest.raises((ValueError, RuntimeError), match="MyPlugin.* Error"):
+            with captured_logger("distributed.worker") as logger:
+                async with Worker(
+                    s.address,
+                    plugins={
+                        MyPlugin1(),
+                        MyPlugin2(),
+                    },
+                ) as w:
+                    pass
+
+            text = logger.getvalue()
+            assert "MyPlugin1 Error" in text
+            assert "MyPlugin2 Error" in text
+
+
+@pytest.mark.asyncio
+async def test_plugin_internal_exception(cleanup):
+    async with Scheduler(port=0) as s:
+        with pytest.raises(UnicodeDecodeError, match="codec can't decode"):
+            async with Worker(
+                s.address,
+                plugins={
+                    b"corrupting pickle" + pickle.dumps(lambda: None, protocol=4),
+                },
+            ) as w:
+                pass
+
+
 @gen_cluster(client=True)
 async def test_gather(c, s, a, b):
     x, y = await c.scatter(["x", "y"], workers=[b.address])
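
The plugin tests above assert that an exception raised in a plugin's setup hook aborts worker startup and propagates to the caller. The plugin protocol exercised here is simply an object with a setup(worker=...) method passed through the plugins= keyword; a minimal sketch of the non-failing path (the class name is illustrative):

import asyncio

from distributed import Scheduler, Worker


class RecordingPlugin:
    """Toy worker plugin; records that its setup hook ran (name is illustrative)."""

    def __init__(self):
        self.ran = False

    def setup(self, worker=None):
        self.ran = True


async def main():
    plugin = RecordingPlugin()
    async with Scheduler(port=0) as s:
        async with Worker(s.address, plugins={plugin}):
            assert plugin.ran


asyncio.run(main())
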
@@ -441,37 +507,6 @@ async def test_io_loop(s):
         assert w.io_loop is s.loop
 
 
-@gen_cluster(client=True, nthreads=[])
-async def test_spill_to_disk(c, s):
-    np = pytest.importorskip("numpy")
-    w = await Worker(
-        s.address,
-        loop=s.loop,
-        memory_limit=1200 / 0.6,
-        memory_pause_fraction=None,
-        memory_spill_fraction=None,
-    )
-
-    x = c.submit(np.random.randint, 0, 255, size=500, dtype="u1", key="x")
-    await wait(x)
-    y = c.submit(np.random.randint, 0, 255, size=500, dtype="u1", key="y")
-    await wait(y)
-
-    assert set(w.data) == {x.key, y.key}
-    assert set(w.data.memory) == {x.key, y.key}
-
-    z = c.submit(np.random.randint, 0, 255, size=500, dtype="u1", key="z")
-    await wait(z)
-    assert set(w.data) == {x.key, y.key, z.key}
-    assert set(w.data.memory) == {y.key, z.key}
-    assert set(w.data.disk) == {x.key}
-
-    await x
-    assert set(w.data.memory) == {x.key, z.key}
-    assert set(w.data.disk) == {y.key}
-    await w.close()
-
-
 @gen_cluster(client=True)
 async def test_access_key(c, s, a, b):
     def f(i):
@@ -516,23 +551,6 @@ async def test_Executor(c, s):
             assert e._threads  # had to do some work
 
 
-@pytest.mark.skip(
-    reason="Other tests leak memory, so process-level checks trigger immediately"
-)
-@gen_cluster(
-    client=True,
-    nthreads=[("127.0.0.1", 1)],
-    timeout=30,
-    worker_kwargs={"memory_limit": 10e6},
-)
-async def test_spill_by_default(c, s, w):
-    da = pytest.importorskip("dask.array")
-    x = da.ones(int(10e6 * 0.7), chunks=1e6, dtype="u1")
-    y = c.persist(x)
-    await wait(y)
-    assert len(w.data.disk)  # something is on disk
-
-
 @gen_cluster(nthreads=[("127.0.0.1", 1)], worker_kwargs={"reconnect": False})
 async def test_close_on_disconnect(s, w):
     await s.close()
@@ -1114,69 +1132,164 @@ async def test_statistical_profiling_2(c
 
 
 @gen_cluster(
-    nthreads=[("127.0.0.1", 1)],
     client=True,
-    worker_kwargs={"memory_monitor_interval": 10},
+    nthreads=[("", 1)],
+    worker_kwargs=dict(
+        memory_limit=1200 / 0.6,
+        memory_target_fraction=0.6,
+        memory_spill_fraction=False,
+        memory_pause_fraction=False,
+    ),
 )
-async def test_robust_to_bad_sizeof_estimates(c, s, a):
-    np = pytest.importorskip("numpy")
-    memory = psutil.Process().memory_info().rss
+async def test_spill_target_threshold(c, s, a):
+    """Test distributed.worker.memory.target threshold. Note that in this test we
+    disabled spill and pause thresholds, which work on the process memory, and just left
+    the target threshold, which works on managed memory so it is unperturbed by the
+    several hundreds of MB of unmanaged memory that are typical of the test suite.
+    """
+    x = c.submit(lambda: "x" * 500, key="x")
+    await wait(x)
+    y = c.submit(lambda: "y" * 500, key="y")
+    await wait(y)
+
+    assert set(a.data) == {"x", "y"}
+    assert set(a.data.memory) == {"x", "y"}
+
+    z = c.submit(lambda: "z" * 500, key="z")
+    await wait(z)
+    assert set(a.data) == {"x", "y", "z"}
+    assert set(a.data.memory) == {"y", "z"}
+    assert set(a.data.disk) == {"x"}
+
+    await x
+    assert set(a.data.memory) == {"x", "z"}
+    assert set(a.data.disk) == {"y"}
+
+
+@gen_cluster(
+    nthreads=[("", 1)],
+    client=True,
+    worker_kwargs=dict(
+        memory_monitor_interval="10ms",
+        memory_target_fraction=False,
+        memory_spill_fraction=0.7,
+        memory_pause_fraction=False,
+    ),
+)
+async def test_spill_spill_threshold(c, s, a):
+    """Test distributed.worker.memory.spill threshold.
+    Test that the spill threshold uses the process memory and not the managed memory
+    reported by sizeof(), which may be inaccurate.
+    """
+    # Reach 'spill' threshold after 400MB of managed data. We need to be generous in
+    # order to avoid flakiness due to fluctuations in unmanaged memory.
+    # FIXME https://github.com/dask/distributed/issues/5367
+    #       This works just by luck for the purpose of the spill and pause thresholds,
+    #       and does NOT work for the target threshold.
+    memory = s.workers[a.address].memory.process
     a.memory_limit = memory / 0.7 + 400e6
 
-    class BadAccounting:
-        def __init__(self, data):
-            self.data = data
+    class UnderReport:
+        """100 MB process memory, 10 bytes reported managed memory"""
+
+        def __init__(self, *args):
+            self.data = "x" * int(100e6)
 
         def __sizeof__(self):
             return 10
 
-    def f(n):
-        x = np.ones(int(n), dtype="u1")
-        result = BadAccounting(x)
-        return result
+        def __reduce__(self):
+            """Speed up test by writing very little to disk when spilling"""
+            return UnderReport, ()
 
-    futures = c.map(f, [100e6] * 8, pure=False)
+    futures = c.map(UnderReport, range(8))
 
-    start = time()
     while not a.data.disk:
-        await asyncio.sleep(0.1)
-        assert time() < start + 5
+        await asyncio.sleep(0.01)
+
+
+@gen_cluster(
+    nthreads=[("", 1)],
+    client=True,
+    worker_kwargs=dict(
+        # FIXME https://github.com/dask/distributed/issues/5367
+        #       Can't reconfigure the absolute target threshold after the worker
+        #       started, so we're setting it here to something extremely small and then
+        #       increasing the memory_limit dynamically below in order to test the
+        #       spill threshold.
+        memory_limit=1,
+        memory_monitor_interval="10ms",
+        memory_target_fraction=False,
+        memory_spill_fraction=0.7,
+        memory_pause_fraction=False,
+    ),
+)
+async def test_spill_no_target_threshold(c, s, a):
+    """Test that you can enable the spill threshold while leaving the target threshold
+    to False
+    """
+    memory = s.workers[a.address].memory.process
+    a.memory_limit = memory / 0.7 + 400e6
+
+    class OverReport:
+        """Configurable process memory, 10 GB reported managed memory"""
+
+        def __init__(self, size):
+            self.data = "x" * size
+
+        def __sizeof__(self):
+            return int(10e9)
+
+        def __reduce__(self):
+            """Speed up test by writing very little to disk when spilling"""
+            return OverReport, (len(self.data),)
+
+    f1 = c.submit(OverReport, 0, key="f1")
+    await wait(f1)
+    assert set(a.data.memory) == {"f1"}
+
+    futures = c.map(OverReport, range(int(100e6), int(100e6) + 8))
+
+    while not a.data.disk:
+        await asyncio.sleep(0.01)
+    assert "f1" in a.data.disk
 
 
 @pytest.mark.slow
 @gen_cluster(
-    nthreads=[("127.0.0.1", 2)],
+    nthreads=[("", 1)],
     client=True,
-    worker_kwargs={
-        "memory_monitor_interval": 10,
-        "memory_spill_fraction": False,  # don't spill
-        "memory_target_fraction": False,
-        "memory_pause_fraction": 0.5,
-    },
+    worker_kwargs=dict(
+        memory_monitor_interval="10ms",
+        memory_target_fraction=False,
+        memory_spill_fraction=False,
+        memory_pause_fraction=0.8,
+    ),
 )
 async def test_pause_executor(c, s, a):
+    # See notes in test_spill_spill_threshold
     memory = psutil.Process().memory_info().rss
-    a.memory_limit = memory / 0.5 + 200e6
-    np = pytest.importorskip("numpy")
+    a.memory_limit = memory / 0.8 + 200e6
 
     def f():
-        x = np.ones(int(400e6), dtype="u1")
+        x = "x" * int(250e6)
         sleep(1)
 
     with captured_logger(logging.getLogger("distributed.worker")) as logger:
-        future = c.submit(f)
+        future = c.submit(f, key="x")
         futures = c.map(slowinc, range(30), delay=0.1)
 
         while a.status != Status.paused:
             await asyncio.sleep(0.01)
 
-        out = logger.getvalue()
-        assert "memory" in out.lower()
-        assert "pausing" in out.lower()
+        assert "Pausing worker" in logger.getvalue()
+        assert sum(f.status == "finished" for f in futures) < 4
 
-    assert sum(f.status == "finished" for f in futures) < 4
+        while a.status != Status.running:
+            await asyncio.sleep(0.01)
 
-    await wait(futures)
+        assert "Resuming worker" in logger.getvalue()
+        await wait(futures)
 
 
 @gen_cluster(client=True, worker_kwargs={"profile_cycle_interval": "50 ms"})
@@ -1331,7 +1444,6 @@ async def test_prefer_gather_from_local_
 @gen_cluster(
     client=True,
     nthreads=[("127.0.0.1", 1)] * 20,
-    timeout=30,
     config={"distributed.worker.connections.incoming": 1},
 )
 async def test_avoid_oversubscription(c, s, *workers):
@@ -1566,34 +1678,89 @@ async def test_worker_listens_on_same_in
             assert s.ip == w.ip
 
 
+def assert_amm_transfer_story(key: str, w_from: Worker, w_to: Worker) -> None:
+    """Test that an in-memory key was transferred from worker w_from to worker w_to by
+    the Active Memory Manager and it was not recalculated on w_to
+    """
+    assert_worker_story(
+        w_to.story(key),
+        [
+            (key, "ensure-task-exists", "released"),
+            (key, "released", "fetch", "fetch", {}),
+            ("gather-dependencies", w_from.address, lambda set_: key in set_),
+            (key, "fetch", "flight", "flight", {}),
+            ("request-dep", w_from.address, lambda set_: key in set_),
+            ("receive-dep", w_from.address, lambda set_: key in set_),
+            (key, "put-in-memory"),
+            (key, "flight", "memory", "memory", {}),
+        ],
+        # Extra ('missing', 'fetch', 'fetch') events would appear if transfers were
+        # slow enough that the Active Memory Manager ended up requesting them a
+        # second time. strict=True asserts that this did not happen, i.e. that no
+        # matter how slow CI is, all transfers completed within 2 seconds (the
+        # hardcoded interval in Scheduler.retire_workers when the AMM is not enabled).
+        strict=True,
+    )
+    assert key in w_to.data
+    # The key may or may not still be in w_from.data, depending if the AMM had the
+    # chance to run a second time after the copy was successful.
+
+
+@pytest.mark.slow
 @gen_cluster(client=True)
 async def test_close_gracefully(c, s, a, b):
-    futures = c.map(slowinc, range(200), delay=0.1)
+    futures = c.map(slowinc, range(200), delay=0.1, workers=[b.address])
 
-    while not b.data:
+    # Note: keys will appear in b.data several milliseconds before they switch to
+    # status=memory in s.tasks. It's important to sample the in-memory keys from the
+    # scheduler side, because those that the scheduler thinks are still processing won't
+    # be replicated by retire_workers().
+    while True:
+        mem = {k for k, ts in s.tasks.items() if ts.state == "memory"}
+        if len(mem) >= 8 and any(ts.state == "executing" for ts in b.tasks.values()):
+            break
         await asyncio.sleep(0.01)
-    mem = set(b.data)
-    proc = {ts for ts in b.tasks.values() if ts.state == "executing"}
-    assert proc
+
+    assert any(ts.state == "executing" for ts in b.tasks.values())
 
     await b.close_gracefully()
 
     assert b.status == Status.closed
     assert b.address not in s.workers
-    assert mem.issubset(a.data.keys())
-    for ts in proc:
-        assert ts.state in ("executing", "memory")
+
+    # All tasks that were in memory in b have been copied over to a;
+    # they have not been recomputed
+    for key in mem:
+        assert_amm_transfer_story(key, b, a)
 
 
 @pytest.mark.slow
-@gen_cluster(client=True, nthreads=[])
-async def test_lifetime(c, s):
-    async with Worker(s.address) as a, Worker(s.address, lifetime="1 seconds") as b:
-        futures = c.map(slowinc, range(200), delay=0.1, worker=[b.address])
-        await asyncio.sleep(1.5)
-        assert b.status not in (Status.running, Status.paused)
-        await b.finished()
-        assert set(b.data) == set(a.data)  # successfully moved data over
+@gen_cluster(client=True, nthreads=[("", 1)], timeout=10)
+async def test_lifetime(c, s, a):
+    # Note: test was occasionally failing with lifetime="1 seconds"
+    async with Worker(s.address, lifetime="2 seconds") as b:
+        futures = c.map(slowinc, range(200), delay=0.1, workers=[b.address])
+
+        # Note: keys will appear in b.data several milliseconds before they switch to
+        # status=memory in s.tasks. It's important to sample the in-memory keys from the
+        # scheduler side, because those that the scheduler thinks are still processing
+        # won't be replicated by retire_workers().
+        while True:
+            mem = {k for k, ts in s.tasks.items() if ts.state == "memory"}
+            if len(mem) >= 8:
+                break
+            await asyncio.sleep(0.01)
+
+        assert b.status == Status.running
+        assert not a.data
+
+        while b.status != Status.closed:
+            await asyncio.sleep(0.01)
+
+    # All tasks that were in memory in b have been copied over to a;
+    # they have not been recomputed
+    for key in mem:
+        assert_amm_transfer_story(key, b, a)
 
 
 @gen_cluster(worker_kwargs={"lifetime": "10s", "lifetime_stagger": "2s"})
@@ -2087,12 +2254,26 @@ async def test_gpu_executor(c, s, w):
         assert "gpu" not in w.executors
 
 
-def assert_task_states_on_worker(expected, worker):
-    for dep_key, expected_state in expected.items():
-        assert dep_key in worker.tasks, (worker.name, dep_key, worker.tasks)
-        dep_ts = worker.tasks[dep_key]
-        assert dep_ts.state == expected_state, (worker.name, dep_ts, expected_state)
-    assert set(expected) == set(worker.tasks)
+async def assert_task_states_on_worker(expected, worker):
+    active_exc = None
+    for _ in range(10):
+        try:
+            for dep_key, expected_state in expected.items():
+                assert dep_key in worker.tasks, (worker.name, dep_key, worker.tasks)
+                dep_ts = worker.tasks[dep_key]
+                assert dep_ts.state == expected_state, (
+                    worker.name,
+                    dep_ts,
+                    expected_state,
+                )
+            assert set(expected) == set(worker.tasks)
+            return
+        except AssertionError as exc:
+            active_exc = exc
+            await asyncio.sleep(0.1)
+    # If after a second the workers are not in equilibrium, they are broken
+    assert active_exc
+    raise active_exc
 
 
 @gen_cluster(client=True)
@@ -2136,7 +2317,7 @@ async def test_worker_state_error_releas
         g.key: "memory",
         res.key: "error",
     }
-    assert_task_states_on_worker(expected_states, a)
+    await assert_task_states_on_worker(expected_states, a)
     # Expected states after we release references to the futures
     f.release()
     g.release()
@@ -2152,7 +2333,7 @@ async def test_worker_state_error_releas
         res.key: "error",
     }
 
-    assert_task_states_on_worker(expected_states, a)
+    await assert_task_states_on_worker(expected_states, a)
 
     res.release()
 
@@ -2205,7 +2386,7 @@ async def test_worker_state_error_releas
         g.key: "memory",
         res.key: "error",
     }
-    assert_task_states_on_worker(expected_states, a)
+    await assert_task_states_on_worker(expected_states, a)
     # Expected states after we release references to the futures
 
     res.release()
@@ -2219,7 +2400,7 @@ async def test_worker_state_error_releas
         g.key: "memory",
     }
 
-    assert_task_states_on_worker(expected_states, a)
+    await assert_task_states_on_worker(expected_states, a)
 
     f.release()
     g.release()
@@ -2270,7 +2451,7 @@ async def test_worker_state_error_releas
         g.key: "memory",
         res.key: "error",
     }
-    assert_task_states_on_worker(expected_states, a)
+    await assert_task_states_on_worker(expected_states, a)
     # Expected states after we release references to the futures
 
     f.release()
@@ -2284,8 +2465,8 @@ async def test_worker_state_error_releas
         g.key: "memory",
     }
 
-    assert_task_states_on_worker(expected_states, a)
-    assert_task_states_on_worker(expected_states, b)
+    await assert_task_states_on_worker(expected_states, a)
+    await assert_task_states_on_worker(expected_states, b)
 
     g.release()
 
@@ -2319,8 +2500,7 @@ async def test_worker_state_error_long_c
         g.key: "memory",
         h.key: "memory",
     }
-    await asyncio.sleep(0.05)
-    assert_task_states_on_worker(expected_states_A, a)
+    await assert_task_states_on_worker(expected_states_A, a)
 
     expected_states_B = {
         f.key: "memory",
@@ -2328,8 +2508,7 @@ async def test_worker_state_error_long_c
         h.key: "memory",
         res.key: "error",
     }
-    await asyncio.sleep(0.05)
-    assert_task_states_on_worker(expected_states_B, b)
+    await assert_task_states_on_worker(expected_states_B, b)
 
     f.release()
 
@@ -2337,8 +2516,7 @@ async def test_worker_state_error_long_c
         g.key: "memory",
         h.key: "memory",
     }
-    await asyncio.sleep(0.05)
-    assert_task_states_on_worker(expected_states_A, a)
+    await assert_task_states_on_worker(expected_states_A, a)
 
     expected_states_B = {
         f.key: "released",
@@ -2346,8 +2524,7 @@ async def test_worker_state_error_long_c
         h.key: "memory",
         res.key: "error",
     }
-    await asyncio.sleep(0.05)
-    assert_task_states_on_worker(expected_states_B, b)
+    await assert_task_states_on_worker(expected_states_B, b)
 
     g.release()
 
@@ -2355,8 +2532,7 @@ async def test_worker_state_error_long_c
         g.key: "released",
         h.key: "memory",
     }
-    await asyncio.sleep(0.05)
-    assert_task_states_on_worker(expected_states_A, a)
+    await assert_task_states_on_worker(expected_states_A, a)
 
     # B must not forget a task since all have a still valid dependent
     expected_states_B = {
@@ -2364,19 +2540,18 @@ async def test_worker_state_error_long_c
         h.key: "memory",
         res.key: "error",
     }
-    assert_task_states_on_worker(expected_states_B, b)
+    await assert_task_states_on_worker(expected_states_B, b)
     h.release()
-    await asyncio.sleep(0.05)
 
     expected_states_A = {}
-    assert_task_states_on_worker(expected_states_A, a)
+    await assert_task_states_on_worker(expected_states_A, a)
     expected_states_B = {
         f.key: "released",
         h.key: "released",
         res.key: "error",
     }
 
-    assert_task_states_on_worker(expected_states_B, b)
+    await assert_task_states_on_worker(expected_states_B, b)
     res.release()
 
     # We no longer hold any refs. Cluster should reset completely
@@ -2411,13 +2586,11 @@ async def test_hold_on_to_replicas(c, s,
         await asyncio.sleep(0.01)
 
 
-@gen_cluster(
-    client=True,
-    nthreads=[
-        ("", 1),
-        ("", 1),
-    ],
+@pytest.mark.xfail(
+    WINDOWS and sys.version_info[:2] == (3, 8),
+    reason="https://github.com/dask/distributed/issues/5621",
 )
+@gen_cluster(client=True, nthreads=[("", 1), ("", 1)])
 async def test_worker_reconnects_mid_compute(c, s, a, b):
     """Ensure that, if a worker disconnects while computing a result, the scheduler will
     still accept the result.
@@ -2485,13 +2658,11 @@ async def test_worker_reconnects_mid_com
         await asyncio.sleep(0.001)
 
 
-@gen_cluster(
-    client=True,
-    nthreads=[
-        ("", 1),
-        ("", 1),
-    ],
+@pytest.mark.xfail(
+    WINDOWS and sys.version_info[:2] == (3, 8),
+    reason="https://github.com/dask/distributed/issues/5621",
 )
+@gen_cluster(client=True, nthreads=[("", 1), ("", 1)])
 async def test_worker_reconnects_mid_compute_multiple_states_on_scheduler(c, s, a, b):
     """
     Ensure that a reconnecting worker does not break the scheduler regardless of
@@ -3031,33 +3202,38 @@ async def test_missing_released_zombie_t
 
 @gen_cluster(client=True)
 async def test_missing_released_zombie_tasks_2(c, s, a, b):
-    a.total_in_connections = 0
-    f1 = c.submit(inc, 1, key="f1", workers=[a.address])
-    f2 = c.submit(inc, f1, key="f2", workers=[b.address])
+    # If get_data_from_worker raises, this suggests a dead worker to B, which will
+    # transition the task to missing. We want to make sure that a missing task is
+    # properly released and not left as a zombie.
+    with mock.patch.object(
+        distributed.worker,
+        "get_data_from_worker",
+        side_effect=CommClosedError,
+    ):
+        f1 = c.submit(inc, 1, key="f1", workers=[a.address])
+        f2 = c.submit(inc, f1, key="f2", workers=[b.address])
 
-    while f1.key not in b.tasks:
-        await asyncio.sleep(0)
+        while f1.key not in b.tasks:
+            await asyncio.sleep(0)
 
-    ts = b.tasks[f1.key]
-    assert ts.state == "fetch"
+        ts = b.tasks[f1.key]
+        assert ts.state == "fetch"
 
-    # A few things can happen to clear who_has. The dominant process is upon
-    # connection failure to a worker. Regardless of how the set was cleared, the
-    # task will be transitioned to missing where the worker is trying to
-    # reaquire this information from the scheduler. While this is happening on
-    # worker side, the tasks are released and we want to ensure that no dangling
-    # zombie tasks are left on the worker
-    ts.who_has.clear()
+        while ts.state != "missing":
+            # If we sleep for a longer time, the worker will spin into an
+            # endless loop of asking the scheduler who_has and trying to connect
+            # to A
+            await asyncio.sleep(0)
 
-    del f1, f2
+        del f1, f2
 
-    while b.tasks:
-        await asyncio.sleep(0.01)
+        while b.tasks:
+            await asyncio.sleep(0.01)
 
-    assert_worker_story(
-        b.story(ts),
-        [("f1", "missing", "released", "released", {"f1": "forgotten"})],
-    )
+        assert_worker_story(
+            b.story(ts),
+            [("f1", "missing", "released", "released", {"f1": "forgotten"})],
+        )
 
 
 @pytest.mark.slow
@@ -3066,7 +3242,7 @@ async def test_missing_released_zombie_t
     Worker=Nanny,
     nthreads=[("", 1)],
     config={"distributed.worker.memory.pause": 0.5},
-    worker_kwargs={"memory_limit": 2 ** 29},  # 500 MiB
+    worker_kwargs={"memory_limit": 2**29},  # 500 MiB
 )
 async def test_worker_status_sync(c, s, a):
     (ws,) = s.workers.values()
@@ -3075,7 +3251,7 @@ async def test_worker_status_sync(c, s,
         await asyncio.sleep(0.01)
 
     def leak():
-        distributed._test_leak = "x" * 2 ** 28  # 250 MiB
+        distributed._test_leak = "x" * 2**28  # 250 MiB
 
     def clear_leak():
         del distributed._test_leak
@@ -3127,6 +3303,69 @@ async def _wait_for_state(key: str, work
 
 
 @gen_cluster(client=True)
+async def test_task_flight_compute_oserror(c, s, a, b):
+    """If the remote worker dies while a task is in flight, the task may be
+    rescheduled to be computed on the worker trying to fetch the data.
+    However, the OSError caused by the dead remote would try to transition the
+    task to missing, which is not what we want. This test ensures that the task
+    is properly transitioned to executing, that the scheduler doesn't reschedule
+    anything, and that it rejects any "false missing" signal from the worker.
+    """
+
+    write_queue = asyncio.Queue()
+    write_event = asyncio.Event()
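+    # _LockedCommPool blocks b's outgoing comms: the test pops each attempted write
+    # from write_queue and only lets it go through once write_event is set.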
+    b.rpc = _LockedCommPool(
+        b.rpc,
+        write_queue=write_queue,
+        write_event=write_event,
+    )
+    futs = c.submit(map, inc, range(10), workers=[a.address], allow_other_workers=True)
+    await wait(futs)
+    assert a.data
+    assert write_queue.empty()
+    f1 = c.submit(sum, futs, workers=[b.address])
+    peer, msg = await write_queue.get()
+    assert peer == a.address
+    assert msg["op"] == "get_data"
+    in_flight_tasks = [ts for ts in b.tasks.values() if ts.key != f1.key]
+    assert all(ts.state == "flight" for ts in in_flight_tasks)
+    await a.close()
+    write_event.set()
+
+    await f1
+
+    # If the above doesn't deadlock, the behavior should be OK. We still assert a
+    # few internals to make sure that, if this behavior ever changes, it is done
+    # deliberately.
+
+    sum_story = b.story(f1.key)
+    expected_sum_story = [
+        (f1.key, "compute-task"),
+        (
+            f1.key,
+            "released",
+            "waiting",
+            "waiting",
+            {ts.key: "fetch" for ts in in_flight_tasks},
+        ),
+        # inc is lost and needs to be recomputed. Therefore, sum is released
+        ("free-keys", (f1.key,)),
+        (f1.key, "release-key"),
+        (f1.key, "waiting", "released", "released", {f1.key: "forgotten"}),
+        (f1.key, "released", "forgotten", "forgotten", {}),
+        # Now, we actually compute the task *once*. This must not cycle back
+        (f1.key, "compute-task"),
+        (f1.key, "released", "waiting", "waiting", {f1.key: "ready"}),
+        (f1.key, "waiting", "ready", "ready", {}),
+        (f1.key, "ready", "executing", "executing", {}),
+        (f1.key, "put-in-memory"),
+        (f1.key, "executing", "memory", "memory", {}),
+    ]
+    assert_worker_story(sum_story, expected_sum_story, strict=True)
+
+
+@gen_cluster(client=True)
 async def test_gather_dep_cancelled_rescheduled(c, s, a, b):
     """At time of writing, the gather_dep implementation filtered tasks again
     for in-flight state. The response parser, however, did not distinguish
@@ -3322,7 +3561,6 @@ async def test_Worker__to_dict(c, s, a):
         "id",
         "scheduler",
         "nthreads",
-        "ncores",
         "memory_limit",
         "address",
         "status",
@@ -3342,6 +3580,8 @@ async def test_Worker__to_dict(c, s, a):
         "config",
         "incoming_transfer_log",
         "outgoing_transfer_log",
+        "data_needed",
+        "pending_data_per_worker",
     }
     assert d["tasks"]["x"]["key"] == "x"
 
@@ -3363,3 +3603,45 @@ async def test_TaskState__to_dict(c, s,
     assert isinstance(tasks["z"], dict)
     assert tasks["x"]["dependents"] == ["<TaskState 'y' memory>"]
     assert tasks["y"]["dependencies"] == ["<TaskState 'x' memory>"]
+
+
+def test_unique_task_heap():
+    heap = UniqueTaskHeap()
+
+    for x in range(10):
+        ts = TaskState(f"f{x}")
+        ts.priority = (0, 0, 1, x % 3)
+        heap.push(ts)
+
+    heap_list = list(heap)
+    # iteration does not empty heap
+    assert len(heap) == 10
+    assert heap_list == sorted(heap_list, key=lambda ts: ts.priority)
+
+    seen = set()
+    last_prio = (0, 0, 0, 0)
+    while heap:
+        peeked = heap.peek()
+        ts = heap.pop()
+        assert peeked == ts
+        seen.add(ts.key)
+        assert ts.priority
+        assert last_prio <= ts.priority
+        last_prio = ts.priority
+
+    ts = TaskState("foo")
+    heap.push(ts)
+    heap.push(ts)
+    assert len(heap) == 1
+
+    assert repr(heap) == "<UniqueTaskHeap: 1 items>"
+
+    assert heap.pop() == ts
+    assert not heap
+
+    # Test that we're cleaning the seen set on pop
+    heap.push(ts)
+    assert len(heap) == 1
+    assert heap.pop() == ts
+
+    assert repr(heap) == "<UniqueTaskHeap: 0 items>"
diff -pruN 2022.01.0+ds.1-1/distributed/utils_comm.py 2022.02.0+ds.1-1/distributed/utils_comm.py
--- 2022.01.0+ds.1-1/distributed/utils_comm.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/utils_comm.py	2022-02-11 16:21:14.000000000 +0000
@@ -352,7 +352,7 @@ async def retry(
     Returns
     -------
     Any
-        Whatever `await `coro()` returned
+        Whatever `await coro()` returned
     """
     # this loop is a no-op in case max_retries<=0
     for i_try in range(count):
@@ -363,7 +363,7 @@ async def retry(
             logger.info(
                 f"Retrying {operation} after exception in attempt {i_try}/{count}: {ex}"
             )
-            delay = min(delay_min * (2 ** i_try - 1), delay_max)
+            delay = min(delay_min * (2**i_try - 1), delay_max)
             if jitter_fraction > 0:
                 delay *= 1 + random.random() * jitter_fraction
             await asyncio.sleep(delay)
diff -pruN 2022.01.0+ds.1-1/distributed/utils.py 2022.02.0+ds.1-1/distributed/utils.py
--- 2022.01.0+ds.1-1/distributed/utils.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/utils.py	2022-02-11 16:21:14.000000000 +0000
@@ -46,7 +46,7 @@ from dask import istask
 from dask.utils import parse_timedelta as _parse_timedelta
 from dask.widgets import get_template
 
-from .compatibility import PYPY, WINDOWS
+from .compatibility import WINDOWS
 from .metrics import time
 
 try:
@@ -67,11 +67,9 @@ no_default = "__no_default__"
 
 
 def _initialize_mp_context():
-    if WINDOWS or PYPY:
-        return multiprocessing
-    else:
-        method = dask.config.get("distributed.worker.multiprocessing-method")
-        ctx = multiprocessing.get_context(method)
+    method = dask.config.get("distributed.worker.multiprocessing-method")
+    ctx = multiprocessing.get_context(method)
+    if method == "forkserver":
         # Makes the test suite much faster
         preload = ["distributed"]
         if "pkg_resources" in sys.modules:
@@ -87,7 +85,8 @@ def _initialize_mp_context():
             else:
                 preload.append(pkg)
         ctx.set_forkserver_preload(preload)
-        return ctx
+
+    return ctx
 
 
 mp_context = _initialize_mp_context()
@@ -334,11 +333,11 @@ def sync(loop, func, *args, callback_tim
 
     e = threading.Event()
     main_tid = threading.get_ident()
-    result = [None]
-    error = [False]
+    result = error = future = None  # set up non-locals
 
     @gen.coroutine
     def f():
+        nonlocal result, error, future
         try:
             if main_tid == threading.get_ident():
                 raise RuntimeError("sync() called from thread of running loop")
@@ -346,24 +345,37 @@ def sync(loop, func, *args, callback_tim
             future = func(*args, **kwargs)
             if callback_timeout is not None:
                 future = asyncio.wait_for(future, callback_timeout)
-            result[0] = yield future
+            future = asyncio.ensure_future(future)
+            result = yield future
         except Exception:
-            error[0] = sys.exc_info()
+            error = sys.exc_info()
         finally:
             e.set()
 
+    def cancel():
+        if future is not None:
+            future.cancel()
+
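+    # If the caller is interrupted (e.g. Ctrl-C) while blocked on the event, cancel
+    # the coroutine on the event loop instead of leaving it running, then re-raise.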
+    def wait(timeout):
+        try:
+            return e.wait(timeout)
+        except KeyboardInterrupt:
+            loop.add_callback(cancel)
+            raise
+
     loop.add_callback(f)
     if callback_timeout is not None:
-        if not e.wait(callback_timeout):
+        if not wait(callback_timeout):
             raise TimeoutError(f"timed out after {callback_timeout} s.")
     else:
         while not e.is_set():
-            e.wait(10)
-    if error[0]:
-        typ, exc, tb = error[0]
+            wait(10)
+
+    if error:
+        typ, exc, tb = error
         raise exc.with_traceback(tb)
     else:
-        return result[0]
+        return result
 
 
 class LoopRunner:
@@ -857,12 +869,12 @@ def read_block(f, offset, length, delimi
     """
     if delimiter:
         f.seek(offset)
-        seek_delimiter(f, delimiter, 2 ** 16)
+        seek_delimiter(f, delimiter, 2**16)
         start = f.tell()
         length -= start - offset
 
         f.seek(start + length)
-        seek_delimiter(f, delimiter, 2 ** 16)
+        seek_delimiter(f, delimiter, 2**16)
         end = f.tell()
 
         offset = start
@@ -1240,8 +1252,8 @@ def cli_keywords(d: dict, cls=None, cmd=
     cmd : string or object
         A string with the name of a module, or the module containing a
         click-generated command with a "main" function, or the function itself.
-        It may be used to parse a module's custom arguments (i.e., arguments that
-        are not part of Worker class), such as nprocs from dask-worker CLI or
+        It may be used to parse a module's custom arguments (that is, arguments that
+        are not part of the Worker class), such as nworkers from dask-worker CLI or
         enable_nvlink from dask-cuda-worker CLI.
 
     Examples
diff -pruN 2022.01.0+ds.1-1/distributed/utils_test.py 2022.02.0+ds.1-1/distributed/utils_test.py
--- 2022.01.0+ds.1-1/distributed/utils_test.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/utils_test.py	2022-02-11 16:21:14.000000000 +0000
@@ -8,6 +8,7 @@ import inspect
 import io
 import logging
 import logging.config
+import multiprocessing
 import os
 import queue
 import re
@@ -19,7 +20,6 @@ import sys
 import tempfile
 import threading
 import uuid
-import warnings
 import weakref
 from collections import defaultdict
 from collections.abc import Callable
@@ -54,6 +54,7 @@ from . import system
 from . import versions as version_module
 from .client import Client, _global_clients, default_client
 from .comm import Comm
+from .comm.tcp import BaseTCPConnector
 from .compatibility import WINDOWS
 from .config import initialize_logging
 from .core import CommClosedError, ConnectionPool, Status, connect, rpc
@@ -76,7 +77,7 @@ from .utils import (
     reset_logger_locks,
     sync,
 )
-from .worker import RUNNING, Worker
+from .worker import Worker
 
 try:
     import dask.array  # register config
@@ -678,7 +679,7 @@ def cluster(
             for worker in workers:
                 worker["address"] = worker["queue"].get(timeout=5)
         except queue.Empty:
-            raise pytest.xfail.Exception("Worker failed to start in test")
+            pytest.xfail("Worker failed to start in test")
 
         saddr = scheduler_q.get()
 
@@ -882,7 +883,6 @@ def gen_cluster(
         ("127.0.0.1", 1),
         ("127.0.0.1", 2),
     ],
-    ncores: None = None,  # deprecated
     scheduler="127.0.0.1",
     timeout: float = _TEST_TIMEOUT,
     security: Security | dict[str, Any] | None = None,
@@ -895,7 +895,7 @@ def gen_cluster(
     config: dict[str, Any] = {},
     clean_kwargs: dict[str, Any] = {},
     allow_unclosed: bool = False,
-    cluster_dump_directory: str | Literal[False] = "test_timeout_dump",
+    cluster_dump_directory: str | Literal[False] = "test_cluster_dump",
 ) -> Callable[[Callable], Callable]:
     from distributed import Client
 
@@ -922,9 +922,6 @@ def gen_cluster(
         "timeout should always be set and it should be smaller than the global one from"
         "pytest-timeout"
     )
-    if ncores is not None:
-        warnings.warn("ncores= has moved to nthreads=", stacklevel=2)
-        nthreads = ncores
 
     scheduler_kwargs = merge(
         {"dashboard": False, "dashboard_address": ":0"}, scheduler_kwargs
@@ -979,15 +976,16 @@ def gen_cluster(
                                 **client_kwargs,
                             )
                             args = [c] + args
+
                         try:
                             coro = func(*args, *outer_args, **kwargs)
                             task = asyncio.create_task(coro)
-
                             coro2 = asyncio.wait_for(asyncio.shield(task), timeout)
                             result = await coro2
                             if s.validate:
                                 s.validate_state()
-                        except asyncio.TimeoutError as e:
+
+                        except asyncio.TimeoutError:
                             assert task
                             buffer = io.StringIO()
                             # This stack indicates where the coro/test is suspended
@@ -1004,9 +1002,31 @@ def gen_cluster(
                             task.cancel()
                             while not task.cancelled():
                                 await asyncio.sleep(0.01)
+
+                            # Remove as much of the traceback as possible; it's
+                            # uninteresting boilerplate from utils_test and asyncio and
+                            # not from the code being tested.
                             raise TimeoutError(
-                                f"Test timeout after {timeout}s.\n{buffer.getvalue()}"
-                            ) from e
+                                f"Test timeout after {timeout}s.\n"
+                                "========== Test stack trace starts here ==========\n"
+                                f"{buffer.getvalue()}"
+                            ) from None
+
+                        except pytest.xfail.Exception:
+                            raise
+
+                        except Exception:
+                            if cluster_dump_directory and not has_pytestmark(
+                                test_func, "xfail"
+                            ):
+                                await dump_cluster_state(
+                                    s,
+                                    ws,
+                                    output_dir=cluster_dump_directory,
+                                    func_name=func.__name__,
+                                )
+                            raise
+
                         finally:
                             if client and c.status not in ("closing", "closed"):
                                 await c._close(fast=s.status == Status.closed)
@@ -1124,7 +1144,7 @@ def raises(func, exc=Exception):
         return True
 
 
-def terminate_process(proc):
+def _terminate_process(proc):
     if proc.poll() is None:
         if sys.platform.startswith("win"):
             proc.send_signal(signal.CTRL_BREAK_EVENT)
@@ -1163,7 +1183,7 @@ def popen(args, **kwargs):
 
     finally:
         try:
-            terminate_process(proc)
+            _terminate_process(proc)
         finally:
             # XXX Also dump stdout if return code != 0 ?
             out, err = proc.communicate()
@@ -1175,23 +1195,6 @@ def popen(args, **kwargs):
                 print(out.decode())
 
 
-def wait_for_port(address, timeout=5):
-    assert isinstance(address, tuple)
-    deadline = time() + timeout
-
-    while True:
-        timeout = deadline - time()
-        if timeout < 0:
-            raise RuntimeError(f"Failed to connect to {address}")
-        try:
-            sock = socket.create_connection(address, timeout=timeout)
-        except OSError:
-            pass
-        else:
-            sock.close()
-            break
-
-
 def wait_for(predicate, timeout, fail_func=None, period=0.001):
     deadline = time() + timeout
     while not predicate():
@@ -1242,7 +1245,6 @@ if has_ipv6():
     def requires_ipv6(test_func):
         return test_func
 
-
 else:
     requires_ipv6 = pytest.mark.skip("ipv6 required")
 
@@ -1556,6 +1558,9 @@ def save_sys_modules():
 @contextmanager
 def check_thread_leak():
     """Context manager to ensure we haven't leaked any threads"""
+    # "TCP-Executor" threads are never stopped once they are started
+    BaseTCPConnector.warmup()
+
     active_threads_start = threading.enumerate()
 
     yield
@@ -1566,15 +1571,8 @@ def check_thread_leak():
             thread
             for thread in threading.enumerate()
             if thread not in active_threads_start
-            and "Threaded" not in thread.name
-            and "watch message" not in thread.name
-            and "TCP-Executor" not in thread.name
-            # TODO: Make sure profile thread is cleaned up
-            # and remove the line below
-            and "Profile" not in thread.name
-            # asyncio default executor thread pool is not shut down until loop
-            # is shut down
-            and "asyncio_" not in thread.name
+            # FIXME this looks like a genuine leak that needs fixing
+            and "watch message queue" not in thread.name
         ]
         if not bad_threads:
             break
@@ -1589,24 +1587,67 @@ def check_thread_leak():
             assert False, (bad_thread, call_stacks)
 
 
-@contextmanager
-def check_process_leak(check=True):
-    for proc in mp_context.active_children():
+def wait_active_children(timeout: float) -> list[multiprocessing.Process]:
+    """Wait until timeout for mp_context.active_children() to terminate.
+    Return list of active subprocesses after the timeout expired.
+    """
+    t0 = time()
+    while True:
+        # Do not sample the subprocesses just once at the beginning with
+        # `for proc in mp_context.active_children(): ...`; instead, assume that new
+        # child processes may be spawned before the timeout expires.
+        children = mp_context.active_children()
+        if not children:
+            return []
+        join_timeout = timeout - time() + t0
+        if join_timeout <= 0:
+            return children
+        children[0].join(timeout=join_timeout)
+
+
+def term_or_kill_active_children(timeout: float) -> None:
+    """Send SIGTERM to mp_context.active_children(), wait up to 3 seconds for processes
+    to die, then send SIGKILL to the survivors
+    """
+    children = mp_context.active_children()
+    for proc in children:
         proc.terminate()
 
-    yield
+    children = wait_active_children(timeout=timeout)
+    for proc in children:
+        proc.kill()
+
+    children = wait_active_children(timeout=30)
+    if children:  # pragma: nocover
+        logger.warning("Leaked unkillable child processes: %s", children)
+        # It should be impossible to ignore SIGKILL on Linux/MacOSX
+        assert WINDOWS
 
-    if check:
-        for i in range(200):
-            if not set(mp_context.active_children()):
-                break
-            else:
-                sleep(0.2)
-        else:
-            assert not mp_context.active_children()
 
-    for proc in mp_context.active_children():
-        proc.terminate()
+@contextmanager
+def check_process_leak(
+    check: bool = True, check_timeout: float = 40, term_timeout: float = 3
+):
+    """Terminate any currently-running subprocesses at both the beginning and end of this context
+
+    Parameters
+    ----------
+    check : bool, optional
+        If True, raise AssertionError if any processes survive at exit
+    check_timeout: float, optional
+        Wait up to this many seconds for subprocesses to terminate before failing
+    term_timeout: float, optional
+        After sending SIGTERM to a subprocess, wait up to this many seconds before
+        sending SIGKILL
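+
+    A typical (illustrative) use wraps test code that may spawn subprocesses::
+
+        with check_process_leak(check=True):
+            run_code_that_spawns_subprocesses()  # hypothetical test body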
+    """
+    term_or_kill_active_children(timeout=term_timeout)
+    try:
+        yield
+        if check:
+            children = wait_active_children(timeout=check_timeout)
+            assert not children, f"Test leaked subprocesses: {children}"
+    finally:
+        term_or_kill_active_children(timeout=term_timeout)
 
 
 @contextmanager
@@ -1635,7 +1676,7 @@ def check_instances():
     for w in Worker._instances:
         with suppress(RuntimeError):  # closed IOLoop
             w.loop.add_callback(w.close, report=False, executor_wait=False)
-            if w.status in RUNNING:
+            if w.status in Status.ANY_RUNNING:
                 w.loop.add_callback(w.close)
     Worker._instances.clear()
 
@@ -1892,3 +1933,14 @@ class BrokenComm(Comm):
 
     def write(self, msg, serializers=None, on_error=None):
         raise OSError()
+
+
+def has_pytestmark(test_func: Callable, name: str) -> bool:
+    """Return True if the test function is marked by the given @pytest.mark.<name>;
+    False otherwise.
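+
+    For example (illustrative), ``has_pytestmark(test_func, "slow")`` returns True if
+    the test is decorated with ``@pytest.mark.slow``.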
+
+    FIXME doesn't work with individually marked parameters inside
+          @pytest.mark.parametrize
+    """
+    marks = getattr(test_func, "pytestmark", [])
+    return any(mark.name == name for mark in marks)
diff -pruN 2022.01.0+ds.1-1/distributed/variable.py 2022.02.0+ds.1-1/distributed/variable.py
--- 2022.01.0+ds.1-1/distributed/variable.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/variable.py	2022-02-11 16:21:14.000000000 +0000
@@ -9,6 +9,7 @@ from tlz import merge
 from dask.utils import parse_timedelta, stringify
 
 from .client import Client, Future
+from .metrics import time
 from .utils import TimeoutError, log_errors
 from .worker import get_client, get_worker
 
@@ -74,10 +75,10 @@ class VariableExtension:
                 self.waiting_conditions[name].notify_all()
 
     async def get(self, comm=None, name=None, client=None, timeout=None):
-        start = self.scheduler.loop.time()
+        start = time()
         while name not in self.variables:
             if timeout is not None:
-                left = timeout - (self.scheduler.loop.time() - start)
+                left = timeout - (time() - start)
             else:
                 left = None
             if left and left < 0:
diff -pruN 2022.01.0+ds.1-1/distributed/worker.py 2022.02.0+ds.1-1/distributed/worker.py
--- 2022.01.0+ds.1-1/distributed/worker.py	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/distributed/worker.py	2022-02-11 16:21:14.000000000 +0000
@@ -13,7 +13,14 @@ import threading
 import warnings
 import weakref
 from collections import defaultdict, deque, namedtuple
-from collections.abc import Callable, Collection, Iterable, Mapping, MutableMapping
+from collections.abc import (
+    Callable,
+    Collection,
+    Iterable,
+    Iterator,
+    Mapping,
+    MutableMapping,
+)
 from concurrent.futures import Executor
 from contextlib import suppress
 from datetime import timedelta
@@ -67,6 +74,7 @@ from .proctitle import setproctitle
 from .protocol import pickle, to_serialize
 from .pubsub import PubSubWorkerExtension
 from .security import Security
+from .shuffle import ShuffleWorkerExtension
 from .sizeof import safe_sizeof as sizeof
 from .threadpoolexecutor import ThreadPoolExecutor
 from .threadpoolexecutor import secede as tpe_secede
@@ -110,12 +118,8 @@ PROCESSING = {
     "resumed",
 }
 READY = {"ready", "constrained"}
-FETCH_INTENDED = {"missing", "fetch", "flight", "cancelled", "resumed"}
-
-# Worker.status subsets
-RUNNING = {Status.running, Status.paused, Status.closing_gracefully}
 
-DEFAULT_EXTENSIONS: list[type] = [PubSubWorkerExtension]
+DEFAULT_EXTENSIONS: list[type] = [PubSubWorkerExtension, ShuffleWorkerExtension]
 
 DEFAULT_METRICS: dict[str, Callable[[Worker], Any]] = {}
 
@@ -191,6 +195,8 @@ class TaskState:
 
     """
 
+    priority: tuple[int, ...] | None
+
     def __init__(self, key, runspec=None):
         assert key is not None
         self.key = key
@@ -265,6 +271,54 @@ class TaskState:
         )
 
 
+class UniqueTaskHeap(Collection):
+    """A heap of TaskState objects ordered by TaskState.priority
+    Ties are broken by string comparison of the key. Keys are guaranteed to be
+    unique. Iterating over this object returns the elements in priority order.
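+
+    A minimal usage sketch (illustrative only)::
+
+        heap = UniqueTaskHeap()
+        heap.push(ts)       # no-op if a task with the same key was already pushed
+        assert ts in heap   # membership test by TaskState or by key
+        ts = heap.peek()    # highest-priority task, not removed
+        ts = heap.pop()     # highest-priority task, removed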
+    """
+
+    def __init__(self, collection: Collection[TaskState] = ()):
+        self._known = {ts.key for ts in collection}
+        self._heap = [(ts.priority, ts.key, ts) for ts in collection]
+        heapq.heapify(self._heap)
+
+    def push(self, ts: TaskState) -> None:
+        """Add a new TaskState instance to the heap. If the key is already
+        known, no object is added.
+
+        Note: This does not update the priority / heap order in case priority
+        changes.
+        """
+        assert isinstance(ts, TaskState)
+        if ts.key not in self._known:
+            heapq.heappush(self._heap, (ts.priority, ts.key, ts))
+            self._known.add(ts.key)
+
+    def pop(self) -> TaskState:
+        """Pop the task with highest priority from the heap."""
+        _, key, ts = heapq.heappop(self._heap)
+        self._known.remove(key)
+        return ts
+
+    def peek(self) -> TaskState:
+        """Get the highest priority TaskState without removing it from the heap"""
+        return self._heap[0][2]
+
+    def __contains__(self, x: object) -> bool:
+        if isinstance(x, TaskState):
+            x = x.key
+        return x in self._known
+
+    def __iter__(self) -> Iterator[TaskState]:
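+        # heapq only guarantees that the smallest element sits at index 0, so sort a
+        # copy of the heap to yield all elements in priority order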
+        return (ts for _, _, ts in sorted(self._heap))
+
+    def __len__(self) -> int:
+        return len(self._known)
+
+    def __repr__(self) -> str:
+        return f"<{type(self).__name__}: {len(self)} items>"
+
+
 class Worker(ServerNode):
     """Worker node in a Dask distributed cluster
 
@@ -340,8 +394,8 @@ class Worker(ServerNode):
     * **data.disk:** ``{key: object}``:
         Dictionary mapping keys to actual values stored on disk. Only
         available if condition for **data** being a zict.Buffer is met.
-    * **data_needed**: deque(keys)
-        The keys which still require data in order to execute, arranged in a deque
+    * **data_needed**: UniqueTaskHeap
+        The tasks which still require data in order to execute, prioritized as a heap
     * **ready**: [keys]
         Keys that are ready to run.  Stored in a LIFO stack
     * **constrained**: [keys]
@@ -356,8 +410,8 @@ class Worker(ServerNode):
         long-running clients.
     * **has_what**: ``{worker: {deps}}``
         The data that we care about that we think a worker has
-    * **pending_data_per_worker**: ``{worker: [dep]}``
-        The data on each worker that we still want, prioritized as a deque
+    * **pending_data_per_worker**: ``{worker: UniqueTaskHeap}``
+        The data on each worker that we still want, prioritized as a heap
     * **in_flight_tasks**: ``int``
         A count of the number of tasks that are coming to us in current
         peer-to-peer connections
@@ -455,10 +509,10 @@ class Worker(ServerNode):
     tasks: dict[str, TaskState]
     waiting_for_data_count: int
     has_what: defaultdict[str, set[str]]  # {worker address: {ts.key, ...}
-    pending_data_per_worker: defaultdict[str, deque[str]]
+    pending_data_per_worker: defaultdict[str, UniqueTaskHeap]
     nanny: Nanny | None
     _lock: threading.Lock
-    data_needed: list[tuple[int, str]]  # heap[(ts.priority, ts.key)]
+    data_needed: UniqueTaskHeap
     in_flight_workers: dict[str, set[str]]  # {worker address: {ts.key, ...}}
     total_out_connections: int
     total_in_connections: int
@@ -532,6 +586,7 @@ class Worker(ServerNode):
     name: Any
     scheduler_delay: float
     stream_comms: dict[str, BatchedSend]
+    heartbeat_interval: float
     heartbeat_active: bool
     _ipython_kernel: Any | None = None
     services: dict[str, Any] = {}
@@ -553,7 +608,6 @@ class Worker(ServerNode):
         scheduler_port: int | None = None,
         *,
         scheduler_file: str | None = None,
-        ncores: None = None,  # Deprecated, use nthreads instead
         nthreads: int | None = None,
         loop: IOLoop | None = None,
         local_dir: None = None,  # Deprecated, use local_directory instead
@@ -570,6 +624,7 @@ class Worker(ServerNode):
         preload_argv: list[str] | list[list[str]] | None = None,
         security: Security | dict[str, Any] | None = None,
         contact_address: str | None = None,
+        heartbeat_interval: Any = "1s",
         memory_monitor_interval: Any = "200ms",
         memory_target_fraction: float | Literal[False] | None = None,
         memory_spill_fraction: float | Literal[False] | None = None,
@@ -607,11 +662,11 @@ class Worker(ServerNode):
         self.tasks = {}
         self.waiting_for_data_count = 0
         self.has_what = defaultdict(set)
-        self.pending_data_per_worker = defaultdict(deque)
+        self.pending_data_per_worker = defaultdict(UniqueTaskHeap)
         self.nanny = nanny
         self._lock = threading.Lock()
 
-        self.data_needed = []
+        self.data_needed = UniqueTaskHeap()
 
         self.in_flight_workers = {}
         self.total_out_connections = dask.config.get(
@@ -662,6 +717,7 @@ class Worker(ServerNode):
             ("resumed", "released"): self.transition_generic_released,
             ("resumed", "waiting"): self.transition_resumed_waiting,
             ("resumed", "fetch"): self.transition_resumed_fetch,
+            ("resumed", "missing"): self.transition_resumed_missing,
             ("constrained", "executing"): self.transition_constrained_executing,
             ("constrained", "released"): self.transition_generic_released,
             ("error", "released"): self.transition_generic_released,
@@ -671,11 +727,11 @@ class Worker(ServerNode):
             ("executing", "released"): self.transition_executing_released,
             ("executing", "rescheduled"): self.transition_executing_rescheduled,
             ("fetch", "flight"): self.transition_fetch_flight,
-            ("fetch", "missing"): self.transition_fetch_missing,
             ("fetch", "released"): self.transition_generic_released,
             ("flight", "error"): self.transition_flight_error,
             ("flight", "fetch"): self.transition_flight_fetch,
             ("flight", "memory"): self.transition_flight_memory,
+            ("flight", "missing"): self.transition_flight_missing,
             ("flight", "released"): self.transition_flight_released,
             ("long-running", "error"): self.transition_generic_error,
             ("long-running", "memory"): self.transition_long_running_memory,
@@ -781,10 +837,6 @@ class Worker(ServerNode):
         self._interface = interface
         self._protocol = protocol
 
-        if ncores is not None:
-            warnings.warn("the ncores= parameter has moved to nthreads=")
-            nthreads = ncores
-
         self.nthreads = nthreads or CPU_COUNT
         if resources is None:
             resources = dask.config.get("distributed.worker.resources", None)
@@ -833,13 +885,15 @@ class Worker(ServerNode):
         ):
             from .spill import SpillBuffer
 
-            self.data = SpillBuffer(
-                os.path.join(self.local_directory, "storage"),
-                target=int(
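+            # `target` is enforced by the SpillBuffer itself against managed
+            # (sizeof-based) memory; the spill/pause fractions are enforced separately
+            # by the memory monitor against process memory.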
+            if self.memory_target_fraction:
+                target = int(
                     self.memory_limit
                     * (self.memory_target_fraction or self.memory_spill_fraction)
                 )
-                or sys.maxsize,
+            else:
+                target = sys.maxsize
+            self.data = SpillBuffer(
+                os.path.join(self.local_directory, "storage"), target=target
             )
         else:
             self.data = {}
@@ -928,6 +982,7 @@ class Worker(ServerNode):
             "free-keys": self.handle_free_keys,
             "remove-replicas": self.handle_remove_replicas,
             "steal-request": self.handle_steal_request,
+            "worker-status-change": self.handle_worker_status_change,
         }
 
         super().__init__(
@@ -945,8 +1000,10 @@ class Worker(ServerNode):
             "worker": self,
         }
 
-        pc = PeriodicCallback(self.heartbeat, 1000)
+        self.heartbeat_interval = parse_timedelta(heartbeat_interval, default="ms")
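+        # parse_timedelta returns seconds; PeriodicCallback expects milliseconds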
+        pc = PeriodicCallback(self.heartbeat, self.heartbeat_interval * 1000)
         self.periodic_callbacks["heartbeat"] = pc
+
         pc = PeriodicCallback(
             lambda: self.batched_stream.send({"op": "keep-alive"}), 60000
         )
@@ -1012,17 +1069,15 @@ class Worker(ServerNode):
     ##################
 
     def __repr__(self):
-        return "<%s: %r, %s, %s, stored: %d, running: %d/%d, ready: %d, comm: %d, waiting: %d>" % (
-            self.__class__.__name__,
-            self.address,
-            self.name,
-            self.status,
-            len(self.data),
-            self.executing_count,
-            self.nthreads,
-            len(self.ready),
-            self.in_flight_tasks,
-            self.waiting_for_data_count,
+        name = f", name: {self.name}" if self.name != self.address else ""
+        return (
+            f"<{self.__class__.__name__} {self.address!r}{name}, "
+            f"status: {self.status.name}, "
+            f"stored: {len(self.data)}, "
+            f"running: {self.executing_count}/{self.nthreads}, "
+            f"ready: {len(self.ready)}, "
+            f"comm: {self.in_flight_tasks}, "
+            f"waiting: {self.waiting_for_data_count}>"
         )
 
     @property
@@ -1127,7 +1182,6 @@ class Worker(ServerNode):
             "id": self.id,
             "scheduler": self.scheduler.address,
             "nthreads": self.nthreads,
-            "ncores": self.nthreads,  # backwards compatibility
             "memory_limit": self.memory_limit,
         }
 
@@ -1154,6 +1208,10 @@ class Worker(ServerNode):
             "status": self.status,
             "ready": self.ready,
             "constrained": self.constrained,
+            "data_needed": list(self.data_needed),
+            "pending_data_per_worker": {
+                w: list(v) for w, v in self.pending_data_per_worker.items()
+            },
             "long_running": self.long_running,
             "executing_count": self.executing_count,
             "in_flight_tasks": self.in_flight_tasks,
@@ -1288,9 +1346,9 @@ class Worker(ServerNode):
                 # If running, wait up to 0.5s and then re-register self.
                 # Otherwise just exit.
                 start = time()
-                while self.status in RUNNING and time() < start + 0.5:
+                while self.status in Status.ANY_RUNNING and time() < start + 0.5:
                     await asyncio.sleep(0.01)
-                if self.status in RUNNING:
+                if self.status in Status.ANY_RUNNING:
                     await self._register_with_scheduler()
                 return
 
@@ -1322,7 +1380,7 @@ class Worker(ServerNode):
             logger.exception(e)
             raise
         finally:
-            if self.reconnect and self.status in RUNNING:
+            if self.reconnect and self.status in Status.ANY_RUNNING:
                 logger.info("Connection to scheduler broken.  Reconnecting...")
                 self.loop.add_callback(self.heartbeat)
             else:
@@ -1504,9 +1562,23 @@ class Worker(ServerNode):
 
         setproctitle("dask-worker [%s]" % self.address)
 
-        await asyncio.gather(
-            *(self.plugin_add(plugin=plugin) for plugin in self._pending_plugins)
+        plugins_msgs = await asyncio.gather(
+            *(
+                self.plugin_add(plugin=plugin, catch_errors=False)
+                for plugin in self._pending_plugins
+            ),
+            return_exceptions=True,
         )
+        plugins_exceptions = [msg for msg in plugins_msgs if isinstance(msg, Exception)]
+        if len(plugins_exceptions) >= 1:
+            if len(plugins_exceptions) > 1:
+                logger.error(
+                    "Multiple plugin exceptions were raised. All will be logged; only the first will be re-raised."
+                )
+                for exc in plugins_exceptions:
+                    logger.error(repr(exc))
+            raise plugins_exceptions[0]
+
         self._pending_plugins = ()
 
         await self._register_with_scheduler()
@@ -1533,7 +1605,7 @@ class Worker(ServerNode):
                 logger.info("Stopping worker at %s", self.address)
             except ValueError:  # address not available if already closed
                 logger.info("Stopping worker")
-            if self.status not in RUNNING:
+            if self.status not in Status.ANY_RUNNING:
                 logger.info("Closed worker has not yet started: %s", self.status)
             self.status = Status.closing
 
@@ -1561,7 +1633,9 @@ class Worker(ServerNode):
                 # If this worker is the last one alive, clean up the worker
                 # initialized clients
                 if not any(
-                    w for w in Worker._instances if w != self and w.status in RUNNING
+                    w
+                    for w in Worker._instances
+                    if w != self and w.status in Status.ANY_RUNNING
                 ):
                     for c in Worker._initialized_clients:
                         # Regardless of what the client was initialized with
@@ -1642,8 +1716,12 @@ class Worker(ServerNode):
             restart = self.lifetime_restart
 
         logger.info("Closing worker gracefully: %s", self.address)
-        self.status = Status.closing_gracefully
-        await self.scheduler.retire_workers(workers=[self.address], remove=False)
+        # Wait for all tasks to leave the worker and don't accept any new ones.
+        # Scheduler.retire_workers will set the status to closing_gracefully and push it
+        # back to this worker.
+        await self.scheduler.retire_workers(
+            workers=[self.address], close_workers=False, remove=False
+        )
         await self.close(safe=True, nanny=not restart)
 
     async def terminate(self, comm=None, report=True, **kwargs):
@@ -1897,7 +1975,6 @@ class Worker(ServerNode):
         ts = self.tasks.get(key)
         if ts and ts.state in READY | {"waiting"}:
             self.log.append((key, "cancel-compute", reason, time()))
-            ts.scheduler_holds_ref = False
             # All possible dependents of TS should not be in state Processing on
             # scheduler side and therefore should not be assigned to a worker,
             # yet.
@@ -1926,7 +2003,7 @@ class Worker(ServerNode):
             if ts.state != "memory":
                 recommendations[ts] = "fetch"
 
-        self.update_who_has(who_has, stimulus_id=stimulus_id)
+        self.update_who_has(who_has)
         self.transitions(recommendations, stimulus_id=stimulus_id)
 
     def ensure_task_exists(
@@ -2022,11 +2099,10 @@ class Worker(ServerNode):
 
         for msg in scheduler_msgs:
             self.batched_stream.send(msg)
+
+        self.update_who_has(who_has)
         self.transitions(recommendations, stimulus_id=stimulus_id)
 
-        # We received new info, that's great but not related to the compute-task
-        # instruction
-        self.update_who_has(who_has, stimulus_id=stimulus_id)
         if nbytes is not None:
             for key, value in nbytes.items():
                 self.tasks[key].nbytes = value
@@ -2039,7 +2115,7 @@ class Worker(ServerNode):
         self._missing_dep_flight.discard(ts)
         ts.state = "fetch"
         ts.done = False
-        heapq.heappush(self.data_needed, (ts.priority, ts.key))
+        self.data_needed.push(ts)
         return {}, []
 
     def transition_missing_released(self, ts, *, stimulus_id):
@@ -2050,10 +2126,11 @@ class Worker(ServerNode):
         assert ts.key in self.tasks
         return recommendations, smsgs
 
-    def transition_fetch_missing(self, ts, *, stimulus_id):
-        # handle_missing will append to self.data_needed if new workers are found
+    def transition_flight_missing(self, ts, *, stimulus_id):
+        assert ts.done
         ts.state = "missing"
         self._missing_dep_flight.add(ts)
+        ts.done = False
         return {}, []
 
     def transition_released_fetch(self, ts, *, stimulus_id):
@@ -2061,10 +2138,10 @@ class Worker(ServerNode):
             assert ts.state == "released"
             assert ts.priority is not None
         for w in ts.who_has:
-            self.pending_data_per_worker[w].append(ts.key)
+            self.pending_data_per_worker[w].push(ts)
         ts.state = "fetch"
         ts.done = False
-        heapq.heappush(self.data_needed, (ts.priority, ts.key))
+        self.data_needed.push(ts)
         return {}, []
 
     def transition_generic_released(self, ts, *, stimulus_id):
@@ -2110,7 +2187,6 @@ class Worker(ServerNode):
         if self.validate:
             assert ts.state == "fetch"
             assert ts.who_has
-            assert ts.key not in self.data_needed
 
         ts.done = False
         ts.state = "flight"
@@ -2231,7 +2307,7 @@ class Worker(ServerNode):
             stimulus_id=stimulus_id,
         )
 
-    def transition_resumed_fetch(self, ts, *, stimulus_id):
+    def _transition_from_resumed(self, ts, finish, *, stimulus_id):
         """`resumed` is an intermediate degenerate state which splits further up
         into two states depending on what the last signal / next state is
         intended to be. There are only two viable choices depending on whether
@@ -2251,33 +2327,37 @@ class Worker(ServerNode):
 
         See also `transition_resumed_waiting`
         """
-        # if the next state is already intended to be fetch or if the
-        # coro/thread is still running (ts.done==False), this is a noop
-        if ts._next == "fetch":
-            return {}, []
-        ts._next = "fetch"
-
+        recs, smsgs = {}, []
         if ts.done:
             next_state = ts._next
-            recs, smsgs = self.transition_generic_released(ts, stimulus_id=stimulus_id)
+            # if the next state is already intended to be `finish` or if the
+            # coro/thread is still running (ts.done==False), this is a noop
+            if ts._next != finish:
+                recs, smsgs = self.transition_generic_released(
+                    ts, stimulus_id=stimulus_id
+                )
             recs[ts] = next_state
-            return recs, smsgs
-        return {}, []
+        else:
+            ts._next = finish
+        return recs, smsgs
 
-    def transition_resumed_waiting(self, ts, *, stimulus_id):
+    def transition_resumed_fetch(self, ts, *, stimulus_id):
         """
-        See transition_resumed_fetch
+        See Worker._transition_from_resumed
         """
-        if ts._next == "waiting":
-            return {}, []
-        ts._next = "waiting"
+        return self._transition_from_resumed(ts, "fetch", stimulus_id=stimulus_id)
 
-        if ts.done:
-            next_state = ts._next
-            recs, smsgs = self.transition_generic_released(ts, stimulus_id=stimulus_id)
-            recs[ts] = next_state
-            return recs, smsgs
-        return {}, []
+    def transition_resumed_missing(self, ts, *, stimulus_id):
+        """
+        See Worker._transition_from_resumed
+        """
+        return self._transition_from_resumed(ts, "missing", stimulus_id=stimulus_id)
+
+    def transition_resumed_waiting(self, ts, *, stimulus_id):
+        """
+        See Worker._transition_from_resumed
+        """
+        return self._transition_from_resumed(ts, "waiting", stimulus_id=stimulus_id)
 
     def transition_cancelled_fetch(self, ts, *, stimulus_id):
         if ts.done:
@@ -2409,11 +2489,17 @@ class Worker(ServerNode):
         # we can reset the task and transition to fetch again. If it is not yet
         # finished, this should be a no-op
         if ts.done:
-            recommendations, smsgs = self.transition_generic_released(
-                ts, stimulus_id=stimulus_id
-            )
-            recommendations[ts] = "fetch"
-            return recommendations, smsgs
+            recommendations = {}
+            ts.state = "fetch"
+            ts.coming_from = None
+            ts.done = False
+            if not ts.who_has:
+                recommendations[ts] = "missing"
+            else:
+                self.data_needed.push(ts)
+                for w in ts.who_has:
+                    self.pending_data_per_worker[w].push(ts)
+            return recommendations, []
         else:
             return {}, []
 
@@ -2676,24 +2762,15 @@ class Worker(ServerNode):
                 self.total_out_connections,
             )
 
-            _, key = heapq.heappop(self.data_needed)
-
-            try:
-                ts = self.tasks[key]
-            except KeyError:
-                continue
+            ts = self.data_needed.pop()
 
             if ts.state != "fetch":
                 continue
 
-            if not ts.who_has:
-                self.transition(ts, "missing", stimulus_id=stimulus_id)
-                continue
-
             workers = [w for w in ts.who_has if w not in self.in_flight_workers]
             if not workers:
                 assert ts.priority is not None
-                skipped_worker_in_flight.append((ts.priority, ts.key))
+                skipped_worker_in_flight.append(ts)
                 continue
 
             host = get_address_host(self.address)
@@ -2724,7 +2801,7 @@ class Worker(ServerNode):
             )
 
         for el in skipped_worker_in_flight:
-            heapq.heappush(self.data_needed, el)
+            self.data_needed.push(el)
 
     def _get_task_finished_msg(self, ts):
         if ts.key not in self.data and ts.key not in self.actors:
@@ -2818,13 +2895,12 @@ class Worker(ServerNode):
         L = self.pending_data_per_worker[worker]
 
         while L:
-            d = L.popleft()
-            ts = self.tasks.get(d)
-            if ts is None or ts.state != "fetch":
+            ts = L.pop()
+            if ts.state != "fetch":
                 continue
             if total_bytes + ts.get_nbytes() > self.target_message_size:
                 break
-            deps.add(d)
+            deps.add(ts.key)
             total_bytes += ts.get_nbytes()
 
         return deps, total_bytes
@@ -2834,7 +2910,7 @@ class Worker(ServerNode):
         warnings.warn(
             "The attribute `Worker.total_comm_bytes` has been renamed to `comm_threshold_bytes`. "
             "Future versions will only support the new name.",
-            DeprecationWarning,
+            FutureWarning,
         )
         return self.comm_threshold_bytes
 
@@ -2957,7 +3033,7 @@ class Worker(ServerNode):
         total_nbytes : int
             Total number of bytes for all the dependencies in to_gather combined
         """
-        if self.status not in RUNNING:
+        if self.status not in Status.ANY_RUNNING:  # type: ignore
             return
 
         recommendations: dict[TaskState, str | tuple] = {}
@@ -3061,7 +3137,7 @@ class Worker(ServerNode):
                         self.batched_stream.send(
                             {"op": "missing-data", "errant_worker": worker, "key": d}
                         )
-                        recommendations[ts] = "fetch"
+                        recommendations[ts] = "fetch" if ts.who_has else "missing"
                 del data, response
                 self.transitions(recommendations, stimulus_id=stimulus_id)
                 self.ensure_computing()
@@ -3071,9 +3147,9 @@ class Worker(ServerNode):
                 else:
                     # Exponential backoff to avoid hammering scheduler/worker
                     self.repetitively_busy += 1
-                    await asyncio.sleep(0.100 * 1.5 ** self.repetitively_busy)
+                    await asyncio.sleep(0.100 * 1.5**self.repetitively_busy)
 
-                    await self.query_who_has(*to_gather_keys, stimulus_id=stimulus_id)
+                    await self.query_who_has(*to_gather_keys)
 
                 self.ensure_communicating()
 
@@ -3092,7 +3168,12 @@ class Worker(ServerNode):
                     keys=[ts.key for ts in self._missing_dep_flight],
                 )
                 who_has = {k: v for k, v in who_has.items() if v}
-                self.update_who_has(who_has, stimulus_id=stimulus_id)
+                self.update_who_has(who_has)
+                recommendations = {}
+                for ts in self._missing_dep_flight:
+                    if ts.who_has:
+                        recommendations[ts] = "fetch"
+                self.transitions(recommendations, stimulus_id=stimulus_id)
 
             finally:
                 # This is quite arbitrary but the heartbeat has scaling implemented
@@ -3102,24 +3183,20 @@ class Worker(ServerNode):
                 self.ensure_communicating()
                 self.ensure_computing()
 
-    async def query_who_has(
-        self, *deps: str, stimulus_id: str
-    ) -> dict[str, Collection[str]]:
+    async def query_who_has(self, *deps: str) -> dict[str, Collection[str]]:
         with log_errors():
             who_has = await retry_operation(self.scheduler.who_has, keys=deps)
-            self.update_who_has(who_has, stimulus_id=stimulus_id)
+            self.update_who_has(who_has)
             return who_has
 
-    def update_who_has(
-        self, who_has: dict[str, Collection[str]], *, stimulus_id: str
-    ) -> None:
+    def update_who_has(self, who_has: dict[str, Collection[str]]) -> None:
         try:
-            recommendations = {}
             for dep, workers in who_has.items():
                 if not workers:
                     continue
 
                 if dep in self.tasks:
+                    dep_ts = self.tasks[dep]
                     if self.address in workers and self.tasks[dep].state != "memory":
                         logger.debug(
                             "Scheduler claims worker %s holds data for task %s which is not true.",
@@ -3128,18 +3205,11 @@ class Worker(ServerNode):
                         )
                         # Do not mutate the input dict. That's rude
                         workers = set(workers) - {self.address}
-                    dep_ts = self.tasks[dep]
-                    if dep_ts.state in FETCH_INTENDED:
-                        dep_ts.who_has.update(workers)
-
-                        if dep_ts.state == "missing":
-                            recommendations[dep_ts] = "fetch"
+                    dep_ts.who_has.update(workers)
 
-                        for worker in workers:
-                            self.has_what[worker].add(dep)
-                            self.pending_data_per_worker[worker].append(dep_ts.key)
-
-            self.transitions(recommendations, stimulus_id=stimulus_id)
+                    for worker in workers:
+                        self.has_what[worker].add(dep)
+                        self.pending_data_per_worker[worker].push(dep_ts)
         except Exception as e:
             logger.exception(e)
             if LOG_PDB:
@@ -3169,6 +3239,22 @@ class Worker(ServerNode):
             # `transition_constrained_executing`
             self.transition(ts, "released", stimulus_id=stimulus_id)
 
+    def handle_worker_status_change(self, status: str) -> None:
+        new_status = Status.lookup[status]  # type: ignore
+
+        if (
+            new_status == Status.closing_gracefully
+            and self._status not in Status.ANY_RUNNING  # type: ignore
+        ):
+            logger.error(
+                "Invalid Worker.status transition: %s -> %s", self._status, new_status
+            )
+            # Reiterate the current status to the scheduler to restore sync
+            self._send_worker_status_change()
+        else:
+            # Update status and send confirmation to the Scheduler (see status.setter)
+            self.status = new_status
+
     def release_key(
         self,
         key: str,
@@ -3248,7 +3334,7 @@ class Worker(ServerNode):
     def run_coroutine(self, comm, function, args=(), kwargs=None, wait=True):
         return run(self, comm, function=function, args=args, kwargs=kwargs, wait=wait)
 
-    async def plugin_add(self, comm=None, plugin=None, name=None):
+    async def plugin_add(self, comm=None, plugin=None, name=None, catch_errors=True):
         with log_errors(pdb=False):
             if isinstance(plugin, bytes):
                 plugin = pickle.loads(plugin)
@@ -3270,6 +3356,8 @@ class Worker(ServerNode):
                     if isawaitable(result):
                         result = await result
                 except Exception as e:
+                    if not catch_errors:
+                        raise
                     msg = error_message(e)
                     return msg
 
@@ -3374,7 +3462,7 @@ class Worker(ServerNode):
             raise
 
     def ensure_computing(self):
-        if self.status == Status.paused:
+        if self.status in (Status.paused, Status.closing_gracefully):
             return
         try:
             stimulus_id = f"ensure-computing-{time()}"
@@ -3411,7 +3499,7 @@ class Worker(ServerNode):
             raise
 
     async def execute(self, key, *, stimulus_id):
-        if self.status in (Status.closing, Status.closed, Status.closing_gracefully):
+        if self.status in {Status.closing, Status.closed, Status.closing_gracefully}:
             return
         if key not in self.tasks:
             return
@@ -3793,11 +3881,11 @@ class Worker(ServerNode):
             if hasattr(plugin, method_name):
                 if method_name == "release_key":
                     warnings.warn(
-                        "The `WorkerPlugin.release_key` hook is depreacted and will be "
+                        "The `WorkerPlugin.release_key` hook is deprecated and will be "
                         "removed in a future version. A similar event can now be "
                         "caught by filtering for a `finish=='released'` event in the "
                         "`WorkerPlugin.transition` hook.",
-                        DeprecationWarning,
+                        FutureWarning,
                     )
 
                 try:
@@ -3856,16 +3944,19 @@ class Worker(ServerNode):
         assert ts.key not in self.data
         assert self.address not in ts.who_has
         assert not ts.done
+        assert ts in self.data_needed
+        assert ts.who_has
 
         for w in ts.who_has:
             assert ts.key in self.has_what[w]
+            assert ts in self.pending_data_per_worker[w]
 
     def validate_task_missing(self, ts):
         assert ts.key not in self.data
         assert not ts.who_has
         assert not ts.done
         assert not any(ts.key in has_what for has_what in self.has_what.values())
-        assert ts.key in self._missing_dep_flight
+        assert ts in self._missing_dep_flight
 
     def validate_task_cancelled(self, ts):
         assert ts.key not in self.data
@@ -3885,7 +3976,6 @@ class Worker(ServerNode):
         assert ts not in self._in_flight_tasks
         assert ts not in self._missing_dep_flight
         assert ts not in self._missing_dep_flight
-        assert not ts.who_has
         assert not any(ts.key in has_what for has_what in self.has_what.values())
         assert not ts.waiting_for_data
         assert not ts.done
@@ -3928,7 +4018,7 @@ class Worker(ServerNode):
             ) from e
 
     def validate_state(self):
-        if self.status not in RUNNING:
+        if self.status not in Status.ANY_RUNNING:
             return
         try:
             assert self.executing_count >= 0
@@ -3955,13 +4045,9 @@ class Worker(ServerNode):
                     assert (
                         ts_wait.state
                         in READY | {"executing", "flight", "fetch", "missing"}
-                        or ts_wait.key in self._missing_dep_flight
+                        or ts_wait in self._missing_dep_flight
                         or ts_wait.who_has.issubset(self.in_flight_workers)
                     ), (ts, ts_wait, self.story(ts), self.story(ts_wait))
-                if ts.state == "memory":
-                    assert isinstance(ts.nbytes, int)
-                    assert not ts.waiting_for_data
-                    assert ts.key in self.data or ts.key in self.actors
             assert self.waiting_for_data_count == waiting_for_data_count
             for worker, keys in self.has_what.items():
                 for k in keys:
@@ -4095,7 +4181,11 @@ def get_worker() -> Worker:
         return thread_state.execution_state["worker"]
     except AttributeError:
         try:
-            return first(w for w in Worker._instances if w.status in RUNNING)
+            return first(
+                w
+                for w in Worker._instances
+                if w.status in Status.ANY_RUNNING  # type: ignore
+            )
         except StopIteration:
             raise ValueError("No workers found")
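
The worker.py hunks above replace plain ``heapq`` lists of ``(priority, key)`` tuples (``self.data_needed``, ``self.pending_data_per_worker[w]``) with a container that offers ``push()``, ``pop()`` and ``in`` checks. A minimal sketch of such a structure, assuming only that task states carry ``.key`` and ``.priority``; the class and attribute names below are illustrative, not the actual ones shipped in ``distributed``:

   import heapq

   class _TaskHeap:
       """Hypothetical priority heap of task states with O(1) membership checks."""

       def __init__(self):
           self._heap = []  # entries: (priority, insertion counter, task state)
           self._keys = set()
           self._counter = 0

       def push(self, ts):
           # Skip duplicates so repeated transitions cannot enqueue a task twice
           if ts.key not in self._keys:
               heapq.heappush(self._heap, (ts.priority, self._counter, ts))
               self._keys.add(ts.key)
               self._counter += 1

       def pop(self):
           # Return the highest-priority (smallest tuple) task and forget its key
           _, _, ts = heapq.heappop(self._heap)
           self._keys.discard(ts.key)
           return ts

       def __contains__(self, ts):
           return ts.key in self._keys

       def __len__(self):
           return len(self._heap)

With a container of this shape, ``transition_released_fetch`` can simply call ``self.data_needed.push(ts)`` and the validators can assert ``ts in self.data_needed`` without scanning tuples or re-checking ``self.tasks``.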
 
diff -pruN 2022.01.0+ds.1-1/docs/source/active_memory_manager.rst 2022.02.0+ds.1-1/docs/source/active_memory_manager.rst
--- 2022.01.0+ds.1-1/docs/source/active_memory_manager.rst	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/docs/source/active_memory_manager.rst	2022-02-11 16:21:14.000000000 +0000
@@ -257,3 +257,6 @@ API reference
    :undoc-members:
 
 .. autoclass:: distributed.active_memory_manager.ReduceReplicas
+
+.. autoclass:: distributed.active_memory_manager.RetireWorker
+   :members:
diff -pruN 2022.01.0+ds.1-1/docs/source/changelog.rst 2022.02.0+ds.1-1/docs/source/changelog.rst
--- 2022.01.0+ds.1-1/docs/source/changelog.rst	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/docs/source/changelog.rst	2022-02-11 16:21:14.000000000 +0000
@@ -1,6 +1,118 @@
 Changelog
 =========
 
+.. _v2022.02.0:
+
+2022.02.0
+---------
+
+Released on February 11, 2022
+
+Enhancements
+^^^^^^^^^^^^
+- Update ``client.scheduler_info`` in ``wait_for_workers`` (:pr:`5785`) `Matthew Rocklin`_
+- Increase robustness to ``TimeoutError`` during connect (:pr:`5096`) `Florian Jetter`_
+- Respect ``KeyboardInterrupt`` in ``sync`` (:pr:`5758`) `Thomas Grainger`_
+- Add workflow / recipe to generate Dask/distributed pre-releases (:pr:`5636`) `Charles Blackmon-Luca`_
+- Review ``Scheduler`` / ``Worker`` display repr (:pr:`5746`) `crusaderky`_
+- AMM: Graceful Worker Retirement (:pr:`5381`) `crusaderky`_
+- AMM: tentatively stabilize flaky tests around worker pause (:pr:`5735`) `crusaderky`_
+- AMM: speed up and stabilize test_memory (:pr:`5737`) `crusaderky`_
+- Defer pandas import on worker in P2P shuffle (:pr:`5695`) `Gabe Joseph`_
+
+Bug Fixes
+^^^^^^^^^
+- Fix for ``distributed.worker.memory.target=False`` and ``spill=0.7`` (:pr:`5788`) `crusaderky`_
+- Transition ``flight`` to ``missing`` if no ``who_has`` (:pr:`5653`) `Florian Jetter`_
+
+Deprecations
+^^^^^^^^^^^^
+- Remove deprecated ``ncores`` (:pr:`5780`) `crusaderky`_
+- Deprecate registering plugins by class (:pr:`5699`) `Thomas Grainger`_
+- Deprecate ``--nprocs`` option for ``dask-worker`` CLI (:pr:`5641`) `Bryan W. Weber`_
+
+Documentation
+^^^^^^^^^^^^^
+- Fix imbalanced backticks (:pr:`5784`) `Matthias Bussonnier`_
+
+Maintenance
+^^^^^^^^^^^
+- xfail ``test_worker_reconnects_mid_compute`` (:pr:`5797`) `crusaderky`_
+- Fix linting CI build (:pr:`5794`) `James Bourbeau`_
+- Update ``pre-commit`` versions (:pr:`5782`) `James Bourbeau`_
+- Reactivate ``pytest_resourceleaks`` (:pr:`5771`) `crusaderky`_
+- Set test assumption for ``test_client_timeout`` (:pr:`5790`) `Florian Jetter`_
+- Remove client timeout from ``test_ucx_config_w_env_var`` (:pr:`5792`) `Florian Jetter`_
+- Remove ``test_failed_worker_without_warning`` (:pr:`5789`) `Florian Jetter`_
+- Fix longitudinal report (:pr:`5783`) `Ian Rose`_
+- Fix flaky ``test_robust_to_bad_sizeof_estimates`` (:pr:`5753`) `crusaderky`_
+- Revert "Pin coverage to 6.2 (:pr:`5716`)" (:pr:`5770`) `Thomas Grainger`_
+- Trigger test runs periodically to increase failure statistics (:pr:`5769`) `Florian Jetter`_
+- More fault tolerant test report (:pr:`5732`) `Ian Rose`_
+- Pin ``pillow<9`` to work around ``torch`` incompatibility (:pr:`5755`) `Thomas Grainger`_
+- Overhaul ``check_process_leak`` (:pr:`5739`) `crusaderky`_
+- Fix flaky ``test_exit_callback`` test (:pr:`5713`) `Jim Crist-Harif`_
+- Generate tests summary (:pr:`5710`) `crusaderky`_
+- Upload pre-releases for different architectures separately (:pr:`5741`) `Charles Blackmon-Luca`_
+- Ignore non-test directories (:pr:`5720`) `Gabe Joseph`_
+- Bump gpuCI ``PYTHON_VER`` to 3.9 (:pr:`5738`) `Charles Blackmon-Luca`_
+- Regression: threads noted down before they start (:pr:`5796`) `crusaderky`_
+
+
+.. _v2022.01.1:
+
+2022.01.1
+---------
+
+Released on January 28, 2022
+
+New Features
+^^^^^^^^^^^^
+- P2P shuffle skeleton (:pr:`5520`) `Gabe Joseph`_
+
+Enhancements
+^^^^^^^^^^^^
+- Fix ``<Task pending name='...' coro=<Client._handle_report()>`` (:pr:`5721`) `Thomas Grainger`_
+- Add ``distributed.client.security-loader`` config (:pr:`5693`) `Jim Crist-Harif`_
+- Avoid ``Client._handle_report`` cancelling itself on ``Client._close`` (:pr:`5672`) `Thomas Grainger`_
+- Paused workers shouldn't steal tasks (:pr:`5665`) `crusaderky`_
+- Add option for timestamps from output of ``Node.get_logs`` (:pr:`4932`) `Charles Blackmon-Luca`_
+- Don't use ``time.time()`` or ``IOLoop.time()`` (:pr:`5661`) `crusaderky`_
+
+Bug Fixes
+^^^^^^^^^
+- Raise plugin exceptions on ``Worker.start()`` (:pr:`4298`) `Peter Andreas Entschev`_
+
+Documentation
+^^^^^^^^^^^^^
+- Fixing docstrings (:pr:`5696`) `Julia Signell`_
+- Fix typo in ``Client.run`` docstring (:pr:`5687`) `Thomas Grainger`_
+- Update ``client.py`` docstrings (:pr:`5670`) `Tim Harris`_
+
+Maintenance
+^^^^^^^^^^^
+- Skip shuffle tests if ``pandas`` / ``dask.dataframe`` not installed (:pr:`5730`) `James Bourbeau`_
+- Improve test coverage (:pr:`5655`) `Sarah Charlotte Johnson`_
+- Test report improvements (:pr:`5714`) `Ian Rose`_
+- P2P shuffle: ignore row order in tests (:pr:`5706`) `Gabe Joseph`_
+- Fix flaky ``test_no_reconnect[--no-nanny]`` (:pr:`5686`) `Thomas Grainger`_
+- Pin coverage to 6.2 (:pr:`5716`) `Thomas Grainger`_
+- Check for new name of timeouts artifact and be more fault tolerant (:pr:`5707`) `Ian Rose`_
+- Revisit rebalance unit tests (:pr:`5697`) `crusaderky`_
+- Update comment in ``rearrange_by_column_p2p`` (:pr:`5701`) `James Bourbeau`_
+- Update gpuCI ``RAPIDS_VER`` to ``22.04`` (:pr:`5676`)
+- Fix groupby test after meta requirements got stricter in Dask PR#8563 (:pr:`5694`) `Julia Signell`_
+- Fix flaky ``test_close_gracefully`` and ``test_lifetime`` (:pr:`5677`) `crusaderky`_
+- Fix flaky ``test_workspace_concurrency`` (:pr:`5690`) `crusaderky`_
+- Fix flaky ``test_shuffle_extension.py::test_get_partition`` (:pr:`5689`) `Gabe Joseph`_
+- Fix flaky ``test_dump_cluster_unresponsive_remote_worker`` (:pr:`5679`) `crusaderky`_
+- Dump cluster state on all test failures (:pr:`5674`) `crusaderky`_
+- Update license format (:pr:`5652`) `James Bourbeau`_
+- Fix flaky ``test_drop_with_paused_workers_with_running_tasks_3_4`` (:pr:`5673`) `crusaderky`_
+- Do not raise an exception if the GitHub token cannot be found (:pr:`5668`) `Florian Jetter`_
+
+
 .. _v2022.01.0:
 
 2022.01.0
@@ -3152,3 +3264,7 @@ significantly without many new features.
 .. _`Aneesh Nema`: https://github.com/aneeshnema
 .. _`Deepyaman Datta`: https://github.com/deepyaman
 .. _`Garry O'Donnell`: https://github.com/garryod
+.. _`Thomas Grainger`: https://github.com/graingert
+.. _`Sarah Charlotte Johnson`: https://github.com/scharlottej13
+.. _`Tim Harris`: https://github.com/tharris72
+.. _`Bryan W. Weber`: https://github.com/bryanwweber
diff -pruN 2022.01.0+ds.1-1/docs/source/efficiency.rst 2022.02.0+ds.1-1/docs/source/efficiency.rst
--- 2022.01.0+ds.1-1/docs/source/efficiency.rst	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/docs/source/efficiency.rst	2022-02-11 16:21:14.000000000 +0000
@@ -85,7 +85,7 @@ worker processes on each node, each usin
 cluster you may want to use the options to the ``dask-worker`` executable as
 follows::
 
-   $ dask-worker ip:port --nprocs 8 --nthreads 1
+   $ dask-worker ip:port --nworkers 8 --nthreads 1
 
 Note that if you're primarily using NumPy, Pandas, SciPy, Scikit Learn, Numba,
 or other C/Fortran/LLVM/Cython-accelerated libraries then this is not an issue
diff -pruN 2022.01.0+ds.1-1/docs/source/examples/word-count.rst 2022.02.0+ds.1-1/docs/source/examples/word-count.rst
--- 2022.01.0+ds.1-1/docs/source/examples/word-count.rst	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/docs/source/examples/word-count.rst	2022-02-11 16:21:14.000000000 +0000
@@ -168,8 +168,8 @@ words, the results will exist on each wo
 and 16 GB RAM.
 
 Note that because the previous computation is bound by the GIL in Python, we
-can speed it up by starting the ``distributed`` workers with the ``--nprocs 4``
-option.
+can speed it up by starting the ``distributed`` workers with the
+``--nworkers 4`` option.
 
 To sum the word counts for all of the text files, we need to gather some
 information from the ``distributed`` workers. To reduce the amount of data
diff -pruN 2022.01.0+ds.1-1/docs/source/resources.rst 2022.02.0+ds.1-1/docs/source/resources.rst
--- 2022.01.0+ds.1-1/docs/source/resources.rst	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/docs/source/resources.rst	2022-02-11 16:21:14.000000000 +0000
@@ -103,18 +103,18 @@ This might be the easiest solution if yo
 Resources are applied separately to each worker process
 -------------------------------------------------------
 
-If you are using ``dask-worker --nprocs <nprocs>`` the resource will be applied
-separately to each of the ``nprocs`` worker processes. Suppose you have 2 GPUs
+If you are using ``dask-worker --nworkers <nworkers>``, the resource will be applied
+separately to each of the ``nworkers`` worker processes. Suppose you have 2 GPUs
 on your machine. If you want to use two worker processes, you have 1 GPU per
 worker process, so you need to do something like this::
 
-   dask-worker scheduler:8786 --nprocs 2 --resources "GPU=1"
+   dask-worker scheduler:8786 --nworkers 2 --resources "GPU=1"
 
 Here is an example that illustrates how to use resources to ensure each task is
 run inside a separate process, which is useful to execute non-thread-safe tasks
 or tasks that use multithreading internally::
 
-   dask-worker scheduler:8786 --nprocs 3 --nthreads 2 --resources "process=1"
+   dask-worker scheduler:8786 --nworkers 3 --nthreads 2 --resources "process=1"
 
 With the code below, there will be at most 3 tasks running concurrently and
 each task will run in a separate process:
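
(The literal block referenced by "the code below" lies outside this hunk. For orientation only, here is a hedged sketch of what such client code can look like, using the ``resources=`` keyword of ``Client.submit``; ``process_task`` and the scheduler address are placeholders:)

   from distributed import Client

   def process_task(i):
       # stand-in for a non-thread-safe or internally multithreaded function
       return i * 2

   client = Client("scheduler:8786")  # placeholder address

   # Each task requests one "process" resource, so with three worker processes
   # started via --resources "process=1" at most three tasks run concurrently,
   # each on a different worker process.
   futures = [
       client.submit(process_task, i, resources={"process": 1}) for i in range(10)
   ]
   results = client.gather(futures)
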
diff -pruN 2022.01.0+ds.1-1/docs/source/worker.rst 2022.02.0+ds.1-1/docs/source/worker.rst
--- 2022.01.0+ds.1-1/docs/source/worker.rst	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/docs/source/worker.rst	2022-02-11 16:21:14.000000000 +0000
@@ -68,7 +68,7 @@ If your computations are mostly Python c
 is advisable to run ``dask-worker`` processes with many processes and one
 thread per process::
 
-   $ dask-worker scheduler:8786 --nprocs 8 --nthreads 1
+   $ dask-worker scheduler:8786 --nworkers 8 --nthreads 1
 
 This will launch 8 worker processes each of which has its own
 ThreadPoolExecutor of size 1.
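
(As a hedged aside, roughly the same thread/process layout can be expressed from Python with ``LocalCluster``; unlike the command above it starts its own scheduler instead of connecting to ``scheduler:8786``, so it illustrates the layout rather than replacing the CLI invocation:)

   from distributed import Client, LocalCluster

   # 8 single-threaded worker processes, mirroring the layout of
   # `dask-worker scheduler:8786 --nworkers 8 --nthreads 1`
   cluster = LocalCluster(n_workers=8, threads_per_worker=1, processes=True)
   client = Client(cluster)
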
@@ -440,4 +440,4 @@ API Documentation
 .. _malloc_trim: https://man7.org/linux/man-pages/man3/malloc_trim.3.html
 .. _brk: https://www.man7.org/linux/man-pages/man2/brk.2.html
 .. _jemalloc: http://jemalloc.net
-.. _homebrew: https://brew.sh/
\ No newline at end of file
+.. _homebrew: https://brew.sh/
diff -pruN 2022.01.0+ds.1-1/.github/workflows/conda.yml 2022.02.0+ds.1-1/.github/workflows/conda.yml
--- 2022.01.0+ds.1-1/.github/workflows/conda.yml	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/.github/workflows/conda.yml	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,87 @@
+name: Conda build
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+# When this workflow is queued, automatically cancel any previous running
+# or pending jobs from the same branch
+concurrency:
+  group: conda-${{ github.head_ref }}
+  cancel-in-progress: true
+
+# Required shell entrypoint to have properly activated conda environments
+defaults:
+  run:
+    shell: bash -l {0}
+
+jobs:
+  conda:
+    name: Build (and upload)
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+        with:
+          fetch-depth: 0
+      - name: Set up Python
+        uses: conda-incubator/setup-miniconda@v2
+        with:
+          miniforge-variant: Mambaforge
+          use-mamba: true
+          python-version: 3.8
+      - name: Install dependencies
+        run: |
+          mamba install boa conda-verify
+
+          which python
+          pip list
+          mamba list
+      - name: Build conda packages
+        run: |
+          # suffix for pre-release package versions
+          export VERSION_SUFFIX=a`date +%y%m%d`
+
+          # conda search for the latest dask-core pre-release
+          arr=($(conda search --override-channels -c dask/label/dev dask-core | tail -n 1))
+
+          # extract dask-core pre-release version / build
+          export DASK_VERSION=${arr[1]}
+          export DASK_BUILD=${arr[2]}
+
+          # distributed pre-release build
+          conda mambabuild continuous_integration/recipes/distributed \
+                           --channel dask/label/dev \
+                           --no-anaconda-upload \
+                           --output-folder build
+
+          # dask pre-release build
+          conda mambabuild continuous_integration/recipes/dask \
+                           --channel dask/label/dev \
+                           --no-anaconda-upload \
+                           --output-folder build
+      - name: Upload conda packages
+        if: |
+          github.event_name == 'push'
+          && github.ref == 'refs/heads/main'
+          && github.repository == 'dask/distributed'
+        env:
+          ANACONDA_API_TOKEN: ${{ secrets.DASK_CONDA_TOKEN }}
+        run: |
+          # convert distributed to other architectures
+          cd build && conda convert linux-64/*.tar.bz2 -p osx-64 \
+                                                       -p osx-arm64 \
+                                                       -p linux-ppc64le \
+                                                       -p linux-aarch64 \
+                                                       -p win-64
+
+          # install anaconda for upload
+          mamba install anaconda-client
+
+          anaconda upload --label dev noarch/*.tar.bz2
+          anaconda upload --label dev linux-64/*.tar.bz2
+          anaconda upload --label dev linux-aarch64/*.tar.bz2
+          anaconda upload --label dev linux-ppc64le/*.tar.bz2
+          anaconda upload --label dev osx-64/*.tar.bz2
+          anaconda upload --label dev osx-arm64/*.tar.bz2
+          anaconda upload --label dev win-64/*.tar.bz2
diff -pruN 2022.01.0+ds.1-1/.github/workflows/publish-test-results.yaml 2022.02.0+ds.1-1/.github/workflows/publish-test-results.yaml
--- 2022.01.0+ds.1-1/.github/workflows/publish-test-results.yaml	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/.github/workflows/publish-test-results.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,45 @@
+# Copied from https://github.com/EnricoMi/publish-unit-test-result-action/blob/v1.23/README.md#support-fork-repositories-and-dependabot-branches
+# Warning: changes to this workflow will NOT be picked up until they land in the main branch!
+# See https://docs.github.com/en/actions/using-workflows/events-that-trigger-workflows#workflow_run
+
+name: Publish test results
+
+on:
+  workflow_run:
+    workflows: [Tests]
+    types: [completed]
+
+jobs:
+  publish-test-results:
+    name: Publish test results
+    runs-on: ubuntu-latest
+    if: github.event.workflow_run.conclusion != 'skipped'
+
+    # Needed to post comments on the PR
+    permissions:
+      checks: write
+      pull-requests: write
+
+    steps:
+      - name: Download and extract artifacts
+        env:
+          GITHUB_TOKEN: ${{secrets.GITHUB_TOKEN}}
+        run: |
+          mkdir artifacts && cd artifacts
+
+          artifacts_url=${{ github.event.workflow_run.artifacts_url }}
+
+          gh api "$artifacts_url" -q '.artifacts[] | [.name, .archive_download_url] | @tsv' | while read artifact
+          do
+            IFS=$'\t' read name url <<< "$artifact"
+            gh api $url > "$name.zip"
+            unzip -d "$name" "$name.zip"
+          done
+
+      - name: Publish Unit Test Results
+        uses: EnricoMi/publish-unit-test-result-action@v1
+        with:
+          commit: ${{ github.event.workflow_run.head_sha }}
+          event_file: artifacts/Event File/event.json
+          event_name: ${{ github.event.workflow_run.event }}
+          files: artifacts/**/*.xml
diff -pruN 2022.01.0+ds.1-1/.github/workflows/ssh_debug.yaml 2022.02.0+ds.1-1/.github/workflows/ssh_debug.yaml
--- 2022.01.0+ds.1-1/.github/workflows/ssh_debug.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/.github/workflows/ssh_debug.yaml	1970-01-01 00:00:00.000000000 +0000
@@ -1,47 +0,0 @@
-name: Debug passwordless `ssh localhost`
-
-on: []
-# on: [pull_request]  # Uncomment to enable
-
-jobs:
-  test:
-    runs-on: ${{ matrix.os }}
-    strategy:
-      fail-fast: false
-      matrix:
-        os:
-          - ubuntu-latest
-          - macos-latest
-          # - windows-latest  # FIXME https://github.com/dask/distributed/issues/4509
-
-    steps:
-      - name: Setup SSH
-        shell: bash -l {0}
-        run: bash continuous_integration/scripts/setup_ssh.sh
-
-      - name: Dump SSH config
-        shell: bash -l {0}
-        run: |
-          ls -ld ~ ~/.ssh ~/.ssh/*
-          for f in ~/.ssh/* /etc/ssh/sshd_config; do
-            echo ==================================
-            echo $f
-            echo ==================================
-            cat $f
-          done
-
-      - name: Test SSH vs. localhost
-        shell: bash -l {0}
-        run: ssh -vvv localhost 'echo hello world'
-
-      - name: Test SSH vs. 127.0.0.1
-        shell: bash -l {0}
-        run: ssh -vvv 127.0.0.1 'echo hello world'
-
-      - name: Test SSH vs. hostname
-        shell: bash -l {0}
-        run: ssh -vvv $(hostname) 'echo hello world'
-
-      - name: Debug with tmate on failure
-        if: ${{ failure() }}
-        uses: mxschmitt/action-tmate@v3
diff -pruN 2022.01.0+ds.1-1/.github/workflows/ssh_debug.yaml.donotrun 2022.02.0+ds.1-1/.github/workflows/ssh_debug.yaml.donotrun
--- 2022.01.0+ds.1-1/.github/workflows/ssh_debug.yaml.donotrun	1970-01-01 00:00:00.000000000 +0000
+++ 2022.02.0+ds.1-1/.github/workflows/ssh_debug.yaml.donotrun	2022-02-11 16:21:14.000000000 +0000
@@ -0,0 +1,46 @@
+name: Debug passwordless `ssh localhost`
+
+on: [push, pull_request]
+
+jobs:
+  test:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os:
+          - ubuntu-latest
+          - macos-latest
+          # - windows-latest  # FIXME https://github.com/dask/distributed/issues/4509
+
+    steps:
+      - name: Setup SSH
+        shell: bash -l {0}
+        run: bash continuous_integration/scripts/setup_ssh.sh
+
+      - name: Dump SSH config
+        shell: bash -l {0}
+        run: |
+          ls -ld ~ ~/.ssh ~/.ssh/*
+          for f in ~/.ssh/* /etc/ssh/sshd_config; do
+            echo ==================================
+            echo $f
+            echo ==================================
+            cat $f
+          done
+
+      - name: Test SSH vs. localhost
+        shell: bash -l {0}
+        run: ssh -vvv localhost 'echo hello world'
+
+      - name: Test SSH vs. 127.0.0.1
+        shell: bash -l {0}
+        run: ssh -vvv 127.0.0.1 'echo hello world'
+
+      - name: Test SSH vs. hostname
+        shell: bash -l {0}
+        run: ssh -vvv $(hostname) 'echo hello world'
+
+      - name: Debug with tmate on failure
+        if: ${{ failure() }}
+        uses: mxschmitt/action-tmate@v3
diff -pruN 2022.01.0+ds.1-1/.github/workflows/tests.yaml 2022.02.0+ds.1-1/.github/workflows/tests.yaml
--- 2022.01.0+ds.1-1/.github/workflows/tests.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/.github/workflows/tests.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -1,6 +1,10 @@
 name: Tests
 
-on: [push, pull_request]
+on:
+  push:
+  pull_request:
+  schedule:
+    - cron: "0 6,18 * * *"
 
 # When this workflow is queued, automatically cancel any previous running
 # or pending jobs from the same branch
@@ -120,8 +124,11 @@ jobs:
           fi
           source continuous_integration/scripts/set_ulimit.sh
 
-          pytest distributed -m "not avoid_ci and ${{ matrix.partition }}" --runslow \
-            --junitxml reports/pytest.xml -o junit_suite_name=$TEST_ID --cov=distributed --cov-report=xml
+          pytest distributed \
+            -m "not avoid_ci and ${{ matrix.partition }}" --runslow \
+            --leaks=fds,processes,threads \
+            --junitxml reports/pytest.xml -o junit_suite_name=$TEST_ID \
+            --cov=distributed --cov-report=xml
 
       # - name: Debug with tmate on failure
       #   if: ${{ failure() }}
@@ -130,7 +137,7 @@ jobs:
       - name: Coverage
         uses: codecov/codecov-action@v1
 
-      - name: Upload test artifacts
+      - name: Upload test results
         # ensure this runs even if pytest fails
         if: >
           always() &&
@@ -139,13 +146,24 @@ jobs:
         with:
           name: ${{ env.TEST_ID }}
           path: reports
-      - name: Upload timeout reports
+      - name: Upload gen_cluster dumps for failed tests
         # ensure this runs even if pytest fails
         if: >
           always() &&
           (steps.run_tests.outcome == 'success' || steps.run_tests.outcome == 'failure')
         uses: actions/upload-artifact@v2
         with:
-          name: ${{ env.TEST_ID }}-timeouts
-          path: test_timeout_dump
+          name: ${{ env.TEST_ID }}_cluster_dumps
+          path: test_cluster_dump
           if-no-files-found: ignore
+
+  # Publish an artifact for the event; used by publish-test-results.yaml
+  event_file:
+    name: "Event File"
+    runs-on: ubuntu-latest
+    steps:
+      - name: Upload
+        uses: actions/upload-artifact@v2
+        with:
+          name: Event File
+          path: ${{ github.event_path }}
diff -pruN 2022.01.0+ds.1-1/.gitignore 2022.02.0+ds.1-1/.gitignore
--- 2022.01.0+ds.1-1/.gitignore	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/.gitignore	2022-02-11 16:21:14.000000000 +0000
@@ -30,6 +30,7 @@ dask-worker-space/
 tags
 .ipynb_checkpoints
 .venv/
+.mypy_cache/
 
-# Test timeouts will dump the cluster state in here
-test_timeout_dump/
+# Test failures will dump the cluster state in here
+test_cluster_dump/
diff -pruN 2022.01.0+ds.1-1/LICENSE.txt 2022.02.0+ds.1-1/LICENSE.txt
--- 2022.01.0+ds.1-1/LICENSE.txt	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/LICENSE.txt	2022-02-11 16:21:14.000000000 +0000
@@ -1,28 +1,29 @@
-﻿Copyright (c) 2015-2017, Anaconda, Inc. and contributors
+﻿BSD 3-Clause License
+
+Copyright (c) 2015, Anaconda, Inc. and contributors
 All rights reserved.
 
-Redistribution and use in source and binary forms, with or without modification,
-are permitted provided that the following conditions are met:
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+* Redistributions of source code must retain the above copyright notice, this
+  list of conditions and the following disclaimer.
 
-Redistributions of source code must retain the above copyright notice,
-this list of conditions and the following disclaimer.
+* Redistributions in binary form must reproduce the above copyright notice,
+  this list of conditions and the following disclaimer in the documentation
+  and/or other materials provided with the distribution.
 
-Redistributions in binary form must reproduce the above copyright notice,
-this list of conditions and the following disclaimer in the documentation
-and/or other materials provided with the distribution.
-
-Neither the name of Anaconda nor the names of any contributors
-may be used to endorse or promote products derived from this software
-without specific prior written permission.
+* Neither the name of the copyright holder nor the names of its
+  contributors may be used to endorse or promote products derived from
+  this software without specific prior written permission.
 
 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
-THE POSSIBILITY OF SUCH DAMAGE.
+IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file
diff -pruN 2022.01.0+ds.1-1/.pre-commit-config.yaml 2022.02.0+ds.1-1/.pre-commit-config.yaml
--- 2022.01.0+ds.1-1/.pre-commit-config.yaml	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/.pre-commit-config.yaml	2022-02-11 16:21:14.000000000 +0000
@@ -1,6 +1,6 @@
 repos:
   -   repo: https://github.com/pycqa/isort
-      rev: 5.9.3
+      rev: 5.10.1
       hooks:
       - id: isort
         language_version: python3
@@ -13,7 +13,7 @@ repos:
         args:
           - --py37-plus
   -   repo: https://github.com/psf/black
-      rev: 21.9b0
+      rev: 22.1.0
       hooks:
       - id: black
         language_version: python3
@@ -26,7 +26,7 @@ repos:
       - id: flake8
         language_version: python3
   - repo: https://github.com/pre-commit/mirrors-mypy
-    rev: v0.910
+    rev: v0.931
     hooks:
       - id: mypy
         additional_dependencies:
diff -pruN 2022.01.0+ds.1-1/requirements.txt 2022.02.0+ds.1-1/requirements.txt
--- 2022.01.0+ds.1-1/requirements.txt	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/requirements.txt	2022-02-11 16:21:14.000000000 +0000
@@ -1,6 +1,6 @@
 click >= 6.6
 cloudpickle >= 1.5.0
-dask == 2022.01.0
+dask == 2022.02.0
 jinja2
 msgpack >= 0.6.0
 packaging >= 20.0
diff -pruN 2022.01.0+ds.1-1/setup.cfg 2022.02.0+ds.1-1/setup.cfg
--- 2022.01.0+ds.1-1/setup.cfg	2022-01-14 20:37:43.000000000 +0000
+++ 2022.02.0+ds.1-1/setup.cfg	2022-02-11 16:21:14.000000000 +0000
@@ -39,7 +39,7 @@ tag_prefix =
 parentdir_prefix = distributed-
 
 [tool:pytest]
-addopts = -v -rsxfE --durations=20 --color=yes
+addopts = -v -rsxfE --durations=20 --color=yes --ignore=continuous_integration --ignore=docs --ignore=.github
 filterwarnings =
     error:Since distributed.*:PendingDeprecationWarning
 minversion = 4
@@ -49,6 +49,7 @@ markers =
     avoid_ci: marks tests as flaky or broken on CI on all OSs
     ipython: marks tests as exercising IPython
     gpu: marks tests we want to run on GPUs
+    leaking: ignore leaked resources; see pytest_resourceleaks.py for usage
 
 # pytest-timeout settings
 # 'thread' kills off the whole test suite. 'signal' only kills the offending test.
