diff -pruN 7.2.2-5/buckifier/buckify_rocksdb.py 7.3.1-2/buckifier/buckify_rocksdb.py
--- 7.2.2-5/buckifier/buckify_rocksdb.py	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/buckifier/buckify_rocksdb.py	2022-06-08 21:08:16.000000000 +0000
@@ -145,7 +145,12 @@ def generate_targets(repo_path, deps_map
         # always add range_tree, it's only excluded on ppc64, which we don't use internally
         src_mk["RANGE_TREE_SOURCES"] +
         src_mk["TOOL_LIB_SOURCES"],
-        deps=["//folly/container:f14_hash"])
+        deps=[
+            "//folly/container:f14_hash",
+            "//folly/experimental/coro:blocking_wait",
+            "//folly/experimental/coro:collect",
+            "//folly/experimental/coro:coroutine",
+            "//folly/experimental/coro:task"])
     # rocksdb_whole_archive_lib
     TARGETS.add_library(
         "rocksdb_whole_archive_lib",
@@ -153,7 +158,12 @@ def generate_targets(repo_path, deps_map
         # always add range_tree, it's only excluded on ppc64, which we don't use internally
         src_mk["RANGE_TREE_SOURCES"] +
         src_mk["TOOL_LIB_SOURCES"],
-        deps=["//folly/container:f14_hash"],
+        deps=[
+            "//folly/container:f14_hash",
+            "//folly/experimental/coro:blocking_wait",
+            "//folly/experimental/coro:collect",
+            "//folly/experimental/coro:coroutine",
+            "//folly/experimental/coro:task"],
         headers=None,
         extra_external_deps="",
         link_whole=True)
diff -pruN 7.2.2-5/build_tools/build_detect_platform 7.3.1-2/build_tools/build_detect_platform
--- 7.2.2-5/build_tools/build_detect_platform	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/build_tools/build_detect_platform	2022-06-08 21:08:16.000000000 +0000
@@ -469,7 +469,7 @@ EOF
 
     if ! test $ROCKSDB_DISABLE_MEMKIND; then
         # Test whether memkind library is installed
-        $CXX $PLATFORM_CXXFLAGS $COMMON_FLAGS -lmemkind -x c++ - -o test.o 2>/dev/null  <<EOF
+        $CXX $PLATFORM_CXXFLAGS $LDFLAGS -x c++ - -o test.o -lmemkind 2>/dev/null  <<EOF
           #include <memkind.h>
           int main() {
             memkind_malloc(MEMKIND_DAX_KMEM, 1024);
@@ -662,13 +662,13 @@ else
   fi
 
   if [[ "${PLATFORM}" == "OS_MACOSX" ]]; then
-    # For portability compile for macOS 10.12 (2016) or newer
-    COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.12"
-    PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.12"
+    # For portability compile for macOS 10.13 (2017) or newer
+    COMMON_FLAGS="$COMMON_FLAGS -mmacosx-version-min=10.13"
+    PLATFORM_LDFLAGS="$PLATFORM_LDFLAGS -mmacosx-version-min=10.13"
     # -mmacosx-version-min must come first here.
-    PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.12 $PLATFORM_SHARED_LDFLAGS"
-    PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.12"
-    JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.12"
+    PLATFORM_SHARED_LDFLAGS="-mmacosx-version-min=10.13 $PLATFORM_SHARED_LDFLAGS"
+    PLATFORM_CMAKE_FLAGS="-DCMAKE_OSX_DEPLOYMENT_TARGET=10.13"
+    JAVA_STATIC_DEPS_COMMON_FLAGS="-mmacosx-version-min=10.13"
     JAVA_STATIC_DEPS_LDFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
     JAVA_STATIC_DEPS_CCFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
     JAVA_STATIC_DEPS_CXXFLAGS="$JAVA_STATIC_DEPS_COMMON_FLAGS"
@@ -800,9 +800,6 @@ if [ "$?" = 0 ]; then
   COMMON_FLAGS="$COMMON_FLAGS -DHAVE_UINT128_EXTENSION"
 fi
 
-# thread_local is part of C++11 and later (TODO: clean up this define)
-COMMON_FLAGS="$COMMON_FLAGS -DROCKSDB_SUPPORT_THREAD_LOCAL"
-
 if [ "$FBCODE_BUILD" != "true" -a "$PLATFORM" = OS_LINUX ]; then
   $CXX $COMMON_FLAGS $PLATFORM_SHARED_CFLAGS -x c++ -c - -o test_dl.o 2>/dev/null <<EOF
   void dummy_func() {}
diff -pruN 7.2.2-5/build_tools/dependencies_platform009.sh 7.3.1-2/build_tools/dependencies_platform009.sh
--- 7.2.2-5/build_tools/dependencies_platform009.sh	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/build_tools/dependencies_platform009.sh	2022-06-08 21:08:16.000000000 +0000
@@ -20,3 +20,7 @@ VALGRIND_BASE=/mnt/gvfs/third-party2/val
 LUA_BASE=/mnt/gvfs/third-party2/lua/162efd9561a3d21f6869f4814011e9cf1b3ff4dc/5.3.4/platform009/a6271c4
 BENCHMARK_BASE=/mnt/gvfs/third-party2/benchmark/30bf49ad6414325e17f3425b0edcb64239427ae3/1.6.1/platform009/7f3b187
 BOOST_BASE=/mnt/gvfs/third-party2/boost/201b7d74941e54b436dfa364a063aa6d2cd7de4c/1.69.0/platform009/8a7ffdf
+GLOG_BASE=/mnt/gvfs/third-party2/glog/32d751bd5673375b438158717ab6a57c1cc57e3d/0.3.2_fb/platform009/10a364d/
+FMT_BASE=/mnt/gvfs/third-party2/fmt/ce0c25f67165f4d2c22a29b8ef50f5600d7873ca/6.1.1/platform009/7f3b187/
+DBL_CONV_BASE=/mnt/gvfs/third-party2/double_conversion/109b3d9696d71f1048678cd7da1e22505470543d/20141126/platform009/7f3b187/
+LIBEVENT_BASE=/mnt/gvfs/third-party2/libevent/4a4d3a79a76c2439b6bd471bf3586b3481dde75e/1.4.14b_hphp/platform009/7f3b187/
diff -pruN 7.2.2-5/build_tools/fbcode_config_platform009.sh 7.3.1-2/build_tools/fbcode_config_platform009.sh
--- 7.2.2-5/build_tools/fbcode_config_platform009.sh	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/build_tools/fbcode_config_platform009.sh	2022-06-08 21:08:16.000000000 +0000
@@ -14,7 +14,7 @@ source "$BASEDIR/dependencies_platform00
 CFLAGS=""
 
 # libgcc
-LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/9.3.0"
+LIBGCC_INCLUDE="$LIBGCC_BASE/include/c++/9.3.0 -I $LIBGCC_BASE/include/c++/9.3.0/backward"
 LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
 
 # glibc
@@ -27,28 +27,38 @@ else
   MAYBE_PIC=_pic
 fi
 
-# snappy
-SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
-SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
-CFLAGS+=" -DSNAPPY"
-
-# location of zlib headers and libraries
-ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
-ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
-CFLAGS+=" -DZLIB"
-
-# location of bzip headers and libraries
-BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
-BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
-CFLAGS+=" -DBZIP2"
-
-LZ4_INCLUDE=" -I $LZ4_BASE/include/"
-LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
-CFLAGS+=" -DLZ4"
-
-ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
-ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
-CFLAGS+=" -DZSTD"
+if ! test $ROCKSDB_DISABLE_SNAPPY; then
+  # snappy
+  SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
+  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
+  CFLAGS+=" -DSNAPPY"
+fi
+
+if ! test $ROCKSDB_DISABLE_ZLIB; then
+  # location of zlib headers and libraries
+  ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
+  ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
+  CFLAGS+=" -DZLIB"
+fi
+
+if ! test $ROCKSDB_DISABLE_BZIP; then
+  # location of bzip headers and libraries
+  BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
+  BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
+  CFLAGS+=" -DBZIP2"
+fi
+
+if ! test $ROCKSDB_DISABLE_LZ4; then
+  LZ4_INCLUDE=" -I $LZ4_BASE/include/"
+  LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
+  CFLAGS+=" -DLZ4"
+fi
+
+if ! test $ROCKSDB_DISABLE_ZSTD; then
+  ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
+  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
+  CFLAGS+=" -DZSTD"
+fi
 
 # location of gflags headers and libraries
 GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@@ -60,6 +70,18 @@ BENCHMARK_LIBS=" $BENCHMARK_BASE/lib/lib
 
 BOOST_INCLUDE=" -I $BOOST_BASE/include/"
 
+GLOG_INCLUDE=" -I $GLOG_BASE/include/"
+GLOG_LIBS=" $GLOG_BASE/lib/libglog${MAYBE_PIC}.a"
+
+FMT_INCLUDE=" -I $FMT_BASE/include/"
+FMT_LIBS=" $FMT_BASE/lib/libfmt${MAYBE_PIC}.a"
+
+DBL_CONV_INCLUDE=" -I $DBL_CONV_BASE/include/"
+DBL_CONV_LIBS=" $DBL_CONV_BASE/lib/libdouble-conversion${MAYBE_PIC}.a"
+
+LIBEVENT_INCLUDE=" -I $LIBEVENT_BASE/include/"
+LIBEVENT_LIBS=" $LIBEVENT_BASE/lib/libevent${MAYBE_PIC}.a"
+
 # location of jemalloc
 JEMALLOC_INCLUDE=" -I $JEMALLOC_BASE/include/"
 JEMALLOC_LIB=" $JEMALLOC_BASE/lib/libjemalloc${MAYBE_PIC}.a"
@@ -91,7 +113,7 @@ BINUTILS="$BINUTILS_BASE/bin"
 AR="$BINUTILS/ar"
 AS="$BINUTILS/as"
 
-DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE $BOOST_INCLUDE"
+DEPS_INCLUDE="$SNAPPY_INCLUDE $ZLIB_INCLUDE $BZIP_INCLUDE $LZ4_INCLUDE $ZSTD_INCLUDE $GFLAGS_INCLUDE $NUMA_INCLUDE $TBB_INCLUDE $LIBURING_INCLUDE $BENCHMARK_INCLUDE $BOOST_INCLUDE $GLOG_INCLUDE $FMT_INCLUDE $DBL_CONV_INCLUDE $LIBEVENT_INCLUDE"
 
 STDLIBS="-L $GCC_BASE/lib64"
 
@@ -134,7 +156,7 @@ else
 fi
 
 CFLAGS+=" $DEPS_INCLUDE"
-CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
+CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
 CXXFLAGS+=" $CFLAGS"
 
 EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
diff -pruN 7.2.2-5/build_tools/fbcode_config_platform010.sh 7.3.1-2/build_tools/fbcode_config_platform010.sh
--- 7.2.2-5/build_tools/fbcode_config_platform010.sh	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/build_tools/fbcode_config_platform010.sh	2022-06-08 21:08:16.000000000 +0000
@@ -29,28 +29,38 @@ else
   MAYBE_PIC=_pic
 fi
 
-# snappy
-SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
-SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
-CFLAGS+=" -DSNAPPY"
-
-# location of zlib headers and libraries
-ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
-ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
-CFLAGS+=" -DZLIB"
-
-# location of bzip headers and libraries
-BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
-BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
-CFLAGS+=" -DBZIP2"
-
-LZ4_INCLUDE=" -I $LZ4_BASE/include/"
-LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
-CFLAGS+=" -DLZ4"
-
-ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
-ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
-CFLAGS+=" -DZSTD"
+if ! test $ROCKSDB_DISABLE_SNAPPY; then
+  # snappy
+  SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
+  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy${MAYBE_PIC}.a"
+  CFLAGS+=" -DSNAPPY"
+fi
+
+if ! test $ROCKSDB_DISABLE_ZLIB; then
+  # location of zlib headers and libraries
+  ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
+  ZLIB_LIBS=" $ZLIB_BASE/lib/libz${MAYBE_PIC}.a"
+  CFLAGS+=" -DZLIB"
+fi
+
+if ! test $ROCKSDB_DISABLE_BZIP; then
+  # location of bzip headers and libraries
+  BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
+  BZIP_LIBS=" $BZIP2_BASE/lib/libbz2${MAYBE_PIC}.a"
+  CFLAGS+=" -DBZIP2"
+fi
+
+if ! test $ROCKSDB_DISABLE_LZ4; then
+  LZ4_INCLUDE=" -I $LZ4_BASE/include/"
+  LZ4_LIBS=" $LZ4_BASE/lib/liblz4${MAYBE_PIC}.a"
+  CFLAGS+=" -DLZ4"
+fi
+
+if ! test $ROCKSDB_DISABLE_ZSTD; then
+  ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
+  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd${MAYBE_PIC}.a"
+  CFLAGS+=" -DZSTD"
+fi
 
 # location of gflags headers and libraries
 GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@@ -108,10 +118,19 @@ if [ -z "$USE_CLANG" ]; then
   CXX="$GCC_BASE/bin/g++"
   AR="$GCC_BASE/bin/gcc-ar"
 
-  
-  CFLAGS+=" -B$BINUTILS"
+  CFLAGS+=" -B$BINUTILS -nostdinc -nostdlib"
+  CFLAGS+=" -I$GCC_BASE/include"
+  CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include"
+  CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/install-tools/include"
+  CFLAGS+=" -isystem $GCC_BASE/lib/gcc/x86_64-redhat-linux-gnu/11.2.1/include-fixed/"
   CFLAGS+=" -isystem $LIBGCC_INCLUDE"
   CFLAGS+=" -isystem $GLIBC_INCLUDE"
+  CFLAGS+=" -I$GLIBC_INCLUDE"
+  CFLAGS+=" -I$LIBGCC_BASE/include"
+  CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/"
+  CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/x86_64-facebook-linux/"
+  CFLAGS+=" -I$LIBGCC_BASE/include/c++/11.x/backward"
+  CFLAGS+=" -isystem $GLIBC_INCLUDE -I$GLIBC_INCLUDE"
   JEMALLOC=1
 else
   # clang
@@ -135,7 +154,7 @@ CFLAGS+=" -isystem $KERNEL_HEADERS_INCLU
 CFLAGS+=" -isystem $KERNEL_HEADERS_INCLUDE "
 
 CFLAGS+=" $DEPS_INCLUDE"
-CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
+CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42 -DROCKSDB_IOURING_PRESENT"
 CXXFLAGS+=" $CFLAGS"
 
 EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS $LIBURING_LIBS $BENCHMARK_LIBS"
diff -pruN 7.2.2-5/build_tools/fbcode_config.sh 7.3.1-2/build_tools/fbcode_config.sh
--- 7.2.2-5/build_tools/fbcode_config.sh	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/build_tools/fbcode_config.sh	2022-06-08 21:08:16.000000000 +0000
@@ -21,38 +21,48 @@ LIBGCC_LIBS=" -L $LIBGCC_BASE/lib"
 GLIBC_INCLUDE="$GLIBC_BASE/include"
 GLIBC_LIBS=" -L $GLIBC_BASE/lib"
 
-# snappy
-SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
-if test -z $PIC_BUILD; then
-  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
-else
-  SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
-fi
-CFLAGS+=" -DSNAPPY"
-
-if test -z $PIC_BUILD; then
-  # location of zlib headers and libraries
-  ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
-  ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
-  CFLAGS+=" -DZLIB"
-
-  # location of bzip headers and libraries
-  BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
-  BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
-  CFLAGS+=" -DBZIP2"
-
-  LZ4_INCLUDE=" -I $LZ4_BASE/include/"
-  LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
-  CFLAGS+=" -DLZ4"
+if ! test $ROCKSDB_DISABLE_SNAPPY; then
+  # snappy
+  SNAPPY_INCLUDE=" -I $SNAPPY_BASE/include/"
+  if test -z $PIC_BUILD; then
+    SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy.a"
+  else
+    SNAPPY_LIBS=" $SNAPPY_BASE/lib/libsnappy_pic.a"
+  fi
+  CFLAGS+=" -DSNAPPY"
 fi
 
-ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
 if test -z $PIC_BUILD; then
-  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
-else
-  ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
+  if ! test $ROCKSDB_DISABLE_ZLIB; then
+    # location of zlib headers and libraries
+    ZLIB_INCLUDE=" -I $ZLIB_BASE/include/"
+    ZLIB_LIBS=" $ZLIB_BASE/lib/libz.a"
+    CFLAGS+=" -DZLIB"
+  fi
+
+  if ! test $ROCKSDB_DISABLE_BZIP; then
+    # location of bzip headers and libraries
+    BZIP_INCLUDE=" -I $BZIP2_BASE/include/"
+    BZIP_LIBS=" $BZIP2_BASE/lib/libbz2.a"
+    CFLAGS+=" -DBZIP2"
+  fi
+
+  if ! test $ROCKSDB_DISABLE_LZ4; then
+    LZ4_INCLUDE=" -I $LZ4_BASE/include/"
+    LZ4_LIBS=" $LZ4_BASE/lib/liblz4.a"
+    CFLAGS+=" -DLZ4"
+  fi
+fi
+
+if ! test $ROCKSDB_DISABLE_ZSTD; then
+  ZSTD_INCLUDE=" -I $ZSTD_BASE/include/"
+  if test -z $PIC_BUILD; then
+    ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd.a"
+  else
+    ZSTD_LIBS=" $ZSTD_BASE/lib/libzstd_pic.a"
+  fi
+  CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
 fi
-CFLAGS+=" -DZSTD -DZSTD_STATIC_LINKING_ONLY"
 
 # location of gflags headers and libraries
 GFLAGS_INCLUDE=" -I $GFLAGS_BASE/include/"
@@ -137,7 +147,7 @@ else
 fi
 
 CFLAGS+=" $DEPS_INCLUDE"
-CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DROCKSDB_SUPPORT_THREAD_LOCAL -DHAVE_SSE42"
+CFLAGS+=" -DROCKSDB_PLATFORM_POSIX -DROCKSDB_LIB_IO_POSIX -DROCKSDB_FALLOCATE_PRESENT -DROCKSDB_MALLOC_USABLE_SIZE -DROCKSDB_RANGESYNC_PRESENT -DROCKSDB_SCHED_GETCPU_PRESENT -DHAVE_SSE42"
 CXXFLAGS+=" $CFLAGS"
 
 EXEC_LDFLAGS=" $SNAPPY_LIBS $ZLIB_LIBS $BZIP_LIBS $LZ4_LIBS $ZSTD_LIBS $GFLAGS_LIBS $NUMA_LIB $TBB_LIBS"
diff -pruN 7.2.2-5/cache/cache_test.cc 7.3.1-2/cache/cache_test.cc
--- 7.2.2-5/cache/cache_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/cache/cache_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -14,7 +14,9 @@
 #include <iostream>
 #include <string>
 #include <vector>
+
 #include "cache/clock_cache.h"
+#include "cache/fast_lru_cache.h"
 #include "cache/lru_cache.h"
 #include "test_util/testharness.h"
 #include "util/coding.h"
@@ -39,6 +41,7 @@ static int DecodeValue(void* v) {
 
 const std::string kLRU = "lru";
 const std::string kClock = "clock";
+const std::string kFast = "fast";
 
 void dumbDeleter(const Slice& /*key*/, void* /*value*/) {}
 
@@ -83,6 +86,9 @@ class CacheTest : public testing::TestWi
     if (type == kClock) {
       return NewClockCache(capacity);
     }
+    if (type == kFast) {
+      return NewFastLRUCache(capacity);
+    }
     return nullptr;
   }
 
@@ -103,6 +109,10 @@ class CacheTest : public testing::TestWi
       return NewClockCache(capacity, num_shard_bits, strict_capacity_limit,
                            charge_policy);
     }
+    if (type == kFast) {
+      return NewFastLRUCache(capacity, num_shard_bits, strict_capacity_limit,
+                             charge_policy);
+    }
     return nullptr;
   }
 
@@ -183,7 +193,7 @@ TEST_P(CacheTest, UsageTest) {
 
   // make sure the cache will be overloaded
   for (uint64_t i = 1; i < kCapacity; ++i) {
-    auto key = ToString(i);
+    auto key = std::to_string(i);
     ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
                             dumbDeleter));
     ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
@@ -255,7 +265,7 @@ TEST_P(CacheTest, PinnedUsageTest) {
 
   // check that overloading the cache does not change the pinned usage
   for (uint64_t i = 1; i < 2 * kCapacity; ++i) {
-    auto key = ToString(i);
+    auto key = std::to_string(i);
     ASSERT_OK(cache->Insert(key, reinterpret_cast<void*>(value), key.size() + 5,
                             dumbDeleter));
     ASSERT_OK(precise_cache->Insert(key, reinterpret_cast<void*>(value),
@@ -575,7 +585,7 @@ TEST_P(CacheTest, SetCapacity) {
   std::vector<Cache::Handle*> handles(10);
   // Insert 5 entries, but not releasing.
   for (size_t i = 0; i < 5; i++) {
-    std::string key = ToString(i+1);
+    std::string key = std::to_string(i + 1);
     Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
     ASSERT_TRUE(s.ok());
   }
@@ -590,7 +600,7 @@ TEST_P(CacheTest, SetCapacity) {
   // then decrease capacity to 7, final capacity should be 7
   // and usage should be 7
   for (size_t i = 5; i < 10; i++) {
-    std::string key = ToString(i+1);
+    std::string key = std::to_string(i + 1);
     Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
     ASSERT_TRUE(s.ok());
   }
@@ -621,7 +631,7 @@ TEST_P(LRUCacheTest, SetStrictCapacityLi
   std::vector<Cache::Handle*> handles(10);
   Status s;
   for (size_t i = 0; i < 10; i++) {
-    std::string key = ToString(i + 1);
+    std::string key = std::to_string(i + 1);
     s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
     ASSERT_OK(s);
     ASSERT_NE(nullptr, handles[i]);
@@ -645,7 +655,7 @@ TEST_P(LRUCacheTest, SetStrictCapacityLi
   // test3: init with flag being true.
   std::shared_ptr<Cache> cache2 = NewCache(5, 0, true);
   for (size_t i = 0; i < 5; i++) {
-    std::string key = ToString(i + 1);
+    std::string key = std::to_string(i + 1);
     s = cache2->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
     ASSERT_OK(s);
     ASSERT_NE(nullptr, handles[i]);
@@ -675,14 +685,14 @@ TEST_P(CacheTest, OverCapacity) {
 
   // Insert n+1 entries, but not releasing.
   for (size_t i = 0; i < n + 1; i++) {
-    std::string key = ToString(i+1);
+    std::string key = std::to_string(i + 1);
     Status s = cache->Insert(key, new Value(i + 1), 1, &deleter, &handles[i]);
     ASSERT_TRUE(s.ok());
   }
 
   // Guess what's in the cache now?
   for (size_t i = 0; i < n + 1; i++) {
-    std::string key = ToString(i+1);
+    std::string key = std::to_string(i + 1);
     auto h = cache->Lookup(key);
     ASSERT_TRUE(h != nullptr);
     if (h) cache->Release(h);
@@ -703,7 +713,7 @@ TEST_P(CacheTest, OverCapacity) {
   // This is consistent with the LRU policy since the element 0
   // was released first
   for (size_t i = 0; i < n + 1; i++) {
-    std::string key = ToString(i+1);
+    std::string key = std::to_string(i + 1);
     auto h = cache->Lookup(key);
     if (h) {
       ASSERT_NE(i, 0U);
@@ -744,9 +754,9 @@ TEST_P(CacheTest, ApplyToAllEntriesTest)
   std::vector<std::string> callback_state;
   const auto callback = [&](const Slice& key, void* value, size_t charge,
                             Cache::DeleterFn deleter) {
-    callback_state.push_back(ToString(DecodeKey(key)) + "," +
-                             ToString(DecodeValue(value)) + "," +
-                             ToString(charge));
+    callback_state.push_back(std::to_string(DecodeKey(key)) + "," +
+                             std::to_string(DecodeValue(value)) + "," +
+                             std::to_string(charge));
     assert(deleter == &CacheTest::Deleter);
   };
 
@@ -755,8 +765,8 @@ TEST_P(CacheTest, ApplyToAllEntriesTest)
 
   for (int i = 0; i < 10; ++i) {
     Insert(i, i * 2, i + 1);
-    inserted.push_back(ToString(i) + "," + ToString(i * 2) + "," +
-                       ToString(i + 1));
+    inserted.push_back(std::to_string(i) + "," + std::to_string(i * 2) + "," +
+                       std::to_string(i + 1));
   }
   cache_->ApplyToAllEntries(callback, /*opts*/ {});
 
@@ -838,11 +848,13 @@ TEST_P(CacheTest, GetChargeAndDeleter) {
 std::shared_ptr<Cache> (*new_clock_cache_func)(
     size_t, int, bool, CacheMetadataChargePolicy) = NewClockCache;
 INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
-                        testing::Values(kLRU, kClock));
+                        testing::Values(kLRU, kClock, kFast));
 #else
-INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest, testing::Values(kLRU));
+INSTANTIATE_TEST_CASE_P(CacheTestInstance, CacheTest,
+                        testing::Values(kLRU, kFast));
 #endif  // SUPPORT_CLOCK_CACHE
-INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest, testing::Values(kLRU));
+INSTANTIATE_TEST_CASE_P(CacheTestInstance, LRUCacheTest,
+                        testing::Values(kLRU, kFast));
 
 }  // namespace ROCKSDB_NAMESPACE
 
diff -pruN 7.2.2-5/cache/fast_lru_cache.cc 7.3.1-2/cache/fast_lru_cache.cc
--- 7.2.2-5/cache/fast_lru_cache.cc	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/cache/fast_lru_cache.cc	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,511 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "cache/fast_lru_cache.h"
+
+#include <cassert>
+#include <cstdint>
+#include <cstdio>
+
+#include "monitoring/perf_context_imp.h"
+#include "monitoring/statistics.h"
+#include "port/lang.h"
+#include "util/mutexlock.h"
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace fast_lru_cache {
+
+LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
+    : length_bits_(/* historical starting size*/ 4),
+      list_(new LRUHandle* [size_t{1} << length_bits_] {}),
+      elems_(0),
+      max_length_bits_(max_upper_hash_bits) {}
+
+LRUHandleTable::~LRUHandleTable() {
+  ApplyToEntriesRange(
+      [](LRUHandle* h) {
+        if (!h->HasRefs()) {
+          h->Free();
+        }
+      },
+      0, uint32_t{1} << length_bits_);
+}
+
+LRUHandle* LRUHandleTable::Lookup(const Slice& key, uint32_t hash) {
+  return *FindPointer(key, hash);
+}
+
+LRUHandle* LRUHandleTable::Insert(LRUHandle* h) {
+  LRUHandle** ptr = FindPointer(h->key(), h->hash);
+  LRUHandle* old = *ptr;
+  h->next_hash = (old == nullptr ? nullptr : old->next_hash);
+  *ptr = h;
+  if (old == nullptr) {
+    ++elems_;
+    if ((elems_ >> length_bits_) > 0) {  // elems_ >= length
+      // Since each cache entry is fairly large, we aim for a small
+      // average linked list length (<= 1).
+      Resize();
+    }
+  }
+  return old;
+}
+
+LRUHandle* LRUHandleTable::Remove(const Slice& key, uint32_t hash) {
+  LRUHandle** ptr = FindPointer(key, hash);
+  LRUHandle* result = *ptr;
+  if (result != nullptr) {
+    *ptr = result->next_hash;
+    --elems_;
+  }
+  return result;
+}
+
+LRUHandle** LRUHandleTable::FindPointer(const Slice& key, uint32_t hash) {
+  LRUHandle** ptr = &list_[hash >> (32 - length_bits_)];
+  while (*ptr != nullptr && ((*ptr)->hash != hash || key != (*ptr)->key())) {
+    ptr = &(*ptr)->next_hash;
+  }
+  return ptr;
+}
+
+void LRUHandleTable::Resize() {
+  if (length_bits_ >= max_length_bits_) {
+    // Due to reaching limit of hash information, if we made the table bigger,
+    // we would allocate more addresses but only the same number would be used.
+    return;
+  }
+  if (length_bits_ >= 31) {
+    // Avoid undefined behavior shifting uint32_t by 32.
+    return;
+  }
+
+  uint32_t old_length = uint32_t{1} << length_bits_;
+  int new_length_bits = length_bits_ + 1;
+  std::unique_ptr<LRUHandle* []> new_list {
+    new LRUHandle* [size_t{1} << new_length_bits] {}
+  };
+  uint32_t count = 0;
+  for (uint32_t i = 0; i < old_length; i++) {
+    LRUHandle* h = list_[i];
+    while (h != nullptr) {
+      LRUHandle* next = h->next_hash;
+      uint32_t hash = h->hash;
+      LRUHandle** ptr = &new_list[hash >> (32 - new_length_bits)];
+      h->next_hash = *ptr;
+      *ptr = h;
+      h = next;
+      count++;
+    }
+  }
+  assert(elems_ == count);
+  list_ = std::move(new_list);
+  length_bits_ = new_length_bits;
+}
+
+LRUCacheShard::LRUCacheShard(size_t capacity, bool strict_capacity_limit,
+                             CacheMetadataChargePolicy metadata_charge_policy,
+                             int max_upper_hash_bits)
+    : capacity_(0),
+      strict_capacity_limit_(strict_capacity_limit),
+      table_(max_upper_hash_bits),
+      usage_(0),
+      lru_usage_(0) {
+  set_metadata_charge_policy(metadata_charge_policy);
+  // Make empty circular linked list.
+  lru_.next = &lru_;
+  lru_.prev = &lru_;
+  lru_low_pri_ = &lru_;
+  SetCapacity(capacity);
+}
+
+void LRUCacheShard::EraseUnRefEntries() {
+  autovector<LRUHandle*> last_reference_list;
+  {
+    MutexLock l(&mutex_);
+    while (lru_.next != &lru_) {
+      LRUHandle* old = lru_.next;
+      // LRU list contains only elements which can be evicted.
+      assert(old->InCache() && !old->HasRefs());
+      LRU_Remove(old);
+      table_.Remove(old->key(), old->hash);
+      old->SetInCache(false);
+      size_t total_charge = old->CalcTotalCharge(metadata_charge_policy_);
+      assert(usage_ >= total_charge);
+      usage_ -= total_charge;
+      last_reference_list.push_back(old);
+    }
+  }
+
+  // Free the entries here outside of mutex for performance reasons.
+  for (auto entry : last_reference_list) {
+    entry->Free();
+  }
+}
+
+void LRUCacheShard::ApplyToSomeEntries(
+    const std::function<void(const Slice& key, void* value, size_t charge,
+                             DeleterFn deleter)>& callback,
+    uint32_t average_entries_per_lock, uint32_t* state) {
+  // The state is essentially going to be the starting hash, which works
+  // nicely even if we resize between calls because we use upper-most
+  // hash bits for table indexes.
+  MutexLock l(&mutex_);
+  uint32_t length_bits = table_.GetLengthBits();
+  uint32_t length = uint32_t{1} << length_bits;
+
+  assert(average_entries_per_lock > 0);
+  // Assuming we are called with same average_entries_per_lock repeatedly,
+  // this simplifies some logic (index_end will not overflow).
+  assert(average_entries_per_lock < length || *state == 0);
+
+  uint32_t index_begin = *state >> (32 - length_bits);
+  uint32_t index_end = index_begin + average_entries_per_lock;
+  if (index_end >= length) {
+    // Going to end
+    index_end = length;
+    *state = UINT32_MAX;
+  } else {
+    *state = index_end << (32 - length_bits);
+  }
+
+  table_.ApplyToEntriesRange(
+      [callback](LRUHandle* h) {
+        callback(h->key(), h->value, h->charge, h->deleter);
+      },
+      index_begin, index_end);
+}
+
+void LRUCacheShard::LRU_Remove(LRUHandle* e) {
+  assert(e->next != nullptr);
+  assert(e->prev != nullptr);
+  e->next->prev = e->prev;
+  e->prev->next = e->next;
+  e->prev = e->next = nullptr;
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+  assert(lru_usage_ >= total_charge);
+  lru_usage_ -= total_charge;
+}
+
+void LRUCacheShard::LRU_Insert(LRUHandle* e) {
+  assert(e->next == nullptr);
+  assert(e->prev == nullptr);
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+  // Insert "e" to head of LRU list.
+  e->next = &lru_;
+  e->prev = lru_.prev;
+  e->prev->next = e;
+  e->next->prev = e;
+  lru_usage_ += total_charge;
+}
+
+void LRUCacheShard::EvictFromLRU(size_t charge,
+                                 autovector<LRUHandle*>* deleted) {
+  while ((usage_ + charge) > capacity_ && lru_.next != &lru_) {
+    LRUHandle* old = lru_.next;
+    // LRU list contains only elements which can be evicted.
+    assert(old->InCache() && !old->HasRefs());
+    LRU_Remove(old);
+    table_.Remove(old->key(), old->hash);
+    old->SetInCache(false);
+    size_t old_total_charge = old->CalcTotalCharge(metadata_charge_policy_);
+    assert(usage_ >= old_total_charge);
+    usage_ -= old_total_charge;
+    deleted->push_back(old);
+  }
+}
+
+void LRUCacheShard::SetCapacity(size_t capacity) {
+  autovector<LRUHandle*> last_reference_list;
+  {
+    MutexLock l(&mutex_);
+    capacity_ = capacity;
+    EvictFromLRU(0, &last_reference_list);
+  }
+
+  // Free the entries here outside of mutex for performance reasons.
+  for (auto entry : last_reference_list) {
+    entry->Free();
+  }
+}
+
+void LRUCacheShard::SetStrictCapacityLimit(bool strict_capacity_limit) {
+  MutexLock l(&mutex_);
+  strict_capacity_limit_ = strict_capacity_limit;
+}
+
+Status LRUCacheShard::InsertItem(LRUHandle* e, Cache::Handle** handle,
+                                 bool free_handle_on_fail) {
+  Status s = Status::OK();
+  autovector<LRUHandle*> last_reference_list;
+  size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+
+  {
+    MutexLock l(&mutex_);
+
+    // Free the space following strict LRU policy until enough space
+    // is freed or the lru list is empty.
+    EvictFromLRU(total_charge, &last_reference_list);
+
+    if ((usage_ + total_charge) > capacity_ &&
+        (strict_capacity_limit_ || handle == nullptr)) {
+      e->SetInCache(false);
+      if (handle == nullptr) {
+        // Don't insert the entry but still return ok, as if the entry inserted
+        // into cache and get evicted immediately.
+        last_reference_list.push_back(e);
+      } else {
+        if (free_handle_on_fail) {
+          delete[] reinterpret_cast<char*>(e);
+          *handle = nullptr;
+        }
+        s = Status::Incomplete("Insert failed due to LRU cache being full.");
+      }
+    } else {
+      // Insert into the cache. Note that the cache might get larger than its
+      // capacity if not enough space was freed up.
+      LRUHandle* old = table_.Insert(e);
+      usage_ += total_charge;
+      if (old != nullptr) {
+        s = Status::OkOverwritten();
+        assert(old->InCache());
+        old->SetInCache(false);
+        if (!old->HasRefs()) {
+          // old is on LRU because it's in cache and its reference count is 0.
+          LRU_Remove(old);
+          size_t old_total_charge =
+              old->CalcTotalCharge(metadata_charge_policy_);
+          assert(usage_ >= old_total_charge);
+          usage_ -= old_total_charge;
+          last_reference_list.push_back(old);
+        }
+      }
+      if (handle == nullptr) {
+        LRU_Insert(e);
+      } else {
+        // If caller already holds a ref, no need to take one here.
+        if (!e->HasRefs()) {
+          e->Ref();
+        }
+        *handle = reinterpret_cast<Cache::Handle*>(e);
+      }
+    }
+  }
+
+  // Free the entries here outside of mutex for performance reasons.
+  for (auto entry : last_reference_list) {
+    entry->Free();
+  }
+
+  return s;
+}
+
+Cache::Handle* LRUCacheShard::Lookup(const Slice& key, uint32_t hash) {
+  LRUHandle* e = nullptr;
+  {
+    MutexLock l(&mutex_);
+    e = table_.Lookup(key, hash);
+    if (e != nullptr) {
+      assert(e->InCache());
+      if (!e->HasRefs()) {
+        // The entry is in LRU since it's in hash and has no external references
+        LRU_Remove(e);
+      }
+      e->Ref();
+    }
+  }
+  return reinterpret_cast<Cache::Handle*>(e);
+}
+
+bool LRUCacheShard::Ref(Cache::Handle* h) {
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(h);
+  MutexLock l(&mutex_);
+  // To create another reference - entry must be already externally referenced.
+  assert(e->HasRefs());
+  e->Ref();
+  return true;
+}
+
+bool LRUCacheShard::Release(Cache::Handle* handle, bool erase_if_last_ref) {
+  if (handle == nullptr) {
+    return false;
+  }
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(handle);
+  bool last_reference = false;
+  {
+    MutexLock l(&mutex_);
+    last_reference = e->Unref();
+    if (last_reference && e->InCache()) {
+      // The item is still in cache, and nobody else holds a reference to it.
+      if (usage_ > capacity_ || erase_if_last_ref) {
+        // The LRU list must be empty since the cache is full.
+        assert(lru_.next == &lru_ || erase_if_last_ref);
+        // Take this opportunity and remove the item.
+        table_.Remove(e->key(), e->hash);
+        e->SetInCache(false);
+      } else {
+        // Put the item back on the LRU list, and don't free it.
+        LRU_Insert(e);
+        last_reference = false;
+      }
+    }
+    // If it was the last reference, then decrement the cache usage.
+    if (last_reference) {
+      size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+      assert(usage_ >= total_charge);
+      usage_ -= total_charge;
+    }
+  }
+
+  // Free the entry here outside of mutex for performance reasons.
+  if (last_reference) {
+    e->Free();
+  }
+  return last_reference;
+}
+
+Status LRUCacheShard::Insert(const Slice& key, uint32_t hash, void* value,
+                             size_t charge, Cache::DeleterFn deleter,
+                             Cache::Handle** handle,
+                             Cache::Priority /*priority*/) {
+  // Allocate the memory here outside of the mutex.
+  // If the cache is full, we'll have to release it.
+  // It shouldn't happen very often though.
+  LRUHandle* e = reinterpret_cast<LRUHandle*>(
+      new char[sizeof(LRUHandle) - 1 + key.size()]);
+
+  e->value = value;
+  e->flags = 0;
+  e->deleter = deleter;
+  e->charge = charge;
+  e->key_length = key.size();
+  e->hash = hash;
+  e->refs = 0;
+  e->next = e->prev = nullptr;
+  e->SetInCache(true);
+  memcpy(e->key_data, key.data(), key.size());
+
+  return InsertItem(e, handle, /* free_handle_on_fail */ true);
+}
+
+void LRUCacheShard::Erase(const Slice& key, uint32_t hash) {
+  LRUHandle* e;
+  bool last_reference = false;
+  {
+    MutexLock l(&mutex_);
+    e = table_.Remove(key, hash);
+    if (e != nullptr) {
+      assert(e->InCache());
+      e->SetInCache(false);
+      if (!e->HasRefs()) {
+        // The entry is in LRU since it's in hash and has no external references
+        LRU_Remove(e);
+        size_t total_charge = e->CalcTotalCharge(metadata_charge_policy_);
+        assert(usage_ >= total_charge);
+        usage_ -= total_charge;
+        last_reference = true;
+      }
+    }
+  }
+
+  // Free the entry here outside of mutex for performance reasons.
+  // last_reference will only be true if e != nullptr.
+  if (last_reference) {
+    e->Free();
+  }
+}
+
+size_t LRUCacheShard::GetUsage() const {
+  MutexLock l(&mutex_);
+  return usage_;
+}
+
+size_t LRUCacheShard::GetPinnedUsage() const {
+  MutexLock l(&mutex_);
+  assert(usage_ >= lru_usage_);
+  return usage_ - lru_usage_;
+}
+
+std::string LRUCacheShard::GetPrintableOptions() const { return std::string{}; }
+
+LRUCache::LRUCache(size_t capacity, int num_shard_bits,
+                   bool strict_capacity_limit,
+                   CacheMetadataChargePolicy metadata_charge_policy)
+    : ShardedCache(capacity, num_shard_bits, strict_capacity_limit) {
+  num_shards_ = 1 << num_shard_bits;
+  shards_ = reinterpret_cast<LRUCacheShard*>(
+      port::cacheline_aligned_alloc(sizeof(LRUCacheShard) * num_shards_));
+  size_t per_shard = (capacity + (num_shards_ - 1)) / num_shards_;
+  for (int i = 0; i < num_shards_; i++) {
+    new (&shards_[i])
+        LRUCacheShard(per_shard, strict_capacity_limit, metadata_charge_policy,
+                      /* max_upper_hash_bits */ 32 - num_shard_bits);
+  }
+}
+
+LRUCache::~LRUCache() {
+  if (shards_ != nullptr) {
+    assert(num_shards_ > 0);
+    for (int i = 0; i < num_shards_; i++) {
+      shards_[i].~LRUCacheShard();
+    }
+    port::cacheline_aligned_free(shards_);
+  }
+}
+
+CacheShard* LRUCache::GetShard(uint32_t shard) {
+  return reinterpret_cast<CacheShard*>(&shards_[shard]);
+}
+
+const CacheShard* LRUCache::GetShard(uint32_t shard) const {
+  return reinterpret_cast<CacheShard*>(&shards_[shard]);
+}
+
+void* LRUCache::Value(Handle* handle) {
+  return reinterpret_cast<const LRUHandle*>(handle)->value;
+}
+
+size_t LRUCache::GetCharge(Handle* handle) const {
+  return reinterpret_cast<const LRUHandle*>(handle)->charge;
+}
+
+Cache::DeleterFn LRUCache::GetDeleter(Handle* handle) const {
+  auto h = reinterpret_cast<const LRUHandle*>(handle);
+  return h->deleter;
+}
+
+uint32_t LRUCache::GetHash(Handle* handle) const {
+  return reinterpret_cast<const LRUHandle*>(handle)->hash;
+}
+
+void LRUCache::DisownData() {
+  // Leak data only if that won't generate an ASAN/valgrind warning.
+  if (!kMustFreeHeapAllocations) {
+    shards_ = nullptr;
+    num_shards_ = 0;
+  }
+}
+
+}  // namespace fast_lru_cache
+
+std::shared_ptr<Cache> NewFastLRUCache(
+    size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+    CacheMetadataChargePolicy metadata_charge_policy) {
+  if (num_shard_bits >= 20) {
+    return nullptr;  // The cache cannot be sharded into too many fine pieces.
+  }
+  if (num_shard_bits < 0) {
+    num_shard_bits = GetDefaultCacheShardBits(capacity);
+  }
+  return std::make_shared<fast_lru_cache::LRUCache>(
+      capacity, num_shard_bits, strict_capacity_limit, metadata_charge_policy);
+}
+
+}  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/cache/fast_lru_cache.h 7.3.1-2/cache/fast_lru_cache.h
--- 7.2.2-5/cache/fast_lru_cache.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/cache/fast_lru_cache.h	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,299 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+#pragma once
+
+#include <memory>
+#include <string>
+
+#include "cache/sharded_cache.h"
+#include "port/lang.h"
+#include "port/malloc.h"
+#include "port/port.h"
+#include "rocksdb/secondary_cache.h"
+#include "util/autovector.h"
+
+namespace ROCKSDB_NAMESPACE {
+namespace fast_lru_cache {
+
+// An experimental (under development!) alternative to LRUCache
+
+struct LRUHandle {
+  void* value;
+  Cache::DeleterFn deleter;
+  LRUHandle* next_hash;
+  LRUHandle* next;
+  LRUHandle* prev;
+  size_t charge;  // TODO(opt): Only allow uint32_t?
+  size_t key_length;
+  // The hash of key(). Used for fast sharding and comparisons.
+  uint32_t hash;
+  // The number of external refs to this entry. The cache itself is not counted.
+  uint32_t refs;
+
+  enum Flags : uint8_t {
+    // Whether this entry is referenced by the hash table.
+    IN_CACHE = (1 << 0),
+  };
+  uint8_t flags;
+
+  // Beginning of the key (MUST BE THE LAST FIELD IN THIS STRUCT!)
+  char key_data[1];
+
+  Slice key() const { return Slice(key_data, key_length); }
+
+  // Increase the reference count by 1.
+  void Ref() { refs++; }
+
+  // Just reduce the reference count by 1. Return true if it was last reference.
+  bool Unref() {
+    assert(refs > 0);
+    refs--;
+    return refs == 0;
+  }
+
+  // Return true if there are external refs, false otherwise.
+  bool HasRefs() const { return refs > 0; }
+
+  bool InCache() const { return flags & IN_CACHE; }
+
+  void SetInCache(bool in_cache) {
+    if (in_cache) {
+      flags |= IN_CACHE;
+    } else {
+      flags &= ~IN_CACHE;
+    }
+  }
+
+  void Free() {
+    assert(refs == 0);
+    if (deleter) {
+      (*deleter)(key(), value);
+    }
+    delete[] reinterpret_cast<char*>(this);
+  }
+
+  // Calculate the memory usage by metadata.
+  inline size_t CalcTotalCharge(
+      CacheMetadataChargePolicy metadata_charge_policy) {
+    size_t meta_charge = 0;
+    if (metadata_charge_policy == kFullChargeCacheMetadata) {
+#ifdef ROCKSDB_MALLOC_USABLE_SIZE
+      meta_charge += malloc_usable_size(static_cast<void*>(this));
+#else
+      // This is the size that is used when a new handle is created.
+      meta_charge += sizeof(LRUHandle) - 1 + key_length;
+#endif
+    }
+    return charge + meta_charge;
+  }
+};
+
+// We provide our own simple hash table since it removes a whole bunch
+// of porting hacks and is also faster than some of the built-in hash
+// table implementations in some of the compiler/runtime combinations
+// we have tested.  E.g., readrandom speeds up by ~5% over the g++
+// 4.4.3's builtin hashtable.
+class LRUHandleTable {
+ public:
+  // If the table uses more hash bits than `max_upper_hash_bits`,
+  // it will eat into the bits used for sharding, which are constant
+  // for a given LRUHandleTable.
+  explicit LRUHandleTable(int max_upper_hash_bits);
+  ~LRUHandleTable();
+
+  LRUHandle* Lookup(const Slice& key, uint32_t hash);
+  LRUHandle* Insert(LRUHandle* h);
+  LRUHandle* Remove(const Slice& key, uint32_t hash);
+
+  template <typename T>
+  void ApplyToEntriesRange(T func, uint32_t index_begin, uint32_t index_end) {
+    for (uint32_t i = index_begin; i < index_end; i++) {
+      LRUHandle* h = list_[i];
+      while (h != nullptr) {
+        auto n = h->next_hash;
+        assert(h->InCache());
+        func(h);
+        h = n;
+      }
+    }
+  }
+
+  int GetLengthBits() const { return length_bits_; }
+
+ private:
+  // Return a pointer to slot that points to a cache entry that
+  // matches key/hash.  If there is no such cache entry, return a
+  // pointer to the trailing slot in the corresponding linked list.
+  LRUHandle** FindPointer(const Slice& key, uint32_t hash);
+
+  void Resize();
+
+  // Number of hash bits (upper because lower bits used for sharding)
+  // used for table index. Length == 1 << length_bits_
+  int length_bits_;
+
+  // The table consists of an array of buckets where each bucket is
+  // a linked list of cache entries that hash into the bucket.
+  std::unique_ptr<LRUHandle*[]> list_;
+
+  // Number of elements currently in the table.
+  uint32_t elems_;
+
+  // Set from max_upper_hash_bits (see constructor).
+  const int max_length_bits_;
+};
+
+// A single shard of sharded cache.
+class ALIGN_AS(CACHE_LINE_SIZE) LRUCacheShard final : public CacheShard {
+ public:
+  LRUCacheShard(size_t capacity, bool strict_capacity_limit,
+                CacheMetadataChargePolicy metadata_charge_policy,
+                int max_upper_hash_bits);
+  ~LRUCacheShard() override = default;
+
+  // Separate from constructor so caller can easily make an array of LRUCache
+  // shards. If current usage is more than the new capacity, the function will
+  // attempt to free the needed space.
+  void SetCapacity(size_t capacity) override;
+
+  // Set the flag to reject insertion if the cache is full.
+  void SetStrictCapacityLimit(bool strict_capacity_limit) override;
+
+  // Like Cache methods, but with an extra "hash" parameter.
+  Status Insert(const Slice& key, uint32_t hash, void* value, size_t charge,
+                Cache::DeleterFn deleter, Cache::Handle** handle,
+                Cache::Priority priority) override;
+
+  Status Insert(const Slice& key, uint32_t hash, void* value,
+                const Cache::CacheItemHelper* helper, size_t charge,
+                Cache::Handle** handle, Cache::Priority priority) override {
+    return Insert(key, hash, value, charge, helper->del_cb, handle, priority);
+  }
+
+  Cache::Handle* Lookup(const Slice& key, uint32_t hash,
+                        const Cache::CacheItemHelper* /*helper*/,
+                        const Cache::CreateCallback& /*create_cb*/,
+                        Cache::Priority /*priority*/, bool /*wait*/,
+                        Statistics* /*stats*/) override {
+    return Lookup(key, hash);
+  }
+  Cache::Handle* Lookup(const Slice& key, uint32_t hash) override;
+
+  bool Release(Cache::Handle* handle, bool /*useful*/,
+               bool erase_if_last_ref) override {
+    return Release(handle, erase_if_last_ref);
+  }
+  bool IsReady(Cache::Handle* /*handle*/) override { return true; }
+  void Wait(Cache::Handle* /*handle*/) override {}
+
+  bool Ref(Cache::Handle* handle) override;
+  bool Release(Cache::Handle* handle, bool erase_if_last_ref = false) override;
+  void Erase(const Slice& key, uint32_t hash) override;
+
+  size_t GetUsage() const override;
+  size_t GetPinnedUsage() const override;
+
+  void ApplyToSomeEntries(
+      const std::function<void(const Slice& key, void* value, size_t charge,
+                               DeleterFn deleter)>& callback,
+      uint32_t average_entries_per_lock, uint32_t* state) override;
+
+  void EraseUnRefEntries() override;
+
+  std::string GetPrintableOptions() const override;
+
+ private:
+  friend class LRUCache;
+  // Insert an item into the hash table and, if handle is null, insert into
+  // the LRU list. Older items are evicted as necessary. If the cache is full
+  // and free_handle_on_fail is true, the item is deleted and handle is set to
+  // nullptr.
+  Status InsertItem(LRUHandle* item, Cache::Handle** handle,
+                    bool free_handle_on_fail);
+
+  void LRU_Remove(LRUHandle* e);
+  void LRU_Insert(LRUHandle* e);
+
+  // Free some space following strict LRU policy until enough space
+  // to hold (usage_ + charge) is freed or the lru list is empty
+  // This function is not thread safe - it needs to be executed while
+  // holding the mutex_.
+  void EvictFromLRU(size_t charge, autovector<LRUHandle*>* deleted);
+
+  // Initialized before use.
+  size_t capacity_;
+
+  // Whether to reject insertion if cache reaches its full capacity.
+  bool strict_capacity_limit_;
+
+  // Dummy head of LRU list.
+  // lru.prev is the newest entry, lru.next is the oldest entry.
+  // LRU contains items which can be evicted, i.e. referenced only by the cache.
+  LRUHandle lru_;
+
+  // Pointer to head of low-pri pool in LRU list.
+  LRUHandle* lru_low_pri_;
+
+  // ------------^^^^^^^^^^^^^-----------
+  // Not frequently modified data members
+  // ------------------------------------
+  //
+  // We separate data members that are updated frequently from the ones that
+  // are not frequently updated so that they don't share the same cache line,
+  // which would lead to false cache-line sharing.
+  //
+  // ------------------------------------
+  // Frequently modified data members
+  // ------------vvvvvvvvvvvvv-----------
+  LRUHandleTable table_;
+
+  // Memory size for entries residing in the cache.
+  size_t usage_;
+
+  // Memory size for entries residing only in the LRU list.
+  size_t lru_usage_;
+
+  // mutex_ protects the following state.
+  // We don't count mutex_ as the cache's internal state so semantically we
+  // don't mind mutex_ invoking the non-const actions.
+  mutable port::Mutex mutex_;
+};
+
+class LRUCache
+#ifdef NDEBUG
+    final
+#endif
+    : public ShardedCache {
+ public:
+  LRUCache(size_t capacity, int num_shard_bits, bool strict_capacity_limit,
+           CacheMetadataChargePolicy metadata_charge_policy =
+               kDontChargeCacheMetadata);
+  ~LRUCache() override;
+  const char* Name() const override { return "LRUCache"; }
+  CacheShard* GetShard(uint32_t shard) override;
+  const CacheShard* GetShard(uint32_t shard) const override;
+  void* Value(Handle* handle) override;
+  size_t GetCharge(Handle* handle) const override;
+  uint32_t GetHash(Handle* handle) const override;
+  DeleterFn GetDeleter(Handle* handle) const override;
+  void DisownData() override;
+
+ private:
+  LRUCacheShard* shards_ = nullptr;
+  int num_shards_ = 0;
+};
+}  // namespace fast_lru_cache
+
+std::shared_ptr<Cache> NewFastLRUCache(
+    size_t capacity, int num_shard_bits = -1,
+    bool strict_capacity_limit = false,
+    CacheMetadataChargePolicy metadata_charge_policy =
+        kDefaultCacheMetadataChargePolicy);
+
+}  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/cache/lru_cache.cc 7.3.1-2/cache/lru_cache.cc
--- 7.2.2-5/cache/lru_cache.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/cache/lru_cache.cc	2022-06-08 21:08:16.000000000 +0000
@@ -19,6 +19,7 @@
 #include "util/mutexlock.h"
 
 namespace ROCKSDB_NAMESPACE {
+namespace lru_cache {
 
 LRUHandleTable::LRUHandleTable(int max_upper_hash_bits)
     : length_bits_(/* historical starting size*/ 4),
@@ -759,6 +760,8 @@ void LRUCache::WaitAll(std::vector<Handl
   }
 }
 
+}  // namespace lru_cache
+
 std::shared_ptr<Cache> NewLRUCache(
     size_t capacity, int num_shard_bits, bool strict_capacity_limit,
     double high_pri_pool_ratio,
diff -pruN 7.2.2-5/cache/lru_cache.h 7.3.1-2/cache/lru_cache.h
--- 7.2.2-5/cache/lru_cache.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/cache/lru_cache.h	2022-06-08 21:08:16.000000000 +0000
@@ -19,6 +19,7 @@
 #include "util/autovector.h"
 
 namespace ROCKSDB_NAMESPACE {
+namespace lru_cache {
 
 // LRU cache implementation. This class is not thread-safe.
 
@@ -479,4 +480,10 @@ class LRUCache
   std::shared_ptr<SecondaryCache> secondary_cache_;
 };
 
+}  // namespace lru_cache
+
+using LRUCache = lru_cache::LRUCache;
+using LRUHandle = lru_cache::LRUHandle;
+using LRUCacheShard = lru_cache::LRUCacheShard;
+
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/.circleci/config.yml 7.3.1-2/.circleci/config.yml
--- 7.2.2-5/.circleci/config.yml	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/.circleci/config.yml	2022-06-08 21:08:16.000000000 +0000
@@ -2,7 +2,6 @@ version: 2.1
 
 orbs:
   win: circleci/windows@2.4.0
-  slack: circleci/slack@3.4.2
 
 aliases:
   - &notify-on-main-failure
@@ -57,7 +56,6 @@ commands:
 
   post-steps:
     steps:
-      - slack/status: *notify-on-main-failure
       - store_test_results: # store test result if there's any
           path: /tmp/test-results
       - store_artifacts: # store LOG for debugging if there's any
@@ -113,6 +111,12 @@ commands:
             cmake .. -GNinja -DCMAKE_BUILD_TYPE=Release -DBENCHMARK_ENABLE_GTEST_TESTS=0
             ninja && sudo ninja install
 
+  install-valgrind:
+    steps:
+      - run:
+          name: Install valgrind
+          command: sudo apt-get update -y && sudo apt-get install -y valgrind
+
   upgrade-cmake:
     steps:
       - run:
@@ -148,8 +152,8 @@ commands:
       - run:
           name: Install libprotobuf-mutator libs
           command: |
-            git clone --single-branch --branch master --depth 1 git@github.com:google/libprotobuf-mutator.git ~/libprotobuf-mutator
-            cd ~/libprotobuf-mutator && mkdir build && cd build
+            git clone -b v1.0 git@github.com:google/libprotobuf-mutator.git ~/libprotobuf-mutator
+            cd ~/libprotobuf-mutator && git checkout ffd86a32874e5c08a143019aad1aaf0907294c9f && mkdir build && cd build
             cmake .. -GNinja -DCMAKE_C_COMPILER=clang-13 -DCMAKE_CXX_COMPILER=clang++-13 -DCMAKE_BUILD_TYPE=Release -DLIB_PROTO_MUTATOR_DOWNLOAD_PROTOBUF=ON
             ninja && sudo ninja install
       - run:
@@ -176,7 +180,7 @@ jobs:
       - increase-max-open-files-on-macos
       - install-gflags-on-macos
       - pre-steps-macos
-      - run: ulimit -S -n 1048576 && OPT=-DCIRCLECI make V=1 J=32 -j32 all
+      - run: ulimit -S -n `ulimit -H -n` && OPT=-DCIRCLECI make V=1 J=32 -j32 all
       - post-steps
 
   build-macos-cmake:
@@ -195,7 +199,7 @@ jobs:
       - pre-steps-macos
       - run:
           name: "cmake generate project file"
-          command: ulimit -S -n 1048576 && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
+          command: ulimit -S -n `ulimit -H -n` && mkdir build && cd build && cmake -DWITH_GFLAGS=1 ..
       - run:
           name: "Build tests"
           command: cd build && make V=1 -j32
@@ -204,14 +208,14 @@ jobs:
           steps:
             - run:
                 name: "Run even tests"
-                command: ulimit -S -n 1048576 && cd build && ctest -j32 -I 0,,2
+                command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 0,,2
       - when:
           condition:
             not: << parameters.run_even_tests >>
           steps:
             - run:
                 name: "Run odd tests"
-                command: ulimit -S -n 1048576 && cd build && ctest -j32 -I 1,,2
+                command: ulimit -S -n `ulimit -H -n` && cd build && ctest -j32 -I 1,,2
       - post-steps
 
   build-linux:
@@ -224,14 +228,16 @@ jobs:
       - run: make V=1 J=32 -j32 check
       - post-steps
 
-  build-linux-encrypted-env:
+  build-linux-encrypted_env-no_compression:
     machine:
       image: ubuntu-2004:202111-02
     resource_class: 2xlarge
     steps:
       - pre-steps
       - install-gflags
-      - run: ENCRYPTED_ENV=1 make V=1 J=32 -j32 check
+      - run: ENCRYPTED_ENV=1 ROCKSDB_DISABLE_SNAPPY=1 ROCKSDB_DISABLE_ZLIB=1 ROCKSDB_DISABLE_BZIP=1 ROCKSDB_DISABLE_LZ4=1 ROCKSDB_DISABLE_ZSTD=1 make V=1 J=32 -j32 check
+      - run: |
+          ./sst_dump --help | egrep -q 'Supported compression types: kNoCompression$' # Verify no compiled in compression
       - post-steps
 
   build-linux-shared_lib-alt_namespace-status_checked:
@@ -312,7 +318,7 @@ jobs:
       - pre-steps
       - install-gflags
       - install-clang-10
-      - run: ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
+      - run: COMPILE_WITH_ASAN=1 CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 check # aligned new doesn't work for reason we haven't figured out
       - post-steps
 
   build-linux-clang10-mini-tsan:
@@ -356,6 +362,17 @@ jobs:
       - run: COMPILE_WITH_UBSAN=1 OPT="-fsanitize-blacklist=.circleci/ubsan_suppression_list.txt" CC=clang-10 CXX=clang++-10 ROCKSDB_DISABLE_ALIGNED_NEW=1 USE_CLANG=1 make V=1 -j32 ubsan_check # aligned new doesn't work for reason we haven't figured out
       - post-steps
 
+  build-linux-valgrind:
+    machine:
+      image: ubuntu-2004:202111-02
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - install-gflags
+      - install-valgrind
+      - run: PORTABLE=1 make V=1 -j32 valgrind_test
+      - post-steps
+
   build-linux-clang10-clang-analyze:
     machine:
       image: ubuntu-2004:202111-02
@@ -394,11 +411,13 @@ jobs:
   build-linux-unity-and-headers:
     docker: # executor type
       - image: gcc:latest
+    environment:
+      EXTRA_CXXFLAGS: -mno-avx512f # Warnings-as-error in avx512fintrin.h, would be used on newer hardware
     resource_class: large
     steps:
       - checkout # check out the code in the project directory
       - run: apt-get update -y && apt-get install -y libgflags-dev
-      - run: TEST_TMPDIR=/dev/shm && make V=1 -j8 unity_test
+      - run: make V=1 -j8 unity_test
       - run: make V=1 -j8 -k check-headers # could be moved to a different build
       - post-steps
 
@@ -455,6 +474,19 @@ jobs:
       - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 make -j16 all microbench
       - post-steps
 
+  # Ensure ASAN+UBSAN with folly, and full testsuite with clang 13
+  build-linux-clang-13-asan-ubsan-with-folly:
+    machine:
+      image: ubuntu-2004:202111-02
+    resource_class: 2xlarge
+    steps:
+      - pre-steps
+      - install-clang-13
+      - install-gflags
+      - run: make checkout_folly
+      - run: CC=clang-13 CXX=clang++-13 USE_CLANG=1 USE_FOLLY=1 COMPILE_WITH_UBSAN=1 COMPILE_WITH_ASAN=1 make -j32 check
+      - post-steps
+
   # This job is only to make sure the microbench tests are able to run, the benchmark result is not meaningful as the CI host is changing.
   build-linux-run-microbench:
     machine:
@@ -474,7 +506,7 @@ jobs:
       - pre-steps
       - install-gflags
       - install-compression-libs
-      - run: make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
+      - run: ulimit -S -n `ulimit -H -n` && make V=1 -j8 CRASH_TEST_EXT_ARGS=--duration=960 blackbox_crash_test_with_atomic_flush
       - post-steps
 
   build-windows:
@@ -796,108 +828,76 @@ jobs:
 
 workflows:
   version: 2
-  build-linux:
+  jobs-linux-run-tests:
     jobs:
       - build-linux
-  build-linux-cmake:
-    jobs:
       - build-linux-cmake-with-folly
+      - build-linux-gcc-7-with-folly
       - build-linux-cmake-with-benchmark
-  build-linux-encrypted-env:
-    jobs:
-      - build-linux-encrypted-env
-  build-linux-shared_lib-alt_namespace-status_checked:
-    jobs:
-      - build-linux-shared_lib-alt_namespace-status_checked
-  build-linux-lite:
-    jobs:
+      - build-linux-encrypted_env-no_compression
       - build-linux-lite
-  build-linux-release:
-    jobs:
-      - build-linux-release
-  build-linux-release-rtti:
-    jobs:
-      - build-linux-release-rtti
-  build-linux-lite-release:
-    jobs:
-      - build-linux-lite-release
-  build-linux-clang10-asan:
+  jobs-linux-run-tests-san:
     jobs:
       - build-linux-clang10-asan
-  build-linux-clang10-mini-tsan:
-    jobs:
+      - build-linux-clang10-ubsan
       - build-linux-clang10-mini-tsan:
           start_test: ""
           end_test: "env_test"
       - build-linux-clang10-mini-tsan:
           start_test: "env_test"
           end_test: ""
-  build-linux-clang10-ubsan:
+      - build-linux-shared_lib-alt_namespace-status_checked
+  jobs-linux-no-test-run:
     jobs:
-      - build-linux-clang10-ubsan
-  build-linux-clang10-clang-analyze:
+      - build-linux-release
+      - build-linux-release-rtti
+      - build-linux-lite-release
+      - build-examples
+      - build-fuzzers
+      - build-linux-clang-no_test_run
+      - build-linux-clang-13-no_test_run
+      - build-linux-gcc-8-no_test_run
+      - build-linux-gcc-10-cxx20-no_test_run
+      - build-linux-gcc-11-no_test_run
+      - build-linux-arm-cmake-no_test_run
+  jobs-linux-other-checks:
     jobs:
       - build-linux-clang10-clang-analyze
-  build-linux-unity-and-headers:
-    jobs:
       - build-linux-unity-and-headers
-  build-linux-mini-crashtest:
-    jobs:
       - build-linux-mini-crashtest
-  build-windows-vs2019:
+  jobs-windows:
     jobs:
       - build-windows:
           name: "build-windows-vs2019"
-  build-windows-vs2019-cxx20:
-    jobs:
       - build-windows:
           name: "build-windows-vs2019-cxx20"
           extra_cmake_opt: -DCMAKE_CXX_STANDARD=20
-  build-windows-vs2017:
-    jobs:
       - build-windows:
           name: "build-windows-vs2017"
           vs_year: "2017"
           cmake_generator: "Visual Studio 15 Win64"
-  build-java:
+      - build-cmake-mingw
+  jobs-java:
     jobs:
       - build-linux-java
       - build-linux-java-static
       - build-macos-java
       - build-macos-java-static
       - build-macos-java-static-universal
-  build-examples:
-    jobs:
-      - build-examples
-  build-linux-compilers-no_test_run:
-    jobs:
-      - build-linux-clang-no_test_run
-      - build-linux-clang-13-no_test_run
-      - build-linux-gcc-7-with-folly
-      - build-linux-gcc-8-no_test_run
-      - build-linux-gcc-10-cxx20-no_test_run
-      - build-linux-gcc-11-no_test_run
-      - build-linux-arm-cmake-no_test_run
-  build-macos:
+  jobs-macos:
     jobs:
       - build-macos
       - build-macos-cmake:
           run_even_tests: true
       - build-macos-cmake:
           run_even_tests: false
-  build-cmake-mingw:
-    jobs:
-      - build-cmake-mingw
-  build-linux-arm:
+  jobs-linux-arm:
     jobs:
       - build-linux-arm
-  build-fuzzers:
-    jobs:
-      - build-fuzzers
   nightly:
     triggers:
       - schedule:
-          cron: "0 0 * * *"
+          cron: "0 9 * * *"
           filters:
             branches:
               only:
@@ -907,3 +907,5 @@ workflows:
       - build-linux-arm-test-full
       - build-linux-run-microbench
       - build-linux-non-shm
+      - build-linux-clang-13-asan-ubsan-with-folly
+      - build-linux-valgrind
diff -pruN 7.2.2-5/CMakeLists.txt 7.3.1-2/CMakeLists.txt
--- 7.2.2-5/CMakeLists.txt	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/CMakeLists.txt	2022-06-08 21:08:16.000000000 +0000
@@ -40,6 +40,8 @@ include(GoogleTest)
 get_rocksdb_version(rocksdb_VERSION)
 project(rocksdb
   VERSION ${rocksdb_VERSION}
+  DESCRIPTION "An embeddable persistent key-value store for fast storage"
+  HOMEPAGE_URL https://rocksdb.org/
   LANGUAGES CXX C ASM)
 
 if(POLICY CMP0042)
@@ -335,9 +337,6 @@ endif()
 # Reset the required flags
 set(CMAKE_REQUIRED_FLAGS ${OLD_CMAKE_REQUIRED_FLAGS})
 
-# thread_local is part of C++11 and later (TODO: clean up this define)
-add_definitions(-DROCKSDB_SUPPORT_THREAD_LOCAL)
-
 option(WITH_IOSTATS_CONTEXT "Enable IO stats context" ON)
 if (NOT WITH_IOSTATS_CONTEXT)
   add_definitions(-DNIOSTATS_CONTEXT)
@@ -596,6 +595,7 @@ set(SOURCES
         cache/cache_reservation_manager.cc
         cache/clock_cache.cc
         cache/compressed_secondary_cache.cc
+        cache/fast_lru_cache.cc
         cache/lru_cache.cc
         cache/sharded_cache.cc
         db/arena_wrapped_db_iter.cc
@@ -796,6 +796,8 @@ set(SOURCES
         trace_replay/trace_record_result.cc
         trace_replay/trace_record.cc
         trace_replay/trace_replay.cc
+        util/async_file_reader.cc
+        util/cleanable.cc
         util/coding.cc
         util/compaction_job_stats_impl.cc
         util/comparator.cc
@@ -1119,8 +1121,20 @@ if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDO
     COMPATIBILITY SameMajorVersion
   )
 
+  configure_file(
+    ${CMAKE_CURRENT_SOURCE_DIR}/${PROJECT_NAME}.pc.in
+    ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc
+    @ONLY
+  )
+
   install(DIRECTORY include/rocksdb COMPONENT devel DESTINATION "${CMAKE_INSTALL_INCLUDEDIR}")
 
+  foreach (plugin ${PLUGINS})
+    foreach (header ${${plugin}_HEADERS})
+      install(FILES plugin/${plugin}/${header} DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rocksdb/plugin/${plugin})
+    endforeach()
+  endforeach()
+
   install(DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/modules" COMPONENT devel DESTINATION ${package_config_destination})
 
   install(
@@ -1157,6 +1171,13 @@ if(NOT WIN32 OR ROCKSDB_INSTALL_ON_WINDO
     COMPONENT devel
     DESTINATION ${package_config_destination}
   )
+
+  install(
+    FILES
+    ${CMAKE_CURRENT_BINARY_DIR}/${PROJECT_NAME}.pc
+    COMPONENT devel
+    DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig
+  )
 endif()
 
 option(WITH_ALL_TESTS "Build all test, rather than a small subset" ON)
@@ -1202,6 +1223,7 @@ if(WITH_TESTS)
         db/comparator_db_test.cc
         db/corruption_test.cc
         db/cuckoo_table_db_test.cc
+        db/db_readonly_with_timestamp_test.cc
         db/db_with_timestamp_basic_test.cc
         db/db_block_cache_test.cc
         db/db_bloom_filter_test.cc
@@ -1370,6 +1392,7 @@ if(WITH_TESTS)
 
   set(TESTUTIL_SOURCE
       db/db_test_util.cc
+      db/db_with_timestamp_test_util.cc
       monitoring/thread_status_updater_debug.cc
       table/mock_table.cc
       utilities/agg_merge/test_agg_merge.cc
diff -pruN 7.2.2-5/common.mk 7.3.1-2/common.mk
--- 7.2.2-5/common.mk	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/common.mk	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,30 @@
+ifndef PYTHON
+
+# Default to python3. Some distros like CentOS 8 do not have `python`.
+ifeq ($(origin PYTHON), undefined)
+	PYTHON := $(shell which python3 || which python || echo python3)
+endif
+export PYTHON
+
+endif
+
+# To set up the tmp directory, first recognize some old variables for setting
+# the test tmp directory or base tmp directory. TEST_TMPDIR is usually read
+# by RocksDB tools through Env/FileSystem::GetTestDirectory.
+ifeq ($(TEST_TMPDIR),)
+TEST_TMPDIR := $(TMPD)
+endif
+ifeq ($(TEST_TMPDIR),)
+ifeq ($(BASE_TMPDIR),)
+BASE_TMPDIR :=$(TMPDIR)
+endif
+ifeq ($(BASE_TMPDIR),)
+BASE_TMPDIR :=/tmp
+endif
+# Use /dev/shm if it has the sticky bit set (otherwise, /tmp or other
+# base dir), and create a randomly-named rocksdb.XXXX directory therein.
+TEST_TMPDIR := $(shell f=/dev/shm; test -k $$f || f=$(BASE_TMPDIR); \
+  perl -le 'use File::Temp "tempdir";'	                            \
+    -e 'print tempdir("'$$f'/rocksdb.XXXX", CLEANUP => 0)')
+endif
+export TEST_TMPDIR
diff -pruN 7.2.2-5/crash_test.mk 7.3.1-2/crash_test.mk
--- 7.2.2-5/crash_test.mk	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/crash_test.mk	2022-06-08 21:08:16.000000000 +0000
@@ -5,7 +5,7 @@
 # build DB_STRESS_CMD so it must exist prior.
 DB_STRESS_CMD?=./db_stress
 
-include python.mk
+include common.mk
 
 CRASHTEST_MAKE=$(MAKE) -f crash_test.mk
 CRASHTEST_PY=$(PYTHON) -u tools/db_crashtest.py --stress_cmd=$(DB_STRESS_CMD)
@@ -65,10 +65,10 @@ blackbox_crash_test_with_ts: $(DB_STRESS
 	$(CRASHTEST_PY) --enable_ts blackbox $(CRASH_TEST_EXT_ARGS)
 
 blackbox_crash_test_with_multiops_wc_txn: $(DB_STRESS_CMD)
-	$(PYTHON) -u tools/db_crashtest.py --test_multiops_txn --write_policy write_committed blackbox $(CRASH_TEST_EXT_ARGS)
+	$(CRASHTEST_PY) --test_multiops_txn --write_policy write_committed blackbox $(CRASH_TEST_EXT_ARGS)
 
 blackbox_crash_test_with_multiops_wp_txn: $(DB_STRESS_CMD)
-	$(PYTHON) -u tools/db_crashtest.py --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
+	$(CRASHTEST_PY) --test_multiops_txn --write_policy write_prepared blackbox $(CRASH_TEST_EXT_ARGS)
 
 ifeq ($(CRASH_TEST_KILL_ODD),)
   CRASH_TEST_KILL_ODD=888887
diff -pruN 7.2.2-5/db/arena_wrapped_db_iter.cc 7.3.1-2/db/arena_wrapped_db_iter.cc
--- 7.2.2-5/db/arena_wrapped_db_iter.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/arena_wrapped_db_iter.cc	2022-06-08 21:08:16.000000000 +0000
@@ -23,7 +23,7 @@ Status ArenaWrappedDBIter::GetProperty(s
   if (prop_name == "rocksdb.iterator.super-version-number") {
     // First try to pass the value returned from inner iterator.
     if (!db_iter_->GetProperty(prop_name, prop).ok()) {
-      *prop = ToString(sv_number_);
+      *prop = std::to_string(sv_number_);
     }
     return Status::OK();
   }
diff -pruN 7.2.2-5/db/blob/blob_index.h 7.3.1-2/db/blob/blob_index.h
--- 7.2.2-5/db/blob/blob_index.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/blob/blob_index.h	2022-06-08 21:08:16.000000000 +0000
@@ -96,9 +96,9 @@ class BlobIndex {
     assert(slice.size() > 0);
     type_ = static_cast<Type>(*slice.data());
     if (type_ >= Type::kUnknown) {
-      return Status::Corruption(
-          kErrorMessage,
-          "Unknown blob index type: " + ToString(static_cast<char>(type_)));
+      return Status::Corruption(kErrorMessage,
+                                "Unknown blob index type: " +
+                                    std::to_string(static_cast<char>(type_)));
     }
     slice = Slice(slice.data() + 1, slice.size() - 1);
     if (HasTTL()) {
diff -pruN 7.2.2-5/db/blob/db_blob_index_test.cc 7.3.1-2/db/blob/db_blob_index_test.cc
--- 7.2.2-5/db/blob/db_blob_index_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/blob/db_blob_index_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -153,11 +153,11 @@ TEST_F(DBBlobIndexTest, Write) {
     key_values.reserve(num_key_values);
 
     for (size_t i = 1; i <= num_key_values; ++i) {
-      std::string key = "key" + ToString(i);
+      std::string key = "key" + std::to_string(i);
 
       std::string blob_index;
       BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 9876543210,
-                                  "blob" + ToString(i));
+                                  "blob" + std::to_string(i));
 
       key_values.emplace_back(std::move(key), std::move(blob_index));
     }
@@ -230,7 +230,7 @@ TEST_F(DBBlobIndexTest, Updated) {
     DestroyAndReopen(GetTestOptions());
     WriteBatch batch;
     for (int i = 0; i < 10; i++) {
-      ASSERT_OK(PutBlobIndex(&batch, "key" + ToString(i), blob_index));
+      ASSERT_OK(PutBlobIndex(&batch, "key" + std::to_string(i), blob_index));
     }
     ASSERT_OK(Write(&batch));
     // Avoid blob values from being purged.
@@ -248,7 +248,7 @@ TEST_F(DBBlobIndexTest, Updated) {
     ASSERT_OK(dbfull()->DeleteRange(WriteOptions(), cfh(), "key6", "key9"));
     MoveDataTo(tier);
     for (int i = 0; i < 10; i++) {
-      ASSERT_EQ(blob_index, GetBlobIndex("key" + ToString(i), snapshot));
+      ASSERT_EQ(blob_index, GetBlobIndex("key" + std::to_string(i), snapshot));
     }
     ASSERT_EQ("new_value", Get("key1"));
     if (tier <= kImmutableMemtables) {
@@ -260,7 +260,7 @@ TEST_F(DBBlobIndexTest, Updated) {
     ASSERT_EQ("NOT_FOUND", Get("key4"));
     ASSERT_EQ("a,b,c", GetImpl("key5"));
     for (int i = 6; i < 9; i++) {
-      ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
+      ASSERT_EQ("NOT_FOUND", Get("key" + std::to_string(i)));
     }
     ASSERT_EQ(blob_index, GetBlobIndex("key9"));
     dbfull()->ReleaseSnapshot(snapshot);
@@ -301,7 +301,7 @@ TEST_F(DBBlobIndexTest, Iterate) {
   };
 
   auto get_value = [&](int index, int version) {
-    return get_key(index) + "_value" + ToString(version);
+    return get_key(index) + "_value" + std::to_string(version);
   };
 
   auto check_iterator = [&](Iterator* iterator, Status::Code expected_status,
@@ -501,7 +501,7 @@ TEST_F(DBBlobIndexTest, IntegratedBlobIt
   auto get_key = [](size_t index) { return ("key" + std::to_string(index)); };
 
   auto get_value = [&](size_t index, size_t version) {
-    return get_key(index) + "_value" + ToString(version);
+    return get_key(index) + "_value" + std::to_string(version);
   };
 
   auto check_iterator = [&](Iterator* iterator, Status expected_status,
diff -pruN 7.2.2-5/db/builder.cc 7.3.1-2/db/builder.cc
--- 7.2.2-5/db/builder.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/builder.cc	2022-06-08 21:08:16.000000000 +0000
@@ -37,6 +37,7 @@
 #include "table/block_based/block_based_table_builder.h"
 #include "table/format.h"
 #include "table/internal_iterator.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
 #include "util/stop_watch.h"
 
@@ -115,6 +116,7 @@ Status BuildTable(
   assert(fs);
 
   TableProperties tp;
+  bool table_file_created = false;
   if (iter->Valid() || !range_del_agg->IsEmpty()) {
     std::unique_ptr<CompactionFilter> compaction_filter;
     if (ioptions.compaction_filter_factory != nullptr &&
@@ -158,6 +160,8 @@ Status BuildTable(
             file_checksum_func_name);
         return s;
       }
+
+      table_file_created = true;
       FileTypeSet tmp_set = ioptions.checksum_handoff_file_types;
       file->SetIOPriority(io_priority);
       file->SetWriteLifeTimeHint(write_hint);
@@ -193,6 +197,7 @@ Status BuildTable(
         ShouldReportDetailedTime(env, ioptions.stats),
         true /* internal key corruption is not ok */, range_del_agg.get(),
         blob_file_builder.get(), ioptions.allow_data_in_errors,
+        ioptions.enforce_single_del_contracts,
         /*compaction=*/nullptr, compaction_filter.get(),
         /*shutting_down=*/nullptr,
         /*manual_compaction_paused=*/nullptr,
@@ -306,6 +311,15 @@ Status BuildTable(
       meta->file_checksum_func_name = file_writer->GetFileChecksumFuncName();
       file_checksum = meta->file_checksum;
       file_checksum_func_name = meta->file_checksum_func_name;
+      // Set unique_id only if db_id and db_session_id exist
+      if (!tboptions.db_id.empty() && !tboptions.db_session_id.empty()) {
+        if (!GetSstInternalUniqueId(tboptions.db_id, tboptions.db_session_id,
+                                    meta->fd.GetNumber(), &(meta->unique_id))
+                 .ok()) {
+          // if failed to get unique id, just set it Null
+          meta->unique_id = kNullUniqueId64x2;
+        }
+      }
     }
 
     if (s.ok()) {
@@ -327,10 +341,9 @@ Status BuildTable(
     if (s.ok() && !empty) {
       // Verify that the table is usable
       // We set for_compaction to false and don't OptimizeForCompactionTableRead
-      // here because this is a special case after we finish the table building
+      // here because this is a special case after we finish the table building.
       // No matter whether use_direct_io_for_flush_and_compaction is true,
-      // we will regrad this verification as user reads since the goal is
-      // to cache it here for further user reads
+      // the goal is to cache it here for further user reads.
       ReadOptions read_options;
       std::unique_ptr<InternalIterator> it(table_cache->NewIterator(
           read_options, file_options, tboptions.internal_comparator, *meta,
@@ -371,15 +384,17 @@ Status BuildTable(
 
     constexpr IODebugContext* dbg = nullptr;
 
-    Status ignored = fs->DeleteFile(fname, IOOptions(), dbg);
-    ignored.PermitUncheckedError();
+    if (table_file_created) {
+      Status ignored = fs->DeleteFile(fname, IOOptions(), dbg);
+      ignored.PermitUncheckedError();
+    }
 
     assert(blob_file_additions || blob_file_paths.empty());
 
     if (blob_file_additions) {
       for (const std::string& blob_file_path : blob_file_paths) {
-        ignored = DeleteDBFile(&db_options, blob_file_path, dbname,
-                               /*force_bg=*/false, /*force_fg=*/false);
+        Status ignored = DeleteDBFile(&db_options, blob_file_path, dbname,
+                                      /*force_bg=*/false, /*force_fg=*/false);
         ignored.PermitUncheckedError();
         TEST_SYNC_POINT("BuildTable::AfterDeleteFile");
       }
diff -pruN 7.2.2-5/db/c.cc 7.3.1-2/db/c.cc
--- 7.2.2-5/db/c.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/c.cc	2022-06-08 21:08:16.000000000 +0000
@@ -1163,6 +1163,43 @@ void rocksdb_multi_get_cf(
   }
 }
 
+void rocksdb_batched_multi_get_cf(rocksdb_t* db,
+                                  const rocksdb_readoptions_t* options,
+                                  rocksdb_column_family_handle_t* column_family,
+                                  size_t num_keys, const char* const* keys_list,
+                                  const size_t* keys_list_sizes,
+                                  rocksdb_pinnableslice_t** values, char** errs,
+                                  const bool sorted_input) {
+  Slice* key_slices = new Slice[num_keys];
+  PinnableSlice* value_slices = new PinnableSlice[num_keys];
+  Status* statuses = new Status[num_keys];
+  for (size_t i = 0; i < num_keys; ++i) {
+    key_slices[i] = Slice(keys_list[i], keys_list_sizes[i]);
+  }
+
+  db->rep->MultiGet(options->rep, column_family->rep, num_keys, key_slices,
+                    value_slices, statuses, sorted_input);
+
+  for (size_t i = 0; i < num_keys; ++i) {
+    if (statuses[i].ok()) {
+      values[i] = new (rocksdb_pinnableslice_t);
+      values[i]->rep = std::move(value_slices[i]);
+      errs[i] = nullptr;
+    } else {
+      values[i] = nullptr;
+      if (!statuses[i].IsNotFound()) {
+        errs[i] = strdup(statuses[i].ToString().c_str());
+      } else {
+        errs[i] = nullptr;
+      }
+    }
+  }
+
+  delete[] key_slices;
+  delete[] value_slices;
+  delete[] statuses;
+}
+
 unsigned char rocksdb_key_may_exist(rocksdb_t* db,
                                     const rocksdb_readoptions_t* options,
                                     const char* key, size_t key_len,
@@ -2818,6 +2855,20 @@ void rocksdb_options_set_bottommost_comp
   opt->rep.bottommost_compression_opts.enabled = enabled;
 }
 
+void rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer,
+    unsigned char enabled) {
+  opt->rep.bottommost_compression_opts.use_zstd_dict_trainer =
+      use_zstd_dict_trainer;
+  opt->rep.bottommost_compression_opts.enabled = enabled;
+}
+
+unsigned char
+rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt) {
+  return opt->rep.bottommost_compression_opts.use_zstd_dict_trainer;
+}
+
 void rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
     rocksdb_options_t* opt, uint64_t max_dict_buffer_bytes,
     unsigned char enabled) {
@@ -2845,6 +2896,16 @@ int rocksdb_options_get_compression_opti
   return opt->rep.compression_opts.zstd_max_train_bytes;
 }
 
+void rocksdb_options_set_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt, unsigned char use_zstd_dict_trainer) {
+  opt->rep.compression_opts.use_zstd_dict_trainer = use_zstd_dict_trainer;
+}
+
+unsigned char rocksdb_options_get_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt) {
+  return opt->rep.compression_opts.use_zstd_dict_trainer;
+}
+
 void rocksdb_options_set_compression_options_parallel_threads(
     rocksdb_options_t* opt, int value) {
   opt->rep.compression_opts.parallel_threads = value;
@@ -3636,6 +3697,8 @@ uint64_t rocksdb_perfcontext_metric(rock
       return rep->env_unlock_file_nanos;
     case rocksdb_env_new_logger_nanos:
       return rep->env_new_logger_nanos;
+    case rocksdb_number_async_seek:
+      return rep->number_async_seek;
     default:
       break;
   }
@@ -4193,6 +4256,14 @@ rocksdb_cache_t* rocksdb_cache_create_lr
   return c;
 }
 
+rocksdb_cache_t* rocksdb_cache_create_lru_with_strict_capacity_limit(
+    size_t capacity) {
+  rocksdb_cache_t* c = new rocksdb_cache_t;
+  c->rep = NewLRUCache(capacity);
+  c->rep->SetStrictCapacityLimit(true);
+  return c;
+}
+
 rocksdb_cache_t* rocksdb_cache_create_lru_opts(
     rocksdb_lru_cache_options_t* opt) {
   rocksdb_cache_t* c = new rocksdb_cache_t;
diff -pruN 7.2.2-5/db/column_family.cc 7.3.1-2/db/column_family.cc
--- 7.2.2-5/db/column_family.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/column_family.cc	2022-06-08 21:08:16.000000000 +0000
@@ -136,9 +136,15 @@ Status CheckCompressionSupported(const C
     }
   }
   if (cf_options.compression_opts.zstd_max_train_bytes > 0) {
-    if (!ZSTD_TrainDictionarySupported()) {
+    if (cf_options.compression_opts.use_zstd_dict_trainer) {
+      if (!ZSTD_TrainDictionarySupported()) {
+        return Status::InvalidArgument(
+            "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
+            "is not linked with the binary.");
+      }
+    } else if (!ZSTD_FinalizeDictionarySupported()) {
       return Status::InvalidArgument(
-          "zstd dictionary trainer cannot be used because ZSTD 1.1.3+ "
+          "zstd finalizeDictionary cannot be used because ZSTD 1.4.5+ "
           "is not linked with the binary.");
     }
     if (cf_options.compression_opts.max_dict_bytes == 0) {
@@ -501,7 +507,8 @@ std::vector<std::string> ColumnFamilyDat
   return paths;
 }
 
-const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId = port::kMaxUint32;
+const uint32_t ColumnFamilyData::kDummyColumnFamilyDataId =
+    std::numeric_limits<uint32_t>::max();
 
 ColumnFamilyData::ColumnFamilyData(
     uint32_t id, const std::string& name, Version* _dummy_versions,
@@ -826,8 +833,8 @@ int GetL0ThresholdSpeedupCompaction(int
   // condition.
   // Or twice as compaction trigger, if it is smaller.
   int64_t res = std::min(twice_level0_trigger, one_fourth_trigger_slowdown);
-  if (res >= port::kMaxInt32) {
-    return port::kMaxInt32;
+  if (res >= std::numeric_limits<int32_t>::max()) {
+    return std::numeric_limits<int32_t>::max();
   } else {
     // res fits in int
     return static_cast<int>(res);
diff -pruN 7.2.2-5/db/column_family_test.cc 7.3.1-2/db/column_family_test.cc
--- 7.2.2-5/db/column_family_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/column_family_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -383,7 +383,7 @@ class ColumnFamilyTestBase : public test
 
   int NumTableFilesAtLevel(int level, int cf) {
     return GetProperty(cf,
-                       "rocksdb.num-files-at-level" + ToString(level));
+                       "rocksdb.num-files-at-level" + std::to_string(level));
   }
 
 #ifndef ROCKSDB_LITE
@@ -783,7 +783,7 @@ TEST_P(ColumnFamilyTest, BulkAddDrop) {
   std::vector<std::string> cf_names;
   std::vector<ColumnFamilyHandle*> cf_handles;
   for (int i = 1; i <= kNumCF; i++) {
-    cf_names.push_back("cf1-" + ToString(i));
+    cf_names.push_back("cf1-" + std::to_string(i));
   }
   ASSERT_OK(db_->CreateColumnFamilies(cf_options, cf_names, &cf_handles));
   for (int i = 1; i <= kNumCF; i++) {
@@ -796,7 +796,8 @@ TEST_P(ColumnFamilyTest, BulkAddDrop) {
   }
   cf_handles.clear();
   for (int i = 1; i <= kNumCF; i++) {
-    cf_descriptors.emplace_back("cf2-" + ToString(i), ColumnFamilyOptions());
+    cf_descriptors.emplace_back("cf2-" + std::to_string(i),
+                                ColumnFamilyOptions());
   }
   ASSERT_OK(db_->CreateColumnFamilies(cf_descriptors, &cf_handles));
   for (int i = 1; i <= kNumCF; i++) {
@@ -820,7 +821,7 @@ TEST_P(ColumnFamilyTest, DropTest) {
     Open({"default"});
     CreateColumnFamiliesAndReopen({"pikachu"});
     for (int i = 0; i < 100; ++i) {
-      ASSERT_OK(Put(1, ToString(i), "bar" + ToString(i)));
+      ASSERT_OK(Put(1, std::to_string(i), "bar" + std::to_string(i)));
     }
     ASSERT_OK(Flush(1));
 
@@ -1344,7 +1345,7 @@ TEST_P(ColumnFamilyTest, DifferentCompac
     PutRandomData(1, 10, 12000);
     PutRandomData(1, 1, 10);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
 
   // SETUP column family "two" -- level style with 4 levels
@@ -1352,7 +1353,7 @@ TEST_P(ColumnFamilyTest, DifferentCompac
     PutRandomData(2, 10, 12000);
     PutRandomData(2, 1, 10);
     WaitForFlush(2);
-    AssertFilesPerLevel(ToString(i + 1), 2);
+    AssertFilesPerLevel(std::to_string(i + 1), 2);
   }
 
   // TRIGGER compaction "one"
@@ -1416,7 +1417,7 @@ TEST_P(ColumnFamilyTest, MultipleManualC
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
   bool cf_1_1 = true;
   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->LoadDependency(
@@ -1446,7 +1447,7 @@ TEST_P(ColumnFamilyTest, MultipleManualC
     PutRandomData(2, 10, 12000);
     PutRandomData(2, 1, 10);
     WaitForFlush(2);
-    AssertFilesPerLevel(ToString(i + 1), 2);
+    AssertFilesPerLevel(std::to_string(i + 1), 2);
   }
   threads.emplace_back([&] {
     TEST_SYNC_POINT("ColumnFamilyTest::MultiManual:1");
@@ -1533,7 +1534,7 @@ TEST_P(ColumnFamilyTest, AutomaticAndMan
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
 
   TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:1");
@@ -1543,7 +1544,7 @@ TEST_P(ColumnFamilyTest, AutomaticAndMan
     PutRandomData(2, 10, 12000);
     PutRandomData(2, 1, 10);
     WaitForFlush(2);
-    AssertFilesPerLevel(ToString(i + 1), 2);
+    AssertFilesPerLevel(std::to_string(i + 1), 2);
   }
   ROCKSDB_NAMESPACE::port::Thread threads([&] {
     CompactRangeOptions compact_options;
@@ -1615,7 +1616,7 @@ TEST_P(ColumnFamilyTest, ManualAndAutoma
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
   bool cf_1_1 = true;
   bool cf_1_2 = true;
@@ -1650,7 +1651,7 @@ TEST_P(ColumnFamilyTest, ManualAndAutoma
     PutRandomData(2, 10, 12000);
     PutRandomData(2, 1, 10);
     WaitForFlush(2);
-    AssertFilesPerLevel(ToString(i + 1), 2);
+    AssertFilesPerLevel(std::to_string(i + 1), 2);
   }
   TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:5");
   threads.join();
@@ -1709,7 +1710,7 @@ TEST_P(ColumnFamilyTest, SameCFManualMan
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
   bool cf_1_1 = true;
   bool cf_1_2 = true;
@@ -1748,8 +1749,8 @@ TEST_P(ColumnFamilyTest, SameCFManualMan
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i),
-                        1);
+    AssertFilesPerLevel(
+        std::to_string(one.level0_file_num_compaction_trigger + i), 1);
   }
 
   ROCKSDB_NAMESPACE::port::Thread threads1([&] {
@@ -1811,7 +1812,7 @@ TEST_P(ColumnFamilyTest, SameCFManualAut
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
   bool cf_1_1 = true;
   bool cf_1_2 = true;
@@ -1849,8 +1850,8 @@ TEST_P(ColumnFamilyTest, SameCFManualAut
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i),
-                        1);
+    AssertFilesPerLevel(
+        std::to_string(one.level0_file_num_compaction_trigger + i), 1);
   }
 
   TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");
@@ -1904,7 +1905,7 @@ TEST_P(ColumnFamilyTest, SameCFManualAut
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
   bool cf_1_1 = true;
   bool cf_1_2 = true;
@@ -1942,8 +1943,8 @@ TEST_P(ColumnFamilyTest, SameCFManualAut
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(one.level0_file_num_compaction_trigger + i),
-                        1);
+    AssertFilesPerLevel(
+        std::to_string(one.level0_file_num_compaction_trigger + i), 1);
   }
 
   TEST_SYNC_POINT("ColumnFamilyTest::ManualAuto:1");
@@ -2024,7 +2025,7 @@ TEST_P(ColumnFamilyTest, SameCFAutomatic
     PutRandomData(1, 10, 12000, true);
     PutRandomData(1, 1, 10, true);
     WaitForFlush(1);
-    AssertFilesPerLevel(ToString(i + 1), 1);
+    AssertFilesPerLevel(std::to_string(i + 1), 1);
   }
 
   TEST_SYNC_POINT("ColumnFamilyTest::AutoManual:5");
diff -pruN 7.2.2-5/db/compact_files_test.cc 7.3.1-2/db/compact_files_test.cc
--- 7.2.2-5/db/compact_files_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compact_files_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -91,8 +91,8 @@ TEST_F(CompactFilesTest, L0ConflictsFile
   // create couple files
   // Background compaction starts and waits in BackgroundCallCompaction:0
   for (int i = 0; i < kLevel0Trigger * 4; ++i) {
-    ASSERT_OK(db->Put(WriteOptions(), ToString(i), ""));
-    ASSERT_OK(db->Put(WriteOptions(), ToString(100 - i), ""));
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), ""));
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(100 - i), ""));
     ASSERT_OK(db->Flush(FlushOptions()));
   }
 
@@ -136,7 +136,7 @@ TEST_F(CompactFilesTest, MultipleLevel)
   // create couple files in L0, L3, L4 and L5
   for (int i = 5; i > 2; --i) {
     collector->ClearFlushedFiles();
-    ASSERT_OK(db->Put(WriteOptions(), ToString(i), ""));
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i), ""));
     ASSERT_OK(db->Flush(FlushOptions()));
     // Ensure background work is fully finished including listener callbacks
     // before accessing listener state.
@@ -145,11 +145,11 @@ TEST_F(CompactFilesTest, MultipleLevel)
     ASSERT_OK(db->CompactFiles(CompactionOptions(), l0_files, i));
 
     std::string prop;
-    ASSERT_TRUE(
-        db->GetProperty("rocksdb.num-files-at-level" + ToString(i), &prop));
+    ASSERT_TRUE(db->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(i), &prop));
     ASSERT_EQ("1", prop);
   }
-  ASSERT_OK(db->Put(WriteOptions(), ToString(0), ""));
+  ASSERT_OK(db->Put(WriteOptions(), std::to_string(0), ""));
   ASSERT_OK(db->Flush(FlushOptions()));
 
   ColumnFamilyMetaData meta;
@@ -218,7 +218,7 @@ TEST_F(CompactFilesTest, ObsoleteFiles)
 
   // create couple files
   for (int i = 1000; i < 2000; ++i) {
-    ASSERT_OK(db->Put(WriteOptions(), ToString(i),
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
                       std::string(kWriteBufferSize / 10, 'a' + (i % 26))));
   }
 
@@ -257,14 +257,14 @@ TEST_F(CompactFilesTest, NotCutOutputOnL
 
   // create couple files
   for (int i = 0; i < 500; ++i) {
-    ASSERT_OK(db->Put(WriteOptions(), ToString(i),
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
                       std::string(1000, 'a' + (i % 26))));
   }
   ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
   auto l0_files_1 = collector->GetFlushedFiles();
   collector->ClearFlushedFiles();
   for (int i = 0; i < 500; ++i) {
-    ASSERT_OK(db->Put(WriteOptions(), ToString(i),
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
                       std::string(1000, 'a' + (i % 26))));
   }
   ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
@@ -295,7 +295,7 @@ TEST_F(CompactFilesTest, CapturingPendin
 
   // Create 5 files.
   for (int i = 0; i < 5; ++i) {
-    ASSERT_OK(db->Put(WriteOptions(), "key" + ToString(i), "value"));
+    ASSERT_OK(db->Put(WriteOptions(), "key" + std::to_string(i), "value"));
     ASSERT_OK(db->Flush(FlushOptions()));
   }
 
@@ -465,7 +465,7 @@ TEST_F(CompactFilesTest, GetCompactionJo
 
   // create couple files
   for (int i = 0; i < 500; ++i) {
-    ASSERT_OK(db->Put(WriteOptions(), ToString(i),
+    ASSERT_OK(db->Put(WriteOptions(), std::to_string(i),
                       std::string(1000, 'a' + (i % 26))));
   }
   ASSERT_OK(static_cast_with_check<DBImpl>(db)->TEST_WaitForFlushMemTable());
diff -pruN 7.2.2-5/db/compaction/compaction.cc 7.3.1-2/db/compaction/compaction.cc
--- 7.2.2-5/db/compaction/compaction.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction.cc	2022-06-08 21:08:16.000000000 +0000
@@ -518,7 +518,7 @@ uint64_t Compaction::OutputFilePrealloca
     }
   }
 
-  if (max_output_file_size_ != port::kMaxUint64 &&
+  if (max_output_file_size_ != std::numeric_limits<uint64_t>::max() &&
       (immutable_options_.compaction_style == kCompactionStyleLevel ||
        output_level() > 0)) {
     preallocation_size = std::min(max_output_file_size_, preallocation_size);
@@ -616,7 +616,7 @@ bool Compaction::DoesInputReferenceBlobF
 
 uint64_t Compaction::MinInputFileOldestAncesterTime(
     const InternalKey* start, const InternalKey* end) const {
-  uint64_t min_oldest_ancester_time = port::kMaxUint64;
+  uint64_t min_oldest_ancester_time = std::numeric_limits<uint64_t>::max();
   const InternalKeyComparator& icmp =
       column_family_data()->internal_comparator();
   for (const auto& level_files : inputs_) {
diff -pruN 7.2.2-5/db/compaction/compaction_iterator.cc 7.3.1-2/db/compaction/compaction_iterator.cc
--- 7.2.2-5/db/compaction/compaction_iterator.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_iterator.cc	2022-06-08 21:08:16.000000000 +0000
@@ -28,7 +28,8 @@ CompactionIterator::CompactionIterator(
     Env* env, bool report_detailed_time, bool expect_valid_internal_key,
     CompactionRangeDelAggregator* range_del_agg,
     BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
-    const Compaction* compaction, const CompactionFilter* compaction_filter,
+    bool enforce_single_del_contracts, const Compaction* compaction,
+    const CompactionFilter* compaction_filter,
     const std::atomic<bool>* shutting_down,
     const std::atomic<int>* manual_compaction_paused,
     const std::atomic<bool>* manual_compaction_canceled,
@@ -38,7 +39,7 @@ CompactionIterator::CompactionIterator(
           input, cmp, merge_helper, last_sequence, snapshots,
           earliest_write_conflict_snapshot, job_snapshot, snapshot_checker, env,
           report_detailed_time, expect_valid_internal_key, range_del_agg,
-          blob_file_builder, allow_data_in_errors,
+          blob_file_builder, allow_data_in_errors, enforce_single_del_contracts,
           std::unique_ptr<CompactionProxy>(
               compaction ? new RealCompaction(compaction) : nullptr),
           compaction_filter, shutting_down, manual_compaction_paused,
@@ -52,6 +53,7 @@ CompactionIterator::CompactionIterator(
     Env* env, bool report_detailed_time, bool expect_valid_internal_key,
     CompactionRangeDelAggregator* range_del_agg,
     BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
+    bool enforce_single_del_contracts,
     std::unique_ptr<CompactionProxy> compaction,
     const CompactionFilter* compaction_filter,
     const std::atomic<bool>* shutting_down,
@@ -80,6 +82,7 @@ CompactionIterator::CompactionIterator(
       manual_compaction_canceled_(manual_compaction_canceled),
       info_log_(info_log),
       allow_data_in_errors_(allow_data_in_errors),
+      enforce_single_del_contracts_(enforce_single_del_contracts),
       timestamp_size_(cmp_ ? cmp_->timestamp_size() : 0),
       full_history_ts_low_(full_history_ts_low),
       current_user_key_sequence_(0),
@@ -307,6 +310,14 @@ bool CompactionIterator::InvokeFilterIfN
     // no value associated with delete
     value_.clear();
     iter_stats_.num_record_drop_user++;
+  } else if (filter == CompactionFilter::Decision::kPurge) {
+    // convert the current key to a single delete; key_ is pointing into
+    // current_key_ at this point, so updating current_key_ updates key()
+    ikey_.type = kTypeSingleDeletion;
+    current_key_.UpdateInternalKey(ikey_.sequence, kTypeSingleDeletion);
+    // no value associated with single delete
+    value_.clear();
+    iter_stats_.num_record_drop_user++;
   } else if (filter == CompactionFilter::Decision::kChangeValue) {
     if (ikey_.type == kTypeBlobIndex) {
       // value transfer from blob file to inlined data
@@ -625,24 +636,39 @@ void CompactionIterator::NextFromInput()
 
           TEST_SYNC_POINT_CALLBACK(
               "CompactionIterator::NextFromInput:SingleDelete:2", nullptr);
-          if (next_ikey.type == kTypeSingleDeletion ||
-              next_ikey.type == kTypeDeletion) {
+          if (next_ikey.type == kTypeSingleDeletion) {
             // We encountered two SingleDeletes for same key in a row. This
             // could be due to unexpected user input. If write-(un)prepared
             // transaction is used, this could also be due to releasing an old
             // snapshot between a Put and its matching SingleDelete.
-            // Furthermore, if write-(un)prepared transaction is rolled back
-            // after prepare, we will write a Delete to cancel a prior Put. If
-            // old snapshot is released between a later Put and its matching
-            // SingleDelete, we will end up with a Delete followed by
-            // SingleDelete.
             // Skip the first SingleDelete and let the next iteration decide
-            // how to handle the second SingleDelete or Delete.
+            // how to handle the second SingleDelete.
 
             // First SingleDelete has been skipped since we already called
             // input_.Next().
             ++iter_stats_.num_record_drop_obsolete;
             ++iter_stats_.num_single_del_mismatch;
+          } else if (next_ikey.type == kTypeDeletion) {
+            std::ostringstream oss;
+            oss << "Found SD and type: " << static_cast<int>(next_ikey.type)
+                << " on the same key, violating the contract "
+                   "of SingleDelete. Check your application to make sure the "
+                   "application does not mix SingleDelete and Delete for "
+                   "the same key. If you are using "
+                   "write-prepared/write-unprepared transactions, and use "
+                   "SingleDelete to delete certain keys, then make sure "
+                   "TransactionDBOptions::rollback_deletion_type_callback is "
+                   "configured properly. Mixing SD and DEL can lead to "
+                   "undefined behaviors";
+            ++iter_stats_.num_record_drop_obsolete;
+            ++iter_stats_.num_single_del_mismatch;
+            if (enforce_single_del_contracts_) {
+              ROCKS_LOG_ERROR(info_log_, "%s", oss.str().c_str());
+              valid_ = false;
+              status_ = Status::Corruption(oss.str());
+              return;
+            }
+            ROCKS_LOG_WARN(info_log_, "%s", oss.str().c_str());
           } else if (!is_timestamp_eligible_for_gc) {
             // We cannot drop the SingleDelete as timestamp is enabled, and
             // timestamp of this key is greater than or equal to
diff -pruN 7.2.2-5/db/compaction/compaction_iterator.h 7.3.1-2/db/compaction/compaction_iterator.h
--- 7.2.2-5/db/compaction/compaction_iterator.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_iterator.h	2022-06-08 21:08:16.000000000 +0000
@@ -176,7 +176,7 @@ class CompactionIterator {
       Env* env, bool report_detailed_time, bool expect_valid_internal_key,
       CompactionRangeDelAggregator* range_del_agg,
       BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
-      const Compaction* compaction = nullptr,
+      bool enforce_single_del_contracts, const Compaction* compaction = nullptr,
       const CompactionFilter* compaction_filter = nullptr,
       const std::atomic<bool>* shutting_down = nullptr,
       const std::atomic<int>* manual_compaction_paused = nullptr,
@@ -193,6 +193,7 @@ class CompactionIterator {
       Env* env, bool report_detailed_time, bool expect_valid_internal_key,
       CompactionRangeDelAggregator* range_del_agg,
       BlobFileBuilder* blob_file_builder, bool allow_data_in_errors,
+      bool enforce_single_del_contracts,
       std::unique_ptr<CompactionProxy> compaction,
       const CompactionFilter* compaction_filter = nullptr,
       const std::atomic<bool>* shutting_down = nullptr,
@@ -332,6 +333,8 @@ class CompactionIterator {
 
   bool allow_data_in_errors_;
 
+  const bool enforce_single_del_contracts_;
+
   // Comes from comparator.
   const size_t timestamp_size_;
 
diff -pruN 7.2.2-5/db/compaction/compaction_iterator_test.cc 7.3.1-2/db/compaction/compaction_iterator_test.cc
--- 7.2.2-5/db/compaction/compaction_iterator_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_iterator_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -279,7 +279,8 @@ class CompactionIteratorTest : public te
         snapshot_checker_.get(), Env::Default(),
         false /* report_detailed_time */, false, range_del_agg_.get(),
         nullptr /* blob_file_builder */, true /*allow_data_in_errors*/,
-        std::move(compaction), filter, &shutting_down_,
+        true /*enforce_single_del_contracts*/, std::move(compaction), filter,
+        &shutting_down_,
         /*manual_compaction_paused=*/nullptr,
         /*manual_compaction_canceled=*/nullptr, /*info_log=*/nullptr,
         full_history_ts_low));
@@ -313,7 +314,7 @@ class CompactionIteratorTest : public te
                   key_not_exists_beyond_output_level, full_history_ts_low);
     c_iter_->SeekToFirst();
     for (size_t i = 0; i < expected_keys.size(); i++) {
-      std::string info = "i = " + ToString(i);
+      std::string info = "i = " + std::to_string(i);
       ASSERT_TRUE(c_iter_->Valid()) << info;
       ASSERT_OK(c_iter_->status()) << info;
       ASSERT_EQ(expected_keys[i], c_iter_->key().ToString()) << info;
diff -pruN 7.2.2-5/db/compaction/compaction_job.cc 7.3.1-2/db/compaction/compaction_job.cc
--- 7.2.2-5/db/compaction/compaction_job.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_job.cc	2022-06-08 21:08:16.000000000 +0000
@@ -64,6 +64,7 @@
 #include "table/block_based/block_based_table_factory.h"
 #include "table/merging_iterator.h"
 #include "table/table_builder.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
 #include "util/coding.h"
 #include "util/hash.h"
@@ -1047,6 +1048,7 @@ CompactionJob::ProcessKeyValueCompaction
   const Compaction* compaction = sub_compact->compaction;
   CompactionServiceInput compaction_input;
   compaction_input.output_level = compaction->output_level();
+  compaction_input.db_id = db_id_;
 
   const std::vector<CompactionInputFiles>& inputs =
       *(compact_->compaction->inputs());
@@ -1208,6 +1210,7 @@ CompactionJob::ProcessKeyValueCompaction
     meta.oldest_ancester_time = file.oldest_ancester_time;
     meta.file_creation_time = file.file_creation_time;
     meta.marked_for_compaction = file.marked_for_compaction;
+    meta.unique_id = file.unique_id;
 
     auto cfd = compaction->column_family_data();
     sub_compact->outputs.emplace_back(std::move(meta),
@@ -1350,7 +1353,7 @@ void CompactionJob::ProcessKeyValueCompa
   ReadOptions read_options;
   read_options.verify_checksums = true;
   read_options.fill_cache = false;
-  read_options.rate_limiter_priority = Env::IO_LOW;
+  read_options.rate_limiter_priority = GetRateLimiterPriority();
   // Compaction iterators shouldn't be confined to a single prefix.
   // Compactions use Seek() for
   // (a) concurrent compactions,
@@ -1479,9 +1482,9 @@ void CompactionJob::ProcessKeyValueCompa
       snapshot_checker_, env_, ShouldReportDetailedTime(env_, stats_),
       /*expect_valid_internal_key=*/true, &range_del_agg,
       blob_file_builder.get(), db_options_.allow_data_in_errors,
-      sub_compact->compaction, compaction_filter, shutting_down_,
-      manual_compaction_paused_, manual_compaction_canceled_,
-      db_options_.info_log, full_history_ts_low));
+      db_options_.enforce_single_del_contracts, sub_compact->compaction,
+      compaction_filter, shutting_down_, manual_compaction_paused_,
+      manual_compaction_canceled_, db_options_.info_log, full_history_ts_low));
   auto c_iter = sub_compact->c_iter.get();
   c_iter->SeekToFirst();
   if (c_iter->Valid() && sub_compact->compaction->output_level() != 0) {
@@ -1974,7 +1977,8 @@ Status CompactionJob::FinishCompactionOu
         refined_oldest_ancester_time =
             sub_compact->compaction->MinInputFileOldestAncesterTime(
                 &(meta->smallest), &(meta->largest));
-        if (refined_oldest_ancester_time != port::kMaxUint64) {
+        if (refined_oldest_ancester_time !=
+            std::numeric_limits<uint64_t>::max()) {
           meta->oldest_ancester_time = refined_oldest_ancester_time;
         }
       }
@@ -2111,11 +2115,11 @@ Status CompactionJob::InstallCompactionR
 
   {
     Compaction::InputLevelSummaryBuffer inputs_summary;
-    ROCKS_LOG_INFO(db_options_.info_log,
-                   "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes",
-                   compaction->column_family_data()->GetName().c_str(), job_id_,
-                   compaction->InputLevelSummary(&inputs_summary),
-                   compact_->total_bytes + compact_->total_blob_bytes);
+    ROCKS_LOG_BUFFER(log_buffer_,
+                     "[%s] [JOB %d] Compacted %s => %" PRIu64 " bytes",
+                     compaction->column_family_data()->GetName().c_str(),
+                     job_id_, compaction->InputLevelSummary(&inputs_summary),
+                     compact_->total_bytes + compact_->total_blob_bytes);
   }
 
   VersionEdit* const edit = compaction->edit();
@@ -2264,7 +2268,7 @@ Status CompactionJob::OpenCompactionOutp
       sub_compact->compaction->MinInputFileOldestAncesterTime(
           (sub_compact->start != nullptr) ? &tmp_start : nullptr,
           (sub_compact->end != nullptr) ? &tmp_end : nullptr);
-  if (oldest_ancester_time == port::kMaxUint64) {
+  if (oldest_ancester_time == std::numeric_limits<uint64_t>::max()) {
     oldest_ancester_time = current_time;
   }
 
@@ -2276,6 +2280,18 @@ Status CompactionJob::OpenCompactionOutp
     meta.oldest_ancester_time = oldest_ancester_time;
     meta.file_creation_time = current_time;
     meta.temperature = temperature;
+    assert(!db_id_.empty());
+    assert(!db_session_id_.empty());
+    s = GetSstInternalUniqueId(db_id_, db_session_id_, meta.fd.GetNumber(),
+                               &meta.unique_id);
+    if (!s.ok()) {
+      ROCKS_LOG_ERROR(db_options_.info_log,
+                      "[%s] [JOB %d] file #%" PRIu64
+                      " failed to generate unique id: %s.",
+                      cfd->GetName().c_str(), job_id_, meta.fd.GetNumber(),
+                      s.ToString().c_str());
+      return s;
+    }
     sub_compact->outputs.emplace_back(
         std::move(meta), cfd->internal_comparator(),
         /*enable_order_check=*/
@@ -2284,7 +2300,7 @@ Status CompactionJob::OpenCompactionOutp
         /*enable_hash=*/paranoid_file_checks_);
   }
 
-  writable_file->SetIOPriority(Env::IOPriority::IO_LOW);
+  writable_file->SetIOPriority(GetRateLimiterPriority());
   writable_file->SetWriteLifeTimeHint(write_hint_);
   FileTypeSet tmp_set = db_options_.checksum_handoff_file_types;
   writable_file->SetPreallocationBlockSize(static_cast<size_t>(
@@ -2458,7 +2474,7 @@ void CompactionJob::LogCompaction() {
            << "compaction_reason"
            << GetCompactionReasonString(compaction->compaction_reason());
     for (size_t i = 0; i < compaction->num_input_levels(); ++i) {
-      stream << ("files_L" + ToString(compaction->level(i)));
+      stream << ("files_L" + std::to_string(compaction->level(i)));
       stream.StartArray();
       for (auto f : *compaction->inputs(i)) {
         stream << f->fd.GetNumber();
@@ -2475,6 +2491,19 @@ std::string CompactionJob::GetTableFileN
                        file_number, compact_->compaction->output_path_id());
 }
 
+Env::IOPriority CompactionJob::GetRateLimiterPriority() {
+  if (versions_ && versions_->GetColumnFamilySet() &&
+      versions_->GetColumnFamilySet()->write_controller()) {
+    WriteController* write_controller =
+        versions_->GetColumnFamilySet()->write_controller();
+    if (write_controller->NeedsDelay() || write_controller->IsStopped()) {
+      return Env::IO_USER;
+    }
+  }
+
+  return Env::IO_LOW;
+}
+
 #ifndef ROCKSDB_LITE
 std::string CompactionServiceCompactionJob::GetTableFileName(
     uint64_t file_number) {
@@ -2595,7 +2624,7 @@ Status CompactionServiceCompactionJob::R
         meta.fd.largest_seqno, meta.smallest.Encode().ToString(),
         meta.largest.Encode().ToString(), meta.oldest_ancester_time,
         meta.file_creation_time, output_file.validator.GetHash(),
-        meta.marked_for_compaction);
+        meta.marked_for_compaction, meta.unique_id);
   }
   compaction_result_->num_output_records = sub_compact->num_output_records;
   compaction_result_->total_bytes = sub_compact->total_bytes;
@@ -2699,6 +2728,9 @@ static std::unordered_map<std::string, O
     {"output_level",
      {offsetof(struct CompactionServiceInput, output_level), OptionType::kInt,
       OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
+    {"db_id",
+     {offsetof(struct CompactionServiceInput, db_id),
+      OptionType::kEncodedString}},
     {"has_begin",
      {offsetof(struct CompactionServiceInput, has_begin), OptionType::kBoolean,
       OptionVerificationType::kNormal, OptionTypeFlags::kNone}},
@@ -2756,6 +2788,11 @@ static std::unordered_map<std::string, O
          {offsetof(struct CompactionServiceOutputFile, marked_for_compaction),
           OptionType::kBoolean, OptionVerificationType::kNormal,
           OptionTypeFlags::kNone}},
+        {"unique_id",
+         OptionTypeInfo::Array<uint64_t, 2>(
+             offsetof(struct CompactionServiceOutputFile, unique_id),
+             OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+             {0, OptionType::kUInt64T})},
 };
 
 static std::unordered_map<std::string, OptionTypeInfo>
@@ -2951,6 +2988,7 @@ static std::unordered_map<std::string, O
          const void* addr1, const void* addr2, std::string* mismatch) {
         const auto status1 = static_cast<const Status*>(addr1);
         const auto status2 = static_cast<const Status*>(addr2);
+
         StatusSerializationAdapter adatper1(*status1);
         StatusSerializationAdapter adapter2(*status2);
         return OptionTypeInfo::TypesAreEqual(opts, status_adapter_type_info,
@@ -3008,7 +3046,7 @@ Status CompactionServiceInput::Read(cons
   } else {
     return Status::NotSupported(
         "Compaction Service Input data version not supported: " +
-        ToString(format_version));
+        std::to_string(format_version));
   }
 }
 
@@ -3037,7 +3075,7 @@ Status CompactionServiceResult::Read(con
   } else {
     return Status::NotSupported(
         "Compaction Service Result data version not supported: " +
-        ToString(format_version));
+        std::to_string(format_version));
   }
 }
 
diff -pruN 7.2.2-5/db/compaction/compaction_job.h 7.3.1-2/db/compaction/compaction_job.h
--- 7.2.2-5/db/compaction/compaction_job.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_job.h	2022-06-08 21:08:16.000000000 +0000
@@ -137,6 +137,8 @@ class CompactionJob {
   IOStatus io_status_;
 
  private:
+  friend class CompactionJobTestBase;
+
   // Generates a histogram representing potential divisions of key ranges from
   // the input. It adds the starting and/or ending keys of certain input files
   // to the working set and then finds the approximate size of data in between
@@ -234,6 +236,10 @@ class CompactionJob {
   // Get table file name in where it's outputting to, which should also be in
   // `output_directory_`.
   virtual std::string GetTableFileName(uint64_t file_number);
+  // The rate limiter priority (io_priority) is determined dynamically here.
+  // The Compaction Read and Write priorities are the same for different
+  // scenarios, such as write stalled.
+  Env::IOPriority GetRateLimiterPriority();
 };
 
 // CompactionServiceInput is used the pass compaction information between two
@@ -253,6 +259,9 @@ struct CompactionServiceInput {
   std::vector<std::string> input_files;
   int output_level;
 
+  // db_id is used to generate unique id of sst on the remote compactor
+  std::string db_id;
+
   // information for subcompaction
   bool has_begin = false;
   std::string begin;
@@ -284,13 +293,15 @@ struct CompactionServiceOutputFile {
   uint64_t file_creation_time;
   uint64_t paranoid_hash;
   bool marked_for_compaction;
+  UniqueId64x2 unique_id;
 
   CompactionServiceOutputFile() = default;
   CompactionServiceOutputFile(
       const std::string& name, SequenceNumber smallest, SequenceNumber largest,
       std::string _smallest_internal_key, std::string _largest_internal_key,
       uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
-      uint64_t _paranoid_hash, bool _marked_for_compaction)
+      uint64_t _paranoid_hash, bool _marked_for_compaction,
+      UniqueId64x2 _unique_id)
       : file_name(name),
         smallest_seqno(smallest),
         largest_seqno(largest),
@@ -299,7 +310,8 @@ struct CompactionServiceOutputFile {
         oldest_ancester_time(_oldest_ancester_time),
         file_creation_time(_file_creation_time),
         paranoid_hash(_paranoid_hash),
-        marked_for_compaction(_marked_for_compaction) {}
+        marked_for_compaction(_marked_for_compaction),
+        unique_id(std::move(_unique_id)) {}
 };
 
 // CompactionServiceResult contains the compaction result from a different db
diff -pruN 7.2.2-5/db/compaction/compaction_job_stats_test.cc 7.3.1-2/db/compaction/compaction_job_stats_test.cc
--- 7.2.2-5/db/compaction/compaction_job_stats_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_job_stats_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -268,10 +268,10 @@ class CompactionJobStatsTest : public te
     if (cf == 0) {
       // default cfd
       EXPECT_TRUE(db_->GetProperty(
-          "rocksdb.num-files-at-level" + ToString(level), &property));
+          "rocksdb.num-files-at-level" + std::to_string(level), &property));
     } else {
       EXPECT_TRUE(db_->GetProperty(
-          handles_[cf], "rocksdb.num-files-at-level" + ToString(level),
+          handles_[cf], "rocksdb.num-files-at-level" + std::to_string(level),
           &property));
     }
     return atoi(property.c_str());
@@ -672,7 +672,7 @@ TEST_P(CompactionJobStatsTest, Compactio
       snprintf(buf, kBufSize, "%d", ++num_L0_files);
       ASSERT_EQ(std::string(buf), FilesPerLevel(1));
     }
-    ASSERT_EQ(ToString(num_L0_files), FilesPerLevel(1));
+    ASSERT_EQ(std::to_string(num_L0_files), FilesPerLevel(1));
 
     // 2nd Phase: perform L0 -> L1 compaction.
     int L0_compaction_count = 6;
diff -pruN 7.2.2-5/db/compaction/compaction_job_test.cc 7.3.1-2/db/compaction/compaction_job_test.cc
--- 7.2.2-5/db/compaction/compaction_job_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_job_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -27,6 +27,7 @@
 #include "rocksdb/options.h"
 #include "rocksdb/write_buffer_manager.h"
 #include "table/mock_table.h"
+#include "table/unique_id_impl.h"
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
 #include "util/string_util.h"
@@ -206,7 +207,7 @@ class CompactionJobTestBase : public tes
                  oldest_blob_file_number, kUnknownOldestAncesterTime,
                  kUnknownFileCreationTime, kUnknownFileChecksum,
                  kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-                 kDisableUserTimestamp);
+                 kDisableUserTimestamp, kNullUniqueId64x2);
 
     mutex_.Lock();
     EXPECT_OK(
@@ -236,8 +237,8 @@ class CompactionJobTestBase : public tes
     for (int i = 0; i < 2; ++i) {
       auto contents = mock::MakeMockFile();
       for (int k = 0; k < kKeysPerFile; ++k) {
-        auto key = ToString(i * kMatchingKeys + k);
-        auto value = ToString(i * kKeysPerFile + k);
+        auto key = std::to_string(i * kMatchingKeys + k);
+        auto value = std::to_string(i * kKeysPerFile + k);
         InternalKey internal_key(key, ++sequence_number, kTypeValue);
 
         // This is how the key will look like once it's written in bottommost
@@ -321,7 +322,8 @@ class CompactionJobTestBase : public tes
       const std::vector<SequenceNumber>& snapshots = {},
       SequenceNumber earliest_write_conflict_snapshot = kMaxSequenceNumber,
       int output_level = 1, bool verify = true,
-      uint64_t expected_oldest_blob_file_number = kInvalidBlobFileNumber) {
+      uint64_t expected_oldest_blob_file_number = kInvalidBlobFileNumber,
+      bool check_get_priority = false) {
     auto cfd = versions_->GetColumnFamilySet()->GetDefault();
 
     size_t num_input_files = 0;
@@ -359,8 +361,8 @@ class CompactionJobTestBase : public tes
         table_cache_, &event_logger, false, false, dbname_,
         &compaction_job_stats_, Env::Priority::USER, nullptr /* IOTracer */,
         /*manual_compaction_paused=*/nullptr,
-        /*manual_compaction_canceled=*/nullptr, /*db_id=*/"",
-        /*db_session_id=*/"", full_history_ts_low_);
+        /*manual_compaction_canceled=*/nullptr, env_->GenerateUniqueId(),
+        DBImpl::GenerateDbSessionId(nullptr), full_history_ts_low_);
     VerifyInitializationOfCompactionJobStats(compaction_job_stats_);
 
     compaction_job.Prepare();
@@ -390,6 +392,32 @@ class CompactionJobTestBase : public tes
                   expected_oldest_blob_file_number);
       }
     }
+
+    if (check_get_priority) {
+      CheckGetRateLimiterPriority(compaction_job);
+    }
+  }
+
+  void CheckGetRateLimiterPriority(CompactionJob& compaction_job) {
+    // When the state from WriteController is normal.
+    ASSERT_EQ(compaction_job.GetRateLimiterPriority(), Env::IO_LOW);
+
+    WriteController* write_controller =
+        compaction_job.versions_->GetColumnFamilySet()->write_controller();
+
+    {
+      // When the state from WriteController is Delayed.
+      std::unique_ptr<WriteControllerToken> delay_token =
+          write_controller->GetDelayToken(1000000);
+      ASSERT_EQ(compaction_job.GetRateLimiterPriority(), Env::IO_USER);
+    }
+
+    {
+      // When the state from WriteController is Stopped.
+      std::unique_ptr<WriteControllerToken> stop_token =
+          write_controller->GetStopToken();
+      ASSERT_EQ(compaction_job.GetRateLimiterPriority(), Env::IO_USER);
+    }
   }
 
   std::shared_ptr<Env> env_guard_;
@@ -890,10 +918,10 @@ TEST_F(CompactionJobTest, MultiSingleDel
   //      -> Snapshot Put
   // K: SDel SDel Put SDel Put Put Snapshot SDel Put SDel SDel Put SDel
   //      -> Snapshot Put Snapshot SDel
-  // L: SDel Put Del Put SDel Snapshot Del Put Del SDel Put SDel
-  //      -> Snapshot SDel
-  // M: (Put) SDel Put Del Put SDel Snapshot Put Del SDel Put SDel Del
-  //      -> SDel Snapshot Del
+  // L: SDel Put SDel Put SDel Snapshot SDel Put SDel SDel Put SDel
+  //      -> Snapshot SDel Put SDel
+  // M: (Put) SDel Put SDel Put SDel Snapshot Put SDel SDel Put SDel SDel
+  //      -> SDel Snapshot Put SDel
   NewDB();
 
   auto file1 = mock::MakeMockFile({
@@ -924,14 +952,14 @@ TEST_F(CompactionJobTest, MultiSingleDel
       {KeyStr("L", 16U, kTypeSingleDeletion), ""},
       {KeyStr("L", 15U, kTypeValue), "val"},
       {KeyStr("L", 14U, kTypeSingleDeletion), ""},
-      {KeyStr("L", 13U, kTypeDeletion), ""},
+      {KeyStr("L", 13U, kTypeSingleDeletion), ""},
       {KeyStr("L", 12U, kTypeValue), "val"},
-      {KeyStr("L", 11U, kTypeDeletion), ""},
-      {KeyStr("M", 16U, kTypeDeletion), ""},
+      {KeyStr("L", 11U, kTypeSingleDeletion), ""},
+      {KeyStr("M", 16U, kTypeSingleDeletion), ""},
       {KeyStr("M", 15U, kTypeSingleDeletion), ""},
       {KeyStr("M", 14U, kTypeValue), "val"},
       {KeyStr("M", 13U, kTypeSingleDeletion), ""},
-      {KeyStr("M", 12U, kTypeDeletion), ""},
+      {KeyStr("M", 12U, kTypeSingleDeletion), ""},
       {KeyStr("M", 11U, kTypeValue), "val"},
   });
   AddMockFile(file1);
@@ -972,12 +1000,12 @@ TEST_F(CompactionJobTest, MultiSingleDel
       {KeyStr("K", 1U, kTypeSingleDeletion), ""},
       {KeyStr("L", 5U, kTypeSingleDeletion), ""},
       {KeyStr("L", 4U, kTypeValue), "val"},
-      {KeyStr("L", 3U, kTypeDeletion), ""},
+      {KeyStr("L", 3U, kTypeSingleDeletion), ""},
       {KeyStr("L", 2U, kTypeValue), "val"},
       {KeyStr("L", 1U, kTypeSingleDeletion), ""},
       {KeyStr("M", 10U, kTypeSingleDeletion), ""},
       {KeyStr("M", 7U, kTypeValue), "val"},
-      {KeyStr("M", 5U, kTypeDeletion), ""},
+      {KeyStr("M", 5U, kTypeSingleDeletion), ""},
       {KeyStr("M", 4U, kTypeValue), "val"},
       {KeyStr("M", 3U, kTypeSingleDeletion), ""},
   });
@@ -1019,7 +1047,9 @@ TEST_F(CompactionJobTest, MultiSingleDel
                           {KeyStr("K", 8U, kTypeValue), "val3"},
                           {KeyStr("L", 16U, kTypeSingleDeletion), ""},
                           {KeyStr("L", 15U, kTypeValue), ""},
-                          {KeyStr("M", 16U, kTypeDeletion), ""},
+                          {KeyStr("L", 11U, kTypeSingleDeletion), ""},
+                          {KeyStr("M", 15U, kTypeSingleDeletion), ""},
+                          {KeyStr("M", 14U, kTypeValue), ""},
                           {KeyStr("M", 3U, kTypeSingleDeletion), ""}});
 
   SetLastSequence(22U);
@@ -1105,6 +1135,21 @@ TEST_F(CompactionJobTest, OldestBlobFile
                 /* expected_oldest_blob_file_number */ 19);
 }
 
+TEST_F(CompactionJobTest, NoEnforceSingleDeleteContract) {
+  db_options_.enforce_single_del_contracts = false;
+  NewDB();
+
+  auto file =
+      mock::MakeMockFile({{KeyStr("a", 4U, kTypeSingleDeletion), ""},
+                          {KeyStr("a", 3U, kTypeDeletion), "dontcare"}});
+  AddMockFile(file);
+  SetLastSequence(4U);
+
+  auto expected_results = mock::MakeMockFile();
+  auto files = cfd_->current()->storage_info()->LevelFiles(0);
+  RunCompaction({files}, expected_results);
+}
+
 TEST_F(CompactionJobTest, InputSerialization) {
   // Setup a random CompactionServiceInput
   CompactionServiceInput input;
@@ -1210,13 +1255,14 @@ TEST_F(CompactionJobTest, ResultSerializ
   result.status =
       status_list.at(rnd.Uniform(static_cast<int>(status_list.size())));
   while (!rnd.OneIn(10)) {
+    UniqueId64x2 id{rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX)};
     result.output_files.emplace_back(
         rnd.RandomString(rnd.Uniform(kStrMaxLen)), rnd64.Uniform(UINT64_MAX),
         rnd64.Uniform(UINT64_MAX),
         rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)),
         rnd.RandomBinaryString(rnd.Uniform(kStrMaxLen)),
         rnd64.Uniform(UINT64_MAX), rnd64.Uniform(UINT64_MAX),
-        rnd64.Uniform(UINT64_MAX), rnd.OneIn(2));
+        rnd64.Uniform(UINT64_MAX), rnd.OneIn(2), id);
   }
   result.output_level = rnd.Uniform(10);
   result.output_path = rnd.RandomString(rnd.Uniform(kStrMaxLen));
@@ -1244,6 +1290,16 @@ TEST_F(CompactionJobTest, ResultSerializ
   ASSERT_FALSE(deserialized1.TEST_Equals(&result, &mismatch));
   ASSERT_EQ(mismatch, "stats.num_input_files");
 
+  // Test unique id mismatch
+  if (!result.output_files.empty()) {
+    CompactionServiceResult deserialized_tmp;
+    ASSERT_OK(CompactionServiceResult::Read(output, &deserialized_tmp));
+    deserialized_tmp.output_files[0].unique_id[0] += 1;
+    ASSERT_FALSE(deserialized_tmp.TEST_Equals(&result, &mismatch));
+    ASSERT_EQ(mismatch, "output_files.unique_id");
+    deserialized_tmp.status.PermitUncheckedError();
+  }
+
   // Test unknown field
   CompactionServiceResult deserialized2;
   output.clear();
@@ -1286,6 +1342,17 @@ TEST_F(CompactionJobTest, ResultSerializ
   }
 }
 
+TEST_F(CompactionJobTest, GetRateLimiterPriority) {
+  NewDB();
+
+  auto expected_results = CreateTwoFiles(false);
+  auto cfd = versions_->GetColumnFamilySet()->GetDefault();
+  auto files = cfd->current()->storage_info()->LevelFiles(0);
+  ASSERT_EQ(2U, files.size());
+  RunCompaction({files}, expected_results, {}, kMaxSequenceNumber, 1, true,
+                kInvalidBlobFileNumber, true);
+}
+
 class CompactionJobTimestampTest : public CompactionJobTestBase {
  public:
   CompactionJobTimestampTest()
diff -pruN 7.2.2-5/db/compaction/compaction_picker.cc 7.3.1-2/db/compaction/compaction_picker.cc
--- 7.2.2-5/db/compaction/compaction_picker.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_picker.cc	2022-06-08 21:08:16.000000000 +0000
@@ -65,7 +65,7 @@ bool FindIntraL0Compaction(const std::ve
   size_t compact_bytes = static_cast<size_t>(level_files[start]->fd.file_size);
   uint64_t compensated_compact_bytes =
       level_files[start]->compensated_file_size;
-  size_t compact_bytes_per_del_file = port::kMaxSizet;
+  size_t compact_bytes_per_del_file = std::numeric_limits<size_t>::max();
   // Compaction range will be [start, limit).
   size_t limit;
   // Pull in files until the amount of compaction work per deleted file begins
@@ -401,7 +401,7 @@ Status CompactionPicker::GetCompactionIn
         "Cannot find matched SST files for the following file numbers:");
     for (auto fn : *input_set) {
       message += " ";
-      message += ToString(fn);
+      message += std::to_string(fn);
     }
     return Status::InvalidArgument(message);
   }
@@ -717,7 +717,7 @@ Compaction* CompactionPicker::CompactRan
   // files that are created during the current compaction.
   if (compact_range_options.bottommost_level_compaction ==
           BottommostLevelCompaction::kForceOptimized &&
-      max_file_num_to_ignore != port::kMaxUint64) {
+      max_file_num_to_ignore != std::numeric_limits<uint64_t>::max()) {
     assert(input_level == output_level);
     // inputs_shrunk holds a continuous subset of input files which were all
     // created before the current manual compaction
@@ -1004,14 +1004,14 @@ Status CompactionPicker::SanitizeCompact
     return Status::InvalidArgument(
         "Output level for column family " + cf_meta.name +
         " must between [0, " +
-        ToString(cf_meta.levels[cf_meta.levels.size() - 1].level) + "].");
+        std::to_string(cf_meta.levels[cf_meta.levels.size() - 1].level) + "].");
   }
 
   if (output_level > MaxOutputLevel()) {
     return Status::InvalidArgument(
         "Exceed the maximum output level defined by "
         "the current compaction algorithm --- " +
-        ToString(MaxOutputLevel()));
+        std::to_string(MaxOutputLevel()));
   }
 
   if (output_level < 0) {
@@ -1061,8 +1061,8 @@ Status CompactionPicker::SanitizeCompact
       return Status::InvalidArgument(
           "Cannot compact file to up level, input file: " +
           MakeTableFileName("", file_num) + " level " +
-          ToString(input_file_level) + " > output level " +
-          ToString(output_level));
+          std::to_string(input_file_level) + " > output level " +
+          std::to_string(output_level));
     }
   }
 
diff -pruN 7.2.2-5/db/compaction/compaction_picker_level.cc 7.3.1-2/db/compaction/compaction_picker_level.cc
--- 7.2.2-5/db/compaction/compaction_picker_level.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_picker_level.cc	2022-06-08 21:08:16.000000000 +0000
@@ -504,7 +504,7 @@ bool LevelCompactionBuilder::PickIntraL0
     return false;
   }
   return FindIntraL0Compaction(level_files, kMinFilesForIntraL0Compaction,
-                               port::kMaxUint64,
+                               std::numeric_limits<uint64_t>::max(),
                                mutable_cf_options_.max_compaction_bytes,
                                &start_level_inputs_, earliest_mem_seqno_);
 }
diff -pruN 7.2.2-5/db/compaction/compaction_picker_test.cc 7.3.1-2/db/compaction/compaction_picker_test.cc
--- 7.2.2-5/db/compaction/compaction_picker_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_picker_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -12,6 +12,7 @@
 #include "db/compaction/compaction_picker_level.h"
 #include "db/compaction/compaction_picker_universal.h"
 #include "db/compaction/file_pri.h"
+#include "table/unique_id_impl.h"
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
 #include "util/string_util.h"
@@ -115,7 +116,7 @@ class CompactionPickerTest : public test
         largest_seq, marked_for_compact, temperature, kInvalidBlobFileNumber,
         kUnknownOldestAncesterTime, kUnknownFileCreationTime,
         kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-        kDisableUserTimestamp, kDisableUserTimestamp);
+        kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
     f->compensated_file_size =
         (compensated_file_size != 0) ? compensated_file_size : file_size;
     f->oldest_ancester_time = oldest_ancestor_time;
@@ -273,9 +274,9 @@ TEST_F(CompactionPickerTest, NeedsCompac
       // start a brand new version in each test.
       NewVersionStorage(kLevels, kCompactionStyleLevel);
       for (int i = 0; i < file_count; ++i) {
-        Add(level, i, ToString((i + 100) * 1000).c_str(),
-            ToString((i + 100) * 1000 + 999).c_str(),
-            file_size, 0, i * 100, i * 100 + 99);
+        Add(level, i, std::to_string((i + 100) * 1000).c_str(),
+            std::to_string((i + 100) * 1000 + 999).c_str(), file_size, 0,
+            i * 100, i * 100 + 99);
       }
       UpdateVersionStorageInfo();
       ASSERT_EQ(vstorage_->CompactionScoreLevel(0), level);
@@ -439,8 +440,8 @@ TEST_F(CompactionPickerTest, NeedsCompac
   for (int i = 1;
        i <= mutable_cf_options_.level0_file_num_compaction_trigger * 2; ++i) {
     NewVersionStorage(1, kCompactionStyleUniversal);
-    Add(0, i, ToString((i + 100) * 1000).c_str(),
-        ToString((i + 100) * 1000 + 999).c_str(), 1000000, 0, i * 100,
+    Add(0, i, std::to_string((i + 100) * 1000).c_str(),
+        std::to_string((i + 100) * 1000 + 999).c_str(), 1000000, 0, i * 100,
         i * 100 + 99);
     UpdateVersionStorageInfo();
     ASSERT_EQ(level_compaction_picker.NeedsCompaction(vstorage_.get()),
@@ -852,17 +853,17 @@ TEST_F(CompactionPickerTest, UniversalIn
   // L3: (1101, 1180) (1201, 1280) ... (7901, 7908)
   // L4: (1130, 1150) (1160, 1210) (1230, 1250) (1260 1310) ... (7960, 8010)
   for (int i = 11; i < 79; i++) {
-    Add(3, 100 + i * 3, ToString(i * 100).c_str(),
-        ToString(i * 100 + 80).c_str(), kFileSize, 0, 200, 251);
+    Add(3, 100 + i * 3, std::to_string(i * 100).c_str(),
+        std::to_string(i * 100 + 80).c_str(), kFileSize, 0, 200, 251);
     // Add a tie breaker
     if (i == 66) {
       Add(3, 10000U, "6690", "6699", kFileSize, 0, 200, 251);
     }
 
-    Add(4, 100 + i * 3 + 1, ToString(i * 100 + 30).c_str(),
-        ToString(i * 100 + 50).c_str(), kFileSize, 0, 200, 251);
-    Add(4, 100 + i * 3 + 2, ToString(i * 100 + 60).c_str(),
-        ToString(i * 100 + 110).c_str(), kFileSize, 0, 200, 251);
+    Add(4, 100 + i * 3 + 1, std::to_string(i * 100 + 30).c_str(),
+        std::to_string(i * 100 + 50).c_str(), kFileSize, 0, 200, 251);
+    Add(4, 100 + i * 3 + 2, std::to_string(i * 100 + 60).c_str(),
+        std::to_string(i * 100 + 110).c_str(), kFileSize, 0, 200, 251);
   }
   UpdateVersionStorageInfo();
 
@@ -899,14 +900,14 @@ TEST_F(CompactionPickerTest, UniversalIn
   // L3: (1101, 1180) (1201, 1280) ... (7901, 7908)
   // L4: (1130, 1150) (1160, 1210) (1230, 1250) (1260 1310) ... (7960, 8010)
   for (int i = 11; i < 70; i++) {
-    Add(3, 100 + i * 3, ToString(i * 100).c_str(),
-        ToString(i * 100 + 80).c_str(),
+    Add(3, 100 + i * 3, std::to_string(i * 100).c_str(),
+        std::to_string(i * 100 + 80).c_str(),
         i % 10 == 9 ? kFileSize * 100 : kFileSize, 0, 200, 251);
 
-    Add(4, 100 + i * 3 + 1, ToString(i * 100 + 30).c_str(),
-        ToString(i * 100 + 50).c_str(), kFileSize, 0, 200, 251);
-    Add(4, 100 + i * 3 + 2, ToString(i * 100 + 60).c_str(),
-        ToString(i * 100 + 110).c_str(), kFileSize, 0, 200, 251);
+    Add(4, 100 + i * 3 + 1, std::to_string(i * 100 + 30).c_str(),
+        std::to_string(i * 100 + 50).c_str(), kFileSize, 0, 200, 251);
+    Add(4, 100 + i * 3 + 2, std::to_string(i * 100 + 60).c_str(),
+        std::to_string(i * 100 + 110).c_str(), kFileSize, 0, 200, 251);
   }
   UpdateVersionStorageInfo();
 
@@ -941,8 +942,8 @@ TEST_F(CompactionPickerTest, NeedsCompac
   // size of L0 files.
   for (int i = 1; i <= kFileCount; ++i) {
     NewVersionStorage(1, kCompactionStyleFIFO);
-    Add(0, i, ToString((i + 100) * 1000).c_str(),
-        ToString((i + 100) * 1000 + 999).c_str(), kFileSize, 0, i * 100,
+    Add(0, i, std::to_string((i + 100) * 1000).c_str(),
+        std::to_string((i + 100) * 1000 + 999).c_str(), kFileSize, 0, i * 100,
         i * 100 + 99);
     UpdateVersionStorageInfo();
     ASSERT_EQ(fifo_compaction_picker.NeedsCompaction(vstorage_.get()),
@@ -2653,8 +2654,8 @@ TEST_F(CompactionPickerTest, UniversalMa
       universal_compaction_picker.CompactRange(
           cf_name_, mutable_cf_options_, mutable_db_options_, vstorage_.get(),
           ColumnFamilyData::kCompactAllLevels, 6, CompactRangeOptions(),
-          nullptr, nullptr, &manual_end, &manual_conflict, port::kMaxUint64,
-          ""));
+          nullptr, nullptr, &manual_end, &manual_conflict,
+          std::numeric_limits<uint64_t>::max(), ""));
 
   ASSERT_TRUE(compaction);
 
diff -pruN 7.2.2-5/db/compaction/compaction_picker_universal.cc 7.3.1-2/db/compaction/compaction_picker_universal.cc
--- 7.2.2-5/db/compaction/compaction_picker_universal.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_picker_universal.cc	2022-06-08 21:08:16.000000000 +0000
@@ -1371,7 +1371,7 @@ Compaction* UniversalCompactionBuilder::
 
 uint64_t UniversalCompactionBuilder::GetMaxOverlappingBytes() const {
   if (!mutable_cf_options_.compaction_options_universal.incremental) {
-    return port::kMaxUint64;
+    return std::numeric_limits<uint64_t>::max();
   } else {
     // Try to align cutting boundary with files at the next level if the
     // file isn't end up with 1/2 of target size, or it would overlap
diff -pruN 7.2.2-5/db/compaction/compaction_service_test.cc 7.3.1-2/db/compaction/compaction_service_test.cc
--- 7.2.2-5/db/compaction/compaction_service_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/compaction/compaction_service_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -15,13 +15,17 @@ class MyTestCompactionService : public C
   MyTestCompactionService(
       std::string db_path, Options& options,
       std::shared_ptr<Statistics>& statistics,
-      std::vector<std::shared_ptr<EventListener>>& listeners)
+      std::vector<std::shared_ptr<EventListener>>& listeners,
+      std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
+          table_properties_collector_factories)
       : db_path_(std::move(db_path)),
         options_(options),
         statistics_(statistics),
         start_info_("na", "na", "na", 0, Env::TOTAL),
         wait_info_("na", "na", "na", 0, Env::TOTAL),
-        listeners_(listeners) {}
+        listeners_(listeners),
+        table_properties_collector_factories_(
+            std::move(table_properties_collector_factories)) {}
 
   static const char* kClassName() { return "MyTestCompactionService"; }
 
@@ -78,12 +82,16 @@ class MyTestCompactionService : public C
       options_override.listeners = listeners_;
     }
 
+    if (!table_properties_collector_factories_.empty()) {
+      options_override.table_properties_collector_factories =
+          table_properties_collector_factories_;
+    }
+
     OpenAndCompactOptions options;
     options.canceled = &canceled_;
 
     Status s = DB::OpenAndCompact(
-        options, db_path_,
-        db_path_ + "/" + ROCKSDB_NAMESPACE::ToString(info.job_id),
+        options, db_path_, db_path_ + "/" + std::to_string(info.job_id),
         compaction_input, compaction_service_result, options_override);
     if (is_override_wait_result_) {
       *compaction_service_result = override_wait_result_;
@@ -142,6 +150,8 @@ class MyTestCompactionService : public C
   bool is_override_wait_result_ = false;
   std::string override_wait_result_;
   std::vector<std::shared_ptr<EventListener>> listeners_;
+  std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
+      table_properties_collector_factories_;
   std::atomic_bool canceled_{false};
 };
 
@@ -158,7 +168,8 @@ class CompactionServiceTest : public DBT
     compactor_statistics_ = CreateDBStatistics();
 
     compaction_service_ = std::make_shared<MyTestCompactionService>(
-        dbname_, *options, compactor_statistics_, remote_listeners);
+        dbname_, *options, compactor_statistics_, remote_listeners,
+        remote_table_properties_collector_factories);
     options->compaction_service = compaction_service_;
     DestroyAndReopen(*options);
   }
@@ -177,7 +188,7 @@ class CompactionServiceTest : public DBT
     for (int i = 0; i < 20; i++) {
       for (int j = 0; j < 10; j++) {
         int key_id = i * 10 + j;
-        ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+        ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
       }
       ASSERT_OK(Flush());
     }
@@ -187,7 +198,7 @@ class CompactionServiceTest : public DBT
     for (int i = 0; i < 10; i++) {
       for (int j = 0; j < 10; j++) {
         int key_id = i * 20 + j * 2;
-        ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+        ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
       }
       ASSERT_OK(Flush());
     }
@@ -199,14 +210,16 @@ class CompactionServiceTest : public DBT
     for (int i = 0; i < 200; i++) {
       auto result = Get(Key(i));
       if (i % 2) {
-        ASSERT_EQ(result, "value" + ToString(i));
+        ASSERT_EQ(result, "value" + std::to_string(i));
       } else {
-        ASSERT_EQ(result, "value_new" + ToString(i));
+        ASSERT_EQ(result, "value_new" + std::to_string(i));
       }
     }
   }
 
   std::vector<std::shared_ptr<EventListener>> remote_listeners;
+  std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
+      remote_table_properties_collector_factories;
 
  private:
   std::shared_ptr<Statistics> compactor_statistics_;
@@ -224,7 +237,7 @@ TEST_F(CompactionServiceTest, BasicCompa
   for (int i = 0; i < 20; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -232,7 +245,7 @@ TEST_F(CompactionServiceTest, BasicCompa
   for (int i = 0; i < 10; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -242,9 +255,9 @@ TEST_F(CompactionServiceTest, BasicCompa
   for (int i = 0; i < 200; i++) {
     auto result = Get(Key(i));
     if (i % 2) {
-      ASSERT_EQ(result, "value" + ToString(i));
+      ASSERT_EQ(result, "value" + std::to_string(i));
     } else {
-      ASSERT_EQ(result, "value_new" + ToString(i));
+      ASSERT_EQ(result, "value_new" + std::to_string(i));
     }
   }
   auto my_cs = GetCompactionService();
@@ -275,13 +288,23 @@ TEST_F(CompactionServiceTest, BasicCompa
         auto s = static_cast<Status*>(status);
         *s = Status::Aborted("MyTestCompactionService failed to compact!");
       });
+
+  // track the number of successful unique id verifications
+  std::atomic_int verify_passed{0};
+  SyncPoint::GetInstance()->SetCallBack(
+      "Version::VerifySstUniqueIds::Passed", [&](void* arg) {
+        // verify that a non-empty unique id was passed
+        auto id = static_cast<std::string*>(arg);
+        assert(!id->empty());
+        verify_passed++;
+      });
   SyncPoint::GetInstance()->EnableProcessing();
 
   Status s;
   for (int i = 0; i < 10; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      s = Put(Key(key_id), "value_new" + ToString(key_id));
+      s = Put(Key(key_id), "value_new" + std::to_string(key_id));
       if (s.IsAborted()) {
         break;
       }
@@ -299,6 +322,12 @@ TEST_F(CompactionServiceTest, BasicCompa
     }
   }
   ASSERT_TRUE(s.IsAborted());
+
+  // Test verification
+  ASSERT_EQ(verify_passed, 0);
+  options.verify_sst_unique_id_in_manifest = true;
+  Reopen(options);
+  ASSERT_GT(verify_passed, 0);
 }
 
 TEST_F(CompactionServiceTest, ManualCompaction) {
@@ -468,7 +497,7 @@ TEST_F(CompactionServiceTest, Compaction
   for (int i = 0; i < 20; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -476,7 +505,7 @@ TEST_F(CompactionServiceTest, Compaction
   for (int i = 0; i < 10; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -490,9 +519,9 @@ TEST_F(CompactionServiceTest, Compaction
     if (i > 5 && i <= 105) {
       ASSERT_EQ(result, "NOT_FOUND");
     } else if (i % 2) {
-      ASSERT_EQ(result, "value" + ToString(i));
+      ASSERT_EQ(result, "value" + std::to_string(i));
     } else {
-      ASSERT_EQ(result, "value_new" + ToString(i));
+      ASSERT_EQ(result, "value_new" + std::to_string(i));
     }
   }
   auto my_cs = GetCompactionService();
@@ -547,9 +576,9 @@ TEST_F(CompactionServiceTest, Concurrent
   for (int i = 0; i < 200; i++) {
     auto result = Get(Key(i));
     if (i % 2) {
-      ASSERT_EQ(result, "value" + ToString(i));
+      ASSERT_EQ(result, "value" + std::to_string(i));
     } else {
-      ASSERT_EQ(result, "value_new" + ToString(i));
+      ASSERT_EQ(result, "value_new" + std::to_string(i));
     }
   }
   auto my_cs = GetCompactionService();
@@ -564,7 +593,7 @@ TEST_F(CompactionServiceTest, Compaction
   for (int i = 0; i < 20; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -572,7 +601,7 @@ TEST_F(CompactionServiceTest, Compaction
   for (int i = 0; i < 10; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -617,7 +646,7 @@ TEST_F(CompactionServiceTest, Compaction
   for (int i = 0; i < 20; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -625,7 +654,7 @@ TEST_F(CompactionServiceTest, Compaction
   for (int i = 0; i < 4; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -653,7 +682,7 @@ TEST_F(CompactionServiceTest, FallbackLo
   for (int i = 0; i < 20; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -661,7 +690,7 @@ TEST_F(CompactionServiceTest, FallbackLo
   for (int i = 0; i < 10; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -671,9 +700,9 @@ TEST_F(CompactionServiceTest, FallbackLo
   for (int i = 0; i < 200; i++) {
     auto result = Get(Key(i));
     if (i % 2) {
-      ASSERT_EQ(result, "value" + ToString(i));
+      ASSERT_EQ(result, "value" + std::to_string(i));
     } else {
-      ASSERT_EQ(result, "value_new" + ToString(i));
+      ASSERT_EQ(result, "value_new" + std::to_string(i));
     }
   }
 
@@ -796,7 +825,7 @@ TEST_F(CompactionServiceTest, RemoteEven
   for (int i = 0; i < 20; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -804,7 +833,7 @@ TEST_F(CompactionServiceTest, RemoteEven
   for (int i = 0; i < 10; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value_new" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value_new" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -821,11 +850,101 @@ TEST_F(CompactionServiceTest, RemoteEven
   for (int i = 0; i < 200; i++) {
     auto result = Get(Key(i));
     if (i % 2) {
-      ASSERT_EQ(result, "value" + ToString(i));
+      ASSERT_EQ(result, "value" + std::to_string(i));
     } else {
-      ASSERT_EQ(result, "value_new" + ToString(i));
+      ASSERT_EQ(result, "value_new" + std::to_string(i));
+    }
+  }
+}
+
+TEST_F(CompactionServiceTest, TablePropertiesCollector) {
+  const static std::string kUserPropertyName = "TestCount";
+
+  class TablePropertiesCollectorTest : public TablePropertiesCollector {
+   public:
+    Status Finish(UserCollectedProperties* properties) override {
+      *properties = UserCollectedProperties{
+          {kUserPropertyName, std::to_string(count_)},
+      };
+      return Status::OK();
+    }
+
+    UserCollectedProperties GetReadableProperties() const override {
+      return UserCollectedProperties();
+    }
+
+    const char* Name() const override { return "TablePropertiesCollectorTest"; }
+
+    Status AddUserKey(const Slice& /*user_key*/, const Slice& /*value*/,
+                      EntryType /*type*/, SequenceNumber /*seq*/,
+                      uint64_t /*file_size*/) override {
+      count_++;
+      return Status::OK();
+    }
+
+   private:
+    uint32_t count_ = 0;
+  };
+
+  class TablePropertiesCollectorFactoryTest
+      : public TablePropertiesCollectorFactory {
+   public:
+    TablePropertiesCollector* CreateTablePropertiesCollector(
+        TablePropertiesCollectorFactory::Context /*context*/) override {
+      return new TablePropertiesCollectorTest();
+    }
+
+    const char* Name() const override {
+      return "TablePropertiesCollectorFactoryTest";
+    }
+  };
+
+  auto factory = new TablePropertiesCollectorFactoryTest();
+  remote_table_properties_collector_factories.emplace_back(factory);
+
+  const int kNumSst = 3;
+  const int kLevel0Trigger = 4;
+  Options options = CurrentOptions();
+  options.level0_file_num_compaction_trigger = kLevel0Trigger;
+  ReopenWithCompactionService(&options);
+
+  // generate a few SSTs locally which should not have user property
+  for (int i = 0; i < kNumSst; i++) {
+    for (int j = 0; j < 100; j++) {
+      ASSERT_OK(Put(Key(i * 10 + j), "value"));
+    }
+    ASSERT_OK(Flush());
+  }
+
+  TablePropertiesCollection fname_to_props;
+  ASSERT_OK(db_->GetPropertiesOfAllTables(&fname_to_props));
+  for (const auto& file_props : fname_to_props) {
+    auto properties = file_props.second->user_collected_properties;
+    auto it = properties.find(kUserPropertyName);
+    ASSERT_EQ(it, properties.end());
+  }
+
+  // trigger compaction
+  for (int i = kNumSst; i < kLevel0Trigger; i++) {
+    for (int j = 0; j < 100; j++) {
+      ASSERT_OK(Put(Key(i * 10 + j), "value"));
+    }
+    ASSERT_OK(Flush());
+  }
+  ASSERT_OK(dbfull()->TEST_WaitForCompact(true));
+
+  ASSERT_OK(db_->GetPropertiesOfAllTables(&fname_to_props));
+
+  bool has_user_property = false;
+  for (const auto& file_props : fname_to_props) {
+    auto properties = file_props.second->user_collected_properties;
+    auto it = properties.find(kUserPropertyName);
+    if (it != properties.end()) {
+      has_user_property = true;
+      ASSERT_GT(std::stoi(it->second), 0);
     }
   }
+  ASSERT_TRUE(has_user_property);
 }
 
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/db/comparator_db_test.cc 7.3.1-2/db/comparator_db_test.cc
--- 7.2.2-5/db/comparator_db_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/comparator_db_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -397,7 +397,7 @@ TEST_P(ComparatorDBTest, DoubleComparato
       for (uint32_t j = 0; j < divide_order; j++) {
         to_divide *= 10.0;
       }
-      source_strings.push_back(ToString(r / to_divide));
+      source_strings.push_back(std::to_string(r / to_divide));
     }
 
     DoRandomIteraratorTest(GetDB(), source_strings, &rnd, 200, 1000, 66);
diff -pruN 7.2.2-5/db/convenience.cc 7.3.1-2/db/convenience.cc
--- 7.2.2-5/db/convenience.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/convenience.cc	2022-06-08 21:08:16.000000000 +0000
@@ -40,7 +40,8 @@ Status VerifySstFileChecksum(const Optio
 Status VerifySstFileChecksum(const Options& options,
                              const EnvOptions& env_options,
                              const ReadOptions& read_options,
-                             const std::string& file_path) {
+                             const std::string& file_path,
+                             const SequenceNumber& largest_seqno) {
   std::unique_ptr<FSRandomAccessFile> file;
   uint64_t file_size;
   InternalKeyComparator internal_comparator(options.comparator);
@@ -61,12 +62,13 @@ Status VerifySstFileChecksum(const Optio
           nullptr /* stats */, 0 /* hist_type */, nullptr /* file_read_hist */,
           ioptions.rate_limiter.get()));
   const bool kImmortal = true;
+  auto reader_options = TableReaderOptions(
+      ioptions, options.prefix_extractor, env_options, internal_comparator,
+      false /* skip_filters */, !kImmortal, false /* force_direct_prefetch */,
+      -1 /* level */);
+  reader_options.largest_seqno = largest_seqno;
   s = ioptions.table_factory->NewTableReader(
-      TableReaderOptions(ioptions, options.prefix_extractor, env_options,
-                         internal_comparator, false /* skip_filters */,
-                         !kImmortal, false /* force_direct_prefetch */,
-                         -1 /* level */),
-      std::move(file_reader), file_size, &table_reader,
+      reader_options, std::move(file_reader), file_size, &table_reader,
       false /* prefetch_index_and_filter_in_cache */);
   if (!s.ok()) {
     return s;
diff -pruN 7.2.2-5/db/corruption_test.cc 7.3.1-2/db/corruption_test.cc
--- 7.2.2-5/db/corruption_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/corruption_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -308,7 +308,7 @@ class CorruptionTest : public testing::T
     uint64_t new_size = old_size - bytes_to_truncate;
     // If bytes_to_truncate == 0, it will do full truncation.
     if (bytes_to_truncate == 0) {
-      new_size = old_size;
+      new_size = 0;
     }
     ASSERT_OK(test::TruncateFile(env_, path, new_size));
   }
@@ -354,6 +354,7 @@ TEST_F(CorruptionTest, PostPITRCorruptio
     options_.create_missing_column_families = true;
     std::vector<ColumnFamilyHandle*> cfhs;
     ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_));
+    assert(db_ != nullptr);  // suppress false clang-analyze report
 
     ASSERT_OK(db_->Put(WriteOptions(), cfhs[0], "k", "v"));
     ASSERT_OK(db_->Put(WriteOptions(), cfhs[1], "k", "v"));
@@ -375,6 +376,8 @@ TEST_F(CorruptionTest, PostPITRCorruptio
     options_.avoid_flush_during_recovery = true;
     std::vector<ColumnFamilyHandle*> cfhs;
     ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_));
+    assert(db_ != nullptr);  // suppress false clang-analyze report
+
     // Flush one but not both CFs and write some data so there's a seqno gap
     // between the PITR corruption and the next DB session's first WAL.
     ASSERT_OK(db_->Put(WriteOptions(), cfhs[1], "k2", "v2"));
@@ -391,6 +394,7 @@ TEST_F(CorruptionTest, PostPITRCorruptio
   for (int i = 0; i < 2; ++i) {
     std::vector<ColumnFamilyHandle*> cfhs;
     ASSERT_OK(DB::Open(options_, dbname_, cf_descs, &cfhs, &db_));
+    assert(db_ != nullptr);  // suppress false clang-analyze report
 
     for (auto* cfh : cfhs) {
       delete cfh;
@@ -1045,15 +1049,22 @@ INSTANTIATE_TEST_CASE_P(CorruptionTest,
 // "Column family inconsistency: SST file contains data beyond the point of
 // corruption" error will be hit, causing recovery to fail.
 //
-// After adding the fix, corrupted WALs whose numbers are larger than the
-// corrupted wal and smaller than the new WAL are moved to a separate folder.
-// Only after new WAL is synced, RocksDB persist a new MANIFEST with column
-// families to ensure RocksDB is in consistent state.
+// After adding the fix, only after new WAL is synced, RocksDB persist a new
+// MANIFEST with column families to ensure RocksDB is in consistent state.
 // RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is
 // synced immediately afterwards. The sequence number of the sentinel
 // WriteBatch will be the next sequence number immediately after the largest
 // sequence number recovered from previous WALs and MANIFEST because of which DB
 // will be in consistent state.
+// If a future recovery starts from the new MANIFEST, then it means the new WAL
+// is successfully synced. Due to the sentinel empty write batch at the
+// beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point.
+// If a future recovery starts from the old MANIFEST, it means that writing the
+// new MANIFEST failed. It won't have the "SST ahead of WAL" error.
+//
+// The combination of corrupting a WAL and injecting an error during subsequent
+// re-open exposes the bug of prematurely persisting a new MANIFEST with
+// advanced ColumnFamilyData::log_number.
 TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecovery) {
   CloseDb();
   Options options;
@@ -1064,7 +1075,7 @@ TEST_P(CrashDuringRecoveryWithCorruption
   options.env = env_;
   ASSERT_OK(DestroyDB(dbname_, options));
   options.create_if_missing = true;
-  options.max_write_buffer_number = 3;
+  options.max_write_buffer_number = 8;
 
   Reopen(&options);
   Status s;
@@ -1090,13 +1101,16 @@ TEST_P(CrashDuringRecoveryWithCorruption
 
     // Write one key to test_cf.
     ASSERT_OK(db_->Put(WriteOptions(), handles[1], "old_key", "dontcare"));
+    ASSERT_OK(db_->Flush(FlushOptions(), handles[1]));
+
     // Write to default_cf and flush this cf several times to advance wal
-    // number.
+    // number. TEST_SwitchMemtable makes sure WALs are not synced and test can
+    // corrupt un-sync WAL.
     for (int i = 0; i < 2; ++i) {
-      ASSERT_OK(db_->Put(WriteOptions(), "key" + std::to_string(i), "value"));
+      ASSERT_OK(db_->Put(WriteOptions(), "key" + std::to_string(i),
+                         "value" + std::to_string(i)));
       ASSERT_OK(dbimpl->TEST_SwitchMemtable());
     }
-    ASSERT_OK(db_->Put(WriteOptions(), handles[1], "dontcare", "dontcare"));
 
     for (auto* h : handles) {
       delete h;
@@ -1105,12 +1119,13 @@ TEST_P(CrashDuringRecoveryWithCorruption
     CloseDb();
   }
 
-  // 2. Corrupt second last wal file to emulate power reset which caused the DB
-  // to lose the un-synced WAL.
+  // 2. Corrupt second last un-synced wal file to emulate power reset which
+  // caused the DB to lose the un-synced WAL.
   {
     std::vector<uint64_t> file_nums;
     GetSortedWalFiles(file_nums);
     size_t size = file_nums.size();
+    assert(size >= 2);
     uint64_t log_num = file_nums[size - 2];
     CorruptFileWithTruncation(FileType::kWalFile, log_num,
                               /*bytes_to_truncate=*/8);
@@ -1127,25 +1142,31 @@ TEST_P(CrashDuringRecoveryWithCorruption
   // Case2: If avoid_flush_during_recovery = false, all column families have
   // flushed their data from WAL to L0 during recovery, and none of them will
   // ever need to read the WALs again.
+
+  // 4. Fault is injected to fail the recovery.
   {
-    options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
+    SyncPoint::GetInstance()->SetCallBack(
+        "DBImpl::GetLogSizeAndMaybeTruncate:0", [&](void* arg) {
+          auto* tmp_s = reinterpret_cast<Status*>(arg);
+          assert(tmp_s);
+          *tmp_s = Status::IOError("Injected");
+        });
+    SyncPoint::GetInstance()->EnableProcessing();
+
+    handles.clear();
+    options.avoid_flush_during_recovery = true;
     s = DB::Open(options, dbname_, cf_descs, &handles, &db_);
-    ASSERT_OK(s);
+    ASSERT_TRUE(s.IsIOError());
+    ASSERT_EQ("IO error: Injected", s.ToString());
     for (auto* h : handles) {
       delete h;
     }
-    handles.clear();
     CloseDb();
-  }
 
-  // 4. Corrupt max_wal_num to emulate second power reset which caused the
-  // DB to again lose the un-synced WAL.
-  {
-    std::vector<uint64_t> file_nums;
-    GetSortedWalFiles(file_nums);
-    size_t size = file_nums.size();
-    uint64_t log_num = file_nums[size - 1];
-    CorruptFileWithTruncation(FileType::kWalFile, log_num);
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
   }
 
   // 5. After second crash reopen the db with second corruption. Default family
@@ -1168,6 +1189,23 @@ TEST_P(CrashDuringRecoveryWithCorruption
   {
     options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
     ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_));
+
+    // Verify that data is not lost.
+    {
+      std::string v;
+      ASSERT_OK(db_->Get(ReadOptions(), handles[1], "old_key", &v));
+      ASSERT_EQ("dontcare", v);
+
+      v.clear();
+      ASSERT_OK(db_->Get(ReadOptions(), "key" + std::to_string(0), &v));
+      ASSERT_EQ("value" + std::to_string(0), v);
+
+      // Since it's corrupting the second last wal, the below key is not found.
+      v.clear();
+      ASSERT_EQ(db_->Get(ReadOptions(), "key" + std::to_string(1), &v),
+                Status::NotFound());
+    }
+
     for (auto* h : handles) {
       delete h;
     }
@@ -1183,15 +1221,22 @@ TEST_P(CrashDuringRecoveryWithCorruption
 // file contains data beyond the point of corruption" error will be hit, causing
 // recovery to fail.
 //
-// After adding the fix, corrupted WALs whose numbers are larger than the
-// corrupted wal and smaller than the new WAL are moved to a separate folder.
-// Only after new WAL is synced, RocksDB persist a new MANIFEST with column
-// families to ensure RocksDB is in consistent state.
+// After adding the fix, only after new WAL is synced, RocksDB persist a new
+// MANIFEST with column families to ensure RocksDB is in consistent state.
 // RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is
 // synced immediately afterwards. The sequence number of the sentinel
 // WriteBatch will be the next sequence number immediately after the largest
 // sequence number recovered from previous WALs and MANIFEST because of which DB
 // will be in consistent state.
+// If a future recovery starts from the new MANIFEST, then it means the new WAL
+// is successfully synced. Due to the sentinel empty write batch at the
+// beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point.
+// If a future recovery starts from the old MANIFEST, it means that writing the
+// new MANIFEST failed. It won't have the "SST ahead of WAL" error.
+//
+// The combination of corrupting a WAL and injecting an error during subsequent
+// re-open exposes the bug of prematurely persisting a new MANIFEST with
+// advanced ColumnFamilyData::log_number.
 TEST_P(CrashDuringRecoveryWithCorruptionTest, TxnDbCrashDuringRecovery) {
   CloseDb();
   Options options;
@@ -1225,7 +1270,6 @@ TEST_P(CrashDuringRecoveryWithCorruption
   // advance wal number so that some column families have advanced log_number
   // while other don't.
   {
-    options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
     ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs,
                                   &handles, &txn_db));
 
@@ -1234,6 +1278,8 @@ TEST_P(CrashDuringRecoveryWithCorruption
     ASSERT_OK(txn->Put(handles[1], "foo", "value"));
     ASSERT_OK(txn->SetName("txn0"));
     ASSERT_OK(txn->Prepare());
+    ASSERT_OK(txn_db->Flush(FlushOptions()));
+
     delete txn;
     txn = nullptr;
 
@@ -1242,17 +1288,19 @@ TEST_P(CrashDuringRecoveryWithCorruption
 
     // Put and flush cf0
     for (int i = 0; i < 2; ++i) {
-      ASSERT_OK(txn_db->Put(WriteOptions(), "dontcare", "value"));
+      ASSERT_OK(txn_db->Put(WriteOptions(), "key" + std::to_string(i),
+                            "value" + std::to_string(i)));
       ASSERT_OK(dbimpl->TEST_SwitchMemtable());
     }
 
     // Put cf1
     txn = txn_db->BeginTransaction(WriteOptions(), TransactionOptions());
-    ASSERT_OK(txn->Put(handles[1], "foo1", "value"));
+    ASSERT_OK(txn->Put(handles[1], "foo1", "value1"));
     ASSERT_OK(txn->Commit());
 
     delete txn;
     txn = nullptr;
+
     for (auto* h : handles) {
       delete h;
     }
@@ -1266,6 +1314,7 @@ TEST_P(CrashDuringRecoveryWithCorruption
     std::vector<uint64_t> file_nums;
     GetSortedWalFiles(file_nums);
     size_t size = file_nums.size();
+    assert(size >= 2);
     uint64_t log_num = file_nums[size - 2];
     CorruptFileWithTruncation(FileType::kWalFile, log_num,
                               /*bytes_to_truncate=*/8);
@@ -1275,20 +1324,33 @@ TEST_P(CrashDuringRecoveryWithCorruption
   // family has higher log number than corrupted wal number. There may be old
   // WAL files that it must not delete because they can contain data of
   // uncommitted transactions. As a result, min_log_number_to_keep won't change.
+
   {
-    options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
-    ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs,
-                                  &handles, &txn_db));
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
+    SyncPoint::GetInstance()->SetCallBack(
+        "DBImpl::Open::BeforeSyncWAL", [&](void* arg) {
+          auto* tmp_s = reinterpret_cast<Status*>(arg);
+          assert(tmp_s);
+          *tmp_s = Status::IOError("Injected");
+        });
+    SyncPoint::GetInstance()->EnableProcessing();
 
+    handles.clear();
+    s = TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs, &handles,
+                            &txn_db);
+    ASSERT_TRUE(s.IsIOError());
+    ASSERT_EQ("IO error: Injected", s.ToString());
     for (auto* h : handles) {
       delete h;
     }
-    handles.clear();
-    delete txn_db;
+    CloseDb();
+
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
   }
 
-  // 4. Corrupt max_wal_num to emulate second power reset which caused the
-  // DB to again lose the un-synced WAL.
+  // 4. Corrupt max_wal_num.
   {
     std::vector<uint64_t> file_nums;
     GetSortedWalFiles(file_nums);
@@ -1307,10 +1369,29 @@ TEST_P(CrashDuringRecoveryWithCorruption
   // recovered from previous WALs and MANIFEST, db will be in consistent state
   // and opens successfully.
   {
-    options.avoid_flush_during_recovery = false;
-
     ASSERT_OK(TransactionDB::Open(options, txn_db_opts, dbname_, cf_descs,
                                   &handles, &txn_db));
+
+    // Verify that data is not lost.
+    {
+      std::string v;
+      // Key not visible since it's not committed.
+      ASSERT_EQ(txn_db->Get(ReadOptions(), handles[1], "foo", &v),
+                Status::NotFound());
+
+      v.clear();
+      ASSERT_OK(txn_db->Get(ReadOptions(), "key" + std::to_string(0), &v));
+      ASSERT_EQ("value" + std::to_string(0), v);
+
+      // Last WAL is corrupted which contains two keys below.
+      v.clear();
+      ASSERT_EQ(txn_db->Get(ReadOptions(), "key" + std::to_string(1), &v),
+                Status::NotFound());
+      v.clear();
+      ASSERT_EQ(txn_db->Get(ReadOptions(), handles[1], "foo1", &v),
+                Status::NotFound());
+    }
+
     for (auto* h : handles) {
       delete h;
     }
@@ -1318,6 +1399,153 @@ TEST_P(CrashDuringRecoveryWithCorruption
   }
 }
 
+// This test is similar to
+// CrashDuringRecoveryWithCorruptionTest.CrashDuringRecovery except it calls
+// flush and corrupts the last WAL. It calls flush to sync some of the WALs;
+// the remaining are unsynced, one of which is then corrupted to simulate a
+// crash.
+//
+// In case of non-TransactionDB with avoid_flush_during_recovery = true, RocksDB
+// won't flush the data from WAL to L0 for all column families if possible. As a
+// result, not all column families can increase their log_numbers, and
+// min_log_number_to_keep won't change.
+// It may prematurely persist a new MANIFEST even before we can declare the DB
+// is in consistent state after recovery (this is when the new WAL is synced)
+// and advances log_numbers for some column families.
+//
+// If there is power failure before we sync the new WAL, we will end up in
+// a situation in which after persisting the MANIFEST, RocksDB will see some
+// column families' log_numbers larger than the corrupted wal, and
+// "Column family inconsistency: SST file contains data beyond the point of
+// corruption" error will be hit, causing recovery to fail.
+//
+// After adding the fix, only after new WAL is synced, RocksDB persist a new
+// MANIFEST with column families to ensure RocksDB is in consistent state.
+// RocksDB writes an empty WriteBatch as a sentinel to the new WAL which is
+// synced immediately afterwards. The sequence number of the sentinel
+// WriteBatch will be the next sequence number immediately after the largest
+// sequence number recovered from previous WALs and MANIFEST because of which DB
+// will be in consistent state.
+// If a future recovery starts from the new MANIFEST, then it means the new WAL
+// is successfully synced. Due to the sentinel empty write batch at the
+// beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point.
+// If a future recovery starts from the old MANIFEST, it means writing the new
+// MANIFEST failed. It won't have the "SST ahead of WAL" error.
+
+// The combination of corrupting a WAL and injecting an error during subsequent
+// re-open exposes the bug of prematurely persisting a new MANIFEST with
+// advanced ColumnFamilyData::log_number.
+TEST_P(CrashDuringRecoveryWithCorruptionTest, CrashDuringRecoveryWithFlush) {
+  CloseDb();
+  Options options;
+  options.wal_recovery_mode = WALRecoveryMode::kPointInTimeRecovery;
+  options.avoid_flush_during_recovery = false;
+  options.env = env_;
+  options.create_if_missing = true;
+
+  ASSERT_OK(DestroyDB(dbname_, options));
+  Reopen(&options);
+
+  ColumnFamilyHandle* cfh = nullptr;
+  const std::string test_cf_name = "test_cf";
+  Status s = db_->CreateColumnFamily(options, test_cf_name, &cfh);
+  ASSERT_OK(s);
+  delete cfh;
+
+  CloseDb();
+
+  std::vector<ColumnFamilyDescriptor> cf_descs;
+  cf_descs.emplace_back(kDefaultColumnFamilyName, options);
+  cf_descs.emplace_back(test_cf_name, options);
+  std::vector<ColumnFamilyHandle*> handles;
+
+  {
+    ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_));
+
+    // Write one key to test_cf.
+    ASSERT_OK(db_->Put(WriteOptions(), handles[1], "old_key", "dontcare"));
+
+    // Write to default_cf and flush this cf several times to advance wal
+    // number.
+    for (int i = 0; i < 2; ++i) {
+      ASSERT_OK(db_->Put(WriteOptions(), "key" + std::to_string(i),
+                         "value" + std::to_string(i)));
+      ASSERT_OK(db_->Flush(FlushOptions()));
+    }
+
+    ASSERT_OK(db_->Put(WriteOptions(), handles[1], "dontcare", "dontcare"));
+    for (auto* h : handles) {
+      delete h;
+    }
+    handles.clear();
+    CloseDb();
+  }
+
+  // Corrupt the last un-synced wal file (file_nums[size - 1]) to emulate a
+  // power reset which caused the DB to lose the un-synced WAL.
+  {
+    std::vector<uint64_t> file_nums;
+    GetSortedWalFiles(file_nums);
+    size_t size = file_nums.size();
+    uint64_t log_num = file_nums[size - 1];
+    CorruptFileWithTruncation(FileType::kWalFile, log_num,
+                              /*bytes_to_truncate=*/8);
+  }
+
+  // Fault is injected to fail the recovery.
+  {
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
+    SyncPoint::GetInstance()->SetCallBack(
+        "DBImpl::GetLogSizeAndMaybeTruncate:0", [&](void* arg) {
+          auto* tmp_s = reinterpret_cast<Status*>(arg);
+          assert(tmp_s);
+          *tmp_s = Status::IOError("Injected");
+        });
+    SyncPoint::GetInstance()->EnableProcessing();
+
+    handles.clear();
+    options.avoid_flush_during_recovery = true;
+    s = DB::Open(options, dbname_, cf_descs, &handles, &db_);
+    ASSERT_TRUE(s.IsIOError());
+    ASSERT_EQ("IO error: Injected", s.ToString());
+    for (auto* h : handles) {
+      delete h;
+    }
+    CloseDb();
+
+    SyncPoint::GetInstance()->DisableProcessing();
+    SyncPoint::GetInstance()->ClearAllCallBacks();
+  }
+
+  // Reopen db again
+  {
+    options.avoid_flush_during_recovery = avoid_flush_during_recovery_;
+    ASSERT_OK(DB::Open(options, dbname_, cf_descs, &handles, &db_));
+
+    // Verify that data is not lost.
+    {
+      std::string v;
+      ASSERT_OK(db_->Get(ReadOptions(), handles[1], "old_key", &v));
+      ASSERT_EQ("dontcare", v);
+
+      for (int i = 0; i < 2; ++i) {
+        v.clear();
+        ASSERT_OK(db_->Get(ReadOptions(), "key" + std::to_string(i), &v));
+        ASSERT_EQ("value" + std::to_string(i), v);
+      }
+
+      // Since it's corrupting last wal after Flush, below key is not found.
+      v.clear();
+      ASSERT_EQ(db_->Get(ReadOptions(), handles[1], "dontcare", &v),
+                Status::NotFound());
+    }
+
+    for (auto* h : handles) {
+      delete h;
+    }
+  }
+}
+
 }  // namespace ROCKSDB_NAMESPACE
 
 int main(int argc, char** argv) {
diff -pruN 7.2.2-5/db/c_test.c 7.3.1-2/db/c_test.c
--- 7.2.2-5/db/c_test.c	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/c_test.c	2022-06-08 21:08:16.000000000 +0000
@@ -1260,15 +1260,18 @@ int main(int argc, char** argv) {
     rocksdb_writebatch_clear(wb);
     rocksdb_writebatch_put_cf(wb, handles[1], "bar", 3, "b", 1);
     rocksdb_writebatch_put_cf(wb, handles[1], "box", 3, "c", 1);
+    rocksdb_writebatch_put_cf(wb, handles[1], "buff", 4, "rocksdb", 7);
     rocksdb_writebatch_delete_cf(wb, handles[1], "bar", 3);
     rocksdb_write(db, woptions, wb, &err);
     CheckNoError(err);
     CheckGetCF(db, roptions, handles[1], "baz", NULL);
     CheckGetCF(db, roptions, handles[1], "bar", NULL);
     CheckGetCF(db, roptions, handles[1], "box", "c");
+    CheckGetCF(db, roptions, handles[1], "buff", "rocksdb");
     CheckPinGetCF(db, roptions, handles[1], "baz", NULL);
     CheckPinGetCF(db, roptions, handles[1], "bar", NULL);
     CheckPinGetCF(db, roptions, handles[1], "box", "c");
+    CheckPinGetCF(db, roptions, handles[1], "buff", "rocksdb");
     rocksdb_writebatch_destroy(wb);
 
     rocksdb_flush_wal(db, 1, &err);
@@ -1300,6 +1303,26 @@ int main(int argc, char** argv) {
     }
 
     {
+      const char* batched_keys[4] = {"box", "buff", "barfooxx", "box"};
+      const size_t batched_keys_sizes[4] = {3, 4, 8, 3};
+      const char* expected_value[4] = {"c", "rocksdb", NULL, "c"};
+      char* batched_errs[4];
+
+      rocksdb_pinnableslice_t* pvals[4];
+      rocksdb_batched_multi_get_cf(db, roptions, handles[1], 4, batched_keys,
+                                   batched_keys_sizes, pvals, batched_errs,
+                                   false);
+      const char* val;
+      size_t val_len;
+      for (i = 0; i < 4; ++i) {
+        val = rocksdb_pinnableslice_value(pvals[i], &val_len);
+        CheckNoError(batched_errs[i]);
+        CheckEqual(expected_value[i], val, val_len);
+        rocksdb_pinnableslice_destroy(pvals[i]);
+      }
+    }
+
+    {
       unsigned char value_found = 0;
 
       CheckCondition(!rocksdb_key_may_exist(db, roptions, "invalid_key", 11,
@@ -1330,7 +1353,7 @@ int main(int argc, char** argv) {
     for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) {
       i++;
     }
-    CheckCondition(i == 3);
+    CheckCondition(i == 4);
     rocksdb_iter_get_error(iter, &err);
     CheckNoError(err);
     rocksdb_iter_destroy(iter);
@@ -1354,7 +1377,7 @@ int main(int argc, char** argv) {
     for (i = 0; rocksdb_iter_valid(iter) != 0; rocksdb_iter_next(iter)) {
       i++;
     }
-    CheckCondition(i == 3);
+    CheckCondition(i == 4);
     rocksdb_iter_get_error(iter, &err);
     CheckNoError(err);
     rocksdb_iter_destroy(iter);
@@ -2516,6 +2539,9 @@ int main(int argc, char** argv) {
         200 ==
         rocksdb_options_get_compression_options_max_dict_buffer_bytes(co));
 
+    rocksdb_options_set_compression_options_use_zstd_dict_trainer(co, 0);
+    CheckCondition(
+        0 == rocksdb_options_get_compression_options_use_zstd_dict_trainer(co));
     rocksdb_options_destroy(co);
   }
 
diff -pruN 7.2.2-5/db/cuckoo_table_db_test.cc 7.3.1-2/db/cuckoo_table_db_test.cc
--- 7.2.2-5/db/cuckoo_table_db_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/cuckoo_table_db_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -95,8 +95,8 @@ class CuckooTableDBTest : public testing
 
   int NumTableFilesAtLevel(int level) {
     std::string property;
-    EXPECT_TRUE(db_->GetProperty("rocksdb.num-files-at-level" + ToString(level),
-                                 &property));
+    EXPECT_TRUE(db_->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(level), &property));
     return atoi(property.c_str());
   }
 
diff -pruN 7.2.2-5/db/db_basic_test.cc 7.3.1-2/db/db_basic_test.cc
--- 7.2.2-5/db/db_basic_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_basic_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -12,6 +12,7 @@
 #include "db/db_test_util.h"
 #include "options/options_helper.h"
 #include "port/stack_trace.h"
+#include "rocksdb/filter_policy.h"
 #include "rocksdb/flush_block_policy.h"
 #include "rocksdb/merge_operator.h"
 #include "rocksdb/perf_context.h"
@@ -1180,10 +1181,17 @@ TEST_F(DBBasicTest, DBCloseFlushError) {
   Destroy(options);
 }
 
-class DBMultiGetTestWithParam : public DBBasicTest,
-                                public testing::WithParamInterface<bool> {};
+class DBMultiGetTestWithParam
+    : public DBBasicTest,
+      public testing::WithParamInterface<std::tuple<bool, bool>> {};
 
 TEST_P(DBMultiGetTestWithParam, MultiGetMultiCF) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
   Options options = CurrentOptions();
   CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
                          "alyosha", "popovich"},
@@ -1240,7 +1248,8 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
     keys.push_back(std::get<1>(cf_kv_vec[i]));
   }
 
-  values = MultiGet(cfs, keys, nullptr, GetParam());
+  values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()),
+                    std::get<1>(GetParam()));
   ASSERT_EQ(values.size(), num_keys);
   for (unsigned int j = 0; j < values.size(); ++j) {
     ASSERT_EQ(values[j], std::get<2>(cf_kv_vec[j]) + "_2");
@@ -1254,7 +1263,8 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
   keys.push_back(std::get<1>(cf_kv_vec[3]));
   cfs.push_back(std::get<0>(cf_kv_vec[4]));
   keys.push_back(std::get<1>(cf_kv_vec[4]));
-  values = MultiGet(cfs, keys, nullptr, GetParam());
+  values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()),
+                    std::get<1>(GetParam()));
   ASSERT_EQ(values[0], std::get<2>(cf_kv_vec[0]) + "_2");
   ASSERT_EQ(values[1], std::get<2>(cf_kv_vec[3]) + "_2");
   ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[4]) + "_2");
@@ -1267,7 +1277,8 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
   keys.push_back(std::get<1>(cf_kv_vec[6]));
   cfs.push_back(std::get<0>(cf_kv_vec[1]));
   keys.push_back(std::get<1>(cf_kv_vec[1]));
-  values = MultiGet(cfs, keys, nullptr, GetParam());
+  values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()),
+                    std::get<1>(GetParam()));
   ASSERT_EQ(values[0], std::get<2>(cf_kv_vec[7]) + "_2");
   ASSERT_EQ(values[1], std::get<2>(cf_kv_vec[6]) + "_2");
   ASSERT_EQ(values[2], std::get<2>(cf_kv_vec[1]) + "_2");
@@ -1283,6 +1294,12 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
 }
 
 TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFMutex) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
   Options options = CurrentOptions();
   CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
                          "alyosha", "popovich"},
@@ -1328,7 +1345,8 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
     keys.push_back("cf" + std::to_string(i) + "_key");
   }
 
-  values = MultiGet(cfs, keys, nullptr, GetParam());
+  values = MultiGet(cfs, keys, nullptr, std::get<0>(GetParam()),
+                    std::get<1>(GetParam()));
   ASSERT_TRUE(last_try);
   ASSERT_EQ(values.size(), 8);
   for (unsigned int j = 0; j < values.size(); ++j) {
@@ -1345,6 +1363,12 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
 }
 
 TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFSnapshot) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
   Options options = CurrentOptions();
   CreateAndReopenWithCF({"pikachu", "ilya", "muromec", "dobrynia", "nikitich",
                          "alyosha", "popovich"},
@@ -1389,7 +1413,8 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
   }
 
   const Snapshot* snapshot = db_->GetSnapshot();
-  values = MultiGet(cfs, keys, snapshot, GetParam());
+  values = MultiGet(cfs, keys, snapshot, std::get<0>(GetParam()),
+                    std::get<1>(GetParam()));
   db_->ReleaseSnapshot(snapshot);
   ASSERT_EQ(values.size(), 8);
   for (unsigned int j = 0; j < values.size(); ++j) {
@@ -1405,6 +1430,12 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
 }
 
 TEST_P(DBMultiGetTestWithParam, MultiGetMultiCFUnsorted) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
   Options options = CurrentOptions();
   CreateAndReopenWithCF({"one", "two"}, options);
 
@@ -1417,8 +1448,9 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
   std::vector<std::string> keys{"foo", "baz", "abc"};
   std::vector<std::string> values;
 
-  values =
-      MultiGet(cfs, keys, /* snapshot */ nullptr, /* batched */ GetParam());
+  values = MultiGet(cfs, keys, /* snapshot */ nullptr,
+                    /* batched */ std::get<0>(GetParam()),
+                    /* async */ std::get<1>(GetParam()));
 
   ASSERT_EQ(values.size(), 3);
   ASSERT_EQ(values[0], "bar");
@@ -1426,10 +1458,18 @@ TEST_P(DBMultiGetTestWithParam, MultiGet
   ASSERT_EQ(values[2], "def");
 }
 
-INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
-                        testing::Bool());
-
-TEST_F(DBBasicTest, MultiGetBatchedSimpleUnsorted) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedSimpleUnsorted) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   do {
     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
     SetPerfLevel(kEnableCount);
@@ -1448,8 +1488,10 @@ TEST_F(DBBasicTest, MultiGetBatchedSimpl
     std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);
     std::vector<Status> s(keys.size());
 
-    db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
-                  values.data(), s.data(), false);
+    ReadOptions ro;
+    ro.async_io = std::get<1>(GetParam());
+    db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
+                  s.data(), false);
 
     ASSERT_EQ(values.size(), keys.size());
     ASSERT_EQ(std::string(values[5].data(), values[5].size()), "v1");
@@ -1470,7 +1512,18 @@ TEST_F(DBBasicTest, MultiGetBatchedSimpl
   } while (ChangeCompactOptions());
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedSortedMultiFile) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedSortedMultiFile) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   do {
     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
     SetPerfLevel(kEnableCount);
@@ -1493,8 +1546,10 @@ TEST_F(DBBasicTest, MultiGetBatchedSorte
     std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);
     std::vector<Status> s(keys.size());
 
-    db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
-                  values.data(), s.data(), true);
+    ReadOptions ro;
+    ro.async_io = std::get<1>(GetParam());
+    db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
+                  s.data(), true);
 
     ASSERT_EQ(values.size(), keys.size());
     ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v1");
@@ -1515,7 +1570,18 @@ TEST_F(DBBasicTest, MultiGetBatchedSorte
   } while (ChangeOptions());
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedDuplicateKeys) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedDuplicateKeys) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   Options opts = CurrentOptions();
   opts.merge_operator = MergeOperators::CreateStringAppendOperator();
   CreateAndReopenWithCF({"pikachu"}, opts);
@@ -1546,8 +1612,10 @@ TEST_F(DBBasicTest, MultiGetBatchedDupli
   std::vector<ColumnFamilyHandle*> cfs(keys.size(), handles_[1]);
   std::vector<Status> s(keys.size());
 
-  db_->MultiGet(ReadOptions(), handles_[1], keys.size(), keys.data(),
-                values.data(), s.data(), false);
+  ReadOptions ro;
+  ro.async_io = std::get<1>(GetParam());
+  db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
+                s.data(), false);
 
   ASSERT_EQ(values.size(), keys.size());
   ASSERT_EQ(std::string(values[0].data(), values[0].size()), "v8");
@@ -1566,7 +1634,18 @@ TEST_F(DBBasicTest, MultiGetBatchedDupli
   SetPerfLevel(kDisable);
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedMultiLevel) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedMultiLevel) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   Options options = CurrentOptions();
   options.disable_auto_compactions = true;
   Reopen(options);
@@ -1625,7 +1704,7 @@ TEST_F(DBBasicTest, MultiGetBatchedMulti
     keys.push_back("key_" + std::to_string(i));
   }
 
-  values = MultiGet(keys, nullptr);
+  values = MultiGet(keys, nullptr, std::get<1>(GetParam()));
   ASSERT_EQ(values.size(), 16);
   for (unsigned int j = 0; j < values.size(); ++j) {
     int key = j + 64;
@@ -1641,7 +1720,18 @@ TEST_F(DBBasicTest, MultiGetBatchedMulti
   }
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedMultiLevelMerge) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedMultiLevelMerge) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   Options options = CurrentOptions();
   options.disable_auto_compactions = true;
   options.merge_operator = MergeOperators::CreateStringAppendOperator();
@@ -1705,7 +1795,7 @@ TEST_F(DBBasicTest, MultiGetBatchedMulti
     keys.push_back("key_" + std::to_string(i));
   }
 
-  values = MultiGet(keys, nullptr);
+  values = MultiGet(keys, nullptr, std::get<1>(GetParam()));
   ASSERT_EQ(values.size(), keys.size());
   for (unsigned int j = 0; j < 48; ++j) {
     int key = j + 32;
@@ -1727,7 +1817,18 @@ TEST_F(DBBasicTest, MultiGetBatchedMulti
   }
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedValueSizeInMemory) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedValueSizeInMemory) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
   SetPerfLevel(kEnableCount);
   ASSERT_OK(Put(1, "k1", "v_1"));
@@ -1744,6 +1845,7 @@ TEST_F(DBBasicTest, MultiGetBatchedValue
   get_perf_context()->Reset();
   ReadOptions ro;
   ro.value_size_soft_limit = 11;
+  ro.async_io = std::get<1>(GetParam());
   db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
                 s.data(), false);
 
@@ -1761,7 +1863,17 @@ TEST_F(DBBasicTest, MultiGetBatchedValue
   SetPerfLevel(kDisable);
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedValueSize) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedValueSize) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    return;
+  }
   do {
     CreateAndReopenWithCF({"pikachu"}, CurrentOptions());
     SetPerfLevel(kEnableCount);
@@ -1801,6 +1913,7 @@ TEST_F(DBBasicTest, MultiGetBatchedValue
 
     ReadOptions ro;
     ro.value_size_soft_limit = 20;
+    ro.async_io = std::get<1>(GetParam());
     db_->MultiGet(ro, handles_[1], keys.size(), keys.data(), values.data(),
                   s.data(), false);
 
@@ -1836,7 +1949,18 @@ TEST_F(DBBasicTest, MultiGetBatchedValue
   } while (ChangeCompactOptions());
 }
 
-TEST_F(DBBasicTest, MultiGetBatchedValueSizeMultiLevelMerge) {
+TEST_P(DBMultiGetTestWithParam, MultiGetBatchedValueSizeMultiLevelMerge) {
+#ifndef USE_COROUTINES
+  if (std::get<1>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
+  // Skip for unbatched MultiGet
+  if (!std::get<0>(GetParam())) {
+    ROCKSDB_GTEST_SKIP("This test is only for batched MultiGet");
+    return;
+  }
   Options options = CurrentOptions();
   options.disable_auto_compactions = true;
   options.merge_operator = MergeOperators::CreateStringAppendOperator();
@@ -1908,6 +2032,7 @@ TEST_F(DBBasicTest, MultiGetBatchedValue
   ReadOptions read_options;
   read_options.verify_checksums = true;
   read_options.value_size_soft_limit = 380;
+  read_options.async_io = std::get<1>(GetParam());
   db_->MultiGet(read_options, dbfull()->DefaultColumnFamily(), keys.size(),
                 keys.data(), values.data(), statuses.data());
 
@@ -1939,6 +2064,217 @@ TEST_F(DBBasicTest, MultiGetBatchedValue
   }
 }
 
+INSTANTIATE_TEST_CASE_P(DBMultiGetTestWithParam, DBMultiGetTestWithParam,
+                        testing::Combine(testing::Bool(), testing::Bool()));
+
+#if USE_COROUTINES
+class DBMultiGetAsyncIOTest : public DBBasicTest {
+ public:
+  DBMultiGetAsyncIOTest()
+      : DBBasicTest(), statistics_(ROCKSDB_NAMESPACE::CreateDBStatistics()) {
+    BlockBasedTableOptions bbto;
+    bbto.filter_policy.reset(NewBloomFilterPolicy(10));
+    Options options = CurrentOptions();
+    options.disable_auto_compactions = true;
+    options.statistics = statistics_;
+    options.table_factory.reset(NewBlockBasedTableFactory(bbto));
+    Reopen(options);
+    int num_keys = 0;
+
+    // Put all keys in the bottommost level, and overwrite some keys
+    // in L0 and L1
+    for (int i = 0; i < 128; ++i) {
+      EXPECT_OK(Put(Key(i), "val_l2_" + std::to_string(i)));
+      num_keys++;
+      if (num_keys == 8) {
+        EXPECT_OK(Flush());
+        num_keys = 0;
+      }
+    }
+    if (num_keys > 0) {
+      EXPECT_OK(Flush());
+      num_keys = 0;
+    }
+    MoveFilesToLevel(2);
+
+    for (int i = 0; i < 128; i += 3) {
+      EXPECT_OK(Put(Key(i), "val_l1_" + std::to_string(i)));
+      num_keys++;
+      if (num_keys == 8) {
+        EXPECT_OK(Flush());
+        num_keys = 0;
+      }
+    }
+    if (num_keys > 0) {
+      EXPECT_OK(Flush());
+      num_keys = 0;
+    }
+    MoveFilesToLevel(1);
+
+    for (int i = 0; i < 128; i += 5) {
+      EXPECT_OK(Put(Key(i), "val_l0_" + std::to_string(i)));
+      num_keys++;
+      if (num_keys == 8) {
+        EXPECT_OK(Flush());
+        num_keys = 0;
+      }
+    }
+    if (num_keys > 0) {
+      EXPECT_OK(Flush());
+      num_keys = 0;
+    }
+    EXPECT_EQ(0, num_keys);
+  }
+
+  const std::shared_ptr<Statistics>& statistics() { return statistics_; }
+
+ private:
+  std::shared_ptr<Statistics> statistics_;
+};
+
+TEST_F(DBMultiGetAsyncIOTest, GetFromL0) {
+  // All 3 keys in L0. The L0 files should be read serially.
+  std::vector<std::string> key_strs{Key(0), Key(40), Key(80)};
+  std::vector<Slice> keys{key_strs[0], key_strs[1], key_strs[2]};
+  std::vector<PinnableSlice> values(key_strs.size());
+  std::vector<Status> statuses(key_strs.size());
+
+  ReadOptions ro;
+  ro.async_io = true;
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data());
+  ASSERT_EQ(values.size(), 3);
+  ASSERT_OK(statuses[0]);
+  ASSERT_OK(statuses[1]);
+  ASSERT_OK(statuses[2]);
+  ASSERT_EQ(values[0], "val_l0_" + std::to_string(0));
+  ASSERT_EQ(values[1], "val_l0_" + std::to_string(40));
+  ASSERT_EQ(values[2], "val_l0_" + std::to_string(80));
+
+  HistogramData multiget_io_batch_size;
+
+  statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
+
+  // No async IO in this case since we don't do parallel lookup in L0
+  ASSERT_EQ(multiget_io_batch_size.count, 0);
+  ASSERT_EQ(multiget_io_batch_size.max, 0);
+}
+
+TEST_F(DBMultiGetAsyncIOTest, GetFromL1) {
+  std::vector<std::string> key_strs;
+  std::vector<Slice> keys;
+  std::vector<PinnableSlice> values;
+  std::vector<Status> statuses;
+
+  key_strs.push_back(Key(33));
+  key_strs.push_back(Key(54));
+  key_strs.push_back(Key(102));
+  keys.push_back(key_strs[0]);
+  keys.push_back(key_strs[1]);
+  keys.push_back(key_strs[2]);
+  values.resize(keys.size());
+  statuses.resize(keys.size());
+
+  ReadOptions ro;
+  ro.async_io = true;
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data());
+  ASSERT_EQ(values.size(), 3);
+  ASSERT_EQ(statuses[0], Status::OK());
+  ASSERT_EQ(statuses[1], Status::OK());
+  ASSERT_EQ(statuses[2], Status::OK());
+  ASSERT_EQ(values[0], "val_l1_" + std::to_string(33));
+  ASSERT_EQ(values[1], "val_l1_" + std::to_string(54));
+  ASSERT_EQ(values[2], "val_l1_" + std::to_string(102));
+
+  HistogramData multiget_io_batch_size;
+
+  statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
+
+  // A batch of 3 async IOs is expected, one for each overlapping file in L1
+  ASSERT_EQ(multiget_io_batch_size.count, 1);
+  ASSERT_EQ(multiget_io_batch_size.max, 3);
+}
+
+TEST_F(DBMultiGetAsyncIOTest, LastKeyInFile) {
+  std::vector<std::string> key_strs;
+  std::vector<Slice> keys;
+  std::vector<PinnableSlice> values;
+  std::vector<Status> statuses;
+
+  // 21 is the last key in the first L1 file
+  key_strs.push_back(Key(21));
+  key_strs.push_back(Key(54));
+  key_strs.push_back(Key(102));
+  keys.push_back(key_strs[0]);
+  keys.push_back(key_strs[1]);
+  keys.push_back(key_strs[2]);
+  values.resize(keys.size());
+  statuses.resize(keys.size());
+
+  ReadOptions ro;
+  ro.async_io = true;
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data());
+  ASSERT_EQ(values.size(), 3);
+  ASSERT_EQ(statuses[0], Status::OK());
+  ASSERT_EQ(statuses[1], Status::OK());
+  ASSERT_EQ(statuses[2], Status::OK());
+  ASSERT_EQ(values[0], "val_l1_" + std::to_string(21));
+  ASSERT_EQ(values[1], "val_l1_" + std::to_string(54));
+  ASSERT_EQ(values[2], "val_l1_" + std::to_string(102));
+
+  HistogramData multiget_io_batch_size;
+
+  statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
+
+  // Since the first MultiGet key is the last key in a file, the MultiGet is
+  // expected to lookup in that file first, before moving on to other files.
+  // So the first file lookup will issue one async read, and the next lookup
+  // will lookup 2 files in parallel and issue 2 async reads
+  ASSERT_EQ(multiget_io_batch_size.count, 2);
+  ASSERT_EQ(multiget_io_batch_size.max, 2);
+}
+
+TEST_F(DBMultiGetAsyncIOTest, GetFromL1AndL2) {
+  std::vector<std::string> key_strs;
+  std::vector<Slice> keys;
+  std::vector<PinnableSlice> values;
+  std::vector<Status> statuses;
+
+  // 33 and 102 are in L1, and 56 is in L2
+  key_strs.push_back(Key(33));
+  key_strs.push_back(Key(56));
+  key_strs.push_back(Key(102));
+  keys.push_back(key_strs[0]);
+  keys.push_back(key_strs[1]);
+  keys.push_back(key_strs[2]);
+  values.resize(keys.size());
+  statuses.resize(keys.size());
+
+  ReadOptions ro;
+  ro.async_io = true;
+  dbfull()->MultiGet(ro, dbfull()->DefaultColumnFamily(), keys.size(),
+                     keys.data(), values.data(), statuses.data());
+  ASSERT_EQ(values.size(), 3);
+  ASSERT_EQ(statuses[0], Status::OK());
+  ASSERT_EQ(statuses[1], Status::OK());
+  ASSERT_EQ(statuses[2], Status::OK());
+  ASSERT_EQ(values[0], "val_l1_" + std::to_string(33));
+  ASSERT_EQ(values[1], "val_l2_" + std::to_string(56));
+  ASSERT_EQ(values[2], "val_l1_" + std::to_string(102));
+
+  HistogramData multiget_io_batch_size;
+
+  statistics()->histogramData(MULTIGET_IO_BATCH_SIZE, &multiget_io_batch_size);
+
+  // There is only one MultiGet key in the bottommost level - 56. Thus
+  // the bottommost level will not use async IO.
+  ASSERT_EQ(multiget_io_batch_size.count, 1);
+  ASSERT_EQ(multiget_io_batch_size.max, 2);
+}
+#endif  // USE_COROUTINES
+
 TEST_F(DBBasicTest, MultiGetStats) {
   Options options;
   options.create_if_missing = true;
@@ -3308,6 +3644,11 @@ class DeadlineRandomAccessFile : public
   IOStatus MultiRead(FSReadRequest* reqs, size_t num_reqs,
                      const IOOptions& options, IODebugContext* dbg) override;
 
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     IODebugContext* dbg) override;
+
  private:
   DeadlineFS& fs_;
   std::unique_ptr<FSRandomAccessFile> file_;
@@ -3448,6 +3789,26 @@ IOStatus DeadlineRandomAccessFile::Read(
   return s;
 }
 
+IOStatus DeadlineRandomAccessFile::ReadAsync(
+    FSReadRequest& req, const IOOptions& opts,
+    std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, IOHandleDeleter* del_fn, IODebugContext* dbg) {
+  const std::chrono::microseconds deadline = fs_.GetDeadline();
+  const std::chrono::microseconds io_timeout = fs_.GetIOTimeout();
+  IOStatus s;
+  if (deadline.count() || io_timeout.count()) {
+    fs_.AssertDeadline(deadline, io_timeout, opts);
+  }
+  if (s.ok()) {
+    s = FSRandomAccessFileWrapper::ReadAsync(req, opts, cb, cb_arg, io_handle,
+                                             del_fn, dbg);
+  }
+  if (s.ok()) {
+    s = fs_.ShouldDelay(opts);
+  }
+  return s;
+}
+
 IOStatus DeadlineRandomAccessFile::MultiRead(FSReadRequest* reqs,
                                              size_t num_reqs,
                                              const IOOptions& options,
@@ -3469,7 +3830,8 @@ IOStatus DeadlineRandomAccessFile::Multi
 
 // A test class for intercepting random reads and injecting artificial
 // delays. Used for testing the MultiGet deadline feature
-class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet {
+class DBBasicTestMultiGetDeadline : public DBBasicTestMultiGet,
+                                    public testing::WithParamInterface<bool> {
  public:
   DBBasicTestMultiGetDeadline()
       : DBBasicTestMultiGet(
@@ -3492,7 +3854,13 @@ class DBBasicTestMultiGetDeadline : publ
   }
 };
 
-TEST_F(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
+TEST_P(DBBasicTestMultiGetDeadline, MultiGetDeadlineExceeded) {
+#ifndef USE_COROUTINES
+  if (GetParam()) {
+    ROCKSDB_GTEST_SKIP("This test requires coroutine support");
+    return;
+  }
+#endif  // USE_COROUTINES
   std::shared_ptr<DeadlineFS> fs = std::make_shared<DeadlineFS>(env_, false);
   std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
   Options options = CurrentOptions();
@@ -3523,6 +3891,7 @@ TEST_F(DBBasicTestMultiGetDeadline, Mult
 
   ReadOptions ro;
   ro.deadline = std::chrono::microseconds{env->NowMicros() + 10000};
+  ro.async_io = GetParam();
   // Delay the first IO
   fs->SetDelayTrigger(ro.deadline, ro.io_timeout, 0);
 
@@ -3625,6 +3994,9 @@ TEST_F(DBBasicTestMultiGetDeadline, Mult
   Close();
 }
 
+INSTANTIATE_TEST_CASE_P(DeadlineIO, DBBasicTestMultiGetDeadline,
+                        ::testing::Bool());
+
 TEST_F(DBBasicTest, ManifestWriteFailure) {
   Options options = GetDefaultOptions();
   options.create_if_missing = true;
@@ -3724,6 +4096,27 @@ TEST_F(DBBasicTest, VerifyFileChecksums)
   Reopen(options);
   ASSERT_TRUE(db_->VerifyFileChecksums(ReadOptions()).IsInvalidArgument());
 }
+
+TEST_F(DBBasicTest, ManualWalSync) {
+  Options options = CurrentOptions();
+  options.track_and_verify_wals_in_manifest = true;
+  options.wal_recovery_mode = WALRecoveryMode::kAbsoluteConsistency;
+  DestroyAndReopen(options);
+
+  ASSERT_OK(Put("x", "y"));
+  // This does not create a new WAL.
+  ASSERT_OK(db_->SyncWAL());
+  EXPECT_FALSE(dbfull()->GetVersionSet()->GetWalSet().GetWals().empty());
+
+  std::unique_ptr<LogFile> wal;
+  Status s = db_->GetCurrentWalFile(&wal);
+  ASSERT_OK(s);
+  Close();
+
+  EXPECT_OK(env_->DeleteFile(LogFileName(dbname_, wal->LogNumber())));
+
+  ASSERT_TRUE(TryReopen(options).IsCorruption());
+}
 #endif  // !ROCKSDB_LITE
 
 // A test class for intercepting random reads and injecting artificial
@@ -3783,7 +4176,7 @@ TEST_P(DBBasicTestDeadline, PointLookupD
 
     Random rnd(301);
     for (int i = 0; i < 400; ++i) {
-      std::string key = "k" + ToString(i);
+      std::string key = "k" + std::to_string(i);
       ASSERT_OK(Put(key, rnd.RandomString(100)));
     }
     ASSERT_OK(Flush());
@@ -3866,7 +4259,7 @@ TEST_P(DBBasicTestDeadline, IteratorDead
 
     Random rnd(301);
     for (int i = 0; i < 400; ++i) {
-      std::string key = "k" + ToString(i);
+      std::string key = "k" + std::to_string(i);
       ASSERT_OK(Put(key, rnd.RandomString(100)));
     }
     ASSERT_OK(Flush());
diff -pruN 7.2.2-5/db/db_block_cache_test.cc 7.3.1-2/db/db_block_cache_test.cc
--- 7.2.2-5/db/db_block_cache_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_block_cache_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -13,6 +13,7 @@
 
 #include "cache/cache_entry_roles.h"
 #include "cache/cache_key.h"
+#include "cache/fast_lru_cache.h"
 #include "cache/lru_cache.h"
 #include "db/column_family.h"
 #include "db/db_impl/db_impl.h"
@@ -75,7 +76,7 @@ class DBBlockCacheTest : public DBTestBa
   void InitTable(const Options& /*options*/) {
     std::string value(kValueSize, 'a');
     for (size_t i = 0; i < kNumBlocks; i++) {
-      ASSERT_OK(Put(ToString(i), value.c_str()));
+      ASSERT_OK(Put(std::to_string(i), value.c_str()));
     }
   }
 
@@ -204,7 +205,7 @@ TEST_F(DBBlockCacheTest, IteratorBlockCa
 
   ASSERT_EQ(0, cache->GetUsage());
   iter = db_->NewIterator(read_options);
-  iter->Seek(ToString(0));
+  iter->Seek(std::to_string(0));
   ASSERT_LT(0, cache->GetUsage());
   delete iter;
   iter = nullptr;
@@ -235,7 +236,7 @@ TEST_F(DBBlockCacheTest, TestWithoutComp
   // Load blocks into cache.
   for (size_t i = 0; i + 1 < kNumBlocks; i++) {
     iter = db_->NewIterator(read_options);
-    iter->Seek(ToString(i));
+    iter->Seek(std::to_string(i));
     ASSERT_OK(iter->status());
     CheckCacheCounters(options, 1, 0, 1, 0);
     iterators[i].reset(iter);
@@ -248,7 +249,7 @@ TEST_F(DBBlockCacheTest, TestWithoutComp
   // Test with strict capacity limit.
   cache->SetStrictCapacityLimit(true);
   iter = db_->NewIterator(read_options);
-  iter->Seek(ToString(kNumBlocks - 1));
+  iter->Seek(std::to_string(kNumBlocks - 1));
   ASSERT_TRUE(iter->status().IsIncomplete());
   CheckCacheCounters(options, 1, 0, 0, 1);
   delete iter;
@@ -262,7 +263,7 @@ TEST_F(DBBlockCacheTest, TestWithoutComp
   ASSERT_EQ(0, cache->GetPinnedUsage());
   for (size_t i = 0; i + 1 < kNumBlocks; i++) {
     iter = db_->NewIterator(read_options);
-    iter->Seek(ToString(i));
+    iter->Seek(std::to_string(i));
     ASSERT_OK(iter->status());
     CheckCacheCounters(options, 0, 1, 0, 0);
     iterators[i].reset(iter);
@@ -288,7 +289,7 @@ TEST_F(DBBlockCacheTest, TestWithCompres
 
   std::string value(kValueSize, 'a');
   for (size_t i = 0; i < kNumBlocks; i++) {
-    ASSERT_OK(Put(ToString(i), value));
+    ASSERT_OK(Put(std::to_string(i), value));
     ASSERT_OK(Flush());
   }
 
@@ -312,7 +313,7 @@ TEST_F(DBBlockCacheTest, TestWithCompres
 
   // Load blocks into cache.
   for (size_t i = 0; i < kNumBlocks - 1; i++) {
-    ASSERT_EQ(value, Get(ToString(i)));
+    ASSERT_EQ(value, Get(std::to_string(i)));
     CheckCacheCounters(options, 1, 0, 1, 0);
     CheckCompressedCacheCounters(options, 1, 0, 1, 0);
   }
@@ -333,7 +334,7 @@ TEST_F(DBBlockCacheTest, TestWithCompres
 
   // Load last key block.
   ASSERT_EQ("Result incomplete: Insert failed due to LRU cache being full.",
-            Get(ToString(kNumBlocks - 1)));
+            Get(std::to_string(kNumBlocks - 1)));
   // Failure will also record the miss counter.
   CheckCacheCounters(options, 1, 0, 0, 1);
   CheckCompressedCacheCounters(options, 1, 0, 1, 0);
@@ -342,7 +343,7 @@ TEST_F(DBBlockCacheTest, TestWithCompres
   // cache and load into block cache.
   cache->SetStrictCapacityLimit(false);
   // Load last key block.
-  ASSERT_EQ(value, Get(ToString(kNumBlocks - 1)));
+  ASSERT_EQ(value, Get(std::to_string(kNumBlocks - 1)));
   CheckCacheCounters(options, 1, 0, 1, 0);
   CheckCompressedCacheCounters(options, 0, 1, 0, 0);
 }
@@ -567,7 +568,7 @@ TEST_F(DBBlockCacheTest, FillCacheAndIte
   Iterator* iter = nullptr;
 
   iter = db_->NewIterator(read_options);
-  iter->Seek(ToString(0));
+  iter->Seek(std::to_string(0));
   while (iter->Valid()) {
     iter->Next();
   }
@@ -645,10 +646,10 @@ TEST_F(DBBlockCacheTest, WarmCacheWithDa
 
   std::string value(kValueSize, 'a');
   for (size_t i = 1; i <= kNumBlocks; i++) {
-    ASSERT_OK(Put(ToString(i), value));
+    ASSERT_OK(Put(std::to_string(i), value));
     ASSERT_OK(Flush());
     ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
-    ASSERT_EQ(value, Get(ToString(i)));
+    ASSERT_EQ(value, Get(std::to_string(i)));
     ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
     ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
   }
@@ -705,7 +706,7 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithB
 
   std::string value(kValueSize, 'a');
   for (size_t i = 1; i <= kNumBlocks; i++) {
-    ASSERT_OK(Put(ToString(i), value));
+    ASSERT_OK(Put(std::to_string(i), value));
     ASSERT_OK(Flush());
     ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_ADD));
     if (filter_type == 1) {
@@ -717,7 +718,7 @@ TEST_P(DBBlockCacheTest1, WarmCacheWithB
       ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_INDEX_ADD));
       ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_FILTER_ADD));
     }
-    ASSERT_EQ(value, Get(ToString(i)));
+    ASSERT_EQ(value, Get(std::to_string(i)));
 
     ASSERT_EQ(0, options.statistics->getTickerCount(BLOCK_CACHE_DATA_MISS));
     ASSERT_EQ(i, options.statistics->getTickerCount(BLOCK_CACHE_DATA_HIT));
@@ -772,12 +773,12 @@ TEST_F(DBBlockCacheTest, DynamicallyWarm
   std::string value(kValueSize, 'a');
 
   for (size_t i = 1; i <= 5; i++) {
-    ASSERT_OK(Put(ToString(i), value));
+    ASSERT_OK(Put(std::to_string(i), value));
     ASSERT_OK(Flush());
     ASSERT_EQ(1,
               options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
 
-    ASSERT_EQ(value, Get(ToString(i)));
+    ASSERT_EQ(value, Get(std::to_string(i)));
     ASSERT_EQ(0,
               options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
     ASSERT_EQ(
@@ -790,12 +791,12 @@ TEST_F(DBBlockCacheTest, DynamicallyWarm
       {{"block_based_table_factory", "{prepopulate_block_cache=kDisable;}"}}));
 
   for (size_t i = 6; i <= kNumBlocks; i++) {
-    ASSERT_OK(Put(ToString(i), value));
+    ASSERT_OK(Put(std::to_string(i), value));
     ASSERT_OK(Flush());
     ASSERT_EQ(0,
               options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
 
-    ASSERT_EQ(value, Get(ToString(i)));
+    ASSERT_EQ(value, Get(std::to_string(i)));
     ASSERT_EQ(1,
               options.statistics->getAndResetTickerCount(BLOCK_CACHE_DATA_ADD));
     ASSERT_EQ(
@@ -934,7 +935,8 @@ TEST_F(DBBlockCacheTest, AddRedundantSta
   int iterations_tested = 0;
   for (std::shared_ptr<Cache> base_cache :
        {NewLRUCache(capacity, num_shard_bits),
-        NewClockCache(capacity, num_shard_bits)}) {
+        NewClockCache(capacity, num_shard_bits),
+        NewFastLRUCache(capacity, num_shard_bits)}) {
     if (!base_cache) {
       // Skip clock cache when not supported
       continue;
@@ -1288,7 +1290,8 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleS
   int iterations_tested = 0;
   for (bool partition : {false, true}) {
     for (std::shared_ptr<Cache> cache :
-         {NewLRUCache(capacity), NewClockCache(capacity)}) {
+         {NewLRUCache(capacity), NewClockCache(capacity),
+          NewFastLRUCache(capacity)}) {
       if (!cache) {
         // Skip clock cache when not supported
         continue;
@@ -1406,7 +1409,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleS
 
       for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
         auto role = static_cast<CacheEntryRole>(i);
-        EXPECT_EQ(ToString(expected[i]),
+        EXPECT_EQ(std::to_string(expected[i]),
                   values[BlockCacheEntryStatsMapKeys::EntryCount(role)]);
       }
 
@@ -1419,7 +1422,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleS
         // re-scanning stats, but not totally aggressive.
         // Within some time window, we will get cached entry stats
         env_->MockSleepForSeconds(1);
-        EXPECT_EQ(ToString(prev_expected[static_cast<size_t>(
+        EXPECT_EQ(std::to_string(prev_expected[static_cast<size_t>(
                       CacheEntryRole::kWriteBuffer)]),
                   values[BlockCacheEntryStatsMapKeys::EntryCount(
                       CacheEntryRole::kWriteBuffer)]);
@@ -1429,7 +1432,7 @@ TEST_F(DBBlockCacheTest, CacheEntryRoleS
         ASSERT_TRUE(db_->GetMapProperty(DB::Properties::kBlockCacheEntryStats,
                                         &values));
         EXPECT_EQ(
-            ToString(
+            std::to_string(
                 expected[static_cast<size_t>(CacheEntryRole::kWriteBuffer)]),
             values[BlockCacheEntryStatsMapKeys::EntryCount(
                 CacheEntryRole::kWriteBuffer)]);
@@ -1637,7 +1640,7 @@ TEST_P(DBBlockCacheKeyTest, StableCacheK
   SstFileWriter sst_file_writer(EnvOptions(), options);
   std::vector<std::string> external;
   for (int i = 0; i < 2; ++i) {
-    std::string f = dbname_ + "/external" + ToString(i) + ".sst";
+    std::string f = dbname_ + "/external" + std::to_string(i) + ".sst";
     external.push_back(f);
     ASSERT_OK(sst_file_writer.Open(f));
     ASSERT_OK(sst_file_writer.Put(Key(key_count), "abc"));
@@ -1721,7 +1724,7 @@ class CacheKeyTest : public testing::Tes
     // Like SemiStructuredUniqueIdGen::GenerateNext
     tp_.db_session_id = EncodeSessionId(base_session_upper_,
                                         base_session_lower_ ^ session_counter_);
-    tp_.db_id = ToString(db_id_);
+    tp_.db_id = std::to_string(db_id_);
     tp_.orig_file_number = file_number_;
     bool is_stable;
     std::string cur_session_id = "";  // ignored
diff -pruN 7.2.2-5/db/db_bloom_filter_test.cc 7.3.1-2/db/db_bloom_filter_test.cc
--- 7.2.2-5/db/db_bloom_filter_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_bloom_filter_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -111,6 +111,7 @@ TEST_P(DBBloomFilterTestDefFormatVersion
     options_override.filter_policy = Create(20, bfp_impl_);
     options_override.partition_filters = partition_filters_;
     options_override.metadata_block_size = 32;
+    options_override.full_block_cache = true;
     Options options = CurrentOptions(options_override);
     if (partition_filters_) {
       auto* table_options =
@@ -857,7 +858,7 @@ TEST_F(DBBloomFilterTest, BloomFilterCom
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
     Reopen(options);
 
-    std::string prefix = ToString(i) + "_";
+    std::string prefix = std::to_string(i) + "_";
     ASSERT_OK(Put(prefix + "A", "val"));
     ASSERT_OK(Put(prefix + "Z", "val"));
     ASSERT_OK(Flush());
@@ -872,7 +873,7 @@ TEST_F(DBBloomFilterTest, BloomFilterCom
     options.table_factory.reset(NewBlockBasedTableFactory(table_options));
     Reopen(options);
     for (size_t j = 0; j < kCompatibilityConfigs.size(); ++j) {
-      std::string prefix = ToString(j) + "_";
+      std::string prefix = std::to_string(j) + "_";
       ASSERT_EQ("val", Get(prefix + "A"));  // Filter positive
       ASSERT_EQ("val", Get(prefix + "Z"));  // Filter positive
       // Filter negative, with high probability
@@ -885,103 +886,27 @@ TEST_F(DBBloomFilterTest, BloomFilterCom
   }
 }
 
-/*
- * A cache wrapper that tracks peaks and increments of filter
- * construction cache reservation.
- *        p0
- *       / \   p1
- *      /   \  /\
- *     /     \/  \
- *  a /       b   \
- * peaks = {p0, p1}
- * increments = {p1-a, p2-b}
- */
-class FilterConstructResPeakTrackingCache : public CacheWrapper {
- public:
-  explicit FilterConstructResPeakTrackingCache(std::shared_ptr<Cache> target)
-      : CacheWrapper(std::move(target)),
-        cur_cache_res_(0),
-        cache_res_peak_(0),
-        cache_res_increment_(0),
-        last_peak_tracked_(false),
-        cache_res_increments_sum_(0) {}
-
-  using Cache::Insert;
-  Status Insert(const Slice& key, void* value, size_t charge,
-                void (*deleter)(const Slice& key, void* value),
-                Handle** handle = nullptr,
-                Priority priority = Priority::LOW) override {
-    Status s = target_->Insert(key, value, charge, deleter, handle, priority);
-    if (deleter == kNoopDeleterForFilterConstruction) {
-      if (last_peak_tracked_) {
-        cache_res_peak_ = 0;
-        cache_res_increment_ = 0;
-        last_peak_tracked_ = false;
-      }
-      cur_cache_res_ += charge;
-      cache_res_peak_ = std::max(cache_res_peak_, cur_cache_res_);
-      cache_res_increment_ += charge;
-    }
-    return s;
-  }
-
-  using Cache::Release;
-  bool Release(Handle* handle, bool erase_if_last_ref = false) override {
-    auto deleter = GetDeleter(handle);
-    if (deleter == kNoopDeleterForFilterConstruction) {
-      if (!last_peak_tracked_) {
-        cache_res_peaks_.push_back(cache_res_peak_);
-        cache_res_increments_sum_ += cache_res_increment_;
-        last_peak_tracked_ = true;
-      }
-      cur_cache_res_ -= GetCharge(handle);
-    }
-    bool is_successful = target_->Release(handle, erase_if_last_ref);
-    return is_successful;
-  }
-
-  std::deque<std::size_t> GetReservedCachePeaks() { return cache_res_peaks_; }
-
-  std::size_t GetReservedCacheIncrementSum() {
-    return cache_res_increments_sum_;
-  }
-
- private:
-  static const Cache::DeleterFn kNoopDeleterForFilterConstruction;
-
-  std::size_t cur_cache_res_;
-  std::size_t cache_res_peak_;
-  std::size_t cache_res_increment_;
-  bool last_peak_tracked_;
-  std::deque<std::size_t> cache_res_peaks_;
-  std::size_t cache_res_increments_sum_;
-};
-
-const Cache::DeleterFn
-    FilterConstructResPeakTrackingCache::kNoopDeleterForFilterConstruction =
-        CacheReservationManagerImpl<
-            CacheEntryRole::kFilterConstruction>::TEST_GetNoopDeleterForRole();
-
 // To align with the type of hash entry being reserved in implementation.
 using FilterConstructionReserveMemoryHash = uint64_t;
 
-class DBFilterConstructionReserveMemoryTestWithParam
+class ChargeFilterConstructionTestWithParam
     : public DBTestBase,
-      public testing::WithParamInterface<
-          std::tuple<bool, std::string, bool, bool>> {
+      public testing::WithParamInterface<std::tuple<
+          CacheEntryRoleOptions::Decision, std::string, bool, bool>> {
  public:
-  DBFilterConstructionReserveMemoryTestWithParam()
+  ChargeFilterConstructionTestWithParam()
       : DBTestBase("db_bloom_filter_tests",
                    /*env_do_fsync=*/true),
         num_key_(0),
-        reserve_table_builder_memory_(std::get<0>(GetParam())),
+        charge_filter_construction_(std::get<0>(GetParam())),
         policy_(std::get<1>(GetParam())),
         partition_filters_(std::get<2>(GetParam())),
         detect_filter_construct_corruption_(std::get<3>(GetParam())) {
-    if (!reserve_table_builder_memory_ || policy_ == kDeprecatedBlock ||
-        policy_ == kLegacyBloom) {
+    if (charge_filter_construction_ ==
+            CacheEntryRoleOptions::Decision::kDisabled ||
+        policy_ == kDeprecatedBlock || policy_ == kLegacyBloom) {
       // For these cases, we only interested in whether filter construction
-      // cache resevation happens instead of its accuracy. Therefore we don't
+      // cache charging happens instead of its accuracy. Therefore we don't
       // need many keys.
       num_key_ = 5;
     } else if (partition_filters_) {
@@ -996,11 +921,11 @@ class DBFilterConstructionReserveMemoryT
                  sizeof(FilterConstructionReserveMemoryHash);
     } else if (policy_ == kFastLocalBloom) {
       // For Bloom Filter + FullFilter case, since we design the num_key_ to
-      // make hash entry cache reservation be a multiple of dummy entries, the
+      // make hash entry cache charging be a multiple of dummy entries, the
       // correct behavior of charging final filter on top of it will trigger at
       // least another dummy entry insertion. Therefore we can assert that
       // behavior and we don't need a large number of keys to verify we
-      // indeed charge the final filter for cache reservation, even though final
+      // indeed charge the final filter for in cache, even though final
       // filter is a lot smaller than hash entries.
       num_key_ = 1 *
                  CacheReservationManagerImpl<
@@ -1010,7 +935,7 @@ class DBFilterConstructionReserveMemoryT
       // For Ribbon Filter + FullFilter case, we need a large enough number of
       // keys so that charging final filter after releasing the hash entries
       // reservation will trigger at least another dummy entry (or equivalently
-      // to saying, causing another peak in cache reservation) as banding
+      // to saying, causing another peak in cache charging) as banding
       // reservation might not be a multiple of dummy entry.
       num_key_ = 12 *
                  CacheReservationManagerImpl<
@@ -1026,7 +951,9 @@ class DBFilterConstructionReserveMemoryT
     // calculation.
     constexpr std::size_t kCacheCapacity = 100 * 1024 * 1024;
 
-    table_options.reserve_table_builder_memory = reserve_table_builder_memory_;
+    table_options.cache_usage_options.options_overrides.insert(
+        {CacheEntryRole::kFilterConstruction,
+         {/*.charged = */ charge_filter_construction_}});
     table_options.filter_policy = Create(10, policy_);
     table_options.partition_filters = partition_filters_;
     if (table_options.partition_filters) {
@@ -1044,7 +971,8 @@ class DBFilterConstructionReserveMemoryT
     lo.capacity = kCacheCapacity;
     lo.num_shard_bits = 0;  // 2^0 shard
     lo.strict_capacity_limit = true;
-    cache_ = std::make_shared<FilterConstructResPeakTrackingCache>(
+    cache_ = std::make_shared<
+        TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>(
         (NewLRUCache(lo)));
     table_options.block_cache = cache_;
 
@@ -1053,56 +981,73 @@ class DBFilterConstructionReserveMemoryT
 
   std::size_t GetNumKey() { return num_key_; }
 
-  bool ReserveTableBuilderMemory() { return reserve_table_builder_memory_; }
+  CacheEntryRoleOptions::Decision ChargeFilterConstructMemory() {
+    return charge_filter_construction_;
+  }
 
   std::string GetFilterPolicy() { return policy_; }
 
   bool PartitionFilters() { return partition_filters_; }
 
-  std::shared_ptr<FilterConstructResPeakTrackingCache>
-  GetFilterConstructResPeakTrackingCache() {
+  std::shared_ptr<
+      TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>
+  GetCache() {
     return cache_;
   }
 
  private:
   std::size_t num_key_;
-  bool reserve_table_builder_memory_;
+  CacheEntryRoleOptions::Decision charge_filter_construction_;
   std::string policy_;
   bool partition_filters_;
-  std::shared_ptr<FilterConstructResPeakTrackingCache> cache_;
+  std::shared_ptr<
+      TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>
+      cache_;
   bool detect_filter_construct_corruption_;
 };
 
 INSTANTIATE_TEST_CASE_P(
-    DBFilterConstructionReserveMemoryTestWithParam,
-    DBFilterConstructionReserveMemoryTestWithParam,
-    ::testing::Values(std::make_tuple(false, kFastLocalBloom, false, false),
-
-                      std::make_tuple(true, kFastLocalBloom, false, false),
-                      std::make_tuple(true, kFastLocalBloom, false, true),
-                      std::make_tuple(true, kFastLocalBloom, true, false),
-                      std::make_tuple(true, kFastLocalBloom, true, true),
-
-                      std::make_tuple(true, kStandard128Ribbon, false, false),
-                      std::make_tuple(true, kStandard128Ribbon, false, true),
-                      std::make_tuple(true, kStandard128Ribbon, true, false),
-                      std::make_tuple(true, kStandard128Ribbon, true, true),
+    ChargeFilterConstructionTestWithParam,
+    ChargeFilterConstructionTestWithParam,
+    ::testing::Values(
+        std::make_tuple(CacheEntryRoleOptions::Decision::kDisabled,
+                        kFastLocalBloom, false, false),
 
-                      std::make_tuple(true, kDeprecatedBlock, false, false),
-                      std::make_tuple(true, kLegacyBloom, false, false)));
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kFastLocalBloom, false, false),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kFastLocalBloom, false, true),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kFastLocalBloom, true, false),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kFastLocalBloom, true, true),
+
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kStandard128Ribbon, false, false),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kStandard128Ribbon, false, true),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kStandard128Ribbon, true, false),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kStandard128Ribbon, true, true),
+
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled,
+                        kDeprecatedBlock, false, false),
+        std::make_tuple(CacheEntryRoleOptions::Decision::kEnabled, kLegacyBloom,
+                        false, false)));
 
 // TODO: Speed up this test, and reduce disk space usage (~700MB)
 // The current test inserts many keys (on the scale of dummy entry size)
 // in order to make small memory user (e.g, final filter, partitioned hash
 // entries/filter/banding) , which is proportional to the number of
-// keys, big enough so that its cache reservation triggers dummy entry insertion
+// keys, big enough so that its cache charging triggers dummy entry insertion
 // and becomes observable in the test.
 //
 // However, inserting that many keys slows down this test and leaves future
 // developers an opportunity to speed it up.
 //
 // Possible approaches & challenges:
-// 1. Use sync point during cache reservation of filter construction
+// 1. Use sync point during cache charging of filter construction
 //
 // Benefit: It does not rely on triggering dummy entry insertion
 // but the sync point to verify small memory user is charged correctly.
@@ -1111,7 +1056,7 @@ INSTANTIATE_TEST_CASE_P(
 //
 // 2. Make dummy entry size configurable and set it small in the test
 //
-// Benefit: It increases the precision of cache reservation and therefore
+// Benefit: It increases the precision of cache charging and therefore
 // small memory usage can still trigger insertion of dummy entry.
 //
 // Challenge: change CacheReservationManager related APIs and a hack
@@ -1119,16 +1064,17 @@ INSTANTIATE_TEST_CASE_P(
 // CacheReservationManager used in filter construction for testing
 // since CacheReservationManager is not exposed at the high level.
 //
-TEST_P(DBFilterConstructionReserveMemoryTestWithParam, ReserveMemory) {
+TEST_P(ChargeFilterConstructionTestWithParam, Basic) {
   Options options = CurrentOptions();
   // We set write_buffer_size big enough so that in the case where there is
-  // filter construction cache reservation, flush won't be triggered before we
+  // filter construction cache charging, flush won't be triggered before we
   // manually trigger it for clean testing
   options.write_buffer_size = 640 << 20;
   BlockBasedTableOptions table_options = GetBlockBasedTableOptions();
   options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  std::shared_ptr<FilterConstructResPeakTrackingCache> cache =
-      GetFilterConstructResPeakTrackingCache();
+  std::shared_ptr<
+      TargetCacheChargeTrackingCache<CacheEntryRole::kFilterConstruction>>
+      cache = GetCache();
   options.create_if_missing = true;
   // Disable auto compaction to prevent its unexpected side effect
   // to the number of keys per partition designed by us in the test
@@ -1139,32 +1085,33 @@ TEST_P(DBFilterConstructionReserveMemory
     ASSERT_OK(Put(Key(i), Key(i)));
   }
 
-  ASSERT_EQ(cache->GetReservedCacheIncrementSum(), 0)
+  ASSERT_EQ(cache->GetChargedCacheIncrementSum(), 0)
       << "Flush was triggered too early in the test case with filter "
-         "construction cache reservation - please make sure no flush triggered "
+         "construction cache charging - please make sure no flush triggered "
          "during the key insertions above";
 
   ASSERT_OK(Flush());
 
-  bool reserve_table_builder_memory = ReserveTableBuilderMemory();
+  bool charge_filter_construction = (ChargeFilterConstructMemory() ==
+                                     CacheEntryRoleOptions::Decision::kEnabled);
   std::string policy = GetFilterPolicy();
   bool partition_filters = PartitionFilters();
   bool detect_filter_construct_corruption =
       table_options.detect_filter_construct_corruption;
 
   std::deque<std::size_t> filter_construction_cache_res_peaks =
-      cache->GetReservedCachePeaks();
+      cache->GetChargedCachePeaks();
   std::size_t filter_construction_cache_res_increments_sum =
-      cache->GetReservedCacheIncrementSum();
+      cache->GetChargedCacheIncrementSum();
 
-  if (!reserve_table_builder_memory) {
+  if (!charge_filter_construction) {
     EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0);
     return;
   }
 
   if (policy == kDeprecatedBlock || policy == kLegacyBloom) {
     EXPECT_EQ(filter_construction_cache_res_peaks.size(), 0)
-        << "There shouldn't be filter construction cache reservation as this "
+        << "There shouldn't be filter construction cache charging as this "
            "feature does not support kDeprecatedBlock "
            "nor kLegacyBloom";
     return;
@@ -1238,14 +1185,14 @@ TEST_P(DBFilterConstructionReserveMemory
      */
     if (!partition_filters) {
       EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
-          << "Filter construction cache reservation should have only 1 peak in "
+          << "Filter construction cache charging should have only 1 peak in "
              "case: kFastLocalBloom + FullFilter";
       std::size_t filter_construction_cache_res_peak =
           filter_construction_cache_res_peaks[0];
       EXPECT_GT(filter_construction_cache_res_peak,
                 predicted_hash_entries_cache_res)
           << "The testing number of hash entries is designed to make hash "
-             "entries cache reservation be multiples of dummy entries"
+             "entries cache charging be multiples of dummy entries"
              " so the correct behavior of charging final filter on top of it"
              " should've triggered at least another dummy entry insertion";
 
@@ -1258,7 +1205,7 @@ TEST_P(DBFilterConstructionReserveMemory
       return;
     } else {
       EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
-          << "Filter construction cache reservation should have multiple peaks "
+          << "Filter construction cache charging should have multiple peaks "
              "in case: kFastLocalBloom + "
              "PartitionedFilter";
       std::size_t predicted_filter_construction_cache_res_increments_sum =
@@ -1365,11 +1312,11 @@ TEST_P(DBFilterConstructionReserveMemory
               CacheReservationManagerImpl<
                   CacheEntryRole::kFilterConstruction>::GetDummyEntrySize()),
           1)
-          << "Final filter cache reservation too small for this test - please "
+          << "Final filter cache charging too small for this test - please "
              "increase the number of keys";
       if (!detect_filter_construct_corruption) {
         EXPECT_EQ(filter_construction_cache_res_peaks.size(), 2)
-            << "Filter construction cache reservation should have 2 peaks in "
+            << "Filter construction cache charging should have 2 peaks in "
                "case: kStandard128Ribbon + "
                "FullFilter. "
                "The second peak is resulted from charging the final filter "
@@ -1388,7 +1335,7 @@ TEST_P(DBFilterConstructionReserveMemory
                   predicted_filter_construction_cache_res_peak * 1.1);
       } else {
         EXPECT_EQ(filter_construction_cache_res_peaks.size(), 1)
-            << "Filter construction cache reservation should have 1 peaks in "
+            << "Filter construction cache charging should have 1 peaks in "
                "case: kStandard128Ribbon + FullFilter "
                "+ detect_filter_construct_corruption. "
                "The previous second peak now disappears since we don't "
@@ -1409,13 +1356,13 @@ TEST_P(DBFilterConstructionReserveMemory
     } else {
       if (!detect_filter_construct_corruption) {
         EXPECT_GE(filter_construction_cache_res_peaks.size(), 3)
-            << "Filter construction cache reservation should have more than 3 "
+            << "Filter construction cache charging should have more than 3 "
                "peaks "
                "in case: kStandard128Ribbon + "
                "PartitionedFilter";
       } else {
         EXPECT_GE(filter_construction_cache_res_peaks.size(), 2)
-            << "Filter construction cache reservation should have more than 2 "
+            << "Filter construction cache charging should have more than 2 "
                "peaks "
                "in case: kStandard128Ribbon + "
                "PartitionedFilter + detect_filter_construct_corruption";
@@ -1712,11 +1659,11 @@ class TestingContextCustomFilterPolicy
     test_report_ +=
         OptionsHelper::compaction_style_to_string[context.compaction_style];
     test_report_ += ",n=";
-    test_report_ += ROCKSDB_NAMESPACE::ToString(context.num_levels);
+    test_report_ += std::to_string(context.num_levels);
     test_report_ += ",l=";
-    test_report_ += ROCKSDB_NAMESPACE::ToString(context.level_at_creation);
+    test_report_ += std::to_string(context.level_at_creation);
     test_report_ += ",b=";
-    test_report_ += ROCKSDB_NAMESPACE::ToString(int{context.is_bottommost});
+    test_report_ += std::to_string(int{context.is_bottommost});
     test_report_ += ",r=";
     test_report_ += table_file_creation_reason_to_string[context.reason];
     test_report_ += "\n";
diff -pruN 7.2.2-5/db/db_compaction_filter_test.cc 7.3.1-2/db/db_compaction_filter_test.cc
--- 7.2.2-5/db/db_compaction_filter_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_compaction_filter_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -454,7 +454,7 @@ TEST_F(DBTestCompactionFilter, Compactio
   // put some data
   for (int table = 0; table < 4; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(Put(ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(Flush());
   }
@@ -755,7 +755,7 @@ TEST_F(DBTestCompactionFilter, Compactio
 #ifndef ROCKSDB_LITE
 // Compaction filters aplies to all records, regardless snapshots.
 TEST_F(DBTestCompactionFilter, CompactionFilterIgnoreSnapshot) {
-  std::string five = ToString(5);
+  std::string five = std::to_string(5);
   Options options = CurrentOptions();
   options.compaction_filter_factory = std::make_shared<DeleteISFilterFactory>();
   options.disable_auto_compactions = true;
@@ -766,7 +766,7 @@ TEST_F(DBTestCompactionFilter, Compactio
   const Snapshot* snapshot = nullptr;
   for (int table = 0; table < 4; ++table) {
     for (int i = 0; i < 10; ++i) {
-      ASSERT_OK(Put(ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(Flush());
 
@@ -968,6 +968,71 @@ TEST_F(DBTestCompactionFilter, IgnoreSna
   ASSERT_TRUE(TryReopen(options).IsNotSupported());
 }
 
+TEST_F(DBTestCompactionFilter, DropKeyWithSingleDelete) {
+  Options options = GetDefaultOptions();
+  options.create_if_missing = true;
+
+  Reopen(options);
+
+  ASSERT_OK(Put("a", "v0"));
+  ASSERT_OK(Put("b", "v0"));
+  const Snapshot* snapshot = db_->GetSnapshot();
+
+  ASSERT_OK(SingleDelete("b"));
+  ASSERT_OK(Flush());
+
+  {
+    CompactRangeOptions cro;
+    cro.change_level = true;
+    cro.target_level = options.num_levels - 1;
+    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
+  }
+
+  db_->ReleaseSnapshot(snapshot);
+  Close();
+
+  class DeleteFilterV2 : public CompactionFilter {
+   public:
+    Decision FilterV2(int /*level*/, const Slice& key, ValueType /*value_type*/,
+                      const Slice& /*existing_value*/,
+                      std::string* /*new_value*/,
+                      std::string* /*skip_until*/) const override {
+      if (key.starts_with("b")) {
+        return Decision::kPurge;
+      }
+      return Decision::kRemove;
+    }
+
+    const char* Name() const override { return "DeleteFilterV2"; }
+  } delete_filter_v2;
+
+  options.compaction_filter = &delete_filter_v2;
+  options.level0_file_num_compaction_trigger = 2;
+  Reopen(options);
+
+  ASSERT_OK(Put("b", "v1"));
+  ASSERT_OK(Put("x", "v1"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(Put("r", "v1"));
+  ASSERT_OK(Put("z", "v1"));
+  ASSERT_OK(Flush());
+
+  ASSERT_OK(dbfull()->TEST_WaitForCompact());
+
+  Close();
+
+  options.compaction_filter = nullptr;
+  Reopen(options);
+  ASSERT_OK(SingleDelete("b"));
+  ASSERT_OK(Flush());
+  {
+    CompactRangeOptions cro;
+    cro.bottommost_level_compaction = BottommostLevelCompaction::kForce;
+    ASSERT_OK(db_->CompactRange(cro, nullptr, nullptr));
+  }
+}
+
 }  // namespace ROCKSDB_NAMESPACE
 
 int main(int argc, char** argv) {
diff -pruN 7.2.2-5/db/db_compaction_test.cc 7.3.1-2/db/db_compaction_test.cc
--- 7.2.2-5/db/db_compaction_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_compaction_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -2409,6 +2409,30 @@ TEST_P(DBCompactionTestWithParam, LevelC
 
   check_getvalues();
 
+  {  // Also verify GetLiveFilesStorageInfo with db_paths / cf_paths
+    std::vector<LiveFileStorageInfo> new_infos;
+    LiveFilesStorageInfoOptions lfsio;
+    lfsio.wal_size_for_flush = UINT64_MAX;  // no flush
+    ASSERT_OK(db_->GetLiveFilesStorageInfo(lfsio, &new_infos));
+    std::unordered_map<std::string, int> live_sst_by_dir;
+    for (auto& info : new_infos) {
+      if (info.file_type == kTableFile) {
+        live_sst_by_dir[info.directory]++;
+        // Verify file on disk (no directory confusion)
+        uint64_t size;
+        ASSERT_OK(env_->GetFileSize(
+            info.directory + "/" + info.relative_filename, &size));
+        ASSERT_EQ(info.size, size);
+      }
+    }
+    ASSERT_EQ(3U * 3U, live_sst_by_dir.size());
+    for (auto& paths : {options.db_paths, cf_opt1.cf_paths, cf_opt2.cf_paths}) {
+      ASSERT_EQ(1, live_sst_by_dir[paths[0].path]);
+      ASSERT_EQ(4, live_sst_by_dir[paths[1].path]);
+      ASSERT_EQ(2, live_sst_by_dir[paths[2].path]);
+    }
+  }
+
   ReopenWithColumnFamilies({"default", "one", "two"}, option_vector);
 
   check_getvalues();
@@ -2793,7 +2817,7 @@ TEST_P(DBCompactionTestWithParam, DISABL
 
   Random rnd(301);
   for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
-    ASSERT_OK(Put(1, ToString(key), rnd.RandomString(kTestValueSize)));
+    ASSERT_OK(Put(1, std::to_string(key), rnd.RandomString(kTestValueSize)));
   }
   ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
   ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -2825,7 +2849,7 @@ TEST_P(DBCompactionTestWithParam, DISABL
 
   // make sure all key-values are still there.
   for (int key = 64 * kEntriesPerBuffer; key >= 0; --key) {
-    ASSERT_NE(Get(1, ToString(key)), "NOT_FOUND");
+    ASSERT_NE(Get(1, std::to_string(key)), "NOT_FOUND");
   }
 }
 
@@ -4380,7 +4404,8 @@ TEST_F(DBCompactionTest, LevelPeriodicCo
   for (CompactionFilterType comp_filter_type :
        {kUseCompactionFilter, kUseCompactionFilterFactory}) {
     // Assert that periodic compactions are not enabled.
-    ASSERT_EQ(port::kMaxUint64 - 1, options.periodic_compaction_seconds);
+    ASSERT_EQ(std::numeric_limits<uint64_t>::max() - 1,
+              options.periodic_compaction_seconds);
 
     if (comp_filter_type == kUseCompactionFilter) {
       options.compaction_filter = &test_compaction_filter;
@@ -4643,9 +4668,9 @@ TEST_F(DBCompactionTest, CompactRangeSki
   });
 
   TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PreFlush");
-  ASSERT_OK(Put(ToString(0), rnd.RandomString(1024)));
+  ASSERT_OK(Put(std::to_string(0), rnd.RandomString(1024)));
   ASSERT_OK(dbfull()->Flush(flush_opts));
-  ASSERT_OK(Put(ToString(0), rnd.RandomString(1024)));
+  ASSERT_OK(Put(std::to_string(0), rnd.RandomString(1024)));
   TEST_SYNC_POINT("DBCompactionTest::CompactRangeSkipFlushAfterDelay:PostFlush");
   manual_compaction_thread.join();
 
@@ -4654,7 +4679,7 @@ TEST_F(DBCompactionTest, CompactRangeSki
   std::string num_keys_in_memtable;
   ASSERT_TRUE(db_->GetProperty(DB::Properties::kNumEntriesActiveMemTable,
                                &num_keys_in_memtable));
-  ASSERT_EQ(ToString(1), num_keys_in_memtable);
+  ASSERT_EQ(std::to_string(1), num_keys_in_memtable);
 
   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->DisableProcessing();
 }
@@ -4803,7 +4828,7 @@ TEST_F(DBCompactionTest, SubcompactionEv
   for (int i = 0; i < 4; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 10 + j;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -4813,7 +4838,7 @@ TEST_F(DBCompactionTest, SubcompactionEv
   for (int i = 0; i < 2; i++) {
     for (int j = 0; j < 10; j++) {
       int key_id = i * 20 + j * 2;
-      ASSERT_OK(Put(Key(key_id), "value" + ToString(key_id)));
+      ASSERT_OK(Put(Key(key_id), "value" + std::to_string(key_id)));
     }
     ASSERT_OK(Flush());
   }
@@ -5805,7 +5830,7 @@ TEST_P(DBCompactionTestWithBottommostPar
   }
   ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
 
-  ASSERT_EQ(ToString(kSstNum), FilesPerLevel(0));
+  ASSERT_EQ(std::to_string(kSstNum), FilesPerLevel(0));
 
   auto cro = CompactRangeOptions();
   cro.bottommost_level_compaction = bottommost_level_compaction_;
@@ -5818,7 +5843,7 @@ TEST_P(DBCompactionTestWithBottommostPar
     ASSERT_EQ("0,1", FilesPerLevel(0));
   } else {
     // Just trivial move from level 0 -> 1
-    ASSERT_EQ("0," + ToString(kSstNum), FilesPerLevel(0));
+    ASSERT_EQ("0," + std::to_string(kSstNum), FilesPerLevel(0));
   }
 }
 
@@ -7149,7 +7174,7 @@ TEST_F(DBCompactionTest, DisableManualCo
     ASSERT_OK(Put(Key(2), "value2"));
     ASSERT_OK(Flush());
   }
-  ASSERT_EQ(ToString(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));
+  ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));
 
   db_->DisableManualCompaction();
 
@@ -7206,7 +7231,7 @@ TEST_F(DBCompactionTest, DisableManualCo
     ASSERT_OK(Put(Key(2), "value2"));
     ASSERT_OK(Flush());
   }
-  ASSERT_EQ(ToString(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));
+  ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));
 
   db_->DisableManualCompaction();
 
@@ -7266,7 +7291,7 @@ TEST_F(DBCompactionTest, DBCloseWithManu
     ASSERT_OK(Put(Key(2), "value2"));
     ASSERT_OK(Flush());
   }
-  ASSERT_EQ(ToString(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));
+  ASSERT_EQ(std::to_string(kNumL0Files + (kNumL0Files / 2)), FilesPerLevel(0));
 
   // Close DB with manual compaction and auto triggered compaction in the queue.
   auto s = db_->Close();
diff -pruN 7.2.2-5/db/db_filesnapshot.cc 7.3.1-2/db/db_filesnapshot.cc
--- 7.2.2-5/db/db_filesnapshot.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_filesnapshot.cc	2022-06-08 21:08:16.000000000 +0000
@@ -177,7 +177,7 @@ Status DBImpl::GetLiveFilesStorageInfo(
   VectorLogPtr live_wal_files;
   bool flush_memtable = true;
   if (!immutable_db_options_.allow_2pc) {
-    if (opts.wal_size_for_flush == port::kMaxUint64) {
+    if (opts.wal_size_for_flush == std::numeric_limits<uint64_t>::max()) {
       flush_memtable = false;
     } else if (opts.wal_size_for_flush > 0) {
       // If the outstanding log files are small, we skip the flush.
diff -pruN 7.2.2-5/db/db_flush_test.cc 7.3.1-2/db/db_flush_test.cc
--- 7.2.2-5/db/db_flush_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_flush_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -2356,7 +2356,7 @@ TEST_P(DBAtomicFlushTest, PrecomputeMinL
     ASSERT_OK(Flush(cf_ids));
     uint64_t log_num_after_flush = dbfull()->TEST_GetCurrentLogNumber();
 
-    uint64_t min_log_number_to_keep = port::kMaxUint64;
+    uint64_t min_log_number_to_keep = std::numeric_limits<uint64_t>::max();
     autovector<ColumnFamilyData*> flushed_cfds;
     autovector<autovector<VersionEdit*>> flush_edits;
     for (size_t i = 0; i != num_cfs; ++i) {
diff -pruN 7.2.2-5/db/dbformat.h 7.3.1-2/db/dbformat.h
--- 7.2.2-5/db/dbformat.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/dbformat.h	2022-06-08 21:08:16.000000000 +0000
@@ -90,7 +90,8 @@ inline bool IsExtendedValueType(ValueTyp
 // can be packed together into 64-bits.
 static const SequenceNumber kMaxSequenceNumber = ((0x1ull << 56) - 1);
 
-static const SequenceNumber kDisableGlobalSequenceNumber = port::kMaxUint64;
+static const SequenceNumber kDisableGlobalSequenceNumber =
+    std::numeric_limits<uint64_t>::max();
 
 constexpr uint64_t kNumInternalBytes = 8;
 
diff -pruN 7.2.2-5/db/db_impl/db_impl.cc 7.3.1-2/db/db_impl/db_impl.cc
--- 7.2.2-5/db/db_impl/db_impl.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl.cc	2022-06-08 21:08:16.000000000 +0000
@@ -567,7 +567,7 @@ Status DBImpl::CloseHelper() {
   // flushing by first checking if there is a need for
   // flushing (but need to implement something
   // else than imm()->IsFlushPending() because the output
-  // memtables added to imm() dont trigger flushes).
+  // memtables added to imm() don't trigger flushes).
   if (immutable_db_options_.experimental_mempurge_threshold > 0.0) {
     Status flush_ret;
     mutex_.Unlock();
@@ -849,7 +849,8 @@ void DBImpl::PersistStats() {
           if (stats_slice_.find(stat.first) != stats_slice_.end()) {
             uint64_t delta = stat.second - stats_slice_[stat.first];
             s = batch.Put(persist_stats_cf_handle_,
-                          Slice(key, std::min(100, length)), ToString(delta));
+                          Slice(key, std::min(100, length)),
+                          std::to_string(delta));
           }
         }
       }
@@ -1440,12 +1441,13 @@ Status DBImpl::MarkLogsSynced(uint64_t u
   for (auto it = logs_.begin(); it != logs_.end() && it->number <= up_to;) {
     auto& wal = *it;
     assert(wal.getting_synced);
+    if (immutable_db_options_.track_and_verify_wals_in_manifest &&
+        wal.writer->file()->GetFileSize() > 0) {
+      synced_wals.AddWal(wal.number,
+                         WalMetadata(wal.writer->file()->GetFileSize()));
+    }
+
     if (logs_.size() > 1) {
-      if (immutable_db_options_.track_and_verify_wals_in_manifest &&
-          wal.writer->file()->GetFileSize() > 0) {
-        synced_wals.AddWal(wal.number,
-                           WalMetadata(wal.writer->file()->GetFileSize()));
-      }
       logs_to_free_.push_back(wal.ReleaseWriter());
       // To modify logs_ both mutex_ and log_write_mutex_ must be held
       InstrumentedMutexLock l(&log_write_mutex_);
@@ -1722,17 +1724,6 @@ Status DBImpl::Get(const ReadOptions& re
   return s;
 }
 
-namespace {
-class GetWithTimestampReadCallback : public ReadCallback {
- public:
-  explicit GetWithTimestampReadCallback(SequenceNumber seq)
-      : ReadCallback(seq) {}
-  bool IsVisibleFullCheck(SequenceNumber seq) override {
-    return seq <= max_visible_seq_;
-  }
-};
-}  // namespace
-
 Status DBImpl::GetImpl(const ReadOptions& read_options, const Slice& key,
                        GetImplOptions& get_impl_options) {
   assert(get_impl_options.value != nullptr ||
@@ -2589,7 +2580,8 @@ Status DBImpl::MultiGetImpl(
                             ? MultiGetContext::MAX_BATCH_SIZE
                             : keys_left;
     MultiGetContext ctx(sorted_keys, start_key + num_keys - keys_left,
-                        batch_size, snapshot, read_options);
+                        batch_size, snapshot, read_options, GetFileSystem(),
+                        stats_);
     MultiGetRange range = ctx.GetMultiGetRange();
     range.AddValueSize(curr_value_size);
     bool lookup_current = false;
@@ -3355,7 +3347,7 @@ bool DBImpl::GetProperty(ColumnFamilyHan
     bool ret_value =
         GetIntPropertyInternal(cfd, *property_info, false, &int_value);
     if (ret_value) {
-      *value = ToString(int_value);
+      *value = std::to_string(int_value);
     }
     return ret_value;
   } else if (property_info->handle_string) {
@@ -3990,8 +3982,8 @@ Status DBImpl::CheckConsistency() {
       } else if (fsize != md.size) {
         corruption_messages += "Sst file size mismatch: " + file_path +
                                ". Size recorded in manifest " +
-                               ToString(md.size) + ", actual size " +
-                               ToString(fsize) + "\n";
+                               std::to_string(md.size) + ", actual size " +
+                               std::to_string(fsize) + "\n";
       }
     }
   }
@@ -5123,8 +5115,8 @@ Status DBImpl::VerifyChecksumInternal(co
                                      fmeta->file_checksum_func_name, fname,
                                      read_options);
         } else {
-          s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(opts, file_options_,
-                                                       read_options, fname);
+          s = ROCKSDB_NAMESPACE::VerifySstFileChecksum(
+              opts, file_options_, read_options, fname, fd.largest_seqno);
         }
         RecordTick(stats_, VERIFY_CHECKSUM_READ_BYTES,
                    IOSTATS(bytes_read) - prev_bytes_read);
@@ -5338,7 +5330,7 @@ Status DBImpl::ReserveFileNumbersBeforeI
 
 Status DBImpl::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
   if (mutable_db_options_.max_open_files == -1) {
-    uint64_t oldest_time = port::kMaxUint64;
+    uint64_t oldest_time = std::numeric_limits<uint64_t>::max();
     for (auto cfd : *versions_->GetColumnFamilySet()) {
       if (!cfd->IsDropped()) {
         uint64_t ctime;
diff -pruN 7.2.2-5/db/db_impl/db_impl_compaction_flush.cc 7.3.1-2/db/db_impl/db_impl_compaction_flush.cc
--- 7.2.2-5/db/db_impl/db_impl_compaction_flush.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_compaction_flush.cc	2022-06-08 21:08:16.000000000 +0000
@@ -188,7 +188,7 @@ Status DBImpl::FlushMemTableToOutputFile
   // a memtable without knowing such snapshot(s).
   uint64_t max_memtable_id = needs_to_sync_closed_wals
                                  ? cfd->imm()->GetLatestMemTableID()
-                                 : port::kMaxUint64;
+                                 : std::numeric_limits<uint64_t>::max();
 
   // If needs_to_sync_closed_wals is false, then the flush job will pick ALL
   // existing memtables of the column family when PickMemTable() is called
@@ -1041,7 +1041,8 @@ Status DBImpl::CompactRangeInternal(cons
     }
     s = RunManualCompaction(cfd, ColumnFamilyData::kCompactAllLevels,
                             final_output_level, options, begin, end, exclusive,
-                            false, port::kMaxUint64, trim_ts);
+                            false, std::numeric_limits<uint64_t>::max(),
+                            trim_ts);
   } else {
     int first_overlapped_level = kInvalidLevel;
     int max_overlapped_level = kInvalidLevel;
@@ -1078,7 +1079,7 @@ Status DBImpl::CompactRangeInternal(cons
     if (s.ok() && first_overlapped_level != kInvalidLevel) {
       // max_file_num_to_ignore can be used to filter out newly created SST
       // files, useful for bottom level compaction in a manual compaction
-      uint64_t max_file_num_to_ignore = port::kMaxUint64;
+      uint64_t max_file_num_to_ignore = std::numeric_limits<uint64_t>::max();
       uint64_t next_file_number = versions_->current_next_file_number();
       final_output_level = max_overlapped_level;
       int output_level;
@@ -1650,7 +1651,8 @@ Status DBImpl::ReFitLevel(ColumnFamilyDa
           f->smallest, f->largest, f->fd.smallest_seqno, f->fd.largest_seqno,
           f->marked_for_compaction, f->temperature, f->oldest_blob_file_number,
           f->oldest_ancester_time, f->file_creation_time, f->file_checksum,
-          f->file_checksum_func_name, f->min_timestamp, f->max_timestamp);
+          f->file_checksum_func_name, f->min_timestamp, f->max_timestamp,
+          f->unique_id);
     }
     ROCKS_LOG_DEBUG(immutable_db_options_.info_log,
                     "[%s] Apply version edit:\n%s", cfd->GetName().c_str(),
@@ -1855,11 +1857,12 @@ Status DBImpl::RunManualCompaction(
     }
   }
 
-  ROCKS_LOG_INFO(immutable_db_options_.info_log,
-                 "[%s] Manual compaction starting", cfd->GetName().c_str());
-
   LogBuffer log_buffer(InfoLogLevel::INFO_LEVEL,
                        immutable_db_options_.info_log.get());
+
+  ROCKS_LOG_BUFFER(&log_buffer, "[%s] Manual compaction starting",
+                   cfd->GetName().c_str());
+
   // We don't check bg_error_ here, because if we get the error in compaction,
   // the compaction will set manual.status to bg_error_ and set manual.done to
   // true.
@@ -2013,7 +2016,7 @@ Status DBImpl::FlushMemTable(ColumnFamil
       // be created and scheduled, status::OK() will be returned.
       s = SwitchMemtable(cfd, &context);
     }
-    const uint64_t flush_memtable_id = port::kMaxUint64;
+    const uint64_t flush_memtable_id = std::numeric_limits<uint64_t>::max();
     if (s.ok()) {
       if (cfd->imm()->NumNotFlushed() != 0 || !cfd->mem()->IsEmpty() ||
           !cached_recoverable_state_empty_.load()) {
@@ -3275,7 +3278,7 @@ Status DBImpl::BackgroundCompaction(bool
             f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
             f->oldest_blob_file_number, f->oldest_ancester_time,
             f->file_creation_time, f->file_checksum, f->file_checksum_func_name,
-            f->min_timestamp, f->max_timestamp);
+            f->min_timestamp, f->max_timestamp, f->unique_id);
 
         ROCKS_LOG_BUFFER(
             log_buffer,
diff -pruN 7.2.2-5/db/db_impl/db_impl_debug.cc 7.3.1-2/db/db_impl/db_impl_debug.cc
--- 7.2.2-5/db/db_impl/db_impl_debug.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_debug.cc	2022-06-08 21:08:16.000000000 +0000
@@ -118,10 +118,11 @@ Status DBImpl::TEST_CompactRange(int lev
        cfd->ioptions()->compaction_style == kCompactionStyleFIFO)
           ? level
           : level + 1;
-  return RunManualCompaction(cfd, level, output_level, CompactRangeOptions(),
-                             begin, end, true, disallow_trivial_move,
-                             port::kMaxUint64 /*max_file_num_to_ignore*/,
-                             "" /*trim_ts*/);
+  return RunManualCompaction(
+      cfd, level, output_level, CompactRangeOptions(), begin, end, true,
+      disallow_trivial_move,
+      std::numeric_limits<uint64_t>::max() /*max_file_num_to_ignore*/,
+      "" /*trim_ts*/);
 }
 
 Status DBImpl::TEST_SwitchMemtable(ColumnFamilyData* cfd) {
diff -pruN 7.2.2-5/db/db_impl/db_impl_experimental.cc 7.3.1-2/db/db_impl/db_impl_experimental.cc
--- 7.2.2-5/db/db_impl/db_impl_experimental.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_experimental.cc	2022-06-08 21:08:16.000000000 +0000
@@ -137,7 +137,7 @@ Status DBImpl::PromoteL0(ColumnFamilyHan
           f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
           f->oldest_blob_file_number, f->oldest_ancester_time,
           f->file_creation_time, f->file_checksum, f->file_checksum_func_name,
-          f->min_timestamp, f->max_timestamp);
+          f->min_timestamp, f->max_timestamp, f->unique_id);
     }
 
     status = versions_->LogAndApply(cfd, *cfd->GetLatestMutableCFOptions(),
diff -pruN 7.2.2-5/db/db_impl/db_impl_files.cc 7.3.1-2/db/db_impl/db_impl_files.cc
--- 7.2.2-5/db/db_impl/db_impl_files.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_files.cc	2022-06-08 21:08:16.000000000 +0000
@@ -761,7 +761,7 @@ uint64_t PrecomputeMinLogNumberToKeepNon
   assert(!cfds_to_flush.empty());
   assert(cfds_to_flush.size() == edit_lists.size());
 
-  uint64_t min_log_number_to_keep = port::kMaxUint64;
+  uint64_t min_log_number_to_keep = std::numeric_limits<uint64_t>::max();
   for (const auto& edit_list : edit_lists) {
     uint64_t log = 0;
     for (const auto& e : edit_list) {
@@ -773,7 +773,7 @@ uint64_t PrecomputeMinLogNumberToKeepNon
       min_log_number_to_keep = std::min(min_log_number_to_keep, log);
     }
   }
-  if (min_log_number_to_keep == port::kMaxUint64) {
+  if (min_log_number_to_keep == std::numeric_limits<uint64_t>::max()) {
     min_log_number_to_keep = cfds_to_flush[0]->GetLogNumber();
     for (size_t i = 1; i < cfds_to_flush.size(); i++) {
       min_log_number_to_keep =
@@ -863,7 +863,7 @@ uint64_t PrecomputeMinLogNumberToKeep2PC
   return min_log_number_to_keep;
 }
 
-Status DBImpl::SetDBId(bool read_only) {
+Status DBImpl::SetDBId(bool read_only, RecoveryContext* recovery_ctx) {
   Status s;
   // Happens when immutable_db_options_.write_dbid_to_manifest is set to true
   // the very first time.
@@ -890,14 +890,14 @@ Status DBImpl::SetDBId(bool read_only) {
     }
     s = GetDbIdentityFromIdentityFile(&db_id_);
     if (immutable_db_options_.write_dbid_to_manifest && s.ok()) {
+      assert(!read_only);
+      assert(recovery_ctx != nullptr);
+      assert(versions_->GetColumnFamilySet() != nullptr);
       VersionEdit edit;
       edit.SetDBId(db_id_);
-      Options options;
-      MutableCFOptions mutable_cf_options(options);
       versions_->db_id_ = db_id_;
-      s = versions_->LogAndApply(versions_->GetColumnFamilySet()->GetDefault(),
-                                 mutable_cf_options, &edit, &mutex_, nullptr,
-                                 /* new_descriptor_log */ false);
+      recovery_ctx->UpdateVersionEdits(
+          versions_->GetColumnFamilySet()->GetDefault(), edit);
     }
   } else if (!read_only) {
     s = SetIdentityFile(env_, dbname_, db_id_);
@@ -905,7 +905,7 @@ Status DBImpl::SetDBId(bool read_only) {
   return s;
 }
 
-Status DBImpl::DeleteUnreferencedSstFiles() {
+Status DBImpl::DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx) {
   mutex_.AssertHeld();
   std::vector<std::string> paths;
   paths.push_back(NormalizePath(dbname_ + std::string(1, kFilePathSeparator)));
@@ -925,7 +925,6 @@ Status DBImpl::DeleteUnreferencedSstFile
 
   uint64_t next_file_number = versions_->current_next_file_number();
   uint64_t largest_file_number = next_file_number;
-  std::set<std::string> files_to_delete;
   Status s;
   for (const auto& path : paths) {
     std::vector<std::string> files;
@@ -943,8 +942,9 @@ Status DBImpl::DeleteUnreferencedSstFile
       const std::string normalized_fpath = path + fname;
       largest_file_number = std::max(largest_file_number, number);
       if (type == kTableFile && number >= next_file_number &&
-          files_to_delete.find(normalized_fpath) == files_to_delete.end()) {
-        files_to_delete.insert(normalized_fpath);
+          recovery_ctx->files_to_delete_.find(normalized_fpath) ==
+              recovery_ctx->files_to_delete_.end()) {
+        recovery_ctx->files_to_delete_.emplace(normalized_fpath);
       }
     }
   }
@@ -961,21 +961,7 @@ Status DBImpl::DeleteUnreferencedSstFile
   assert(versions_->GetColumnFamilySet());
   ColumnFamilyData* default_cfd = versions_->GetColumnFamilySet()->GetDefault();
   assert(default_cfd);
-  s = versions_->LogAndApply(
-      default_cfd, *default_cfd->GetLatestMutableCFOptions(), &edit, &mutex_,
-      directories_.GetDbDir(), /*new_descriptor_log*/ false);
-  if (!s.ok()) {
-    return s;
-  }
-
-  mutex_.Unlock();
-  for (const auto& fname : files_to_delete) {
-    s = env_->DeleteFile(fname);
-    if (!s.ok()) {
-      break;
-    }
-  }
-  mutex_.Lock();
+  recovery_ctx->UpdateVersionEdits(default_cfd, edit);
   return s;
 }
 
diff -pruN 7.2.2-5/db/db_impl/db_impl.h 7.3.1-2/db/db_impl/db_impl.h
--- 7.2.2-5/db/db_impl/db_impl.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl.h	2022-06-08 21:08:16.000000000 +0000
@@ -1240,6 +1240,39 @@ class DBImpl : public DB {
 
   std::atomic<bool> shutting_down_;
 
+  // RecoveryContext struct stores the context about version edits along
+  // with corresponding column_family_data and column_family_options.
+  class RecoveryContext {
+   public:
+    ~RecoveryContext() {
+      for (auto& edit_list : edit_lists_) {
+        for (auto* edit : edit_list) {
+          delete edit;
+        }
+      }
+    }
+
+    void UpdateVersionEdits(ColumnFamilyData* cfd, const VersionEdit& edit) {
+      assert(cfd != nullptr);
+      if (map_.find(cfd->GetID()) == map_.end()) {
+        uint32_t size = static_cast<uint32_t>(map_.size());
+        map_.emplace(cfd->GetID(), size);
+        cfds_.emplace_back(cfd);
+        mutable_cf_opts_.emplace_back(cfd->GetLatestMutableCFOptions());
+        edit_lists_.emplace_back(autovector<VersionEdit*>());
+      }
+      uint32_t i = map_[cfd->GetID()];
+      edit_lists_[i].emplace_back(new VersionEdit(edit));
+    }
+
+    std::unordered_map<uint32_t, uint32_t> map_;  // cf_id to index;
+    autovector<ColumnFamilyData*> cfds_;
+    autovector<const MutableCFOptions*> mutable_cf_opts_;
+    autovector<autovector<VersionEdit*>> edit_lists_;
+    // files_to_delete_ contains sst files
+    std::unordered_set<std::string> files_to_delete_;
+  };
+
   // Except in DB::Open(), WriteOptionsFile can only be called when:
   // Persist options to options file.
   // If need_mutex_lock = false, the method will lock DB mutex.
@@ -1356,16 +1389,19 @@ class DBImpl : public DB {
   // be made to the descriptor are added to *edit.
   // recovered_seq is set to less than kMaxSequenceNumber if the log's tail is
   // skipped.
+  // recovery_ctx stores the context about version edits and all those
+  // edits are persisted to new Manifest after successfully syncing the new WAL.
   virtual Status Recover(
       const std::vector<ColumnFamilyDescriptor>& column_families,
       bool read_only = false, bool error_if_wal_file_exists = false,
       bool error_if_data_exists_in_wals = false,
-      uint64_t* recovered_seq = nullptr);
+      uint64_t* recovered_seq = nullptr,
+      RecoveryContext* recovery_ctx = nullptr);
 
   virtual bool OwnTablesAndLogs() const { return true; }
 
   // Set DB identity file, and write DB ID to manifest if necessary.
-  Status SetDBId(bool read_only);
+  Status SetDBId(bool read_only, RecoveryContext* recovery_ctx);
 
   // REQUIRES: db mutex held when calling this function, but the db mutex can
   // be released and re-acquired. Db mutex will be held when the function
@@ -1374,12 +1410,15 @@ class DBImpl : public DB {
   // not referenced in the MANIFEST (e.g.
   // 1. It's best effort recovery;
   // 2. The VersionEdits referencing the SST files are appended to
-  // MANIFEST, DB crashes when syncing the MANIFEST, the VersionEdits are
+  // RecoveryContext, DB crashes when syncing the MANIFEST, the VersionEdits are
   // still not synced to MANIFEST during recovery.)
-  // We delete these SST files. In the
+  // It stores the SST files to be deleted in RecoveryContext. In the
   // meantime, we find out the largest file number present in the paths, and
   // bump up the version set's next_file_number_ to be 1 + largest_file_number.
-  Status DeleteUnreferencedSstFiles();
+  // recovery_ctx stores the context about version edits and files to be
+  // deleted. All those edits are persisted to the new Manifest after
+  // successfully syncing the new WAL.
+  Status DeleteUnreferencedSstFiles(RecoveryContext* recovery_ctx);
 
   // SetDbSessionId() should be called in the constuctor DBImpl()
   // to ensure that db_session_id_ gets updated every time the DB is opened
@@ -1389,6 +1428,14 @@ class DBImpl : public DB {
   Status FailIfTsSizesMismatch(const ColumnFamilyHandle* column_family,
                                const Slice& ts) const;
 
+  // recovery_ctx stores the context about version edits;
+  // LogAndApplyForRecovery persists all those edits to the new Manifest after
+  // successfully syncing the new WAL.
+  // LogAndApplyForRecovery should be called only once during recovery, and it
+  // should be called when RocksDB writes the first new MANIFEST since this
+  // recovery.
+  Status LogAndApplyForRecovery(const RecoveryContext& recovery_ctx);
+
  private:
   friend class DB;
   friend class ErrorHandler;
@@ -1645,7 +1692,8 @@ class DBImpl : public DB {
   // corrupted_log_found is set to true if we recover from a corrupted log file.
   Status RecoverLogFiles(const std::vector<uint64_t>& log_numbers,
                          SequenceNumber* next_sequence, bool read_only,
-                         bool* corrupted_log_found);
+                         bool* corrupted_log_found,
+                         RecoveryContext* recovery_ctx);
 
   // The following two methods are used to flush a memtable to
   // storage. The first one is used at database RecoveryTime (when the
@@ -1974,6 +2022,11 @@ class DBImpl : public DB {
   IOStatus CreateWAL(uint64_t log_file_num, uint64_t recycle_log_number,
                      size_t preallocate_block_size, log::Writer** new_log);
 
+  // Verify SST file unique id between Manifest and table properties to make
+  // sure they're the same. Currently only used during DB open when
+  // `verify_sst_unique_id_in_manifest = true`.
+  Status VerifySstUniqueIdInManifest();
+
   // Validate self-consistency of DB options
   static Status ValidateOptions(const DBOptions& db_options);
   // Validate self-consistency of DB options and its consistency with cf options
@@ -2299,7 +2352,7 @@ class DBImpl : public DB {
 
   static const int KEEP_LOG_FILE_NUM = 1000;
   // MSVC version 1800 still does not have constexpr for ::max()
-  static const uint64_t kNoTimeOut = port::kMaxUint64;
+  static const uint64_t kNoTimeOut = std::numeric_limits<uint64_t>::max();
 
   std::string db_absolute_path_;
 
@@ -2395,6 +2448,15 @@ class DBImpl : public DB {
   std::unique_ptr<StallInterface> wbm_stall_;
 };
 
+class GetWithTimestampReadCallback : public ReadCallback {
+ public:
+  explicit GetWithTimestampReadCallback(SequenceNumber seq)
+      : ReadCallback(seq) {}
+  bool IsVisibleFullCheck(SequenceNumber seq) override {
+    return seq <= max_visible_seq_;
+  }
+};
+
 extern Options SanitizeOptions(const std::string& db, const Options& src,
                                bool read_only = false);
 
diff -pruN 7.2.2-5/db/db_impl/db_impl_open.cc 7.3.1-2/db/db_impl/db_impl_open.cc
--- 7.2.2-5/db/db_impl/db_impl_open.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_open.cc	2022-06-08 21:08:16.000000000 +0000
@@ -399,7 +399,7 @@ IOStatus Directories::SetDirectories(Fil
 Status DBImpl::Recover(
     const std::vector<ColumnFamilyDescriptor>& column_families, bool read_only,
     bool error_if_wal_file_exists, bool error_if_data_exists_in_wals,
-    uint64_t* recovered_seq) {
+    uint64_t* recovered_seq, RecoveryContext* recovery_ctx) {
   mutex_.AssertHeld();
 
   bool is_new_db = false;
@@ -518,9 +518,15 @@ Status DBImpl::Recover(
   if (!s.ok()) {
     return s;
   }
-  s = SetDBId(read_only);
+  if (immutable_db_options_.verify_sst_unique_id_in_manifest) {
+    s = VerifySstUniqueIdInManifest();
+    if (!s.ok()) {
+      return s;
+    }
+  }
+  s = SetDBId(read_only, recovery_ctx);
   if (s.ok() && !read_only) {
-    s = DeleteUnreferencedSstFiles();
+    s = DeleteUnreferencedSstFiles(recovery_ctx);
   }
 
   if (immutable_db_options_.paranoid_checks && s.ok()) {
@@ -535,10 +541,6 @@ Status DBImpl::Recover(
       }
     }
   }
-  // DB mutex is already held
-  if (s.ok() && immutable_db_options_.persist_stats_to_disk) {
-    s = InitPersistStatsColumnFamily();
-  }
 
   std::vector<std::string> files_in_wal_dir;
   if (s.ok()) {
@@ -608,7 +610,10 @@ Status DBImpl::Recover(
       WalNumber max_wal_number =
           versions_->GetWalSet().GetWals().rbegin()->first;
       edit.DeleteWalsBefore(max_wal_number + 1);
-      s = versions_->LogAndApplyToDefaultColumnFamily(&edit, &mutex_);
+      assert(recovery_ctx != nullptr);
+      assert(versions_->GetColumnFamilySet() != nullptr);
+      recovery_ctx->UpdateVersionEdits(
+          versions_->GetColumnFamilySet()->GetDefault(), edit);
     }
     if (!s.ok()) {
       return s;
@@ -644,8 +649,8 @@ Status DBImpl::Recover(
       std::sort(wals.begin(), wals.end());
 
       bool corrupted_wal_found = false;
-      s = RecoverLogFiles(wals, &next_sequence, read_only,
-                          &corrupted_wal_found);
+      s = RecoverLogFiles(wals, &next_sequence, read_only, &corrupted_wal_found,
+                          recovery_ctx);
       if (corrupted_wal_found && recovered_seq != nullptr) {
         *recovered_seq = next_sequence;
       }
@@ -698,6 +703,25 @@ Status DBImpl::Recover(
   return s;
 }
 
+Status DBImpl::VerifySstUniqueIdInManifest() {
+  mutex_.AssertHeld();
+  ROCKS_LOG_INFO(
+      immutable_db_options_.info_log,
+      "Verifying SST unique id between MANIFEST and SST file table properties");
+  Status status;
+  for (auto cfd : *versions_->GetColumnFamilySet()) {
+    if (!cfd->IsDropped()) {
+      auto version = cfd->current();
+      version->Ref();
+      mutex_.Unlock();
+      status = version->VerifySstUniqueIds();
+      mutex_.Lock();
+      version->Unref();
+    }
+  }
+  return status;
+}
+
 Status DBImpl::PersistentStatsProcessFormatVersion() {
   mutex_.AssertHeld();
   Status s;
@@ -760,11 +784,11 @@ Status DBImpl::PersistentStatsProcessFor
     WriteBatch batch;
     if (s.ok()) {
       s = batch.Put(persist_stats_cf_handle_, kFormatVersionKeyString,
-                    ToString(kStatsCFCurrentFormatVersion));
+                    std::to_string(kStatsCFCurrentFormatVersion));
     }
     if (s.ok()) {
       s = batch.Put(persist_stats_cf_handle_, kCompatibleVersionKeyString,
-                    ToString(kStatsCFCompatibleFormatVersion));
+                    std::to_string(kStatsCFCompatibleFormatVersion));
     }
     if (s.ok()) {
       WriteOptions wo;
@@ -805,10 +829,30 @@ Status DBImpl::InitPersistStatsColumnFam
   return s;
 }
 
+Status DBImpl::LogAndApplyForRecovery(const RecoveryContext& recovery_ctx) {
+  mutex_.AssertHeld();
+  assert(versions_->descriptor_log_ == nullptr);
+  Status s = versions_->LogAndApply(
+      recovery_ctx.cfds_, recovery_ctx.mutable_cf_opts_,
+      recovery_ctx.edit_lists_, &mutex_, directories_.GetDbDir());
+  if (s.ok() && !(recovery_ctx.files_to_delete_.empty())) {
+    mutex_.Unlock();
+    for (const auto& fname : recovery_ctx.files_to_delete_) {
+      s = env_->DeleteFile(fname);
+      if (!s.ok()) {
+        break;
+      }
+    }
+    mutex_.Lock();
+  }
+  return s;
+}
+
 // REQUIRES: wal_numbers are sorted in ascending order
 Status DBImpl::RecoverLogFiles(const std::vector<uint64_t>& wal_numbers,
                                SequenceNumber* next_sequence, bool read_only,
-                               bool* corrupted_wal_found) {
+                               bool* corrupted_wal_found,
+                               RecoveryContext* recovery_ctx) {
   struct LogReporter : public log::Reader::Reporter {
     Env* env;
     Logger* info_log;
@@ -947,7 +991,6 @@ Status DBImpl::RecoverLogFiles(const std
     // Read all the records and add to a memtable
     std::string scratch;
     Slice record;
-    WriteBatch batch;
 
     TEST_SYNC_POINT_CALLBACK("DBImpl::RecoverLogFiles:BeforeReadWal",
                              /*arg=*/nullptr);
@@ -961,10 +1004,15 @@ Status DBImpl::RecoverLogFiles(const std
         continue;
       }
 
+      // We create a new batch and initialize with a valid prot_info_ to store
+      // the data checksums
+      WriteBatch batch(0, 0, 8, 0);
+
       status = WriteBatchInternal::SetContents(&batch, record);
       if (!status.ok()) {
         return status;
       }
+
       SequenceNumber sequence = WriteBatchInternal::Sequence(&batch);
 
       if (immutable_db_options_.wal_recovery_mode ==
@@ -1262,44 +1310,36 @@ Status DBImpl::RecoverLogFiles(const std
       // VersionSet::next_file_number_ always to be strictly greater than any
       // log number
       versions_->MarkFileNumberUsed(max_wal_number + 1);
+      assert(recovery_ctx != nullptr);
 
-      autovector<ColumnFamilyData*> cfds;
-      autovector<const MutableCFOptions*> cf_opts;
-      autovector<autovector<VersionEdit*>> edit_lists;
       for (auto* cfd : *versions_->GetColumnFamilySet()) {
-        cfds.push_back(cfd);
-        cf_opts.push_back(cfd->GetLatestMutableCFOptions());
         auto iter = version_edits.find(cfd->GetID());
         assert(iter != version_edits.end());
-        edit_lists.push_back({&iter->second});
+        recovery_ctx->UpdateVersionEdits(cfd, iter->second);
       }
 
-      std::unique_ptr<VersionEdit> wal_deletion;
       if (flushed) {
-        wal_deletion = std::make_unique<VersionEdit>();
+        VersionEdit wal_deletion;
         if (immutable_db_options_.track_and_verify_wals_in_manifest) {
-          wal_deletion->DeleteWalsBefore(max_wal_number + 1);
+          wal_deletion.DeleteWalsBefore(max_wal_number + 1);
         }
         if (!allow_2pc()) {
           // In non-2pc mode, flushing the memtables of the column families
           // means we can advance min_log_number_to_keep.
-          wal_deletion->SetMinLogNumberToKeep(max_wal_number + 1);
+          wal_deletion.SetMinLogNumberToKeep(max_wal_number + 1);
         }
-        edit_lists.back().push_back(wal_deletion.get());
+        assert(versions_->GetColumnFamilySet() != nullptr);
+        recovery_ctx->UpdateVersionEdits(
+            versions_->GetColumnFamilySet()->GetDefault(), wal_deletion);
       }
-
-      // write MANIFEST with update
-      status = versions_->LogAndApply(cfds, cf_opts, edit_lists, &mutex_,
-                                      directories_.GetDbDir(),
-                                      /*new_descriptor_log=*/true);
     }
   }
 
   if (status.ok()) {
     if (data_seen && !flushed) {
       status = RestoreAliveLogFiles(wal_numbers);
-    } else {
-      // If there's no data in the WAL, or we flushed all the data, still
+    } else if (!wal_numbers.empty()) {  // If there's no data in the WAL, or we
+                                        // flushed all the data, still
       // truncate the log file. If the process goes into a crash loop before
       // the file is deleted, the preallocated space will never get freed.
       const bool truncate = !read_only;
@@ -1322,6 +1362,7 @@ Status DBImpl::GetLogSizeAndMaybeTruncat
   Status s;
   // This gets the appear size of the wals, not including preallocated space.
   s = env_->GetFileSize(fname, &log.size);
+  TEST_SYNC_POINT_CALLBACK("DBImpl::GetLogSizeAndMaybeTruncate:0", /*arg=*/&s);
   if (s.ok() && truncate) {
     std::unique_ptr<FSWritableFile> last_log;
     Status truncate_status = fs_->ReopenWritableFile(
@@ -1493,13 +1534,14 @@ Status DBImpl::WriteLevel0TableForRecove
   constexpr int level = 0;
 
   if (s.ok() && has_output) {
-    edit->AddFile(
-        level, meta.fd.GetNumber(), meta.fd.GetPathId(), meta.fd.GetFileSize(),
-        meta.smallest, meta.largest, meta.fd.smallest_seqno,
-        meta.fd.largest_seqno, meta.marked_for_compaction, meta.temperature,
-        meta.oldest_blob_file_number, meta.oldest_ancester_time,
-        meta.file_creation_time, meta.file_checksum,
-        meta.file_checksum_func_name, meta.min_timestamp, meta.max_timestamp);
+    edit->AddFile(level, meta.fd.GetNumber(), meta.fd.GetPathId(),
+                  meta.fd.GetFileSize(), meta.smallest, meta.largest,
+                  meta.fd.smallest_seqno, meta.fd.largest_seqno,
+                  meta.marked_for_compaction, meta.temperature,
+                  meta.oldest_blob_file_number, meta.oldest_ancester_time,
+                  meta.file_creation_time, meta.file_checksum,
+                  meta.file_checksum_func_name, meta.min_timestamp,
+                  meta.max_timestamp, meta.unique_id);
 
     for (const auto& blob : blob_file_additions) {
       edit->AddBlobFile(blob);
@@ -1693,6 +1735,7 @@ Status DBImpl::Open(const DBOptions& db_
   }
 
   *dbptr = nullptr;
+  assert(handles);
   handles->clear();
 
   size_t max_write_buffer_size = 0;
@@ -1735,11 +1778,13 @@ Status DBImpl::Open(const DBOptions& db_
   }
 
   impl->wal_in_db_path_ = impl->immutable_db_options_.IsWalDirSameAsDBPath();
-
+  RecoveryContext recovery_ctx;
   impl->mutex_.Lock();
+
   // Handles create_if_missing, error_if_exists
   uint64_t recovered_seq(kMaxSequenceNumber);
-  s = impl->Recover(column_families, false, false, false, &recovered_seq);
+  s = impl->Recover(column_families, false, false, false, &recovered_seq,
+                    &recovery_ctx);
   if (s.ok()) {
     uint64_t new_log_number = impl->versions_->NewFileNumber();
     log::Writer* new_log = nullptr;
@@ -1756,40 +1801,6 @@ Status DBImpl::Open(const DBOptions& db_
     }
 
     if (s.ok()) {
-      // set column family handles
-      for (auto cf : column_families) {
-        auto cfd =
-            impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
-        if (cfd != nullptr) {
-          handles->push_back(
-              new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
-          impl->NewThreadStatusCfInfo(cfd);
-        } else {
-          if (db_options.create_missing_column_families) {
-            // missing column family, create it
-            ColumnFamilyHandle* handle;
-            impl->mutex_.Unlock();
-            s = impl->CreateColumnFamily(cf.options, cf.name, &handle);
-            impl->mutex_.Lock();
-            if (s.ok()) {
-              handles->push_back(handle);
-            } else {
-              break;
-            }
-          } else {
-            s = Status::InvalidArgument("Column family not found", cf.name);
-            break;
-          }
-        }
-      }
-    }
-    if (s.ok()) {
-      SuperVersionContext sv_context(/* create_superversion */ true);
-      for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
-        impl->InstallSuperVersionAndScheduleWork(
-            cfd, &sv_context, *cfd->GetLatestMutableCFOptions());
-      }
-      sv_context.Clean();
       if (impl->two_write_queues_) {
         impl->log_write_mutex_.Lock();
       }
@@ -1821,6 +1832,7 @@ Status DBImpl::Open(const DBOptions& db_
         if (s.ok()) {
           // Need to fsync, otherwise it might get lost after a power reset.
           s = impl->FlushWAL(false);
+          TEST_SYNC_POINT_CALLBACK("DBImpl::Open::BeforeSyncWAL", /*arg=*/&s);
           if (s.ok()) {
             s = log_writer->file()->Sync(impl->immutable_db_options_.use_fsync);
           }
@@ -1828,6 +1840,53 @@ Status DBImpl::Open(const DBOptions& db_
       }
     }
   }
+  if (s.ok()) {
+    s = impl->LogAndApplyForRecovery(recovery_ctx);
+  }
+
+  if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) {
+    impl->mutex_.AssertHeld();
+    s = impl->InitPersistStatsColumnFamily();
+  }
+
+  if (s.ok()) {
+    // set column family handles
+    for (auto cf : column_families) {
+      auto cfd =
+          impl->versions_->GetColumnFamilySet()->GetColumnFamily(cf.name);
+      if (cfd != nullptr) {
+        handles->push_back(
+            new ColumnFamilyHandleImpl(cfd, impl, &impl->mutex_));
+        impl->NewThreadStatusCfInfo(cfd);
+      } else {
+        if (db_options.create_missing_column_families) {
+          // missing column family, create it
+          ColumnFamilyHandle* handle = nullptr;
+          impl->mutex_.Unlock();
+          s = impl->CreateColumnFamily(cf.options, cf.name, &handle);
+          impl->mutex_.Lock();
+          if (s.ok()) {
+            handles->push_back(handle);
+          } else {
+            break;
+          }
+        } else {
+          s = Status::InvalidArgument("Column family not found", cf.name);
+          break;
+        }
+      }
+    }
+  }
+
+  if (s.ok()) {
+    SuperVersionContext sv_context(/* create_superversion */ true);
+    for (auto cfd : *impl->versions_->GetColumnFamilySet()) {
+      impl->InstallSuperVersionAndScheduleWork(
+          cfd, &sv_context, *cfd->GetLatestMutableCFOptions());
+    }
+    sv_context.Clean();
+  }
+
   if (s.ok() && impl->immutable_db_options_.persist_stats_to_disk) {
     // try to read format version
     s = impl->PersistentStatsProcessFormatVersion();
diff -pruN 7.2.2-5/db/db_impl/db_impl_readonly.cc 7.3.1-2/db/db_impl/db_impl_readonly.cc
--- 7.2.2-5/db/db_impl/db_impl_readonly.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_readonly.cc	2022-06-08 21:08:16.000000000 +0000
@@ -33,20 +33,38 @@ DBImplReadOnly::~DBImplReadOnly() {}
 Status DBImplReadOnly::Get(const ReadOptions& read_options,
                            ColumnFamilyHandle* column_family, const Slice& key,
                            PinnableSlice* pinnable_val) {
+  return Get(read_options, column_family, key, pinnable_val,
+             /*timestamp*/ nullptr);
+}
+
+Status DBImplReadOnly::Get(const ReadOptions& read_options,
+                           ColumnFamilyHandle* column_family, const Slice& key,
+                           PinnableSlice* pinnable_val,
+                           std::string* timestamp) {
   assert(pinnable_val != nullptr);
   // TODO: stopwatch DB_GET needed?, perf timer needed?
   PERF_TIMER_GUARD(get_snapshot_time);
 
   assert(column_family);
+  if (read_options.timestamp) {
+    const Status s =
+        FailIfTsSizesMismatch(column_family, *(read_options.timestamp));
+    if (!s.ok()) {
+      return s;
+    }
+  } else {
+    const Status s = FailIfCfHasTs(column_family);
+    if (!s.ok()) {
+      return s;
+    }
+  }
   const Comparator* ucmp = column_family->GetComparator();
   assert(ucmp);
-  if (ucmp->timestamp_size() || read_options.timestamp) {
-    // TODO: support timestamp
-    return Status::NotSupported();
-  }
+  std::string* ts = ucmp->timestamp_size() > 0 ? timestamp : nullptr;
 
   Status s;
   SequenceNumber snapshot = versions_->LastSequence();
+  GetWithTimestampReadCallback read_cb(snapshot);
   auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
   auto cfd = cfh->cfd();
   if (tracer_) {
@@ -58,19 +76,23 @@ Status DBImplReadOnly::Get(const ReadOpt
   SuperVersion* super_version = cfd->GetSuperVersion();
   MergeContext merge_context;
   SequenceNumber max_covering_tombstone_seq = 0;
-  LookupKey lkey(key, snapshot);
+  LookupKey lkey(key, snapshot, read_options.timestamp);
   PERF_TIMER_STOP(get_snapshot_time);
-  if (super_version->mem->Get(lkey, pinnable_val->GetSelf(),
-                              /*timestamp=*/nullptr, &s, &merge_context,
-                              &max_covering_tombstone_seq, read_options)) {
+  if (super_version->mem->Get(lkey, pinnable_val->GetSelf(), ts, &s,
+                              &merge_context, &max_covering_tombstone_seq,
+                              read_options, &read_cb)) {
     pinnable_val->PinSelf();
     RecordTick(stats_, MEMTABLE_HIT);
   } else {
     PERF_TIMER_GUARD(get_from_output_files_time);
     PinnedIteratorsManager pinned_iters_mgr;
-    super_version->current->Get(read_options, lkey, pinnable_val,
-                                /*timestamp=*/nullptr, &s, &merge_context,
-                                &max_covering_tombstone_seq, &pinned_iters_mgr);
+    super_version->current->Get(
+        read_options, lkey, pinnable_val, ts, &s, &merge_context,
+        &max_covering_tombstone_seq, &pinned_iters_mgr,
+        /*value_found*/ nullptr,
+        /*key_exists*/ nullptr, /*seq*/ nullptr, &read_cb,
+        /*is_blob*/ nullptr,
+        /*do_merge*/ true);
     RecordTick(stats_, MEMTABLE_MISS);
   }
   RecordTick(stats_, NUMBER_KEYS_READ);
@@ -84,11 +106,17 @@ Status DBImplReadOnly::Get(const ReadOpt
 Iterator* DBImplReadOnly::NewIterator(const ReadOptions& read_options,
                                       ColumnFamilyHandle* column_family) {
   assert(column_family);
-  const Comparator* ucmp = column_family->GetComparator();
-  assert(ucmp);
-  if (ucmp->timestamp_size() || read_options.timestamp) {
-    // TODO: support timestamp
-    return NewErrorIterator(Status::NotSupported());
+  if (read_options.timestamp) {
+    const Status s =
+        FailIfTsSizesMismatch(column_family, *(read_options.timestamp));
+    if (!s.ok()) {
+      return NewErrorIterator(s);
+    }
+  } else {
+    const Status s = FailIfCfHasTs(column_family);
+    if (!s.ok()) {
+      return NewErrorIterator(s);
+    }
   }
   auto cfh = static_cast_with_check<ColumnFamilyHandleImpl>(column_family);
   auto cfd = cfh->cfd();
@@ -118,16 +146,19 @@ Status DBImplReadOnly::NewIterators(
     const std::vector<ColumnFamilyHandle*>& column_families,
     std::vector<Iterator*>* iterators) {
   if (read_options.timestamp) {
-    // TODO: support timestamp
-    return Status::NotSupported();
+    for (auto* cf : column_families) {
+      assert(cf);
+      const Status s = FailIfTsSizesMismatch(cf, *(read_options.timestamp));
+      if (!s.ok()) {
+        return s;
+      }
+    }
   } else {
     for (auto* cf : column_families) {
       assert(cf);
-      const Comparator* ucmp = cf->GetComparator();
-      assert(ucmp);
-      if (ucmp->timestamp_size()) {
-        // TODO: support timestamp
-        return Status::NotSupported();
+      const Status s = FailIfCfHasTs(cf);
+      if (!s.ok()) {
+        return s;
       }
     }
   }
diff -pruN 7.2.2-5/db/db_impl/db_impl_readonly.h 7.3.1-2/db/db_impl/db_impl_readonly.h
--- 7.2.2-5/db/db_impl/db_impl_readonly.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_readonly.h	2022-06-08 21:08:16.000000000 +0000
@@ -27,6 +27,9 @@ class DBImplReadOnly : public DBImpl {
   virtual Status Get(const ReadOptions& options,
                      ColumnFamilyHandle* column_family, const Slice& key,
                      PinnableSlice* value) override;
+  Status Get(const ReadOptions& options, ColumnFamilyHandle* column_family,
+             const Slice& key, PinnableSlice* value,
+             std::string* timestamp) override;
 
   // TODO: Implement ReadOnly MultiGet?
 
diff -pruN 7.2.2-5/db/db_impl/db_impl_secondary.cc 7.3.1-2/db/db_impl/db_impl_secondary.cc
--- 7.2.2-5/db/db_impl/db_impl_secondary.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_secondary.cc	2022-06-08 21:08:16.000000000 +0000
@@ -33,7 +33,8 @@ DBImplSecondary::~DBImplSecondary() {}
 Status DBImplSecondary::Recover(
     const std::vector<ColumnFamilyDescriptor>& column_families,
     bool /*readonly*/, bool /*error_if_wal_file_exists*/,
-    bool /*error_if_data_exists_in_wals*/, uint64_t*) {
+    bool /*error_if_data_exists_in_wals*/, uint64_t*,
+    RecoveryContext* /*recovery_ctx*/) {
   mutex_.AssertHeld();
 
   JobContext job_context(0);
@@ -247,15 +248,16 @@ Status DBImplSecondary::RecoverLogFiles(
           if (seq_of_batch <= seq) {
             continue;
           }
-          auto curr_log_num = port::kMaxUint64;
+          auto curr_log_num = std::numeric_limits<uint64_t>::max();
           if (cfd_to_current_log_.count(cfd) > 0) {
             curr_log_num = cfd_to_current_log_[cfd];
           }
           // If the active memtable contains records added by replaying an
           // earlier WAL, then we need to seal the memtable, add it to the
           // immutable memtable list and create a new active memtable.
-          if (!cfd->mem()->IsEmpty() && (curr_log_num == port::kMaxUint64 ||
-                                         curr_log_num != log_number)) {
+          if (!cfd->mem()->IsEmpty() &&
+              (curr_log_num == std::numeric_limits<uint64_t>::max() ||
+               curr_log_num != log_number)) {
             const MutableCFOptions mutable_cf_options =
                 *cfd->GetLatestMutableCFOptions();
             MemTable* new_mem =
@@ -771,12 +773,19 @@ Status DBImplSecondary::CompactWithoutIn
 
   const int job_id = next_job_id_.fetch_add(1);
 
+  // Use the primary host's db_id for running the compaction, but use the
+  // local db_session_id, to make sure the unique id differs from those of the
+  // remote compactors. The unique id is generated from db_id, db_session_id
+  // and orig_file_number; unlike local compaction, remote compaction cannot
+  // guarantee the uniqueness of orig_file_number, because the file number is
+  // only assigned when the compaction is done.
   CompactionServiceCompactionJob compaction_job(
       job_id, c.get(), immutable_db_options_, mutable_db_options_,
       file_options_for_compaction_, versions_.get(), &shutting_down_,
       &log_buffer, output_dir.get(), stats_, &mutex_, &error_handler_,
       input.snapshots, table_cache_, &event_logger_, dbname_, io_tracer_,
-      options.canceled, db_id_, db_session_id_, secondary_path_, input, result);
+      options.canceled, input.db_id, db_session_id_, secondary_path_, input,
+      result);
 
   mutex_.Unlock();
   s = compaction_job.Run();
@@ -831,6 +840,8 @@ Status DB::OpenAndCompact(
       override_options.table_factory;
   compaction_input.column_family.options.sst_partitioner_factory =
       override_options.sst_partitioner_factory;
+  compaction_input.column_family.options.table_properties_collector_factories =
+      override_options.table_properties_collector_factories;
   compaction_input.db_options.listeners = override_options.listeners;
 
   std::vector<ColumnFamilyDescriptor> column_families;
diff -pruN 7.2.2-5/db/db_impl/db_impl_secondary.h 7.3.1-2/db/db_impl/db_impl_secondary.h
--- 7.2.2-5/db/db_impl/db_impl_secondary.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_impl/db_impl_secondary.h	2022-06-08 21:08:16.000000000 +0000
@@ -81,8 +81,8 @@ class DBImplSecondary : public DBImpl {
   // and log_readers_ to facilitate future operations.
   Status Recover(const std::vector<ColumnFamilyDescriptor>& column_families,
                  bool read_only, bool error_if_wal_file_exists,
-                 bool error_if_data_exists_in_wals,
-                 uint64_t* = nullptr) override;
+                 bool error_if_data_exists_in_wals, uint64_t* = nullptr,
+                 RecoveryContext* recovery_ctx = nullptr) override;
 
   // Implementations of the DB interface
   using DB::Get;
diff -pruN 7.2.2-5/db/db_info_dumper.cc 7.3.1-2/db/db_info_dumper.cc
--- 7.2.2-5/db/db_info_dumper.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_info_dumper.cc	2022-06-08 21:08:16.000000000 +0000
@@ -35,10 +35,12 @@ void DumpDBFileSummary(const ImmutableDB
   Header(options.info_log, "DB SUMMARY\n");
   Header(options.info_log, "DB Session ID:  %s\n", session_id.c_str());
 
+  Status s;
   // Get files in dbname dir
-  if (!env->GetChildren(dbname, &files).ok()) {
-    Error(options.info_log,
-          "Error when reading %s dir\n", dbname.c_str());
+  s = env->GetChildren(dbname, &files);
+  if (!s.ok()) {
+    Error(options.info_log, "Error when reading %s dir %s\n", dbname.c_str(),
+          s.ToString().c_str());
   }
   std::sort(files.begin(), files.end());
   for (const std::string& file : files) {
@@ -53,24 +55,27 @@ void DumpDBFileSummary(const ImmutableDB
         Header(options.info_log, "IDENTITY file:  %s\n", file.c_str());
         break;
       case kDescriptorFile:
-        if (env->GetFileSize(dbname + "/" + file, &file_size).ok()) {
+        s = env->GetFileSize(dbname + "/" + file, &file_size);
+        if (s.ok()) {
           Header(options.info_log,
                  "MANIFEST file:  %s size: %" PRIu64 " Bytes\n", file.c_str(),
                  file_size);
         } else {
-          Error(options.info_log, "Error when reading MANIFEST file: %s/%s\n",
-                dbname.c_str(), file.c_str());
+          Error(options.info_log,
+                "Error when reading MANIFEST file: %s/%s %s\n", dbname.c_str(),
+                file.c_str(), s.ToString().c_str());
         }
         break;
       case kWalFile:
-        if (env->GetFileSize(dbname + "/" + file, &file_size).ok()) {
+        s = env->GetFileSize(dbname + "/" + file, &file_size);
+        if (s.ok()) {
           wal_info.append(file)
               .append(" size: ")
               .append(std::to_string(file_size))
               .append(" ; ");
         } else {
-          Error(options.info_log, "Error when reading LOG file: %s/%s\n",
-                dbname.c_str(), file.c_str());
+          Error(options.info_log, "Error when reading LOG file: %s/%s %s\n",
+                dbname.c_str(), file.c_str(), s.ToString().c_str());
         }
         break;
       case kTableFile:
@@ -86,10 +91,10 @@ void DumpDBFileSummary(const ImmutableDB
   // Get sst files in db_path dir
   for (auto& db_path : options.db_paths) {
     if (dbname.compare(db_path.path) != 0) {
-      if (!env->GetChildren(db_path.path, &files).ok()) {
-        Error(options.info_log,
-            "Error when reading %s dir\n",
-            db_path.path.c_str());
+      s = env->GetChildren(db_path.path, &files);
+      if (!s.ok()) {
+        Error(options.info_log, "Error when reading %s dir %s\n",
+              db_path.path.c_str(), s.ToString().c_str());
         continue;
       }
       std::sort(files.begin(), files.end());
@@ -111,22 +116,25 @@ void DumpDBFileSummary(const ImmutableDB
   // Get wal file in wal_dir
   const auto& wal_dir = options.GetWalDir(dbname);
   if (!options.IsWalDirSameAsDBPath(dbname)) {
-    if (!env->GetChildren(wal_dir, &files).ok()) {
-      Error(options.info_log, "Error when reading %s dir\n", wal_dir.c_str());
+    s = env->GetChildren(wal_dir, &files);
+    if (!s.ok()) {
+      Error(options.info_log, "Error when reading %s dir %s\n", wal_dir.c_str(),
+            s.ToString().c_str());
       return;
     }
     wal_info.clear();
     for (const std::string& file : files) {
       if (ParseFileName(file, &number, &type)) {
         if (type == kWalFile) {
-          if (env->GetFileSize(wal_dir + "/" + file, &file_size).ok()) {
+          s = env->GetFileSize(wal_dir + "/" + file, &file_size);
+          if (s.ok()) {
             wal_info.append(file)
                 .append(" size: ")
                 .append(std::to_string(file_size))
                 .append(" ; ");
           } else {
-            Error(options.info_log, "Error when reading LOG file %s/%s\n",
-                  wal_dir.c_str(), file.c_str());
+            Error(options.info_log, "Error when reading LOG file %s/%s %s\n",
+                  wal_dir.c_str(), file.c_str(), s.ToString().c_str());
           }
         }
       }
diff -pruN 7.2.2-5/db/db_iterator_test.cc 7.3.1-2/db/db_iterator_test.cc
--- 7.2.2-5/db/db_iterator_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_iterator_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -111,9 +111,12 @@ TEST_P(DBIteratorTest, PersistedTierOnIt
 TEST_P(DBIteratorTest, NonBlockingIteration) {
   do {
     ReadOptions non_blocking_opts, regular_opts;
-    Options options = CurrentOptions();
+    anon::OptionsOverride options_override;
+    options_override.full_block_cache = true;
+    Options options = CurrentOptions(options_override);
     options.statistics = ROCKSDB_NAMESPACE::CreateDBStatistics();
     non_blocking_opts.read_tier = kBlockCacheTier;
+
     CreateAndReopenWithCF({"pikachu"}, options);
     // write one kv to the database.
     ASSERT_OK(Put(1, "a", "b"));
@@ -3157,7 +3160,7 @@ TEST_F(DBIteratorWithReadCallbackTest, R
   uint64_t num_versions =
       CurrentOptions().max_sequential_skip_in_iterations + 10;
   for (uint64_t i = 0; i < num_versions; i++) {
-    ASSERT_OK(Put("bar", ToString(i)));
+    ASSERT_OK(Put("bar", std::to_string(i)));
   }
   SequenceNumber seq3 = db_->GetLatestSequenceNumber();
   TestReadCallback callback2(seq3);
@@ -3186,7 +3189,7 @@ TEST_F(DBIteratorWithReadCallbackTest, R
   ASSERT_TRUE(iter->Valid());
   ASSERT_OK(iter->status());
   ASSERT_EQ("bar", iter->key());
-  ASSERT_EQ(ToString(num_versions - 1), iter->value());
+  ASSERT_EQ(std::to_string(num_versions - 1), iter->value());
 
   delete iter;
 }
diff -pruN 7.2.2-5/db/db_iter_stress_test.cc 7.3.1-2/db/db_iter_stress_test.cc
--- 7.2.2-5/db/db_iter_stress_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_iter_stress_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -414,7 +414,7 @@ TEST_F(DBIteratorStressTest, StressTest)
       a /= 10;
       ++len;
     }
-    std::string s = ToString(rnd.Next() % static_cast<uint64_t>(max_key));
+    std::string s = std::to_string(rnd.Next() % static_cast<uint64_t>(max_key));
     s.insert(0, len - (int)s.size(), '0');
     return s;
   };
@@ -444,12 +444,13 @@ TEST_F(DBIteratorStressTest, StressTest)
           for (double mutation_probability : {0.01, 0.5}) {
             for (double target_hidden_fraction : {0.1, 0.5}) {
               std::string trace_str =
-                  "entries: " + ToString(num_entries) +
-                  ", key_space: " + ToString(key_space) +
-                  ", error_probability: " + ToString(error_probability) +
-                  ", mutation_probability: " + ToString(mutation_probability) +
+                  "entries: " + std::to_string(num_entries) +
+                  ", key_space: " + std::to_string(key_space) +
+                  ", error_probability: " + std::to_string(error_probability) +
+                  ", mutation_probability: " +
+                  std::to_string(mutation_probability) +
                   ", target_hidden_fraction: " +
-                  ToString(target_hidden_fraction);
+                  std::to_string(target_hidden_fraction);
               SCOPED_TRACE(trace_str);
               if (trace) {
                 std::cout << trace_str << std::endl;
@@ -470,7 +471,7 @@ TEST_F(DBIteratorStressTest, StressTest)
                       types[rnd.Next() % (sizeof(types) / sizeof(types[0]))];
                 }
                 e.sequence = i;
-                e.value = "v" + ToString(i);
+                e.value = "v" + std::to_string(i);
                 ParsedInternalKey internal_key(e.key, e.sequence, e.type);
                 AppendInternalKey(&e.ikey, internal_key);
 
diff -pruN 7.2.2-5/db/db_iter_test.cc 7.3.1-2/db/db_iter_test.cc
--- 7.2.2-5/db/db_iter_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_iter_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -766,7 +766,7 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip
       internal_iter->AddMerge("b", "merge_1");
       internal_iter->AddMerge("a", "merge_2");
       for (size_t k = 0; k < 200; ++k) {
-        internal_iter->AddPut("c", ToString(k));
+        internal_iter->AddPut("c", std::to_string(k));
       }
       internal_iter->Finish();
 
@@ -780,7 +780,7 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip
       ASSERT_TRUE(db_iter->Valid());
 
       ASSERT_EQ(db_iter->key().ToString(), "c");
-      ASSERT_EQ(db_iter->value().ToString(), ToString(i));
+      ASSERT_EQ(db_iter->value().ToString(), std::to_string(i));
       db_iter->Prev();
       ASSERT_TRUE(db_iter->Valid());
 
@@ -925,11 +925,11 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip
       internal_iter->AddMerge("b", "merge_1");
       internal_iter->AddMerge("a", "merge_2");
       for (size_t k = 0; k < 200; ++k) {
-        internal_iter->AddPut("d", ToString(k));
+        internal_iter->AddPut("d", std::to_string(k));
       }
 
       for (size_t k = 0; k < 200; ++k) {
-        internal_iter->AddPut("c", ToString(k));
+        internal_iter->AddPut("c", std::to_string(k));
       }
       internal_iter->Finish();
 
@@ -942,7 +942,7 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip
       ASSERT_TRUE(db_iter->Valid());
 
       ASSERT_EQ(db_iter->key().ToString(), "d");
-      ASSERT_EQ(db_iter->value().ToString(), ToString(i));
+      ASSERT_EQ(db_iter->value().ToString(), std::to_string(i));
       db_iter->Prev();
       ASSERT_TRUE(db_iter->Valid());
 
@@ -966,7 +966,7 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip
       internal_iter->AddMerge("b", "b");
       internal_iter->AddMerge("a", "a");
       for (size_t k = 0; k < 200; ++k) {
-        internal_iter->AddMerge("c", ToString(k));
+        internal_iter->AddMerge("c", std::to_string(k));
       }
       internal_iter->Finish();
 
@@ -981,7 +981,7 @@ TEST_F(DBIteratorTest, DBIteratorUseSkip
       ASSERT_EQ(db_iter->key().ToString(), "c");
       std::string merge_result = "0";
       for (size_t j = 1; j <= i; ++j) {
-        merge_result += "," + ToString(j);
+        merge_result += "," + std::to_string(j);
       }
       ASSERT_EQ(db_iter->value().ToString(), merge_result);
 
@@ -3156,7 +3156,7 @@ TEST_F(DBIteratorTest, ReverseToForwardW
   internal_iter->AddPut("a", "A");
   internal_iter->AddPut("b", "B");
   for (int i = 0; i < 100; ++i) {
-    internal_iter->AddPut("c" + ToString(i), "");
+    internal_iter->AddPut("c" + std::to_string(i), "");
   }
   internal_iter->Finish();
 
diff -pruN 7.2.2-5/db/db_kv_checksum_test.cc 7.3.1-2/db/db_kv_checksum_test.cc
--- 7.2.2-5/db/db_kv_checksum_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_kv_checksum_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -79,7 +79,7 @@ class DbKvChecksumTest
 
   void CorruptNextByteCallBack(void* arg) {
     Slice encoded = *static_cast<Slice*>(arg);
-    if (entry_len_ == port::kMaxSizet) {
+    if (entry_len_ == std::numeric_limits<size_t>::max()) {
       // We learn the entry size on the first attempt
       entry_len_ = encoded.size();
     }
@@ -96,7 +96,7 @@ class DbKvChecksumTest
   WriteBatchOpType op_type_;
   char corrupt_byte_addend_;
   size_t corrupt_byte_offset_ = 0;
-  size_t entry_len_ = port::kMaxSizet;
+  size_t entry_len_ = std::numeric_limits<size_t>::max();
 };
 
 std::string GetTestNameSuffix(
diff -pruN 7.2.2-5/db/db_log_iter_test.cc 7.3.1-2/db/db_log_iter_test.cc
--- 7.2.2-5/db/db_log_iter_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_log_iter_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -187,7 +187,7 @@ TEST_F(DBTestXactLogIterator, Transactio
     DestroyAndReopen(options);
 
     for (int i = 0; i < 1024; i++) {
-      ASSERT_OK(Put("key" + ToString(i), DummyString(10)));
+      ASSERT_OK(Put("key" + std::to_string(i), DummyString(10)));
     }
 
     ASSERT_OK(Flush());
@@ -263,20 +263,20 @@ TEST_F(DBTestXactLogIterator, Transactio
   struct Handler : public WriteBatch::Handler {
     std::string seen;
     Status PutCF(uint32_t cf, const Slice& key, const Slice& value) override {
-      seen += "Put(" + ToString(cf) + ", " + key.ToString() + ", " +
-              ToString(value.size()) + ")";
+      seen += "Put(" + std::to_string(cf) + ", " + key.ToString() + ", " +
+              std::to_string(value.size()) + ")";
       return Status::OK();
     }
     Status MergeCF(uint32_t cf, const Slice& key, const Slice& value) override {
-      seen += "Merge(" + ToString(cf) + ", " + key.ToString() + ", " +
-              ToString(value.size()) + ")";
+      seen += "Merge(" + std::to_string(cf) + ", " + key.ToString() + ", " +
+              std::to_string(value.size()) + ")";
       return Status::OK();
     }
     void LogData(const Slice& blob) override {
       seen += "LogData(" + blob.ToString() + ")";
     }
     Status DeleteCF(uint32_t cf, const Slice& key) override {
-      seen += "Delete(" + ToString(cf) + ", " + key.ToString() + ")";
+      seen += "Delete(" + std::to_string(cf) + ", " + key.ToString() + ")";
       return Status::OK();
     }
   } handler;
diff -pruN 7.2.2-5/db/db_memtable_test.cc 7.3.1-2/db/db_memtable_test.cc
--- 7.2.2-5/db/db_memtable_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_memtable_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -97,7 +97,7 @@ class MockMemTableRepFactory : public Me
 
  private:
   MockMemTableRep* mock_rep_;
-  // workaround since there's no port::kMaxUint32 yet.
+  // workaround since there's no std::numeric_limits<uint32_t>::max() yet.
   uint32_t last_column_family_id_ = static_cast<uint32_t>(-1);
 };
 
@@ -171,7 +171,7 @@ TEST_F(DBMemTableTest, DuplicateSeq) {
     if (!insert_dup) {
       seq++;
     }
-    Status s = mem->Add(seq, kTypeValue, "foo", "value" + ToString(seq),
+    Status s = mem->Add(seq, kTypeValue, "foo", "value" + std::to_string(seq),
                         nullptr /* kv_prot_info */);
     if (insert_dup) {
       ASSERT_TRUE(s.IsTryAgain());
diff -pruN 7.2.2-5/db/db_options_test.cc 7.3.1-2/db/db_options_test.cc
--- 7.2.2-5/db/db_options_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_options_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -95,6 +95,22 @@ TEST_F(DBOptionsTest, ImmutableTrackAndV
   ASSERT_FALSE(s.ok());
 }
 
+TEST_F(DBOptionsTest, ImmutableVerifySstUniqueIdInManifest) {
+  Options options;
+  options.env = env_;
+  options.verify_sst_unique_id_in_manifest = true;
+
+  ImmutableDBOptions db_options(options);
+  ASSERT_TRUE(db_options.verify_sst_unique_id_in_manifest);
+
+  Reopen(options);
+  ASSERT_TRUE(dbfull()->GetDBOptions().verify_sst_unique_id_in_manifest);
+
+  Status s =
+      dbfull()->SetDBOptions({{"verify_sst_unique_id_in_manifest", "false"}});
+  ASSERT_FALSE(s.ok());
+}
+
 // RocksDB lite don't support dynamic options.
 #ifndef ROCKSDB_LITE
 
@@ -424,8 +440,8 @@ TEST_F(DBOptionsTest, WritableFileMaxBuf
   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->EnableProcessing();
   int i = 0;
   for (; i < 3; i++) {
-    ASSERT_OK(Put("foo", ToString(i)));
-    ASSERT_OK(Put("bar", ToString(i)));
+    ASSERT_OK(Put("foo", std::to_string(i)));
+    ASSERT_OK(Put("bar", std::to_string(i)));
     ASSERT_OK(Flush());
   }
   ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -442,8 +458,8 @@ TEST_F(DBOptionsTest, WritableFileMaxBuf
             dbfull()->GetDBOptions().writable_file_max_buffer_size);
   i = 0;
   for (; i < 3; i++) {
-    ASSERT_OK(Put("foo", ToString(i)));
-    ASSERT_OK(Put("bar", ToString(i)));
+    ASSERT_OK(Put("foo", std::to_string(i)));
+    ASSERT_OK(Put("bar", std::to_string(i)));
     ASSERT_OK(Flush());
   }
   ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -573,8 +589,8 @@ TEST_F(DBOptionsTest, SetOptionsMayTrigg
   Reopen(options);
   for (int i = 0; i < 3; i++) {
     // Need to insert two keys to avoid trivial move.
-    ASSERT_OK(Put("foo", ToString(i)));
-    ASSERT_OK(Put("bar", ToString(i)));
+    ASSERT_OK(Put("foo", std::to_string(i)));
+    ASSERT_OK(Put("bar", std::to_string(i)));
     ASSERT_OK(Flush());
   }
   ASSERT_EQ("3", FilesPerLevel());
@@ -717,8 +733,8 @@ TEST_F(DBOptionsTest, SetStatsDumpPeriod
 
   for (int i = 0; i < 20; i++) {
     unsigned int num = rand() % 5000 + 1;
-    ASSERT_OK(
-        dbfull()->SetDBOptions({{"stats_dump_period_sec", ToString(num)}}));
+    ASSERT_OK(dbfull()->SetDBOptions(
+        {{"stats_dump_period_sec", std::to_string(num)}}));
     ASSERT_EQ(num, dbfull()->GetDBOptions().stats_dump_period_sec);
   }
   Close();
@@ -909,7 +925,7 @@ TEST_F(DBOptionsTest, SetFIFOCompactionO
   for (int i = 0; i < 10; i++) {
     // Generate and flush a file about 10KB.
     for (int j = 0; j < 10; j++) {
-      ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+      ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
     }
     ASSERT_OK(Flush());
   }
@@ -940,7 +956,7 @@ TEST_F(DBOptionsTest, SetFIFOCompactionO
   for (int i = 0; i < 10; i++) {
     // Generate and flush a file about 10KB.
     for (int j = 0; j < 10; j++) {
-      ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+      ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
     }
     ASSERT_OK(Flush());
   }
@@ -972,7 +988,7 @@ TEST_F(DBOptionsTest, SetFIFOCompactionO
   for (int i = 0; i < 10; i++) {
     // Generate and flush a file about 10KB.
     for (int j = 0; j < 10; j++) {
-      ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+      ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
     }
     ASSERT_OK(Flush());
   }
@@ -1036,7 +1052,7 @@ TEST_F(DBOptionsTest, FIFOTtlBackwardCom
   for (int i = 0; i < 10; i++) {
     // Generate and flush a file about 10KB.
     for (int j = 0; j < 10; j++) {
-      ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+      ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
     }
     ASSERT_OK(Flush());
   }
diff -pruN 7.2.2-5/db/db_properties_test.cc 7.3.1-2/db/db_properties_test.cc
--- 7.2.2-5/db/db_properties_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_properties_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -593,9 +593,9 @@ TEST_F(DBPropertiesTest, AggregatedTable
     ASSERT_OK(db_->CompactRange(CompactRangeOptions(), nullptr, nullptr));
     ResetTableProperties(&sum_tp);
     for (int level = 0; level < kMaxLevel; ++level) {
-      db_->GetProperty(
-          DB::Properties::kAggregatedTablePropertiesAtLevel + ToString(level),
-          &level_tp_strings[level]);
+      db_->GetProperty(DB::Properties::kAggregatedTablePropertiesAtLevel +
+                           std::to_string(level),
+                       &level_tp_strings[level]);
       ParseTablePropertiesString(level_tp_strings[level], &level_tps[level]);
       sum_tp.data_size += level_tps[level].data_size;
       sum_tp.index_size += level_tps[level].index_size;
@@ -1091,7 +1091,7 @@ TEST_F(DBPropertiesTest, EstimateCompres
     for (int j = 0; j < kNumEntriesPerFile; ++j) {
       // Put common data ("key") at end to prevent delta encoding from
       // compressing the key effectively
-      std::string key = ToString(i) + ToString(j) + "key";
+      std::string key = std::to_string(i) + std::to_string(j) + "key";
       ASSERT_OK(dbfull()->Put(WriteOptions(), key, kVal));
     }
     ASSERT_OK(Flush());
@@ -1185,7 +1185,7 @@ class CountingDeleteTabPropCollector : p
 
   Status Finish(UserCollectedProperties* properties) override {
     *properties =
-        UserCollectedProperties{{"num_delete", ToString(num_deletes_)}};
+        UserCollectedProperties{{"num_delete", std::to_string(num_deletes_)}};
     return Status::OK();
   }
 
@@ -1215,7 +1215,7 @@ class BlockCountingTablePropertiesCollec
 
   Status Finish(UserCollectedProperties* properties) override {
     (*properties)[kNumSampledBlocksPropertyName] =
-        ToString(num_sampled_blocks_);
+        std::to_string(num_sampled_blocks_);
     return Status::OK();
   }
 
@@ -1235,7 +1235,7 @@ class BlockCountingTablePropertiesCollec
 
   UserCollectedProperties GetReadableProperties() const override {
     return UserCollectedProperties{
-        {kNumSampledBlocksPropertyName, ToString(num_sampled_blocks_)},
+        {kNumSampledBlocksPropertyName, std::to_string(num_sampled_blocks_)},
     };
   }
 
@@ -1272,7 +1272,8 @@ TEST_F(DBPropertiesTest, GetUserDefinedT
   // Create 4 tables
   for (int table = 0; table < 4; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(db_->Put(WriteOptions(), ToString(table * 100 + i), "val"));
+      ASSERT_OK(
+          db_->Put(WriteOptions(), std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(db_->Flush(FlushOptions()));
   }
@@ -1312,7 +1313,7 @@ TEST_F(DBPropertiesTest, UserDefinedTabl
   // Create 2 files
   for (int table = 0; table < 2; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(Put(1, ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(1, std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(Flush(1));
   }
@@ -1322,7 +1323,7 @@ TEST_F(DBPropertiesTest, UserDefinedTabl
   // Trigger automatic compactions.
   for (int table = 0; table < 3; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(Put(1, ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(1, std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(Flush(1));
     ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -1339,7 +1340,7 @@ TEST_F(DBPropertiesTest, UserDefinedTabl
   // Create 4 tables in default column family
   for (int table = 0; table < 2; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(Put(ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(Flush());
   }
@@ -1349,7 +1350,7 @@ TEST_F(DBPropertiesTest, UserDefinedTabl
   // Trigger automatic compactions.
   for (int table = 0; table < 3; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(Put(ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(Flush());
     ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -1545,7 +1546,7 @@ TEST_F(DBPropertiesTest, BlockAddForComp
                   user_props.end());
       ASSERT_EQ(user_props.at(BlockCountingTablePropertiesCollector::
                                   kNumSampledBlocksPropertyName),
-                ToString(sample_for_compression ? 1 : 0));
+                std::to_string(sample_for_compression ? 1 : 0));
     }
   }
 }
@@ -1742,11 +1743,11 @@ TEST_F(DBPropertiesTest, SstFilesSize) {
   Reopen(options);
 
   for (int i = 0; i < 10; i++) {
-    ASSERT_OK(Put("key" + ToString(i), std::string(1000, 'v')));
+    ASSERT_OK(Put("key" + std::to_string(i), std::string(1000, 'v')));
   }
   ASSERT_OK(Flush());
   for (int i = 0; i < 5; i++) {
-    ASSERT_OK(Delete("key" + ToString(i)));
+    ASSERT_OK(Delete("key" + std::to_string(i)));
   }
   ASSERT_OK(Flush());
   uint64_t sst_size;
diff -pruN 7.2.2-5/db/db_range_del_test.cc 7.3.1-2/db/db_range_del_test.cc
--- 7.2.2-5/db/db_range_del_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_range_del_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -190,9 +190,10 @@ TEST_F(DBRangeDelTest, MaxCompactionByte
   ASSERT_EQ(0, NumTableFilesAtLevel(0));
   ASSERT_EQ(NumTableFilesAtLevel(2), 2);
 
-  ASSERT_OK(db_->SetOptions(
-      db_->DefaultColumnFamily(),
-      {{"target_file_size_base", ToString(100 * opts.max_compaction_bytes)}}));
+  ASSERT_OK(
+      db_->SetOptions(db_->DefaultColumnFamily(),
+                      {{"target_file_size_base",
+                        std::to_string(100 * opts.max_compaction_bytes)}}));
 
   // It spans the whole key-range, thus will be included in all output files
   ASSERT_OK(db_->DeleteRange(WriteOptions(), db_->DefaultColumnFamily(),
@@ -500,7 +501,8 @@ TEST_F(DBRangeDelTest, ValidUniversalSub
       1 /* input_level */, 2 /* output_level */, CompactRangeOptions(),
       nullptr /* begin */, nullptr /* end */, true /* exclusive */,
       true /* disallow_trivial_move */,
-      port::kMaxUint64 /* max_file_num_to_ignore */, "" /*trim_ts*/));
+      std::numeric_limits<uint64_t>::max() /* max_file_num_to_ignore */,
+      "" /*trim_ts*/));
 }
 #endif  // ROCKSDB_LITE
 
diff -pruN 7.2.2-5/db/db_readonly_with_timestamp_test.cc 7.3.1-2/db/db_readonly_with_timestamp_test.cc
--- 7.2.2-5/db/db_readonly_with_timestamp_test.cc	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/db/db_readonly_with_timestamp_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,331 @@
+//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_with_timestamp_test_util.h"
+#include "test_util/testutil.h"
+
+namespace ROCKSDB_NAMESPACE {
+class DBReadOnlyTestWithTimestamp : public DBBasicTestWithTimestampBase {
+ public:
+  DBReadOnlyTestWithTimestamp()
+      : DBBasicTestWithTimestampBase("db_readonly_test_with_timestamp") {}
+};
+
+#ifndef ROCKSDB_LITE
+TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGetReadTimestampSizeMismatch) {
+  const int kNumKeysPerFile = 128;
+  const uint64_t kMaxKey = 1024;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  const size_t kTimestampSize = Timestamp(0, 0).size();
+  TestComparator test_cmp(kTimestampSize);
+  options.comparator = &test_cmp;
+  options.memtable_factory.reset(
+      test::NewSpecialSkipListFactory(kNumKeysPerFile));
+  DestroyAndReopen(options);
+  const std::string write_timestamp = Timestamp(1, 0);
+  WriteOptions write_opts;
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    Status s = db_->Put(write_opts, Key1(key), write_timestamp,
+                        "value" + std::to_string(key));
+    ASSERT_OK(s);
+  }
+
+  // Reopen the database in read only mode to test its timestamp support.
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+  ReadOptions read_opts;
+  std::string different_size_read_timestamp;
+  PutFixed32(&different_size_read_timestamp, 2);
+  Slice different_size_read_ts = different_size_read_timestamp;
+  read_opts.timestamp = &different_size_read_ts;
+  {
+    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
+    ASSERT_FALSE(iter->Valid());
+    ASSERT_TRUE(iter->status().IsInvalidArgument());
+  }
+
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    std::string value_from_get;
+    std::string timestamp;
+    ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, &timestamp)
+                    .IsInvalidArgument());
+  }
+
+  Close();
+}
+
+TEST_F(DBReadOnlyTestWithTimestamp,
+       IteratorAndGetReadTimestampSpecifiedWithoutWriteTimestamp) {
+  const int kNumKeysPerFile = 128;
+  const uint64_t kMaxKey = 1024;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  options.memtable_factory.reset(
+      test::NewSpecialSkipListFactory(kNumKeysPerFile));
+  DestroyAndReopen(options);
+  WriteOptions write_opts;
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key));
+    ASSERT_OK(s);
+  }
+
+  // Reopen the database in read only mode to test its timestamp support.
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+  ReadOptions read_opts;
+  const std::string read_timestamp = Timestamp(2, 0);
+  Slice read_ts = read_timestamp;
+  read_opts.timestamp = &read_ts;
+  {
+    std::unique_ptr<Iterator> iter(db_->NewIterator(read_opts));
+    ASSERT_FALSE(iter->Valid());
+    ASSERT_TRUE(iter->status().IsInvalidArgument());
+  }
+
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    std::string value_from_get;
+    std::string timestamp;
+    ASSERT_TRUE(db_->Get(read_opts, Key1(key), &value_from_get, &timestamp)
+                    .IsInvalidArgument());
+  }
+
+  Close();
+}
+
+TEST_F(DBReadOnlyTestWithTimestamp, IteratorAndGet) {
+  const int kNumKeysPerFile = 128;
+  const uint64_t kMaxKey = 1024;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  const size_t kTimestampSize = Timestamp(0, 0).size();
+  TestComparator test_cmp(kTimestampSize);
+  options.comparator = &test_cmp;
+  options.memtable_factory.reset(
+      test::NewSpecialSkipListFactory(kNumKeysPerFile));
+  DestroyAndReopen(options);
+  const std::vector<uint64_t> start_keys = {1, 0};
+  const std::vector<std::string> write_timestamps = {Timestamp(1, 0),
+                                                     Timestamp(3, 0)};
+  const std::vector<std::string> read_timestamps = {Timestamp(2, 0),
+                                                    Timestamp(4, 0)};
+  for (size_t i = 0; i < write_timestamps.size(); ++i) {
+    WriteOptions write_opts;
+    for (uint64_t key = start_keys[i]; key <= kMaxKey; ++key) {
+      Status s = db_->Put(write_opts, Key1(key), write_timestamps[i],
+                          "value" + std::to_string(i));
+      ASSERT_OK(s);
+    }
+  }
+
+  // Reopen the database in read only mode to test its timestamp support.
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+
+  auto get_value_and_check = [](DB* db, ReadOptions read_opts, Slice key,
+                                Slice expected_value, std::string expected_ts) {
+    std::string value_from_get;
+    std::string timestamp;
+    ASSERT_OK(db->Get(read_opts, key.ToString(), &value_from_get, &timestamp));
+    ASSERT_EQ(expected_value, value_from_get);
+    ASSERT_EQ(expected_ts, timestamp);
+  };
+  for (size_t i = 0; i < read_timestamps.size(); ++i) {
+    ReadOptions read_opts;
+    Slice read_ts = read_timestamps[i];
+    read_opts.timestamp = &read_ts;
+    std::unique_ptr<Iterator> it(db_->NewIterator(read_opts));
+    int count = 0;
+    uint64_t key = 0;
+    // Forward iterate.
+    for (it->Seek(Key1(0)), key = start_keys[i]; it->Valid();
+         it->Next(), ++count, ++key) {
+      CheckIterUserEntry(it.get(), Key1(key), kTypeValue,
+                         "value" + std::to_string(i), write_timestamps[i]);
+      get_value_and_check(db_, read_opts, it->key(), it->value(),
+                          write_timestamps[i]);
+    }
+    size_t expected_count = kMaxKey - start_keys[i] + 1;
+    ASSERT_EQ(expected_count, count);
+
+    // Backward iterate.
+    count = 0;
+    for (it->SeekForPrev(Key1(kMaxKey)), key = kMaxKey; it->Valid();
+         it->Prev(), ++count, --key) {
+      CheckIterUserEntry(it.get(), Key1(key), kTypeValue,
+                         "value" + std::to_string(i), write_timestamps[i]);
+      get_value_and_check(db_, read_opts, it->key(), it->value(),
+                          write_timestamps[i]);
+    }
+    ASSERT_EQ(static_cast<size_t>(kMaxKey) - start_keys[i] + 1, count);
+
+    // SeekToFirst()/SeekToLast() with lower/upper bounds.
+    // Then iter with lower and upper bounds.
+    uint64_t l = 0;
+    uint64_t r = kMaxKey + 1;
+    while (l < r) {
+      std::string lb_str = Key1(l);
+      Slice lb = lb_str;
+      std::string ub_str = Key1(r);
+      Slice ub = ub_str;
+      read_opts.iterate_lower_bound = &lb;
+      read_opts.iterate_upper_bound = &ub;
+      it.reset(db_->NewIterator(read_opts));
+      for (it->SeekToFirst(), key = std::max(l, start_keys[i]), count = 0;
+           it->Valid(); it->Next(), ++key, ++count) {
+        CheckIterUserEntry(it.get(), Key1(key), kTypeValue,
+                           "value" + std::to_string(i), write_timestamps[i]);
+        get_value_and_check(db_, read_opts, it->key(), it->value(),
+                            write_timestamps[i]);
+      }
+      ASSERT_EQ(r - std::max(l, start_keys[i]), count);
+
+      for (it->SeekToLast(), key = std::min(r, kMaxKey + 1), count = 0;
+           it->Valid(); it->Prev(), --key, ++count) {
+        CheckIterUserEntry(it.get(), Key1(key - 1), kTypeValue,
+                           "value" + std::to_string(i), write_timestamps[i]);
+        get_value_and_check(db_, read_opts, it->key(), it->value(),
+                            write_timestamps[i]);
+      }
+      l += (kMaxKey / 100);
+      r -= (kMaxKey / 100);
+    }
+  }
+  Close();
+}
+
+TEST_F(DBReadOnlyTestWithTimestamp, Iterators) {
+  const int kNumKeysPerFile = 128;
+  const uint64_t kMaxKey = 1024;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  const size_t kTimestampSize = Timestamp(0, 0).size();
+  TestComparator test_cmp(kTimestampSize);
+  options.comparator = &test_cmp;
+  options.memtable_factory.reset(
+      test::NewSpecialSkipListFactory(kNumKeysPerFile));
+  DestroyAndReopen(options);
+  const std::string write_timestamp = Timestamp(1, 0);
+  const std::string read_timestamp = Timestamp(2, 0);
+  WriteOptions write_opts;
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    Status s = db_->Put(write_opts, Key1(key), write_timestamp,
+                        "value" + std::to_string(key));
+    ASSERT_OK(s);
+  }
+
+  // Reopen the database in read only mode to test its timestamp support.
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+  ReadOptions read_opts;
+  Slice read_ts = read_timestamp;
+  read_opts.timestamp = &read_ts;
+  std::vector<Iterator*> iters;
+  ASSERT_OK(db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters));
+  ASSERT_EQ(static_cast<uint64_t>(1), iters.size());
+
+  int count = 0;
+  uint64_t key = 0;
+  // Forward iterate.
+  for (iters[0]->Seek(Key1(0)), key = 0; iters[0]->Valid();
+       iters[0]->Next(), ++count, ++key) {
+    CheckIterUserEntry(iters[0], Key1(key), kTypeValue,
+                       "value" + std::to_string(key), write_timestamp);
+  }
+
+  size_t expected_count = kMaxKey - 0 + 1;
+  ASSERT_EQ(expected_count, count);
+  delete iters[0];
+
+  Close();
+}
+
+TEST_F(DBReadOnlyTestWithTimestamp, IteratorsReadTimestampSizeMismatch) {
+  const int kNumKeysPerFile = 128;
+  const uint64_t kMaxKey = 1024;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  const size_t kTimestampSize = Timestamp(0, 0).size();
+  TestComparator test_cmp(kTimestampSize);
+  options.comparator = &test_cmp;
+  options.memtable_factory.reset(
+      test::NewSpecialSkipListFactory(kNumKeysPerFile));
+  DestroyAndReopen(options);
+  const std::string write_timestamp = Timestamp(1, 0);
+  WriteOptions write_opts;
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    Status s = db_->Put(write_opts, Key1(key), write_timestamp,
+                        "value" + std::to_string(key));
+    ASSERT_OK(s);
+  }
+
+  // Reopen the database in read only mode to test its timestamp support.
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+  ReadOptions read_opts;
+  std::string different_size_read_timestamp;
+  PutFixed32(&different_size_read_timestamp, 2);
+  Slice different_size_read_ts = different_size_read_timestamp;
+  read_opts.timestamp = &different_size_read_ts;
+  {
+    std::vector<Iterator*> iters;
+    ASSERT_TRUE(
+        db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters)
+            .IsInvalidArgument());
+  }
+
+  Close();
+}
+
+TEST_F(DBReadOnlyTestWithTimestamp,
+       IteratorsReadTimestampSpecifiedWithoutWriteTimestamp) {
+  const int kNumKeysPerFile = 128;
+  const uint64_t kMaxKey = 1024;
+  Options options = CurrentOptions();
+  options.env = env_;
+  options.create_if_missing = true;
+  options.memtable_factory.reset(
+      test::NewSpecialSkipListFactory(kNumKeysPerFile));
+  DestroyAndReopen(options);
+  WriteOptions write_opts;
+  for (uint64_t key = 0; key <= kMaxKey; ++key) {
+    Status s = db_->Put(write_opts, Key1(key), "value" + std::to_string(key));
+    ASSERT_OK(s);
+  }
+
+  // Reopen the database in read only mode to test its timestamp support.
+  Close();
+  ASSERT_OK(ReadOnlyReopen(options));
+  ReadOptions read_opts;
+  const std::string read_timestamp = Timestamp(2, 0);
+  Slice read_ts = read_timestamp;
+  read_opts.timestamp = &read_ts;
+  {
+    std::vector<Iterator*> iters;
+    ASSERT_TRUE(
+        db_->NewIterators(read_opts, {db_->DefaultColumnFamily()}, &iters)
+            .IsInvalidArgument());
+  }
+
+  Close();
+}
+#endif  // !ROCKSDB_LITE
+}  // namespace ROCKSDB_NAMESPACE
+
+int main(int argc, char** argv) {
+  ROCKSDB_NAMESPACE::port::InstallStackTraceHandler();
+  ::testing::InitGoogleTest(&argc, argv);
+  RegisterCustomObjects(argc, argv);
+  return RUN_ALL_TESTS();
+}
diff -pruN 7.2.2-5/db/db_secondary_test.cc 7.3.1-2/db/db_secondary_test.cc
--- 7.2.2-5/db/db_secondary_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_secondary_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -181,6 +181,7 @@ TEST_F(DBSecondaryTest, SimpleInternalCo
   ASSERT_EQ(input.input_files.size(), 3);
 
   input.output_level = 1;
+  ASSERT_OK(db_->GetDbIdentity(input.db_id));
   Close();
 
   options.max_open_files = -1;
@@ -212,20 +213,20 @@ TEST_F(DBSecondaryTest, InternalCompacti
   const int kRangeL2 = 10;
   const int kRangeL1 = 30;
   for (int i = 0; i < 10; i++) {
-    ASSERT_OK(Put(Key(i * kRangeL2), "value" + ToString(i)));
-    ASSERT_OK(Put(Key((i + 1) * kRangeL2 - 1), "value" + ToString(i)));
+    ASSERT_OK(Put(Key(i * kRangeL2), "value" + std::to_string(i)));
+    ASSERT_OK(Put(Key((i + 1) * kRangeL2 - 1), "value" + std::to_string(i)));
     ASSERT_OK(Flush());
   }
   MoveFilesToLevel(2);
   for (int i = 0; i < 5; i++) {
-    ASSERT_OK(Put(Key(i * kRangeL1), "value" + ToString(i)));
-    ASSERT_OK(Put(Key((i + 1) * kRangeL1 - 1), "value" + ToString(i)));
+    ASSERT_OK(Put(Key(i * kRangeL1), "value" + std::to_string(i)));
+    ASSERT_OK(Put(Key((i + 1) * kRangeL1 - 1), "value" + std::to_string(i)));
     ASSERT_OK(Flush());
   }
   MoveFilesToLevel(1);
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK(Put(Key(i * 30), "value" + ToString(i)));
-    ASSERT_OK(Put(Key(i * 30 + 50), "value" + ToString(i)));
+    ASSERT_OK(Put(Key(i * 30), "value" + std::to_string(i)));
+    ASSERT_OK(Put(Key(i * 30 + 50), "value" + std::to_string(i)));
     ASSERT_OK(Flush());
   }
 
@@ -241,6 +242,7 @@ TEST_F(DBSecondaryTest, InternalCompacti
   input1.input_files.push_back(meta.levels[1].files[2].name);
 
   input1.output_level = 1;
+  ASSERT_OK(db_->GetDbIdentity(input1.db_id));
 
   options.max_open_files = -1;
   Close();
@@ -261,6 +263,7 @@ TEST_F(DBSecondaryTest, InternalCompacti
   }
 
   input2.output_level = 2;
+  input2.db_id = input1.db_id;
   ASSERT_OK(db_secondary_full()->TEST_CompactWithoutInstallation(
       OpenAndCompactOptions(), cfh, input2, &result));
   ASSERT_OK(result.status);
@@ -305,6 +308,7 @@ TEST_F(DBSecondaryTest, InternalCompacti
   ASSERT_EQ(input.input_files.size(), 3);
 
   input.output_level = 1;
+  ASSERT_OK(db_->GetDbIdentity(input.db_id));
 
   // trigger compaction to delete the files for secondary instance compaction
   ASSERT_OK(Put("foo", "foo_value" + std::to_string(3)));
@@ -346,6 +350,7 @@ TEST_F(DBSecondaryTest, InternalCompacti
   ASSERT_EQ(input.input_files.size(), 3);
 
   input.output_level = 1;
+  ASSERT_OK(db_->GetDbIdentity(input.db_id));
 
   Close();
 
diff -pruN 7.2.2-5/db/db_sst_test.cc 7.3.1-2/db/db_sst_test.cc
--- 7.2.2-5/db/db_sst_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_sst_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -280,6 +280,58 @@ TEST_F(DBSSTTest, DeleteObsoleteFilesPen
   listener->VerifyMatchedCount(1);
 }
 
+// Test that producing an empty .sst file does not write it out to
+// disk, and that the DeleteFile() env method is not called for
+// removing the non-existing file later.
+TEST_F(DBSSTTest, DeleteFileNotCalledForNotCreatedSSTFile) {
+  Options options = CurrentOptions();
+  options.env = env_;
+
+  OnFileDeletionListener* listener = new OnFileDeletionListener();
+  options.listeners.emplace_back(listener);
+
+  Reopen(options);
+
+  // Flush the empty database.
+  ASSERT_OK(Flush());
+  ASSERT_EQ("", FilesPerLevel(0));
+
+  // We expect no .sst files.
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(metadata.size(), 0U);
+
+  // We expect no file deletions.
+  listener->VerifyMatchedCount(0);
+}
+
+// Test that producing a non-empty .sst file does write it out to
+// disk, and that the DeleteFile() env method is not called for removing
+// the file later.
+TEST_F(DBSSTTest, DeleteFileNotCalledForCreatedSSTFile) {
+  Options options = CurrentOptions();
+  options.env = env_;
+
+  OnFileDeletionListener* listener = new OnFileDeletionListener();
+  options.listeners.emplace_back(listener);
+
+  Reopen(options);
+
+  ASSERT_OK(Put("pika", "choo"));
+
+  // Flush the non-empty database.
+  ASSERT_OK(Flush());
+  ASSERT_EQ("1", FilesPerLevel(0));
+
+  // We expect 1 .sst file.
+  std::vector<LiveFileMetaData> metadata;
+  db_->GetLiveFilesMetaData(&metadata);
+  ASSERT_EQ(metadata.size(), 1U);
+
+  // We expect no file deletions.
+  listener->VerifyMatchedCount(0);
+}
+
 TEST_F(DBSSTTest, DBWithSstFileManager) {
   std::shared_ptr<SstFileManager> sst_file_manager(NewSstFileManager(env_));
   auto sfm = static_cast<SstFileManagerImpl*>(sst_file_manager.get());
@@ -947,7 +999,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipl
 
   // Create 4 files in L0
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A'), wo));
+    ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'A'), wo));
     ASSERT_OK(Flush());
   }
   // We created 4 sst files in L0
@@ -963,7 +1015,7 @@ TEST_F(DBSSTTest, DeleteSchedulerMultipl
 
   // Create 4 files in L0
   for (int i = 4; i < 8; i++) {
-    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'B'), wo));
+    ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'B'), wo));
     ASSERT_OK(Flush());
   }
   ASSERT_EQ("4,1", FilesPerLevel(0));
@@ -1009,7 +1061,7 @@ TEST_F(DBSSTTest, DestroyDBWithRateLimit
 
   // Create 4 files in L0
   for (int i = 0; i < 4; i++) {
-    ASSERT_OK(Put("Key" + ToString(i), DummyString(1024, 'A')));
+    ASSERT_OK(Put("Key" + std::to_string(i), DummyString(1024, 'A')));
     ASSERT_OK(Flush());
   }
   // We created 4 sst files in L0
@@ -1396,7 +1448,9 @@ TEST_F(DBSSTTest, OpenDBWithInfiniteMaxO
 }
 
 TEST_F(DBSSTTest, OpenDBWithInfiniteMaxOpenFilesSubjectToMemoryLimit) {
-  for (bool reserve_table_builder_memory : {true, false}) {
+  for (CacheEntryRoleOptions::Decision charge_table_reader :
+       {CacheEntryRoleOptions::Decision::kEnabled,
+        CacheEntryRoleOptions::Decision::kDisabled}) {
     // Open DB with infinite max open files
     //  - First iteration use 1 thread to open files
     //  - Second iteration use 5 threads to open files
@@ -1436,7 +1490,9 @@ TEST_F(DBSSTTest, OpenDBWithInfiniteMaxO
       }
       Close();
 
-      table_options.reserve_table_reader_memory = reserve_table_builder_memory;
+      table_options.cache_usage_options.options_overrides.insert(
+          {CacheEntryRole::kBlockBasedTableReader,
+           {/*.charged = */ charge_table_reader}});
       table_options.block_cache =
           NewLRUCache(1024 /* capacity */, 0 /* num_shard_bits */,
                       true /* strict_capacity_limit */);
@@ -1445,8 +1501,13 @@ TEST_F(DBSSTTest, OpenDBWithInfiniteMaxO
       // Reopening the DB will try to load all existing files, conditionally
       // subject to memory limit
       Status s = TryReopen(options);
-      if (table_options.reserve_table_reader_memory) {
+
+      if (charge_table_reader == CacheEntryRoleOptions::Decision::kEnabled) {
         EXPECT_TRUE(s.IsMemoryLimit());
+        EXPECT_TRUE(s.ToString().find(
+                        kCacheEntryRoleToCamelString[static_cast<std::uint32_t>(
+                            CacheEntryRole::kBlockBasedTableReader)]) !=
+                    std::string::npos);
         EXPECT_TRUE(s.ToString().find("memory limit based on cache capacity") !=
                     std::string::npos);
 
@@ -1478,7 +1539,7 @@ TEST_F(DBSSTTest, GetTotalSstFilesSize)
   // Generate 5 files in L0
   for (int i = 0; i < 5; i++) {
     for (int j = 0; j < 10; j++) {
-      std::string val = "val_file_" + ToString(i);
+      std::string val = "val_file_" + std::to_string(i);
       ASSERT_OK(Put(Key(j), val));
     }
     ASSERT_OK(Flush());
diff -pruN 7.2.2-5/db/db_table_properties_test.cc 7.3.1-2/db/db_table_properties_test.cc
--- 7.2.2-5/db/db_table_properties_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_table_properties_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -84,7 +84,8 @@ TEST_F(DBTablePropertiesTest, GetPropert
     }
     // Build file
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(db_->Put(WriteOptions(), ToString(table * 100 + i), "val"));
+      ASSERT_OK(
+          db_->Put(WriteOptions(), std::to_string(table * 100 + i), "val"));
     }
     ASSERT_OK(db_->Flush(FlushOptions()));
   }
@@ -113,7 +114,7 @@ TEST_F(DBTablePropertiesTest, GetPropert
   // fetch key from 1st and 2nd table, which will internally place that table to
   // the table cache.
   for (int i = 0; i < 2; ++i) {
-    Get(ToString(i * 100 + 0));
+    Get(std::to_string(i * 100 + 0));
   }
 
   VerifyTableProperties(db_, 10 + 11 + 12 + 13);
@@ -122,7 +123,7 @@ TEST_F(DBTablePropertiesTest, GetPropert
   Reopen(options);
   // fetch key from all tables, which will place them in table cache.
   for (int i = 0; i < 4; ++i) {
-    Get(ToString(i * 100 + 0));
+    Get(std::to_string(i * 100 + 0));
   }
   VerifyTableProperties(db_, 10 + 11 + 12 + 13);
 
@@ -156,7 +157,7 @@ TEST_F(DBTablePropertiesTest, GetPropert
     } else {
       bool found_corruption = false;
       for (int i = 0; i < 4; ++i) {
-        std::string result = Get(ToString(i * 100 + 0));
+        std::string result = Get(std::to_string(i * 100 + 0));
         if (result.find_first_of("Corruption: block checksum mismatch") !=
             std::string::npos) {
           found_corruption = true;
@@ -187,7 +188,7 @@ TEST_F(DBTablePropertiesTest, InvalidIgn
 
   // Build file
   for (int i = 0; i < 10; ++i) {
-    ASSERT_OK(db_->Put(WriteOptions(), ToString(i), "val"));
+    ASSERT_OK(db_->Put(WriteOptions(), std::to_string(i), "val"));
   }
   ASSERT_OK(db_->Flush(FlushOptions()));
 
diff -pruN 7.2.2-5/db/db_test2.cc 7.3.1-2/db/db_test2.cc
--- 7.2.2-5/db/db_test2.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_test2.cc	2022-06-08 21:08:16.000000000 +0000
@@ -38,10 +38,10 @@ class DBTest2 : public DBTestBase {
 #ifndef ROCKSDB_LITE
   uint64_t GetSstSizeHelper(Temperature temperature) {
     std::string prop;
-    EXPECT_TRUE(
-        dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
-                                  ToString(static_cast<uint8_t>(temperature)),
-                              &prop));
+    EXPECT_TRUE(dbfull()->GetProperty(
+        DB::Properties::kLiveSstFilesSizeAtTemperature +
+            std::to_string(static_cast<uint8_t>(temperature)),
+        &prop));
     return static_cast<uint64_t>(std::atoi(prop.c_str()));
   }
 #endif  // ROCKSDB_LITE
@@ -1296,6 +1296,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
   enum DictionaryTypes : int {
     kWithoutDict,
     kWithDict,
+    kWithZSTDfinalizeDict,
     kWithZSTDTrainedDict,
     kDictEnd,
   };
@@ -1304,6 +1305,7 @@ TEST_F(DBTest2, PresetCompressionDict) {
     options.compression = compression_type;
     size_t bytes_without_dict = 0;
     size_t bytes_with_dict = 0;
+    size_t bytes_with_zstd_finalize_dict = 0;
     size_t bytes_with_zstd_trained_dict = 0;
     for (int i = kWithoutDict; i < kDictEnd; i++) {
       // First iteration: compress without preset dictionary
@@ -1323,12 +1325,21 @@ TEST_F(DBTest2, PresetCompressionDict) {
           options.compression_opts.max_dict_bytes = kBlockSizeBytes;
           options.compression_opts.zstd_max_train_bytes = 0;
           break;
+        case kWithZSTDfinalizeDict:
+          if (compression_type != kZSTD) {
+            continue;
+          }
+          options.compression_opts.max_dict_bytes = kBlockSizeBytes;
+          options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
+          options.compression_opts.use_zstd_dict_trainer = false;
+          break;
         case kWithZSTDTrainedDict:
           if (compression_type != kZSTD) {
             continue;
           }
           options.compression_opts.max_dict_bytes = kBlockSizeBytes;
           options.compression_opts.zstd_max_train_bytes = kL0FileBytes;
+          options.compression_opts.use_zstd_dict_trainer = true;
           break;
         default:
           assert(false);
@@ -1365,6 +1376,8 @@ TEST_F(DBTest2, PresetCompressionDict) {
         bytes_without_dict = total_sst_bytes;
       } else if (i == kWithDict) {
         bytes_with_dict = total_sst_bytes;
+      } else if (i == kWithZSTDfinalizeDict) {
+        bytes_with_zstd_finalize_dict = total_sst_bytes;
       } else if (i == kWithZSTDTrainedDict) {
         bytes_with_zstd_trained_dict = total_sst_bytes;
       }
@@ -1376,6 +1389,13 @@ TEST_F(DBTest2, PresetCompressionDict) {
       if (i == kWithDict) {
         ASSERT_GT(bytes_without_dict, bytes_with_dict);
-      } else if (i == kWithZSTDTrainedDict) {
+      } else if (i == kWithZSTDfinalizeDict) {
+        // In zstd compression, it is sometimes possible that using a finalized
+        // dictionary does not get as good a compression ratio as raw content
+        // dictionary. But using a dictionary should always get better
+        // compression ratio than not using one.
+        ASSERT_TRUE(bytes_with_dict > bytes_with_zstd_finalize_dict ||
+                    bytes_without_dict > bytes_with_zstd_finalize_dict);
+      } else if (i == kWithZSTDTrainedDict) {
         // In zstd compression, it is sometimes possible that using a trained
         // dictionary does not get as good a compression ratio as without
         // training.
@@ -1694,9 +1714,9 @@ class CompactionCompressionListener : pu
     int bottommost_level = 0;
     for (int level = 0; level < db->NumberLevels(); level++) {
       std::string files_at_level;
-      ASSERT_TRUE(db->GetProperty(
-          "rocksdb.num-files-at-level" + ROCKSDB_NAMESPACE::ToString(level),
-          &files_at_level));
+      ASSERT_TRUE(
+          db->GetProperty("rocksdb.num-files-at-level" + std::to_string(level),
+                          &files_at_level));
       if (files_at_level != "0") {
         bottommost_level = level;
       }
@@ -2492,14 +2512,14 @@ TEST_F(DBTest2, TestPerfContextIterCpuTi
 
   const size_t kNumEntries = 10;
   for (size_t i = 0; i < kNumEntries; ++i) {
-    ASSERT_OK(Put("k" + ToString(i), "v" + ToString(i)));
+    ASSERT_OK(Put("k" + std::to_string(i), "v" + std::to_string(i)));
   }
   ASSERT_OK(Flush());
   for (size_t i = 0; i < kNumEntries; ++i) {
-    ASSERT_EQ("v" + ToString(i), Get("k" + ToString(i)));
+    ASSERT_EQ("v" + std::to_string(i), Get("k" + std::to_string(i)));
   }
-  std::string last_key = "k" + ToString(kNumEntries - 1);
-  std::string last_value = "v" + ToString(kNumEntries - 1);
+  std::string last_key = "k" + std::to_string(kNumEntries - 1);
+  std::string last_value = "v" + std::to_string(kNumEntries - 1);
   env_->now_cpu_count_.store(0);
   env_->SetMockSleep();
 
@@ -3987,12 +4007,14 @@ TEST_F(DBTest2, RateLimitedCompactionRea
 
       // should be slightly above 512KB due to non-data blocks read. Arbitrarily
       // chose 1MB as the upper bound on the total bytes read.
-      size_t rate_limited_bytes =
-          options.rate_limiter->GetTotalBytesThrough(Env::IO_TOTAL);
-      // There must be no charges at non-`IO_LOW` priorities.
+      size_t rate_limited_bytes = static_cast<size_t>(
+          options.rate_limiter->GetTotalBytesThrough(Env::IO_TOTAL));
+      // The charges can exist for `IO_LOW` and `IO_USER` priorities.
+      size_t rate_limited_bytes_by_pri =
+          options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW) +
+          options.rate_limiter->GetTotalBytesThrough(Env::IO_USER);
       ASSERT_EQ(rate_limited_bytes,
-                static_cast<size_t>(
-                    options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW)));
+                static_cast<size_t>(rate_limited_bytes_by_pri));
       // Include the explicit prefetch of the footer in direct I/O case.
       size_t direct_io_extra = use_direct_io ? 512 * 1024 : 0;
       ASSERT_GE(
@@ -4010,9 +4032,11 @@ TEST_F(DBTest2, RateLimitedCompactionRea
       }
       delete iter;
       // bytes read for user iterator shouldn't count against the rate limit.
+      rate_limited_bytes_by_pri =
+          options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW) +
+          options.rate_limiter->GetTotalBytesThrough(Env::IO_USER);
       ASSERT_EQ(rate_limited_bytes,
-                static_cast<size_t>(
-                    options.rate_limiter->GetTotalBytesThrough(Env::IO_LOW)));
+                static_cast<size_t>(rate_limited_bytes_by_pri));
     }
   }
 }
@@ -5553,7 +5577,7 @@ TEST_F(DBTest2, MultiDBParallelOpenTest)
   Options options = CurrentOptions();
   std::vector<std::string> dbnames;
   for (int i = 0; i < kNumDbs; ++i) {
-    dbnames.emplace_back(test::PerThreadDBPath(env_, "db" + ToString(i)));
+    dbnames.emplace_back(test::PerThreadDBPath(env_, "db" + std::to_string(i)));
     ASSERT_OK(DestroyDB(dbnames.back(), options));
   }
 
@@ -6305,115 +6329,118 @@ TEST_F(DBTest2, BlockBasedTablePrefixGet
 
 #ifndef ROCKSDB_LITE
 TEST_F(DBTest2, AutoPrefixMode1) {
-  // create a DB with block prefix index
-  BlockBasedTableOptions table_options;
-  Options options = CurrentOptions();
-  table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
-  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
-  options.prefix_extractor.reset(NewFixedPrefixTransform(1));
-  options.statistics = CreateDBStatistics();
-
-  Reopen(options);
-
-  Random rnd(301);
-  std::string large_value = rnd.RandomString(500);
-
-  ASSERT_OK(Put("a1", large_value));
-  ASSERT_OK(Put("x1", large_value));
-  ASSERT_OK(Put("y1", large_value));
-  ASSERT_OK(Flush());
-
-  ReadOptions ro;
-  ro.total_order_seek = false;
-  ro.auto_prefix_mode = true;
-  {
-    std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
-    iterator->Seek("b1");
-    ASSERT_TRUE(iterator->Valid());
-    ASSERT_EQ("x1", iterator->key().ToString());
-    ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
-    ASSERT_OK(iterator->status());
-  }
+  do {
+    // create a DB with block prefix index
+    Options options = CurrentOptions();
+    BlockBasedTableOptions table_options =
+        *options.table_factory->GetOptions<BlockBasedTableOptions>();
+    table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
+    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+    options.prefix_extractor.reset(NewFixedPrefixTransform(1));
+    options.statistics = CreateDBStatistics();
 
-  std::string ub_str = "b9";
-  Slice ub(ub_str);
-  ro.iterate_upper_bound = &ub;
+    Reopen(options);
 
-  {
-    std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
-    iterator->Seek("b1");
-    ASSERT_FALSE(iterator->Valid());
-    ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
-    ASSERT_OK(iterator->status());
-  }
+    Random rnd(301);
+    std::string large_value = rnd.RandomString(500);
 
-  ub_str = "z";
-  ub = Slice(ub_str);
-  {
-    std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
-    iterator->Seek("b1");
-    ASSERT_TRUE(iterator->Valid());
-    ASSERT_EQ("x1", iterator->key().ToString());
-    ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
-    ASSERT_OK(iterator->status());
-  }
+    ASSERT_OK(Put("a1", large_value));
+    ASSERT_OK(Put("x1", large_value));
+    ASSERT_OK(Put("y1", large_value));
+    ASSERT_OK(Flush());
 
-  ub_str = "c";
-  ub = Slice(ub_str);
-  {
-    std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
-    iterator->Seek("b1");
-    ASSERT_FALSE(iterator->Valid());
-    ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
-    ASSERT_OK(iterator->status());
-  }
+    ReadOptions ro;
+    ro.total_order_seek = false;
+    ro.auto_prefix_mode = true;
+    {
+      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
+      iterator->Seek("b1");
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ("x1", iterator->key().ToString());
+      ASSERT_EQ(0, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+      ASSERT_OK(iterator->status());
+    }
 
-  // The same queries without recreating iterator
-  {
-    ub_str = "b9";
-    ub = Slice(ub_str);
+    std::string ub_str = "b9";
+    Slice ub(ub_str);
     ro.iterate_upper_bound = &ub;
 
-    std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
-    iterator->Seek("b1");
-    ASSERT_FALSE(iterator->Valid());
-    ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
-    ASSERT_OK(iterator->status());
+    {
+      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
+      iterator->Seek("b1");
+      ASSERT_FALSE(iterator->Valid());
+      ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+      ASSERT_OK(iterator->status());
+    }
 
     ub_str = "z";
     ub = Slice(ub_str);
-
-    iterator->Seek("b1");
-    ASSERT_TRUE(iterator->Valid());
-    ASSERT_EQ("x1", iterator->key().ToString());
-    ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+    {
+      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
+      iterator->Seek("b1");
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ("x1", iterator->key().ToString());
+      ASSERT_EQ(1, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+      ASSERT_OK(iterator->status());
+    }
 
     ub_str = "c";
     ub = Slice(ub_str);
+    {
+      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
+      iterator->Seek("b1");
+      ASSERT_FALSE(iterator->Valid());
+      ASSERT_EQ(2, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+      ASSERT_OK(iterator->status());
+    }
 
-    iterator->Seek("b1");
-    ASSERT_FALSE(iterator->Valid());
-    ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+    // The same queries without recreating iterator
+    {
+      ub_str = "b9";
+      ub = Slice(ub_str);
+      ro.iterate_upper_bound = &ub;
 
-    ub_str = "b9";
-    ub = Slice(ub_str);
-    ro.iterate_upper_bound = &ub;
-    iterator->SeekForPrev("b1");
-    ASSERT_TRUE(iterator->Valid());
-    ASSERT_EQ("a1", iterator->key().ToString());
-    ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+      std::unique_ptr<Iterator> iterator(db_->NewIterator(ro));
+      iterator->Seek("b1");
+      ASSERT_FALSE(iterator->Valid());
+      ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+      ASSERT_OK(iterator->status());
 
-    ub_str = "zz";
-    ub = Slice(ub_str);
-    ro.iterate_upper_bound = &ub;
-    iterator->SeekToLast();
-    ASSERT_TRUE(iterator->Valid());
-    ASSERT_EQ("y1", iterator->key().ToString());
+      ub_str = "z";
+      ub = Slice(ub_str);
 
-    iterator->SeekToFirst();
-    ASSERT_TRUE(iterator->Valid());
-    ASSERT_EQ("a1", iterator->key().ToString());
-  }
+      iterator->Seek("b1");
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ("x1", iterator->key().ToString());
+      ASSERT_EQ(3, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+
+      ub_str = "c";
+      ub = Slice(ub_str);
+
+      iterator->Seek("b1");
+      ASSERT_FALSE(iterator->Valid());
+      ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+
+      ub_str = "b9";
+      ub = Slice(ub_str);
+      ro.iterate_upper_bound = &ub;
+      iterator->SeekForPrev("b1");
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ("a1", iterator->key().ToString());
+      ASSERT_EQ(4, TestGetTickerCount(options, BLOOM_FILTER_PREFIX_CHECKED));
+
+      ub_str = "zz";
+      ub = Slice(ub_str);
+      ro.iterate_upper_bound = &ub;
+      iterator->SeekToLast();
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ("y1", iterator->key().ToString());
+
+      iterator->SeekToFirst();
+      ASSERT_TRUE(iterator->Valid());
+      ASSERT_EQ("a1", iterator->key().ToString());
+    }
+  } while (ChangeOptions(kSkipPlainTable));
 }
 
 class RenameCurrentTest : public DBTestBase,
@@ -7145,6 +7172,112 @@ TEST_F(DBTest2, RenameDirectory) {
   dbname_ = old_dbname;
 }
 
+TEST_F(DBTest2, SstUniqueIdVerifyBackwardCompatible) {
+  const int kNumSst = 3;
+  const int kLevel0Trigger = 4;
+  auto options = CurrentOptions();
+  options.level0_file_num_compaction_trigger = kLevel0Trigger;
+  options.statistics = CreateDBStatistics();
+
+  // Existing manifest doesn't have unique id
+  SyncPoint::GetInstance()->SetCallBack(
+      "VersionEdit::EncodeTo:UniqueId", [&](void* arg) {
+        auto unique_id = static_cast<UniqueId64x2*>(arg);
+        // remove id before writing it to manifest
+        (*unique_id)[0] = 0;
+        (*unique_id)[1] = 0;
+      });
+  std::atomic_int skipped = 0;
+  SyncPoint::GetInstance()->SetCallBack("Version::VerifySstUniqueIds::Skipped",
+                                        [&](void* /*arg*/) { skipped++; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // generate a few SSTs
+  for (int i = 0; i < kNumSst; i++) {
+    for (int j = 0; j < 100; j++) {
+      ASSERT_OK(Put(Key(i * 10 + j), "value"));
+    }
+    ASSERT_OK(Flush());
+  }
+
+  // Reopen without verification
+  Reopen(options);
+
+  // Reopen with verification, but it's skipped because manifest doesn't have id
+  options.verify_sst_unique_id_in_manifest = true;
+  Reopen(options);
+  ASSERT_EQ(skipped, kNumSst);
+
+  // test compaction generated Sst
+  for (int i = kNumSst; i < kLevel0Trigger; i++) {
+    for (int j = 0; j < 100; j++) {
+      ASSERT_OK(Put(Key(i * 10 + j), "value"));
+    }
+    ASSERT_OK(Flush());
+  }
+  ASSERT_OK(dbfull()->TEST_WaitForCompact());
+
+#ifndef ROCKSDB_LITE
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+#endif  // ROCKSDB_LITE
+
+  // Reopen with verification should fail
+  options.verify_sst_unique_id_in_manifest = true;
+  skipped = 0;
+  Reopen(options);
+  ASSERT_EQ(skipped, 1);
+}
+
+TEST_F(DBTest2, SstUniqueIdVerify) {
+  const int kNumSst = 3;
+  const int kLevel0Trigger = 4;
+  auto options = CurrentOptions();
+  options.level0_file_num_compaction_trigger = kLevel0Trigger;
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
+        auto props = static_cast<TableProperties*>(props_vs);
+        // update table property session_id to a different one
+        props->db_session_id = DBImpl::GenerateDbSessionId(nullptr);
+      });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // generate a few SSTs
+  for (int i = 0; i < kNumSst; i++) {
+    for (int j = 0; j < 100; j++) {
+      ASSERT_OK(Put(Key(i * 10 + j), "value"));
+    }
+    ASSERT_OK(Flush());
+  }
+
+  // Reopen with verification should report corruption
+  options.verify_sst_unique_id_in_manifest = true;
+  auto s = TryReopen(options);
+  ASSERT_TRUE(s.IsCorruption());
+
+  // Reopen without verification should be fine
+  options.verify_sst_unique_id_in_manifest = false;
+  Reopen(options);
+
+  // test compaction generated Sst
+  for (int i = kNumSst; i < kLevel0Trigger; i++) {
+    for (int j = 0; j < 100; j++) {
+      ASSERT_OK(Put(Key(i * 10 + j), "value"));
+    }
+    ASSERT_OK(Flush());
+  }
+  ASSERT_OK(dbfull()->TEST_WaitForCompact());
+
+#ifndef ROCKSDB_LITE
+  ASSERT_EQ("0,1", FilesPerLevel(0));
+#endif  // ROCKSDB_LITE
+
+  // Reopen with verification should fail
+  options.verify_sst_unique_id_in_manifest = true;
+  s = TryReopen(options);
+  ASSERT_TRUE(s.IsCorruption());
+}
+
 #ifndef ROCKSDB_LITE
 TEST_F(DBTest2, GetLatestSeqAndTsForKey) {
   Destroy(last_options_);
diff -pruN 7.2.2-5/db/db_test.cc 7.3.1-2/db/db_test.cc
--- 7.2.2-5/db/db_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -2427,8 +2427,8 @@ TEST_F(DBTest, SnapshotFiles) {
 
     // Also test GetLiveFilesStorageInfo
     std::vector<LiveFileStorageInfo> new_infos;
-    ASSERT_OK(dbfull()->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
-                                                &new_infos));
+    ASSERT_OK(db_->GetLiveFilesStorageInfo(LiveFilesStorageInfoOptions(),
+                                           &new_infos));
 
     // Close DB (while deletions disabled)
     Close();
@@ -2734,7 +2734,7 @@ TEST_P(MultiThreadedDBTest, MultiThreade
   Options options = CurrentOptions(options_override);
   std::vector<std::string> cfs;
   for (int i = 1; i < kColumnFamilies; ++i) {
-    cfs.push_back(ToString(i));
+    cfs.push_back(std::to_string(i));
   }
   Reopen(options);
   CreateAndReopenWithCF(cfs, options);
@@ -2786,7 +2786,7 @@ static void GCThreadBody(void* arg) {
   WriteOptions wo;
 
   for (int i = 0; i < kGCNumKeys; ++i) {
-    std::string kv(ToString(i + id * kGCNumKeys));
+    std::string kv(std::to_string(i + id * kGCNumKeys));
     ASSERT_OK(db->Put(wo, kv, kv));
   }
   t->done = true;
@@ -2822,7 +2822,7 @@ TEST_F(DBTest, GroupCommitTest) {
 
     std::vector<std::string> expected_db;
     for (int i = 0; i < kGCNumThreads * kGCNumKeys; ++i) {
-      expected_db.push_back(ToString(i));
+      expected_db.push_back(std::to_string(i));
     }
     std::sort(expected_db.begin(), expected_db.end());
 
@@ -3591,7 +3591,7 @@ TEST_P(DBTestWithParam, FIFOCompactionTe
     Random rnd(301);
     for (int i = 0; i < 6; ++i) {
       for (int j = 0; j < 110; ++j) {
-        ASSERT_OK(Put(ToString(i * 100 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 100 + j), rnd.RandomString(980)));
       }
       // flush should happen here
       ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable());
@@ -3607,7 +3607,7 @@ TEST_P(DBTestWithParam, FIFOCompactionTe
     ASSERT_EQ(NumTableFilesAtLevel(0), 5);
     for (int i = 0; i < 50; ++i) {
       // these keys should be deleted in previous compaction
-      ASSERT_EQ("NOT_FOUND", Get(ToString(i)));
+      ASSERT_EQ("NOT_FOUND", Get(std::to_string(i)));
     }
   }
 }
@@ -3629,7 +3629,7 @@ TEST_F(DBTest, FIFOCompactionTestWithCom
   for (int i = 0; i < 60; i++) {
     // Generate and flush a file about 20KB.
     for (int j = 0; j < 20; j++) {
-      ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+      ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
     }
     ASSERT_OK(Flush());
     ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3640,7 +3640,7 @@ TEST_F(DBTest, FIFOCompactionTestWithCom
   for (int i = 0; i < 60; i++) {
     // Generate and flush a file about 20KB.
     for (int j = 0; j < 20; j++) {
-      ASSERT_OK(Put(ToString(i * 20 + j + 2000), rnd.RandomString(980)));
+      ASSERT_OK(Put(std::to_string(i * 20 + j + 2000), rnd.RandomString(980)));
     }
     ASSERT_OK(Flush());
     ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3670,27 +3670,27 @@ TEST_F(DBTest, FIFOCompactionStyleWithCo
   Random rnd(301);
   for (int i = 0; i < 3; i++) {
     // Each file contains a different key which will be dropped later.
-    ASSERT_OK(Put("a" + ToString(i), rnd.RandomString(500)));
-    ASSERT_OK(Put("key" + ToString(i), ""));
-    ASSERT_OK(Put("z" + ToString(i), rnd.RandomString(500)));
+    ASSERT_OK(Put("a" + std::to_string(i), rnd.RandomString(500)));
+    ASSERT_OK(Put("key" + std::to_string(i), ""));
+    ASSERT_OK(Put("z" + std::to_string(i), rnd.RandomString(500)));
     ASSERT_OK(Flush());
     ASSERT_OK(dbfull()->TEST_WaitForCompact());
   }
   ASSERT_EQ(NumTableFilesAtLevel(0), 1);
   for (int i = 0; i < 3; i++) {
-    ASSERT_EQ("", Get("key" + ToString(i)));
+    ASSERT_EQ("", Get("key" + std::to_string(i)));
   }
   for (int i = 0; i < 3; i++) {
     // Each file contains a different key which will be dropped later.
-    ASSERT_OK(Put("a" + ToString(i), rnd.RandomString(500)));
-    ASSERT_OK(Delete("key" + ToString(i)));
-    ASSERT_OK(Put("z" + ToString(i), rnd.RandomString(500)));
+    ASSERT_OK(Put("a" + std::to_string(i), rnd.RandomString(500)));
+    ASSERT_OK(Delete("key" + std::to_string(i)));
+    ASSERT_OK(Put("z" + std::to_string(i), rnd.RandomString(500)));
     ASSERT_OK(Flush());
     ASSERT_OK(dbfull()->TEST_WaitForCompact());
   }
   ASSERT_EQ(NumTableFilesAtLevel(0), 2);
   for (int i = 0; i < 3; i++) {
-    ASSERT_EQ("NOT_FOUND", Get("key" + ToString(i)));
+    ASSERT_EQ("NOT_FOUND", Get("key" + std::to_string(i)));
   }
 }
 
@@ -3759,7 +3759,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     for (int i = 0; i < 10; i++) {
       // Generate and flush a file about 10KB.
       for (int j = 0; j < 10; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3791,7 +3791,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     for (int i = 0; i < 10; i++) {
       // Generate and flush a file about 10KB.
       for (int j = 0; j < 10; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3807,7 +3807,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     // Create 1 more file to trigger TTL compaction. The old files are dropped.
     for (int i = 0; i < 1; i++) {
       for (int j = 0; j < 10; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
     }
@@ -3833,7 +3833,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     for (int i = 0; i < 3; i++) {
       // Generate and flush a file about 10KB.
       for (int j = 0; j < 10; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3848,7 +3848,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
 
     for (int i = 0; i < 5; i++) {
       for (int j = 0; j < 140; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3871,7 +3871,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     for (int i = 0; i < 10; i++) {
       // Generate and flush a file about 10KB.
       for (int j = 0; j < 10; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3890,7 +3890,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     // Create 10 more files. The old 5 files are dropped as their ttl expired.
     for (int i = 0; i < 10; i++) {
       for (int j = 0; j < 10; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3915,7 +3915,7 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     for (int i = 0; i < 60; i++) {
       // Generate and flush a file about 20KB.
       for (int j = 0; j < 20; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j), rnd.RandomString(980)));
+        ASSERT_OK(Put(std::to_string(i * 20 + j), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -3926,7 +3926,8 @@ TEST_F(DBTest, FIFOCompactionWithTTLTest
     for (int i = 0; i < 60; i++) {
       // Generate and flush a file about 20KB.
       for (int j = 0; j < 20; j++) {
-        ASSERT_OK(Put(ToString(i * 20 + j + 2000), rnd.RandomString(980)));
+        ASSERT_OK(
+            Put(std::to_string(i * 20 + j + 2000), rnd.RandomString(980)));
       }
       ASSERT_OK(Flush());
       ASSERT_OK(dbfull()->TEST_WaitForCompact());
@@ -4207,7 +4208,7 @@ TEST_F(DBTest, ConcurrentFlushWAL) {
       std::vector<port::Thread> threads;
       threads.emplace_back([&] {
         for (size_t i = 0; i < cnt; i++) {
-          auto istr = ToString(i);
+          auto istr = std::to_string(i);
           ASSERT_OK(db_->Put(wopt, db_->DefaultColumnFamily(), "a" + istr,
                              "b" + istr));
         }
@@ -4215,7 +4216,7 @@ TEST_F(DBTest, ConcurrentFlushWAL) {
       if (two_write_queues) {
         threads.emplace_back([&] {
           for (size_t i = cnt; i < 2 * cnt; i++) {
-            auto istr = ToString(i);
+            auto istr = std::to_string(i);
             WriteBatch batch;
             ASSERT_OK(batch.Put("a" + istr, "b" + istr));
             ASSERT_OK(
@@ -4236,7 +4237,7 @@ TEST_F(DBTest, ConcurrentFlushWAL) {
       Reopen(options);
       for (size_t i = 0; i < cnt; i++) {
         PinnableSlice pval;
-        auto istr = ToString(i);
+        auto istr = std::to_string(i);
         ASSERT_OK(
             db_->Get(ropt, db_->DefaultColumnFamily(), "a" + istr, &pval));
         ASSERT_TRUE(pval == ("b" + istr));
@@ -4259,7 +4260,7 @@ TEST_F(DBTest, ManualFlushWalAndWriteRac
 
   port::Thread writeThread([&]() {
     for (int i = 0; i < 100; i++) {
-      auto istr = ToString(i);
+      auto istr = std::to_string(i);
       ASSERT_OK(dbfull()->Put(wopts, "key_" + istr, "value_" + istr));
     }
   });
@@ -4607,7 +4608,7 @@ TEST_P(DBTestWithParam, ThreadStatusSing
     // The Put Phase.
     for (int file = 0; file < kNumL0Files; ++file) {
       for (int key = 0; key < kEntriesPerBuffer; ++key) {
-        ASSERT_OK(Put(ToString(key + file * kEntriesPerBuffer),
+        ASSERT_OK(Put(std::to_string(key + file * kEntriesPerBuffer),
                       rnd.RandomString(kTestValueSize)));
       }
       ASSERT_OK(Flush());
@@ -4758,7 +4759,7 @@ TEST_P(DBTestWithParam, PreShutdownMulti
   int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
   for (int file = 0; file < 16 * kNumL0Files; ++file) {
     for (int k = 0; k < kEntriesPerBuffer; ++k) {
-      ASSERT_OK(Put(ToString(key++), rnd.RandomString(kTestValueSize)));
+      ASSERT_OK(Put(std::to_string(key++), rnd.RandomString(kTestValueSize)));
     }
 
     ASSERT_OK(env_->GetThreadList(&thread_list));
@@ -4845,7 +4846,7 @@ TEST_P(DBTestWithParam, PreShutdownCompa
   int operation_count[ThreadStatus::NUM_OP_TYPES] = {0};
   for (int file = 0; file < 16 * kNumL0Files; ++file) {
     for (int k = 0; k < kEntriesPerBuffer; ++k) {
-      ASSERT_OK(Put(ToString(key++), rnd.RandomString(kTestValueSize)));
+      ASSERT_OK(Put(std::to_string(key++), rnd.RandomString(kTestValueSize)));
     }
 
     ASSERT_OK(env_->GetThreadList(&thread_list));
@@ -5156,8 +5157,9 @@ TEST_F(DBTest, DynamicCompactionOptions)
   // Writing to 64KB L0 files should trigger a compaction. Since these
   // 2 L0 files have the same key range, compaction merge them and should
   // result in 2 32KB L1 files.
-  ASSERT_OK(dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
-                                  {"target_file_size_base", ToString(k32KB)}}));
+  ASSERT_OK(
+      dbfull()->SetOptions({{"level0_file_num_compaction_trigger", "2"},
+                            {"target_file_size_base", std::to_string(k32KB)}}));
 
   gen_l0_kb(0, 64, 1);
   ASSERT_EQ("1,1", FilesPerLevel());
@@ -5176,8 +5178,8 @@ TEST_F(DBTest, DynamicCompactionOptions)
   // Increase level base size to 256KB and write enough data that will
   // fill L1 and L2. L1 size should be around 256KB while L2 size should be
   // around 256KB x 4.
-  ASSERT_OK(
-      dbfull()->SetOptions({{"max_bytes_for_level_base", ToString(k1MB)}}));
+  ASSERT_OK(dbfull()->SetOptions(
+      {{"max_bytes_for_level_base", std::to_string(k1MB)}}));
 
   // writing 96 x 64KB => 6 * 1024KB
   // (L1 + L2) = (1 + 4) * 1024KB
@@ -5196,9 +5198,9 @@ TEST_F(DBTest, DynamicCompactionOptions)
   // max_bytes_for_level_base. Now, reduce both mulitplier and level base,
   // After filling enough data that can fit in L1 - L3, we should see L1 size
   // reduces to 128KB from 256KB which was asserted previously. Same for L2.
-  ASSERT_OK(
-      dbfull()->SetOptions({{"max_bytes_for_level_multiplier", "2"},
-                            {"max_bytes_for_level_base", ToString(k128KB)}}));
+  ASSERT_OK(dbfull()->SetOptions(
+      {{"max_bytes_for_level_multiplier", "2"},
+       {"max_bytes_for_level_base", std::to_string(k128KB)}}));
 
   // writing 20 x 64KB = 10 x 128KB
   // (L1 + L2 + L3) = (1 + 2 + 4) * 128KB
@@ -5854,7 +5856,7 @@ TEST_P(DBTestWithParam, FilterCompaction
   // put some data
   for (int table = 0; table < 4; ++table) {
     for (int i = 0; i < 10 + table; ++i) {
-      ASSERT_OK(Put(ToString(table * 100 + i), "val"));
+      ASSERT_OK(Put(std::to_string(table * 100 + i), "val"));
       ++n;
     }
     ASSERT_OK(Flush());
@@ -6238,7 +6240,7 @@ TEST_F(DBTest, LargeBatchWithColumnFamil
               (write_size / 1024 / 1024), pass);
       for (;;) {
         std::string data(3000, j++ % 127 + 20);
-        data += ToString(j);
+        data += std::to_string(j);
         ASSERT_OK(batch.Put(handles_[0], Slice(data), Slice(data)));
         if (batch.GetDataSize() > write_size) {
           break;
diff -pruN 7.2.2-5/db/db_test_util.cc 7.3.1-2/db/db_test_util.cc
--- 7.2.2-5/db/db_test_util.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_test_util.cc	2022-06-08 21:08:16.000000000 +0000
@@ -9,8 +9,11 @@
 
 #include "db/db_test_util.h"
 
+#include "cache/cache_reservation_manager.h"
 #include "db/forward_iterator.h"
 #include "env/mock_env.h"
+#include "port/lang.h"
+#include "rocksdb/cache.h"
 #include "rocksdb/convenience.h"
 #include "rocksdb/env_encryption.h"
 #include "rocksdb/unique_id.h"
@@ -360,6 +363,17 @@ Options DBTestBase::GetOptions(
   ROCKSDB_NAMESPACE::SyncPoint::GetInstance()->ClearCallBack(
       "NewWritableFile:O_DIRECT");
 #endif
+  // kMustFreeHeapAllocations -> indicates ASAN build
+  if (kMustFreeHeapAllocations && !options_override.full_block_cache) {
+    // Detecting block cache use-after-free is normally difficult in unit
+    // tests, because as a cache, it tends to keep unreferenced entries in
+    // memory, and we normally want unit tests to take advantage of block
+    // cache for speed. However, we also want a strong chance of detecting
+    // block cache use-after-free in unit tests in ASAN builds, so for ASAN
+    // builds we use a trivially small block cache to which entries can be
+    // added but are immediately freed on no more references.
+    table_options.block_cache = NewLRUCache(/* too small */ 1);
+  }
 
   bool can_allow_mmap = IsMemoryMappedAccessSupported();
   switch (option_config) {
@@ -816,10 +830,12 @@ std::string DBTestBase::Get(int cf, cons
 std::vector<std::string> DBTestBase::MultiGet(std::vector<int> cfs,
                                               const std::vector<std::string>& k,
                                               const Snapshot* snapshot,
-                                              const bool batched) {
+                                              const bool batched,
+                                              const bool async) {
   ReadOptions options;
   options.verify_checksums = true;
   options.snapshot = snapshot;
+  options.async_io = async;
   std::vector<ColumnFamilyHandle*> handles;
   std::vector<Slice> keys;
   std::vector<std::string> result;
@@ -831,7 +847,7 @@ std::vector<std::string> DBTestBase::Mul
   std::vector<Status> s;
   if (!batched) {
     s = db_->MultiGet(options, handles, keys, &result);
-    for (unsigned int i = 0; i < s.size(); ++i) {
+    for (size_t i = 0; i < s.size(); ++i) {
       if (s[i].IsNotFound()) {
         result[i] = "NOT_FOUND";
       } else if (!s[i].ok()) {
@@ -844,13 +860,16 @@ std::vector<std::string> DBTestBase::Mul
     s.resize(cfs.size());
     db_->MultiGet(options, cfs.size(), handles.data(), keys.data(),
                   pin_values.data(), s.data());
-    for (unsigned int i = 0; i < s.size(); ++i) {
+    for (size_t i = 0; i < s.size(); ++i) {
       if (s[i].IsNotFound()) {
         result[i] = "NOT_FOUND";
       } else if (!s[i].ok()) {
         result[i] = s[i].ToString();
       } else {
         result[i].assign(pin_values[i].data(), pin_values[i].size());
+        // Increase likelihood of detecting potential use-after-free bugs with
+        // PinnableSlices tracking the same resource
+        pin_values[i].Reset();
       }
     }
   }
@@ -858,28 +877,32 @@ std::vector<std::string> DBTestBase::Mul
 }
 
 std::vector<std::string> DBTestBase::MultiGet(const std::vector<std::string>& k,
-                                              const Snapshot* snapshot) {
+                                              const Snapshot* snapshot,
+                                              const bool async) {
   ReadOptions options;
   options.verify_checksums = true;
   options.snapshot = snapshot;
+  options.async_io = async;
   std::vector<Slice> keys;
-  std::vector<std::string> result;
+  std::vector<std::string> result(k.size());
   std::vector<Status> statuses(k.size());
   std::vector<PinnableSlice> pin_values(k.size());
 
-  for (unsigned int i = 0; i < k.size(); ++i) {
+  for (size_t i = 0; i < k.size(); ++i) {
     keys.push_back(k[i]);
   }
   db_->MultiGet(options, dbfull()->DefaultColumnFamily(), keys.size(),
                 keys.data(), pin_values.data(), statuses.data());
-  result.resize(k.size());
-  for (auto iter = result.begin(); iter != result.end(); ++iter) {
-    iter->assign(pin_values[iter - result.begin()].data(),
-                 pin_values[iter - result.begin()].size());
-  }
-  for (unsigned int i = 0; i < statuses.size(); ++i) {
+  for (size_t i = 0; i < statuses.size(); ++i) {
     if (statuses[i].IsNotFound()) {
       result[i] = "NOT_FOUND";
+    } else if (!statuses[i].ok()) {
+      result[i] = statuses[i].ToString();
+    } else {
+      result[i].assign(pin_values[i].data(), pin_values[i].size());
+      // Increase likelihood of detecting potential use-after-free bugs with
+      // PinnableSlices tracking the same resource
+      pin_values[i].Reset();
     }
   }
   return result;
@@ -1068,12 +1091,12 @@ int DBTestBase::NumTableFilesAtLevel(int
   std::string property;
   if (cf == 0) {
     // default cfd
-    EXPECT_TRUE(db_->GetProperty("rocksdb.num-files-at-level" + ToString(level),
-                                 &property));
+    EXPECT_TRUE(db_->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(level), &property));
   } else {
-    EXPECT_TRUE(db_->GetProperty(handles_[cf],
-                                 "rocksdb.num-files-at-level" + ToString(level),
-                                 &property));
+    EXPECT_TRUE(db_->GetProperty(
+        handles_[cf], "rocksdb.num-files-at-level" + std::to_string(level),
+        &property));
   }
   return atoi(property.c_str());
 }
@@ -1083,10 +1106,12 @@ double DBTestBase::CompressionRatioAtLev
   if (cf == 0) {
     // default cfd
     EXPECT_TRUE(db_->GetProperty(
-        "rocksdb.compression-ratio-at-level" + ToString(level), &property));
+        "rocksdb.compression-ratio-at-level" + std::to_string(level),
+        &property));
   } else {
     EXPECT_TRUE(db_->GetProperty(
-        handles_[cf], "rocksdb.compression-ratio-at-level" + ToString(level),
+        handles_[cf],
+        "rocksdb.compression-ratio-at-level" + std::to_string(level),
         &property));
   }
   return std::stod(property);
@@ -1663,4 +1688,61 @@ void VerifySstUniqueIds(const TablePrope
   }
 }
 
+template <CacheEntryRole R>
+TargetCacheChargeTrackingCache<R>::TargetCacheChargeTrackingCache(
+    std::shared_ptr<Cache> target)
+    : CacheWrapper(std::move(target)),
+      cur_cache_charge_(0),
+      cache_charge_peak_(0),
+      cache_charge_increment_(0),
+      last_peak_tracked_(false),
+      cache_charge_increments_sum_(0) {}
+
+template <CacheEntryRole R>
+Status TargetCacheChargeTrackingCache<R>::Insert(
+    const Slice& key, void* value, size_t charge,
+    void (*deleter)(const Slice& key, void* value), Handle** handle,
+    Priority priority) {
+  Status s = target_->Insert(key, value, charge, deleter, handle, priority);
+  if (deleter == kNoopDeleter) {
+    if (last_peak_tracked_) {
+      cache_charge_peak_ = 0;
+      cache_charge_increment_ = 0;
+      last_peak_tracked_ = false;
+    }
+    if (s.ok()) {
+      cur_cache_charge_ += charge;
+    }
+    cache_charge_peak_ = std::max(cache_charge_peak_, cur_cache_charge_);
+    cache_charge_increment_ += charge;
+  }
+
+  return s;
+}
+
+template <CacheEntryRole R>
+bool TargetCacheChargeTrackingCache<R>::Release(Handle* handle,
+                                                bool erase_if_last_ref) {
+  auto deleter = GetDeleter(handle);
+  if (deleter == kNoopDeleter) {
+    if (!last_peak_tracked_) {
+      cache_charge_peaks_.push_back(cache_charge_peak_);
+      cache_charge_increments_sum_ += cache_charge_increment_;
+      last_peak_tracked_ = true;
+    }
+    cur_cache_charge_ -= GetCharge(handle);
+  }
+  bool is_successful = target_->Release(handle, erase_if_last_ref);
+  return is_successful;
+}
+
+template <CacheEntryRole R>
+const Cache::DeleterFn TargetCacheChargeTrackingCache<R>::kNoopDeleter =
+    CacheReservationManagerImpl<R>::TEST_GetNoopDeleterForRole();
+
+template class TargetCacheChargeTrackingCache<
+    CacheEntryRole::kFilterConstruction>;
+template class TargetCacheChargeTrackingCache<
+    CacheEntryRole::kBlockBasedTableReader>;
+
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/db/db_test_util.h 7.3.1-2/db/db_test_util.h
--- 7.2.2-5/db/db_test_util.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_test_util.h	2022-06-08 21:08:16.000000000 +0000
@@ -104,6 +104,9 @@ struct OptionsOverride {
   std::shared_ptr<const FilterPolicy> filter_policy = nullptr;
   // These will be used only if filter_policy is set
   bool partition_filters = false;
+  // Force using a default block cache. (Setting to false allows ASAN build
+  // use a trivially small block cache for better UAF error detection.)
+  bool full_block_cache = false;
   uint64_t metadata_block_size = 1024;
 
   // Used as a bit mask of individual enums in which to skip an XF test point
@@ -949,6 +952,51 @@ class CacheWrapper : public Cache {
   std::shared_ptr<Cache> target_;
 };
 
+/*
+ * A cache wrapper that tracks certain CacheEntryRole's cache charge, its
+ * peaks and increments
+ *
+ *        p0
+ *       / \   p1
+ *      /   \  /\
+ *     /     \/  \
+ *  a /       b   \
+ * peaks = {p0, p1}
+ * increments = {p1-a, p2-b}
+ */
+template <CacheEntryRole R>
+class TargetCacheChargeTrackingCache : public CacheWrapper {
+ public:
+  explicit TargetCacheChargeTrackingCache(std::shared_ptr<Cache> target);
+
+  using Cache::Insert;
+  Status Insert(const Slice& key, void* value, size_t charge,
+                void (*deleter)(const Slice& key, void* value),
+                Handle** handle = nullptr,
+                Priority priority = Priority::LOW) override;
+
+  using Cache::Release;
+  bool Release(Handle* handle, bool erase_if_last_ref = false) override;
+
+  std::size_t GetCacheCharge() { return cur_cache_charge_; }
+
+  std::deque<std::size_t> GetChargedCachePeaks() { return cache_charge_peaks_; }
+
+  std::size_t GetChargedCacheIncrementSum() {
+    return cache_charge_increments_sum_;
+  }
+
+ private:
+  static const Cache::DeleterFn kNoopDeleter;
+
+  std::size_t cur_cache_charge_;
+  std::size_t cache_charge_peak_;
+  std::size_t cache_charge_increment_;
+  bool last_peak_tracked_;
+  std::deque<std::size_t> cache_charge_peaks_;
+  std::size_t cache_charge_increments_sum_;
+};
+
 class DBTestBase : public testing::Test {
  public:
   // Sequence of option configurations to try
@@ -1151,10 +1199,12 @@ class DBTestBase : public testing::Test
   std::vector<std::string> MultiGet(std::vector<int> cfs,
                                     const std::vector<std::string>& k,
                                     const Snapshot* snapshot,
-                                    const bool batched);
+                                    const bool batched,
+                                    const bool async = false);
 
   std::vector<std::string> MultiGet(const std::vector<std::string>& k,
-                                    const Snapshot* snapshot = nullptr);
+                                    const Snapshot* snapshot = nullptr,
+                                    const bool async = false);
 
   uint64_t GetNumSnapshots();
 
diff -pruN 7.2.2-5/db/db_universal_compaction_test.cc 7.3.1-2/db/db_universal_compaction_test.cc
--- 7.2.2-5/db/db_universal_compaction_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_universal_compaction_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -549,7 +549,7 @@ TEST_P(DBTestUniversalCompaction, Compac
   ASSERT_EQ(options.compaction_style, kCompactionStyleUniversal);
   Random rnd(301);
   for (int key = 1024 * kEntriesPerBuffer; key >= 0; --key) {
-    ASSERT_OK(Put(1, ToString(key), rnd.RandomString(kTestValueSize)));
+    ASSERT_OK(Put(1, std::to_string(key), rnd.RandomString(kTestValueSize)));
   }
   ASSERT_OK(dbfull()->TEST_WaitForFlushMemTable(handles_[1]));
   ASSERT_OK(dbfull()->TEST_WaitForCompact());
diff -pruN 7.2.2-5/db/db_wal_test.cc 7.3.1-2/db/db_wal_test.cc
--- 7.2.2-5/db/db_wal_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_wal_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -1009,7 +1009,7 @@ TEST_F(DBWALTest, RecoveryWithLogDataFor
       if (log_files.size() > 0) {
         earliest_log_nums[i] = log_files[0]->LogNumber();
       } else {
-        earliest_log_nums[i] = port::kMaxUint64;
+        earliest_log_nums[i] = std::numeric_limits<uint64_t>::max();
       }
     }
     // Check at least the first WAL was cleaned up during the recovery.
@@ -1288,7 +1288,7 @@ class RecoveryTestHelper {
 
       WriteBatch batch;
       for (int i = 0; i < kKeysPerWALFile; i++) {
-        std::string key = "key" + ToString((*count)++);
+        std::string key = "key" + std::to_string((*count)++);
         std::string value = test->DummyString(kValueSize);
         ASSERT_NE(current_log_writer.get(), nullptr);
         uint64_t seq = versions->LastSequence() + 1;
@@ -1319,7 +1319,7 @@ class RecoveryTestHelper {
   static size_t GetData(DBWALTestBase* test) {
     size_t count = 0;
     for (size_t i = 0; i < kWALFilesCount * kKeysPerWALFile; i++) {
-      if (test->Get("key" + ToString(i)) != "NOT_FOUND") {
+      if (test->Get("key" + std::to_string(i)) != "NOT_FOUND") {
         ++count;
       }
     }
@@ -1616,7 +1616,7 @@ TEST_P(DBWALTestWithParams, kPointInTime
   if (!trunc || corrupt_offset != 0) {
     bool expect_data = true;
     for (size_t k = 0; k < maxkeys; ++k) {
-      bool found = Get("key" + ToString(k)) != "NOT_FOUND";
+      bool found = Get("key" + std::to_string(k)) != "NOT_FOUND";
       if (expect_data && !found) {
         expect_data = false;
       }
@@ -1752,7 +1752,7 @@ TEST_F(DBWALTest, RecoverWithoutFlush) {
   size_t count = RecoveryTestHelper::FillData(this, &options);
   auto validateData = [this, count]() {
     for (size_t i = 0; i < count; i++) {
-      ASSERT_NE(Get("key" + ToString(i)), "NOT_FOUND");
+      ASSERT_NE(Get("key" + std::to_string(i)), "NOT_FOUND");
     }
   };
   Reopen(options);
@@ -1891,7 +1891,7 @@ TEST_P(DBWALTestWithParamsVaryingRecover
   ASSERT_OK(TryReopen(options));
   // Append some more data.
   for (int k = 0; k < kAppendKeys; k++) {
-    std::string key = "extra_key" + ToString(k);
+    std::string key = "extra_key" + std::to_string(k);
     std::string value = DummyString(RecoveryTestHelper::kValueSize);
     ASSERT_OK(Put(key, value));
   }
@@ -1925,7 +1925,7 @@ TEST_F(DBWALTest, RestoreTotalLogSizeAft
   std::string value_300k(300 * kKB, 'v');
   ASSERT_OK(Put(0, "foo", "v1"));
   for (int i = 0; i < 9; i++) {
-    ASSERT_OK(Put(1, "key" + ToString(i), value_100k));
+    ASSERT_OK(Put(1, "key" + std::to_string(i), value_100k));
   }
   // Get log files before reopen.
   VectorLogPtr log_files_before;
diff -pruN 7.2.2-5/db/db_with_timestamp_basic_test.cc 7.3.1-2/db/db_with_timestamp_basic_test.cc
--- 7.2.2-5/db/db_with_timestamp_basic_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_with_timestamp_basic_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -7,7 +7,7 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 
-#include "db/db_test_util.h"
+#include "db/db_with_timestamp_test_util.h"
 #include "port/stack_trace.h"
 #include "rocksdb/perf_context.h"
 #include "rocksdb/utilities/debug.h"
@@ -20,176 +20,6 @@
 #include "utilities/fault_injection_env.h"
 
 namespace ROCKSDB_NAMESPACE {
-class DBBasicTestWithTimestampBase : public DBTestBase {
- public:
-  explicit DBBasicTestWithTimestampBase(const std::string& dbname)
-      : DBTestBase(dbname, /*env_do_fsync=*/true) {}
-
- protected:
-  static std::string Key1(uint64_t k) {
-    std::string ret;
-    PutFixed64(&ret, k);
-    std::reverse(ret.begin(), ret.end());
-    return ret;
-  }
-
-  static std::string KeyWithPrefix(std::string prefix, uint64_t k) {
-    std::string ret;
-    PutFixed64(&ret, k);
-    std::reverse(ret.begin(), ret.end());
-    return prefix + ret;
-  }
-
-  static std::vector<Slice> ConvertStrToSlice(
-      std::vector<std::string>& strings) {
-    std::vector<Slice> ret;
-    for (const auto& s : strings) {
-      ret.emplace_back(s);
-    }
-    return ret;
-  }
-
-  class TestComparator : public Comparator {
-   private:
-    const Comparator* cmp_without_ts_;
-
-   public:
-    explicit TestComparator(size_t ts_sz)
-        : Comparator(ts_sz), cmp_without_ts_(nullptr) {
-      cmp_without_ts_ = BytewiseComparator();
-    }
-
-    const char* Name() const override { return "TestComparator"; }
-
-    void FindShortSuccessor(std::string*) const override {}
-
-    void FindShortestSeparator(std::string*, const Slice&) const override {}
-
-    int Compare(const Slice& a, const Slice& b) const override {
-      int r = CompareWithoutTimestamp(a, b);
-      if (r != 0 || 0 == timestamp_size()) {
-        return r;
-      }
-      return -CompareTimestamp(
-          Slice(a.data() + a.size() - timestamp_size(), timestamp_size()),
-          Slice(b.data() + b.size() - timestamp_size(), timestamp_size()));
-    }
-
-    using Comparator::CompareWithoutTimestamp;
-    int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b,
-                                bool b_has_ts) const override {
-      if (a_has_ts) {
-        assert(a.size() >= timestamp_size());
-      }
-      if (b_has_ts) {
-        assert(b.size() >= timestamp_size());
-      }
-      Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, timestamp_size()) : a;
-      Slice rhs = b_has_ts ? StripTimestampFromUserKey(b, timestamp_size()) : b;
-      return cmp_without_ts_->Compare(lhs, rhs);
-    }
-
-    int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override {
-      if (!ts1.data() && !ts2.data()) {
-        return 0;
-      } else if (ts1.data() && !ts2.data()) {
-        return 1;
-      } else if (!ts1.data() && ts2.data()) {
-        return -1;
-      }
-      assert(ts1.size() == ts2.size());
-      uint64_t low1 = 0;
-      uint64_t low2 = 0;
-      uint64_t high1 = 0;
-      uint64_t high2 = 0;
-      const size_t kSize = ts1.size();
-      std::unique_ptr<char[]> ts1_buf(new char[kSize]);
-      memcpy(ts1_buf.get(), ts1.data(), ts1.size());
-      std::unique_ptr<char[]> ts2_buf(new char[kSize]);
-      memcpy(ts2_buf.get(), ts2.data(), ts2.size());
-      Slice ts1_copy = Slice(ts1_buf.get(), kSize);
-      Slice ts2_copy = Slice(ts2_buf.get(), kSize);
-      auto* ptr1 = const_cast<Slice*>(&ts1_copy);
-      auto* ptr2 = const_cast<Slice*>(&ts2_copy);
-      if (!GetFixed64(ptr1, &low1) || !GetFixed64(ptr1, &high1) ||
-          !GetFixed64(ptr2, &low2) || !GetFixed64(ptr2, &high2)) {
-        assert(false);
-      }
-      if (high1 < high2) {
-        return -1;
-      } else if (high1 > high2) {
-        return 1;
-      }
-      if (low1 < low2) {
-        return -1;
-      } else if (low1 > low2) {
-        return 1;
-      }
-      return 0;
-    }
-  };
-
-  std::string Timestamp(uint64_t low, uint64_t high) {
-    std::string ts;
-    PutFixed64(&ts, low);
-    PutFixed64(&ts, high);
-    return ts;
-  }
-
-  void CheckIterUserEntry(const Iterator* it, const Slice& expected_key,
-                          ValueType expected_value_type,
-                          const Slice& expected_value,
-                          const Slice& expected_ts) const {
-    ASSERT_TRUE(it->Valid());
-    ASSERT_OK(it->status());
-    ASSERT_EQ(expected_key, it->key());
-    if (kTypeValue == expected_value_type) {
-      ASSERT_EQ(expected_value, it->value());
-    }
-    ASSERT_EQ(expected_ts, it->timestamp());
-  }
-
-  void CheckIterEntry(const Iterator* it, const Slice& expected_ukey,
-                      SequenceNumber expected_seq, ValueType expected_val_type,
-                      const Slice& expected_value, const Slice& expected_ts) {
-    ASSERT_TRUE(it->Valid());
-    ASSERT_OK(it->status());
-    std::string ukey_and_ts;
-    ukey_and_ts.assign(expected_ukey.data(), expected_ukey.size());
-    ukey_and_ts.append(expected_ts.data(), expected_ts.size());
-    ParsedInternalKey parsed_ikey;
-    ASSERT_OK(
-        ParseInternalKey(it->key(), &parsed_ikey, true /* log_err_key */));
-    ASSERT_EQ(ukey_and_ts, parsed_ikey.user_key);
-    ASSERT_EQ(expected_val_type, parsed_ikey.type);
-    ASSERT_EQ(expected_seq, parsed_ikey.sequence);
-    if (expected_val_type == kTypeValue) {
-      ASSERT_EQ(expected_value, it->value());
-    }
-    ASSERT_EQ(expected_ts, it->timestamp());
-  }
-
-  void CheckIterEntry(const Iterator* it, const Slice& expected_ukey,
-                      ValueType expected_val_type, const Slice& expected_value,
-                      const Slice& expected_ts) {
-    ASSERT_TRUE(it->Valid());
-    ASSERT_OK(it->status());
-    std::string ukey_and_ts;
-    ukey_and_ts.assign(expected_ukey.data(), expected_ukey.size());
-    ukey_and_ts.append(expected_ts.data(), expected_ts.size());
-
-    ParsedInternalKey parsed_ikey;
-    ASSERT_OK(
-        ParseInternalKey(it->key(), &parsed_ikey, true /* log_err_key */));
-    ASSERT_EQ(expected_val_type, parsed_ikey.type);
-    ASSERT_EQ(Slice(ukey_and_ts), parsed_ikey.user_key);
-    if (expected_val_type == kTypeValue) {
-      ASSERT_EQ(expected_value, it->value());
-    }
-    ASSERT_EQ(expected_ts, it->timestamp());
-  }
-};
-
 class DBBasicTestWithTimestamp : public DBBasicTestWithTimestampBase {
  public:
   DBBasicTestWithTimestamp()
@@ -1492,8 +1322,8 @@ TEST_F(DBBasicTestWithTimestamp, MultiGe
 
   // random data
   for (int i = 0; i < 3; i++) {
-    auto key = ToString(i * 10);
-    auto value = ToString(i * 10);
+    auto key = std::to_string(i * 10);
+    auto value = std::to_string(i * 10);
     Slice key_slice = key;
     Slice value_slice = value;
     ASSERT_OK(db_->Put(write_opts, key_slice, ts, value_slice));
@@ -1824,8 +1654,8 @@ class DataVisibilityTest : public DBBasi
   DataVisibilityTest() : DBBasicTestWithTimestampBase("data_visibility_test") {
     // Initialize test data
     for (int i = 0; i < kTestDataSize; i++) {
-      test_data_[i].key = "key" + ToString(i);
-      test_data_[i].value = "value" + ToString(i);
+      test_data_[i].key = "key" + std::to_string(i);
+      test_data_[i].value = "value" + std::to_string(i);
       test_data_[i].timestamp = Timestamp(i, 0);
       test_data_[i].ts = i;
       test_data_[i].seq_num = kMaxSequenceNumber;
diff -pruN 7.2.2-5/db/db_with_timestamp_test_util.cc 7.3.1-2/db/db_with_timestamp_test_util.cc
--- 7.2.2-5/db/db_with_timestamp_test_util.cc	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/db/db_with_timestamp_test_util.cc	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,96 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#include "db/db_with_timestamp_test_util.h"
+
+namespace ROCKSDB_NAMESPACE {
+std::string DBBasicTestWithTimestampBase::Key1(uint64_t k) {
+  std::string ret;
+  PutFixed64(&ret, k);
+  std::reverse(ret.begin(), ret.end());
+  return ret;
+}
+
+std::string DBBasicTestWithTimestampBase::KeyWithPrefix(std::string prefix,
+                                                        uint64_t k) {
+  std::string ret;
+  PutFixed64(&ret, k);
+  std::reverse(ret.begin(), ret.end());
+  return prefix + ret;
+}
+
+std::vector<Slice> DBBasicTestWithTimestampBase::ConvertStrToSlice(
+    std::vector<std::string>& strings) {
+  std::vector<Slice> ret;
+  for (const auto& s : strings) {
+    ret.emplace_back(s);
+  }
+  return ret;
+}
+
+std::string DBBasicTestWithTimestampBase::Timestamp(uint64_t low,
+                                                    uint64_t high) {
+  std::string ts;
+  PutFixed64(&ts, low);
+  PutFixed64(&ts, high);
+  return ts;
+}
+
+void DBBasicTestWithTimestampBase::CheckIterUserEntry(
+    const Iterator* it, const Slice& expected_key,
+    ValueType expected_value_type, const Slice& expected_value,
+    const Slice& expected_ts) const {
+  ASSERT_TRUE(it->Valid());
+  ASSERT_OK(it->status());
+  ASSERT_EQ(expected_key, it->key());
+  if (kTypeValue == expected_value_type) {
+    ASSERT_EQ(expected_value, it->value());
+  }
+  ASSERT_EQ(expected_ts, it->timestamp());
+}
+
+void DBBasicTestWithTimestampBase::CheckIterEntry(
+    const Iterator* it, const Slice& expected_ukey, SequenceNumber expected_seq,
+    ValueType expected_val_type, const Slice& expected_value,
+    const Slice& expected_ts) const {
+  ASSERT_TRUE(it->Valid());
+  ASSERT_OK(it->status());
+  std::string ukey_and_ts;
+  ukey_and_ts.assign(expected_ukey.data(), expected_ukey.size());
+  ukey_and_ts.append(expected_ts.data(), expected_ts.size());
+  ParsedInternalKey parsed_ikey;
+  ASSERT_OK(ParseInternalKey(it->key(), &parsed_ikey, true /* log_err_key */));
+  ASSERT_EQ(ukey_and_ts, parsed_ikey.user_key);
+  ASSERT_EQ(expected_val_type, parsed_ikey.type);
+  ASSERT_EQ(expected_seq, parsed_ikey.sequence);
+  if (expected_val_type == kTypeValue) {
+    ASSERT_EQ(expected_value, it->value());
+  }
+  ASSERT_EQ(expected_ts, it->timestamp());
+}
+
+void DBBasicTestWithTimestampBase::CheckIterEntry(
+    const Iterator* it, const Slice& expected_ukey, ValueType expected_val_type,
+    const Slice& expected_value, const Slice& expected_ts) const {
+  ASSERT_TRUE(it->Valid());
+  ASSERT_OK(it->status());
+  std::string ukey_and_ts;
+  ukey_and_ts.assign(expected_ukey.data(), expected_ukey.size());
+  ukey_and_ts.append(expected_ts.data(), expected_ts.size());
+
+  ParsedInternalKey parsed_ikey;
+  ASSERT_OK(ParseInternalKey(it->key(), &parsed_ikey, true /* log_err_key */));
+  ASSERT_EQ(expected_val_type, parsed_ikey.type);
+  ASSERT_EQ(Slice(ukey_and_ts), parsed_ikey.user_key);
+  if (expected_val_type == kTypeValue) {
+    ASSERT_EQ(expected_value, it->value());
+  }
+  ASSERT_EQ(expected_ts, it->timestamp());
+}
+}  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/db/db_with_timestamp_test_util.h 7.3.1-2/db/db_with_timestamp_test_util.h
--- 7.2.2-5/db/db_with_timestamp_test_util.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/db/db_with_timestamp_test_util.h	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,126 @@
+// Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+//
+// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file. See the AUTHORS file for names of contributors.
+
+#pragma once
+
+#include "db/db_test_util.h"
+#include "port/stack_trace.h"
+#include "test_util/testutil.h"
+
+namespace ROCKSDB_NAMESPACE {
+class DBBasicTestWithTimestampBase : public DBTestBase {
+ public:
+  explicit DBBasicTestWithTimestampBase(const std::string& dbname)
+      : DBTestBase(dbname, /*env_do_fsync=*/true) {}
+
+ protected:
+  static std::string Key1(uint64_t k);
+
+  static std::string KeyWithPrefix(std::string prefix, uint64_t k);
+
+  static std::vector<Slice> ConvertStrToSlice(
+      std::vector<std::string>& strings);
+
+  class TestComparator : public Comparator {
+   private:
+    const Comparator* cmp_without_ts_;
+
+   public:
+    explicit TestComparator(size_t ts_sz)
+        : Comparator(ts_sz), cmp_without_ts_(nullptr) {
+      cmp_without_ts_ = BytewiseComparator();
+    }
+
+    const char* Name() const override { return "TestComparator"; }
+
+    void FindShortSuccessor(std::string*) const override {}
+
+    void FindShortestSeparator(std::string*, const Slice&) const override {}
+
+    int Compare(const Slice& a, const Slice& b) const override {
+      int r = CompareWithoutTimestamp(a, b);
+      if (r != 0 || 0 == timestamp_size()) {
+        return r;
+      }
+      return -CompareTimestamp(
+          Slice(a.data() + a.size() - timestamp_size(), timestamp_size()),
+          Slice(b.data() + b.size() - timestamp_size(), timestamp_size()));
+    }
+
+    using Comparator::CompareWithoutTimestamp;
+    int CompareWithoutTimestamp(const Slice& a, bool a_has_ts, const Slice& b,
+                                bool b_has_ts) const override {
+      if (a_has_ts) {
+        assert(a.size() >= timestamp_size());
+      }
+      if (b_has_ts) {
+        assert(b.size() >= timestamp_size());
+      }
+      Slice lhs = a_has_ts ? StripTimestampFromUserKey(a, timestamp_size()) : a;
+      Slice rhs = b_has_ts ? StripTimestampFromUserKey(b, timestamp_size()) : b;
+      return cmp_without_ts_->Compare(lhs, rhs);
+    }
+
+    int CompareTimestamp(const Slice& ts1, const Slice& ts2) const override {
+      if (!ts1.data() && !ts2.data()) {
+        return 0;
+      } else if (ts1.data() && !ts2.data()) {
+        return 1;
+      } else if (!ts1.data() && ts2.data()) {
+        return -1;
+      }
+      assert(ts1.size() == ts2.size());
+      uint64_t low1 = 0;
+      uint64_t low2 = 0;
+      uint64_t high1 = 0;
+      uint64_t high2 = 0;
+      const size_t kSize = ts1.size();
+      std::unique_ptr<char[]> ts1_buf(new char[kSize]);
+      memcpy(ts1_buf.get(), ts1.data(), ts1.size());
+      std::unique_ptr<char[]> ts2_buf(new char[kSize]);
+      memcpy(ts2_buf.get(), ts2.data(), ts2.size());
+      Slice ts1_copy = Slice(ts1_buf.get(), kSize);
+      Slice ts2_copy = Slice(ts2_buf.get(), kSize);
+      auto* ptr1 = const_cast<Slice*>(&ts1_copy);
+      auto* ptr2 = const_cast<Slice*>(&ts2_copy);
+      if (!GetFixed64(ptr1, &low1) || !GetFixed64(ptr1, &high1) ||
+          !GetFixed64(ptr2, &low2) || !GetFixed64(ptr2, &high2)) {
+        assert(false);
+      }
+      if (high1 < high2) {
+        return -1;
+      } else if (high1 > high2) {
+        return 1;
+      }
+      if (low1 < low2) {
+        return -1;
+      } else if (low1 > low2) {
+        return 1;
+      }
+      return 0;
+    }
+  };
+
+  std::string Timestamp(uint64_t low, uint64_t high);
+
+  void CheckIterUserEntry(const Iterator* it, const Slice& expected_key,
+                          ValueType expected_value_type,
+                          const Slice& expected_value,
+                          const Slice& expected_ts) const;
+
+  void CheckIterEntry(const Iterator* it, const Slice& expected_ukey,
+                      SequenceNumber expected_seq, ValueType expected_val_type,
+                      const Slice& expected_value,
+                      const Slice& expected_ts) const;
+
+  void CheckIterEntry(const Iterator* it, const Slice& expected_ukey,
+                      ValueType expected_val_type, const Slice& expected_value,
+                      const Slice& expected_ts) const;
+};
+}  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/db/db_write_test.cc 7.3.1-2/db/db_write_test.cc
--- 7.2.2-5/db/db_write_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/db_write_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -289,7 +289,7 @@ TEST_P(DBWriteTest, IOErrorOnWALWritePro
     threads.push_back(port::Thread(
         [&](int index) {
           // All threads should fail.
-          auto res = Put("key" + ToString(index), "value");
+          auto res = Put("key" + std::to_string(index), "value");
           if (options.manual_wal_flush) {
             ASSERT_TRUE(res.ok());
             // we should see fs error when we do the flush
@@ -322,13 +322,13 @@ TEST_P(DBWriteTest, ManualWalFlushInEffe
   Options options = GetOptions();
   Reopen(options);
   // try the 1st WAL created during open
-  ASSERT_TRUE(Put("key" + ToString(0), "value").ok());
+  ASSERT_TRUE(Put("key" + std::to_string(0), "value").ok());
   ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty());
   ASSERT_TRUE(dbfull()->FlushWAL(false).ok());
   ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty());
   // try the 2nd wal created during SwitchWAL
   ASSERT_OK(dbfull()->TEST_SwitchWAL());
-  ASSERT_TRUE(Put("key" + ToString(0), "value").ok());
+  ASSERT_TRUE(Put("key" + std::to_string(0), "value").ok());
   ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty());
   ASSERT_TRUE(dbfull()->FlushWAL(false).ok());
   ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty());
@@ -344,7 +344,7 @@ TEST_P(DBWriteTest, IOErrorOnWALWriteTri
     // Forcibly fail WAL write for the first Put only. Subsequent Puts should
     // fail due to read-only mode
     mock_env->SetFilesystemActive(i != 0);
-    auto res = Put("key" + ToString(i), "value");
+    auto res = Put("key" + std::to_string(i), "value");
     // TSAN reports a false alarm for lock-order-inversion but Open and
     // FlushWAL are not run concurrently. Disabling this until TSAN is
     // fixed.
@@ -398,14 +398,14 @@ TEST_P(DBWriteTest, LockWalInEffect) {
   Options options = GetOptions();
   Reopen(options);
   // try the 1st WAL created during open
-  ASSERT_OK(Put("key" + ToString(0), "value"));
+  ASSERT_OK(Put("key" + std::to_string(0), "value"));
   ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty());
   ASSERT_OK(dbfull()->LockWAL());
   ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false));
   ASSERT_OK(dbfull()->UnlockWAL());
   // try the 2nd wal created during SwitchWAL
   ASSERT_OK(dbfull()->TEST_SwitchWAL());
-  ASSERT_OK(Put("key" + ToString(0), "value"));
+  ASSERT_OK(Put("key" + std::to_string(0), "value"));
   ASSERT_TRUE(options.manual_wal_flush != dbfull()->TEST_WALBufferIsEmpty());
   ASSERT_OK(dbfull()->LockWAL());
   ASSERT_TRUE(dbfull()->TEST_WALBufferIsEmpty(false));
diff -pruN 7.2.2-5/db/deletefile_test.cc 7.3.1-2/db/deletefile_test.cc
--- 7.2.2-5/db/deletefile_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/deletefile_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -56,7 +56,7 @@ class DeleteFileTest : public DBTestBase
     options.sync = false;
     ReadOptions roptions;
     for (int i = startkey; i < (numkeys + startkey) ; i++) {
-      std::string temp = ToString(i);
+      std::string temp = std::to_string(i);
       Slice key(temp);
       Slice value(temp);
       ASSERT_OK(db_->Put(options, key, value));
diff -pruN 7.2.2-5/db/error_handler_fs_test.cc 7.3.1-2/db/error_handler_fs_test.cc
--- 7.2.2-5/db/error_handler_fs_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/error_handler_fs_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -1583,11 +1583,11 @@ TEST_F(DBErrorHandlingFSTest, MultiDBCom
     std::string prop;
     ASSERT_EQ(listener[i]->WaitForRecovery(5000000), true);
     ASSERT_OK(static_cast<DBImpl*>(db[i])->TEST_WaitForCompact(true));
-    EXPECT_TRUE(
-        db[i]->GetProperty("rocksdb.num-files-at-level" + ToString(0), &prop));
+    EXPECT_TRUE(db[i]->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(0), &prop));
     EXPECT_EQ(atoi(prop.c_str()), 0);
-    EXPECT_TRUE(
-        db[i]->GetProperty("rocksdb.num-files-at-level" + ToString(1), &prop));
+    EXPECT_TRUE(db[i]->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(1), &prop));
     EXPECT_EQ(atoi(prop.c_str()), 1);
   }
 
@@ -1720,11 +1720,11 @@ TEST_F(DBErrorHandlingFSTest, MultiDBVar
     if (i == 1) {
       ASSERT_OK(static_cast<DBImpl*>(db[i])->TEST_WaitForCompact(true));
     }
-    EXPECT_TRUE(
-        db[i]->GetProperty("rocksdb.num-files-at-level" + ToString(0), &prop));
+    EXPECT_TRUE(db[i]->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(0), &prop));
     EXPECT_EQ(atoi(prop.c_str()), 0);
-    EXPECT_TRUE(
-        db[i]->GetProperty("rocksdb.num-files-at-level" + ToString(1), &prop));
+    EXPECT_TRUE(db[i]->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(1), &prop));
     EXPECT_EQ(atoi(prop.c_str()), 1);
   }
 
diff -pruN 7.2.2-5/db/experimental.cc 7.3.1-2/db/experimental.cc
--- 7.2.2-5/db/experimental.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/experimental.cc	2022-06-08 21:08:16.000000000 +0000
@@ -112,7 +112,7 @@ Status UpdateManifestForFilesState(
                            lf->oldest_blob_file_number,
                            lf->oldest_ancester_time, lf->file_creation_time,
                            lf->file_checksum, lf->file_checksum_func_name,
-                           lf->min_timestamp, lf->max_timestamp);
+                           lf->min_timestamp, lf->max_timestamp, lf->unique_id);
             }
           }
         } else {
diff -pruN 7.2.2-5/db/external_sst_file_basic_test.cc 7.3.1-2/db/external_sst_file_basic_test.cc
--- 7.2.2-5/db/external_sst_file_basic_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/external_sst_file_basic_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -91,7 +91,7 @@ class ExternalSSTFileBasicTest
       bool write_global_seqno, bool verify_checksums_before_ingest,
       std::map<std::string, std::string>* true_data) {
     assert(value_types.size() == 1 || keys.size() == value_types.size());
-    std::string file_path = sst_files_dir_ + ToString(file_id);
+    std::string file_path = sst_files_dir_ + std::to_string(file_id);
     SstFileWriter sst_file_writer(EnvOptions(), options);
 
     Status s = sst_file_writer.Open(file_path);
@@ -123,7 +123,7 @@ class ExternalSSTFileBasicTest
     }
     for (size_t i = 0; i < keys.size(); i++) {
       std::string key = Key(keys[i]);
-      std::string value = Key(keys[i]) + ToString(file_id);
+      std::string value = Key(keys[i]) + std::to_string(file_id);
       ValueType value_type =
           (value_types.size() == 1 ? value_types[0] : value_types[i]);
       switch (value_type) {
@@ -190,10 +190,10 @@ class ExternalSSTFileBasicTest
 #ifndef ROCKSDB_LITE
   uint64_t GetSstSizeHelper(Temperature temperature) {
     std::string prop;
-    EXPECT_TRUE(
-        dbfull()->GetProperty(DB::Properties::kLiveSstFilesSizeAtTemperature +
-                                  ToString(static_cast<uint8_t>(temperature)),
-                              &prop));
+    EXPECT_TRUE(dbfull()->GetProperty(
+        DB::Properties::kLiveSstFilesSizeAtTemperature +
+            std::to_string(static_cast<uint8_t>(temperature)),
+        &prop));
     return static_cast<uint64_t>(std::atoi(prop.c_str()));
   }
 #endif  // ROCKSDB_LITE
@@ -1184,7 +1184,7 @@ TEST_F(ExternalSSTFileBasicTest, SyncFai
     std::unique_ptr<SstFileWriter> sst_file_writer(
         new SstFileWriter(EnvOptions(), sst_file_writer_options));
     std::string file_name =
-        sst_files_dir_ + "sync_failure_test_" + ToString(i) + ".sst";
+        sst_files_dir_ + "sync_failure_test_" + std::to_string(i) + ".sst";
     ASSERT_OK(sst_file_writer->Open(file_name));
     ASSERT_OK(sst_file_writer->Put("bar", "v2"));
     ASSERT_OK(sst_file_writer->Finish());
@@ -1514,13 +1514,13 @@ TEST_P(ExternalSSTFileBasicTest, IngestF
   EnvOptions env_options;
   do {
     Options options = CurrentOptions();
-    std::string file_path = sst_files_dir_ + ToString(file_id++);
+    std::string file_path = sst_files_dir_ + std::to_string(file_id++);
     SstFileWriter sst_file_writer(env_options, options);
     Status s = sst_file_writer.Open(file_path);
     ASSERT_OK(s);
     for (int i = 0; i != 100; ++i) {
       std::string key = Key(i);
-      std::string value = Key(i) + ToString(0);
+      std::string value = Key(i) + std::to_string(0);
       ASSERT_OK(sst_file_writer.Put(key, value));
     }
     ASSERT_OK(sst_file_writer.Finish());
@@ -1585,14 +1585,14 @@ TEST_P(ExternalSSTFileBasicTest, IngestE
   int file_id = 0;
   Random64 rand(time(nullptr));
   do {
-    std::string file_path = sst_files_dir_ + ToString(file_id++);
+    std::string file_path = sst_files_dir_ + std::to_string(file_id++);
     Options options = CurrentOptions();
     SstFileWriter sst_file_writer(EnvOptions(), options);
     Status s = sst_file_writer.Open(file_path);
     ASSERT_OK(s);
     for (int i = 0; i != 100; ++i) {
       std::string key = Key(i);
-      std::string value = Key(i) + ToString(0);
+      std::string value = Key(i) + std::to_string(0);
       ASSERT_OK(sst_file_writer.Put(key, value));
     }
     ASSERT_OK(sst_file_writer.Finish());
@@ -1799,7 +1799,7 @@ TEST_F(ExternalSSTFileBasicTest, IngestW
 TEST_F(ExternalSSTFileBasicTest, FailIfNotBottommostLevel) {
   Options options = GetDefaultOptions();
 
-  std::string file_path = sst_files_dir_ + ToString(1);
+  std::string file_path = sst_files_dir_ + std::to_string(1);
   SstFileWriter sfw(EnvOptions(), options);
 
   ASSERT_OK(sfw.Open(file_path));
@@ -1844,6 +1844,85 @@ TEST_F(ExternalSSTFileBasicTest, FailIfN
   }
 }
 
+TEST_F(ExternalSSTFileBasicTest, VerifyChecksum) {
+  const std::string kPutVal = "put_val";
+  const std::string kIngestedVal = "ingested_val";
+
+  ASSERT_OK(Put("k", kPutVal, WriteOptions()));
+  ASSERT_OK(Flush());
+
+  std::string external_file = sst_files_dir_ + "/file_to_ingest.sst";
+  {
+    SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()};
+
+    ASSERT_OK(sst_file_writer.Open(external_file));
+    ASSERT_OK(sst_file_writer.Put("k", kIngestedVal));
+    ASSERT_OK(sst_file_writer.Finish());
+  }
+
+  ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file},
+                                    IngestExternalFileOptions()));
+
+  ASSERT_OK(db_->VerifyChecksum());
+}
+
+TEST_F(ExternalSSTFileBasicTest, VerifySstUniqueId) {
+  const std::string kPutVal = "put_val";
+  const std::string kIngestedVal = "ingested_val";
+
+  ASSERT_OK(Put("k", kPutVal, WriteOptions()));
+  ASSERT_OK(Flush());
+
+  std::string external_file = sst_files_dir_ + "/file_to_ingest.sst";
+  {
+    SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()};
+
+    ASSERT_OK(sst_file_writer.Open(external_file));
+    ASSERT_OK(sst_file_writer.Put("k", kIngestedVal));
+    ASSERT_OK(sst_file_writer.Finish());
+  }
+
+  ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file},
+                                    IngestExternalFileOptions()));
+  auto options = CurrentOptions();
+  options.verify_sst_unique_id_in_manifest = true;
+  Reopen(options);
+
+  // Test ingest file without session_id and db_id (for example generated by an
+  // older version of sst_writer)
+  SyncPoint::GetInstance()->SetCallBack(
+      "PropertyBlockBuilder::AddTableProperty:Start", [&](void* props_vs) {
+        auto props = static_cast<TableProperties*>(props_vs);
+        // clear table property session_id and db_id so no unique id is written
+        props->db_session_id = "";
+        props->db_id = "";
+      });
+  std::atomic_int skipped = 0;
+  SyncPoint::GetInstance()->SetCallBack("Version::VerifySstUniqueIds::Skipped",
+                                        [&](void* /*arg*/) { skipped++; });
+  SyncPoint::GetInstance()->EnableProcessing();
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  external_file = sst_files_dir_ + "/file_to_ingest2.sst";
+  {
+    SstFileWriter sst_file_writer{EnvOptions(), CurrentOptions()};
+
+    ASSERT_OK(sst_file_writer.Open(external_file));
+    ASSERT_OK(sst_file_writer.Put("k", kIngestedVal));
+    ASSERT_OK(sst_file_writer.Finish());
+  }
+
+  ASSERT_OK(db_->IngestExternalFile(db_->DefaultColumnFamily(), {external_file},
+                                    IngestExternalFileOptions()));
+
+  options.statistics = CreateDBStatistics();
+  options.verify_sst_unique_id_in_manifest = true;
+  ASSERT_EQ(skipped, 0);
+  Reopen(options);
+  // only one sst file is not verified because of missing unique_id
+  ASSERT_EQ(skipped, 1);
+}
+
 INSTANTIATE_TEST_CASE_P(ExternalSSTFileBasicTest, ExternalSSTFileBasicTest,
                         testing::Values(std::make_tuple(true, true),
                                         std::make_tuple(true, false),
diff -pruN 7.2.2-5/db/external_sst_file_ingestion_job.cc 7.3.1-2/db/external_sst_file_ingestion_job.cc
--- 7.2.2-5/db/external_sst_file_ingestion_job.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/external_sst_file_ingestion_job.cc	2022-06-08 21:08:16.000000000 +0000
@@ -22,6 +22,7 @@
 #include "table/scoped_arena_iterator.h"
 #include "table/sst_file_writer_collectors.h"
 #include "table/table_builder.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
 #include "util/stop_watch.h"
 
@@ -142,6 +143,9 @@ Status ExternalSstFileIngestionJob::Prep
                  ingestion_options_.failed_move_fall_back_to_copy) {
         // Original file is on a different FS, use copy instead of hard linking.
         f.copy_file = true;
+        ROCKS_LOG_INFO(db_options_.info_log,
+                       "Tried to link file %s but it's not supported: %s",
+                       path_outside_db.c_str(), status.ToString().c_str());
       }
     } else {
       f.copy_file = true;
@@ -446,8 +450,8 @@ Status ExternalSstFileIngestionJob::Run(
         f.smallest_internal_key, f.largest_internal_key, f.assigned_seqno,
         f.assigned_seqno, false, f.file_temperature, kInvalidBlobFileNumber,
         oldest_ancester_time, current_time, f.file_checksum,
-        f.file_checksum_func_name, kDisableUserTimestamp,
-        kDisableUserTimestamp);
+        f.file_checksum_func_name, kDisableUserTimestamp, kDisableUserTimestamp,
+        f.unique_id);
     f_metadata.temperature = f.file_temperature;
     edit_.AddFile(f.picked_level, f_metadata);
   }
@@ -727,6 +731,16 @@ Status ExternalSstFileIngestionJob::GetI
 
   file_to_ingest->table_properties = *props;
 
+  auto s = GetSstInternalUniqueId(props->db_id, props->db_session_id,
+                                  props->orig_file_number,
+                                  &(file_to_ingest->unique_id));
+  if (!s.ok()) {
+    ROCKS_LOG_WARN(db_options_.info_log,
+                   "Failed to get SST unique id for file %s",
+                   file_to_ingest->internal_file_path.c_str());
+    file_to_ingest->unique_id = kNullUniqueId64x2;
+  }
+
   return status;
 }
 
diff -pruN 7.2.2-5/db/external_sst_file_ingestion_job.h 7.3.1-2/db/external_sst_file_ingestion_job.h
--- 7.2.2-5/db/external_sst_file_ingestion_job.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/external_sst_file_ingestion_job.h	2022-06-08 21:08:16.000000000 +0000
@@ -70,6 +70,8 @@ struct IngestedFileInfo {
   std::string file_checksum_func_name;
   // The temperature of the file to be ingested
   Temperature file_temperature = Temperature::kUnknown;
+  // Unique id of the file to be ingested
+  UniqueId64x2 unique_id{};
 };
 
 class ExternalSstFileIngestionJob {
diff -pruN 7.2.2-5/db/external_sst_file_test.cc 7.3.1-2/db/external_sst_file_test.cc
--- 7.2.2-5/db/external_sst_file_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/external_sst_file_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -118,7 +118,7 @@ class ExternalSSTFileTest
           });
       data.resize(uniq_iter - data.begin());
     }
-    std::string file_path = sst_files_dir_ + ToString(file_id);
+    std::string file_path = sst_files_dir_ + std::to_string(file_id);
     SstFileWriter sst_file_writer(EnvOptions(), options, cfh);
     Status s = sst_file_writer.Open(file_path);
     if (!s.ok()) {
@@ -172,7 +172,7 @@ class ExternalSSTFileTest
           });
       data.resize(uniq_iter - data.begin());
     }
-    std::string file_path = sst_files_dir_ + ToString(file_id);
+    std::string file_path = sst_files_dir_ + std::to_string(file_id);
     SstFileWriter sst_file_writer(EnvOptions(), options, cfh);
 
     Status s = sst_file_writer.Open(file_path);
@@ -270,7 +270,7 @@ class ExternalSSTFileTest
       ColumnFamilyHandle* cfh = nullptr) {
     std::vector<std::pair<std::string, std::string>> file_data;
     for (auto& k : keys) {
-      file_data.emplace_back(Key(k), Key(k) + ToString(file_id));
+      file_data.emplace_back(Key(k), Key(k) + std::to_string(file_id));
     }
     return GenerateAndAddExternalFile(options, file_data, file_id,
                                       allow_global_seqno, write_global_seqno,
@@ -966,7 +966,7 @@ TEST_F(ExternalSSTFileTest, MultiThreade
   // Generate file names
   std::vector<std::string> file_names;
   for (int i = 0; i < num_files; i++) {
-    std::string file_name = "file_" + ToString(i) + ".sst";
+    std::string file_name = "file_" + std::to_string(i) + ".sst";
     file_names.push_back(sst_files_dir_ + file_name);
   }
 
@@ -1116,7 +1116,7 @@ TEST_F(ExternalSSTFileTest, OverlappingR
       int range_end = key_ranges[i].second;
 
       Status s;
-      std::string range_val = "range_" + ToString(i);
+      std::string range_val = "range_" + std::to_string(i);
 
       // For 20% of ranges we use DB::Put, for 80% we use DB::AddFile
       if (i && i % 5 == 0) {
@@ -1456,7 +1456,7 @@ TEST_F(ExternalSSTFileTest, CompactDurin
     ASSERT_EQ(Get(Key(range_start)), Key(range_start)) << rid;
     ASSERT_EQ(Get(Key(range_end)), Key(range_end)) << rid;
     for (int k = range_start + 1; k < range_end; k++) {
-      std::string v = Key(k) + ToString(rid);
+      std::string v = Key(k) + std::to_string(rid);
       ASSERT_EQ(Get(Key(k)), v) << rid;
     }
   }
@@ -2405,7 +2405,7 @@ TEST_P(ExternalSSTBlockChecksumTest, DIS
     SstFileWriter sst_file_writer(EnvOptions(), options);
 
     // 2^32 - 1, will lead to data block with more than 2^32 bytes
-    size_t huge_size = port::kMaxUint32;
+    size_t huge_size = std::numeric_limits<uint32_t>::max();
 
     std::string f = sst_files_dir_ + "f.sst";
     ASSERT_OK(sst_file_writer.Open(f));
diff -pruN 7.2.2-5/db/file_indexer.h 7.3.1-2/db/file_indexer.h
--- 7.2.2-5/db/file_indexer.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/file_indexer.h	2022-06-08 21:08:16.000000000 +0000
@@ -58,10 +58,7 @@ class FileIndexer {
   void UpdateIndex(Arena* arena, const size_t num_levels,
                    std::vector<FileMetaData*>* const files);
 
-  enum {
-    // MSVC version 1800 still does not have constexpr for ::max()
-    kLevelMaxIndex = ROCKSDB_NAMESPACE::port::kMaxInt32
-  };
+  enum { kLevelMaxIndex = std::numeric_limits<int32_t>::max() };
 
  private:
   size_t num_levels_;
diff -pruN 7.2.2-5/db/flush_job.cc 7.3.1-2/db/flush_job.cc
--- 7.2.2-5/db/flush_job.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/flush_job.cc	2022-06-08 21:08:16.000000000 +0000
@@ -9,9 +9,8 @@
 
 #include "db/flush_job.h"
 
-#include <cinttypes>
-
 #include <algorithm>
+#include <cinttypes>
 #include <vector>
 
 #include "db/builder.h"
@@ -464,6 +463,7 @@ Status FlushJob::MemPurge() {
         env, ShouldReportDetailedTime(env, ioptions->stats),
         true /* internal key corruption is not ok */, range_del_agg.get(),
         nullptr, ioptions->allow_data_in_errors,
+        ioptions->enforce_single_del_contracts,
         /*compaction=*/nullptr, compaction_filter.get(),
         /*shutting_down=*/nullptr,
         /*manual_compaction_paused=*/nullptr,
@@ -809,6 +809,7 @@ Status FlushJob::WriteLevel0Table() {
 
   {
     auto write_hint = cfd_->CalculateSSTWriteHint(0);
+    Env::IOPriority io_priority = GetRateLimiterPriorityForWrite();
     db_mutex_->Unlock();
     if (log_buffer_) {
       log_buffer_->FlushBufferToLog();
@@ -924,16 +925,16 @@ Status FlushJob::WriteLevel0Table() {
           snapshot_checker_, mutable_cf_options_.paranoid_file_checks,
           cfd_->internal_stats(), &io_s, io_tracer_,
           BlobFileCreationReason::kFlush, event_logger_, job_context_->job_id,
-          Env::IO_HIGH, &table_properties_, write_hint, full_history_ts_low,
+          io_priority, &table_properties_, write_hint, full_history_ts_low,
           blob_callback_, &num_input_entries, &memtable_payload_bytes,
           &memtable_garbage_bytes);
       // TODO: Cleanup io_status in BuildTable and table builders
       assert(!s.ok() || io_s.ok());
       io_s.PermitUncheckedError();
       if (num_input_entries != total_num_entries && s.ok()) {
-        std::string msg = "Expected " + ToString(total_num_entries) +
+        std::string msg = "Expected " + std::to_string(total_num_entries) +
                           " entries in memtables, but read " +
-                          ToString(num_input_entries);
+                          std::to_string(num_input_entries);
         ROCKS_LOG_WARN(db_options_.info_log, "[%s] [JOB %d] Level-0 flush %s",
                        cfd_->GetName().c_str(), job_context_->job_id,
                        msg.c_str());
@@ -950,14 +951,14 @@ Status FlushJob::WriteLevel0Table() {
       }
       LogFlush(db_options_.info_log);
     }
-    ROCKS_LOG_INFO(db_options_.info_log,
-                   "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": %" PRIu64
-                   " bytes %s"
-                   "%s",
-                   cfd_->GetName().c_str(), job_context_->job_id,
-                   meta_.fd.GetNumber(), meta_.fd.GetFileSize(),
-                   s.ToString().c_str(),
-                   meta_.marked_for_compaction ? " (needs compaction)" : "");
+    ROCKS_LOG_BUFFER(log_buffer_,
+                     "[%s] [JOB %d] Level-0 flush table #%" PRIu64 ": %" PRIu64
+                     " bytes %s"
+                     "%s",
+                     cfd_->GetName().c_str(), job_context_->job_id,
+                     meta_.fd.GetNumber(), meta_.fd.GetFileSize(),
+                     s.ToString().c_str(),
+                     meta_.marked_for_compaction ? " (needs compaction)" : "");
 
     if (s.ok() && output_file_directory_ != nullptr && sync_output_directory_) {
       s = output_file_directory_->FsyncWithDirOptions(
@@ -987,7 +988,7 @@ Status FlushJob::WriteLevel0Table() {
                    meta_.oldest_blob_file_number, meta_.oldest_ancester_time,
                    meta_.file_creation_time, meta_.file_checksum,
                    meta_.file_checksum_func_name, meta_.min_timestamp,
-                   meta_.max_timestamp);
+                   meta_.max_timestamp, meta_.unique_id);
 
     edit_->SetBlobFileAdditions(std::move(blob_file_additions));
   }
@@ -1031,6 +1032,19 @@ Status FlushJob::WriteLevel0Table() {
   return s;
 }
 
+Env::IOPriority FlushJob::GetRateLimiterPriorityForWrite() {
+  if (versions_ && versions_->GetColumnFamilySet() &&
+      versions_->GetColumnFamilySet()->write_controller()) {
+    WriteController* write_controller =
+        versions_->GetColumnFamilySet()->write_controller();
+    if (write_controller->IsStopped() || write_controller->NeedsDelay()) {
+      return Env::IO_USER;
+    }
+  }
+
+  return Env::IO_HIGH;
+}
+
 #ifndef ROCKSDB_LITE
 std::unique_ptr<FlushJobInfo> FlushJob::GetFlushJobInfo() const {
   db_mutex_->AssertHeld();
@@ -1063,7 +1077,6 @@ std::unique_ptr<FlushJobInfo> FlushJob::
   }
   return info;
 }
-
 #endif  // !ROCKSDB_LITE
 
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/db/flush_job.h 7.3.1-2/db/flush_job.h
--- 7.2.2-5/db/flush_job.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/flush_job.h	2022-06-08 21:08:16.000000000 +0000
@@ -94,6 +94,8 @@ class FlushJob {
 #endif  // !ROCKSDB_LITE
 
  private:
+  friend class FlushJobTest_GetRateLimiterPriorityForWrite_Test;
+
   void ReportStartedFlush();
   void ReportFlushInputSize(const autovector<MemTable*>& mems);
   void RecordFlushIOStats();
@@ -121,6 +123,8 @@ class FlushJob {
   // process has not matured yet.
   Status MemPurge();
   bool MemPurgeDecider();
+  // The rate limiter priority (io_priority) is determined dynamically here.
+  Env::IOPriority GetRateLimiterPriorityForWrite();
 #ifndef ROCKSDB_LITE
   std::unique_ptr<FlushJobInfo> GetFlushJobInfo() const;
 #endif  // !ROCKSDB_LITE
diff -pruN 7.2.2-5/db/flush_job_test.cc 7.3.1-2/db/flush_job_test.cc
--- 7.2.2-5/db/flush_job_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/flush_job_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -164,12 +164,12 @@ TEST_F(FlushJobTest, Empty) {
   SnapshotChecker* snapshot_checker = nullptr;  // not relavant
   FlushJob flush_job(
       dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_,
-      *cfd->GetLatestMutableCFOptions(), port::kMaxUint64 /* memtable_id */,
-      env_options_, versions_.get(), &mutex_, &shutting_down_, {},
-      kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr,
-      nullptr, kNoCompression, nullptr, &event_logger, false,
-      true /* sync_output_directory */, true /* write_manifest */,
-      Env::Priority::USER, nullptr /*IOTracer*/);
+      *cfd->GetLatestMutableCFOptions(),
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
+      versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber,
+      snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression,
+      nullptr, &event_logger, false, true /* sync_output_directory */,
+      true /* write_manifest */, Env::Priority::USER, nullptr /*IOTracer*/);
   {
     InstrumentedMutexLock l(&mutex_);
     flush_job.PickMemTable();
@@ -191,7 +191,7 @@ TEST_F(FlushJobTest, NonEmpty) {
   //   range-delete "9995" -> "9999" at seqno 10000
   //   blob references with seqnos 10001..10006
   for (int i = 1; i < 10000; ++i) {
-    std::string key(ToString((i + 1000) % 10000));
+    std::string key(std::to_string((i + 1000) % 10000));
     std::string value("value" + key);
     ASSERT_OK(new_mem->Add(SequenceNumber(i), kTypeValue, key, value,
                            nullptr /* kv_prot_info */));
@@ -214,7 +214,7 @@ TEST_F(FlushJobTest, NonEmpty) {
   constexpr std::array<uint64_t, 6> blob_file_numbers{{
       kInvalidBlobFileNumber, 5, 103, 17, 102, 101}};
   for (size_t i = 0; i < blob_file_numbers.size(); ++i) {
-    std::string key(ToString(i + 10001));
+    std::string key(std::to_string(i + 10001));
     std::string blob_index;
     if (i == 0) {
       BlobIndex::EncodeInlinedTTL(&blob_index, /* expiration */ 1234567890ULL,
@@ -248,11 +248,12 @@ TEST_F(FlushJobTest, NonEmpty) {
   SnapshotChecker* snapshot_checker = nullptr;  // not relavant
   FlushJob flush_job(
       dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_,
-      *cfd->GetLatestMutableCFOptions(), port::kMaxUint64 /* memtable_id */,
-      env_options_, versions_.get(), &mutex_, &shutting_down_, {},
-      kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr,
-      nullptr, kNoCompression, db_options_.statistics.get(), &event_logger,
-      true, true /* sync_output_directory */, true /* write_manifest */,
+      *cfd->GetLatestMutableCFOptions(),
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
+      versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber,
+      snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression,
+      db_options_.statistics.get(), &event_logger, true,
+      true /* sync_output_directory */, true /* write_manifest */,
       Env::Priority::USER, nullptr /*IOTracer*/);
 
   HistogramData hist;
@@ -264,7 +265,7 @@ TEST_F(FlushJobTest, NonEmpty) {
   db_options_.statistics->histogramData(FLUSH_TIME, &hist);
   ASSERT_GT(hist.average, 0.0);
 
-  ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString());
+  ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString());
   ASSERT_EQ("9999a", file_meta.largest.user_key().ToString());
   ASSERT_EQ(1, file_meta.fd.smallest_seqno);
   ASSERT_EQ(10006, file_meta.fd.largest_seqno);
@@ -290,7 +291,7 @@ TEST_F(FlushJobTest, FlushMemTablesSingl
     memtable_ids.push_back(mem->GetID());
 
     for (size_t j = 0; j < num_keys_per_table; ++j) {
-      std::string key(ToString(j + i * num_keys_per_table));
+      std::string key(std::to_string(j + i * num_keys_per_table));
       std::string value("value" + key);
       ASSERT_OK(mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue,
                          key, value, nullptr /* kv_prot_info */));
@@ -325,7 +326,7 @@ TEST_F(FlushJobTest, FlushMemTablesSingl
   db_options_.statistics->histogramData(FLUSH_TIME, &hist);
   ASSERT_GT(hist.average, 0.0);
 
-  ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString());
+  ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString());
   ASSERT_EQ("99", file_meta.largest.user_key().ToString());
   ASSERT_EQ(0, file_meta.fd.smallest_seqno);
   ASSERT_EQ(SequenceNumber(num_mems_to_flush * num_keys_per_table - 1),
@@ -363,7 +364,7 @@ TEST_F(FlushJobTest, FlushMemtablesMulti
       mem->Ref();
 
       for (size_t j = 0; j != num_keys_per_memtable; ++j) {
-        std::string key(ToString(j + i * num_keys_per_memtable));
+        std::string key(std::to_string(j + i * num_keys_per_memtable));
         std::string value("value" + key);
         ASSERT_OK(mem->Add(curr_seqno++, kTypeValue, key, value,
                            nullptr /* kv_prot_info */));
@@ -438,7 +439,7 @@ TEST_F(FlushJobTest, FlushMemtablesMulti
   ASSERT_GT(hist.average, 0.0);
   k = 0;
   for (const auto& file_meta : file_metas) {
-    ASSERT_EQ(ToString(0), file_meta.smallest.user_key().ToString());
+    ASSERT_EQ(std::to_string(0), file_meta.smallest.user_key().ToString());
     ASSERT_EQ("999", file_meta.largest.user_key()
                          .ToString());  // max key by bytewise comparator
     ASSERT_EQ(smallest_seqs[k], file_meta.fd.smallest_seqno);
@@ -479,7 +480,7 @@ TEST_F(FlushJobTest, Snapshots) {
   SequenceNumber current_seqno = 0;
   auto inserted_keys = mock::MakeMockFile();
   for (int i = 1; i < keys; ++i) {
-    std::string key(ToString(i));
+    std::string key(std::to_string(i));
     int insertions = rnd.Uniform(max_inserts_per_keys);
     for (int j = 0; j < insertions; ++j) {
       std::string value(rnd.HumanReadableString(10));
@@ -509,11 +510,12 @@ TEST_F(FlushJobTest, Snapshots) {
   SnapshotChecker* snapshot_checker = nullptr;  // not relavant
   FlushJob flush_job(
       dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_,
-      *cfd->GetLatestMutableCFOptions(), port::kMaxUint64 /* memtable_id */,
-      env_options_, versions_.get(), &mutex_, &shutting_down_, snapshots,
-      kMaxSequenceNumber, snapshot_checker, &job_context, nullptr, nullptr,
-      nullptr, kNoCompression, db_options_.statistics.get(), &event_logger,
-      true, true /* sync_output_directory */, true /* write_manifest */,
+      *cfd->GetLatestMutableCFOptions(),
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
+      versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber,
+      snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression,
+      db_options_.statistics.get(), &event_logger, true,
+      true /* sync_output_directory */, true /* write_manifest */,
       Env::Priority::USER, nullptr /*IOTracer*/);
   mutex_.Lock();
   flush_job.PickMemTable();
@@ -526,6 +528,72 @@ TEST_F(FlushJobTest, Snapshots) {
   job_context.Clean();
 }
 
+TEST_F(FlushJobTest, GetRateLimiterPriorityForWrite) {
+  // Prepare a FlushJob that flush MemTables of Single Column Family.
+  const size_t num_mems = 2;
+  const size_t num_mems_to_flush = 1;
+  const size_t num_keys_per_table = 100;
+  JobContext job_context(0);
+  ColumnFamilyData* cfd = versions_->GetColumnFamilySet()->GetDefault();
+  std::vector<uint64_t> memtable_ids;
+  std::vector<MemTable*> new_mems;
+  for (size_t i = 0; i != num_mems; ++i) {
+    MemTable* mem = cfd->ConstructNewMemtable(*cfd->GetLatestMutableCFOptions(),
+                                              kMaxSequenceNumber);
+    mem->SetID(i);
+    mem->Ref();
+    new_mems.emplace_back(mem);
+    memtable_ids.push_back(mem->GetID());
+
+    for (size_t j = 0; j < num_keys_per_table; ++j) {
+      std::string key(std::to_string(j + i * num_keys_per_table));
+      std::string value("value" + key);
+      ASSERT_OK(mem->Add(SequenceNumber(j + i * num_keys_per_table), kTypeValue,
+                         key, value, nullptr /* kv_prot_info */));
+    }
+  }
+
+  autovector<MemTable*> to_delete;
+  for (auto mem : new_mems) {
+    cfd->imm()->Add(mem, &to_delete);
+  }
+
+  EventLogger event_logger(db_options_.info_log.get());
+  SnapshotChecker* snapshot_checker = nullptr;  // not relavant
+
+  assert(memtable_ids.size() == num_mems);
+  uint64_t smallest_memtable_id = memtable_ids.front();
+  uint64_t flush_memtable_id = smallest_memtable_id + num_mems_to_flush - 1;
+  FlushJob flush_job(
+      dbname_, versions_->GetColumnFamilySet()->GetDefault(), db_options_,
+      *cfd->GetLatestMutableCFOptions(), flush_memtable_id, env_options_,
+      versions_.get(), &mutex_, &shutting_down_, {}, kMaxSequenceNumber,
+      snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression,
+      db_options_.statistics.get(), &event_logger, true,
+      true /* sync_output_directory */, true /* write_manifest */,
+      Env::Priority::USER, nullptr /*IOTracer*/);
+
+  // When the state from WriteController is normal.
+  ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_HIGH);
+
+  WriteController* write_controller =
+      flush_job.versions_->GetColumnFamilySet()->write_controller();
+
+  {
+    // When the state from WriteController is Delayed.
+    std::unique_ptr<WriteControllerToken> delay_token =
+        write_controller->GetDelayToken(1000000);
+    ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_USER);
+  }
+
+  {
+    // When the state from WriteController is Stopped.
+    std::unique_ptr<WriteControllerToken> stop_token =
+        write_controller->GetStopToken();
+    ASSERT_EQ(flush_job.GetRateLimiterPriorityForWrite(), Env::IO_USER);
+  }
+}
+
 class FlushJobTimestampTest : public FlushJobTestBase {
  public:
   FlushJobTimestampTest()
@@ -577,9 +645,9 @@ TEST_F(FlushJobTimestampTest, AllKeysExp
   PutFixed64(&full_history_ts_low, std::numeric_limits<uint64_t>::max());
   FlushJob flush_job(
       dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
-      port::kMaxUint64 /* memtable_id */, env_options_, versions_.get(),
-      &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber, snapshot_checker,
-      &job_context, nullptr, nullptr, nullptr, kNoCompression,
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
+      versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber,
+      snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression,
       db_options_.statistics.get(), &event_logger, true,
       true /* sync_output_directory */, true /* write_manifest */,
       Env::Priority::USER, nullptr /*IOTracer*/, /*db_id=*/"",
@@ -628,9 +696,9 @@ TEST_F(FlushJobTimestampTest, NoKeyExpir
   PutFixed64(&full_history_ts_low, 0);
   FlushJob flush_job(
       dbname_, cfd, db_options_, *cfd->GetLatestMutableCFOptions(),
-      port::kMaxUint64 /* memtable_id */, env_options_, versions_.get(),
-      &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber, snapshot_checker,
-      &job_context, nullptr, nullptr, nullptr, kNoCompression,
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, env_options_,
+      versions_.get(), &mutex_, &shutting_down_, snapshots, kMaxSequenceNumber,
+      snapshot_checker, &job_context, nullptr, nullptr, nullptr, kNoCompression,
       db_options_.statistics.get(), &event_logger, true,
       true /* sync_output_directory */, true /* write_manifest */,
       Env::Priority::USER, nullptr /*IOTracer*/, /*db_id=*/"",
diff -pruN 7.2.2-5/db/forward_iterator.cc 7.3.1-2/db/forward_iterator.cc
--- 7.2.2-5/db/forward_iterator.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/forward_iterator.cc	2022-06-08 21:08:16.000000000 +0000
@@ -604,7 +604,7 @@ bool ForwardIterator::PrepareValue() {
 Status ForwardIterator::GetProperty(std::string prop_name, std::string* prop) {
   assert(prop != nullptr);
   if (prop_name == "rocksdb.iterator.super-version-number") {
-    *prop = ToString(sv_->version_number);
+    *prop = std::to_string(sv_->version_number);
     return Status::OK();
   }
   return Status::InvalidArgument();
diff -pruN 7.2.2-5/db/import_column_family_job.cc 7.3.1-2/db/import_column_family_job.cc
--- 7.2.2-5/db/import_column_family_job.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/import_column_family_job.cc	2022-06-08 21:08:16.000000000 +0000
@@ -15,6 +15,7 @@
 #include "table/scoped_arena_iterator.h"
 #include "table/sst_file_writer_collectors.h"
 #include "table/table_builder.h"
+#include "table/unique_id_impl.h"
 #include "util/stop_watch.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -97,6 +98,9 @@ Status ImportColumnFamilyJob::Prepare(ui
       if (status.IsNotSupported()) {
         // Original file is on a different FS, use copy instead of hard linking
         hardlink_files = false;
+        ROCKS_LOG_INFO(db_options_.info_log,
+                       "Try to link file %s but it's not supported : %s",
+                       f.internal_file_path.c_str(), status.ToString().c_str());
       }
     }
     if (!hardlink_files) {
@@ -156,7 +160,7 @@ Status ImportColumnFamilyJob::Run() {
                   file_metadata.largest_seqno, false, file_metadata.temperature,
                   kInvalidBlobFileNumber, oldest_ancester_time, current_time,
                   kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                  kDisableUserTimestamp, kDisableUserTimestamp);
+                  kDisableUserTimestamp, kDisableUserTimestamp, f.unique_id);
 
     // If incoming sequence number is higher, update local sequence number.
     if (file_metadata.largest_seqno > versions_->LastSequence()) {
@@ -285,6 +289,15 @@ Status ImportColumnFamilyJob::GetIngeste
 
   file_to_import->table_properties = *props;
 
+  auto s = GetSstInternalUniqueId(props->db_id, props->db_session_id,
+                                  props->orig_file_number,
+                                  &(file_to_import->unique_id));
+  if (!s.ok()) {
+    ROCKS_LOG_WARN(db_options_.info_log,
+                   "Failed to get SST unique id for file %s",
+                   file_to_import->internal_file_path.c_str());
+  }
+
   return status;
 }
 
diff -pruN 7.2.2-5/db/import_column_family_test.cc 7.3.1-2/db/import_column_family_test.cc
--- 7.2.2-5/db/import_column_family_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/import_column_family_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -130,6 +130,12 @@ TEST_F(ImportColumnFamilyTest, ImportSST
     ASSERT_OK(db_->Get(ReadOptions(), import_cfh_, "K4", &value));
     ASSERT_EQ(value, "V2");
   }
+  EXPECT_OK(db_->DestroyColumnFamilyHandle(import_cfh_));
+  import_cfh_ = nullptr;
+
+  // verify sst unique id during reopen
+  options.verify_sst_unique_id_in_manifest = true;
+  ReopenWithColumnFamilies({"default", "koko", "yoyo"}, options);
 }
 
 TEST_F(ImportColumnFamilyTest, ImportSSTFileWriterFilesWithOverlap) {
diff -pruN 7.2.2-5/db/internal_stats.cc 7.3.1-2/db/internal_stats.cc
--- 7.2.2-5/db/internal_stats.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/internal_stats.cc	2022-06-08 21:08:16.000000000 +0000
@@ -704,20 +704,19 @@ void InternalStats::CacheEntryRoleStats:
   auto& v = *values;
   v[BlockCacheEntryStatsMapKeys::CacheId()] = cache_id;
   v[BlockCacheEntryStatsMapKeys::CacheCapacityBytes()] =
-      ROCKSDB_NAMESPACE::ToString(cache_capacity);
+      std::to_string(cache_capacity);
   v[BlockCacheEntryStatsMapKeys::LastCollectionDurationSeconds()] =
-      ROCKSDB_NAMESPACE::ToString(GetLastDurationMicros() / 1000000.0);
+      std::to_string(GetLastDurationMicros() / 1000000.0);
   v[BlockCacheEntryStatsMapKeys::LastCollectionAgeSeconds()] =
-      ROCKSDB_NAMESPACE::ToString((clock->NowMicros() - last_end_time_micros_) /
-                                  1000000U);
+      std::to_string((clock->NowMicros() - last_end_time_micros_) / 1000000U);
   for (size_t i = 0; i < kNumCacheEntryRoles; ++i) {
     auto role = static_cast<CacheEntryRole>(i);
     v[BlockCacheEntryStatsMapKeys::EntryCount(role)] =
-        ROCKSDB_NAMESPACE::ToString(entry_counts[i]);
+        std::to_string(entry_counts[i]);
     v[BlockCacheEntryStatsMapKeys::UsedBytes(role)] =
-        ROCKSDB_NAMESPACE::ToString(total_charges[i]);
+        std::to_string(total_charges[i]);
     v[BlockCacheEntryStatsMapKeys::UsedPercent(role)] =
-        ROCKSDB_NAMESPACE::ToString(100.0 * total_charges[i] / cache_capacity);
+        std::to_string(100.0 * total_charges[i] / cache_capacity);
   }
 }
 
@@ -763,7 +762,7 @@ bool InternalStats::HandleLiveSstFilesSi
     }
   }
 
-  *value = ToString(size);
+  *value = std::to_string(size);
   return true;
 }
 
@@ -919,7 +918,7 @@ bool InternalStats::HandleCompressionRat
   if (!ok || level >= static_cast<uint64_t>(number_levels_)) {
     return false;
   }
-  *value = ToString(
+  *value = std::to_string(
       vstorage->GetEstimatedCompressionRatioAtLevel(static_cast<int>(level)));
   return true;
 }
@@ -1006,7 +1005,7 @@ static std::map<std::string, std::string
     const std::map<std::string, uint64_t>& from) {
   std::map<std::string, std::string> to;
   for (const auto& e : from) {
-    to[e.first] = ToString(e.second);
+    to[e.first] = std::to_string(e.second);
   }
   return to;
 }
@@ -1500,7 +1499,7 @@ void InternalStats::DumpCFMapStats(
   DumpCFMapStats(vstorage, &levels_stats, &compaction_stats_sum);
   for (auto const& level_ent : levels_stats) {
     auto level_str =
-        level_ent.first == -1 ? "Sum" : "L" + ToString(level_ent.first);
+        level_ent.first == -1 ? "Sum" : "L" + std::to_string(level_ent.first);
     for (auto const& stat_ent : level_ent.second) {
       auto stat_type = stat_ent.first;
       auto key_str =
@@ -1651,7 +1650,8 @@ void InternalStats::DumpCFStatsNoFileHis
   DumpCFMapStats(vstorage, &levels_stats, &compaction_stats_sum);
   for (int l = 0; l < number_levels_; ++l) {
     if (levels_stats.find(l) != levels_stats.end()) {
-      PrintLevelStats(buf, sizeof(buf), "L" + ToString(l), levels_stats[l]);
+      PrintLevelStats(buf, sizeof(buf), "L" + std::to_string(l),
+                      levels_stats[l]);
       value->append(buf);
     }
   }
diff -pruN 7.2.2-5/db/listener_test.cc 7.3.1-2/db/listener_test.cc
--- 7.2.2-5/db/listener_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/listener_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -436,10 +436,10 @@ TEST_F(EventListenerTest, MultiDBMultiLi
   std::vector<std::vector<ColumnFamilyHandle *>> vec_handles;
 
   for (int d = 0; d < kNumDBs; ++d) {
-    ASSERT_OK(DestroyDB(dbname_ + ToString(d), options));
+    ASSERT_OK(DestroyDB(dbname_ + std::to_string(d), options));
     DB* db;
     std::vector<ColumnFamilyHandle*> handles;
-    ASSERT_OK(DB::Open(options, dbname_ + ToString(d), &db));
+    ASSERT_OK(DB::Open(options, dbname_ + std::to_string(d), &db));
     for (size_t c = 0; c < cf_names.size(); ++c) {
       ColumnFamilyHandle* handle;
       ASSERT_OK(db->CreateColumnFamily(cf_opts, cf_names[c], &handle));
@@ -527,7 +527,8 @@ TEST_F(EventListenerTest, DisableBGCompa
   // keep writing until writes are forced to stop.
   for (int i = 0; static_cast<int>(cf_meta.file_count) < kSlowdownTrigger * 10;
        ++i) {
-    ASSERT_OK(Put(1, ToString(i), std::string(10000, 'x'), WriteOptions()));
+    ASSERT_OK(
+        Put(1, std::to_string(i), std::string(10000, 'x'), WriteOptions()));
     FlushOptions fo;
     fo.allow_write_stall = true;
     ASSERT_OK(db_->Flush(fo, handles_[1]));
diff -pruN 7.2.2-5/db/memtable.cc 7.3.1-2/db/memtable.cc
--- 7.2.2-5/db/memtable.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/memtable.cc	2022-06-08 21:08:16.000000000 +0000
@@ -140,8 +140,8 @@ size_t MemTable::ApproximateMemoryUsage(
   for (size_t usage : usages) {
     // If usage + total_usage >= kMaxSizet, return kMaxSizet.
     // the following variation is to avoid numeric overflow.
-    if (usage >= port::kMaxSizet - total_usage) {
-      return port::kMaxSizet;
+    if (usage >= std::numeric_limits<size_t>::max() - total_usage) {
+      return std::numeric_limits<size_t>::max();
     }
     total_usage += usage;
   }
diff -pruN 7.2.2-5/db/memtable_list.h 7.3.1-2/db/memtable_list.h
--- 7.2.2-5/db/memtable_list.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/memtable_list.h	2022-06-08 21:08:16.000000000 +0000
@@ -315,7 +315,7 @@ class MemTableList {
   // PickMemtablesToFlush() is called.
   void FlushRequested() {
     flush_requested_ = true;
-    // If there are some memtables stored in imm() that dont trigger
+    // If there are some memtables stored in imm() that don't trigger
     // flush (eg: mempurge output memtable), then update imm_flush_needed.
     // Note: if race condition and imm_flush_needed is set to true
     // when there is num_flush_not_started_==0, then there is no
diff -pruN 7.2.2-5/db/memtable_list_test.cc 7.3.1-2/db/memtable_list_test.cc
--- 7.2.2-5/db/memtable_list_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/memtable_list_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -209,7 +209,8 @@ TEST_F(MemTableListTest, Empty) {
   ASSERT_FALSE(list.IsFlushPending());
 
   autovector<MemTable*> mems;
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &mems);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &mems);
   ASSERT_EQ(0, mems.size());
 
   autovector<MemTable*> to_delete;
@@ -418,7 +419,8 @@ TEST_F(MemTableListTest, GetFromHistoryT
   // Flush this memtable from the list.
   // (It will then be a part of the memtable history).
   autovector<MemTable*> to_flush;
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   ASSERT_EQ(1, to_flush.size());
 
   MutableCFOptions mutable_cf_options(options);
@@ -472,7 +474,8 @@ TEST_F(MemTableListTest, GetFromHistoryT
   ASSERT_EQ(0, to_delete.size());
 
   to_flush.clear();
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   ASSERT_EQ(1, to_flush.size());
 
   // Flush second memtable
@@ -575,15 +578,15 @@ TEST_F(MemTableListTest, FlushPendingTes
     std::string value;
     MergeContext merge_context;
 
-    ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", ToString(i),
+    ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", std::to_string(i),
                        nullptr /* kv_prot_info */));
-    ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN",
+    ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + std::to_string(i), "valueN",
                        nullptr /* kv_prot_info */));
-    ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value",
+    ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + std::to_string(i), "value",
                        nullptr /* kv_prot_info */));
-    ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM",
+    ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + std::to_string(i), "valueM",
                        nullptr /* kv_prot_info */));
-    ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), "",
+    ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + std::to_string(i), "",
                        nullptr /* kv_prot_info */));
 
     tables.push_back(mem);
@@ -593,7 +596,8 @@ TEST_F(MemTableListTest, FlushPendingTes
   ASSERT_FALSE(list.IsFlushPending());
   ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire));
   autovector<MemTable*> to_flush;
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   ASSERT_EQ(0, to_flush.size());
 
   // Request a flush even though there is nothing to flush
@@ -602,7 +606,8 @@ TEST_F(MemTableListTest, FlushPendingTes
   ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire));
 
   // Attempt to 'flush' to clear request for flush
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   ASSERT_EQ(0, to_flush.size());
   ASSERT_FALSE(list.IsFlushPending());
   ASSERT_FALSE(list.imm_flush_needed.load(std::memory_order_acquire));
@@ -626,7 +631,8 @@ TEST_F(MemTableListTest, FlushPendingTes
   ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire));
 
   // Pick tables to flush
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   ASSERT_EQ(2, to_flush.size());
   ASSERT_EQ(2, list.NumNotFlushed());
   ASSERT_FALSE(list.IsFlushPending());
@@ -647,7 +653,8 @@ TEST_F(MemTableListTest, FlushPendingTes
   ASSERT_EQ(0, to_delete.size());
 
   // Pick tables to flush
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   ASSERT_EQ(3, to_flush.size());
   ASSERT_EQ(3, list.NumNotFlushed());
   ASSERT_FALSE(list.IsFlushPending());
@@ -655,7 +662,8 @@ TEST_F(MemTableListTest, FlushPendingTes
 
   // Pick tables to flush again
   autovector<MemTable*> to_flush2;
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush2);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush2);
   ASSERT_EQ(0, to_flush2.size());
   ASSERT_EQ(3, list.NumNotFlushed());
   ASSERT_FALSE(list.IsFlushPending());
@@ -673,7 +681,8 @@ TEST_F(MemTableListTest, FlushPendingTes
   ASSERT_TRUE(list.imm_flush_needed.load(std::memory_order_acquire));
 
   // Pick tables to flush again
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush2);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush2);
   ASSERT_EQ(1, to_flush2.size());
   ASSERT_EQ(4, list.NumNotFlushed());
   ASSERT_FALSE(list.IsFlushPending());
@@ -694,7 +703,8 @@ TEST_F(MemTableListTest, FlushPendingTes
   ASSERT_EQ(0, to_delete.size());
 
   // Pick tables to flush
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush);
   // Should pick 4 of 5 since 1 table has been picked in to_flush2
   ASSERT_EQ(4, to_flush.size());
   ASSERT_EQ(5, list.NumNotFlushed());
@@ -703,7 +713,8 @@ TEST_F(MemTableListTest, FlushPendingTes
 
   // Pick tables to flush again
   autovector<MemTable*> to_flush3;
-  list.PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */, &to_flush3);
+  list.PickMemtablesToFlush(
+      std::numeric_limits<uint64_t>::max() /* memtable_id */, &to_flush3);
   ASSERT_EQ(0, to_flush3.size());  // nothing not in progress of being flushed
   ASSERT_EQ(5, list.NumNotFlushed());
   ASSERT_FALSE(list.IsFlushPending());
@@ -849,15 +860,15 @@ TEST_F(MemTableListTest, AtomicFlusTest)
 
       std::string value;
 
-      ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", ToString(i),
+      ASSERT_OK(mem->Add(++seq, kTypeValue, "key1", std::to_string(i),
                          nullptr /* kv_prot_info */));
-      ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + ToString(i), "valueN",
+      ASSERT_OK(mem->Add(++seq, kTypeValue, "keyN" + std::to_string(i),
+                         "valueN", nullptr /* kv_prot_info */));
+      ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + std::to_string(i), "value",
                          nullptr /* kv_prot_info */));
-      ASSERT_OK(mem->Add(++seq, kTypeValue, "keyX" + ToString(i), "value",
-                         nullptr /* kv_prot_info */));
-      ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + ToString(i), "valueM",
-                         nullptr /* kv_prot_info */));
-      ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + ToString(i), "",
+      ASSERT_OK(mem->Add(++seq, kTypeValue, "keyM" + std::to_string(i),
+                         "valueM", nullptr /* kv_prot_info */));
+      ASSERT_OK(mem->Add(++seq, kTypeDeletion, "keyX" + std::to_string(i), "",
                          nullptr /* kv_prot_info */));
 
       elem.push_back(mem);
@@ -872,8 +883,9 @@ TEST_F(MemTableListTest, AtomicFlusTest)
     auto* list = lists[i];
     ASSERT_FALSE(list->IsFlushPending());
     ASSERT_FALSE(list->imm_flush_needed.load(std::memory_order_acquire));
-    list->PickMemtablesToFlush(port::kMaxUint64 /* memtable_id */,
-                               &flush_candidates[i]);
+    list->PickMemtablesToFlush(
+        std::numeric_limits<uint64_t>::max() /* memtable_id */,
+        &flush_candidates[i]);
     ASSERT_EQ(0, flush_candidates[i].size());
   }
   // Request flush even though there is nothing to flush
diff -pruN 7.2.2-5/db/obsolete_files_test.cc 7.3.1-2/db/obsolete_files_test.cc
--- 7.2.2-5/db/obsolete_files_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/obsolete_files_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -41,7 +41,7 @@ class ObsoleteFilesTest : public DBTestB
     WriteOptions options;
     options.sync = false;
     for (int i = startkey; i < (numkeys + startkey) ; i++) {
-      std::string temp = ToString(i);
+      std::string temp = std::to_string(i);
       Slice key(temp);
       Slice value(temp);
       ASSERT_OK(db_->Put(options, key, value));
diff -pruN 7.2.2-5/db/perf_context_test.cc 7.3.1-2/db/perf_context_test.cc
--- 7.2.2-5/db/perf_context_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/perf_context_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -75,21 +75,21 @@ TEST_F(PerfContextTest, SeekIntoDeletion
   ReadOptions read_options;
 
   for (int i = 0; i < FLAGS_total_keys; ++i) {
-    std::string key = "k" + ToString(i);
-    std::string value = "v" + ToString(i);
+    std::string key = "k" + std::to_string(i);
+    std::string value = "v" + std::to_string(i);
 
     ASSERT_OK(db->Put(write_options, key, value));
   }
 
   for (int i = 0; i < FLAGS_total_keys -1 ; ++i) {
-    std::string key = "k" + ToString(i);
+    std::string key = "k" + std::to_string(i);
     ASSERT_OK(db->Delete(write_options, key));
   }
 
   HistogramImpl hist_get;
   HistogramImpl hist_get_time;
   for (int i = 0; i < FLAGS_total_keys - 1; ++i) {
-    std::string key = "k" + ToString(i);
+    std::string key = "k" + std::to_string(i);
     std::string value;
 
     get_perf_context()->Reset();
@@ -130,7 +130,7 @@ TEST_F(PerfContextTest, SeekIntoDeletion
   HistogramImpl hist_seek;
   for (int i = 0; i < FLAGS_total_keys; ++i) {
     std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
-    std::string key = "k" + ToString(i);
+    std::string key = "k" + std::to_string(i);
 
     get_perf_context()->Reset();
     StopWatchNano timer(SystemClock::Default().get(), true);
@@ -265,8 +265,8 @@ void ProfileQueries(bool enabled_time =
       continue;
     }
 
-    std::string key = "k" + ToString(i);
-    std::string value = "v" + ToString(i);
+    std::string key = "k" + std::to_string(i);
+    std::string value = "v" + std::to_string(i);
 
     std::vector<std::string> values;
 
@@ -297,8 +297,8 @@ void ProfileQueries(bool enabled_time =
     if (i == kFlushFlag) {
       continue;
     }
-    std::string key = "k" + ToString(i);
-    std::string expected_value = "v" + ToString(i);
+    std::string key = "k" + std::to_string(i);
+    std::string expected_value = "v" + std::to_string(i);
     std::string value;
 
     std::vector<Slice> multiget_keys = {Slice(key)};
@@ -415,8 +415,8 @@ void ProfileQueries(bool enabled_time =
     if (i == kFlushFlag) {
       continue;
     }
-    std::string key = "k" + ToString(i);
-    std::string expected_value = "v" + ToString(i);
+    std::string key = "k" + std::to_string(i);
+    std::string expected_value = "v" + std::to_string(i);
     std::string value;
 
     std::vector<Slice> multiget_keys = {Slice(key)};
@@ -543,8 +543,8 @@ TEST_F(PerfContextTest, SeekKeyCompariso
   SetPerfLevel(kEnableTime);
   StopWatchNano timer(SystemClock::Default().get());
   for (const int i : keys) {
-    std::string key = "k" + ToString(i);
-    std::string value = "v" + ToString(i);
+    std::string key = "k" + std::to_string(i);
+    std::string value = "v" + std::to_string(i);
 
     get_perf_context()->Reset();
     timer.Start();
@@ -565,8 +565,8 @@ TEST_F(PerfContextTest, SeekKeyCompariso
   HistogramImpl hist_next;
 
   for (int i = 0; i < FLAGS_total_keys; ++i) {
-    std::string key = "k" + ToString(i);
-    std::string value = "v" + ToString(i);
+    std::string key = "k" + std::to_string(i);
+    std::string value = "v" + std::to_string(i);
 
     std::unique_ptr<Iterator> iter(db->NewIterator(read_options));
     get_perf_context()->Reset();
@@ -841,7 +841,7 @@ TEST_F(PerfContextTest, CPUTimer) {
 
   std::string max_str = "0";
   for (int i = 0; i < FLAGS_total_keys; ++i) {
-    std::string i_str = ToString(i);
+    std::string i_str = std::to_string(i);
     std::string key = "k" + i_str;
     std::string value = "v" + i_str;
     max_str = max_str > i_str ? max_str : i_str;
@@ -935,9 +935,9 @@ TEST_F(PerfContextTest, CPUTimer) {
     get_perf_context()->Reset();
     auto count = get_perf_context()->iter_seek_cpu_nanos;
     for (int i = 0; i < FLAGS_total_keys; ++i) {
-      iter->Seek("k" + ToString(i));
+      iter->Seek("k" + std::to_string(i));
       ASSERT_TRUE(iter->Valid());
-      ASSERT_EQ("v" + ToString(i), iter->value().ToString());
+      ASSERT_EQ("v" + std::to_string(i), iter->value().ToString());
       auto next_count = get_perf_context()->iter_seek_cpu_nanos;
       ASSERT_GT(next_count, count);
       count = next_count;
diff -pruN 7.2.2-5/db/plain_table_db_test.cc 7.3.1-2/db/plain_table_db_test.cc
--- 7.2.2-5/db/plain_table_db_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/plain_table_db_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -220,8 +220,8 @@ class PlainTableDBTest : public testing:
 
   int NumTableFilesAtLevel(int level) {
     std::string property;
-    EXPECT_TRUE(db_->GetProperty("rocksdb.num-files-at-level" + ToString(level),
-                                 &property));
+    EXPECT_TRUE(db_->GetProperty(
+        "rocksdb.num-files-at-level" + std::to_string(level), &property));
     return atoi(property.c_str());
   }
 
@@ -889,7 +889,7 @@ TEST_P(PlainTableDBTest, IteratorLargeKe
   };
 
   for (size_t i = 0; i < 7; i++) {
-    ASSERT_OK(Put(key_list[i], ToString(i)));
+    ASSERT_OK(Put(key_list[i], std::to_string(i)));
   }
 
   ASSERT_OK(dbfull()->TEST_FlushMemTable());
@@ -900,7 +900,7 @@ TEST_P(PlainTableDBTest, IteratorLargeKe
   for (size_t i = 0; i < 7; i++) {
     ASSERT_TRUE(iter->Valid());
     ASSERT_EQ(key_list[i], iter->key().ToString());
-    ASSERT_EQ(ToString(i), iter->value().ToString());
+    ASSERT_EQ(std::to_string(i), iter->value().ToString());
     iter->Next();
   }
 
@@ -937,7 +937,7 @@ TEST_P(PlainTableDBTest, IteratorLargeKe
       MakeLongKeyWithPrefix(26, '6')};
 
   for (size_t i = 0; i < 7; i++) {
-    ASSERT_OK(Put(key_list[i], ToString(i)));
+    ASSERT_OK(Put(key_list[i], std::to_string(i)));
   }
 
   ASSERT_OK(dbfull()->TEST_FlushMemTable());
@@ -948,7 +948,7 @@ TEST_P(PlainTableDBTest, IteratorLargeKe
   for (size_t i = 0; i < 7; i++) {
     ASSERT_TRUE(iter->Valid());
     ASSERT_EQ(key_list[i], iter->key().ToString());
-    ASSERT_EQ(ToString(i), iter->value().ToString());
+    ASSERT_EQ(std::to_string(i), iter->value().ToString());
     iter->Next();
   }
 
diff -pruN 7.2.2-5/db/prefix_test.cc 7.3.1-2/db/prefix_test.cc
--- 7.2.2-5/db/prefix_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/prefix_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -628,7 +628,7 @@ TEST_F(PrefixTest, DynamicPrefixIterator
       TestKey test_key(prefix, FLAGS_items_per_prefix / 2);
       std::string s;
       Slice key = TestKeyToSlice(s, test_key);
-      std::string value = "v" + ToString(0);
+      std::string value = "v" + std::to_string(0);
 
       get_perf_context()->Reset();
       StopWatchNano timer(SystemClock::Default().get(), true);
diff -pruN 7.2.2-5/db/repair.cc 7.3.1-2/db/repair.cc
--- 7.2.2-5/db/repair.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/repair.cc	2022-06-08 21:08:16.000000000 +0000
@@ -82,6 +82,7 @@
 #include "rocksdb/options.h"
 #include "rocksdb/write_buffer_manager.h"
 #include "table/scoped_arena_iterator.h"
+#include "table/unique_id_impl.h"
 #include "util/string_util.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -147,7 +148,7 @@ class Repairer {
     const auto* cf_opts = GetColumnFamilyOptions(cf_name);
     if (cf_opts == nullptr) {
       return Status::Corruption("Encountered unknown column family with name=" +
-                                cf_name + ", id=" + ToString(cf_id));
+                                cf_name + ", id=" + std::to_string(cf_id));
     }
     Options opts(db_options_, *cf_opts);
     MutableCFOptions mut_cf_opts(opts);
@@ -505,6 +506,15 @@ class Repairer {
                                                 t->meta.fd, &props);
     }
     if (status.ok()) {
+      auto s =
+          GetSstInternalUniqueId(props->db_id, props->db_session_id,
+                                 props->orig_file_number, &t->meta.unique_id);
+      if (!s.ok()) {
+        ROCKS_LOG_WARN(db_options_.info_log,
+                       "Table #%" PRIu64
+                       ": unable to get unique id, default to Unknown.",
+                       t->meta.fd.GetNumber());
+      }
       t->column_family_id = static_cast<uint32_t>(props->column_family_id);
       if (t->column_family_id ==
           TablePropertiesCollectorFactory::Context::kUnknownColumnFamily) {
@@ -639,7 +649,8 @@ class Repairer {
             table->meta.temperature, table->meta.oldest_blob_file_number,
             table->meta.oldest_ancester_time, table->meta.file_creation_time,
             table->meta.file_checksum, table->meta.file_checksum_func_name,
-            table->meta.min_timestamp, table->meta.max_timestamp);
+            table->meta.min_timestamp, table->meta.max_timestamp,
+            table->meta.unique_id);
       }
       assert(next_file_number_ > 0);
       vset_.MarkFileNumberUsed(next_file_number_ - 1);
diff -pruN 7.2.2-5/db/repair_test.cc 7.3.1-2/db/repair_test.cc
--- 7.2.2-5/db/repair_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/repair_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -43,6 +43,23 @@ class RepairTest : public DBTestBase {
     }
     return s;
   }
+
+  void ReopenWithSstIdVerify() {
+    std::atomic_int verify_passed{0};
+    SyncPoint::GetInstance()->SetCallBack(
+        "Version::VerifySstUniqueIds::Passed", [&](void* arg) {
+          // override job status
+          auto id = static_cast<std::string*>(arg);
+          assert(!id->empty());
+          verify_passed++;
+        });
+    SyncPoint::GetInstance()->EnableProcessing();
+    auto options = CurrentOptions();
+    options.verify_sst_unique_id_in_manifest = true;
+    Reopen(options);
+
+    ASSERT_GT(verify_passed, 0);
+  }
 };
 
 TEST_F(RepairTest, LostManifest) {
@@ -61,7 +78,7 @@ TEST_F(RepairTest, LostManifest) {
   ASSERT_OK(env_->FileExists(manifest_path));
   ASSERT_OK(env_->DeleteFile(manifest_path));
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+  ReopenWithSstIdVerify();
 
   ASSERT_EQ(Get("key"), "val");
   ASSERT_EQ(Get("key2"), "val2");
@@ -88,7 +105,9 @@ TEST_F(RepairTest, LostManifestMoreDbFea
   ASSERT_OK(env_->FileExists(manifest_path));
   ASSERT_OK(env_->DeleteFile(manifest_path));
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+
+  // repair from sst should work with unique_id verification
+  ReopenWithSstIdVerify();
 
   ASSERT_EQ(Get("key"), "val");
   ASSERT_EQ(Get("key2"), "NOT_FOUND");
@@ -113,7 +132,8 @@ TEST_F(RepairTest, CorruptManifest) {
   ASSERT_OK(CreateFile(env_->GetFileSystem(), manifest_path, "blah",
                        false /* use_fsync */));
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+
+  ReopenWithSstIdVerify();
 
   ASSERT_EQ(Get("key"), "val");
   ASSERT_EQ(Get("key2"), "val2");
@@ -139,7 +159,8 @@ TEST_F(RepairTest, IncompleteManifest) {
   // Replace the manifest with one that is only aware of the first SST file.
   CopyFile(orig_manifest_path + ".tmp", new_manifest_path);
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+
+  ReopenWithSstIdVerify();
 
   ASSERT_EQ(Get("key"), "val");
   ASSERT_EQ(Get("key2"), "val2");
@@ -157,7 +178,8 @@ TEST_F(RepairTest, PostRepairSstFileNumb
 
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
 
-  Reopen(CurrentOptions());
+  ReopenWithSstIdVerify();
+
   uint64_t post_repair_file_num = dbfull()->TEST_Current_Next_FileNo();
   ASSERT_GE(post_repair_file_num, pre_repair_file_num);
 }
@@ -176,7 +198,7 @@ TEST_F(RepairTest, LostSst) {
 
   Close();
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+  ReopenWithSstIdVerify();
 
   // Exactly one of the key-value pairs should be in the DB now.
   ASSERT_TRUE((Get("key") == "val") != (Get("key2") == "val2"));
@@ -198,7 +220,7 @@ TEST_F(RepairTest, CorruptSst) {
 
   Close();
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+  ReopenWithSstIdVerify();
 
   // Exactly one of the key-value pairs should be in the DB now.
   ASSERT_TRUE((Get("key") == "val") != (Get("key2") == "val2"));
@@ -226,7 +248,7 @@ TEST_F(RepairTest, UnflushedSst) {
   ASSERT_OK(env_->FileExists(manifest_path));
   ASSERT_OK(env_->DeleteFile(manifest_path));
   ASSERT_OK(RepairDB(dbname_, CurrentOptions()));
-  Reopen(CurrentOptions());
+  ReopenWithSstIdVerify();
 
   ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
   ASSERT_EQ(wal_files.size(), 0);
@@ -265,7 +287,7 @@ TEST_F(RepairTest, SeparateWalDir) {
     // make sure that all WALs are converted to SSTables.
     options.wal_dir = "";
 
-    Reopen(options);
+    ReopenWithSstIdVerify();
     ASSERT_OK(dbfull()->GetSortedWalFiles(wal_files));
     ASSERT_EQ(wal_files.size(), 0);
     {
@@ -289,7 +311,7 @@ TEST_F(RepairTest, RepairMultipleColumnF
   CreateAndReopenWithCF({"pikachu1", "pikachu2"}, CurrentOptions());
   for (int i = 0; i < kNumCfs; ++i) {
     for (int j = 0; j < kEntriesPerCf; ++j) {
-      ASSERT_OK(Put(i, "key" + ToString(j), "val" + ToString(j)));
+      ASSERT_OK(Put(i, "key" + std::to_string(j), "val" + std::to_string(j)));
       if (j == kEntriesPerCf - 1 && i == kNumCfs - 1) {
         // Leave one unflushed so we can verify WAL entries are properly
         // associated with column families.
@@ -313,7 +335,7 @@ TEST_F(RepairTest, RepairMultipleColumnF
                            CurrentOptions());
   for (int i = 0; i < kNumCfs; ++i) {
     for (int j = 0; j < kEntriesPerCf; ++j) {
-      ASSERT_EQ(Get(i, "key" + ToString(j)), "val" + ToString(j));
+      ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j));
     }
   }
 }
@@ -334,7 +356,7 @@ TEST_F(RepairTest, RepairColumnFamilyOpt
                            std::vector<Options>{opts, rev_opts});
   for (int i = 0; i < kNumCfs; ++i) {
     for (int j = 0; j < kEntriesPerCf; ++j) {
-      ASSERT_OK(Put(i, "key" + ToString(j), "val" + ToString(j)));
+      ASSERT_OK(Put(i, "key" + std::to_string(j), "val" + std::to_string(j)));
       if (i == kNumCfs - 1 && j == kEntriesPerCf - 1) {
         // Leave one unflushed so we can verify RepairDB's flush logic
         continue;
@@ -352,7 +374,7 @@ TEST_F(RepairTest, RepairColumnFamilyOpt
                                         std::vector<Options>{opts, rev_opts}));
   for (int i = 0; i < kNumCfs; ++i) {
     for (int j = 0; j < kEntriesPerCf; ++j) {
-      ASSERT_EQ(Get(i, "key" + ToString(j)), "val" + ToString(j));
+      ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j));
     }
   }
 
@@ -377,7 +399,7 @@ TEST_F(RepairTest, RepairColumnFamilyOpt
                                         std::vector<Options>{opts, rev_opts}));
   for (int i = 0; i < kNumCfs; ++i) {
     for (int j = 0; j < kEntriesPerCf; ++j) {
-      ASSERT_EQ(Get(i, "key" + ToString(j)), "val" + ToString(j));
+      ASSERT_EQ(Get(i, "key" + std::to_string(j)), "val" + std::to_string(j));
     }
   }
 }
@@ -398,7 +420,7 @@ TEST_F(RepairTest, DbNameContainsTrailin
   Close();
 
   ASSERT_OK(RepairDB(dbname_ + "/", CurrentOptions()));
-  Reopen(CurrentOptions());
+  ReopenWithSstIdVerify();
   ASSERT_EQ(Get("key"), "val");
 }
 #endif  // ROCKSDB_LITE
diff -pruN 7.2.2-5/db/snapshot_checker.h 7.3.1-2/db/snapshot_checker.h
--- 7.2.2-5/db/snapshot_checker.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/snapshot_checker.h	2022-06-08 21:08:16.000000000 +0000
@@ -33,10 +33,9 @@ class DisableGCSnapshotChecker : public
     // By returning kNotInSnapshot, we prevent all the values from being GCed
     return SnapshotCheckerResult::kNotInSnapshot;
   }
-  static DisableGCSnapshotChecker* Instance() { return &instance_; }
+  static DisableGCSnapshotChecker* Instance();
 
  protected:
-  static DisableGCSnapshotChecker instance_;
   explicit DisableGCSnapshotChecker() {}
 };
 
diff -pruN 7.2.2-5/db/table_cache.cc 7.3.1-2/db/table_cache.cc
--- 7.2.2-5/db/table_cache.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/table_cache.cc	2022-06-08 21:08:16.000000000 +0000
@@ -32,14 +32,31 @@
 #include "util/stop_watch.h"
 
 namespace ROCKSDB_NAMESPACE {
-
 namespace {
-
 template <class T>
 static void DeleteEntry(const Slice& /*key*/, void* value) {
   T* typed_value = reinterpret_cast<T*>(value);
   delete typed_value;
 }
+}  // namespace
+}  // namespace ROCKSDB_NAMESPACE
+
+// Generate the regular and coroutine versions of some methods by
+// including table_cache_sync_and_async.h twice
+// Macros in the header will expand differently based on whether
+// WITH_COROUTINES or WITHOUT_COROUTINES is defined
+// clang-format off
+#define WITHOUT_COROUTINES
+#include "db/table_cache_sync_and_async.h"
+#undef WITHOUT_COROUTINES
+#define WITH_COROUTINES
+#include "db/table_cache_sync_and_async.h"
+#undef WITH_COROUTINES
+// clang-format on
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace {
 
 static void UnrefEntry(void* arg1, void* arg2) {
   Cache* cache = reinterpret_cast<Cache*>(arg1);
@@ -468,8 +485,7 @@ Status TableCache::Get(
 #ifndef ROCKSDB_LITE
   // Put the replay log in row cache only if something was found.
   if (!done && s.ok() && row_cache_entry && !row_cache_entry->empty()) {
-    size_t charge =
-        row_cache_key.Size() + row_cache_entry->size() + sizeof(std::string);
+    size_t charge = row_cache_entry->capacity() + sizeof(std::string);
     void* row_ptr = new std::string(std::move(*row_cache_entry));
     // If row cache is full, it's OK to continue.
     ioptions_.row_cache
@@ -479,132 +495,6 @@ Status TableCache::Get(
   }
 #endif  // ROCKSDB_LITE
 
-  if (handle != nullptr) {
-    ReleaseHandle(handle);
-  }
-  return s;
-}
-
-// Batched version of TableCache::MultiGet.
-Status TableCache::MultiGet(
-    const ReadOptions& options,
-    const InternalKeyComparator& internal_comparator,
-    const FileMetaData& file_meta, const MultiGetContext::Range* mget_range,
-    const std::shared_ptr<const SliceTransform>& prefix_extractor,
-    HistogramImpl* file_read_hist, bool skip_filters, int level) {
-  auto& fd = file_meta.fd;
-  Status s;
-  TableReader* t = fd.table_reader;
-  Cache::Handle* handle = nullptr;
-  MultiGetRange table_range(*mget_range, mget_range->begin(),
-                            mget_range->end());
-#ifndef ROCKSDB_LITE
-  autovector<std::string, MultiGetContext::MAX_BATCH_SIZE> row_cache_entries;
-  IterKey row_cache_key;
-  size_t row_cache_key_prefix_size = 0;
-  KeyContext& first_key = *table_range.begin();
-  bool lookup_row_cache =
-      ioptions_.row_cache && !first_key.get_context->NeedToReadSequence();
-
-  // Check row cache if enabled. Since row cache does not currently store
-  // sequence numbers, we cannot use it if we need to fetch the sequence.
-  if (lookup_row_cache) {
-    GetContext* first_context = first_key.get_context;
-    CreateRowCacheKeyPrefix(options, fd, first_key.ikey, first_context,
-                            row_cache_key);
-    row_cache_key_prefix_size = row_cache_key.Size();
-
-    for (auto miter = table_range.begin(); miter != table_range.end();
-         ++miter) {
-      const Slice& user_key = miter->ukey_with_ts;
-
-      GetContext* get_context = miter->get_context;
-
-      if (GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size,
-                          get_context)) {
-        table_range.SkipKey(miter);
-      } else {
-        row_cache_entries.emplace_back();
-        get_context->SetReplayLog(&(row_cache_entries.back()));
-      }
-    }
-  }
-#endif  // ROCKSDB_LITE
-
-  // Check that table_range is not empty. Its possible all keys may have been
-  // found in the row cache and thus the range may now be empty
-  if (s.ok() && !table_range.empty()) {
-    if (t == nullptr) {
-      s = FindTable(options, file_options_, internal_comparator, fd, &handle,
-                    prefix_extractor,
-                    options.read_tier == kBlockCacheTier /* no_io */,
-                    true /* record_read_stats */, file_read_hist, skip_filters,
-                    level, true /* prefetch_index_and_filter_in_cache */,
-                    0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
-      TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
-      if (s.ok()) {
-        t = GetTableReaderFromHandle(handle);
-        assert(t);
-      }
-    }
-    if (s.ok() && !options.ignore_range_deletions) {
-      std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
-          t->NewRangeTombstoneIterator(options));
-      if (range_del_iter != nullptr) {
-        for (auto iter = table_range.begin(); iter != table_range.end();
-             ++iter) {
-          SequenceNumber* max_covering_tombstone_seq =
-              iter->get_context->max_covering_tombstone_seq();
-          *max_covering_tombstone_seq = std::max(
-              *max_covering_tombstone_seq,
-              range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey_with_ts));
-        }
-      }
-    }
-    if (s.ok()) {
-      t->MultiGet(options, &table_range, prefix_extractor.get(), skip_filters);
-    } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) {
-      for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) {
-        Status* status = iter->s;
-        if (status->IsIncomplete()) {
-          // Couldn't find Table in cache but treat as kFound if no_io set
-          iter->get_context->MarkKeyMayExist();
-          s = Status::OK();
-        }
-      }
-    }
-  }
-
-#ifndef ROCKSDB_LITE
-  if (lookup_row_cache) {
-    size_t row_idx = 0;
-
-    for (auto miter = table_range.begin(); miter != table_range.end();
-         ++miter) {
-      std::string& row_cache_entry = row_cache_entries[row_idx++];
-      const Slice& user_key = miter->ukey_with_ts;
-      ;
-      GetContext* get_context = miter->get_context;
-
-      get_context->SetReplayLog(nullptr);
-      // Compute row cache key.
-      row_cache_key.TrimAppend(row_cache_key_prefix_size, user_key.data(),
-                               user_key.size());
-      // Put the replay log in row cache only if something was found.
-      if (s.ok() && !row_cache_entry.empty()) {
-        size_t charge =
-            row_cache_key.Size() + row_cache_entry.size() + sizeof(std::string);
-        void* row_ptr = new std::string(std::move(row_cache_entry));
-        // If row cache is full, it's OK.
-        ioptions_.row_cache
-            ->Insert(row_cache_key.GetUserKey(), row_ptr, charge,
-                     &DeleteEntry<std::string>)
-            .PermitUncheckedError();
-      }
-    }
-  }
-#endif  // ROCKSDB_LITE
-
   if (handle != nullptr) {
     ReleaseHandle(handle);
   }
diff -pruN 7.2.2-5/db/table_cache.h 7.3.1-2/db/table_cache.h
--- 7.2.2-5/db/table_cache.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/table_cache.h	2022-06-08 21:08:16.000000000 +0000
@@ -24,6 +24,7 @@
 #include "rocksdb/table.h"
 #include "table/table_reader.h"
 #include "trace_replay/block_cache_tracer.h"
+#include "util/coro_utils.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -115,8 +116,8 @@ class TableCache {
   //                   in the embedded GetContext
   // @param skip_filters Disables loading/accessing the filter block
   // @param level The level this table is at, -1 for "not set / don't know"
-  Status MultiGet(
-      const ReadOptions& options,
+  DECLARE_SYNC_AND_ASYNC(
+      Status, MultiGet, const ReadOptions& options,
       const InternalKeyComparator& internal_comparator,
       const FileMetaData& file_meta, const MultiGetContext::Range* mget_range,
       const std::shared_ptr<const SliceTransform>& prefix_extractor = nullptr,
diff -pruN 7.2.2-5/db/table_cache_sync_and_async.h 7.3.1-2/db/table_cache_sync_and_async.h
--- 7.2.2-5/db/table_cache_sync_and_async.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/db/table_cache_sync_and_async.h	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,140 @@
+//  Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "util/coro_utils.h"
+
+#if defined(WITHOUT_COROUTINES) || \
+    (defined(USE_COROUTINES) && defined(WITH_COROUTINES))
+namespace ROCKSDB_NAMESPACE {
+
+#if defined(WITHOUT_COROUTINES)
+#endif
+
+// Batched version of TableCache::MultiGet.
+DEFINE_SYNC_AND_ASYNC(Status, TableCache::MultiGet)
+(const ReadOptions& options, const InternalKeyComparator& internal_comparator,
+ const FileMetaData& file_meta, const MultiGetContext::Range* mget_range,
+ const std::shared_ptr<const SliceTransform>& prefix_extractor,
+ HistogramImpl* file_read_hist, bool skip_filters, int level) {
+  auto& fd = file_meta.fd;
+  Status s;
+  TableReader* t = fd.table_reader;
+  Cache::Handle* handle = nullptr;
+  MultiGetRange table_range(*mget_range, mget_range->begin(),
+                            mget_range->end());
+#ifndef ROCKSDB_LITE
+  autovector<std::string, MultiGetContext::MAX_BATCH_SIZE> row_cache_entries;
+  IterKey row_cache_key;
+  size_t row_cache_key_prefix_size = 0;
+  KeyContext& first_key = *table_range.begin();
+  bool lookup_row_cache =
+      ioptions_.row_cache && !first_key.get_context->NeedToReadSequence();
+
+  // Check row cache if enabled. Since row cache does not currently store
+  // sequence numbers, we cannot use it if we need to fetch the sequence.
+  if (lookup_row_cache) {
+    GetContext* first_context = first_key.get_context;
+    CreateRowCacheKeyPrefix(options, fd, first_key.ikey, first_context,
+                            row_cache_key);
+    row_cache_key_prefix_size = row_cache_key.Size();
+
+    for (auto miter = table_range.begin(); miter != table_range.end();
+         ++miter) {
+      const Slice& user_key = miter->ukey_with_ts;
+
+      GetContext* get_context = miter->get_context;
+
+      if (GetFromRowCache(user_key, row_cache_key, row_cache_key_prefix_size,
+                          get_context)) {
+        table_range.SkipKey(miter);
+      } else {
+        row_cache_entries.emplace_back();
+        get_context->SetReplayLog(&(row_cache_entries.back()));
+      }
+    }
+  }
+#endif  // ROCKSDB_LITE
+
+  // Check that table_range is not empty. Its possible all keys may have been
+  // found in the row cache and thus the range may now be empty
+  if (s.ok() && !table_range.empty()) {
+    if (t == nullptr) {
+      s = FindTable(options, file_options_, internal_comparator, fd, &handle,
+                    prefix_extractor,
+                    options.read_tier == kBlockCacheTier /* no_io */,
+                    true /* record_read_stats */, file_read_hist, skip_filters,
+                    level, true /* prefetch_index_and_filter_in_cache */,
+                    0 /*max_file_size_for_l0_meta_pin*/, file_meta.temperature);
+      TEST_SYNC_POINT_CALLBACK("TableCache::MultiGet:FindTable", &s);
+      if (s.ok()) {
+        t = GetTableReaderFromHandle(handle);
+        assert(t);
+      }
+    }
+    if (s.ok() && !options.ignore_range_deletions) {
+      std::unique_ptr<FragmentedRangeTombstoneIterator> range_del_iter(
+          t->NewRangeTombstoneIterator(options));
+      if (range_del_iter != nullptr) {
+        for (auto iter = table_range.begin(); iter != table_range.end();
+             ++iter) {
+          SequenceNumber* max_covering_tombstone_seq =
+              iter->get_context->max_covering_tombstone_seq();
+          *max_covering_tombstone_seq = std::max(
+              *max_covering_tombstone_seq,
+              range_del_iter->MaxCoveringTombstoneSeqnum(iter->ukey_with_ts));
+        }
+      }
+    }
+    if (s.ok()) {
+      CO_AWAIT(t->MultiGet)
+      (options, &table_range, prefix_extractor.get(), skip_filters);
+    } else if (options.read_tier == kBlockCacheTier && s.IsIncomplete()) {
+      for (auto iter = table_range.begin(); iter != table_range.end(); ++iter) {
+        Status* status = iter->s;
+        if (status->IsIncomplete()) {
+          // Couldn't find Table in cache but treat as kFound if no_io set
+          iter->get_context->MarkKeyMayExist();
+          s = Status::OK();
+        }
+      }
+    }
+  }
+
+#ifndef ROCKSDB_LITE
+  if (lookup_row_cache) {
+    size_t row_idx = 0;
+
+    for (auto miter = table_range.begin(); miter != table_range.end();
+         ++miter) {
+      std::string& row_cache_entry = row_cache_entries[row_idx++];
+      const Slice& user_key = miter->ukey_with_ts;
+      ;
+      GetContext* get_context = miter->get_context;
+
+      get_context->SetReplayLog(nullptr);
+      // Compute row cache key.
+      row_cache_key.TrimAppend(row_cache_key_prefix_size, user_key.data(),
+                               user_key.size());
+      // Put the replay log in row cache only if something was found.
+      if (s.ok() && !row_cache_entry.empty()) {
+        size_t charge = row_cache_entry.capacity() + sizeof(std::string);
+        void* row_ptr = new std::string(std::move(row_cache_entry));
+        // If row cache is full, it's OK.
+        ioptions_.row_cache
+            ->Insert(row_cache_key.GetUserKey(), row_ptr, charge,
+                     &DeleteEntry<std::string>)
+            .PermitUncheckedError();
+      }
+    }
+  }
+#endif  // ROCKSDB_LITE
+
+  if (handle != nullptr) {
+    ReleaseHandle(handle);
+  }
+  CO_RETURN s;
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif
diff -pruN 7.2.2-5/db/version_builder.cc 7.3.1-2/db/version_builder.cc
--- 7.2.2-5/db/version_builder.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_builder.cc	2022-06-08 21:08:16.000000000 +0000
@@ -1144,7 +1144,7 @@ class VersionBuilder::Rep {
 
     size_t table_cache_capacity = table_cache_->get_cache()->GetCapacity();
     bool always_load = (table_cache_capacity == TableCache::kInfiniteCapacity);
-    size_t max_load = port::kMaxSizet;
+    size_t max_load = std::numeric_limits<size_t>::max();
 
     if (!always_load) {
       // If it is initial loading and not set to always loading all the
diff -pruN 7.2.2-5/db/version_builder_test.cc 7.3.1-2/db/version_builder_test.cc
--- 7.2.2-5/db/version_builder_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_builder_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -12,6 +12,7 @@
 #include "db/version_edit.h"
 #include "db/version_set.h"
 #include "rocksdb/advanced_options.h"
+#include "table/unique_id_impl.h"
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
 #include "util/string_util.h"
@@ -72,7 +73,7 @@ class VersionBuilderTest : public testin
         oldest_blob_file_number, kUnknownOldestAncesterTime,
         kUnknownFileCreationTime, kUnknownFileChecksum,
         kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-        kDisableUserTimestamp);
+        kDisableUserTimestamp, kNullUniqueId64x2);
     f->compensated_file_size = file_size;
     f->num_entries = num_entries;
     f->num_deletions = num_deletions;
@@ -128,13 +129,13 @@ class VersionBuilderTest : public testin
     constexpr SequenceNumber largest_seqno = 300;
     constexpr bool marked_for_compaction = false;
 
-    edit->AddFile(level, table_file_number, path_id, file_size,
-                  GetInternalKey(smallest), GetInternalKey(largest),
-                  smallest_seqno, largest_seqno, marked_for_compaction,
-                  Temperature::kUnknown, blob_file_number,
-                  kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                  kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                  kDisableUserTimestamp, kDisableUserTimestamp);
+    edit->AddFile(
+        level, table_file_number, path_id, file_size, GetInternalKey(smallest),
+        GetInternalKey(largest), smallest_seqno, largest_seqno,
+        marked_for_compaction, Temperature::kUnknown, blob_file_number,
+        kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+        kUnknownFileChecksum, kUnknownFileChecksumFuncName,
+        kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
   }
 
   void UpdateVersionStorageInfo(VersionStorageInfo* vstorage) {
@@ -175,12 +176,12 @@ TEST_F(VersionBuilderTest, ApplyAndSaveT
   UpdateVersionStorageInfo();
 
   VersionEdit version_edit;
-  version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
-                       GetInternalKey("350"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
   version_edit.DeleteFile(3, 27U);
 
   EnvOptions env_options;
@@ -219,12 +220,12 @@ TEST_F(VersionBuilderTest, ApplyAndSaveT
   UpdateVersionStorageInfo();
 
   VersionEdit version_edit;
-  version_edit.AddFile(3, 666, 0, 100U, GetInternalKey("301"),
-                       GetInternalKey("350"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      3, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
   version_edit.DeleteFile(0, 1U);
   version_edit.DeleteFile(0, 88U);
 
@@ -266,12 +267,12 @@ TEST_F(VersionBuilderTest, ApplyAndSaveT
   UpdateVersionStorageInfo();
 
   VersionEdit version_edit;
-  version_edit.AddFile(4, 666, 0, 100U, GetInternalKey("301"),
-                       GetInternalKey("350"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      4, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
   version_edit.DeleteFile(0, 1U);
   version_edit.DeleteFile(0, 88U);
   version_edit.DeleteFile(4, 6U);
@@ -303,36 +304,36 @@ TEST_F(VersionBuilderTest, ApplyMultiple
   UpdateVersionStorageInfo();
 
   VersionEdit version_edit;
-  version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
-                       GetInternalKey("350"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"),
-                       GetInternalKey("450"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"),
-                       GetInternalKey("650"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"),
-                       GetInternalKey("550"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"),
-                       GetInternalKey("750"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
 
   EnvOptions env_options;
   constexpr TableCache* table_cache = nullptr;
@@ -367,53 +368,53 @@ TEST_F(VersionBuilderTest, ApplyDeleteAn
                                   kCompactionStyleLevel, nullptr, false);
 
   VersionEdit version_edit;
-  version_edit.AddFile(2, 666, 0, 100U, GetInternalKey("301"),
-                       GetInternalKey("350"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 676, 0, 100U, GetInternalKey("401"),
-                       GetInternalKey("450"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 636, 0, 100U, GetInternalKey("601"),
-                       GetInternalKey("650"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 616, 0, 100U, GetInternalKey("501"),
-                       GetInternalKey("550"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
-  version_edit.AddFile(2, 606, 0, 100U, GetInternalKey("701"),
-                       GetInternalKey("750"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      2, 666, 0, 100U, GetInternalKey("301"), GetInternalKey("350"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 676, 0, 100U, GetInternalKey("401"), GetInternalKey("450"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 636, 0, 100U, GetInternalKey("601"), GetInternalKey("650"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 616, 0, 100U, GetInternalKey("501"), GetInternalKey("550"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
+  version_edit.AddFile(
+      2, 606, 0, 100U, GetInternalKey("701"), GetInternalKey("750"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
   ASSERT_OK(version_builder.Apply(&version_edit));
 
   VersionEdit version_edit2;
-  version_edit.AddFile(2, 808, 0, 100U, GetInternalKey("901"),
-                       GetInternalKey("950"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      2, 808, 0, 100U, GetInternalKey("901"), GetInternalKey("950"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
   version_edit2.DeleteFile(2, 616);
   version_edit2.DeleteFile(2, 636);
-  version_edit.AddFile(2, 806, 0, 100U, GetInternalKey("801"),
-                       GetInternalKey("850"), 200, 200, false,
-                       Temperature::kUnknown, kInvalidBlobFileNumber,
-                       kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                       kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                       kDisableUserTimestamp, kDisableUserTimestamp);
+  version_edit.AddFile(
+      2, 806, 0, 100U, GetInternalKey("801"), GetInternalKey("850"), 200, 200,
+      false, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
 
   ASSERT_OK(version_builder.Apply(&version_edit2));
   ASSERT_OK(version_builder.SaveTo(&new_vstorage));
@@ -525,7 +526,7 @@ TEST_F(VersionBuilderTest, ApplyFileDele
                    kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
                    kUnknownFileCreationTime, kUnknownFileChecksum,
                    kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-                   kDisableUserTimestamp);
+                   kDisableUserTimestamp, kNullUniqueId64x2);
 
   ASSERT_OK(builder.Apply(&addition));
 
@@ -575,7 +576,7 @@ TEST_F(VersionBuilderTest, ApplyFileAddi
                Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
                kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-               kDisableUserTimestamp, kDisableUserTimestamp);
+               kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
 
   const Status s = builder.Apply(&edit);
   ASSERT_TRUE(s.IsCorruption());
@@ -612,7 +613,7 @@ TEST_F(VersionBuilderTest, ApplyFileAddi
                kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
                kUnknownFileCreationTime, kUnknownFileChecksum,
                kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-               kDisableUserTimestamp);
+               kDisableUserTimestamp, kNullUniqueId64x2);
 
   ASSERT_OK(builder.Apply(&edit));
 
@@ -620,13 +621,13 @@ TEST_F(VersionBuilderTest, ApplyFileAddi
 
   constexpr int new_level = 2;
 
-  other_edit.AddFile(new_level, file_number, path_id, file_size,
-                     GetInternalKey(smallest), GetInternalKey(largest),
-                     smallest_seqno, largest_seqno, marked_for_compaction,
-                     Temperature::kUnknown, kInvalidBlobFileNumber,
-                     kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                     kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                     kDisableUserTimestamp, kDisableUserTimestamp);
+  other_edit.AddFile(
+      new_level, file_number, path_id, file_size, GetInternalKey(smallest),
+      GetInternalKey(largest), smallest_seqno, largest_seqno,
+      marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
 
   const Status s = builder.Apply(&other_edit);
   ASSERT_TRUE(s.IsCorruption());
@@ -657,13 +658,13 @@ TEST_F(VersionBuilderTest, ApplyFileAddi
 
   VersionEdit addition;
 
-  addition.AddFile(level, file_number, path_id, file_size,
-                   GetInternalKey(smallest), GetInternalKey(largest),
-                   smallest_seqno, largest_seqno, marked_for_compaction,
-                   Temperature::kUnknown, kInvalidBlobFileNumber,
-                   kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-                   kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-                   kDisableUserTimestamp, kDisableUserTimestamp);
+  addition.AddFile(
+      level, file_number, path_id, file_size, GetInternalKey(smallest),
+      GetInternalKey(largest), smallest_seqno, largest_seqno,
+      marked_for_compaction, Temperature::kUnknown, kInvalidBlobFileNumber,
+      kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+      kUnknownFileChecksum, kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+      kDisableUserTimestamp, kNullUniqueId64x2);
 
   ASSERT_OK(builder.Apply(&addition));
 
@@ -1227,12 +1228,13 @@ TEST_F(VersionBuilderTest, SaveBlobFiles
   constexpr uint64_t total_blob_count = 234;
   constexpr uint64_t total_blob_bytes = 1 << 22;
 
-  edit.AddFile(
-      level, table_file_number, path_id, file_size, GetInternalKey(smallest),
-      GetInternalKey(largest), smallest_seqno, largest_seqno,
-      marked_for_compaction, Temperature::kUnknown, blob_file_number,
-      kUnknownOldestAncesterTime, kUnknownFileCreationTime, checksum_value,
-      checksum_method, kDisableUserTimestamp, kDisableUserTimestamp);
+  edit.AddFile(level, table_file_number, path_id, file_size,
+               GetInternalKey(smallest), GetInternalKey(largest),
+               smallest_seqno, largest_seqno, marked_for_compaction,
+               Temperature::kUnknown, blob_file_number,
+               kUnknownOldestAncesterTime, kUnknownFileCreationTime,
+               checksum_value, checksum_method, kDisableUserTimestamp,
+               kDisableUserTimestamp, kNullUniqueId64x2);
   edit.AddBlobFile(blob_file_number, total_blob_count, total_blob_bytes,
                    checksum_method, checksum_value);
 
@@ -1320,7 +1322,7 @@ TEST_F(VersionBuilderTest, CheckConsiste
                /* oldest_blob_file_number */ 16, kUnknownOldestAncesterTime,
                kUnknownFileCreationTime, kUnknownFileChecksum,
                kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-               kDisableUserTimestamp);
+               kDisableUserTimestamp, kNullUniqueId64x2);
 
   edit.AddFile(/* level */ 1, /* file_number */ 700, /* path_id */ 0,
                /* file_size */ 100, /* smallest */ GetInternalKey("801"),
@@ -1330,7 +1332,7 @@ TEST_F(VersionBuilderTest, CheckConsiste
                /* oldest_blob_file_number */ 1000, kUnknownOldestAncesterTime,
                kUnknownFileCreationTime, kUnknownFileChecksum,
                kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-               kDisableUserTimestamp);
+               kDisableUserTimestamp, kNullUniqueId64x2);
   edit.AddBlobFile(/* blob_file_number */ 1000, /* total_blob_count */ 2000,
                    /* total_blob_bytes */ 200000,
                    /* checksum_method */ std::string(),
@@ -1552,7 +1554,7 @@ TEST_F(VersionBuilderTest, MaintainLinke
       /* oldest_blob_file_number */ 1, kUnknownOldestAncesterTime,
       kUnknownFileCreationTime, kUnknownFileChecksum,
       kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-      kDisableUserTimestamp);
+      kDisableUserTimestamp, kNullUniqueId64x2);
 
   // Add an SST that does not reference any blob files.
   edit.AddFile(
@@ -1563,7 +1565,7 @@ TEST_F(VersionBuilderTest, MaintainLinke
       Temperature::kUnknown, kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
       kUnknownFileCreationTime, kUnknownFileChecksum,
       kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-      kDisableUserTimestamp);
+      kDisableUserTimestamp, kNullUniqueId64x2);
 
   // Delete a file that references a blob file.
   edit.DeleteFile(/* level */ 1, /* file_number */ 6);
@@ -1586,7 +1588,7 @@ TEST_F(VersionBuilderTest, MaintainLinke
                /* oldest_blob_file_number */ 3, kUnknownOldestAncesterTime,
                kUnknownFileCreationTime, kUnknownFileChecksum,
                kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-               kDisableUserTimestamp);
+               kDisableUserTimestamp, kNullUniqueId64x2);
 
   // Trivially move a file that does not reference any blob files.
   edit.DeleteFile(/* level */ 1, /* file_number */ 13);
@@ -1598,7 +1600,7 @@ TEST_F(VersionBuilderTest, MaintainLinke
                Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
                kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-               kDisableUserTimestamp, kDisableUserTimestamp);
+               kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
 
   // Add one more SST file that references a blob file, then promptly
   // delete it in a second version edit before the new version gets saved.
@@ -1612,7 +1614,7 @@ TEST_F(VersionBuilderTest, MaintainLinke
                /* oldest_blob_file_number */ 5, kUnknownOldestAncesterTime,
                kUnknownFileCreationTime, kUnknownFileChecksum,
                kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-               kDisableUserTimestamp);
+               kDisableUserTimestamp, kNullUniqueId64x2);
 
   VersionEdit edit2;
 
@@ -1702,11 +1704,9 @@ TEST_F(VersionBuilderTest, EstimatedActi
   const uint32_t kDeletionsPerFile = 100;
   for (uint32_t i = 0; i < kNumFiles; ++i) {
     Add(static_cast<int>(i / kFilesPerLevel), i + 1,
-        ToString((i + 100) * 1000).c_str(),
-        ToString((i + 100) * 1000 + 999).c_str(),
-        100U,  0, 100, 100,
-        kEntriesPerFile, kDeletionsPerFile,
-        (i < kTotalSamples));
+        std::to_string((i + 100) * 1000).c_str(),
+        std::to_string((i + 100) * 1000 + 999).c_str(), 100U, 0, 100, 100,
+        kEntriesPerFile, kDeletionsPerFile, (i < kTotalSamples));
   }
   // minus 2X for the number of deletion entries because:
   // 1x for deletion entry does not count as a data entry.
diff -pruN 7.2.2-5/db/version_edit.cc 7.3.1-2/db/version_edit.cc
--- 7.2.2-5/db/version_edit.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_edit.cc	2022-06-08 21:08:16.000000000 +0000
@@ -13,6 +13,7 @@
 #include "db/version_set.h"
 #include "logging/event_logger.h"
 #include "rocksdb/slice.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
 #include "util/coding.h"
 #include "util/string_util.h"
@@ -221,6 +222,14 @@ bool VersionEdit::EncodeTo(std::string*
       PutVarint64(&oldest_blob_file_number, f.oldest_blob_file_number);
       PutLengthPrefixedSlice(dst, Slice(oldest_blob_file_number));
     }
+    UniqueId64x2 unique_id = f.unique_id;
+    TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:UniqueId", &unique_id);
+    if (unique_id != kNullUniqueId64x2) {
+      PutVarint32(dst, NewFileCustomTag::kUniqueId);
+      std::string unique_id_str = EncodeUniqueIdBytes(&unique_id);
+      PutLengthPrefixedSlice(dst, Slice(unique_id_str));
+    }
+
     TEST_SYNC_POINT_CALLBACK("VersionEdit::EncodeTo:NewFile4:CustomizeFields",
                              dst);
 
@@ -392,6 +401,12 @@ const char* VersionEdit::DecodeNewFile4F
         case kMaxTimestamp:
           f.max_timestamp = field.ToString();
           break;
+        case kUniqueId:
+          if (!DecodeUniqueIdBytes(field.ToString(), &f.unique_id).ok()) {
+            f.unique_id = kNullUniqueId64x2;
+            return "invalid unique id";
+          }
+          break;
         default:
           if ((custom_tag & kCustomTagNonSafeIgnoreMask) != 0) {
             // Should not proceed if cannot understand it
@@ -817,7 +832,12 @@ std::string VersionEdit::DebugString(boo
       r.append(" temperature: ");
       // Maybe change to human readable format whenthe feature becomes
       // permanent
-      r.append(ToString(static_cast<int>(f.temperature)));
+      r.append(std::to_string(static_cast<int>(f.temperature)));
+    }
+    if (f.unique_id != kNullUniqueId64x2) {
+      r.append(" unique_id(internal): ");
+      UniqueId64x2 id = f.unique_id;
+      r.append(InternalUniqueIdToHumanString(&id));
     }
   }
 
@@ -928,7 +948,7 @@ std::string VersionEdit::DebugJSON(int e
       jw << "FileChecksum" << Slice(f.file_checksum).ToString(true);
       jw << "FileChecksumFuncName" << f.file_checksum_func_name;
       if (f.temperature != Temperature::kUnknown) {
-        jw << "temperature" << ToString(static_cast<int>(f.temperature));
+        jw << "temperature" << std::to_string(static_cast<int>(f.temperature));
       }
       if (f.oldest_blob_file_number != kInvalidBlobFileNumber) {
         jw << "OldestBlobFile" << f.oldest_blob_file_number;
diff -pruN 7.2.2-5/db/version_edit.h 7.3.1-2/db/version_edit.h
--- 7.2.2-5/db/version_edit.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_edit.h	2022-06-08 21:08:16.000000000 +0000
@@ -85,6 +85,7 @@ enum NewFileCustomTag : uint32_t {
   kTemperature = 9,
   kMinTimestamp = 10,
   kMaxTimestamp = 11,
+  kUniqueId = 12,
 
   // If this bit for the custom tag is set, opening DB should fail if
   // we don't know this field.
@@ -102,6 +103,8 @@ constexpr uint64_t kUnknownFileCreationT
 
 extern uint64_t PackFileNumberAndPathId(uint64_t number, uint64_t path_id);
 
+using UniqueId64x2 = std::array<uint64_t, 2>;
+
 // A copyable structure contains information needed to read data from an SST
 // file. It can contain a pointer to a table reader opened for the file, or
 // file number and size, which can be used to create a new table reader for it.
@@ -217,6 +220,9 @@ struct FileMetaData {
   // Max (newest) timestamp of keys in this file
   std::string max_timestamp;
 
+  // SST unique id
+  UniqueId64x2 unique_id{};
+
   FileMetaData() = default;
 
   FileMetaData(uint64_t file, uint32_t file_path_id, uint64_t file_size,
@@ -227,7 +233,8 @@ struct FileMetaData {
                uint64_t _oldest_ancester_time, uint64_t _file_creation_time,
                const std::string& _file_checksum,
                const std::string& _file_checksum_func_name,
-               std::string _min_timestamp, std::string _max_timestamp)
+               std::string _min_timestamp, std::string _max_timestamp,
+               UniqueId64x2 _unique_id)
       : fd(file, file_path_id, file_size, smallest_seq, largest_seq),
         smallest(smallest_key),
         largest(largest_key),
@@ -239,7 +246,8 @@ struct FileMetaData {
         file_checksum(_file_checksum),
         file_checksum_func_name(_file_checksum_func_name),
         min_timestamp(std::move(_min_timestamp)),
-        max_timestamp(std::move(_max_timestamp)) {
+        max_timestamp(std::move(_max_timestamp)),
+        unique_id(std::move(_unique_id)) {
     TEST_SYNC_POINT_CALLBACK("FileMetaData::FileMetaData", this);
   }
 
@@ -408,7 +416,8 @@ class VersionEdit {
                const std::string& file_checksum,
                const std::string& file_checksum_func_name,
                const std::string& min_timestamp,
-               const std::string& max_timestamp) {
+               const std::string& max_timestamp,
+               const UniqueId64x2& unique_id) {
     assert(smallest_seqno <= largest_seqno);
     new_files_.emplace_back(
         level,
@@ -416,7 +425,7 @@ class VersionEdit {
                      smallest_seqno, largest_seqno, marked_for_compaction,
                      temperature, oldest_blob_file_number, oldest_ancester_time,
                      file_creation_time, file_checksum, file_checksum_func_name,
-                     min_timestamp, max_timestamp));
+                     min_timestamp, max_timestamp, unique_id));
     if (!HasLastSequence() || largest_seqno > GetLastSequence()) {
       SetLastSequence(largest_seqno);
     }
diff -pruN 7.2.2-5/db/version_edit_test.cc 7.3.1-2/db/version_edit_test.cc
--- 7.2.2-5/db/version_edit_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_edit_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -11,6 +11,7 @@
 
 #include "db/blob/blob_index.h"
 #include "rocksdb/advanced_options.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
@@ -43,7 +44,7 @@ TEST_F(VersionEditTest, EncodeDecode) {
                  InternalKey("zoo", kBig + 600 + i, kTypeDeletion),
                  kBig + 500 + i, kBig + 600 + i, false, Temperature::kUnknown,
                  kInvalidBlobFileNumber, 888, 678, "234", "crc32c", "123",
-                 "345");
+                 "345", kNullUniqueId64x2);
     edit.DeleteFile(4, kBig + 700 + i);
   }
 
@@ -62,26 +63,25 @@ TEST_F(VersionEditTest, EncodeDecodeNewF
                InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
                kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "123",
-               "234");
+               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "123", "234",
+               kNullUniqueId64x2);
   edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
                InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
                kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "345",
-               "543");
+               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "345", "543",
+               kNullUniqueId64x2);
   edit.AddFile(5, 302, 0, 100, InternalKey("foo", kBig + 502, kTypeValue),
                InternalKey("zoo", kBig + 602, kTypeDeletion), kBig + 502,
                kBig + 602, true, Temperature::kUnknown, kInvalidBlobFileNumber,
                666, 888, kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-               "456", "567");
+               "456", "567", kNullUniqueId64x2);
   edit.AddFile(5, 303, 0, 100, InternalKey("foo", kBig + 503, kTypeBlobIndex),
                InternalKey("zoo", kBig + 603, kTypeBlobIndex), kBig + 503,
                kBig + 603, true, Temperature::kUnknown, 1001,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "678",
-               "789");
-  ;
+               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "678", "789",
+               kNullUniqueId64x2);
 
   edit.DeleteFile(4, 700);
 
@@ -129,13 +129,13 @@ TEST_F(VersionEditTest, ForwardCompatibl
                InternalKey("zoo", kBig + 600, kTypeDeletion), kBig + 500,
                kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
-               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "123",
-               "234");
+               kUnknownFileChecksum, kUnknownFileChecksumFuncName, "123", "234",
+               kNullUniqueId64x2);
   edit.AddFile(4, 301, 3, 100, InternalKey("foo", kBig + 501, kTypeValue),
                InternalKey("zoo", kBig + 601, kTypeDeletion), kBig + 501,
                kBig + 601, false, Temperature::kUnknown, kInvalidBlobFileNumber,
                686, 868, "234", "crc32c", kDisableUserTimestamp,
-               kDisableUserTimestamp);
+               kDisableUserTimestamp, kNullUniqueId64x2);
   edit.DeleteFile(4, 700);
 
   edit.SetComparatorName("foo");
@@ -188,7 +188,7 @@ TEST_F(VersionEditTest, NewFile4NotSuppo
                kBig + 600, true, Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
                kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-               kDisableUserTimestamp, kDisableUserTimestamp);
+               kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
 
   edit.SetComparatorName("foo");
   edit.SetLogNumber(kBig + 100);
@@ -219,7 +219,7 @@ TEST_F(VersionEditTest, EncodeEmptyFile)
                Temperature::kUnknown, kInvalidBlobFileNumber,
                kUnknownOldestAncesterTime, kUnknownFileCreationTime,
                kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-               kDisableUserTimestamp, kDisableUserTimestamp);
+               kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
   std::string buffer;
   ASSERT_TRUE(!edit.EncodeTo(&buffer));
 }
diff -pruN 7.2.2-5/db/version_set.cc 7.3.1-2/db/version_set.cc
--- 7.2.2-5/db/version_set.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_set.cc	2022-06-08 21:08:16.000000000 +0000
@@ -38,6 +38,10 @@
 #include "db/table_cache.h"
 #include "db/version_builder.h"
 #include "db/version_edit_handler.h"
+#if USE_COROUTINES
+#include "folly/experimental/coro/BlockingWait.h"
+#include "folly/experimental/coro/Collect.h"
+#endif
 #include "file/filename.h"
 #include "file/random_access_file_reader.h"
 #include "file/read_write_util.h"
@@ -59,13 +63,28 @@
 #include "table/plain/plain_table_factory.h"
 #include "table/table_reader.h"
 #include "table/two_level_iterator.h"
+#include "table/unique_id_impl.h"
 #include "test_util/sync_point.h"
 #include "util/cast_util.h"
 #include "util/coding.h"
+#include "util/coro_utils.h"
 #include "util/stop_watch.h"
 #include "util/string_util.h"
 #include "util/user_comparator_wrapper.h"
 
+// Generate the regular and coroutine versions of some methods by
+// including version_set_sync_and_async.h twice
+// Macros in the header will expand differently based on whether
+// WITH_COROUTINES or WITHOUT_COROUTINES is defined
+// clang-format off
+#define WITHOUT_COROUTINES
+#include "db/version_set_sync_and_async.h"
+#undef WITHOUT_COROUTINES
+#define WITH_COROUTINES
+#include "db/version_set_sync_and_async.h"
+#undef WITH_COROUTINES
+// clang-format on
+
 namespace ROCKSDB_NAMESPACE {
 
 namespace {
@@ -504,68 +523,63 @@ class FilePickerMultiGet {
     return file_hit;
   }
 
-  FdWithKeyRange* GetNextFile() {
-    while (!search_ended_) {
-      // Start searching next level.
-      if (batch_iter_ == current_level_range_.end()) {
-        search_ended_ = !PrepareNextLevel();
-        continue;
-      } else {
-        if (maybe_repeat_key_) {
-          maybe_repeat_key_ = false;
-          // Check if we found the final value for the last key in the
-          // previous lookup range. If we did, then there's no need to look
-          // any further for that key, so advance batch_iter_. Else, keep
-          // batch_iter_ positioned on that key so we look it up again in
-          // the next file
-          // For L0, always advance the key because we will look in the next
-          // file regardless for all keys not found yet
-          if (current_level_range_.CheckKeyDone(batch_iter_) ||
-              curr_level_ == 0) {
-            batch_iter_ = upper_key_;
-          }
-        }
-        // batch_iter_prev_ will become the start key for the next file
-        // lookup
-        batch_iter_prev_ = batch_iter_;
-      }
-
-      MultiGetRange next_file_range(current_level_range_, batch_iter_prev_,
-                                    current_level_range_.end());
-      size_t curr_file_index =
-          (batch_iter_ != current_level_range_.end())
-              ? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level
-              : curr_file_level_->num_files;
-      FdWithKeyRange* f;
-      bool is_last_key_in_file;
-      if (!GetNextFileInLevelWithKeys(&next_file_range, &curr_file_index, &f,
-                                      &is_last_key_in_file)) {
-        search_ended_ = !PrepareNextLevel();
-      } else {
-        if (is_last_key_in_file) {
-          // Since cmp_largest is 0, batch_iter_ still points to the last key
-          // that falls in this file, instead of the next one. Increment
-          // the file index for all keys between batch_iter_ and upper_key_
-          auto tmp_iter = batch_iter_;
-          while (tmp_iter != upper_key_) {
-            ++(fp_ctx_array_[tmp_iter.index()].curr_index_in_curr_level);
-            ++tmp_iter;
-          }
-          maybe_repeat_key_ = true;
-        }
-        // Set the range for this file
-        current_file_range_ =
-            MultiGetRange(next_file_range, batch_iter_prev_, upper_key_);
-        returned_file_level_ = curr_level_;
-        hit_file_level_ = curr_level_;
-        is_hit_file_last_in_level_ =
-            curr_file_index == curr_file_level_->num_files - 1;
-        return f;
-      }
-    }
+  void PrepareNextLevelForSearch() { search_ended_ = !PrepareNextLevel(); }
 
-    // Search ended
-    return nullptr;
+  FdWithKeyRange* GetNextFileInLevel() {
+    if (batch_iter_ == current_level_range_.end() || search_ended_) {
+      return nullptr;
+    } else {
+      if (maybe_repeat_key_) {
+        maybe_repeat_key_ = false;
+        // Check if we found the final value for the last key in the
+        // previous lookup range. If we did, then there's no need to look
+        // any further for that key, so advance batch_iter_. Else, keep
+        // batch_iter_ positioned on that key so we look it up again in
+        // the next file
+        // For L0, always advance the key because we will look in the next
+        // file regardless for all keys not found yet
+        if (current_level_range_.CheckKeyDone(batch_iter_) ||
+            curr_level_ == 0) {
+          batch_iter_ = upper_key_;
+        }
+      }
+      // batch_iter_prev_ will become the start key for the next file
+      // lookup
+      batch_iter_prev_ = batch_iter_;
+    }
+
+    MultiGetRange next_file_range(current_level_range_, batch_iter_prev_,
+                                  current_level_range_.end());
+    size_t curr_file_index =
+        (batch_iter_ != current_level_range_.end())
+            ? fp_ctx_array_[batch_iter_.index()].curr_index_in_curr_level
+            : curr_file_level_->num_files;
+    FdWithKeyRange* f;
+    bool is_last_key_in_file;
+    if (!GetNextFileInLevelWithKeys(&next_file_range, &curr_file_index, &f,
+                                    &is_last_key_in_file)) {
+      return nullptr;
+    } else {
+      if (is_last_key_in_file) {
+        // Since cmp_largest is 0, batch_iter_ still points to the last key
+        // that falls in this file, instead of the next one. Increment
+        // the file index for all keys between batch_iter_ and upper_key_
+        auto tmp_iter = batch_iter_;
+        while (tmp_iter != upper_key_) {
+          ++(fp_ctx_array_[tmp_iter.index()].curr_index_in_curr_level);
+          ++tmp_iter;
+        }
+        maybe_repeat_key_ = true;
+      }
+      // Set the range for this file
+      current_file_range_ =
+          MultiGetRange(next_file_range, batch_iter_prev_, upper_key_);
+      returned_file_level_ = curr_level_;
+      hit_file_level_ = curr_level_;
+      is_hit_file_last_in_level_ =
+          curr_file_index == curr_file_level_->num_files - 1;
+      return f;
+    }
   }
 
   // getter for current file level
@@ -576,8 +590,16 @@ class FilePickerMultiGet {
   // GetNextFile()) is at the last index in its level.
   bool IsHitFileLastInLevel() { return is_hit_file_last_in_level_; }
 
+  bool KeyMaySpanNextFile() { return maybe_repeat_key_; }
+
+  bool IsSearchEnded() { return search_ended_; }
+
   const MultiGetRange& CurrentFileRange() { return current_file_range_; }
 
+  bool RemainingOverlapInLevel() {
+    return !current_level_range_.Suffix(current_file_range_).empty();
+  }
+
  private:
   unsigned int num_levels_;
   unsigned int curr_level_;
@@ -1056,7 +1078,15 @@ void LevelIterator::Seek(const Slice& ta
 
   if (file_iter_.iter() != nullptr) {
     file_iter_.Seek(target);
+    // Status::TryAgain indicates asynchronous request for retrieval of data
+    // blocks has been submitted. So it should return at this point and Seek
+    // should be called again to retrieve the requested block and execute the
+    // remaining code.
+    if (file_iter_.status() == Status::TryAgain()) {
+      return;
+    }
   }
+
   if (SkipEmptyFileForward() && prefix_extractor_ != nullptr &&
       !read_options_.total_order_seek && !read_options_.auto_prefix_mode &&
       file_iter_.iter() != nullptr && file_iter_.Valid()) {
@@ -1517,7 +1547,7 @@ uint64_t Version::GetSstFilesSize() {
 }
 
 void Version::GetCreationTimeOfOldestFile(uint64_t* creation_time) {
-  uint64_t oldest_time = port::kMaxUint64;
+  uint64_t oldest_time = std::numeric_limits<uint64_t>::max();
   for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) {
     for (FileMetaData* meta : storage_info_.LevelFiles(level)) {
       assert(meta->fd.table_reader != nullptr);
@@ -1534,6 +1564,40 @@ void Version::GetCreationTimeOfOldestFil
   *creation_time = oldest_time;
 }
 
+Status Version::VerifySstUniqueIds() const {
+  for (int level = 0; level < storage_info_.num_non_empty_levels_; level++) {
+    for (FileMetaData* meta : storage_info_.LevelFiles(level)) {
+      if (meta->unique_id != kNullUniqueId64x2) {
+        std::shared_ptr<const TableProperties> props;
+        Status s =
+            GetTableProperties(&props, meta);  // may open the file if it's not
+        if (!s.ok()) {
+          return s;
+        }
+        UniqueId64x2 id;
+        s = GetSstInternalUniqueId(props->db_id, props->db_session_id,
+                                   props->orig_file_number, &id);
+        if (!s.ok() || id != meta->unique_id) {
+          std::ostringstream oss;
+          oss << "SST #" << meta->fd.GetNumber() << " unique ID mismatch. ";
+          oss << "Manifest: "
+              << InternalUniqueIdToHumanString(&(meta->unique_id)) << ", ";
+          if (s.ok()) {
+            oss << "Table Properties: " << InternalUniqueIdToHumanString(&id);
+          } else {
+            oss << "Failed to get Table Properties: " << s.ToString();
+          }
+          return Status::Corruption("VersionSet", oss.str());
+        }
+        TEST_SYNC_POINT_CALLBACK("Version::VerifySstUniqueIds::Passed", &id);
+      } else {
+        TEST_SYNC_POINT_CALLBACK("Version::VerifySstUniqueIds::Skipped", meta);
+      }
+    }
+  }
+  return Status::OK();
+}
+
 uint64_t VersionStorageInfo::GetEstimatedActiveKeys() const {
   // Estimation will be inaccurate when:
   // (1) there exist merge keys
@@ -2183,7 +2247,7 @@ void Version::MultiGet(const ReadOptions
       &file_picker_range,
       &storage_info_.level_files_brief_, storage_info_.num_non_empty_levels_,
       &storage_info_.file_indexer_, user_comparator(), internal_comparator());
-  FdWithKeyRange* f = fp.GetNextFile();
+  FdWithKeyRange* f = fp.GetNextFileInLevel();
   Status s;
   uint64_t num_index_read = 0;
   uint64_t num_filter_read = 0;
@@ -2193,164 +2257,92 @@ void Version::MultiGet(const ReadOptions
   MultiGetRange keys_with_blobs_range(*range, range->begin(), range->end());
   // blob_file => [[blob_idx, it], ...]
   std::unordered_map<uint64_t, BlobReadRequests> blob_rqs;
-  int level = -1;
-
-  while (f != nullptr) {
-    MultiGetRange file_range = fp.CurrentFileRange();
-    bool timer_enabled =
-        GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
-        get_perf_context()->per_level_perf_context_enabled;
+  int prev_level = -1;
 
-    // Report MultiGet stats per level.
-    if (level >= 0 && level != (int)fp.GetHitFileLevel()) {
-      // Dump the stats if the search has moved to the next level and
-      // reset for next level.
-      RecordInHistogram(db_statistics_,
-                        NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
-                        num_index_read + num_filter_read);
-      RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
-                        num_data_read);
-      RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL, num_sst_read);
-      num_filter_read = 0;
-      num_index_read = 0;
-      num_data_read = 0;
-      num_sst_read = 0;
-      level = fp.GetHitFileLevel();
-    }
-
-    StopWatchNano timer(clock_, timer_enabled /* auto_start */);
-    s = table_cache_->MultiGet(
-        read_options, *internal_comparator(), *f->file_metadata, &file_range,
-        mutable_cf_options_.prefix_extractor,
-        cfd_->internal_stats()->GetFileReadHist(fp.GetHitFileLevel()),
-        IsFilterSkipped(static_cast<int>(fp.GetHitFileLevel()),
-                        fp.IsHitFileLastInLevel()),
-        fp.GetHitFileLevel());
-    // TODO: examine the behavior for corrupted key
-    if (timer_enabled) {
-      PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(),
-                                fp.GetHitFileLevel());
-    }
-    if (!s.ok()) {
-      // TODO: Set status for individual keys appropriately
-      for (auto iter = file_range.begin(); iter != file_range.end(); ++iter) {
-        *iter->s = s;
-        file_range.MarkKeyDone(iter);
-      }
-      return;
-    }
-    uint64_t batch_size = 0;
-    for (auto iter = file_range.begin(); s.ok() && iter != file_range.end();
-         ++iter) {
-      GetContext& get_context = *iter->get_context;
-      Status* status = iter->s;
-      // The Status in the KeyContext takes precedence over GetContext state
-      // Status may be an error if there were any IO errors in the table
-      // reader. We never expect Status to be NotFound(), as that is
-      // determined by get_context
-      assert(!status->IsNotFound());
-      if (!status->ok()) {
-        file_range.MarkKeyDone(iter);
-        continue;
+  while (!fp.IsSearchEnded()) {
+    // This will be set to true later if we actually look up in a file in L0.
+    // For per level stats purposes, an L0 file is treated as a level
+    bool dump_stats_for_l0_file = false;
+
+    // Avoid using the coroutine version if we're looking in a L0 file, since
+    // L0 files won't be parallelized anyway. The regular synchronous version
+    // is faster.
+    if (!read_options.async_io || !using_coroutines() ||
+        fp.GetHitFileLevel() == 0 || !fp.RemainingOverlapInLevel()) {
+      if (f) {
+        // Call MultiGetFromSST for looking up a single file
+        s = MultiGetFromSST(read_options, fp.CurrentFileRange(),
+                            fp.GetHitFileLevel(), fp.IsHitFileLastInLevel(), f,
+                            blob_rqs, num_filter_read, num_index_read,
+                            num_data_read, num_sst_read);
+        if (fp.GetHitFileLevel() == 0) {
+          dump_stats_for_l0_file = true;
+        }
       }
-
-      if (get_context.sample()) {
-        sample_file_read_inc(f->file_metadata);
+      if (s.ok()) {
+        f = fp.GetNextFileInLevel();
       }
-      batch_size++;
-      num_index_read += get_context.get_context_stats_.num_index_read;
-      num_filter_read += get_context.get_context_stats_.num_filter_read;
-      num_data_read += get_context.get_context_stats_.num_data_read;
-      num_sst_read += get_context.get_context_stats_.num_sst_read;
-      // Reset these stats since they're specific to a level
-      get_context.get_context_stats_.num_index_read = 0;
-      get_context.get_context_stats_.num_filter_read = 0;
-      get_context.get_context_stats_.num_data_read = 0;
-      get_context.get_context_stats_.num_sst_read = 0;
-
-      // report the counters before returning
-      if (get_context.State() != GetContext::kNotFound &&
-          get_context.State() != GetContext::kMerge &&
-          db_statistics_ != nullptr) {
-        get_context.ReportCounters();
-      } else {
-        if (iter->max_covering_tombstone_seq > 0) {
-          // The remaining files we look at will only contain covered keys, so
-          // we stop here for this key
-          file_picker_range.SkipKey(iter);
+#if USE_COROUTINES
+    } else {
+      std::vector<folly::coro::Task<Status>> mget_tasks;
+      while (f != nullptr) {
+        mget_tasks.emplace_back(MultiGetFromSSTCoroutine(
+            read_options, fp.CurrentFileRange(), fp.GetHitFileLevel(),
+            fp.IsHitFileLastInLevel(), f, blob_rqs, num_filter_read,
+            num_index_read, num_data_read, num_sst_read));
+        if (fp.KeyMaySpanNextFile()) {
+          break;
         }
+        f = fp.GetNextFileInLevel();
       }
-      switch (get_context.State()) {
-        case GetContext::kNotFound:
-          // Keep searching in other files
-          break;
-        case GetContext::kMerge:
-          // TODO: update per-level perfcontext user_key_return_count for kMerge
-          break;
-        case GetContext::kFound:
-          if (fp.GetHitFileLevel() == 0) {
-            RecordTick(db_statistics_, GET_HIT_L0);
-          } else if (fp.GetHitFileLevel() == 1) {
-            RecordTick(db_statistics_, GET_HIT_L1);
-          } else if (fp.GetHitFileLevel() >= 2) {
-            RecordTick(db_statistics_, GET_HIT_L2_AND_UP);
+      if (mget_tasks.size() > 0) {
+        // Collect all results so far
+        std::vector<Status> statuses = folly::coro::blockingWait(
+            folly::coro::collectAllRange(std::move(mget_tasks))
+                .scheduleOn(&range->context()->executor()));
+        for (Status stat : statuses) {
+          if (!stat.ok()) {
+            s = stat;
           }
+        }
 
-          PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1,
-                                    fp.GetHitFileLevel());
-
-          file_range.MarkKeyDone(iter);
-
-          if (iter->is_blob_index) {
-            if (iter->value) {
-              TEST_SYNC_POINT_CALLBACK("Version::MultiGet::TamperWithBlobIndex",
-                                       &(*iter));
-
-              const Slice& blob_index_slice = *(iter->value);
-              BlobIndex blob_index;
-              Status tmp_s = blob_index.DecodeFrom(blob_index_slice);
-              if (tmp_s.ok()) {
-                const uint64_t blob_file_num = blob_index.file_number();
-                blob_rqs[blob_file_num].emplace_back(
-                    std::make_pair(blob_index, std::cref(*iter)));
-              } else {
-                *(iter->s) = tmp_s;
-              }
-            }
-          } else {
-            file_range.AddValueSize(iter->value->size());
-            if (file_range.GetValueSize() >
-                read_options.value_size_soft_limit) {
-              s = Status::Aborted();
-              break;
-            }
-          }
-          continue;
-        case GetContext::kDeleted:
-          // Use empty error message for speed
-          *status = Status::NotFound();
-          file_range.MarkKeyDone(iter);
-          continue;
-        case GetContext::kCorrupt:
-          *status =
-              Status::Corruption("corrupted key for ", iter->lkey->user_key());
-          file_range.MarkKeyDone(iter);
-          continue;
-        case GetContext::kUnexpectedBlobIndex:
-          ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index.");
-          *status = Status::NotSupported(
-              "Encounter unexpected blob index. Please open DB with "
-              "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
-          file_range.MarkKeyDone(iter);
-          continue;
+        if (s.ok() && fp.KeyMaySpanNextFile()) {
+          f = fp.GetNextFileInLevel();
+        }
       }
+#endif  // USE_COROUTINES
     }
-
-    RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size);
+    // If bad status or we found final result for all the keys
     if (!s.ok() || file_picker_range.empty()) {
       break;
     }
-    f = fp.GetNextFile();
+    if (!f) {
+      // Reached the end of this level. Prepare the next level
+      fp.PrepareNextLevelForSearch();
+      if (!fp.IsSearchEnded()) {
+        // Its possible there is no overlap on this level and f is nullptr
+        f = fp.GetNextFileInLevel();
+      }
+      if (dump_stats_for_l0_file ||
+          (prev_level != 0 && prev_level != (int)fp.GetHitFileLevel())) {
+        // Dump the stats if the search has moved to the next level and
+        // reset for next level.
+        if (num_sst_read || (num_filter_read + num_index_read)) {
+          RecordInHistogram(db_statistics_,
+                            NUM_INDEX_AND_FILTER_BLOCKS_READ_PER_LEVEL,
+                            num_index_read + num_filter_read);
+          RecordInHistogram(db_statistics_, NUM_DATA_BLOCKS_READ_PER_LEVEL,
+                            num_data_read);
+          RecordInHistogram(db_statistics_, NUM_SST_READ_PER_LEVEL,
+                            num_sst_read);
+        }
+        num_filter_read = 0;
+        num_index_read = 0;
+        num_data_read = 0;
+        num_sst_read = 0;
+      }
+      prev_level = fp.GetHitFileLevel();
+    }
   }
 
   // Dump stats for most recent level
@@ -3986,7 +3978,7 @@ std::string Version::DebugString(bool he
       }
       if (print_stats) {
         r.append("(");
-        r.append(ToString(
+        r.append(std::to_string(
             files[i]->stats.num_reads_sampled.load(std::memory_order_relaxed)));
         r.append(")");
       }
@@ -5492,13 +5484,14 @@ Status VersionSet::WriteCurrentStateToMa
         for (const auto& f : level_files) {
           assert(f);
 
-          edit.AddFile(
-              level, f->fd.GetNumber(), f->fd.GetPathId(), f->fd.GetFileSize(),
-              f->smallest, f->largest, f->fd.smallest_seqno,
-              f->fd.largest_seqno, f->marked_for_compaction, f->temperature,
-              f->oldest_blob_file_number, f->oldest_ancester_time,
-              f->file_creation_time, f->file_checksum,
-              f->file_checksum_func_name, f->min_timestamp, f->max_timestamp);
+          edit.AddFile(level, f->fd.GetNumber(), f->fd.GetPathId(),
+                       f->fd.GetFileSize(), f->smallest, f->largest,
+                       f->fd.smallest_seqno, f->fd.largest_seqno,
+                       f->marked_for_compaction, f->temperature,
+                       f->oldest_blob_file_number, f->oldest_ancester_time,
+                       f->file_creation_time, f->file_checksum,
+                       f->file_checksum_func_name, f->min_timestamp,
+                       f->max_timestamp, f->unique_id);
         }
       }
 
diff -pruN 7.2.2-5/db/version_set.h 7.3.1-2/db/version_set.h
--- 7.2.2-5/db/version_set.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_set.h	2022-06-08 21:08:16.000000000 +0000
@@ -54,6 +54,7 @@
 #include "table/get_context.h"
 #include "table/multiget_context.h"
 #include "trace_replay/block_cache_tracer.h"
+#include "util/coro_utils.h"
 #include "util/hash_containers.h"
 
 namespace ROCKSDB_NAMESPACE {
@@ -851,6 +852,8 @@ class Version {
 
   const MutableCFOptions& GetMutableCFOptions() { return mutable_cf_options_; }
 
+  Status VerifySstUniqueIds() const;
+
  private:
   Env* env_;
   SystemClock* clock_;
@@ -882,6 +885,14 @@ class Version {
   // This accumulated stats will be used in compaction.
   void UpdateAccumulatedStats();
 
+  DECLARE_SYNC_AND_ASYNC(
+      /* ret_type */ Status, /* func_name */ MultiGetFromSST,
+      const ReadOptions& read_options, MultiGetRange file_range,
+      int hit_file_level, bool is_hit_file_last_in_level, FdWithKeyRange* f,
+      std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
+      uint64_t& num_filter_read, uint64_t& num_index_read,
+      uint64_t& num_data_read, uint64_t& num_sst_read);
+
   ColumnFamilyData* cfd_;  // ColumnFamilyData to which this Version belongs
   Logger* info_log_;
   Statistics* db_statistics_;
@@ -1213,7 +1224,7 @@ class VersionSet {
   // new_log_number_for_empty_cf.
   uint64_t PreComputeMinLogNumberWithUnflushedData(
       uint64_t new_log_number_for_empty_cf) const {
-    uint64_t min_log_num = port::kMaxUint64;
+    uint64_t min_log_num = std::numeric_limits<uint64_t>::max();
     for (auto cfd : *column_family_set_) {
       // It's safe to ignore dropped column families here:
       // cfd->IsDropped() becomes true after the drop is persisted in MANIFEST.
@@ -1229,7 +1240,7 @@ class VersionSet {
   // file, except data from `cfd_to_skip`.
   uint64_t PreComputeMinLogNumberWithUnflushedData(
       const ColumnFamilyData* cfd_to_skip) const {
-    uint64_t min_log_num = port::kMaxUint64;
+    uint64_t min_log_num = std::numeric_limits<uint64_t>::max();
     for (auto cfd : *column_family_set_) {
       if (cfd == cfd_to_skip) {
         continue;
@@ -1246,7 +1257,7 @@ class VersionSet {
   // file, except data from `cfds_to_skip`.
   uint64_t PreComputeMinLogNumberWithUnflushedData(
       const std::unordered_set<const ColumnFamilyData*>& cfds_to_skip) const {
-    uint64_t min_log_num = port::kMaxUint64;
+    uint64_t min_log_num = std::numeric_limits<uint64_t>::max();
     for (auto cfd : *column_family_set_) {
       if (cfds_to_skip.count(cfd)) {
         continue;
diff -pruN 7.2.2-5/db/version_set_sync_and_async.h 7.3.1-2/db/version_set_sync_and_async.h
--- 7.2.2-5/db/version_set_sync_and_async.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/db/version_set_sync_and_async.h	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,154 @@
+//  Copyright (c) Meta Platforms, Inc. and its affiliates. All Rights Reserved.
+//  This source code is licensed under both the GPLv2 (found in the
+//  COPYING file in the root directory) and Apache 2.0 License
+//  (found in the LICENSE.Apache file in the root directory).
+
+#include "util/coro_utils.h"
+
+#if defined(WITHOUT_COROUTINES) || \
+    (defined(USE_COROUTINES) && defined(WITH_COROUTINES))
+
+namespace ROCKSDB_NAMESPACE {
+
+// Lookup a batch of keys in a single SST file
+DEFINE_SYNC_AND_ASYNC(Status, Version::MultiGetFromSST)
+(const ReadOptions& read_options, MultiGetRange file_range, int hit_file_level,
+ bool is_hit_file_last_in_level, FdWithKeyRange* f,
+ std::unordered_map<uint64_t, BlobReadRequests>& blob_rqs,
+ uint64_t& num_filter_read, uint64_t& num_index_read, uint64_t& num_data_read,
+ uint64_t& num_sst_read) {
+  bool timer_enabled = GetPerfLevel() >= PerfLevel::kEnableTimeExceptForMutex &&
+                       get_perf_context()->per_level_perf_context_enabled;
+
+  Status s;
+  StopWatchNano timer(clock_, timer_enabled /* auto_start */);
+  s = CO_AWAIT(table_cache_->MultiGet)(
+      read_options, *internal_comparator(), *f->file_metadata, &file_range,
+      mutable_cf_options_.prefix_extractor,
+      cfd_->internal_stats()->GetFileReadHist(hit_file_level),
+      IsFilterSkipped(static_cast<int>(hit_file_level),
+                      is_hit_file_last_in_level),
+      hit_file_level);
+  // TODO: examine the behavior for corrupted key
+  if (timer_enabled) {
+    PERF_COUNTER_BY_LEVEL_ADD(get_from_table_nanos, timer.ElapsedNanos(),
+                              hit_file_level);
+  }
+  if (!s.ok()) {
+    // TODO: Set status for individual keys appropriately
+    for (auto iter = file_range.begin(); iter != file_range.end(); ++iter) {
+      *iter->s = s;
+      file_range.MarkKeyDone(iter);
+    }
+    CO_RETURN s;
+  }
+  uint64_t batch_size = 0;
+  for (auto iter = file_range.begin(); s.ok() && iter != file_range.end();
+       ++iter) {
+    GetContext& get_context = *iter->get_context;
+    Status* status = iter->s;
+    // The Status in the KeyContext takes precedence over GetContext state
+    // Status may be an error if there were any IO errors in the table
+    // reader. We never expect Status to be NotFound(), as that is
+    // determined by get_context
+    assert(!status->IsNotFound());
+    if (!status->ok()) {
+      file_range.MarkKeyDone(iter);
+      continue;
+    }
+
+    if (get_context.sample()) {
+      sample_file_read_inc(f->file_metadata);
+    }
+    batch_size++;
+    num_index_read += get_context.get_context_stats_.num_index_read;
+    num_filter_read += get_context.get_context_stats_.num_filter_read;
+    num_data_read += get_context.get_context_stats_.num_data_read;
+    num_sst_read += get_context.get_context_stats_.num_sst_read;
+    // Reset these stats since they're specific to a level
+    get_context.get_context_stats_.num_index_read = 0;
+    get_context.get_context_stats_.num_filter_read = 0;
+    get_context.get_context_stats_.num_data_read = 0;
+    get_context.get_context_stats_.num_sst_read = 0;
+
+    // report the counters before returning
+    if (get_context.State() != GetContext::kNotFound &&
+        get_context.State() != GetContext::kMerge &&
+        db_statistics_ != nullptr) {
+      get_context.ReportCounters();
+    } else {
+      if (iter->max_covering_tombstone_seq > 0) {
+        // The remaining files we look at will only contain covered keys, so
+        // we stop here for this key
+        file_range.SkipKey(iter);
+      }
+    }
+    switch (get_context.State()) {
+      case GetContext::kNotFound:
+        // Keep searching in other files
+        break;
+      case GetContext::kMerge:
+        // TODO: update per-level perfcontext user_key_return_count for kMerge
+        break;
+      case GetContext::kFound:
+        if (hit_file_level == 0) {
+          RecordTick(db_statistics_, GET_HIT_L0);
+        } else if (hit_file_level == 1) {
+          RecordTick(db_statistics_, GET_HIT_L1);
+        } else if (hit_file_level >= 2) {
+          RecordTick(db_statistics_, GET_HIT_L2_AND_UP);
+        }
+
+        PERF_COUNTER_BY_LEVEL_ADD(user_key_return_count, 1, hit_file_level);
+
+        file_range.MarkKeyDone(iter);
+
+        if (iter->is_blob_index) {
+          if (iter->value) {
+            TEST_SYNC_POINT_CALLBACK("Version::MultiGet::TamperWithBlobIndex",
+                                     &(*iter));
+
+            const Slice& blob_index_slice = *(iter->value);
+            BlobIndex blob_index;
+            Status tmp_s = blob_index.DecodeFrom(blob_index_slice);
+            if (tmp_s.ok()) {
+              const uint64_t blob_file_num = blob_index.file_number();
+              blob_rqs[blob_file_num].emplace_back(
+                  std::make_pair(blob_index, std::cref(*iter)));
+            } else {
+              *(iter->s) = tmp_s;
+            }
+          }
+        } else {
+          file_range.AddValueSize(iter->value->size());
+          if (file_range.GetValueSize() > read_options.value_size_soft_limit) {
+            s = Status::Aborted();
+            break;
+          }
+        }
+        continue;
+      case GetContext::kDeleted:
+        // Use empty error message for speed
+        *status = Status::NotFound();
+        file_range.MarkKeyDone(iter);
+        continue;
+      case GetContext::kCorrupt:
+        *status =
+            Status::Corruption("corrupted key for ", iter->lkey->user_key());
+        file_range.MarkKeyDone(iter);
+        continue;
+      case GetContext::kUnexpectedBlobIndex:
+        ROCKS_LOG_ERROR(info_log_, "Encounter unexpected blob index.");
+        *status = Status::NotSupported(
+            "Encounter unexpected blob index. Please open DB with "
+            "ROCKSDB_NAMESPACE::blob_db::BlobDB instead.");
+        file_range.MarkKeyDone(iter);
+        continue;
+    }
+  }
+
+  RecordInHistogram(db_statistics_, SST_BATCH_SIZE, batch_size);
+  CO_RETURN s;
+}
+}  // namespace ROCKSDB_NAMESPACE
+#endif
diff -pruN 7.2.2-5/db/version_set_test.cc 7.3.1-2/db/version_set_test.cc
--- 7.2.2-5/db/version_set_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/version_set_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -18,6 +18,7 @@
 #include "rocksdb/file_system.h"
 #include "table/block_based/block_based_table_factory.h"
 #include "table/mock_table.h"
+#include "table/unique_id_impl.h"
 #include "test_util/testharness.h"
 #include "test_util/testutil.h"
 #include "util/string_util.h"
@@ -49,7 +50,7 @@ class GenerateLevelFilesBriefTest : publ
         kInvalidBlobFileNumber, kUnknownOldestAncesterTime,
         kUnknownFileCreationTime, kUnknownFileChecksum,
         kUnknownFileChecksumFuncName, kDisableUserTimestamp,
-        kDisableUserTimestamp);
+        kDisableUserTimestamp, kNullUniqueId64x2);
     files_.push_back(f);
   }
 
@@ -158,7 +159,7 @@ class VersionStorageInfoTestBase : publi
         Temperature::kUnknown, oldest_blob_file_number,
         kUnknownOldestAncesterTime, kUnknownFileCreationTime,
         kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-        kDisableUserTimestamp, kDisableUserTimestamp);
+        kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
     f->compensated_file_size = file_size;
     vstorage_.AddFile(level, f);
   }
@@ -3222,11 +3223,11 @@ class VersionSetTestMissingFiles : publi
       s = fs_->GetFileSize(fname, IOOptions(), &file_size, nullptr);
       ASSERT_OK(s);
       ASSERT_NE(0, file_size);
-      file_metas->emplace_back(file_num, /*file_path_id=*/0, file_size, ikey,
-                               ikey, 0, 0, false, Temperature::kUnknown, 0, 0,
-                               0, kUnknownFileChecksum,
-                               kUnknownFileChecksumFuncName,
-                               kDisableUserTimestamp, kDisableUserTimestamp);
+      file_metas->emplace_back(
+          file_num, /*file_path_id=*/0, file_size, ikey, ikey, 0, 0, false,
+          Temperature::kUnknown, 0, 0, 0, kUnknownFileChecksum,
+          kUnknownFileChecksumFuncName, kDisableUserTimestamp,
+          kDisableUserTimestamp, kNullUniqueId64x2);
     }
   }
 
@@ -3282,7 +3283,7 @@ TEST_F(VersionSetTestMissingFiles, Manif
         file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
         largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
         kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-        kDisableUserTimestamp, kDisableUserTimestamp);
+        kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
     added_files.emplace_back(0, meta);
   }
   WriteFileAdditionAndDeletionToManifest(
@@ -3338,7 +3339,7 @@ TEST_F(VersionSetTestMissingFiles, Manif
         file_num, /*file_path_id=*/0, /*file_size=*/12, smallest_ikey,
         largest_ikey, 0, 0, false, Temperature::kUnknown, 0, 0, 0,
         kUnknownFileChecksum, kUnknownFileChecksumFuncName,
-        kDisableUserTimestamp, kDisableUserTimestamp);
+        kDisableUserTimestamp, kDisableUserTimestamp, kNullUniqueId64x2);
     added_files.emplace_back(0, meta);
   }
   WriteFileAdditionAndDeletionToManifest(
diff -pruN 7.2.2-5/db/wal_edit.h 7.3.1-2/db/wal_edit.h
--- 7.2.2-5/db/wal_edit.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/wal_edit.h	2022-06-08 21:08:16.000000000 +0000
@@ -44,7 +44,8 @@ class WalMetadata {
  private:
   // The size of WAL is unknown, used when the WAL is not synced yet or is
   // empty.
-  constexpr static uint64_t kUnknownWalSize = port::kMaxUint64;
+  constexpr static uint64_t kUnknownWalSize =
+      std::numeric_limits<uint64_t>::max();
 
   // Size of the most recently synced WAL in bytes.
   uint64_t synced_size_bytes_ = kUnknownWalSize;
diff -pruN 7.2.2-5/db/wal_manager.cc 7.3.1-2/db/wal_manager.cc
--- 7.2.2-5/db/wal_manager.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/wal_manager.cc	2022-06-08 21:08:16.000000000 +0000
@@ -378,9 +378,8 @@ Status WalManager::ReadFirstRecord(const
   *sequence = 0;
   if (type != kAliveLogFile && type != kArchivedLogFile) {
     ROCKS_LOG_ERROR(db_options_.info_log, "[WalManger] Unknown file type %s",
-                    ToString(type).c_str());
-    return Status::NotSupported(
-        "File Type Not Known " + ToString(type));
+                    std::to_string(type).c_str());
+    return Status::NotSupported("File Type Not Known " + std::to_string(type));
   }
   {
     MutexLock l(&read_first_record_cache_mutex_);
diff -pruN 7.2.2-5/db/wal_manager_test.cc 7.3.1-2/db/wal_manager_test.cc
--- 7.2.2-5/db/wal_manager_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/wal_manager_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -94,7 +94,7 @@ class WalManagerTest : public testing::T
     for (int i = 1; i <= num_logs; ++i) {
       RollTheLog(true);
       for (int k = 0; k < entries_per_log; ++k) {
-        Put(ToString(k), std::string(1024, 'a'));
+        Put(std::to_string(k), std::string(1024, 'a'));
       }
     }
   }
diff -pruN 7.2.2-5/db/write_batch.cc 7.3.1-2/db/write_batch.cc
--- 7.2.2-5/db/write_batch.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/write_batch.cc	2022-06-08 21:08:16.000000000 +0000
@@ -152,14 +152,6 @@ struct SavePoints {
   std::stack<SavePoint, autovector<SavePoint>> stack;
 };
 
-WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes)
-    : content_flags_(0), max_bytes_(max_bytes), rep_() {
-  rep_.reserve((reserved_bytes > WriteBatchInternal::kHeader)
-                   ? reserved_bytes
-                   : WriteBatchInternal::kHeader);
-  rep_.resize(WriteBatchInternal::kHeader);
-}
-
 WriteBatch::WriteBatch(size_t reserved_bytes, size_t max_bytes,
                        size_t protection_bytes_per_key, size_t default_cf_ts_sz)
     : content_flags_(0),
@@ -580,14 +572,16 @@ Status WriteBatchInternal::Iterate(const
         s = handler->MarkBeginPrepare();
         assert(s.ok());
         empty_batch = false;
-        if (!handler->WriteAfterCommit()) {
+        if (handler->WriteAfterCommit() ==
+            WriteBatch::Handler::OptionState::kDisabled) {
           s = Status::NotSupported(
               "WriteCommitted txn tag when write_after_commit_ is disabled (in "
               "WritePrepared/WriteUnprepared mode). If it is not due to "
               "corruption, the WAL must be emptied before changing the "
               "WritePolicy.");
         }
-        if (handler->WriteBeforePrepare()) {
+        if (handler->WriteBeforePrepare() ==
+            WriteBatch::Handler::OptionState::kEnabled) {
           s = Status::NotSupported(
               "WriteCommitted txn tag when write_before_prepare_ is enabled "
               "(in WriteUnprepared mode). If it is not due to corruption, the "
@@ -600,7 +594,8 @@ Status WriteBatchInternal::Iterate(const
         s = handler->MarkBeginPrepare();
         assert(s.ok());
         empty_batch = false;
-        if (handler->WriteAfterCommit()) {
+        if (handler->WriteAfterCommit() ==
+            WriteBatch::Handler::OptionState::kEnabled) {
           s = Status::NotSupported(
               "WritePrepared/WriteUnprepared txn tag when write_after_commit_ "
               "is enabled (in default WriteCommitted mode). If it is not due "
@@ -614,13 +609,15 @@ Status WriteBatchInternal::Iterate(const
         s = handler->MarkBeginPrepare(true /* unprepared */);
         assert(s.ok());
         empty_batch = false;
-        if (handler->WriteAfterCommit()) {
+        if (handler->WriteAfterCommit() ==
+            WriteBatch::Handler::OptionState::kEnabled) {
           s = Status::NotSupported(
               "WriteUnprepared txn tag when write_after_commit_ is enabled (in "
               "default WriteCommitted mode). If it is not due to corruption, "
               "the WAL must be emptied before changing the WritePolicy.");
         }
-        if (!handler->WriteBeforePrepare()) {
+        if (handler->WriteBeforePrepare() ==
+            WriteBatch::Handler::OptionState::kDisabled) {
           s = Status::NotSupported(
               "WriteUnprepared txn tag when write_before_prepare_ is disabled "
               "(in WriteCommitted/WritePrepared mode). If it is not due to "
@@ -748,10 +745,10 @@ Status CheckColumnFamilyTimestampSize(Co
 
 Status WriteBatchInternal::Put(WriteBatch* b, uint32_t column_family_id,
                                const Slice& key, const Slice& value) {
-  if (key.size() > size_t{port::kMaxUint32}) {
+  if (key.size() > size_t{std::numeric_limits<uint32_t>::max()}) {
     return Status::InvalidArgument("key is too large");
   }
-  if (value.size() > size_t{port::kMaxUint32}) {
+  if (value.size() > size_t{std::numeric_limits<uint32_t>::max()}) {
     return Status::InvalidArgument("value is too large");
   }
 
@@ -828,7 +825,7 @@ Status WriteBatchInternal::CheckSlicePar
   for (int i = 0; i < key.num_parts; ++i) {
     total_key_bytes += key.parts[i].size();
   }
-  if (total_key_bytes >= size_t{port::kMaxUint32}) {
+  if (total_key_bytes >= size_t{std::numeric_limits<uint32_t>::max()}) {
     return Status::InvalidArgument("key is too large");
   }
 
@@ -836,7 +833,7 @@ Status WriteBatchInternal::CheckSlicePar
   for (int i = 0; i < value.num_parts; ++i) {
     total_value_bytes += value.parts[i].size();
   }
-  if (total_value_bytes >= size_t{port::kMaxUint32}) {
+  if (total_value_bytes >= size_t{std::numeric_limits<uint32_t>::max()}) {
     return Status::InvalidArgument("value is too large");
   }
   return Status::OK();
@@ -1295,10 +1292,10 @@ Status WriteBatch::DeleteRange(ColumnFam
 
 Status WriteBatchInternal::Merge(WriteBatch* b, uint32_t column_family_id,
                                  const Slice& key, const Slice& value) {
-  if (key.size() > size_t{port::kMaxUint32}) {
+  if (key.size() > size_t{std::numeric_limits<uint32_t>::max()}) {
     return Status::InvalidArgument("key is too large");
   }
-  if (value.size() > size_t{port::kMaxUint32}) {
+  if (value.size() > size_t{std::numeric_limits<uint32_t>::max()}) {
     return Status::InvalidArgument("value is too large");
   }
 
@@ -1494,6 +1491,8 @@ Status WriteBatch::UpdateTimestamps(
   return s;
 }
 
+namespace {
+
 class MemTableInserter : public WriteBatch::Handler {
 
   SequenceNumber sequence_;
@@ -1581,9 +1580,24 @@ class MemTableInserter : public WriteBat
     return res;
   }
 
+  void DecrementProtectionInfoIdxForTryAgain() {
+    if (prot_info_ != nullptr) --prot_info_idx_;
+  }
+
+  void ResetProtectionInfo() {
+    prot_info_idx_ = 0;
+    prot_info_ = nullptr;
+  }
+
  protected:
-  bool WriteBeforePrepare() const override { return write_before_prepare_; }
-  bool WriteAfterCommit() const override { return write_after_commit_; }
+  Handler::OptionState WriteBeforePrepare() const override {
+    return write_before_prepare_ ? Handler::OptionState::kEnabled
+                                 : Handler::OptionState::kDisabled;
+  }
+  Handler::OptionState WriteAfterCommit() const override {
+    return write_after_commit_ ? Handler::OptionState::kEnabled
+                               : Handler::OptionState::kDisabled;
+  }
 
  public:
   // cf_mems should not be shared with concurrent inserters
@@ -1871,15 +1885,25 @@ class MemTableInserter : public WriteBat
   Status PutCF(uint32_t column_family_id, const Slice& key,
                const Slice& value) override {
     const auto* kv_prot_info = NextProtectionInfo();
+    Status ret_status;
     if (kv_prot_info != nullptr) {
       // Memtable needs seqno, doesn't need CF ID
       auto mem_kv_prot_info =
           kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
-      return PutCFImpl(column_family_id, key, value, kTypeValue,
-                       &mem_kv_prot_info);
+      ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
+                             &mem_kv_prot_info);
+    } else {
+      ret_status = PutCFImpl(column_family_id, key, value, kTypeValue,
+                             nullptr /* kv_prot_info */);
     }
-    return PutCFImpl(column_family_id, key, value, kTypeValue,
-                     nullptr /* kv_prot_info */);
+    // TODO: this assumes that if TryAgain status is returned to the caller,
+    // the operation is actually tried again. The proper way to do this is to
+    // pass a `try_again` parameter to the operation itself and decrement
+    // prot_info_idx_ based on that
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
+    return ret_status;
   }
 
   Status DeleteImpl(uint32_t /*column_family_id*/, const Slice& key,
@@ -1926,6 +1950,9 @@ class MemTableInserter : public WriteBat
       } else if (ret_status.ok()) {
         MaybeAdvanceSeq(false /* batch_boundary */);
       }
+      if (UNLIKELY(ret_status.IsTryAgain())) {
+        DecrementProtectionInfoIdxForTryAgain();
+      }
       return ret_status;
     }
 
@@ -1957,6 +1984,9 @@ class MemTableInserter : public WriteBat
       ret_status =
           WriteBatchInternal::Delete(rebuilding_trx_, column_family_id, key);
     }
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
     return ret_status;
   }
 
@@ -1985,6 +2015,9 @@ class MemTableInserter : public WriteBat
       } else if (ret_status.ok()) {
         MaybeAdvanceSeq(false /* batch_boundary */);
       }
+      if (UNLIKELY(ret_status.IsTryAgain())) {
+        DecrementProtectionInfoIdxForTryAgain();
+      }
       return ret_status;
     }
     assert(ret_status.ok());
@@ -2009,6 +2042,9 @@ class MemTableInserter : public WriteBat
       ret_status = WriteBatchInternal::SingleDelete(rebuilding_trx_,
                                                     column_family_id, key);
     }
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
     return ret_status;
   }
 
@@ -2038,6 +2074,9 @@ class MemTableInserter : public WriteBat
       } else if (ret_status.ok()) {
         MaybeAdvanceSeq(false /* batch_boundary */);
       }
+      if (UNLIKELY(ret_status.IsTryAgain())) {
+        DecrementProtectionInfoIdxForTryAgain();
+      }
       return ret_status;
     }
     assert(ret_status.ok());
@@ -2092,6 +2131,9 @@ class MemTableInserter : public WriteBat
       ret_status = WriteBatchInternal::DeleteRange(
           rebuilding_trx_, column_family_id, begin_key, end_key);
     }
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
     return ret_status;
   }
 
@@ -2121,6 +2163,9 @@ class MemTableInserter : public WriteBat
       } else if (ret_status.ok()) {
         MaybeAdvanceSeq(false /* batch_boundary */);
       }
+      if (UNLIKELY(ret_status.IsTryAgain())) {
+        DecrementProtectionInfoIdxForTryAgain();
+      }
       return ret_status;
     }
     assert(ret_status.ok());
@@ -2242,23 +2287,31 @@ class MemTableInserter : public WriteBat
       ret_status = WriteBatchInternal::Merge(rebuilding_trx_, column_family_id,
                                              key, value);
     }
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
     return ret_status;
   }
 
   Status PutBlobIndexCF(uint32_t column_family_id, const Slice& key,
                         const Slice& value) override {
     const auto* kv_prot_info = NextProtectionInfo();
+    Status ret_status;
     if (kv_prot_info != nullptr) {
       // Memtable needs seqno, doesn't need CF ID
       auto mem_kv_prot_info =
           kv_prot_info->StripC(column_family_id).ProtectS(sequence_);
       // Same as PutCF except for value type.
-      return PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
-                       &mem_kv_prot_info);
+      ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
+                             &mem_kv_prot_info);
     } else {
-      return PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
-                       nullptr /* kv_prot_info */);
+      ret_status = PutCFImpl(column_family_id, key, value, kTypeBlobIndex,
+                             nullptr /* kv_prot_info */);
     }
+    if (UNLIKELY(ret_status.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
+    return ret_status;
   }
 
   void CheckMemtableFull() {
@@ -2401,6 +2454,7 @@ class MemTableInserter : public WriteBat
           const auto& batch_info = trx->batches_.begin()->second;
           // all inserts must reference this trx log number
           log_number_ref_ = batch_info.log_number_;
+          ResetProtectionInfo();
           s = batch_info.batch_->Iterate(this);
           log_number_ref_ = 0;
         }
@@ -2422,6 +2476,10 @@ class MemTableInserter : public WriteBat
     const bool batch_boundry = true;
     MaybeAdvanceSeq(batch_boundry);
 
+    if (UNLIKELY(s.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
+
     return s;
   }
 
@@ -2466,6 +2524,7 @@ class MemTableInserter : public WriteBat
                 return ucmp->timestamp_size();
               });
           if (s.ok()) {
+            ResetProtectionInfo();
             s = batch_info.batch_->Iterate(this);
             log_number_ref_ = 0;
           }
@@ -2488,6 +2547,10 @@ class MemTableInserter : public WriteBat
     constexpr bool batch_boundary = true;
     MaybeAdvanceSeq(batch_boundary);
 
+    if (UNLIKELY(s.IsTryAgain())) {
+      DecrementProtectionInfoIdxForTryAgain();
+    }
+
     return s;
   }
 
@@ -2523,6 +2586,8 @@ class MemTableInserter : public WriteBat
   }
 };
 
+}  // namespace
+
 // This function can only be called in these conditions:
 // 1) During Recovery()
 // 2) During Write(), in a single-threaded write thread
@@ -2613,11 +2678,94 @@ Status WriteBatchInternal::InsertInto(
   return s;
 }
 
+namespace {
+
+// This class updates protection info for a WriteBatch.
+class ProtectionInfoUpdater : public WriteBatch::Handler {
+ public:
+  explicit ProtectionInfoUpdater(WriteBatch::ProtectionInfo* prot_info)
+      : prot_info_(prot_info) {}
+
+  ~ProtectionInfoUpdater() override {}
+
+  Status PutCF(uint32_t cf, const Slice& key, const Slice& val) override {
+    return UpdateProtInfo(cf, key, val, kTypeValue);
+  }
+
+  Status DeleteCF(uint32_t cf, const Slice& key) override {
+    return UpdateProtInfo(cf, key, "", kTypeDeletion);
+  }
+
+  Status SingleDeleteCF(uint32_t cf, const Slice& key) override {
+    return UpdateProtInfo(cf, key, "", kTypeSingleDeletion);
+  }
+
+  Status DeleteRangeCF(uint32_t cf, const Slice& begin_key,
+                       const Slice& end_key) override {
+    return UpdateProtInfo(cf, begin_key, end_key, kTypeRangeDeletion);
+  }
+
+  Status MergeCF(uint32_t cf, const Slice& key, const Slice& val) override {
+    return UpdateProtInfo(cf, key, val, kTypeMerge);
+  }
+
+  Status PutBlobIndexCF(uint32_t cf, const Slice& key,
+                        const Slice& val) override {
+    return UpdateProtInfo(cf, key, val, kTypeBlobIndex);
+  }
+
+  Status MarkBeginPrepare(bool /* unprepare */) override {
+    return Status::OK();
+  }
+
+  Status MarkEndPrepare(const Slice& /* xid */) override {
+    return Status::OK();
+  }
+
+  Status MarkCommit(const Slice& /* xid */) override { return Status::OK(); }
+
+  Status MarkCommitWithTimestamp(const Slice& /* xid */,
+                                 const Slice& /* ts */) override {
+    return Status::OK();
+  }
+
+  Status MarkRollback(const Slice& /* xid */) override { return Status::OK(); }
+
+  Status MarkNoop(bool /* empty_batch */) override { return Status::OK(); }
+
+ private:
+  Status UpdateProtInfo(uint32_t cf, const Slice& key, const Slice& val,
+                        const ValueType op_type) {
+    if (prot_info_) {
+      prot_info_->entries_.emplace_back(
+          ProtectionInfo64().ProtectKVO(key, val, op_type).ProtectC(cf));
+    }
+    return Status::OK();
+  }
+
+  // No copy or move.
+  ProtectionInfoUpdater(const ProtectionInfoUpdater&) = delete;
+  ProtectionInfoUpdater(ProtectionInfoUpdater&&) = delete;
+  ProtectionInfoUpdater& operator=(const ProtectionInfoUpdater&) = delete;
+  ProtectionInfoUpdater& operator=(ProtectionInfoUpdater&&) = delete;
+
+  WriteBatch::ProtectionInfo* const prot_info_ = nullptr;
+};
+
+}  // namespace
+
 Status WriteBatchInternal::SetContents(WriteBatch* b, const Slice& contents) {
   assert(contents.size() >= WriteBatchInternal::kHeader);
-  assert(b->prot_info_ == nullptr);
+
   b->rep_.assign(contents.data(), contents.size());
   b->content_flags_.store(ContentFlags::DEFERRED, std::memory_order_relaxed);
+
+  // If we have a prot_info_, update protection info entries for the batch.
+  if (b->prot_info_) {
+    ProtectionInfoUpdater prot_info_updater(b->prot_info_.get());
+    return b->Iterate(&prot_info_updater);
+  }
+
   return Status::OK();
 }
 
diff -pruN 7.2.2-5/db/write_batch_test.cc 7.3.1-2/db/write_batch_test.cc
--- 7.2.2-5/db/write_batch_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/write_batch_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -118,7 +118,7 @@ static std::string PrintContents(WriteBa
           break;
       }
       state.append("@");
-      state.append(ToString(ikey.sequence));
+      state.append(std::to_string(ikey.sequence));
     }
     EXPECT_OK(iter->status());
   }
@@ -253,7 +253,7 @@ namespace {
       if (column_family_id == 0) {
         seen += "Put(" + key.ToString() + ", " + value.ToString() + ")";
       } else {
-        seen += "PutCF(" + ToString(column_family_id) + ", " +
+        seen += "PutCF(" + std::to_string(column_family_id) + ", " +
                 key.ToString() + ", " + value.ToString() + ")";
       }
       return Status::OK();
@@ -262,7 +262,7 @@ namespace {
       if (column_family_id == 0) {
         seen += "Delete(" + key.ToString() + ")";
       } else {
-        seen += "DeleteCF(" + ToString(column_family_id) + ", " +
+        seen += "DeleteCF(" + std::to_string(column_family_id) + ", " +
                 key.ToString() + ")";
       }
       return Status::OK();
@@ -272,7 +272,7 @@ namespace {
       if (column_family_id == 0) {
         seen += "SingleDelete(" + key.ToString() + ")";
       } else {
-        seen += "SingleDeleteCF(" + ToString(column_family_id) + ", " +
+        seen += "SingleDeleteCF(" + std::to_string(column_family_id) + ", " +
                 key.ToString() + ")";
       }
       return Status::OK();
@@ -283,7 +283,7 @@ namespace {
         seen += "DeleteRange(" + begin_key.ToString() + ", " +
                 end_key.ToString() + ")";
       } else {
-        seen += "DeleteRangeCF(" + ToString(column_family_id) + ", " +
+        seen += "DeleteRangeCF(" + std::to_string(column_family_id) + ", " +
                 begin_key.ToString() + ", " + end_key.ToString() + ")";
       }
       return Status::OK();
@@ -293,7 +293,7 @@ namespace {
       if (column_family_id == 0) {
         seen += "Merge(" + key.ToString() + ", " + value.ToString() + ")";
       } else {
-        seen += "MergeCF(" + ToString(column_family_id) + ", " +
+        seen += "MergeCF(" + std::to_string(column_family_id) + ", " +
                 key.ToString() + ", " + value.ToString() + ")";
       }
       return Status::OK();
diff -pruN 7.2.2-5/db/write_controller.h 7.3.1-2/db/write_controller.h
--- 7.2.2-5/db/write_controller.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db/write_controller.h	2022-06-08 21:08:16.000000000 +0000
@@ -52,7 +52,7 @@ class WriteController {
   bool IsStopped() const;
   bool NeedsDelay() const { return total_delayed_.load() > 0; }
   bool NeedSpeedupCompaction() const {
-    return IsStopped() || NeedsDelay() || total_compaction_pressure_ > 0;
+    return IsStopped() || NeedsDelay() || total_compaction_pressure_.load() > 0;
   }
   // return how many microseconds the caller needs to sleep after the call
   // num_bytes: how many number of bytes to put into the DB.
diff -pruN 7.2.2-5/db_stress_tool/db_stress_common.cc 7.3.1-2/db_stress_tool/db_stress_common.cc
--- 7.2.2-5/db_stress_tool/db_stress_common.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_common.cc	2022-06-08 21:08:16.000000000 +0000
@@ -18,11 +18,9 @@
 
 ROCKSDB_NAMESPACE::Env* db_stress_listener_env = nullptr;
 ROCKSDB_NAMESPACE::Env* db_stress_env = nullptr;
-#ifndef NDEBUG
 // If non-null, injects read error at a rate specified by the
 // read_fault_one_in or write_fault_one_in flag
 std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
-#endif // NDEBUG
 enum ROCKSDB_NAMESPACE::CompressionType compression_type_e =
     ROCKSDB_NAMESPACE::kSnappyCompression;
 enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e =
diff -pruN 7.2.2-5/db_stress_tool/db_stress_common.h 7.3.1-2/db_stress_tool/db_stress_common.h
--- 7.2.2-5/db_stress_tool/db_stress_common.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_common.h	2022-06-08 21:08:16.000000000 +0000
@@ -67,6 +67,7 @@
 #include "util/random.h"
 #include "util/string_util.h"
 #include "utilities/blob_db/blob_db.h"
+#include "utilities/fault_injection_fs.h"
 #include "utilities/merge_operators.h"
 
 using GFLAGS_NAMESPACE::ParseCommandLineFlags;
@@ -107,6 +108,7 @@ DECLARE_double(memtable_prefix_bloom_siz
 DECLARE_bool(memtable_whole_key_filtering);
 DECLARE_int32(open_files);
 DECLARE_int64(compressed_cache_size);
+DECLARE_int32(compressed_cache_numshardbits);
 DECLARE_int32(compaction_style);
 DECLARE_int32(num_levels);
 DECLARE_int32(level0_file_num_compaction_trigger);
@@ -134,7 +136,9 @@ DECLARE_int32(set_in_place_one_in);
 DECLARE_int64(cache_size);
 DECLARE_int32(cache_numshardbits);
 DECLARE_bool(cache_index_and_filter_blocks);
-DECLARE_bool(reserve_table_reader_memory);
+DECLARE_bool(charge_compression_dictionary_building_buffer);
+DECLARE_bool(charge_filter_construction);
+DECLARE_bool(charge_table_reader);
 DECLARE_int32(top_level_index_pinning);
 DECLARE_int32(partition_pinning);
 DECLARE_int32(unpartitioned_pinning);
@@ -166,6 +170,8 @@ DECLARE_bool(mock_direct_io);
 DECLARE_bool(statistics);
 DECLARE_bool(sync);
 DECLARE_bool(use_fsync);
+DECLARE_uint64(bytes_per_sync);
+DECLARE_uint64(wal_bytes_per_sync);
 DECLARE_int32(kill_random_test);
 DECLARE_string(kill_exclude_prefixes);
 DECLARE_bool(disable_wal);
@@ -215,6 +221,7 @@ DECLARE_int32(compression_max_dict_bytes
 DECLARE_int32(compression_zstd_max_train_bytes);
 DECLARE_int32(compression_parallel_threads);
 DECLARE_uint64(compression_max_dict_buffer_bytes);
+DECLARE_bool(compression_use_zstd_dict_trainer);
 DECLARE_string(checksum_type);
 DECLARE_string(env_uri);
 DECLARE_string(fs_uri);
@@ -286,6 +293,7 @@ DECLARE_uint64(wp_commit_cache_bits);
 DECLARE_bool(adaptive_readahead);
 DECLARE_bool(async_io);
 DECLARE_string(wal_compression);
+DECLARE_bool(verify_sst_unique_id_in_manifest);
 
 constexpr long KB = 1024;
 constexpr int kRandomValueMaxFactor = 3;
@@ -294,12 +302,7 @@ constexpr int kValueMaxLen = 100;
 // wrapped posix environment
 extern ROCKSDB_NAMESPACE::Env* db_stress_env;
 extern ROCKSDB_NAMESPACE::Env* db_stress_listener_env;
-#ifndef NDEBUG
-namespace ROCKSDB_NAMESPACE {
-class FaultInjectionTestFS;
-}  // namespace ROCKSDB_NAMESPACE
 extern std::shared_ptr<ROCKSDB_NAMESPACE::FaultInjectionTestFS> fault_fs_guard;
-#endif
 
 extern enum ROCKSDB_NAMESPACE::CompressionType compression_type_e;
 extern enum ROCKSDB_NAMESPACE::CompressionType bottommost_compression_type_e;
diff -pruN 7.2.2-5/db_stress_tool/db_stress_compaction_filter.h 7.3.1-2/db_stress_tool/db_stress_compaction_filter.h
--- 7.2.2-5/db_stress_tool/db_stress_compaction_filter.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_compaction_filter.h	2022-06-08 21:08:16.000000000 +0000
@@ -46,11 +46,12 @@ class DbStressCompactionFilter : public
     // Reaching here means we acquired the lock.
 
     bool key_exists = state_->Exists(cf_id_, key_num);
+    const bool allow_overwrite = state_->AllowsOverwrite(key_num);
 
     key_mutex->Unlock();
 
     if (!key_exists) {
-      return Decision::kRemove;
+      return allow_overwrite ? Decision::kRemove : Decision::kPurge;
     }
     return Decision::kKeep;
   }
diff -pruN 7.2.2-5/db_stress_tool/db_stress_driver.cc 7.3.1-2/db_stress_tool/db_stress_driver.cc
--- 7.2.2-5/db_stress_tool/db_stress_driver.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_driver.cc	2022-06-08 21:08:16.000000000 +0000
@@ -58,15 +58,16 @@ void ThreadBody(void* v) {
 
 bool RunStressTest(StressTest* stress) {
   SystemClock* clock = db_stress_env->GetSystemClock().get();
-  stress->InitDb();
   SharedState shared(db_stress_env, stress);
+  stress->InitDb(&shared);
   stress->FinishInitDb(&shared);
 
-#ifndef NDEBUG
   if (FLAGS_sync_fault_injection) {
     fault_fs_guard->SetFilesystemDirectWritable(false);
   }
-#endif
+  if (FLAGS_write_fault_one_in) {
+    fault_fs_guard->EnableWriteErrorInjection();
+  }
 
   uint32_t n = FLAGS_threads;
   uint64_t now = clock->NowMicros();
diff -pruN 7.2.2-5/db_stress_tool/db_stress_gflags.cc 7.3.1-2/db_stress_tool/db_stress_gflags.cc
--- 7.2.2-5/db_stress_tool/db_stress_gflags.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_gflags.cc	2022-06-08 21:08:16.000000000 +0000
@@ -187,9 +187,15 @@ DEFINE_int32(open_files, ROCKSDB_NAMESPA
              "Maximum number of files to keep open at the same time "
              "(use default if == 0)");
 
-DEFINE_int64(compressed_cache_size, -1,
+DEFINE_int64(compressed_cache_size, 0,
              "Number of bytes to use as a cache of compressed data."
-             " Negative means use default settings.");
+             " 0 means use default settings.");
+
+DEFINE_int32(
+    compressed_cache_numshardbits, -1,
+    "Number of shards for the compressed block cache is 2 ** "
+    "compressed_cache_numshardbits. Negative value means default settings. "
+    "This is applied only if compressed_cache_size is greater than 0.");
 
 DEFINE_int32(compaction_style, ROCKSDB_NAMESPACE::Options().compaction_style,
              "");
@@ -304,10 +310,20 @@ DEFINE_int32(cache_numshardbits, 6,
 DEFINE_bool(cache_index_and_filter_blocks, false,
             "True if indexes/filters should be cached in block cache.");
 
-DEFINE_bool(reserve_table_reader_memory, false,
-            "A dynamically updating charge to block cache, loosely based on "
-            "the actual memory usage of table reader, will occur to account "
-            "the memory, if block cache available.");
+DEFINE_bool(charge_compression_dictionary_building_buffer, false,
+            "Setting for "
+            "CacheEntryRoleOptions::charged of"
+            "CacheEntryRole::kCompressionDictionaryBuildingBuffer");
+
+DEFINE_bool(charge_filter_construction, false,
+            "Setting for "
+            "CacheEntryRoleOptions::charged of"
+            "CacheEntryRole::kFilterConstruction");
+
+DEFINE_bool(charge_table_reader, false,
+            "Setting for "
+            "CacheEntryRoleOptions::charged of"
+            "CacheEntryRole::kBlockBasedTableReader");
 
 DEFINE_int32(
     top_level_index_pinning,
@@ -516,6 +532,15 @@ DEFINE_bool(sync, false, "Sync all write
 
 DEFINE_bool(use_fsync, false, "If true, issue fsync instead of fdatasync");
 
+DEFINE_uint64(bytes_per_sync, ROCKSDB_NAMESPACE::Options().bytes_per_sync,
+              "If nonzero, sync SST file data incrementally after every "
+              "`bytes_per_sync` bytes are written");
+
+DEFINE_uint64(wal_bytes_per_sync,
+              ROCKSDB_NAMESPACE::Options().wal_bytes_per_sync,
+              "If nonzero, sync WAL file data incrementally after every "
+              "`bytes_per_sync` bytes are written");
+
 DEFINE_int32(kill_random_test, 0,
              "If non-zero, kill at various points in source code with "
              "probability 1/this");
@@ -727,6 +752,13 @@ DEFINE_uint64(compression_max_dict_buffe
               "Buffering limit for SST file data to sample for dictionary "
               "compression.");
 
+DEFINE_bool(
+    compression_use_zstd_dict_trainer, true,
+    "Use zstd's trainer to generate dictionary. If the options is false, "
+    "zstd's finalizeDictionary() API is used to generate dictionary. "
+    "ZSTD 1.4.5+ is required. If ZSTD 1.4.5+ is not linked with the binary, "
+    "this flag will have the default value true.");
+
 DEFINE_string(bottommost_compression_type, "disable",
               "Algorithm to use to compress bottommost level of the database. "
               "\"disable\" means disabling the feature");
@@ -927,4 +959,10 @@ DEFINE_bool(
 DEFINE_string(wal_compression, "none",
               "Algorithm to use for WAL compression. none to disable.");
 
+DEFINE_bool(
+    verify_sst_unique_id_in_manifest, false,
+    "Enable DB options `verify_sst_unique_id_in_manifest`, if true, during "
+    "DB-open try verifying the SST unique id between MANIFEST and SST "
+    "properties.");
+
 #endif  // GFLAGS
diff -pruN 7.2.2-5/db_stress_tool/db_stress_listener.cc 7.3.1-2/db_stress_tool/db_stress_listener.cc
--- 7.2.2-5/db_stress_tool/db_stress_listener.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_listener.cc	2022-06-08 21:08:16.000000000 +0000
@@ -173,7 +173,10 @@ void DbStressListener::VerifyTableFileUn
     const TableProperties& new_file_properties, const std::string& file_path) {
   // Verify unique ID
   std::string id;
-  Status s = GetUniqueIdFromTableProperties(new_file_properties, &id);
+  // Unit tests verify that GetUniqueIdFromTableProperties returns just a
+  // substring of this, and we're only going to pull out 64 bits, so using
+  // GetExtendedUniqueIdFromTableProperties is arguably stronger testing here.
+  Status s = GetExtendedUniqueIdFromTableProperties(new_file_properties, &id);
   if (!s.ok()) {
     fprintf(stderr, "Error getting SST unique id for %s: %s\n",
             file_path.c_str(), s.ToString().c_str());
diff -pruN 7.2.2-5/db_stress_tool/db_stress_shared_state.cc 7.3.1-2/db_stress_tool/db_stress_shared_state.cc
--- 7.2.2-5/db_stress_tool/db_stress_shared_state.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_shared_state.cc	2022-06-08 21:08:16.000000000 +0000
@@ -12,16 +12,6 @@
 #include "db_stress_tool/db_stress_shared_state.h"
 
 namespace ROCKSDB_NAMESPACE {
-const uint32_t SharedState::UNKNOWN_SENTINEL = 0xfffffffe;
-const uint32_t SharedState::DELETION_SENTINEL = 0xffffffff;
-#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
-#if defined(OS_SOLARIS)
-__thread bool SharedState::ignore_read_error;
-#else
 thread_local bool SharedState::ignore_read_error;
-#endif // OS_SOLARIS
-#else
-bool SharedState::ignore_read_error;
-#endif // ROCKSDB_SUPPORT_THREAD_LOCAL
 }  // namespace ROCKSDB_NAMESPACE
 #endif  // GFLAGS
diff -pruN 7.2.2-5/db_stress_tool/db_stress_shared_state.h 7.3.1-2/db_stress_tool/db_stress_shared_state.h
--- 7.2.2-5/db_stress_tool/db_stress_shared_state.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_shared_state.h	2022-06-08 21:08:16.000000000 +0000
@@ -45,23 +45,15 @@ class SharedState {
  public:
   // indicates a key may have any value (or not be present) as an operation on
   // it is incomplete.
-  static const uint32_t UNKNOWN_SENTINEL;
+  static constexpr uint32_t UNKNOWN_SENTINEL = 0xfffffffe;
   // indicates a key should definitely be deleted
-  static const uint32_t DELETION_SENTINEL;
+  static constexpr uint32_t DELETION_SENTINEL = 0xffffffff;
 
   // Errors when reading filter blocks are ignored, so we use a thread
   // local variable updated via sync points to keep track of errors injected
   // while reading filter blocks in order to ignore the Get/MultiGet result
   // for those calls
-#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
-#if defined(OS_SOLARIS)
-  static __thread bool ignore_read_error;
-#else
   static thread_local bool ignore_read_error;
-#endif // OS_SOLARIS
-#else
-  static bool ignore_read_error;
-#endif // ROCKSDB_SUPPORT_THREAD_LOCAL
 
   SharedState(Env* /*env*/, StressTest* stress_test)
       : cv_(&mu_),
@@ -81,36 +73,9 @@ class SharedState {
         stress_test_(stress_test),
         verification_failure_(false),
         should_stop_test_(false),
-        no_overwrite_ids_(FLAGS_column_families),
+        no_overwrite_ids_(GenerateNoOverwriteIds()),
         expected_state_manager_(nullptr),
         printing_verification_results_(false) {
-    // Pick random keys in each column family that will not experience
-    // overwrite
-
-    fprintf(stdout, "Choosing random keys with no overwrite\n");
-    Random64 rnd(seed_);
-    // Start with the identity permutation. Subsequent iterations of
-    // for loop below will start with perm of previous for loop
-    int64_t* permutation = new int64_t[max_key_];
-    for (int64_t i = 0; i < max_key_; i++) {
-      permutation[i] = i;
-    }
-    // Now do the Knuth shuffle
-    int64_t num_no_overwrite_keys = (max_key_ * FLAGS_nooverwritepercent) / 100;
-    // Only need to figure out first num_no_overwrite_keys of permutation
-    no_overwrite_ids_.reserve(num_no_overwrite_keys);
-    for (int64_t i = 0; i < num_no_overwrite_keys; i++) {
-      int64_t rand_index = i + rnd.Next() % (max_key_ - i);
-      // Swap i and rand_index;
-      int64_t temp = permutation[i];
-      permutation[i] = permutation[rand_index];
-      permutation[rand_index] = temp;
-      // Fill no_overwrite_ids_ with the first num_no_overwrite_keys of
-      // permutation
-      no_overwrite_ids_.insert(permutation[i]);
-    }
-    delete[] permutation;
-
     Status status;
     // TODO: We should introduce a way to explicitly disable verification
     // during shutdown. When that is disabled and FLAGS_expected_values_dir
@@ -160,13 +125,21 @@ class SharedState {
     for (int i = 0; i < FLAGS_column_families; ++i) {
       key_locks_[i].reset(new port::Mutex[num_locks]);
     }
-#ifndef NDEBUG
     if (FLAGS_read_fault_one_in) {
+#ifdef NDEBUG
+      // Unsupported in release mode because it relies on
+      // `IGNORE_STATUS_IF_ERROR` to distinguish faults not expected to lead to
+      // failure.
+      fprintf(stderr,
+              "Cannot set nonzero value for --read_fault_one_in in "
+              "release mode.");
+      exit(1);
+#else   // NDEBUG
       SyncPoint::GetInstance()->SetCallBack("FaultInjectionIgnoreError",
                                             IgnoreReadErrorCallback);
       SyncPoint::GetInstance()->EnableProcessing();
+#endif  // NDEBUG
     }
-#endif // NDEBUG
   }
 
   ~SharedState() {
@@ -293,7 +266,7 @@ class SharedState {
                                                 pending);
   }
 
-  bool AllowsOverwrite(int64_t key) {
+  bool AllowsOverwrite(int64_t key) const {
     return no_overwrite_ids_.find(key) == no_overwrite_ids_.end();
   }
 
@@ -335,6 +308,36 @@ class SharedState {
     ignore_read_error = true;
   }
 
+  // Pick random keys in each column family that will not experience overwrite.
+  std::unordered_set<int64_t> GenerateNoOverwriteIds() const {
+    fprintf(stdout, "Choosing random keys with no overwrite\n");
+    // Start with the identity permutation. Subsequent iterations of
+    // for loop below will start with perm of previous for loop
+    std::vector<int64_t> permutation(max_key_);
+    for (int64_t i = 0; i < max_key_; ++i) {
+      permutation[i] = i;
+    }
+    // Now do the Knuth shuffle
+    const int64_t num_no_overwrite_keys =
+        (max_key_ * FLAGS_nooverwritepercent) / 100;
+    // Only need to figure out first num_no_overwrite_keys of permutation
+    std::unordered_set<int64_t> ret;
+    ret.reserve(num_no_overwrite_keys);
+    Random64 rnd(seed_);
+    for (int64_t i = 0; i < num_no_overwrite_keys; i++) {
+      assert(i < max_key_);
+      int64_t rand_index = i + rnd.Next() % (max_key_ - i);
+      // Swap i and rand_index;
+      int64_t temp = permutation[i];
+      permutation[i] = permutation[rand_index];
+      permutation[rand_index] = temp;
+      // Fill no_overwrite_ids_ with the first num_no_overwrite_keys of
+      // permutation
+      ret.insert(permutation[i]);
+    }
+    return ret;
+  }
+
   port::Mutex mu_;
   port::CondVar cv_;
   const uint32_t seed_;
@@ -355,7 +358,7 @@ class SharedState {
   std::atomic<bool> should_stop_test_;
 
   // Keys that should not be overwritten
-  std::unordered_set<size_t> no_overwrite_ids_;
+  const std::unordered_set<int64_t> no_overwrite_ids_;
 
   std::unique_ptr<ExpectedStateManager> expected_state_manager_;
   // Cannot store `port::Mutex` directly in vector since it is not copyable
diff -pruN 7.2.2-5/db_stress_tool/db_stress_test_base.cc 7.3.1-2/db_stress_tool/db_stress_test_base.cc
--- 7.2.2-5/db_stress_tool/db_stress_test_base.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_test_base.cc	2022-06-08 21:08:16.000000000 +0000
@@ -8,6 +8,7 @@
 // found in the LICENSE file. See the AUTHORS file for names of contributors.
 //
 
+#include "util/compression.h"
 #ifdef GFLAGS
 #include "db_stress_tool/db_stress_common.h"
 #include "db_stress_tool/db_stress_compaction_filter.h"
@@ -57,7 +58,8 @@ std::shared_ptr<const FilterPolicy> Crea
 
 StressTest::StressTest()
     : cache_(NewCache(FLAGS_cache_size, FLAGS_cache_numshardbits)),
-      compressed_cache_(NewLRUCache(FLAGS_compressed_cache_size)),
+      compressed_cache_(NewLRUCache(FLAGS_compressed_cache_size,
+                                    FLAGS_compressed_cache_numshardbits)),
       filter_policy_(CreateFilterPolicy()),
       db_(nullptr),
 #ifndef ROCKSDB_LITE
@@ -185,69 +187,69 @@ bool StressTest::BuildOptionsTable() {
 
   std::unordered_map<std::string, std::vector<std::string>> options_tbl = {
       {"write_buffer_size",
-       {ToString(options_.write_buffer_size),
-        ToString(options_.write_buffer_size * 2),
-        ToString(options_.write_buffer_size * 4)}},
+       {std::to_string(options_.write_buffer_size),
+        std::to_string(options_.write_buffer_size * 2),
+        std::to_string(options_.write_buffer_size * 4)}},
       {"max_write_buffer_number",
-       {ToString(options_.max_write_buffer_number),
-        ToString(options_.max_write_buffer_number * 2),
-        ToString(options_.max_write_buffer_number * 4)}},
+       {std::to_string(options_.max_write_buffer_number),
+        std::to_string(options_.max_write_buffer_number * 2),
+        std::to_string(options_.max_write_buffer_number * 4)}},
       {"arena_block_size",
        {
-           ToString(options_.arena_block_size),
-           ToString(options_.write_buffer_size / 4),
-           ToString(options_.write_buffer_size / 8),
+           std::to_string(options_.arena_block_size),
+           std::to_string(options_.write_buffer_size / 4),
+           std::to_string(options_.write_buffer_size / 8),
        }},
-      {"memtable_huge_page_size", {"0", ToString(2 * 1024 * 1024)}},
+      {"memtable_huge_page_size", {"0", std::to_string(2 * 1024 * 1024)}},
       {"max_successive_merges", {"0", "2", "4"}},
       {"inplace_update_num_locks", {"100", "200", "300"}},
       // TODO(ljin): enable test for this option
       // {"disable_auto_compactions", {"100", "200", "300"}},
       {"level0_file_num_compaction_trigger",
        {
-           ToString(options_.level0_file_num_compaction_trigger),
-           ToString(options_.level0_file_num_compaction_trigger + 2),
-           ToString(options_.level0_file_num_compaction_trigger + 4),
+           std::to_string(options_.level0_file_num_compaction_trigger),
+           std::to_string(options_.level0_file_num_compaction_trigger + 2),
+           std::to_string(options_.level0_file_num_compaction_trigger + 4),
        }},
       {"level0_slowdown_writes_trigger",
        {
-           ToString(options_.level0_slowdown_writes_trigger),
-           ToString(options_.level0_slowdown_writes_trigger + 2),
-           ToString(options_.level0_slowdown_writes_trigger + 4),
+           std::to_string(options_.level0_slowdown_writes_trigger),
+           std::to_string(options_.level0_slowdown_writes_trigger + 2),
+           std::to_string(options_.level0_slowdown_writes_trigger + 4),
        }},
       {"level0_stop_writes_trigger",
        {
-           ToString(options_.level0_stop_writes_trigger),
-           ToString(options_.level0_stop_writes_trigger + 2),
-           ToString(options_.level0_stop_writes_trigger + 4),
+           std::to_string(options_.level0_stop_writes_trigger),
+           std::to_string(options_.level0_stop_writes_trigger + 2),
+           std::to_string(options_.level0_stop_writes_trigger + 4),
        }},
       {"max_compaction_bytes",
        {
-           ToString(options_.target_file_size_base * 5),
-           ToString(options_.target_file_size_base * 15),
-           ToString(options_.target_file_size_base * 100),
+           std::to_string(options_.target_file_size_base * 5),
+           std::to_string(options_.target_file_size_base * 15),
+           std::to_string(options_.target_file_size_base * 100),
        }},
       {"target_file_size_base",
        {
-           ToString(options_.target_file_size_base),
-           ToString(options_.target_file_size_base * 2),
-           ToString(options_.target_file_size_base * 4),
+           std::to_string(options_.target_file_size_base),
+           std::to_string(options_.target_file_size_base * 2),
+           std::to_string(options_.target_file_size_base * 4),
        }},
       {"target_file_size_multiplier",
        {
-           ToString(options_.target_file_size_multiplier),
+           std::to_string(options_.target_file_size_multiplier),
            "1",
            "2",
        }},
       {"max_bytes_for_level_base",
        {
-           ToString(options_.max_bytes_for_level_base / 2),
-           ToString(options_.max_bytes_for_level_base),
-           ToString(options_.max_bytes_for_level_base * 2),
+           std::to_string(options_.max_bytes_for_level_base / 2),
+           std::to_string(options_.max_bytes_for_level_base),
+           std::to_string(options_.max_bytes_for_level_base * 2),
        }},
       {"max_bytes_for_level_multiplier",
        {
-           ToString(options_.max_bytes_for_level_multiplier),
+           std::to_string(options_.max_bytes_for_level_multiplier),
            "1",
            "2",
        }},
@@ -281,12 +283,12 @@ bool StressTest::BuildOptionsTable() {
   return true;
 }
 
-void StressTest::InitDb() {
+void StressTest::InitDb(SharedState* shared) {
   uint64_t now = clock_->NowMicros();
   fprintf(stdout, "%s Initializing db_stress\n",
           clock_->TimeToString(now / 1000000).c_str());
   PrintEnv();
-  Open();
+  Open(shared);
   BuildOptionsTable();
 }
 
@@ -418,7 +420,7 @@ Status StressTest::AssertSame(DB* db, Co
   if (snap_state.status != s) {
     return Status::Corruption(
         "The snapshot gave inconsistent results for key " +
-        ToString(Hash(snap_state.key.c_str(), snap_state.key.size(), 0)) +
+        std::to_string(Hash(snap_state.key.c_str(), snap_state.key.size(), 0)) +
         " in cf " + cf->GetName() + ": (" + snap_state.status.ToString() +
         ") vs. (" + s.ToString() + ")");
   }
@@ -568,7 +570,7 @@ void StressTest::PreloadDbAndReopenAsRea
     fprintf(stdout, "%s Reopening database in read-only\n",
             clock_->TimeToString(now / 1000000).c_str());
     // Reopen as read-only, can ignore all options related to updates
-    Open();
+    Open(shared);
   } else {
     fprintf(stderr, "Failed to preload db");
     exit(1);
@@ -674,6 +676,7 @@ void StressTest::OperateDb(ThreadState*
     fault_fs_guard->SetThreadLocalReadErrorContext(thread->shared->GetSeed(),
                                             FLAGS_read_fault_one_in);
   }
+#endif  // NDEBUG
   if (FLAGS_write_fault_one_in) {
     IOStatus error_msg;
     if (FLAGS_injest_error_severity <= 1 || FLAGS_injest_error_severity > 2) {
@@ -691,7 +694,6 @@ void StressTest::OperateDb(ThreadState*
         thread->shared->GetSeed(), FLAGS_write_fault_one_in, error_msg,
         /*inject_for_all_file_types=*/false, types);
   }
-#endif // NDEBUG
   thread->stats.Start();
   for (int open_cnt = 0; open_cnt <= FLAGS_reopen; ++open_cnt) {
     if (thread->shared->HasVerificationFailedYet() ||
@@ -1424,8 +1426,10 @@ void StressTest::TestCompactFiles(Thread
 Status StressTest::TestBackupRestore(
     ThreadState* thread, const std::vector<int>& rand_column_families,
     const std::vector<int64_t>& rand_keys) {
-  std::string backup_dir = FLAGS_db + "/.backup" + ToString(thread->tid);
-  std::string restore_dir = FLAGS_db + "/.restore" + ToString(thread->tid);
+  const std::string backup_dir =
+      FLAGS_db + "/.backup" + std::to_string(thread->tid);
+  const std::string restore_dir =
+      FLAGS_db + "/.restore" + std::to_string(thread->tid);
   BackupEngineOptions backup_opts(backup_dir);
   // For debugging, get info_log from live options
   backup_opts.info_log = db_->GetDBOptions().info_log.get();
@@ -1557,6 +1561,7 @@ Status StressTest::TestBackupRestore(
   // Not yet implemented: opening restored BlobDB or TransactionDB
   if (s.ok() && !FLAGS_use_txn && !FLAGS_use_blob_db) {
     Options restore_options(options_);
+    restore_options.best_efforts_recovery = false;
     restore_options.listeners.clear();
     // Avoid dangling/shared file descriptors, for reliable destroy
     restore_options.sst_file_manager = nullptr;
@@ -1613,11 +1618,17 @@ Status StressTest::TestBackupRestore(
     bool exists = thread->shared->Exists(rand_column_families[i], rand_keys[0]);
     if (get_status.ok()) {
       if (!exists && from_latest && ShouldAcquireMutexOnKey()) {
-        s = Status::Corruption("key exists in restore but not in original db");
+        std::ostringstream oss;
+        oss << "0x" << key.ToString(true)
+            << " exists in restore but not in original db";
+        s = Status::Corruption(oss.str());
       }
     } else if (get_status.IsNotFound()) {
       if (exists && from_latest && ShouldAcquireMutexOnKey()) {
-        s = Status::Corruption("key exists in original db but not in restore");
+        std::ostringstream oss;
+        oss << "0x" << key.ToString(true)
+            << " exists in original db but not in restore";
+        s = Status::Corruption(oss.str());
       }
     } else {
       s = get_status;
@@ -1717,7 +1728,7 @@ Status StressTest::TestCheckpoint(Thread
                                   const std::vector<int>& rand_column_families,
                                   const std::vector<int64_t>& rand_keys) {
   std::string checkpoint_dir =
-      FLAGS_db + "/.checkpoint" + ToString(thread->tid);
+      FLAGS_db + "/.checkpoint" + std::to_string(thread->tid);
   Options tmp_opts(options_);
   tmp_opts.listeners.clear();
   tmp_opts.env = db_stress_env;
@@ -1759,6 +1770,7 @@ Status StressTest::TestCheckpoint(Thread
   DB* checkpoint_db = nullptr;
   if (s.ok()) {
     Options options(options_);
+    options.best_efforts_recovery = false;
     options.listeners.clear();
     // Avoid race condition in trash handling after delete checkpoint_db
     options.sst_file_manager.reset();
@@ -1790,13 +1802,18 @@ Status StressTest::TestCheckpoint(Thread
           thread->shared->Exists(rand_column_families[i], rand_keys[0]);
       if (get_status.ok()) {
         if (!exists && ShouldAcquireMutexOnKey()) {
-          s = Status::Corruption(
-              "key exists in checkpoint but not in original db");
+          std::ostringstream oss;
+          oss << "0x" << key.ToString(true) << " exists in checkpoint "
+              << checkpoint_dir << " but not in original db";
+          s = Status::Corruption(oss.str());
         }
       } else if (get_status.IsNotFound()) {
         if (exists && ShouldAcquireMutexOnKey()) {
-          s = Status::Corruption(
-              "key exists in original db but not in checkpoint");
+          std::ostringstream oss;
+          oss << "0x" << key.ToString(true)
+              << " exists in original db but not in checkpoint "
+              << checkpoint_dir;
+          s = Status::Corruption(oss.str());
         }
       } else {
         s = get_status;
@@ -2029,11 +2046,11 @@ void StressTest::TestAcquireSnapshot(Thr
   if (FLAGS_long_running_snapshots) {
     // Hold 10% of snapshots for 10x more
     if (thread->rand.OneIn(10)) {
-      assert(hold_for < port::kMaxInt64 / 10);
+      assert(hold_for < std::numeric_limits<uint64_t>::max() / 10);
       hold_for *= 10;
       // Hold 1% of snapshots for 100x more
       if (thread->rand.OneIn(10)) {
-        assert(hold_for < port::kMaxInt64 / 10);
+        assert(hold_for < std::numeric_limits<uint64_t>::max() / 10);
         hold_for *= 10;
       }
     }
@@ -2065,8 +2082,9 @@ void StressTest::TestCompactRange(Thread
                                   const Slice& start_key,
                                   ColumnFamilyHandle* column_family) {
   int64_t end_key_num;
-  if (port::kMaxInt64 - rand_key < FLAGS_compact_range_width) {
-    end_key_num = port::kMaxInt64;
+  if (std::numeric_limits<int64_t>::max() - rand_key <
+      FLAGS_compact_range_width) {
+    end_key_num = std::numeric_limits<int64_t>::max();
   } else {
     end_key_num = FLAGS_compact_range_width + rand_key;
   }
@@ -2203,7 +2221,7 @@ void StressTest::PrintEnv() const {
           (unsigned long)FLAGS_ops_per_thread);
   std::string ttl_state("unused");
   if (FLAGS_ttl > 0) {
-    ttl_state = ToString(FLAGS_ttl);
+    ttl_state = std::to_string(FLAGS_ttl);
   }
   fprintf(stdout, "Time to live(sec)         : %s\n", ttl_state.c_str());
   fprintf(stdout, "Read percentage           : %d%%\n", FLAGS_readpercent);
@@ -2298,192 +2316,24 @@ void StressTest::PrintEnv() const {
           static_cast<int>(FLAGS_user_timestamp_size));
   fprintf(stdout, "WAL compression           : %s\n",
           FLAGS_wal_compression.c_str());
+  fprintf(stdout, "Try verify sst unique id  : %d\n",
+          static_cast<int>(FLAGS_verify_sst_unique_id_in_manifest));
 
   fprintf(stdout, "------------------------------------------------\n");
 }
 
-void StressTest::Open() {
+void StressTest::Open(SharedState* shared) {
   assert(db_ == nullptr);
 #ifndef ROCKSDB_LITE
   assert(txn_db_ == nullptr);
-#endif
-  if (FLAGS_options_file.empty()) {
-    BlockBasedTableOptions block_based_options;
-    block_based_options.block_cache = cache_;
-    block_based_options.cache_index_and_filter_blocks =
-        FLAGS_cache_index_and_filter_blocks;
-    block_based_options.metadata_cache_options.top_level_index_pinning =
-        static_cast<PinningTier>(FLAGS_top_level_index_pinning);
-    block_based_options.metadata_cache_options.partition_pinning =
-        static_cast<PinningTier>(FLAGS_partition_pinning);
-    block_based_options.metadata_cache_options.unpartitioned_pinning =
-        static_cast<PinningTier>(FLAGS_unpartitioned_pinning);
-    block_based_options.block_cache_compressed = compressed_cache_;
-    block_based_options.checksum = checksum_type_e;
-    block_based_options.block_size = FLAGS_block_size;
-    block_based_options.reserve_table_reader_memory =
-        FLAGS_reserve_table_reader_memory;
-    block_based_options.format_version =
-        static_cast<uint32_t>(FLAGS_format_version);
-    block_based_options.index_block_restart_interval =
-        static_cast<int32_t>(FLAGS_index_block_restart_interval);
-    block_based_options.filter_policy = filter_policy_;
-    block_based_options.partition_filters = FLAGS_partition_filters;
-    block_based_options.optimize_filters_for_memory =
-        FLAGS_optimize_filters_for_memory;
-    block_based_options.detect_filter_construct_corruption =
-        FLAGS_detect_filter_construct_corruption;
-    block_based_options.index_type =
-        static_cast<BlockBasedTableOptions::IndexType>(FLAGS_index_type);
-    block_based_options.prepopulate_block_cache =
-        static_cast<BlockBasedTableOptions::PrepopulateBlockCache>(
-            FLAGS_prepopulate_block_cache);
-    options_.table_factory.reset(
-        NewBlockBasedTableFactory(block_based_options));
-    options_.db_write_buffer_size = FLAGS_db_write_buffer_size;
-    options_.write_buffer_size = FLAGS_write_buffer_size;
-    options_.max_write_buffer_number = FLAGS_max_write_buffer_number;
-    options_.min_write_buffer_number_to_merge =
-        FLAGS_min_write_buffer_number_to_merge;
-    options_.max_write_buffer_number_to_maintain =
-        FLAGS_max_write_buffer_number_to_maintain;
-    options_.max_write_buffer_size_to_maintain =
-        FLAGS_max_write_buffer_size_to_maintain;
-    options_.memtable_prefix_bloom_size_ratio =
-        FLAGS_memtable_prefix_bloom_size_ratio;
-    options_.memtable_whole_key_filtering = FLAGS_memtable_whole_key_filtering;
-    options_.disable_auto_compactions = FLAGS_disable_auto_compactions;
-    options_.max_background_compactions = FLAGS_max_background_compactions;
-    options_.max_background_flushes = FLAGS_max_background_flushes;
-    options_.compaction_style =
-        static_cast<ROCKSDB_NAMESPACE::CompactionStyle>(FLAGS_compaction_style);
-    if (FLAGS_prefix_size >= 0) {
-      options_.prefix_extractor.reset(
-          NewFixedPrefixTransform(FLAGS_prefix_size));
-    }
-    options_.max_open_files = FLAGS_open_files;
-    options_.statistics = dbstats;
-    options_.env = db_stress_env;
-    options_.use_fsync = FLAGS_use_fsync;
-    options_.compaction_readahead_size = FLAGS_compaction_readahead_size;
-    options_.allow_mmap_reads = FLAGS_mmap_read;
-    options_.allow_mmap_writes = FLAGS_mmap_write;
-    options_.use_direct_reads = FLAGS_use_direct_reads;
-    options_.use_direct_io_for_flush_and_compaction =
-        FLAGS_use_direct_io_for_flush_and_compaction;
-    options_.recycle_log_file_num =
-        static_cast<size_t>(FLAGS_recycle_log_file_num);
-    options_.target_file_size_base = FLAGS_target_file_size_base;
-    options_.target_file_size_multiplier = FLAGS_target_file_size_multiplier;
-    options_.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
-    options_.max_bytes_for_level_multiplier =
-        FLAGS_max_bytes_for_level_multiplier;
-    options_.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger;
-    options_.level0_slowdown_writes_trigger =
-        FLAGS_level0_slowdown_writes_trigger;
-    options_.level0_file_num_compaction_trigger =
-        FLAGS_level0_file_num_compaction_trigger;
-    options_.compression = compression_type_e;
-    options_.bottommost_compression = bottommost_compression_type_e;
-    options_.compression_opts.max_dict_bytes = FLAGS_compression_max_dict_bytes;
-    options_.compression_opts.zstd_max_train_bytes =
-        FLAGS_compression_zstd_max_train_bytes;
-    options_.compression_opts.parallel_threads =
-        FLAGS_compression_parallel_threads;
-    options_.compression_opts.max_dict_buffer_bytes =
-        FLAGS_compression_max_dict_buffer_bytes;
-    options_.create_if_missing = true;
-    options_.max_manifest_file_size = FLAGS_max_manifest_file_size;
-    options_.inplace_update_support = FLAGS_in_place_update;
-    options_.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
-    options_.allow_concurrent_memtable_write =
-        FLAGS_allow_concurrent_memtable_write;
-    options_.experimental_mempurge_threshold =
-        FLAGS_experimental_mempurge_threshold;
-    options_.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds;
-    options_.ttl = FLAGS_compaction_ttl;
-    options_.enable_pipelined_write = FLAGS_enable_pipelined_write;
-    options_.enable_write_thread_adaptive_yield =
-        FLAGS_enable_write_thread_adaptive_yield;
-    options_.compaction_options_universal.size_ratio =
-        FLAGS_universal_size_ratio;
-    options_.compaction_options_universal.min_merge_width =
-        FLAGS_universal_min_merge_width;
-    options_.compaction_options_universal.max_merge_width =
-        FLAGS_universal_max_merge_width;
-    options_.compaction_options_universal.max_size_amplification_percent =
-        FLAGS_universal_max_size_amplification_percent;
-    options_.atomic_flush = FLAGS_atomic_flush;
-    options_.avoid_unnecessary_blocking_io =
-        FLAGS_avoid_unnecessary_blocking_io;
-    options_.write_dbid_to_manifest = FLAGS_write_dbid_to_manifest;
-    options_.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery;
-    options_.max_write_batch_group_size_bytes =
-        FLAGS_max_write_batch_group_size_bytes;
-    options_.level_compaction_dynamic_level_bytes =
-        FLAGS_level_compaction_dynamic_level_bytes;
-    options_.file_checksum_gen_factory =
-        GetFileChecksumImpl(FLAGS_file_checksum_impl);
-    options_.track_and_verify_wals_in_manifest = true;
-
-    // Integrated BlobDB
-    options_.enable_blob_files = FLAGS_enable_blob_files;
-    options_.min_blob_size = FLAGS_min_blob_size;
-    options_.blob_file_size = FLAGS_blob_file_size;
-    options_.blob_compression_type =
-        StringToCompressionType(FLAGS_blob_compression_type.c_str());
-    options_.enable_blob_garbage_collection =
-        FLAGS_enable_blob_garbage_collection;
-    options_.blob_garbage_collection_age_cutoff =
-        FLAGS_blob_garbage_collection_age_cutoff;
-    options_.blob_garbage_collection_force_threshold =
-        FLAGS_blob_garbage_collection_force_threshold;
-    options_.blob_compaction_readahead_size =
-        FLAGS_blob_compaction_readahead_size;
-
-    options_.wal_compression =
-        StringToCompressionType(FLAGS_wal_compression.c_str());
-  } else {
-#ifdef ROCKSDB_LITE
-    fprintf(stderr, "--options_file not supported in lite mode\n");
-    exit(1);
 #else
-    DBOptions db_options;
-    std::vector<ColumnFamilyDescriptor> cf_descriptors;
-    Status s = LoadOptionsFromFile(FLAGS_options_file, db_stress_env,
-                                   &db_options, &cf_descriptors);
-    db_options.env = new DbStressEnvWrapper(db_stress_env);
-    if (!s.ok()) {
-      fprintf(stderr, "Unable to load options file %s --- %s\n",
-              FLAGS_options_file.c_str(), s.ToString().c_str());
-      exit(1);
-    }
-    options_ = Options(db_options, cf_descriptors[0].options);
-#endif  // ROCKSDB_LITE
-  }
-
-  if (FLAGS_rate_limiter_bytes_per_sec > 0) {
-    options_.rate_limiter.reset(NewGenericRateLimiter(
-        FLAGS_rate_limiter_bytes_per_sec, 1000 /* refill_period_us */,
-        10 /* fairness */,
-        FLAGS_rate_limit_bg_reads ? RateLimiter::Mode::kReadsOnly
-                                  : RateLimiter::Mode::kWritesOnly));
-  }
-  if (FLAGS_sst_file_manager_bytes_per_sec > 0 ||
-      FLAGS_sst_file_manager_bytes_per_truncate > 0) {
-    Status status;
-    options_.sst_file_manager.reset(NewSstFileManager(
-        db_stress_env, options_.info_log, "" /* trash_dir */,
-        static_cast<int64_t>(FLAGS_sst_file_manager_bytes_per_sec),
-        true /* delete_existing_trash */, &status,
-        0.25 /* max_trash_db_ratio */,
-        FLAGS_sst_file_manager_bytes_per_truncate));
-    if (!status.ok()) {
-      fprintf(stderr, "SstFileManager creation failed: %s\n",
-              status.ToString().c_str());
-      exit(1);
-    }
+  (void)shared;
+#endif
+  if (!InitializeOptionsFromFile(options_)) {
+    InitializeOptionsFromFlags(cache_, compressed_cache_, filter_policy_,
+                               options_);
   }
+  InitializeOptionsGeneral(cache_, compressed_cache_, filter_policy_, options_);
 
   if (FLAGS_prefix_size == 0 && FLAGS_rep_factory == kHashSkipList) {
     fprintf(stderr,
@@ -2495,40 +2345,6 @@ void StressTest::Open() {
             "WARNING: prefix_size is non-zero but "
             "memtablerep != prefix_hash\n");
   }
-  switch (FLAGS_rep_factory) {
-    case kSkipList:
-      // no need to do anything
-      break;
-#ifndef ROCKSDB_LITE
-    case kHashSkipList:
-      options_.memtable_factory.reset(NewHashSkipListRepFactory(10000));
-      break;
-    case kVectorRep:
-      options_.memtable_factory.reset(new VectorRepFactory());
-      break;
-#else
-    default:
-      fprintf(stderr,
-              "RocksdbLite only supports skip list mem table. Skip "
-              "--rep_factory\n");
-#endif  // ROCKSDB_LITE
-  }
-
-  if (FLAGS_use_full_merge_v1) {
-    options_.merge_operator = MergeOperators::CreateDeprecatedPutOperator();
-  } else {
-    options_.merge_operator = MergeOperators::CreatePutOperator();
-  }
-  if (FLAGS_enable_compaction_filter) {
-    options_.compaction_filter_factory =
-        std::make_shared<DbStressCompactionFilterFactory>();
-  }
-  options_.table_properties_collector_factories.emplace_back(
-      std::make_shared<DbStressTablePropertiesCollectorFactory>());
-
-  options_.best_efforts_recovery = FLAGS_best_efforts_recovery;
-  options_.paranoid_file_checks = FLAGS_paranoid_file_checks;
-  options_.fail_if_options_file_error = FLAGS_fail_if_options_file_error;
 
   if ((options_.enable_blob_files || options_.enable_blob_garbage_collection ||
        FLAGS_allow_setting_blob_options_dynamically) &&
@@ -2556,10 +2372,6 @@ void StressTest::Open() {
 
   Status s;
 
-  if (FLAGS_user_timestamp_size > 0) {
-    CheckAndSetOptionsForUserTimestamp();
-  }
-
   if (FLAGS_ttl == -1) {
     std::vector<std::string> existing_column_families;
     s = DB::ListColumnFamilies(DBOptions(options_), FLAGS_db,
@@ -2603,19 +2415,20 @@ void StressTest::Open() {
       cf_descriptors.emplace_back(name, ColumnFamilyOptions(options_));
     }
     while (cf_descriptors.size() < (size_t)FLAGS_column_families) {
-      std::string name = ToString(new_column_family_name_.load());
+      std::string name = std::to_string(new_column_family_name_.load());
       new_column_family_name_++;
       cf_descriptors.emplace_back(name, ColumnFamilyOptions(options_));
       column_family_names_.push_back(name);
     }
+
     options_.listeners.clear();
 #ifndef ROCKSDB_LITE
     options_.listeners.emplace_back(new DbStressListener(
         FLAGS_db, options_.db_paths, cf_descriptors, db_stress_listener_env));
 #endif  // !ROCKSDB_LITE
-    options_.create_missing_column_families = true;
+    RegisterAdditionalListeners();
+
     if (!FLAGS_use_txn) {
-#ifndef NDEBUG
       // Determine whether we need to ingest file metadata write failures
       // during DB reopen. If it does, enable it.
       // Only ingest metadata error if it is reopening, as initial open
@@ -2657,7 +2470,6 @@ void StressTest::Open() {
         }
       }
       while (true) {
-#endif  // NDEBUG
 #ifndef ROCKSDB_LITE
         // StackableDB-based BlobDB
         if (FLAGS_use_blob_db) {
@@ -2687,7 +2499,6 @@ void StressTest::Open() {
           }
         }
 
-#ifndef NDEBUG
         if (ingest_meta_error || ingest_write_error || ingest_read_error) {
           fault_fs_guard->SetFilesystemDirectWritable(true);
           fault_fs_guard->DisableMetadataWriteErrorInjection();
@@ -2699,7 +2510,7 @@ void StressTest::Open() {
             // wait for all compactions to finish to make sure DB is in
             // clean state before executing queries.
             s = static_cast_with_check<DBImpl>(db_->GetRootDB())
-                    ->TEST_WaitForCompact(true);
+                    ->WaitForCompact(true /* wait_unscheduled */);
             if (!s.ok()) {
               for (auto cf : column_families_) {
                 delete cf;
@@ -2732,7 +2543,6 @@ void StressTest::Open() {
         }
         break;
       }
-#endif  // NDEBUG
     } else {
 #ifndef ROCKSDB_LITE
       TransactionDBOptions txn_db_options;
@@ -2751,6 +2561,7 @@ void StressTest::Open() {
           static_cast<size_t>(FLAGS_wp_snapshot_cache_bits);
       txn_db_options.wp_commit_cache_bits =
           static_cast<size_t>(FLAGS_wp_commit_cache_bits);
+      PrepareTxnDbOptions(shared, txn_db_options);
       s = TransactionDB::Open(options_, txn_db_options, FLAGS_db,
                               cf_descriptors, &column_families_, &txn_db_);
       if (!s.ok()) {
@@ -2910,7 +2721,7 @@ void StressTest::Reopen(ThreadState* thr
   auto now = clock_->NowMicros();
   fprintf(stdout, "%s Reopening database for the %dth time\n",
           clock_->TimeToString(now / 1000000).c_str(), num_times_reopened_);
-  Open();
+  Open(thread->shared);
 
   if ((FLAGS_sync_fault_injection || FLAGS_disable_wal) && IsStateTracked()) {
     Status s = thread->shared->SaveAtAndAfter(db_);
@@ -2922,7 +2733,7 @@ void StressTest::Reopen(ThreadState* thr
   }
 }
 
-void StressTest::CheckAndSetOptionsForUserTimestamp() {
+void CheckAndSetOptionsForUserTimestamp(Options& options) {
   assert(FLAGS_user_timestamp_size > 0);
   const Comparator* const cmp = test::BytewiseComparatorWithU64TsWrapper();
   assert(cmp);
@@ -2980,7 +2791,295 @@ void StressTest::CheckAndSetOptionsForUs
     fprintf(stderr, "Bulk loading may not support timestamp yet.\n");
     exit(1);
   }
-  options_.comparator = cmp;
+  options.comparator = cmp;
+}
+
+bool InitializeOptionsFromFile(Options& options) {
+#ifndef ROCKSDB_LITE
+  DBOptions db_options;
+  std::vector<ColumnFamilyDescriptor> cf_descriptors;
+  if (!FLAGS_options_file.empty()) {
+    Status s = LoadOptionsFromFile(FLAGS_options_file, db_stress_env,
+                                   &db_options, &cf_descriptors);
+    if (!s.ok()) {
+      fprintf(stderr, "Unable to load options file %s --- %s\n",
+              FLAGS_options_file.c_str(), s.ToString().c_str());
+      exit(1);
+    }
+    db_options.env = new DbStressEnvWrapper(db_stress_env);
+    options = Options(db_options, cf_descriptors[0].options);
+    return true;
+  }
+#else
+  (void)options;
+  fprintf(stderr, "--options_file not supported in lite mode\n");
+  exit(1);
+#endif  //! ROCKSDB_LITE
+  return false;
+}
+
+void InitializeOptionsFromFlags(
+    const std::shared_ptr<Cache>& cache,
+    const std::shared_ptr<Cache>& block_cache_compressed,
+    const std::shared_ptr<const FilterPolicy>& filter_policy,
+    Options& options) {
+  BlockBasedTableOptions block_based_options;
+  block_based_options.block_cache = cache;
+  block_based_options.cache_index_and_filter_blocks =
+      FLAGS_cache_index_and_filter_blocks;
+  block_based_options.metadata_cache_options.top_level_index_pinning =
+      static_cast<PinningTier>(FLAGS_top_level_index_pinning);
+  block_based_options.metadata_cache_options.partition_pinning =
+      static_cast<PinningTier>(FLAGS_partition_pinning);
+  block_based_options.metadata_cache_options.unpartitioned_pinning =
+      static_cast<PinningTier>(FLAGS_unpartitioned_pinning);
+  block_based_options.block_cache_compressed = block_cache_compressed;
+  block_based_options.checksum = checksum_type_e;
+  block_based_options.block_size = FLAGS_block_size;
+  block_based_options.cache_usage_options.options_overrides.insert(
+      {CacheEntryRole::kCompressionDictionaryBuildingBuffer,
+       {/*.charged = */ FLAGS_charge_compression_dictionary_building_buffer
+            ? CacheEntryRoleOptions::Decision::kEnabled
+            : CacheEntryRoleOptions::Decision::kDisabled}});
+  block_based_options.cache_usage_options.options_overrides.insert(
+      {CacheEntryRole::kFilterConstruction,
+       {/*.charged = */ FLAGS_charge_filter_construction
+            ? CacheEntryRoleOptions::Decision::kEnabled
+            : CacheEntryRoleOptions::Decision::kDisabled}});
+  block_based_options.cache_usage_options.options_overrides.insert(
+      {CacheEntryRole::kBlockBasedTableReader,
+       {/*.charged = */ FLAGS_charge_table_reader
+            ? CacheEntryRoleOptions::Decision::kEnabled
+            : CacheEntryRoleOptions::Decision::kDisabled}});
+  block_based_options.format_version =
+      static_cast<uint32_t>(FLAGS_format_version);
+  block_based_options.index_block_restart_interval =
+      static_cast<int32_t>(FLAGS_index_block_restart_interval);
+  block_based_options.filter_policy = filter_policy;
+  block_based_options.partition_filters = FLAGS_partition_filters;
+  block_based_options.optimize_filters_for_memory =
+      FLAGS_optimize_filters_for_memory;
+  block_based_options.detect_filter_construct_corruption =
+      FLAGS_detect_filter_construct_corruption;
+  block_based_options.index_type =
+      static_cast<BlockBasedTableOptions::IndexType>(FLAGS_index_type);
+  block_based_options.prepopulate_block_cache =
+      static_cast<BlockBasedTableOptions::PrepopulateBlockCache>(
+          FLAGS_prepopulate_block_cache);
+  options.table_factory.reset(NewBlockBasedTableFactory(block_based_options));
+  options.db_write_buffer_size = FLAGS_db_write_buffer_size;
+  options.write_buffer_size = FLAGS_write_buffer_size;
+  options.max_write_buffer_number = FLAGS_max_write_buffer_number;
+  options.min_write_buffer_number_to_merge =
+      FLAGS_min_write_buffer_number_to_merge;
+  options.max_write_buffer_number_to_maintain =
+      FLAGS_max_write_buffer_number_to_maintain;
+  options.max_write_buffer_size_to_maintain =
+      FLAGS_max_write_buffer_size_to_maintain;
+  options.memtable_prefix_bloom_size_ratio =
+      FLAGS_memtable_prefix_bloom_size_ratio;
+  options.memtable_whole_key_filtering = FLAGS_memtable_whole_key_filtering;
+  options.disable_auto_compactions = FLAGS_disable_auto_compactions;
+  options.max_background_compactions = FLAGS_max_background_compactions;
+  options.max_background_flushes = FLAGS_max_background_flushes;
+  options.compaction_style =
+      static_cast<ROCKSDB_NAMESPACE::CompactionStyle>(FLAGS_compaction_style);
+  if (FLAGS_prefix_size >= 0) {
+    options.prefix_extractor.reset(NewFixedPrefixTransform(FLAGS_prefix_size));
+  }
+  options.max_open_files = FLAGS_open_files;
+  options.statistics = dbstats;
+  options.env = db_stress_env;
+  options.use_fsync = FLAGS_use_fsync;
+  options.compaction_readahead_size = FLAGS_compaction_readahead_size;
+  options.allow_mmap_reads = FLAGS_mmap_read;
+  options.allow_mmap_writes = FLAGS_mmap_write;
+  options.use_direct_reads = FLAGS_use_direct_reads;
+  options.use_direct_io_for_flush_and_compaction =
+      FLAGS_use_direct_io_for_flush_and_compaction;
+  options.recycle_log_file_num =
+      static_cast<size_t>(FLAGS_recycle_log_file_num);
+  options.target_file_size_base = FLAGS_target_file_size_base;
+  options.target_file_size_multiplier = FLAGS_target_file_size_multiplier;
+  options.max_bytes_for_level_base = FLAGS_max_bytes_for_level_base;
+  options.max_bytes_for_level_multiplier = FLAGS_max_bytes_for_level_multiplier;
+  options.level0_stop_writes_trigger = FLAGS_level0_stop_writes_trigger;
+  options.level0_slowdown_writes_trigger = FLAGS_level0_slowdown_writes_trigger;
+  options.level0_file_num_compaction_trigger =
+      FLAGS_level0_file_num_compaction_trigger;
+  options.compression = compression_type_e;
+  options.bottommost_compression = bottommost_compression_type_e;
+  options.compression_opts.max_dict_bytes = FLAGS_compression_max_dict_bytes;
+  options.compression_opts.zstd_max_train_bytes =
+      FLAGS_compression_zstd_max_train_bytes;
+  options.compression_opts.parallel_threads =
+      FLAGS_compression_parallel_threads;
+  options.compression_opts.max_dict_buffer_bytes =
+      FLAGS_compression_max_dict_buffer_bytes;
+  if (ZSTD_FinalizeDictionarySupported()) {
+    options.compression_opts.use_zstd_dict_trainer =
+        FLAGS_compression_use_zstd_dict_trainer;
+  } else if (!FLAGS_compression_use_zstd_dict_trainer) {
+    fprintf(
+        stderr,
+        "WARNING: use_zstd_dict_trainer is false but zstd finalizeDictionary "
+        "cannot be used because ZSTD 1.4.5+ is not linked with the binary."
+        " zstd dictionary trainer will be used.\n");
+  }
+  options.max_manifest_file_size = FLAGS_max_manifest_file_size;
+  options.inplace_update_support = FLAGS_in_place_update;
+  options.max_subcompactions = static_cast<uint32_t>(FLAGS_subcompactions);
+  options.allow_concurrent_memtable_write =
+      FLAGS_allow_concurrent_memtable_write;
+  options.experimental_mempurge_threshold =
+      FLAGS_experimental_mempurge_threshold;
+  options.periodic_compaction_seconds = FLAGS_periodic_compaction_seconds;
+  options.ttl = FLAGS_compaction_ttl;
+  options.enable_pipelined_write = FLAGS_enable_pipelined_write;
+  options.enable_write_thread_adaptive_yield =
+      FLAGS_enable_write_thread_adaptive_yield;
+  options.compaction_options_universal.size_ratio = FLAGS_universal_size_ratio;
+  options.compaction_options_universal.min_merge_width =
+      FLAGS_universal_min_merge_width;
+  options.compaction_options_universal.max_merge_width =
+      FLAGS_universal_max_merge_width;
+  options.compaction_options_universal.max_size_amplification_percent =
+      FLAGS_universal_max_size_amplification_percent;
+  options.atomic_flush = FLAGS_atomic_flush;
+  options.avoid_unnecessary_blocking_io = FLAGS_avoid_unnecessary_blocking_io;
+  options.write_dbid_to_manifest = FLAGS_write_dbid_to_manifest;
+  options.avoid_flush_during_recovery = FLAGS_avoid_flush_during_recovery;
+  options.max_write_batch_group_size_bytes =
+      FLAGS_max_write_batch_group_size_bytes;
+  options.level_compaction_dynamic_level_bytes =
+      FLAGS_level_compaction_dynamic_level_bytes;
+  options.track_and_verify_wals_in_manifest = true;
+  options.verify_sst_unique_id_in_manifest =
+      FLAGS_verify_sst_unique_id_in_manifest;
+
+  // Integrated BlobDB
+  options.enable_blob_files = FLAGS_enable_blob_files;
+  options.min_blob_size = FLAGS_min_blob_size;
+  options.blob_file_size = FLAGS_blob_file_size;
+  options.blob_compression_type =
+      StringToCompressionType(FLAGS_blob_compression_type.c_str());
+  options.enable_blob_garbage_collection = FLAGS_enable_blob_garbage_collection;
+  options.blob_garbage_collection_age_cutoff =
+      FLAGS_blob_garbage_collection_age_cutoff;
+  options.blob_garbage_collection_force_threshold =
+      FLAGS_blob_garbage_collection_force_threshold;
+  options.blob_compaction_readahead_size = FLAGS_blob_compaction_readahead_size;
+
+  options.wal_compression =
+      StringToCompressionType(FLAGS_wal_compression.c_str());
+
+  switch (FLAGS_rep_factory) {
+    case kSkipList:
+      // no need to do anything
+      break;
+#ifndef ROCKSDB_LITE
+    case kHashSkipList:
+      options.memtable_factory.reset(NewHashSkipListRepFactory(10000));
+      break;
+    case kVectorRep:
+      options.memtable_factory.reset(new VectorRepFactory());
+      break;
+#else
+    default:
+      fprintf(stderr,
+              "RocksdbLite only supports skip list mem table. Skip "
+              "--rep_factory\n");
+#endif  // ROCKSDB_LITE
+  }
+
+  if (FLAGS_use_full_merge_v1) {
+    options.merge_operator = MergeOperators::CreateDeprecatedPutOperator();
+  } else {
+    options.merge_operator = MergeOperators::CreatePutOperator();
+  }
+
+  if (FLAGS_enable_compaction_filter) {
+    options.compaction_filter_factory =
+        std::make_shared<DbStressCompactionFilterFactory>();
+  }
+
+  options.best_efforts_recovery = FLAGS_best_efforts_recovery;
+  options.paranoid_file_checks = FLAGS_paranoid_file_checks;
+  options.fail_if_options_file_error = FLAGS_fail_if_options_file_error;
+
+  if (FLAGS_user_timestamp_size > 0) {
+    CheckAndSetOptionsForUserTimestamp(options);
+  }
 }
+
+void InitializeOptionsGeneral(
+    const std::shared_ptr<Cache>& cache,
+    const std::shared_ptr<Cache>& block_cache_compressed,
+    const std::shared_ptr<const FilterPolicy>& filter_policy,
+    Options& options) {
+  options.create_missing_column_families = true;
+  options.create_if_missing = true;
+
+  if (!options.statistics) {
+    options.statistics = dbstats;
+  }
+
+  if (options.env == Options().env) {
+    options.env = db_stress_env;
+  }
+
+  assert(options.table_factory);
+  auto table_options =
+      options.table_factory->GetOptions<BlockBasedTableOptions>();
+  if (table_options) {
+    if (FLAGS_cache_size > 0) {
+      table_options->block_cache = cache;
+    }
+    if (!table_options->block_cache_compressed &&
+        FLAGS_compressed_cache_size > 0) {
+      table_options->block_cache_compressed = block_cache_compressed;
+    }
+    if (!table_options->filter_policy) {
+      table_options->filter_policy = filter_policy;
+    }
+  }
+
+  // TODO: row_cache, thread-pool IO priority, CPU priority.
+
+  if (!options.rate_limiter) {
+    if (FLAGS_rate_limiter_bytes_per_sec > 0) {
+      options.rate_limiter.reset(NewGenericRateLimiter(
+          FLAGS_rate_limiter_bytes_per_sec, 1000 /* refill_period_us */,
+          10 /* fairness */,
+          FLAGS_rate_limit_bg_reads ? RateLimiter::Mode::kReadsOnly
+                                    : RateLimiter::Mode::kWritesOnly));
+    }
+  }
+
+  if (!options.file_checksum_gen_factory) {
+    options.file_checksum_gen_factory =
+        GetFileChecksumImpl(FLAGS_file_checksum_impl);
+  }
+
+  if (FLAGS_sst_file_manager_bytes_per_sec > 0 ||
+      FLAGS_sst_file_manager_bytes_per_truncate > 0) {
+    Status status;
+    options.sst_file_manager.reset(NewSstFileManager(
+        db_stress_env, options.info_log, "" /* trash_dir */,
+        static_cast<int64_t>(FLAGS_sst_file_manager_bytes_per_sec),
+        true /* delete_existing_trash */, &status,
+        0.25 /* max_trash_db_ratio */,
+        FLAGS_sst_file_manager_bytes_per_truncate));
+    if (!status.ok()) {
+      fprintf(stderr, "SstFileManager creation failed: %s\n",
+              status.ToString().c_str());
+      exit(1);
+    }
+  }
+
+  options.table_properties_collector_factories.emplace_back(
+      std::make_shared<DbStressTablePropertiesCollectorFactory>());
+}
+
 }  // namespace ROCKSDB_NAMESPACE
 #endif  // GFLAGS
diff -pruN 7.2.2-5/db_stress_tool/db_stress_test_base.h 7.3.1-2/db_stress_tool/db_stress_test_base.h
--- 7.2.2-5/db_stress_tool/db_stress_test_base.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_test_base.h	2022-06-08 21:08:16.000000000 +0000
@@ -16,6 +16,7 @@ namespace ROCKSDB_NAMESPACE {
 class SystemClock;
 class Transaction;
 class TransactionDB;
+struct TransactionDBOptions;
 
 class StressTest {
  public:
@@ -29,7 +30,7 @@ class StressTest {
 
   bool BuildOptionsTable();
 
-  void InitDb();
+  void InitDb(SharedState*);
   // The initialization work is split into two parts to avoid a circular
   // dependency with `SharedState`.
   virtual void FinishInitDb(SharedState*);
@@ -218,11 +219,16 @@ class StressTest {
 
   void PrintEnv() const;
 
-  void Open();
+  void Open(SharedState* shared);
 
   void Reopen(ThreadState* thread);
 
-  void CheckAndSetOptionsForUserTimestamp();
+  virtual void RegisterAdditionalListeners() {}
+
+#ifndef ROCKSDB_LITE
+  virtual void PrepareTxnDbOptions(SharedState* /*shared*/,
+                                   TransactionDBOptions& /*txn_db_opts*/) {}
+#endif
 
   std::shared_ptr<Cache> cache_;
   std::shared_ptr<Cache> compressed_cache_;
@@ -251,5 +257,49 @@ class StressTest {
   bool is_db_stopped_;
 };
 
+// Load options from OPTIONS file and populate `options`.
+extern bool InitializeOptionsFromFile(Options& options);
+
+// Initialize `options` using command line arguments.
+// When this function is called, `cache`, `block_cache_compressed`,
+// `filter_policy` have all been initialized. Therefore, we just pass them as
+// input arguments.
+extern void InitializeOptionsFromFlags(
+    const std::shared_ptr<Cache>& cache,
+    const std::shared_ptr<Cache>& block_cache_compressed,
+    const std::shared_ptr<const FilterPolicy>& filter_policy, Options& options);
+
+// Initialize `options` on which `InitializeOptionsFromFile()` and
+// `InitializeOptionsFromFlags()` have both been called already.
+// There are two cases.
+// Case 1: OPTIONS file is not specified. Command line arguments have been used
+//         to initialize `options`. InitializeOptionsGeneral() will use
+//         `cache`, `block_cache_compressed` and `filter_policy` to initialize
+//         corresponding fields of `options`. InitializeOptionsGeneral() will
+//         also set up other fields of `options` so that stress test can run.
+//         Examples include `create_if_missing` and
+//         `create_missing_column_families`, etc.
+// Case 2: OPTIONS file is specified. It is possible that, after loading from
+//         the given OPTIONS files, some shared object fields are still not
+//         initialized because they are not set in the OPTIONS file. In this
+//         case, if command line arguments indicate that the user wants to set
+//         up such shared objects, e.g. block cache, compressed block cache,
+//         row cache, filter policy, then InitializeOptionsGeneral() will honor
+//         the user's choice, thus passing `cache`, `block_cache_compressed`,
+//         `filter_policy` as input arguments.
+//
+// InitializeOptionsGeneral() must not overwrite fields of `options` loaded
+// from OPTIONS file.
+extern void InitializeOptionsGeneral(
+    const std::shared_ptr<Cache>& cache,
+    const std::shared_ptr<Cache>& block_cache_compressed,
+    const std::shared_ptr<const FilterPolicy>& filter_policy, Options& options);
+
+// If no OPTIONS file is specified, set up `options` so that we can test
+// user-defined timestamp which requires `-user_timestamp_size=8`.
+// This function also checks for known (currently) incompatible features with
+// user-defined timestamp.
+extern void CheckAndSetOptionsForUserTimestamp(Options& options);
+
 }  // namespace ROCKSDB_NAMESPACE
 #endif  // GFLAGS
diff -pruN 7.2.2-5/db_stress_tool/db_stress_tool.cc 7.3.1-2/db_stress_tool/db_stress_tool.cc
--- 7.2.2-5/db_stress_tool/db_stress_tool.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/db_stress_tool.cc	2022-06-08 21:08:16.000000000 +0000
@@ -24,9 +24,7 @@
 #include "db_stress_tool/db_stress_common.h"
 #include "db_stress_tool/db_stress_driver.h"
 #include "rocksdb/convenience.h"
-#ifndef NDEBUG
 #include "utilities/fault_injection_fs.h"
-#endif
 
 namespace ROCKSDB_NAMESPACE {
 namespace {
@@ -82,7 +80,6 @@ int db_stress_tool(int argc, char** argv
   dbsl_env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
   db_stress_listener_env = dbsl_env_wrapper_guard.get();
 
-#ifndef NDEBUG
   if (FLAGS_read_fault_one_in || FLAGS_sync_fault_injection ||
       FLAGS_write_fault_one_in || FLAGS_open_metadata_write_fault_one_in ||
       FLAGS_open_write_fault_one_in || FLAGS_open_read_fault_one_in) {
@@ -98,18 +95,10 @@ int db_stress_tool(int argc, char** argv
         std::make_shared<CompositeEnvWrapper>(raw_env, fault_fs_guard);
     raw_env = fault_env_guard.get();
   }
-  if (FLAGS_write_fault_one_in) {
-    SyncPoint::GetInstance()->SetCallBack(
-        "BuildTable:BeforeFinishBuildTable",
-        [&](void*) { fault_fs_guard->EnableWriteErrorInjection(); });
-    SyncPoint::GetInstance()->EnableProcessing();
-  }
-#endif
 
   env_wrapper_guard = std::make_shared<DbStressEnvWrapper>(raw_env);
   db_stress_env = env_wrapper_guard.get();
 
-#ifndef NDEBUG
   if (FLAGS_write_fault_one_in) {
     // In the write injection case, we need to use the FS interface and returns
     // the IOStatus with different error and flags. Therefore,
@@ -118,7 +107,6 @@ int db_stress_tool(int argc, char** argv
     // CompositeEnvWrapper of env and fault_fs.
     db_stress_env = raw_env;
   }
-#endif
 
   FLAGS_rep_factory = StringToRepFactory(FLAGS_memtablerep.c_str());
 
diff -pruN 7.2.2-5/db_stress_tool/expected_state.cc 7.3.1-2/db_stress_tool/expected_state.cc
--- 7.2.2-5/db_stress_tool/expected_state.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/expected_state.cc	2022-06-08 21:08:16.000000000 +0000
@@ -187,8 +187,8 @@ Status FileExpectedStateManager::Open()
     // Check if crash happened after creating state file but before creating
     // trace file.
     if (saved_seqno_ != kMaxSequenceNumber) {
-      std::string saved_seqno_trace_path =
-          GetPathForFilename(ToString(saved_seqno_) + kTraceFilenameSuffix);
+      std::string saved_seqno_trace_path = GetPathForFilename(
+          std::to_string(saved_seqno_) + kTraceFilenameSuffix);
       Status exists_status = Env::Default()->FileExists(saved_seqno_trace_path);
       if (exists_status.ok()) {
         found_trace = true;
@@ -205,7 +205,7 @@ Status FileExpectedStateManager::Open()
     std::unique_ptr<WritableFile> wfile;
     const EnvOptions soptions;
     std::string saved_seqno_trace_path =
-        GetPathForFilename(ToString(saved_seqno_) + kTraceFilenameSuffix);
+        GetPathForFilename(std::to_string(saved_seqno_) + kTraceFilenameSuffix);
     s = Env::Default()->NewWritableFile(saved_seqno_trace_path, &wfile,
                                         soptions);
   }
@@ -257,14 +257,14 @@ Status FileExpectedStateManager::Open()
 Status FileExpectedStateManager::SaveAtAndAfter(DB* db) {
   SequenceNumber seqno = db->GetLatestSequenceNumber();
 
-  std::string state_filename = ToString(seqno) + kStateFilenameSuffix;
+  std::string state_filename = std::to_string(seqno) + kStateFilenameSuffix;
   std::string state_file_temp_path = GetTempPathForFilename(state_filename);
   std::string state_file_path = GetPathForFilename(state_filename);
 
   std::string latest_file_path =
       GetPathForFilename(kLatestBasename + kStateFilenameSuffix);
 
-  std::string trace_filename = ToString(seqno) + kTraceFilenameSuffix;
+  std::string trace_filename = std::to_string(seqno) + kTraceFilenameSuffix;
   std::string trace_file_path = GetPathForFilename(trace_filename);
 
   // Populate a tempfile and then rename it to atomically create "<seqno>.state"
@@ -311,13 +311,13 @@ Status FileExpectedStateManager::SaveAtA
   // again, even if we crash.
   if (s.ok() && old_saved_seqno != kMaxSequenceNumber &&
       old_saved_seqno != saved_seqno_) {
-    s = Env::Default()->DeleteFile(
-        GetPathForFilename(ToString(old_saved_seqno) + kStateFilenameSuffix));
+    s = Env::Default()->DeleteFile(GetPathForFilename(
+        std::to_string(old_saved_seqno) + kStateFilenameSuffix));
   }
   if (s.ok() && old_saved_seqno != kMaxSequenceNumber &&
       old_saved_seqno != saved_seqno_) {
-    s = Env::Default()->DeleteFile(
-        GetPathForFilename(ToString(old_saved_seqno) + kTraceFilenameSuffix));
+    s = Env::Default()->DeleteFile(GetPathForFilename(
+        std::to_string(old_saved_seqno) + kTraceFilenameSuffix));
   }
   return s;
 }
@@ -461,7 +461,8 @@ Status FileExpectedStateManager::Restore
     return Status::Corruption("DB is older than any restorable expected state");
   }
 
-  std::string state_filename = ToString(saved_seqno_) + kStateFilenameSuffix;
+  std::string state_filename =
+      std::to_string(saved_seqno_) + kStateFilenameSuffix;
   std::string state_file_path = GetPathForFilename(state_filename);
 
   std::string latest_file_temp_path =
@@ -469,7 +470,8 @@ Status FileExpectedStateManager::Restore
   std::string latest_file_path =
       GetPathForFilename(kLatestBasename + kStateFilenameSuffix);
 
-  std::string trace_filename = ToString(saved_seqno_) + kTraceFilenameSuffix;
+  std::string trace_filename =
+      std::to_string(saved_seqno_) + kTraceFilenameSuffix;
   std::string trace_file_path = GetPathForFilename(trace_filename);
 
   std::unique_ptr<TraceReader> trace_reader;
diff -pruN 7.2.2-5/db_stress_tool/multi_ops_txns_stress.cc 7.3.1-2/db_stress_tool/multi_ops_txns_stress.cc
--- 7.2.2-5/db_stress_tool/multi_ops_txns_stress.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/multi_ops_txns_stress.cc	2022-06-08 21:08:16.000000000 +0000
@@ -12,9 +12,8 @@
 
 #include "rocksdb/utilities/write_batch_with_index.h"
 #include "util/defer.h"
-#ifndef NDEBUG
 #include "utilities/fault_injection_fs.h"
-#endif  // NDEBUG
+#include "utilities/transactions/write_prepared_txn_db.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -31,6 +30,21 @@ DEFINE_int32(delay_snapshot_read_one_in,
              "With a chance of 1/N, inject a random delay between taking "
              "snapshot and read.");
 
+DEFINE_int32(rollback_one_in, 0,
+             "If non-zero, rollback non-read-only transactions with a "
+             "probability of 1/N.");
+
+DEFINE_int32(clear_wp_commit_cache_one_in, 0,
+             "If non-zero, evict all commit entries from commit cache with a "
+             "probability of 1/N. This options applies to write-prepared and "
+             "write-unprepared transactions.");
+
+extern "C" bool rocksdb_write_prepared_TEST_ShouldClearCommitCache(void) {
+  static Random rand(static_cast<uint32_t>(db_stress_env->NowMicros()));
+  return FLAGS_clear_wp_commit_cache_one_in > 0 &&
+         rand.OneIn(FLAGS_clear_wp_commit_cache_one_in);
+}
+
 // MultiOpsTxnsStressTest can either operate on a database with pre-populated
 // data (possibly from previous ones), or create a new db and preload it with
 // data specified via `-lb_a`, `-ub_a`, `-lb_c`, `-ub_c`, etc. Among these, we
@@ -75,8 +89,9 @@ void MultiOpsTxnsStressTest::KeyGenerato
         "Cannot allocate key in [%u, %u)\nStart with a new DB or try change "
         "the number of threads for testing via -threads=<#threads>\n",
         static_cast<unsigned int>(low_), static_cast<unsigned int>(high_));
+    fflush(stdout);
     fflush(stderr);
-    std::terminate();
+    assert(false);
   }
   initialized_ = true;
 }
@@ -131,33 +146,43 @@ void MultiOpsTxnsStressTest::KeyGenerato
 }
 
 std::string MultiOpsTxnsStressTest::Record::EncodePrimaryKey(uint32_t a) {
-  char buf[8];
-  EncodeFixed32(buf, kPrimaryIndexId);
-  std::reverse(buf, buf + 4);
-  EncodeFixed32(buf + 4, a);
-  std::reverse(buf + 4, buf + 8);
-  return std::string(buf, sizeof(buf));
+  std::string ret;
+  PutFixed32(&ret, kPrimaryIndexId);
+  PutFixed32(&ret, a);
+
+  char* const buf = &ret[0];
+  std::reverse(buf, buf + sizeof(kPrimaryIndexId));
+  std::reverse(buf + sizeof(kPrimaryIndexId),
+               buf + sizeof(kPrimaryIndexId) + sizeof(a));
+  return ret;
 }
 
 std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c) {
-  char buf[8];
-  EncodeFixed32(buf, kSecondaryIndexId);
-  std::reverse(buf, buf + 4);
-  EncodeFixed32(buf + 4, c);
-  std::reverse(buf + 4, buf + 8);
-  return std::string(buf, sizeof(buf));
+  std::string ret;
+  PutFixed32(&ret, kSecondaryIndexId);
+  PutFixed32(&ret, c);
+
+  char* const buf = &ret[0];
+  std::reverse(buf, buf + sizeof(kSecondaryIndexId));
+  std::reverse(buf + sizeof(kSecondaryIndexId),
+               buf + sizeof(kSecondaryIndexId) + sizeof(c));
+  return ret;
 }
 
 std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey(uint32_t c,
                                                                uint32_t a) {
-  char buf[12];
-  EncodeFixed32(buf, kSecondaryIndexId);
-  std::reverse(buf, buf + 4);
-  EncodeFixed32(buf + 4, c);
-  EncodeFixed32(buf + 8, a);
-  std::reverse(buf + 4, buf + 8);
-  std::reverse(buf + 8, buf + 12);
-  return std::string(buf, sizeof(buf));
+  std::string ret;
+  PutFixed32(&ret, kSecondaryIndexId);
+  PutFixed32(&ret, c);
+  PutFixed32(&ret, a);
+
+  char* const buf = &ret[0];
+  std::reverse(buf, buf + sizeof(kSecondaryIndexId));
+  std::reverse(buf + sizeof(kSecondaryIndexId),
+               buf + sizeof(kSecondaryIndexId) + sizeof(c));
+  std::reverse(buf + sizeof(kSecondaryIndexId) + sizeof(c),
+               buf + sizeof(kSecondaryIndexId) + sizeof(c) + sizeof(a));
+  return ret;
 }
 
 std::tuple<Status, uint32_t, uint32_t>
@@ -201,40 +226,26 @@ std::string MultiOpsTxnsStressTest::Reco
 }
 
 std::string MultiOpsTxnsStressTest::Record::EncodePrimaryIndexValue() const {
-  char buf[8];
-  EncodeFixed32(buf, b_);
-  EncodeFixed32(buf + 4, c_);
-  return std::string(buf, sizeof(buf));
+  std::string ret;
+  PutFixed32(&ret, b_);
+  PutFixed32(&ret, c_);
+  return ret;
 }
 
 std::pair<std::string, std::string>
 MultiOpsTxnsStressTest::Record::EncodeSecondaryIndexEntry() const {
-  std::string secondary_index_key;
-  char buf[12];
-  EncodeFixed32(buf, kSecondaryIndexId);
-  std::reverse(buf, buf + 4);
-  EncodeFixed32(buf + 4, c_);
-  EncodeFixed32(buf + 8, a_);
-  std::reverse(buf + 4, buf + 8);
-  std::reverse(buf + 8, buf + 12);
-  secondary_index_key.assign(buf, sizeof(buf));
+  std::string secondary_index_key = EncodeSecondaryKey(c_, a_);
 
   // Secondary index value is always 4-byte crc32 of the secondary key
   std::string secondary_index_value;
-  uint32_t crc = crc32c::Value(buf, sizeof(buf));
+  uint32_t crc =
+      crc32c::Value(secondary_index_key.data(), secondary_index_key.size());
   PutFixed32(&secondary_index_value, crc);
-  return std::make_pair(secondary_index_key, secondary_index_value);
+  return std::make_pair(std::move(secondary_index_key), secondary_index_value);
 }
 
 std::string MultiOpsTxnsStressTest::Record::EncodeSecondaryKey() const {
-  char buf[12];
-  EncodeFixed32(buf, kSecondaryIndexId);
-  std::reverse(buf, buf + 4);
-  EncodeFixed32(buf + 4, c_);
-  EncodeFixed32(buf + 8, a_);
-  std::reverse(buf + 4, buf + 8);
-  std::reverse(buf + 8, buf + 12);
-  return std::string(buf, sizeof(buf));
+  return EncodeSecondaryKey(c_, a_);
 }
 
 Status MultiOpsTxnsStressTest::Record::DecodePrimaryIndexEntry(
@@ -244,27 +255,22 @@ Status MultiOpsTxnsStressTest::Record::D
     return Status::Corruption("Primary index key length is not 8");
   }
 
-  const char* const index_id_buf = primary_index_key.data();
-  uint32_t index_id =
-      static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[0])) << 24;
-  index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[1]))
-              << 16;
-  index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[2]))
-              << 8;
-  index_id +=
-      static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[3]));
-  primary_index_key.remove_prefix(sizeof(uint32_t));
+  uint32_t index_id = 0;
+
+  [[maybe_unused]] bool res = GetFixed32(&primary_index_key, &index_id);
+  assert(res);
+  index_id = EndianSwapValue(index_id);
+
   if (index_id != kPrimaryIndexId) {
     std::ostringstream oss;
     oss << "Unexpected primary index id: " << index_id;
     return Status::Corruption(oss.str());
   }
 
-  const char* const buf = primary_index_key.data();
-  a_ = static_cast<uint32_t>(static_cast<unsigned char>(buf[0])) << 24;
-  a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[1])) << 16;
-  a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[2])) << 8;
-  a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[3]));
+  res = GetFixed32(&primary_index_key, &a_);
+  assert(res);
+  a_ = EndianSwapValue(a_);
+  assert(primary_index_key.empty());
 
   if (primary_index_value.size() != 8) {
     return Status::Corruption("Primary index value length is not 8");
@@ -282,33 +288,28 @@ Status MultiOpsTxnsStressTest::Record::D
   uint32_t crc =
       crc32c::Value(secondary_index_key.data(), secondary_index_key.size());
 
-  const char* const index_id_buf = secondary_index_key.data();
-  uint32_t index_id =
-      static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[0])) << 24;
-  index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[1]))
-              << 16;
-  index_id += static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[2]))
-              << 8;
-  index_id +=
-      static_cast<uint32_t>(static_cast<unsigned char>(index_id_buf[3]));
-  secondary_index_key.remove_prefix(sizeof(uint32_t));
+  uint32_t index_id = 0;
+
+  [[maybe_unused]] bool res = GetFixed32(&secondary_index_key, &index_id);
+  assert(res);
+  index_id = EndianSwapValue(index_id);
+
   if (index_id != kSecondaryIndexId) {
     std::ostringstream oss;
     oss << "Unexpected secondary index id: " << index_id;
     return Status::Corruption(oss.str());
   }
 
-  const char* const buf = secondary_index_key.data();
   assert(secondary_index_key.size() == 8);
-  c_ = static_cast<uint32_t>(static_cast<unsigned char>(buf[0])) << 24;
-  c_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[1])) << 16;
-  c_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[2])) << 8;
-  c_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[3]));
-
-  a_ = static_cast<uint32_t>(static_cast<unsigned char>(buf[4])) << 24;
-  a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[5])) << 16;
-  a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[6])) << 8;
-  a_ += static_cast<uint32_t>(static_cast<unsigned char>(buf[7]));
+  res = GetFixed32(&secondary_index_key, &c_);
+  assert(res);
+  c_ = EndianSwapValue(c_);
+
+  assert(secondary_index_key.size() == 4);
+  res = GetFixed32(&secondary_index_key, &a_);
+  assert(res);
+  a_ = EndianSwapValue(a_);
+  assert(secondary_index_key.empty());
 
   if (secondary_index_value.size() != 4) {
     return Status::Corruption("Secondary index value length is not 4");
@@ -520,9 +521,35 @@ Status MultiOpsTxnsStressTest::TestCusto
     // Should never reach here.
     assert(false);
   }
+
   return s;
 }
 
+void MultiOpsTxnsStressTest::RegisterAdditionalListeners() {
+  options_.listeners.emplace_back(new MultiOpsTxnsStressListener(this));
+}
+
+#ifndef ROCKSDB_LITE
+void MultiOpsTxnsStressTest::PrepareTxnDbOptions(
+    SharedState* /*shared*/, TransactionDBOptions& txn_db_opts) {
+  // MultiOpsTxnsStressTest uses SingleDelete to delete secondary keys, thus
+  // we register this callback to let the TxnDB know that, when rolling back
+  // a transaction, it should use only SingleDelete to cancel a prior Put from
+  // the same transaction if applicable.
+  txn_db_opts.rollback_deletion_type_callback =
+      [](TransactionDB* /*db*/, ColumnFamilyHandle* /*column_family*/,
+         const Slice& key) {
+        Slice ks = key;
+        uint32_t index_id = 0;
+        [[maybe_unused]] bool res = GetFixed32(&ks, &index_id);
+        assert(res);
+        index_id = EndianSwapValue(index_id);
+        assert(index_id <= Record::kSecondaryIndexId);
+        return index_id == Record::kSecondaryIndexId;
+      };
+}
+#endif  // !ROCKSDB_LITE
+
 Status MultiOpsTxnsStressTest::PrimaryKeyUpdateTxn(ThreadState* thread,
                                                    uint32_t old_a,
                                                    uint32_t old_a_pos,
@@ -561,8 +588,10 @@ Status MultiOpsTxnsStressTest::PrimaryKe
     }
     if (s.IsNotFound()) {
       thread->stats.AddGets(/*ngets=*/1, /*nfounds=*/0);
-    } else if (s.IsBusy()) {
+    } else if (s.IsBusy() || s.IsIncomplete()) {
       // ignore.
+      // Incomplete also means rollback by application. See the transaction
+      // implementations.
     } else {
       thread->stats.AddErrors(1);
     }
@@ -631,6 +660,16 @@ Status MultiOpsTxnsStressTest::PrimaryKe
     return s;
   }
 
+  if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) {
+    s = Status::Incomplete();
+    return s;
+  }
+
+  s = WriteToCommitTimeWriteBatch(*txn);
+  if (!s.ok()) {
+    return s;
+  }
+
   s = txn->Commit();
 
   auto& key_gen = key_gen_for_a_.at(thread->tid);
@@ -677,11 +716,12 @@ Status MultiOpsTxnsStressTest::Secondary
           Record::kPrimaryIndexEntrySize + Record::kSecondaryIndexEntrySize);
       return;
     } else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() ||
-               s.IsMergeInProgress()) {
+               s.IsMergeInProgress() || s.IsIncomplete()) {
       // ww-conflict detected, or
       // lock cannot be acquired, or
       // memtable history is not large enough for conflict checking, or
-      // Merge operation cannot be resolved.
+      // Merge operation cannot be resolved, or
+      // application rollback.
       // TODO (yanqin) add stats for other cases?
     } else if (s.IsNotFound()) {
       // ignore.
@@ -727,8 +767,9 @@ Status MultiOpsTxnsStressTest::Secondary
     Record record;
     s = record.DecodeSecondaryIndexEntry(it->key(), it->value());
     if (!s.ok()) {
-      fprintf(stderr, "Cannot decode secondary key: %s\n",
-              s.ToString().c_str());
+      fprintf(stderr, "Cannot decode secondary key (%s => %s): %s\n",
+              it->key().ToString(true).c_str(),
+              it->value().ToString(true).c_str(), s.ToString().c_str());
       assert(false);
       break;
     }
@@ -749,21 +790,31 @@ Status MultiOpsTxnsStressTest::Secondary
     } else if (s.IsNotFound()) {
       // We can also fail verification here.
       std::ostringstream oss;
-      oss << "pk should exist: " << Slice(pk).ToString(true);
+      auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
+      assert(dbimpl);
+      oss << "snap " << read_opts.snapshot->GetSequenceNumber()
+          << " (published " << dbimpl->GetLastPublishedSequence()
+          << "), pk should exist: " << Slice(pk).ToString(true);
       fprintf(stderr, "%s\n", oss.str().c_str());
       assert(false);
       break;
     }
     if (!s.ok()) {
-      fprintf(stderr, "%s\n", s.ToString().c_str());
+      std::ostringstream oss;
+      auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
+      assert(dbimpl);
+      oss << "snap " << read_opts.snapshot->GetSequenceNumber()
+          << " (published " << dbimpl->GetLastPublishedSequence() << "), "
+          << s.ToString();
+      fprintf(stderr, "%s\n", oss.str().c_str());
       assert(false);
       break;
     }
     auto result = Record::DecodePrimaryIndexValue(value);
     s = std::get<0>(result);
     if (!s.ok()) {
-      fprintf(stderr, "Cannot decode primary index value: %s\n",
-              s.ToString().c_str());
+      fprintf(stderr, "Cannot decode primary index value %s: %s\n",
+              Slice(value).ToString(true).c_str(), s.ToString().c_str());
       assert(false);
       break;
     }
@@ -771,8 +822,12 @@ Status MultiOpsTxnsStressTest::Secondary
     uint32_t c = std::get<2>(result);
     if (c != old_c) {
       std::ostringstream oss;
-      oss << "c in primary index does not match secondary index: " << c
-          << " != " << old_c;
+      auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
+      assert(dbimpl);
+      oss << "snap " << read_opts.snapshot->GetSequenceNumber()
+          << " (published " << dbimpl->GetLastPublishedSequence()
+          << "), pk/sk mismatch. pk: (a=" << record.a_value() << ", "
+          << "c=" << c << "), sk: (c=" << old_c << ")";
       s = Status::Corruption();
       fprintf(stderr, "%s\n", oss.str().c_str());
       assert(false);
@@ -811,6 +866,16 @@ Status MultiOpsTxnsStressTest::Secondary
     return s;
   }
 
+  if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) {
+    s = Status::Incomplete();
+    return s;
+  }
+
+  s = WriteToCommitTimeWriteBatch(*txn);
+  if (!s.ok()) {
+    return s;
+  }
+
   s = txn->Commit();
 
   if (s.ok()) {
@@ -856,7 +921,7 @@ Status MultiOpsTxnsStressTest::UpdatePri
     } else if (s.IsInvalidArgument()) {
       // ignored.
     } else if (s.IsBusy() || s.IsTimedOut() || s.IsTryAgain() ||
-               s.IsMergeInProgress()) {
+               s.IsMergeInProgress() || s.IsIncomplete()) {
       // ignored.
     } else {
       thread->stats.AddErrors(1);
@@ -874,8 +939,8 @@ Status MultiOpsTxnsStressTest::UpdatePri
   auto result = Record::DecodePrimaryIndexValue(value);
   if (!std::get<0>(result).ok()) {
     s = std::get<0>(result);
-    fprintf(stderr, "Cannot decode primary index value: %s\n",
-            s.ToString().c_str());
+    fprintf(stderr, "Cannot decode primary index value %s: %s\n",
+            Slice(value).ToString(true).c_str(), s.ToString().c_str());
     assert(false);
     return s;
   }
@@ -892,6 +957,17 @@ Status MultiOpsTxnsStressTest::UpdatePri
   if (!s.ok()) {
     return s;
   }
+
+  if (FLAGS_rollback_one_in > 0 && thread->rand.OneIn(FLAGS_rollback_one_in)) {
+    s = Status::Incomplete();
+    return s;
+  }
+
+  s = WriteToCommitTimeWriteBatch(*txn);
+  if (!s.ok()) {
+    return s;
+  }
+
   s = txn->Commit();
   if (s.ok()) {
     delete txn;
@@ -1050,12 +1126,15 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
   // First, iterate primary index.
   size_t primary_index_entries_count = 0;
   {
-    char buf[4];
-    EncodeFixed32(buf, Record::kPrimaryIndexId + 1);
-    std::reverse(buf, buf + sizeof(buf));
-    std::string iter_ub_str(buf, sizeof(buf));
+    std::string iter_ub_str;
+    PutFixed32(&iter_ub_str, Record::kPrimaryIndexId + 1);
+    std::reverse(iter_ub_str.begin(), iter_ub_str.end());
     Slice iter_ub = iter_ub_str;
 
+    std::string start_key;
+    PutFixed32(&start_key, Record::kPrimaryIndexId);
+    std::reverse(start_key.begin(), start_key.end());
+
     // This `ReadOptions` is for validation purposes. Ignore
     // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
     ReadOptions ropts;
@@ -1064,7 +1143,7 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
     ropts.iterate_upper_bound = &iter_ub;
 
     std::unique_ptr<Iterator> it(db_->NewIterator(ropts));
-    for (it->SeekToFirst(); it->Valid(); it->Next()) {
+    for (it->Seek(start_key); it->Valid(); it->Next()) {
       Record record;
       Status s = record.DecodePrimaryIndexEntry(it->key(), it->value());
       if (!s.ok()) {
@@ -1101,10 +1180,9 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
   // Second, iterate secondary index.
   size_t secondary_index_entries_count = 0;
   {
-    char buf[4];
-    EncodeFixed32(buf, Record::kSecondaryIndexId);
-    std::reverse(buf, buf + sizeof(buf));
-    const std::string start_key(buf, sizeof(buf));
+    std::string start_key;
+    PutFixed32(&start_key, Record::kSecondaryIndexId);
+    std::reverse(start_key.begin(), start_key.end());
 
     // This `ReadOptions` is for validation purposes. Ignore
     // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
@@ -1118,7 +1196,8 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
       Record record;
       Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value());
       if (!s.ok()) {
-        oss << "Cannot decode secondary index entry";
+        oss << "Cannot decode secondary index entry "
+            << it->key().ToString(true) << "=>" << it->value().ToString(true);
         VerificationAbort(thread->shared, oss.str(), s);
         assert(false);
         return;
@@ -1132,7 +1211,7 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
       s = db_->Get(ropts, pk, &value);
       if (!s.ok()) {
         oss << "Error searching pk " << Slice(pk).ToString(true) << ". "
-            << s.ToString();
+            << s.ToString() << ". sk " << it->key().ToString(true);
         VerificationAbort(thread->shared, oss.str(), s);
         assert(false);
         return;
@@ -1148,8 +1227,10 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
       }
       uint32_t c_in_primary = std::get<2>(result);
       if (c_in_primary != record.c_value()) {
-        oss << "Pk/sk mismatch. pk: (c=" << c_in_primary
-            << "), sk: (c=" << record.c_value() << ")";
+        oss << "Pk/sk mismatch. pk: " << Slice(pk).ToString(true) << "=>"
+            << Slice(value).ToString(true) << " (a=" << record.a_value()
+            << ", c=" << c_in_primary << "), sk: " << it->key().ToString(true)
+            << " (c=" << record.c_value() << ")";
         VerificationAbort(thread->shared, oss.str(), s);
         assert(false);
         return;
@@ -1167,6 +1248,75 @@ void MultiOpsTxnsStressTest::VerifyDb(Th
   }
 }
 
+void MultiOpsTxnsStressTest::VerifyPkSkFast(int job_id) {
+  const Snapshot* const snapshot = db_->GetSnapshot();
+  assert(snapshot);
+  ManagedSnapshot snapshot_guard(db_, snapshot);
+
+  std::ostringstream oss;
+  auto* dbimpl = static_cast_with_check<DBImpl>(db_->GetRootDB());
+  assert(dbimpl);
+
+  oss << "Job " << job_id << ": [" << snapshot->GetSequenceNumber() << ","
+      << dbimpl->GetLastPublishedSequence() << "] ";
+
+  std::string start_key;
+  PutFixed32(&start_key, Record::kSecondaryIndexId);
+  std::reverse(start_key.begin(), start_key.end());
+
+  // This `ReadOptions` is for validation purposes. Ignore
+  // `FLAGS_rate_limit_user_ops` to avoid slowing any validation.
+  ReadOptions ropts;
+  ropts.snapshot = snapshot;
+  ropts.total_order_seek = true;
+
+  std::unique_ptr<Iterator> it(db_->NewIterator(ropts));
+  for (it->Seek(start_key); it->Valid(); it->Next()) {
+    Record record;
+    Status s = record.DecodeSecondaryIndexEntry(it->key(), it->value());
+    if (!s.ok()) {
+      oss << "Cannot decode secondary index entry " << it->key().ToString(true)
+          << "=>" << it->value().ToString(true);
+      fprintf(stderr, "%s\n", oss.str().c_str());
+      fflush(stderr);
+      assert(false);
+    }
+    // After decoding secondary index entry, we know a and c. Crc is verified
+    // in decoding phase.
+    //
+    // Form a primary key and search in the primary index.
+    std::string pk = Record::EncodePrimaryKey(record.a_value());
+    std::string value;
+    s = db_->Get(ropts, pk, &value);
+    if (!s.ok()) {
+      oss << "Error searching pk " << Slice(pk).ToString(true) << ". "
+          << s.ToString() << ". sk " << it->key().ToString(true);
+      fprintf(stderr, "%s\n", oss.str().c_str());
+      fflush(stderr);
+      assert(false);
+    }
+    auto result = Record::DecodePrimaryIndexValue(value);
+    s = std::get<0>(result);
+    if (!s.ok()) {
+      oss << "Error decoding primary index value "
+          << Slice(value).ToString(true) << ". " << s.ToString();
+      fprintf(stderr, "%s\n", oss.str().c_str());
+      fflush(stderr);
+      assert(false);
+    }
+    uint32_t c_in_primary = std::get<2>(result);
+    if (c_in_primary != record.c_value()) {
+      oss << "Pk/sk mismatch. pk: " << Slice(pk).ToString(true) << "=>"
+          << Slice(value).ToString(true) << " (a=" << record.a_value()
+          << ", c=" << c_in_primary << "), sk: " << it->key().ToString(true)
+          << " (c=" << record.c_value() << ")";
+      fprintf(stderr, "%s\n", oss.str().c_str());
+      fflush(stderr);
+      assert(false);
+    }
+  }
+}
+
 std::pair<uint32_t, uint32_t> MultiOpsTxnsStressTest::ChooseExistingA(
     ThreadState* thread) {
   uint32_t tid = thread->tid;
@@ -1193,6 +1343,22 @@ uint32_t MultiOpsTxnsStressTest::Generat
   return key_gen->Allocate();
 }
 
+#ifndef ROCKSDB_LITE
+Status MultiOpsTxnsStressTest::WriteToCommitTimeWriteBatch(Transaction& txn) {
+  WriteBatch* ctwb = txn.GetCommitTimeWriteBatch();
+  assert(ctwb);
+  // Do not change the content in key_buf.
+  static constexpr char key_buf[sizeof(Record::kMetadataPrefix) + 4] = {
+      '\0', '\0', '\0', '\0', '\0', '\0', '\0', '\xff'};
+
+  uint64_t counter_val = counter_.Next();
+  char val_buf[sizeof(counter_val)];
+  EncodeFixed64(val_buf, counter_val);
+  return ctwb->Put(Slice(key_buf, sizeof(key_buf)),
+                   Slice(val_buf, sizeof(val_buf)));
+}
+#endif  // !ROCKSDB_LITE
+
 std::string MultiOpsTxnsStressTest::KeySpaces::EncodeTo() const {
   std::string result;
   PutFixed32(&result, lb_a);
@@ -1428,8 +1594,9 @@ void MultiOpsTxnsStressTest::ScanExistin
       Record record;
       Status s = record.DecodePrimaryIndexEntry(it->key(), it->value());
       if (!s.ok()) {
-        fprintf(stderr, "Cannot decode primary index entry: %s\n",
-                s.ToString().c_str());
+        fprintf(stderr, "Cannot decode primary index entry (%s => %s): %s\n",
+                it->key().ToString(true).c_str(),
+                it->value().ToString(true).c_str(), s.ToString().c_str());
         assert(false);
       }
       uint32_t a = record.a_value();
diff -pruN 7.2.2-5/db_stress_tool/multi_ops_txns_stress.h 7.3.1-2/db_stress_tool/multi_ops_txns_stress.h
--- 7.2.2-5/db_stress_tool/multi_ops_txns_stress.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/multi_ops_txns_stress.h	2022-06-08 21:08:16.000000000 +0000
@@ -111,6 +111,7 @@ class MultiOpsTxnsStressTest : public St
  public:
   class Record {
    public:
+    static constexpr uint32_t kMetadataPrefix = 0;
     static constexpr uint32_t kPrimaryIndexId = 1;
     static constexpr uint32_t kSecondaryIndexId = 2;
 
@@ -261,6 +262,13 @@ class MultiOpsTxnsStressTest : public St
       ThreadState* thread,
       const std::vector<int>& rand_column_families) override;
 
+  void RegisterAdditionalListeners() override;
+
+#ifndef ROCKSDB_LITE
+  void PrepareTxnDbOptions(SharedState* /*shared*/,
+                           TransactionDBOptions& txn_db_opts) override;
+#endif  // !ROCKSDB_LITE
+
   Status PrimaryKeyUpdateTxn(ThreadState* thread, uint32_t old_a,
                              uint32_t old_a_pos, uint32_t new_a);
 
@@ -280,7 +288,17 @@ class MultiOpsTxnsStressTest : public St
     VerifyDb(thread);
   }
 
+  void VerifyPkSkFast(int job_id);
+
  protected:
+  class Counter {
+   public:
+    uint64_t Next() { return value_.fetch_add(1); }
+
+   private:
+    std::atomic<uint64_t> value_ = Env::Default()->NowNanos();
+  };
+
   using KeySet = std::set<uint32_t>;
   class KeyGenerator {
    public:
@@ -330,9 +348,21 @@ class MultiOpsTxnsStressTest : public St
 
   uint32_t GenerateNextC(ThreadState* thread);
 
+#ifndef ROCKSDB_LITE
+  // Some applications, e.g. MyRocks, write a KV pair to the database via a
+  // commit-time-write-batch (ctwb) in addition to the transaction's regular
+  // write batch. The key is usually a constant representing some system
+  // metadata, while the value is monotonically increasing and represents the
+  // actual value of the metadata. Method WriteToCommitTimeWriteBatch()
+  // emulates this scenario.
+  Status WriteToCommitTimeWriteBatch(Transaction& txn);
+#endif  //! ROCKSDB_LITE
+
   std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_a_;
   std::vector<std::unique_ptr<KeyGenerator>> key_gen_for_c_;
 
+  Counter counter_{};
+
  private:
   struct KeySpaces {
     uint32_t lb_a = 0;
@@ -370,5 +400,38 @@ class InvariantChecker {
                 "MultiOpsTxnsStressTest::Record::c_ must be 4 bytes");
 };
 
+class MultiOpsTxnsStressListener : public EventListener {
+ public:
+  explicit MultiOpsTxnsStressListener(MultiOpsTxnsStressTest* stress_test)
+      : stress_test_(stress_test) {
+    assert(stress_test_);
+  }
+
+#ifndef ROCKSDB_LITE
+  ~MultiOpsTxnsStressListener() override {}
+
+  void OnFlushCompleted(DB* db, const FlushJobInfo& info) override {
+    assert(db);
+#ifdef NDEBUG
+    (void)db;
+#endif
+    assert(info.cf_id == 0);
+    stress_test_->VerifyPkSkFast(info.job_id);
+  }
+
+  void OnCompactionCompleted(DB* db, const CompactionJobInfo& info) override {
+    assert(db);
+#ifdef NDEBUG
+    (void)db;
+#endif
+    assert(info.cf_id == 0);
+    stress_test_->VerifyPkSkFast(info.job_id);
+  }
+#endif  //! ROCKSDB_LITE
+
+ private:
+  MultiOpsTxnsStressTest* const stress_test_ = nullptr;
+};
+
 }  // namespace ROCKSDB_NAMESPACE
 #endif  // GFLAGS
diff -pruN 7.2.2-5/db_stress_tool/no_batched_ops_stress.cc 7.3.1-2/db_stress_tool/no_batched_ops_stress.cc
--- 7.2.2-5/db_stress_tool/no_batched_ops_stress.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/db_stress_tool/no_batched_ops_stress.cc	2022-06-08 21:08:16.000000000 +0000
@@ -9,9 +9,8 @@
 
 #ifdef GFLAGS
 #include "db_stress_tool/db_stress_common.h"
-#ifndef NDEBUG
 #include "utilities/fault_injection_fs.h"
-#endif // NDEBUG
+#include "rocksdb/utilities/transaction_db.h"
 
 namespace ROCKSDB_NAMESPACE {
 class NonBatchedOpsStressTest : public StressTest {
@@ -84,8 +83,8 @@ class NonBatchedOpsStressTest : public S
             // move to the next item in the iterator
             s = Status::NotFound();
           }
-          VerifyValue(static_cast<int>(cf), i, options, shared, from_db, s,
-                      true);
+          VerifyOrSyncValue(static_cast<int>(cf), i, options, shared, from_db,
+                            s, true);
           if (from_db.length()) {
             PrintKeyValue(static_cast<int>(cf), static_cast<uint32_t>(i),
                           from_db.data(), from_db.length());
@@ -101,8 +100,8 @@ class NonBatchedOpsStressTest : public S
           std::string keystr = Key(i);
           Slice k = keystr;
           Status s = db_->Get(options, column_families_[cf], k, &from_db);
-          VerifyValue(static_cast<int>(cf), i, options, shared, from_db, s,
-                      true);
+          VerifyOrSyncValue(static_cast<int>(cf), i, options, shared, from_db,
+                            s, true);
           if (from_db.length()) {
             PrintKeyValue(static_cast<int>(cf), static_cast<uint32_t>(i),
                           from_db.data(), from_db.length());
@@ -130,8 +129,8 @@ class NonBatchedOpsStressTest : public S
           for (size_t j = 0; j < batch_size; ++j) {
             Status s = statuses[j];
             std::string from_db = values[j].ToString();
-            VerifyValue(static_cast<int>(cf), i + j, options, shared, from_db,
-                        s, true);
+            VerifyOrSyncValue(static_cast<int>(cf), i + j, options, shared,
+                              from_db, s, true);
             if (from_db.length()) {
               PrintKeyValue(static_cast<int>(cf), static_cast<uint32_t>(i + j),
                             from_db.data(), from_db.length());
@@ -174,8 +173,8 @@ class NonBatchedOpsStressTest : public S
           if (number_of_operands) {
             from_db = values[number_of_operands - 1].ToString();
           }
-          VerifyValue(static_cast<int>(cf), i, options, shared, from_db, s,
-                      true);
+          VerifyOrSyncValue(static_cast<int>(cf), i, options, shared, from_db,
+                            s, true);
           if (from_db.length()) {
             PrintKeyValue(static_cast<int>(cf), static_cast<uint32_t>(i),
                           from_db.data(), from_db.length());
@@ -190,7 +189,8 @@ class NonBatchedOpsStressTest : public S
       if (thread->rand.OneInOpt(FLAGS_clear_column_family_one_in)) {
         // drop column family and then create it again (can't drop default)
         int cf = thread->rand.Next() % (FLAGS_column_families - 1) + 1;
-        std::string new_name = ToString(new_column_family_name_.fetch_add(1));
+        std::string new_name =
+            std::to_string(new_column_family_name_.fetch_add(1));
         {
           MutexLock l(thread->shared->GetMutex());
           fprintf(
@@ -233,20 +233,15 @@ class NonBatchedOpsStressTest : public S
     std::string from_db;
     int error_count = 0;
 
-#ifndef NDEBUG
     if (fault_fs_guard) {
       fault_fs_guard->EnableErrorInjection();
       SharedState::ignore_read_error = false;
     }
-#endif // NDEBUG
     Status s = db_->Get(read_opts, cfh, key, &from_db);
-#ifndef NDEBUG
     if (fault_fs_guard) {
       error_count = fault_fs_guard->GetAndResetErrorCount();
     }
-#endif // NDEBUG
     if (s.ok()) {
-#ifndef NDEBUG
       if (fault_fs_guard) {
         if (error_count && !SharedState::ignore_read_error) {
           // Grab mutex so multiple thread don't try to print the
@@ -258,7 +253,6 @@ class NonBatchedOpsStressTest : public S
           std::terminate();
         }
       }
-#endif // NDEBUG
       // found case
       thread->stats.AddGets(1, 1);
     } else if (s.IsNotFound()) {
@@ -272,11 +266,9 @@ class NonBatchedOpsStressTest : public S
         thread->stats.AddVerifiedErrors(1);
       }
     }
-#ifndef NDEBUG
     if (fault_fs_guard) {
       fault_fs_guard->DisableErrorInjection();
     }
-#endif // NDEBUG
     return s;
   }
 
@@ -364,19 +356,15 @@ class NonBatchedOpsStressTest : public S
     }
 
     if (!use_txn) {
-#ifndef NDEBUG
       if (fault_fs_guard) {
         fault_fs_guard->EnableErrorInjection();
         SharedState::ignore_read_error = false;
       }
-#endif // NDEBUG
       db_->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(),
                     statuses.data());
-#ifndef NDEBUG
       if (fault_fs_guard) {
         error_count = fault_fs_guard->GetAndResetErrorCount();
       }
-#endif // NDEBUG
     } else {
 #ifndef ROCKSDB_LITE
       txn->MultiGet(readoptionscopy, cfh, num_keys, keys.data(), values.data(),
@@ -384,7 +372,6 @@ class NonBatchedOpsStressTest : public S
 #endif
     }
 
-#ifndef NDEBUG
     if (fault_fs_guard && error_count && !SharedState::ignore_read_error) {
       int stat_nok = 0;
       for (const auto& s : statuses) {
@@ -408,7 +395,6 @@ class NonBatchedOpsStressTest : public S
     if (fault_fs_guard) {
       fault_fs_guard->DisableErrorInjection();
     }
-#endif // NDEBUG
 
     for (size_t i = 0; i < statuses.size(); ++i) {
       Status s = statuses[i];
@@ -562,8 +548,8 @@ class NonBatchedOpsStressTest : public S
       Slice k = key_str2;
       std::string from_db;
       Status s = db_->Get(read_opts, cfh, k, &from_db);
-      if (!VerifyValue(rand_column_family, rand_key, read_opts, shared, from_db,
-                       s, true)) {
+      if (!VerifyOrSyncValue(rand_column_family, rand_key, read_opts, shared,
+                             from_db, s, true)) {
         return s;
       }
     }
@@ -631,33 +617,14 @@ class NonBatchedOpsStressTest : public S
   Status TestDelete(ThreadState* thread, WriteOptions& write_opts,
                     const std::vector<int>& rand_column_families,
                     const std::vector<int64_t>& rand_keys,
-                    std::unique_ptr<MutexLock>& lock) override {
+                    std::unique_ptr<MutexLock>& /* lock */) override {
     int64_t rand_key = rand_keys[0];
     int rand_column_family = rand_column_families[0];
     auto shared = thread->shared;
-    int64_t max_key = shared->GetMaxKey();
 
     // OPERATION delete
-    // If the chosen key does not allow overwrite and it does not exist,
-    // choose another key.
-    std::string write_ts_str;
-    Slice write_ts;
-    while (!shared->AllowsOverwrite(rand_key) &&
-           !shared->Exists(rand_column_family, rand_key)) {
-      lock.reset();
-      rand_key = thread->rand.Next() % max_key;
-      rand_column_family = thread->rand.Next() % FLAGS_column_families;
-      lock.reset(
-          new MutexLock(shared->GetMutexForKey(rand_column_family, rand_key)));
-      if (FLAGS_user_timestamp_size > 0) {
-        write_ts_str = NowNanosStr();
-        write_ts = write_ts_str;
-      }
-    }
-    if (write_ts.size() == 0 && FLAGS_user_timestamp_size) {
-      write_ts_str = NowNanosStr();
-      write_ts = write_ts_str;
-    }
+    std::string write_ts_str = NowNanosStr();
+    Slice write_ts = write_ts_str;
 
     std::string key_str = Key(rand_key);
     Slice key = key_str;
@@ -823,7 +790,7 @@ class NonBatchedOpsStressTest : public S
                               const std::vector<int64_t>& rand_keys,
                               std::unique_ptr<MutexLock>& lock) override {
     const std::string sst_filename =
-        FLAGS_db + "/." + ToString(thread->tid) + ".sst";
+        FLAGS_db + "/." + std::to_string(thread->tid) + ".sst";
     Status s;
     if (db_stress_env->FileExists(sst_filename).ok()) {
       // Maybe we terminated abnormally before, so cleanup to give this file
@@ -882,16 +849,24 @@ class NonBatchedOpsStressTest : public S
   }
 #endif  // ROCKSDB_LITE
 
-  bool VerifyValue(int cf, int64_t key, const ReadOptions& /*opts*/,
-                   SharedState* shared, const std::string& value_from_db,
-                   const Status& s, bool strict = false) const {
+  bool VerifyOrSyncValue(int cf, int64_t key, const ReadOptions& /*opts*/,
+                         SharedState* shared, const std::string& value_from_db,
+                         const Status& s, bool strict = false) const {
     if (shared->HasVerificationFailedYet()) {
       return false;
     }
     // compare value_from_db with the value in the shared state
-    char value[kValueMaxLen];
     uint32_t value_base = shared->Get(cf, key);
     if (value_base == SharedState::UNKNOWN_SENTINEL) {
+      if (s.ok()) {
+        // Value exists in db, update state to reflect that
+        Slice slice(value_from_db);
+        value_base = GetValueBase(slice);
+        shared->Put(cf, key, value_base, false);
+      } else if (s.IsNotFound()) {
+        // Value doesn't exist in db, update state to reflect that
+        shared->SingleDelete(cf, key, false);
+      }
       return true;
     }
     if (value_base == SharedState::DELETION_SENTINEL && !strict) {
@@ -899,6 +874,7 @@ class NonBatchedOpsStressTest : public S
     }
 
     if (s.ok()) {
+      char value[kValueMaxLen];
       if (value_base == SharedState::DELETION_SENTINEL) {
         VerificationAbort(shared, "Unexpected value found", cf, key);
         return false;
@@ -921,6 +897,21 @@ class NonBatchedOpsStressTest : public S
     }
     return true;
   }
+
+#ifndef ROCKSDB_LITE
+  void PrepareTxnDbOptions(SharedState* shared,
+                           TransactionDBOptions& txn_db_opts) override {
+    txn_db_opts.rollback_deletion_type_callback =
+        [shared](TransactionDB*, ColumnFamilyHandle*, const Slice& key) {
+          assert(shared);
+          uint64_t key_num = 0;
+          bool ok = GetIntVal(key.ToString(), &key_num);
+          assert(ok);
+          (void)ok;
+          return !shared->AllowsOverwrite(key_num);
+        };
+  }
+#endif  // ROCKSDB_LITE
 };
 
 StressTest* CreateNonBatchedOpsStressTest() {
diff -pruN 7.2.2-5/debian/changelog 7.3.1-2/debian/changelog
--- 7.2.2-5/debian/changelog	2022-06-16 16:29:56.000000000 +0000
+++ 7.3.1-2/debian/changelog	2022-07-31 10:44:49.000000000 +0000
@@ -1,3 +1,17 @@
+rocksdb (7.3.1-2) unstable; urgency=medium
+
+  * Upload to Sid.
+
+ -- Laszlo Boszormenyi (GCS) <gcs@debian.org>  Sun, 31 Jul 2022 12:44:49 +0200
+
+rocksdb (7.3.1-1) experimental; urgency=medium
+
+  * New upstream release.
+  * Update patches.
+  * Library transition from librocksdb7.2 to librocksdb7.3 .
+
+ -- Laszlo Boszormenyi (GCS) <gcs@debian.org>  Sun, 19 Jun 2022 15:39:09 +0200
+
 rocksdb (7.2.2-5) unstable; urgency=medium
 
   * Build with LZ4 support (closes: #1012629, #1012804).
diff -pruN 7.2.2-5/debian/control 7.3.1-2/debian/control
--- 7.2.2-5/debian/control	2022-06-16 16:29:56.000000000 +0000
+++ 7.3.1-2/debian/control	2022-06-19 13:39:09.000000000 +0000
@@ -10,7 +10,7 @@ Homepage: https://rocksdb.org/
 Package: librocksdb-dev
 Section: libdevel
 Architecture: amd64 arm64 armel armhf ppc64el mips mipsel mips64el sparc64 s390x i386 riscv64
-Depends: ${misc:Depends}, librocksdb7.2 (= ${binary:Version}), libgflags-dev,
+Depends: ${misc:Depends}, librocksdb7.3 (= ${binary:Version}), libgflags-dev,
  libsnappy-dev, libbz2-dev, zlib1g-dev, liblz4-dev, libzstd-dev
 Description: persistent Key-Value Store for Flash and RAM Storage (development)
  C++ library providing an embedded key-value store, where keys and values are
@@ -36,7 +36,7 @@ Description: persistent Key-Value Store
  This package contains libraries and header files for developing
  applications that use librocksdb .
 
-Package: librocksdb7.2
+Package: librocksdb7.3
 Section: libs
 Architecture: amd64 arm64 armel armhf ppc64el mips mipsel mips64el sparc64 s390x i386 riscv64
 Depends: ${misc:Depends}, ${shlibs:Depends}
diff -pruN 7.2.2-5/debian/copyright 7.3.1-2/debian/copyright
--- 7.2.2-5/debian/copyright	2022-05-21 17:47:37.000000000 +0000
+++ 7.3.1-2/debian/copyright	2022-06-19 13:39:09.000000000 +0000
@@ -20,7 +20,7 @@ Files: debian/*
 Copyright: Copyright (C) 2015- Laszlo Boszormenyi (GCS) <gcs@debian.org>
 License: GPL-3+
 
-Files: debian/patches/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch debian/patches/ppc64.patch debian/patches/mips.patch debian/patches/arm.patch debian/patches/0001-replace-old-sync-with-new-atomic-builtin-equivalents.patch
+Files: debian/patches/ppc64.patch debian/patches/mips.patch debian/patches/arm.patch debian/patches/0001-replace-old-sync-with-new-atomic-builtin-equivalents.patch
 Copyright: Copyright (C) 2021 Khem Raj <raj.khem@gmail.com>
 License: GPL-2 or Apache-2.0
 
diff -pruN 7.2.2-5/debian/librocksdb7.2.install 7.3.1-2/debian/librocksdb7.2.install
--- 7.2.2-5/debian/librocksdb7.2.install	2021-10-16 17:07:15.000000000 +0000
+++ 7.3.1-2/debian/librocksdb7.2.install	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-usr/lib/${DEB_HOST_MULTIARCH}/lib*.so.*
diff -pruN 7.2.2-5/debian/librocksdb7.3.install 7.3.1-2/debian/librocksdb7.3.install
--- 7.2.2-5/debian/librocksdb7.3.install	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/debian/librocksdb7.3.install	2021-10-16 17:07:15.000000000 +0000
@@ -0,0 +1 @@
+usr/lib/${DEB_HOST_MULTIARCH}/lib*.so.*
diff -pruN 7.2.2-5/debian/librocksdb-dev.install 7.3.1-2/debian/librocksdb-dev.install
--- 7.2.2-5/debian/librocksdb-dev.install	2021-10-16 17:07:15.000000000 +0000
+++ 7.3.1-2/debian/librocksdb-dev.install	2022-06-19 13:39:09.000000000 +0000
@@ -2,3 +2,4 @@ usr/include/rocksdb/
 usr/lib/${DEB_HOST_MULTIARCH}/lib*.a
 usr/lib/${DEB_HOST_MULTIARCH}/lib*.so
 usr/lib/${DEB_HOST_MULTIARCH}/cmake/
+usr/lib/${DEB_HOST_MULTIARCH}/pkgconfig/
diff -pruN 7.2.2-5/debian/patches/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch 7.3.1-2/debian/patches/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch
--- 7.2.2-5/debian/patches/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch	2022-05-19 21:16:32.000000000 +0000
+++ 7.3.1-2/debian/patches/0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,44 +0,0 @@
-From 89c032a9b4011385c0b504ea61e5df0db71f0ff5 Mon Sep 17 00:00:00 2001
-From: Khem Raj <raj.khem@gmail.com>
-Date: Wed, 16 Jun 2021 19:06:02 -0700
-Subject: [PATCH] range_tree: Implement toku_time_now for rv32/rv64 in asm
-
-Upstream-Status: Pending
-Signed-off-by: Khem Raj <raj.khem@gmail.com>
----
- .../range_tree/lib/portability/toku_time.h    | 19 +++++++++++++++++++
- 1 file changed, 19 insertions(+)
-
-diff --git a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-index 4425a4a2e..4ac964f85 100644
---- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-+++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-@@ -137,6 +137,25 @@ static inline tokutime_t toku_time_now(v
-   uint64_t result;
-   asm volatile("stckf %0" : "=Q"(result) : : "cc");
-   return result;
-+#elif defined(__riscv) // RISC-V
-+#if __riscv_xlen == 32
-+  uint32_t lo, hi0, hi1;
-+  __asm __volatile__(
-+      "rdcycleh %0\n"
-+      "rdcycle %1\n"
-+      "rdcycleh %2\n"
-+      "sub %0, %0, %2\n"
-+      "seqz %0, %0\n"
-+      "sub %0, zero, %0\n"
-+      "and %1, %1, %0\n"
-+      : "=r"(hi0), "=r"(lo), "=r"(hi1));
-+  return ((uint64_t)hi1 << 32) | lo;
-+#else
-+  uint64_t result;
-+  __asm __volatile__("rdcycle %0" : "=r"(result));
-+  return result;
-+#endif
-+
- #else
- #error No timer implementation for this platform
- #endif
--- 
-2.32.0
-
diff -pruN 7.2.2-5/debian/patches/arm.patch 7.3.1-2/debian/patches/arm.patch
--- 7.2.2-5/debian/patches/arm.patch	2022-05-19 21:16:32.000000000 +0000
+++ 7.3.1-2/debian/patches/arm.patch	2022-06-19 13:39:09.000000000 +0000
@@ -3,7 +3,7 @@ implement timer for arm >= v6
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 --- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
 +++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-@@ -165,6 +165,20 @@ static inline tokutime_t toku_time_now(v
+@@ -168,6 +168,20 @@ static inline tokutime_t toku_time_now(v
    struct timeval tv;
    gettimeofday(&tv, nullptr);
    return (uint64_t)tv.tv_sec * 1000000 + tv.tv_usec;
diff -pruN 7.2.2-5/debian/patches/mips.patch 7.3.1-2/debian/patches/mips.patch
--- 7.2.2-5/debian/patches/mips.patch	2022-05-19 21:16:32.000000000 +0000
+++ 7.3.1-2/debian/patches/mips.patch	2022-06-19 13:39:09.000000000 +0000
@@ -3,11 +3,10 @@ implement timer implementation for mips
 Signed-off-by: Khem Raj <raj.khem@gmail.com>
 --- a/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
 +++ b/utilities/transactions/lock/range/range_tree/lib/portability/toku_time.h
-@@ -159,7 +159,12 @@ static inline tokutime_t toku_time_now(v
-   __asm __volatile__("rdcycle %0" : "=r"(result));
-   return result;
- #endif
--
+@@ -162,6 +162,12 @@ static inline tokutime_t toku_time_now(v
+   uint64_t cycles;
+   asm volatile("rdcycle %0" : "=r"(cycles));
+   return cycles;
 +#elif defined(__mips__)
 +  // mips apparently only allows rdtsc for superusers, so we fall
 +  // back to gettimeofday.  It's possible clock_gettime would be better.
diff -pruN 7.2.2-5/debian/patches/series 7.3.1-2/debian/patches/series
--- 7.2.2-5/debian/patches/series	2022-06-13 18:45:01.000000000 +0000
+++ 7.3.1-2/debian/patches/series	2022-06-19 13:39:09.000000000 +0000
@@ -2,10 +2,9 @@ build_reproducible.patch
 fix_db_test.patch
 no_rpath.patch
 library_version.patch
-0001-range_tree-Implement-toku_time_now-for-rv32-rv64-in-.patch
 ppc64.patch
+armv7_support.patch
 mips.patch
 arm.patch
-armv7_support.patch
 0001-replace-old-sync-with-new-atomic-builtin-equivalents.patch
 rely-on-default-for-optimization-on-Power.patch
diff -pruN 7.2.2-5/debian/source/lintian-overrides 7.3.1-2/debian/source/lintian-overrides
--- 7.2.2-5/debian/source/lintian-overrides	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/debian/source/lintian-overrides	2022-07-31 10:44:49.000000000 +0000
@@ -0,0 +1 @@
+source-is-missing [docs/_includes/footer.html]
diff -pruN 7.2.2-5/docs/Gemfile.lock 7.3.1-2/docs/Gemfile.lock
--- 7.2.2-5/docs/Gemfile.lock	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/docs/Gemfile.lock	2022-06-08 21:08:16.000000000 +0000
@@ -232,7 +232,7 @@ GEM
       jekyll-seo-tag (~> 2.1)
     minitest (5.15.0)
     multipart-post (2.1.1)
-    nokogiri (1.13.4)
+    nokogiri (1.13.6)
       mini_portile2 (~> 2.8.0)
       racc (~> 1.4)
     octokit (4.22.0)
diff -pruN 7.2.2-5/env/composite_env.cc 7.3.1-2/env/composite_env.cc
--- 7.2.2-5/env/composite_env.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/composite_env.cc	2022-06-08 21:08:16.000000000 +0000
@@ -5,6 +5,7 @@
 //
 #include "env/composite_env_wrapper.h"
 #include "rocksdb/utilities/options_type.h"
+#include "util/string_util.h"
 
 namespace ROCKSDB_NAMESPACE {
 namespace {
@@ -382,19 +383,49 @@ Status CompositeEnv::NewDirectory(const
 }
 
 namespace {
-static std::unordered_map<std::string, OptionTypeInfo>
-    composite_env_wrapper_type_info = {
+static std::unordered_map<std::string, OptionTypeInfo> env_wrapper_type_info = {
 #ifndef ROCKSDB_LITE
-        {"target",
-         {0, OptionType::kCustomizable, OptionVerificationType::kByName,
-          OptionTypeFlags::kDontSerialize | OptionTypeFlags::kRawPointer,
-          [](const ConfigOptions& opts, const std::string& /*name*/,
-             const std::string& value, void* addr) {
-            auto target = static_cast<EnvWrapper::Target*>(addr);
-            return Env::CreateFromString(opts, value, &(target->env),
-                                         &(target->guard));
-          },
-          nullptr, nullptr}},
+    {"target",
+     OptionTypeInfo(0, OptionType::kUnknown, OptionVerificationType::kByName,
+                    OptionTypeFlags::kDontSerialize)
+         .SetParseFunc([](const ConfigOptions& opts,
+                          const std::string& /*name*/, const std::string& value,
+                          void* addr) {
+           auto target = static_cast<EnvWrapper::Target*>(addr);
+           return Env::CreateFromString(opts, value, &(target->env),
+                                        &(target->guard));
+         })
+         .SetEqualsFunc([](const ConfigOptions& opts,
+                           const std::string& /*name*/, const void* addr1,
+                           const void* addr2, std::string* mismatch) {
+           const auto target1 = static_cast<const EnvWrapper::Target*>(addr1);
+           const auto target2 = static_cast<const EnvWrapper::Target*>(addr2);
+           if (target1->env != nullptr) {
+             return target1->env->AreEquivalent(opts, target2->env, mismatch);
+           } else {
+             return (target2->env == nullptr);
+           }
+         })
+         .SetPrepareFunc([](const ConfigOptions& opts,
+                            const std::string& /*name*/, void* addr) {
+           auto target = static_cast<EnvWrapper::Target*>(addr);
+           if (target->guard.get() != nullptr) {
+             target->env = target->guard.get();
+           } else if (target->env == nullptr) {
+             target->env = Env::Default();
+           }
+           return target->env->PrepareOptions(opts);
+         })
+         .SetValidateFunc([](const DBOptions& db_opts,
+                             const ColumnFamilyOptions& cf_opts,
+                             const std::string& /*name*/, const void* addr) {
+           const auto target = static_cast<const EnvWrapper::Target*>(addr);
+           if (target->env == nullptr) {
+             return Status::InvalidArgument("Target Env not specified");
+           } else {
+             return target->env->ValidateOptions(db_opts, cf_opts);
+           }
+         })},
 #endif  // ROCKSDB_LITE
 };
 static std::unordered_map<std::string, OptionTypeInfo>
@@ -425,7 +456,7 @@ CompositeEnvWrapper::CompositeEnvWrapper
                                          const std::shared_ptr<FileSystem>& fs,
                                          const std::shared_ptr<SystemClock>& sc)
     : CompositeEnv(fs, sc), target_(env) {
-  RegisterOptions("", &target_, &composite_env_wrapper_type_info);
+  RegisterOptions("", &target_, &env_wrapper_type_info);
   RegisterOptions("", &file_system_, &composite_fs_wrapper_type_info);
   RegisterOptions("", &system_clock_, &composite_clock_wrapper_type_info);
 }
@@ -434,7 +465,7 @@ CompositeEnvWrapper::CompositeEnvWrapper
                                          const std::shared_ptr<FileSystem>& fs,
                                          const std::shared_ptr<SystemClock>& sc)
     : CompositeEnv(fs, sc), target_(env) {
-  RegisterOptions("", &target_, &composite_env_wrapper_type_info);
+  RegisterOptions("", &target_, &env_wrapper_type_info);
   RegisterOptions("", &file_system_, &composite_fs_wrapper_type_info);
   RegisterOptions("", &system_clock_, &composite_clock_wrapper_type_info);
 }
@@ -461,4 +492,46 @@ std::string CompositeEnvWrapper::Seriali
   return options;
 }
 #endif  // ROCKSDB_LITE
+
+EnvWrapper::EnvWrapper(Env* t) : target_(t) {
+  RegisterOptions("", &target_, &env_wrapper_type_info);
+}
+
+EnvWrapper::EnvWrapper(std::unique_ptr<Env>&& t) : target_(std::move(t)) {
+  RegisterOptions("", &target_, &env_wrapper_type_info);
+}
+
+EnvWrapper::EnvWrapper(const std::shared_ptr<Env>& t) : target_(t) {
+  RegisterOptions("", &target_, &env_wrapper_type_info);
+}
+
+EnvWrapper::~EnvWrapper() {}
+
+Status EnvWrapper::PrepareOptions(const ConfigOptions& options) {
+  target_.Prepare();
+  return Env::PrepareOptions(options);
+}
+
+#ifndef ROCKSDB_LITE
+std::string EnvWrapper::SerializeOptions(const ConfigOptions& config_options,
+                                         const std::string& header) const {
+  auto parent = Env::SerializeOptions(config_options, "");
+  if (config_options.IsShallow() || target_.env == nullptr ||
+      target_.env == Env::Default()) {
+    return parent;
+  } else {
+    std::string result = header;
+    if (!StartsWith(parent, OptionTypeInfo::kIdPropName())) {
+      result.append(OptionTypeInfo::kIdPropName()).append("=");
+    }
+    result.append(parent);
+    if (!EndsWith(result, config_options.delimiter)) {
+      result.append(config_options.delimiter);
+    }
+    result.append("target=").append(target_.env->ToString(config_options));
+    return result;
+  }
+}
+#endif  // ROCKSDB_LITE
+
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/env/env.cc 7.3.1-2/env/env.cc
--- 7.2.2-5/env/env.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/env.cc	2022-06-08 21:08:16.000000000 +0000
@@ -26,7 +26,6 @@
 #include "rocksdb/utilities/object_registry.h"
 #include "rocksdb/utilities/options_type.h"
 #include "util/autovector.h"
-#include "util/string_util.h"
 
 namespace ROCKSDB_NAMESPACE {
 namespace {
@@ -1084,65 +1083,6 @@ Status ReadFileToString(Env* env, const
   return ReadFileToString(fs.get(), fname, data);
 }
 
-namespace {
-static std::unordered_map<std::string, OptionTypeInfo> env_wrapper_type_info = {
-#ifndef ROCKSDB_LITE
-    {"target",
-     {0, OptionType::kCustomizable, OptionVerificationType::kByName,
-      OptionTypeFlags::kDontSerialize | OptionTypeFlags::kRawPointer,
-      [](const ConfigOptions& opts, const std::string& /*name*/,
-         const std::string& value, void* addr) {
-        EnvWrapper::Target* target = static_cast<EnvWrapper::Target*>(addr);
-        return Env::CreateFromString(opts, value, &(target->env),
-                                     &(target->guard));
-      },
-      nullptr, nullptr}},
-#endif  // ROCKSDB_LITE
-};
-}  // namespace
-
-EnvWrapper::EnvWrapper(Env* t) : target_(t) {
-  RegisterOptions("", &target_, &env_wrapper_type_info);
-}
-
-EnvWrapper::EnvWrapper(std::unique_ptr<Env>&& t) : target_(std::move(t)) {
-  RegisterOptions("", &target_, &env_wrapper_type_info);
-}
-
-EnvWrapper::EnvWrapper(const std::shared_ptr<Env>& t) : target_(t) {
-  RegisterOptions("", &target_, &env_wrapper_type_info);
-}
-
-EnvWrapper::~EnvWrapper() {
-}
-
-Status EnvWrapper::PrepareOptions(const ConfigOptions& options) {
-  target_.Prepare();
-  return Env::PrepareOptions(options);
-}
-
-#ifndef ROCKSDB_LITE
-std::string EnvWrapper::SerializeOptions(const ConfigOptions& config_options,
-                                         const std::string& header) const {
-  auto parent = Env::SerializeOptions(config_options, "");
-  if (config_options.IsShallow() || target_.env == nullptr ||
-      target_.env == Env::Default()) {
-    return parent;
-  } else {
-    std::string result = header;
-    if (!StartsWith(parent, OptionTypeInfo::kIdPropName())) {
-      result.append(OptionTypeInfo::kIdPropName()).append("=");
-    }
-    result.append(parent);
-    if (!EndsWith(result, config_options.delimiter)) {
-      result.append(config_options.delimiter);
-    }
-    result.append("target=").append(target_.env->ToString(config_options));
-    return result;
-  }
-}
-#endif  // ROCKSDB_LITE
-
 namespace {  // anonymous namespace
 
 void AssignEnvOptions(EnvOptions* env_options, const DBOptions& options) {
diff -pruN 7.2.2-5/env/env_encryption.cc 7.3.1-2/env/env_encryption.cc
--- 7.2.2-5/env/env_encryption.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/env_encryption.cc	2022-06-08 21:08:16.000000000 +0000
@@ -1139,6 +1139,15 @@ CTREncryptionProvider::CTREncryptionProv
   RegisterOptions("Cipher", &cipher_, &ctr_encryption_provider_type_info);
 }
 
+bool CTREncryptionProvider::IsInstanceOf(const std::string& name) const {
+  // Special case for test purposes.
+  if (name == "1://test" && cipher_ != nullptr) {
+    return cipher_->IsInstanceOf(ROT13BlockCipher::kClassName());
+  } else {
+    return EncryptionProvider::IsInstanceOf(name);
+  }
+}
+
 // GetPrefixLength returns the length of the prefix that is added to every file
 // and used for storing encryption options.
 // For optimal performance, the prefix length should be a multiple of
diff -pruN 7.2.2-5/env/env_encryption_ctr.h 7.3.1-2/env/env_encryption_ctr.h
--- 7.2.2-5/env/env_encryption_ctr.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/env_encryption_ctr.h	2022-06-08 21:08:16.000000000 +0000
@@ -66,7 +66,7 @@ class CTREncryptionProvider : public Enc
 
   static const char* kClassName() { return "CTR"; }
   const char* Name() const override { return kClassName(); }
-
+  bool IsInstanceOf(const std::string& name) const override;
   // GetPrefixLength returns the length of the prefix that is added to every
   // file
   // and used for storing encryption options.
diff -pruN 7.2.2-5/env/env_posix.cc 7.3.1-2/env/env_posix.cc
--- 7.2.2-5/env/env_posix.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/env_posix.cc	2022-06-08 21:08:16.000000000 +0000
@@ -130,12 +130,12 @@ class PosixDynamicLibrary : public Dynam
 class PosixClock : public SystemClock {
  public:
   static const char* kClassName() { return "PosixClock"; }
-  const char* Name() const override { return kClassName(); }
-  const char* NickName() const override { return kDefaultName(); }
+  const char* Name() const override { return kDefaultName(); }
+  const char* NickName() const override { return kClassName(); }
 
   uint64_t NowMicros() override {
-    struct timeval tv;
-    gettimeofday(&tv, nullptr);
+    port::TimeVal tv;
+    port::GetTimeOfDay(&tv, nullptr);
     return static_cast<uint64_t>(tv.tv_sec) * 1000000 + tv.tv_usec;
   }
 
@@ -200,7 +200,7 @@ class PosixClock : public SystemClock {
     dummy.reserve(maxsize);
     dummy.resize(maxsize);
     char* p = &dummy[0];
-    localtime_r(&seconds, &t);
+    port::LocalTimeR(&seconds, &t);
     snprintf(p, maxsize, "%04d/%02d/%02d-%02d:%02d:%02d ", t.tm_year + 1900,
              t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min, t.tm_sec);
     return dummy;
@@ -488,6 +488,7 @@ Env* Env::Default() {
   CompressionContextCache::InitSingleton();
   INIT_SYNC_POINT_SINGLETONS();
   // ~PosixEnv must be called on exit
+  //**TODO: Can we make this a STATIC_AVOID_DESTRUCTION?
   static PosixEnv default_env;
   return &default_env;
 }
@@ -496,9 +497,9 @@ Env* Env::Default() {
 // Default Posix SystemClock
 //
 const std::shared_ptr<SystemClock>& SystemClock::Default() {
-  static std::shared_ptr<SystemClock> default_clock =
-      std::make_shared<PosixClock>();
-  return default_clock;
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<SystemClock>, instance)
+  (std::make_shared<PosixClock>());
+  return instance;
 }
 }  // namespace ROCKSDB_NAMESPACE
 
diff -pruN 7.2.2-5/env/env_test.cc 7.3.1-2/env/env_test.cc
--- 7.2.2-5/env/env_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/env_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -44,6 +44,7 @@
 #include "env/unique_id_gen.h"
 #include "logging/log_buffer.h"
 #include "logging/logging.h"
+#include "options/options_helper.h"
 #include "port/malloc.h"
 #include "port/port.h"
 #include "port/stack_trace.h"
@@ -1153,7 +1154,7 @@ TEST_P(EnvPosixTestWithParam, RandomAcce
     IoctlFriendlyTmpdir ift;
     std::vector<std::string> fnames;
     for (int i = 0; i < 1000; ++i) {
-      fnames.push_back(ift.name() + "/" + "testfile" + ToString(i));
+      fnames.push_back(ift.name() + "/" + "testfile" + std::to_string(i));
 
       // Create file.
       std::unique_ptr<WritableFile> wfile;
@@ -1603,9 +1604,9 @@ class TestLogger : public Logger {
 
       if (new_format[0] == '[') {
         // "[DEBUG] "
-        ASSERT_TRUE(n <= 56 + (512 - static_cast<int>(sizeof(struct timeval))));
+        ASSERT_TRUE(n <= 56 + (512 - static_cast<int>(sizeof(port::TimeVal))));
       } else {
-        ASSERT_TRUE(n <= 48 + (512 - static_cast<int>(sizeof(struct timeval))));
+        ASSERT_TRUE(n <= 48 + (512 - static_cast<int>(sizeof(port::TimeVal))));
       }
       va_end(backup_ap);
     }
@@ -1673,9 +1674,9 @@ class TestLogger2 : public Logger {
       va_copy(backup_ap, ap);
       int n = vsnprintf(new_format, sizeof(new_format) - 1, format, backup_ap);
       // 48 bytes for extra information + bytes allocated
-      ASSERT_TRUE(
-          n <= 48 + static_cast<int>(max_log_size_ - sizeof(struct timeval)));
-      ASSERT_TRUE(n > static_cast<int>(max_log_size_ - sizeof(struct timeval)));
+      ASSERT_TRUE(n <=
+                  48 + static_cast<int>(max_log_size_ - sizeof(port::TimeVal)));
+      ASSERT_TRUE(n > static_cast<int>(max_log_size_ - sizeof(port::TimeVal)));
       va_end(backup_ap);
     }
   }
@@ -2937,7 +2938,7 @@ TEST_F(EnvTest, FailureToCreateLockFile)
   ASSERT_OK(DestroyDir(env, dir));
 }
 
-TEST_F(EnvTest, CreateDefaultEnv) {
+TEST_F(CreateEnvTest, CreateDefaultEnv) {
   ConfigOptions options;
   options.ignore_unsupported_options = false;
 
@@ -2989,7 +2990,7 @@ class WrappedEnv : public EnvWrapper {
   }
 };
 }  // namespace
-TEST_F(EnvTest, CreateMockEnv) {
+TEST_F(CreateEnvTest, CreateMockEnv) {
   ConfigOptions options;
   options.ignore_unsupported_options = false;
   WrappedEnv::Register(*(options.registry->AddLibrary("test")), "");
@@ -3017,7 +3018,7 @@ TEST_F(EnvTest, CreateMockEnv) {
   opt_str = copy->ToString(options);
 }
 
-TEST_F(EnvTest, CreateWrappedEnv) {
+TEST_F(CreateEnvTest, CreateWrappedEnv) {
   ConfigOptions options;
   options.ignore_unsupported_options = false;
   WrappedEnv::Register(*(options.registry->AddLibrary("test")), "");
@@ -3054,7 +3055,7 @@ TEST_F(EnvTest, CreateWrappedEnv) {
   ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch));
 }
 
-TEST_F(EnvTest, CreateCompositeEnv) {
+TEST_F(CreateEnvTest, CreateCompositeEnv) {
   ConfigOptions options;
   options.ignore_unsupported_options = false;
   std::shared_ptr<Env> guard, copy;
@@ -3109,6 +3110,18 @@ TEST_F(EnvTest, CreateCompositeEnv) {
   ASSERT_NE(env, nullptr);
   ASSERT_NE(env, Env::Default());
   ASSERT_TRUE(guard->AreEquivalent(options, copy.get(), &mismatch));
+
+  guard.reset(new CompositeEnvWrapper(nullptr, timed_fs, clock));
+  ColumnFamilyOptions cf_opts;
+  DBOptions db_opts;
+  db_opts.env = guard.get();
+  auto comp = db_opts.env->CheckedCast<CompositeEnvWrapper>();
+  ASSERT_NE(comp, nullptr);
+  ASSERT_EQ(comp->Inner(), nullptr);
+  ASSERT_NOK(ValidateOptions(db_opts, cf_opts));
+  ASSERT_OK(db_opts.env->PrepareOptions(options));
+  ASSERT_NE(comp->Inner(), nullptr);
+  ASSERT_OK(ValidateOptions(db_opts, cf_opts));
 }
 #endif  // ROCKSDB_LITE
 
diff -pruN 7.2.2-5/env/file_system_tracer.cc 7.3.1-2/env/file_system_tracer.cc
--- 7.2.2-5/env/file_system_tracer.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/file_system_tracer.cc	2022-06-08 21:08:16.000000000 +0000
@@ -338,6 +338,51 @@ IOStatus FSRandomAccessFileTracingWrappe
   return s;
 }
 
+IOStatus FSRandomAccessFileTracingWrapper::ReadAsync(
+    FSReadRequest& req, const IOOptions& opts,
+    std::function<void(const FSReadRequest&, void*)> cb, void* cb_arg,
+    void** io_handle, IOHandleDeleter* del_fn, IODebugContext* dbg) {
+  // Create a callback and populate info.
+  auto read_async_callback =
+      std::bind(&FSRandomAccessFileTracingWrapper::ReadAsyncCallback, this,
+                std::placeholders::_1, std::placeholders::_2);
+  ReadAsyncCallbackInfo* read_async_cb_info = new ReadAsyncCallbackInfo;
+  read_async_cb_info->cb_ = cb;
+  read_async_cb_info->cb_arg_ = cb_arg;
+  read_async_cb_info->start_time_ = clock_->NowNanos();
+  read_async_cb_info->file_op_ = __func__;
+
+  IOStatus s = target()->ReadAsync(req, opts, read_async_callback,
+                                   read_async_cb_info, io_handle, del_fn, dbg);
+
+  if (!s.ok()) {
+    delete read_async_cb_info;
+  }
+  return s;
+}
+
+void FSRandomAccessFileTracingWrapper::ReadAsyncCallback(
+    const FSReadRequest& req, void* cb_arg) {
+  ReadAsyncCallbackInfo* read_async_cb_info =
+      static_cast<ReadAsyncCallbackInfo*>(cb_arg);
+  assert(read_async_cb_info);
+  assert(read_async_cb_info->cb_);
+
+  uint64_t elapsed = clock_->NowNanos() - read_async_cb_info->start_time_;
+  uint64_t io_op_data = 0;
+  io_op_data |= (1 << IOTraceOp::kIOLen);
+  io_op_data |= (1 << IOTraceOp::kIOOffset);
+  IOTraceRecord io_record(clock_->NowNanos(), TraceType::kIOTracer, io_op_data,
+                          read_async_cb_info->file_op_, elapsed,
+                          req.status.ToString(), file_name_, req.result.size(),
+                          req.offset);
+  io_tracer_->WriteIOOp(io_record, nullptr /*dbg*/);
+
+  // call the underlying callback.
+  read_async_cb_info->cb_(req, read_async_cb_info->cb_arg_);
+  delete read_async_cb_info;
+}
+
 IOStatus FSWritableFileTracingWrapper::Append(const Slice& data,
                                               const IOOptions& options,
                                               IODebugContext* dbg) {
diff -pruN 7.2.2-5/env/file_system_tracer.h 7.3.1-2/env/file_system_tracer.h
--- 7.2.2-5/env/file_system_tracer.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/file_system_tracer.h	2022-06-08 21:08:16.000000000 +0000
@@ -228,11 +228,25 @@ class FSRandomAccessFileTracingWrapper :
 
   IOStatus InvalidateCache(size_t offset, size_t length) override;
 
+  IOStatus ReadAsync(FSReadRequest& req, const IOOptions& opts,
+                     std::function<void(const FSReadRequest&, void*)> cb,
+                     void* cb_arg, void** io_handle, IOHandleDeleter* del_fn,
+                     IODebugContext* dbg) override;
+
+  void ReadAsyncCallback(const FSReadRequest& req, void* cb_arg);
+
  private:
   std::shared_ptr<IOTracer> io_tracer_;
   SystemClock* clock_;
   // Stores file name instead of full path.
   std::string file_name_;
+
+  struct ReadAsyncCallbackInfo {
+    uint64_t start_time_;
+    std::function<void(const FSReadRequest&, void*)> cb_;
+    void* cb_arg_;
+    std::string file_op_;
+  };
 };
 
 // The FSRandomAccessFilePtr is a wrapper class that takes pointer to storage
diff -pruN 7.2.2-5/env/fs_posix.cc 7.3.1-2/env/fs_posix.cc
--- 7.2.2-5/env/fs_posix.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/fs_posix.cc	2022-06-08 21:08:16.000000000 +0000
@@ -51,6 +51,7 @@
 #include "logging/posix_logger.h"
 #include "monitoring/iostats_context_imp.h"
 #include "monitoring/thread_status_updater.h"
+#include "port/lang.h"
 #include "port/port.h"
 #include "rocksdb/options.h"
 #include "rocksdb/slice.h"
@@ -146,6 +147,13 @@ class PosixFileSystem : public FileSyste
   const char* NickName() const override { return kDefaultName(); }
 
   ~PosixFileSystem() override {}
+  bool IsInstanceOf(const std::string& name) const override {
+    if (name == "posix") {
+      return true;
+    } else {
+      return FileSystem::IsInstanceOf(name);
+    }
+  }
 
   void SetFD_CLOEXEC(int fd, const EnvOptions* options) {
     if ((options == nullptr || options->set_fd_cloexec) && fd > 0) {
@@ -606,8 +614,7 @@ class PosixFileSystem : public FileSyste
         return IOStatus::NotFound();
       default:
         assert(err == EIO || err == ENOMEM);
-        return IOStatus::IOError("Unexpected error(" +
-                                 ROCKSDB_NAMESPACE::ToString(err) +
+        return IOStatus::IOError("Unexpected error(" + std::to_string(err) +
                                  ") accessing file `" + fname + "' ");
     }
   }
@@ -810,12 +817,11 @@ class PosixFileSystem : public FileSyste
       errno = ENOLCK;
       // Note that the thread ID printed is the same one as the one in
       // posix logger, but posix logger prints it hex format.
-      return IOError(
-          "lock hold by current process, acquire time " +
-              ROCKSDB_NAMESPACE::ToString(prev_info.acquire_time) +
-              " acquiring thread " +
-              ROCKSDB_NAMESPACE::ToString(prev_info.acquiring_thread),
-          fname, errno);
+      return IOError("lock hold by current process, acquire time " +
+                         std::to_string(prev_info.acquire_time) +
+                         " acquiring thread " +
+                         std::to_string(prev_info.acquiring_thread),
+                     fname, errno);
     }
 
     IOStatus result = IOStatus::OK();
@@ -1205,10 +1211,9 @@ PosixFileSystem::PosixFileSystem()
 // Default Posix FileSystem
 //
 std::shared_ptr<FileSystem> FileSystem::Default() {
-  static PosixFileSystem default_fs;
-  static std::shared_ptr<PosixFileSystem> default_fs_ptr(
-      &default_fs, [](PosixFileSystem*) {});
-  return default_fs_ptr;
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<FileSystem>, instance)
+  (std::make_shared<PosixFileSystem>());
+  return instance;
 }
 
 #ifndef ROCKSDB_LITE
diff -pruN 7.2.2-5/env/io_posix.cc 7.3.1-2/env/io_posix.cc
--- 7.2.2-5/env/io_posix.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/io_posix.cc	2022-06-08 21:08:16.000000000 +0000
@@ -284,9 +284,9 @@ IOStatus PosixSequentialFile::Positioned
   }
   if (r < 0) {
     // An error: return a non-ok status
-    s = IOError(
-        "While pread " + ToString(n) + " bytes from offset " + ToString(offset),
-        filename_, errno);
+    s = IOError("While pread " + std::to_string(n) + " bytes from offset " +
+                    std::to_string(offset),
+                filename_, errno);
   }
   *result = Slice(scratch, (r < 0) ? 0 : n - left);
   return s;
@@ -294,8 +294,8 @@ IOStatus PosixSequentialFile::Positioned
 
 IOStatus PosixSequentialFile::Skip(uint64_t n) {
   if (fseek(file_, static_cast<long int>(n), SEEK_CUR)) {
-    return IOError("While fseek to skip " + ToString(n) + " bytes", filename_,
-                   errno);
+    return IOError("While fseek to skip " + std::to_string(n) + " bytes",
+                   filename_, errno);
   }
   return IOStatus::OK();
 }
@@ -310,8 +310,9 @@ IOStatus PosixSequentialFile::Invalidate
     // free OS pages
     int ret = Fadvise(fd_, offset, length, POSIX_FADV_DONTNEED);
     if (ret != 0) {
-      return IOError("While fadvise NotNeeded offset " + ToString(offset) +
-                         " len " + ToString(length),
+      return IOError("While fadvise NotNeeded offset " +
+                         std::to_string(offset) + " len " +
+                         std::to_string(length),
                      filename_, errno);
     }
   }
@@ -596,9 +597,9 @@ IOStatus PosixRandomAccessFile::Read(uin
   }
   if (r < 0) {
     // An error: return a non-ok status
-    s = IOError(
-        "While pread offset " + ToString(offset) + " len " + ToString(n),
-        filename_, errno);
+    s = IOError("While pread offset " + std::to_string(offset) + " len " +
+                    std::to_string(n),
+                filename_, errno);
   }
   *result = Slice(scratch, (r < 0) ? 0 : n - left);
   return s;
@@ -704,8 +705,8 @@ IOStatus PosixRandomAccessFile::MultiRea
         }
       }
       return IOStatus::IOError("io_uring_submit_and_wait() requested " +
-                               ToString(this_reqs) + " but returned " +
-                               ToString(ret));
+                               std::to_string(this_reqs) + " but returned " +
+                               std::to_string(ret));
     }
 
     for (size_t i = 0; i < this_reqs; i++) {
@@ -718,7 +719,8 @@ IOStatus PosixRandomAccessFile::MultiRea
       TEST_SYNC_POINT_CALLBACK(
           "PosixRandomAccessFile::MultiRead:io_uring_wait_cqe:return", &ret);
       if (ret) {
-        ios = IOStatus::IOError("io_uring_wait_cqe() returns " + ToString(ret));
+        ios = IOStatus::IOError("io_uring_wait_cqe() returns " +
+                                std::to_string(ret));
 
         if (cqe != nullptr) {
           io_uring_cqe_seen(iu, cqe);
@@ -738,7 +740,7 @@ IOStatus PosixRandomAccessFile::MultiRea
                 req_wrap);
         port::PrintStack();
         ios = IOStatus::IOError("io_uring_cqe_get_data() returned " +
-                                ToString((uint64_t)req_wrap));
+                                std::to_string((uint64_t)req_wrap));
         continue;
       }
       wrap_cache.erase(wrap_check);
@@ -801,8 +803,8 @@ IOStatus PosixRandomAccessFile::Prefetch
     r = fcntl(fd_, F_RDADVISE, &advice);
 #endif
     if (r == -1) {
-      s = IOError("While prefetching offset " + ToString(offset) + " len " +
-                      ToString(n),
+      s = IOError("While prefetching offset " + std::to_string(offset) +
+                      " len " + std::to_string(n),
                   filename_, errno);
     }
   }
@@ -855,8 +857,8 @@ IOStatus PosixRandomAccessFile::Invalida
   if (ret == 0) {
     return IOStatus::OK();
   }
-  return IOError("While fadvise NotNeeded offset " + ToString(offset) +
-                     " len " + ToString(length),
+  return IOError("While fadvise NotNeeded offset " + std::to_string(offset) +
+                     " len " + std::to_string(length),
                  filename_, errno);
 #endif
 }
@@ -922,7 +924,7 @@ IOStatus PosixRandomAccessFile::ReadAsyn
   if (ret < 0) {
     fprintf(stderr, "io_uring_submit error: %ld\n", long(ret));
     return IOStatus::IOError("io_uring_submit() requested but returned " +
-                             ToString(ret));
+                             std::to_string(ret));
   }
   return IOStatus::OK();
 #else
@@ -970,8 +972,8 @@ IOStatus PosixMmapReadableFile::Read(uin
   IOStatus s;
   if (offset > length_) {
     *result = Slice();
-    return IOError("While mmap read offset " + ToString(offset) +
-                       " larger than file length " + ToString(length_),
+    return IOError("While mmap read offset " + std::to_string(offset) +
+                       " larger than file length " + std::to_string(length_),
                    filename_, EINVAL);
   } else if (offset + n > length_) {
     n = static_cast<size_t>(length_ - offset);
@@ -991,8 +993,8 @@ IOStatus PosixMmapReadableFile::Invalida
   if (ret == 0) {
     return IOStatus::OK();
   }
-  return IOError("While fadvise not needed. Offset " + ToString(offset) +
-                     " len" + ToString(length),
+  return IOError("While fadvise not needed. Offset " + std::to_string(offset) +
+                     " len" + std::to_string(length),
                  filename_, errno);
 #endif
 }
@@ -1244,9 +1246,9 @@ IOStatus PosixMmapFile::Allocate(uint64_
   if (alloc_status == 0) {
     return IOStatus::OK();
   } else {
-    return IOError(
-        "While fallocate offset " + ToString(offset) + " len " + ToString(len),
-        filename_, errno);
+    return IOError("While fallocate offset " + std::to_string(offset) +
+                       " len " + std::to_string(len),
+                   filename_, errno);
   }
 }
 #endif
@@ -1311,7 +1313,7 @@ IOStatus PosixWritableFile::PositionedAp
   const char* src = data.data();
   size_t nbytes = data.size();
   if (!PosixPositionedWrite(fd_, src, nbytes, static_cast<off_t>(offset))) {
-    return IOError("While pwrite to file at offset " + ToString(offset),
+    return IOError("While pwrite to file at offset " + std::to_string(offset),
                    filename_, errno);
   }
   filesize_ = offset + nbytes;
@@ -1323,8 +1325,8 @@ IOStatus PosixWritableFile::Truncate(uin
   IOStatus s;
   int r = ftruncate(fd_, size);
   if (r < 0) {
-    s = IOError("While ftruncate file to size " + ToString(size), filename_,
-                errno);
+    s = IOError("While ftruncate file to size " + std::to_string(size),
+                filename_, errno);
   } else {
     filesize_ = size;
   }
@@ -1481,9 +1483,9 @@ IOStatus PosixWritableFile::Allocate(uin
   if (alloc_status == 0) {
     return IOStatus::OK();
   } else {
-    return IOError(
-        "While fallocate offset " + ToString(offset) + " len " + ToString(len),
-        filename_, errno);
+    return IOError("While fallocate offset " + std::to_string(offset) +
+                       " len " + std::to_string(len),
+                   filename_, errno);
   }
 }
 #endif
@@ -1508,7 +1510,7 @@ IOStatus PosixWritableFile::RangeSync(ui
                             static_cast<off_t>(nbytes), SYNC_FILE_RANGE_WRITE);
     }
     if (ret != 0) {
-      return IOError("While sync_file_range returned " + ToString(ret),
+      return IOError("While sync_file_range returned " + std::to_string(ret),
                      filename_, errno);
     }
     return IOStatus::OK();
@@ -1544,9 +1546,9 @@ IOStatus PosixRandomRWFile::Write(uint64
   const char* src = data.data();
   size_t nbytes = data.size();
   if (!PosixPositionedWrite(fd_, src, nbytes, static_cast<off_t>(offset))) {
-    return IOError(
-        "While write random read/write file at offset " + ToString(offset),
-        filename_, errno);
+    return IOError("While write random read/write file at offset " +
+                       std::to_string(offset),
+                   filename_, errno);
   }
 
   return IOStatus::OK();
@@ -1566,7 +1568,7 @@ IOStatus PosixRandomRWFile::Read(uint64_
         continue;
       }
       return IOError("While reading random read/write file offset " +
-                         ToString(offset) + " len " + ToString(n),
+                         std::to_string(offset) + " len " + std::to_string(n),
                      filename_, errno);
     } else if (done == 0) {
       // Nothing more to read
diff -pruN 7.2.2-5/env/mock_env.cc 7.3.1-2/env/mock_env.cc
--- 7.2.2-5/env/mock_env.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/env/mock_env.cc	2022-06-08 21:08:16.000000000 +0000
@@ -509,13 +509,13 @@ class TestMemLogger : public Logger {
       char* p = base;
       char* limit = base + bufsize;
 
-      struct timeval now_tv;
-      gettimeofday(&now_tv, nullptr);
+      port::TimeVal now_tv;
+      port::GetTimeOfDay(&now_tv, nullptr);
       const time_t seconds = now_tv.tv_sec;
       struct tm t;
       memset(&t, 0, sizeof(t));
       struct tm* ret __attribute__((__unused__));
-      ret = localtime_r(&seconds, &t);
+      ret = port::LocalTimeR(&seconds, &t);
       assert(ret);
       p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d ",
                     t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour,
diff -pruN 7.2.2-5/file/delete_scheduler_test.cc 7.3.1-2/file/delete_scheduler_test.cc
--- 7.2.2-5/file/delete_scheduler_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/delete_scheduler_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -30,7 +30,7 @@ class DeleteSchedulerTest : public testi
     for (size_t i = 0; i < kNumDataDirs; ++i) {
       dummy_files_dirs_.emplace_back(
           test::PerThreadDBPath(env_, "delete_scheduler_dummy_data_dir") +
-          ToString(i));
+          std::to_string(i));
       DestroyAndCreateDir(dummy_files_dirs_.back());
     }
     stats_ = ROCKSDB_NAMESPACE::CreateDBStatistics();
@@ -153,7 +153,7 @@ TEST_F(DeleteSchedulerTest, BasicRateLim
     // Create 100 dummy files, every file is 1 Kb
     std::vector<std::string> generated_files;
     for (int i = 0; i < num_files; i++) {
-      std::string file_name = "file" + ToString(i) + ".data";
+      std::string file_name = "file" + std::to_string(i) + ".data";
       generated_files.push_back(NewDummyFile(file_name, file_size));
     }
 
@@ -265,7 +265,7 @@ TEST_F(DeleteSchedulerTest, RateLimiting
     // Create 100 dummy files, every file is 1 Kb
     std::vector<std::string> generated_files;
     for (int i = 0; i < num_files * thread_cnt; i++) {
-      std::string file_name = "file" + ToString(i) + ".data";
+      std::string file_name = "file" + std::to_string(i) + ".data";
       generated_files.push_back(NewDummyFile(file_name, file_size));
     }
 
@@ -405,7 +405,7 @@ TEST_F(DeleteSchedulerTest, BackgroundEr
 
   // Generate 10 dummy files and move them to trash
   for (int i = 0; i < 10; i++) {
-    std::string file_name = "data_" + ToString(i) + ".data";
+    std::string file_name = "data_" + std::to_string(i) + ".data";
     ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), ""));
   }
   ASSERT_EQ(CountNormalFiles(), 0);
@@ -415,7 +415,7 @@ TEST_F(DeleteSchedulerTest, BackgroundEr
   // BackgroundEmptyTrash since we already deleted the files it was
   // goind to delete
   for (int i = 0; i < 10; i++) {
-    std::string file_name = "data_" + ToString(i) + ".data.trash";
+    std::string file_name = "data_" + std::to_string(i) + ".data.trash";
     ASSERT_OK(env_->DeleteFile(dummy_files_dirs_[0] + "/" + file_name));
   }
 
@@ -455,7 +455,7 @@ TEST_F(DeleteSchedulerTest, StartBGEmpty
   for (int run = 1; run <= 5; run++) {
     // Generate kTestFileNum dummy files and move them to trash
     for (int i = 0; i < kTestFileNum; i++) {
-      std::string file_name = "data_" + ToString(i) + ".data";
+      std::string file_name = "data_" + std::to_string(i) + ".data";
       ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), ""));
     }
     ASSERT_EQ(CountNormalFiles(), 0);
@@ -555,7 +555,7 @@ TEST_F(DeleteSchedulerTest, DestructorWi
   NewDeleteScheduler();
 
   for (int i = 0; i < 100; i++) {
-    std::string file_name = "data_" + ToString(i) + ".data";
+    std::string file_name = "data_" + std::to_string(i) + ".data";
     ASSERT_OK(delete_scheduler_->DeleteFile(NewDummyFile(file_name), ""));
   }
 
@@ -610,7 +610,7 @@ TEST_F(DeleteSchedulerTest, DISABLED_Dyn
     // Create 100 dummy files, every file is 1 Kb
     std::vector<std::string> generated_files;
     for (int i = 0; i < num_files; i++) {
-      std::string file_name = "file" + ToString(i) + ".data";
+      std::string file_name = "file" + std::to_string(i) + ".data";
       generated_files.push_back(NewDummyFile(file_name, file_size));
     }
 
@@ -671,7 +671,7 @@ TEST_F(DeleteSchedulerTest, ImmediateDel
 
   std::vector<std::string> generated_files;
   for (int i = 0; i < num_files; i++) {
-    std::string file_name = "file" + ToString(i) + ".data";
+    std::string file_name = "file" + std::to_string(i) + ".data";
     generated_files.push_back(NewDummyFile(file_name, file_size));
   }
 
diff -pruN 7.2.2-5/file/file_prefetch_buffer.cc 7.3.1-2/file/file_prefetch_buffer.cc
--- 7.2.2-5/file/file_prefetch_buffer.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/file_prefetch_buffer.cc	2022-06-08 21:08:16.000000000 +0000
@@ -194,34 +194,7 @@ void FilePrefetchBuffer::CopyDataToBuffe
   }
 }
 
-// If async_read = true:
-// async_read is enabled in case of sequential reads. So when
-// buffers are switched, we clear the curr_ buffer as we assume the data has
-// been consumed because of sequential reads.
-//
-// Scenarios for prefetching asynchronously:
-// Case1: If both buffers are empty, prefetch n bytes
-//        synchronously in curr_
-//        and prefetch readahead_size_/2 async in second buffer.
-// Case2: If second buffer has partial or full data, make it current and
-//        prefetch readahead_size_/2 async in second buffer. In case of
-//        partial data, prefetch remaining bytes from size n synchronously to
-//        fulfill the requested bytes request.
-// Case3: If curr_ has partial data, prefetch remaining bytes from size n
-//        synchronously in curr_ to fulfill the requested bytes request and
-//        prefetch readahead_size_/2 bytes async in second buffer.
-// Case4: If data is in both buffers, copy requested data from curr_ and second
-//        buffer to third buffer. If all requested bytes have been copied, do
-//        the asynchronous prefetching in second buffer.
-Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
-                                         RandomAccessFileReader* reader,
-                                         uint64_t offset, size_t length,
-                                         size_t readahead_size,
-                                         Env::IOPriority rate_limiter_priority,
-                                         bool& copy_to_third_buffer) {
-  if (!enable_) {
-    return Status::OK();
-  }
+void FilePrefetchBuffer::PollAndUpdateBuffersIfNeeded(uint64_t offset) {
   if (async_read_in_progress_ && fs_ != nullptr) {
     // Wait for prefetch data to complete.
     // No mutex is needed as PrefetchAsyncCallback updates the result in second
@@ -242,11 +215,6 @@ Status FilePrefetchBuffer::PrefetchAsync
     del_fn_ = nullptr;
   }
 
-  TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start");
-  Status s;
-  size_t prefetch_size = length + readahead_size;
-
-  size_t alignment = reader->file()->GetRequiredBufferAlignment();
   // Index of second buffer.
   uint32_t second = curr_ ^ 1;
 
@@ -273,17 +241,55 @@ Status FilePrefetchBuffer::PrefetchAsync
     // outdated data and switch the buffers.
     bufs_[curr_].buffer_.Clear();
     curr_ = curr_ ^ 1;
-    second = curr_ ^ 1;
   }
-  // After swap check if all the requested bytes are in curr_, it will go for
-  // async prefetching only.
+}
+
+// If async_read = true:
+// async_read is enabled in case of sequential reads. So when
+// buffers are switched, we clear the curr_ buffer as we assume the data has
+// been consumed because of sequential reads.
+//
+// Scenarios for prefetching asynchronously:
+// Case1: If both buffers are empty, prefetch n bytes
+//        synchronously in curr_
+//        and prefetch readahead_size_/2 async in second buffer.
+// Case2: If second buffer has partial or full data, make it current and
+//        prefetch readahead_size_/2 async in second buffer. In case of
+//        partial data, prefetch remaining bytes from size n synchronously to
+//        fulfill the requested bytes request.
+// Case3: If curr_ has partial data, prefetch remaining bytes from size n
+//        synchronously in curr_ to fulfill the requested bytes request and
+//        prefetch readahead_size_/2 bytes async in second buffer.
+// Case4: If data is in both buffers, copy requested data from curr_ and second
+//        buffer to third buffer. If all requested bytes have been copied, do
+//        the asynchronous prefetching in second buffer.
+Status FilePrefetchBuffer::PrefetchAsyncInternal(
+    const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
+    size_t length, size_t readahead_size, Env::IOPriority rate_limiter_priority,
+    bool& copy_to_third_buffer) {
+  if (!enable_) {
+    return Status::OK();
+  }
+
+  TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsyncInternal:Start");
+
+  PollAndUpdateBuffersIfNeeded(offset);
+
+  // If all the requested bytes are in curr_, it will go for async prefetching
+  // only.
   if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
       offset + length <=
           bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
     offset += length;
     length = 0;
-    prefetch_size = readahead_size;
   }
+
+  Status s;
+  size_t prefetch_size = length + readahead_size;
+  size_t alignment = reader->file()->GetRequiredBufferAlignment();
+  // Index of second buffer.
+  uint32_t second = curr_ ^ 1;
+
   // Data is overlapping i.e. some of the data is in curr_ buffer and remaining
   // in second buffer.
   if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
@@ -315,9 +321,8 @@ Status FilePrefetchBuffer::PrefetchAsync
     prefetch_size = length + readahead_size;
   }
 
-  // Update second again if swap happened.
-  second = curr_ ^ 1;
   size_t _offset = static_cast<size_t>(offset);
+  second = curr_ ^ 1;
 
   // offset and size alignment for curr_ buffer with synchronous prefetching
   uint64_t rounddown_start1 = Rounddown(_offset, alignment);
@@ -442,12 +447,23 @@ bool FilePrefetchBuffer::TryReadFromCach
 bool FilePrefetchBuffer::TryReadFromCacheAsync(
     const IOOptions& opts, RandomAccessFileReader* reader, uint64_t offset,
     size_t n, Slice* result, Status* status,
-    Env::IOPriority rate_limiter_priority, bool for_compaction /* = false */
-) {
+    Env::IOPriority rate_limiter_priority) {
+  assert(async_io_);
+
   if (track_min_offset_ && offset < min_offset_read_) {
     min_offset_read_ = static_cast<size_t>(offset);
   }
-  if (!enable_ || (offset < bufs_[curr_].offset_)) {
+
+  if (!enable_) {
+    return false;
+  }
+
+  // In case of async_io_, offset can be less than bufs_[curr_].offset_ because
+  // of reads not sequential and PrefetchAsync can be called for any block and
+  // RocksDB will call TryReadFromCacheAsync after PrefetchAsync to Poll for
+  // requested bytes.
+  if (bufs_[curr_].buffer_.CurrentSize() > 0 && offset < bufs_[curr_].offset_ &&
+      prev_len_ != 0) {
     return false;
   }
 
@@ -459,35 +475,25 @@ bool FilePrefetchBuffer::TryReadFromCach
   //    If readahead is not enabled: return false.
   TEST_SYNC_POINT_CALLBACK("FilePrefetchBuffer::TryReadFromCache",
                            &readahead_size_);
-  if (offset + n > bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
+  if (offset < bufs_[curr_].offset_ ||
+      offset + n > bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
     if (readahead_size_ > 0) {
       Status s;
       assert(reader != nullptr);
       assert(max_readahead_size_ >= readahead_size_);
-      if (for_compaction) {
-        s = Prefetch(opts, reader, offset, std::max(n, readahead_size_),
-                     rate_limiter_priority);
-      } else {
-        if (implicit_auto_readahead_) {
-          if (!IsEligibleForPrefetch(offset, n)) {
-            // Ignore status as Prefetch is not called.
-            s.PermitUncheckedError();
-            return false;
-          }
-        }
-        // async prefetching is enabled if it's implicit_auto_readahead_ or
-        // explicit readahead_size_ is passed along with ReadOptions.async_io =
-        // true.
-        if (async_io_) {
-          // Prefetch n + readahead_size_/2 synchronously as remaining
-          // readahead_size_/2 will be prefetched asynchronously.
-          s = PrefetchAsync(opts, reader, offset, n, readahead_size_ / 2,
-                            rate_limiter_priority, copy_to_third_buffer);
-        } else {
-          s = Prefetch(opts, reader, offset, n + readahead_size_,
-                       rate_limiter_priority);
+
+      if (implicit_auto_readahead_) {
+        if (!IsEligibleForPrefetch(offset, n)) {
+          // Ignore status as Prefetch is not called.
+          s.PermitUncheckedError();
+          return false;
         }
       }
+
+      // Prefetch n + readahead_size_/2 synchronously as remaining
+      // readahead_size_/2 will be prefetched asynchronously.
+      s = PrefetchAsyncInternal(opts, reader, offset, n, readahead_size_ / 2,
+                                rate_limiter_priority, copy_to_third_buffer);
       if (!s.ok()) {
         if (status) {
           *status = s;
@@ -544,4 +550,92 @@ void FilePrefetchBuffer::PrefetchAsyncCa
     bufs_[index].buffer_.Size(current_size + req.result.size());
   }
 }
+
+Status FilePrefetchBuffer::PrefetchAsync(const IOOptions& opts,
+                                         RandomAccessFileReader* reader,
+                                         uint64_t offset, size_t n,
+                                         Env::IOPriority rate_limiter_priority,
+                                         Slice* result) {
+  assert(reader != nullptr);
+  if (!enable_) {
+    return Status::NotSupported();
+  }
+  TEST_SYNC_POINT("FilePrefetchBuffer::PrefetchAsync:Start");
+
+  PollAndUpdateBuffersIfNeeded(offset);
+
+  // Index of second buffer.
+  uint32_t second = curr_ ^ 1;
+
+  // Since PrefetchAsync can be called on non sequential reads. So offset can
+  // be less than buffers' offset. In that case it clears the buffer and
+  // prefetch that block.
+  if (bufs_[curr_].buffer_.CurrentSize() > 0 && offset < bufs_[curr_].offset_) {
+    bufs_[curr_].buffer_.Clear();
+  }
+
+  // All requested bytes are already in the curr_ buffer. So no need to Read
+  // again.
+  if (bufs_[curr_].buffer_.CurrentSize() > 0 &&
+      offset + n <= bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize()) {
+    uint64_t offset_in_buffer = offset - bufs_[curr_].offset_;
+    *result = Slice(bufs_[curr_].buffer_.BufferStart() + offset_in_buffer, n);
+    return Status::OK();
+  }
+
+  Status s;
+  size_t alignment = reader->file()->GetRequiredBufferAlignment();
+
+  // TODO akanksha: Handle the scenario if data is overlapping in 2 buffers.
+  // Currently, it covers 2 scenarios. Either one buffer (curr_) has no data or
+  // it has partial data. It ignores the contents in second buffer (overlapping
+  // data in 2 buffers) and sends the request to re-read that data again.
+
+  // Clear the second buffer in order to do asynchronous prefetching.
+  bufs_[second].buffer_.Clear();
+
+  size_t offset_to_read = static_cast<size_t>(offset);
+  uint64_t rounddown_start = 0;
+  uint64_t roundup_end = 0;
+
+  if (bufs_[curr_].buffer_.CurrentSize() == 0) {
+    // Prefetch full data.
+    rounddown_start = Rounddown(offset_to_read, alignment);
+    roundup_end = Roundup(offset_to_read + n, alignment);
+  } else {
+    // Prefetch remaining data.
+    size_t rem_length = n - (bufs_[curr_].buffer_.CurrentSize() -
+                             (offset - bufs_[curr_].offset_));
+    rounddown_start = bufs_[curr_].offset_ + bufs_[curr_].buffer_.CurrentSize();
+    roundup_end = Roundup(rounddown_start + rem_length, alignment);
+  }
+
+  uint64_t roundup_len = roundup_end - rounddown_start;
+  assert(roundup_len >= alignment);
+  assert(roundup_len % alignment == 0);
+
+  uint64_t chunk_len = 0;
+  CalculateOffsetAndLen(alignment, rounddown_start, roundup_len, second, false,
+                        chunk_len);
+
+  // Update the buffer offset.
+  bufs_[second].offset_ = rounddown_start;
+  assert(roundup_len >= chunk_len);
+
+  size_t read_len = static_cast<size_t>(roundup_len - chunk_len);
+
+  s = ReadAsync(opts, reader, rate_limiter_priority, read_len, chunk_len,
+                rounddown_start, second);
+
+  if (!s.ok()) {
+    return s;
+  }
+
+  // Update read pattern so that TryReadFromCacheAsync can be called to Poll
+  // the data. It will return without polling if blocks are not sequential.
+  UpdateReadPattern(offset, n, /*decrease_readaheadsize=*/false);
+  prev_len_ = 0;
+
+  return Status::TryAgain();
+}
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/file/file_prefetch_buffer.h 7.3.1-2/file/file_prefetch_buffer.h
--- 7.2.2-5/file/file_prefetch_buffer.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/file_prefetch_buffer.h	2022-06-08 21:08:16.000000000 +0000
@@ -71,7 +71,7 @@ class FilePrefetchBuffer {
         readahead_size_(readahead_size),
         initial_auto_readahead_size_(readahead_size),
         max_readahead_size_(max_readahead_size),
-        min_offset_read_(port::kMaxSizet),
+        min_offset_read_(std::numeric_limits<size_t>::max()),
         enable_(enable),
         track_min_offset_(track_min_offset),
         implicit_auto_readahead_(implicit_auto_readahead),
@@ -89,6 +89,7 @@ class FilePrefetchBuffer {
     // while curr_ is being consumed. If data is overlapping in two buffers,
     // data is copied to third buffer to return continuous buffer.
     bufs_.resize(3);
+    (void)async_io_;
   }
 
   ~FilePrefetchBuffer() {
@@ -131,10 +132,21 @@ class FilePrefetchBuffer {
                   uint64_t offset, size_t n,
                   Env::IOPriority rate_limiter_priority);
 
+  // Request for reading the data from a file asynchronously.
+  // If data already exists in the buffer, result will be updated.
+  // reader                : the file reader.
+  // offset                : the file offset to start reading from.
+  // n                     : the number of bytes to read.
+  // rate_limiter_priority : rate limiting priority, or `Env::IO_TOTAL` to
+  //                         bypass.
+  // result                : if data already exists in the buffer, result will
+  //                         be updated with the data.
+  //
+  // If data already exists in the buffer, it will return Status::OK, otherwise
+  // it will send asynchronous request and return Status::TryAgain.
   Status PrefetchAsync(const IOOptions& opts, RandomAccessFileReader* reader,
-                       uint64_t offset, size_t length, size_t readahead_size,
-                       Env::IOPriority rate_limiter_priority,
-                       bool& copy_to_third_buffer);
+                       uint64_t offset, size_t n,
+                       Env::IOPriority rate_limiter_priority, Slice* result);
 
   // Tries returning the data for a file read from this buffer if that data is
   // in the buffer.
@@ -159,8 +171,7 @@ class FilePrefetchBuffer {
   bool TryReadFromCacheAsync(const IOOptions& opts,
                              RandomAccessFileReader* reader, uint64_t offset,
                              size_t n, Slice* result, Status* status,
-                             Env::IOPriority rate_limiter_priority,
-                             bool for_compaction /* = false */);
+                             Env::IOPriority rate_limiter_priority);
 
   // The minimum `offset` ever passed to TryReadFromCache(). This will nly be
   // tracked if track_min_offset = true.
@@ -207,22 +218,6 @@ class FilePrefetchBuffer {
     }
   }
 
-  bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
-    // Prefetch only if this read is sequential otherwise reset readahead_size_
-    // to initial value.
-    if (!IsBlockSequential(offset)) {
-      UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
-      ResetValues();
-      return false;
-    }
-    num_file_reads_++;
-    if (num_file_reads_ <= kMinNumFileReadsToStartAutoReadahead) {
-      UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
-      return false;
-    }
-    return true;
-  }
-
   // Callback function passed to underlying FS in case of asynchronous reads.
   void PrefetchAsyncCallback(const FSReadRequest& req, void* cb_arg);
 
@@ -234,6 +229,17 @@ class FilePrefetchBuffer {
                              size_t roundup_len, size_t index, bool refit_tail,
                              uint64_t& chunk_len);
 
+  // It calls Poll API if there is any pending asynchronous request. It then
+  // checks if data is in any buffer. It clears the outdated data and swaps the
+  // buffers if required.
+  void PollAndUpdateBuffersIfNeeded(uint64_t offset);
+
+  Status PrefetchAsyncInternal(const IOOptions& opts,
+                               RandomAccessFileReader* reader, uint64_t offset,
+                               size_t length, size_t readahead_size,
+                               Env::IOPriority rate_limiter_priority,
+                               bool& copy_to_third_buffer);
+
   Status Read(const IOOptions& opts, RandomAccessFileReader* reader,
               Env::IOPriority rate_limiter_priority, uint64_t read_len,
               uint64_t chunk_len, uint64_t rounddown_start, uint32_t index);
@@ -256,6 +262,22 @@ class FilePrefetchBuffer {
     readahead_size_ = initial_auto_readahead_size_;
   }
 
+  bool IsEligibleForPrefetch(uint64_t offset, size_t n) {
+    // Prefetch only if this read is sequential otherwise reset readahead_size_
+    // to initial value.
+    if (!IsBlockSequential(offset)) {
+      UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
+      ResetValues();
+      return false;
+    }
+    num_file_reads_++;
+    if (num_file_reads_ <= kMinNumFileReadsToStartAutoReadahead) {
+      UpdateReadPattern(offset, n, false /*decrease_readaheadsize*/);
+      return false;
+    }
+    return true;
+  }
+
   std::vector<BufferInfo> bufs_;
   // curr_ represents the index for bufs_ indicating which buffer is being
   // consumed currently.
diff -pruN 7.2.2-5/file/file_util.h 7.3.1-2/file/file_util.h
--- 7.2.2-5/file/file_util.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/file_util.h	2022-06-08 21:08:16.000000000 +0000
@@ -78,6 +78,8 @@ inline IOStatus PrepareIOFromReadOptions
       (!opts.timeout.count() || ro.io_timeout < opts.timeout)) {
     opts.timeout = ro.io_timeout;
   }
+
+  opts.rate_limiter_priority = ro.rate_limiter_priority;
   return IOStatus::OK();
 }
 
diff -pruN 7.2.2-5/file/prefetch_test.cc 7.3.1-2/file/prefetch_test.cc
--- 7.2.2-5/file/prefetch_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/prefetch_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -5,6 +5,9 @@
 
 #include "db/db_test_util.h"
 #include "test_util/sync_point.h"
+#ifdef GFLAGS
+#include "tools/io_tracer_parser_tool.h"
+#endif
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -534,15 +537,24 @@ TEST_P(PrefetchTest, PrefetchWhenReseek)
      * initially (2 more data blocks).
      */
     iter->Seek(BuildKey(0));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1000));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1004));  // Prefetch Data
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1008));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1011));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1015));  // Prefetch Data
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1019));
+    ASSERT_TRUE(iter->Valid());
     // Missed 2 blocks but they are already in buffer so no reset.
     iter->Seek(BuildKey(103));   // Already in buffer.
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1033));  // Prefetch Data
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 3);
       fs->ClearPrefetchCount();
@@ -558,10 +570,15 @@ TEST_P(PrefetchTest, PrefetchWhenReseek)
      */
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
     iter->Seek(BuildKey(0));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1008));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1019));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1033));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1048));
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 0);
       fs->ClearPrefetchCount();
@@ -576,9 +593,13 @@ TEST_P(PrefetchTest, PrefetchWhenReseek)
      */
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
     iter->Seek(BuildKey(0));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(10));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(100));
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 0);
       fs->ClearPrefetchCount();
@@ -596,14 +617,21 @@ TEST_P(PrefetchTest, PrefetchWhenReseek)
      */
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
     iter->Seek(BuildKey(0));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1000));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1004));  // This iteration will prefetch buffer
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1008));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(
         BuildKey(996));  // Reseek won't prefetch any data and
                          // readahead_size will be initiallized to 8*1024.
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(992));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(989));
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 1);
       fs->ClearPrefetchCount();
@@ -615,11 +643,17 @@ TEST_P(PrefetchTest, PrefetchWhenReseek)
     // Read sequentially to confirm readahead_size is reset to initial value (2
     // more data blocks)
     iter->Seek(BuildKey(1011));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1015));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1019));  // Prefetch Data
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1022));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1026));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(103));  // Prefetch Data
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 2);
       fs->ClearPrefetchCount();
@@ -634,12 +668,19 @@ TEST_P(PrefetchTest, PrefetchWhenReseek)
      */
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
     iter->Seek(BuildKey(0));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1167));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1334));  // This iteration will prefetch buffer
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1499));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1667));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1847));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1999));
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 1);
       fs->ClearPrefetchCount();
@@ -766,8 +807,11 @@ TEST_P(PrefetchTest, PrefetchWhenReseekw
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
     // Warm up the cache
     iter->Seek(BuildKey(1011));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1015));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1019));
+    ASSERT_TRUE(iter->Valid());
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 1);
       fs->ClearPrefetchCount();
@@ -780,20 +824,31 @@ TEST_P(PrefetchTest, PrefetchWhenReseekw
     // After caching, blocks will be read from cache (Sequential blocks)
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ReadOptions()));
     iter->Seek(BuildKey(0));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1000));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1004));  // Prefetch data (not in cache).
+    ASSERT_TRUE(iter->Valid());
     // Missed one sequential block but next is in already in buffer so readahead
     // will not be reset.
     iter->Seek(BuildKey(1011));
+    ASSERT_TRUE(iter->Valid());
     // Prefetch data but blocks are in cache so no prefetch and reset.
     iter->Seek(BuildKey(1015));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1019));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1022));
+    ASSERT_TRUE(iter->Valid());
     // Prefetch data with readahead_size = 4 blocks.
     iter->Seek(BuildKey(1026));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(103));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1033));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1037));
+    ASSERT_TRUE(iter->Valid());
 
     if (support_prefetch && !use_direct_io) {
       ASSERT_EQ(fs->GetPrefetchCount(), 3);
@@ -881,7 +936,7 @@ TEST_P(PrefetchTest1, DBIterLevelReadAhe
         [&](void*) { buff_prefetch_count++; });
 
     SyncPoint::GetInstance()->SetCallBack(
-        "FilePrefetchBuffer::PrefetchAsync:Start",
+        "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
         [&](void*) { buff_async_prefetch_count++; });
 
     // The callback checks, since reads are sequential, readahead_size doesn't
@@ -955,7 +1010,7 @@ class PrefetchTest2 : public DBTestBase,
 INSTANTIATE_TEST_CASE_P(PrefetchTest2, PrefetchTest2, ::testing::Bool());
 
 #ifndef ROCKSDB_LITE
-TEST_P(PrefetchTest2, NonSequentialReads) {
+TEST_P(PrefetchTest2, NonSequentialReadsWithAdaptiveReadahead) {
   const int kNumKeys = 1000;
   // Set options
   std::shared_ptr<MockFS> fs =
@@ -1002,9 +1057,8 @@ TEST_P(PrefetchTest2, NonSequentialReads
   int set_readahead = 0;
   size_t readahead_size = 0;
 
-  SyncPoint::GetInstance()->SetCallBack(
-      "FilePrefetchBuffer::PrefetchAsync:Start",
-      [&](void*) { buff_prefetch_count++; });
+  SyncPoint::GetInstance()->SetCallBack("FilePrefetchBuffer::Prefetch:Start",
+                                        [&](void*) { buff_prefetch_count++; });
   SyncPoint::GetInstance()->SetCallBack(
       "BlockPrefetcher::SetReadaheadState",
       [&](void* /*arg*/) { set_readahead++; });
@@ -1018,13 +1072,15 @@ TEST_P(PrefetchTest2, NonSequentialReads
     // Iterate until prefetch is done.
     ReadOptions ro;
     ro.adaptive_readahead = true;
-    // TODO akanksha: Remove after adding new units.
-    ro.async_io = true;
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
+
     iter->SeekToFirst();
+    ASSERT_TRUE(iter->Valid());
+
     while (iter->Valid() && buff_prefetch_count == 0) {
       iter->Next();
     }
+
     ASSERT_EQ(readahead_size, 8 * 1024);
     ASSERT_EQ(buff_prefetch_count, 1);
     ASSERT_EQ(set_readahead, 0);
@@ -1033,9 +1089,12 @@ TEST_P(PrefetchTest2, NonSequentialReads
     // Move to last file and check readahead size fallbacks to 8KB. So next
     // readahead size after prefetch should be 8 * 1024;
     iter->Seek(BuildKey(4004));
+    ASSERT_TRUE(iter->Valid());
+
     while (iter->Valid() && buff_prefetch_count == 0) {
       iter->Next();
     }
+
     ASSERT_EQ(readahead_size, 8 * 1024);
     ASSERT_EQ(set_readahead, 0);
     ASSERT_EQ(buff_prefetch_count, 1);
@@ -1099,7 +1158,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadI
   size_t decrease_readahead_size = 8 * 1024;
 
   SyncPoint::GetInstance()->SetCallBack(
-      "FilePrefetchBuffer::PrefetchAsync:Start",
+      "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
       [&](void*) { buff_prefetch_count++; });
   SyncPoint::GetInstance()->SetCallBack(
       "FilePrefetchBuffer::TryReadFromCache", [&](void* arg) {
@@ -1120,8 +1179,11 @@ TEST_P(PrefetchTest2, DecreaseReadAheadI
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
     // Warm up the cache
     iter->Seek(BuildKey(1011));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1015));
+    ASSERT_TRUE(iter->Valid());
     iter->Seek(BuildKey(1019));
+    ASSERT_TRUE(iter->Valid());
     buff_prefetch_count = 0;
   }
 
@@ -1129,26 +1191,39 @@ TEST_P(PrefetchTest2, DecreaseReadAheadI
     ASSERT_OK(options.statistics->Reset());
     // After caching, blocks will be read from cache (Sequential blocks)
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
-    iter->Seek(BuildKey(0));
+    iter->Seek(
+        BuildKey(0));  // In cache so it will decrease the readahead_size.
     ASSERT_TRUE(iter->Valid());
-    iter->Seek(BuildKey(1000));
+    expected_current_readahead_size = std::max(
+        decrease_readahead_size,
+        (expected_current_readahead_size >= decrease_readahead_size
+             ? (expected_current_readahead_size - decrease_readahead_size)
+             : 0));
+
+    iter->Seek(BuildKey(1000));  // Prefetch the block.
     ASSERT_TRUE(iter->Valid());
-    iter->Seek(BuildKey(1004));  // Prefetch data (not in cache).
+    ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
+    expected_current_readahead_size *= 2;
+
+    iter->Seek(BuildKey(1004));  // Prefetch the block.
     ASSERT_TRUE(iter->Valid());
     ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
+    expected_current_readahead_size *= 2;
 
-    // Missed one sequential block but 1011 is already in buffer so
-    // readahead will not be reset.
+    // 1011 is already in cache but won't reset??
     iter->Seek(BuildKey(1011));
     ASSERT_TRUE(iter->Valid());
-    ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
 
     // Eligible to Prefetch data (not in buffer) but block is in cache so no
     // prefetch will happen and will result in decrease in readahead_size.
     // readahead_size will be 8 * 1024
     iter->Seek(BuildKey(1015));
     ASSERT_TRUE(iter->Valid());
-    expected_current_readahead_size -= decrease_readahead_size;
+    expected_current_readahead_size = std::max(
+        decrease_readahead_size,
+        (expected_current_readahead_size >= decrease_readahead_size
+             ? (expected_current_readahead_size - decrease_readahead_size)
+             : 0));
 
     // 1016 is the same block as 1015. So no change in readahead_size.
     iter->Seek(BuildKey(1016));
@@ -1169,7 +1244,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadI
     iter->Seek(BuildKey(1022));
     ASSERT_TRUE(iter->Valid());
     ASSERT_EQ(current_readahead_size, expected_current_readahead_size);
-    ASSERT_EQ(buff_prefetch_count, 2);
+    ASSERT_EQ(buff_prefetch_count, 3);
 
     // Check stats to make sure async prefetch is done.
     {
@@ -1179,6 +1254,7 @@ TEST_P(PrefetchTest2, DecreaseReadAheadI
         ASSERT_EQ(async_read_bytes.count, 0);
       } else {
         ASSERT_GT(async_read_bytes.count, 0);
+        ASSERT_GT(get_perf_context()->number_async_seek, 0);
       }
     }
 
@@ -1193,6 +1269,33 @@ class PrefetchTestWithPosix : public DBT
                               public ::testing::WithParamInterface<bool> {
  public:
   PrefetchTestWithPosix() : DBTestBase("prefetch_test_with_posix", true) {}
+
+#ifndef ROCKSDB_LITE
+#ifdef GFLAGS
+  const int kMaxArgCount = 100;
+  const size_t kArgBufferSize = 100000;
+
+  void RunIOTracerParserTool(std::string trace_file) {
+    std::vector<std::string> params = {"./io_tracer_parser",
+                                       "-io_trace_file=" + trace_file};
+
+    char arg_buffer[kArgBufferSize];
+    char* argv[kMaxArgCount];
+    int argc = 0;
+    int cursor = 0;
+    for (const auto& arg : params) {
+      ASSERT_LE(cursor + arg.size() + 1, kArgBufferSize);
+      ASSERT_LE(argc + 1, kMaxArgCount);
+
+      snprintf(arg_buffer + cursor, arg.size() + 1, "%s", arg.c_str());
+
+      argv[argc++] = arg_buffer + cursor;
+      cursor += static_cast<int>(arg.size()) + 1;
+    }
+    ASSERT_EQ(0, ROCKSDB_NAMESPACE::io_tracer_parser(argc, argv));
+  }
+#endif  // GFLAGS
+#endif  // ROCKSDB_LITE
 };
 
 INSTANTIATE_TEST_CASE_P(PrefetchTestWithPosix, PrefetchTestWithPosix,
@@ -1264,7 +1367,7 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncW
   }
 
   SyncPoint::GetInstance()->SetCallBack(
-      "FilePrefetchBuffer::PrefetchAsync:Start",
+      "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
       [&](void*) { buff_prefetch_count++; });
 
   SyncPoint::GetInstance()->SetCallBack(
@@ -1275,12 +1378,15 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncW
   // Read the keys.
   {
     ASSERT_OK(options.statistics->Reset());
+    get_perf_context()->Reset();
+
     auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
     int num_keys = 0;
     for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
       ASSERT_OK(iter->status());
       num_keys++;
     }
+
     ASSERT_EQ(num_keys, total_keys);
     ASSERT_GT(buff_prefetch_count, 0);
 
@@ -1301,6 +1407,180 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncW
       }
       ASSERT_GT(prefetched_bytes_discarded.count, 0);
     }
+    ASSERT_EQ(get_perf_context()->number_async_seek, 0);
+  }
+
+  {
+    // Read the keys using seek.
+    {
+      ASSERT_OK(options.statistics->Reset());
+      get_perf_context()->Reset();
+
+      auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
+      int num_keys = 0;
+      iter->Seek(BuildKey(450));
+      while (iter->Valid()) {
+        ASSERT_OK(iter->status());
+        num_keys++;
+        iter->Next();
+      }
+      ASSERT_OK(iter->status());
+
+      iter->Seek(BuildKey(450));
+      while (iter->Valid()) {
+        ASSERT_OK(iter->status());
+        num_keys++;
+        iter->Prev();
+      }
+
+      ASSERT_EQ(num_keys, total_keys + 1);
+      ASSERT_GT(buff_prefetch_count, 0);
+
+      // Check stats to make sure async prefetch is done.
+      {
+        HistogramData async_read_bytes;
+        options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
+        HistogramData prefetched_bytes_discarded;
+        options.statistics->histogramData(PREFETCHED_BYTES_DISCARDED,
+                                          &prefetched_bytes_discarded);
+
+        // Not all platforms support iouring. In that case, ReadAsync in posix
+        // won't submit async requests.
+        if (read_async_called) {
+          ASSERT_GT(async_read_bytes.count, 0);
+          ASSERT_GT(get_perf_context()->number_async_seek, 0);
+        } else {
+          ASSERT_EQ(async_read_bytes.count, 0);
+          ASSERT_EQ(get_perf_context()->number_async_seek, 0);
+        }
+        ASSERT_GT(prefetched_bytes_discarded.count, 0);
+      }
+    }
+  }
+
+  SyncPoint::GetInstance()->DisableProcessing();
+  SyncPoint::GetInstance()->ClearAllCallBacks();
+
+  Close();
+}
+
+#ifndef ROCKSDB_LITE
+#ifdef GFLAGS
+TEST_P(PrefetchTestWithPosix, TraceReadAsyncWithCallbackWrapper) {
+  if (mem_env_ || encrypted_env_) {
+    ROCKSDB_GTEST_SKIP("Test requires non-mem or non-encrypted environment");
+    return;
+  }
+
+  const int kNumKeys = 1000;
+  std::shared_ptr<MockFS> fs = std::make_shared<MockFS>(
+      FileSystem::Default(), /*support_prefetch=*/false);
+  std::unique_ptr<Env> env(new CompositeEnvWrapper(env_, fs));
+
+  bool use_direct_io = false;
+  Options options = CurrentOptions();
+  options.write_buffer_size = 1024;
+  options.create_if_missing = true;
+  options.compression = kNoCompression;
+  options.env = env.get();
+  options.statistics = CreateDBStatistics();
+  if (use_direct_io) {
+    options.use_direct_reads = true;
+    options.use_direct_io_for_flush_and_compaction = true;
+  }
+  BlockBasedTableOptions table_options;
+  table_options.no_block_cache = true;
+  table_options.cache_index_and_filter_blocks = false;
+  table_options.metadata_block_size = 1024;
+  table_options.index_type =
+      BlockBasedTableOptions::IndexType::kTwoLevelIndexSearch;
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+
+  Status s = TryReopen(options);
+  if (use_direct_io && (s.IsNotSupported() || s.IsInvalidArgument())) {
+    // If direct IO is not supported, skip the test
+    return;
+  } else {
+    ASSERT_OK(s);
+  }
+
+  int total_keys = 0;
+  // Write the keys.
+  {
+    WriteBatch batch;
+    Random rnd(309);
+    for (int j = 0; j < 5; j++) {
+      for (int i = j * kNumKeys; i < (j + 1) * kNumKeys; i++) {
+        ASSERT_OK(batch.Put(BuildKey(i), rnd.RandomString(1000)));
+        total_keys++;
+      }
+      ASSERT_OK(db_->Write(WriteOptions(), &batch));
+      ASSERT_OK(Flush());
+    }
+    MoveFilesToLevel(2);
+  }
+
+  int buff_prefetch_count = 0;
+  bool read_async_called = false;
+  ReadOptions ro;
+  ro.adaptive_readahead = true;
+  ro.async_io = true;
+
+  if (GetParam()) {
+    ro.readahead_size = 16 * 1024;
+  }
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "FilePrefetchBuffer::PrefetchAsyncInternal:Start",
+      [&](void*) { buff_prefetch_count++; });
+
+  SyncPoint::GetInstance()->SetCallBack(
+      "UpdateResults::io_uring_result",
+      [&](void* /*arg*/) { read_async_called = true; });
+  SyncPoint::GetInstance()->EnableProcessing();
+
+  // Read the keys.
+  {
+    // Start io_tracing.
+    WriteOptions write_opt;
+    TraceOptions trace_opt;
+    std::unique_ptr<TraceWriter> trace_writer;
+    std::string trace_file_path = dbname_ + "/io_trace_file";
+
+    ASSERT_OK(
+        NewFileTraceWriter(env_, EnvOptions(), trace_file_path, &trace_writer));
+    ASSERT_OK(db_->StartIOTrace(trace_opt, std::move(trace_writer)));
+    ASSERT_OK(options.statistics->Reset());
+
+    auto iter = std::unique_ptr<Iterator>(db_->NewIterator(ro));
+    int num_keys = 0;
+    for (iter->SeekToFirst(); iter->Valid(); iter->Next()) {
+      ASSERT_OK(iter->status());
+      num_keys++;
+    }
+
+    // End the tracing.
+    ASSERT_OK(db_->EndIOTrace());
+    ASSERT_OK(env_->FileExists(trace_file_path));
+
+    ASSERT_EQ(num_keys, total_keys);
+    ASSERT_GT(buff_prefetch_count, 0);
+
+    // Check stats to make sure async prefetch is done.
+    {
+      HistogramData async_read_bytes;
+      options.statistics->histogramData(ASYNC_READ_BYTES, &async_read_bytes);
+      // Not all platforms support iouring. In that case, ReadAsync in posix
+      // won't submit async requests.
+      if (read_async_called) {
+        ASSERT_GT(async_read_bytes.count, 0);
+      } else {
+        ASSERT_EQ(async_read_bytes.count, 0);
+      }
+    }
+
+    // Check the file to see if ReadAsync is logged.
+    RunIOTracerParserTool(trace_file_path);
   }
 
   SyncPoint::GetInstance()->DisableProcessing();
@@ -1308,6 +1588,8 @@ TEST_P(PrefetchTestWithPosix, ReadAsyncW
 
   Close();
 }
+#endif  // GFLAGS
+#endif  // ROCKSDB_LITE
 }  // namespace ROCKSDB_NAMESPACE
 
 int main(int argc, char** argv) {
diff -pruN 7.2.2-5/file/random_access_file_reader.cc 7.3.1-2/file/random_access_file_reader.cc
--- 7.2.2-5/file/random_access_file_reader.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/random_access_file_reader.cc	2022-06-08 21:08:16.000000000 +0000
@@ -457,9 +457,16 @@ IOStatus RandomAccessFileReader::ReadAsy
 
   IOStatus s = file_->ReadAsync(req, opts, read_async_callback, read_async_info,
                                 io_handle, del_fn, nullptr /*dbg*/);
+// Suppress false positive clang analyzer warnings.
+// Memory is not released if file_->ReadAsync returns !s.ok(), because
+// ReadAsyncCallback is never called in that case. If ReadAsyncCallback is
+// called then ReadAsync should always return IOStatus::OK().
+#ifndef __clang_analyzer__
   if (!s.ok()) {
     delete read_async_info;
   }
+#endif  // __clang_analyzer__
+
   return s;
 }
 
diff -pruN 7.2.2-5/file/random_access_file_reader.h 7.3.1-2/file/random_access_file_reader.h
--- 7.2.2-5/file/random_access_file_reader.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/random_access_file_reader.h	2022-06-08 21:08:16.000000000 +0000
@@ -172,8 +172,11 @@ class RandomAccessFileReader {
                      size_t num_reqs, AlignedBuf* aligned_buf,
                      Env::IOPriority rate_limiter_priority) const;
 
-  IOStatus Prefetch(uint64_t offset, size_t n) const {
-    return file_->Prefetch(offset, n, IOOptions(), nullptr);
+  IOStatus Prefetch(uint64_t offset, size_t n,
+                    const Env::IOPriority rate_limiter_priority) const {
+    IOOptions opts;
+    opts.rate_limiter_priority = rate_limiter_priority;
+    return file_->Prefetch(offset, n, opts, nullptr);
   }
 
   FSRandomAccessFile* file() { return file_.get(); }
diff -pruN 7.2.2-5/file/sequence_file_reader.cc 7.3.1-2/file/sequence_file_reader.cc
--- 7.2.2-5/file/sequence_file_reader.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/sequence_file_reader.cc	2022-06-08 21:08:16.000000000 +0000
@@ -38,6 +38,13 @@ IOStatus SequentialFileReader::Read(size
   IOStatus io_s;
   if (use_direct_io()) {
 #ifndef ROCKSDB_LITE
+    //
+    //    |-offset_advance-|---bytes returned--|
+    //    |----------------------buf size-------------------------|
+    //    |                |                   |                  |
+    // aligned           offset          offset + n  Roundup(offset + n,
+    // offset                                             alignment)
+    //
     size_t offset = offset_.fetch_add(n);
     size_t alignment = file_->GetRequiredBufferAlignment();
     size_t aligned_offset = TruncateToPageBoundary(alignment, offset);
diff -pruN 7.2.2-5/file/writable_file_writer.cc 7.3.1-2/file/writable_file_writer.cc
--- 7.2.2-5/file/writable_file_writer.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/writable_file_writer.cc	2022-06-08 21:08:16.000000000 +0000
@@ -54,10 +54,14 @@ IOStatus WritableFileWriter::Append(cons
   UpdateFileChecksum(data);
 
   {
+    IOOptions io_options;
+    io_options.rate_limiter_priority =
+        WritableFileWriter::DecideRateLimiterPriority(
+            writable_file_->GetIOPriority(), op_rate_limiter_priority);
     IOSTATS_TIMER_GUARD(prepare_write_nanos);
     TEST_SYNC_POINT("WritableFileWriter::Append:BeforePrepareWrite");
     writable_file_->PrepareWrite(static_cast<size_t>(GetFileSize()), left,
-                                 IOOptions(), nullptr);
+                                 io_options, nullptr);
   }
 
   // See whether we need to enlarge the buffer to avoid the flush
@@ -159,7 +163,8 @@ IOStatus WritableFileWriter::Append(cons
 
   TEST_KILL_RANDOM("WritableFileWriter::Append:1");
   if (s.ok()) {
-    filesize_ += data.size();
+    uint64_t cur_size = filesize_.load(std::memory_order_acquire);
+    filesize_.store(cur_size + data.size(), std::memory_order_release);
   }
   return s;
 }
@@ -187,7 +192,8 @@ IOStatus WritableFileWriter::Pad(const s
     cap = buf_.Capacity() - buf_.CurrentSize();
   }
   pending_sync_ = true;
-  filesize_ += pad_bytes;
+  uint64_t cur_size = filesize_.load(std::memory_order_acquire);
+  filesize_.store(cur_size + pad_bytes, std::memory_order_release);
   if (perform_data_verification_) {
     buffered_data_crc32c_checksum_ =
         crc32c::Extend(buffered_data_crc32c_checksum_,
@@ -211,6 +217,8 @@ IOStatus WritableFileWriter::Close() {
   s = Flush();  // flush cache to OS
 
   IOStatus interim;
+  IOOptions io_options;
+  io_options.rate_limiter_priority = writable_file_->GetIOPriority();
   // In direct I/O mode we write whole pages so
   // we need to let the file know where data ends.
   if (use_direct_io()) {
@@ -221,14 +229,15 @@ IOStatus WritableFileWriter::Close() {
         start_ts = FileOperationInfo::StartNow();
       }
 #endif
-      interim = writable_file_->Truncate(filesize_, IOOptions(), nullptr);
+      uint64_t filesz = filesize_.load(std::memory_order_acquire);
+      interim = writable_file_->Truncate(filesz, io_options, nullptr);
 #ifndef ROCKSDB_LITE
       if (ShouldNotifyListeners()) {
         auto finish_ts = FileOperationInfo::FinishNow();
         NotifyOnFileTruncateFinish(start_ts, finish_ts, s);
         if (!interim.ok()) {
           NotifyOnIOError(interim, FileOperationType::kTruncate, file_name(),
-                          filesize_);
+                          filesz);
         }
       }
 #endif
@@ -241,7 +250,7 @@ IOStatus WritableFileWriter::Close() {
           start_ts = FileOperationInfo::StartNow();
         }
 #endif
-        interim = writable_file_->Fsync(IOOptions(), nullptr);
+        interim = writable_file_->Fsync(io_options, nullptr);
 #ifndef ROCKSDB_LITE
         if (ShouldNotifyListeners()) {
           auto finish_ts = FileOperationInfo::FinishNow();
@@ -267,7 +276,7 @@ IOStatus WritableFileWriter::Close() {
       start_ts = FileOperationInfo::StartNow();
     }
 #endif
-    interim = writable_file_->Close(IOOptions(), nullptr);
+    interim = writable_file_->Close(io_options, nullptr);
 #ifndef ROCKSDB_LITE
     if (ShouldNotifyListeners()) {
       auto finish_ts = FileOperationInfo::FinishNow();
@@ -331,7 +340,11 @@ IOStatus WritableFileWriter::Flush(Env::
       start_ts = FileOperationInfo::StartNow();
     }
 #endif
-    s = writable_file_->Flush(IOOptions(), nullptr);
+    IOOptions io_options;
+    io_options.rate_limiter_priority =
+        WritableFileWriter::DecideRateLimiterPriority(
+            writable_file_->GetIOPriority(), op_rate_limiter_priority);
+    s = writable_file_->Flush(io_options, nullptr);
 #ifndef ROCKSDB_LITE
     if (ShouldNotifyListeners()) {
       auto finish_ts = std::chrono::steady_clock::now();
@@ -362,8 +375,9 @@ IOStatus WritableFileWriter::Flush(Env::
     const uint64_t kBytesNotSyncRange =
         1024 * 1024;                                // recent 1MB is not synced.
     const uint64_t kBytesAlignWhenSync = 4 * 1024;  // Align 4KB.
-    if (filesize_ > kBytesNotSyncRange) {
-      uint64_t offset_sync_to = filesize_ - kBytesNotSyncRange;
+    uint64_t cur_size = filesize_.load(std::memory_order_acquire);
+    if (cur_size > kBytesNotSyncRange) {
+      uint64_t offset_sync_to = cur_size - kBytesNotSyncRange;
       offset_sync_to -= offset_sync_to % kBytesAlignWhenSync;
       assert(offset_sync_to >= last_sync_size_);
       if (offset_sync_to > 0 &&
@@ -428,17 +442,22 @@ IOStatus WritableFileWriter::SyncInterna
   IOSTATS_TIMER_GUARD(fsync_nanos);
   TEST_SYNC_POINT("WritableFileWriter::SyncInternal:0");
   auto prev_perf_level = GetPerfLevel();
+
   IOSTATS_CPU_TIMER_GUARD(cpu_write_nanos, clock_);
+
 #ifndef ROCKSDB_LITE
   FileOperationInfo::StartTimePoint start_ts;
   if (ShouldNotifyListeners()) {
     start_ts = FileOperationInfo::StartNow();
   }
 #endif
+
+  IOOptions io_options;
+  io_options.rate_limiter_priority = writable_file_->GetIOPriority();
   if (use_fsync) {
-    s = writable_file_->Fsync(IOOptions(), nullptr);
+    s = writable_file_->Fsync(io_options, nullptr);
   } else {
-    s = writable_file_->Sync(IOOptions(), nullptr);
+    s = writable_file_->Sync(io_options, nullptr);
   }
 #ifndef ROCKSDB_LITE
   if (ShouldNotifyListeners()) {
@@ -466,7 +485,9 @@ IOStatus WritableFileWriter::RangeSync(u
     start_ts = FileOperationInfo::StartNow();
   }
 #endif
-  IOStatus s = writable_file_->RangeSync(offset, nbytes, IOOptions(), nullptr);
+  IOOptions io_options;
+  io_options.rate_limiter_priority = writable_file_->GetIOPriority();
+  IOStatus s = writable_file_->RangeSync(offset, nbytes, io_options, nullptr);
 #ifndef ROCKSDB_LITE
   if (ShouldNotifyListeners()) {
     auto finish_ts = std::chrono::steady_clock::now();
@@ -490,19 +511,19 @@ IOStatus WritableFileWriter::WriteBuffer
   size_t left = size;
   DataVerificationInfo v_info;
   char checksum_buf[sizeof(uint32_t)];
+  Env::IOPriority rate_limiter_priority_used =
+      WritableFileWriter::DecideRateLimiterPriority(
+          writable_file_->GetIOPriority(), op_rate_limiter_priority);
+  IOOptions io_options;
+  io_options.rate_limiter_priority = rate_limiter_priority_used;
 
   while (left > 0) {
-    size_t allowed;
-    Env::IOPriority rate_limiter_priority_used =
-        WritableFileWriter::DecideRateLimiterPriority(
-            writable_file_->GetIOPriority(), op_rate_limiter_priority);
+    size_t allowed = left;
     if (rate_limiter_ != nullptr &&
         rate_limiter_priority_used != Env::IO_TOTAL) {
       allowed = rate_limiter_->RequestToken(left, 0 /* alignment */,
                                             rate_limiter_priority_used, stats_,
                                             RateLimiter::OpType::kWrite);
-    } else {
-      allowed = left;
     }
 
     {
@@ -511,7 +532,7 @@ IOStatus WritableFileWriter::WriteBuffer
 
 #ifndef ROCKSDB_LITE
       FileOperationInfo::StartTimePoint start_ts;
-      uint64_t old_size = writable_file_->GetFileSize(IOOptions(), nullptr);
+      uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr);
       if (ShouldNotifyListeners()) {
         start_ts = FileOperationInfo::StartNow();
         old_size = next_write_offset_;
@@ -524,10 +545,10 @@ IOStatus WritableFileWriter::WriteBuffer
         if (perform_data_verification_) {
           Crc32cHandoffChecksumCalculation(src, allowed, checksum_buf);
           v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
-          s = writable_file_->Append(Slice(src, allowed), IOOptions(), v_info,
+          s = writable_file_->Append(Slice(src, allowed), io_options, v_info,
                                      nullptr);
         } else {
-          s = writable_file_->Append(Slice(src, allowed), IOOptions(), nullptr);
+          s = writable_file_->Append(Slice(src, allowed), io_options, nullptr);
         }
         if (!s.ok()) {
           // If writable_file_->Append() failed, then the data may or may not
@@ -579,15 +600,16 @@ IOStatus WritableFileWriter::WriteBuffer
   size_t left = size;
   DataVerificationInfo v_info;
   char checksum_buf[sizeof(uint32_t)];
-
+  Env::IOPriority rate_limiter_priority_used =
+      WritableFileWriter::DecideRateLimiterPriority(
+          writable_file_->GetIOPriority(), op_rate_limiter_priority);
+  IOOptions io_options;
+  io_options.rate_limiter_priority = rate_limiter_priority_used;
   // Check how much is allowed. Here, we loop until the rate limiter allows to
   // write the entire buffer.
   // TODO: need to be improved since it sort of defeats the purpose of the rate
   // limiter
   size_t data_size = left;
-  Env::IOPriority rate_limiter_priority_used =
-      WritableFileWriter::DecideRateLimiterPriority(
-          writable_file_->GetIOPriority(), op_rate_limiter_priority);
   if (rate_limiter_ != nullptr && rate_limiter_priority_used != Env::IO_TOTAL) {
     while (data_size > 0) {
       size_t tmp_size;
@@ -604,7 +626,7 @@ IOStatus WritableFileWriter::WriteBuffer
 
 #ifndef ROCKSDB_LITE
     FileOperationInfo::StartTimePoint start_ts;
-    uint64_t old_size = writable_file_->GetFileSize(IOOptions(), nullptr);
+    uint64_t old_size = writable_file_->GetFileSize(io_options, nullptr);
     if (ShouldNotifyListeners()) {
       start_ts = FileOperationInfo::StartNow();
       old_size = next_write_offset_;
@@ -617,8 +639,7 @@ IOStatus WritableFileWriter::WriteBuffer
 
       EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_);
       v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
-      s = writable_file_->Append(Slice(src, left), IOOptions(), v_info,
-                                 nullptr);
+      s = writable_file_->Append(Slice(src, left), io_options, v_info, nullptr);
       SetPerfLevel(prev_perf_level);
     }
 #ifndef ROCKSDB_LITE
@@ -709,20 +730,20 @@ IOStatus WritableFileWriter::WriteDirect
   size_t left = buf_.CurrentSize();
   DataVerificationInfo v_info;
   char checksum_buf[sizeof(uint32_t)];
+  Env::IOPriority rate_limiter_priority_used =
+      WritableFileWriter::DecideRateLimiterPriority(
+          writable_file_->GetIOPriority(), op_rate_limiter_priority);
+  IOOptions io_options;
+  io_options.rate_limiter_priority = rate_limiter_priority_used;
 
   while (left > 0) {
     // Check how much is allowed
-    size_t size;
-    Env::IOPriority rate_limiter_priority_used =
-        WritableFileWriter::DecideRateLimiterPriority(
-            writable_file_->GetIOPriority(), op_rate_limiter_priority);
+    size_t size = left;
     if (rate_limiter_ != nullptr &&
         rate_limiter_priority_used != Env::IO_TOTAL) {
       size = rate_limiter_->RequestToken(left, buf_.Alignment(),
-                                         writable_file_->GetIOPriority(),
-                                         stats_, RateLimiter::OpType::kWrite);
-    } else {
-      size = left;
+                                         rate_limiter_priority_used, stats_,
+                                         RateLimiter::OpType::kWrite);
     }
 
     {
@@ -737,10 +758,10 @@ IOStatus WritableFileWriter::WriteDirect
         Crc32cHandoffChecksumCalculation(src, size, checksum_buf);
         v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
         s = writable_file_->PositionedAppend(Slice(src, size), write_offset,
-                                             IOOptions(), v_info, nullptr);
+                                             io_options, v_info, nullptr);
       } else {
         s = writable_file_->PositionedAppend(Slice(src, size), write_offset,
-                                             IOOptions(), nullptr);
+                                             io_options, nullptr);
       }
 
       if (ShouldNotifyListeners()) {
@@ -810,20 +831,22 @@ IOStatus WritableFileWriter::WriteDirect
   DataVerificationInfo v_info;
   char checksum_buf[sizeof(uint32_t)];
 
+  Env::IOPriority rate_limiter_priority_used =
+      WritableFileWriter::DecideRateLimiterPriority(
+          writable_file_->GetIOPriority(), op_rate_limiter_priority);
+  IOOptions io_options;
+  io_options.rate_limiter_priority = rate_limiter_priority_used;
   // Check how much is allowed. Here, we loop until the rate limiter allows to
   // write the entire buffer.
   // TODO: need to be improved since it sort of defeats the purpose of the rate
   // limiter
   size_t data_size = left;
-  Env::IOPriority rate_limiter_priority_used =
-      WritableFileWriter::DecideRateLimiterPriority(
-          writable_file_->GetIOPriority(), op_rate_limiter_priority);
   if (rate_limiter_ != nullptr && rate_limiter_priority_used != Env::IO_TOTAL) {
     while (data_size > 0) {
       size_t size;
       size = rate_limiter_->RequestToken(data_size, buf_.Alignment(),
-                                         writable_file_->GetIOPriority(),
-                                         stats_, RateLimiter::OpType::kWrite);
+                                         rate_limiter_priority_used, stats_,
+                                         RateLimiter::OpType::kWrite);
       data_size -= size;
     }
   }
@@ -839,7 +862,7 @@ IOStatus WritableFileWriter::WriteDirect
     EncodeFixed32(checksum_buf, buffered_data_crc32c_checksum_);
     v_info.checksum = Slice(checksum_buf, sizeof(uint32_t));
     s = writable_file_->PositionedAppend(Slice(src, left), write_offset,
-                                         IOOptions(), v_info, nullptr);
+                                         io_options, v_info, nullptr);
 
     if (ShouldNotifyListeners()) {
       auto finish_ts = std::chrono::steady_clock::now();
@@ -894,4 +917,5 @@ Env::IOPriority WritableFileWriter::Deci
     return op_rate_limiter_priority;
   }
 }
+
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/file/writable_file_writer.h 7.3.1-2/file/writable_file_writer.h
--- 7.2.2-5/file/writable_file_writer.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/file/writable_file_writer.h	2022-06-08 21:08:16.000000000 +0000
@@ -142,7 +142,7 @@ class WritableFileWriter {
   size_t max_buffer_size_;
   // Actually written data size can be used for truncate
   // not counting padding data
-  uint64_t filesize_;
+  std::atomic<uint64_t> filesize_;
 #ifndef ROCKSDB_LITE
   // This is necessary when we use unbuffered access
   // and writes must happen on aligned offsets
@@ -255,7 +255,9 @@ class WritableFileWriter {
   // returns NotSupported status.
   IOStatus SyncWithoutFlush(bool use_fsync);
 
-  uint64_t GetFileSize() const { return filesize_; }
+  uint64_t GetFileSize() const {
+    return filesize_.load(std::memory_order_acquire);
+  }
 
   IOStatus InvalidateCache(size_t offset, size_t length) {
     return writable_file_->InvalidateCache(offset, length);
@@ -277,6 +279,7 @@ class WritableFileWriter {
   const char* GetFileChecksumFuncName() const;
 
  private:
+  // Decide the Rate Limiter priority.
   static Env::IOPriority DecideRateLimiterPriority(
       Env::IOPriority writable_file_io_priority,
       Env::IOPriority op_rate_limiter_priority);
diff -pruN 7.2.2-5/.gitignore 7.3.1-2/.gitignore
--- 7.2.2-5/.gitignore	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/.gitignore	2022-06-08 21:08:16.000000000 +0000
@@ -36,6 +36,7 @@ manifest_dump
 sst_dump
 blob_dump
 block_cache_trace_analyzer
+db_readonly_with_timestamp_test
 db_with_timestamp_basic_test
 tools/block_cache_analyzer/*.pyc
 column_aware_encoding_exp
diff -pruN 7.2.2-5/HISTORY.md 7.3.1-2/HISTORY.md
--- 7.2.2-5/HISTORY.md	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/HISTORY.md	2022-06-08 21:08:16.000000000 +0000
@@ -1,12 +1,51 @@
 # Rocksdb Change Log
-## 7.2.2 (04/28/2022)
+## 7.3.1 (06/08/2022)
 ### Bug Fixes
-* Fixed a bug in async_io path where incorrect length of data is read by FilePrefetchBuffer if data is consumed from two populated buffers and request for more data is sent.
+* Fix a bug in WAL tracking. Before this PR (#10087), calling `SyncWAL()` on the only WAL file of the db will not log the event in MANIFEST, thus allowing a subsequent `DB::Open` even if the WAL file is missing or corrupted.
+* Fixed a bug for non-TransactionDB with avoid_flush_during_recovery = true and TransactionDB where in case of crash, min_log_number_to_keep may not change on recovery and persisting a new MANIFEST with advanced log_numbers for some column families, results in "column family inconsistency" error on second recovery. As a solution, RocksDB will persist the new MANIFEST after successfully syncing the new WAL. If a future recovery starts from the new MANIFEST, then it means the new WAL is successfully synced. Due to the sentinel empty write batch at the beginning, kPointInTimeRecovery of WAL is guaranteed to go after this point. If future recovery starts from the old MANIFEST, it means the writing the new MANIFEST failed. We won't have the "SST ahead of WAL" error.
+* Fixed a bug where RocksDB DB::Open() may create and write to two new MANIFEST files even before recovery succeeds. Now writes to MANIFEST are persisted only after recovery is successful.
 
-## 7.2.1 (04/26/2022)
+## 7.3.0 (05/20/2022)
 ### Bug Fixes
+* Fixed a bug where manual flush would block forever even though flush options had wait=false.
 * Fixed a bug where RocksDB could corrupt DBs with `avoid_flush_during_recovery == true` by removing valid WALs, leading to `Status::Corruption` with message like "SST file is ahead of WALs" when attempting to reopen.
+* Fixed a bug in async_io path where incorrect length of data is read by FilePrefetchBuffer if data is consumed from two populated buffers and request for more data is sent.
+* Fixed a CompactionFilter bug. Compaction filter used to use `Delete` to remove keys, even if the keys should be removed with `SingleDelete`. Mixing `Delete` and `SingleDelete` may cause undefined behavior.
+* Fixed a bug in `WritableFileWriter::WriteDirect` and `WritableFileWriter::WriteDirectWithChecksum`. The rate_limiter_priority specified in ReadOptions was not passed to the RateLimiter when requesting a token.
+* Fixed a bug which might cause process crash when I/O error happens when reading an index block in MultiGet().
+
+### New Features
+* DB::GetLiveFilesStorageInfo is ready for production use.
+* Add new stats PREFETCHED_BYTES_DISCARDED which records number of prefetched bytes discarded by RocksDB FilePrefetchBuffer on destruction and POLL_WAIT_MICROS records wait time for FS::Poll API completion.
+* RemoteCompaction supports table_properties_collector_factories override on compaction worker.
+* Start tracking SST unique id in MANIFEST, which will be used to verify with SST properties during DB open to make sure the SST file is not overwritten or misplaced. A db option `verify_sst_unique_id_in_manifest` is introduced to enable/disable the verification, if enabled all SST files will be opened during DB-open to verify the unique id (default is false), so it's recommended to use it with `max_open_files = -1` to pre-open the files.
+* Added the ability to concurrently read data blocks from multiple files in a level in batched MultiGet. This can be enabled by setting the async_io option in ReadOptions. Using this feature requires a FileSystem that supports ReadAsync (PosixFileSystem is not supported yet for this), and for RocksDB to be compiled with folly and c++20.
+* Add FileSystem::ReadAsync API in io_tracing.
+
+### Public API changes
+* Add rollback_deletion_type_callback to TransactionDBOptions so that write-prepared transactions know whether to issue a Delete or SingleDelete to cancel a previous key written during prior prepare phase. The PR aims to prevent mixing SingleDeletes and Deletes for the same key that can lead to undefined behaviors for write-prepared transactions.
+* EXPERIMENTAL: Add new API AbortIO in file_system to abort the read requests submitted asynchronously.
+* CompactionFilter::Decision has a new value: kRemoveWithSingleDelete. If CompactionFilter returns this decision, then CompactionIterator will use `SingleDelete` to mark a key as removed.
+* Renamed CompactionFilter::Decision::kRemoveWithSingleDelete to kPurge since the latter sounds more general and hides the implementation details of how compaction iterator handles keys.
+* Added ability to specify functions for Prepare and Validate to OptionsTypeInfo.  Added methods to OptionTypeInfo to set the functions via an API.  These methods are intended for RocksDB plugin developers for configuration management.
+* Added a new immutable db option, enforce_single_del_contracts. If set to false (default is true), compaction will NOT fail due to a single delete followed by a delete for the same key. The purpose of this temporary option is to help existing use cases migrate.
+* Introduce `BlockBasedTableOptions::cache_usage_options` and use that to replace `BlockBasedTableOptions::reserve_table_builder_memory` and  `BlockBasedTableOptions::reserve_table_reader_memory`.
+* Changed `GetUniqueIdFromTableProperties` to return a 128-bit unique identifier, which will be the standard size now. The old functionality (192-bit) is available from `GetExtendedUniqueIdFromTableProperties`. Both functions are no longer "experimental" and are ready for production use.
+* In IOOptions, mark `prio` as deprecated for future removal.
+* In `file_system.h`, mark `IOPriority` as deprecated for future removal.
+* Add an option, `CompressionOptions::use_zstd_dict_trainer`, to indicate whether zstd dictionary trainer should be used for generating zstd compression dictionaries. The default value of this option is true for backward compatibility. When this option is set to false, zstd API `ZDICT_finalizeDictionary` is used to generate compression dictionaries.
+* Seek API which positions itself every LevelIterator on the correct data block in the correct SST file which can be parallelized if ReadOptions.async_io option is enabled.
+* Add new stat number_async_seek in PerfContext that indicates number of async calls made by seek to prefetch data.
+
+### Bug Fixes
 * RocksDB calls FileSystem::Poll API during FilePrefetchBuffer destruction which impacts performance as it waits for read requets completion which is not needed anymore. Calling FileSystem::AbortIO to abort those requests instead fixes that performance issue.
+* Fixed unnecessary block cache contention when queries within a MultiGet batch and across parallel batches access the same data block, which previously could cause severely degraded performance in this unusual case. (In more typical MultiGet cases, this fix is expected to yield a small or negligible performance improvement.)
+
+### Behavior changes
+* Enforce the existing contract of SingleDelete so that SingleDelete cannot be mixed with Delete because it leads to undefined behavior. Fix a number of unit tests that violate the contract but happen to pass.
+* ldb `--try_load_options` default to true if `--db` is specified and not creating a new DB, the user can still explicitly disable that by `--try_load_options=false` (or explicitly enable that by `--try_load_options`).
+* During Flush write or Compaction write/read, the WriteController is used to determine whether DB writes are stalled or slowed down. The priority (Env::IOPriority) can then be determined accordingly and be passed in IOOptions to the file system.
+
 
 ## 7.2.0 (04/15/2022)
 ### Bug Fixes
diff -pruN 7.2.2-5/include/rocksdb/advanced_options.h 7.3.1-2/include/rocksdb/advanced_options.h
--- 7.2.2-5/include/rocksdb/advanced_options.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/advanced_options.h	2022-06-08 21:08:16.000000000 +0000
@@ -100,8 +100,9 @@ struct CompressionOptions {
   //
   // The dictionary is created by sampling the SST file data. If
   // `zstd_max_train_bytes` is nonzero, the samples are passed through zstd's
-  // dictionary generator. Otherwise, the random samples are used directly as
-  // the dictionary.
+  // dictionary generator (see comments for option `use_zstd_dict_trainer` for
+  // detail on dictionary generator). If `zstd_max_train_bytes` is zero, the
+  // random samples are used directly as the dictionary.
   //
   // When compression dictionary is disabled, we compress and write each block
   // before buffering data for the next one. When compression dictionary is
@@ -173,6 +174,20 @@ struct CompressionOptions {
   // Default: 0 (unlimited)
   uint64_t max_dict_buffer_bytes;
 
+  // Use zstd trainer to generate dictionaries. When this option is set to true,
+  // zstd_max_train_bytes of training data sampled from max_dict_buffer_bytes
+  // buffered data will be passed to zstd dictionary trainer to generate a
+  // dictionary of size max_dict_bytes.
+  //
+  // When this option is false, zstd's API ZDICT_finalizeDictionary() will be
+  // called to generate dictionaries. zstd_max_train_bytes of training sampled
+  // data will be passed to this API. Using this API should save CPU time on
+  // dictionary training, but the compression ratio may not be as good as using
+  // a dictionary trainer.
+  //
+  // Default: true
+  bool use_zstd_dict_trainer;
+
   CompressionOptions()
       : window_bits(-14),
         level(kDefaultCompressionLevel),
@@ -181,11 +196,13 @@ struct CompressionOptions {
         zstd_max_train_bytes(0),
         parallel_threads(1),
         enabled(false),
-        max_dict_buffer_bytes(0) {}
+        max_dict_buffer_bytes(0),
+        use_zstd_dict_trainer(true) {}
   CompressionOptions(int wbits, int _lev, int _strategy,
                      uint32_t _max_dict_bytes, uint32_t _zstd_max_train_bytes,
                      uint32_t _parallel_threads, bool _enabled,
-                     uint64_t _max_dict_buffer_bytes)
+                     uint64_t _max_dict_buffer_bytes,
+                     bool _use_zstd_dict_trainer)
       : window_bits(wbits),
         level(_lev),
         strategy(_strategy),
@@ -193,7 +210,8 @@ struct CompressionOptions {
         zstd_max_train_bytes(_zstd_max_train_bytes),
         parallel_threads(_parallel_threads),
         enabled(_enabled),
-        max_dict_buffer_bytes(_max_dict_buffer_bytes) {}
+        max_dict_buffer_bytes(_max_dict_buffer_bytes),
+        use_zstd_dict_trainer(_use_zstd_dict_trainer) {}
 };
 
 // Temperature of a file. Used to pass to FileSystem for a different
diff -pruN 7.2.2-5/include/rocksdb/cache.h 7.3.1-2/include/rocksdb/cache.h
--- 7.2.2-5/include/rocksdb/cache.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/cache.h	2022-06-08 21:08:16.000000000 +0000
@@ -559,15 +559,15 @@ enum class CacheEntryRole {
   kIndexBlock,
   // Other kinds of block-based table block
   kOtherBlock,
-  // WriteBufferManager reservations to account for memtable usage
+  // WriteBufferManager's charge to account for its memtable usage
   kWriteBuffer,
-  // BlockBasedTableBuilder reservations to account for
-  // compression dictionary building buffer's memory usage
+  // Compression dictionary building buffer's charge to account for
+  // its memory usage
   kCompressionDictionaryBuildingBuffer,
-  // Filter reservations to account for
+  // Filter's charge to account for
   // (new) bloom and ribbon filter construction's memory usage
   kFilterConstruction,
-  // BlockBasedTableReader reservations to account for
+  // BlockBasedTableReader's charge to account for
   // its memory usage
   kBlockBasedTableReader,
   // Default bucket, for miscellaneous cache entries. Do not use for
diff -pruN 7.2.2-5/include/rocksdb/c.h 7.3.1-2/include/rocksdb/c.h
--- 7.2.2-5/include/rocksdb/c.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/c.h	2022-06-08 21:08:16.000000000 +0000
@@ -63,6 +63,7 @@ extern "C" {
 #endif
 
 #include <stdarg.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -438,6 +439,38 @@ extern ROCKSDB_LIBRARY_API void rocksdb_
     const size_t* keys_list_sizes, char** values_list,
     size_t* values_list_sizes, char** errs);
 
+// The MultiGet API that improves performance by batching operations
+// in the read path for greater efficiency. Currently, only the block based
+// table format with full filters are supported. Other table formats such
+// as plain table, block based table with block based filters and
+// partitioned indexes will still work, but will not get any performance
+// benefits.
+//
+// Note that all the keys passed to this API are restricted to a single
+// column family.
+//
+// Parameters -
+// db - the RocksDB instance.
+// options - ReadOptions
+// column_family - ColumnFamilyHandle* that the keys belong to. All the keys
+//                 passed to the API are restricted to a single column family
+// num_keys - Number of keys to lookup
+// keys_list - Pointer to C style array of keys with num_keys elements
+// keys_list_sizes - Pointer to C style array of the size of corresponding key
+//   in key_list with num_keys elements.
+// values - Pointer to C style array of PinnableSlices with num_keys elements
+// errs - Pointer to C style array of error strings with num_keys elements
+// sorted_input - If true, it means the input keys are already sorted by key
+//                order, so the MultiGet() API doesn't have to sort them
+//                again. If false, the keys will be copied and sorted
+//                internally by the API - the input array will not be
+//                modified
+extern ROCKSDB_LIBRARY_API void rocksdb_batched_multi_get_cf(
+    rocksdb_t* db, const rocksdb_readoptions_t* options,
+    rocksdb_column_family_handle_t* column_family, size_t num_keys,
+    const char* const* keys_list, const size_t* keys_list_sizes,
+    rocksdb_pinnableslice_t** values, char** errs, const bool sorted_input);
+
 // The value is only allocated (using malloc) and returned if it is found and
 // value_found isn't NULL. In that case the user is responsible for freeing it.
 extern ROCKSDB_LIBRARY_API unsigned char rocksdb_key_may_exist(
@@ -1008,6 +1041,12 @@ extern ROCKSDB_LIBRARY_API int
 rocksdb_options_get_compression_options_zstd_max_train_bytes(
     rocksdb_options_t* opt);
 extern ROCKSDB_LIBRARY_API void
+rocksdb_options_set_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t*, unsigned char);
+extern ROCKSDB_LIBRARY_API unsigned char
+rocksdb_options_get_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt);
+extern ROCKSDB_LIBRARY_API void
 rocksdb_options_set_compression_options_parallel_threads(rocksdb_options_t*,
                                                          int);
 extern ROCKSDB_LIBRARY_API int
@@ -1026,6 +1065,12 @@ extern ROCKSDB_LIBRARY_API void
 rocksdb_options_set_bottommost_compression_options_zstd_max_train_bytes(
     rocksdb_options_t*, int, unsigned char);
 extern ROCKSDB_LIBRARY_API void
+rocksdb_options_set_bottommost_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t*, unsigned char, unsigned char);
+extern ROCKSDB_LIBRARY_API unsigned char
+rocksdb_options_get_bottommost_compression_options_use_zstd_dict_trainer(
+    rocksdb_options_t* opt);
+extern ROCKSDB_LIBRARY_API void
 rocksdb_options_set_bottommost_compression_options_max_dict_buffer_bytes(
     rocksdb_options_t*, uint64_t, unsigned char);
 extern ROCKSDB_LIBRARY_API void rocksdb_options_set_prefix_extractor(
@@ -1508,7 +1553,8 @@ enum {
   rocksdb_env_lock_file_nanos,
   rocksdb_env_unlock_file_nanos,
   rocksdb_env_new_logger_nanos,
-  rocksdb_total_metric_count = 68
+  rocksdb_number_async_seek,
+  rocksdb_total_metric_count = 69
 };
 
 extern ROCKSDB_LIBRARY_API void rocksdb_set_perf_level(int);
@@ -1766,6 +1812,8 @@ extern ROCKSDB_LIBRARY_API void rocksdb_
 
 extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru(
     size_t capacity);
+extern ROCKSDB_LIBRARY_API rocksdb_cache_t*
+rocksdb_cache_create_lru_with_strict_capacity_limit(size_t capacity);
 extern ROCKSDB_LIBRARY_API rocksdb_cache_t* rocksdb_cache_create_lru_opts(
     rocksdb_lru_cache_options_t*);
 extern ROCKSDB_LIBRARY_API void rocksdb_cache_destroy(rocksdb_cache_t* cache);
diff -pruN 7.2.2-5/include/rocksdb/cleanable.h 7.3.1-2/include/rocksdb/cleanable.h
--- 7.2.2-5/include/rocksdb/cleanable.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/cleanable.h	2022-06-08 21:08:16.000000000 +0000
@@ -19,11 +19,12 @@ class Cleanable {
   Cleanable(Cleanable&) = delete;
   Cleanable& operator=(Cleanable&) = delete;
 
+  // Executes all the registered cleanups
   ~Cleanable();
 
   // Move constructor and move assignment is allowed.
-  Cleanable(Cleanable&&);
-  Cleanable& operator=(Cleanable&&);
+  Cleanable(Cleanable&&) noexcept;
+  Cleanable& operator=(Cleanable&&) noexcept;
 
   // Clients are allowed to register function/arg1/arg2 triples that
   // will be invoked when this iterator is destroyed.
@@ -31,8 +32,14 @@ class Cleanable {
   // Note that unlike all of the preceding methods, this method is
   // not abstract and therefore clients should not override it.
   using CleanupFunction = void (*)(void* arg1, void* arg2);
+
+  // Add another Cleanup to the list
   void RegisterCleanup(CleanupFunction function, void* arg1, void* arg2);
+
+  // Move the cleanups owned by this Cleanable to another Cleanable, adding to
+  // any existing cleanups it has
   void DelegateCleanupsTo(Cleanable* other);
+
   // DoCleanup and also resets the pointers for reuse
   inline void Reset() {
     DoCleanup();
@@ -40,6 +47,8 @@ class Cleanable {
     cleanup_.next = nullptr;
   }
 
+  inline bool HasCleanups() { return cleanup_.function != nullptr; }
+
  protected:
   struct Cleanup {
     CleanupFunction function;
@@ -68,4 +77,52 @@ class Cleanable {
   }
 };
 
+// A copyable, reference-counted pointer to a simple Cleanable that only
+// performs registered cleanups after all copies are destroyed. This is like
+// shared_ptr<Cleanable> but works more efficiently with wrapping the pointer
+// in an outer Cleanable (see RegisterCopyWith() and MoveAsCleanupTo()).
+// WARNING: if you create a reference cycle, for example:
+//   SharedCleanablePtr scp;
+//   scp.Allocate();
+//   scp.RegisterCopyWith(&*scp);
+// It will prevent cleanups from ever happening!
+class SharedCleanablePtr {
+ public:
+  // Empty/null pointer
+  SharedCleanablePtr() {}
+  // Copy and move constructors and assignment
+  SharedCleanablePtr(const SharedCleanablePtr& from);
+  SharedCleanablePtr(SharedCleanablePtr&& from) noexcept;
+  SharedCleanablePtr& operator=(const SharedCleanablePtr& from);
+  SharedCleanablePtr& operator=(SharedCleanablePtr&& from) noexcept;
+  // Destructor (decrement refcount if non-null)
+  ~SharedCleanablePtr();
+  // Create a new simple Cleanable and assign this pointer to it.
+  // (Reset()s first if necessary.)
+  void Allocate();
+  // Reset to empty/null (decrement refcount if previously non-null)
+  void Reset();
+  // Dereference to pointed-to Cleanable
+  Cleanable& operator*();
+  Cleanable* operator->();
+  // Get as raw pointer to Cleanable
+  Cleanable* get();
+
+  // Creates a (virtual) copy of this SharedCleanablePtr and registers its
+  // destruction with target, so that the cleanups registered with the
+  // Cleanable pointed to by this can only happen after the cleanups in the
+  // target Cleanable are run.
+  // No-op if this is empty (nullptr).
+  void RegisterCopyWith(Cleanable* target);
+
+  // Moves (virtually) this shared pointer to a new cleanup in the target.
+  // This is essentially a move semantics version of RegisterCopyWith(), for
+  // performance optimization. No-op if this is empty (nullptr).
+  void MoveAsCleanupTo(Cleanable* target);
+
+ private:
+  struct Impl;
+  Impl* ptr_ = nullptr;
+};
+
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/include/rocksdb/compaction_filter.h 7.3.1-2/include/rocksdb/compaction_filter.h
--- 7.2.2-5/include/rocksdb/compaction_filter.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/compaction_filter.h	2022-06-08 21:08:16.000000000 +0000
@@ -43,6 +43,7 @@ class CompactionFilter : public Customiz
     kRemoveAndSkipUntil,
     kChangeBlobIndex,  // used internally by BlobDB.
     kIOError,          // used internally by BlobDB.
+    kPurge,            // used for keys that can only be SingleDelete'ed
     kUndetermined,
   };
 
diff -pruN 7.2.2-5/include/rocksdb/convenience.h 7.3.1-2/include/rocksdb/convenience.h
--- 7.2.2-5/include/rocksdb/convenience.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/convenience.h	2022-06-08 21:08:16.000000000 +0000
@@ -518,7 +518,8 @@ Status VerifySstFileChecksum(const Optio
 Status VerifySstFileChecksum(const Options& options,
                              const EnvOptions& env_options,
                              const ReadOptions& read_options,
-                             const std::string& file_path);
+                             const std::string& file_path,
+                             const SequenceNumber& largest_seqno = 0);
 #endif  // ROCKSDB_LITE
 
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/include/rocksdb/db.h 7.3.1-2/include/rocksdb/db.h
--- 7.2.2-5/include/rocksdb/db.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/db.h	2022-06-08 21:08:16.000000000 +0000
@@ -1440,39 +1440,6 @@ class DB {
   virtual Status EnableFileDeletions(bool force = true) = 0;
 
 #ifndef ROCKSDB_LITE
-  // GetLiveFiles followed by GetSortedWalFiles can generate a lossless backup
-
-  // Retrieve the list of all files in the database. The files are
-  // relative to the dbname and are not absolute paths. Despite being relative
-  // paths, the file names begin with "/". The valid size of the manifest file
-  // is returned in manifest_file_size. The manifest file is an ever growing
-  // file, but only the portion specified by manifest_file_size is valid for
-  // this snapshot. Setting flush_memtable to true does Flush before recording
-  // the live files. Setting flush_memtable to false is useful when we don't
-  // want to wait for flush which may have to wait for compaction to complete
-  // taking an indeterminate time.
-  //
-  // In case you have multiple column families, even if flush_memtable is true,
-  // you still need to call GetSortedWalFiles after GetLiveFiles to compensate
-  // for new data that arrived to already-flushed column families while other
-  // column families were flushing
-  virtual Status GetLiveFiles(std::vector<std::string>&,
-                              uint64_t* manifest_file_size,
-                              bool flush_memtable = true) = 0;
-
-  // Retrieve the sorted list of all wal files with earliest file first
-  virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
-
-  // Retrieve information about the current wal file
-  //
-  // Note that the log might have rolled after this call in which case
-  // the current_log_file would not point to the current log file.
-  //
-  // Additionally, for the sake of optimization current_log_file->StartSequence
-  // would always be set to 0
-  virtual Status GetCurrentWalFile(
-      std::unique_ptr<LogFile>* current_log_file) = 0;
-
   // Retrieves the creation time of the oldest file in the DB.
   // This API only works if max_open_files = -1, if it is not then
   // Status returned is Status::NotSupported()
@@ -1517,26 +1484,30 @@ class DB {
   // path relative to the db directory. eg. 000001.sst, /archive/000003.log
   virtual Status DeleteFile(std::string name) = 0;
 
-  // Returns a list of all table files with their level, start key
-  // and end key
+  // Obtains a list of all live table (SST) files and how they fit into the
+  // LSM-trees, such as column family, level, key range, etc.
+  // This builds a de-normalized form of GetAllColumnFamilyMetaData().
+  // For information about all files in a DB, use GetLiveFilesStorageInfo().
   virtual void GetLiveFilesMetaData(
       std::vector<LiveFileMetaData>* /*metadata*/) {}
 
-  // Return a list of all table and blob files checksum info.
+  // Return a list of all table (SST) and blob files checksum info.
   // Note: This function might be of limited use because it cannot be
-  // synchronized with GetLiveFiles.
+  // synchronized with other "live files" APIs. GetLiveFilesStorageInfo()
+  // is recommended instead.
   virtual Status GetLiveFilesChecksumInfo(FileChecksumList* checksum_list) = 0;
 
-  // EXPERIMENTAL: This function is not yet feature-complete.
   // Get information about all live files that make up a DB, for making
   // live copies (Checkpoint, backups, etc.) or other storage-related purposes.
-  // Use DisableFileDeletions() before and EnableFileDeletions() after to
-  // preserve the files for live copy.
+  // If creating a live copy, use DisableFileDeletions() before and
+  // EnableFileDeletions() after to prevent deletions.
+  // For LSM-tree metadata, use Get*MetaData() functions instead.
   virtual Status GetLiveFilesStorageInfo(
       const LiveFilesStorageInfoOptions& opts,
       std::vector<LiveFileStorageInfo>* files) = 0;
 
-  // Obtains the meta data of the specified column family of the DB.
+  // Obtains the LSM-tree meta data of the specified column family of the DB,
+  // including metadata for each live table (SST) file in that column family.
   virtual void GetColumnFamilyMetaData(ColumnFamilyHandle* /*column_family*/,
                                        ColumnFamilyMetaData* /*metadata*/) {}
 
@@ -1545,12 +1516,43 @@ class DB {
     GetColumnFamilyMetaData(DefaultColumnFamily(), metadata);
   }
 
-  // Obtains the meta data of all column families for the DB.
-  // The returned map contains one entry for each column family indexed by the
-  // name of the column family.
+  // Obtains the LSM-tree meta data of all column families of the DB,
+  // including metadata for each live table (SST) file in the DB.
   virtual void GetAllColumnFamilyMetaData(
       std::vector<ColumnFamilyMetaData>* /*metadata*/) {}
 
+  // Retrieve the list of all files in the database except WAL files. The files
+  // are relative to the dbname (or db_paths/cf_paths), not absolute paths.
+  // (Not recommended with db_paths/cf_paths because that information is not
+  // returned.) Despite being relative paths, the file names begin with "/".
+  // The valid size of the manifest file is returned in manifest_file_size.
+  // The manifest file is an ever growing file, but only the portion specified
+  // by manifest_file_size is valid for this snapshot. Setting flush_memtable
+  // to true does Flush before recording the live files. Setting flush_memtable
+  // to false is useful when we don't want to wait for flush which may have to
+  // wait for compaction to complete taking an indeterminate time.
+  //
+  // NOTE: Although GetLiveFiles() followed by GetSortedWalFiles() can generate
+  // a lossless backup, GetLiveFilesStorageInfo() is strongly recommended
+  // instead, because it ensures a single consistent view of all files is
+  // captured in one call.
+  virtual Status GetLiveFiles(std::vector<std::string>&,
+                              uint64_t* manifest_file_size,
+                              bool flush_memtable = true) = 0;
+
+  // Retrieve the sorted list of all wal files with earliest file first
+  virtual Status GetSortedWalFiles(VectorLogPtr& files) = 0;
+
+  // Retrieve information about the current wal file
+  //
+  // Note that the log might have rolled after this call in which case
+  // the current_log_file would not point to the current log file.
+  //
+  // Additionally, for the sake of optimization current_log_file->StartSequence
+  // would always be set to 0
+  virtual Status GetCurrentWalFile(
+      std::unique_ptr<LogFile>* current_log_file) = 0;
+
   // IngestExternalFile() will load a list of external SST files (1) into the DB
   // Two primary modes are supported:
   // - Duplicate keys in the new files will overwrite exiting keys (default)
diff -pruN 7.2.2-5/include/rocksdb/env.h 7.3.1-2/include/rocksdb/env.h
--- 7.2.2-5/include/rocksdb/env.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/env.h	2022-06-08 21:08:16.000000000 +0000
@@ -796,7 +796,7 @@ class RandomAccessFile {
   // should return after all reads have completed. The reads will be
   // non-overlapping. If the function return Status is not ok, status of
   // individual requests will be ignored and return status will be assumed
-  // for all read requests. The function return status is only meant for any
+  // for all read requests. The function return status is only meant for
   // any errors that occur before even processing specific read requests
   virtual Status MultiRead(ReadRequest* reqs, size_t num_reqs) {
     assert(reqs != nullptr);
diff -pruN 7.2.2-5/include/rocksdb/file_system.h 7.3.1-2/include/rocksdb/file_system.h
--- 7.2.2-5/include/rocksdb/file_system.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/file_system.h	2022-06-08 21:08:16.000000000 +0000
@@ -53,6 +53,7 @@ struct ConfigOptions;
 using AccessPattern = RandomAccessFile::AccessPattern;
 using FileAttributes = Env::FileAttributes;
 
+// DEPRECATED
 // Priority of an IO request. This is a hint and does not guarantee any
 // particular QoS.
 // IO_LOW - Typically background reads/writes such as compaction/flush
@@ -86,6 +87,7 @@ struct IOOptions {
   // Timeout for the operation in microseconds
   std::chrono::microseconds timeout;
 
+  // DEPRECATED
   // Priority - high or low
   IOPriority prio;
 
diff -pruN 7.2.2-5/include/rocksdb/iostats_context.h 7.3.1-2/include/rocksdb/iostats_context.h
--- 7.2.2-5/include/rocksdb/iostats_context.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/iostats_context.h	2022-06-08 21:08:16.000000000 +0000
@@ -81,10 +81,8 @@ struct IOStatsContext {
 // If RocksDB is compiled with -DNIOSTATS_CONTEXT, then a pointer to a global,
 // non-thread-local IOStatsContext object will be returned. Attempts to update
 // this object will be ignored, and reading from it will also be no-op.
-// Otherwise,
-// a) if thread-local is supported on the platform, then a pointer to
-//    a thread-local IOStatsContext object will be returned.
-// b) if thread-local is NOT supported, then compilation will fail.
+// Otherwise, a pointer to a thread-local IOStatsContext object will be
+// returned.
 //
 // This function never returns nullptr.
 IOStatsContext* get_iostats_context();
diff -pruN 7.2.2-5/include/rocksdb/memtablerep.h 7.3.1-2/include/rocksdb/memtablerep.h
--- 7.2.2-5/include/rocksdb/memtablerep.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/memtablerep.h	2022-06-08 21:08:16.000000000 +0000
@@ -300,6 +300,9 @@ class MemTableRepFactory : public Custom
   static Status CreateFromString(const ConfigOptions& config_options,
                                  const std::string& id,
                                  std::unique_ptr<MemTableRepFactory>* factory);
+  static Status CreateFromString(const ConfigOptions& config_options,
+                                 const std::string& id,
+                                 std::shared_ptr<MemTableRepFactory>* factory);
 
   virtual MemTableRep* CreateMemTableRep(const MemTableRep::KeyComparator&,
                                          Allocator*, const SliceTransform*,
diff -pruN 7.2.2-5/include/rocksdb/options.h 7.3.1-2/include/rocksdb/options.h
--- 7.2.2-5/include/rocksdb/options.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/options.h	2022-06-08 21:08:16.000000000 +0000
@@ -492,6 +492,23 @@ struct DBOptions {
   // Default: false
   bool track_and_verify_wals_in_manifest = false;
 
+  // EXPERIMENTAL: This API/behavior is subject to change
+  // If true, during DB-open it verifies the SST unique id between MANIFEST
+  // and SST properties, which is to make sure the SST is not overwritten or
+  // misplaced. A corruption error will be reported if mismatch detected, but
+  // only when MANIFEST tracks the unique id, which starts from version 7.3.
+  // The unique id is an internal unique id and subject to change.
+  //
+  // Note:
+  // 1. if enabled, it opens every SST files during DB open to read the unique
+  //    id from SST properties, so it's recommended to have `max_open_files=-1`
+  //    to pre-open the SST files before the verification.
+  // 2. existing SST files won't have its unique_id tracked in MANIFEST, then
+  //    verification will be skipped.
+  //
+  // Default: false
+  bool verify_sst_unique_id_in_manifest = false;
+
   // Use the specified object to interact with the environment,
   // e.g. to read/write files, schedule background work, etc. In the near
   // future, support for doing storage operations such as read/write files
@@ -552,7 +569,7 @@ struct DBOptions {
   // compaction. For universal-style compaction, you can usually set it to -1.
   //
   // A high value or -1 for this option can cause high memory usage.
-  // See BlockBasedTableOptions::reserve_table_reader_memory to constrain
+  // See BlockBasedTableOptions::cache_usage_options to constrain
   // memory usage in case of block based table format.
   //
   // Default: -1
@@ -767,6 +784,14 @@ struct DBOptions {
 
   // Allow the OS to mmap file for reading sst tables.
   // Not recommended for 32-bit OS.
+  // When the option is set to true and compression is disabled, the blocks
+  // will not be copied and will be read directly from the mmap-ed memory
+  // area, and the block will not be inserted into the block cache. However,
+  // checksums will still be checked if ReadOptions.verify_checksums is set
+  // to be true. It means a checksum check every time a block is read, more
+  // than the setup where the option is set to false and the block cache is
+  // used. The common use of the options is to run RocksDB on ramfs, where
+  // checksum verification is usually not needed.
   // Default: false
   bool allow_mmap_reads = false;
 
@@ -1142,8 +1167,7 @@ struct DBOptions {
 #endif  // ROCKSDB_LITE
 
   // If true, then DB::Open / CreateColumnFamily / DropColumnFamily
-  // / SetOptions will fail if options file is not detected or properly
-  // persisted.
+  // SetOptions will fail if options file is not properly persisted.
   //
   // DEFAULT: false
   bool fail_if_options_file_error = false;
@@ -1329,6 +1353,19 @@ struct DBOptions {
   //
   // Default: kNonVolatileBlockTier
   CacheTier lowest_used_cache_tier = CacheTier::kNonVolatileBlockTier;
+
+  // If set to false, when compaction or flush sees a SingleDelete followed by
+  // a Delete for the same user key, compaction job will not fail.
+  // Otherwise, compaction job will fail.
+  // This is a temporary option to help existing use cases migrate, and
+  // will be removed in a future release.
+  // Warning: do not set to false unless you are trying to migrate existing
+  // data in which the contract of single delete
+  // (https://github.com/facebook/rocksdb/wiki/Single-Delete) is not enforced,
+  // thus has Delete mixed with SingleDelete for the same user key. Violation
+  // of the contract leads to undefined behaviors with high possibility of data
+  // inconsistency, e.g. deleted old data become visible again, etc.
+  bool enforce_single_del_contracts = true;
 };
 
 // Options to control the behavior of a database (passed to DB::Open)
@@ -1952,6 +1989,11 @@ struct CompactionServiceOptionsOverride
   // returned to CompactionService primary host, to collect that, the user needs
   // to set it here.
   std::shared_ptr<Statistics> statistics = nullptr;
+
+  // Only compaction generated SST files use this user defined table properties
+  // collector.
+  std::vector<std::shared_ptr<TablePropertiesCollectorFactory>>
+      table_properties_collector_factories;
 };
 
 struct OpenAndCompactOptions {
diff -pruN 7.2.2-5/include/rocksdb/perf_context.h 7.3.1-2/include/rocksdb/perf_context.h
--- 7.2.2-5/include/rocksdb/perf_context.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/perf_context.h	2022-06-08 21:08:16.000000000 +0000
@@ -229,6 +229,8 @@ struct PerfContext {
   // Time spent in decrypting data. Populated when EncryptedEnv is used.
   uint64_t decrypt_data_nanos;
 
+  uint64_t number_async_seek;
+
   std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr;
   bool per_level_perf_context_enabled = false;
 };
diff -pruN 7.2.2-5/include/rocksdb/statistics.h 7.3.1-2/include/rocksdb/statistics.h
--- 7.2.2-5/include/rocksdb/statistics.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/statistics.h	2022-06-08 21:08:16.000000000 +0000
@@ -543,6 +543,9 @@ enum Histograms : uint32_t {
   // Number of prefetched bytes discarded by RocksDB.
   PREFETCHED_BYTES_DISCARDED,
 
+  // Number of IOs issued in parallel in a MultiGet batch
+  MULTIGET_IO_BATCH_SIZE,
+
   HISTOGRAM_ENUM_MAX,
 };
 
diff -pruN 7.2.2-5/include/rocksdb/table.h 7.3.1-2/include/rocksdb/table.h
--- 7.2.2-5/include/rocksdb/table.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/table.h	2022-06-08 21:08:16.000000000 +0000
@@ -22,6 +22,7 @@
 #include <string>
 #include <unordered_map>
 
+#include "rocksdb/cache.h"
 #include "rocksdb/customizable.h"
 #include "rocksdb/env.h"
 #include "rocksdb/options.h"
@@ -104,6 +105,23 @@ struct MetadataCacheOptions {
   PinningTier unpartitioned_pinning = PinningTier::kFallback;
 };
 
+struct CacheEntryRoleOptions {
+  enum class Decision {
+    kEnabled,
+    kDisabled,
+    kFallback,
+  };
+  Decision charged = Decision::kFallback;
+  bool operator==(const CacheEntryRoleOptions& other) const {
+    return charged == other.charged;
+  }
+};
+
+struct CacheUsageOptions {
+  CacheEntryRoleOptions options;
+  std::map<CacheEntryRole, CacheEntryRoleOptions> options_overrides;
+};
+
 // For advanced user only
 struct BlockBasedTableOptions {
   static const char* kName() { return "BlockTableOptions"; };
@@ -287,47 +305,80 @@ struct BlockBasedTableOptions {
   // separately
   uint64_t metadata_block_size = 4096;
 
-  // If true, a dynamically updating charge to block cache, loosely based
-  // on the actual memory usage of table building, will occur to account
-  // the memory, if block cache available.
-  //
-  // Charged memory usage includes:
-  // 1. Bloom Filter (format_version >= 5) and Ribbon Filter construction
-  // 2. More to come...
-  //
-  // Note:
-  // 1. Bloom Filter (format_version >= 5) and Ribbon Filter construction
-  //
-  // If additional temporary memory of Ribbon Filter uses up too much memory
-  // relative to the avaible space left in the block cache
+  // `cache_usage_options` allows users to specify the default
+  // options (`cache_usage_options.options`) and the overriding
+  // options (`cache_usage_options.options_overrides`)
+  // for different `CacheEntryRole` under various features related to cache
+  // usage.
+  //
+  // For a certain `CacheEntryRole role` and a certain feature `f` of
+  // `CacheEntryRoleOptions`:
+  // 1. If `options_overrides` has an entry for `role` and
+  // `options_overrides[role].f != kFallback`, we use
+  // `options_overrides[role].f`
+  // 2. Otherwise, if `options[role].f != kFallback`, we use `options[role].f`
+  // 3. Otherwise, we follow the compatible existing behavior for `f` (see
+  // each feature's comment for more)
+  //
+  // `cache_usage_options` currently supports specifying options for the
+  // following features:
+  //
+  // 1. Memory charging to block cache (`CacheEntryRoleOptions::charged`)
+  // Memory charging is a feature of accounting memory usage of specific area
+  // (represented by `CacheEntryRole`) toward usage in block cache (if
+  // available), by updating a dynamical charge to the block cache loosely based
+  // on the actual memory usage of that area.
+  //
+  // (a) CacheEntryRole::kCompressionDictionaryBuildingBuffer
+  // (i) If kEnabled:
+  // Charge memory usage of the buffered data used as training samples for
+  // dictionary compression.
+  // If such memory usage exceeds the avaible space left in the block cache
   // at some point (i.e, causing a cache full under
-  // LRUCacheOptions::strict_capacity_limit = true), construction will fall back
-  // to Bloom Filter.
+  // `LRUCacheOptions::strict_capacity_limit` = true), the data will then be
+  // unbuffered.
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kEnabled.
+  //
+  // (b) CacheEntryRole::kFilterConstruction
+  // (i) If kEnabled:
+  // Charge memory usage of Bloom Filter
+  // (format_version >= 5) and Ribbon Filter construction.
+  // If additional temporary memory of Ribbon Filter exceeds the avaible
+  // space left in the block cache at some point (i.e, causing a cache full
+  // under `LRUCacheOptions::strict_capacity_limit` = true),
+  // construction will fall back to Bloom Filter.
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kDisabled.
+  //
+  // (c) CacheEntryRole::kBlockBasedTableReader
+  // (i) If kEnabled:
+  // Charge memory usage of table properties +
+  // index block/filter block/uncompression dictionary (when stored in table
+  // reader i.e, BlockBasedTableOptions::cache_index_and_filter_blocks ==
+  // false) + some internal data structures during table reader creation.
+  // If such a table reader exceeds
+  // the avaible space left in the block cache at some point (i.e, causing
+  // a cache full under `LRUCacheOptions::strict_capacity_limit` = true),
+  // creation will fail with Status::MemoryLimit().
+  // (ii) If kDisabled:
+  // Does not charge the memory usage mentioned above.
+  // (iii) Compatible existing behavior:
+  // Same as kDisabled.
+  //
+  // (d) Other CacheEntryRole
+  // Not supported.
+  // `Status::kNotSupported` will be returned if
+  // `CacheEntryRoleOptions::charged` is set to {`kEnabled`, `kDisabled`}.
   //
-  // Default: false
-  bool reserve_table_builder_memory = false;
-
-  // If true, a dynamically updating charge to block cache, loosely based
-  // on the actual memory usage of table reader, will occur to account
-  // the memory, if block cache available.
-  //
-  // Charged memory usage includes:
-  // 1. Table properties
-  // 2. Index block/Filter block/Uncompression dictionary if stored in table
-  // reader (i.e, BlockBasedTableOptions::cache_index_and_filter_blocks ==
-  // false)
-  // 3. Some internal data structures
-  // 4. More to come...
-  //
-  // Note:
-  // If creation of a table reader uses up too much memory
-  // relative to the avaible space left in the block cache
-  // at some point (i.e, causing a cache full under
-  // LRUCacheOptions::strict_capacity_limit = true), such creation will fail
-  // with Status::MemoryLimit().
   //
-  // Default: false
-  bool reserve_table_reader_memory = false;
+  // 2. More to come ...
+  //
+  CacheUsageOptions cache_usage_options;
 
   // Note: currently this option requires kTwoLevelIndexSearch to be set as
   // well.
diff -pruN 7.2.2-5/include/rocksdb/thread_status.h 7.3.1-2/include/rocksdb/thread_status.h
--- 7.2.2-5/include/rocksdb/thread_status.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/thread_status.h	2022-06-08 21:08:16.000000000 +0000
@@ -22,8 +22,7 @@
 
 #include "rocksdb/rocksdb_namespace.h"
 
-#if !defined(ROCKSDB_LITE) && !defined(NROCKSDB_THREAD_STATUS) && \
-    defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
+#if !defined(ROCKSDB_LITE) && !defined(NROCKSDB_THREAD_STATUS)
 #define ROCKSDB_USING_THREAD_STATUS
 #endif
 
diff -pruN 7.2.2-5/include/rocksdb/unique_id.h 7.3.1-2/include/rocksdb/unique_id.h
--- 7.2.2-5/include/rocksdb/unique_id.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/unique_id.h	2022-06-08 21:08:16.000000000 +0000
@@ -9,9 +9,7 @@
 
 namespace ROCKSDB_NAMESPACE {
 
-// EXPERIMENTAL: This API is subject to change
-//
-// Computes a stable, universally unique 192-bit (24 binary char) identifier
+// Computes a stable, universally unique 128-bit (16 binary char) identifier
 // for an SST file from TableProperties. This is supported for table (SST)
 // files created with RocksDB 6.24 and later. NotSupported will be returned
 // for other cases. The first 16 bytes (128 bits) is of sufficient quality
@@ -22,22 +20,33 @@ namespace ROCKSDB_NAMESPACE {
 // .c_str() on the result will often result in information loss and very
 // poor uniqueness probability.
 //
-// More detail: the first 128 bits are *guaranteed* unique for SST files
+// More detail: the value is *guaranteed* unique for SST files
 // generated in the same process (even different DBs, RocksDB >= 6.26),
 // and first 128 bits are guaranteed not "all zeros" (RocksDB >= 6.26)
 // so that the "all zeros" value can be used reliably for a null ID.
-// Assuming one generates many SST files in the lifetime of each process,
-// the probability of collision between processes is "better than
-// random": if processes generate n SST files on average, we expect to
-// generate roughly 2^64 * sqrt(n) files before first collision in the
-// first 128 bits. See https://github.com/pdillinger/unique_id
-// Using the full 192 bits, we expect to generate roughly 2^96 * sqrt(n)
-// files before first collision.
+// These IDs are more than sufficient for SST uniqueness within each of
+// many DBs or hosts. For an extreme example assuming random IDs, consider
+// 10^9 hosts each with 10^9 live SST files being replaced at 10^6/second.
+// Such a service would need to run for 10 million years to see an ID
+// collision among live SST files on any host.
+//
+// And assuming one generates many SST files in the lifetime of each process,
+// the probability of ID collisions is much "better than random"; see
+// https://github.com/pdillinger/unique_id
 Status GetUniqueIdFromTableProperties(const TableProperties &props,
                                       std::string *out_id);
 
-// EXPERIMENTAL: This API is subject to change
-//
+// Computes a 192-bit (24 binary char) stable, universally unique ID
+// with an extra 64 bits of uniqueness compared to the standard ID. It is only
+// appropriate to use this ID instead of the 128-bit ID if ID collisions
+// between files among any hosts in a vast fleet is a problem, such as a shared
+// global namespace for SST file backups. Under this criteria, the extreme
+// example above would expect a global file ID collision every 4 days with
+// 128-bit IDs (using some worst-case assumptions about process lifetime).
+// It's 10^17 years with 192-bit IDs.
+Status GetExtendedUniqueIdFromTableProperties(const TableProperties &props,
+                                              std::string *out_id);
+
 // Converts a binary string (unique id) to hexadecimal, with each 64 bits
 // separated by '-', e.g. 6474DF650323BDF0-B48E64F3039308CA-17284B32E7F7444B
 // Also works on unique id prefix.
diff -pruN 7.2.2-5/include/rocksdb/utilities/customizable_util.h 7.3.1-2/include/rocksdb/utilities/customizable_util.h
--- 7.2.2-5/include/rocksdb/utilities/customizable_util.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/utilities/customizable_util.h	2022-06-08 21:08:16.000000000 +0000
@@ -2,6 +2,15 @@
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).
+//
+// The methods in this file are used to instantiate new Customizable
+// instances of objects.  These methods are most typically used by
+// the "CreateFromString" method of a customizable class.
+// If not developing a new Type of customizable class, you probably
+// do not need the methods in this file.
+//
+// See https://github.com/facebook/rocksdb/wiki/RocksDB-Configurable-Objects
+// for more information on how to develop and use customizable objects
 
 #pragma once
 #include <functional>
diff -pruN 7.2.2-5/include/rocksdb/utilities/ldb_cmd.h 7.3.1-2/include/rocksdb/utilities/ldb_cmd.h
--- 7.2.2-5/include/rocksdb/utilities/ldb_cmd.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/utilities/ldb_cmd.h	2022-06-08 21:08:16.000000000 +0000
@@ -70,6 +70,7 @@ class LDBCommand {
   static const std::string ARG_BLOB_GARBAGE_COLLECTION_FORCE_THRESHOLD;
   static const std::string ARG_BLOB_COMPACTION_READAHEAD_SIZE;
   static const std::string ARG_DECODE_BLOB_INDEX;
+  static const std::string ARG_DUMP_UNCOMPRESSED_BLOBS;
 
   struct ParsedParams {
     std::string cmd;
@@ -287,6 +288,9 @@ class LDBCommand {
   bool IsValueHex(const std::map<std::string, std::string>& options,
                   const std::vector<std::string>& flags);
 
+  bool IsTryLoadOptions(const std::map<std::string, std::string>& options,
+                        const std::vector<std::string>& flags);
+
   /**
    * Converts val to a boolean.
    * val must be either true or false (case insensitive).
diff -pruN 7.2.2-5/include/rocksdb/utilities/object_registry.h 7.3.1-2/include/rocksdb/utilities/object_registry.h
--- 7.2.2-5/include/rocksdb/utilities/object_registry.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/utilities/object_registry.h	2022-06-08 21:08:16.000000000 +0000
@@ -13,6 +13,7 @@
 #include <mutex>
 #include <string>
 #include <unordered_map>
+#include <unordered_set>
 #include <vector>
 
 #include "rocksdb/status.h"
@@ -217,6 +218,18 @@ class ObjectLibrary {
   // @param num_types returns how many unique types are registered.
   size_t GetFactoryCount(size_t* num_types) const;
 
+  // Returns the number of factories registered for this library
+  // for the input type.
+  // @param num_types returns how many unique types are registered.
+  size_t GetFactoryCount(const std::string& type) const;
+
+  // Returns the registered factory names for the input type
+  // names is updated to include the names for the type
+  void GetFactoryNames(const std::string& type,
+                       std::vector<std::string>* names) const;
+
+  void GetFactoryTypes(std::unordered_set<std::string>* types) const;
+
   void Dump(Logger* logger) const;
 
   // Registers the factory with the library for the name.
@@ -497,6 +510,18 @@ class ObjectRegistry {
     }
   }
 
+  // Returns the number of factories registered for this library
+  // for the input type.
+  // @param num_types returns how many unique types are registered.
+  size_t GetFactoryCount(const std::string& type) const;
+
+  // Returns the names of registered factories for the input type.
+  // names is updated to include the names for the type
+  void GetFactoryNames(const std::string& type,
+                       std::vector<std::string>* names) const;
+
+  void GetFactoryTypes(std::unordered_set<std::string>* types) const;
+
   // Dump the contents of the registry to the logger
   void Dump(Logger* logger) const;
 
diff -pruN 7.2.2-5/include/rocksdb/utilities/options_type.h 7.3.1-2/include/rocksdb/utilities/options_type.h
--- 7.2.2-5/include/rocksdb/utilities/options_type.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/utilities/options_type.h	2022-06-08 21:08:16.000000000 +0000
@@ -2,6 +2,15 @@
 //  This source code is licensed under both the GPLv2 (found in the
 //  COPYING file in the root directory) and Apache 2.0 License
 //  (found in the LICENSE.Apache file in the root directory).
+//
+// The OptionTypeInfo and related classes provide a framework for
+// configuring and validating RocksDB classes via the Options framework.
+// This file is part of the public API to allow developers who wish to
+// write their own extensions and plugins to take use the Options
+// framework in their custom implementations.
+//
+// See https://github.com/facebook/rocksdb/wiki/RocksDB-Configurable-Objects
+// for more information on how to develop and use custom extensions
 
 #pragma once
 
@@ -15,6 +24,8 @@
 
 namespace ROCKSDB_NAMESPACE {
 class OptionTypeInfo;
+struct ColumnFamilyOptions;
+struct DBOptions;
 
 // The underlying "class/type" of the option.
 // This enum is used to determine how the option should
@@ -45,6 +56,7 @@ enum class OptionType {
   kCustomizable,
   kEncodedString,
   kTemperature,
+  kArray,
   kUnknown,
 };
 
@@ -144,6 +156,24 @@ bool SerializeEnum(const std::unordered_
   return false;
 }
 
+template <typename T, size_t kSize>
+Status ParseArray(const ConfigOptions& config_options,
+                  const OptionTypeInfo& elem_info, char separator,
+                  const std::string& name, const std::string& value,
+                  std::array<T, kSize>* result);
+
+template <typename T, size_t kSize>
+Status SerializeArray(const ConfigOptions& config_options,
+                      const OptionTypeInfo& elem_info, char separator,
+                      const std::string& name, const std::array<T, kSize>& vec,
+                      std::string* value);
+
+template <typename T, size_t kSize>
+bool ArraysAreEqual(const ConfigOptions& config_options,
+                    const OptionTypeInfo& elem_info, const std::string& name,
+                    const std::array<T, kSize>& array1,
+                    const std::array<T, kSize>& array2, std::string* mismatch);
+
 template <typename T>
 Status ParseVector(const ConfigOptions& config_options,
                    const OptionTypeInfo& elem_info, char separator,
@@ -196,6 +226,16 @@ using EqualsFunc = std::function<bool(
     const ConfigOptions& /*opts*/, const std::string& /*name*/,
     const void* /*addr1*/, const void* /*addr2*/, std::string* mismatch)>;
 
+// Function for preparing/initializing an option.
+using PrepareFunc =
+    std::function<Status(const ConfigOptions& /*opts*/,
+                         const std::string& /*name*/, void* /*addr*/)>;
+
+// Function for validating an option.
+using ValidateFunc = std::function<Status(
+    const DBOptions& /*db_opts*/, const ColumnFamilyOptions& /*cf_opts*/,
+    const std::string& /*name*/, const void* /*addr*/)>;
+
 // A struct for storing constant option information such as option name,
 // option type, and offset.
 class OptionTypeInfo {
@@ -259,8 +299,9 @@ class OptionTypeInfo {
   static OptionTypeInfo Enum(
       int offset, const std::unordered_map<std::string, T>* const map,
       OptionTypeFlags flags = OptionTypeFlags::kNone) {
-    return OptionTypeInfo(
-        offset, OptionType::kEnum, OptionVerificationType::kNormal, flags,
+    OptionTypeInfo info(offset, OptionType::kEnum,
+                        OptionVerificationType::kNormal, flags);
+    info.SetParseFunc(
         // Uses the map argument to convert the input string into
         // its corresponding enum value.  If value is found in the map,
         // addr is updated to the corresponding map entry.
@@ -275,7 +316,8 @@ class OptionTypeInfo {
           } else {
             return Status::InvalidArgument("No mapping for enum ", name);
           }
-        },
+        });
+    info.SetSerializeFunc(
         // Uses the map argument to convert the input enum into
         // its corresponding string value.  If enum value is found in the map,
         // value is updated to the corresponding string value in the map.
@@ -291,7 +333,8 @@ class OptionTypeInfo {
           } else {
             return Status::InvalidArgument("No mapping for enum ", name);
           }
-        },
+        });
+    info.SetEqualsFunc(
         // Casts addr1 and addr2 to the enum type and returns true if
         // they are equal, false otherwise.
         [](const ConfigOptions&, const std::string&, const void* addr1,
@@ -299,6 +342,7 @@ class OptionTypeInfo {
           return (*static_cast<const T*>(addr1) ==
                   *static_cast<const T*>(addr2));
         });
+    return info;
   }  // End OptionTypeInfo::Enum
 
   // Creates an OptionTypeInfo for a Struct type.  Structs have a
@@ -327,21 +371,23 @@ class OptionTypeInfo {
       const std::string& struct_name,
       const std::unordered_map<std::string, OptionTypeInfo>* struct_map,
       int offset, OptionVerificationType verification, OptionTypeFlags flags) {
-    return OptionTypeInfo(
-        offset, OptionType::kStruct, verification, flags,
+    OptionTypeInfo info(offset, OptionType::kStruct, verification, flags);
+    info.SetParseFunc(
         // Parses the struct and updates the fields at addr
         [struct_name, struct_map](const ConfigOptions& opts,
                                   const std::string& name,
                                   const std::string& value, void* addr) {
           return ParseStruct(opts, struct_name, struct_map, name, value, addr);
-        },
+        });
+    info.SetSerializeFunc(
         // Serializes the struct options into value
         [struct_name, struct_map](const ConfigOptions& opts,
                                   const std::string& name, const void* addr,
                                   std::string* value) {
           return SerializeStruct(opts, struct_name, struct_map, name, addr,
                                  value);
-        },
+        });
+    info.SetEqualsFunc(
         // Compares the struct fields of addr1 and addr2 for equality
         [struct_name, struct_map](const ConfigOptions& opts,
                                   const std::string& name, const void* addr1,
@@ -349,26 +395,48 @@ class OptionTypeInfo {
           return StructsAreEqual(opts, struct_name, struct_map, name, addr1,
                                  addr2, mismatch);
         });
+    return info;
   }
   static OptionTypeInfo Struct(
       const std::string& struct_name,
       const std::unordered_map<std::string, OptionTypeInfo>* struct_map,
       int offset, OptionVerificationType verification, OptionTypeFlags flags,
       const ParseFunc& parse_func) {
-    return OptionTypeInfo(
-        offset, OptionType::kStruct, verification, flags, parse_func,
-        [struct_name, struct_map](const ConfigOptions& opts,
-                                  const std::string& name, const void* addr,
-                                  std::string* value) {
-          return SerializeStruct(opts, struct_name, struct_map, name, addr,
-                                 value);
-        },
-        [struct_name, struct_map](const ConfigOptions& opts,
-                                  const std::string& name, const void* addr1,
-                                  const void* addr2, std::string* mismatch) {
-          return StructsAreEqual(opts, struct_name, struct_map, name, addr1,
-                                 addr2, mismatch);
-        });
+    OptionTypeInfo info(
+        Struct(struct_name, struct_map, offset, verification, flags));
+    return info.SetParseFunc(parse_func);
+  }
+
+  template <typename T, size_t kSize>
+  static OptionTypeInfo Array(int _offset, OptionVerificationType _verification,
+                              OptionTypeFlags _flags,
+                              const OptionTypeInfo& elem_info,
+                              char separator = ':') {
+    OptionTypeInfo info(_offset, OptionType::kArray, _verification, _flags);
+    info.SetParseFunc([elem_info, separator](
+                          const ConfigOptions& opts, const std::string& name,
+                          const std::string& value, void* addr) {
+      auto result = static_cast<std::array<T, kSize>*>(addr);
+      return ParseArray<T, kSize>(opts, elem_info, separator, name, value,
+                                  result);
+    });
+    info.SetSerializeFunc([elem_info, separator](const ConfigOptions& opts,
+                                                 const std::string& name,
+                                                 const void* addr,
+                                                 std::string* value) {
+      const auto& array = *(static_cast<const std::array<T, kSize>*>(addr));
+      return SerializeArray<T, kSize>(opts, elem_info, separator, name, array,
+                                      value);
+    });
+    info.SetEqualsFunc([elem_info](const ConfigOptions& opts,
+                                   const std::string& name, const void* addr1,
+                                   const void* addr2, std::string* mismatch) {
+      const auto& array1 = *(static_cast<const std::array<T, kSize>*>(addr1));
+      const auto& array2 = *(static_cast<const std::array<T, kSize>*>(addr2));
+      return ArraysAreEqual<T, kSize>(opts, elem_info, name, array1, array2,
+                                      mismatch);
+    });
+    return info;
   }
 
   template <typename T>
@@ -377,30 +445,28 @@ class OptionTypeInfo {
                                OptionTypeFlags _flags,
                                const OptionTypeInfo& elem_info,
                                char separator = ':') {
-    return OptionTypeInfo(
-        _offset, OptionType::kVector, _verification, _flags,
-        [elem_info, separator](const ConfigOptions& opts,
-                               const std::string& name,
-                               const std::string& value, void* addr) {
-          auto result = static_cast<std::vector<T>*>(addr);
-          return ParseVector<T>(opts, elem_info, separator, name, value,
-                                result);
-        },
-        [elem_info, separator](const ConfigOptions& opts,
-                               const std::string& name, const void* addr,
-                               std::string* value) {
-          const auto& vec = *(static_cast<const std::vector<T>*>(addr));
-          return SerializeVector<T>(opts, elem_info, separator, name, vec,
-                                    value);
-        },
-        [elem_info](const ConfigOptions& opts, const std::string& name,
-                    const void* addr1, const void* addr2,
-                    std::string* mismatch) {
-          const auto& vec1 = *(static_cast<const std::vector<T>*>(addr1));
-          const auto& vec2 = *(static_cast<const std::vector<T>*>(addr2));
-          return VectorsAreEqual<T>(opts, elem_info, name, vec1, vec2,
-                                    mismatch);
-        });
+    OptionTypeInfo info(_offset, OptionType::kVector, _verification, _flags);
+    info.SetParseFunc([elem_info, separator](
+                          const ConfigOptions& opts, const std::string& name,
+                          const std::string& value, void* addr) {
+      auto result = static_cast<std::vector<T>*>(addr);
+      return ParseVector<T>(opts, elem_info, separator, name, value, result);
+    });
+    info.SetSerializeFunc([elem_info, separator](const ConfigOptions& opts,
+                                                 const std::string& name,
+                                                 const void* addr,
+                                                 std::string* value) {
+      const auto& vec = *(static_cast<const std::vector<T>*>(addr));
+      return SerializeVector<T>(opts, elem_info, separator, name, vec, value);
+    });
+    info.SetEqualsFunc([elem_info](const ConfigOptions& opts,
+                                   const std::string& name, const void* addr1,
+                                   const void* addr2, std::string* mismatch) {
+      const auto& vec1 = *(static_cast<const std::vector<T>*>(addr1));
+      const auto& vec2 = *(static_cast<const std::vector<T>*>(addr2));
+      return VectorsAreEqual<T>(opts, elem_info, name, vec1, vec2, mismatch);
+    });
+    return info;
   }
 
   // Create a new std::shared_ptr<Customizable> OptionTypeInfo
@@ -416,7 +482,19 @@ class OptionTypeInfo {
   static OptionTypeInfo AsCustomSharedPtr(int offset,
                                           OptionVerificationType ovt,
                                           OptionTypeFlags flags) {
-    return AsCustomSharedPtr<T>(offset, ovt, flags, nullptr, nullptr);
+    OptionTypeInfo info(offset, OptionType::kCustomizable, ovt,
+                        flags | OptionTypeFlags::kShared);
+    return info.SetParseFunc([](const ConfigOptions& opts,
+                                const std::string& name,
+                                const std::string& value, void* addr) {
+      auto* shared = static_cast<std::shared_ptr<T>*>(addr);
+      if (name == kIdPropName() && value.empty()) {
+        shared->reset();
+        return Status::OK();
+      } else {
+        return T::CreateFromString(opts, value, shared);
+      }
+    });
   }
 
   template <typename T>
@@ -425,20 +503,10 @@ class OptionTypeInfo {
                                           OptionTypeFlags flags,
                                           const SerializeFunc& serialize_func,
                                           const EqualsFunc& equals_func) {
-    return OptionTypeInfo(
-        offset, OptionType::kCustomizable, ovt,
-        flags | OptionTypeFlags::kShared,
-        [](const ConfigOptions& opts, const std::string& name,
-           const std::string& value, void* addr) {
-          auto* shared = static_cast<std::shared_ptr<T>*>(addr);
-          if (name == kIdPropName() && value.empty()) {
-            shared->reset();
-            return Status::OK();
-          } else {
-            return T::CreateFromString(opts, value, shared);
-          }
-        },
-        serialize_func, equals_func);
+    OptionTypeInfo info(AsCustomSharedPtr<T>(offset, ovt, flags));
+    info.SetSerializeFunc(serialize_func);
+    info.SetEqualsFunc(equals_func);
+    return info;
   }
 
   // Create a new std::unique_ptr<Customizable> OptionTypeInfo
@@ -454,7 +522,19 @@ class OptionTypeInfo {
   static OptionTypeInfo AsCustomUniquePtr(int offset,
                                           OptionVerificationType ovt,
                                           OptionTypeFlags flags) {
-    return AsCustomUniquePtr<T>(offset, ovt, flags, nullptr, nullptr);
+    OptionTypeInfo info(offset, OptionType::kCustomizable, ovt,
+                        flags | OptionTypeFlags::kUnique);
+    return info.SetParseFunc([](const ConfigOptions& opts,
+                                const std::string& name,
+                                const std::string& value, void* addr) {
+      auto* unique = static_cast<std::unique_ptr<T>*>(addr);
+      if (name == kIdPropName() && value.empty()) {
+        unique->reset();
+        return Status::OK();
+      } else {
+        return T::CreateFromString(opts, value, unique);
+      }
+    });
   }
 
   template <typename T>
@@ -463,20 +543,10 @@ class OptionTypeInfo {
                                           OptionTypeFlags flags,
                                           const SerializeFunc& serialize_func,
                                           const EqualsFunc& equals_func) {
-    return OptionTypeInfo(
-        offset, OptionType::kCustomizable, ovt,
-        flags | OptionTypeFlags::kUnique,
-        [](const ConfigOptions& opts, const std::string& name,
-           const std::string& value, void* addr) {
-          auto* unique = static_cast<std::unique_ptr<T>*>(addr);
-          if (name == kIdPropName() && value.empty()) {
-            unique->reset();
-            return Status::OK();
-          } else {
-            return T::CreateFromString(opts, value, unique);
-          }
-        },
-        serialize_func, equals_func);
+    OptionTypeInfo info(AsCustomUniquePtr<T>(offset, ovt, flags));
+    info.SetSerializeFunc(serialize_func);
+    info.SetEqualsFunc(equals_func);
+    return info;
   }
 
   // Create a new Customizable* OptionTypeInfo
@@ -491,7 +561,19 @@ class OptionTypeInfo {
   template <typename T>
   static OptionTypeInfo AsCustomRawPtr(int offset, OptionVerificationType ovt,
                                        OptionTypeFlags flags) {
-    return AsCustomRawPtr<T>(offset, ovt, flags, nullptr, nullptr);
+    OptionTypeInfo info(offset, OptionType::kCustomizable, ovt,
+                        flags | OptionTypeFlags::kRawPointer);
+    return info.SetParseFunc([](const ConfigOptions& opts,
+                                const std::string& name,
+                                const std::string& value, void* addr) {
+      auto** pointer = static_cast<T**>(addr);
+      if (name == kIdPropName() && value.empty()) {
+        *pointer = nullptr;
+        return Status::OK();
+      } else {
+        return T::CreateFromString(opts, value, pointer);
+      }
+    });
   }
 
   template <typename T>
@@ -499,20 +581,34 @@ class OptionTypeInfo {
                                        OptionTypeFlags flags,
                                        const SerializeFunc& serialize_func,
                                        const EqualsFunc& equals_func) {
-    return OptionTypeInfo(
-        offset, OptionType::kCustomizable, ovt,
-        flags | OptionTypeFlags::kRawPointer,
-        [](const ConfigOptions& opts, const std::string& name,
-           const std::string& value, void* addr) {
-          auto** pointer = static_cast<T**>(addr);
-          if (name == kIdPropName() && value.empty()) {
-            *pointer = nullptr;
-            return Status::OK();
-          } else {
-            return T::CreateFromString(opts, value, pointer);
-          }
-        },
-        serialize_func, equals_func);
+    OptionTypeInfo info(AsCustomRawPtr<T>(offset, ovt, flags));
+    info.SetSerializeFunc(serialize_func);
+    info.SetEqualsFunc(equals_func);
+    return info;
+  }
+
+  OptionTypeInfo& SetParseFunc(const ParseFunc& f) {
+    parse_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetSerializeFunc(const SerializeFunc& f) {
+    serialize_func_ = f;
+    return *this;
+  }
+  OptionTypeInfo& SetEqualsFunc(const EqualsFunc& f) {
+    equals_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetPrepareFunc(const PrepareFunc& f) {
+    prepare_func_ = f;
+    return *this;
+  }
+
+  OptionTypeInfo& SetValidateFunc(const ValidateFunc& f) {
+    validate_func_ = f;
+    return *this;
   }
 
   bool IsEnabled(OptionTypeFlags otf) const { return (flags_ & otf) == otf; }
@@ -569,6 +665,24 @@ class OptionTypeInfo {
     }
   }
 
+  bool ShouldPrepare() const {
+    if (IsDeprecated() || IsAlias()) {
+      return false;
+    } else if (IsEnabled(OptionTypeFlags::kDontPrepare)) {
+      return false;
+    } else {
+      return (prepare_func_ != nullptr || IsConfigurable());
+    }
+  }
+
+  bool ShouldValidate() const {
+    if (IsDeprecated() || IsAlias()) {
+      return false;
+    } else {
+      return (validate_func_ != nullptr || IsConfigurable());
+    }
+  }
+
   // Returns true if the option is allowed to be null.
   // Options can be null if the verification type is allow from null
   // or if the flags specify allow null.
@@ -599,6 +713,26 @@ class OptionTypeInfo {
 
   bool IsCustomizable() const { return (type_ == OptionType::kCustomizable); }
 
+  inline const void* GetOffset(const void* base) const {
+    return static_cast<const char*>(base) + offset_;
+  }
+
+  inline void* GetOffset(void* base) const {
+    return static_cast<char*>(base) + offset_;
+  }
+
+  template <typename T>
+  const T* GetOffsetAs(const void* base) const {
+    const void* addr = GetOffset(base);
+    return static_cast<const T*>(addr);
+  }
+
+  template <typename T>
+  T* GetOffsetAs(void* base) const {
+    void* addr = GetOffset(base);
+    return static_cast<T*>(addr);
+  }
+
   // Returns the underlying pointer for the type at base_addr
   // The value returned is the underlying "raw" pointer, offset from base.
   template <typename T>
@@ -606,20 +740,17 @@ class OptionTypeInfo {
     if (base_addr == nullptr) {
       return nullptr;
     }
-    const void* opt_addr = static_cast<const char*>(base_addr) + offset_;
     if (IsUniquePtr()) {
-      const std::unique_ptr<T>* ptr =
-          static_cast<const std::unique_ptr<T>*>(opt_addr);
+      const auto ptr = GetOffsetAs<std::unique_ptr<T>>(base_addr);
       return ptr->get();
     } else if (IsSharedPtr()) {
-      const std::shared_ptr<T>* ptr =
-          static_cast<const std::shared_ptr<T>*>(opt_addr);
+      const auto ptr = GetOffsetAs<std::shared_ptr<T>>(base_addr);
       return ptr->get();
     } else if (IsRawPtr()) {
-      const T* const* ptr = static_cast<const T* const*>(opt_addr);
+      const T* const* ptr = GetOffsetAs<T* const>(base_addr);
       return *ptr;
     } else {
-      return static_cast<const T*>(opt_addr);
+      return GetOffsetAs<T>(base_addr);
     }
   }
 
@@ -630,18 +761,17 @@ class OptionTypeInfo {
     if (base_addr == nullptr) {
       return nullptr;
     }
-    void* opt_addr = static_cast<char*>(base_addr) + offset_;
     if (IsUniquePtr()) {
-      std::unique_ptr<T>* ptr = static_cast<std::unique_ptr<T>*>(opt_addr);
+      auto ptr = GetOffsetAs<std::unique_ptr<T>>(base_addr);
       return ptr->get();
     } else if (IsSharedPtr()) {
-      std::shared_ptr<T>* ptr = static_cast<std::shared_ptr<T>*>(opt_addr);
+      auto ptr = GetOffsetAs<std::shared_ptr<T>>(base_addr);
       return ptr->get();
     } else if (IsRawPtr()) {
-      T** ptr = static_cast<T**>(opt_addr);
+      auto ptr = GetOffsetAs<T*>(base_addr);
       return *ptr;
     } else {
-      return static_cast<T*>(opt_addr);
+      return GetOffsetAs<T>(base_addr);
     }
   }
 
@@ -675,6 +805,11 @@ class OptionTypeInfo {
                       const std::string& opt_name, const void* const this_ptr,
                       const std::string& that_value) const;
 
+  Status Prepare(const ConfigOptions& config_options, const std::string& name,
+                 void* opt_ptr) const;
+  Status Validate(const DBOptions& db_opts, const ColumnFamilyOptions& cf_opts,
+                  const std::string& name, const void* opt_ptr) const;
+
   // Parses the input opts_map according to the type_map for the opt_addr
   // For each name-value pair in opts_map, find the corresponding name in
   // type_map If the name is found:
@@ -802,11 +937,151 @@ class OptionTypeInfo {
   // The optional function to match two option values
   EqualsFunc equals_func_;
 
+  PrepareFunc prepare_func_;
+  ValidateFunc validate_func_;
   OptionType type_;
   OptionVerificationType verification_;
   OptionTypeFlags flags_;
 };
 
+// Parses the input value into elements of the result array, which has fixed
+// array size. For example, if the value=1:2:3 and elem_info parses integers,
+// the result array will be {1,2,3}. Array size is defined in the OptionTypeInfo
+// the input value has to match with that.
+// @param config_options Controls how the option value is parsed.
+// @param elem_info Controls how individual tokens in value are parsed
+// @param separator Character separating tokens in values (':' in the above
+// example)
+// @param name      The name associated with this array option
+// @param value     The input string to parse into tokens
+// @param result    Returns the results of parsing value into its elements.
+// @return OK if the value was successfully parse
+// @return InvalidArgument if the value is improperly formed or element number
+//                          doesn't match array size defined in OptionTypeInfo
+//                          or if the token could not be parsed
+// @return NotFound         If the tokenized value contains unknown options for
+// its type
+template <typename T, size_t kSize>
+Status ParseArray(const ConfigOptions& config_options,
+                  const OptionTypeInfo& elem_info, char separator,
+                  const std::string& name, const std::string& value,
+                  std::array<T, kSize>* result) {
+  Status status;
+
+  ConfigOptions copy = config_options;
+  copy.ignore_unsupported_options = false;
+  size_t i = 0, start = 0, end = 0;
+  for (; status.ok() && i < kSize && start < value.size() &&
+         end != std::string::npos;
+       i++, start = end + 1) {
+    std::string token;
+    status = OptionTypeInfo::NextToken(value, separator, start, &end, &token);
+    if (status.ok()) {
+      status = elem_info.Parse(copy, name, token, &((*result)[i]));
+      if (config_options.ignore_unsupported_options &&
+          status.IsNotSupported()) {
+        // If we were ignoring unsupported options and this one should be
+        // ignored, ignore it by setting the status to OK
+        status = Status::OK();
+      }
+    }
+  }
+  if (!status.ok()) {
+    return status;
+  }
+  // make sure the element number matches the array size
+  if (i < kSize) {
+    return Status::InvalidArgument(
+        "Serialized value has less elements than array size", name);
+  }
+  if (start < value.size() && end != std::string::npos) {
+    return Status::InvalidArgument(
+        "Serialized value has more elements than array size", name);
+  }
+  return status;
+}
+
+// Serializes the fixed size input array into its output value.  Elements are
+// separated by the separator character.  This element will convert all of the
+// elements in array into their serialized form, using elem_info to perform the
+// serialization.
+// For example, if the array contains the integers 1,2,3 and elem_info
+// serializes the output would be 1:2:3 for separator ":".
+// @param config_options Controls how the option value is serialized.
+// @param elem_info Controls how individual tokens in value are serialized
+// @param separator Character separating tokens in value (':' in the above
+// example)
+// @param name      The name associated with this array option
+// @param array     The input array to serialize
+// @param value     The output string of serialized options
+// @return OK if the value was successfully parse
+// @return InvalidArgument if the value is improperly formed or if the token
+//                          could not be parsed
+// @return NotFound        If the tokenized value contains unknown options for
+//                          its type
+template <typename T, size_t kSize>
+Status SerializeArray(const ConfigOptions& config_options,
+                      const OptionTypeInfo& elem_info, char separator,
+                      const std::string& name,
+                      const std::array<T, kSize>& array, std::string* value) {
+  std::string result;
+  ConfigOptions embedded = config_options;
+  embedded.delimiter = ";";
+  int printed = 0;
+  for (const auto& elem : array) {
+    std::string elem_str;
+    Status s = elem_info.Serialize(embedded, name, &elem, &elem_str);
+    if (!s.ok()) {
+      return s;
+    } else if (!elem_str.empty()) {
+      if (printed++ > 0) {
+        result += separator;
+      }
+      // If the element contains embedded separators, put it inside of brackets
+      if (elem_str.find(separator) != std::string::npos) {
+        result += "{" + elem_str + "}";
+      } else {
+        result += elem_str;
+      }
+    }
+  }
+  if (result.find("=") != std::string::npos) {
+    *value = "{" + result + "}";
+  } else if (printed > 1 && result.at(0) == '{') {
+    *value = "{" + result + "}";
+  } else {
+    *value = result;
+  }
+  return Status::OK();
+}
+
+// Compares the input arrays array1 and array2 for equality
+// Elements of the array are compared one by one using elem_info to perform the
+// comparison.
+//
+// @param config_options Controls how the arrays are compared.
+// @param elem_info  Controls how individual elements in the arrays are compared
+// @param name          The name associated with this array option
+// @param array1,array2 The arrays to compare.
+// @param mismatch      If the arrays are not equivalent, mismatch will point to
+//                       the first element of the comparison that did not match.
+// @return true         If vec1 and vec2 are "equal", false otherwise
+template <typename T, size_t kSize>
+bool ArraysAreEqual(const ConfigOptions& config_options,
+                    const OptionTypeInfo& elem_info, const std::string& name,
+                    const std::array<T, kSize>& array1,
+                    const std::array<T, kSize>& array2, std::string* mismatch) {
+  assert(array1.size() == kSize);
+  assert(array2.size() == kSize);
+  for (size_t i = 0; i < kSize; ++i) {
+    if (!elem_info.AreEqual(config_options, name, &array1[i], &array2[i],
+                            mismatch)) {
+      return false;
+    }
+  }
+  return true;
+}
+
 // Parses the input value into elements of the result vector.  This method
 // will break the input value into the individual tokens (based on the
 // separator), where each of those tokens will be parsed based on the rules of
diff -pruN 7.2.2-5/include/rocksdb/utilities/transaction_db.h 7.3.1-2/include/rocksdb/utilities/transaction_db.h
--- 7.2.2-5/include/rocksdb/utilities/transaction_db.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/utilities/transaction_db.h	2022-06-08 21:08:16.000000000 +0000
@@ -222,6 +222,20 @@ struct TransactionDBOptions {
   // pending writes into the database. A value of 0 or less means no limit.
   int64_t default_write_batch_flush_threshold = 0;
 
+  // This option is valid only for write-prepared/write-unprepared. Transaction
+  // will rely on this callback to determine if a key should be rolled back
+  // with Delete or SingleDelete when necessary. If the callback returns true,
+  // then SingleDelete should be used. If the callback is not callable or the
+  // callback returns false, then a Delete is used.
+  // The application should ensure thread-safety of this callback.
+  // The callback should not throw because RocksDB is not exception-safe.
+  // The callback may be removed if we allow mixing Delete and SingleDelete in
+  // the future.
+  std::function<bool(TransactionDB* /*db*/,
+                     ColumnFamilyHandle* /*column_family*/,
+                     const Slice& /*key*/)>
+      rollback_deletion_type_callback;
+
  private:
   // 128 entries
   // Should the default value change, please also update wp_snapshot_cache_bits
diff -pruN 7.2.2-5/include/rocksdb/version.h 7.3.1-2/include/rocksdb/version.h
--- 7.2.2-5/include/rocksdb/version.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/version.h	2022-06-08 21:08:16.000000000 +0000
@@ -12,8 +12,8 @@
 // NOTE: in 'main' development branch, this should be the *next*
 // minor or major version number planned for release.
 #define ROCKSDB_MAJOR 7
-#define ROCKSDB_MINOR 2
-#define ROCKSDB_PATCH 2
+#define ROCKSDB_MINOR 3
+#define ROCKSDB_PATCH 1
 
 // Do not use these. We made the mistake of declaring macros starting with
 // double underscore. Now we have to live with our choice. We'll deprecate these
diff -pruN 7.2.2-5/include/rocksdb/write_batch.h 7.3.1-2/include/rocksdb/write_batch.h
--- 7.2.2-5/include/rocksdb/write_batch.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/include/rocksdb/write_batch.h	2022-06-08 21:08:16.000000000 +0000
@@ -63,7 +63,9 @@ struct SavePoint {
 
 class WriteBatch : public WriteBatchBase {
  public:
-  explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0);
+  explicit WriteBatch(size_t reserved_bytes = 0, size_t max_bytes = 0)
+      : WriteBatch(reserved_bytes, max_bytes, 0, 0) {}
+
   // `protection_bytes_per_key` is the number of bytes used to store
   // protection information for each key entry. Currently supported values are
   // zero (disabled) and eight.
@@ -318,8 +320,17 @@ class WriteBatch : public WriteBatchBase
 
    protected:
     friend class WriteBatchInternal;
-    virtual bool WriteAfterCommit() const { return true; }
-    virtual bool WriteBeforePrepare() const { return false; }
+    enum class OptionState {
+      kUnknown,
+      kDisabled,
+      kEnabled,
+    };
+    virtual OptionState WriteAfterCommit() const {
+      return OptionState::kUnknown;
+    }
+    virtual OptionState WriteBeforePrepare() const {
+      return OptionState::kUnknown;
+    }
   };
   Status Iterate(Handler* handler) const;
 
@@ -374,7 +385,7 @@ class WriteBatch : public WriteBatchBase
   //
   // in: cf, the column family id.
   // ret: timestamp size of the given column family. Return
-  //      std::numeric_limits<size_t>::max() indicating "dont know or column
+  //      std::numeric_limits<size_t>::max() indicating "don't know or column
   //      family info not found", this will cause UpdateTimestamps() to fail.
   // size_t ts_sz_func(uint32_t cf);
   Status UpdateTimestamps(const Slice& ts,
@@ -402,6 +413,9 @@ class WriteBatch : public WriteBatchBase
   struct ProtectionInfo;
   size_t GetProtectionBytesPerKey() const;
 
+  // Clears prot_info_ if there are no entries.
+  void ClearProtectionInfoIfEmpty();
+
  private:
   friend class WriteBatchInternal;
   friend class LocalSavePoint;
diff -pruN 7.2.2-5/java/Makefile 7.3.1-2/java/Makefile
--- 7.2.2-5/java/Makefile	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/Makefile	2022-06-08 21:08:16.000000000 +0000
@@ -437,7 +437,8 @@ java_test: java resolve_test_deps
 	$(AM_V_at) $(JAVAC_CMD) $(JAVAC_ARGS) -cp $(MAIN_CLASSES):$(JAVA_TESTCLASSPATH) -h $(NATIVE_INCLUDE) -d $(TEST_CLASSES)\
 		$(TEST_SOURCES)
 
-test: java java_test run_test
+test: java java_test
+	$(MAKE) run_test
 
 run_test:
 	$(JAVA_CMD) $(JAVA_ARGS) -Djava.library.path=target -cp "$(MAIN_CLASSES):$(TEST_CLASSES):$(JAVA_TESTCLASSPATH):target/*" org.rocksdb.test.RocksJunitRunner $(ALL_JAVA_TESTS)
diff -pruN 7.2.2-5/java/rocksjni/compression_options.cc 7.3.1-2/java/rocksjni/compression_options.cc
--- 7.2.2-5/java/rocksjni/compression_options.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/rocksjni/compression_options.cc	2022-06-08 21:08:16.000000000 +0000
@@ -154,6 +154,30 @@ jlong Java_org_rocksdb_CompressionOption
   auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
   return static_cast<jlong>(opt->max_dict_buffer_bytes);
 }
+
+/*
+ * Class:     org_rocksdb_CompressionOptions
+ * Method:    setZstdMaxTrainBytes
+ * Signature: (JZ)V
+ */
+void Java_org_rocksdb_CompressionOptions_setUseZstdDictTrainer(
+    JNIEnv*, jobject, jlong jhandle, jboolean juse_zstd_dict_trainer) {
+  auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
+  opt->use_zstd_dict_trainer = juse_zstd_dict_trainer == JNI_TRUE;
+}
+
+/*
+ * Class:     org_rocksdb_CompressionOptions
+ * Method:    zstdMaxTrainBytes
+ * Signature: (J)Z
+ */
+jboolean Java_org_rocksdb_CompressionOptions_useZstdDictTrainer(JNIEnv*,
+                                                                jobject,
+                                                                jlong jhandle) {
+  auto* opt = reinterpret_cast<ROCKSDB_NAMESPACE::CompressionOptions*>(jhandle);
+  return static_cast<bool>(opt->use_zstd_dict_trainer);
+}
+
 /*
  * Class:     org_rocksdb_CompressionOptions
  * Method:    setEnabled
diff -pruN 7.2.2-5/java/rocksjni/portal.h 7.3.1-2/java/rocksjni/portal.h
--- 7.2.2-5/java/rocksjni/portal.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/rocksjni/portal.h	2022-06-08 21:08:16.000000000 +0000
@@ -5593,6 +5593,8 @@ class HistogramTypeJni {
         return 0x34;
       case ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED:
         return 0x35;
+      case ROCKSDB_NAMESPACE::Histograms::MULTIGET_IO_BATCH_SIZE:
+        return 0x36;
       case ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX:
         // 0x1F for backwards compatibility on current minor version.
         return 0x1F;
@@ -5716,6 +5718,8 @@ class HistogramTypeJni {
         return ROCKSDB_NAMESPACE::Histograms::POLL_WAIT_MICROS;
       case 0x35:
         return ROCKSDB_NAMESPACE::Histograms::PREFETCHED_BYTES_DISCARDED;
+      case 0x36:
+        return ROCKSDB_NAMESPACE::Histograms::MULTIGET_IO_BATCH_SIZE;
       case 0x1F:
         // 0x1F for backwards compatibility on current minor version.
         return ROCKSDB_NAMESPACE::Histograms::HISTOGRAM_ENUM_MAX;
diff -pruN 7.2.2-5/java/rocksjni/write_batch_test.cc 7.3.1-2/java/rocksjni/write_batch_test.cc
--- 7.2.2-5/java/rocksjni/write_batch_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/rocksjni/write_batch_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -119,7 +119,7 @@ jbyteArray Java_org_rocksdb_WriteBatchTe
         break;
     }
     state.append("@");
-    state.append(ROCKSDB_NAMESPACE::ToString(ikey.sequence));
+    state.append(std::to_string(ikey.sequence));
   }
   if (!s.ok()) {
     state.append(s.ToString());
diff -pruN 7.2.2-5/java/samples/src/main/java/RocksDBSample.java 7.3.1-2/java/samples/src/main/java/RocksDBSample.java
--- 7.2.2-5/java/samples/src/main/java/RocksDBSample.java	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/samples/src/main/java/RocksDBSample.java	2022-06-08 21:08:16.000000000 +0000
@@ -46,7 +46,7 @@ public class RocksDBSample {
             .setWriteBufferSize(8 * SizeUnit.KB)
             .setMaxWriteBufferNumber(3)
             .setMaxBackgroundJobs(10)
-            .setCompressionType(CompressionType.SNAPPY_COMPRESSION)
+            .setCompressionType(CompressionType.ZLIB_COMPRESSION)
             .setCompactionStyle(CompactionStyle.UNIVERSAL);
       } catch (final IllegalArgumentException e) {
         assert (false);
@@ -56,7 +56,7 @@ public class RocksDBSample {
       assert (options.writeBufferSize() == 8 * SizeUnit.KB);
       assert (options.maxWriteBufferNumber() == 3);
       assert (options.maxBackgroundJobs() == 10);
-      assert (options.compressionType() == CompressionType.SNAPPY_COMPRESSION);
+      assert (options.compressionType() == CompressionType.ZLIB_COMPRESSION);
       assert (options.compactionStyle() == CompactionStyle.UNIVERSAL);
 
       assert (options.memTableFactoryName().equals("SkipListFactory"));
diff -pruN 7.2.2-5/java/src/main/java/org/rocksdb/AbstractMutableOptions.java 7.3.1-2/java/src/main/java/org/rocksdb/AbstractMutableOptions.java
--- 7.2.2-5/java/src/main/java/org/rocksdb/AbstractMutableOptions.java	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/src/main/java/org/rocksdb/AbstractMutableOptions.java	2022-06-08 21:08:16.000000000 +0000
@@ -81,8 +81,8 @@ public abstract class AbstractMutableOpt
     protected abstract T build(final String[] keys, final String[] values);
 
     public T build() {
-      final String keys[] = new String[options.size()];
-      final String values[] = new String[options.size()];
+      final String[] keys = new String[options.size()];
+      final String[] values = new String[options.size()];
 
       int i = 0;
       for (final Map.Entry<K, MutableOptionValue<?>> option : options.entrySet()) {
@@ -227,7 +227,7 @@ public abstract class AbstractMutableOpt
       } catch (NumberFormatException nfe) {
         final double doubleValue = Double.parseDouble(value);
         if (doubleValue != Math.round(doubleValue))
-          throw new IllegalArgumentException("Unable to parse or round " + value + " to int");
+          throw new IllegalArgumentException("Unable to parse or round " + value + " to long");
         return Math.round(doubleValue);
       }
     }
@@ -245,7 +245,7 @@ public abstract class AbstractMutableOpt
       } catch (NumberFormatException nfe) {
         final double doubleValue = Double.parseDouble(value);
         if (doubleValue != Math.round(doubleValue))
-          throw new IllegalArgumentException("Unable to parse or round " + value + " to long");
+          throw new IllegalArgumentException("Unable to parse or round " + value + " to int");
         return (int) Math.round(doubleValue);
       }
     }
diff -pruN 7.2.2-5/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java 7.3.1-2/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java
--- 7.2.2-5/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/src/main/java/org/rocksdb/HashLinkedListMemTableConfig.java	2022-06-08 21:08:16.000000000 +0000
@@ -125,7 +125,7 @@ public class HashLinkedListMemTableConfi
    * Returns information about logging the distribution of
    *  number of entries on flush.
    *
-   * @return if distrubtion of number of entries shall be logged.
+   * @return if distribution of number of entries shall be logged.
    */
   public boolean ifLogBucketDistWhenFlush() {
     return ifLogBucketDistWhenFlush_;
diff -pruN 7.2.2-5/java/src/main/java/org/rocksdb/MutableOptionValue.java 7.3.1-2/java/src/main/java/org/rocksdb/MutableOptionValue.java
--- 7.2.2-5/java/src/main/java/org/rocksdb/MutableOptionValue.java	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/src/main/java/org/rocksdb/MutableOptionValue.java	2022-06-08 21:08:16.000000000 +0000
@@ -17,7 +17,7 @@ public abstract class MutableOptionValue
       extends MutableOptionValue<T> {
     protected final T value;
 
-    private MutableOptionValueObject(final T value) {
+    protected MutableOptionValueObject(final T value) {
       this.value = value;
     }
 
@@ -153,10 +153,6 @@ public abstract class MutableOptionValue
 
     @Override
     double asDouble() {
-      if(value > Double.MAX_VALUE || value < Double.MIN_VALUE) {
-        throw new NumberFormatException(
-            "long value lies outside the bounds of int");
-      }
       return Long.valueOf(value).doubleValue();
     }
 
@@ -210,9 +206,6 @@ public abstract class MutableOptionValue
 
     @Override
     double asDouble() {
-      if(value > Double.MAX_VALUE || value < Double.MIN_VALUE) {
-        throw new NumberFormatException("int value lies outside the bounds of int");
-      }
       return Integer.valueOf(value).doubleValue();
     }
 
diff -pruN 7.2.2-5/java/src/main/java/org/rocksdb/RocksDB.java 7.3.1-2/java/src/main/java/org/rocksdb/RocksDB.java
--- 7.2.2-5/java/src/main/java/org/rocksdb/RocksDB.java	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/src/main/java/org/rocksdb/RocksDB.java	2022-06-08 21:08:16.000000000 +0000
@@ -14,6 +14,8 @@ import java.util.Map;
 import java.util.concurrent.atomic.AtomicReference;
 import org.rocksdb.util.Environment;
 
+import static java.nio.charset.StandardCharsets.UTF_8;
+
 /**
  * A RocksDB is a persistent ordered map from keys to values.  It is safe for
  * concurrent access from multiple threads without any external synchronization.
@@ -21,7 +23,7 @@ import org.rocksdb.util.Environment;
  * indicates sth wrong at the RocksDB library side and the call failed.
  */
 public class RocksDB extends RocksObject {
-  public static final byte[] DEFAULT_COLUMN_FAMILY = "default".getBytes();
+  public static final byte[] DEFAULT_COLUMN_FAMILY = "default".getBytes(UTF_8);
   public static final int NOT_FOUND = -1;
 
   private enum LibraryState {
diff -pruN 7.2.2-5/java/src/test/java/org/rocksdb/RocksDBTest.java 7.3.1-2/java/src/test/java/org/rocksdb/RocksDBTest.java
--- 7.2.2-5/java/src/test/java/org/rocksdb/RocksDBTest.java	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/java/src/test/java/org/rocksdb/RocksDBTest.java	2022-06-08 21:08:16.000000000 +0000
@@ -1428,7 +1428,7 @@ public class RocksDBTest {
         assertThat(livefiles.manifestFileSize).isEqualTo(59);
         assertThat(livefiles.files.size()).isEqualTo(3);
         assertThat(livefiles.files.get(0)).isEqualTo("/CURRENT");
-        assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000004");
+        assertThat(livefiles.files.get(1)).isEqualTo("/MANIFEST-000005");
         assertThat(livefiles.files.get(2)).isEqualTo("/OPTIONS-000007");
       }
     }
diff -pruN 7.2.2-5/logging/env_logger.h 7.3.1-2/logging/env_logger.h
--- 7.2.2-5/logging/env_logger.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/logging/env_logger.h	2022-06-08 21:08:16.000000000 +0000
@@ -100,11 +100,11 @@ class EnvLogger : public Logger {
       char* p = base;
       char* limit = base + bufsize;
 
-      struct timeval now_tv;
-      gettimeofday(&now_tv, nullptr);
+      port::TimeVal now_tv;
+      port::GetTimeOfDay(&now_tv, nullptr);
       const time_t seconds = now_tv.tv_sec;
       struct tm t;
-      localtime_r(&seconds, &t);
+      port::LocalTimeR(&seconds, &t);
       p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llx ",
                     t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour,
                     t.tm_min, t.tm_sec, static_cast<int>(now_tv.tv_usec),
diff -pruN 7.2.2-5/logging/log_buffer.cc 7.3.1-2/logging/log_buffer.cc
--- 7.2.2-5/logging/log_buffer.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/logging/log_buffer.cc	2022-06-08 21:08:16.000000000 +0000
@@ -27,7 +27,7 @@ void LogBuffer::AddLogToBuffer(size_t ma
   char* limit = alloc_mem + max_log_size - 1;
 
   // store the time
-  gettimeofday(&(buffered_log->now_tv), nullptr);
+  port::GetTimeOfDay(&(buffered_log->now_tv), nullptr);
 
   // Print the message
   if (p < limit) {
@@ -60,7 +60,7 @@ void LogBuffer::FlushBufferToLog() {
   for (BufferedLog* log : logs_) {
     const time_t seconds = log->now_tv.tv_sec;
     struct tm t;
-    if (localtime_r(&seconds, &t) != nullptr) {
+    if (port::LocalTimeR(&seconds, &t) != nullptr) {
       Log(log_level_, info_log_,
           "(Original Log Time %04d/%02d/%02d-%02d:%02d:%02d.%06d) %s",
           t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour, t.tm_min,
diff -pruN 7.2.2-5/logging/log_buffer.h 7.3.1-2/logging/log_buffer.h
--- 7.2.2-5/logging/log_buffer.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/logging/log_buffer.h	2022-06-08 21:08:16.000000000 +0000
@@ -35,7 +35,7 @@ class LogBuffer {
  private:
   // One log entry with its timestamp
   struct BufferedLog {
-    struct timeval now_tv;  // Timestamp of the log
+    port::TimeVal now_tv;   // Timestamp of the log
     char message[1];        // Beginning of log message
   };
 
diff -pruN 7.2.2-5/logging/posix_logger.h 7.3.1-2/logging/posix_logger.h
--- 7.2.2-5/logging/posix_logger.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/logging/posix_logger.h	2022-06-08 21:08:16.000000000 +0000
@@ -103,11 +103,11 @@ class PosixLogger : public Logger {
       char* p = base;
       char* limit = base + bufsize;
 
-      struct timeval now_tv;
-      gettimeofday(&now_tv, nullptr);
+      port::TimeVal now_tv;
+      port::GetTimeOfDay(&now_tv, nullptr);
       const time_t seconds = now_tv.tv_sec;
       struct tm t;
-      localtime_r(&seconds, &t);
+      port::LocalTimeR(&seconds, &t);
       p += snprintf(p, limit - p, "%04d/%02d/%02d-%02d:%02d:%02d.%06d %llu ",
                     t.tm_year + 1900, t.tm_mon + 1, t.tm_mday, t.tm_hour,
                     t.tm_min, t.tm_sec, static_cast<int>(now_tv.tv_usec),
diff -pruN 7.2.2-5/Makefile 7.3.1-2/Makefile
--- 7.2.2-5/Makefile	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/Makefile	2022-06-08 21:08:16.000000000 +0000
@@ -8,7 +8,7 @@
 
 BASH_EXISTS := $(shell which bash)
 SHELL := $(shell which bash)
-include python.mk
+include common.mk
 
 CLEAN_FILES = # deliberately empty, so we can append below.
 CFLAGS += ${EXTRA_CFLAGS}
@@ -136,6 +136,13 @@ CXXFLAGS += $(PLATFORM_SHARED_CFLAGS) -D
 CFLAGS +=  $(PLATFORM_SHARED_CFLAGS) -DROCKSDB_DLL
 endif
 
+ifeq ($(USE_COROUTINES), 1)
+	USE_FOLLY = 1
+	OPT += -DUSE_COROUTINES
+	ROCKSDB_CXX_STANDARD = c++2a
+	USE_RTTI = 1
+endif
+
 # if we're compiling for release, compile without debug code (-DNDEBUG)
 ifeq ($(DEBUG_LEVEL),0)
 OPT += -DNDEBUG
@@ -226,6 +233,7 @@ dummy := $(shell (export ROCKSDB_ROOT="$
                   export ROCKSDB_NO_FBCODE="$(ROCKSDB_NO_FBCODE)"; \
                   export USE_CLANG="$(USE_CLANG)"; \
                   export LIB_MODE="$(LIB_MODE)"; \
+		  export ROCKSDB_CXX_STANDARD="$(ROCKSDB_CXX_STANDARD)"; \
                   "$(CURDIR)/build_tools/build_detect_platform" "$(CURDIR)/make_config.mk"))
 # this file is generated by the previous line to set build flags and sources
 include make_config.mk
@@ -340,6 +348,8 @@ endif
 # ASAN doesn't work well with jemalloc. If we're compiling with ASAN, we should use regular malloc.
 ifdef COMPILE_WITH_ASAN
 	DISABLE_JEMALLOC=1
+	ASAN_OPTIONS?=detect_stack_use_after_return=1
+	export ASAN_OPTIONS
 	EXEC_LDFLAGS += -fsanitize=address
 	PLATFORM_CCFLAGS += -fsanitize=address
 	PLATFORM_CXXFLAGS += -fsanitize=address
@@ -596,9 +606,6 @@ am__v_CCH_1 =
 check-headers: $(HEADER_OK_FILES)
 
 # options_settable_test doesn't pass with UBSAN as we use hack in the test
-ifdef COMPILE_WITH_UBSAN
-TESTS := $(shell echo $(TESTS) | sed 's/\boptions_settable_test\b//g')
-endif
 ifdef ASSERT_STATUS_CHECKED
 # TODO: finish fixing all tests to pass this check
 TESTS_FAILING_ASC = \
@@ -633,7 +640,6 @@ TESTS_PLATFORM_DEPENDENT := \
 	db_basic_test \
 	db_blob_basic_test \
 	db_encryption_test \
-	db_test2 \
 	external_sst_file_basic_test \
 	auto_roll_logger_test \
 	bloom_test \
@@ -655,7 +661,6 @@ TESTS_PLATFORM_DEPENDENT := \
 	rate_limiter_test \
 	perf_context_test \
 	iostats_context_test \
-	db_wal_test \
 
 # Sort ROCKSDBTESTS_SUBSET for filtering, except db_test is special (expensive)
 # so is placed first (out-of-order)
@@ -796,17 +801,10 @@ $(SHARED4): $(LIB_OBJECTS)
 	$(AM_V_CCLD) $(CXX) $(PLATFORM_SHARED_LDFLAGS)$(SHARED3) $(LIB_OBJECTS) $(LDFLAGS) -o $@
 endif  # PLATFORM_SHARED_EXT
 
-.PHONY: blackbox_crash_test check clean coverage crash_test ldb_tests package \
-	release tags tags0 valgrind_check whitebox_crash_test format static_lib shared_lib all \
-	dbg rocksdbjavastatic rocksdbjava gen-pc install install-static install-shared uninstall \
-	analyze tools tools_lib check-headers checkout_folly \
-	blackbox_crash_test_with_atomic_flush whitebox_crash_test_with_atomic_flush  \
-	blackbox_crash_test_with_txn whitebox_crash_test_with_txn \
-	blackbox_crash_test_with_best_efforts_recovery \
-	blackbox_crash_test_with_ts whitebox_crash_test_with_ts \
-	blackbox_crash_test_with_multiops_wc_txn \
-	blackbox_crash_test_with_multiops_wp_txn
-
+.PHONY: check clean coverage ldb_tests package dbg gen-pc build_size \
+	release tags tags0 valgrind_check format static_lib shared_lib all \
+	rocksdbjavastatic rocksdbjava install install-static install-shared \
+	uninstall analyze tools tools_lib check-headers checkout_folly
 
 all: $(LIBRARY) $(BENCHMARKS) tools tools_lib test_libs $(TESTS)
 
@@ -843,18 +841,6 @@ coverage: clean
 	# Delete intermediate files
 	$(FIND) . -type f \( -name "*.gcda" -o -name "*.gcno" \) -exec rm -f {} \;
 
-ifneq (,$(filter check parallel_check,$(MAKECMDGOALS)),)
-# Use /dev/shm if it has the sticky bit set (otherwise, /tmp),
-# and create a randomly-named rocksdb.XXXX directory therein.
-# We'll use that directory in the "make check" rules.
-ifeq ($(TMPD),)
-TMPDIR := $(shell echo $${TMPDIR:-/tmp})
-TMPD := $(shell f=/dev/shm; test -k $$f || f=$(TMPDIR);     \
-  perl -le 'use File::Temp "tempdir";'					\
-    -e 'print tempdir("'$$f'/rocksdb.XXXX", CLEANUP => 0)')
-endif
-endif
-
 # Run all tests in parallel, accumulating per-test logs in t/log-*.
 #
 # Each t/run-* file is a tiny generated bourne shell script that invokes one of
@@ -894,7 +880,7 @@ $(parallel_tests):
 		TEST_SCRIPT=t/run-$$TEST_BINARY-$${TEST_NAME//\//-}; \
     printf '%s\n' \
       '#!/bin/sh' \
-      "d=\$(TMPD)$$TEST_SCRIPT" \
+      "d=\$(TEST_TMPDIR)$$TEST_SCRIPT" \
       'mkdir -p $$d' \
       "TEST_TMPDIR=\$$d $(DRIVER) ./$$TEST_BINARY --gtest_filter=$$TEST_NAME" \
 		> $$TEST_SCRIPT; \
@@ -954,7 +940,6 @@ endif
 
 .PHONY: check_0
 check_0:
-	$(AM_V_GEN)export TEST_TMPDIR=$(TMPD); \
 	printf '%s\n' ''						\
 	  'To monitor subtest <duration,pass/fail,name>,'		\
 	  '  run "make watch-log" in a separate window' '';		\
@@ -965,7 +950,8 @@ check_0:
 	  | $(prioritize_long_running_tests)				\
 	  | grep -E '$(tests-regexp)'					\
 	  | grep -E -v '$(EXCLUDE_TESTS_REGEX)'					\
-	  | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu '{} $(parallel_redir)' ; \
+	  | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu \
+	    --tmpdir=$(TEST_TMPDIR) '{} $(parallel_redir)' ; \
 	parallel_retcode=$$? ; \
 	awk '{ if ($$7 != 0 || $$8 != 0) { if ($$7 == "Exitval") { h = $$0; } else { if (!f) print h; print; f = 1 } } } END { if(f) exit 1; }' < LOG ; \
 	awk_retcode=$$?; \
@@ -976,7 +962,6 @@ valgrind-exclude-regexp = InlineSkipTest
 .PHONY: valgrind_check_0
 valgrind_check_0: test_log_prefix := valgrind_
 valgrind_check_0:
-	$(AM_V_GEN)export TEST_TMPDIR=$(TMPD);				\
 	printf '%s\n' ''						\
 	  'To monitor subtest <duration,pass/fail,name>,'		\
 	  '  run "make watch-log" in a separate window' '';		\
@@ -988,10 +973,11 @@ valgrind_check_0:
 	  | grep -E '$(tests-regexp)'					\
 	  | grep -E -v '$(valgrind-exclude-regexp)'					\
 	  | build_tools/gnu_parallel -j$(J) --plain --joblog=LOG --eta --gnu \
-	  '(if [[ "{}" == "./"* ]] ; then $(DRIVER) {}; else {}; fi) \
+	   --tmpdir=$(TEST_TMPDIR) \
+	   '(if [[ "{}" == "./"* ]] ; then $(DRIVER) {}; else {}; fi) \
 	  $(parallel_redir)' \
 
-CLEAN_FILES += t LOG $(TMPD)
+CLEAN_FILES += t LOG $(TEST_TMPDIR)
 
 # When running parallel "make check", you can monitor its progress
 # from another window.
@@ -1014,12 +1000,12 @@ check: all
 	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) |                    \
 	        grep -q 'GNU Parallel';                                 \
 	then                                                            \
-	    $(MAKE) T="$$t" TMPD=$(TMPD) check_0;                       \
+	    $(MAKE) T="$$t" check_0;                       \
 	else                                                            \
 	    for t in $(TESTS); do                                       \
 	      echo "===== Running $$t (`date`)"; ./$$t || exit 1; done;          \
 	fi
-	rm -rf $(TMPD)
+	rm -rf $(TEST_TMPDIR)
 ifneq ($(PLATFORM), OS_AIX)
 	$(PYTHON) tools/check_all_python.py
 ifeq ($(filter -DROCKSDB_LITE,$(OPT)),)
@@ -1046,31 +1032,31 @@ ldb_tests: ldb
 include crash_test.mk
 
 asan_check: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) check -j32
+	COMPILE_WITH_ASAN=1 $(MAKE) check -j32
 	$(MAKE) clean
 
 asan_crash_test: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test
+	COMPILE_WITH_ASAN=1 $(MAKE) crash_test
 	$(MAKE) clean
 
 whitebox_asan_crash_test: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) whitebox_crash_test
+	COMPILE_WITH_ASAN=1 $(MAKE) whitebox_crash_test
 	$(MAKE) clean
 
 blackbox_asan_crash_test: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) blackbox_crash_test
+	COMPILE_WITH_ASAN=1 $(MAKE) blackbox_crash_test
 	$(MAKE) clean
 
 asan_crash_test_with_atomic_flush: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush
+	COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_atomic_flush
 	$(MAKE) clean
 
 asan_crash_test_with_txn: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_txn
+	COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_txn
 	$(MAKE) clean
 
 asan_crash_test_with_best_efforts_recovery: clean
-	ASAN_OPTIONS=detect_stack_use_after_return=1 COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_best_efforts_recovery
+	COMPILE_WITH_ASAN=1 $(MAKE) crash_test_with_best_efforts_recovery
 	$(MAKE) clean
 
 ubsan_check: clean
@@ -1116,11 +1102,11 @@ valgrind_test_some:
 valgrind_check: $(TESTS)
 	$(MAKE) DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" gen_parallel_tests
 	$(AM_V_GEN)if test "$(J)" != 1                                  \
-	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) |                    \
+	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) |    \
 	        grep -q 'GNU Parallel';                                 \
 	then                                                            \
-      $(MAKE) TMPD=$(TMPD)                                        \
-      DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" valgrind_check_0; \
+	  $(MAKE)                                                       \
+	  DRIVER="$(VALGRIND_VER) $(VALGRIND_OPTS)" valgrind_check_0;   \
 	else                                                            \
 		for t in $(filter-out %skiplist_test options_settable_test,$(TESTS)); do \
 			$(VALGRIND_VER) $(VALGRIND_OPTS) ./$$t; \
@@ -1140,27 +1126,6 @@ valgrind_check_some: $(ROCKSDBTESTS_SUBS
 		fi; \
 	done
 
-ifneq ($(PAR_TEST),)
-parloop:
-	ret_bad=0;							\
-	for t in $(PAR_TEST); do		\
-		echo "===== Running $$t in parallel $(NUM_PAR) (`date`)";\
-		if [ $(db_test) -eq 1 ]; then \
-			seq $(J) | v="$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{};  export TEST_TMPDIR=$$s;' \
-				'timeout 2m ./db_test --gtest_filter=$$v >> $$s/log-{} 2>1'; \
-		else\
-			seq $(J) | v="./$$t" build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{};' \
-			     'export TEST_TMPDIR=$$s; timeout 10m $$v >> $$s/log-{} 2>1'; \
-		fi; \
-		ret_code=$$?; \
-		if [ $$ret_code -ne 0 ]; then \
-			ret_bad=$$ret_code; \
-			echo $$t exited with $$ret_code; \
-		fi; \
-	done; \
-	exit $$ret_bad;
-endif
-
 test_names = \
   ./db_test --gtest_list_tests						\
     | perl -n								\
@@ -1168,24 +1133,6 @@ test_names = \
       -e '/^(\s*)(\S+)/; !$$1 and do {$$p=$$2; break};'			\
       -e 'print qq! $$p$$2!'
 
-parallel_check: $(TESTS)
-	$(AM_V_GEN)if test "$(J)" > 1                                  \
-	    && (build_tools/gnu_parallel --gnu --help 2>/dev/null) |                    \
-	        grep -q 'GNU Parallel';                                 \
-	then                                                            \
-	    echo Running in parallel $(J);			\
-	else                                                            \
-	    echo "Need to have GNU Parallel and J > 1"; exit 1;		\
-	fi;								\
-	ret_bad=0;							\
-	echo $(J);\
-	echo Test Dir: $(TMPD); \
-        seq $(J) | build_tools/gnu_parallel --gnu --plain 's=$(TMPD)/rdb-{}; rm -rf $$s; mkdir $$s'; \
-	$(MAKE)  PAR_TEST="$(shell $(test_names))" TMPD=$(TMPD) \
-		J=$(J) db_test=1 parloop; \
-	$(MAKE) PAR_TEST="$(filter-out db_test, $(TESTS))" \
-		TMPD=$(TMPD) J=$(J) db_test=0 parloop;
-
 analyze: clean
 	USE_CLANG=1 $(MAKE) analyze_incremental
 
@@ -1431,6 +1378,9 @@ db_blob_basic_test: $(OBJ_DIR)/db/blob/d
 db_blob_compaction_test: $(OBJ_DIR)/db/blob/db_blob_compaction_test.o $(TEST_LIBRARY) $(LIBRARY)
 	$(AM_LINK)
 
+db_readonly_with_timestamp_test: $(OBJ_DIR)/db/db_readonly_with_timestamp_test.o $(TEST_LIBRARY) $(LIBRARY)
+	$(AM_LINK)
+
 db_with_timestamp_basic_test: $(OBJ_DIR)/db/db_with_timestamp_basic_test.o $(TEST_LIBRARY) $(LIBRARY)
 	$(AM_LINK)
 
@@ -1926,7 +1876,7 @@ testutil_test: $(OBJ_DIR)/test_util/test
 io_tracer_test: $(OBJ_DIR)/trace_replay/io_tracer_test.o $(OBJ_DIR)/trace_replay/io_tracer.o $(TEST_LIBRARY) $(LIBRARY)
 	$(AM_LINK)
 
-prefetch_test: $(OBJ_DIR)/file/prefetch_test.o $(TEST_LIBRARY) $(LIBRARY)
+prefetch_test: $(OBJ_DIR)/file/prefetch_test.o  $(OBJ_DIR)/tools/io_tracer_parser_tool.o $(TEST_LIBRARY) $(LIBRARY)
 	$(AM_LINK)
 
 io_tracer_parser_test: $(OBJ_DIR)/tools/io_tracer_parser_test.o $(OBJ_DIR)/tools/io_tracer_parser_tool.o $(TEST_LIBRARY) $(LIBRARY)
@@ -2114,6 +2064,7 @@ ifeq ($(PLATFORM), OS_OPENBSD)
 	ROCKSDBJNILIB = librocksdbjni-openbsd$(ARCH).so
 	ROCKSDB_JAR = rocksdbjni-$(ROCKSDB_JAVA_VERSION)-openbsd$(ARCH).jar
 endif
+export SHA256_CMD
 
 zlib-$(ZLIB_VER).tar.gz:
 	curl --fail --output zlib-$(ZLIB_VER).tar.gz --location ${ZLIB_DOWNLOAD_BASE}/zlib-$(ZLIB_VER).tar.gz
@@ -2246,7 +2197,7 @@ JAR_CMD := jar
 endif
 endif
 rocksdbjavastatic_javalib:
-	cd java; SHA256_CMD='$(SHA256_CMD)' $(MAKE) javalib
+	cd java; $(MAKE) javalib
 	rm -f java/target/$(ROCKSDBJNILIB)
 	$(CXX) $(CXXFLAGS) -I./java/. $(JAVA_INCLUDE) -shared -fPIC \
 	  -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) \
@@ -2360,7 +2311,7 @@ rocksdbjava: $(LIB_OBJECTS)
 ifeq ($(JAVA_HOME),)
 	$(error JAVA_HOME is not set)
 endif
-	$(AM_V_GEN)cd java; SHA256_CMD='$(SHA256_CMD)' $(MAKE) javalib;
+	$(AM_V_GEN)cd java; $(MAKE) javalib;
 	$(AM_V_at)rm -f ./java/target/$(ROCKSDBJNILIB)
 	$(AM_V_at)$(CXX) $(CXXFLAGS) -I./java/. -I./java/rocksjni $(JAVA_INCLUDE) $(ROCKSDB_PLUGIN_JNI_CXX_INCLUDEFLAGS) -shared -fPIC -o ./java/target/$(ROCKSDBJNILIB) $(ALL_JNI_NATIVE_SOURCES) $(LIB_OBJECTS) $(JAVA_LDFLAGS) $(COVERAGEFLAGS)
 	$(AM_V_at)cd java; $(JAR_CMD) -cf target/$(ROCKSDB_JAR) HISTORY*.md
@@ -2372,14 +2323,13 @@ jclean:
 	cd java;$(MAKE) clean;
 
 jtest_compile: rocksdbjava
-	cd java; SHA256_CMD='$(SHA256_CMD)' $(MAKE) java_test
+	cd java;$(MAKE) java_test
 
 jtest_run:
 	cd java;$(MAKE) run_test
 
 jtest: rocksdbjava
-	cd java;$(MAKE) sample; SHA256_CMD='$(SHA256_CMD)' $(MAKE) test;
-	$(PYTHON) tools/check_all_python.py # TODO peterd: find a better place for this check in CI targets
+	cd java;$(MAKE) sample test
 
 jdb_bench:
 	cd java;$(MAKE) db_bench;
@@ -2406,6 +2356,38 @@ checkout_folly:
 	perl -pi -e 's/^(#include <boost)/\/\/$$1/' third-party/folly/folly/functional/Invoke.h
 
 # ---------------------------------------------------------------------------
+#   Build size testing
+# ---------------------------------------------------------------------------
+
+REPORT_BUILD_STATISTIC?=echo STATISTIC:
+
+build_size:
+	# === normal build, static ===
+	$(MAKE) clean
+	$(MAKE) static_lib
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.static_lib $$(stat --printf="%s" librocksdb.a)
+	strip librocksdb.a
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.static_lib_stripped $$(stat --printf="%s" librocksdb.a)
+	# === normal build, shared ===
+	$(MAKE) clean
+	$(MAKE) shared_lib
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.shared_lib $$(stat --printf="%s" `readlink -f librocksdb.so`)
+	strip `readlink -f librocksdb.so`
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.shared_lib_stripped $$(stat --printf="%s" `readlink -f librocksdb.so`)
+	# === lite build, static ===
+	$(MAKE) clean
+	$(MAKE) LITE=1 static_lib
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.static_lib_lite $$(stat --printf="%s" librocksdb.a)
+	strip librocksdb.a
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.static_lib_lite_stripped $$(stat --printf="%s" librocksdb.a)
+	# === lite build, shared ===
+	$(MAKE) clean
+	$(MAKE) LITE=1 shared_lib
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.shared_lib_lite $$(stat --printf="%s" `readlink -f librocksdb.so`)
+	strip `readlink -f librocksdb.so`
+	$(REPORT_BUILD_STATISTIC) rocksdb.build_size.shared_lib_lite_stripped $$(stat --printf="%s" `readlink -f librocksdb.so`)
+
+# ---------------------------------------------------------------------------
 #  	Platform-specific compilation
 # ---------------------------------------------------------------------------
 
diff -pruN 7.2.2-5/memory/arena.h 7.3.1-2/memory/arena.h
--- 7.2.2-5/memory/arena.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memory/arena.h	2022-06-08 21:08:16.000000000 +0000
@@ -78,7 +78,7 @@ class Arena : public Allocator {
   size_t BlockSize() const override { return kBlockSize; }
 
   bool IsInInlineBlock() const {
-    return blocks_.empty();
+    return blocks_.empty() && huge_blocks_.empty();
   }
 
  private:
diff -pruN 7.2.2-5/memory/concurrent_arena.cc 7.3.1-2/memory/concurrent_arena.cc
--- 7.2.2-5/memory/concurrent_arena.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memory/concurrent_arena.cc	2022-06-08 21:08:16.000000000 +0000
@@ -14,9 +14,7 @@
 
 namespace ROCKSDB_NAMESPACE {
 
-#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
-__thread size_t ConcurrentArena::tls_cpuid = 0;
-#endif
+thread_local size_t ConcurrentArena::tls_cpuid = 0;
 
 namespace {
 // If the shard block size is too large, in the worst case, every core
@@ -36,11 +34,9 @@ ConcurrentArena::ConcurrentArena(size_t
 
 ConcurrentArena::Shard* ConcurrentArena::Repick() {
   auto shard_and_index = shards_.AccessElementAndIndex();
-#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
   // even if we are cpu 0, use a non-zero tls_cpuid so we can tell we
   // have repicked
   tls_cpuid = shard_and_index.second | shards_.Size();
-#endif
   return shard_and_index.first;
 }
 
diff -pruN 7.2.2-5/memory/concurrent_arena.h 7.3.1-2/memory/concurrent_arena.h
--- 7.2.2-5/memory/concurrent_arena.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memory/concurrent_arena.h	2022-06-08 21:08:16.000000000 +0000
@@ -97,11 +97,7 @@ class ConcurrentArena : public Allocator
     Shard() : free_begin_(nullptr), allocated_and_unused_(0) {}
   };
 
-#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
-  static __thread size_t tls_cpuid;
-#else
-  enum ZeroFirstEnum : size_t { tls_cpuid = 0 };
-#endif
+  static thread_local size_t tls_cpuid;
 
   char padding0[56] ROCKSDB_FIELD_UNUSED;
 
diff -pruN 7.2.2-5/memory/jemalloc_nodump_allocator.cc 7.3.1-2/memory/jemalloc_nodump_allocator.cc
--- 7.2.2-5/memory/jemalloc_nodump_allocator.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memory/jemalloc_nodump_allocator.cc	2022-06-08 21:08:16.000000000 +0000
@@ -114,19 +114,18 @@ Status JemallocNodumpAllocator::Initiali
       mallctl("arenas.create", &arena_index_, &arena_index_size, nullptr, 0);
   if (ret != 0) {
     return Status::Incomplete("Failed to create jemalloc arena, error code: " +
-                              ROCKSDB_NAMESPACE::ToString(ret));
+                              std::to_string(ret));
   }
   assert(arena_index_ != 0);
 
   // Read existing hooks.
-  std::string key =
-      "arena." + ROCKSDB_NAMESPACE::ToString(arena_index_) + ".extent_hooks";
+  std::string key = "arena." + std::to_string(arena_index_) + ".extent_hooks";
   extent_hooks_t* hooks;
   size_t hooks_size = sizeof(hooks);
   ret = mallctl(key.c_str(), &hooks, &hooks_size, nullptr, 0);
   if (ret != 0) {
     return Status::Incomplete("Failed to read existing hooks, error code: " +
-                              ROCKSDB_NAMESPACE::ToString(ret));
+                              std::to_string(ret));
   }
 
   // Store existing alloc.
@@ -146,7 +145,7 @@ Status JemallocNodumpAllocator::Initiali
   ret = mallctl(key.c_str(), nullptr, nullptr, &hooks_ptr, sizeof(hooks_ptr));
   if (ret != 0) {
     return Status::Incomplete("Failed to set custom hook, error code: " +
-                              ROCKSDB_NAMESPACE::ToString(ret));
+                              std::to_string(ret));
   }
   return Status::OK();
 }
@@ -226,12 +225,11 @@ void* JemallocNodumpAllocator::Alloc(ext
 
 Status JemallocNodumpAllocator::DestroyArena(unsigned arena_index) {
   assert(arena_index != 0);
-  std::string key =
-      "arena." + ROCKSDB_NAMESPACE::ToString(arena_index) + ".destroy";
+  std::string key = "arena." + std::to_string(arena_index) + ".destroy";
   int ret = mallctl(key.c_str(), nullptr, 0, nullptr, 0);
   if (ret != 0) {
     return Status::Incomplete("Failed to destroy jemalloc arena, error code: " +
-                              ROCKSDB_NAMESPACE::ToString(ret));
+                              std::to_string(ret));
   }
   return Status::OK();
 }
diff -pruN 7.2.2-5/memtable/skiplistrep.cc 7.3.1-2/memtable/skiplistrep.cc
--- 7.2.2-5/memtable/skiplistrep.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memtable/skiplistrep.cc	2022-06-08 21:08:16.000000000 +0000
@@ -353,7 +353,7 @@ SkipListFactory::SkipListFactory(size_t
 std::string SkipListFactory::GetId() const {
   std::string id = Name();
   if (lookahead_ > 0) {
-    id.append(":").append(ROCKSDB_NAMESPACE::ToString(lookahead_));
+    id.append(":").append(std::to_string(lookahead_));
   }
   return id;
 }
diff -pruN 7.2.2-5/memtable/write_buffer_manager.cc 7.3.1-2/memtable/write_buffer_manager.cc
--- 7.2.2-5/memtable/write_buffer_manager.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memtable/write_buffer_manager.cc	2022-06-08 21:08:16.000000000 +0000
@@ -82,7 +82,7 @@ void WriteBufferManager::ReserveMemWithC
 
   // We absorb the error since WriteBufferManager is not able to handle
   // this failure properly. Ideallly we should prevent this allocation
-  // from happening if this cache reservation fails.
+  // from happening if this cache charging fails.
   // [TODO] We'll need to improve it in the future and figure out what to do on
   // error
   s.PermitUncheckedError();
diff -pruN 7.2.2-5/memtable/write_buffer_manager_test.cc 7.3.1-2/memtable/write_buffer_manager_test.cc
--- 7.2.2-5/memtable/write_buffer_manager_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/memtable/write_buffer_manager_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -77,7 +77,9 @@ TEST_F(WriteBufferManagerTest, ShouldFlu
   ASSERT_FALSE(wbf->ShouldFlush());
 }
 
-TEST_F(WriteBufferManagerTest, CacheCost) {
+class ChargeWriteBufferTest : public testing::Test {};
+
+TEST_F(ChargeWriteBufferTest, Basic) {
   constexpr std::size_t kMetaDataChargeOverhead = 10000;
 
   LRUCacheOptions co;
@@ -197,7 +199,7 @@ TEST_F(WriteBufferManagerTest, CacheCost
   ASSERT_EQ(cache->GetPinnedUsage(), 0);
 }
 
-TEST_F(WriteBufferManagerTest, NoCapCacheCost) {
+TEST_F(ChargeWriteBufferTest, BasicWithNoBufferSizeLimit) {
   constexpr std::size_t kMetaDataChargeOverhead = 10000;
   // 1GB cache
   std::shared_ptr<Cache> cache = NewLRUCache(1024 * 1024 * 1024, 4);
@@ -231,7 +233,7 @@ TEST_F(WriteBufferManagerTest, NoCapCach
   ASSERT_LT(cache->GetPinnedUsage(), 4 * 256 * 1024 + kMetaDataChargeOverhead);
 }
 
-TEST_F(WriteBufferManagerTest, CacheFull) {
+TEST_F(ChargeWriteBufferTest, BasicWithCacheFull) {
   constexpr std::size_t kMetaDataChargeOverhead = 20000;
 
   // 12MB cache size with strict capacity
diff -pruN 7.2.2-5/microbench/db_basic_bench.cc 7.3.1-2/microbench/db_basic_bench.cc
--- 7.2.2-5/microbench/db_basic_bench.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/microbench/db_basic_bench.cc	2022-06-08 21:08:16.000000000 +0000
@@ -15,6 +15,7 @@
 #include "table/block_based/block.h"
 #include "table/block_based/block_builder.h"
 #include "util/random.h"
+#include "utilities/merge_operators.h"
 
 namespace ROCKSDB_NAMESPACE {
 
@@ -32,7 +33,11 @@ class KeyGenerator {
     if (is_sequential_) {
       assert(next_sequential_key_ < max_key_);
       k = (next_sequential_key_ % max_key_) * MULTIPLIER + offset;
-      next_sequential_key_++;
+      if (next_sequential_key_ + 1 == max_key_) {
+        next_sequential_key_ = 0;
+      } else {
+        next_sequential_key_++;
+      }
     } else {
       k = (rnd_->Next() % max_key_) * MULTIPLIER + offset;
     }
@@ -118,7 +123,8 @@ class KeyGenerator {
   }
 };
 
-static void SetupDB(benchmark::State& state, Options& options, DB** dpptr,
+static void SetupDB(benchmark::State& state, Options& options,
+                    std::unique_ptr<DB>* db,
                     const std::string& test_name = "") {
   options.create_if_missing = true;
   auto env = Env::Default();
@@ -132,15 +138,17 @@ static void SetupDB(benchmark::State& st
       db_path + kFilePathSeparator + test_name + std::to_string(getpid());
   DestroyDB(db_name, options);
 
-  s = DB::Open(options, db_name, dpptr);
+  DB* db_ptr = nullptr;
+  s = DB::Open(options, db_name, &db_ptr);
   if (!s.ok()) {
     state.SkipWithError(s.ToString().c_str());
     return;
   }
+  db->reset(db_ptr);
 }
 
-static void TeardownDB(benchmark::State& state, DB* db, const Options& options,
-                       KeyGenerator& kg) {
+static void TeardownDB(benchmark::State& state, const std::unique_ptr<DB>& db,
+                       const Options& options, KeyGenerator& kg) {
   char min_buff[256], max_buff[256];
   const Range r(kg.MinKey(min_buff), kg.MaxKey(max_buff));
   uint64_t size;
@@ -160,7 +168,7 @@ static void TeardownDB(benchmark::State&
 
 static void DBOpen(benchmark::State& state) {
   // create DB
-  DB* db = nullptr;
+  std::unique_ptr<DB> db;
   Options options;
   SetupDB(state, options, &db, "DBOpen");
 
@@ -172,12 +180,17 @@ static void DBOpen(benchmark::State& sta
   auto rnd = Random(123);
 
   for (auto _ : state) {
-    Status s = DB::Open(options, db_name, &db);
-    if (!s.ok()) {
-      state.SkipWithError(s.ToString().c_str());
+    {
+      DB* db_ptr = nullptr;
+      Status s = DB::Open(options, db_name, &db_ptr);
+      if (!s.ok()) {
+        state.SkipWithError(s.ToString().c_str());
+      }
+      db.reset(db_ptr);
     }
     state.PauseTiming();
     auto wo = WriteOptions();
+    Status s;
     for (int i = 0; i < 2; i++) {
       for (int j = 0; j < 100; j++) {
         s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100));
@@ -204,7 +217,7 @@ BENCHMARK(DBOpen)->Iterations(200);  //
 
 static void DBClose(benchmark::State& state) {
   // create DB
-  DB* db;
+  std::unique_ptr<DB> db;
   Options options;
   SetupDB(state, options, &db, "DBClose");
 
@@ -217,11 +230,16 @@ static void DBClose(benchmark::State& st
 
   for (auto _ : state) {
     state.PauseTiming();
-    Status s = DB::Open(options, db_name, &db);
-    if (!s.ok()) {
-      state.SkipWithError(s.ToString().c_str());
+    {
+      DB* db_ptr = nullptr;
+      Status s = DB::Open(options, db_name, &db_ptr);
+      if (!s.ok()) {
+        state.SkipWithError(s.ToString().c_str());
+      }
+      db.reset(db_ptr);
     }
     auto wo = WriteOptions();
+    Status s;
     for (int i = 0; i < 2; i++) {
       for (int j = 0; j < 100; j++) {
         s = db->Put(wo, rnd.RandomString(10), rnd.RandomString(100));
@@ -255,7 +273,7 @@ static void DBPut(benchmark::State& stat
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db = nullptr;
+  static std::unique_ptr<DB> db = nullptr;
   Options options;
   if (enable_statistics) {
     options.statistics = CreateDBStatistics();
@@ -284,7 +302,7 @@ static void DBPut(benchmark::State& stat
   }
 
   if (state.thread_index() == 0) {
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     Status s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -332,7 +350,7 @@ static void ManualCompaction(benchmark::
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
   if (enable_statistics) {
     options.statistics = CreateDBStatistics();
@@ -391,7 +409,7 @@ static void ManualCompaction(benchmark::
   }
 
   if (state.thread_index() == 0) {
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -453,7 +471,7 @@ static void ManualFlush(benchmark::State
   bool enable_statistics = true;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
   if (enable_statistics) {
     options.statistics = CreateDBStatistics();
@@ -489,7 +507,7 @@ static void ManualFlush(benchmark::State
   }
 
   if (state.thread_index() == 0) {
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     Status s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -525,21 +543,30 @@ static void DBGet(benchmark::State& stat
   bool enable_statistics = state.range(3);
   bool negative_query = state.range(4);
   bool enable_filter = state.range(5);
+  bool mmap = state.range(6);
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
   if (enable_statistics) {
     options.statistics = CreateDBStatistics();
   }
+  if (mmap) {
+    options.allow_mmap_reads = true;
+    options.compression = kNoCompression;
+  }
   options.compaction_style = compaction_style;
 
+  BlockBasedTableOptions table_options;
   if (enable_filter) {
-    BlockBasedTableOptions table_options;
     table_options.filter_policy.reset(NewBloomFilterPolicy(10, false));
-    options.table_factory.reset(NewBlockBasedTableFactory(table_options));
   }
+  if (mmap) {
+    table_options.no_block_cache = true;
+    table_options.block_restart_interval = 1;
+  }
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
 
   auto rnd = Random(301 + state.thread_index());
   KeyGenerator kg(&rnd, key_num);
@@ -564,7 +591,7 @@ static void DBGet(benchmark::State& stat
       state.SkipWithError(s.ToString().c_str());
     }
 
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -573,6 +600,9 @@ static void DBGet(benchmark::State& stat
   }
 
   auto ro = ReadOptions();
+  if (mmap) {
+    ro.verify_checksums = false;
+  }
   size_t not_found = 0;
   if (negative_query) {
     for (auto _ : state) {
@@ -616,8 +646,10 @@ static void DBGetArguments(benchmark::in
         for (bool enable_statistics : {false, true}) {
           for (bool negative_query : {false, true}) {
             for (bool enable_filter : {false, true}) {
-              b->Args({comp_style, max_data, per_key_size, enable_statistics,
-                       negative_query, enable_filter});
+              for (bool mmap : {false, true}) {
+                b->Args({comp_style, max_data, per_key_size, enable_statistics,
+                         negative_query, enable_filter, mmap});
+              }
             }
           }
         }
@@ -625,16 +657,16 @@ static void DBGetArguments(benchmark::in
     }
   }
   b->ArgNames({"comp_style", "max_data", "per_key_size", "enable_statistics",
-               "negative_query", "enable_filter"});
+               "negative_query", "enable_filter", "mmap"});
 }
 
-static constexpr uint64_t kDBGetNum = 10l << 10;
+static constexpr uint64_t kDBGetNum = 1l << 20;
 BENCHMARK(DBGet)->Threads(1)->Iterations(kDBGetNum)->Apply(DBGetArguments);
 BENCHMARK(DBGet)->Threads(8)->Iterations(kDBGetNum / 8)->Apply(DBGetArguments);
 
 static void SimpleGetWithPerfContext(benchmark::State& state) {
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   std::string db_name;
   Options options;
   options.create_if_missing = true;
@@ -654,10 +686,14 @@ static void SimpleGetWithPerfContext(ben
     db_name = db_path + "/simple_get_" + std::to_string(getpid());
     DestroyDB(db_name, options);
 
-    s = DB::Open(options, db_name, &db);
-    if (!s.ok()) {
-      state.SkipWithError(s.ToString().c_str());
-      return;
+    {
+      DB* db_ptr = nullptr;
+      s = DB::Open(options, db_name, &db_ptr);
+      if (!s.ok()) {
+        state.SkipWithError(s.ToString().c_str());
+        return;
+      }
+      db.reset(db_ptr);
     }
     // load db
     auto wo = WriteOptions();
@@ -668,7 +704,7 @@ static void SimpleGetWithPerfContext(ben
         state.SkipWithError(s.ToString().c_str());
       }
     }
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -755,6 +791,191 @@ static void SimpleGetWithPerfContext(ben
 
 BENCHMARK(SimpleGetWithPerfContext)->Iterations(1000000);
 
+static void DBGetMergeOperandsInMemtable(benchmark::State& state) {
+  const uint64_t kDataLen = 16 << 20;  // 16MB
+  const uint64_t kValueLen = 64;
+  const uint64_t kNumEntries = kDataLen / kValueLen;
+  const uint64_t kNumEntriesPerKey = state.range(0);
+  const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey;
+
+  // setup DB
+  static std::unique_ptr<DB> db;
+
+  Options options;
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  // Make memtable large enough that automatic flush will not be triggered.
+  options.write_buffer_size = 2 * kDataLen;
+
+  KeyGenerator sequential_key_gen(kNumKeys);
+  auto rnd = Random(301 + state.thread_index());
+
+  if (state.thread_index() == 0) {
+    SetupDB(state, options, &db, "DBGetMergeOperandsInMemtable");
+
+    // load db
+    auto write_opts = WriteOptions();
+    write_opts.disableWAL = true;
+    for (uint64_t i = 0; i < kNumEntries; i++) {
+      Status s = db->Merge(write_opts, sequential_key_gen.Next(),
+                           rnd.RandomString(static_cast<int>(kValueLen)));
+      if (!s.ok()) {
+        state.SkipWithError(s.ToString().c_str());
+      }
+    }
+  }
+
+  KeyGenerator random_key_gen(kNumKeys);
+  std::vector<PinnableSlice> value_operands;
+  value_operands.resize(kNumEntriesPerKey);
+  GetMergeOperandsOptions get_merge_ops_opts;
+  get_merge_ops_opts.expected_max_number_of_operands =
+      static_cast<int>(kNumEntriesPerKey);
+  for (auto _ : state) {
+    int num_value_operands = 0;
+    Status s = db->GetMergeOperands(
+        ReadOptions(), db->DefaultColumnFamily(), random_key_gen.Next(),
+        value_operands.data(), &get_merge_ops_opts, &num_value_operands);
+    if (!s.ok()) {
+      state.SkipWithError(s.ToString().c_str());
+    }
+    if (num_value_operands != static_cast<int>(kNumEntriesPerKey)) {
+      state.SkipWithError("Unexpected number of merge operands found for key");
+    }
+  }
+
+  if (state.thread_index() == 0) {
+    TeardownDB(state, db, options, random_key_gen);
+  }
+}
+
+static void DBGetMergeOperandsInSstFile(benchmark::State& state) {
+  const uint64_t kDataLen = 16 << 20;  // 16MB
+  const uint64_t kValueLen = 64;
+  const uint64_t kNumEntries = kDataLen / kValueLen;
+  const uint64_t kNumEntriesPerKey = state.range(0);
+  const uint64_t kNumKeys = kNumEntries / kNumEntriesPerKey;
+  const bool kMmap = state.range(1);
+
+  // setup DB
+  static std::unique_ptr<DB> db;
+
+  BlockBasedTableOptions table_options;
+  if (kMmap) {
+    table_options.no_block_cache = true;
+  } else {
+    // Make block cache large enough that eviction will not be triggered.
+    table_options.block_cache = NewLRUCache(2 * kDataLen);
+  }
+
+  Options options;
+  if (kMmap) {
+    options.allow_mmap_reads = true;
+  }
+  options.compression = kNoCompression;
+  options.merge_operator = MergeOperators::CreateStringAppendOperator();
+  options.table_factory.reset(NewBlockBasedTableFactory(table_options));
+  // Make memtable large enough that automatic flush will not be triggered.
+  options.write_buffer_size = 2 * kDataLen;
+
+  KeyGenerator sequential_key_gen(kNumKeys);
+  auto rnd = Random(301 + state.thread_index());
+
+  if (state.thread_index() == 0) {
+    SetupDB(state, options, &db, "DBGetMergeOperandsInBlockCache");
+
+    // load db
+    //
+    // Take a snapshot after each cycle of merges to ensure flush cannot
+    // merge any entries.
+    std::vector<const Snapshot*> snapshots;
+    snapshots.resize(kNumEntriesPerKey);
+    auto write_opts = WriteOptions();
+    write_opts.disableWAL = true;
+    for (uint64_t i = 0; i < kNumEntriesPerKey; i++) {
+      for (uint64_t j = 0; j < kNumKeys; j++) {
+        Status s = db->Merge(write_opts, sequential_key_gen.Next(),
+                             rnd.RandomString(static_cast<int>(kValueLen)));
+        if (!s.ok()) {
+          state.SkipWithError(s.ToString().c_str());
+        }
+      }
+      snapshots[i] = db->GetSnapshot();
+    }
+
+    // Flush to an L0 file; read back to prime the cache/mapped memory.
+    db->Flush(FlushOptions());
+    for (uint64_t i = 0; i < kNumKeys; ++i) {
+      std::string value;
+      Status s = db->Get(ReadOptions(), sequential_key_gen.Next(), &value);
+      if (!s.ok()) {
+        state.SkipWithError(s.ToString().c_str());
+      }
+    }
+
+    if (state.thread_index() == 0) {
+      for (uint64_t i = 0; i < kNumEntriesPerKey; ++i) {
+        db->ReleaseSnapshot(snapshots[i]);
+      }
+    }
+  }
+
+  KeyGenerator random_key_gen(kNumKeys);
+  std::vector<PinnableSlice> value_operands;
+  value_operands.resize(kNumEntriesPerKey);
+  GetMergeOperandsOptions get_merge_ops_opts;
+  get_merge_ops_opts.expected_max_number_of_operands =
+      static_cast<int>(kNumEntriesPerKey);
+  for (auto _ : state) {
+    int num_value_operands = 0;
+    ReadOptions read_opts;
+    read_opts.verify_checksums = false;
+    Status s = db->GetMergeOperands(
+        read_opts, db->DefaultColumnFamily(), random_key_gen.Next(),
+        value_operands.data(), &get_merge_ops_opts, &num_value_operands);
+    if (!s.ok()) {
+      state.SkipWithError(s.ToString().c_str());
+    }
+    if (num_value_operands != static_cast<int>(kNumEntriesPerKey)) {
+      state.SkipWithError("Unexpected number of merge operands found for key");
+    }
+  }
+
+  if (state.thread_index() == 0) {
+    TeardownDB(state, db, options, random_key_gen);
+  }
+}
+
+static void DBGetMergeOperandsInMemtableArguments(
+    benchmark::internal::Benchmark* b) {
+  for (int entries_per_key : {1, 32, 1024}) {
+    b->Args({entries_per_key});
+  }
+  b->ArgNames({"entries_per_key"});
+}
+
+static void DBGetMergeOperandsInSstFileArguments(
+    benchmark::internal::Benchmark* b) {
+  for (int entries_per_key : {1, 32, 1024}) {
+    for (bool mmap : {false, true}) {
+      b->Args({entries_per_key, mmap});
+    }
+  }
+  b->ArgNames({"entries_per_key", "mmap"});
+}
+
+BENCHMARK(DBGetMergeOperandsInMemtable)
+    ->Threads(1)
+    ->Apply(DBGetMergeOperandsInMemtableArguments);
+BENCHMARK(DBGetMergeOperandsInMemtable)
+    ->Threads(8)
+    ->Apply(DBGetMergeOperandsInMemtableArguments);
+BENCHMARK(DBGetMergeOperandsInSstFile)
+    ->Threads(1)
+    ->Apply(DBGetMergeOperandsInSstFileArguments);
+BENCHMARK(DBGetMergeOperandsInSstFile)
+    ->Threads(8)
+    ->Apply(DBGetMergeOperandsInSstFileArguments);
+
 std::string GenerateKey(int primary_key, int secondary_key, int padding_size,
                         Random* rnd) {
   char buf[50];
@@ -844,7 +1065,7 @@ static void IteratorSeek(benchmark::Stat
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
   if (enable_statistics) {
     options.statistics = CreateDBStatistics();
@@ -880,7 +1101,7 @@ static void IteratorSeek(benchmark::Stat
       state.SkipWithError(s.ToString().c_str());
     }
 
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -947,7 +1168,7 @@ static void IteratorNext(benchmark::Stat
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
   options.compaction_style = compaction_style;
 
@@ -973,7 +1194,7 @@ static void IteratorNext(benchmark::Stat
       state.SkipWithError(s.ToString().c_str());
     }
 
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -1022,7 +1243,7 @@ BENCHMARK(IteratorNext)
 
 static void IteratorNextWithPerfContext(benchmark::State& state) {
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
 
   auto rnd = Random(301 + state.thread_index());
@@ -1039,7 +1260,7 @@ static void IteratorNextWithPerfContext(
         state.SkipWithError(s.ToString().c_str());
       }
     }
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     Status s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -1114,7 +1335,7 @@ static void IteratorPrev(benchmark::Stat
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   std::string db_name;
   Options options;
   options.compaction_style = compaction_style;
@@ -1141,7 +1362,7 @@ static void IteratorPrev(benchmark::Stat
       state.SkipWithError(s.ToString().c_str());
     }
 
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -1198,7 +1419,7 @@ static void PrefixSeek(benchmark::State&
   uint64_t key_num = max_data / per_key_size;
 
   // setup DB
-  static DB* db;
+  static std::unique_ptr<DB> db;
   Options options;
   if (enable_statistics) {
     options.statistics = CreateDBStatistics();
@@ -1235,7 +1456,7 @@ static void PrefixSeek(benchmark::State&
       state.SkipWithError(s.ToString().c_str());
     }
 
-    auto db_full = static_cast_with_check<DBImpl>(db);
+    auto db_full = static_cast_with_check<DBImpl>(db.get());
     s = db_full->WaitForCompact(true);
     if (!s.ok()) {
       state.SkipWithError(s.ToString().c_str());
@@ -1311,7 +1532,7 @@ static void RandomAccessFileReaderRead(b
   auto statistics_share = CreateDBStatistics();
   Statistics* statistics = enable_statistics ? statistics_share.get() : nullptr;
   for (int i = 0; i < kFileNum; i++) {
-    std::string fname = fname_base + ToString(i);
+    std::string fname = fname_base + std::to_string(i);
     std::string content = rand.RandomString(kDefaultPageSize);
     std::unique_ptr<WritableFile> tgt_file;
     env->NewWritableFile(fname, &tgt_file, EnvOptions());
@@ -1344,7 +1565,7 @@ static void RandomAccessFileReaderRead(b
 
   // clean up
   for (int i = 0; i < kFileNum; i++) {
-    std::string fname = fname_base + ToString(i);
+    std::string fname = fname_base + std::to_string(i);
     env->DeleteFile(fname);  // ignore return, okay to fail cleanup
   }
 }
diff -pruN 7.2.2-5/monitoring/histogram.cc 7.3.1-2/monitoring/histogram.cc
--- 7.2.2-5/monitoring/histogram.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/histogram.cc	2022-06-08 21:08:16.000000000 +0000
@@ -26,7 +26,8 @@ HistogramBucketMapper::HistogramBucketMa
   // size of array buckets_ in HistogramImpl
   bucketValues_ = {1, 2};
   double bucket_val = static_cast<double>(bucketValues_.back());
-  while ((bucket_val = 1.5 * bucket_val) <= static_cast<double>(port::kMaxUint64)) {
+  while ((bucket_val = 1.5 * bucket_val) <=
+         static_cast<double>(std::numeric_limits<uint64_t>::max())) {
     bucketValues_.push_back(static_cast<uint64_t>(bucket_val));
     // Extracts two most significant digits to make histogram buckets more
     // human-readable. E.g., 172 becomes 170.
diff -pruN 7.2.2-5/monitoring/iostats_context.cc 7.3.1-2/monitoring/iostats_context.cc
--- 7.2.2-5/monitoring/iostats_context.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/iostats_context.cc	2022-06-08 21:08:16.000000000 +0000
@@ -13,11 +13,8 @@ namespace ROCKSDB_NAMESPACE {
 // Should not be used because the counters are not thread-safe.
 // Put here just to make get_iostats_context() simple without ifdef.
 static IOStatsContext iostats_context;
-#elif defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
-__thread IOStatsContext iostats_context;
 #else
-#error \
-    "No thread-local support. Disable iostats context with -DNIOSTATS_CONTEXT."
+thread_local IOStatsContext iostats_context;
 #endif
 
 IOStatsContext* get_iostats_context() {
diff -pruN 7.2.2-5/monitoring/iostats_context_imp.h 7.3.1-2/monitoring/iostats_context_imp.h
--- 7.2.2-5/monitoring/iostats_context_imp.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/iostats_context_imp.h	2022-06-08 21:08:16.000000000 +0000
@@ -7,9 +7,9 @@
 #include "monitoring/perf_step_timer.h"
 #include "rocksdb/iostats_context.h"
 
-#if defined(ROCKSDB_SUPPORT_THREAD_LOCAL) && !defined(NIOSTATS_CONTEXT)
+#if !defined(NIOSTATS_CONTEXT)
 namespace ROCKSDB_NAMESPACE {
-extern __thread IOStatsContext iostats_context;
+extern thread_local IOStatsContext iostats_context;
 }  // namespace ROCKSDB_NAMESPACE
 
 // increment a specific counter by the specified value
@@ -40,7 +40,7 @@ extern __thread IOStatsContext iostats_c
       PerfLevel::kEnableTimeAndCPUTimeExceptForMutex); \
   iostats_step_timer_##metric.Start();
 
-#else  // ROCKSDB_SUPPORT_THREAD_LOCAL && !NIOSTATS_CONTEXT
+#else  // !NIOSTATS_CONTEXT
 
 #define IOSTATS_ADD(metric, value)
 #define IOSTATS_ADD_IF_POSITIVE(metric, value)
@@ -53,4 +53,4 @@ extern __thread IOStatsContext iostats_c
 #define IOSTATS_TIMER_GUARD(metric)
 #define IOSTATS_CPU_TIMER_GUARD(metric, clock) static_cast<void>(clock)
 
-#endif  // ROCKSDB_SUPPORT_THREAD_LOCAL && !NIOSTATS_CONTEXT
+#endif  // !NIOSTATS_CONTEXT
diff -pruN 7.2.2-5/monitoring/perf_context.cc 7.3.1-2/monitoring/perf_context.cc
--- 7.2.2-5/monitoring/perf_context.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/perf_context.cc	2022-06-08 21:08:16.000000000 +0000
@@ -13,14 +13,8 @@ namespace ROCKSDB_NAMESPACE {
 // Should not be used because the counters are not thread-safe.
 // Put here just to make get_perf_context() simple without ifdef.
 PerfContext perf_context;
-#elif defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
-#if defined(OS_SOLARIS)
-__thread PerfContext perf_context;
-#else   // OS_SOLARIS
-thread_local PerfContext perf_context;
-#endif  // OS_SOLARIS
 #else
-#error "No thread-local support. Disable perf context with -DNPERF_CONTEXT."
+thread_local PerfContext perf_context;
 #endif
 
 PerfContext* get_perf_context() {
@@ -28,7 +22,7 @@ PerfContext* get_perf_context() {
 }
 
 PerfContext::~PerfContext() {
-#if !defined(NPERF_CONTEXT) && defined(ROCKSDB_SUPPORT_THREAD_LOCAL) && !defined(OS_SOLARIS)
+#if !defined(NPERF_CONTEXT) && !defined(OS_SOLARIS)
   ClearPerLevelPerfContext();
 #endif
 }
@@ -120,6 +114,7 @@ PerfContext::PerfContext(const PerfConte
   iter_next_cpu_nanos = other.iter_next_cpu_nanos;
   iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
   iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
+  number_async_seek = other.number_async_seek;
   if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
     ClearPerLevelPerfContext();
   }
@@ -218,6 +213,7 @@ PerfContext::PerfContext(PerfContext&& o
   iter_next_cpu_nanos = other.iter_next_cpu_nanos;
   iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
   iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
+  number_async_seek = other.number_async_seek;
   if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
     ClearPerLevelPerfContext();
   }
@@ -318,6 +314,7 @@ PerfContext& PerfContext::operator=(cons
   iter_next_cpu_nanos = other.iter_next_cpu_nanos;
   iter_prev_cpu_nanos = other.iter_prev_cpu_nanos;
   iter_seek_cpu_nanos = other.iter_seek_cpu_nanos;
+  number_async_seek = other.number_async_seek;
   if (per_level_perf_context_enabled && level_to_perf_context != nullptr) {
     ClearPerLevelPerfContext();
   }
@@ -413,6 +410,7 @@ void PerfContext::Reset() {
   iter_next_cpu_nanos = 0;
   iter_prev_cpu_nanos = 0;
   iter_seek_cpu_nanos = 0;
+  number_async_seek = 0;
   if (per_level_perf_context_enabled && level_to_perf_context) {
     for (auto& kv : *level_to_perf_context) {
       kv.second.Reset();
@@ -532,6 +530,7 @@ std::string PerfContext::ToString(bool e
   PERF_CONTEXT_OUTPUT(iter_next_cpu_nanos);
   PERF_CONTEXT_OUTPUT(iter_prev_cpu_nanos);
   PERF_CONTEXT_OUTPUT(iter_seek_cpu_nanos);
+  PERF_CONTEXT_OUTPUT(number_async_seek);
   PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_useful);
   PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_positive);
   PERF_CONTEXT_BY_LEVEL_OUTPUT_ONE_COUNTER(bloom_filter_full_true_positive);
diff -pruN 7.2.2-5/monitoring/perf_context_imp.h 7.3.1-2/monitoring/perf_context_imp.h
--- 7.2.2-5/monitoring/perf_context_imp.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/perf_context_imp.h	2022-06-08 21:08:16.000000000 +0000
@@ -9,11 +9,11 @@
 #include "util/stop_watch.h"
 
 namespace ROCKSDB_NAMESPACE {
-#if defined(NPERF_CONTEXT) || !defined(ROCKSDB_SUPPORT_THREAD_LOCAL)
+#if defined(NPERF_CONTEXT)
 extern PerfContext perf_context;
 #else
 #if defined(OS_SOLARIS)
-extern __thread PerfContext perf_context_;
+extern thread_local PerfContext perf_context_;
 #define perf_context (*get_perf_context())
 #else
 extern thread_local PerfContext perf_context;
diff -pruN 7.2.2-5/monitoring/perf_level.cc 7.3.1-2/monitoring/perf_level.cc
--- 7.2.2-5/monitoring/perf_level.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/perf_level.cc	2022-06-08 21:08:16.000000000 +0000
@@ -9,11 +9,7 @@
 
 namespace ROCKSDB_NAMESPACE {
 
-#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
-__thread PerfLevel perf_level = kEnableCount;
-#else
-PerfLevel perf_level = kEnableCount;
-#endif
+thread_local PerfLevel perf_level = kEnableCount;
 
 void SetPerfLevel(PerfLevel level) {
   assert(level > kUninitialized);
diff -pruN 7.2.2-5/monitoring/perf_level_imp.h 7.3.1-2/monitoring/perf_level_imp.h
--- 7.2.2-5/monitoring/perf_level_imp.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/perf_level_imp.h	2022-06-08 21:08:16.000000000 +0000
@@ -9,10 +9,6 @@
 
 namespace ROCKSDB_NAMESPACE {
 
-#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
-extern __thread PerfLevel perf_level;
-#else
-extern PerfLevel perf_level;
-#endif
+extern thread_local PerfLevel perf_level;
 
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/monitoring/persistent_stats_history.cc 7.3.1-2/monitoring/persistent_stats_history.cc
--- 7.2.2-5/monitoring/persistent_stats_history.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/persistent_stats_history.cc	2022-06-08 21:08:16.000000000 +0000
@@ -98,13 +98,13 @@ std::pair<uint64_t, std::string> parseKe
   std::string::size_type pos = key_str.find("#");
   // TODO(Zhongyi): add counters to track parse failures?
   if (pos == std::string::npos) {
-    result.first = port::kMaxUint64;
+    result.first = std::numeric_limits<uint64_t>::max();
     result.second.clear();
   } else {
     uint64_t parsed_time = ParseUint64(key_str.substr(0, pos));
     // skip entries with timestamp smaller than start_time
     if (parsed_time < start_time) {
-      result.first = port::kMaxUint64;
+      result.first = std::numeric_limits<uint64_t>::max();
       result.second = "";
     } else {
       result.first = parsed_time;
diff -pruN 7.2.2-5/monitoring/statistics.cc 7.3.1-2/monitoring/statistics.cc
--- 7.2.2-5/monitoring/statistics.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/statistics.cc	2022-06-08 21:08:16.000000000 +0000
@@ -286,7 +286,7 @@ const std::vector<std::pair<Histograms,
     {ASYNC_READ_BYTES, "rocksdb.async.read.bytes"},
     {POLL_WAIT_MICROS, "rocksdb.poll.wait.micros"},
     {PREFETCHED_BYTES_DISCARDED, "rocksdb.prefetched.bytes.discarded"},
-
+    {MULTIGET_IO_BATCH_SIZE, "rocksdb.multiget.io.batch.size"},
 };
 
 std::shared_ptr<Statistics> CreateDBStatistics() {
diff -pruN 7.2.2-5/monitoring/stats_history_test.cc 7.3.1-2/monitoring/stats_history_test.cc
--- 7.2.2-5/monitoring/stats_history_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/stats_history_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -604,10 +604,14 @@ TEST_F(StatsHistoryTest, ForceManualFlus
   dbfull()->TEST_WaitForStatsDumpRun(
       [&] { mock_clock_->MockSleepForSeconds(kPeriodSec); });
   // writing to all three cf, flush default cf
-  // LogNumbers: default: 14, stats: 4, pikachu: 4
+  // LogNumbers: default: 16, stats: 10, pikachu: 5
+  // Since in recovery process, cfd_stats column is created after WAL is
+  // created, synced and MANIFEST is persisted, its log number which depends on
+  // logfile_number_ will be different. Since "pikachu" is never flushed, thus
+  // its log_number should be the smallest of the three.
   ASSERT_OK(Flush());
-  ASSERT_EQ(cfd_stats->GetLogNumber(), cfd_test->GetLogNumber());
-  ASSERT_LT(cfd_stats->GetLogNumber(), cfd_default->GetLogNumber());
+  ASSERT_LT(cfd_test->GetLogNumber(), cfd_stats->GetLogNumber());
+  ASSERT_LT(cfd_test->GetLogNumber(), cfd_default->GetLogNumber());
 
   ASSERT_OK(Put("foo1", "v1"));
   ASSERT_OK(Put("bar1", "v1"));
diff -pruN 7.2.2-5/monitoring/thread_status_updater.cc 7.3.1-2/monitoring/thread_status_updater.cc
--- 7.2.2-5/monitoring/thread_status_updater.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/thread_status_updater.cc	2022-06-08 21:08:16.000000000 +0000
@@ -16,7 +16,8 @@ namespace ROCKSDB_NAMESPACE {
 
 #ifdef ROCKSDB_USING_THREAD_STATUS
 
-__thread ThreadStatusData* ThreadStatusUpdater::thread_status_data_ = nullptr;
+thread_local ThreadStatusData* ThreadStatusUpdater::thread_status_data_ =
+    nullptr;
 
 void ThreadStatusUpdater::RegisterThread(ThreadStatus::ThreadType ttype,
                                          uint64_t thread_id) {
diff -pruN 7.2.2-5/monitoring/thread_status_updater.h 7.3.1-2/monitoring/thread_status_updater.h
--- 7.2.2-5/monitoring/thread_status_updater.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/thread_status_updater.h	2022-06-08 21:08:16.000000000 +0000
@@ -196,7 +196,7 @@ class ThreadStatusUpdater {
  protected:
 #ifdef ROCKSDB_USING_THREAD_STATUS
   // The thread-local variable for storing thread status.
-  static __thread ThreadStatusData* thread_status_data_;
+  static thread_local ThreadStatusData* thread_status_data_;
 
   // Returns the pointer to the thread status data only when the
   // thread status data is non-null and has enable_tracking == true.
diff -pruN 7.2.2-5/monitoring/thread_status_util.cc 7.3.1-2/monitoring/thread_status_util.cc
--- 7.2.2-5/monitoring/thread_status_util.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/thread_status_util.cc	2022-06-08 21:08:16.000000000 +0000
@@ -12,9 +12,9 @@
 namespace ROCKSDB_NAMESPACE {
 
 #ifdef ROCKSDB_USING_THREAD_STATUS
-__thread ThreadStatusUpdater* ThreadStatusUtil::thread_updater_local_cache_ =
-    nullptr;
-__thread bool ThreadStatusUtil::thread_updater_initialized_ = false;
+thread_local ThreadStatusUpdater*
+    ThreadStatusUtil::thread_updater_local_cache_ = nullptr;
+thread_local bool ThreadStatusUtil::thread_updater_initialized_ = false;
 
 void ThreadStatusUtil::RegisterThread(const Env* env,
                                       ThreadStatus::ThreadType thread_type) {
diff -pruN 7.2.2-5/monitoring/thread_status_util.h 7.3.1-2/monitoring/thread_status_util.h
--- 7.2.2-5/monitoring/thread_status_util.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/monitoring/thread_status_util.h	2022-06-08 21:08:16.000000000 +0000
@@ -94,7 +94,7 @@ class ThreadStatusUtil {
   // When this variable is set to true, thread_updater_local_cache_
   // will not be updated until this variable is again set to false
   // in UnregisterThread().
-  static  __thread bool thread_updater_initialized_;
+  static thread_local bool thread_updater_initialized_;
 
   // The thread-local cached ThreadStatusUpdater that caches the
   // thread_status_updater_ of the first Env that uses any ThreadStatusUtil
@@ -109,7 +109,7 @@ class ThreadStatusUtil {
   // When thread_updater_initialized_ is set to true, this variable
   // will not be updated until this thread_updater_initialized_ is
   // again set to false in UnregisterThread().
-  static __thread ThreadStatusUpdater* thread_updater_local_cache_;
+  static thread_local ThreadStatusUpdater* thread_updater_local_cache_;
 #else
   static bool thread_updater_initialized_;
   static ThreadStatusUpdater* thread_updater_local_cache_;
diff -pruN 7.2.2-5/options/cf_options.cc 7.3.1-2/options/cf_options.cc
--- 7.2.2-5/options/cf_options.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/cf_options.cc	2022-06-08 21:08:16.000000000 +0000
@@ -116,6 +116,15 @@ static Status ParseCompressionOptions(co
     compression_opts.max_dict_buffer_bytes = ParseUint64(field);
   }
 
+  // use_zstd_dict_trainer is optional for backwards compatibility
+  if (!field_stream.eof()) {
+    if (!std::getline(field_stream, field, kDelimiter)) {
+      return Status::InvalidArgument(
+          "unable to parse the specified CF option " + name);
+    }
+    compression_opts.use_zstd_dict_trainer = ParseBoolean("", field);
+  }
+
   if (!field_stream.eof()) {
     return Status::InvalidArgument("unable to parse the specified CF option " +
                                    name);
@@ -156,6 +165,10 @@ static std::unordered_map<std::string, O
          {offsetof(struct CompressionOptions, max_dict_buffer_bytes),
           OptionType::kUInt64T, OptionVerificationType::kNormal,
           OptionTypeFlags::kMutable}},
+        {"use_zstd_dict_trainer",
+         {offsetof(struct CompressionOptions, use_zstd_dict_trainer),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kMutable}},
 };
 
 static std::unordered_map<std::string, OptionTypeInfo>
@@ -364,9 +377,10 @@ static std::unordered_map<std::string, O
          OptionTypeInfo::Struct(
              "compaction_options_fifo", &fifo_compaction_options_type_info,
              offsetof(struct MutableCFOptions, compaction_options_fifo),
-             OptionVerificationType::kNormal, OptionTypeFlags::kMutable,
-             [](const ConfigOptions& opts, const std::string& name,
-                const std::string& value, void* addr) {
+             OptionVerificationType::kNormal, OptionTypeFlags::kMutable)
+             .SetParseFunc([](const ConfigOptions& opts,
+                              const std::string& name, const std::string& value,
+                              void* addr) {
                // This is to handle backward compatibility, where
                // compaction_options_fifo could be assigned a single scalar
                // value, say, like "23", which would be assigned to
@@ -556,30 +570,30 @@ static std::unordered_map<std::string, O
         {"comparator",
          OptionTypeInfo::AsCustomRawPtr<const Comparator>(
              offsetof(struct ImmutableCFOptions, user_comparator),
-             OptionVerificationType::kByName, OptionTypeFlags::kCompareLoose,
-             // Serializes a Comparator
-             [](const ConfigOptions& opts, const std::string&, const void* addr,
-                std::string* value) {
-               // it's a const pointer of const Comparator*
-               const auto* ptr = static_cast<const Comparator* const*>(addr);
-
-               // Since the user-specified comparator will be wrapped by
-               // InternalKeyComparator, we should persist the user-specified
-               // one instead of InternalKeyComparator.
-               if (*ptr == nullptr) {
-                 *value = kNullptrString;
-               } else if (opts.mutable_options_only) {
-                 *value = "";
-               } else {
-                 const Comparator* root_comp = (*ptr)->GetRootComparator();
-                 if (root_comp == nullptr) {
-                   root_comp = (*ptr);
-                 }
-                 *value = root_comp->ToString(opts);
-               }
-               return Status::OK();
-             },
-             /* Use the default match function*/ nullptr)},
+             OptionVerificationType::kByName, OptionTypeFlags::kCompareLoose)
+             .SetSerializeFunc(
+                 // Serializes a Comparator
+                 [](const ConfigOptions& opts, const std::string&,
+                    const void* addr, std::string* value) {
+                   // it's a const pointer of const Comparator*
+                   const auto* ptr =
+                       static_cast<const Comparator* const*>(addr);
+                   // Since the user-specified comparator will be wrapped by
+                   // InternalKeyComparator, we should persist the
+                   // user-specified one instead of InternalKeyComparator.
+                   if (*ptr == nullptr) {
+                     *value = kNullptrString;
+                   } else if (opts.mutable_options_only) {
+                     *value = "";
+                   } else {
+                     const Comparator* root_comp = (*ptr)->GetRootComparator();
+                     if (root_comp == nullptr) {
+                       root_comp = (*ptr);
+                     }
+                     *value = root_comp->ToString(opts);
+                   }
+                   return Status::OK();
+                 })},
         {"memtable_insert_with_hint_prefix_extractor",
          OptionTypeInfo::AsCustomSharedPtr<const SliceTransform>(
              offsetof(struct ImmutableCFOptions,
@@ -595,10 +609,7 @@ static std::unordered_map<std::string, O
             auto* shared =
                 static_cast<std::shared_ptr<MemTableRepFactory>*>(addr);
             Status s =
-                MemTableRepFactory::CreateFromString(opts, value, &factory);
-            if (factory && s.ok()) {
-              shared->reset(factory.release());
-            }
+                MemTableRepFactory::CreateFromString(opts, value, shared);
             return s;
           }}},
         {"memtable",
@@ -611,10 +622,7 @@ static std::unordered_map<std::string, O
             auto* shared =
                 static_cast<std::shared_ptr<MemTableRepFactory>*>(addr);
             Status s =
-                MemTableRepFactory::CreateFromString(opts, value, &factory);
-            if (factory && s.ok()) {
-              shared->reset(factory.release());
-            }
+                MemTableRepFactory::CreateFromString(opts, value, shared);
             return s;
           }}},
         {"table_factory",
@@ -886,7 +894,7 @@ uint64_t MultiplyCheckOverflow(uint64_t
   if (op1 == 0 || op2 <= 0) {
     return 0;
   }
-  if (port::kMaxUint64 / op1 < op2) {
+  if (std::numeric_limits<uint64_t>::max() / op1 < op2) {
     return op1;
   }
   return static_cast<uint64_t>(op1 * op2);
@@ -915,8 +923,9 @@ size_t MaxFileSizeForL0MetaPin(const Mut
   // or a former larger `write_buffer_size` value to avoid surprising users with
   // pinned memory usage. We use a factor of 1.5 to account for overhead
   // introduced during flush in most cases.
-  if (port::kMaxSizet / 3 < cf_options.write_buffer_size / 2) {
-    return port::kMaxSizet;
+  if (std::numeric_limits<size_t>::max() / 3 <
+      cf_options.write_buffer_size / 2) {
+    return std::numeric_limits<size_t>::max();
   }
   return cf_options.write_buffer_size / 2 * 3;
 }
diff -pruN 7.2.2-5/options/configurable.cc 7.3.1-2/options/configurable.cc
--- 7.2.2-5/options/configurable.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/configurable.cc	2022-06-08 21:08:16.000000000 +0000
@@ -46,20 +46,10 @@ Status Configurable::PrepareOptions(cons
     if (opt_iter.type_map != nullptr) {
       for (auto map_iter : *(opt_iter.type_map)) {
         auto& opt_info = map_iter.second;
-        if (!opt_info.IsDeprecated() && !opt_info.IsAlias() &&
-            opt_info.IsConfigurable()) {
-          if (!opt_info.IsEnabled(OptionTypeFlags::kDontPrepare)) {
-            Configurable* config =
-                opt_info.AsRawPointer<Configurable>(opt_iter.opt_ptr);
-            if (config != nullptr) {
-              status = config->PrepareOptions(opts);
-            } else if (!opt_info.CanBeNull()) {
-              status = Status::NotFound("Missing configurable object",
-                                        map_iter.first);
-            }
-            if (!status.ok()) {
-              return status;
-            }
+        if (opt_info.ShouldPrepare()) {
+          status = opt_info.Prepare(opts, map_iter.first, opt_iter.opt_ptr);
+          if (!status.ok()) {
+            return status;
           }
         }
       }
@@ -79,19 +69,11 @@ Status Configurable::ValidateOptions(con
     if (opt_iter.type_map != nullptr) {
       for (auto map_iter : *(opt_iter.type_map)) {
         auto& opt_info = map_iter.second;
-        if (!opt_info.IsDeprecated() && !opt_info.IsAlias()) {
-          if (opt_info.IsConfigurable()) {
-            const Configurable* config =
-                opt_info.AsRawPointer<Configurable>(opt_iter.opt_ptr);
-            if (config != nullptr) {
-              status = config->ValidateOptions(db_opts, cf_opts);
-            } else if (!opt_info.CanBeNull()) {
-              status = Status::NotFound("Missing configurable object",
-                                        map_iter.first);
-            }
-            if (!status.ok()) {
-              return status;
-            }
+        if (opt_info.ShouldValidate()) {
+          status = opt_info.Validate(db_opts, cf_opts, map_iter.first,
+                                     opt_iter.opt_ptr);
+          if (!status.ok()) {
+            return status;
           }
         }
       }
diff -pruN 7.2.2-5/options/configurable_test.cc 7.3.1-2/options/configurable_test.cc
--- 7.2.2-5/options/configurable_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/configurable_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -173,7 +173,7 @@ TEST_F(ConfigurableTest, GetOptionsTest)
   int i = 11;
   for (auto opt : {"", "shared.", "unique.", "pointer."}) {
     std::string value;
-    std::string expected = ToString(i);
+    std::string expected = std::to_string(i);
     std::string opt_name = opt;
     ASSERT_OK(
         simple->ConfigureOption(config_options_, opt_name + "int", expected));
diff -pruN 7.2.2-5/options/customizable.cc 7.3.1-2/options/customizable.cc
--- 7.2.2-5/options/customizable.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/customizable.cc	2022-06-08 21:08:16.000000000 +0000
@@ -76,7 +76,9 @@ bool Customizable::AreEquivalent(const C
   if (config_options.sanity_level > ConfigOptions::kSanityLevelNone &&
       this != other) {
     const Customizable* custom = reinterpret_cast<const Customizable*>(other);
-    if (GetId() != custom->GetId()) {
+    if (custom == nullptr) {  // Cast failed
+      return false;
+    } else if (GetId() != custom->GetId()) {
       *mismatch = OptionTypeInfo::kIdPropName();
       return false;
     } else if (config_options.sanity_level >
diff -pruN 7.2.2-5/options/customizable_test.cc 7.3.1-2/options/customizable_test.cc
--- 7.2.2-5/options/customizable_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/customizable_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -13,8 +13,11 @@
 #include <cinttypes>
 #include <cstring>
 #include <unordered_map>
+#include <unordered_set>
 
 #include "db/db_test_util.h"
+#include "memory/jemalloc_nodump_allocator.h"
+#include "memory/memkind_kmem_allocator.h"
 #include "options/options_helper.h"
 #include "options/options_parser.h"
 #include "port/stack_trace.h"
@@ -1655,6 +1658,190 @@ class LoadCustomizableTest : public test
 #endif  // !ROCKSDB_LITE
   }
 
+  template <typename T, typename U>
+  Status TestCreateStatic(const std::string& name, U** result,
+                          bool delete_result = false) {
+    Status s = T::CreateFromString(config_options_, name, result);
+    if (s.ok()) {
+      EXPECT_NE(*result, nullptr);
+      EXPECT_TRUE(*result != nullptr && (*result)->IsInstanceOf(name));
+    }
+    if (delete_result) {
+      delete *result;
+      *result = nullptr;
+    }
+    return s;
+  }
+
+  template <typename T, typename U>
+  std::shared_ptr<U> ExpectCreateShared(const std::string& name,
+                                        std::shared_ptr<U>* object) {
+    EXPECT_OK(T::CreateFromString(config_options_, name, object));
+    EXPECT_NE(object->get(), nullptr);
+    EXPECT_TRUE(object->get()->IsInstanceOf(name));
+    return *object;
+  }
+
+  template <typename T>
+  std::shared_ptr<T> ExpectCreateShared(const std::string& name) {
+    std::shared_ptr<T> result;
+    return ExpectCreateShared<T>(name, &result);
+  }
+
+  template <typename T, typename U>
+  Status TestExpectedBuiltins(
+      const std::string& mock, const std::unordered_set<std::string>& expected,
+      std::shared_ptr<U>* object, std::vector<std::string>* failed,
+      const std::function<std::vector<std::string>(const std::string&)>& alt =
+          nullptr) {
+    std::unordered_set<std::string> factories = expected;
+    Status s = T::CreateFromString(config_options_, mock, object);
+    EXPECT_NOK(s);
+#ifndef ROCKSDB_LITE
+    std::vector<std::string> builtins;
+    ObjectLibrary::Default()->GetFactoryNames(T::Type(), &builtins);
+    factories.insert(builtins.begin(), builtins.end());
+#endif  // ROCKSDB_LITE
+    Status result;
+    int created = 0;
+    for (const auto& name : factories) {
+      created++;
+      s = T::CreateFromString(config_options_, name, object);
+      if (!s.ok() && alt != nullptr) {
+        for (const auto& alt_name : alt(name)) {
+          s = T::CreateFromString(config_options_, alt_name, object);
+          if (s.ok()) {
+            break;
+          }
+        }
+      }
+      if (!s.ok()) {
+        result = s;
+        failed->push_back(name);
+      } else {
+        EXPECT_NE(object->get(), nullptr);
+        EXPECT_TRUE(object->get()->IsInstanceOf(name));
+      }
+    }
+#ifndef ROCKSDB_LITE
+    std::vector<std::string> plugins;
+    ObjectRegistry::Default()->GetFactoryNames(T::Type(), &plugins);
+    if (plugins.size() > builtins.size()) {
+      for (const auto& name : plugins) {
+        if (factories.find(name) == factories.end()) {
+          created++;
+          s = T::CreateFromString(config_options_, name, object);
+          if (!s.ok() && alt != nullptr) {
+            for (const auto& alt_name : alt(name)) {
+              s = T::CreateFromString(config_options_, alt_name, object);
+              if (s.ok()) {
+                break;
+              }
+            }
+          }
+          if (!s.ok()) {
+            failed->push_back(name);
+            if (result.ok()) {
+              result = s;
+            }
+            printf("%s: Failed creating plugin[%s]: %s\n", T::Type(),
+                   name.c_str(), s.ToString().c_str());
+          } else if (object->get() == nullptr ||
+                     !object->get()->IsInstanceOf(name)) {
+            failed->push_back(name);
+            printf("%s: Invalid plugin[%s]\n", T::Type(), name.c_str());
+          }
+        }
+      }
+    }
+    printf("%s: Created %d (expected+builtins+plugins %d+%d+%d) %d Failed\n",
+           T::Type(), created, (int)expected.size(),
+           (int)(factories.size() - expected.size()),
+           (int)(plugins.size() - builtins.size()), (int)failed->size());
+#else
+    printf("%s: Created %d (expected %d) %d Failed\n", T::Type(), created,
+           (int)expected.size(), (int)failed->size());
+#endif  // ROCKSDB_LITE
+    return result;
+  }
+
+  template <typename T>
+  Status TestSharedBuiltins(const std::string& mock,
+                            const std::string& expected,
+                            std::vector<std::string>* failed = nullptr) {
+    std::unordered_set<std::string> values;
+    if (!expected.empty()) {
+      values.insert(expected);
+    }
+    std::shared_ptr<T> object;
+    if (failed != nullptr) {
+      return TestExpectedBuiltins<T>(mock, values, &object, failed);
+    } else {
+      std::vector<std::string> failures;
+      Status s = TestExpectedBuiltins<T>(mock, values, &object, &failures);
+      EXPECT_EQ(0U, failures.size());
+      return s;
+    }
+  }
+
+  template <typename T, typename U>
+  Status TestStaticBuiltins(const std::string& mock, U** object,
+                            const std::unordered_set<std::string>& expected,
+                            std::vector<std::string>* failed,
+                            bool delete_objects = false) {
+    std::unordered_set<std::string> factories = expected;
+    Status s = TestCreateStatic<T>(mock, object, delete_objects);
+    EXPECT_NOK(s);
+#ifndef ROCKSDB_LITE
+    std::vector<std::string> builtins;
+    ObjectLibrary::Default()->GetFactoryNames(T::Type(), &builtins);
+    factories.insert(builtins.begin(), builtins.end());
+#endif  // ROCKSDB_LITE
+    int created = 0;
+    Status result;
+    for (const auto& name : factories) {
+      created++;
+      s = TestCreateStatic<T>(name, object, delete_objects);
+      if (!s.ok()) {
+        result = s;
+        failed->push_back(name);
+      }
+    }
+#ifndef ROCKSDB_LITE
+    std::vector<std::string> plugins;
+    ObjectRegistry::Default()->GetFactoryNames(T::Type(), &plugins);
+    if (plugins.size() > builtins.size()) {
+      for (const auto& name : plugins) {
+        if (factories.find(name) == factories.end()) {
+          created++;
+          s = T::CreateFromString(config_options_, name, object);
+          if (!s.ok() || *object == nullptr ||
+              !((*object)->IsInstanceOf(name))) {
+            failed->push_back(name);
+            if (result.ok() && !s.ok()) {
+              result = s;
+            }
+            printf("%s: Failed creating plugin[%s]: %s\n", T::Type(),
+                   name.c_str(), s.ToString().c_str());
+          }
+          if (delete_objects) {
+            delete *object;
+            *object = nullptr;
+          }
+        }
+      }
+    }
+    printf("%s: Created %d (expected+builtins+plugins %d+%d+%d) %d Failed\n",
+           T::Type(), created, (int)expected.size(),
+           (int)(factories.size() - expected.size()),
+           (int)(plugins.size() - builtins.size()), (int)failed->size());
+#else
+    printf("%s: Created %d (expected %d) %d Failed\n", T::Type(), created,
+           (int)expected.size(), (int)failed->size());
+#endif  // ROCKSDB_LITE
+    return result;
+  }
+
  protected:
   DBOptions db_opts_;
   ColumnFamilyOptions cf_opts_;
@@ -1662,13 +1849,9 @@ class LoadCustomizableTest : public test
 };
 
 TEST_F(LoadCustomizableTest, LoadTableFactoryTest) {
-  std::shared_ptr<TableFactory> factory;
-  ASSERT_NOK(TableFactory::CreateFromString(
-      config_options_, mock::MockTableFactory::kClassName(), &factory));
-  ASSERT_OK(TableFactory::CreateFromString(
-      config_options_, TableFactory::kBlockBasedTableName(), &factory));
-  ASSERT_NE(factory, nullptr);
-  ASSERT_STREQ(factory->Name(), TableFactory::kBlockBasedTableName());
+  ASSERT_OK(
+      TestSharedBuiltins<TableFactory>(mock::MockTableFactory::kClassName(),
+                                       TableFactory::kBlockBasedTableName()));
 #ifndef ROCKSDB_LITE
   std::string opts_str = "table_factory=";
   ASSERT_OK(GetColumnFamilyOptionsFromString(
@@ -1679,10 +1862,7 @@ TEST_F(LoadCustomizableTest, LoadTableFa
                TableFactory::kBlockBasedTableName());
 #endif  // ROCKSDB_LITE
   if (RegisterTests("Test")) {
-    ASSERT_OK(TableFactory::CreateFromString(
-        config_options_, mock::MockTableFactory::kClassName(), &factory));
-    ASSERT_NE(factory, nullptr);
-    ASSERT_STREQ(factory->Name(), mock::MockTableFactory::kClassName());
+    ExpectCreateShared<TableFactory>(mock::MockTableFactory::kClassName());
 #ifndef ROCKSDB_LITE
     ASSERT_OK(GetColumnFamilyOptionsFromString(
         config_options_, cf_opts_,
@@ -1695,151 +1875,97 @@ TEST_F(LoadCustomizableTest, LoadTableFa
 }
 
 TEST_F(LoadCustomizableTest, LoadFileSystemTest) {
-  ColumnFamilyOptions cf_opts;
-  std::shared_ptr<FileSystem> result;
-  ASSERT_NOK(FileSystem::CreateFromString(
-      config_options_, DummyFileSystem::kClassName(), &result));
-  ASSERT_OK(FileSystem::CreateFromString(config_options_,
-                                         FileSystem::kDefaultName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_TRUE(result->IsInstanceOf(FileSystem::kDefaultName()));
+  ASSERT_OK(TestSharedBuiltins<FileSystem>(DummyFileSystem::kClassName(),
+                                           FileSystem::kDefaultName()));
   if (RegisterTests("Test")) {
-    ASSERT_OK(FileSystem::CreateFromString(
-        config_options_, DummyFileSystem::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), DummyFileSystem::kClassName());
-    ASSERT_FALSE(result->IsInstanceOf(FileSystem::kDefaultName()));
+    auto fs = ExpectCreateShared<FileSystem>(DummyFileSystem::kClassName());
+    ASSERT_FALSE(fs->IsInstanceOf(FileSystem::kDefaultName()));
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadSecondaryCacheTest) {
-  std::shared_ptr<SecondaryCache> result;
-  ASSERT_NOK(SecondaryCache::CreateFromString(
-      config_options_, TestSecondaryCache::kClassName(), &result));
+  ASSERT_OK(
+      TestSharedBuiltins<SecondaryCache>(TestSecondaryCache::kClassName(), ""));
   if (RegisterTests("Test")) {
-    ASSERT_OK(SecondaryCache::CreateFromString(
-        config_options_, TestSecondaryCache::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), TestSecondaryCache::kClassName());
+    ExpectCreateShared<SecondaryCache>(TestSecondaryCache::kClassName());
   }
 }
 
 #ifndef ROCKSDB_LITE
 TEST_F(LoadCustomizableTest, LoadSstPartitionerFactoryTest) {
-  std::shared_ptr<SstPartitionerFactory> factory;
-  ASSERT_NOK(SstPartitionerFactory::CreateFromString(config_options_, "Mock",
-                                                     &factory));
-  ASSERT_OK(SstPartitionerFactory::CreateFromString(
-      config_options_, SstPartitionerFixedPrefixFactory::kClassName(),
-      &factory));
-  ASSERT_NE(factory, nullptr);
-  ASSERT_STREQ(factory->Name(), SstPartitionerFixedPrefixFactory::kClassName());
-
+  ASSERT_OK(TestSharedBuiltins<SstPartitionerFactory>(
+      "Mock", SstPartitionerFixedPrefixFactory::kClassName()));
   if (RegisterTests("Test")) {
-    ASSERT_OK(SstPartitionerFactory::CreateFromString(config_options_, "Mock",
-                                                      &factory));
-    ASSERT_NE(factory, nullptr);
-    ASSERT_STREQ(factory->Name(), "Mock");
+    ExpectCreateShared<SstPartitionerFactory>("Mock");
   }
 }
 #endif  // ROCKSDB_LITE
 
 TEST_F(LoadCustomizableTest, LoadChecksumGenFactoryTest) {
-  std::shared_ptr<FileChecksumGenFactory> factory;
-  ASSERT_NOK(FileChecksumGenFactory::CreateFromString(config_options_, "Mock",
-                                                      &factory));
-  ASSERT_OK(FileChecksumGenFactory::CreateFromString(
-      config_options_, FileChecksumGenCrc32cFactory::kClassName(), &factory));
-  ASSERT_NE(factory, nullptr);
-  ASSERT_STREQ(factory->Name(), FileChecksumGenCrc32cFactory::kClassName());
-
+  ASSERT_OK(TestSharedBuiltins<FileChecksumGenFactory>("Mock", ""));
   if (RegisterTests("Test")) {
-    ASSERT_OK(FileChecksumGenFactory::CreateFromString(config_options_, "Mock",
-                                                       &factory));
-    ASSERT_NE(factory, nullptr);
-    ASSERT_STREQ(factory->Name(), "Mock");
+    ExpectCreateShared<FileChecksumGenFactory>("Mock");
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadTablePropertiesCollectorFactoryTest) {
-  std::shared_ptr<TablePropertiesCollectorFactory> factory;
-  ASSERT_NOK(TablePropertiesCollectorFactory::CreateFromString(
-      config_options_, MockTablePropertiesCollectorFactory::kClassName(),
-      &factory));
+  ASSERT_OK(TestSharedBuiltins<TablePropertiesCollectorFactory>(
+      MockTablePropertiesCollectorFactory::kClassName(), ""));
   if (RegisterTests("Test")) {
-    ASSERT_OK(TablePropertiesCollectorFactory::CreateFromString(
-        config_options_, MockTablePropertiesCollectorFactory::kClassName(),
-        &factory));
-    ASSERT_NE(factory, nullptr);
-    ASSERT_STREQ(factory->Name(),
-                 MockTablePropertiesCollectorFactory::kClassName());
+    ExpectCreateShared<TablePropertiesCollectorFactory>(
+        MockTablePropertiesCollectorFactory::kClassName());
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadComparatorTest) {
   const Comparator* bytewise = BytewiseComparator();
   const Comparator* reverse = ReverseBytewiseComparator();
-
   const Comparator* result = nullptr;
-  ASSERT_NOK(Comparator::CreateFromString(
-      config_options_, test::SimpleSuffixReverseComparator::kClassName(),
-      &result));
-  ASSERT_OK(
-      Comparator::CreateFromString(config_options_, bytewise->Name(), &result));
-  ASSERT_EQ(result, bytewise);
-  ASSERT_OK(
-      Comparator::CreateFromString(config_options_, reverse->Name(), &result));
-  ASSERT_EQ(result, reverse);
-
+  std::unordered_set<std::string> expected = {bytewise->Name(),
+                                              reverse->Name()};
+  std::vector<std::string> failures;
+  ASSERT_OK(TestStaticBuiltins<Comparator>(
+      test::SimpleSuffixReverseComparator::kClassName(), &result, expected,
+      &failures));
   if (RegisterTests("Test")) {
-    ASSERT_OK(Comparator::CreateFromString(
-        config_options_, test::SimpleSuffixReverseComparator::kClassName(),
-        &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(),
-                 test::SimpleSuffixReverseComparator::kClassName());
+    ASSERT_OK(TestCreateStatic<Comparator>(
+        test::SimpleSuffixReverseComparator::kClassName(), &result));
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadSliceTransformFactoryTest) {
   std::shared_ptr<const SliceTransform> result;
-  ASSERT_NOK(
-      SliceTransform::CreateFromString(config_options_, "Mock", &result));
-  ASSERT_OK(
-      SliceTransform::CreateFromString(config_options_, "fixed:16", &result));
-  ASSERT_NE(result.get(), nullptr);
-  ASSERT_TRUE(result->IsInstanceOf("fixed"));
+  std::vector<std::string> failures;
+  std::unordered_set<std::string> expected = {"rocksdb.Noop", "fixed",
+                                              "rocksdb.FixedPrefix", "capped",
+                                              "rocksdb.CappedPrefix"};
+  ASSERT_OK(TestExpectedBuiltins<SliceTransform>(
+      "Mock", expected, &result, &failures, [](const std::string& name) {
+        std::vector<std::string> names = {name + ":22", name + ".22"};
+        return names;
+      }));
   ASSERT_OK(SliceTransform::CreateFromString(
       config_options_, "rocksdb.FixedPrefix.22", &result));
   ASSERT_NE(result.get(), nullptr);
   ASSERT_TRUE(result->IsInstanceOf("fixed"));
-
-  ASSERT_OK(
-      SliceTransform::CreateFromString(config_options_, "capped:16", &result));
-  ASSERT_NE(result.get(), nullptr);
-  ASSERT_TRUE(result->IsInstanceOf("capped"));
-
   ASSERT_OK(SliceTransform::CreateFromString(
-      config_options_, "rocksdb.CappedPrefix.11", &result));
+      config_options_, "rocksdb.CappedPrefix.22", &result));
   ASSERT_NE(result.get(), nullptr);
   ASSERT_TRUE(result->IsInstanceOf("capped"));
-
   if (RegisterTests("Test")) {
-    ASSERT_OK(
-        SliceTransform::CreateFromString(config_options_, "Mock", &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "Mock");
+    ExpectCreateShared<SliceTransform>("Mock", &result);
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadStatisticsTest) {
-  std::shared_ptr<Statistics> stats;
-  ASSERT_NOK(Statistics::CreateFromString(
-      config_options_, TestStatistics::kClassName(), &stats));
-  ASSERT_OK(
-      Statistics::CreateFromString(config_options_, "BasicStatistics", &stats));
-  ASSERT_NE(stats, nullptr);
-  ASSERT_EQ(stats->Name(), std::string("BasicStatistics"));
+  ASSERT_OK(TestSharedBuiltins<Statistics>(TestStatistics::kClassName(),
+                                           "BasicStatistics"));
+  // Empty will create a default BasicStatistics
+  ASSERT_OK(
+      Statistics::CreateFromString(config_options_, "", &db_opts_.statistics));
+  ASSERT_NE(db_opts_.statistics, nullptr);
+  ASSERT_STREQ(db_opts_.statistics->Name(), "BasicStatistics");
+
 #ifndef ROCKSDB_LITE
   ASSERT_NOK(GetDBOptionsFromString(config_options_, db_opts_,
                                     "statistics=Test", &db_opts_));
@@ -1849,10 +1975,7 @@ TEST_F(LoadCustomizableTest, LoadStatist
   ASSERT_STREQ(db_opts_.statistics->Name(), "BasicStatistics");
 
   if (RegisterTests("test")) {
-    ASSERT_OK(Statistics::CreateFromString(
-        config_options_, TestStatistics::kClassName(), &stats));
-    ASSERT_NE(stats, nullptr);
-    ASSERT_STREQ(stats->Name(), TestStatistics::kClassName());
+    auto stats = ExpectCreateShared<Statistics>(TestStatistics::kClassName());
 
     ASSERT_OK(GetDBOptionsFromString(config_options_, db_opts_,
                                      "statistics=Test", &db_opts_));
@@ -1883,167 +2006,90 @@ TEST_F(LoadCustomizableTest, LoadStatist
 }
 
 TEST_F(LoadCustomizableTest, LoadMemTableRepFactoryTest) {
-  std::unique_ptr<MemTableRepFactory> result;
-  ASSERT_NOK(MemTableRepFactory::CreateFromString(
-      config_options_, "SpecialSkipListFactory", &result));
-  ASSERT_OK(MemTableRepFactory::CreateFromString(
-      config_options_, SkipListFactory::kClassName(), &result));
-  ASSERT_NE(result.get(), nullptr);
-  ASSERT_TRUE(result->IsInstanceOf(SkipListFactory::kClassName()));
+  std::unordered_set<std::string> expected = {
+      SkipListFactory::kClassName(),
+      SkipListFactory::kNickName(),
+  };
 
+  std::vector<std::string> failures;
+  std::shared_ptr<MemTableRepFactory> factory;
+  Status s = TestExpectedBuiltins<MemTableRepFactory>(
+      "SpecialSkipListFactory", expected, &factory, &failures);
+  // There is a "cuckoo" factory registered that we expect to fail.  Ignore the
+  // error if this is the one
+  if (s.ok() || failures.size() > 1 || failures[0] != "cuckoo") {
+    ASSERT_OK(s);
+  }
   if (RegisterTests("Test")) {
-    ASSERT_OK(MemTableRepFactory::CreateFromString(
-        config_options_, "SpecialSkipListFactory", &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "SpecialSkipListFactory");
+    ExpectCreateShared<MemTableRepFactory>("SpecialSkipListFactory");
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadMergeOperatorTest) {
   std::shared_ptr<MergeOperator> result;
-
-  ASSERT_NOK(
-      MergeOperator::CreateFromString(config_options_, "Changling", &result));
-  //**TODO: MJR: Use the constants when these names are in public classes
-  ASSERT_OK(MergeOperator::CreateFromString(config_options_, "put", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "PutOperator");
-  ASSERT_OK(
-      MergeOperator::CreateFromString(config_options_, "PutOperator", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "PutOperator");
-  ASSERT_OK(
-      MergeOperator::CreateFromString(config_options_, "put_v1", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "PutOperator");
-
-  ASSERT_OK(
-      MergeOperator::CreateFromString(config_options_, "uint64add", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "UInt64AddOperator");
-  ASSERT_OK(MergeOperator::CreateFromString(config_options_,
-                                            "UInt64AddOperator", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "UInt64AddOperator");
-
-  ASSERT_OK(MergeOperator::CreateFromString(config_options_, "max", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "MaxOperator");
-  ASSERT_OK(
-      MergeOperator::CreateFromString(config_options_, "MaxOperator", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "MaxOperator");
+  std::vector<std::string> failed;
+  std::unordered_set<std::string> expected = {
+      "put", "put_v1",      "PutOperator", "uint64add", "UInt64AddOperator",
+      "max", "MaxOperator",
+  };
 #ifndef ROCKSDB_LITE
-  ASSERT_OK(MergeOperator::CreateFromString(
-      config_options_, StringAppendOperator::kNickName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), StringAppendOperator::kClassName());
-  ASSERT_OK(MergeOperator::CreateFromString(
-      config_options_, StringAppendOperator::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), StringAppendOperator::kClassName());
-
-  ASSERT_OK(MergeOperator::CreateFromString(
-      config_options_, StringAppendTESTOperator::kNickName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), StringAppendTESTOperator::kClassName());
-  ASSERT_OK(MergeOperator::CreateFromString(
-      config_options_, StringAppendTESTOperator::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), StringAppendTESTOperator::kClassName());
-
-  ASSERT_OK(MergeOperator::CreateFromString(config_options_,
-                                            SortList::kNickName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), SortList::kClassName());
-  ASSERT_OK(MergeOperator::CreateFromString(config_options_,
-                                            SortList::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), SortList::kClassName());
-
-  ASSERT_OK(MergeOperator::CreateFromString(
-      config_options_, BytesXOROperator::kNickName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), BytesXOROperator::kClassName());
-  ASSERT_OK(MergeOperator::CreateFromString(
-      config_options_, BytesXOROperator::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), BytesXOROperator::kClassName());
+  expected.insert({
+      StringAppendOperator::kClassName(),
+      StringAppendOperator::kNickName(),
+      StringAppendTESTOperator::kClassName(),
+      StringAppendTESTOperator::kNickName(),
+      SortList::kClassName(),
+      SortList::kNickName(),
+      BytesXOROperator::kClassName(),
+      BytesXOROperator::kNickName(),
+  });
 #endif  // ROCKSDB_LITE
-  ASSERT_NOK(
-      MergeOperator::CreateFromString(config_options_, "Changling", &result));
+
+  ASSERT_OK(TestExpectedBuiltins<MergeOperator>("Changling", expected, &result,
+                                                &failed));
   if (RegisterTests("Test")) {
-    ASSERT_OK(
-        MergeOperator::CreateFromString(config_options_, "Changling", &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "ChanglingMergeOperator");
+    ExpectCreateShared<MergeOperator>("Changling");
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadCompactionFilterFactoryTest) {
-  std::shared_ptr<CompactionFilterFactory> result;
-
-  ASSERT_NOK(CompactionFilterFactory::CreateFromString(config_options_,
-                                                       "Changling", &result));
+  ASSERT_OK(TestSharedBuiltins<CompactionFilterFactory>("Changling", ""));
   if (RegisterTests("Test")) {
-    ASSERT_OK(CompactionFilterFactory::CreateFromString(config_options_,
-                                                        "Changling", &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "ChanglingCompactionFilterFactory");
+    ExpectCreateShared<CompactionFilterFactory>("Changling");
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadCompactionFilterTest) {
   const CompactionFilter* result = nullptr;
-
-  ASSERT_NOK(CompactionFilter::CreateFromString(config_options_, "Changling",
-                                                &result));
-#ifndef ROCKSDB_LITE
-  ASSERT_OK(CompactionFilter::CreateFromString(
-      config_options_, RemoveEmptyValueCompactionFilter::kClassName(),
-      &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), RemoveEmptyValueCompactionFilter::kClassName());
-  delete result;
-  result = nullptr;
+  std::vector<std::string> failures;
+  ASSERT_OK(TestStaticBuiltins<CompactionFilter>("Changling", &result, {},
+                                                 &failures, true));
   if (RegisterTests("Test")) {
-    ASSERT_OK(CompactionFilter::CreateFromString(config_options_, "Changling",
-                                                 &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "ChanglingCompactionFilter");
-    delete result;
+    ASSERT_OK(TestCreateStatic<CompactionFilter>("Changling", &result, true));
   }
-#endif  // ROCKSDB_LITE
 }
 
 #ifndef ROCKSDB_LITE
 TEST_F(LoadCustomizableTest, LoadEventListenerTest) {
-  std::shared_ptr<EventListener> result;
-
-  ASSERT_NOK(EventListener::CreateFromString(
-      config_options_, OnFileDeletionListener::kClassName(), &result));
-  ASSERT_NOK(EventListener::CreateFromString(
-      config_options_, FlushCounterListener::kClassName(), &result));
+  ASSERT_OK(TestSharedBuiltins<EventListener>(
+      OnFileDeletionListener::kClassName(), ""));
   if (RegisterTests("Test")) {
-    ASSERT_OK(EventListener::CreateFromString(
-        config_options_, OnFileDeletionListener::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), OnFileDeletionListener::kClassName());
-    ASSERT_OK(EventListener::CreateFromString(
-        config_options_, FlushCounterListener::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), FlushCounterListener::kClassName());
+    ExpectCreateShared<EventListener>(OnFileDeletionListener::kClassName());
+    ExpectCreateShared<EventListener>(FlushCounterListener::kClassName());
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadEncryptionProviderTest) {
+  std::vector<std::string> failures;
   std::shared_ptr<EncryptionProvider> result;
-  ASSERT_NOK(
-      EncryptionProvider::CreateFromString(config_options_, "Mock", &result));
   ASSERT_OK(
-      EncryptionProvider::CreateFromString(config_options_, "CTR", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "CTR");
+      TestExpectedBuiltins<EncryptionProvider>("Mock", {}, &result, &failures));
+  if (!failures.empty()) {
+    ASSERT_EQ(failures[0], "1://test");
+    ASSERT_EQ(failures.size(), 1U);
+  }
+
+  result = ExpectCreateShared<EncryptionProvider>("CTR");
   ASSERT_NOK(result->ValidateOptions(db_opts_, cf_opts_));
   ASSERT_OK(EncryptionProvider::CreateFromString(config_options_, "CTR://test",
                                                  &result));
@@ -2052,10 +2098,7 @@ TEST_F(LoadCustomizableTest, LoadEncrypt
   ASSERT_OK(result->ValidateOptions(db_opts_, cf_opts_));
 
   if (RegisterTests("Test")) {
-    ASSERT_OK(
-        EncryptionProvider::CreateFromString(config_options_, "Mock", &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "Mock");
+    ExpectCreateShared<EncryptionProvider>("Mock");
     ASSERT_OK(EncryptionProvider::CreateFromString(config_options_,
                                                    "Mock://test", &result));
     ASSERT_NE(result, nullptr);
@@ -2065,72 +2108,69 @@ TEST_F(LoadCustomizableTest, LoadEncrypt
 }
 
 TEST_F(LoadCustomizableTest, LoadEncryptionCipherTest) {
-  std::shared_ptr<BlockCipher> result;
-  ASSERT_NOK(BlockCipher::CreateFromString(config_options_, "Mock", &result));
-  ASSERT_OK(BlockCipher::CreateFromString(config_options_, "ROT13", &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), "ROT13");
+  ASSERT_OK(TestSharedBuiltins<BlockCipher>("Mock", "ROT13"));
   if (RegisterTests("Test")) {
-    ASSERT_OK(BlockCipher::CreateFromString(config_options_, "Mock", &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), "Mock");
+    ExpectCreateShared<BlockCipher>("Mock");
   }
 }
 #endif  // !ROCKSDB_LITE
 
 TEST_F(LoadCustomizableTest, LoadSystemClockTest) {
-  std::shared_ptr<SystemClock> result;
-  ASSERT_NOK(SystemClock::CreateFromString(
-      config_options_, MockSystemClock::kClassName(), &result));
-  ASSERT_OK(SystemClock::CreateFromString(
-      config_options_, SystemClock::kDefaultName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_TRUE(result->IsInstanceOf(SystemClock::kDefaultName()));
+  ASSERT_OK(TestSharedBuiltins<SystemClock>(MockSystemClock::kClassName(),
+                                            SystemClock::kDefaultName()));
   if (RegisterTests("Test")) {
-    ASSERT_OK(SystemClock::CreateFromString(
-        config_options_, MockSystemClock::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), MockSystemClock::kClassName());
+    auto result =
+        ExpectCreateShared<SystemClock>(MockSystemClock::kClassName());
+    ASSERT_FALSE(result->IsInstanceOf(SystemClock::kDefaultName()));
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadMemoryAllocatorTest) {
-  std::shared_ptr<MemoryAllocator> result;
-  ASSERT_NOK(MemoryAllocator::CreateFromString(
-      config_options_, MockMemoryAllocator::kClassName(), &result));
-  ASSERT_OK(MemoryAllocator::CreateFromString(
-      config_options_, DefaultMemoryAllocator::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), DefaultMemoryAllocator::kClassName());
+  std::vector<std::string> failures;
+  Status s = TestSharedBuiltins<MemoryAllocator>(
+      MockMemoryAllocator::kClassName(), DefaultMemoryAllocator::kClassName(),
+      &failures);
+  if (failures.empty()) {
+    ASSERT_OK(s);
+  } else {
+    ASSERT_NOK(s);
+    for (const auto& failure : failures) {
+      if (failure == JemallocNodumpAllocator::kClassName()) {
+        ASSERT_FALSE(JemallocNodumpAllocator::IsSupported());
+      } else if (failure == MemkindKmemAllocator::kClassName()) {
+        ASSERT_FALSE(MemkindKmemAllocator::IsSupported());
+      } else {
+        printf("BYPASSED: %s -- %s\n", failure.c_str(), s.ToString().c_str());
+      }
+    }
+  }
   if (RegisterTests("Test")) {
-    ASSERT_OK(MemoryAllocator::CreateFromString(
-        config_options_, MockMemoryAllocator::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), MockMemoryAllocator::kClassName());
+    ExpectCreateShared<MemoryAllocator>(MockMemoryAllocator::kClassName());
   }
 }
 
 TEST_F(LoadCustomizableTest, LoadRateLimiterTest) {
+#ifndef ROCKSDB_LITE
+  ASSERT_OK(TestSharedBuiltins<RateLimiter>(MockRateLimiter::kClassName(),
+                                            GenericRateLimiter::kClassName()));
+#else
+  ASSERT_OK(TestSharedBuiltins<RateLimiter>(MockRateLimiter::kClassName(), ""));
+#endif  // ROCKSDB_LITE
+
   std::shared_ptr<RateLimiter> result;
-  ASSERT_NOK(RateLimiter::CreateFromString(
-      config_options_, MockRateLimiter::kClassName(), &result));
   ASSERT_OK(RateLimiter::CreateFromString(
       config_options_, std::string(GenericRateLimiter::kClassName()) + ":1234",
       &result));
   ASSERT_NE(result, nullptr);
+  ASSERT_TRUE(result->IsInstanceOf(GenericRateLimiter::kClassName()));
 #ifndef ROCKSDB_LITE
-  ASSERT_OK(RateLimiter::CreateFromString(
-      config_options_, GenericRateLimiter::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
   ASSERT_OK(GetDBOptionsFromString(
       config_options_, db_opts_,
       std::string("rate_limiter=") + GenericRateLimiter::kClassName(),
       &db_opts_));
   ASSERT_NE(db_opts_.rate_limiter, nullptr);
   if (RegisterTests("Test")) {
-    ASSERT_OK(RateLimiter::CreateFromString(
-        config_options_, MockRateLimiter::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
+    ExpectCreateShared<RateLimiter>(MockRateLimiter::kClassName());
     ASSERT_OK(GetDBOptionsFromString(
         config_options_, db_opts_,
         std::string("rate_limiter=") + MockRateLimiter::kClassName(),
@@ -2141,17 +2181,52 @@ TEST_F(LoadCustomizableTest, LoadRateLim
 }
 
 TEST_F(LoadCustomizableTest, LoadFilterPolicyTest) {
-  std::shared_ptr<TableFactory> table;
+  const std::string kAutoBloom = BloomFilterPolicy::kClassName();
+  const std::string kAutoRibbon = RibbonFilterPolicy::kClassName();
+
   std::shared_ptr<const FilterPolicy> result;
-  ASSERT_NOK(FilterPolicy::CreateFromString(
-      config_options_, MockFilterPolicy::kClassName(), &result));
+  std::vector<std::string> failures;
+  std::unordered_set<std::string> expected = {
+      ReadOnlyBuiltinFilterPolicy::kClassName(),
+  };
 
-  ASSERT_OK(FilterPolicy::CreateFromString(config_options_, "", &result));
-  ASSERT_EQ(result, nullptr);
+#ifndef ROCKSDB_LITE
+  expected.insert({
+      kAutoBloom,
+      BloomFilterPolicy::kNickName(),
+      kAutoRibbon,
+      RibbonFilterPolicy::kNickName(),
+  });
+#endif  // ROCKSDB_LITE
+  ASSERT_OK(TestExpectedBuiltins<const FilterPolicy>(
+      "Mock", expected, &result, &failures, [](const std::string& name) {
+        std::vector<std::string> names = {name + ":1.234"};
+        return names;
+      }));
+#ifndef ROCKSDB_LITE
   ASSERT_OK(FilterPolicy::CreateFromString(
-      config_options_, ReadOnlyBuiltinFilterPolicy::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), ReadOnlyBuiltinFilterPolicy::kClassName());
+      config_options_, kAutoBloom + ":1.234:false", &result));
+  ASSERT_NE(result.get(), nullptr);
+  ASSERT_TRUE(result->IsInstanceOf(kAutoBloom));
+  ASSERT_OK(FilterPolicy::CreateFromString(
+      config_options_, kAutoBloom + ":1.234:false", &result));
+  ASSERT_NE(result.get(), nullptr);
+  ASSERT_TRUE(result->IsInstanceOf(kAutoBloom));
+  ASSERT_OK(FilterPolicy::CreateFromString(config_options_,
+                                           kAutoRibbon + ":1.234:-1", &result));
+  ASSERT_NE(result.get(), nullptr);
+  ASSERT_TRUE(result->IsInstanceOf(kAutoRibbon));
+  ASSERT_OK(FilterPolicy::CreateFromString(config_options_,
+                                           kAutoRibbon + ":1.234:56", &result));
+  ASSERT_NE(result.get(), nullptr);
+  ASSERT_TRUE(result->IsInstanceOf(kAutoRibbon));
+#endif  // ROCKSDB_LITE
+
+  if (RegisterTests("Test")) {
+    ExpectCreateShared<FilterPolicy>(MockFilterPolicy::kClassName(), &result);
+  }
+
+  std::shared_ptr<TableFactory> table;
 
 #ifndef ROCKSDB_LITE
   std::string table_opts = "id=BlockBasedTable; filter_policy=";
@@ -2173,42 +2248,30 @@ TEST_F(LoadCustomizableTest, LoadFilterP
       config_options_, table_opts + MockFilterPolicy::kClassName(), &table));
   bbto = table->GetOptions<BlockBasedTableOptions>();
   ASSERT_NE(bbto, nullptr);
-  ASSERT_EQ(bbto->filter_policy.get(), nullptr);
-  if (RegisterTests("Test")) {
-    ASSERT_OK(FilterPolicy::CreateFromString(
-        config_options_, MockFilterPolicy::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), MockFilterPolicy::kClassName());
-    ASSERT_OK(TableFactory::CreateFromString(
-        config_options_, table_opts + MockFilterPolicy::kClassName(), &table));
-    bbto = table->GetOptions<BlockBasedTableOptions>();
-    ASSERT_NE(bbto, nullptr);
-    ASSERT_NE(bbto->filter_policy.get(), nullptr);
-    ASSERT_STREQ(bbto->filter_policy->Name(), MockFilterPolicy::kClassName());
-  }
+  ASSERT_NE(bbto->filter_policy.get(), nullptr);
+  ASSERT_TRUE(
+      bbto->filter_policy->IsInstanceOf(MockFilterPolicy::kClassName()));
 #endif  // ROCKSDB_LITE
 }
 
 TEST_F(LoadCustomizableTest, LoadFlushBlockPolicyFactoryTest) {
-  std::shared_ptr<TableFactory> table;
   std::shared_ptr<FlushBlockPolicyFactory> result;
-  ASSERT_NOK(FlushBlockPolicyFactory::CreateFromString(
-      config_options_, TestFlushBlockPolicyFactory::kClassName(), &result));
+  std::shared_ptr<TableFactory> table;
+  std::vector<std::string> failed;
+  std::unordered_set<std::string> expected = {
+      FlushBlockBySizePolicyFactory::kClassName(),
+      FlushBlockEveryKeyPolicyFactory::kClassName(),
+  };
+
+  ASSERT_OK(TestExpectedBuiltins<FlushBlockPolicyFactory>(
+      TestFlushBlockPolicyFactory::kClassName(), expected, &result, &failed));
 
+  // An empty policy name creates a BySize policy
   ASSERT_OK(
       FlushBlockPolicyFactory::CreateFromString(config_options_, "", &result));
   ASSERT_NE(result, nullptr);
   ASSERT_STREQ(result->Name(), FlushBlockBySizePolicyFactory::kClassName());
 
-  ASSERT_OK(FlushBlockPolicyFactory::CreateFromString(
-      config_options_, FlushBlockEveryKeyPolicyFactory::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), FlushBlockEveryKeyPolicyFactory::kClassName());
-
-  ASSERT_OK(FlushBlockPolicyFactory::CreateFromString(
-      config_options_, FlushBlockBySizePolicyFactory::kClassName(), &result));
-  ASSERT_NE(result, nullptr);
-  ASSERT_STREQ(result->Name(), FlushBlockBySizePolicyFactory::kClassName());
 #ifndef ROCKSDB_LITE
   std::string table_opts = "id=BlockBasedTable; flush_block_policy_factory=";
   ASSERT_OK(TableFactory::CreateFromString(
@@ -2220,10 +2283,8 @@ TEST_F(LoadCustomizableTest, LoadFlushBl
   ASSERT_STREQ(bbto->flush_block_policy_factory->Name(),
                FlushBlockEveryKeyPolicyFactory::kClassName());
   if (RegisterTests("Test")) {
-    ASSERT_OK(FlushBlockPolicyFactory::CreateFromString(
-        config_options_, TestFlushBlockPolicyFactory::kClassName(), &result));
-    ASSERT_NE(result, nullptr);
-    ASSERT_STREQ(result->Name(), TestFlushBlockPolicyFactory::kClassName());
+    ExpectCreateShared<FlushBlockPolicyFactory>(
+        TestFlushBlockPolicyFactory::kClassName());
     ASSERT_OK(TableFactory::CreateFromString(
         config_options_, table_opts + TestFlushBlockPolicyFactory::kClassName(),
         &table));
diff -pruN 7.2.2-5/options/db_options.cc 7.3.1-2/options/db_options.cc
--- 7.2.2-5/options/db_options.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/db_options.cc	2022-06-08 21:08:16.000000000 +0000
@@ -228,6 +228,10 @@ static std::unordered_map<std::string, O
                    track_and_verify_wals_in_manifest),
           OptionType::kBoolean, OptionVerificationType::kNormal,
           OptionTypeFlags::kNone}},
+        {"verify_sst_unique_id_in_manifest",
+         {offsetof(struct ImmutableDBOptions, verify_sst_unique_id_in_manifest),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
         {"skip_log_error_on_recovery",
          {0, OptionType::kBoolean, OptionVerificationType::kDeprecated,
           OptionTypeFlags::kNone}},
@@ -439,22 +443,36 @@ static std::unordered_map<std::string, O
                 static_cast<int64_t>(ParseUint64(value))));
             return Status::OK();
           }}},
-        {"env",
-         {offsetof(struct ImmutableDBOptions, env), OptionType::kUnknown,
-          OptionVerificationType::kNormal,
-          (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever),
-          // Parse the input value as an Env
-          [](const ConfigOptions& opts, const std::string& /*name*/,
-             const std::string& value, void* addr) {
-            auto old_env = static_cast<Env**>(addr);       // Get the old value
-            Env* new_env = *old_env;                       // Set new to old
-            Status s = Env::CreateFromString(opts, value,
-                                             &new_env);    // Update new value
-            if (s.ok()) {                                  // It worked
-              *old_env = new_env;                          // Update the old one
-            }
-            return s;
-          }}},
+        {"env",  //**TODO: Should this be kCustomizable?
+         OptionTypeInfo(
+             offsetof(struct ImmutableDBOptions, env), OptionType::kUnknown,
+             OptionVerificationType::kNormal,
+             (OptionTypeFlags::kDontSerialize | OptionTypeFlags::kCompareNever))
+             .SetParseFunc([](const ConfigOptions& opts,
+                              const std::string& /*name*/,
+                              const std::string& value, void* addr) {
+               // Parse the input value as an Env
+               auto old_env = static_cast<Env**>(addr);  // Get the old value
+               Env* new_env = *old_env;                  // Set new to old
+               Status s = Env::CreateFromString(opts, value,
+                                                &new_env);  // Update new value
+               if (s.ok()) {                                // It worked
+                 *old_env = new_env;  // Update the old one
+               }
+               return s;
+             })
+             .SetPrepareFunc([](const ConfigOptions& opts,
+                                const std::string& /*name*/, void* addr) {
+               auto env = static_cast<Env**>(addr);
+               return (*env)->PrepareOptions(opts);
+             })
+             .SetValidateFunc([](const DBOptions& db_opts,
+                                 const ColumnFamilyOptions& cf_opts,
+                                 const std::string& /*name*/,
+                                 const void* addr) {
+               const auto env = static_cast<const Env* const*>(addr);
+               return (*env)->ValidateOptions(db_opts, cf_opts);
+             })},
         {"allow_data_in_errors",
          {offsetof(struct ImmutableDBOptions, allow_data_in_errors),
           OptionType::kBoolean, OptionVerificationType::kNormal,
@@ -538,6 +556,10 @@ static std::unordered_map<std::string, O
          OptionTypeInfo::Enum<CacheTier>(
              offsetof(struct ImmutableDBOptions, lowest_used_cache_tier),
              &cache_tier_string_map, OptionTypeFlags::kNone)},
+        {"enforce_single_del_contracts",
+         {offsetof(struct ImmutableDBOptions, enforce_single_del_contracts),
+          OptionType::kBoolean, OptionVerificationType::kNormal,
+          OptionTypeFlags::kNone}},
 };
 
 const std::string OptionsHelper::kDBOptionsName = "DBOptions";
@@ -662,6 +684,8 @@ ImmutableDBOptions::ImmutableDBOptions(c
       flush_verify_memtable_count(options.flush_verify_memtable_count),
       track_and_verify_wals_in_manifest(
           options.track_and_verify_wals_in_manifest),
+      verify_sst_unique_id_in_manifest(
+          options.verify_sst_unique_id_in_manifest),
       env(options.env),
       rate_limiter(options.rate_limiter),
       sst_file_manager(options.sst_file_manager),
@@ -736,7 +760,8 @@ ImmutableDBOptions::ImmutableDBOptions(c
       db_host_id(options.db_host_id),
       checksum_handoff_file_types(options.checksum_handoff_file_types),
       lowest_used_cache_tier(options.lowest_used_cache_tier),
-      compaction_service(options.compaction_service) {
+      compaction_service(options.compaction_service),
+      enforce_single_del_contracts(options.enforce_single_del_contracts) {
   fs = env->GetFileSystem();
   clock = env->GetSystemClock().get();
   logger = info_log.get();
@@ -756,6 +781,8 @@ void ImmutableDBOptions::Dump(Logger* lo
                    "                              "
                    "Options.track_and_verify_wals_in_manifest: %d",
                    track_and_verify_wals_in_manifest);
+  ROCKS_LOG_HEADER(log, "       Options.verify_sst_unique_id_in_manifest: %d",
+                   verify_sst_unique_id_in_manifest);
   ROCKS_LOG_HEADER(log, "                                    Options.env: %p",
                    env);
   ROCKS_LOG_HEADER(log, "                                     Options.fs: %s",
@@ -907,6 +934,8 @@ void ImmutableDBOptions::Dump(Logger* lo
                    allow_data_in_errors);
   ROCKS_LOG_HEADER(log, "            Options.db_host_id: %s",
                    db_host_id.c_str());
+  ROCKS_LOG_HEADER(log, "            Options.enforce_single_del_contracts: %s",
+                   enforce_single_del_contracts ? "true" : "false");
 }
 
 bool ImmutableDBOptions::IsWalDirSameAsDBPath() const {
diff -pruN 7.2.2-5/options/db_options.h 7.3.1-2/options/db_options.h
--- 7.2.2-5/options/db_options.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/db_options.h	2022-06-08 21:08:16.000000000 +0000
@@ -26,6 +26,7 @@ struct ImmutableDBOptions {
   bool paranoid_checks;
   bool flush_verify_memtable_count;
   bool track_and_verify_wals_in_manifest;
+  bool verify_sst_unique_id_in_manifest;
   Env* env;
   std::shared_ptr<RateLimiter> rate_limiter;
   std::shared_ptr<SstFileManager> sst_file_manager;
@@ -105,6 +106,7 @@ struct ImmutableDBOptions {
   Statistics* stats;
   Logger* logger;
   std::shared_ptr<CompactionService> compaction_service;
+  bool enforce_single_del_contracts;
 
   bool IsWalDirSameAsDBPath() const;
   bool IsWalDirSameAsDBPath(const std::string& path) const;
diff -pruN 7.2.2-5/options/options.cc 7.3.1-2/options/options.cc
--- 7.2.2-5/options/options.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/options.cc	2022-06-08 21:08:16.000000000 +0000
@@ -211,6 +211,10 @@ void ColumnFamilyOptions::Dump(Logger* l
         "        Options.bottommost_compression_opts.max_dict_buffer_bytes: "
         "%" PRIu64,
         bottommost_compression_opts.max_dict_buffer_bytes);
+    ROCKS_LOG_HEADER(
+        log,
+        "        Options.bottommost_compression_opts.use_zstd_dict_trainer: %s",
+        bottommost_compression_opts.use_zstd_dict_trainer ? "true" : "false");
     ROCKS_LOG_HEADER(log, "           Options.compression_opts.window_bits: %d",
                      compression_opts.window_bits);
     ROCKS_LOG_HEADER(log, "                 Options.compression_opts.level: %d",
@@ -225,6 +229,9 @@ void ColumnFamilyOptions::Dump(Logger* l
                      "        Options.compression_opts.zstd_max_train_bytes: "
                      "%" PRIu32,
                      compression_opts.zstd_max_train_bytes);
+    ROCKS_LOG_HEADER(
+        log, "        Options.compression_opts.use_zstd_dict_trainer: %s",
+        compression_opts.use_zstd_dict_trainer ? "true" : "false");
     ROCKS_LOG_HEADER(log,
                      "        Options.compression_opts.parallel_threads: "
                      "%" PRIu32,
diff -pruN 7.2.2-5/options/options_helper.cc 7.3.1-2/options/options_helper.cc
--- 7.2.2-5/options/options_helper.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/options_helper.cc	2022-06-08 21:08:16.000000000 +0000
@@ -70,6 +70,8 @@ DBOptions BuildDBOptions(const Immutable
       immutable_db_options.flush_verify_memtable_count;
   options.track_and_verify_wals_in_manifest =
       immutable_db_options.track_and_verify_wals_in_manifest;
+  options.verify_sst_unique_id_in_manifest =
+      immutable_db_options.verify_sst_unique_id_in_manifest;
   options.env = immutable_db_options.env;
   options.rate_limiter = immutable_db_options.rate_limiter;
   options.sst_file_manager = immutable_db_options.sst_file_manager;
@@ -182,6 +184,8 @@ DBOptions BuildDBOptions(const Immutable
   options.checksum_handoff_file_types =
       immutable_db_options.checksum_handoff_file_types;
   options.lowest_used_cache_tier = immutable_db_options.lowest_used_cache_tier;
+  options.enforce_single_del_contracts =
+      immutable_db_options.enforce_single_del_contracts;
   return options;
 }
 
@@ -463,43 +467,43 @@ bool SerializeSingleOptionHelper(const v
       *value = *(static_cast<const bool*>(opt_address)) ? "true" : "false";
       break;
     case OptionType::kInt:
-      *value = ToString(*(static_cast<const int*>(opt_address)));
+      *value = std::to_string(*(static_cast<const int*>(opt_address)));
       break;
     case OptionType::kInt32T:
-      *value = ToString(*(static_cast<const int32_t*>(opt_address)));
+      *value = std::to_string(*(static_cast<const int32_t*>(opt_address)));
       break;
     case OptionType::kInt64T:
       {
         int64_t v;
         GetUnaligned(static_cast<const int64_t*>(opt_address), &v);
-        *value = ToString(v);
+        *value = std::to_string(v);
       }
       break;
     case OptionType::kUInt:
-      *value = ToString(*(static_cast<const unsigned int*>(opt_address)));
+      *value = std::to_string(*(static_cast<const unsigned int*>(opt_address)));
       break;
     case OptionType::kUInt8T:
-      *value = ToString(*(static_cast<const uint8_t*>(opt_address)));
+      *value = std::to_string(*(static_cast<const uint8_t*>(opt_address)));
       break;
     case OptionType::kUInt32T:
-      *value = ToString(*(static_cast<const uint32_t*>(opt_address)));
+      *value = std::to_string(*(static_cast<const uint32_t*>(opt_address)));
       break;
     case OptionType::kUInt64T:
       {
         uint64_t v;
         GetUnaligned(static_cast<const uint64_t*>(opt_address), &v);
-        *value = ToString(v);
+        *value = std::to_string(v);
       }
       break;
     case OptionType::kSizeT:
       {
         size_t v;
         GetUnaligned(static_cast<const size_t*>(opt_address), &v);
-        *value = ToString(v);
+        *value = std::to_string(v);
       }
       break;
     case OptionType::kDouble:
-      *value = ToString(*(static_cast<const double*>(opt_address)));
+      *value = std::to_string(*(static_cast<const double*>(opt_address)));
       break;
     case OptionType::kString:
       *value =
@@ -898,18 +902,18 @@ Status OptionTypeInfo::Parse(const Confi
     return Status::OK();
   }
   try {
-    void* opt_addr = static_cast<char*>(opt_ptr) + offset_;
     const std::string& opt_value = config_options.input_strings_escaped
                                        ? UnescapeOptionString(value)
                                        : value;
 
-    if (opt_addr == nullptr) {
+    if (opt_ptr == nullptr) {
       return Status::NotFound("Could not find option", opt_name);
     } else if (parse_func_ != nullptr) {
       ConfigOptions copy = config_options;
       copy.invoke_prepare_options = false;
+      void* opt_addr = GetOffset(opt_ptr);
       return parse_func_(copy, opt_name, opt_value, opt_addr);
-    } else if (ParseOptionHelper(opt_addr, type_, opt_value)) {
+    } else if (ParseOptionHelper(GetOffset(opt_ptr), type_, opt_value)) {
       return Status::OK();
     } else if (IsConfigurable()) {
       // The option is <config>.<name>
@@ -1021,12 +1025,12 @@ Status OptionTypeInfo::Serialize(const C
                                  std::string* opt_value) const {
   // If the option is no longer used in rocksdb and marked as deprecated,
   // we skip it in the serialization.
-  const void* opt_addr = static_cast<const char*>(opt_ptr) + offset_;
-  if (opt_addr == nullptr || IsDeprecated()) {
+  if (opt_ptr == nullptr || IsDeprecated()) {
     return Status::OK();
   } else if (IsEnabled(OptionTypeFlags::kDontSerialize)) {
     return Status::NotSupported("Cannot serialize option: ", opt_name);
   } else if (serialize_func_ != nullptr) {
+    const void* opt_addr = GetOffset(opt_ptr);
     return serialize_func_(config_options, opt_name, opt_addr, opt_value);
   } else if (IsCustomizable()) {
     const Customizable* custom = AsRawPointer<Customizable>(opt_ptr);
@@ -1074,7 +1078,8 @@ Status OptionTypeInfo::Serialize(const C
     return Status::OK();
   } else if (config_options.mutable_options_only && !IsMutable()) {
     return Status::OK();
-  } else if (SerializeSingleOptionHelper(opt_addr, type_, opt_value)) {
+  } else if (SerializeSingleOptionHelper(GetOffset(opt_ptr), type_,
+                                         opt_value)) {
     return Status::OK();
   } else {
     return Status::InvalidArgument("Cannot serialize option: ", opt_name);
@@ -1223,39 +1228,43 @@ bool OptionTypeInfo::AreEqual(const Conf
   if (!config_options.IsCheckEnabled(level)) {
     return true;  // If the sanity level is not being checked, skip it
   }
-  const void* this_addr = static_cast<const char*>(this_ptr) + offset_;
-  const void* that_addr = static_cast<const char*>(that_ptr) + offset_;
-  if (this_addr == nullptr || that_addr == nullptr) {
-    if (this_addr == that_addr) {
+  if (this_ptr == nullptr || that_ptr == nullptr) {
+    if (this_ptr == that_ptr) {
       return true;
     }
   } else if (equals_func_ != nullptr) {
+    const void* this_addr = GetOffset(this_ptr);
+    const void* that_addr = GetOffset(that_ptr);
     if (equals_func_(config_options, opt_name, this_addr, that_addr,
                      mismatch)) {
       return true;
     }
-  } else if (AreOptionsEqual(type_, this_addr, that_addr)) {
-    return true;
-  } else if (IsConfigurable()) {
-    const auto* this_config = AsRawPointer<Configurable>(this_ptr);
-    const auto* that_config = AsRawPointer<Configurable>(that_ptr);
-    if (this_config == that_config) {
+  } else {
+    const void* this_addr = GetOffset(this_ptr);
+    const void* that_addr = GetOffset(that_ptr);
+    if (AreOptionsEqual(type_, this_addr, that_addr)) {
       return true;
-    } else if (this_config != nullptr && that_config != nullptr) {
-      std::string bad_name;
-      bool matches;
-      if (level < config_options.sanity_level) {
-        ConfigOptions copy = config_options;
-        copy.sanity_level = level;
-        matches = this_config->AreEquivalent(copy, that_config, &bad_name);
-      } else {
-        matches =
-            this_config->AreEquivalent(config_options, that_config, &bad_name);
-      }
-      if (!matches) {
-        *mismatch = opt_name + "." + bad_name;
+    } else if (IsConfigurable()) {
+      const auto* this_config = AsRawPointer<Configurable>(this_ptr);
+      const auto* that_config = AsRawPointer<Configurable>(that_ptr);
+      if (this_config == that_config) {
+        return true;
+      } else if (this_config != nullptr && that_config != nullptr) {
+        std::string bad_name;
+        bool matches;
+        if (level < config_options.sanity_level) {
+          ConfigOptions copy = config_options;
+          copy.sanity_level = level;
+          matches = this_config->AreEquivalent(copy, that_config, &bad_name);
+        } else {
+          matches = this_config->AreEquivalent(config_options, that_config,
+                                               &bad_name);
+        }
+        if (!matches) {
+          *mismatch = opt_name + "." + bad_name;
+        }
+        return matches;
       }
-      return matches;
     }
   }
   if (mismatch->empty()) {
@@ -1379,6 +1388,44 @@ bool OptionTypeInfo::AreEqualByName(cons
   return (this_value == that_value);
 }
 
+Status OptionTypeInfo::Prepare(const ConfigOptions& config_options,
+                               const std::string& name, void* opt_ptr) const {
+  if (ShouldPrepare()) {
+    if (prepare_func_ != nullptr) {
+      void* opt_addr = GetOffset(opt_ptr);
+      return prepare_func_(config_options, name, opt_addr);
+    } else if (IsConfigurable()) {
+      Configurable* config = AsRawPointer<Configurable>(opt_ptr);
+      if (config != nullptr) {
+        return config->PrepareOptions(config_options);
+      } else if (!CanBeNull()) {
+        return Status::NotFound("Missing configurable object", name);
+      }
+    }
+  }
+  return Status::OK();
+}
+
+Status OptionTypeInfo::Validate(const DBOptions& db_opts,
+                                const ColumnFamilyOptions& cf_opts,
+                                const std::string& name,
+                                const void* opt_ptr) const {
+  if (ShouldValidate()) {
+    if (validate_func_ != nullptr) {
+      const void* opt_addr = GetOffset(opt_ptr);
+      return validate_func_(db_opts, cf_opts, name, opt_addr);
+    } else if (IsConfigurable()) {
+      const Configurable* config = AsRawPointer<Configurable>(opt_ptr);
+      if (config != nullptr) {
+        return config->ValidateOptions(db_opts, cf_opts);
+      } else if (!CanBeNull()) {
+        return Status::NotFound("Missing configurable object", name);
+      }
+    }
+  }
+  return Status::OK();
+}
+
 const OptionTypeInfo* OptionTypeInfo::Find(
     const std::string& opt_name,
     const std::unordered_map<std::string, OptionTypeInfo>& opt_map,
diff -pruN 7.2.2-5/options/options_parser.cc 7.3.1-2/options/options_parser.cc
--- 7.2.2-5/options/options_parser.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/options_parser.cc	2022-06-08 21:08:16.000000000 +0000
@@ -79,16 +79,16 @@ Status PersistRocksDBOptions(const Confi
 
   std::string options_file_content;
 
-  s = writable->Append(option_file_header + "[" +
-                       opt_section_titles[kOptionSectionVersion] +
-                       "]\n"
-                       "  rocksdb_version=" +
-                       ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR) +
-                       "." + ToString(ROCKSDB_PATCH) + "\n");
+  s = writable->Append(
+      option_file_header + "[" + opt_section_titles[kOptionSectionVersion] +
+      "]\n"
+      "  rocksdb_version=" +
+      std::to_string(ROCKSDB_MAJOR) + "." + std::to_string(ROCKSDB_MINOR) +
+      "." + std::to_string(ROCKSDB_PATCH) + "\n");
   if (s.ok()) {
     s = writable->Append(
-        "  options_file_version=" + ToString(ROCKSDB_OPTION_FILE_MAJOR) + "." +
-        ToString(ROCKSDB_OPTION_FILE_MINOR) + "\n");
+        "  options_file_version=" + std::to_string(ROCKSDB_OPTION_FILE_MAJOR) +
+        "." + std::to_string(ROCKSDB_OPTION_FILE_MINOR) + "\n");
   }
   if (s.ok()) {
     s = writable->Append("\n[" + opt_section_titles[kOptionSectionDBOptions] +
@@ -216,7 +216,7 @@ Status RocksDBOptionsParser::InvalidArgu
                                              const std::string& message) {
   return Status::InvalidArgument(
       "[RocksDBOptionsParser Error] ",
-      message + " (at line " + ToString(line_num) + ")");
+      message + " (at line " + std::to_string(line_num) + ")");
 }
 
 Status RocksDBOptionsParser::ParseStatement(std::string* name,
@@ -590,7 +590,7 @@ Status RocksDBOptionsParser::VerifyRocks
       return Status::InvalidArgument(
           "[RocksDBOptionParser Error] The persisted options and the db"
           "instance does not have the same name for column family ",
-          ToString(i));
+          std::to_string(i));
     }
   }
 
diff -pruN 7.2.2-5/options/options_settable_test.cc 7.3.1-2/options/options_settable_test.cc
--- 7.2.2-5/options/options_settable_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/options_settable_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -34,6 +34,7 @@ namespace ROCKSDB_NAMESPACE {
 #ifndef ROCKSDB_LITE
 #if defined OS_LINUX || defined OS_WIN
 #ifndef __clang__
+#ifndef ROCKSDB_UBSAN_RUN
 
 class OptionsSettableTest : public testing::Test {
  public:
@@ -116,7 +117,8 @@ bool CompareBytes(char* start_ptr1, char
 // kBbtoExcluded, and maybe add customized verification for it.
 TEST_F(OptionsSettableTest, BlockBasedTableOptionsAllFieldsSettable) {
   // Items in the form of <offset, size>. Need to be in ascending order
-  // and not overlapping. Need to updated if new pointer-option is added.
+  // and not overlapping. Need to update if new option to be excluded is added
+  // (e.g, pointer-type)
   const OffsetGap kBbtoExcluded = {
       {offsetof(struct BlockBasedTableOptions, flush_block_policy_factory),
        sizeof(std::shared_ptr<FlushBlockPolicyFactory>)},
@@ -126,6 +128,8 @@ TEST_F(OptionsSettableTest, BlockBasedTa
        sizeof(std::shared_ptr<PersistentCache>)},
       {offsetof(struct BlockBasedTableOptions, block_cache_compressed),
        sizeof(std::shared_ptr<Cache>)},
+      {offsetof(struct BlockBasedTableOptions, cache_usage_options),
+       sizeof(CacheUsageOptions)},
       {offsetof(struct BlockBasedTableOptions, filter_policy),
        sizeof(std::shared_ptr<const FilterPolicy>)},
   };
@@ -188,8 +192,6 @@ TEST_F(OptionsSettableTest, BlockBasedTa
       "index_block_restart_interval=4;"
       "filter_policy=bloomfilter:4:true;whole_key_filtering=1;detect_filter_"
       "construct_corruption=false;"
-      "reserve_table_builder_memory=false;"
-      "reserve_table_reader_memory=false;"
       "format_version=1;"
       "verify_compression=true;read_amp_bytes_per_bit=0;"
       "enable_index_compression=false;"
@@ -302,6 +304,7 @@ TEST_F(OptionsSettableTest, DBOptionsAll
                              "paranoid_checks=true;"
                              "flush_verify_memtable_count=true;"
                              "track_and_verify_wals_in_manifest=true;"
+                             "verify_sst_unique_id_in_manifest=true;"
                              "is_fd_close_on_exec=false;"
                              "bytes_per_sync=4295013613;"
                              "strict_bytes_per_sync=true;"
@@ -352,10 +355,11 @@ TEST_F(OptionsSettableTest, DBOptionsAll
                              "write_dbid_to_manifest=false;"
                              "best_efforts_recovery=false;"
                              "max_bgerror_resume_count=2;"
-                             "bgerror_resume_retry_interval=1000000"
+                             "bgerror_resume_retry_interval=1000000;"
                              "db_host_id=hostname;"
                              "lowest_used_cache_tier=kNonVolatileBlockTier;"
-                             "allow_data_in_errors=false",
+                             "allow_data_in_errors=false;"
+                             "enforce_single_del_contracts=false;",
                              new_options));
 
   ASSERT_EQ(unset_bytes_base, NumUnsetBytes(new_options_ptr, sizeof(DBOptions),
@@ -478,8 +482,8 @@ TEST_F(OptionsSettableTest, ColumnFamily
       "max_bytes_for_level_multiplier=60;"
       "memtable_factory=SkipListFactory;"
       "compression=kNoCompression;"
-      "compression_opts=5:6:7:8:9:10:true:11;"
-      "bottommost_compression_opts=4:5:6:7:8:9:true:10;"
+      "compression_opts=5:6:7:8:9:10:true:11:false;"
+      "bottommost_compression_opts=4:5:6:7:8:9:true:10:true;"
       "bottommost_compression=kDisableCompressionOption;"
       "level0_stop_writes_trigger=33;"
       "num_levels=99;"
@@ -581,6 +585,7 @@ TEST_F(OptionsSettableTest, ColumnFamily
   delete[] mcfo2_ptr;
   delete[] cfo_clean_ptr;
 }
+#endif  // !ROCKSDB_UBSAN_RUN
 #endif  // !__clang__
 #endif  // OS_LINUX || OS_WIN
 #endif  // !ROCKSDB_LITE
diff -pruN 7.2.2-5/options/options_test.cc 7.3.1-2/options/options_test.cc
--- 7.2.2-5/options/options_test.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/options/options_test.cc	2022-06-08 21:08:16.000000000 +0000
@@ -68,7 +68,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTes
        "kZSTDNotFinalCompression"},
       {"bottommost_compression", "kLZ4Compression"},
       {"bottommost_compression_opts", "5:6:7:8:10:true"},
-      {"compression_opts", "4:5:6:7:8:true"},
+      {"compression_opts", "4:5:6:7:8:2:true:100:false"},
       {"num_levels", "8"},
       {"level0_file_num_compaction_trigger", "8"},
       {"level0_slowdown_writes_trigger", "9"},
@@ -118,6 +118,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTes
       {"error_if_exists", "false"},
       {"paranoid_checks", "true"},
       {"track_and_verify_wals_in_manifest", "true"},
+      {"verify_sst_unique_id_in_manifest", "true"},
       {"max_open_files", "32"},
       {"max_total_wal_size", "33"},
       {"use_fsync", "true"},
@@ -190,9 +191,10 @@ TEST_F(OptionsTest, GetOptionsFromMapTes
   ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);
   ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u);
   ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
-  ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads,
-            CompressionOptions().parallel_threads);
+  ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 2u);
   ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
+  ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 100u);
+  ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
   ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
@@ -202,6 +204,8 @@ TEST_F(OptionsTest, GetOptionsFromMapTes
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
             CompressionOptions().parallel_threads);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true);
+  ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
+            CompressionOptions().use_zstd_dict_trainer);
   ASSERT_EQ(new_cf_opt.num_levels, 8);
   ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8);
   ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9);
@@ -278,6 +282,7 @@ TEST_F(OptionsTest, GetOptionsFromMapTes
   ASSERT_EQ(new_db_opt.error_if_exists, false);
   ASSERT_EQ(new_db_opt.paranoid_checks, true);
   ASSERT_EQ(new_db_opt.track_and_verify_wals_in_manifest, true);
+  ASSERT_EQ(new_db_opt.verify_sst_unique_id_in_manifest, true);
   ASSERT_EQ(new_db_opt.max_open_files, 32);
   ASSERT_EQ(new_db_opt.max_total_wal_size, static_cast<uint64_t>(33));
   ASSERT_EQ(new_db_opt.use_fsync, true);
@@ -602,6 +607,8 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads,
             dflt.parallel_threads);
   ASSERT_EQ(base_cf_opt.compression_opts.enabled, dflt.enabled);
+  ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer,
+            dflt.use_zstd_dict_trainer);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 4);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 5);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 6);
@@ -611,10 +618,12 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads,
             dflt.parallel_threads);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, dflt.enabled);
+  ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
+            dflt.use_zstd_dict_trainer);
 
   ASSERT_OK(GetColumnFamilyOptionsFromString(
       config_options, ColumnFamilyOptions(),
-      "compression_opts=4:5:6:7:8:9:true; "
+      "compression_opts=4:5:6:7:8:9:true:10:false; "
       "bottommost_compression_opts=5:6:7:8:9:false",
       &base_cf_opt));
   ASSERT_EQ(base_cf_opt.compression_opts.window_bits, 4);
@@ -624,6 +633,8 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(base_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
   ASSERT_EQ(base_cf_opt.compression_opts.parallel_threads, 9u);
   ASSERT_EQ(base_cf_opt.compression_opts.enabled, true);
+  ASSERT_EQ(base_cf_opt.compression_opts.max_dict_buffer_bytes, 10u);
+  ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, false);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.window_bits, 5);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.level, 6);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.strategy, 7);
@@ -632,6 +643,8 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.parallel_threads,
             dflt.parallel_threads);
   ASSERT_EQ(base_cf_opt.bottommost_compression_opts.enabled, false);
+  ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
+            dflt.use_zstd_dict_trainer);
 
   ASSERT_OK(
       GetStringFromColumnFamilyOptions(config_options, base_cf_opt, &opts_str));
@@ -644,6 +657,8 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
   ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u);
   ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
+  ASSERT_EQ(base_cf_opt.compression_opts.max_dict_buffer_bytes, 10u);
+  ASSERT_EQ(base_cf_opt.compression_opts.use_zstd_dict_trainer, false);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 7);
@@ -652,15 +667,18 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
             dflt.parallel_threads);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false);
+  ASSERT_EQ(base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
+            dflt.use_zstd_dict_trainer);
 
   // Test as struct values
   ASSERT_OK(GetColumnFamilyOptionsFromString(
       config_options, ColumnFamilyOptions(),
       "compression_opts={window_bits=5; level=6; strategy=7; max_dict_bytes=8;"
-      "zstd_max_train_bytes=9;parallel_threads=10;enabled=true}; "
+      "zstd_max_train_bytes=9;parallel_threads=10;enabled=true;use_zstd_dict_"
+      "trainer=false}; "
       "bottommost_compression_opts={window_bits=4; level=5; strategy=6;"
       " max_dict_bytes=7;zstd_max_train_bytes=8;parallel_threads=9;"
-      "enabled=false}; ",
+      "enabled=false;use_zstd_dict_trainer=true}; ",
       &new_cf_opt));
   ASSERT_EQ(new_cf_opt.compression_opts.window_bits, 5);
   ASSERT_EQ(new_cf_opt.compression_opts.level, 6);
@@ -669,6 +687,7 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 9u);
   ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 10u);
   ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
+  ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 4);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 5);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.strategy, 6);
@@ -676,6 +695,7 @@ TEST_F(OptionsTest, CompressionOptionsFr
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.zstd_max_train_bytes, 8u);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads, 9u);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, false);
+  ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer, true);
 
   ASSERT_OK(GetColumnFamilyOptionsFromString(
       config_options, base_cf_opt,
@@ -707,6 +727,8 @@ TEST_F(OptionsTest, CompressionOptionsFr
             base_cf_opt.bottommost_compression_opts.parallel_threads);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled,
             base_cf_opt.bottommost_compression_opts.enabled);
+  ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
+            base_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer);
 
   // Test a few individual struct values
   ASSERT_OK(GetColumnFamilyOptionsFromString(
@@ -799,6 +821,7 @@ TEST_F(OptionsTest, OldInterfaceTest) {
       {"error_if_exists", "false"},
       {"paranoid_checks", "true"},
       {"track_and_verify_wals_in_manifest", "true"},
+      {"verify_sst_unique_id_in_manifest", "true"},
       {"max_open_files", "32"},
   };
   ASSERT_OK(GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt));
@@ -807,6 +830,7 @@ TEST_F(OptionsTest, OldInterfaceTest) {
   ASSERT_EQ(new_db_opt.error_if_exists, false);
   ASSERT_EQ(new_db_opt.paranoid_checks, true);
   ASSERT_EQ(new_db_opt.track_and_verify_wals_in_manifest, true);
+  ASSERT_EQ(new_db_opt.verify_sst_unique_id_in_manifest, true);
   ASSERT_EQ(new_db_opt.max_open_files, 32);
   db_options_map["unknown_option"] = "1";
   Status s = GetDBOptionsFromMap(base_db_opt, db_options_map, &new_db_opt);
@@ -854,8 +878,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOp
       "block_cache=1M;block_cache_compressed=1k;block_size=1024;"
       "block_size_deviation=8;block_restart_interval=4;"
       "format_version=5;whole_key_filtering=1;"
-      "reserve_table_builder_memory=true;"
-      "reserve_table_reader_memory=true;"
       "filter_policy=bloomfilter:4.567:false;detect_filter_construct_"
       "corruption=true;"
       // A bug caused read_amp_bytes_per_bit to be a large integer in OPTIONS
@@ -877,8 +899,6 @@ TEST_F(OptionsTest, GetBlockBasedTableOp
   ASSERT_EQ(new_opt.format_version, 5U);
   ASSERT_EQ(new_opt.whole_key_filtering, true);
   ASSERT_EQ(new_opt.detect_filter_construct_corruption, true);
-  ASSERT_EQ(new_opt.reserve_table_builder_memory, true);
-  ASSERT_EQ(new_opt.reserve_table_reader_memory, true);
   ASSERT_TRUE(new_opt.filter_policy != nullptr);
   auto bfp = new_opt.filter_policy->CheckedCast<BloomFilterPolicy>();
   ASSERT_NE(bfp, nullptr);
@@ -1351,6 +1371,7 @@ TEST_F(OptionsTest, GetOptionsFromString
   ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
   ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
   ASSERT_EQ(new_options.compression_opts.enabled, false);
+  ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true);
   ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
   ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
   ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
@@ -1359,6 +1380,8 @@ TEST_F(OptionsTest, GetOptionsFromString
   ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
   ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
   ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);
+  ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer,
+            true);
   ASSERT_EQ(new_options.write_buffer_size, 10U);
   ASSERT_EQ(new_options.max_write_buffer_number, 16);
   const auto new_bbto =
@@ -2263,7 +2286,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFrom
        "kZSTDNotFinalCompression"},
       {"bottommost_compression", "kLZ4Compression"},
       {"bottommost_compression_opts", "5:6:7:8:9:true"},
-      {"compression_opts", "4:5:6:7:8:true"},
+      {"compression_opts", "4:5:6:7:8:9:true:10:false"},
       {"num_levels", "8"},
       {"level0_file_num_compaction_trigger", "8"},
       {"level0_slowdown_writes_trigger", "9"},
@@ -2316,6 +2339,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFrom
       {"error_if_exists", "false"},
       {"paranoid_checks", "true"},
       {"track_and_verify_wals_in_manifest", "true"},
+      {"verify_sst_unique_id_in_manifest", "true"},
       {"max_open_files", "32"},
       {"max_total_wal_size", "33"},
       {"use_fsync", "true"},
@@ -2380,9 +2404,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFrom
   ASSERT_EQ(new_cf_opt.compression_opts.strategy, 6);
   ASSERT_EQ(new_cf_opt.compression_opts.max_dict_bytes, 7u);
   ASSERT_EQ(new_cf_opt.compression_opts.zstd_max_train_bytes, 8u);
-  ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads,
-            CompressionOptions().parallel_threads);
+  ASSERT_EQ(new_cf_opt.compression_opts.parallel_threads, 9u);
   ASSERT_EQ(new_cf_opt.compression_opts.enabled, true);
+  ASSERT_EQ(new_cf_opt.compression_opts.max_dict_buffer_bytes, 10u);
+  ASSERT_EQ(new_cf_opt.compression_opts.use_zstd_dict_trainer, false);
   ASSERT_EQ(new_cf_opt.bottommost_compression, kLZ4Compression);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.window_bits, 5);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.level, 6);
@@ -2392,6 +2417,10 @@ TEST_F(OptionsOldApiTest, GetOptionsFrom
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.parallel_threads,
             CompressionOptions().parallel_threads);
   ASSERT_EQ(new_cf_opt.bottommost_compression_opts.enabled, true);
+  ASSERT_EQ(new_cf_opt.bottommost_compression_opts.max_dict_buffer_bytes,
+            CompressionOptions().max_dict_buffer_bytes);
+  ASSERT_EQ(new_cf_opt.bottommost_compression_opts.use_zstd_dict_trainer,
+            CompressionOptions().use_zstd_dict_trainer);
   ASSERT_EQ(new_cf_opt.num_levels, 8);
   ASSERT_EQ(new_cf_opt.level0_file_num_compaction_trigger, 8);
   ASSERT_EQ(new_cf_opt.level0_slowdown_writes_trigger, 9);
@@ -3054,6 +3083,7 @@ TEST_F(OptionsOldApiTest, GetOptionsFrom
   ASSERT_EQ(new_options.compression_opts.zstd_max_train_bytes, 0u);
   ASSERT_EQ(new_options.compression_opts.parallel_threads, 1u);
   ASSERT_EQ(new_options.compression_opts.enabled, false);
+  ASSERT_EQ(new_options.compression_opts.use_zstd_dict_trainer, true);
   ASSERT_EQ(new_options.bottommost_compression, kDisableCompressionOption);
   ASSERT_EQ(new_options.bottommost_compression_opts.window_bits, 5);
   ASSERT_EQ(new_options.bottommost_compression_opts.level, 6);
@@ -3062,6 +3092,8 @@ TEST_F(OptionsOldApiTest, GetOptionsFrom
   ASSERT_EQ(new_options.bottommost_compression_opts.zstd_max_train_bytes, 0u);
   ASSERT_EQ(new_options.bottommost_compression_opts.parallel_threads, 1u);
   ASSERT_EQ(new_options.bottommost_compression_opts.enabled, false);
+  ASSERT_EQ(new_options.bottommost_compression_opts.use_zstd_dict_trainer,
+            true);
   ASSERT_EQ(new_options.write_buffer_size, 10U);
   ASSERT_EQ(new_options.max_write_buffer_number, 16);
 
@@ -3346,31 +3378,31 @@ TEST_F(OptionsParserTest, IgnoreUnknownO
     if (case_id == 0) {
       // same version
       should_ignore = false;
-      version_string =
-          ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR) + ".0";
+      version_string = std::to_string(ROCKSDB_MAJOR) + "." +
+                       std::to_string(ROCKSDB_MINOR) + ".0";
     } else if (case_id == 1) {
       // higher minor version
       should_ignore = true;
-      version_string =
-          ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR + 1) + ".0";
+      version_string = std::to_string(ROCKSDB_MAJOR) + "." +
+                       std::to_string(ROCKSDB_MINOR + 1) + ".0";
     } else if (case_id == 2) {
       // higher major version.
       should_ignore = true;
-      version_string = ToString(ROCKSDB_MAJOR + 1) + ".0.0";
+      version_string = std::to_string(ROCKSDB_MAJOR + 1) + ".0.0";
     } else if (case_id == 3) {
       // lower minor version
 #if ROCKSDB_MINOR == 0
       continue;
 #else
-      version_string =
-          ToString(ROCKSDB_MAJOR) + "." + ToString(ROCKSDB_MINOR - 1) + ".0";
+      version_string = std::to_string(ROCKSDB_MAJOR) + "." +
+                       std::to_string(ROCKSDB_MINOR - 1) + ".0";
       should_ignore = false;
 #endif
     } else {
       // lower major version
       should_ignore = false;
-      version_string =
-          ToString(ROCKSDB_MAJOR - 1) + "." + ToString(ROCKSDB_MINOR) + ".0";
+      version_string = std::to_string(ROCKSDB_MAJOR - 1) + "." +
+                       std::to_string(ROCKSDB_MINOR) + ".0";
     }
 
     std::string options_file_content =
@@ -4082,9 +4114,10 @@ TEST_F(OptionsParserTest, IntegerParsing
   ASSERT_EQ(ParseUint32("4294967295"), 4294967295U);
   ASSERT_EQ(ParseSizeT("18446744073709551615"), 18446744073709551615U);
   ASSERT_EQ(ParseInt64("9223372036854775807"), 9223372036854775807);
-  ASSERT_EQ(ParseInt64("-9223372036854775808"), port::kMinInt64);
+  ASSERT_EQ(ParseInt64("-9223372036854775808"),
+            std::numeric_limits<int64_t>::min());
   ASSERT_EQ(ParseInt32("2147483647"), 2147483647);
-  ASSERT_EQ(ParseInt32("-2147483648"), port::kMinInt32);
+  ASSERT_EQ(ParseInt32("-2147483648"), std::numeric_limits<int32_t>::min());
   ASSERT_EQ(ParseInt("-32767"), -32767);
   ASSERT_EQ(ParseDouble("-1.234567"), -1.234567);
 }
@@ -4265,19 +4298,20 @@ TEST_F(OptionTypeInfoTest, TestInvalidAr
 }
 
 TEST_F(OptionTypeInfoTest, TestParseFunc) {
-  OptionTypeInfo opt_info(
-      0, OptionType::kUnknown, OptionVerificationType::kNormal,
-      OptionTypeFlags::kNone,
-      [](const ConfigOptions& /*opts*/, const std::string& name,
-         const std::string& value, void* addr) {
-        auto ptr = static_cast<std::string*>(addr);
-        if (name == "Oops") {
-          return Status::InvalidArgument(value);
-        } else {
-          *ptr = value + " " + name;
-          return Status::OK();
-        }
-      });
+  OptionTypeInfo opt_info(0, OptionType::kUnknown,
+                          OptionVerificationType::kNormal,
+                          OptionTypeFlags::kNone);
+  opt_info.SetParseFunc([](const ConfigOptions& /*opts*/,
+                           const std::string& name, const std::string& value,
+                           void* addr) {
+    auto ptr = static_cast<std::string*>(addr);
+    if (name == "Oops") {
+      return Status::InvalidArgument(value);
+    } else {
+      *ptr = value + " " + name;
+      return Status::OK();
+    }
+  });
   ConfigOptions config_options;
   std::string base;
   ASSERT_OK(opt_info.Parse(config_options, "World", "Hello", &base));
@@ -4286,19 +4320,19 @@ TEST_F(OptionTypeInfoTest, TestParseFunc
 }
 
 TEST_F(OptionTypeInfoTest, TestSerializeFunc) {
-  OptionTypeInfo opt_info(
-      0, OptionType::kString, OptionVerificationType::kNormal,
-      OptionTypeFlags::kNone, nullptr,
-      [](const ConfigOptions& /*opts*/, const std::string& name,
-         const void* /*addr*/, std::string* value) {
-        if (name == "Oops") {
-          return Status::InvalidArgument(name);
-        } else {
-          *value = name;
-          return Status::OK();
-        }
-      },
-      nullptr);
+  OptionTypeInfo opt_info(0, OptionType::kString,
+                          OptionVerificationType::kNormal,
+                          OptionTypeFlags::kNone);
+  opt_info.SetSerializeFunc([](const ConfigOptions& /*opts*/,
+                               const std::string& name, const void* /*addr*/,
+                               std::string* value) {
+    if (name == "Oops") {
+      return Status::InvalidArgument(name);
+    } else {
+      *value = name;
+      return Status::OK();
+    }
+  });
   ConfigOptions config_options;
   std::string base;
   std::string value;
@@ -4308,24 +4342,24 @@ TEST_F(OptionTypeInfoTest, TestSerialize
 }
 
 TEST_F(OptionTypeInfoTest, TestEqualsFunc) {
-  OptionTypeInfo opt_info(
-      0, OptionType::kInt, OptionVerificationType::kNormal,
-      OptionTypeFlags::kNone, nullptr, nullptr,
-      [](const ConfigOptions& /*opts*/, const std::string& name,
-         const void* addr1, const void* addr2, std::string* mismatch) {
-        auto i1 = *(static_cast<const int*>(addr1));
-        auto i2 = *(static_cast<const int*>(addr2));
-        if (name == "LT") {
-          return i1 < i2;
-        } else if (name == "GT") {
-          return i1 > i2;
-        } else if (name == "EQ") {
-          return i1 == i2;
-        } else {
-          *mismatch = name + "???";
-          return false;
-        }
-      });
+  OptionTypeInfo opt_info(0, OptionType::kInt, OptionVerificationType::kNormal,
+                          OptionTypeFlags::kNone);
+  opt_info.SetEqualsFunc([](const ConfigOptions& /*opts*/,
+                            const std::string& name, const void* addr1,
+                            const void* addr2, std::string* mismatch) {
+    auto i1 = *(static_cast<const int*>(addr1));
+    auto i2 = *(static_cast<const int*>(addr2));
+    if (name == "LT") {
+      return i1 < i2;
+    } else if (name == "GT") {
+      return i1 > i2;
+    } else if (name == "EQ") {
+      return i1 == i2;
+    } else {
+      *mismatch = name + "???";
+      return false;
+    }
+  });
 
   ConfigOptions config_options;
   int int1 = 100;
@@ -4341,6 +4375,64 @@ TEST_F(OptionTypeInfoTest, TestEqualsFun
   ASSERT_EQ(mismatch, "NO???");
 }
 
+TEST_F(OptionTypeInfoTest, TestPrepareFunc) {
+  OptionTypeInfo opt_info(0, OptionType::kInt, OptionVerificationType::kNormal,
+                          OptionTypeFlags::kNone);
+  opt_info.SetPrepareFunc(
+      [](const ConfigOptions& /*opts*/, const std::string& name, void* addr) {
+        auto i1 = static_cast<int*>(addr);
+        if (name == "x2") {
+          *i1 *= 2;
+        } else if (name == "/2") {
+          *i1 /= 2;
+        } else {
+          return Status::InvalidArgument("Bad Argument", name);
+        }
+        return Status::OK();
+      });
+  ConfigOptions config_options;
+  int int1 = 100;
+  ASSERT_OK(opt_info.Prepare(config_options, "x2", &int1));
+  ASSERT_EQ(int1, 200);
+  ASSERT_OK(opt_info.Prepare(config_options, "/2", &int1));
+  ASSERT_EQ(int1, 100);
+  ASSERT_NOK(opt_info.Prepare(config_options, "??", &int1));
+  ASSERT_EQ(int1, 100);
+}
+TEST_F(OptionTypeInfoTest, TestValidateFunc) {
+  OptionTypeInfo opt_info(0, OptionType::kSizeT,
+                          OptionVerificationType::kNormal,
+                          OptionTypeFlags::kNone);
+  opt_info.SetValidateFunc([](const DBOptions& db_opts,
+                              const ColumnFamilyOptions& cf_opts,
+                              const std::string& name, const void* addr) {
+    const auto sz = static_cast<const size_t*>(addr);
+    bool is_valid = false;
+    if (name == "keep_log_file_num") {
+      is_valid = (*sz == db_opts.keep_log_file_num);
+    } else if (name == "write_buffer_size") {
+      is_valid = (*sz == cf_opts.write_buffer_size);
+    }
+    if (is_valid) {
+      return Status::OK();
+    } else {
+      return Status::InvalidArgument("Mismatched value", name);
+    }
+  });
+  ConfigOptions config_options;
+  DBOptions db_options;
+  ColumnFamilyOptions cf_options;
+
+  ASSERT_OK(opt_info.Validate(db_options, cf_options, "keep_log_file_num",
+                              &db_options.keep_log_file_num));
+  ASSERT_OK(opt_info.Validate(db_options, cf_options, "write_buffer_size",
+                              &cf_options.write_buffer_size));
+  ASSERT_NOK(opt_info.Validate(db_options, cf_options, "keep_log_file_num",
+                               &cf_options.write_buffer_size));
+  ASSERT_NOK(opt_info.Validate(db_options, cf_options, "write_buffer_size",
+                               &db_options.keep_log_file_num));
+}
+
 TEST_F(OptionTypeInfoTest, TestOptionFlags) {
   OptionTypeInfo opt_none(0, OptionType::kString,
                           OptionVerificationType::kNormal,
@@ -4543,6 +4635,68 @@ TEST_F(OptionTypeInfoTest, TestStruct) {
   ASSERT_EQ(e1.b.s, "66");
 }
 
+TEST_F(OptionTypeInfoTest, TestArrayType) {
+  OptionTypeInfo array_info = OptionTypeInfo::Array<std::string, 4>(
+      0, OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+      {0, OptionType::kString});
+  std::array<std::string, 4> array1, array2;
+  std::string mismatch;
+
+  ConfigOptions config_options;
+  TestParseAndCompareOption(config_options, array_info, "v", "a:b:c:d", &array1,
+                            &array2);
+
+  ASSERT_EQ(array1.size(), 4);
+  ASSERT_EQ(array1[0], "a");
+  ASSERT_EQ(array1[1], "b");
+  ASSERT_EQ(array1[2], "c");
+  ASSERT_EQ(array1[3], "d");
+  array1[3] = "e";
+  ASSERT_FALSE(
+      array_info.AreEqual(config_options, "v", &array1, &array2, &mismatch));
+  ASSERT_EQ(mismatch, "v");
+
+  // Test vectors with inner brackets
+  TestParseAndCompareOption(config_options, array_info, "v", "a:{b}:c:d",
+                            &array1, &array2);
+  ASSERT_EQ(array1.size(), 4);
+  ASSERT_EQ(array1[0], "a");
+  ASSERT_EQ(array1[1], "b");
+  ASSERT_EQ(array1[2], "c");
+  ASSERT_EQ(array1[3], "d");
+
+  std::array<std::string, 3> array3, array4;
+  OptionTypeInfo bar_info = OptionTypeInfo::Array<std::string, 3>(
+      0, OptionVerificationType::kNormal, OptionTypeFlags::kNone,
+      {0, OptionType::kString}, '|');
+  TestParseAndCompareOption(config_options, bar_info, "v", "x|y|z", &array3,
+                            &array4);
+
+  // Test arrays with inner array
+  TestParseAndCompareOption(config_options, bar_info, "v",
+                            "a|{b1|b2}|{c1|c2|{d1|d2}}", &array3, &array4,
+                            false);
+  ASSERT_EQ(array3.size(), 3);
+  ASSERT_EQ(array3[0], "a");
+  ASSERT_EQ(array3[1], "b1|b2");
+  ASSERT_EQ(array3[2], "c1|c2|{d1|d2}");
+
+  TestParseAndCompareOption(config_options, bar_info, "v",
+                            "{a1|a2}|{b1|{c1|c2}}|d1", &array3, &array4, true);
+  ASSERT_EQ(array3.size(), 3);
+  ASSERT_EQ(array3[0], "a1|a2");
+  ASSERT_EQ(array3[1], "b1|{c1|c2}");
+  ASSERT_EQ(array3[2], "d1");
+
+  // Test invalid input: less element than requested
+  auto s = bar_info.Parse(config_options, "opt_name1", "a1|a2", &array3);
+  ASSERT_TRUE(s.IsInvalidArgument());
+
+  // Test invalid input: more element than requested
+  s = bar_info.Parse(config_options, "opt_name2", "a1|b|c1|d3", &array3);
+  ASSERT_TRUE(s.IsInvalidArgument());
+}
+
 TEST_F(OptionTypeInfoTest, TestVectorType) {
   OptionTypeInfo vec_info = OptionTypeInfo::Vector<std::string>(
       0, OptionVerificationType::kNormal, OptionTypeFlags::kNone,
diff -pruN 7.2.2-5/PLUGINS.md 7.3.1-2/PLUGINS.md
--- 7.2.2-5/PLUGINS.md	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/PLUGINS.md	2022-06-08 21:08:16.000000000 +0000
@@ -4,3 +4,4 @@ This is the list of all known third-part
 * [HDFS](https://github.com/riversand963/rocksdb-hdfs-env): an Env used for interacting with HDFS. Migrated from main RocksDB repo
 * [ZenFS](https://github.com/westerndigitalcorporation/zenfs): a file system for zoned block devices
 * [RADOS](https://github.com/riversand963/rocksdb-rados-env): an Env used for interacting with RADOS. Migrated from RocksDB main repo.
+* [PMEM](https://github.com/pmem/pmem-rocksdb-plugin): a collection of plugins to enable Persistent Memory on RocksDB.
diff -pruN 7.2.2-5/port/port_posix.h 7.3.1-2/port/port_posix.h
--- 7.2.2-5/port/port_posix.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/port_posix.h	2022-06-08 21:08:16.000000000 +0000
@@ -25,11 +25,6 @@
 
 #define ROCKSDB_NOEXCEPT noexcept
 
-// thread_local is part of C++11 and later (TODO: clean up this define)
-#ifndef __thread
-#define __thread thread_local
-#endif
-
 #undef PLATFORM_IS_LITTLE_ENDIAN
 #if defined(OS_MACOSX)
   #include <machine/endian.h>
@@ -95,16 +90,6 @@ namespace ROCKSDB_NAMESPACE {
 extern const bool kDefaultToAdaptiveMutex;
 
 namespace port {
-
-// For use at db/file_indexer.h kLevelMaxIndex
-const uint32_t kMaxUint32 = std::numeric_limits<uint32_t>::max();
-const int kMaxInt32 = std::numeric_limits<int32_t>::max();
-const int kMinInt32 = std::numeric_limits<int32_t>::min();
-const uint64_t kMaxUint64 = std::numeric_limits<uint64_t>::max();
-const int64_t kMaxInt64 = std::numeric_limits<int64_t>::max();
-const int64_t kMinInt64 = std::numeric_limits<int64_t>::min();
-const size_t kMaxSizet = std::numeric_limits<size_t>::max();
-
 constexpr bool kLittleEndian = PLATFORM_IS_LITTLE_ENDIAN;
 #undef PLATFORM_IS_LITTLE_ENDIAN
 
diff -pruN 7.2.2-5/port/sys_time.h 7.3.1-2/port/sys_time.h
--- 7.2.2-5/port/sys_time.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/sys_time.h	2022-06-08 21:08:16.000000000 +0000
@@ -12,36 +12,52 @@
 
 #pragma once
 
-#if defined(OS_WIN) && defined(_MSC_VER)
+#include "rocksdb/rocksdb_namespace.h"
 
-#include <time.h>
+#if defined(OS_WIN) && (defined(_MSC_VER) || defined(__MINGW32__))
 
-#include "rocksdb/rocksdb_namespace.h"
+#include <time.h>
 
 namespace ROCKSDB_NAMESPACE {
 
 namespace port {
 
-// Avoid including winsock2.h for this definition
-struct timeval {
+struct TimeVal {
   long tv_sec;
   long tv_usec;
 };
 
-void gettimeofday(struct timeval* tv, struct timezone* tz);
+void GetTimeOfDay(TimeVal* tv, struct timezone* tz);
 
-inline struct tm* localtime_r(const time_t* timep, struct tm* result) {
+inline struct tm* LocalTimeR(const time_t* timep, struct tm* result) {
   errno_t ret = localtime_s(result, timep);
   return (ret == 0) ? result : NULL;
 }
-}
 
-using port::timeval;
-using port::gettimeofday;
-using port::localtime_r;
+}  // namespace port
+
 }  // namespace ROCKSDB_NAMESPACE
 
 #else
 #include <time.h>
 #include <sys/time.h>
+
+namespace ROCKSDB_NAMESPACE {
+
+namespace port {
+
+using TimeVal = struct timeval;
+
+inline void GetTimeOfDay(TimeVal* tv, struct timezone* tz) {
+  gettimeofday(tv, tz);
+}
+
+inline struct tm* LocalTimeR(const time_t* timep, struct tm* result) {
+  return localtime_r(timep, result);
+}
+
+}  // namespace port
+
+}  // namespace ROCKSDB_NAMESPACE
+
 #endif
diff -pruN 7.2.2-5/port/win/env_win.cc 7.3.1-2/port/win/env_win.cc
--- 7.2.2-5/port/win/env_win.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/win/env_win.cc	2022-06-08 21:08:16.000000000 +0000
@@ -28,6 +28,7 @@
 #include "monitoring/iostats_context_imp.h"
 #include "monitoring/thread_status_updater.h"
 #include "monitoring/thread_status_util.h"
+#include "port/lang.h"
 #include "port/port.h"
 #include "port/port_dirent.h"
 #include "port/win/io_win.h"
@@ -192,8 +193,8 @@ WinFileSystem::WinFileSystem(const std::
 }
 
 const std::shared_ptr<WinFileSystem>& WinFileSystem::Default() {
-  static std::shared_ptr<WinFileSystem> fs =
-      std::make_shared<WinFileSystem>(WinClock::Default());
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<WinFileSystem>, fs)
+  (std::make_shared<WinFileSystem>(WinClock::Default()));
   return fs;
 }
 
@@ -1410,8 +1411,8 @@ std::shared_ptr<FileSystem> FileSystem::
 }
 
 const std::shared_ptr<SystemClock>& SystemClock::Default() {
-  static std::shared_ptr<SystemClock> clock =
-      std::make_shared<port::WinClock>();
+  STATIC_AVOID_DESTRUCTION(std::shared_ptr<SystemClock>, clock)
+  (std::make_shared<port::WinClock>());
   return clock;
 }
 }  // namespace ROCKSDB_NAMESPACE
diff -pruN 7.2.2-5/port/win/env_win.h 7.3.1-2/port/win/env_win.h
--- 7.2.2-5/port/win/env_win.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/win/env_win.h	2022-06-08 21:08:16.000000000 +0000
@@ -80,8 +80,8 @@ class WinClock : public SystemClock {
   virtual ~WinClock() {}
 
   static const char* kClassName() { return "WindowsClock"; }
-  const char* Name() const override { return kClassName(); }
-  const char* NickName() const override { return kDefaultName(); }
+  const char* Name() const override { return kDefaultName(); }
+  const char* NickName() const override { return kClassName(); }
 
   uint64_t NowMicros() override;
 
diff -pruN 7.2.2-5/port/win/port_win.cc 7.3.1-2/port/win/port_win.cc
--- 7.2.2-5/port/win/port_win.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/win/port_win.cc	2022-06-08 21:08:16.000000000 +0000
@@ -52,7 +52,7 @@ std::wstring utf8_to_utf16(const std::st
 }
 #endif
 
-void gettimeofday(struct timeval* tv, struct timezone* /* tz */) {
+void GetTimeOfDay(TimeVal* tv, struct timezone* /* tz */) {
   std::chrono::microseconds usNow(
       std::chrono::duration_cast<std::chrono::microseconds>(
           std::chrono::system_clock::now().time_since_epoch()));
diff -pruN 7.2.2-5/port/win/port_win.h 7.3.1-2/port/win/port_win.h
--- 7.2.2-5/port/win/port_win.h	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/win/port_win.h	2022-06-08 21:08:16.000000000 +0000
@@ -60,11 +60,6 @@ using ssize_t = SSIZE_T;
 #ifdef _MSC_VER
 #define __attribute__(A)
 
-// thread_local is part of C++11 and later (TODO: clean up this define)
-#ifndef __thread
-#define __thread thread_local
-#endif
-
 #endif
 
 namespace ROCKSDB_NAMESPACE {
@@ -82,37 +77,11 @@ namespace port {
 #define snprintf _snprintf
 
 #define ROCKSDB_NOEXCEPT
-// std::numeric_limits<size_t>::max() is not constexpr just yet
-// therefore, use the same limits
-
-// For use at db/file_indexer.h kLevelMaxIndex
-const uint32_t kMaxUint32 = UINT32_MAX;
-const int kMaxInt32 = INT32_MAX;
-const int kMinInt32 = INT32_MIN;
-const int64_t kMaxInt64 = INT64_MAX;
-const int64_t kMinInt64 = INT64_MIN;
-const uint64_t kMaxUint64 = UINT64_MAX;
-
-#ifdef _WIN64
-const size_t kMaxSizet = UINT64_MAX;
-#else
-const size_t kMaxSizet = UINT_MAX;
-#endif
 
 #else // VS >= 2015 or MinGW
 
 #define ROCKSDB_NOEXCEPT noexcept
 
-// For use at db/file_indexer.h kLevelMaxIndex
-const uint32_t kMaxUint32 = std::numeric_limits<uint32_t>::max();
-const int kMaxInt32 = std::numeric_limits<int>::max();
-const int kMinInt32 = std::numeric_limits<int>::min();
-const uint64_t kMaxUint64 = std::numeric_limits<uint64_t>::max();
-const int64_t kMaxInt64 = std::numeric_limits<int64_t>::max();
-const int64_t kMinInt64 = std::numeric_limits<int64_t>::min();
-
-const size_t kMaxSizet = std::numeric_limits<size_t>::max();
-
 #endif //_MSC_VER
 
 // "Windows is designed to run on little-endian computer architectures."
diff -pruN 7.2.2-5/port/win/win_logger.cc 7.3.1-2/port/win/win_logger.cc
--- 7.2.2-5/port/win/win_logger.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/port/win/win_logger.cc	2022-06-08 21:08:16.000000000 +0000
@@ -118,8 +118,8 @@ void WinLogger::Logv(const char* format,
     char* p = base;
     char* limit = base + bufsize;
 
-    struct timeval now_tv;
-    gettimeofday(&now_tv, nullptr);
+    port::TimeVal now_tv;
+    port::GetTimeOfDay(&now_tv, nullptr);
     const time_t seconds = now_tv.tv_sec;
     struct tm t;
     localtime_s(&t, &seconds);
diff -pruN 7.2.2-5/python.mk 7.3.1-2/python.mk
--- 7.2.2-5/python.mk	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/python.mk	1970-01-01 00:00:00.000000000 +0000
@@ -1,9 +0,0 @@
-ifndef PYTHON
-
-# Default to python3. Some distros like CentOS 8 do not have `python`.
-ifeq ($(origin PYTHON), undefined)
-	PYTHON := $(shell which python3 || which python || echo python3)
-endif
-export PYTHON
-
-endif
diff -pruN 7.2.2-5/rocksdb.pc.in 7.3.1-2/rocksdb.pc.in
--- 7.2.2-5/rocksdb.pc.in	1970-01-01 00:00:00.000000000 +0000
+++ 7.3.1-2/rocksdb.pc.in	2022-06-08 21:08:16.000000000 +0000
@@ -0,0 +1,11 @@
+prefix="@CMAKE_INSTALL_PREFIX@"
+exec_prefix="${prefix}"
+libdir="${prefix}/lib"
+includedir="${prefix}/include"
+
+Name: @PROJECT_NAME@
+Description: @CMAKE_PROJECT_DESCRIPTION@
+URL: @CMAKE_PROJECT_HOMEPAGE_URL@
+Version: @PROJECT_VERSION@
+Cflags: -I"${includedir}"
+Libs: -L"${libdir}" -lrocksdb
diff -pruN 7.2.2-5/src.mk 7.3.1-2/src.mk
--- 7.2.2-5/src.mk	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/src.mk	2022-06-08 21:08:16.000000000 +0000
@@ -5,6 +5,7 @@ LIB_SOURCES =
   cache/cache_key.cc                                            \
   cache/cache_reservation_manager.cc                            \
   cache/clock_cache.cc                                          \
+  cache/fast_lru_cache.cc                                       \
   cache/lru_cache.cc                                            \
   cache/compressed_secondary_cache.cc                           \
   cache/sharded_cache.cc                                        \
@@ -210,7 +211,9 @@ LIB_SOURCES =
   trace_replay/trace_replay.cc                                  \
   trace_replay/block_cache_tracer.cc                            \
   trace_replay/io_tracer.cc                                     \
+  util/async_file_reader.cc					\
   util/build_version.cc                                         \
+  util/cleanable.cc                                             \
   util/coding.cc                                                \
   util/compaction_job_stats_impl.cc                             \
   util/comparator.cc                                            \
@@ -362,6 +365,7 @@ STRESS_LIB_SOURCES =
 
 TEST_LIB_SOURCES =                                              \
   db/db_test_util.cc                                            \
+  db/db_with_timestamp_test_util.cc                             \
   test_util/mock_time_env.cc                                    \
   test_util/testharness.cc                                      \
   test_util/testutil.cc                                         \
@@ -427,7 +431,6 @@ TEST_MAIN_SOURCES =
   db/corruption_test.cc                                                 \
   db/cuckoo_table_db_test.cc                                            \
   db/db_basic_test.cc                                                   \
-  db/db_with_timestamp_basic_test.cc                                    \
   db/db_block_cache_test.cc                                             \
   db/db_bloom_filter_test.cc                                            \
   db/db_compaction_filter_test.cc                                       \
@@ -435,6 +438,8 @@ TEST_MAIN_SOURCES =
   db/db_dynamic_level_test.cc                                           \
   db/db_encryption_test.cc                                              \
   db/db_flush_test.cc                                                   \
+  db/db_readonly_with_timestamp_test.cc                                 \
+  db/db_with_timestamp_basic_test.cc                                    \
   db/import_column_family_test.cc                                       \
   db/db_inplace_update_test.cc                                          \
   db/db_io_failure_test.cc                                              \
diff -pruN 7.2.2-5/table/block_based/binary_search_index_reader.cc 7.3.1-2/table/block_based/binary_search_index_reader.cc
--- 7.2.2-5/table/block_based/binary_search_index_reader.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/table/block_based/binary_search_index_reader.cc	2022-06-08 21:08:16.000000000 +0000
@@ -47,7 +47,8 @@ InternalIteratorBase<IndexValue>* Binary
   const bool no_io = (read_options.read_tier == kBlockCacheTier);
   CachableEntry<Block> index_block;
   const Status s =
-      GetOrReadIndexBlock(no_io, get_context, lookup_context, &index_block);
+      GetOrReadIndexBlock(no_io, read_options.rate_limiter_priority,
+                          get_context, lookup_context, &index_block);
   if (!s.ok()) {
     if (iter != nullptr) {
       iter->Invalidate(s);
diff -pruN 7.2.2-5/table/block_based/block_based_filter_block.cc 7.3.1-2/table/block_based/block_based_filter_block.cc
--- 7.2.2-5/table/block_based/block_based_filter_block.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/table/block_based/block_based_filter_block.cc	2022-06-08 21:08:16.000000000 +0000
@@ -51,7 +51,7 @@ void AppendItem(std::string* props, cons
 
 template <class TKey>
 void AppendItem(std::string* props, const TKey& key, const std::string& value) {
-  std::string key_str = ROCKSDB_NAMESPACE::ToString(key);
+  std::string key_str = std::to_string(key);
   AppendItem(props, key_str, value);
 }
 }  // namespace
@@ -337,7 +337,7 @@ std::string BlockBasedFilterBlockReader:
   result.reserve(1024);
 
   std::string s_bo("Block offset"), s_hd("Hex dump"), s_fb("# filter blocks");
-  AppendItem(&result, s_fb, ROCKSDB_NAMESPACE::ToString(num));
+  AppendItem(&result, s_fb, std::to_string(num));
   AppendItem(&result, s_bo, s_hd);
 
   for (size_t index = 0; index < num; index++) {
@@ -345,8 +345,7 @@ std::string BlockBasedFilterBlockReader:
     uint32_t limit = DecodeFixed32(offset + index * 4 + 4);
 
     if (start != limit) {
-      result.append(" filter block # " +
-                    ROCKSDB_NAMESPACE::ToString(index + 1) + "\n");
+      result.append(" filter block # " + std::to_string(index + 1) + "\n");
       Slice filter = Slice(data + start, limit - start);
       AppendItem(&result, start, filter.ToString(true));
     }
diff -pruN 7.2.2-5/table/block_based/block_based_table_builder.cc 7.3.1-2/table/block_based/block_based_table_builder.cc
--- 7.2.2-5/table/block_based/block_based_table_builder.cc	2022-04-28 15:05:36.000000000 +0000
+++ 7.3.1-2/table/block_based/block_based_table_builder.cc	2022-06-08 21:08:16.000000000 +0000
@@ -461,14 +461,24 @@ struct BlockBasedTableBuilder::Rep {
       buffer_limit = std::min(tbo.target_file_size,
                               compression_opts.max_dict_buffer_bytes);
     }
-    if (table_options.no_block_cache || table_options.block_cache == nullptr) {
-      compression_dict_buffer_cache_res_mgr = nullptr;
-    } else {
+
+    const auto compress_dict_build_buffer_charged =
+        table_options.cache_usage_options.options_overrides
+            .at(CacheEntryRole::kCompressionDictionaryBuildingBuffer)
+            .charged;
+    if (table_options.block_cache &&
+        (compress_dict_build_buffer_charged ==
+             CacheEntryRoleOptions::Decision::kEnabled ||
+         compress_dict_build_buffer_charged ==
+             CacheEntryRoleOptions::Decision::kFallback)) {
       compression_dict_buffer_cache_res_mgr =
           std::make_shared<CacheReservationManagerImpl<
               CacheEntryRole::kCompressionDictionaryBuildingBuffer>>(
               table_options.block_cache);
+    } else {
+      compression_dict_buffer_cache_res_mgr = nullptr;
     }
+
     for (uint32_t i = 0; i < compression_opts.parallel_threads; i++) {
       compression_ctxs[i].reset(new CompressionContext(compression_type));
     }
@@ -942,7 +952,7 @@ void BlockBasedTableBuilder::Add(const S
             (r->buffer_limit != 0 && r->data_begin_offset > r->buffer_limit);
         bool exceeds_global_block_cache_limit = false;
 
-        // Increase cache reservation for the last buffered data block
+        // Increase cache charging for the last buffered data block
         // only if the block is not going to be unbuffered immediately
         // and there exists a cache reservation manager
         if (!exceeds_buffer_limit &&
@@ -1886,9 +1896,15 @@ void BlockBasedTableBuilder::EnterUnbuff
   // OK if compression_dict_samples is empty, we'll just get empty dictionary.
   std::string dict;
   if (r->compression_opts.zstd_max_train_bytes > 0) {
-    dict = ZSTD_TrainDictionary(compression_dict_samples,
-                                compression_dict_sample_lens,
-                                r->compression_opts.max_dict_bytes);
+    if (r->compression_opts.use_zstd_dict_trainer) {
+      dict = ZSTD_TrainDictionary(compression_dict_samples,
+                                  compression_dict_sample_lens,
+                                  r->compression_opts.max_dict_bytes);
+    } else {
+      dict = ZSTD_FinalizeDictionary(
+          compression_dict_samples, compression_dict_sample_lens,
+          r->compression_opts.max_dict_bytes, r->compression_opts.level);
+    }
   } else {
     dict = std::move(compression_dict_samples);
   }
@@ -1924,7 +1940,6 @@ void BlockBasedTableBuilder::EnterUnbuff
     }
 
     auto& data_block = r->data_block_buffers[i];
-
     if (r->IsParallelCompressionEnabled()) {
       Slice first_key_in_next_block;
       const Slice* first_key_in_next_block_ptr = &first_key_in_next_block;
diff -pruN 7.2