diff -pruN 0.0~git20220809.9b17af9-1/debian/changelog 0.0~git20220809.9b17af9-1ubuntu2/debian/changelog
--- 0.0~git20220809.9b17af9-1/debian/changelog	2022-08-11 18:58:31.000000000 +0000
+++ 0.0~git20220809.9b17af9-1ubuntu2/debian/changelog	2022-09-21 17:41:12.000000000 +0000
@@ -1,3 +1,20 @@
+libyuv (0.0~git20220809.9b17af9-1ubuntu2) kinetic; urgency=medium
+
+  * Add arm64 to the list of test blacklist archs: tests fail in Ubuntu
+    but not in Debian, and the testsuite also fails when run against the
+    previous version of the package (before the testsuite was enabled).
+
+ -- Steve Langasek <vorlon@debian.org>  Wed, 21 Sep 2022 17:41:12 +0000
+
+libyuv (0.0~git20220809.9b17af9-1ubuntu1) kinetic; urgency=medium
+
+  * debian/patches/0001-Revert-Merge-SplitRGB-fix-mcmodel-large-x86-and-
+    Inte.patch: [PATCH] Revert "Merge/SplitRGB fix -mcmodel=large x86 and
+    InterpolateRow_16To8_NEON".  Latest libyuv snapshot fails to build on amd64
+    and arm64 in Ubuntu with test regressions but not in Debian, not yet clear why.
+
+ -- Steve Langasek <steve.langasek@ubuntu.com>  Mon, 19 Sep 2022 10:26:03 -0700
+
 libyuv (0.0~git20220809.9b17af9-1) unstable; urgency=medium
 
   * New upstream snapshot 20220809 (version 1838).
diff -pruN 0.0~git20220809.9b17af9-1/debian/patches/0001-Revert-Merge-SplitRGB-fix-mcmodel-large-x86-and-Inte.patch 0.0~git20220809.9b17af9-1ubuntu2/debian/patches/0001-Revert-Merge-SplitRGB-fix-mcmodel-large-x86-and-Inte.patch
--- 0.0~git20220809.9b17af9-1/debian/patches/0001-Revert-Merge-SplitRGB-fix-mcmodel-large-x86-and-Inte.patch	1970-01-01 00:00:00.000000000 +0000
+++ 0.0~git20220809.9b17af9-1ubuntu2/debian/patches/0001-Revert-Merge-SplitRGB-fix-mcmodel-large-x86-and-Inte.patch	2022-09-19 19:33:54.000000000 +0000
@@ -0,0 +1,760 @@
+From 7da57651a1ca2b7bf31ae9e5618ced6f7ad88134 Mon Sep 17 00:00:00 2001
+From: Steve Langasek <steve.langasek@canonical.com>
+Date: Mon, 19 Sep 2022 12:33:24 -0700
+Subject: [PATCH] Revert "Merge/SplitRGB fix -mcmodel=large x86 and
+ InterpolateRow_16To8_NEON"
+
+This reverts commit 6900494d90ae095d44405cd4cc3f346971fa69c9.
+---
+ include/libyuv/row.h   |  25 -------
+ source/row_any.cc      | 102 +++++++--------------------
+ source/row_common.cc   | 128 +++++++++++++++------------------
+ source/row_gcc.cc      | 156 +++++++++++++++++++++++++----------------
+ source/row_neon64.cc   |  80 ---------------------
+ source/scale_common.cc |  24 +------
+ 6 files changed, 179 insertions(+), 336 deletions(-)
+
+diff --git a/include/libyuv/row.h b/include/libyuv/row.h
+index 1c14ef3b..7770c0a5 100644
+--- a/include/libyuv/row.h
++++ b/include/libyuv/row.h
+@@ -548,7 +548,6 @@ extern "C" {
+ 
+ // The following are available on AArch64 platforms:
+ #if !defined(LIBYUV_DISABLE_NEON) && defined(__aarch64__)
+-#define HAS_INTERPOLATEROW_16TO8_NEON
+ #define HAS_SCALESUMSAMPLES_NEON
+ #define HAS_GAUSSROW_F32_NEON
+ #define HAS_GAUSSCOL_F32_NEON
+@@ -5310,30 +5309,6 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
+                             int scale,
+                             int width,
+                             int source_y_fraction);
+-void InterpolateRow_16To8_NEON(uint8_t* dst_ptr,
+-                               const uint16_t* src_ptr,
+-                               ptrdiff_t src_stride,
+-                               int scale,
+-                               int width,
+-                               int source_y_fraction);
+-void InterpolateRow_16To8_Any_NEON(uint8_t* dst_ptr,
+-                                   const uint16_t* src_ptr,
+-                                   ptrdiff_t src_stride,
+-                                   int scale,
+-                                   int width,
+-                                   int source_y_fraction);
+-void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
+-                               const uint16_t* src_ptr,
+-                               ptrdiff_t src_stride,
+-                               int scale,
+-                               int width,
+-                               int source_y_fraction);
+-void InterpolateRow_16To8_Any_AVX2(uint8_t* dst_ptr,
+-                                   const uint16_t* src_ptr,
+-                                   ptrdiff_t src_stride,
+-                                   int scale,
+-                                   int width,
+-                                   int source_y_fraction);
+ 
+ // Sobel images.
+ void SobelXRow_C(const uint8_t* src_y0,
+diff --git a/source/row_any.cc b/source/row_any.cc
+index bd46ba1b..c8b125a3 100644
+--- a/source/row_any.cc
++++ b/source/row_any.cc
+@@ -1634,100 +1634,46 @@ ANY11C(UYVYToARGBRow_Any_LSX, UYVYToARGBRow_LSX, 1, 4, 4, 7)
+ #undef ANY11C
+ 
+ // Any 1 to 1 interpolate.  Takes 2 rows of source via stride.
+-#define ANY11I(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK)           \
+-  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride, \
+-               int width, int source_y_fraction) {                   \
+-    SIMD_ALIGNED(TS temps[64 * 2]);                                  \
+-    SIMD_ALIGNED(TD tempd[64]);                                      \
+-    memset(temps, 0, sizeof(temps)); /* for msan */                  \
+-    int r = width & MASK;                                            \
+-    int n = width & ~MASK;                                           \
+-    if (n > 0) {                                                     \
+-      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction);  \
+-    }                                                                \
+-    memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS));        \
+-    if (source_y_fraction) {                                         \
+-      memcpy(temps + 64, src_ptr + src_stride + n * SBPP,            \
+-             r * SBPP * sizeof(TS));                                 \
+-    }                                                                \
+-    ANY_SIMD(tempd, temps, 64, MASK + 1, source_y_fraction);         \
+-    memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD));          \
++#define ANY11I(NAMEANY, ANY_SIMD, T, SBPP, BPP, MASK)                         \
++  void NAMEANY(T* dst_ptr, const T* src_ptr, ptrdiff_t src_stride, int width, \
++               int source_y_fraction) {                                       \
++    SIMD_ALIGNED(T temp[64 * 3]);                                             \
++    memset(temp, 0, 64 * 2 * sizeof(T)); /* for msan */                       \
++    int r = width & MASK;                                                     \
++    int n = width & ~MASK;                                                    \
++    if (n > 0) {                                                              \
++      ANY_SIMD(dst_ptr, src_ptr, src_stride, n, source_y_fraction);           \
++    }                                                                         \
++    memcpy(temp, src_ptr + n * SBPP, r * SBPP * sizeof(T));                   \
++    if (source_y_fraction) {                                                  \
++      memcpy(temp + 64, src_ptr + src_stride + n * SBPP,                      \
++             r * SBPP * sizeof(T));                                           \
++    }                                                                         \
++    ANY_SIMD(temp + 128, temp, 64, MASK + 1, source_y_fraction);              \
++    memcpy(dst_ptr + n * BPP, temp + 128, r * BPP * sizeof(T));               \
+   }
+ 
+ #ifdef HAS_INTERPOLATEROW_AVX2
+-ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, uint8_t, 1, 1, 31)
++ANY11I(InterpolateRow_Any_AVX2, InterpolateRow_AVX2, uint8_t, 1, 1, 31)
+ #endif
+ #ifdef HAS_INTERPOLATEROW_SSSE3
+-ANY11I(InterpolateRow_Any_SSSE3,
+-       InterpolateRow_SSSE3,
+-       uint8_t,
+-       uint8_t,
+-       1,
+-       1,
+-       15)
++ANY11I(InterpolateRow_Any_SSSE3, InterpolateRow_SSSE3, uint8_t, 1, 1, 15)
+ #endif
+ #ifdef HAS_INTERPOLATEROW_NEON
+-ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, uint8_t, 1, 1, 15)
++ANY11I(InterpolateRow_Any_NEON, InterpolateRow_NEON, uint8_t, 1, 1, 15)
+ #endif
+ #ifdef HAS_INTERPOLATEROW_MSA
+-ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, uint8_t, 1, 1, 31)
++ANY11I(InterpolateRow_Any_MSA, InterpolateRow_MSA, uint8_t, 1, 1, 31)
+ #endif
+ #ifdef HAS_INTERPOLATEROW_LSX
+-ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, uint8_t, 1, 1, 31)
++ANY11I(InterpolateRow_Any_LSX, InterpolateRow_LSX, uint8_t, 1, 1, 31)
+ #endif
+ 
+ #ifdef HAS_INTERPOLATEROW_16_NEON
+-ANY11I(InterpolateRow_16_Any_NEON,
+-       InterpolateRow_16_NEON,
+-       uint16_t,
+-       uint16_t,
+-       1,
+-       1,
+-       7)
++ANY11I(InterpolateRow_16_Any_NEON, InterpolateRow_16_NEON, uint16_t, 1, 1, 7)
+ #endif
+-#undef ANY11I
+ 
+-// Any 1 to 1 interpolate with scale param
+-#define ANY11IS(NAMEANY, ANY_SIMD, TD, TS, SBPP, BPP, MASK)                \
+-  void NAMEANY(TD* dst_ptr, const TS* src_ptr, ptrdiff_t src_stride,       \
+-               int scale, int width, int source_y_fraction) {              \
+-    SIMD_ALIGNED(TS temps[64 * 2]);                                        \
+-    SIMD_ALIGNED(TD tempd[64]);                                            \
+-    memset(temps, 0, sizeof(temps)); /* for msan */                        \
+-    int r = width & MASK;                                                  \
+-    int n = width & ~MASK;                                                 \
+-    if (n > 0) {                                                           \
+-      ANY_SIMD(dst_ptr, src_ptr, src_stride, scale, n, source_y_fraction); \
+-    }                                                                      \
+-    memcpy(temps, src_ptr + n * SBPP, r * SBPP * sizeof(TS));              \
+-    if (source_y_fraction) {                                               \
+-      memcpy(temps + 64, src_ptr + src_stride + n * SBPP,                  \
+-             r * SBPP * sizeof(TS));                                       \
+-    }                                                                      \
+-    ANY_SIMD(tempd, temps, 64, scale, MASK + 1, source_y_fraction);        \
+-    memcpy(dst_ptr + n * BPP, tempd, r * BPP * sizeof(TD));                \
+-  }
+-
+-#ifdef HAS_INTERPOLATEROW_16TO8_NEON
+-ANY11IS(InterpolateRow_16To8_Any_NEON,
+-        InterpolateRow_16To8_NEON,
+-        uint8_t,
+-        uint16_t,
+-        1,
+-        1,
+-        7)
+-#endif
+-#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
+-ANY11IS(InterpolateRow_16To8_Any_AVX2,
+-        InterpolateRow_16To8_AVX2,
+-        uint8_t,
+-        uint16_t,
+-        1,
+-        1,
+-        31)
+-#endif
+-
+-#undef ANY11IS
++#undef ANY11I
+ 
+ // Any 1 to 1 mirror.
+ #define ANY11M(NAMEANY, ANY_SIMD, BPP, MASK)                              \
+diff --git a/source/row_common.cc b/source/row_common.cc
+index 4e1141f7..2a4b2939 100644
+--- a/source/row_common.cc
++++ b/source/row_common.cc
+@@ -3016,9 +3016,6 @@ void DivideRow_16_C(const uint16_t* src_y,
+ // 16384 = 10 bits
+ // 4096 = 12 bits
+ // 256 = 16 bits
+-// TODO(fbarchard): change scale to bits
+-#define C16TO8(v, scale) clamp255(((v) * (scale)) >> 16)
+-
+ void Convert16To8Row_C(const uint16_t* src_y,
+                        uint8_t* dst_y,
+                        int scale,
+@@ -3028,7 +3025,7 @@ void Convert16To8Row_C(const uint16_t* src_y,
+   assert(scale <= 32768);
+ 
+   for (x = 0; x < width; ++x) {
+-    dst_y[x] = C16TO8(src_y[x], scale);
++    dst_y[x] = clamp255((src_y[x] * scale) >> 16);
+   }
+ }
+ 
+@@ -3445,7 +3442,8 @@ static void HalfRow_16To8_C(const uint16_t* src_uv,
+                             int width) {
+   int x;
+   for (x = 0; x < width; ++x) {
+-    dst_uv[x] = C16TO8((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1, scale);
++    dst_uv[x] = clamp255(
++        (((src_uv[x] + src_uv[src_uv_stride + x] + 1) >> 1) * scale) >> 16);
+   }
+ }
+ 
+@@ -3459,9 +3457,6 @@ void InterpolateRow_C(uint8_t* dst_ptr,
+   int y0_fraction = 256 - y1_fraction;
+   const uint8_t* src_ptr1 = src_ptr + src_stride;
+   int x;
+-  assert(source_y_fraction >= 0);
+-  assert(source_y_fraction < 256);
+-
+   if (y1_fraction == 0) {
+     memcpy(dst_ptr, src_ptr, width);
+     return;
+@@ -3470,42 +3465,18 @@ void InterpolateRow_C(uint8_t* dst_ptr,
+     HalfRow_C(src_ptr, src_stride, dst_ptr, width);
+     return;
+   }
+-  for (x = 0; x < width; ++x) {
++  for (x = 0; x < width - 1; x += 2) {
+     dst_ptr[0] =
+         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+-    ++src_ptr;
+-    ++src_ptr1;
+-    ++dst_ptr;
++    dst_ptr[1] =
++        (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction + 128) >> 8;
++    src_ptr += 2;
++    src_ptr1 += 2;
++    dst_ptr += 2;
+   }
+-}
+-
+-// C version 2x2 -> 2x1.
+-void InterpolateRow_16_C(uint16_t* dst_ptr,
+-                         const uint16_t* src_ptr,
+-                         ptrdiff_t src_stride,
+-                         int width,
+-                         int source_y_fraction) {
+-  int y1_fraction = source_y_fraction;
+-  int y0_fraction = 256 - y1_fraction;
+-  const uint16_t* src_ptr1 = src_ptr + src_stride;
+-  int x;
+-  assert(source_y_fraction >= 0);
+-  assert(source_y_fraction < 256);
+-
+-  if (y1_fraction == 0) {
+-    memcpy(dst_ptr, src_ptr, width * 2);
+-    return;
+-  }
+-  if (y1_fraction == 128) {
+-    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
+-    return;
+-  }
+-  for (x = 0; x < width; ++x) {
++  if (width & 1) {
+     dst_ptr[0] =
+         (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8;
+-    ++src_ptr;
+-    ++src_ptr1;
+-    ++dst_ptr;
+   }
+ }
+ 
+@@ -3515,8 +3486,6 @@ void InterpolateRow_16_C(uint16_t* dst_ptr,
+ // 16384 = 10 bits
+ // 4096 = 12 bits
+ // 256 = 16 bits
+-// TODO(fbarchard): change scale to bits
+-
+ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
+                             const uint16_t* src_ptr,
+                             ptrdiff_t src_stride,
+@@ -3527,9 +3496,6 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
+   int y0_fraction = 256 - y1_fraction;
+   const uint16_t* src_ptr1 = src_ptr + src_stride;
+   int x;
+-  assert(source_y_fraction >= 0);
+-  assert(source_y_fraction < 256);
+-
+   if (source_y_fraction == 0) {
+     Convert16To8Row_C(src_ptr, dst_ptr, scale, width);
+     return;
+@@ -3538,13 +3504,53 @@ void InterpolateRow_16To8_C(uint8_t* dst_ptr,
+     HalfRow_16To8_C(src_ptr, src_stride, dst_ptr, scale, width);
+     return;
+   }
+-  for (x = 0; x < width; ++x) {
+-    dst_ptr[0] = C16TO8(
+-        (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction + 128) >> 8,
+-        scale);
+-    src_ptr += 1;
+-    src_ptr1 += 1;
+-    dst_ptr += 1;
++  for (x = 0; x < width - 1; x += 2) {
++    dst_ptr[0] = clamp255(
++        (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) *
++         scale) >>
++        16);
++    dst_ptr[1] = clamp255(
++        (((src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8) *
++         scale) >>
++        16);
++    src_ptr += 2;
++    src_ptr1 += 2;
++    dst_ptr += 2;
++  }
++  if (width & 1) {
++    dst_ptr[0] = clamp255(
++        (((src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8) *
++         scale) >>
++        16);
++  }
++}
++
++void InterpolateRow_16_C(uint16_t* dst_ptr,
++                         const uint16_t* src_ptr,
++                         ptrdiff_t src_stride,
++                         int width,
++                         int source_y_fraction) {
++  int y1_fraction = source_y_fraction;
++  int y0_fraction = 256 - y1_fraction;
++  const uint16_t* src_ptr1 = src_ptr + src_stride;
++  int x;
++  if (source_y_fraction == 0) {
++    memcpy(dst_ptr, src_ptr, width * 2);
++    return;
++  }
++  if (source_y_fraction == 128) {
++    HalfRow_16_C(src_ptr, src_stride, dst_ptr, width);
++    return;
++  }
++  for (x = 0; x < width - 1; x += 2) {
++    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
++    dst_ptr[1] = (src_ptr[1] * y0_fraction + src_ptr1[1] * y1_fraction) >> 8;
++    src_ptr += 2;
++    src_ptr1 += 2;
++    dst_ptr += 2;
++  }
++  if (width & 1) {
++    dst_ptr[0] = (src_ptr[0] * y0_fraction + src_ptr1[0] * y1_fraction) >> 8;
+   }
+ }
+ 
+@@ -4149,26 +4155,6 @@ void RAWToYJRow_SSSE3(const uint8_t* src_raw, uint8_t* dst_yj, int width) {
+ }
+ #endif  // HAS_RAWTOYJROW_SSSE3
+ 
+-#ifdef HAS_INTERPOLATEROW_16TO8_AVX2
+-void InterpolateRow_16To8_AVX2(uint8_t* dst_ptr,
+-                               const uint16_t* src_ptr,
+-                               ptrdiff_t src_stride,
+-                               int scale,
+-                               int width,
+-                               int source_y_fraction) {
+-  // Row buffer for intermediate 16 bit pixels.
+-  SIMD_ALIGNED(uint16_t row[MAXTWIDTH]);
+-  while (width > 0) {
+-    int twidth = width > MAXTWIDTH ? MAXTWIDTH : width;
+-    InterpolateRow_16_C(row, src_ptr, src_stride, twidth, source_y_fraction);
+-    Convert16To8Row_AVX2(row, dst_ptr, scale, twidth);
+-    src_ptr += twidth;
+-    dst_ptr += twidth;
+-    width -= twidth;
+-  }
+-}
+-#endif  // HAS_INTERPOLATEROW_16TO8_AVX2
+-
+ float ScaleSumSamples_C(const float* src, float* dst, float scale, int width) {
+   float fsum = 0.f;
+   int i;
+diff --git a/source/row_gcc.cc b/source/row_gcc.cc
+index 3bda4482..5af7779a 100644
+--- a/source/row_gcc.cc
++++ b/source/row_gcc.cc
+@@ -5414,26 +5414,37 @@ void Convert8To16Row_AVX2(const uint8_t* src_y,
+ #endif  // HAS_CONVERT8TO16ROW_AVX2
+ 
+ #ifdef HAS_SPLITRGBROW_SSSE3
++
+ // Shuffle table for converting RGB to Planar.
+-static const uvec8 kSplitRGBShuffle[9] = {
+-    {0u, 3u, 6u, 9u, 12u, 15u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+-     128u, 128u},
+-    {128u, 128u, 128u, 128u, 128u, 128u, 2u, 5u, 8u, 11u, 14u, 128u, 128u, 128u,
+-     128u, 128u},
+-    {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 1u, 4u,
+-     7u, 10u, 13u},
+-    {1u, 4u, 7u, 10u, 13u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+-     128u, 128u},
+-    {128u, 128u, 128u, 128u, 128u, 0u, 3u, 6u, 9u, 12u, 15u, 128u, 128u, 128u,
+-     128u, 128u},
+-    {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 2u, 5u,
+-     8u, 11u, 14u},
+-    {2u, 5u, 8u, 11u, 14u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u,
+-     128u, 128u},
+-    {128u, 128u, 128u, 128u, 128u, 1u, 4u, 7u, 10u, 13u, 128u, 128u, 128u, 128u,
+-     128u, 128u},
+-    {128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 128u, 0u, 3u, 6u, 9u,
+-     12u, 15u}};
++static const uvec8 kShuffleMaskRGBToR0 = {0u,   3u,   6u,   9u,   12u,  15u,
++                                          128u, 128u, 128u, 128u, 128u, 128u,
++                                          128u, 128u, 128u, 128u};
++static const uvec8 kShuffleMaskRGBToR1 = {128u, 128u, 128u, 128u, 128u, 128u,
++                                          2u,   5u,   8u,   11u,  14u,  128u,
++                                          128u, 128u, 128u, 128u};
++static const uvec8 kShuffleMaskRGBToR2 = {128u, 128u, 128u, 128u, 128u, 128u,
++                                          128u, 128u, 128u, 128u, 128u, 1u,
++                                          4u,   7u,   10u,  13u};
++
++static const uvec8 kShuffleMaskRGBToG0 = {1u,   4u,   7u,   10u,  13u,  128u,
++                                          128u, 128u, 128u, 128u, 128u, 128u,
++                                          128u, 128u, 128u, 128u};
++static const uvec8 kShuffleMaskRGBToG1 = {128u, 128u, 128u, 128u, 128u, 0u,
++                                          3u,   6u,   9u,   12u,  15u,  128u,
++                                          128u, 128u, 128u, 128u};
++static const uvec8 kShuffleMaskRGBToG2 = {128u, 128u, 128u, 128u, 128u, 128u,
++                                          128u, 128u, 128u, 128u, 128u, 2u,
++                                          5u,   8u,   11u,  14u};
++
++static const uvec8 kShuffleMaskRGBToB0 = {2u,   5u,   8u,   11u,  14u,  128u,
++                                          128u, 128u, 128u, 128u, 128u, 128u,
++                                          128u, 128u, 128u, 128u};
++static const uvec8 kShuffleMaskRGBToB1 = {128u, 128u, 128u, 128u, 128u, 1u,
++                                          4u,   7u,   10u,  13u,  128u, 128u,
++                                          128u, 128u, 128u, 128u};
++static const uvec8 kShuffleMaskRGBToB2 = {128u, 128u, 128u, 128u, 128u, 128u,
++                                          128u, 128u, 128u, 128u, 0u,   3u,
++                                          6u,   9u,   12u,  15u};
+ 
+ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+                        uint8_t* dst_r,
+@@ -5447,9 +5458,9 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+       "movdqu      (%0),%%xmm0                   \n"
+       "movdqu      0x10(%0),%%xmm1               \n"
+       "movdqu      0x20(%0),%%xmm2               \n"
+-      "pshufb      0(%5), %%xmm0                 \n"
+-      "pshufb      16(%5), %%xmm1                \n"
+-      "pshufb      32(%5), %%xmm2                \n"
++      "pshufb      %5, %%xmm0                    \n"
++      "pshufb      %6, %%xmm1                    \n"
++      "pshufb      %7, %%xmm2                    \n"
+       "por         %%xmm1,%%xmm0                 \n"
+       "por         %%xmm2,%%xmm0                 \n"
+       "movdqu      %%xmm0,(%1)                   \n"
+@@ -5458,9 +5469,9 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+       "movdqu      (%0),%%xmm0                   \n"
+       "movdqu      0x10(%0),%%xmm1               \n"
+       "movdqu      0x20(%0),%%xmm2               \n"
+-      "pshufb      48(%5),%%xmm0                 \n"
+-      "pshufb      64(%5),%%xmm1                 \n"
+-      "pshufb      80(%5), %%xmm2                \n"
++      "pshufb      %8, %%xmm0                    \n"
++      "pshufb      %9, %%xmm1                    \n"
++      "pshufb      %10, %%xmm2                   \n"
+       "por         %%xmm1,%%xmm0                 \n"
+       "por         %%xmm2,%%xmm0                 \n"
+       "movdqu      %%xmm0,(%2)                   \n"
+@@ -5469,9 +5480,9 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+       "movdqu      (%0),%%xmm0                   \n"
+       "movdqu      0x10(%0),%%xmm1               \n"
+       "movdqu      0x20(%0),%%xmm2               \n"
+-      "pshufb      96(%5), %%xmm0                \n"
+-      "pshufb      112(%5), %%xmm1               \n"
+-      "pshufb      128(%5), %%xmm2               \n"
++      "pshufb      %11, %%xmm0                   \n"
++      "pshufb      %12, %%xmm1                   \n"
++      "pshufb      %13, %%xmm2                   \n"
+       "por         %%xmm1,%%xmm0                 \n"
+       "por         %%xmm2,%%xmm0                 \n"
+       "movdqu      %%xmm0,(%3)                   \n"
+@@ -5484,32 +5495,51 @@ void SplitRGBRow_SSSE3(const uint8_t* src_rgb,
+         "+r"(dst_g),               // %2
+         "+r"(dst_b),               // %3
+         "+r"(width)                // %4
+-      : "r"(&kSplitRGBShuffle[0])  // %5
++      : "m"(kShuffleMaskRGBToR0),  // %5
++        "m"(kShuffleMaskRGBToR1),  // %6
++        "m"(kShuffleMaskRGBToR2),  // %7
++        "m"(kShuffleMaskRGBToG0),  // %8
++        "m"(kShuffleMaskRGBToG1),  // %9
++        "m"(kShuffleMaskRGBToG2),  // %10
++        "m"(kShuffleMaskRGBToB0),  // %11
++        "m"(kShuffleMaskRGBToB1),  // %12
++        "m"(kShuffleMaskRGBToB2)   // %13
+       : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ }
+ #endif  // HAS_SPLITRGBROW_SSSE3
+ 
+ #ifdef HAS_MERGERGBROW_SSSE3
+-// Shuffle table for converting Planar to RGB.
+-static const uvec8 kMergeRGBShuffle[9] = {
+-    {0u, 128u, 128u, 1u, 128u, 128u, 2u, 128u, 128u, 3u, 128u, 128u, 4u, 128u,
+-     128u, 5u},
+-    {128u, 0u, 128u, 128u, 1u, 128u, 128u, 2u, 128u, 128u, 3u, 128u, 128u, 4u,
+-     128u, 128u},
+-    {128u, 128u, 0u, 128u, 128u, 1u, 128u, 128u, 2u, 128u, 128u, 3u, 128u, 128u,
+-     4u, 128u},
+-    {128u, 128u, 6u, 128u, 128u, 7u, 128u, 128u, 8u, 128u, 128u, 9u, 128u, 128u,
+-     10u, 128u},
+-    {5u, 128u, 128u, 6u, 128u, 128u, 7u, 128u, 128u, 8u, 128u, 128u, 9u, 128u,
+-     128u, 10u},
+-    {128u, 5u, 128u, 128u, 6u, 128u, 128u, 7u, 128u, 128u, 8u, 128u, 128u, 9u,
+-     128u, 128u},
+-    {128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u, 128u, 128u,
+-     15u, 128u, 128u},
+-    {128u, 128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u, 128u,
+-     128u, 15u, 128u},
+-    {10u, 128u, 128u, 11u, 128u, 128u, 12u, 128u, 128u, 13u, 128u, 128u, 14u,
+-     128u, 128u, 15u}};
++
++// Shuffle table for converting Planar to RGB.
++static const uvec8 kShuffleMaskRToRGB0 = {0u, 128u, 128u, 1u, 128u, 128u,
++                                          2u, 128u, 128u, 3u, 128u, 128u,
++                                          4u, 128u, 128u, 5u};
++static const uvec8 kShuffleMaskGToRGB0 = {128u, 0u, 128u, 128u, 1u, 128u,
++                                          128u, 2u, 128u, 128u, 3u, 128u,
++                                          128u, 4u, 128u, 128u};
++static const uvec8 kShuffleMaskBToRGB0 = {128u, 128u, 0u, 128u, 128u, 1u,
++                                          128u, 128u, 2u, 128u, 128u, 3u,
++                                          128u, 128u, 4u, 128u};
++
++static const uvec8 kShuffleMaskGToRGB1 = {5u, 128u, 128u, 6u, 128u, 128u,
++                                          7u, 128u, 128u, 8u, 128u, 128u,
++                                          9u, 128u, 128u, 10u};
++static const uvec8 kShuffleMaskBToRGB1 = {128u, 5u, 128u, 128u, 6u, 128u,
++                                          128u, 7u, 128u, 128u, 8u, 128u,
++                                          128u, 9u, 128u, 128u};
++static const uvec8 kShuffleMaskRToRGB1 = {128u, 128u, 6u,  128u, 128u, 7u,
++                                          128u, 128u, 8u,  128u, 128u, 9u,
++                                          128u, 128u, 10u, 128u};
++
++static const uvec8 kShuffleMaskBToRGB2 = {10u, 128u, 128u, 11u, 128u, 128u,
++                                          12u, 128u, 128u, 13u, 128u, 128u,
++                                          14u, 128u, 128u, 15u};
++static const uvec8 kShuffleMaskRToRGB2 = {128u, 11u, 128u, 128u, 12u, 128u,
++                                          128u, 13u, 128u, 128u, 14u, 128u,
++                                          128u, 15u, 128u, 128u};
++static const uvec8 kShuffleMaskGToRGB2 = {128u, 128u, 11u, 128u, 128u, 12u,
++                                          128u, 128u, 13u, 128u, 128u, 14u,
++                                          128u, 128u, 15u, 128u};
+ 
+ void MergeRGBRow_SSSE3(const uint8_t* src_r,
+                        const uint8_t* src_g,
+@@ -5523,9 +5553,9 @@ void MergeRGBRow_SSSE3(const uint8_t* src_r,
+       "movdqu      (%0),%%xmm0                   \n"
+       "movdqu      (%1),%%xmm1                   \n"
+       "movdqu      (%2),%%xmm2                   \n"
+-      "pshufb      (%5), %%xmm0                  \n"
+-      "pshufb      16(%5), %%xmm1                \n"
+-      "pshufb      32(%5), %%xmm2                \n"
++      "pshufb      %5, %%xmm0                    \n"
++      "pshufb      %6, %%xmm1                    \n"
++      "pshufb      %7, %%xmm2                    \n"
+       "por         %%xmm1,%%xmm0                 \n"
+       "por         %%xmm2,%%xmm0                 \n"
+       "movdqu      %%xmm0,(%3)                   \n"
+@@ -5533,9 +5563,9 @@ void MergeRGBRow_SSSE3(const uint8_t* src_r,
+       "movdqu      (%0),%%xmm0                   \n"
+       "movdqu      (%1),%%xmm1                   \n"
+       "movdqu      (%2),%%xmm2                   \n"
+-      "pshufb      48(%5), %%xmm0                \n"
+-      "pshufb      64(%5), %%xmm1                \n"
+-      "pshufb      80(%5), %%xmm2                \n"
++      "pshufb      %8, %%xmm0                    \n"
++      "pshufb      %9, %%xmm1                    \n"
++      "pshufb      %10, %%xmm2                   \n"
+       "por         %%xmm1,%%xmm0                 \n"
+       "por         %%xmm2,%%xmm0                 \n"
+       "movdqu      %%xmm0,16(%3)                 \n"
+@@ -5543,9 +5573,9 @@ void MergeRGBRow_SSSE3(const uint8_t* src_r,
+       "movdqu      (%0),%%xmm0                   \n"
+       "movdqu      (%1),%%xmm1                   \n"
+       "movdqu      (%2),%%xmm2                   \n"
+-      "pshufb      96(%5), %%xmm0                \n"
+-      "pshufb      112(%5), %%xmm1               \n"
+-      "pshufb      128(%5), %%xmm2               \n"
++      "pshufb      %11, %%xmm0                   \n"
++      "pshufb      %12, %%xmm1                   \n"
++      "pshufb      %13, %%xmm2                   \n"
+       "por         %%xmm1,%%xmm0                 \n"
+       "por         %%xmm2,%%xmm0                 \n"
+       "movdqu      %%xmm0,32(%3)                 \n"
+@@ -5561,7 +5591,15 @@ void MergeRGBRow_SSSE3(const uint8_t* src_r,
+         "+r"(src_b),               // %2
+         "+r"(dst_rgb),             // %3
+         "+r"(width)                // %4
+-      : "r"(&kMergeRGBShuffle[0])  // %5
++      : "m"(kShuffleMaskRToRGB0),  // %5
++        "m"(kShuffleMaskGToRGB0),  // %6
++        "m"(kShuffleMaskBToRGB0),  // %7
++        "m"(kShuffleMaskRToRGB1),  // %8
++        "m"(kShuffleMaskGToRGB1),  // %9
++        "m"(kShuffleMaskBToRGB1),  // %10
++        "m"(kShuffleMaskRToRGB2),  // %11
++        "m"(kShuffleMaskGToRGB2),  // %12
++        "m"(kShuffleMaskBToRGB2)   // %13
+       : "memory", "cc", "xmm0", "xmm1", "xmm2");
+ }
+ #endif  // HAS_MERGERGBROW_SSSE3
+diff --git a/source/row_neon64.cc b/source/row_neon64.cc
+index 37962378..5bbc9da2 100644
+--- a/source/row_neon64.cc
++++ b/source/row_neon64.cc
+@@ -3142,86 +3142,6 @@ void InterpolateRow_16_NEON(uint16_t* dst_ptr,
+       : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5");
+ }
+ 
+-// Bilinear filter 8x2 -> 8x1
+-// Use scale to convert lsb formats to msb, depending how many bits there are:
+-// 32768 = 9 bits
+-// 16384 = 10 bits
+-// 4096 = 12 bits
+-// 256 = 16 bits
+-void InterpolateRow_16To8_NEON(uint8_t* dst_ptr,
+-                               const uint16_t* src_ptr,
+-                               ptrdiff_t src_stride,
+-                               int scale,
+-                               int dst_width,
+-                               int source_y_fraction) {
+-  int y1_fraction = source_y_fraction;
+-  int y0_fraction = 256 - y1_fraction;
+-  const uint16_t* src_ptr1 = src_ptr + src_stride;
+-  int shift = 15 - __builtin_clz((int32_t)scale);  // Negative shl is shr
+-
+-  asm volatile(
+-      "dup         v6.8h, %w6                    \n"
+-      "cmp         %w4, #0                       \n"
+-      "b.eq        100f                          \n"
+-      "cmp         %w4, #128                     \n"
+-      "b.eq        50f                           \n"
+-
+-      "dup         v5.8h, %w4                    \n"
+-      "dup         v4.8h, %w5                    \n"
+-      // General purpose row blend.
+-      "1:                                        \n"
+-      "ld1         {v0.8h}, [%1], #16            \n"
+-      "ld1         {v1.8h}, [%2], #16            \n"
+-      "subs        %w3, %w3, #8                  \n"
+-      "umull       v2.4s, v0.4h, v4.4h           \n"
+-      "prfm        pldl1keep, [%1, 448]          \n"
+-      "umull2      v3.4s, v0.8h, v4.8h           \n"
+-      "prfm        pldl1keep, [%2, 448]          \n"
+-      "umlal       v2.4s, v1.4h, v5.4h           \n"
+-      "umlal2      v3.4s, v1.8h, v5.8h           \n"
+-      "rshrn       v0.4h, v2.4s, #8              \n"
+-      "rshrn2      v0.8h, v3.4s, #8              \n"
+-      "ushl        v0.8h, v0.8h, v6.8h           \n"
+-      "uqxtn       v0.8b, v0.8h                  \n"
+-      "st1         {v0.8b}, [%0], #8             \n"
+-      "b.gt        1b                            \n"
+-      "b           99f                           \n"
+-
+-      // Blend 50 / 50.
+-      "50:                                       \n"
+-      "ld1         {v0.8h}, [%1], #16            \n"
+-      "ld1         {v1.8h}, [%2], #16            \n"
+-      "subs        %w3, %w3, #8                  \n"
+-      "prfm        pldl1keep, [%1, 448]          \n"
+-      "urhadd      v0.8h, v0.8h, v1.8h           \n"
+-      "prfm        pldl1keep, [%2, 448]          \n"
+-      "ushl        v0.8h, v0.8h, v6.8h           \n"
+-      "uqxtn       v0.8b, v0.8h                  \n"
+-      "st1         {v0.8b}, [%0], #8             \n"
+-      "b.gt        50b                           \n"
+-      "b           99f                           \n"
+-
+-      // Blend 100 / 0 - Copy row unchanged.
+-      "100:                                      \n"
+-      "ldr         q0, [%1], #16                 \n"
+-      "ushl        v0.8h, v0.8h, v2.8h           \n"  // shr = v2 is negative
+-      "prfm        pldl1keep, [%1, 448]          \n"
+-      "uqxtn       v0.8b, v0.8h                  \n"
+-      "subs        %w3, %w3, #8                  \n"  // 8 src pixels per loop
+-      "str         d0, [%0], #8                  \n"  // store 8 pixels
+-      "b.gt        100b                          \n"
+-
+-      "99:                                       \n"
+-      : "+r"(dst_ptr),     // %0
+-        "+r"(src_ptr),     // %1
+-        "+r"(src_ptr1),    // %2
+-        "+r"(dst_width)    // %3
+-      : "r"(y1_fraction),  // %4
+-        "r"(y0_fraction),  // %5
+-        "r"(shift)         // %6
+-      : "cc", "memory", "v0", "v1", "v2", "v3", "v4", "v5", "v6");
+-}
+-
+ // dr * (256 - sa) / 256 + sr = dr - dr * sa / 256 + sr
+ void ARGBBlendRow_NEON(const uint8_t* src_argb,
+                        const uint8_t* src_argb1,
+diff --git a/source/scale_common.cc b/source/scale_common.cc
+index b02bdafd..812d57ec 100644
+--- a/source/scale_common.cc
++++ b/source/scale_common.cc
+@@ -1605,12 +1605,6 @@ void ScalePlaneVertical_16(int src_height,
+   }
+ }
+ 
+-// Use scale to convert lsb formats to msb, depending how many bits there are:
+-// 32768 = 9 bits
+-// 16384 = 10 bits
+-// 4096 = 12 bits
+-// 256 = 16 bits
+-// TODO(fbarchard): change scale to bits
+ void ScalePlaneVertical_16To8(int src_height,
+                               int dst_width,
+                               int dst_height,
+@@ -1626,7 +1620,7 @@ void ScalePlaneVertical_16To8(int src_height,
+                               enum FilterMode filtering) {
+   // TODO(fbarchard): Allow higher wpp.
+   int dst_width_words = dst_width * wpp;
+-  // TODO(https://crbug.com/libyuv/931): Add NEON 32 bit and AVX2 versions.
++  // TODO(https://crbug.com/libyuv/931): Add NEON and AVX2 versions.
+   void (*InterpolateRow_16To8)(uint8_t * dst_argb, const uint16_t* src_argb,
+                                ptrdiff_t src_stride, int scale, int dst_width,
+                                int source_y_fraction) = InterpolateRow_16To8_C;
+@@ -1638,22 +1632,6 @@ void ScalePlaneVertical_16To8(int src_height,
+   assert(dst_height > 0);
+   src_argb += (x >> 16) * wpp;
+ 
+-#if defined(HAS_INTERPOLATEROW_16TO8_NEON)
+-  if (TestCpuFlag(kCpuHasNEON)) {
+-    InterpolateRow_16To8 = InterpolateRow_16To8_Any_NEON;
+-    if (IS_ALIGNED(dst_width, 8)) {
+-      InterpolateRow_16To8 = InterpolateRow_16To8_NEON;
+-    }
+-  }
+-#endif
+-#if defined(HAS_INTERPOLATEROW_16TO8_AVX2)
+-  if (TestCpuFlag(kCpuHasAVX2)) {
+-    InterpolateRow_16To8 = InterpolateRow_16To8_Any_AVX2;
+-    if (IS_ALIGNED(dst_width, 32)) {
+-      InterpolateRow_16To8 = InterpolateRow_16To8_AVX2;
+-    }
+-  }
+-#endif
+   for (j = 0; j < dst_height; ++j) {
+     int yi;
+     int yf;
+-- 
+2.34.1
+
diff -pruN 0.0~git20220809.9b17af9-1/debian/patches/series 0.0~git20220809.9b17af9-1ubuntu2/debian/patches/series
--- 0.0~git20220809.9b17af9-1/debian/patches/series	2022-08-11 18:58:31.000000000 +0000
+++ 0.0~git20220809.9b17af9-1ubuntu2/debian/patches/series	2022-09-19 22:32:03.000000000 +0000
@@ -1,2 +1,3 @@
 0001-CMakeLists.txt-Apply-SONAME-and-multiarch.patch
 0002-Specify-debian-googletest-dir.patch
+0001-Revert-Merge-SplitRGB-fix-mcmodel-large-x86-and-Inte.patch
diff -pruN 0.0~git20220809.9b17af9-1/debian/rules 0.0~git20220809.9b17af9-1ubuntu2/debian/rules
--- 0.0~git20220809.9b17af9-1/debian/rules	2022-07-27 21:56:02.000000000 +0000
+++ 0.0~git20220809.9b17af9-1ubuntu2/debian/rules	2022-09-21 17:41:03.000000000 +0000
@@ -26,7 +26,7 @@ else
 endif
 
 # Known broken googletest
-ifneq (,$(filter $(DEB_HOST_ARCH), armel s390x powerpc ppc64 sparc64))
+ifneq (,$(filter $(DEB_HOST_ARCH), arm64 armel s390x powerpc ppc64 sparc64))
   LIBYUV_TEST_FLAG = OFF
 endif
 
