diff -pruN 0.9.1+dfsg-1/Build/linux/build.sh 1.2.0+dfsg-2/Build/linux/build.sh
--- 0.9.1+dfsg-1/Build/linux/build.sh	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Build/linux/build.sh	2022-08-01 19:12:00.000000000 +0000
@@ -180,7 +180,7 @@ check_executable() (
 
 install_build() (
     build_type=Release
-    sudo=$(check_executable -p sudo)
+    sudo=$(check_executable -p sudo) || :
     while [ -n "$*" ]; do
         case $(printf %s "$1" | tr '[:upper:]' '[:lower:]') in
         release) build_type="Release" && shift ;;
diff -pruN 0.9.1+dfsg-1/Build/windows/build.bat 1.2.0+dfsg-2/Build/windows/build.bat
--- 0.9.1+dfsg-1/Build/windows/build.bat	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Build/windows/build.bat	2022-08-01 19:12:00.000000000 +0000
@@ -43,6 +43,8 @@ if "%unittest%"=="ON" echo Building unit
 
 if "%vs%"=="2019" (
     cmake ../.. %GENERATOR% -A x64 -DCMAKE_INSTALL_PREFIX=%SYSTEMDRIVE%\svt-encoders -DBUILD_SHARED_LIBS=%shared% -DBUILD_TESTING=%unittest% %cmake_eflags% || exit /b 1
+) else if "%vs%"=="2022" (
+    cmake ../.. %GENERATOR% -A x64 -DCMAKE_INSTALL_PREFIX=%SYSTEMDRIVE%\svt-encoders -DBUILD_SHARED_LIBS=%shared% -DBUILD_TESTING=%unittest% %cmake_eflags% || exit /b 1
 ) else (
     cmake ../.. %GENERATOR% -DCMAKE_INSTALL_PREFIX=%SYSTEMDRIVE%\svt-encoders -DBUILD_SHARED_LIBS=%shared% -DBUILD_TESTING=%unittest% %cmake_eflags% || exit /b 1
 )
@@ -65,6 +67,11 @@ if -%1-==-- (
         rmdir /s /q "%%~i" 1>nul
     )
     exit /b
+) else if /I "%1"=="2022" (
+    echo Generating Visual Studio 2022 solution
+    set "GENERATOR=Visual Studio 17 2022"
+    set vs=2022
+    shift
 ) else if /I "%1"=="2019" (
     echo Generating Visual Studio 2019 solution
     set "GENERATOR=Visual Studio 16 2019"
@@ -160,6 +167,6 @@ goto :args
 
 :help
     echo Batch file to build SVT-AV1 on Windows
-    echo Usage: build.bat [2019^|2017^|2015^|clean] [release^|debug] [nobuild] [test] [shared^|static] [c-only] [avx512]
+    echo Usage: build.bat [2022^|2019^|2017^|2015^|clean] [release^|debug] [nobuild] [test] [shared^|static] [c-only] [avx512]
     exit /b 1
 goto :EOF
diff -pruN 0.9.1+dfsg-1/CHANGELOG.md 1.2.0+dfsg-2/CHANGELOG.md
--- 0.9.1+dfsg-1/CHANGELOG.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/CHANGELOG.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,5 +1,72 @@
 # Changelog
 
+## [1.2.0] - 2022-08-02
+
+Encoder
+- Improve CRF preset tradeoffs for both the default and fast-decode modes
+- Improve the SSIM-based tradeoffs of the presets without impacting those of PSNR / VMAF
+- Improve CBR mode by enhancing the bit-distribution within the gop
+- Added support for reference frame scaling
+- Added support for quantization matrices
+- Added svtparams patches applicable to ffmpeg 4.4
+- AVX2 optimizations for low-delay mode
+- TPL-based VBR mode improvements
+- Improved Chroma RDOQ
+- Improve TPL QP Scaling
+- Add length info to ivf header
+- Fix support for metadata pass-through
+- Add ability to specify Chroma and Luma qindex offsets independently on top of CRF qp assignments
+
+Build, Cleanup and Documentation
+- Fix multiple API documentation mismatches
+- Updated features documentation
+- Various functional bug fixes
+
+## [1.1.0] - 2022-05-17
+
+Encoder
+- TPL tradeoff optimizations for 4L pred structure
+- Quality-vs-cycles tradeoff improvements across all presets
+- Add ability to force key_frame positions through ffmpeg for CRF mode
+- Minimize the quality impact of fast-decode while maintaining the decoder speedup
+- AVX2 optimizations for low delay mode
+- Fix VQ issues #1896 #1857 and #1819
+
+Build, Cleanup and Documentation
+- API / ABI cleanup and implement independent versioning
+- Add UEB_DLL for static linking with pkgconf
+- Update system requirements docs
+- Rate control code refactoring
+- Fix AVX512 vs AVX2 mismatch
+
+## [1.0.0] - 2022-04-22
+
+Encoder
+- Added S-frames support
+- CBR Rate control mode for low delay
+- Added support for chroma position signalling
+- Added support for skipping denoising pictures after film grain synthesis
+- Extend fast-decode support to cover presets M0-M10
+- Simplified --fast-decode to have only one level
+- Optimized --fast-decode level 1 for better tradeoffs
+- Visual quality improvements addressing issues #1819 / #1297
+- Visual quality fixes and improvements for both tune 0 and 1
+- Quality vs density tradeoffs tuning across all presets in CRF mode with TPL improvements
+- Update default settings to use a longer gop / higher quality preset and lower CRF value
+- Various code cleanups and memory optimizations
+- Additional AVX2 optimizations
+- Fixed all known functional bugs
+- More robust rate control parameter verification
+
+Build and Documentation
+- Major documentation update and re-structure
+- Added more user guides, preset guides and common questions section
+- Improve CI coverage
+- Reduced unnecessary warnings
+- Improved the documentation of the configuration parameters
+- Improve Unit Test Coverage
+- Address C vs asm mismatches
+
 ## [0.9.1] - 2022-02-23
 
 Encoder
diff -pruN 0.9.1+dfsg-1/CMakeLists.txt 1.2.0+dfsg-2/CMakeLists.txt
--- 0.9.1+dfsg-1/CMakeLists.txt	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/CMakeLists.txt	2022-08-01 19:12:00.000000000 +0000
@@ -16,7 +16,7 @@ if("${CMAKE_CURRENT_SOURCE_DIR}" STREQUA
                     "Please use the Build folder or create your own.")
 endif()
 
-project(svt-av1 VERSION 0.9.1
+project(svt-av1 VERSION 1.2.0
     LANGUAGES C CXX)
 
 if(POLICY CMP0063)
@@ -25,6 +25,9 @@ endif()
 if(POLICY CMP0069)
     cmake_policy(SET CMP0069 NEW)
 endif()
+if(POLICY CMP0077)
+    cmake_policy(SET CMP0077 NEW)
+endif()
 
 
 set(CMAKE_C_VISIBILITY_PRESET hidden)
@@ -42,9 +45,9 @@ option(COMPILE_C_ONLY "Compile only C co
 include(CheckCSourceCompiles)
 
 check_c_source_compiles("
-#if defined(_M_IX86) || defined(_M_X64) || defined(__i386__) || defined(__x86_64__)
+#if defined(_M_X64) || defined(__x86_64__)
 #else
-#error \"Non-x86\"
+#error \"Non-x64\"
 #endif
 int main(void) {}
 " HAVE_X86_PLATFORM)
@@ -205,7 +208,7 @@ if(UNIX)
         set(CMAKE_C_ARCHIVE_FINISH   "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
         set(CMAKE_CXX_ARCHIVE_FINISH "<CMAKE_RANLIB> -no_warning_for_no_symbols -c <TARGET>")
     else()
-        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -pie -z noexecstack -z relro -z now")
+        set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -z noexecstack -z relro -z now")
     endif()
 endif()
 
@@ -216,9 +219,7 @@ function(check_flag lang flag)
     string(REGEX REPLACE "[^A-Za-z0-9]" "_" flag_var "${flag}")
     if(NOT DEFINED ${lang}_FLAG${flag_var})
         execute_process(COMMAND ${CMAKE_COMMAND} -E echo_append "-- Checking ${lang} flag support for: [${flag}] - ")
-        if(NOT MSVC AND CLANG)
-            set(STORE_CMAKE_C_FLAGS "${CMAKE_C_FLAGS}")
-            set(STORE_CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS}")
+        if(CMAKE_C_COMPILER_ID MATCHES "Clang")
             set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Werror=unused-command-line-argument")
             set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror=unused-command-line-argument")
         endif()
@@ -227,10 +228,6 @@ function(check_flag lang flag)
         else()
             check_c_compiler_flag("${flag}" "${lang}_FLAG${flag_var}")
         endif()
-        if(NOT MSVC AND CLANG)
-            set(CMAKE_C_FLAGS "${STORE_CMAKE_C_FLAGS}")
-            set(CMAKE_CXX_FLAGS "${STORE_CMAKE_CXX_FLAGS}")
-        endif()
         if(${lang}_FLAG${flag_var})
             execute_process(COMMAND ${CMAKE_COMMAND} -E echo "Yes")
         else()
@@ -446,7 +443,8 @@ elseif(SANITIZER STREQUAL "undefined")
 elseif(SANITIZER STREQUAL "integer")
     add_clike_and_ld_flags(-fsanitize=integer)
 elseif(SANITIZER)
-    message(FATAL_ERROR "Unknown sanitizer: ${SANITIZER}")
+    message(WARNING "Unknown sanitizer: ${SANITIZER}, going to try adding it anyway")
+    add_clike_and_ld_flags(-fsanitize=${SANITIZER})
 endif()
 
 if(SANITIZER)
diff -pruN 0.9.1+dfsg-1/debian/changelog 1.2.0+dfsg-2/debian/changelog
--- 0.9.1+dfsg-1/debian/changelog	2022-03-04 14:01:26.000000000 +0000
+++ 1.2.0+dfsg-2/debian/changelog	2022-08-04 12:25:54.000000000 +0000
@@ -1,3 +1,25 @@
+svt-av1 (1.2.0+dfsg-2) unstable; urgency=medium
+
+  * Upload to unstable.
+
+ -- Dylan Aïssi <daissi@debian.org>  Thu, 04 Aug 2022 14:25:54 +0200
+
+svt-av1 (1.2.0+dfsg-1) experimental; urgency=medium
+
+  * New upstream release
+  * Standards-Version: 4.6.1 (no changes needed)
+
+ -- Dylan Aïssi <daissi@debian.org>  Wed, 03 Aug 2022 22:16:24 +0200
+
+svt-av1 (1.1.0+dfsg-1) experimental; urgency=medium
+
+  * New upstream release (Closes: #1015304)
+  * Bump SONAME to libsvtav1enc1
+  * Improve package description of libsvtav1-dev (Closes: #1015257)
+  * Update debian/copyright
+
+ -- Dylan Aïssi <daissi@debian.org>  Thu, 28 Jul 2022 22:46:56 +0200
+
 svt-av1 (0.9.1+dfsg-1) unstable; urgency=medium
 
   * New upstream release
diff -pruN 0.9.1+dfsg-1/debian/control 1.2.0+dfsg-2/debian/control
--- 0.9.1+dfsg-1/debian/control	2022-03-04 14:01:26.000000000 +0000
+++ 1.2.0+dfsg-2/debian/control	2022-08-04 12:25:54.000000000 +0000
@@ -6,7 +6,7 @@ Uploaders: Dylan Aïssi <daissi@debian.o
 Build-Depends: debhelper-compat (= 13),
                cmake,
                yasm
-Standards-Version: 4.6.0
+Standards-Version: 4.6.1
 Rules-Requires-Root: no
 Homepage: https://gitlab.com/AOMediaCodec/SVT-AV1
 Vcs-Browser: https://salsa.debian.org/multimedia-team/svt-av1
@@ -54,7 +54,7 @@ Description: Scalable Video Technology f
  .
  This package provides the development files for libsvtav1dec.
 
-Package: libsvtav1enc0
+Package: libsvtav1enc1
 Architecture: any
 Multi-Arch: same
 Section: libs
@@ -74,7 +74,7 @@ Architecture: any
 Multi-Arch: same
 Section: libdevel
 Depends: libsvtav1-dev (= ${source:Version}),
-         libsvtav1enc0 (= ${binary:Version}),
+         libsvtav1enc1 (= ${binary:Version}),
          ${misc:Depends}
 Description: Scalable Video Technology for AV1 (libsvtav1enc development files)
  The Scalable Video Technology for AV1 (SVT-AV1 Encoder and Decoder) is an
@@ -97,4 +97,5 @@ Description: Scalable Video Technology f
  Live encoding / transcoding video applications. The SVT-AV1 decoder
  implementation is targeting future codec research activities.
  .
- This package provides the development files for libsvtav1dec and libsvtav1enc.
+ This package provides the header files shared between libsvtav1enc-dev
+ and libsvtav1dec-dev.
diff -pruN 0.9.1+dfsg-1/debian/copyright 1.2.0+dfsg-2/debian/copyright
--- 0.9.1+dfsg-1/debian/copyright	2022-03-04 14:01:26.000000000 +0000
+++ 1.2.0+dfsg-2/debian/copyright	2022-08-04 12:25:54.000000000 +0000
@@ -117,13 +117,13 @@ Comment:
 
 Files: *
 Copyright: 2016-2021 Alliance for Open Media
-           2018-2021 Intel Corporation
+           2018-2022 Intel Corporation
            2020 Tencent Corporation
            2019 Netflix, Inc.
 License: BSD-3-Clause-Clear
 
 Files: gstreamer-plugin/gstsvtav1enc.*
-Copyright: 2019 Intel Corporation
+Copyright: 2019-2022 Intel Corporation
 License: LGPL-2.1+
 
 Files: Source/Lib/Common/ASM_SSE2/x86inc.asm
@@ -151,7 +151,7 @@ Copyright: 2007-2013 by Cisco Systems, I
 License: Expat
 
 Files: debian/*
-Copyright: 2021 Collabora, Ltd.
+Copyright: 2021-2022 Collabora, Ltd.
 License: BSD-2-clause
 
 License: BSD-2-clause
diff -pruN 0.9.1+dfsg-1/debian/libsvtav1enc0.install 1.2.0+dfsg-2/debian/libsvtav1enc0.install
--- 0.9.1+dfsg-1/debian/libsvtav1enc0.install	2022-03-04 14:01:26.000000000 +0000
+++ 1.2.0+dfsg-2/debian/libsvtav1enc0.install	1970-01-01 00:00:00.000000000 +0000
@@ -1 +0,0 @@
-usr/lib/*/libSvtAv1Enc.so.*
diff -pruN 0.9.1+dfsg-1/debian/libsvtav1enc1.install 1.2.0+dfsg-2/debian/libsvtav1enc1.install
--- 0.9.1+dfsg-1/debian/libsvtav1enc1.install	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/debian/libsvtav1enc1.install	2022-08-04 12:25:54.000000000 +0000
@@ -0,0 +1 @@
+usr/lib/*/libSvtAv1Enc.so.*
diff -pruN 0.9.1+dfsg-1/debian/watch 1.2.0+dfsg-2/debian/watch
--- 0.9.1+dfsg-1/debian/watch	2022-03-04 14:01:26.000000000 +0000
+++ 1.2.0+dfsg-2/debian/watch	2022-08-04 12:25:54.000000000 +0000
@@ -1,3 +1,3 @@
 version=4
-opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,repack,compression=xz" \
+opts="repacksuffix=+dfsg,dversionmangle=s/\+dfsg//g,uversionmangle=s/(\d)[_\.\-\+]?((RC|rc|pre|dev|beta|alpha)\d*)$/$1~$2/,repack,compression=xz" \
  https://gitlab.com/AOMediaCodec/SVT-AV1/tags?sort=updated_desc archive/v@ANY_VERSION@/SVT-AV1-v\d\S*@ARCHIVE_EXT@
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Adaptive-Prediction-Structure.md 1.2.0+dfsg-2/Docs/Appendix-Adaptive-Prediction-Structure.md
--- 0.9.1+dfsg-1/Docs/Appendix-Adaptive-Prediction-Structure.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Adaptive-Prediction-Structure.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,13 +1,22 @@
+[Top level](../README.md)
+
 # Shot-based Adaptive Prediction Structure
 
 ## 1. Description of the algorithm
 
-In shot-based encoding, a video sequence is split into relatively short segments that are typically few seconds long and that have uniform spatio-temporal
-characteristics. Such segments are referred to as shots. Each segment is encoded on its own using a closed GoP configuration with only one key frame at the
-beginning of the shot. When shots are encoded using a multi-pass encoding approach, data from earlier passes are used to make decisions about the encoding
-settings for subsequent passes. An example of such decisions is the prediction structure to consider in subsequent passes. In SVT-AV1, a the shot-based
-adaptive prediction structure algorithm makes use of information received from the first encoding pass to decide on a suitable prediction structure for the
-shot being encoded, as illustrated in Figure 1. Moreover, the first encoding pass is configured to use an IPP flat prediction structure.
+In shot-based encoding, a video sequence is split into relatively short
+segments that are typically a few seconds long and that have uniform
+spatio-temporal characteristics. Such segments are referred to as shots. Each
+segment is encoded on its own using a closed GoP configuration with only one
+key frame at the beginning of the shot. When shots are encoded using a
+multi-pass encoding approach, data from earlier passes are used to make
+decisions about the encoding settings for subsequent passes. An example of such
+decisions is the prediction structure to consider in subsequent passes. In
+SVT-AV1, the shot-based adaptive prediction structure algorithm makes use of
+information received from the first encoding pass to decide on a suitable
+prediction structure for the shot being encoded, as illustrated in Figure 1.
+Moreover, the first encoding pass is configured to use an IPP flat prediction
+structure.
 
 ![adaptive_fig1](./img/adaptive_fig1.png)
 
@@ -23,8 +32,9 @@ shot being encoded, as illustrated in Fi
 
 ### 2.2. Adaptive Prediction Structure API
 
-Table 1 below summarises the invoked functions when Adaptive Prediction Structure is enabled. The process where each function is called is also indicated as
-well as a brief description of each function.
+Table 1 below summarises the invoked functions when Adaptive Prediction
+Structure is enabled. The process where each function is called is indicated,
+along with a brief description of each function.
 
 ##### Table 1. Main function calls associated with the Adaptive Prediction Structure algorithm.
 
@@ -35,8 +45,10 @@ well as a brief description of each func
 
 ### 2.3 Control flags:
 
-The Adaptive Prediction Structure algorithm is a threshold-based algorithm. All used thresholds are grouped under one control function called
-```set_mini_gop_size_controls```. Table 2 below summarizes the control parameters for this feature.
+The Adaptive Prediction Structure algorithm is a threshold-based algorithm. All
+used thresholds are grouped under one control function called
+```set_mini_gop_size_controls```. Table 2 below summarizes the control
+parameters for this feature.
 
 ##### Table 2. Control flags associated with the Adaptive Prediction Structure algorithm.
 
@@ -54,27 +66,31 @@ The Adaptive Prediction Structure algori
 
 ### Details of the implementation
 
-The adaptive prediction structure is invoked at the entry point of the final encoding pass as depicted in Figure 2.
+The adaptive prediction structure is invoked at the entry point of the final
+encoding pass as depicted in Figure 2.
 
 ![adaptive_fig2](./img/adaptive_fig2.png)
 
 ##### Figure 2. Diagram showing where the Adaptive Prediction Structure algorithm is used in the encoder pipeline for the shot-based final encoding pass.
 
-The Adaptive Prediction Structure algorithm aims at selecting the maximum mini-GoP-size based on the average collected statistics (generated by the IPP pass)
-over all input frames in the shot. The algorithm consists of the following steps:
+The Adaptive Prediction Structure algorithm aims at selecting the maximum
+mini-GoP-size based on the average collected statistics (generated by the IPP
+pass) over all input frames in the shot. The algorithm consists of the
+following steps:
 
 1.  Run the IPP pass over the whole shot and collect statistics for each frame.
 2.  Generate average statistics over the whole shot.
 3.  Select the best mini-GoP size based on the collected statistics.
 
-The algorithm tends to select a large mini-GoP size (32 frames) for low motion clips,  a small mini-GoP size (8 frames) for high motion clips and the default
+The algorithm tends to select a large mini-GoP size (32 frames) for low motion
+clips, a small mini-GoP size (8 frames) for high motion clips and the default
 mini-gop-size (16 frames) otherwise.
 
 The selection of the mini-gop-size is performed as follows:
 
 ```
 if ((low_motion_clip > lm_th) && !avoid_long_gop) then max-min_gop_size = 32
-elseif  (low_motion_clip > hm_th) then max-min_gop_size = 16
+elseif (low_motion_clip > hm_th) then max-min_gop_size = 16
 else max-min_gop_size = 8
 ```
 
@@ -84,22 +100,28 @@ where:
    low_motion_clip = (pcnt_inter - pcnt_motion) / (number_of_frames)
    ```
    where:
-   - `pcnt_inter` is the sum over all frames in the shot of the frame-based area percentage of blocks with inter prediction error smaller than intra
-      prediction error.
-   - `pcnt_motion` is the sum over all frames in the shot of the frame-based area percentage of blocks with inter prediction error smaller than intra prediction error and with non-zero motion vectors.
+   - `pcnt_inter` is the sum over all frames in the shot of the frame-based area percentage
+      of blocks with inter prediction error smaller than intra prediction error.
+   - `pcnt_motion` is the sum over all frames in the shot of the frame-based area percentage
+      of blocks with inter prediction error smaller than intra prediction error and with non-zero motion vectors.
       The prediction errors are generated in the IPP pass.
    - `lm_th` and `hm_th` are defined in Table 2 above.
-   - `avoid_long_gop` is a flag to indicate corner cases where a large mini-gop-size may hurt the quality although it might be a very low motion shot as
-      is the case with short shots or animation shots.
+   - `avoid_long_gop` is a flag to indicate corner cases where a large mini-gop-size may hurt
+     quality even though the shot has very low motion, as is the case with short shots
+     or animation shots.
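
As a rough C rendering of the selection rule and statistics above (a sketch
only; the function name and signature are hypothetical, while the variable
names follow this document):

```c
/* Hypothetical sketch of the mini-GoP size selection described above.
 * lm_th, hm_th and avoid_long_gop come from set_mini_gop_size_controls. */
static int select_max_mini_gop_size(double pcnt_inter, double pcnt_motion,
                                    int number_of_frames, double lm_th,
                                    double hm_th, int avoid_long_gop) {
    /* Average low-motion measure over the shot, as defined above. */
    double low_motion_clip = (pcnt_inter - pcnt_motion) / number_of_frames;

    if (low_motion_clip > lm_th && !avoid_long_gop)
        return 32; /* low-motion clip: large mini-GoP */
    if (low_motion_clip > hm_th)
        return 16; /* default mini-GoP size */
    return 8;      /* high-motion clip: small mini-GoP */
}
```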
 
 ## 3. Optimization of the algorithm
 
 The Adaptive Prediction Structure is enabled by default only in the case of multi-pass encoding
-(i.e.  when the `TWO_PASS_IPP_FINAL` multi-pass encoding mode is used).
+(i.e., when the `TWO_PASS_IPP_FINAL` multi-pass encoding mode is used).
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Alt-Refs.md 1.2.0+dfsg-2/Docs/Appendix-Alt-Refs.md
--- 0.9.1+dfsg-1/Docs/Appendix-Alt-Refs.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Alt-Refs.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,40 +1,54 @@
+[Top level](../README.md)
+
 # ALTREF and Overlay Pictures
 
 ## 1. ALTREF pictures
 
 ### Introduction
 
-ALTREFs are non-displayable pictures that are used as reference for other pictures. They are usually constructed using several source frames
-but can hold any type of information useful for compression and the given use-case. In the current version of SVT-AV1, temporal filtering of
-adjacent video frames is used to construct some of the ALTREF pictures. The resulting temporally filtered pictures will be encoded in place
-of or in addition to the original source pictures. This methodology is especially useful for source pictures that contain a high level
-of noise since the temporal filtering process produces reference pictures with reduced noise level.
-
-Temporal filtering is currently applied to the base layer picture and can also be applied to layer 1 pictures of each mini-GOP
-(e.g. source frame positions 16 and 8 respectively in a mini-GOP in a 5-layer hierarchical prediction structure).
-In addition, filtering of the key-frames and intra-only frames is also supported.
-
-The diagram in Figure 1 illustrates the use of five adjacent pictures: Two past, two future and one central picture, in order to produce a
-single filtered picture. Motion estimation is applied between the central picture and each future or past pictures generating multiple
-motion-compensated predictions.
-These are then combined using adaptive weighting (filtering) to produce the final noise-reduced picture.
+ALTREFs are non-displayable pictures that are used as reference for other
+pictures. They are usually constructed using several source frames but can hold
+any type of information useful for compression and the given use-case. In the
+current version of SVT-AV1, temporal filtering of adjacent video frames is used
+to construct some of the ALTREF pictures. The resulting temporally filtered
+pictures will be encoded in place of or in addition to the original source
+pictures. This methodology is especially useful for source pictures that
+contain a high level of noise since the temporal filtering process produces
+reference pictures with reduced noise level.
+
+Temporal filtering is currently applied to the base layer picture and can also
+be applied to layer 1 pictures of each mini-GOP (e.g. source frame positions 16
+and 8 respectively in a mini-GOP in a 5-layer hierarchical prediction
+structure). In addition, filtering of the key-frames and intra-only frames is
+also supported.
+
+The diagram in Figure 1 illustrates the use of five adjacent pictures: Two
+past, two future and one central picture, in order to produce a single filtered
+picture. Motion estimation is applied between the central picture and each
+future or past picture, generating multiple motion-compensated predictions.
+These are then combined using adaptive weighting (filtering) to produce the
+final noise-reduced picture.
 
 ![altref_fig1](./img/altref_fig1.png)
 
 ##### Fig. 1. Example of motion estimation for temporal filtering in a temporal window consisting of 5 adjacent pictures
 
-Since temporal filtering makes use of a number of adjacent frames, the Look Ahead Distance (lad_mg_pictures) needs to be incremented by the
-number of future frames used for ALTREF temporal filtering. When applying temporal filtering to ALTREF pictures, an Overlay picture might be
-necessary. This Overlay picture corresponds to the same original source picture but uses only the temporally filtered version of the source
-picture as a reference to reconstruct the original picture.
+Since temporal filtering makes use of a number of adjacent frames, the Look
+Ahead Distance (lad_mg_pictures) needs to be incremented by the number of
+future frames used for ALTREF temporal filtering. When applying temporal
+filtering to ALTREF pictures, an Overlay picture might be necessary. This
+Overlay picture corresponds to the same original source picture but uses only
+the temporally filtered version of the source picture as a reference to
+reconstruct the original picture.
 
 ### Description of the temporal filtering control
 
-Various signals are used to specify the temporal filtering settings and are described in Table 1 below. The settings could be different based
-on the frame type; however, the same set of signals is used for all frame types. The temporal filtering flow diagram in Figure 2 below further
-explains how and where each of the defined signals is used.
-These parameters are decided as a function of the encoder preset (enc_mode).
-
+Various signals are used to specify the temporal filtering settings and are
+described in Table 1 below. The settings could be different based on the frame
+type; however, the same set of signals is used for all frame types. The
+temporal filtering flow diagram in Figure 2 below further explains how and
+where each of the defined signals is used. These parameters are decided as a
+function of the encoder preset (enc_mode).
 
 |**Category**|**Signal(s)**|**Description**|
 | --- | --- | --- |
@@ -48,7 +62,7 @@ These parameters are decided as a functi
 |**Number of reference frame(s)**|noise_adjust_past_pics|Specifies whether num_past_pics will be incremented or not based on the noise level of the central frame (0: OFF or 1: ON).|
 |**Number of reference frame(s)**|noise_adjust_future_pics|Specifies whether num_future_pics will be incremented or not based on the noise level of the central frame (0: OFF or 1: ON).|
 |**Number of reference frame(s)**|use_intra_for_noise_est|Specifies whether to use the key-frame noise level for all inputs or to re-compute the noise level for each input.|
-|**Number of reference frame(s)**|activity_adjust_th|Specifies whether `num_past_pics`  and `num_future_pics` will be decremented or not based on the activity of the outer reference frame(s) compared to the central frame (∞: OFF, else remove the reference frame if the cumulative differences between the histogram bins of the central frame and the histogram bins of the reference frame is higher than `activity_adjust_th`.|
+|**Number of reference frame(s)**|activity_adjust_th|Specifies whether `num_past_pics` and `num_future_pics` will be decremented or not based on the activity of the outer reference frame(s) compared to the central frame (∞: OFF, else remove the reference frame if the cumulative difference between the histogram bins of the central frame and the histogram bins of the reference frame is higher than `activity_adjust_th`).|
 |**Number of reference frame(s)**|max_num_past_pics|Specifies the maximum number of frame(s) from past (after all adjustments).|
 |**Number of reference frame(s)**|max_num_future_pics|Specifies the maximum number of frame(s) from future (after all adjustments).|
 |**Motion search**|hme_me_level|Specifies the accuracy of the ME search (note that ME performs a HME search, then a Full-Pel search).|
@@ -76,82 +90,116 @@ The block diagram in Figure 2 outlines t
 
 ### Source picture noise estimation
 
-In order to decide temporal window length according to the content characteristics, the amount of noise is estimated from the central source
-picture. The algorithm considered is based on a simplification of the algorithm proposed in [1]. The standard deviation (sigma) of the noise
-is estimated using the Laplacian operator. Pixels that belong to an edge (i.e. as determined by how the magnitude of the Sobel gradients
-compare to a predetermined threshold), are not considered in the computation. The current noise estimation considers only the luma component.
-When `use_intra_for_noise_est` is set to 1, the noise level of the I-frame will be used for ALTREF_FRAME or ALTREF2_FRAME.
+In order to decide temporal window length according to the content
+characteristics, the amount of noise is estimated from the central source
+picture. The algorithm considered is based on a simplification of the algorithm
+proposed in [1]. The standard deviation (sigma) of the noise is estimated using
+the Laplacian operator. Pixels that belong to an edge (i.e. as determined by
+how the magnitude of the Sobel gradients compares to a predetermined threshold)
+are not considered in the computation. The current noise estimation considers
+only the luma component. When `use_intra_for_noise_est` is set to 1, the noise
+level of the I-frame will be used for ALTREF_FRAME or ALTREF2_FRAME.
 
 ### Building the list of source pictures
 
-As mentioned previously, the temporal filtering algorithm uses multiple frames to generate a temporally
-denoised or filtered picture at the central picture location. If enough pictures are available in the list of source picture buffers,
-the number of pictures used will generally be given by the num_past_pics  and num_future_pics in addition to the central picture, unless not
-enough frames are available (e.g. end of sequence).
-
-The number of pictures will be first increased based on the noise level of the central picture. Basically, the lower the noise of the central
-picture, the widerthe temporal window (+3 on each side if noise <0.5, +2 on each side if noise < 1.0, and +1 on each if noise < 2.0).
-Both sides of the window could be adjusted or just one side depending on noise_adjust_past_pics and noise_adjust_future_pics.
-
-In order to account for illumination changes, which might compromise the quality of the temporally filtered picture, an adjustment of both
-`num_past_pics`  and `num_future_pics` is conducted to remove cases where a significant illumination change is found in the defined temporal
-window. This algorithm first computes and accumulates the absolute difference between the luminance histograms of adjacent pictures in the
-temporal window, starting from the first past picture to the last past picture and from the first future picture to the last future picture.
-Then, depending on a threshold, ahd_th, if the cumulative difference is high enough, edge pictures will be removed. The current threshold is
-chosen based on the picture width and height:ahd_th = (width * height) * activity_adjust_th / 100
-
-After this step, the list of pictures to use for the temporal filtering is ready. However, given that the number of past and future frames
-can be different, the index of the central picture needs to be known.
+As mentioned previously, the temporal filtering algorithm uses multiple frames
+to generate a temporally denoised or filtered picture at the central picture
+location. If enough pictures are available in the list of source picture
+buffers, the number of pictures used will generally be given by the
+num_past_pics and num_future_pics in addition to the central picture, unless
+not enough frames are available (e.g. end of sequence).
+
+The number of pictures will be first increased based on the noise level of the
+central picture. Basically, the lower the noise of the central picture, the
+wider the temporal window (+3 on each side if noise < 0.5, +2 on each side if
+noise < 1.0, and +1 on each side if noise < 2.0). Both sides of the window could be
+adjusted or just one side depending on noise_adjust_past_pics and
+noise_adjust_future_pics.
+
+In order to account for illumination changes, which might compromise the
+quality of the temporally filtered picture, an adjustment of both
+`num_past_pics` and `num_future_pics` is conducted to remove cases where a
+significant illumination change is found in the defined temporal window. This
+algorithm first computes and accumulates the absolute difference between the
+luminance histograms of adjacent pictures in the temporal window, starting from
+the first past picture to the last past picture and from the first future
+picture to the last future picture. Then, depending on a threshold, ahd_th, if
+the cumulative difference is high enough, edge pictures will be removed. The
+current threshold is chosen based on the picture width and height: ahd_th =
+(width * height) * activity_adjust_th / 100
+
+After this step, the list of pictures to use for the temporal filtering is
+ready. However, given that the number of past and future frames can be
+different, the index of the central picture needs to be known.
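
A minimal sketch of the two window adjustments described above (the helper
names are illustrative, not the encoder's actual functions):

```c
/* Noise-based widening: the cleaner the central picture, the more
 * pictures are added on each adjustable side of the window. */
static int noise_based_extra_pics(double noise_level) {
    if (noise_level < 0.5) return 3;
    if (noise_level < 1.0) return 2;
    if (noise_level < 2.0) return 1;
    return 0;
}

/* Histogram-difference threshold used to drop edge pictures when an
 * illumination change is detected (formula quoted above). */
static int compute_ahd_th(int width, int height, int activity_adjust_th) {
    return width * height * activity_adjust_th / 100;
}
```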
 
 ### Block-based processing
 
-The central picture is split into 64x64 pixel non-overlapping blocks. For each block, (num_past_pics + num_future_pics )–,
-motion-compensated predictions will be determined from the adjacent frames and weighted in order to generate a final filtered block.
-All blocks are then combined to build the final filtered picture.
+The central picture is split into 64x64 pixel non-overlapping blocks. For each
+block, (num_past_pics + num_future_pics) motion-compensated predictions will
+be determined from the adjacent frames and weighted in order to generate a
+final filtered block. All blocks are then combined to build the final filtered
+picture.
 
 ### Block-based motion search and compensation
 
-The motion search consists of three steps: (1) Hierarchical Motion Estimation (HME), (2) Full-Pel search, and (3) Sub-Pel search,
-and performed for only the Luma plane.
-
-HME is performed for each single 64x64-block, while Full-Pel search is performed for the 85 square blocks between 8x8 and 64x64,
-and the Sub-Pel search (using regular or bilinear as filter type depending on use_2tap) is performed for only the 4 32x32-blocks and the
-16 16x16-blocks.
-
-After obtaining the motion information, an inter-depth decision between the 4 32x32-blocks and the 16 16x16-blocks is performed towards a
-final partitioning for the 64x64. The latter will be considered at the final compensation (using sharp as filter type and for all planes).
-
-However, if the 64x64 distortion after HME is less than tf_me_exit_th, then the Full_Pel search is bypassed and Sub-Pel search/final
-compensation is performed for only the 64x64.
-
-Also, Sub-Pel search/final compensation is performed for only 64x64 blocks, if the deviation between the 64x64 ME distortion and the 4 32x32
-ME distortions (after the Full-Pel search) is less than use_pred_64x64_only_th.
+The motion search consists of three steps: (1) Hierarchical Motion Estimation
+(HME), (2) Full-Pel search, and (3) Sub-Pel search, and is performed for only the
+Luma plane.
+
+HME is performed for each single 64x64-block, while Full-Pel search is
+performed for the 85 square blocks between 8x8 and 64x64, and the Sub-Pel
+search (using regular or bilinear as filter type depending on use_2tap) is
+performed for only the 4 32x32-blocks and the 16 16x16-blocks.
+
+After obtaining the motion information, an inter-depth decision between the 4
+32x32-blocks and the 16 16x16-blocks is performed towards a final partitioning
+for the 64x64. The latter will be considered at the final compensation (using
+sharp as filter type and for all planes).
+
+However, if the 64x64 distortion after HME is less than tf_me_exit_th, then the
+Full-Pel search is bypassed and Sub-Pel search/final compensation is performed
+for only the 64x64.
+
+Also, Sub-Pel search/final compensation is performed for only 64x64 blocks, if
+the deviation between the 64x64 ME distortion and the 4 32x32 ME distortions
+(after the Full-Pel search) is less than use_pred_64x64_only_th.
 
 
 ### Compute the Decay Factor
 
-The decay factor (`tf_decay_factor`) is derived per block/per component and will be used at the sample-based filtering operations.
+The decay factor (`tf_decay_factor`) is derived per block/per component and
+will be used at the sample-based filtering operations.
 
 ```tf_decay_factor = 2 * n_decay * n_decay * q_decay * s_decay```
 
-The noise-decay (`n_decay`) is mainly an increasing function of the input noise level, but is also adjusted depending on the filtering method
-(`use_fast_filter`), the input resolution, and the input QP; where a higher noise level implies a larger n_decay value and a stronger
-filtering. The computations of `n_decay` are simplified when `use_fixed_point` or `use_fast_filter` is set to 1.
-
-The QP-decay (`q_decay`) is an increasing function of the input QP. For a high QP, the quantization leads to a higher loss of information,
-and thus a stronger filtering is less likely to distort the encoded quality, while a stronger filtering could reduce bit rates. For a low QP,
-more details are expected to be retained. Filtering is thus more conservative.
+The noise-decay (`n_decay`) is mainly an increasing function of the input noise
+level, but is also adjusted depending on the filtering method
+(`use_fast_filter`), the input resolution, and the input QP; where a higher
+noise level implies a larger n_decay value and a stronger filtering. The
+computations of `n_decay` are simplified when `use_fixed_point` or
+`use_fast_filter` is set to 1.
+
+The QP-decay (`q_decay`) is an increasing function of the input QP. For a high
+QP, the quantization leads to a higher loss of information, and thus a stronger
+filtering is less likely to distort the encoded quality, while a stronger
+filtering could reduce bit rates. For a low QP, more details are expected to be
+retained. Filtering is thus more conservative.
 
-The strength decay (`s_decay`) is a function of the filtering strength that is set in the code.
+The strength decay (`s_decay`) is a function of the filtering strength that is
+set in the code.
 
 ### Temporal filtering of the co-located motion compensated blocks
 
-After multiplying each pixel of the co-located 64x64 blocks by the respective weight, the blocks are then added and normalized to produce the
-final output filtered block. These are then combined with the rest of the blocks in the frame to produce the final temporally filtered picture.
-
-The process of generating one filtered block is illustrated in diagram of Figure 3. In this example, only 3 pictures are used for the temporal
-filtering (`num_past_pics = 1` and `num_future_pics = 1`). Moreover, the values of the filter weights are used for illustration purposes only
-and are in the range {0,32}.
+After multiplying each pixel of the co-located 64x64 blocks by the respective
+weight, the blocks are then added and normalized to produce the final output
+filtered block. These are then combined with the rest of the blocks in the
+frame to produce the final temporally filtered picture.
+
+The process of generating one filtered block is illustrated in the diagram of
+Figure 3. In this example, only 3 pictures are used for the temporal filtering
+(`num_past_pics = 1` and `num_future_pics = 1`). Moreover, the values of the
+filter weights are used for illustration purposes only and are in the range
+{0,32}.
 
 ![altref_fig2](./img/altref_fig2.png)
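
A rough sketch of the accumulate-and-normalize step, together with the
decay-factor formula quoted earlier (the weight derivation is simplified
here; in the encoder the per-sample weights depend on `tf_decay_factor`
and the motion-compensated prediction error):

```c
#include <stdint.h>

/* Decay factor formula quoted in this appendix. */
static double compute_tf_decay_factor(double n_decay, double q_decay,
                                      double s_decay) {
    return 2.0 * n_decay * n_decay * q_decay * s_decay;
}

/* Weighted average of num_pics co-located samples. Weights are assumed
 * to lie in [0, 32] as in the Figure 3 example, with at least one
 * non-zero weight (the central picture). */
static uint8_t filter_sample(const uint8_t *preds, const int *weights,
                             int num_pics) {
    int acc = 0, count = 0;
    for (int i = 0; i < num_pics; i++) {
        acc   += weights[i] * preds[i];
        count += weights[i];
    }
    return (uint8_t)((acc + count / 2) / count); /* rounded normalization */
}
```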
 
@@ -159,15 +207,17 @@ and are in the range {0,32}.
 
 ## 2. Implementation of the algorithm
 
-**Inputs**: list of picture buffer pointers to use for filtering, location of central picture, initial filtering strength
+**Inputs**: list of picture buffer pointers to use for filtering, location of
+central picture, initial filtering strength
 
-**Outputs**: the resulting temporally filtered picture, which replaces the location of the central pictures in the source buffer.
-The original source picture is stored in an additional buffer.
+**Outputs**: the resulting temporally filtered picture, which replaces the
+location of the central picture in the source buffer. The original source
+picture is stored in an additional buffer.
 
 **Control flags**:
 
 #### Table 2: Control signals/flags for the ALTREF frames feature.
-| **Flag**         | **Level** |
+| **Flag**         | **Level**     |
 | ---------------- | ------------- |
 | tf-controls      | Sequence      |
 | enable-overlays  | Sequence      |
@@ -204,10 +254,12 @@ consumed by the HME process.
 
 ### Memory allocation
 
-Three uint8_t or uint16_t buffers of size 64x64x3 are allocated: the accumulator, predictor and counter. In addition, an extra picture buffer
-(or two in case of high bit-depth content) is allocated to store the original source. Finally, a temporary buffer is allocated for high-bit
-depth sources, due to the way high bit-depth sources are stored in the encoder implementation (see sub-section on high bit-depth
-considerations).
+Three uint8_t or uint16_t buffers of size 64x64x3 are allocated: the
+accumulator, predictor and counter. In addition, an extra picture buffer (or
+two in case of high bit-depth content) is allocated to store the original
+source. Finally, a temporary buffer is allocated for high bit-depth sources,
+due to the way high bit-depth sources are stored in the encoder implementation
+(see sub-section on high bit-depth considerations).
 
 ### High bit-depth considerations
 
@@ -290,18 +342,22 @@ The ALTREF and Overlay picture settings
 
 **Example when picture 16 is ALTREF**:
 
-| **Key Point**         | **ALTREF Picture** | **Overlay Picture**    |
-| ---------------- | ------------- | ------------ |
-| picture_number | 16 | 16 |
-| is_alt_ref | 1 | 0 |
-| is_overlay | 0 | 1 |
-| show_frame | 0 | 1 |
-| slice_type | B_SLICE | P_SLICE |
+| **Key Point**    | **ALTREF Picture** | **Overlay Picture** |
+| ---------------- | -------------      | ------------        |
+| picture_number   | 16                 | 16                  |
+| is_alt_ref       | 1                  | 0                   |
+| is_overlay       | 0                  | 1                   |
+| show_frame       | 0                  | 1                   |
+| slice_type       | B_SLICE            | P_SLICE             |
 
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-CDEF.md 1.2.0+dfsg-2/Docs/Appendix-CDEF.md
--- 0.9.1+dfsg-1/Docs/Appendix-CDEF.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-CDEF.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,6 +1,8 @@
+[Top level](../README.md)
+
 # Constrained Directional Enhancement Filter (CDEF) Appendix
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
 The constrained directional enhancement filter (CDEF) is applied after
 the deblocking filter and aims at improving the reconstructed picture by
@@ -8,20 +10,24 @@ addressing ringing artifacts. CDEF is a
 de-ringing filter from the Daala codec (Mozilla) and the Constrained Low
 Pass Filter (CLPF) from the Thor codec (Cisco).
 
-Filtering is applied on an 8x8 block level, which is a large enough
-block size to reliably detect edges, but small enough block size to
-accurately detect edge directions. Filtering is applied to both luma and
-chroma samples. For a given block, the algorithm consists of the two
-main steps outlined below:
-
-1. Identify the direction **d** of the block (i.e. direction of edges). Eight directions {0,…,7} could be identified.
+Filtering is applied on an 8x8 block level. 8x8 blocks have enough pixels
+that an edge can be accurately detected, which would not be possible for
+4x4 blocks. However, 8x8 blocks don't have so many pixels that the detected
+edge will have more than one direction, which would be much more likely
+to occur for an edge present in a 16x16 block. Filtering is applied to
+both luma and chroma samples. For a given block, the algorithm consists
+of the two main steps outlined below:
+
+1. Identify the direction **d** of the block (i.e. direction of edges). One
+of eight possible directions {0,…,7} is selected as the direction of the
+block.
 
 2. Filtering
     * Apply a nonlinear filter along the edge in the identified direction. Filter taps are aligned in the direction of the block. The main goal is to address ringing artifacts.
 
     * Filter mildly along a 45 degree direction from the edge.
 
-The two steps are outlined in more detail in the following.
+The two steps are outlined in more detail in the following section.
 
 ***Step 1 – Identification of edge direction***. Eight edge directions
 could be considered. The directions are indexed with d=0,…,7 as
@@ -48,8 +54,8 @@ performed as follows:
     direction.
 
 The example in Figure 2 below illustrates this step for an 8x8 input block. In this
-example, direction 0 results in the smallest error variance and hence
-was selected as the block direction.
+example, direction 0 has the smallest error variance and is selected as the block
+direction.
 
 ![image25](./img/image25.png)
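
The direction decision itself reduces to an argmin over the eight candidate
error variances. A sketch, assuming `dir_error[]` has been filled by a
per-direction variance computation (not shown; the optimized
`cdef_find_dir` internals differ):

```c
/* Keep the direction with the smallest projection error variance. */
static int pick_cdef_direction(const double dir_error[8]) {
    int best_dir = 0;
    for (int d = 1; d < 8; d++)
        if (dir_error[d] < dir_error[best_dir])
            best_dir = d;
    return best_dir;
}
```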
 
@@ -60,12 +66,11 @@ steps, namely a primary filtering operat
 operation. The primary filter acts along the identified block direction.
 The secondary filter acts at ![math](http://latex.codecogs.com/gif.latex?45^o) from the identified
 direction. In the example shown in Figure 3 below, the block direction
-is d=0 ![math](http://latex.codecogs.com/gif.latex?45^o). The sample to be filtered is highlighted in
-red. The samples to be considered when filtering the red sample in
-primary filtering are highlighted in green (a total of four samples).
-Those considered in the secondary filtering of the red sample are
-located at ![math](http://latex.codecogs.com/gif.latex?45^o) angle from the block direction and are
-highlighted in blue (a total of eight samples).
+is d=0, ![math](http://latex.codecogs.com/gif.latex?45^o). The sample to be filtered is highlighted in
+red. During primary filtering of the red sample the four green samples are considered.
+During secondary filtering of the red sample the eight blue samples
+located at a ![math](http://latex.codecogs.com/gif.latex?45^o) angle from the block direction
+are considered.
 
 ![image26](./img/image26.png)
 
@@ -112,7 +117,7 @@ shown in blue.
 ##### Figure 5. Filter weights for secondary filtering.
 
 
-## 2.  Implementation
+## 2. Implementation
 
 **Inputs to cdef\_kernel**: Output frame from the deblocking filter.
 
@@ -126,7 +131,7 @@ Control flags associated with CDEF are l
 
 | **Flag**                        | **Level**      | **Description**                                                                                                            |
 | ------------------------------- | -------------- | -------------------------------------------------------------------------------------------------------------------------- |
-| --enable-cdef                     | Configuration  | CDEF filter control (0:OFF , 1: ON (Default))                           |
+| --enable-cdef                   | Configuration  | CDEF filter control (0:OFF, 1: ON (Default))                                                                               |
 | cdef\_level                     | Sequence       | Indicates whether to use CDEF for the whole sequence.                                                                      |
 | cdef\_level                     | Picture        | Indicates the level of complexity of the CDEF strength search as a function of the encoder mode (enc\_mode).               |
 
@@ -143,25 +148,23 @@ The main steps involved in the implement
 outlined below, followed by more details on some of the important
 functions.
 
-Step 1 - Splitting the frame into segments
-
-- The frame to be filtered is divided into segments to allow for parallel filtering operations on
-  different parts of the frame. The segments are set according to the following
-  (see ```load_default_buffer_configuration_settings``` in ```EbEncHandle.c```)
-  The number of segment rows is set to 1 if ```(luma height/64)<6```, else it set to 6.
-- The number of segment columns is set to 1 if ```(luma width/64)<10```, else it set to 6.
+Step 1: Split the frame to be filtered into segments to allow for parallel filtering operations on
+  different parts of the frame. The segments are set according to the following rules
+  (see ```load_default_buffer_configuration_settings``` in ```EbEncHandle.c```):
+- The number of segment rows is set to 1 if ```(luma height/64)<6```, else it is set to 6.
+- The number of segment columns is set to 1 if ```(luma width/64)<10```, else it is set to 6.
 
 The segments are processed in ```cdef_kernel```. Each segment is split into
 64x64 filter blocks.
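
In C form, the segmentation rule amounts to the following (a sketch; the
helper is hypothetical, the thresholds are those quoted above):

```c
/* Segment grid used for parallel CDEF filtering, per the rule in
 * load_default_buffer_configuration_settings (EbEncHandle.c). */
static void cdef_segment_grid(int luma_width, int luma_height,
                              int *seg_rows, int *seg_cols) {
    *seg_rows = (luma_height / 64 < 6)  ? 1 : 6;
    *seg_cols = (luma_width  / 64 < 10) ? 1 : 6;
}
```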
 
-Step 2: Perform CDEF search for each segment \[each running on a
-separate thread\]. Each segment goes through a filter search operation
+Step 2: CDEF search is performed for each segment using a separate thread.
+Each segment goes through a filter search operation
 through the function (```cdef_seg_search```). For a given 64x64 filter block
 in a segment, the main purpose of the search is to identify the
 directions of the 8x8 blocks in the filter block, and the best filter
 (Primary strength, Secondary strength) pair to use in filtering the
-filter block. The primary filter strength takes value in {0,…,15},
-whereas the secondary filter strength takes value in {0, 1, 2, 4}. The
+filter block. The primary filter strength can take any value between 0 and 15 inclusive,
+whereas the secondary filter strength can take one of the following values: 0, 1, 2 or 4. The
 (primary strength, secondary strength) pairs are then indexed and
 ordered as indicated in Table 2 below:
 
@@ -178,8 +181,8 @@ ordered as indicated in Table 2 below:
 | …                         | (...,…)                                         |
 | 63                        | (15,4)                                          |
 
-The search for the best (Primary strength, Secondary strength) pair to
-use is equivalent to the search for the index for such pair.
+Searching for the best filter (Primary strength, Secondary strength) pair is equivalent
+to searching for the best filter strength index.
 
 The primary luma damping (```pri_damping```) and secondary luma damping (```sec_damping```)
 values are set as a function of the base qindex for the picture and are given by:
@@ -189,7 +192,7 @@ pri_damping = 3 + (base_qindex/64);
 sec_damping = 3 + (picture_control_set_ptr->parent_pcs_ptr->base_qindex/64);
 ```
 
-Chroma damping values are always one less the luma damping value.
+Chroma damping values are always one less than the luma damping values.
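
The index-to-pair mapping of Table 2 and the damping derivation can be
summarized as follows (a sketch; helper names are illustrative):

```c
/* Decode a filter strength index in [0,63] into the (primary, secondary)
 * pair ordering of Table 2: index 0 -> (0,0), ..., index 63 -> (15,4). */
static const int k_sec_strengths[4] = { 0, 1, 2, 4 };

static void strength_index_to_pair(int index, int *pri, int *sec) {
    *pri = index / 4;                  /* primary strength: 0..15 */
    *sec = k_sec_strengths[index % 4]; /* secondary: 0, 1, 2 or 4 */
}

/* Luma damping as quoted above; chroma damping is one less. */
static int luma_damping(int base_qindex) {
    return 3 + base_qindex / 64;
}
```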
 
 The CDEF search (```cdef_seg_search```) proceeds along the following steps.
 
@@ -202,24 +205,24 @@ The CDEF search (```cdef_seg_search```)
 
     - Perform the following for each 8x8 non-skip block (```cdef_filter_fb```):
 
-      - Perform the following for each 8x8 non-skip block (```cdef_filter_fb```):
-
-        - Find the direction for each 8x8 block (```cdef_find_dir```).
-        - Filter the 8x8 block according to the identified direction using the set filter strength
-          (```cdef_filter_block```, C only version ```cdef_filter_block_c```.
-          More details on ```cdef_filter_block_c``` are provided below.).
+      - Find the direction for each 8x8 block (```cdef_find_dir```).
+      - Filter the 8x8 block according to the identified direction using the set filter strength
+        (```cdef_filter_block```). More details on the C-only version of this
+        function, ```cdef_filter_block_c```, are provided below.
 
 
     - Compute the filtering mse for the filter block corresponding to the filter strength being considered
       (```compute_cdef_dist```).
 
 Step 3: Select a subset of filter strengths to use in the final filtering of the 64x64
-filter blocks in the frame based on the filtering results from step 2
-(```finish_cdef_search```. More details on ```finish_cdef_search``` are provided below.).
-This step is frame-based and is performed by only one thread.
+filter blocks using the results from step 2. The entire frame will be filtered using the subset of filter
+strengths chosen; therefore, before the subset can be chosen, all segments must be completed in order to
+collect the filter strength data from all 64x64 filter blocks. This step is frame-based and is performed
+using only one thread (```finish_cdef_search```). More details on ```finish_cdef_search``` are
+provided below.
 
-Step 4: Complete the filtering of the frame based on the selected set of filtering strengths from Step 3.
-(```av1_cdef_frame```. More details on ```av1_cdef_frame``` are provided below.)
+Step 4: Complete the filtering of the frame using the subset of filtering strengths chosen in Step 3
+(```av1_cdef_frame```). More details on ```av1_cdef_frame``` are provided below.
 
 **More details about cdef\_filter\_block\_c**
 
@@ -234,16 +237,20 @@ The primary and secondary filter coeffic
 
 **More details on finish\_cdef\_search in step 3**
 
-For each 64x64 filter block, the output from Step 2 is an array of distortion values
-corresponding to different filter strength pairs (Primary strength, Secondary strength).
-To reduce the overhead associated with the signaling of the individual filter strength index for each
-64x64 filter block, only a subset of the identified filter strength pairs is selected. Final filtering of the
-64x64 filter blocks in the frame is then redone using the best among the selected subset of filter strengths.
-The encoder needs to signal to the decoder only the selected subset of filter strengths for the decoder to use
-in the filtering operation. The encoder could signal a set that consists of only 1, 2, 4, or 8 different
-(Primary strength, Secondary strength) pairs to be used for the frame. The specific pair to use for each 64x64
-filter block is signaled separately. The search performed in ```finish_cdef_search``` is to find the best RDO option
-(i.e. 1, 2, 4, or 8 filter strength pairs for the frame) to work with.
+For each 64x64 filter block, the output from Step 2 is an array of distortion
+values corresponding to different filter strength pairs (Primary strength,
+Secondary strength). To reduce the overhead associated with the signaling of
+the individual filter strength index for each 64x64 filter block, only a subset
+of the identified filter strength pairs is selected. Final filtering of the
+64x64 filter blocks in the frame is then redone using the best among the
+selected subset of filter strengths. The encoder needs to signal to the decoder
+only the selected subset of filter strengths for the decoder to use in the
+filtering operation. The encoder could signal a set that consists of only 1, 2,
+4, or 8 different (Primary strength, Secondary strength) pairs to be used for
+the frame. The specific pair to use for each 64x64 filter block is signaled
+separately. The search performed in ```finish_cdef_search``` is to find the
+best RDO option (i.e. 1, 2, 4, or 8 filter strength pairs for the frame) to
+work with.
 
   - Loop over the cardinality of the set of the strength pair options (1
     then 2 then 4 then 8)
@@ -254,7 +261,7 @@ filter block is signaled separately. The
     based on filtering distortion (```joint_strength_search_dual``` function makes use of a greedy search
     algorithm). Compute the RDO cost of each of the options and keep track of the best option (i.e. the best
     number of bits and the corresponding set of best (Primary strength, Secondary strength) pairs. The latter are
-    stored in the ```cdef_strengths```  and ```cdef_uv_strengths```  arrays.
+    stored in the ```cdef_strengths``` and ```cdef_uv_strengths``` arrays.
 
   - Loop over the filter blocks in the frame and select for each filter block the best (Primary strength,
     Secondary strength) pair. The selected pair is signaled in ```mbmi.cdef_strength``` whereas damping values
@@ -273,49 +280,56 @@ Loop over the 64x64 filter blocks
 (selected in ```finish_cdef_search```).
 
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
-The search for the best filter strength pair for each 64x64 block can be algorithmically optimized using the
-features described below.  The aggressiveness of the CDEF algorithm depends on the CDEF filter mode
-(```picture_control_set_ptr->cdef_level```), which is specified based on the encoder preset
-(```picture_control_set_ptr->enc_mode```).
+The search for the best filter strength pair for each 64x64 block can be
+algorithmically optimized using the features described below. The
+aggressiveness of the CDEF algorithm depends on the CDEF filter mode
+(```picture_control_set_ptr->cdef_level```), which is specified based on the
+encoder preset (```picture_control_set_ptr->enc_mode```).
 
 ### Reducing Number of Filter Strengths Tested
 
-The search in ```cdef_seg_search``` for the filter strength is performed by considering a subset of the allowable
-filter strength indices [0,63].  For each ```cdef_level```, a set of primary and secondary filter strengths are
-specified to be tested.  The search is performed in two stages:
-
-1st stage: Test the specified primary filter strengths.
-The number of primary filter strengths to test is specified by ```cdef_ctrls->first_pass_fs_num```, and
-the values of the primary strengths are set in the array ```cdef_ctrls->default_first_pass_fs```.
-
-2nd stage: Test the specified secondary filter strengths.
-The number of secondary filter strengths to test is specified by ```cdef_ctrls->default_second_pass_fs_num```, and
-the values of the primary strengths are set in the array ```cdef_ctrls->default_second_pass_fs```.
+The search in ```cdef_seg_search``` for the filter strength is performed by
+considering a subset of the allowable filter strength indices [0,63]. For each
+```cdef_level```, a set of primary and secondary filter strengths are specified
+to be tested. The search is performed in two stages:
+
+1st stage: Test the specified primary filter strengths. The number of primary
+filter strengths to test is specified by ```cdef_ctrls->first_pass_fs_num```,
+and the values of the primary strengths are set in the array
+```cdef_ctrls->default_first_pass_fs```.
+
+2nd stage: Test the specified secondary filter strengths. The number of
+secondary filter strengths to test is specified by
+```cdef_ctrls->default_second_pass_fs_num```, and the values of the secondary
+strengths are set in the array ```cdef_ctrls->default_second_pass_fs```.
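+
+As an illustration, the two-stage loop could be sketched as follows
+(```eval_strength``` is a hypothetical helper returning the filtering
+distortion of a strength pair):
+
+```
+int best_pri = 0, best_sec = 0;
+uint64_t best_dist = UINT64_MAX;
+// 1st stage: test the primary strengths, secondary strength fixed at 0.
+for (int i = 0; i < cdef_ctrls->first_pass_fs_num; i++) {
+    int pri = cdef_ctrls->default_first_pass_fs[i];
+    uint64_t d = eval_strength(pri, 0);
+    if (d < best_dist) { best_dist = d; best_pri = pri; }
+}
+// 2nd stage: test the secondary strengths with the best primary strength.
+for (int i = 0; i < cdef_ctrls->default_second_pass_fs_num; i++) {
+    int sec = cdef_ctrls->default_second_pass_fs[i];
+    uint64_t d = eval_strength(best_pri, sec);
+    if (d < best_dist) { best_dist = d; best_sec = sec; }
+}
+```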
 
 ### Reducing Number of Rows Used in CDEF search
 
-The CDEF search can be performed on subsampled blocks to reduce the number of required computations.  A subsampling factor is specified using
-```cdef_ctrls->subsampling_factor```, according to the allowable values in Table 3.
+The CDEF search can be performed on subsampled blocks to reduce the number of
+required computations. A subsampling factor is specified using
+```cdef_ctrls->subsampling_factor```, according to the allowable values in
+Table 3.
 
 
 ##### Table 3. Allowable subsampling factors in CDEF search.
 
-|**Subsampling_Factor** | **Action** |
-| --------------------- | ---------- |
-| 1                     | No subsampling|
-| 2                     | Subsample each block by 2 (i.e. perform CDEF filtering on every 2nd row)|
-| 3                     | Subsample each block by 4 (i.e. perform CDEF filtering on every 4th row)|
+| **Subsampling_Factor** | **Action**                                                               |
+| ---------------------  | ----------                                                               |
+| 1                      | No subsampling                                                           |
+| 2                      | Subsample each block by 2 (i.e. perform CDEF filtering on every 2nd row) |
+| 4                      | Subsample each block by 4 (i.e. perform CDEF filtering on every 4th row) |
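+
+In effect, the search distortion is accumulated on a row-subsampled grid; a
+sketch (assuming the factor acts directly as a row stride):
+
+```
+// Distortion of one filter block, skipping rows per the subsampling factor.
+uint64_t dist = 0;
+for (int r = 0; r < block_h; r += cdef_ctrls->subsampling_factor)
+    for (int c = 0; c < block_w; c++) {
+        const int e = src[r * stride + c] - filtered[r * stride + c];
+        dist += (uint64_t)(e * e);
+    }
+```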
 
 ### Using Reference Frame Info to Reduce CDEF Search
 
-Information from the nearest reference frames can be used to reduce the number of filter
-strengths tested for each frame.  The selected CDEF filter strengths for each frame are saved in the
-```EbReferenceObject``` to be used by subsequent frames.
+Information from the nearest reference frames can be used to reduce the number
+of filter strengths tested for each frame. The selected CDEF filter strengths
+for each frame are saved in the ```EbReferenceObject``` to be used by
+subsequent frames.
 
-When ```cdef_ctrls->search_best_ref_fs``` is enabled, only the best filter strengths from the reference frames are tested,
-as follows:
+When ```cdef_ctrls->search_best_ref_fs``` is enabled, only the best filter
+strengths from the reference frames are tested, as follows:
 
 ```
 If (list0_best_filter == list1_best_filter)
@@ -331,8 +345,9 @@ Else {
 }
 ```
 
-When ```cdef_ctrls->use_reference_cdef_fs``` is enabled, CDEF search is skipped and the filter strengths are
-set to the average of the lowest and highest filter strengths of the reference frames, as follows:
+When ```cdef_ctrls->use_reference_cdef_fs``` is enabled, CDEF search is skipped
+and the filter strengths are set to the average of the lowest and highest
+filter strengths of the reference frames, as follows:
 
 ```
 Lowest_fs = MIN(lowest_selected_fs_from_list0, lowest_selected_fs_from_list1)
@@ -341,51 +356,59 @@ Luma_cdef_fs = MIN( 63, (lowest_fs + hig
 Chroma_cdef_fs = 0
 ```
 
-When ```cdef_ctrls->use_skip_detector``` is enabled, CDEF will be disabled if the skip area percentage of the
-nearest reference frames (i.e. the percentage of zero coefficients in the nearest ref frames) is above 75%.
+When ```cdef_ctrls->use_skip_detector``` is enabled, CDEF will be disabled if
+the skip area percentage of the nearest reference frames (i.e. the percentage
+of zero coefficients in the nearest ref frames) is above 75%.
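+
+A sketch of that detector (the field names and the averaging of the two
+nearest references are assumptions):
+
+```
+// Disable CDEF for the frame when the nearest refs are mostly skipped.
+int skip_pct = (ref_list0->skip_area_pct + ref_list1->skip_area_pct) / 2;
+if (cdef_ctrls->use_skip_detector && skip_pct > 75)
+    disable_cdef_for_frame(pcs);   // hypothetical helper
+```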
 
 ### Cost Biasing to Reduce CDEF Application
 
-After the CDEF search, the best selected filters must be applied to each SB; however,
-if the best selected filter for an SB is (0,0), then no filtering is required.
-By enabling ```cdef_ctrls->zero_fs_cost_bias```, the cost of the (0,0) filter can be scaled down to make it more
-favourable, resulting in fewer SBs requiring filtering in ```svt_av1_cdef_frame```.
-
-When ```cdef_ctrls->zero_fs_cost_bias``` is non-zero, the cost of the (0,0) filter for each SB will be
-scaled by (```cdef_ctrls->zero_fs_cost_bias /64```).
-
-4.  **Signaling**
-
-At the frame level, the algorithm signals the luma damping value and up to 8 different filter strength presets to
-choose from. Each preset includes luma primary preset, chroma primary preset, luma secondary preset, a chroma
-secondary preset and the number of bits used to signal the 64x64 level preset.
+After the CDEF search, the best selected filters must be applied to each filter block;
+however, if the best selected filter for a filter block is (0,0), then no filtering is
+required. By enabling ```cdef_ctrls->zero_fs_cost_bias```, the cost of the
+(0,0) filter can be scaled down to make it more favourable, resulting in fewer
+filter blocks requiring filtering in ```av1_cdef_frame```.
+
+When ```cdef_ctrls->zero_fs_cost_bias``` is non-zero, the cost of the (0,0) filter for each filter block will be
+scaled by ```cdef_ctrls->zero_fs_cost_bias / 64```.
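+
+For example, a value of 48 scales the (0,0) cost by 48/64 = 0.75. A sketch:
+
+```
+// Bias the RD cost of the (0,0) strength pair so it is selected more often.
+if (cdef_ctrls->zero_fs_cost_bias && pri_str == 0 && sec_str == 0)
+    cost = (cost * cdef_ctrls->zero_fs_cost_bias) >> 6;   // i.e. * bias / 64
+```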
+
+## 4. Signaling
+
+At the frame level, the algorithm signals the luma damping value and up to 8
+different filter strength pairs to choose from. Each pair includes a luma
+primary strength, a chroma primary strength, a luma secondary strength and a
+chroma secondary strength; the number of bits used to signal the per-block
+pair index is also coded at the frame level.
 Table 4 summarizes the parameters signaled at the frame level.
 
-At the 64x64 filter block level, the algorithm signals the index for the specific preset to work with for the
-64x64 filter block from among the set of presets specified at the frame level.
-Table 5 summarizes the parameters signaled at the filter block level.
+At the 64x64 filter block level, the algorithm signals the index for the
+specific filter strength pair to work with for the 64x64 filter block from among the set of
+filter strength pairs specified at the frame level. Table 5 summarizes the parameters
+signaled at the filter block level.
 
 ##### Table 4. CDEF parameters signaled at the frame level.
 
-| **Frame level Parameters**                                        | **Values (for 8-bit content)** |
-| ----------------------------------------------------------------- | ------------------------------ |
-| Luma Damping D                                                    | {3, 4, 5, 6}                   |
-| Number of bits used for filter block signaling                    | {0,..,3}                       |
-| List of 1, 2, 4 or 8 presets. Each preset contains the following: |                                |
-| Luma primary strength                                             | {0,…,15}                       |
-| Chroma primary strength                                           | {0,…,15}                       |
-| Luma secondary strength                                           | {0,1,2,3}                      |
-| Chroma secondary strength                                         | {0,1,2,3}                      |
+| **Frame level Parameters**                                                    | **Values (for 8-bit content)** |
+| ----------------------------------------------------------------------------- | ------------------------------ |
+| Luma Damping D                                                                | {3, 4, 5, 6}                   |
+| Number of bits used for filter block signaling                                | {0,…,3}                        |
+| List of 1, 2, 4 or 8 filter strength pairs. Each pair contains the following: |                                |
+| Luma primary strength                                                         | {0,…,15}                       |
+| Chroma primary strength                                                       | {0,…,15}                       |
+| Luma secondary strength                                                       | {0,1,2,3}                      |
+| Chroma secondary strength                                                     | {0,1,2,3}                      |
 
 ##### Table 5. CDEF parameters signaled at the filter block level.
-| **Filter-Block-level Parameters**                                 | **Values**                     |
-| ----------------------------------------------------------------- | ------------------------------ |
-| Index for the preset to use                                       | Up to 7                        |
+| **Filter-Block-level Parameters**                                               | **Values**                     |
+| ------------------------------------------------------------------------------- | ------------------------------ |
+| Index for the filter strength pair to use                                       | {0,…,7}                        |
 
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 
 ## References
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-CfL.md 1.2.0+dfsg-2/Docs/Appendix-CfL.md
--- 0.9.1+dfsg-1/Docs/Appendix-CfL.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-CfL.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,18 +1,24 @@
+[Top level](../README.md)
+
 # Chroma from Luma Prediction
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
-The general idea behind the chroma from luma (CfL) prediction feature is to exploit the correlation
-between luma and chroma to express the Intra prediction of chroma sample values as an affine function of
-the corresponding reconstructed luma sample values, where the reconstructed luma samples are sub-sampled to
-match the chroma sub-sampling. The chroma prediction is given by
+The general idea behind the chroma from luma (CfL) prediction feature is to
+exploit the correlation between luma and chroma to express the Intra prediction
+of chroma sample values as an affine function of the corresponding
+reconstructed luma sample values, where the reconstructed luma samples are
+sub-sampled to match the chroma sub-sampling. The chroma prediction is given by
 
 ![math](http://latex.codecogs.com/gif.latex?Chroma_{pred}=\alpha*Luma_{recon}+\beta)
 
-where  ![math](http://latex.codecogs.com/gif.latex?Chroma_{pred}) and ![math](http://latex.codecogs.com/gif.latex?Luma_{recon}) are predicted chroma
-and reconstructed luma samples, respectively. The parameters ![math](http://latex.codecogs.com/gif.latex?\alpha) and ![math](http://latex.codecogs.com/gif.latex?\beta) can be
-determined (at least theoretically) using least squares regression. The feature provides gains in screen sharing
-applications.
+where ![math](http://latex.codecogs.com/gif.latex?Chroma_{pred}) and
+![math](http://latex.codecogs.com/gif.latex?Luma_{recon}) are predicted chroma
+and reconstructed luma samples, respectively. The parameters
+![math](http://latex.codecogs.com/gif.latex?\alpha) and
+![math](http://latex.codecogs.com/gif.latex?\beta) can be determined (at least
+theoretically) using least squares regression. The feature provides gains in
+screen sharing applications.
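+
+As a floating-point sketch of the prediction equation (the codec itself uses
+a fixed-point form, and α is signaled in the bitstream rather than derived at
+the decoder):
+
+```
+// Chroma_pred = alpha * Luma_recon + beta, per (sub-sampled) sample.
+for (int i = 0; i < chroma_w * chroma_h; i++) {
+    const int p = (int)(alpha * luma_recon_ss[i] + beta);
+    chroma_pred[i] = CLIP3(0, 255, p);   // 8-bit example
+}
+```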
 
 In practice, the CfL prediction is performed as illustrated in Figure 1 below.
 
@@ -32,7 +38,7 @@ The steps illustrated in the diagram abo
 
   - Subtract the ![math6](./img/cfl_appendix_math6.png) from the reconstructed luma
     sample values to generate the AC reconstructed luma sample values,
-    ![math7](./img/cfl_appendix_math7.png) , which has a zero average.
+    ![math7](./img/cfl_appendix_math7.png), which has a zero average.
 
   - Compute ![math8](./img/cfl_appendix_math8.png) using the
     AC reconstructed luma sample values.
@@ -42,7 +48,7 @@ The steps illustrated in the diagram abo
 
 ![math11](./img/cfl_appendix_math11.png)
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 **Inputs**: luma inverse quantized residuals
 
@@ -81,10 +87,11 @@ For an intra coded block, the function `
 
 **Step 1**: Reconstruct the Luma samples (```AV1PerformInverseTransformReconLuma```)
 
-The first step is to reconstruct the luma samples, since the latter would be used to generate the chroma prediction.
-At this stage in the encoder pipeline, the luma residuals are transformed, quantized and inverse quantized.
-In this step, the inverse transform is applied, and the reconstructed luma residuals are added to the prediction
-to build the reconstructed samples.
+The first step is to reconstruct the luma samples, since the latter would be
+used to generate the chroma prediction. At this stage in the encoder pipeline,
+the luma residuals are transformed, quantized and inverse quantized. In this
+step, the inverse transform is applied, and the reconstructed luma residuals
+are added to the prediction to build the reconstructed samples.
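+
+Conceptually (a sketch; the actual routine also handles transform types and
+bit depths):
+
+```
+// Coefficients are already inverse quantized at this stage; apply the
+// inverse transform and add the residuals to the prediction.
+inv_transform(dequant_coeffs, residual, tx_size);
+for (int i = 0; i < w * h; i++)
+    recon_luma[i] = CLIP3(0, 255, pred_luma[i] + residual[i]);
+```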
 
 **Step 2**: Compute the AC component of the luma intra prediction
 
@@ -109,23 +116,23 @@ After the best ![math](http://latex.code
 CfL mode is performed using the ```svt_cfl_predict``` function. The chroma
 residuals are then calculated using the function ```residual_kernel```.
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
 Finding the best ![math](http://latex.codecogs.com/gif.latex?\alpha) requires searching different
 values in the set of allowed ![math](http://latex.codecogs.com/gif.latex?\alpha) values and calculating the cost
 associated with each value. Performing this ![math](http://latex.codecogs.com/gif.latex?\alpha) search
 process in MD for every luma mode and block size
 at MD would be very costly. In order to find the best quality-speed
-trade offs for the feature,  CfL and UV (i.e. chroma) control signals are defined with multiple levels.
+trade-offs for the feature, CfL and UV (i.e. chroma) control signals are defined with multiple levels.
 Table 2 shows the CfL control signals and their descriptions.
 The CfL control signals are set in the function ```set_cfl_ctrls``` based on the ```cfl_level``` value.
 
 ##### Table 2. CfL control signals description.
 
-| **Signal**        | **Description**                                                                                                                                                                                               |
-| ----------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------     |
-| enabled           | 0/1: Disable/Enable CfL candidate injection                                                                                                                                                                   |
-| itr_th            | Threshold to indicate the minimum number of α values to try. However if a large enough number of α values are evaluated without improvements in the overall rate-distortion cost, the search would stop.      |
+| **Signal**        | **Description**                                                                                                                                                                                           |
+| ----------------- | --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| enabled           | 0/1: Disable/Enable CfL candidate injection                                                                                                                                                               |
+| itr_th            | Threshold indicating the minimum number of α values to try. However, if a large enough number of α values are evaluated without improvement in the overall rate-distortion cost, the search stops.    |
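+
+The role of ```itr_th``` could be sketched as follows (treating it as the
+no-improvement limit is an interpretation of the description above;
+```eval_cfl_alpha``` is a hypothetical helper):
+
+```
+int no_improvement = 0;
+for (int a = 0; a < num_alpha_cands && no_improvement < itr_th; a++) {
+    const uint64_t rd = eval_cfl_alpha(alpha_cands[a]);
+    if (rd < best_rd) { best_rd = rd; best_alpha = alpha_cands[a]; no_improvement = 0; }
+    else no_improvement++;
+}
+```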
 
 Table 3 shows the CfL-related UV control signal and its description. The signal is set in the function ```set_chroma_controls``` based on the chroma level ```uv_level```.
 
@@ -135,9 +142,9 @@ Table 3 shows the CfL-related UV control
 | ----------------- | ------------------------------------------------------------------------------------------------------------          |
 | uv_cfl_th         | Threshold to skip CfL if the ratio of the best intra cost to the best inter cost is greater than uv_cfl_th.           |
 
-The CfL and UV levels are set according to the encoder preset, PD_PASS, temporal layer index, slice type and  screen content class.
+The CfL and UV levels are set according to the encoder preset, PD_PASS, temporal layer index, slice type and screen content class.
 
-## 4.  Signaling
+## 4. Signaling
 
 CfL is an Intra chroma mode that is allowed only for blocks with height and width of 32 or smaller.
 The entropy encoder signals the chroma mode per block and if the mode is CfL,
@@ -150,7 +157,11 @@ extra parameters are included in the bit
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Compliant-Subpel-Interpolation-Filter-Search.md 1.2.0+dfsg-2/Docs/Appendix-Compliant-Subpel-Interpolation-Filter-Search.md
--- 0.9.1+dfsg-1/Docs/Appendix-Compliant-Subpel-Interpolation-Filter-Search.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Compliant-Subpel-Interpolation-Filter-Search.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Sub-pel Interpolation and Interpolation Filter Search
 
 ## 1. Compliant Sub-Pel Interpolation
@@ -97,14 +99,30 @@ The steps involved in the process are ou
 
 ##### Figure 2. Example of sub-pel calculations.
 
-### 1.2  Implementation of the sub-pel search
+### 1.2 Implementation of the sub-pel search
 #### 1.2.1 Sub-pel search in the SVT-AV1 Encoder
 
-Figure 3 illustrates the different encoder tasks that involve sub-pel search. The Motion Estimation (ME) performs a Hierarchical Motion Estimation (HME) for each single 64x64, and a Full-Pel search around the HME-MV for the square blocks from 8x8 to 64x64. Those operations are performed at the Motion Estimation Process where only source input pictures could be used as reference pictures. In the Mode Decision Process, where the reconstructed pictures could be used as reference pictures, full-pel MVs are derived for the 4x4, the 128x128 and the non-square bocks, a then sub-pel search could take place.
-
-The sub-pel refinement could be performed at both partitioning decision pass 0 (PD_PASS_0) and the partitioning decision pass 1 (PD_PASS_1). However the refinement accuracy is always higher at PD_PASS_1. The sub-pel refinement is also considered in the Predictive Motion Estimation (PME) step.
-
-The sub-pel search deploys non-compliant/short filters (e.g., 4-tap or bilinear), and once all full-pel ME/PME MVs for a each given (list, reference) are refined, the MD candidates are constructed and then placed into the MD queue for evaluation through the different MD stages where the use of compliant filters becomes necessary (e.g., at MD_Stage_0 towards an accurate prediction, and at MD_Stage_3 towards compliant streams for the cases where the Encode-Pass is bypassed).
+Figure 3 illustrates the different encoder tasks that involve sub-pel search.
+The Motion Estimation (ME) process performs a Hierarchical Motion Estimation
+(HME) for each 64x64 block, and a Full-Pel search around the HME-MV for the
+square blocks from 8x8 to 64x64. Those operations are performed in the Motion
+Estimation Process, where only source input pictures could be used as reference
+pictures. In the Mode Decision Process, where the reconstructed pictures could
+be used as reference pictures, full-pel MVs are derived for the 4x4, the
+128x128 and the non-square blocks, and then sub-pel search could take place.
+
+The sub-pel refinement could be performed at both partitioning decision pass 0
+(PD_PASS_0) and partitioning decision pass 1 (PD_PASS_1). However, the
+refinement accuracy is always higher at PD_PASS_1. The sub-pel refinement is
+also considered in the Predictive Motion Estimation (PME) step.
+
+The sub-pel search deploys non-compliant/short filters (e.g., 4-tap or
+bilinear), and once all full-pel ME/PME MVs for each given (list, reference)
+pair are refined, the MD candidates are constructed and then placed into the MD
+queue for evaluation through the different MD stages where the use of compliant
+filters becomes necessary (e.g., at MD_Stage_0 towards an accurate prediction,
+and at MD_Stage_3 towards compliant streams for the cases where the Encode-Pass
+is bypassed).
 
 ![csifs_figure3](./img/csifs_figure3.png)
 
@@ -112,28 +130,42 @@ Figure 3. Sub-pel calculations in the Mo
 
 #### 1.2.2 Search method
 
-The Sub-pel search is a logarithmic search that keeps stepping at 1/2-Pel units until no further block-error reduction, then repeats the same process for 1/4-Pel refinement and 1/8-Pel refinement. Along the way it skips many diagonal positions.
+The sub-pel search is a logarithmic search that keeps stepping in 1/2-Pel units
+until no further block-error reduction is achieved, then repeats the same
+process with 1/4-Pel refinement and 1/8-Pel refinement. Along the way, it skips
+many diagonal positions.
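+
+A sketch of that search (```block_error``` is a hypothetical cost callback;
+the MV components are in 1/8-pel units):
+
+```
+// Logarithmic refinement: 1/2-, then 1/4-, then 1/8-pel.
+static const int dx[4] = { -1, 1, 0, 0 }, dy[4] = { 0, 0, -1, 1 };
+for (int step = 4; step >= 1; step >>= 1) {        // step in 1/8-pel units
+    int improved = 1, iters = 0;
+    while (improved && iters++ < subpel_iters_per_step) {
+        int best_k = -1;
+        improved = 0;
+        for (int k = 0; k < 4; k++) {              // diagonals mostly skipped
+            const uint64_t err = block_error(mv_x + dx[k] * step, mv_y + dy[k] * step);
+            if (err < best_err) { best_err = err; best_k = k; improved = 1; }
+        }
+        if (improved) { mv_x += dx[best_k] * step; mv_y += dy[best_k] * step; }
+    }
+}
+```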
 
 #### 1.2.3 Sub-pel search control
 
 ##### Table 2. Sub-Pel search control signals.
-| **Signal(s)**        | **Description**  |
-| -----------          | -----------------|
-| enabled              | Specifies whether the sub-pel search will be performed or not (0: OFF, 1: ON). |
-| subpel_search_type   | Specifies the interpolation filter tap (1: 2-tap filter, 2: 4-tap filter, 3: 8-tap filter). |
-| max_precision        | Specifies the refinement precision (or number of rounds) (0: 1/8-pel (3 rounds), 1: 1/4-pel (2 rounds), 2: 1/2-pel (1 round), 3: Full-pel-no refinement (0 round)). |
-| subpel_search_method | Specifies whether pruning will be applied to 1/2-pel position(s) or not (SUBPEL_TREE: No, SUBPEL_TREE_PRUNED: YES). |
-| subpel_iters_per_step| Specifies the maximum number of steps in the logarithmic sub-pel search before giving up.  |
-| pred_variance_th     | Specifies the full-pel prediction-block-variance threshold under which the sub-pel search is not performed; do not perform sub-pel if the variance of the full-pel prediction-block is low (where interpolation would unlikely modify the full-pel samples).  |
-| abs_th_mult          | Specifies the full-pel prediction-block-error-threshold below which sub-pel search is not performed; do not perform sub-pel if the prediction-block-error is already low.  |
-| round_dev_th         | Specifies the prediction-block-error deviation threshold between round-(N-1) and round-(N-2) under which the refinement is paused; pause the refinement if the prediction-block-error is not getting better through the process (the check takes place at only the 2nd round (prior to the 1/4-Pel refinement) or the 3rd round (prior to the 1/8-Pel refinement).  |
-| skip_diag_refinement | Specifies the refinement accuracy for diagonal position(s).  |
-| skip_zz_mv           | Specifies whether the sub-pel search will be performed  around (0,0) or not (0: OFF, 1: ON). |
+| **Signal(s)**         | **Description**                                                                                                                                                                                                                                                                                                                                                    |
+| -----------           | -----------------                                                                                                                                                                                                                                                                                                                                                  |
+| enabled               | Specifies whether the sub-pel search will be performed or not (0: OFF, 1: ON).                                                                                                                                                                                                                                                                                     |
+| subpel_search_type    | Specifies the interpolation filter tap (1: 2-tap filter, 2: 4-tap filter, 3: 8-tap filter).                                                                                                                                                                                                                                                                        |
+| max_precision         | Specifies the refinement precision (or number of rounds) (0: 1/8-pel (3 rounds), 1: 1/4-pel (2 rounds), 2: 1/2-pel (1 round), 3: Full-pel-no refinement (0 round)).                                                                                                                                                                                                |
+| subpel_search_method  | Specifies whether pruning will be applied to 1/2-pel position(s) or not (SUBPEL_TREE: No, SUBPEL_TREE_PRUNED: Yes).                                                                                                                                                                                                                                                |
+| subpel_iters_per_step | Specifies the maximum number of steps in the logarithmic sub-pel search before giving up.                                                                                                                                                                                                                                                                          |
+| pred_variance_th      | Specifies the full-pel prediction-block-variance threshold under which the sub-pel search is not performed; do not perform sub-pel if the variance of the full-pel prediction-block is low (where interpolation would unlikely modify the full-pel samples).                                                                                                       |
+| abs_th_mult           | Specifies the full-pel prediction-block-error-threshold below which sub-pel search is not performed; do not perform sub-pel if the prediction-block-error is already low.                                                                                                                                                                                          |
+| round_dev_th          | Specifies the prediction-block-error deviation threshold between round-(N-1) and round-(N-2) under which the refinement is paused; pause the refinement if the prediction-block-error is not getting better through the process (the check takes place only at the 2nd round (prior to the 1/4-Pel refinement) or the 3rd round (prior to the 1/8-Pel refinement)). |
+| skip_diag_refinement  | Specifies the refinement accuracy for diagonal position(s).                                                                                                                                                                                                                                                                                                        |
+| skip_zz_mv            | Specifies whether the sub-pel search around (0,0) MVs will be skipped or not (0: OFF, 1: ON).                                                                                                                                                                                                                                                                      |
 
 ## 2. Interpolation Filter Search
 ### 2.1 Search Method
 
-To account for the varying characteristics of the video picture in both the horizontal and vertical directions, the selection of the interpolation filter could be done independently for each of the two directions. The selection could be performed through an interpolation filter search, where in addition to the (Regular, Regular) vertical and horizontal filter pair, eight other combination pairs could be evaluated in motion compensation for the same motion vector. The selection of the pair to work with is based on a rate-distortion cost where the filter combination that provides the lowest rate-distortion cost is selected as the best filter pair. The selected filter pair (which corresponds to the best filter combination) is used in the Encode Pass final motion compensation in the case where the associated candidate is selected as the best candidate in inter-depth decision.
+To account for the varying characteristics of the video picture in both the
+horizontal and vertical directions, the selection of the interpolation filter
+could be done independently for each of the two directions. The selection could
+be performed through an interpolation filter search, where in addition to the
+(Regular, Regular) vertical and horizontal filter pair, eight other combination
+pairs could be evaluated in motion compensation for the same motion vector. The
+selection of the pair to work with is based on a rate-distortion cost where the
+filter combination that provides the lowest rate-distortion cost is selected as
+the best filter pair. The selected filter pair (which corresponds to the best
+filter combination) is used in the Encode Pass final motion compensation in the
+case where the associated candidate is selected as the best candidate in
+inter-depth decision.
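+
+A sketch of the pair selection (illustrative types and helper names):
+
+```
+// Evaluate the candidate (vertical, horizontal) filter pairs for one MV.
+uint64_t best_rd = UINT64_MAX;
+int best_pair = 0;                          // (Regular, Regular) tested first
+for (int p = 0; p < num_pairs; p++) {
+    predict(mv, pairs[p].vert, pairs[p].horz, pred_buf);
+    uint64_t rd = RDCOST(lambda, filter_rate(p), distortion(src_buf, pred_buf));
+    if (rd < best_rd) { best_rd = rd; best_pair = p; }
+}
+```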
 
 As depicted in the Figure 4 below, the interpolation filter search consists of three main steps:
 - Step1: Test Regular filters for both vertical and horizontal directions.
@@ -143,34 +175,52 @@ As depicted in the Figure 4 below, the i
 ![csifsa_fig4](./img/csifsa_fig4.png)
 Figure 4. Diagram illustrating the interpolation filter search process.
 
-In the current implementation, the dual option is not active in any of the presets. Only (Regular, Regular), (Smooth, Smooth) and (Sharp, Sharp) are available.
+In the current implementation, the dual option is not active in any of the
+presets. Only (Regular, Regular), (Smooth, Smooth) and (Sharp, Sharp) are
+available.
 
 ### 2.2 Optimization of the Interpolation Filter Search
 
-Various signals are used to specify the interpolation filter search settings. The following table presents a brief description for each signal. These parameters are decided by ```interpolation_search_level```, which is also function of the enc_mode, input resolution, and skip selection at the reference frame(s) if available (for temporal layer 1 and higher). Basically, the higher the resolution and the skip selection at the reference frame(s), the higher is ```interpolation_search_level``` towards a faster interpolation filter search.
-
-The ```interpolation_search_level``` is set at the Mode Decision Configuration Process, while ```set_interpolation_search_level_ctrls()``` is called at the Mode Decision Process for only the second Partitioning Decision Pass (PD_PASS_1) as the  interpolation filter search is not used in the first Partitioning Decision Pass (PD_PASS_0) (i.e. (Regular, Regular) is used for all candidates).
+Various signals are used to specify the interpolation filter search settings.
+The following table presents a brief description of each signal. These
+parameters are decided by ```interpolation_search_level```, which is a
+function of the enc_mode, the input resolution, and the skip selection at the
+reference frame(s) if available (for temporal layer 1 and higher). Basically,
+the higher the resolution and the skip selection at the reference frame(s),
+the higher the ```interpolation_search_level``` and the faster the
+interpolation filter search.
+
+The ```interpolation_search_level``` is set at the Mode Decision Configuration
+Process, while ```set_interpolation_search_level_ctrls()``` is called at the
+Mode Decision Process for only the second Partitioning Decision Pass
+(PD_PASS_1) as the interpolation filter search is not used in the first
+Partitioning Decision Pass (PD_PASS_0) (i.e. (Regular, Regular) is used for all
+candidates).
 
 ##### Table 3. Control flags for the interpolation filter search.
-| **Signal(s)**                  | **Description**  |
-| -----------                    | -----------------|
-| Level                          | Specifies the MD Stage where the interpolation filter search will take place (IFS_MDS0, IFS_MDS1, IFS_MDS2, or IFS_MDS3 for MD Stage 0, MD Stage 1, MD Stage 2, and MD Stage 3, respectively).   |
-| quarter_pel_only               | Specifies whether the interpolation filter search will use 1/8-Pel precision or 1/4 -Pel precision (0: 1/8-Pel precision, 1: 1/4 -Pel precision).|
-| modulate_filter_per_resolution | Specifies whether certain combination(s) will be skipped depending on the input resolution or not (0: NO, 1: only (regular, regular) and (sharp, sharp) if 480p and below, and only (regular, regular) and (smooth, smooth) otherwise.  |
-| early_skip                     | Specifies whether an early interpolation filter search exit could take place based on the cost of signaling a switchable filter type (0: OFF, 1: ON).|
-| subsampled_distortion          | Specifies whether sub-sampled input/prediction will be used at the distortion computation (0: OFF, 1: ON, NA for block height 16 and lower).  |
-| skip_sse_rd_model              | Specifies whether a model will be used for rate estimation or not (0: NO (assume rate is 0), 1: estimate rate from distortion).  |
+| **Signal(s)**                  | **Description**                                                                                                                                                                                                                        |
+| -----------                    | -----------------                                                                                                                                                                                                                      |
+| Level                          | Specifies the MD Stage where the interpolation filter search will take place (IFS_MDS0, IFS_MDS1, IFS_MDS2, or IFS_MDS3 for MD Stage 0, MD Stage 1, MD Stage 2, and MD Stage 3, respectively).                                         |
+| quarter_pel_only               | Specifies whether the interpolation filter search will use 1/8-Pel precision or 1/4-Pel precision (0: 1/8-Pel precision, 1: 1/4-Pel precision).                                                                                        |
+| modulate_filter_per_resolution | Specifies whether certain combination(s) will be skipped depending on the input resolution or not (0: NO, 1: only (regular, regular) and (sharp, sharp) if 480p and below, and only (regular, regular) and (smooth, smooth) otherwise). |
+| early_skip                     | Specifies whether an early interpolation filter search exit could take place based on the cost of signaling a switchable filter type (0: OFF, 1: ON).                                                                                  |
+| subsampled_distortion          | Specifies whether sub-sampled input/prediction will be used at the distortion computation (0: OFF, 1: ON, NA for block height 16 and lower).                                                                                           |
+| skip_sse_rd_model              | Specifies whether a model will be used for rate estimation or not (0: NO (assume rate is 0), 1: estimate rate from distortion).                                                                                                        |
 
 ## 3. Signaling
 
-Each of the vertical filter type and horizontal filter type are signaled independently in the bitstream.
+The vertical filter type and the horizontal filter type are signaled
+independently in the bitstream.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
 [1] Ching-Han Chiang, Jingning Han, Stan Vitvitskyy, Debargha Mukherjee, and Yaowu Xu, “Adaptive interpolation filter scheme in AV1,” International Conference on Image Processing, 2017.
 
-[2] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James  Bankoski, “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
+[2] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James Bankoski, “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Compound-Mode-Prediction.md 1.2.0+dfsg-2/Docs/Appendix-Compound-Mode-Prediction.md
--- 0.9.1+dfsg-1/Docs/Appendix-Compound-Mode-Prediction.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Compound-Mode-Prediction.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,6 +1,8 @@
+[Top level](../README.md)
+
 # Compound Mode Prediction Appendix
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
 The general idea behind compound prediction is to generate a weighted
 average of two different predictions of the same block to develop a
@@ -138,7 +140,7 @@ Table 1 below provides the weights as a
 
 ![comp_mode_pred_table1](./img/comp_mode_pred_table1.png)
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 **Control tokens/flags**:
 
@@ -196,13 +198,13 @@ the candidate injection stage are
 of inter-intra compound candidates. The third is related to the
 injection of inter-inter compound candidates.
 
-1.  ```Precompute_intra_pred_for_inter_intra```
+1. ```Precompute_intra_pred_for_inter_intra```
 
 The function generates for a given block DC, Vertical, Horizontal and
 Smooth intra predictions that would be used in subsequent stages in the
 compound mode candidate injection process.
 
-2.  ```Inter_intra_search```
+2. ```Inter_intra_search```
 
 For a given block, the generation of inter-intra wedge prediction and
 the smooth inter-intra prediction is performed using the function
@@ -221,7 +223,7 @@ inter-intra search are outlined below.
  - Loop over the intra prediction modes: II\_DC\_PRED, II\_V\_PRED,
  II\_H\_PRED, II\_SMOOTH\_PRED
 
-     - Perform smooth filtering of the inter prediction and the intra
+    - Perform smooth filtering of the inter prediction and the intra
  prediction through the function call combine\_interintra\_highbd or
  combine\_interintra based on the already computed inter predictions
  and intra predictions. The intra predictions are already generated in
@@ -309,17 +311,17 @@ mask. Block size should be at least 8x8
 As an example, consider the flow below for the function
 ```inject_mvp_candidates_II```
 
-1.  Check if compound reference mode is allowed, i.e. The candidate
+1. Check if compound reference mode is allowed, i.e. the candidate
     should not be a single-reference candidate and the block size
     should be at least 8x8 for bipred to be allowed.
 
-2.  Determine the number of compound modes to try:
+2. Determine the number of compound modes to try:
 
     - If compound is enabled for this inter type (`inter_comp_ctrls.do_nearest_nearest` is true), `tot_comp_types` is set to `inter_comp_ctrls.tot_comp_types`,
       which is based on the `inter_compound_mode` level
     - else `tot_comp_types` is set to `MD_COMP_DIST`
 
-3.  Single reference case
+3. Single reference case
 
     - Check if inter-intra is allowed: ```svt_is_interintra_allowed```
 
@@ -348,7 +350,7 @@ mode).
     prediction option based on the best intra prediction mode from the
     smooth inter-intra prediction search. (```inter_intra_search```)
 
-4.  Compound reference case
+4. Compound reference case
 
     For all ```NEARESTMV_NEARESTMV``` and ```NEAR_NEARMV``` candidates, loop over
 all selected compound prediction modes
@@ -411,7 +413,7 @@ av1\_inter\_prediction.
     convolveHbd\[\]\[\]\[\] is then called to generate the prediction
     using the forward offset and the backward offset weights.
 
-- Step 2.1:  **av1\_inter\_prediction**
+- Step 2.1: **av1\_inter\_prediction**
 
 ![comp_mode_pred_fig8](./img/comp_mode_pred_fig8.png)
 
@@ -429,7 +431,7 @@ pass. The two main relevant functions ar
 ```av1_inter_prediction```. The two functions are described above.
 
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
 **Inter-intra prediction**
 
@@ -459,7 +461,11 @@ signals are given in Table 6.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
@@ -475,5 +481,5 @@ Peter de Rivaz, “An Overview of Core C
 Codec,” Picture Coding Symposium, pp. 41-45, 2018.
 
 \[3\] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen,
-Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James  Bankoski,
+Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James Bankoski,
 “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-DLF.md 1.2.0+dfsg-2/Docs/Appendix-DLF.md
--- 0.9.1+dfsg-1/Docs/Appendix-DLF.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-DLF.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,6 +1,8 @@
+[Top level](../README.md)
+
 # Deblocking Loop Filter Appendix
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
 The deblocking loop filter is used to address blocking artifacts in
 reconstructed pictures. The filter was developed based on VP9 deblocking
@@ -24,7 +26,7 @@ edges. The main idea behind the filter c
 |--- |--- |--- |--- |--- |--- |
 |>TX_8x8|Yes|Filter14 (13-tap)|14|Filter6 (5-tap)|6|
 ||No|Filter8 (7-tap)/ Filter4|4|Filter4|4|
-|TX_8x8|Yes|Filter8 (7-tap)|8|Filter6 (5-tap)|6|
+| TX_8x8 | Yes | Filter8 (7-tap) | 8 | Filter6 (5-tap) | 6 |
 ||No|Filter4|4|Filter4|4|
 |TX_4x4|-|Filter4|4|Filter4|4|
 
@@ -298,7 +300,7 @@ filter14)
 
 
 
-## 2.  Implementation
+## 2. Implementation
 
 **Inputs**: Block mode, transform size and reconstructed picture.
 
@@ -308,11 +310,11 @@ filter14)
 
 ##### Table 4. List of loop filter control flags.
 
-| **Flag**                | **Level** | **Description**                                                                                                                                                                                                                                                                                         |
-| ----------------------- | --------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
-| --enable-dlf                    | Configuration | Deblocking loop filter control (0:ON (Default), 1: OFF)   |
-| DlfCtrls      | Picture   | Describes the Dlf control signal.                                                                                                                                                                                                           |
-| combine\_vert\_horz\_lf | Picture   | When set, it implies performing filtering of vertical edges in the current SB followed by filtering of horizontal edges in the preceding SB in the same SB row. When OFF, it implies performing filtering of vertical edges in the current SB followed by filtering of horizontal edges in the same SB. |
+| **Flag**                | **Level**     | **Description**                                                                                                                                                                                                                                                                                         |
+| ----------------------- | ---------     | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| --enable-dlf            | Configuration | Deblocking loop filter control (0: OFF, 1: ON (Default))                                                                                                                                                                                                                                                |
+| DlfCtrls                | Picture       | Describes the Dlf control signal.                                                                                                                                                                                                                                                                       |
+| combine\_vert\_horz\_lf | Picture       | When set, it implies performing filtering of vertical edges in the current SB followed by filtering of horizontal edges in the preceding SB in the same SB row. When OFF, it implies performing filtering of vertical edges in the current SB followed by filtering of horizontal edges in the same SB. |
 
 ![dlf_new_fig4](./img/dlf_new_fig4.png)
 
@@ -475,12 +477,12 @@ indicated in the Table above.
 
       - Apply the selected filter to the four samples along the vertical edge.
 
-3.  Return the frame filtering sse for the loop filter level and the
-    picture data plane being considered.
+3. Return the frame filtering sse for the loop filter level and the
+   picture data plane being considered.
 
 <!-- end list -->
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
The algorithmic optimization of the loop filter is performed by considering different loop filter search methods. If LPF_PICK_FROM_Q is chosen as the search
method, the filter levels are determined using the picture qindex; however, if LPF_PICK_FROM_FULL_IMAGE is selected, a binary search is performed to find the
@@ -494,7 +496,7 @@ determine the search method.
| enabled | 0/1: Disable/Enable DLF |
| sb_based_dlf | 0: perform picture-based DLF with LPF_PICK_FROM_FULL_IMAGE search method; 1: perform DLF per SB using LPF_PICK_FROM_Q method |
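+
+A sketch of the picture-based search (shown here as a simple hill-climb from a
+qindex-derived starting level, a simplification of the binary search described
+above; ```try_filter_frame``` is a hypothetical helper returning the filtering
+sse):
+
+```
+int level = level_from_qindex(qindex);             // starting point
+uint64_t best_sse = try_filter_frame(level);
+for (int dir = -1; dir <= 1; dir += 2) {           // search both directions
+    for (int l = level + dir; l >= 0 && l <= MAX_LOOP_FILTER; l += dir) {
+        const uint64_t sse = try_filter_frame(l);
+        if (sse >= best_sse) break;                // stop when not improving
+        best_sse = sse;
+        level = l;
+    }
+}
+```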
 
-## 4.  Signaling
+## 4. Signaling
 
 The loop filter parameters are signaled at the frame level and include
 the following parameters: ```filter_level[0]```, ```filter_level[1]```,
@@ -513,7 +515,11 @@ seen in Table 6.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
@@ -523,4 +529,4 @@ for AV1 CODEC,” International Conferen
 925-929, 2017.
 
 \[2\] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen,
-Yunqing Wang, Paul Wilkins, Yaowu Xu, James  Bankoski, “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
+Yunqing Wang, Paul Wilkins, Yaowu Xu, James Bankoski, “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Film-Grain-Synthesis.md 1.2.0+dfsg-2/Docs/Appendix-Film-Grain-Synthesis.md
--- 0.9.1+dfsg-1/Docs/Appendix-Film-Grain-Synthesis.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Film-Grain-Synthesis.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,6 +1,8 @@
+[Top level](../README.md)
+
 # Film Grain Synthesis Appendix
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
 The film grain synthesis algorithm involves two key steps. In the first
 step, the input pictures are denoised and the resulting denoised version
@@ -109,7 +111,7 @@ An optional block overlap can be applied
 block overlap attenuates potential artifacts at the film grain block
 boundaries.
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 **Inputs**:
 
@@ -131,7 +133,7 @@ The control flags for the film grain fea
 | ------------------------------ | ---------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------ |
 | film\_grain\_denoise\_strength | Sequence                     | Takes values from 0 to 50 and determines strength of film grain used in grain filtering and estimation. 0 – film grain is turned off, 50 – maximum strength. |
 | film\_grain\_params            | Picture                      | Film grain parameters for a reference frame                                                                                                                  |
-| apply\_grain               | Picture                      | Apply grain for to the current frame.                                                                                                                        |
+| apply\_grain                   | Picture                      | Apply grain to the current frame.                                                                                                                            |
 
 **Details of the film grain parameters estimation**
 
@@ -191,7 +193,7 @@ intermediateBuffer, which follows by the
 The film grain application is performed by the
 ```svt_av1_add_film_grain_run``` in grainSynthesis.c.
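+
+A rough per-sample sketch of how synthesized grain is scaled by the local
+sample value and added (indices, dimensions and shifts are illustrative):
+
+```
+// out = in + (scaling(in) * grain) >> scaling_shift, clipped to range.
+for (int y = 0; y < height; y++)
+    for (int x = 0; x < width; x++) {
+        const int noise  = grain_block[y & 31][x & 31];   // from the AR pattern
+        const int scaled = (scaling_lut[in[y][x]] * noise) >> scaling_shift;
+        out[y][x] = CLIP3(0, 255, in[y][x] + scaled);     // 8-bit example
+    }
+```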
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
 The algorithm is implemented in a parallel manner. This means that
 estimation of the film grain parameters is performed independently
@@ -208,7 +210,7 @@ worse denoising and consequently less ac
 estimation, but makes parallelization easier since only the current
 frame is used to obtain the film grain parameters.
 
-## 4.  Signaling
+## 4. Signaling
 
 The signaling part of the film grain parameters algorithm is
 implemented as follows. The film grain is called from
@@ -269,7 +271,11 @@ to the bitstream.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Global-Motion.md 1.2.0+dfsg-2/Docs/Appendix-Global-Motion.md
--- 0.9.1+dfsg-1/Docs/Appendix-Global-Motion.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Global-Motion.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Global Motion Compensation
 
 ## 1. Description of the algorithm
@@ -95,7 +97,7 @@ algorithm are as follows:
     threshold.
 
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 ### 2.1. Global Motion inputs/outputs
 
@@ -106,17 +108,19 @@ algorithm are as follows:
 
 ### 2.2 Global Motion API
 
-Table 1 below summarises the invoked functions when global motion is enabled. The process where each function is invoked is also indicated as well as a brief description of each function.
+Table 1 below summarises the functions invoked when global motion is enabled.
+The process in which each function is invoked is indicated, along with a brief
+description of each function.
 
 ##### Table 1. Global motion estimation API.
 
-|**Process**|**Function**|**Purpose**|
-|--- |--- |--- |
-|Picture Decision Process|set_gm_controls|Set global motion controls|
-|Motion Estimation Process|perform_gm_detection|Detect whether a global motion may be identified based on the uniformity of the motion vectors produced by the  normal motion estimation search|
-|Motion Estimation Process|global_motion_estimation|Perform global motion estimation search|
-|Mode Decision Configuration process|set_global_motion_field|Map the global motion information generated in EbMotionEstimationProcess to EbEncDecProcess|
-|Mode Decision Process|inject_global_candidates|Inject global motion as a mode candidate to the mode decision|
+| **Process**                         | **Function**             | **Purpose**                                                                                                                                    |
+| ---                                 | ---                      | ---                                                                                                                                            |
+| Picture Decision Process            | set_gm_controls          | Set global motion controls                                                                                                                     |
+| Motion Estimation Process           | perform_gm_detection     | Detect whether a global motion may be identified based on the uniformity of the motion vectors produced by the normal motion estimation search |
+| Motion Estimation Process           | global_motion_estimation | Perform global motion estimation search                                                                                                        |
+| Mode Decision Configuration process | set_global_motion_field  | Map the global motion information generated in EbMotionEstimationProcess to EbEncDecProcess                                                    |
+| Mode Decision Process               | inject_global_candidates | Inject global motion as a mode candidate to the mode decision                                                                                  |
 
 ### Details of the implementation
 
@@ -126,8 +130,10 @@ The global motion data flow is summarize
 
 ##### Figure 2. Global motion data flow in the encoder pipeline.
 
-The main algorithmic components of the global motion feature are the estimation component which takes place in the motion estimation process,
-and the injection and processing component which takes place in the Mode Decision process(injection and processing).
+The main algorithmic components of the global motion feature are the estimation
+component, which takes place in the motion estimation process, and the
+injection and processing component, which takes place in the Mode Decision
+process.
 
 ### Global motion estimation
 
@@ -212,8 +218,8 @@ enabled and for the case where it is not
 
 The two main steps involved in MD are the injection of GLOBAL and GLOBAL_GLOBAL candidates, and the processing of those candidates through MD stages.
 The conditions for the injection of GLOBAL candidates are as follows: For the case where downsample_level <= GM_DOWN:
-1.  The global motion vector points inside the current tile AND
-2.  (((Transformation Type > TRANSLATION AND block width >= 8 AND  block height >= 8) OR Transformation type <= TRANSLATION))
+1. The global motion vector points inside the current tile AND
+2. ((Transformation Type > TRANSLATION AND block width >= 8 AND block height >= 8) OR Transformation Type <= TRANSLATION)
 
 Otherwise, only condition 1 above applies.
 
@@ -221,9 +227,9 @@ The conditions for the injection of GLOB
 
 For the case where downsample_level <= GM_DOWN:
 
-1.  Is_compound_enabled (i.e. compound reference mode) AND
-2.  2.  allow_bipred (i.e. block height > 4 or block width > 4) AND
-3.  (List_0 Transformation type > TRANSLATION AND List_1 Transformation type > TRANSLATION))
+1. Is_compound_enabled (i.e. compound reference mode) AND
+2. allow_bipred (i.e. block height > 4 or block width > 4) AND
+3. (List_0 Transformation Type > TRANSLATION AND List_1 Transformation Type > TRANSLATION)
 
 Otherwise, only conditions 1 and 2 above apply.
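+
+The two sets of injection conditions can be summarized with the following
+minimal sketch; the function names and the GM_* downsampling values are
+illustrative placeholders, while the transformation types are numbered as in
+AV1.
+
+```c
+/* Transformation types as numbered in AV1; GM_* levels are placeholders. */
+enum { IDENTITY = 0, TRANSLATION = 1, ROTZOOM = 2, AFFINE = 3 };
+enum { GM_FULL = 0, GM_DOWN = 1, GM_DOWN16 = 2 };
+
+static int inject_global(int mv_inside_tile, int trans_type,
+                         int bw, int bh, int downsample_level) {
+    if (!mv_inside_tile) return 0;            /* condition 1 always applies */
+    if (downsample_level > GM_DOWN) return 1; /* only condition 1 applies   */
+    return trans_type <= TRANSLATION ||       /* condition 2                */
+           (bw >= 8 && bh >= 8);
+}
+
+static int inject_global_global(int is_compound_enabled, int allow_bipred,
+                                int type_l0, int type_l1,
+                                int downsample_level) {
+    if (!is_compound_enabled || !allow_bipred) return 0;   /* conditions 1, 2 */
+    if (downsample_level > GM_DOWN) return 1;              /* 3 not required  */
+    return type_l0 > TRANSLATION && type_l1 > TRANSLATION; /* condition 3     */
+}
+```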
 
@@ -242,12 +248,14 @@ of the mode decision process.
 
 
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
-Different quality-complexity tradeoffs of the global motion algorithm can be achieved by manipulating a set of control parameters that are
-set in the gm_controls() function. These control parameters are set according to the flag gm_level which is set in the picture decision
-process according to the encoder preset.
-The different parameters that are controlled by the flag gm_level are described in Table 2 below.
+Different quality-complexity tradeoffs of the global motion algorithm can be
+achieved by manipulating a set of control parameters that are set in the
+gm_controls() function. These control parameters are set based on the flag
+gm_level, which the picture decision process derives from the encoder preset.
+The different parameters controlled by gm_level are described in Table 2
+below.
 
 ##### Table 2. Optimization flags associated with global motion compensation.
 
@@ -266,7 +274,7 @@ The different parameters that are contro
 The generated global motion information may be used in all or some of the mode decision Partitioning Decision (PD) passes.
 The injection of global motion candidates in MD is controlled by the flag global_mv_injection.
 
-## 4.  Signaling
+## 4. Signaling
 
 The global motion parameters are written in the bitstream for each
 encoded frame with their corresponding references.
@@ -275,13 +283,15 @@ Boolean parameters encode the type of gl
 
 ##### Table 3. Global motion types signaled in the bitstream.
 
-| **Frame level** | **Values** | **Number of bits** |
+| **Frame level** | **Values**                     | **Number of bits** |
 | --------------- | ------------------------------ | ------------------ |
 | is\_global      | {0, 1}                         | 1                  |
 | is\_rot\_zoom   | {0, 1}                         | 1                  |
 | is\_translation | {0, 1}                         | 1                  |
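+
+The three bits map hierarchically to a transformation type, as in the AV1
+global motion syntax; the sketch below shows the decoder-side reading order
+(the function name is illustrative).
+
+```c
+enum TransformationType { IDENTITY = 0, TRANSLATION = 1, ROTZOOM = 2, AFFINE = 3 };
+
+static enum TransformationType gm_type_from_bits(int is_global, int is_rot_zoom,
+                                                 int is_translation) {
+    if (!is_global)  return IDENTITY; /* no global motion for this reference */
+    if (is_rot_zoom) return ROTZOOM;  /* rotation/zoom model                 */
+    return is_translation ? TRANSLATION : AFFINE;
+}
+```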
 
-Depending on the model complexity, several parameters are also encoded (See Table 4). Each of those parameters corresponds to an entry in the affine transformation matrix.
+Depending on the model complexity, several parameters are also encoded (See
+Table 4). Each of those parameters corresponds to an entry in the affine
+transformation matrix.
 
 ##### Table 4. Global motion parameters signaled in the bitstream.
 
@@ -295,7 +305,11 @@ Depending on the model complexity, sever
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Intra-Block-Copy.md 1.2.0+dfsg-2/Docs/Appendix-Intra-Block-Copy.md
--- 0.9.1+dfsg-1/Docs/Appendix-Intra-Block-Copy.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Intra-Block-Copy.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,15 +1,23 @@
+[Top level](../README.md)
+
 # Intra Block Copy
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
-In intra frames, intra block copy (IBC) allows for the prediction of a given intra coded block to be a copy of another intra coded
-block in the same frame (i.e. from the reconstructed part of the current frame). The copied block is specified using a displacement
-vector (DV). Only integer precision DVs are allowed since force_integer_mv will be equal to 1 for intra frames. Bilinear interpolation
-is considered for chroma prediction in the case of odd DVs. IBC is applicable only to key frames and intra-only non-key frames.
-When IBC is active, all in-loop filtering is disabled for the whole frame, including deblocking filter, CDEF and restoration filter.
-The prediction is generated from the reconstructed intra pixels, where the latter would not have been processed by the in-loop
-filters since the latter are disabled. The predicted block is considered an inter predicted block using a single reference frame
-(the current intra frame), and where the DV is full-pel. Only single-reference mode is allowed.
+In intra frames, intra block copy (IBC) allows for the prediction of a given
+intra coded block to be a copy of another intra coded block in the same frame
+(i.e. from the reconstructed part of the current frame). The copied block is
+specified using a displacement vector (DV). Only integer precision DVs are
+allowed since force_integer_mv will be equal to 1 for intra frames. Bilinear
+interpolation is considered for chroma prediction in the case of odd DVs. IBC
+is applicable only to key frames and intra-only non-key frames. When IBC is
+active, all in-loop filtering is disabled for the whole frame, including
+deblocking filter, CDEF and restoration filter. The prediction is generated
+from the reconstructed intra pixels, which would not have been processed by
+the in-loop filters since those filters are disabled. The predicted
+block is considered an inter predicted block using a single reference frame
+(the current intra frame), and where the DV is full-pel. Only single-reference
+mode is allowed.
 
 A 256 pixels wide area just before the block being predicted is excluded
 from the block copy intra search area, i.e. the valid region of the
@@ -24,7 +32,7 @@ illustrates the allowed search area.
 The IBC feature is useful in encoding screen content that involves
 repeated patterns, text and texture in the same frame.
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 ### Main function calls
 
@@ -38,60 +46,69 @@ depth of the function call.
 
 ### Candidate Injection
 
-In the function ```inject_intra_bc_candidates()```, up to 2 intra candidates are injected as IBC modes. These modes are considered
-DC_PRED modes when coding the block mode information in the bit stream. Simple translation is used and no fractional DVs are
-allowed for this case. For Chroma, bilinear interpolation is used to produce predicted pixels. The two candidates are determined
-through the ```intra_bc_search()``` function call, which is discussed next.
+In the function ```inject_intra_bc_candidates()```, up to 2 intra candidates
+are injected as IBC modes. These modes are considered DC_PRED modes when coding
+the block mode information in the bit stream. Simple translation is used and no
+fractional DVs are allowed for this case. For Chroma, bilinear interpolation is
+used to produce predicted pixels. The two candidates are determined through the
+```intra_bc_search()``` function call, which is discussed next.
 
 ### DV Search
 
-The function ```intra_bc_search()``` performs a search within the current picture (i.e. within the already reconstructed area).
-The search is a combination of a classic Diamond search followed by Hash search (CRC is used as Hash metric).
-The search is only performed in full pel resolution as sub-pel displacements are not allowed in the IBC tool in AV1.
-
-The decoded reconstructed area is divided into two search areas: Top and Left. As explained above, due to HW constraints,
-not all of the top reconstructed area is used to derive DV vectors.
-To support wavefront-like SW based processing, more constraints are added to only consider the valid SBs in such scenario.
+The function ```intra_bc_search()``` performs a search within the current
+picture (i.e. within the already reconstructed area). The search is a
+combination of a classic Diamond search followed by Hash search (CRC is used as
+Hash metric). The search is only performed in full pel resolution as sub-pel
+displacements are not allowed in the IBC tool in AV1.
+
+The decoded reconstructed area is divided into two search areas: Top and Left.
+As explained above, due to HW constraints, not all of the top reconstructed
+area is used to derive DV vectors. To support wavefront-like SW-based
+processing, additional constraints are added so that only the valid SBs are
+considered in such a scenario.
 
 More detailed steps involved in the DV search are listed below:
 
-1.  Set the reference frame to ```INTRA_FRAME```.
+1. Set the reference frame to ```INTRA_FRAME```.
 
-2.  Get nearest and near MVs from MV stack for the specified reference
-    frame. See (```svt_av1_find_best_ref_mvs_from_stack```)
+2. Get nearest and near MVs from MV stack for the specified reference
+   frame. See (```svt_av1_find_best_ref_mvs_from_stack```)
 
-3.  Set ```dv_ref``` to either nearest or near.
+3. Set ```dv_ref``` to either nearest or near.
 
-4.  Constrain the ```dv_ref``` mv to be at least a block size away from the
-    current block, and also to point at least 256 samples away to the
-    left in the x direction when too close to the tile top boundary.
-    (```av1_find_ref_dv```)
-
-5.  Two types of searches could be performed: Search above the current
-    block (```IBC_MOTION_ABOVE```) only or search above and to the left of
-    the current block (```IBC_MOTION_ABOVE``` and ```IBC_MOTION_LEFT```),
-    depending on the setting of ```ibc_mode```. Up to two dv candidates could
-    be generated.
-
-6.  Limits on mv sizes are computed and refined
-    (```svt_av1_set_mv_search_range```).
-
-    Perform full-pel diamond/exhaustive search followed by hash search (svt_av1_full_pixel_search).
-    The hash search computes the hash of 2x2 blocks around each luma pixel in the reference frame.
-    The 2x2 hashes arethen used to make up the 4x4 hashes, which are then used to make up the 8x8 hashes, and so on.
-    All the hash values are stored in a hash table.  The hash for the current block is then computed and compared to hash values
-    in the hash table which stores the hashes from the reference frames.  If a match is found, then there is a block in the
-    reference frame that is the same as the current block.
-    That block may then be used as an IBC candidate if its estimated cost is lower than all other IBC candidates.
+4. Constrain the ```dv_ref``` mv to be at least a block size away from the
+   current block, and also to point at least 256 samples away to the
+   left in the x direction when too close to the tile top boundary.
+   (```av1_find_ref_dv```)
+
+5. Two types of searches could be performed: Search above the current
+   block (```IBC_MOTION_ABOVE```) only or search above and to the left of
+   the current block (```IBC_MOTION_ABOVE``` and ```IBC_MOTION_LEFT```),
+   depending on the setting of ```ibc_mode```. Up to two dv candidates could
+   be generated.
+
+6. Limits on mv sizes are computed and refined
+   (```svt_av1_set_mv_search_range```).
+
+   Perform full-pel diamond/exhaustive search followed by hash search
+   (svt_av1_full_pixel_search). The hash search computes the hash of 2x2 blocks
+   around each luma pixel in the reference frame. The 2x2 hashes are then used
+   to make up the 4x4 hashes, which are then used to make up the 8x8 hashes,
+   and so on (a simplified sketch of this bottom-up construction follows the
+   list). All the hash values are stored in a hash table. The hash for the
+   current block is then computed and compared to the hash values in the hash
+   table, which stores the hashes from the reference frames. If a match is
+   found, then there is a block in the reference frame that is the same as the
+   current block. That block may then be used as an IBC candidate if its
+   estimated cost is lower than that of all other IBC candidates.
 
-7.  Perform full-pel diamond search followed by hash search
-    (```svt_av1_full_pixel_search```).
+7. Perform full-pel diamond search followed by hash search
+   (```svt_av1_full_pixel_search```).
 
-8.  Make sure returned mv is within the specified mv bounds
-    (```mv_check_bounds```)
+8. Make sure returned mv is within the specified mv bounds
+   (```mv_check_bounds```)
 
-9.  Make sure the returned mv meets HW and SW constraints
-    (```av1_is_dv_valid```)
+9. Make sure the returned mv meets HW and SW constraints
+   (```av1_is_dv_valid```)
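+
+The bottom-up hash construction from step 6 can be pictured with the minimal
+sketch below; the real implementation uses CRC-based hashes and its own data
+structures, so the hash combiner and all names here are illustrative.
+
+```c
+#include <stdint.h>
+
+/* Simple 32-bit hash combiner standing in for the CRC used in the codebase. */
+static uint32_t mix(uint32_t a, uint32_t b) {
+    return a ^ (b + 0x9e3779b9u + (a << 6) + (a >> 2));
+}
+
+/* src holds the hash of every s x s block keyed by its top-left (x, y);
+ * each 2s x 2s hash in dst is built from the four s x s hashes it contains. */
+static void build_next_level(const uint32_t *src, uint32_t *dst,
+                             int w, int h, int s) {
+    for (int y = 0; y + 2 * s <= h; y++)
+        for (int x = 0; x + 2 * s <= w; x++) {
+            uint32_t hv = src[y * w + x];
+            hv = mix(hv, src[y * w + (x + s)]);
+            hv = mix(hv, src[(y + s) * w + x]);
+            hv = mix(hv, src[(y + s) * w + (x + s)]);
+            dst[y * w + x] = hv; /* hash of the 2s x 2s block at (x, y) */
+        }
+}
+```
+
+Starting from the 2x2 hashes, repeated calls with s = 2, 4, 8, … produce the
+4x4, 8x8, … hashes that populate the hash table.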
 
 ### Control Tokens/flags
 
@@ -105,31 +122,37 @@ The control tokens and flags associated
 
 ##### Table 1. Control tokens and flags for the IBC feature.
 
-|**Flag**|**Level(Sequence/Picture)**|**Description**|
-|--- |--- |--- |
-|-scm|Sequence|Command line token. 0: No IBC, 1: IBC ON 2:Auto mode (detector based)|
-|-intrabc-mode|Configuration|Command line token to specify IBC mode.  0: OFF, 1-3: IBC ON with intrabc levels mentioned below. , -1: Default behavior|
-|intrabc_level|Picture|Controls the complexity-quality trade-offs of the feature. 0: OFF, 1-6 ON|
-|allow_intrabc|Picture|For intra pictures, set to 1 when IBC is allowed, else set to 0.|
+| **Flag**      | **Level(Sequence/Picture)** | **Description**                                                                                                         |
+| ---           | ---                         | ---                                                                                                                     |
+| -scm          | Sequence                    | Command line token. 0: No IBC, 1: IBC ON, 2: Auto mode (detector based)                                                   |
+| -intrabc-mode | Configuration               | Command line token to specify IBC mode. 0: OFF, 1-3: IBC ON with the intrabc levels mentioned below, -1: Default behavior |
+| intrabc_level | Picture                     | Controls the complexity-quality trade-offs of the feature. 0: OFF, 1-6 ON                                               |
+| allow_intrabc | Picture                     | For intra pictures, set to 1 when IBC is allowed, else set to 0.                                                        |
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
 ##### Table 2. Optimization signals associated with the IBC feature.
 
-|**Signal**|**Description**|
-|--- |--- |
-|enabled| |
-|ibc_shift|After the full-pel diamond search, a full-pel exhaustive search may be performed if the variance of the best residual out of the diamond search is still above a certain threshold.  ibc_shift will shift the threshold to the left (i.e. double the threshold value), making the exhaustive search less likely to be performed. (0: No Shift; 1: Shift to left by 1).|
-|ibc_direction|Directions to perform IBC search for.  0: Left + Top; 1: Top only|
-|hash_4x4_blocks|Set by get_disallow_4x4() to not hash 4x4 blocks for higher presets where 4x4 blocks are not encoded|
-|max_block_size_hash|The maximum block size that will be hashed; corresponds to the maximum block size for which an MD candidate will be generated by IBC hashing algorithm.|
-
+| **Signal**          | **Description**                                                                                                                                                                                                                                                                                                                                                       |
+| ---                 | ---                                                                                                                                                                                                                                                                                                                                                                   |
+| enabled             | Enable/disable the IBC feature                                                                                                                                                                                                                                                        |
+| ibc_shift           | After the full-pel diamond search, a full-pel exhaustive search may be performed if the variance of the best residual out of the diamond search is still above a certain threshold. ibc_shift will shift the threshold to the left (i.e. double the threshold value), making the exhaustive search less likely to be performed. (0: No Shift; 1: Shift to left by 1). |
+| ibc_direction       | Directions in which to perform the IBC search. 0: Left + Top; 1: Top only                                                                                                                                                                                                            |
+| hash_4x4_blocks     | Set by get_disallow_4x4() to not hash 4x4 blocks for higher presets where 4x4 blocks are not encoded                                                                                                                                                                                                                                                                  |
+| max_block_size_hash | The maximum block size that will be hashed; corresponds to the maximum block size for which an MD candidate will be generated by the IBC hashing algorithm.                                                                                                                           |
 
-## 4.  Signaling
 
-The main signal that is sent in the bitstream to enable IBC is allow_intrabc that is sent in the frame header. Note that IBC is
-only allowed for Intra coded frames. In the sequence header screen content tools must be enabled to use IBC at the frame level.
+## 4. Signaling
+
+The main signal sent in the bitstream to enable IBC is allow_intrabc, which is
+coded in the frame header. Note that IBC is only allowed for intra coded
+frames. Screen content tools must be enabled in the sequence header in order
+to use IBC at the frame level.
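+
+A minimal sketch of this dependency (names illustrative):
+
+```c
+/* allow_intrabc may only be set for intra frames, and only when screen
+ * content tools are enabled in the sequence header. */
+static int may_set_allow_intrabc(int seq_screen_content_tools,
+                                 int frame_is_intra_only) {
+    return seq_screen_content_tools && frame_is_intra_only;
+}
+```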
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-IPP-Pass.md 1.2.0+dfsg-2/Docs/Appendix-IPP-Pass.md
--- 0.9.1+dfsg-1/Docs/Appendix-IPP-Pass.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-IPP-Pass.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,8 +1,22 @@
+[Top level](../README.md)
+
 # IPP Encoding Pass Appendix
 
 ## 1. Description of the Algorithm
 
-The IPP encoding pass of SVT-AV1 encoder is a light encoding process that processes all of the input video pictures and collects statistics while consuming negligible time as compared to the time it takes to run the final encoding. The IPP pass makes use of a flat IPP… prediction structure where the input pictures are used as reference pictures. The IPP encoding pass can be used as a first pass in the multi-pass encoding mode as shown in Figure 1, or as a statistics generation tool based on a lookahead window when it is enabled in the single pass encoding mode as shown in Figure 2.  In both encoding modes, when IPP pass is enabled, the input data from the source clip is analyzed and stored in a local buffer. The collected data from the IPP pass is then used to achieve the best encoding complexity-quality trade-offs. The collected data may be used to select the suitable prediction structure for the next encoding pass or may be used to improve the rate control behavior.
+The IPP encoding pass of the SVT-AV1 encoder is a light encoding process that
+processes all of the input video pictures and collects statistics while
+consuming negligible time compared to the final encoding. The IPP pass makes
+use of a flat IPP… prediction structure where the input pictures are used as
+reference pictures. The IPP encoding pass can be used as a first pass in the
+multi-pass encoding mode as shown in Figure 1, or as a statistics generation
+tool based on a lookahead window when it is enabled in the single pass
+encoding mode as shown in Figure 2. In both encoding modes, when the IPP pass
+is enabled, the input data from the source clip is analyzed and stored in a
+local buffer. The collected data from the IPP pass is then used to achieve the
+best encoding complexity-quality trade-offs; it may be used to select a
+suitable prediction structure for the next encoding pass or to improve the
+rate control behavior.
 
 ![ipp_figure1](./img/ipp_figure1.PNG)
 ##### Figure 1. Multi-pass encoding mode with IPP pass.
@@ -21,36 +35,38 @@ The IPP encoding pass of SVT-AV1 encoder
 Table 1 summarises the output statistic variables.
 
 ##### Table 1: Frame-level IPP pass output statistics.
-| **Parameter**        | **Description**  |
-| -----------          | -----------------|
-| intra_error          | Intra prediction error |
-| coded_error          | Best of intra pred error and inter pred error using last frame as ref. |
-| sr_coded_error       | Best of intra pred error and inter pred error using golden frame as ref. |
-| mv_count             | Count of motion vector |
-| inter_count          | Count of blocks that pick inter prediction (inter pred error is smaller than intra pred error)  |
-| second_ref_count     | Count of blocks that pick second ref (golden frame)  |
-| neutral_count        | Count of blocks where the inter and intra prediction distortions are very close and small  |
-| intra_skip_count     | Count of blocks where the intra prediction error is very small  |
-| image_data_start_row | Start row  |
-| sum_in_vectors       | Sum of inward motion vectors |
-| sum_mvc              | Sum of motion vector x component  |
-| sum_mvr_abs          | Sum of absolute value of motion vector y component |
-| sum_mvc_abs          | Sum of absolute value of motion vector x component |
-| intra_factor         | A factor calculated using intra pred error |
-| brightness_factor    | A factor that measures brightness |
+| **Parameter**        | **Description**                                                                                |
+| -----------          | -----------------                                                                              |
+| intra_error          | Intra prediction error                                                                         |
+| coded_error          | Best of intra pred error and inter pred error using last frame as ref.                         |
+| sr_coded_error       | Best of intra pred error and inter pred error using golden frame as ref.                       |
+| mv_count             | Count of motion vectors                                                                        |
+| inter_count          | Count of blocks that pick inter prediction (inter pred error is smaller than intra pred error) |
+| second_ref_count     | Count of blocks that pick second ref (golden frame)                                            |
+| neutral_count        | Count of blocks where the inter and intra prediction distortions are very close and small      |
+| intra_skip_count     | Count of blocks where the intra prediction error is very small                                 |
+| image_data_start_row | Start row                                                                                      |
+| sum_in_vectors       | Sum of inward motion vectors                                                                   |
+| sum_mvc              | Sum of motion vector x component                                                               |
+| sum_mvr_abs          | Sum of absolute value of motion vector y component                                             |
+| sum_mvc_abs          | Sum of absolute value of motion vector x component                                             |
+| intra_factor         | A factor calculated using intra pred error                                                     |
+| brightness_factor    | A factor that measures brightness                                                              |
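+
+Gathered as a struct, the statistics of Table 1 could look like the sketch
+below; the field names follow the table, but the types and layout are
+illustrative rather than the actual SVT-AV1 definition.
+
+```c
+#include <stdint.h>
+
+typedef struct {
+    int64_t intra_error;          /* intra prediction error                   */
+    int64_t coded_error;          /* best of intra / inter (last frame ref)   */
+    int64_t sr_coded_error;       /* best of intra / inter (golden frame ref) */
+    int32_t mv_count;             /* count of motion vectors                  */
+    int32_t inter_count;          /* blocks where inter beats intra           */
+    int32_t second_ref_count;     /* blocks that pick the golden frame        */
+    int32_t neutral_count;        /* inter/intra errors close and small       */
+    int32_t intra_skip_count;     /* blocks with very small intra error       */
+    int32_t image_data_start_row; /* start row                                */
+    int32_t sum_in_vectors;       /* sum of inward motion vectors             */
+    int32_t sum_mvc;              /* sum of MV x components                   */
+    int32_t sum_mvr_abs;          /* sum of |MV y component|                  */
+    int32_t sum_mvc_abs;          /* sum of |MV x component|                  */
+    double  intra_factor;         /* factor derived from intra pred error     */
+    double  brightness_factor;    /* brightness measure                       */
+} FirstPassFrameStats;
+```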
 
 ### 2.2. IPP Pass API
 
-Table 2 summarises the invoked functions when IPP pass is enabled. The process where each function is called is indicated as well as a brief description of each function.
+Table 2 summarises the functions invoked when the IPP pass is enabled. The
+process in which each function is called is indicated, along with a brief
+description of each function.
 
 ##### Table 2: IPP pass main function calls.
-| **Process**               | **Function**             | **Purpose**      |
-| -----------               | -----------------        | -----------------|
-| motion_estimation_kernel  | setup_firstpass_data_seg | Initialize the IPP data |
-| motion_estimation_kernel  | first_pass_me            | Source-based Motion estimation |
-| motion_estimation_kernel  | first_pass_frame_seg     | Source-based Intra prediction and Motion compensation |
-| motion_estimation_kernel  | first_pass_frame_end     | Normalize and accumulate statistics |
-| motion_estimation_kernel  | svt_av1_end_first_pass   | Output statistics when multi-pass encoding mode is used |
+| **Process**              | **Function**             | **Purpose**                                             |
+| -----------              | -----------------        | -----------------                                       |
+| motion_estimation_kernel | setup_firstpass_data_seg | Initialize the IPP data                                 |
+| motion_estimation_kernel | first_pass_me            | Source-based Motion estimation                          |
+| motion_estimation_kernel | first_pass_frame_seg     | Source-based Intra prediction and Motion compensation   |
+| motion_estimation_kernel | first_pass_frame_end     | Normalize and accumulate statistics                     |
+| motion_estimation_kernel | svt_av1_end_first_pass   | Output statistics when multi-pass encoding mode is used |
 
 ### Details of the Implementation
 
@@ -59,7 +75,7 @@ The IPP pass is enabled when a multi-pas
 1. Initialisation of all statistics structures and variables.
 2. If a reference picture is not enabled yet, as in the example of the first picture in the input sequence, then an intra DC prediction takes place and the statistics are updated accordingly.
 3. If at least one reference picture is available, then motion estimation takes place, followed by an intra DC prediction and motion compensation. According to the generated prediction errors and the motion information, the prediction and coding statistics are updated for each block.
-4. After processing all blocks within a picture, the generated statistics are accumulated,  normalized and stored for further use.
+4. After processing all blocks within a picture, the generated statistics are accumulated, normalized and stored for further use.
 
 Figures 3 and 4 summarise the data flow of the IPP pass.
 
@@ -71,7 +87,9 @@ Figures 3 and 4 summarises the data flow
 
 ## 3. Optimization of the Algorithm
 
-The IPP pass algorithm can be configured in different ways to achieve its best quality-complexity tradoffs depending on the application. The control parameters used to configure the IPP pass are described in Table 3.
+The IPP pass algorithm can be configured in different ways to achieve its best
+quality-complexity tradeoffs depending on the application. The control
+parameters used to configure the IPP pass are described in Table 3.
 
 ##### Table 3: Control flags associated with the IPP pass.
 | **Flag**              | **Level (Sequence/Picture)** | **Description**      |
@@ -84,8 +102,16 @@ The IPP pass algorithm can be configured
 | use8blk               | Sequence                     | 0: OFF, 1: Use 8x8 blocks instead of 16x16 blocks in the IPP search algorithms |
 | reduce_me_search      | Sequence                     | 0: OFF, 1: Reduce HME and ME search area size |
 
-Note that in the multi-pass encoding mode, the encode pipeline of the IPP pass has been modified to involve only the necessary encoder kernels, such motion estimation kernel, and all unnecessary kernels such as Rate Control, Mode Decision, Encode-decode, Deblocking, CDEF, Restoration, Entropy Coder… are bypassed.
+Note that in the multi-pass encoding mode, the encode pipeline of the IPP pass
+has been modified to involve only the necessary encoder kernels, such as the
+motion estimation kernel; all unnecessary kernels, such as Rate Control, Mode
+Decision, Encode-decode, Deblocking, CDEF, Restoration, Entropy Coder…, are
+bypassed.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Local-Warped-Motion.md 1.2.0+dfsg-2/Docs/Appendix-Local-Warped-Motion.md
--- 0.9.1+dfsg-1/Docs/Appendix-Local-Warped-Motion.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Local-Warped-Motion.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,10 +1,13 @@
+[Top level](../README.md)
+
 # Local Warped Motion Compensation
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
-The warped motion mode is an inter-prediction mode where the prediction is generated by applying an (affine) transform to the
-reference. AV1 has two affine prediction modes: global warped motion and local warped motion (LW).
-The latter is discussed in more detail in the following.
+The warped motion mode is an inter-prediction mode where the prediction is
+generated by applying an (affine) transform to the reference. AV1 has two
+affine prediction modes: global warped motion and local warped motion (LW). The
+latter is discussed in more detail in the following.
 
 AV1 has three types of motion modes that specify the motion of a
 block, namely SIMPLE, OBMC and LW. LW motion estimation provides a description
@@ -12,13 +15,14 @@ of the type of local motion. Minimal sig
 is realized by signaling one flag in the inter block mode info, and
 that only under some conditions. LW cannot be combined with OBMC.
 
-Warped motion compensation concerns the estimation and compensation of small local motion for a given block.
-The feature makes use of motion vector information for neighboring blocks to extract the affine local motion model parameters.
-The general motion model for local warped motion is given by
+Warped motion compensation concerns the estimation and compensation of small
+local motion for a given block. The feature makes use of motion vector
+information for neighboring blocks to extract the affine local motion model
+parameters. The general motion model for local warped motion is given by
 
 ![local_warped_motion_math1](./img/local_warped_motion_math1.png)
 
-where ![local_warped_motion_math2](./img/local_warped_motion_math2.png) and ![local_warped_motion_math3](./img/local_warped_motion_math3.png)  represent the sample pixel coordinates in the current
+where ![local_warped_motion_math2](./img/local_warped_motion_math2.png) and ![local_warped_motion_math3](./img/local_warped_motion_math3.png) represent the sample pixel coordinates in the current
 and reference frames, respectively. The decoder performs the same model
 estimation, so the encoder needs only to signal whether local warped
 motion is the selected mode for the current block and the corresponding
@@ -100,8 +104,9 @@ The shear parameters ![latexmath](http:/
 the horizontal and vertical shears are evaluated using 8-tap
 interpolation filters with ![latexmath](http://latex.codecogs.com/gif.latex?1/64^{th}) pel precision.
 
-The final warped motion model is applied on an 8x8 basis in the reference frame. The predicted block is constructed by
-assembling the 8x8 predicted warped blocks from the reference picture.
+The final warped motion model is applied on an 8x8 basis in the reference
+frame. The predicted block is constructed by assembling the 8x8 predicted
+warped blocks from the reference picture.
 
 ![local_warped_motion_fig2](./img/local_warped_motion_fig2.png)
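+
+The per-8x8 application can be pictured with the sketch below, assuming a
+floating-point affine model and nearest-neighbour sampling; the real code
+works in fixed point with 1/64-pel precision, 8-tap shear filters and boundary
+clamping, all omitted here, and the names are illustrative.
+
+```c
+#include <stdint.h>
+
+typedef struct {
+    double m[6]; /* x' = m0*x + m1*y + m2 ;  y' = m3*x + m4*y + m5 */
+} WarpModel;
+
+static void warp_block_8x8(const WarpModel *wm, int bx, int by,
+                           const uint8_t *ref, int ref_stride,
+                           uint8_t *pred, int pred_stride) {
+    for (int y = 0; y < 8; y++)
+        for (int x = 0; x < 8; x++) {
+            const double sx = wm->m[0] * (bx + x) + wm->m[1] * (by + y) + wm->m[2];
+            const double sy = wm->m[3] * (bx + x) + wm->m[4] * (by + y) + wm->m[5];
+            pred[y * pred_stride + x] =
+                ref[(int)(sy + 0.5) * ref_stride + (int)(sx + 0.5)];
+        }
+}
+```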
 
@@ -111,7 +116,7 @@ At the decoder side, the affine transfor
 block-level using as input the motion vectors of the current and
 neighboring blocks.
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 **Control tokens/flags**:
 
@@ -120,11 +125,11 @@ indicated in Table 1.
 
 ##### Table 1. Control tokens/flags associated with the LW feature.
 
-| **Flag**                      | **Level (sequence/Picture)** | **Description**                            |
-| ----------------------------- | ---------------------------- | ------------------------------------------ |
-| enable_warped_motion          | Sequence                     | Encoder configuration parameter to allow/disallow LW in the encoding process  for the whole sequence.  |
-| allow\_warped\_motion         | Picture                      | Allow/disable LW at the picture level              |
-| wm\_ctrls                     | Super-block                  | Control the number of LW candidates to be considered in the mode decision.       |
+| **Flag**                      | **Level (sequence/Picture)** | **Description**                                                                                       |
+| ----------------------------- | ---------------------------- | ------------------------------------------                                                            |
+| enable_warped_motion          | Sequence                     | Encoder configuration parameter to allow/disallow LW in the encoding process for the whole sequence.  |
+| allow\_warped\_motion         | Picture                      | Allow/disable LW at the picture level                                                                 |
+| wm\_ctrls                     | Super-block                  | Control the number of LW candidates to be considered in the mode decision.                            |
 
 
 **Details of the implementation**
@@ -135,11 +140,11 @@ Figure 3 below summarizes the data flow
 
 ##### Figure 3. Data flow for the LW feature.
 
-As with other prediction mode candidates in the encoder, candidates for the LW mode are first injected into MD and then processed
-through several MD stages of RD optimization.
-A high-level diagram of the function calls relevant to the two main LW functions, namely
-```inject_inter_candidates``` and ```warped_motion_prediction``` is given in
-Figure 4 below.
+As with other prediction mode candidates in the encoder, candidates for the LW
+mode are first injected into MD and then processed through several MD stages of
+RD optimization. A high-level diagram of the function calls relevant to the two
+main LW functions, namely ```inject_inter_candidates``` and
+```warped_motion_prediction``` is given in Figure 4 below.
 
 ![local_warped_motion_fig_new2](./img/local_warped_motion_fig_new2.png)
 
@@ -238,7 +243,7 @@ are outlined below.
 
         2.  ```av1_make_masked_warp_inter_predictor```: Called only in the case of compound
             reference candidate where the inter-inter compound type is COMPOUND_WEDGE or
-            COMPOUND_DIFFWTD. Generates the predictions for both of those two compound types.  The
+            COMPOUND_DIFFWTD. Generates the predictions for both of those two compound types. The
             first step is to build the mask for the case of the COMPOUND_DIFFWTD inter-inter compound
             type using the function ```av1_build_compound_diffwtd_mask_d16```. The next step is to generate
             the predictions using the function ```build_masked_compound_no_round``` as follows:
@@ -260,14 +265,14 @@ are outlined below.
             motion prediction using the forward offset and backward offset weights associated with the COMPOUND_DISTWTD mode.
             This last step is performed at the level of 8x8 blocks, until the prediction for the entire block is generated.
 
-    2.  ```chroma_plane_warped_motion_prediction_sub8x8```: Generates chroma warped motion
+    2. ```chroma_plane_warped_motion_prediction_sub8x8```: Generates chroma warped motion
         predictions for blocks that are smaller than 16x16. The function
         ```av1_dist_wtd_comp_weight_assign``` is first called to generate the mask for the
         COMPOUND_DISTWTD case. The appropriate function in the function array
         ```convolve[][][]``` / ```convolveHbd[][][]``` is then called to generate the prediction
         using the forward offset and the backward offset weights.
 
-2.  Compute RD for the LW prediction. Rate includes the signaling of
+2. Compute RD for the LW prediction. Rate includes the signaling of
     the syntax element ```motion_mode```
 
 <!-- end list -->
@@ -275,18 +280,23 @@ are outlined below.
 **Step 3**: Generate the final warped motion predictions in the encode pass.
 The main relevant function is warped_motion_prediction which is described above.
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
-The injection of the LW motion candidates is performed if the following is true: ```allow_warped_motion``` is set AND the block has
-overlappable candidates AND ```bwidth >= 8``` AND ```bheight >= 8``` AND enable flag (in the LW controls structure) is set to 1.
+The injection of the LW motion candidates is performed if the following is
+true: ```allow_warped_motion``` is set AND the block has overlappable
+candidates AND ```bwidth >= 8``` AND ```bheight >= 8``` AND the enable flag (in
+the LW controls structure) is set to 1 (see the sketch below).
 
 The injection of LW candidates is not allowed for ```PD_PASS_0```.
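+
+The gate can be summarized with the following sketch; the arguments stand in
+for the actual SVT-AV1 fields.
+
+```c
+static int allow_lw_injection(int allow_warped_motion, int has_overlappable,
+                              int bwidth, int bheight, int wm_enabled,
+                              int is_pd_pass_0) {
+    if (is_pd_pass_0) return 0; /* no LW candidates in PD_PASS_0 */
+    return allow_warped_motion && has_overlappable &&
+           bwidth >= 8 && bheight >= 8 && wm_enabled;
+}
+```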
 
-In mode decision, the picture-level flag wm_level controls the complexity-quality tradeoffs associated with the LW feature.
-The flag is set is set in ```signal_derivation_mode_decision_config_kernel_oq``` and control the LW optimization signals listed in
-Table 2 below.
-
-The wm_level is set to zero for Intra pictures or when ```error_resilient_mode``` is enabled or when ```frame_super-resolution``` is enabled.
+In mode decision, the picture-level flag wm_level controls the
+complexity-quality tradeoffs associated with the LW feature. The flag is set
+in ```signal_derivation_mode_decision_config_kernel_oq``` and controls the
+LW optimization signals listed in Table 2 below.
+
+The wm_level is set to zero for Intra pictures or when
+```error_resilient_mode``` is enabled or when ```frame_super-resolution``` is
+enabled.
 
 ##### Table 2. Optimization signals associated with the LW feature.
 |**Signal**|**Level**|**Description**|
@@ -295,19 +305,25 @@ The wm_level is set to zero for Intra pi
 |use_wm_for_mvp|Super-block|Allow/disallow the injection of MVP-based LW candidates|
 |num_new_mv_refinement|Super-block|Define the number of refinement positions around the NEW_MVs [0..12]|
 
-## 4.  Signaling
+## 4. Signaling
 
-The configuration flag ```enable_local_warp_flag``` controls the encoder use of LW at the sequence level.
-At the frame level, the use of LW is controlled by ```allow_warped_motion```. At the block level, the use of LW is signaled by the syntax element ```motion_mode```,
-which indicates the type of motion for a block: simple translation, OBMC, or warped motion.
+The configuration flag ```enable_local_warp_flag``` controls the encoder's use of
+LW at the sequence level. At the frame level, the use of LW is controlled by
+```allow_warped_motion```. At the block level, the use of LW is signaled by the
+syntax element ```motion_mode```, which indicates the type of motion for a
+block: simple translation, OBMC, or warped motion.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## Reference
 [1] Sarah Parker, Yue Chen, David Barker, Peter de Rivaz, Debargha Mukherjee,
-“Global and Locally Adaptive Warped Motion Compensation in Video Compression,”  IEEE International Conference on Image Processing (ICIP), pp. 275-279, 2017.
+“Global and Locally Adaptive Warped Motion Compensation in Video Compression,” IEEE International Conference on Image Processing (ICIP), pp. 275-279, 2017.
 
-[2] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James  Bankoski,
+[2] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James Bankoski,
 “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Mode-Decision.md 1.2.0+dfsg-2/Docs/Appendix-Mode-Decision.md
--- 0.9.1+dfsg-1/Docs/Appendix-Mode-Decision.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Mode-Decision.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,10 +1,24 @@
+[Top level](../README.md)
+
 # Mode Decision and Encode Pass Appendix
 
 ## High Level Description
 
-The Mode Decision process encapsulates the partitioning decision, mode decisions, and conformant AV1 encoding.  The partitioning decision (PD) stages output the final partitioning of each SB, along with the final mode decision for each block.  Mode decision (MD) happens within each PD stage and is where many of the encoder tasks such as Intra Prediction, Motion Compensated Prediction, Transform, and Quantization are performed.  Finally, the final partitioning and mode decisions are passed to the Encode Pass, where conformant reconstructed samples are generated and final mode info is stored, to be passed to in-loop filters and entropy coding.  The flow of the  Mode Decision process is summarized in Figure 1.
-
-The Mode Decision  process takes as input the motion vector and distortion estimations from the Motion Estimation process and the picture-level QP from the Rate Control process. The  Mode Decision process operates on an SB basis (i.e. each step in the Mode Decision  process is performed for each SB).
+The Mode Decision process encapsulates the partitioning decision, mode
+decisions, and conformant AV1 encoding. The partitioning decision (PD) stages
+output the final partitioning of each SB, along with the final mode decision
+for each block. Mode decision (MD) happens within each PD stage and is where
+many of the encoder tasks such as Intra Prediction, Motion Compensated
+Prediction, Transform, and Quantization are performed. Finally, the final
+partitioning and mode decisions are passed to the Encode Pass, where conformant
+reconstructed samples are generated and final mode info is stored, to be passed
+to in-loop filters and entropy coding. The flow of the Mode Decision process is
+summarized in Figure 1.
+
+The Mode Decision process takes as input the motion vector and distortion
+estimations from the Motion Estimation process and the picture-level QP from
+the Rate Control process. The Mode Decision process operates on an SB basis
+(i.e. each step in the Mode Decision process is performed for each SB).
 
 ![md_figure1](./img/md_figure1.png)
 
@@ -12,15 +26,43 @@ The Mode Decision  process takes as inpu
 
 ### Partitioning Decision
 
-Given the large number of block sizes and the large number of modes that could be considered for each block, it would be computationally very expensive to evaluate all options using all available prediction and coding tools to converge on the final partitioning and coding modes. Consequently, a staged decision approach is considered in SVT-AV1 as shown in Figure 2.  Each PD stage performs MD to determine the best partitions to consider (based on rate distortion optimization (RDO) analysis).  The tools used in MD depend on the PD stage being performed.
-
-The process starts with the very large number ![md_math1](./img/md_math1.png) of partitions at the input of partitioning decision stage 0 (PD Stage 0). At this stage, very elementary MD tools and performance measures are used in evaluating the fitness of the different partitions. The best ![md_math2](./img/md_math2.png) partitions are selected and passed on to PD stage 1. More sophisticated MD prediction and performance measure tools are considered in PD Stage 1 to evaluate all the ![md_math3](./img/md_math3.png) input partitions and select the top ![md_math4](./img/md_math4.png) from among the tested ![md_math3](./img/md_math3.png) partitions. The same idea is applied in subsequent steps until PD Stage n where ![md_math5](./img/md_math5.png) partitions will be tested.  The output of the final (nth) PD stage is the final partition and its corresponding coding modes that are selected.
+Given the large number of block sizes and the large number of modes that could
+be considered for each block, it would be computationally very expensive to
+evaluate all options using all available prediction and coding tools to
+converge on the final partitioning and coding modes. Consequently, a staged
+decision approach is considered in SVT-AV1 as shown in Figure 2. Each PD stage
+performs MD to determine the best partitions to consider (based on rate
+distortion optimization (RDO) analysis). The tools used in MD depend on the PD
+stage being performed.
+
+The process starts with the very large number ![md_math1](./img/md_math1.png)
+of partitions at the input of partitioning decision stage 0 (PD Stage 0). At
+this stage, very elementary MD tools and performance measures are used in
+evaluating the fitness of the different partitions. The best
+![md_math2](./img/md_math2.png) partitions are selected and passed on to PD
+stage 1. More sophisticated MD prediction and performance measure tools are
+considered in PD Stage 1 to evaluate all the ![md_math3](./img/md_math3.png)
+input partitions and select the top ![md_math4](./img/md_math4.png) from among
+the tested ![md_math3](./img/md_math3.png) partitions. The same idea is applied
+in subsequent steps until PD Stage n where ![md_math5](./img/md_math5.png)
+partitions will be tested. The output of the final (nth) PD stage is the final
+partition and its corresponding coding modes that are selected.
 
 ![md_figure2](./img/md_figure2.png)
 
 ##### Figure 2. Partitioning decision stages.
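+
+Conceptually, each PD stage re-evaluates the surviving candidates with
+progressively more accurate tools and keeps a shrinking subset, as in the
+sketch below; all names are illustrative, not the actual SVT-AV1 code.
+
+```c
+#include <stdlib.h>
+
+typedef struct {
+    int    id;   /* identifies a candidate SB partitioning */
+    double cost; /* RD cost estimate, refined at each stage */
+} PartCand;
+
+static int cmp_cost(const void *a, const void *b) {
+    const double d = ((const PartCand *)a)->cost - ((const PartCand *)b)->cost;
+    return (d > 0) - (d < 0);
+}
+
+/* Evaluate n_in candidates with stage-dependent accuracy and keep the best
+ * n_out for the next stage. */
+static int pd_stage(PartCand *cands, int n_in, int n_out,
+                    double (*evaluate)(const PartCand *, int stage), int stage) {
+    for (int i = 0; i < n_in; i++)
+        cands[i].cost = evaluate(&cands[i], stage);
+    qsort(cands, (size_t)n_in, sizeof(*cands), cmp_cost);
+    return n_out < n_in ? n_out : n_in;
+}
+```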
 
-An illustration of the different processing details that can take place in each PD stage are given in Figure 3. In this example, PD Stage 0 is based on the ME data. Candidate cost, which includes the MV rate, is used in making decisions to select the best subset of candidate partitions to pass on to PD Stage 1. The latter may involve more precise prediction tools and more accurate cost calculations as part of the MD process to select an even smaller subset of partition candidates to pass on to PD stage 2. For example, PD Stage 1 may use more accurate interpolation filters in the sub-pel search. The same idea is applied in subsequent stages. In the last stage, only very few candidates as considered at the input and usually the full set of high-performance prediction tools are used to finalize the selected partitions and modes.
+An illustration of the different processing details that can take place in
+each PD stage is given in Figure 3. In this example, PD Stage 0 is based on
+the ME data. Candidate cost, which includes the MV rate, is used in making
+decisions to select the best subset of candidate partitions to pass on to PD
+Stage 1. The latter may involve more precise prediction tools and more
+accurate cost calculations as part of the MD process to select an even smaller
+subset of partition candidates to pass on to PD stage 2. For example, PD Stage
+1 may use more accurate interpolation filters in the sub-pel search. The same
+idea is applied in subsequent stages. In the last stage, only very few
+candidates are considered at the input and usually the full set of
+high-performance prediction tools is used to finalize the selected partitions
+and modes.
 
 ![md_figure3](./img/md_figure3.png)
 
@@ -28,11 +70,34 @@ An illustration of the different process
 
 ### Mode Decision
 
-The mode decision takes place in each of the PD Stages 0 to n (where n=1 in the current implementation).  The mode decision tasks performed in each PD stage are structured as shown in Figure 4.  As with the partitioning decisions, multiple MD stages are involved in each PD stage, where the complexity of the MD stages increases from MD Stage 0 to MD Stage n due to the use of more accurate prediction tools and more accurate performance measures.  For each block in the PD stage, MD is performed to determine the best coding mode and cost for the block.
-
-The input candidates to the MD stages are grouped according to classes. In the current implementation, there are four classes corresponding to Intra, Inter (NEWMV and NEW_NEWMV), MV Pred (Nearest, Near…) and Palette prediction candidates.  Once the input candidates to a given MD stage are processed, only the best among the processed candidates are passed on to the next MD stage, hence reducing the number of candidates to be processed in the subsequent stage. In the process, some classes might be retired (not considered in subsequent MD stages) if the performance of their corresponding candidates is not satisfactory. The main idea behind introducing candidate classes is to ensure that important types of candidates are given a chance to be present in the final MD stage and to compete at that stage against the best from other candidate classes.
-
-It should be noted that the prediction tools considered in MD are not necessarily conformant tools, as the objective of MD is to produce partitioning and mode decisions, and not necessarily residuals to be coded and transmitted in the bitstream, which is the task performed by the Encode Pass discussed next.  The exception is that final MD stage must be conformant when the Encode Pass is bypassed (controlled using the ```bypass_encdec``` signal).
+The mode decision takes place in each of the PD Stages 0 to n (where n=1 in the
+current implementation). The mode decision tasks performed in each PD stage are
+structured as shown in Figure 4. As with the partitioning decisions, multiple
+MD stages are involved in each PD stage, where the complexity of the MD stages
+increases from MD Stage 0 to MD Stage n due to the use of more accurate
+prediction tools and more accurate performance measures. For each block in the
+PD stage, MD is performed to determine the best coding mode and cost for the
+block.
+
+The input candidates to the MD stages are grouped according to classes. In the
+current implementation, there are four classes corresponding to Intra, Inter
+(NEWMV and NEW_NEWMV), MV Pred (Nearest, Near…) and Palette prediction
+candidates. Once the input candidates to a given MD stage are processed, only
+the best among the processed candidates are passed on to the next MD stage,
+hence reducing the number of candidates to be processed in the subsequent
+stage. In the process, some classes might be retired (not considered in
+subsequent MD stages) if the performance of their corresponding candidates is
+not satisfactory. The main idea behind introducing candidate classes is to
+ensure that important types of candidates are given a chance to be present in
+the final MD stage and to compete at that stage against the best from other
+candidate classes.
+
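+A minimal sketch of this class-based screening is given below, assuming
+hypothetical class names and an illustrative class-retirement rule (the actual
+classes and thresholds live in the MD code):
+
+```
+#include <stdint.h>
+
+/* Hypothetical classes mirroring the description above. */
+typedef enum { CLASS_INTRA, CLASS_INTER_NEW, CLASS_MV_PRED, CLASS_PALETTE,
+               NUM_CLASSES } CandClass;
+
+typedef struct {
+    CandClass cls;
+    uint64_t  cost; /* cost from the current MD stage */
+} MdCand;
+
+/* After an MD stage, keep only the best-ranked candidates of each class;
+ * a class whose best cost is far worse than the overall best cost may be
+ * retired entirely (the 4x factor is purely illustrative). */
+static int keep_for_next_stage(const MdCand *c, int rank_in_class,
+                               int max_per_class, uint64_t best_overall,
+                               const uint64_t class_best[NUM_CLASSES]) {
+    if (rank_in_class >= max_per_class)
+        return 0; /* not among the best of its class */
+    if (class_best[c->cls] > 4 * best_overall)
+        return 0; /* retire a class with unsatisfactory candidates */
+    return 1;
+}
+```
+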
+It should be noted that the prediction tools considered in MD are not
+necessarily conformant tools, as the objective of MD is to produce partitioning
+and mode decisions, and not necessarily residuals to be coded and transmitted
+in the bitstream, which is the task performed by the Encode Pass discussed
+next. The exception is that the final MD stage must be conformant when the
+Encode Pass is bypassed (controlled using the ```bypass_encdec``` signal).
 
 ![md_figure4](./img/md_figure4.png)
 
@@ -40,44 +105,73 @@ It should be noted that the prediction t
 
 ### Encode Pass
 
-The encode pass takes as input the selected partitioning and coding modes from mode decision for each SB and produces quantized transform coefficients for the residuals and syntax elements that would be included in an AV1 conformant bit stream. The encode pass includes intra prediction, motion compensation, transform, quantization, inverse quantization, inverse transform, and reconstruction. All the prediction tools considered in the encode pass are conformant tools.
+The encode pass takes as input the selected partitioning and coding modes from
+mode decision for each SB and produces quantized transform coefficients for the
+residuals and syntax elements that would be included in an AV1 conformant bit
+stream. The encode pass includes intra prediction, motion compensation,
+transform, quantization, inverse quantization, inverse transform, and
+reconstruction. All the prediction tools considered in the encode pass are
+conformant tools.
 
 ## Optimizations
 
-Optimizations for PD will reduce the number of partitions (i.e. blocks) tested.  This can be done by limiting allowable block sizes and/or reducing non-square (NSQ) shapes.  Optimizations for MD will reduce the processing needed for each block, by reducing the complexity of the coding tools used in the MD stages.  The quality/complexity trade-offs are achieved by optimizing the number of blocks tested and the processing required per block.
+Optimizations for PD will reduce the number of partitions (i.e. blocks) tested.
+This can be done by limiting allowable block sizes and/or reducing non-square
+(NSQ) shapes. Optimizations for MD will reduce the processing needed for each
+block, by reducing the complexity of the coding tools used in the MD stages.
+The quality/complexity trade-offs are achieved by optimizing the number of
+blocks tested and the processing required per block.
 
 ### Depth Refinement
 
-Depth refinement aims to limit the number of partitions passed from PD stage (N-1) to N.  The output from PD stage (N-1) will be the best partition, based on the MD tools used in that PD stage.  The ```depth_level``` signal controls the number of additional depths that are sent to PD stage N for evaluation.  The available depths are shown in the Table 1.
+Depth refinement aims to limit the number of partitions passed from PD stage
+(N-1) to PD stage N. The output from PD stage (N-1) will be the best
+partition, based on the MD tools used in that PD stage. The ```depth_level```
+signal controls the number of additional depths that are sent to PD stage N
+for evaluation. The available depths are shown in Table 1.
 
 ##### Table 1. Definitions of the partitioning depths.
 
-| **Depth**   | **SQ block size**|
-| ----------- | -----------------|
-| 0           | 128x128          |
-| 1           | 64x64            |
-| 2           | 32x32            |
-| 3           | 16x16            |
-| 4           | 8x8              |
-| 5           | 4x4              |
-
-For example, if PD stage (N-1) selects depth 2 (i.e. a 32x32 block) as the best partitioning, then depth level may specify to test (-1) and (+1) depth from the current depth in PD stage N.  That means PD stage N would test depth 1, 2, and 3.  The best partition depth from PD stage (N-1) is always tested in PD stage N.
+| **Depth**   | **SQ block size** |
+| ----------- | ----------------- |
+| 0           | 128x128           |
+| 1           | 64x64             |
+| 2           | 32x32             |
+| 3           | 16x16             |
+| 4           | 8x8               |
+| 5           | 4x4               |
+
+For example, if PD stage (N-1) selects depth 2 (i.e. a 32x32 block) as the
+best partitioning, then the depth level may specify testing one depth above
+(-1) and one depth below (+1) the current depth in PD stage N. That means PD
+stage N would test depths 1, 2, and 3. The best partition depth from PD stage
+(N-1) is always tested in PD stage N.
 
 The allowable refinements are shown in Table 2 below.
 
 ##### Table 2. Settings for depth_level.
 
-| **Depth_level**   | **Action**                                                                               |
-| ----------------- | -----------------------------------------------------------------------------------------|
-| 0                 | Test only the depth selected by PDN-1 in PDN                                             |
-| 1                 | Test the depth selected by PDN-1 in PDN as well as one depth below and one depth above   |
-| 2                 | Test the depth selected by PDN-1 in PDN as well as two depths below and two depths above |
-
-The signal ```pic_block_based_depth_refinement_level``` acts to reduce the number of partition depths passed from PD stage (N-1) to PD stage N.  The feature uses information from ME and previous PD stages (especially the relative costs of each depth) to eliminate unlikely partition depths.
+| **Depth_level**   | **Action**                                                                                               |
+| ----------------- | -------------------------------------------------------------------------------------------------------- |
+| 0                 | Test only the depth selected by PD stage (N-1) in PD stage N                                             |
+| 1                 | Test the depth selected by PD stage (N-1) in PD stage N as well as one depth below and one depth above   |
+| 2                 | Test the depth selected by PD stage (N-1) in PD stage N as well as two depths below and two depths above |
+
+The signal ```pic_block_based_depth_refinement_level``` acts to reduce the
+number of partition depths passed from PD stage (N-1) to PD stage N. The
+feature uses information from ME and previous PD stages (especially the
+relative costs of each depth) to eliminate unlikely partition depths.
 
 ### Depth Removal
 
-Depth removal (signal ```pic_depth_removal_level```) aims to reduce the number of partition depths passed to PD0 (so some depths would not be tested at all by any PD stage).  The depth removal algorithm uses the ME distortions of each depth to generate a cost estimate for each depth.  The ME distortions are normalized so that each distortion represents a 64x64 area (i.e. a complete SB).  If the absolute cost of a depth is low (below a threshold set based on the level of ```pic_depth_removal_level```) then all depths below that depth are skipped.  Additionally, if the relative cost between depths is low, then lower depths may be skipped.  For example, if:
+Depth removal (signal ```pic_depth_removal_level```) aims to reduce the number
+of partition depths passed to PD0 (so some depths would not be tested at all by
+any PD stage). The depth removal algorithm uses the ME distortions of each
+depth to generate a cost estimate for each depth. The ME distortions are
+normalized so that each distortion represents a 64x64 area (i.e. a complete
+SB). If the absolute cost of a depth is low (below a threshold set based on the
+level of ```pic_depth_removal_level```), then all depths below that depth are
+skipped. Additionally, if the relative cost between depths is low, then lower
+depths may be skipped. For example, if:
 
 ![md_math6](./img/md_math6.png)
 
@@ -85,37 +179,66 @@ then block sizes below 16x16 will not be
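+
+As a rough sketch of this thresholding (hypothetical threshold names; the
+actual thresholds are derived from the ```pic_depth_removal_level``` setting):
+
+```
+#include <stdint.h>
+
+/* Hypothetical sketch of depth removal. ME distortions are normalized to a
+ * 64x64 area so that per-depth costs are directly comparable. Depth indices
+ * follow Table 1 (0 = 128x128 ... 5 = 4x4). Returns how many depths,
+ * starting from depth 0, remain to be tested. */
+static int depths_to_test(const int64_t norm_cost[], int num_depths,
+                          int64_t abs_cost_th, int64_t rel_cost_th) {
+    for (int d = 0; d < num_depths - 1; d++) {
+        /* absolute check: a cheap-enough depth stops the descent */
+        if (norm_cost[d] < abs_cost_th)
+            return d + 1;
+        /* relative check: little gain from the next depth -> stop after it */
+        if (norm_cost[d] - norm_cost[d + 1] < rel_cost_th)
+            return (d + 2 < num_depths) ? d + 2 : num_depths;
+    }
+    return num_depths;
+}
+```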
 
 ### Light MD Paths
 
+Some PD stages may employ so-called light MD paths. The light paths use
+lighter MD prediction tools than would regularly be used, to reduce the
+computations performed at a given PD stage. The MD path for each PD stage is
+selected based on previously available information (such as ME information or
+results from previous PD stages). In this way, the prediction and coding of
+“easy” SBs that require only light prediction tools can be performed with
+fewer computations by avoiding the more complex prediction tools.
 
-The MD paths are controlled by the signals; ```pic_pd0_level``` (for MD within PD stage 0) and ```pic_lpd1_lvl``` (for MD within PD stage 1).
+The MD paths are controlled by the signals ```pic_pd0_level``` (for MD within
+PD stage 0) and ```pic_lpd1_lvl``` (for MD within PD stage 1).
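+
+A minimal sketch of such a selection, assuming a single hypothetical
+"easy SB" threshold (the real decision logic is in the functions that set
+```pic_pd0_level``` and ```pic_lpd1_lvl```):
+
+```
+#include <stdint.h>
+
+typedef enum { MD_PATH_LIGHT, MD_PATH_REGULAR } MdPath;
+
+/* Hypothetical selection: an SB whose ME distortion is small is treated as
+ * "easy" and routed to the light MD path for this PD stage. */
+static MdPath select_md_path(uint64_t me_distortion, uint64_t easy_sb_th) {
+    return (me_distortion < easy_sb_th) ? MD_PATH_LIGHT : MD_PATH_REGULAR;
+}
+```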
 
 ### Bypass Encode Pass
 
-The ```bypass_encdec``` signal allows the Encode Pass to be skipped.  When this signal is enabled, the final MD stage of the final PD stage must be conformant, i.e. it must produce AV1 conformant reconstructed samples and signal all coding mode information required to produce a conformant bitstream.
+The ```bypass_encdec``` signal allows the Encode Pass to be skipped. When this
+signal is enabled, the final MD stage of the final PD stage must be conformant,
+i.e. it must produce AV1 conformant reconstructed samples and signal all coding
+mode information required to produce a conformant bitstream.
 
 ## Neighbor Array
 
-The Neighbor Array is a structure that manages neighboring block information in the  Mode Decision process by continuously updating its memory locations throughout the encoding process. The Neighbor Array replaces the normal entire-picture block array solutions that are used to access neighboring block data. There are three neighbor array types: Top block, Left block, and Top-left block as illustrated in Figure 5. Also note that the neighbor array design can store either mode information directly or reference data indirectly (e.g. pointers).
+The Neighbor Array is a structure that manages neighboring block information in
+the Mode Decision process by continuously updating its memory locations
+throughout the encoding process. The Neighbor Array replaces the normal
+entire-picture block array solutions that are used to access neighboring block
+data. There are three neighbor array types: Top block, Left block, and Top-left
+block as illustrated in Figure 5. Also note that the neighbor array design can
+store either mode information directly or reference data indirectly (e.g.
+pointers).
 
 ![md_figure5](./img/md_figure5.png)
 
 ##### Figure 5. Neighbor array structure.
 
-The Neighbor Array design hinges on how its memory locations are accessed and updated. The Left Neighbor Array is approximately one SB tall and is accessed by using the SB y-location of the current block. The Top Neighbor Array is approximately one Picture wide and is accessed by using the x-location of the current block. The Top-Left Neighbor Array is accessed as seen in Figure 5.
+The Neighbor Array design hinges on how its memory locations are accessed and
+updated. The Left Neighbor Array is approximately one SB tall and is accessed
+by using the SB y-location of the current block. The Top Neighbor Array is
+approximately one Picture wide and is accessed by using the x-location of the
+current block. The Top-Left Neighbor Array is accessed as seen in Figure 5.
 
-The basic processing flow is that at the beginning of a picture, each of the Neighbor Arrays is reset. As each block is completed, its mode information or reference information is written to each of the Neighbor Arrays using the appropriate index. The Neighbor Array update and access flow can be described as follows:
+The basic processing flow is that at the beginning of a picture, each of the
+Neighbor Arrays is reset. As each block is completed, its mode information or
+reference information is written to each of the Neighbor Arrays using the
+appropriate index. The Neighbor Array update and access flow can be described
+as follows:
 
-1.  Construct neighbor information using the Neighbor Arrays
+1. Construct neighbor information using the Neighbor Arrays
 
-2.  Block Mode Decision
+2. Block Mode Decision
 
-3.  Update each of the Neighbor Arrays using the current block location
+3. Update each of the Neighbor Arrays using the current block location
 
-4.  If at a partitioning (Quadtree or otherwise) mode decision point, update the neighbor array
+4. If at a partitioning (Quadtree or otherwise) mode decision point, update the neighbor array
 
-5.  Proceed to the next block
+5. Proceed to the next block
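+
+The sketch below illustrates this access/update pattern with a simplified
+left/top array pair storing one mode byte per 4x4 unit (hypothetical sizes
+and mode type; the actual implementation also maintains the top-left array):
+
+```
+#include <stdint.h>
+#include <string.h>
+
+#define SB_SIZE   64
+#define PIC_WIDTH 1920
+
+typedef struct {
+    uint8_t left[SB_SIZE / 4];  /* ~one SB tall, indexed by the SB y-location */
+    uint8_t top[PIC_WIDTH / 4]; /* ~one picture wide, indexed by picture x */
+} NeighborArrays;
+
+static void reset_neighbor_arrays(NeighborArrays *na) {
+    memset(na, 0, sizeof(*na)); /* reset at the beginning of a picture */
+}
+
+/* Step 1: read the top neighbor's mode for the block at picture x blk_x. */
+static uint8_t top_neighbor(const NeighborArrays *na, int blk_x) {
+    return na->top[blk_x / 4];
+}
+
+/* Step 3: after MD for a block, write its mode over the units it covers. */
+static void update_after_block(NeighborArrays *na, int blk_x, int sb_y,
+                               int blk_w, int blk_h, uint8_t mode) {
+    for (int i = 0; i < blk_w / 4; i++) na->top[blk_x / 4 + i] = mode;
+    for (int j = 0; j < blk_h / 4; j++) na->left[sb_y / 4 + j] = mode;
+}
+```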
 
-This process is illustrated in Figure 6. The arrows represent the block z-scan coding order and the colors represent each block’s mode information. The three neighbor arrays contain a snapshot of the mode information currently stored in each block position at the time that the block labeled “Current Block” is being processed.
+This process is illustrated in Figure 6. The arrows represent the block z-scan
+coding order and the colors represent each block’s mode information. The three
+neighbor arrays contain a snapshot of the mode information currently stored in
+each block position at the time that the block labeled “Current Block” is being
+processed.
 
 ![md_figure6](./img/md_figure6.png)
 
@@ -123,4 +246,8 @@ This process is illustrated in Figure 6.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Open-Loop-Motion-Estimation.md 1.2.0+dfsg-2/Docs/Appendix-Open-Loop-Motion-Estimation.md
--- 0.9.1+dfsg-1/Docs/Appendix-Open-Loop-Motion-Estimation.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Open-Loop-Motion-Estimation.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,48 +1,73 @@
+[Top level](../README.md)
+
 # Open-Loop Motion Estimation
 
-The Motion Estimation (ME) process generates inter-prediction candidates using highly parallelizable, open loop, neighbor-independent methods.
-In the current SVT-AV1 encoder, the ME process is based on the input pictures, i.e. the reference pictures are replaced by the corresponding source pictures.
-As a result, the ME is an open loop operation. The Motion Estimation (ME) process has access to the current input picture as well as to the input pictures the
-current picture uses as references according to the hierarchical prediction structure under consideration. The ME process is multithreaded, so pictures can be
-processed out of order as long as corresponding reference input pictures are available. The ME process generates motion estimation information for all square
-blocks starting at 8x8 size up to the superblock (SB) size. The motion vector information is generated at full-pel precision. The candidates’ costs generated
-in the ME processes are further refined in downstream processes and as more neighbor information becomes available allowing for more accurate costs to be
-calculated.
-
-ME process involves four components: Pre-Hierarchical Motion Estimation (pre-HME), Hierarchical Motion Estimation (HME),
-Search Center Selection, and Motion Estimation (ME). The pre-HME/HME performs a quick search on down-sampled pictures to converge to a
-candidate search center for full-pel motion estimation search at the full picture resolution. Search center selection selects a single search center from several
-pre-HME/HME candidates and external search center candidates (in the current configuration (0,0) is always considered as a search centre candidate).
-One goal of this design is to eliminate the need for large search areas with search center selection and then to fully search a smaller search area with the
-refinement stage. Motion Estimation finds the best motion vector around the SB search center for each of the partitions being considered.
+The Motion Estimation (ME) process generates inter-prediction candidates using
+highly parallelizable, open loop, neighbor-independent methods. In the current
+SVT-AV1 encoder, the ME process is based on the input pictures, i.e. the
+reference pictures are replaced by the corresponding source pictures. As a
+result, the ME is an open loop operation. The Motion Estimation (ME) process
+has access to the current input picture as well as to the input pictures the
+current picture uses as references according to the hierarchical prediction
+structure under consideration. The ME process is multithreaded, so pictures can
+be processed out of order as long as corresponding reference input pictures are
+available. The ME process generates motion estimation information for all
+square blocks starting at 8x8 size up to the superblock (SB) size. The motion
+vector information is generated at full-pel precision. The candidates’ costs
+generated in the ME process are further refined in downstream processes as more
+neighbor information becomes available, allowing for more accurate costs to be
+calculated.
+
+The ME process involves four components: Pre-Hierarchical Motion Estimation
+(pre-HME), Hierarchical Motion Estimation (HME), Search Center Selection, and
+Motion Estimation (ME). The pre-HME/HME performs a quick search on down-sampled
+pictures to converge to a candidate search center for full-pel motion
+estimation search at the full picture resolution. Search center selection
+selects a single search center from several pre-HME/HME candidates and external
+search center candidates (in the current configuration (0,0) is always
+considered as a search centre candidate). One goal of this design is to
+eliminate the need for large search areas with search center selection and then
+to fully search a smaller search area with the refinement stage. Motion
+Estimation finds the best motion vector around the SB search center for each of
+the partitions being considered.
 
 
 ## Description of the Algorithm
 
 ### Pre-Hierarchical Motion Estimation (pre-HME)
 
-The goal of pre-HME is to catch very high motion along the horizontal or vertical directions.
-A narrow, long search is performed along each direction on the sixteenth-downsampled pictures
-(i.e. downsampling by a factor of 4 in each direction) as shown in Figure 1.
-
-The best overall pre-HME search centre/SAD pair is saved from the search.  After HME-level-0,
-if the best pre-HME result is better than any of the HME-level-0 search centres (based on SAD), the worst HME-level-0 search centre is replaced by the best
-pre-HME result (to be considered in the next HME stages).  As such, HME-level-1 must be enabled to use pre-HME.
+The goal of pre-HME is to catch very high motion along the horizontal or
+vertical directions. A narrow, long search is performed along each direction on
+the sixteenth-downsampled pictures (i.e. downsampling by a factor of 4 in each
+direction) as shown in Figure 1.
+
+The best overall pre-HME search centre/SAD pair is saved from the search. After
+HME-level-0, if the best pre-HME result is better than any of the HME-level-0
+search centres (based on SAD), the worst HME-level-0 search centre is replaced
+by the best pre-HME result (to be considered in the next HME stages). As such,
+HME-level-1 must be enabled to use pre-HME.
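+
+A sketch of this replacement step, assuming hypothetical types (the merge
+happens between HME-level-0 and the subsequent HME levels):
+
+```
+#include <stdint.h>
+
+typedef struct {
+    int      x, y; /* search centre */
+    uint64_t sad;
+} SearchCentre;
+
+/* If the best pre-HME centre beats any HME-level-0 centre, it replaces the
+ * worst HME-level-0 centre so that HME-level-1 will refine it. */
+static void merge_prehme_result(SearchCentre hme_l0[], int n,
+                                const SearchCentre *best_prehme) {
+    int worst = 0;
+    for (int i = 1; i < n; i++)
+        if (hme_l0[i].sad > hme_l0[worst].sad) worst = i;
+    if (best_prehme->sad < hme_l0[worst].sad)
+        hme_l0[worst] = *best_prehme;
+}
+```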
 
 ![me_fig1](./img/me_fig1.png)
 
-##### Figure 1. Example of a pre-HME search area around point X.  The search region is long and narrow in the horizontal and vertical direction.
+##### Figure 1. Example of a pre-HME search area around point X. The search region is long and narrow in the horizontal and vertical direction.
 
 ### Hierarchical Motion Estimation (HME)
 
-Hierarchical Motion Estimation (HME) takes as input an enhanced input picture and reference picture and produces a search center for each SB, to be searched at ME.
-The enhanced input picture is the temporally filtered source picture.
-The HME consists of up to three stages: a one-sixteenth resolution Level-0 full search, a one-quarter resolution Level-1 refinement search, and a base-resolution
-Level-2 refinement search as depicted in Figure 2. In addition, the total search area is subdivided into N-search areas, where each of the Level-0, Level-1,
-and Level-2 searches are performed independently to produce N-search centers. Of the N-search centers, one search center is finally selected. Having multiple
-search centers prevents the Level-0 and Level-1 searches from choosing local minima and missing the true center of motion completely. Currently, N is 4 for all
-presets (the search region at HME-level-0 is divided into a 2x2 grid).  All search and selection decisions are based on a pure SAD distortion metric.
-Figure 3 depicts an example HME full search and refinement data flow through Level-0, Level-1, and Level-2.
+Hierarchical Motion Estimation (HME) takes as input an enhanced input picture
+and reference picture and produces a search center for each SB, to be searched
+at ME. The enhanced input picture is the temporally filtered source picture.
+The HME consists of up to three stages: a one-sixteenth resolution Level-0 full
+search, a one-quarter resolution Level-1 refinement search, and a
+base-resolution Level-2 refinement search as depicted in Figure 2. In addition,
+the total search area is subdivided into N search areas, where each of the
+Level-0, Level-1, and Level-2 searches is performed independently to produce
+N search centers. Of the N search centers, one search center is finally
+selected. Having multiple search centers prevents the Level-0 and Level-1
+searches from choosing local minima and missing the true center of motion
+completely. Currently, N is 4 for all presets (the search region at HME-level-0
+is divided into a 2x2 grid). All search and selection decisions are based on a
+pure SAD distortion metric. Figure 3 depicts an example HME full search and
+refinement data flow through Level-0, Level-1, and Level-2.
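+
+An abstract sketch of the three-level refinement for one of the N search
+regions is given below (the per-level search itself is stubbed out, and the
+search ranges are illustrative; MVs are doubled per axis when moving to the
+next, twice-as-wide resolution):
+
+```
+typedef struct { int x, y; } Mv;
+
+/* Placeholder for a SAD-based search at one resolution level: returns the
+ * best centre found within `range` of `centre` (details omitted). */
+static Mv search_level(const void *cur, const void *ref, Mv centre, int range) {
+    (void)cur; (void)ref; (void)range;
+    return centre; /* stub for illustration */
+}
+
+/* Level-0 searches at 1/16 resolution, Level-1 at 1/4, Level-2 at the base
+ * resolution; each level's result seeds the next level's refinement. */
+static Mv hme_one_region(const void *cur[3], const void *ref[3]) {
+    Mv c = {0, 0};
+    c = search_level(cur[0], ref[0], c, 32); /* Level-0, 1/16 resolution */
+    c.x *= 2; c.y *= 2;
+    c = search_level(cur[1], ref[1], c, 8);  /* Level-1, 1/4 resolution */
+    c.x *= 2; c.y *= 2;
+    c = search_level(cur[2], ref[2], c, 8);  /* Level-2, base resolution */
+    return c;
+}
+```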
 
 ![me_fig2](./img/me_fig2.png)
 
@@ -54,33 +79,42 @@ Figure 3 depicts an example HME full sea
 
 ### Search Centre Selection
 
-Search Center Selection chooses the best SB search center for Motion Estimation based on a SAD distortion metric. Search center candidates may be generated from HME
-or other sources of candidate search centers.
-A diagram showing search center selection and the Motion Estimation is given in Figure 4.
+Search Center Selection chooses the best SB search center for Motion Estimation
+based on a SAD distortion metric. Search center candidates may be generated
+from HME or other sources of candidate search centers. A diagram showing
+search center selection and Motion Estimation is given in Figure 4.
 
 ![me_fig4](./img/me_fig4.png)
 
-##### Figure 4: Search centre selection and motion estimation.  Reference N search centre candidates come from HME (or other sources, if applicable).
+##### Figure 4: Search centre selection and motion estimation. Reference N search centre candidates come from HME (or other sources, if applicable).
 
 ### Motion Estimation (ME)
 
-Motion Estimation (ME) takes as input an enhanced input picture, reference picture, and search center for each SB. ME produces Motion Vectors (MVs),
-one for each of the 8x8 and larger square blocks in an SB. ME is an integer full search around the search centre on the full resolution picture and is performed
-for square blocks only.
-The integer full search produces an integer MV candidate for each 8x8 and larger square blocks and the SB SAD estimation using the base 8x8 block SAD data.
-As shown in Figure 5, the ME search is performed on 8x8 blocks, and the MV/SAD information for larger block sizes are derived from the 8x8 results (by adding together all the 8x8 SADs that make up a given block).
+Motion Estimation (ME) takes as input an enhanced input picture, reference
+picture, and search center for each SB. ME produces Motion Vectors (MVs), one
+for each of the 8x8 and larger square blocks in an SB. ME is an integer full
+search around the search centre on the full resolution picture and is performed
+for square blocks only. The integer full search produces an integer MV
+candidate for each 8x8 and larger square block, and the SB SAD estimate using
+the base 8x8 block SAD data. As shown in Figure 5, the ME search is performed
+on 8x8 blocks, and the MV/SAD information for larger block sizes is derived
+from the 8x8 results (by adding together all the 8x8 SADs that make up a given
+block).
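+
+The summation of 8x8 SADs into larger-block SADs can be sketched as follows
+(for a 64x64 SB holding an 8x8 grid of 8x8 blocks; indexing is illustrative):
+
+```
+#include <stdint.h>
+
+/* sad8x8[r][c] holds the ME SAD of the 8x8 block at row r, column c of a
+ * 64x64 SB. The SAD of a larger square block is the sum of the 8x8 SADs it
+ * covers; size8 is the block width in units of 8 samples. */
+static uint32_t block_sad(const uint32_t sad8x8[8][8],
+                          int row8, int col8, int size8) {
+    uint32_t sum = 0;
+    for (int r = row8; r < row8 + size8; r++)
+        for (int c = col8; c < col8 + size8; c++)
+            sum += sad8x8[r][c];
+    return sum;
+}
+
+/* Example: the top-left 16x16 block is block_sad(sad8x8, 0, 0, 2); a 32x32
+ * block uses size8 = 4 and the full 64x64 SB uses size8 = 8. */
+```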
 
 ![me_fig5](./img/me_fig_new.png)
 
-##### Figure 5: ME search for the case of a 64x64 SB.  The SAD is computed for each 8x8 block.  For larger square blocks, the 8x8 SADs are summed to produce the output SAD of the larger blocks.
+##### Figure 5: ME search for the case of a 64x64 SB. The SAD is computed for each 8x8 block. For larger square blocks, the 8x8 SADs are summed to produce the output SAD of the larger blocks.
 
 ## Implementation of the Algorithm
 
-**Inputs**: Source pictures: the current frame and all its references in one-sixteenth resolution, one-quarter resolution, and base resolution
+**Inputs**: Source pictures: the current frame and all its references in
+one-sixteenth resolution, one-quarter resolution, and base resolution
 
-**Outputs**: MVs for each 8x8 and larger square blocks and SB distortion (SAD) values.
+**Outputs**: MVs for each 8x8 and larger square block, and SB distortion (SAD)
+values.
 
-The flow of data in open-loop ME is illustrated in Figure 6, along with the relevant functions that are associated with each part of the algorithm.
+The flow of data in open-loop ME is illustrated in Figure 6, along with the
+relevant functions that are associated with each part of the algorithm.
 
 ![me_fig6](./img/me_fig5.png)
 
@@ -99,30 +133,46 @@ The flow of data in open-loop ME is illu
 
 ### Search Area Sizes
 
-The search areas of pre-HME, HME and ME can be adjusted for quality/complexity trade-offs.
-Larger search areas will capture more motion, thereby improving quality, while smaller search areas will require less computation and favour speedups.
-
-For ME and HME, a maximum and a minimum search area are specified.
-Closer frames are expected to have less motion relative to the current frame, thus the search areas are scaled-up more for distant frames.
-The actual area searched for each reference frame depends on the distance of the reference frame to the current frame (and will always be greater than or equal to the minimum area and less than or equal to the maximum area).
-Closer frames are expected to have less motion relative to the current frame, thus the search areas are scaled-up more for distant frames.
-
-Pre-HME level is set with ```prehme_level```, and the search areas are set in ```set_prehme_ctrls()```.  ME and HME search areas are set in ```set_me_hme_params_oq()```.
+The search areas of pre-HME, HME and ME can be adjusted for quality/complexity
+trade-offs. Larger search areas will capture more motion, thereby improving
+quality, while smaller search areas will require less computation and favour
+speedups.
+
+For ME and HME, a maximum and a minimum search area are specified. Closer
+frames are expected to have less motion relative to the current frame, so the
+search areas are scaled up more for distant frames. The actual area searched
+for each reference frame depends on the distance of the reference frame to the
+current frame (and will always be greater than or equal to the minimum area and
+less than or equal to the maximum area).
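+
+A sketch of the distance-based interpolation between the minimum and maximum
+areas (hypothetical normalization; the actual scaling is set in
+```set_me_hme_params_oq()```):
+
+```
+/* Hypothetical sketch: scale one search dimension linearly with the distance
+ * of the reference frame (max_distance > 0), clamped to [min_dim, max_dim]. */
+static int scaled_search_dim(int min_dim, int max_dim,
+                             int ref_distance, int max_distance) {
+    int dim = min_dim + ((max_dim - min_dim) * ref_distance) / max_distance;
+    if (dim < min_dim) dim = min_dim;
+    if (dim > max_dim) dim = max_dim;
+    return dim;
+}
+```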
+
+The pre-HME level is set with ```prehme_level```, and the search areas are
+set in ```set_prehme_ctrls()```. ME and HME search areas are set in
+```set_me_hme_params_oq()```.
 
 ### HME Level 0 Search Area Adjustment
 
-The HME-level-0 search region can also be adjusted. HME adjustment is done based on the distance of the reference frames and the results of previously processed
-reference frames.  Reducing the search area based on distance from the current frame is controlled by ```distance_based_hme_resizing```.
-
-Search region adjustment is also done based on the HME search centre chosen by the first reference frame (list 0, index 0).
-Subsequent frames can see whether the motion on the first reference frame was mainly vertical, mainly horizontal, or still.
-If the motion is vertical (horizontal), the horizontal (vertical) search area will be reduced.  If the motion is still (i.e. low motion) then both dimensions are
-reduced.  The thresholds for characterising motion as vertical, horizontal, or still are set by ```reduce_hme_l0_sr_th_min``` and ```reduce_hme_l0_sr_th_max```.
+The HME-level-0 search region can also be adjusted. HME adjustment is done
+based on the distance of the reference frames and the results of previously
+processed reference frames. Reducing the search area based on distance from the
+current frame is controlled by ```distance_based_hme_resizing```.
+
+Search region adjustment is also done based on the HME search centre chosen by
+the first reference frame (list 0, index 0). Subsequent frames can see whether
+the motion on the first reference frame was mainly vertical, mainly horizontal,
+or still. If the motion is vertical (horizontal), the horizontal (vertical)
+search area will be reduced. If the motion is still (i.e. low motion) then both
+dimensions are reduced. The thresholds for characterising motion as vertical,
+horizontal, or still are set by ```reduce_hme_l0_sr_th_min``` and
+```reduce_hme_l0_sr_th_max```.
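+
+A simplified sketch of the direction-based reduction, using a single
+threshold and an illustrative halving factor (the actual behavior is driven
+by ```reduce_hme_l0_sr_th_min``` and ```reduce_hme_l0_sr_th_max```):
+
+```
+#include <stdlib.h>
+
+/* Classify the HME centre of the first reference (list 0, index 0) and
+ * shrink the HME-level-0 search area of subsequent references accordingly. */
+static void adjust_hme_l0_area(int first_ref_mv_x, int first_ref_mv_y,
+                               int th, int *search_w, int *search_h) {
+    int ax = abs(first_ref_mv_x), ay = abs(first_ref_mv_y);
+    if (ax < th && ay < th) {  /* still: reduce both dimensions */
+        *search_w /= 2;
+        *search_h /= 2;
+    } else if (ay > ax) {      /* mainly vertical: reduce horizontal area */
+        *search_w /= 2;
+    } else {                   /* mainly horizontal: reduce vertical area */
+        *search_h /= 2;
+    }
+}
+```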
 
 ### ME Search Area Adjustment
 
-The ME search region can be adjusted based on the HME results.  Specifically, if the HME distortions are low, the block is expected to have low motion, so the ME search region is reduced.
-Similarly, if the HME output search centre is close to (0,0) the ME search region can be reduced.
+The ME search region can be adjusted based on the HME results. Specifically, if
+the HME distortions are low, the block is expected to have low motion, so the
+ME search region is reduced. Similarly, if the HME output search centre is
+close to (0,0) the ME search region can be reduced.
 
 ```
 IF (HME_search_centre_x <= MV_TH &&
@@ -133,7 +183,8 @@ ELSE IF (HME_SAD < general_sad_th)
     Divide ME search width and height by general_divisor
 ```
 
-Where the variables in the pseudo-code above correspond to the following signals in the code (set in ```set_me_sr_adjustment_ctrls()```):
+The variables in the pseudo-code above correspond to the following signals in
+the code (set in ```set_me_sr_adjustment_ctrls()```):
 
 | **Signal in code**                   | **Name in pseudo-code** | **Description**                                                                      |
 | ------------------------------------ | ----------------------- | ------------------------------------------------------------------------------------ |
@@ -146,27 +197,36 @@ Where the variables in the pseudo-code a
 
 ### HME/ME-based reference pruning
 
-Reference frames can be pruned at early stages of open-loop ME to save the cost of searching them at subsequent, more expensive stages and MD.
-Pruning is performed on a per-SB basis after HME (before ME) and after ME (so that candidates will not be passed to MD).  Pruning decisions are based on the
+Reference frames can be pruned at early stages of open-loop ME to save the cost
+of searching them at subsequent, more expensive stages and MD. Pruning is
+performed on a per-SB basis after HME (before ME) and after ME (so that
+candidates will not be passed to MD). Pruning decisions are based on the
 relative SAD of each reference frame (relative to the best SAD), as follows:
 
 ![me_eqn1](./img/me_eqn1.png)
 
-Reference pruning controls are set in ```set_me_hme_ref_prune_ctrls()```, with the following controls:
+Reference pruning controls are set in ```set_me_hme_ref_prune_ctrls()```, with
+the following controls:
 
-| **Signal**                              | **Description**                                                                |
-| --------------------------------------  | ----------------------------------------------------------------------         |
-| enable_me_hme_ref_pruning               |                                                                                |
-| prune_ref_if_hme_sad_dev_bigger_than_th | TH used to prune references based on HME sad deviation                         |
-| prune_ref_if_me_sad_dev_bigger_than_th  | TH used to prune references based on ME sad deviation                          |
-| protect_closest_refs                    | If true, do not prune closest ref frames                                       |
+| **Signal**                              | **Description**                                                        |
+| --------------------------------------  | ---------------------------------------------------------------------- |
+| enable_me_hme_ref_pruning               |                                                                        |
+| prune_ref_if_hme_sad_dev_bigger_than_th | TH used to prune references based on HME sad deviation                 |
+| prune_ref_if_me_sad_dev_bigger_than_th  | TH used to prune references based on ME sad deviation                  |
+| protect_closest_refs                    | If true, do not prune closest ref frames                               |
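+
+A sketch of the pruning rule, assuming the deviation is measured as a
+percentage relative to the best reference's SAD (the thresholds correspond to
+the ```prune_ref_if_*_sad_dev_bigger_than_th``` signals above):
+
+```
+#include <stdint.h>
+
+/* best_sad is the minimum SAD over all references, so ref_sad >= best_sad.
+ * Prune a reference whose deviation exceeds the threshold, optionally
+ * protecting the closest references. */
+static int prune_reference(uint64_t ref_sad, uint64_t best_sad,
+                           uint64_t dev_th_pct, int is_closest_ref,
+                           int protect_closest_refs) {
+    if (protect_closest_refs && is_closest_ref)
+        return 0;
+    uint64_t dev_pct = best_sad ? (100 * (ref_sad - best_sad)) / best_sad : 0;
+    return dev_pct > dev_th_pct;
+}
+```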
 
 
 ### ME Early Exit
 
-If the (0,0) SAD is low, pre-HME and HME can be skipped, and the ME search area can be reduced.  The feature is controlled by ```me_early_exit_th```,
-which is the threshold used to determine if the (0,0) SAD is low enough to apply the optimizations.
+If the (0,0) SAD is low, pre-HME and HME can be skipped, and the ME search area
+can be reduced. The feature is controlled by ```me_early_exit_th```, which is
+the threshold used to determine if the (0,0) SAD is low enough to apply the
+optimizations.
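+
+A sketch of the early-exit check (the halving of the search area is
+illustrative; the threshold is ```me_early_exit_th```):
+
+```
+#include <stdint.h>
+
+/* If the (0,0) SAD is already small, skip pre-HME/HME and shrink the ME
+ * search area. */
+static void me_early_exit(uint64_t zero_zero_sad, uint64_t me_early_exit_th,
+                          int *do_prehme, int *do_hme,
+                          int *search_w, int *search_h) {
+    if (zero_zero_sad < me_early_exit_th) {
+        *do_prehme = 0;
+        *do_hme    = 0;
+        *search_w /= 2; /* illustrative reduction */
+        *search_h /= 2;
+    }
+}
+```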
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Overlapped-Block-Motion-Compensation.md 1.2.0+dfsg-2/Docs/Appendix-Overlapped-Block-Motion-Compensation.md
--- 0.9.1+dfsg-1/Docs/Appendix-Overlapped-Block-Motion-Compensation.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Overlapped-Block-Motion-Compensation.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,10 +1,22 @@
+[Top level](../README.md)
+
 # Overlapped Block Motion Compensation
 
 ## 1. Description of the algorithm
 
-For a given block which uses single reference INTER prediction (i.e. a NEWMV candidate), the idea behind OBMC is to consider additional predictors based on neighboring block MVs, as those predictors may offer more accurate predictions for pixels near the block boundary. A final inter prediction is generated using a weighted sum of predictions based on the current block MV and predictions  based on neighboring block MVs. The weights favor the predictions based on the neighboring block MVs near the block boundary and favor the predictions based on the current block MV for the pixels away from the boundary.
-
-In general, OBMC produces better and smoother predictions near the boundary and *may help reduce blocking artifacts*. It may help also in the selection of larger inter partitions. OBMC is enabled only for inter blocks with single MV and with block sizes ≥ 8x8.
+For a given block which uses single reference INTER prediction (i.e. a NEWMV
+candidate), the idea behind OBMC is to consider additional predictors based on
+neighboring block MVs, as those predictors may offer more accurate predictions
+for pixels near the block boundary. A final inter prediction is generated using
+a weighted sum of predictions based on the current block MV and predictions
+based on neighboring block MVs. The weights favor the predictions based on the
+neighboring block MVs near the block boundary and favor the predictions based
+on the current block MV for the pixels away from the boundary.
+
+In general, OBMC produces better and smoother predictions near the boundary and
+*may help reduce blocking artifacts*. It may also help in the selection of
+larger inter partitions. OBMC is enabled only for inter blocks with a single
+MV and with block sizes ≥ 8x8.
 
 The prediction algorithm for a given block proceeds as follows:
 
@@ -35,7 +47,7 @@ Step 1: Vertical blending (see Figure 2
 
 ##### Figure 2. Vertical blending of the prediction based on the blue 8x8 inter block.
 
-Step 2: Horizontal blending (See  Figure 3 below)
+Step 2: Horizontal blending (See Figure 3 below)
 
 - Consider now the area to the left of the light green block and that extends half-way into the yellow block (The area inside the dotted green line). Note that the orange area corresponds to the pixels that have already been updated in the vertical blending in step 1.
 - Generate inter prediction of the dotted green block using the inter mode for the green block.
@@ -82,11 +94,19 @@ A diagram of the OBMC-related function c
 
 ##### Figure 4. A diagram of the function calls leading to the generation and coding of OBMC candidates in MD and in the Encode Pass.
 
-The function ```inject_inter_candidates``` is used to generate the OBMC candidates to be injected in MD stage 0. The function ```av1_inter_prediction``` is used to generate OBMC prediction for each block in the MD stages.
-
-The steps involved in the generation of OBMC input candidates for MD stage 0 are as follows:
-
-1. Determine the number of top blocks and the number of blocks to the left of the current block that overlap with the current block (```svt_av1_count_overlappable_neighbors```). A top block is considered to overlap with the current block if it is an inter block. Similar idea applies to blocks to the left of the current block.
+The function ```inject_inter_candidates``` is used to generate the OBMC
+candidates to be injected in MD stage 0. The function
+```av1_inter_prediction``` is used to generate OBMC prediction for each block
+in the MD stages.
+
+The steps involved in the generation of OBMC input candidates for MD stage 0
+are as follows:
+
+1. Determine the number of top blocks and the number of blocks to the left of
+   the current block that overlap with the current block
+   (```svt_av1_count_overlappable_neighbors```). A top block is considered to
+   overlap with the current block if it is an inter block. A similar idea
+   applies to blocks to the left of the current block.
 
 2. Check if OBMC is allowed for the current block, based on the following criteria (```obmc_motion_mode_allowed```):
     - Block height and block width are both greater than or equal to 8.
@@ -99,15 +119,27 @@ The steps involved in the generation of
     - For each of the overlapping top blocks, a prediction candidate for the current block based on the MV of the top block is generated. (```build_prediction_by_above_preds```)
     - For each of the overlapping left blocks, a prediction candidate for the current block based on the MV of the left block is generated. (```build_prediction_by_left_preds```)
 
-   The predictions for the current block based on the MVs of the top blocks and those of the left blocks are prepared in the above steps to be used in subsequent stages when the actual OBMC prediction is computed.
-
-The generation of OBMC predictions using the function ```av1_inter_prediction``` in each MD stage proceeds along the same lines as in ```precompute_obmc_data``` to compute predictions for the current block based on MVs of the top and left blocks. Blending of those predictions is then performed to construct the OBMC prediction (see ```av1_build_obmc_inter_prediction```).
-
-Similarly, in the Encode Pass, the function av1_inter_prediction is called to generate the final OBMC encodings based on the final partitioning and modes for the neighboring blocks.
+   The predictions for the current block based on the MVs of the top blocks and
+   those of the left blocks are prepared in the above steps to be used in
+   subsequent stages when the actual OBMC prediction is computed.
+
+The generation of OBMC predictions using the function
+```av1_inter_prediction``` in each MD stage proceeds along the same lines as in
+```precompute_obmc_data``` to compute predictions for the current block based
+on MVs of the top and left blocks. Blending of those predictions is then
+performed to construct the OBMC prediction (see
+```av1_build_obmc_inter_prediction```).
+
+Similarly, in the Encode Pass, the function ```av1_inter_prediction``` is
+called to generate the final OBMC encodings based on the final partitioning
+and modes for the neighboring blocks.
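+
+A one-dimensional sketch of the overlap blending idea is given below. The
+weights here are linear and purely illustrative (AV1 defines fixed mask
+tables, and the actual two-pass vertical/horizontal blending is performed in
+```av1_build_obmc_inter_prediction```):
+
+```
+#include <stdint.h>
+
+/* Blend a neighbour-MV prediction into the current prediction over the
+ * overlap rows nearest the block boundary (row 0). Weights sum to 64. */
+static void blend_overlap_rows(uint8_t *dst, int dst_stride,
+                               const uint8_t *nbr_pred, int nbr_stride,
+                               int width, int overlap_rows) {
+    for (int r = 0; r < overlap_rows; r++) {
+        /* rows closer to the boundary take more of the neighbour (<= 32) */
+        int w_nbr = 64 * (overlap_rows - r) / (2 * overlap_rows);
+        int w_cur = 64 - w_nbr;
+        for (int c = 0; c < width; c++)
+            dst[r * dst_stride + c] =
+                (uint8_t)((w_cur * dst[r * dst_stride + c] +
+                           w_nbr * nbr_pred[r * nbr_stride + c] + 32) >> 6);
+    }
+}
+```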
 
 ## 3. Optimization of the algorithm
 
-The optimization of the OBMC algorithm is based on limiting the blocks where OBMC prediction is considered. The OBMC levels are controlled by the ```pic_obmc_mode flag```. Table 1 below provides a description of the quality-complexity tradeoffs in the algorithm.
+The optimization of the OBMC algorithm is based on limiting the blocks where
+OBMC prediction is considered. The OBMC levels are controlled by the
+```pic_obmc_mode``` flag. Table 2 below provides a description of the
+quality-complexity tradeoffs in the algorithm.
 
 ##### Table 2. Description of the settings of the pic_obmc_mode flag.
 
@@ -119,18 +151,21 @@ The optimization of the OBMC algorithm i
 
 ## 4. Signaling
 
-The flag ```is_motion_mode_switchable specifyies``` if the motion mode can change from one block to another. As a result, a block-based field called ```motion_mode``` is sent to indicate, when set, that the type of motion for the block could be either SIMPLE_TRANSLATION, OBMC, or WARPED.
+The flag ```is_motion_mode_switchable``` specifies if the motion mode can
+change from one block to another. When the flag is set, a block-based field
+called ```motion_mode``` is sent to indicate which type of motion is used for
+the block: SIMPLE_TRANSLATION, OBMC, or WARPED.
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
 \[1\] Yue Chen and Debargha Mukherjee, “Variable block-size overlapped block motion compensation in the next generation open-source video codec,” International Conference on Image Processing, pp. 938-942, 2017
 
-\[2\] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James  Bankoski, “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
-
-## Notes
-
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+\[2\] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James Bankoski, “A Technical Overview of AV1,” Proceedings of the IEEE, vol. 109, no. 9, pp. 1435-1462, Sept. 2021.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Palette-Prediction.md 1.2.0+dfsg-2/Docs/Appendix-Palette-Prediction.md
--- 0.9.1+dfsg-1/Docs/Appendix-Palette-Prediction.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Palette-Prediction.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,6 +1,8 @@
+[Top level](../README.md)
+
 # Palette Prediction
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
 A palette refers to a subset of the color space. Palette prediction is
 used to reduce the redundancy in coding pixel information within a given
@@ -29,7 +31,7 @@ indices then proceeds in a wavefront man
 
 ![palette_prediction_fig1](./img/palette_prediction_fig1.png)
 
-#####  Figure 1. Example of a 4x4 source block, corresponding palette, index map and wavefront processing pattern. The 4x4 block is considered here only for illustration purposes, as the block size needs to be at least 8x8 for palette prediction to be allowed.</p>
+##### Figure 1. Example of a 4x4 source block, corresponding palette, index map and wavefront processing pattern. The 4x4 block is considered here only for illustration purposes, as the block size needs to be at least 8x8 for palette prediction to be allowed.
 
 
 The index for each pixel is encoded using the top and left encoded
@@ -50,7 +52,7 @@ indices as context, as shown in the tabl
 | …         |             |
 | 15        | 13, 14      |
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 **Inputs**: Input source video
 
@@ -67,11 +69,11 @@ The feature is currently active only whe
 
 ##### Table 2. Control tokens and flags for palette prediction.
 
-|**Flag**|**Level (Sequence/Picture)**|**Description**|
-|--- |--- |--- |
-|--scm|Sequence|Command line token. 0: No SC, 1: SC ON 2: Auto mode (detector based)|
-|--palette|Configuration|To enable palette from the command-line interface. 0: OFF; 1: Slow;  2: Fastest. Auto mode=-1 if not set from the encoder configuration|
-|palette_level|Picture based|Set based on the configuration palette mode. For auto mode it is set to 6 for M0.|
+| **Flag**      | **Level (Sequence/Picture)** | **Description**                                                                                                                        |
+| ---           | ---                          | ---                                                                                                                                    |
+| --scm         | Sequence                     | Command line token. 0: No SC, 1: SC ON 2: Auto mode (detector based)                                                                   |
+| --palette     | Configuration                | To enable palette from the command-line interface. 0: OFF; 1: Slow; 2: Fastest. Auto mode=-1 if not set from the encoder configuration |
+| palette_level | Picture based                | Set based on the configuration palette mode. For auto mode it is set to 6 for M0.                                                      |
 
 
 **Details of the implementation**
@@ -84,17 +86,17 @@ The main function calls associated with
 
 The following steps are then considered in the generation of palette prediction candidates.
 
-1.  In the function ```generate_md_stage_0_cand```, a candidate for palette prediction is
-    first evaluated to determine if the palette mode is allowed (svt_av1_allow_palette).
-    The use of palette prediction mode is allowed if (palette_level different from 0 AND block
-    width <= 64 AND block height <= 64 AND block size at least 8x8.)
-
-2.  For blocks where palette prediction mode is allowed, the function ``` inject_palette_candidates``` is invoked to create and
-    inject palette candidates.The candidates are signaled using the Intra DC mode. This function
-    calls another function (```search_palette_luma```) in order to
-    determine all palette candidates for luma. The palette prediction candidates are determined by performing two
-    types of search, namely a search based on the most dominant colors and
-    a search based on the K-means clustering of the colors in the block.
+1. In the function ```generate_md_stage_0_cand```, a candidate for palette prediction is
+   first evaluated to determine if the palette mode is allowed (```svt_av1_allow_palette```).
+   The use of palette prediction mode is allowed if ```palette_level``` is different from 0
+   AND block width <= 64 AND block height <= 64 AND the block size is at least 8x8.
+
+2. For blocks where palette prediction mode is allowed, the function ```inject_palette_candidates``` is invoked to create and
+   inject palette candidates. The candidates are signaled using the Intra DC mode. This function
+   calls another function (```search_palette_luma```) in order to
+   determine all palette candidates for luma. The palette prediction candidates are determined by performing two
+   types of search, namely a search based on the most dominant colors and
+   a search based on the K-means clustering of the colors in the block.
 
     1. **Most dominant colors search**: In this search, a histogram of the
        colors in the source block is generated. The number of the most used
@@ -117,7 +119,7 @@ block.
 In mode decision, palette prediction candidates are assigned to a special
 MD candidate class.
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
 | **Signal**          | **Description**                                                                         |
 | -----------------   | --------------------------------------------------------------------------------------- |
@@ -125,7 +127,7 @@ MD candidate class.
 | dominant_color_step | In the dominant color search, test a subset of the most dominant color combinations by testing every nth combo. For example, with step size of 2, if the block involves 7 colors, then only 3 candidates with palettes based on the most dominant 7, 5 and 3 colors are tested. Range: [1 (test all), 7 (test one)]           |
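+
+A sketch of how ```dominant_color_step``` thins out the tested palette sizes,
+matching the example in the table above (the minimum palette size is passed in
+by the caller here and is illustrative):
+
+```
+/* With num_colors dominant colors in the block, test palettes of size
+ * num_colors, num_colors - step, ... down to min_size. Returns the number
+ * of palette-size candidates written into sizes[]. */
+static int palette_sizes_to_test(int num_colors, int step,
+                                 int min_size, int sizes[]) {
+    int n = 0;
+    for (int s = num_colors; s >= min_size; s -= step)
+        sizes[n++] = s;
+    return n;
+}
+
+/* Example from the table: num_colors = 7, step = 2, min_size = 2 tests
+ * palettes of 7, 5 and 3 colors (3 candidates). */
+```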
 
 
-## 4.  **Signaling**
+## 4. Signaling
 
 The most important signals/parameters which are sent in the bit
 stream regarding palette prediction:
@@ -144,4 +146,8 @@ stream regarding palette prediction:
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Rate-Control.md 1.2.0+dfsg-2/Docs/Appendix-Rate-Control.md
--- 0.9.1+dfsg-1/Docs/Appendix-Rate-Control.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Rate-Control.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,81 +1,171 @@
+[Top level](../README.md)
+
 # High Level Description of Rate Control in SVT-AV1
 
 ## Table of Contents
 1. [Introduction](#introduction)
 2. [High Level Idea](#high-level-idea)
-  * 1-PASS VBR + Look Ahead
-  * 1-PASS VBR + Look Ahead
-3. [Rate Control Flow](#rate-control-flow)
+  * One-Pass VBR + Look Ahead
+  * Multi-Pass VBR
+3. [Rate Control Flow](#vbr-rate-control-flow)
   * CRF IPP Pass
   * Middle-Pass: CRF Encoding based on the Final Prediction Structure
   * Final Pass
 4. [Final Pass RC Algorithm in Detail](#final-pass-rc-algorithm-in-detail)
   * GoP-Level Bit Assignment (get_kf_group_bits())
-    * 1-Pass VBR + LAD with look ahead shorter than GoP size
-    * 1-Pass VBR + LAD with look ahead longer than GoP size
-    * 3-Pass VBR
+    * One-Pass VBR + LAD with look ahead shorter than GoP size
+    * One-Pass VBR + LAD with look ahead longer than GoP size
+    * Multi-Pass VBR
   * Mini-GoP Level Bit Assignment (calculate_total_GF_group_bits())
-    * 1-Pass VBR + LAD
-    * 3-Pass
+    * One-Pass VBR + LAD
+    * Multi-Pass
   * Frame Level Bit Assignment
-    * 1-Pass VBR + LAD
-    * 3-Pass
+    * One-Pass VBR + LAD
+    * Multi-Pass
   * SB-based QP_Modulation Algorithm
   * Block-based Lambda Modulation Algorithm
   * Re-encoding
   * Post Encode RC Update
+5. [Constant Bitrate Rate Control](#constant-bitrate-rate-control)
 - Appendix A: Final-Pass Rate Control Functions
-- Appendix B: Speed Optimization of the IPP Pass
-- Appendix C: Speed Optimization of the Middle Pass
-- Appendix D: Capped CRF
+- Appendix B: Speed Optimization of the Middle Pass
+- Appendix C: Capped CRF
 
 ## Introduction
 
-Rate control is responsible for adjusting the size of the encoded bit stream corresponding to the input video pictures to achieve a certain behavior in the encoded sequence bitrate. The rate control algorithm adjusts the quantization parameter qindex per frame and per SB, as well as adjusting the lambda parameter used in the RD cost calculations per block to achieve a desired bitrate behavior specified by the application. Using  statistics generated based on  the analysis of frames in the look ahead window or from previous encode passes, the sequence target rate is translated to a target bits per GoP, mini-GoP and then per frame. The frame rate is then mapped to qindex for each frame. Feedback from packetization which includes the actual number of bits per pictures is also used to update the rate control model, correct the target bit estimates and provide better quantization parameter assignment.
+Rate control is responsible for adjusting the size of the encoded bit stream
+corresponding to the input video pictures to achieve a certain behavior in the
+encoded sequence bitrate. The rate control algorithm adjusts the quantization
+parameter qindex per frame and per SB, as well as adjusting the lambda
+parameter used in the RD cost calculations per block to achieve a desired
+bitrate behavior specified by the application. Using statistics generated based
+on the analysis of frames in the look ahead window or from previous encode
+passes, the sequence target rate is translated to a target number of bits per
+GoP, per mini-GoP and then per frame. The per-frame target rate is then mapped
+to a qindex for each frame. Feedback from packetization, which includes the
+actual number of bits per picture, is also used to update the rate control
+model, correct the target bit estimates and provide better quantization
+parameter assignment.
 
 ## High Level Idea
 
-In SVT-AV1, the rate control process sits after the Picture Manager and before Mode Decision Configuration kernels. There are two inputs to the rate control kernel, one coming from the Picture Manager process (forward) and one from the Packetization process (feedback) as shown below. Based on the provided inputs, the rate control algorithm decides on the best QP/qindex for each frame and SB to match the target rate constraints. In the Mode Decision kernel, there is an option to perform re-encoding using a new QP to achieve better bit rate matching. After each frame is completely coded, the packetization kernel sends the corresponding actual number of bits for the encoded frame back to the rate control algorithm to update the internal model and decide on future QPs.
+In SVT-AV1, the rate control process sits after the Picture Manager and before
+Mode Decision Configuration kernels. There are two inputs to the rate control
+kernel, one coming from the Picture Manager process (forward) and one from the
+Packetization process (feedback) as shown below. Based on the provided inputs,
+the rate control algorithm decides on the best QP/qindex for each frame and SB
+to match the target rate constraints. In the Mode Decision kernel, there is an
+option to perform re-encoding using a new QP to achieve better bit rate
+matching. After each frame is completely coded, the packetization kernel sends
+the corresponding actual number of bits for the encoded frame back to the rate
+control algorithm to update the internal model and decide on future QPs.
 
 ![rc_figure1](./img/rc_figure1.PNG)
 
 ###### Figure 1. High-level encoder process dataflow.
 
-The inputs to the rate control algorithm vary based on the application and rate control mode. The input could be analysis statistics from an IPP processing of frames in the look ahead window, or statistics coming from previous passes. However, in all cases, the Temporal Dependency Model (TPL) data is used to calculate the boosts for frame-level QP assignment, SB QP modulation and lambda modulation per block.
-
-In SVT-AV1, there are different options for VBR encoding. Based on the application requirements. These option range from an adjustable latency algorithm (1 PASS + adjustable look ahead) for low to medium latency applications to a multi-pass algorithm for VOD applications. Having access to more pictures in future, which translates to higher latency, generally helps the rate control algorithm, however the performance benefits saturate after a long enough lookahead window. The available options are listed below.
-
-### 1-PASS VBR + Look Ahead
-
-In this mode, the latency is controlled by the size of the look ahead. The CRF IPP processing is performed for the frames in the look ahead window and the collected analysis data is used in the rate control algorithm. The default size of the look ahead is around 60 frames, but it can be increased to 120 frames.
-
-### 3-PASS VBR (CRF IPP first pass + CRF middle pass with same Prediction structure as final pass + final VBR pass)
-
-The multi-pass mode can only be used in applications where latency is not a concern. This mode provides the best BD-rate and rate matching performance. In the first pass, the encoder runs in IPP CRF mode with simple prediction modes. The collected statistics are stored in memory or file and are passed to the encoder in the next pass. The middle pass has the same prediction structure as the final pass. Using similar prediction structure helps significantly in rate assignment. The data from the first pass is used to estimate the sequence QP for the middle pass. Having a middle pass with closer rate to the target rate helps in matching the target rate considerably. Finally, the last pass uses the data from the previous passes to achieve the best performance. A block diagram of the encoder with three passes is shown in Figure 2.
+The inputs to the rate control algorithm vary based on the application and rate
+control mode. In the case of VBR mode, the input could be analysis statistics
+from an IPP processing of frames in the look ahead window, or statistics coming
+from previous passes. However, in all cases, the Temporal Dependency Model
+(TPL) data is used to calculate the boosts for frame-level QP assignment, SB QP
+modulation and lambda modulation per block.
+
+In SVT-AV1, there are different options for VBR encoding based on the
+application requirements. These options range from an adjustable latency
+algorithm (One-pass + adjustable look ahead) for low to medium latency
+applications to a multi-pass algorithm for VOD applications. Having access to
+more pictures in the future, which translates to higher latency, generally
+helps the rate control algorithm; however, the performance benefits saturate
+beyond a long enough lookahead window. The available options are listed below.
+In addition to the VBR mode, for low-delay applications, the SVT-AV1 encoder
+supports a constant bitrate (CBR) rate control mode whose objective is to
+maintain a constant bitrate in the generated bitstream.
+
+### One-Pass VBR + Look Ahead
+
+In this mode, the latency is controlled by the size of the look ahead. The CRF
+IPP processing is performed for the frames in the look ahead window and the
+collected analysis data is used in the rate control algorithm. The default size
+of the look ahead is around 2 mini-GoPs (e.g. 32 frames for the case of a
+five-layer prediction structure), but it can be increased to 120 frames.
+
+### Multi-Pass VBR (CRF IPP first pass + CRF middle pass with same Prediction structure as final pass + final VBR pass)
+
+The multi-pass mode can only be used in applications where latency is not a
+concern. This mode is available only using the SVT-AV1 sample application. This
+mode provides the best BD-rate and rate matching performance. In the first
+pass, the encoder runs in IPP CRF mode with simple prediction modes. The
+collected statistics are stored in memory or file and are passed to the encoder
+in the next pass. The middle pass has the same prediction structure as the
+final pass. Using similar prediction structure helps significantly in rate
+assignment. The data from the first pass is used to estimate the sequence QP
+for the middle pass. Having a middle pass with closer rate to the target rate
+helps in matching the target rate considerably. Finally, the last pass uses the
+data from the previous passes to achieve the best performance. A block diagram
+of the encoder with three passes is shown in Figure 2.
 
 ![rc_figure2](./img/rc_figure2.PNG)
 
 ###### Figure 2. Block diagram of multi-pass encoder.
 
-## Rate Control Flow
+## VBR Rate Control Flow
 
 ### CRF IPP Pass
 
-The IPP pass is used to generate statistics to be used in the final pass in a one-pass + LAD encoder or in subsequent passes in a multi-pass encoder. It is a simple and fast processing of the source input pictures that is based only on the source pictures (i.e. the reference pictures are also source input pictures) and makes use of a flat prediction structure where any given picture would reference at most the two preceding pictures in display order. Pictures are divided into 16x16 blocks and simple ME and Intra DC predictions are performed. The inter and intra prediction residuals are used in the calculations of the prediction distortion but not processed through transform/quantization steps. The collected data includes:  Intra error (distortion), Inter error (distortion), coded error (distortion) of the best mode, percentage of Inter blocks, percentage of zero motion blocks, and information about motion vectors. The coded error of each mini-GoP or GoP is used in the final pass to allocate the rate for each section of the clip. Note that the results from the IPP pass can be used in other decision-making steps, such as the dynamic GoP decisions, in the subsequent encode pass. The calculated data is stored in *FRAME_STATS* per block and then converted to *FIRSTPASS_STATS* per frame in *update_firstpass_stats()*. To keep the cost of running the IPP pass low, a number of optimization ideas are introduced and are summarized in Appendix B.
+The IPP pass is used to generate statistics to be used in the final pass in a
+one-pass + LAD encoder or in subsequent passes in a multi-pass encoder. It is a
+simple and fast processing of the source input pictures that is based only on
+the source pictures (i.e. the reference pictures are also source input
+pictures) and makes use of a flat prediction structure where any given picture
+would reference at most the two preceding pictures in display order. Pictures
+are divided into 16x16 blocks and simple ME and Intra DC predictions are
+performed. The inter and intra prediction residuals are used in the
+calculations of the prediction distortion but not processed through
+transform/quantization steps. The collected data includes: Intra error
+(distortion), Inter error (distortion), coded error (distortion) of the best
+mode, percentage of Inter blocks, percentage of zero motion blocks, and
+information about motion vectors. The coded error of each mini-GoP or GoP is
+used in the final pass to allocate the rate for each section of the clip. Note
+that the results from the IPP pass can be used in other decision-making steps,
+such as the dynamic GoP decisions, in the subsequent encode pass. The
+calculated data is stored in *FRAME_STATS* per block and then converted to
+*FIRSTPASS_STATS* per frame in *update_firstpass_stats()*.
 
 ### Middle-Pass: CRF Encoding based on the Final Prediction Structure
 
-A robust rate control would require accurate statistical information to properly distribute the rate budget and meet the constraints imposed by the application. Even though the IPP pass provides useful information for the subsequent encode passes, the corresponding statistics are not accurate enough to make good rate distribution decisions in the final encode pass. Using a CRF pass with the same prediction structure as the final pass provides accurate enough estimates upon which to base the final encoding pass rate control decisions. This newly added middle pass is a fast version of the final pass with similar prediction structure as the final pass. In order to improve the accuracy of information, we use the statistics from the IPP pass to estimate the input QP of the Middle-Pass to get closer to the target rate. This process results in substantially better rate matching in the multi pass rate control. The middle pass stores the following data per frame (See StatStruct structure): Picture number, total number of bits, qindex for the frame, qindex for the sequence.
-
-In order to reduce the speed overhead of the middle pass, a faster preset of the encoder is considered in the middle pass. For example, if the final pass preset is set to M5, the preset of the middle pass is chosen to be M11. To make the middle pass even faster, some additional speed optimizations are considered and are briefly described in Appendix C.
+A robust rate control would require accurate statistical information to
+properly distribute the rate budget and meet the constraints imposed by the
+application. Even though the IPP pass provides useful information for the
+subsequent encode passes, the corresponding statistics are not accurate enough
+to make good rate distribution decisions in the final encode pass. Using a CRF
+pass with the same prediction structure as the final pass provides accurate
+enough estimates upon which to base the final encoding pass rate control
+decisions. This newly added middle pass is a fast version of the final pass
+with similar prediction structure as the final pass. In order to improve the
+accuracy of information, we use the statistics from the IPP pass to estimate
+the input QP of the middle pass to get closer to the target rate. This process
+results in substantially better rate matching in the multi-pass rate control.
+The middle pass stores the following data per frame (see the StatStruct
+structure): picture number, total number of bits, qindex for the frame, and
+qindex for the sequence.
+
+In order to reduce the speed overhead of the middle pass, a faster preset of
+the encoder is considered in the middle pass. For example, if the final pass
+preset is set to M5, the preset of the middle pass is chosen to be M11. To make
+the middle pass even faster, some additional speed optimizations are considered
+and are briefly described in Appendix B.
 
 ### Final Pass
 
-The following presents a very high-level description of the steps involved in the rate control algorithm in the final encode pass. The flowchart is shown in Figure 3. A more detailed presentation of these steps is provided in the next section.
+The following presents a very high-level description of the steps involved in
+the rate control algorithm in the final encode pass. The flowchart is shown in
+Figure 3. A more detailed presentation of these steps is provided in the next
+section.
 
 The rate control algorithm in the final pass includes the following main steps:
-1) For each GoP/KF_group assign the target number of bits (*find_next_key_frame()*).
-2) For each mini-GoP or GF_group assign the target number of bits per frame (*define_gf_group()*).
+1) For each GoP/KF_group assign the target number of bits (*kf_group_rate_assingment()*).
+2) For each mini-GoP or GF_group assign the target number of bits per frame (*gf_group_rate_assingment()*).
 3) Update the target number of bits per frame based on the feedback and internal buffers (*av1_set_target_rate()*).
 4) Assign qindex per frame based on tpl boost and the target number of bits (*rc_pick_q_and_bounds()*).
 5) SB-level QP modification and block-level lambda generation based on TPL (*sb_qp_derivation_tpl_la()*).
@@ -88,23 +178,26 @@ The rate control algorithm in the final
 
 ## Final Pass RC Algorithm in Detail
 
-This section provides a more detailed description of the main steps involved in the Final Pass RC algorithm
+This section provides a more detailed description of the main steps involved in
+the Final Pass RC algorithm.
 
 ### GoP-level Bit Assignment (get_kf_group_bits())
 
 Based on the mode of the VBR algorithm, different GoP bit assignments are used.
 
-#### 1-Pass VBR + LAD with Look Ahead Shorter than GoP size
+#### One-Pass VBR + LAD with Look Ahead Shorter than GoP size
 
-In this case, the look ahead is not long enough to cover the GOP. So, a uniform rate distribution is used.
+In this case, the look ahead is not long enough to cover the GOP. So, a uniform
+rate distribution is used.
 
 ___kf_group_bits = number of frames in GoP * avg_bits_per_frame___
 
 where *avg_bits_per_frame* represents the average number of bits per frame.
 
-#### 1-Pass VBR + LAD with Look Ahead Longer than GoP size
+#### One-Pass VBR + LAD with Look Ahead Longer than GoP size
 
-When first pass statistics are available for one GoP or more, the frame errors are used to allocate bit for the GoP as follows:
+When first pass statistics are available for one GoP or more, the frame errors
+are used to allocate bits for the GoP as follows:
 
 ___kf_group_bits = bits left * (kf_group_err / modified_error_left)___
 
@@ -113,81 +206,147 @@ where:
 - *kf_group_err* is the calculated error for the GoP as the sum of frame errors in the GoP.
 - *modified_error_left* is the calculated error over the remaining frames in the clip.
 
-In the above definitions, error is defined as function of the best Inter vsIntra error from the first pass.
+In the above definitions, error is defined as a function of the best Inter vs.
+Intra error from the first pass.
 
-#### 3-Pass VBR:
+#### Multi-Pass VBR
 
 In this case, the error is replaced by the actual number of bits in the previous pass.
 
 ___kf_group_bits = bits left * (kf_group_rate_in_ref / rate_in_ref_left)___
 
-where *kf_group_rate_in_ref* is the sum over the corresponding frames in the GoP of the actual number of bits from the previous CRF pass, and where *rate_in_ref_left* is the sum of the actual bits of the remaining frames.
+where *kf_group_rate_in_ref* is the sum over the corresponding frames in the
+GoP of the actual number of bits from the previous CRF pass, and where
+*rate_in_ref_left* is the sum of the actual bits of the remaining frames.
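+
+As an illustration, the following minimal C sketch computes the two GoP-level
+budgets described above. The helper names and the divide-by-zero guards are
+assumptions for the example; they do not reproduce the actual
+*get_kf_group_bits()* implementation.
+
+```c
+#include <stdint.h>
+
+/* One-pass VBR + LAD: allocate bits in proportion to the GoP error. */
+static int64_t kf_group_bits_one_pass(int64_t bits_left, double kf_group_err,
+                                      double modified_error_left) {
+    if (modified_error_left <= 0.0) /* hypothetical guard */
+        return bits_left;
+    return (int64_t)(bits_left * (kf_group_err / modified_error_left));
+}
+
+/* Multi-pass VBR: the error is replaced by the actual number of bits
+ * spent on the GoP in the previous (middle) pass. */
+static int64_t kf_group_bits_multi_pass(int64_t bits_left,
+                                        int64_t kf_group_rate_in_ref,
+                                        int64_t rate_in_ref_left) {
+    if (rate_in_ref_left <= 0) /* hypothetical guard */
+        return bits_left;
+    return bits_left * kf_group_rate_in_ref / rate_in_ref_left;
+}
+```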
 
 ### Mini-GoP level Bit Assignment (calculate_total_GF_group_bits())
 
-The concept of rate allocation for the mini-GoP is similar to the rate allocation of the GoP. Based on the VBR mode, we have the following scenarios.
+The concept of rate allocation for the mini-GoP is similar to the rate
+allocation of the GoP. Based on the VBR mode, we have the following scenarios.
 
-#### 1-Pass VBR + LAD
+#### One-Pass VBR + LAD
 
 ___GF_group_bits = kf_bits_left * (GF_group_err / kf_error_left)___
 
-*kf_bits_left* refers to the remaining bit budget in the GoP. *GF_group_err* is the calculated error for the gf group as the sum of frame errors in the gf group and *kf_error_left* is the calculated error for the remaining frames in the GoP. Error is defined as best inter vsintra error from the first pass for the mini-GoP.
+*kf_bits_left* refers to the remaining bit budget in the GoP. *GF_group_err* is
+the calculated error for the gf group as the sum of frame errors in the gf
+group and *kf_error_left* is the calculated error for the remaining frames in
+the GoP. Error is defined as a function of the best inter vs. intra error from
+the first pass for the mini-GoP.
 
-#### 3-Pass
+#### Multi-Pass
 
 In this case, the error is replaced by the actual number of bits in the previous pass.
 
-___GF_group_bits = kf_bits left * (GF_group_rate_in ref / rate_in_ref_kf_left)___
+___GF_group_bits = kf_bits_left * (GF_group_rate_in_ref / rate_in_ref_kf_left)___
 
-*GF_group_rate_in* ref is the sum over the corresponding frames in the gf group or mini-GoP of the actual number of bits from the previous CRF pass. *rate_in_ref_kf_left* is the sum of actual bits of the frames in the remaining mini-GoPs in the GoP.
+*GF_group_rate_in_ref* is the sum over the corresponding frames in the gf group
+or mini-GoP of the actual number of bits from the previous CRF pass.
+*rate_in_ref_kf_left* is the sum of the actual bits of the frames in the
+remaining mini-GoPs in the GoP.
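+
+The mini-GoP rule mirrors the GoP-level rule one level down. A minimal sketch
+of the multi-pass variant follows; the clamp to the remaining GoP budget is an
+assumption added for the example.
+
+```c
+#include <stdint.h>
+
+/* Share the remaining GoP budget among mini-GoPs in proportion to their
+ * actual rate in the previous pass, never exceeding the remaining budget. */
+static int64_t gf_group_bits_multi_pass(int64_t kf_bits_left,
+                                        int64_t gf_group_rate_in_ref,
+                                        int64_t rate_in_ref_kf_left) {
+    if (rate_in_ref_kf_left <= 0) /* hypothetical guard */
+        return kf_bits_left;
+    const int64_t bits =
+        kf_bits_left * gf_group_rate_in_ref / rate_in_ref_kf_left;
+    return bits < kf_bits_left ? bits : kf_bits_left; /* hypothetical clamp */
+}
+```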
 
 ### Frame Level Bit Assignment
 
-After calculating the rate per GoP and mini-GoP, the rate control algorithm computes the base target bits for each frame.  The data is stored in *GF_group->bit_allocation[]* for all pictures in the mini-GoP and then copied to *base_frame_target* under *PCS* structure.
-
-#### 1-Pass VBR + LAD
+After calculating the rate per GoP and mini-GoP, the rate control algorithm
+computes the base target bits for each frame. The data is stored in
+*GF_group->bit_allocation[]* for all pictures in the mini-GoP and then copied
+to *base_frame_target* under the *PCS* structure.
+
+#### One-Pass VBR + LAD
+
+The total number of bits in each mini-GoP is distributed among all frames based
+on the number of frames in each layer and a *layer_fraction* table in the
+*allocate_GF_group_bits()* function. The main idea behind the distribution is
+to allocate more budget to frames in lower temporal layers. For key and base
+layer frames, the boost factor is used to adjust the number of bits, where
+frames with a higher boost factor are assigned more bits
+(*calculate_boost_bits()*).
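+
+As a toy illustration of the layer-based split, the sketch below distributes a
+mini-GoP budget across temporal layers. The fractions are invented for the
+example and are not the actual *layer_fraction* table.
+
+```c
+#include <stdint.h>
+
+#define MAX_LAYERS 6
+
+/* Split a mini-GoP budget across temporal layers, giving larger fractions
+ * to lower layers. The fractions below are illustrative only. */
+static void split_gf_group_bits(int64_t gf_group_bits,
+                                const int frames_in_layer[MAX_LAYERS],
+                                int64_t bits_per_frame[MAX_LAYERS]) {
+    /* Hypothetical per-layer fractions (sum to 1.0). */
+    static const double layer_fraction[MAX_LAYERS] = {
+        0.30, 0.25, 0.20, 0.15, 0.07, 0.03
+    };
+    for (int layer = 0; layer < MAX_LAYERS; ++layer) {
+        if (frames_in_layer[layer] > 0)
+            bits_per_frame[layer] = (int64_t)(gf_group_bits *
+                layer_fraction[layer] / frames_in_layer[layer]);
+        else
+            bits_per_frame[layer] = 0;
+    }
+}
+```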
 
-The total number of bits in each mini-GoP are distributed among all frames based on the number of frames in each layer and a *layer_fraction* table in *allocate_GF_group_bits()* function. The main idea behind the distribution is to allocate more budget to frames in lower temporal layers. For Key and base layer frames, the boost factor is used as a factor to adjust the number of bits, where frames with higher boost factor are assigned higher bits (*calculate_boost_bits()*).
+#### Multi-Pass
 
-#### 3-Pass
-
-In this case, the actual rate data of each picture from the preceding pass is used to calculate the bit budget per frame as follows:
+In this case, the actual rate data of each picture from the preceding pass is
+used to calculate the bit budget per frame as follows:
 
 ___bit_allocation = GF_group_bits * total_num_bits / gf_stats.GF_group_rate___
 
-*total_num_bits (stat_struct[i].total_num_bits)* refers to actual number of bits for the current frame in the previous pass, and *GF_group_rate (gf_stats.GF_group_rate)* is the total number of bits allocated in the preceding pass to the mini-GoP or GF group to which the current frame belongs.
-
-After the base target bits per frame are calculated using the information from the look ahead or previous passes (base_frame_target), the rate control updates the target budget  based on the feedback received from packetization.
+*total_num_bits (stat_struct[i].total_num_bits)* refers to the actual number of
+bits for the current frame in the previous pass, and *GF_group_rate
+(gf_stats.GF_group_rate)* is the total number of bits allocated in the
+preceding pass to the mini-GoP or GF group to which the current frame belongs.
+
+After the base target bits per frame are calculated using the information from
+the look ahead or previous passes (base_frame_target), the rate control updates
+the target budget based on the feedback received from packetization.
 
-___this_frame_target = base_frame_target + (vbr_bits_off_target >= 0) ? max_delta : -max_delta___
+___this_frame_target = base_frame_target + ((vbr_bits_off_target >= 0) ? max_delta : -max_delta)___
 
-where this_frame_target is the updated target number of bits and *vbr_bits_off_target* is calculated as:
+where *this_frame_target* is the updated target number of bits and
+*vbr_bits_off_target* is calculated as:
 
 ___vbr_bits_off_target += base_frame_target - actual_frame_size;___
 
-*vbr_bits_off_target* > 0 means we have extra bits to spend and *vbr_bits_off_target* < 0 means we are currently overshooting. The *max_delta* is calculated based on the *vbr_bits_off_target* and the number of remaining frames (i.e. *max_delta=vbr_bits_off_target* /(number of remaining frames)).
+*vbr_bits_off_target* > 0 means we have extra bits to spend and
+*vbr_bits_off_target* < 0 means we are currently overshooting. The *max_delta*
+is calculated based on *vbr_bits_off_target* and the number of remaining frames
+(i.e. *max_delta = vbr_bits_off_target / (number of remaining frames)*).
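+
+A compact C sketch of this feedback correction follows, under the assumption
+that *max_delta* is applied as the magnitude
+*|vbr_bits_off_target| / (number of remaining frames)*; the names and guards
+are hypothetical.
+
+```c
+#include <stdint.h>
+#include <stdlib.h> /* for llabs() */
+
+/* Running surplus (+) or deficit (-) of bits, updated from feedback. */
+static int64_t vbr_bits_off_target = 0;
+
+/* Adjust the per-frame target around the base allocation. */
+static int64_t update_frame_target(int64_t base_frame_target,
+                                   int frames_remaining) {
+    if (frames_remaining <= 0) /* hypothetical guard */
+        return base_frame_target;
+    const int64_t max_delta = llabs(vbr_bits_off_target) / frames_remaining;
+    return base_frame_target +
+           ((vbr_bits_off_target >= 0) ? max_delta : -max_delta);
+}
+
+/* Packetization feedback: track the gap between target and actual size. */
+static void post_encode_feedback(int64_t base_frame_target,
+                                 int64_t actual_frame_size) {
+    vbr_bits_off_target += base_frame_target - actual_frame_size;
+}
+```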
 
 ### Frame Level QP assignment
 
-In the following, qindex is 4xQP for most of the QP values and represents the quantization parameter the encoder works with internally instead of QP. Given the target bit allocation for a given frame, it is now desired to find a qindex to use with the frame that would result in an encoded frame size that is as close as possible to the allocated number of bits for the frame. To achieve this objective, a range of candidate qindex values is first defined based on data from TPL, state of the rate control buffers and statistics from the IPP pass and possibly the middle pass when applicable. Using predefined lookup tables that relate qindex information to encoding bits, the qindex that yields the best matching rate is selected for the frame.
+In the following, qindex is 4xQP for most of the QP values and represents the
+quantization parameter the encoder works with internally instead of QP. Given
+the target bit allocation for a given frame, it is now desired to find a qindex
+to use with the frame that would result in an encoded frame size that is as
+close as possible to the allocated number of bits for the frame. To achieve
+this objective, a range of candidate qindex values is first defined based on
+data from TPL, state of the rate control buffers and statistics from the IPP
+pass and possibly the middle pass when applicable. Using predefined lookup
+tables that relate qindex information to encoding bits, the qindex that yields
+the best matching rate is selected for the frame.
 
 #### Identifying the Subset of Qindex Values to work with
 
-The range of qindex values is between the active_worst_quality and the active_best_quality variables. The calculation of each of those two variables is outlined in the following.
+The range of qindex values is between the active_worst_quality and the
+active_best_quality variables. The calculation of each of those two variables
+is outlined in the following.
 
 ##### Calculation of Active_Worst_Quality
 
-active_worst_quality refers to the highest qindex that can be assigned in each mini-GoP and is usually associated with the qindex for the non-reference frames. It is calculated per mini-GoP using data from the previous pass and is updated later based on internal rate control buffers. First, the target number of bits per mini-GoP is calculated and then using the number of frames in the mini-GoP and the number of 16x16 blocks in each picture, the target number of bits per 16x16 block is calculated.  The algorithm then loops over the *qindex values* and estimates the number of bits per 16x16 block using predefined lookup tables that map qindex to number of bits and a *correction_factor*. The *qindex* that yields the number of bits closest to the target number of bits is chosen as the final qindex of the picture. (see *av1_rc_regulate_q()* and *get_twopass_worst_quality()*)
+active_worst_quality refers to the highest qindex that can be assigned in each
+mini-GoP and is usually associated with the qindex for the non-reference
+frames. It is calculated per mini-GoP using data from the previous pass and is
+updated later based on internal rate control buffers. First, the target number
+of bits per mini-GoP is calculated and then using the number of frames in the
+mini-GoP and the number of 16x16 blocks in each picture, the target number of
+bits per 16x16 block is calculated. The algorithm then loops over the qindex
+values and estimates the number of bits per 16x16 block using predefined
+lookup tables that map qindex to number of bits and a *correction_factor*. The
+*qindex* that yields the number of bits closest to the target number of bits is
+chosen as the final qindex of the picture (see *av1_rc_regulate_q()* and
+*get_twopass_worst_quality()*).
 
 ##### Calculation of Active_Best_Quality
 
-The variable active_best_quality represents the lowest qindex that can be assigned per picture given the the value of active_worst_quality. The picture *qindex* in VBR mode is computed in the *rc_pick_q_and_bounds()* function. The details of each step are described in this section.
+The variable active_best_quality represents the lowest qindex that can be
+assigned per picture given the value of active_worst_quality. The picture
+*qindex* in VBR mode is computed in the *rc_pick_q_and_bounds()* function. The
+details of each step are described in this section.
+
+The calculation of active_best_quality is a function of active_worst_quality
+and of data collected from TPL. Similar to CRF, ordinary QP scaling based on
+TPL is used to improve the performance of a hierarchical prediction structure
+where smaller quantization parameters (QP) are assigned to frames in the lower
+temporal layers, which serve as reference pictures for the higher temporal
+layer pictures. In the TPL algorithm, the propagation factor r0 is used to
+improve the base layer picture QP assignment. The main idea is that the lower
+r0 is, the more improvement the picture would need. A summary of the QPS
+adjustment ideas is presented below. For more details, refer to the Temporal
+Dependency Model (TPL) document.
 
-The calculation of active_best_quality is a function of active_worst_quality and of data collected from TPL. Similar to CRF, ordinary QP scaling based on TPL is used to improve the performance of a hierarchical prediction structure where smaller quantization parameters (QP) are assigned to frames in the lower temporal layers, which serve as reference pictures for the higher temporal layer pictures. In the TPL algorithm, the propagation factor r0 is used to improve the base layer picture QP assignment.  The main idea is that the lower r0 is the more improvements the picture would need. A summary of the QPS adjustment ideas is presented below. For more details, refer to Temporal Dependency Model (TPL) document.
-
-- Intra pictures: *The active_best_quality* for both Intra Key Frames (IDR) and non-Key frames (CRA) is generated using similar approaches with slightly different tuning.  A lower *active_best_quality* is assigned to the pictures with small r0 values. The main idea behind the adjustment of the *active_best_quality* for a given picture is as follows:
+- Intra pictures: The *active_best_quality* for both Intra Key Frames (IDR) and non-Key frames (CRA) is generated using similar approaches with slightly different tuning. A lower *active_best_quality* is assigned to pictures with small r0 values. The main idea behind the adjustment of the *active_best_quality* for a given picture is as follows:
   * Compute *kf_boost* based on the r0 for the picture, where *kf_boost* is inversely proportional to r0. A range for allowed *kf_boost* values is defined by *kf_boost_low* = 400 and *kf_boost_high* = 5000.
-  * The range of *active_best_quality* adjustment is defined by two lookup tables that associate to *active_worst_quality* an upper bound (*high_motion_minq*) and a lower bound (*low_motion_minq*) to the *active_best_quality* adjustment interval.  *high_motion_minq* and *low_motion_minq* are shown in Figure 4.
+  * The range of *active_best_quality* adjustment is defined by two lookup tables that associate to *active_worst_quality* an upper bound (*high_motion_minq*) and a lower bound (*low_motion_minq*) to the *active_best_quality* adjustment interval. *high_motion_minq* and *low_motion_minq* are shown in Figure 4.
 
 ![rc_figure4](./img/rc_figure4.PNG)
 
@@ -211,17 +370,28 @@ The calculation of active_best_quality i
 
 #### Identifying the Best Qindex
 
-Once the *active_best_quality* and *active_worst_quality* variables are calculated, the algorithm finds the qindex in the range from active_worst_quality and active_best_quality that has the closest rate to the target rate (see *av1_rc_regulate_q()*). First, the target rate per 16x16 blocks is calculated, then the algorithm loops over the qindex values and estimates the rate per 16x16 block using  predefined lookup tables that map qindex to number of bits and a *correction_factor*. The *qindex* with the closest rate to the target is chosen as the final *qindex* of the picture.
+Once the *active_best_quality* and *active_worst_quality* variables are
+calculated, the algorithm finds the qindex in the range from
+active_best_quality to active_worst_quality that has the closest rate to the
+target rate (see *av1_rc_regulate_q()*). First, the target rate per 16x16
+block is calculated; then the algorithm loops over the qindex values and
+estimates the rate per 16x16 block using predefined lookup tables that map
+qindex to number of bits and a *correction_factor*. The *qindex* with the
+closest rate to the target is chosen as the final *qindex* of the picture.
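+
+The selection can be pictured as a closest-match loop over the qindex range.
+In the sketch below, the bits-per-block model is a toy stand-in for the
+encoder's lookup tables; only the search structure is meant to be illustrative.
+
+```c
+#include <stdlib.h> /* for abs() */
+
+/* Toy stand-in for the qindex -> bits-per-16x16-block lookup tables:
+ * any monotonically decreasing model serves the illustration. */
+static int estimate_bits_per_block(int qindex, double correction_factor) {
+    return (int)(correction_factor * 400000.0 / (qindex + 1));
+}
+
+/* Pick the qindex in [active_best_quality, active_worst_quality] whose
+ * estimated rate is closest to the per-block target. */
+static int regulate_q(int target_bits_per_block, int active_best_quality,
+                      int active_worst_quality, double correction_factor) {
+    int best_q = active_worst_quality;
+    int best_diff = -1;
+    for (int q = active_best_quality; q <= active_worst_quality; ++q) {
+        const int diff = abs(estimate_bits_per_block(q, correction_factor) -
+                             target_bits_per_block);
+        if (best_diff < 0 || diff < best_diff) {
+            best_diff = diff;
+            best_q = q;
+        }
+    }
+    return best_q;
+}
+```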
 
 
 ### SB-based QP-Modulation Algorithm
 
-The SB-based QP-modulation algorithm is based on TPL and is the same between the VBR and the CRF modes. In TPL, the parameter beta plays the same role at the SB-level as that of r0 at the picture level. Therefore, a large beta for a given SB implies that quality of that SB should be improved.
-For each SB, the main idea in QP modulation is that a new QP value is determined based on the corresponding beta value using the following equation:
+The SB-based QP-modulation algorithm is based on TPL and is the same between
+the VBR and the CRF modes. In TPL, the parameter beta plays the same role at
+the SB-level as that of r0 at the picture level. Therefore, a large beta for a
+given SB implies that quality of that SB should be improved. For each SB, the
+main idea in QP modulation is that a new QP value is determined based on the
+corresponding beta value using the following equation:
 
 ![rc_math1](./img/rc_math1.PNG)
 
-where f = sqrt(.) for intra_picture or when beta < 1, and  f=sqrt(sqrt(.)) otherwise. The idea then behind the TPL QP modulation is as follows:
+where f = sqrt(.) for intra pictures or when beta < 1, and f = sqrt(sqrt(.)) otherwise. The idea behind the TPL QP modulation is then as follows (a code sketch follows the list):
 - If beta > 1 -> rk < r0 -> the SB does not have good quality compared to the average picture quality -> reduce the QP for the SB, e.g. QP' = QP/sqrt(beta) or QP' = QP/sqrt(sqrt(beta)). Since beta > 1, QP' < QP.
 - If beta < 1 -> rk > r0 -> the SB has better quality than the average picture quality -> the QP for the SB can be increased, e.g. QP' = QP/sqrt(beta). QP' would then be larger than QP since beta < 1.
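+
+A minimal sketch of this modulation rule follows; the clamping bounds are
+hypothetical and only the QP' = QP / f(beta) mapping reflects the text above.
+
+```c
+#include <math.h>
+
+/* SB-level QP modulation: QP' = QP / f(beta), with f = sqrt for intra
+ * pictures or when beta < 1, and f = sqrt(sqrt) otherwise. */
+static int modulate_sb_qp(int qp, double beta, int is_intra_picture) {
+    const double f = (is_intra_picture || beta < 1.0)
+                         ? sqrt(beta)
+                         : sqrt(sqrt(beta));
+    int new_qp = (int)(qp / f + 0.5);
+    if (new_qp < 1) new_qp = 1;   /* hypothetical clamp */
+    if (new_qp > 63) new_qp = 63; /* hypothetical clamp */
+    return new_qp;
+}
+```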
 
@@ -253,7 +423,19 @@ where
 
 ### Re-encoding
 
-The re-encoding mechanism is used to achieve the desired bitrate without much overshoot or undershoot. In SVT-AV1, the re-encoding decision is made at the end of Mode Decision and after the normative coding of the whole frame. Since the re-encode decision making takes place before entropy coding, the frame size is estimated inside Mode Decision instead of getting the actual frame size information from the packetization kernel. The estimated size is compared to the target rate and if it does not satisfy the rate constraints, the algorithm decides to re-encode the frame with a new qindex. In general, re-encoding can be very costly, however based on the flexible design of the SVT encoder, only the Mode Decision part is performed again and there is no need to redo other encoder pipeline tasks such as motion estimation, entropy coding or in-loop filtering. The flowchart in Figure 6 shows the high-level design of the re-encode decision mechanism.
+The re-encoding mechanism is used to achieve the desired bitrate without much
+overshoot or undershoot. In SVT-AV1, the re-encoding decision is made at the
+end of Mode Decision and after the normative coding of the whole frame. Since
+the re-encode decision making takes place before entropy coding, the frame size
+is estimated inside Mode Decision instead of getting the actual frame size
+information from the packetization kernel. The estimated size is compared to
+the target rate and if it does not satisfy the rate constraints, the algorithm
+decides to re-encode the frame with a new qindex. In general, re-encoding can
+be very costly; however, thanks to the flexible design of the SVT encoder, only
+the Mode Decision part is performed again and there is no need to redo other
+encoder pipeline tasks such as motion estimation, entropy coding or in-loop
+filtering. The flowchart in Figure 6 shows the high-level design of the
+re-encode decision mechanism.
 
 ![rc_figure6](./img/rc_figure6.PNG)
 
@@ -261,17 +443,184 @@ The re-encoding mechanism is used to ach
 
 ### Post Encode RC Update
 
-After each frame is completely processed in the Packetization process, feedback information representing the size of the processed frame is sent to the rate control algorithm to update the internal buffers and variables that are used in the computation of *qindex* for future frames. Using this mechanism, the algorithm keeps track of the difference between the target number of bits and actual number of bits for the encoded frames (*vbr_bits_off_target*).
+After each frame is completely processed in the Packetization process, feedback
+information representing the size of the processed frame is sent to the rate
+control algorithm to update the internal buffers and variables that are used in
+the computation of *qindex* for future frames. Using this mechanism, the
+algorithm keeps track of the difference between the target number of bits and
+actual number of bits for the encoded frames (*vbr_bits_off_target*).
 
 ___vbr_bits_off_target += base_frame_target - projected_frame_size___
 
-where projected_frame_size in this case refers to the actual frame size. Based on the sign of *vbr_bits_off_target*, a limited adjustment is made to the target rate of subsequent frames to push *vbr_bits_off_target* back towards its acceptable range of values. The acceptable range is specified as an input to the encoder using *undershoot_pct* and *overshoot_pct*, where the latter refer to the tolerable undershoot and overshoot percentages of deviation from the target rate.
+where projected_frame_size in this case refers to the actual frame size. Based
+on the sign of *vbr_bits_off_target*, a limited adjustment is made to the
+target rate of subsequent frames to push *vbr_bits_off_target* back towards its
+acceptable range of values. The acceptable range is specified as an input to
+the encoder using *undershoot_pct* and *overshoot_pct*, which specify the
+tolerable undershoot and overshoot percentages of deviation from the target
+rate.
+
+*extend_minq* and *extend_maxq* are also two important variables that are used
+in *active_best_quality* and *active_worst_quality* adjustment. *extend_minq*
+and *extend_maxq* are updated by comparing *rate_error_estimate* with
+*undershoot_pct* and *overshoot_pct*:
+
+___rate_error_estimate = (vbr_bits_off_target * 100) / total_actual_bits___
+
+The main idea is to update the range of qindex values, which lies between
+active_best_quality and active_worst_quality, using the feedback information
+from the packetization. If *rate_error_estimate* > *undershoot_pct*, the
+encoder is undershooting, so the lower value of the range is reduced by
+*extend_minq*, hence allowing the encoder to reduce the qindex and increase the
+rate. If *rate_error_estimate* < *-overshoot_pct*, the higher value of the
+qindex range is increased by *extend_maxq* to reduce the overall bit rate (see
+*svt_av1_twopass_postencode_update()*).
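+
+As a rough illustration, this adjustment could be sketched as below. The unit
+step sizes are an assumption; the actual logic lives in
+*svt_av1_twopass_postencode_update()*.
+
+```c
+/* rate_error_estimate = (vbr_bits_off_target * 100) / total_actual_bits. */
+static void adjust_q_range(int rate_error_estimate, int undershoot_pct,
+                           int overshoot_pct, int *extend_minq,
+                           int *extend_maxq) {
+    if (rate_error_estimate > undershoot_pct) {
+        /* Undershooting: allow lower qindex values to spend more bits. */
+        ++*extend_minq;
+    } else if (rate_error_estimate < -overshoot_pct) {
+        /* Overshooting: allow higher qindex values to save bits. */
+        ++*extend_maxq;
+    }
+}
+```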
+
+## Constant Bitrate Rate Control
+
+In several rate constrained video coding applications, it is desired to use the
+constant bitrate (CBR) mode to maintain a constant bitrate during the encoding
+process while allowing the resulting video quality to vary. In SVT-AV1, the CBR
+mode is implemented through a qindex adjustment mechanism based on the fullness
+status of a virtual buffer. A virtual buffer is used to account for the size of
+the encoded frames. Knowing the desired constant bit rate, the size of
+previously encoded pictures and the fullness status of the virtual buffer, the
+algorithm adjusts the qindex of the frame being encoded to maintain the virtual
+buffer fullness at the desired level. Once a frame is completely processed in
+the packetization process, feedback information representing the size of the
+processed frame is sent to the rate control algorithm to update the buffer
+fullness level.
+
+### CBR Data Flow
+
+A high-level description of the steps involved in the CBR mode in SVT-AV1 is as follows:
+1) Set the CBR rate control virtual buffer parameters.
+2) Determine the target bitrate for the frame being processed based on the buffer status and the packetization process feedback.
+3) Determine the range of candidate qindex values and generate the final qindex.
+4) Encode the current picture and update the virtual buffer level.
+
+### Setting the virtual buffer parameters (set_rc_buffer_sizes())
+
+The CBR rate control makes use of a virtual buffer and tries to maintain the
+buffer fullness close to a desired optimal fullness level. This goal is
+achieved by adjusting the encoded frame size through the quantization parameter
+qindex. A diagram of a virtual buffer is shown in Figure 7. The input to the
+frame buffer is the desired frame size corresponding to the target bitrate. The
+buffer content is incremented by the target frame size every time a new frame
+is to be processed. The output is the actual encoded frame size, which is
+removed from the buffer content at the same frequency at which the buffer is
+incremented by the target frame size.
 
-*extend_minq* and *extend_maxq* are also two important variables that are used in *active_best_quality* and *active_worst_quality* adjustment. *extend_minq* and *extend_maxq* are updated by comparing *rate_error_estimate* and *undershoot_pct* and *overshoot_pct*:
+![rc_figure7](./img/rc_figure7.PNG)
+###### Figure 7. CBR virtual buffer diagram.
 
-___rate_error_estimate  = (vbr_bits_off_target * 100) / total_actual_bits___
+The virtual buffer parameters are initialized once, before invoking the CBR rate control for the first frame, as follows (a short C sketch follows the parameter list below):
 
-The main idea is to update the range of qindex values which is between the active_best_quality and active_worst_quality  using the feedback information from the packetization. If *rate_error_estimate* > *undershoot_pct*, the encoder is undershooting, so the lower value of the range is reduced by extend_minq, hence allowing the encoder to reduce the qindex and increase the rate. If *rate_error_estimate* < *-overshoot_pct*, the higher value of the qindex range is increased by extend_maxq to reduce the overall bit rate (see svt_av1_twopass_postencode_update).
+starting_buffer_level = starting_buffer_level_ms * target_bit_rate / 1000 <br /> optimal_buffer_level = optimal_buffer_level_ms * bandwidth / 1000 <br /> maximum_buffer_size = maximum_buffer_size_ms * target_bit_rate / 1000
+
+where the following are user-specified input parameters to the encoder:
+
+- starting_buffer_level_ms: Initial delay in milliseconds before the decoder starts removing bits from its buffer.
+- optimal_buffer_level_ms: Optimal delay in milliseconds the decoder should maintain.
+- maximum_buffer_size_ms: Maximum buffer delay in milliseconds.
+- target_bit_rate: Target bitrate.
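+
+A minimal sketch of this initialization follows, assuming *bandwidth* above
+equals *target_bit_rate* and using a hypothetical holder struct (cf.
+*set_rc_buffer_sizes()*).
+
+```c
+#include <stdint.h>
+
+typedef struct {
+    int64_t starting_buffer_level;
+    int64_t optimal_buffer_level;
+    int64_t maximum_buffer_size;
+} RcBufferSizes;
+
+/* Convert millisecond delays into bit budgets at the target rate. */
+static void init_rc_buffer_sizes(RcBufferSizes *b, int64_t target_bit_rate,
+                                 int64_t starting_buffer_level_ms,
+                                 int64_t optimal_buffer_level_ms,
+                                 int64_t maximum_buffer_size_ms) {
+    b->starting_buffer_level =
+        starting_buffer_level_ms * target_bit_rate / 1000;
+    b->optimal_buffer_level = optimal_buffer_level_ms * target_bit_rate / 1000;
+    b->maximum_buffer_size = maximum_buffer_size_ms * target_bit_rate / 1000;
+}
+```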
+
+### Determining the target frame size for the frame being processed (one_pass_rt_rate_alloc())
+
+The target frame size is computed differently for key frames and for other frames.
+
+#### A. Key frame target frame size calculation (calc_iframe_target_size_one_pass_cbr())
+
+**First Key Frame**
+
+The target frame size for the first key frame is set based on the starting
+buffer level multiplied by a weight that depends on the GoP length.
+
+___Target_frame_size = (starting_buffer_level * w)___
+
+where w = 3/4 if intra_period = -1 (only one I frame) or intra_period > 128, w = 1/4 if 0 < intra_period < 64, and w = 1/2 if 64 <= intra_period <= 128.
+
+**Remaining key frames**
+
+For the remaining key frames, the target frame size is set based on the average
+frame size (avg_frame_size = target_bit_rate/number_of_frames_per_second)
+multiplied by a boost factor.
+
+___Target_frame_size = ((16 + kf_boost) \* avg_frame_size)/16___
+where
+___kf_boost = 2 \* framerate - 16___
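+
+A sketch of both key-frame rules follows. The intra-period thresholds mirror
+the text above; the boundary handling and names are assumptions.
+
+```c
+#include <stdint.h>
+
+/* First key frame: weight the starting buffer level by the GoP length. */
+static int64_t first_kf_target_size(int64_t starting_buffer_level,
+                                    int intra_period) {
+    double w;
+    if (intra_period == -1 || intra_period > 128)
+        w = 3.0 / 4.0;
+    else if (intra_period < 64)
+        w = 1.0 / 4.0;
+    else
+        w = 1.0 / 2.0;
+    return (int64_t)(starting_buffer_level * w);
+}
+
+/* Remaining key frames: boost the average frame size. */
+static int64_t next_kf_target_size(int64_t avg_frame_size, double framerate) {
+    const double kf_boost = 2.0 * framerate - 16.0;
+    return (int64_t)((16.0 + kf_boost) * avg_frame_size / 16.0);
+}
+```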
+
+#### B. Non-Key frame target frame size calculation (av1_calc_pframe_target_size_one_pass_cbr())
+
+Setting the target frame size for non-key frames involves a first step where an
+initial value of the target frame size is set followed by a second step where
+an adjustment of the generated target frame size occurs. In the first step, the
+initial target frame size is normally set to avg_frame_size. In the second
+step, the target frame size is adjusted based on the difference between the
+optimal buffer level and the current buffer level. The algorithm tends to lower
+the target frame size for the current frame when the computed difference is
+positive or increase the target frame size for the current frame if the
+difference is negative. Finally, the adjusted target frame size is clipped.
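+
+A sketch of the two-step computation follows; the damping divisor and the
+clipping bounds are invented for illustration and are not the actual values.
+
+```c
+#include <stdint.h>
+
+static int64_t pframe_target_size(int64_t avg_frame_size,
+                                  int64_t optimal_buffer_level,
+                                  int64_t buffer_level) {
+    int64_t target = avg_frame_size; /* step 1: initial target */
+    /* step 2: adjust by the distance from the optimal buffer level;
+     * a positive diff (buffer below optimal) lowers the target. */
+    const int64_t diff = optimal_buffer_level - buffer_level;
+    target -= diff / 4; /* hypothetical damping factor */
+    /* clip the adjusted target (hypothetical bounds) */
+    if (target < avg_frame_size / 2) target = avg_frame_size / 2;
+    if (target > 2 * avg_frame_size) target = 2 * avg_frame_size;
+    return target;
+}
+```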
+
+### Determining the final qindex for the frame (rc_pick_q_and_bounds_no_stats_cbr())
+
+After estimating the target frame size for the current frame, the algorithm
+proceeds with the selection of the qindex that provides the closest frame size
+to the target frame size. This process starts by identifying an interval of
+candidate qindex values [best quality qindex, worst quality qindex]. A suitable
+qindex within that interval is then generated based on the frame type and the
+estimated frame size corresponding to each qindex in the interval.
+
+#### A. Determining the worst quality qindex (calc_active_worst_quality_no_stats_cbr())
+
+The worst quality qindex is the highest allowed qindex value and is initialized
+to 255. It is generated based on the current buffer level. The worst quality
+qindex is first obtained by applying a factor of 5/4 to the average qindex of
+the previously coded frames of the same type (i.e. key frames or non-key
+frames). It is then further adjusted based on the fullness of the buffer as
+follows:
+
+- If the buffer fullness level is greater than the optimal level, then the
+  value of the worst quality qindex is reduced (to increase the actual size of
+  the encoded frame) in such a way that the expected reduction in buffer level
+  does not go beyond 30% of its current level.
+- If the buffer fullness level is greater than the critical level and less than
+  the optimal level, then the worst quality qindex is increased by a factor
+  that is a linear function of the current buffer level and the difference
+  between worst_quality and the average qindex mentioned in the initialization
+  step above.
+- If the buffer level is less than the critical level, the qindex is set to the
+  highest possible qindex value worst_qindex. The latter is set by default to
+  255 or to any value entered by the user.
+
+#### B. Determining the best quality qindex (calc_active_best_quality_no_stats_cbr())
+
+In the case of a key frame, the best quality qindex is initially set to a fixed
+value (default 4). For reference non-key pictures, the best quality qindex is
+inherited from the qindex of the previously coded reference pictures. For the
+remaining (non-reference) frames, the best quality qindex is obtained by taking
+the smaller of the worst quality qindex and the average qindex of the
+previously coded non-reference pictures.
+
+#### C. Computing the qindex for the frame
+
+The final qindex for the frame is obtained by looping over all the qindex
+values in the interval [best quality qindex, worst quality qindex] and using
+the following model to determine an estimate for the frame size:
+
+![rc_math3](./img/rc_math3.PNG)
+
+where α is 1500000 for key frames and 1300000 otherwise. The correction_factor
+is as in the VBR case. The qindex that provides the closest rate to the target
+frame size is selected.
+
+### Updating the buffer fullness level
+
+The buffer fullness level is initialized at starting_buffer_level. Following
+the encoding of the current frame, the buffer fullness level is updated by
+adding the average frame size (avg_frame_size) and removing the encoded frame
+size.
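+
+A minimal sketch of this bookkeeping follows; the clamp at the maximum buffer
+size is a hypothetical addition.
+
+```c
+#include <stdint.h>
+
+typedef struct {
+    int64_t buffer_level;        /* starts at starting_buffer_level */
+    int64_t maximum_buffer_size;
+} CbrBuffer;
+
+/* After each encoded frame: grow by the average frame size and shrink by
+ * the encoded frame size. */
+static void update_buffer_level(CbrBuffer *buf, int64_t avg_frame_size,
+                                int64_t encoded_frame_size) {
+    buf->buffer_level += avg_frame_size - encoded_frame_size;
+    if (buf->buffer_level > buf->maximum_buffer_size) /* hypothetical clamp */
+        buf->buffer_level = buf->maximum_buffer_size;
+}
+```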
 
 ## Appendix A: Final-Pass Rate Control Functions
 
@@ -279,62 +628,86 @@ A description of the main relevant funct
 
 Picture arriving from Motion Estimation kernel:
 
-|Main Functions                          | Descriptions |
-| -------------                          | ------------------------------ |
-| if (pcs_ptr->picture_number == 0) {    |                                 |
-|     set_rc_buffer_sizes();             | Buffers initialization at the beginning|
-| av1_rc_init()                          | RC initialization at the beginning|
-| }                                      |  |
-| restore_param()                        | Populate the required parameters in RATE_CONTROL,  TWO_PASS and GF_GROUP structures from other structures|
-|  svt_av1_get_second_pass_params()      | Read the stats, assign bits per KF (GoP), mini-GoP and frames|
-| av1_set_target_rate()                  | Update the target rate per frame based on the provided feedback |
-| store_param()                          | Store the required parameters from RATE_CONTROL,  TWO_PASS and GF_GROUP  structures to other structures|
-| process_tpl_stats_frame_kf_gfu_boost() | Update the KF and GFU boosts based on tpl|
-| rc_pick_q_and_bounds()                 | Assign qindex per frame|
-| sb_qp_derivation_tpl_la()              | QPM: assign delta_qindex per SB and lambda per block based on tpl stat|
+| Main Functions                         | Descriptions                                                                                              |
+| -------------                          | ------------------------------                                                                            |
+| if (pcs_ptr->picture_number == 0) {    |                                                                                                           |
+| set_rc_buffer_sizes();                 | Buffers initialization at the beginning                                                                   |
+| av1_rc_init()                          | RC initialization at the beginning                                                                        |
+| }                                      |                                                                                                           |
+| restore_param()                        | Populate the required parameters in RATE_CONTROL, TWO_PASS and GF_GROUP structures from other structures |
+| process_rc_stat()       | Read the stats, assign bits per KF (GoP), mini-GoP and frames                                             |
+| av1_set_target_rate()                  | Update the target rate per frame based on the provided feedback                                           |
+| store_param()                          | Store the required parameters from RATE_CONTROL, TWO_PASS and GF_GROUP structures to other structures    |
+| process_tpl_stats_frame_kf_gfu_boost() | Update the KF and GFU boosts based on tpl                                                                 |
+| rc_pick_q_and_bounds()                 | Assign qindex per frame                                                                                   |
+| sb_qp_derivation_tpl_la()              | QPM: assign delta_qindex per SB and lambda per block based on tpl stat                                    |
 
 Picture arriving from Packetization kernel:
 
-|Main Functions                                                  | Descriptions |
-| ------------------------------------------------------------   | ------------------------------ |
-| restore_GF_group_param()                                       | Populate the required parameters in and GF_GROUP  structure from other structures |
-| av1_rc_postencode_update() svt_av1_twopass_postencode_update() | Update the internal RC and TWO_PASS struct stats based on the received feedback |
+| Main Functions                                                 | Descriptions                                                                     |
+| ------------------------------------------------------------   | ------------------------------                                                   |
+| av1_rc_postencode_update() svt_av1_twopass_postencode_update() | Update the internal RC and TWO_PASS struct stats based on the received feedback  |
 
 More details for some of the main functions:
 
-|svt_av1_get_second_pass_params() {     | Descriptions |
-| ------------------------------------- | ------------------------------ |
-| process_first_pass_stats()            |  Performed on a frame basis. Parts of it are for initialization at POC0, the rest is per frame.                               |
-| if (key_frame)                        | |
-| find_next_key_frame();                | Define the next KF group (GoP) and assign bits to it |
-| if (pcs_ptr->is_new_GF_group)                                     |  |
-|       GF_group_rate_assingment () }   | Define the GF_group (mini-GoP) bits and assign bits per frame based on the target rate|
-
-|rc_pick_q_and_bounds() { {                  | Descriptions |
-| ----------------------------------------   | ------------------------------ |
-| if (frame_is_intra)                        |                                |
-| get_intra_q_and_bounds()                   | Find the active_best_quality (qindex) based on the kf_boost calculated using first pass data|
-| Else                                       |  |
-| get_active_best_quality()                  | Find the active_best_quality (qindex) based on the gf_boost calculated using previous pass data and tpl |
-|  adjust_active_best_and_worst_quality_org()| Adjust active best and worse quality based on the rate|
-|  get_q() }                                 | Get the qindex in the range of active_best_quality to  active_worse_quality based on the target rate per frame |
+| process_rc_stat() {    | Descriptions                                                                                   |
+| ------------------------------------- | ------------------------------                                                                 |
+| process_first_pass_stats()            | Performed on a frame basis. Parts of it are for initialization at POC0, the rest is per frame. |
+| if (key_frame)                        |                                                                                                |
+| kf_group_rate_assingment();                | Rate assignment for the next kf group                                           |
+| if (pcs_ptr->is_new_GF_group)         |                                                                                                |
+| gf_group_rate_assingment() }          | Define the GF_group (mini-GoP) bits and assign bits per frame based on the target rate         |
+
+| rc_pick_q_and_bounds() { {                 | Descriptions                                                                                                  |
+| ----------------------------------------   | ------------------------------                                                                                |
+| if (frame_is_intra)                        |                                                                                                               |
+| get_intra_q_and_bounds()                   | Find the active_best_quality (qindex) based on the kf_boost calculated using first pass data                  |
+| Else                                       |                                                                                                               |
+| get_active_best_quality()                  | Find the active_best_quality (qindex) based on the gf_boost calculated using previous pass data and tpl       |
+| adjust_active_best_and_worst_quality_org() | Adjust active best and worst quality based on the rate                                                        |
+| get_q() }                                  | Get the qindex in the range of active_best_quality to active_worst_quality based on the target rate per frame |
+
+There are some functions (*restore_param()*, *store_param()*,
+*restore_GF_group_param()*) in the rate control kernel that store and restore
+data from PCS to/from internal data structures like RATE_CONTROL, TWO_PASS and
+GF_GROUP. These functions were added to handle the frame-level parallelism and
+out-of-order encoding characteristics of the SVT encoder.
 
-There are some functions (*restore_param()*, *store_param()*, *restore_GF_group_param()*) in the rate control kernel that store and restore data from PCS to/from internal data structures like RATE_CONTROL,  TWO_PASS and GF_GROUP. These functions were added to handle the frame-level parallelism and out-of-order encoding characteristics of the SVT encoder.
-
-## Appendix B: Speed Optimization of the IPP Pass
-
-## Appendix C: Speed Optimization of the Middle Pass
+## Appendix B: Speed Optimization of the Middle Pass
 
 To make the middle pass even faster, the following speed optimizations are done:
-- The input video is down-sampled by two in each direction and the middle pass is performed on a smaller resolution of the input video. Down-sampling results in a significant encoder speed up.
-- Since the middle pass does not output a conformant stream, the encoding of non-reference frames is by-passed to speed up the middle pass encoding.
-
-## Appendix D: Capped CRF
-
-In some video coding applications, it is desired to use the CRF mode with an upper limit for the bit rate. This rate control mode is referred as capped CRF. In this mode, the algorithm tries to achieve the best quality while maintaining the overall bit rate below the maximum bit rate specified as an input to the encoder. If the maximum bit rate is set to a high value, the CRF and capped CRF might produce the same results.
-
-In SVT-AV1, the capped CRF is implemented using the re-encode mechanism and the qindex adjustment of frames using a virtual buffer. First, for each base layer frame, a maximum bit budget is set using the maximum bit rate of the clip. Then using the re-encode algorithm, as described in section 4, the rate violation of each frame is identified and corrected. Similar to other rate control modes, after each frame is completely processed in the Packetization process, feedback information representing the size of the processed frame is sent to the rate control algorithm. A virtual buffer is used to keep track of the size of each frame. Knowing the maximum bit rate and the size of previously encoded pictures, the algorithm adjusts the qindex of the future frame to prevent bit rate violation. For more details of the algorithm see ```capped_crf_reencode()``` and ```crf_assign_max_rate```.
+- The input video is down-sampled by two in each direction and the middle pass
+  is performed on a smaller resolution of the input video. Down-sampling
+  results in a significant encoder speed up.
+- Since the middle pass does not output a conformant stream, the encoding of
+  non-reference frames is bypassed to speed up the middle pass encoding.
+
+## Appendix C: Capped CRF
+
+In some video coding applications, it is desired to use the CRF mode with an
+upper limit for the bit rate. This rate control mode is referred to as capped
+CRF. In this mode, the algorithm tries to achieve the best quality while
+maintaining the overall bit rate below the maximum bit rate specified as an
+input to the encoder. If the maximum bit rate is set to a high value, the CRF
+and capped CRF might produce the same results.
+
+In SVT-AV1, the capped CRF is implemented using the re-encode mechanism and the
+qindex adjustment of frames using a virtual buffer. First, for each base layer
+frame, a maximum bit budget is set using the maximum bit rate of the clip. Then
+using the re-encode algorithm, as described in section 4, the rate violation of
+each frame is identified and corrected. Similar to other rate control modes,
+after each frame is completely processed in the Packetization process, feedback
+information representing the size of the processed frame is sent to the rate
+control algorithm. A virtual buffer is used to keep track of the size of each
+frame. Knowing the maximum bit rate and the size of previously encoded
+pictures, the algorithm adjusts the qindex of the future frame to prevent bit
+rate violation. For more details of the algorithm, see
+```capped_crf_reencode()``` and ```crf_assign_max_rate()```.
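+
+The sketch below illustrates the virtual-buffer idea in isolation; all names,
+steps and constants are hypothetical, so refer to the two functions above for
+the actual logic.
+
+```c
+#include <stdint.h>
+
+typedef struct {
+    int64_t level;          /* accumulated (actual - budgeted) bits     */
+    int64_t bits_per_frame; /* budget derived from the maximum bit rate */
+} VirtualBuffer;
+
+/* Packetization feedback: account for the size of a processed frame. */
+static void vb_update(VirtualBuffer *vb, int64_t frame_bits) {
+    vb->level += frame_bits - vb->bits_per_frame;
+}
+
+/* Tighten the qindex of future frames when the buffer signals a rate
+ * violation, and relax it again as the buffer drains (steps are made up). */
+static int vb_adjust_qindex(const VirtualBuffer *vb, int qindex) {
+    if (vb->level > vb->bits_per_frame)
+        qindex += 4;
+    else if (vb->level < -vb->bits_per_frame)
+        qindex -= 2;
+    return qindex < 0 ? 0 : (qindex > 255 ? 255 : qindex);
+}
+```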
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Recursive-Intra.md 1.2.0+dfsg-2/Docs/Appendix-Recursive-Intra.md
--- 0.9.1+dfsg-1/Docs/Appendix-Recursive-Intra.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Recursive-Intra.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,11 +1,15 @@
+[Top level](../README.md)
+
 # Recursive Intra Prediction
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
-The recursive intra prediction mode involves generating predictions for intra samples in a raster scan order based on filtering a set of neighboring predicted samples.
-As an illustration of the basic idea consider the sample P shown in Figure 1 below.
-The intra prediction for sample P in this case is the weighted sum of samples A, B and C.
-The latter could be reference samples for the current block and/or already predicted samples within the same block.
+The recursive intra prediction mode involves generating predictions for intra
+samples in a raster scan order based on filtering a set of neighboring
+predicted samples. As an illustration of the basic idea consider the sample P
+shown in Figure 1 below. The intra prediction for sample P in this case is the
+weighted sum of samples A, B and C. The latter could be reference samples for
+the current block and/or already predicted samples within the same block.
 
 ![recursive_intra_fig0](./img/recursive_intra_fig0.png)
 
@@ -58,7 +62,8 @@ where mode refers to one of the five sup
 | 3                             | FILTER\_D157\_PRED    |
 | 4                             | FILTER\_PAETH\_PRED   |
 
-The filter coefficients are listed in Table 2 as a function of the intra prediction mode and predicted sample.
+The filter coefficients are listed in Table 2 as a function of the intra
+prediction mode and predicted sample.
 
 ##### Table 2. Filter coefficients as a function of the intra prediction mode and the predicted sample.
 
@@ -72,9 +77,10 @@ example shown in Figure 2, the order of
 be: Block\_0 → (Block\_1 and Block\_2) → (Block\_3 and Block\_4) →
 (Block\_5 and Block\_6) → Block\_7.
 
-The recursive intra prediction feature is applicable only to luma intra prediction and to blocks that have width and height less than or equal to 32.
+The recursive intra prediction feature is applicable only to luma intra
+prediction and to blocks that have width and height less than or equal to 32.
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 ##### Control macros/flags
 
@@ -89,7 +95,8 @@ the control flags associated with the fi
 
 ### Recursive intra prediction API
 
-The interface to the recursive intra prediction feature is described in Table 4 below.
+The interface to the recursive intra prediction feature is described in Table 4
+below.
 
 ##### Table 4. Interface to the recursive filter intra feature.
 
@@ -97,33 +104,39 @@ The interface to the recursive intra pre
 
 ### Candidate Injection
 
-The function ```inject_filter_intra_candidates``` is responsible of injecting all recursive intra candidates in MD.
-Candidates are injected for a given block if the flag ```md_filter_intra_level``` is not zero and
-both the block width and block height are smaller than or equal to 32.
-
-A total of five intra based candidates are injected where the candidate field ```filter_intra_mode``` is assigned
-a value from the following list:
-```FILTER_DC_PRED``` / ```FILTER_V_PRED``` / ```FILTER_H_PRED``` / ```FILTER_D157_PRED``` / ```FILTER_PAETH_PRED```.
-For other regular intra candidates ```filter_intra_mode``` is assigned a special value (```FILTER_INTRA_MODES```) to
-make sure it is not a filter intra candidate.
+The function ```inject_filter_intra_candidates``` is responsible for injecting
+all recursive intra candidates in MD. Candidates are injected for a given block
+if the flag ```md_filter_intra_level``` is not zero and both the block width
+and block height are smaller than or equal to 32.
+
+A total of five intra based candidates are injected where the candidate field
+```filter_intra_mode``` is assigned a value from the following list:
+```FILTER_DC_PRED``` / ```FILTER_V_PRED``` / ```FILTER_H_PRED``` /
+```FILTER_D157_PRED``` / ```FILTER_PAETH_PRED```. For other regular intra
+candidates ```filter_intra_mode``` is assigned a special value
+(```FILTER_INTRA_MODES```) to make sure it is not a filter intra candidate.
 
-Note that when inter-intra compound is used, no filter intra modes are allowed in the intra part of the prediction.
+Note that when inter-intra compound is used, no filter intra modes are allowed
+in the intra part of the prediction.
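+
+A minimal sketch of this injection logic is given below; the candidate
+structure and helper are simplified stand-ins for the MD data structures, not
+the real SVT-AV1 types.
+
+```c
+typedef enum {
+    FILTER_DC_PRED, FILTER_V_PRED, FILTER_H_PRED,
+    FILTER_D157_PRED, FILTER_PAETH_PRED, FILTER_INTRA_MODES
+} FilterIntraMode;
+
+typedef struct { FilterIntraMode filter_intra_mode; } CandidateSketch;
+
+/* Append one candidate per filter intra mode, subject to the level flag and
+ * the 32x32 size limit described above; returns the new candidate count. */
+static int inject_filter_intra_sketch(CandidateSketch *cands, int count,
+                                      int md_filter_intra_level,
+                                      int bwidth, int bheight) {
+    if (md_filter_intra_level == 0 || bwidth > 32 || bheight > 32)
+        return count; /* feature off or block too large: inject nothing */
+    for (int m = FILTER_DC_PRED; m < FILTER_INTRA_MODES; ++m)
+        cands[count++].filter_intra_mode = (FilterIntraMode)m;
+    return count;
+}
+```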
 
 ### Generation of the filter intra prediction
 
-The generation of the recursive intra prediction is performed in the function ```svt_av1_filter_intra_predictor```.
-The generation of the prediction is performed as described above.
-The block is split into 4x2 blocks and the predictions for the 4x2 blocks are generated in a raster scan order.
+The generation of the recursive intra prediction is performed in the function
+```svt_av1_filter_intra_predictor```. The generation of the prediction is
+performed as described above. The block is split into 4x2 blocks and the
+predictions for the 4x2 blocks are generated in a raster scan order.
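+
+The sketch below shows the per-patch computation for one 4x2 unit, assuming
+the seven-neighbour layout and 4-bit tap scale of the AV1 filter-intra
+design; the tap table contents and helper names are illustrative only.
+
+```c
+#include <stdint.h>
+
+#define FILTER_SCALE_BITS 4 /* assumed tap precision */
+
+static uint8_t clip_pixel(int v) { return v < 0 ? 0 : (v > 255 ? 255 : (uint8_t)v); }
+
+/* p[0] = top-left, p[1..4] = row above, p[5..6] = left column.
+ * Each of the 8 samples in the 4x2 patch is a rounded weighted sum of the
+ * 7 neighbouring samples, using the taps of the selected filter mode. */
+static void predict_4x2_patch(const int8_t taps[8][7], const uint8_t p[7],
+                              uint8_t out[8]) {
+    for (int i = 0; i < 8; i++) {
+        int sum = 0;
+        for (int j = 0; j < 7; j++)
+            sum += taps[i][j] * p[j];
+        out[i] = clip_pixel((sum + (1 << (FILTER_SCALE_BITS - 1)))
+                            >> FILTER_SCALE_BITS);
+    }
+}
+```
+
+Because the patches are visited in raster scan order, the left and above
+neighbours of later patches are taken from already predicted samples, which
+is what makes the prediction recursive.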
 
 ## 3. Optimization of the algorithm
 
-To reduce the complexity associated with the recursive intra prediction feature,
-the latter follows the regular intra optimization levels. (e.g., number of injected candidates, chroma level, etc..)
-
-## 4.  Signaling
-
-At the sequence header the flag enable_filter_intra will enable the recursive  intra prediction in the bit-stream.
-When the recursive intra prediction mode is selected:
+To reduce the complexity associated with the recursive intra prediction
+feature, the latter follows the regular intra optimization levels (e.g.,
+number of injected candidates, chroma level, etc.).
+
+## 4. Signaling
+
+At the sequence header, the flag ```enable_filter_intra``` enables recursive
+intra prediction in the bitstream. When the recursive intra prediction mode is
+selected:
 
   - The intra mode that is sent to the decoder is ```DC_PRED```.
 
@@ -133,4 +146,8 @@ When the recursive intra prediction mode
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Reference-Scaling.md 1.2.0+dfsg-2/Docs/Appendix-Reference-Scaling.md
--- 0.9.1+dfsg-1/Docs/Appendix-Reference-Scaling.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Reference-Scaling.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,199 @@
+[Top level](../README.md)
+
+# Reference-scaling Appendix
+
+## 1. Description of the algorithm
+The AV1 specification allows the input video pictures to be coded at a lower
+resolution, with the reconstructed reference frames and the bitstream produced
+at that same scaled resolution. This coding procedure, referred to as
+reference-scaling, is useful in scenarios where bandwidth is restricted and
+the bit rate is low. Figure 1 depicts the architecture of the pipeline using
+reference frame scaling.
+At the encoder side, the encoding process using the reference-scaling feature
+involves the following steps: First, the source video is downscaled using a
+non-normative process. Second, the downscaled version of the input video is
+encoded, followed by application of deblocking and CDEF. Then loop restoration
+filtering is used with downscaled input pictures to recover part of the lost
+high frequency information. This recovered picture is downscaled using the
+same non-normative process. At the decoder side, the resulting bitstream is
+decoded, and deblocking, CDEF and loop restoration are then applied on the
+lower resolution pictures. The downscaling factor is constrained to 8/9 ~
+8/16, i.e., a maximum of 2x.
+The downscaling operation can be applied to some of the frames, especially
+frames that are too complex to fit in the target bandwidth. Different frames
+can have different downscaling factors. The following sections outline how
+pictures with different sizes are processed in the different stages of the
+SVT-AV1 encoder pipeline and how the downscaling factor is determined.
+
+![reference_scaling_pipeline](./img/reference_scaling_pipeline.png)
+##### Figure 1. Processing pipeline when reference scaling is active.
+
+
+## 2. Implementation of the algorithm
+### 2.1. Downscaled and full-size versions of pictures
+Figure 2 illustrates how pictures with different sizes are processed in
+different coding processes/stages.
+![reference_scaling_picture_size](./img/reference_scaling_picture_size.png)
+##### Figure 2. Processing of the downscaled and full-size pictures in the encoder pipeline.
+
+In Figure 2, downscaled input refers to a downscaled version of the current
+input picture.
+
+In the Motion Estimation process (open-loop stage based on using source
+pictures as reference pictures), pa ref denotes a list of reference pictures
+consisting of input pictures at their original input resolution. When
+reference-scaling is active, downscaled versions of reference and input pictures
+are used in this process to compute SAD distortion.
+
+In the Mode Decision process (in-loop stage), recon ref denotes the reference
+picture list, which contains the reconstructed pictures. When reference-scaling
+is active, downscaled versions of the recon references and input pictures are
+used in this process to compute SAD and other distortion measures.
+
+In the prediction process, when reference-scaling is active, MotionVectors and
+ScaleFactors are used to create the downscaled prediction picture from resized
+recon ref pictures via the convolve_2d_scale() function. This design follows
+the AV1 spec "Motion vector scaling process". The current downscaled input
+picture and the resized prediction picture are used to create the residual
+picture for the transform and quantization processes.
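+
+For orientation, the sketch below shows how a fixed-point scale factor of the
+kind consumed by a scaled convolution can be derived from the reference and
+current frame widths. The 14-bit precision is modelled on the AV1 reference
+convention; the helper name is ours.
+
+```c
+#include <stdint.h>
+
+#define REF_SCALE_SHIFT 14 /* AV1-style fixed-point precision */
+
+/* Round-to-nearest fixed-point ratio ref_size / cur_size; a value of
+ * 1 << REF_SCALE_SHIFT means the two frames have the same size. */
+static int fixed_point_scale(int ref_size, int cur_size) {
+    return (int)((((int64_t)ref_size << REF_SCALE_SHIFT) + cur_size / 2)
+                 / cur_size);
+}
+```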
+
+Reference scaling does not upscale the picture resulting from CDEF filtering;
+this is the major difference between reference scaling and super-res. The
+downscaled pictures are directly reconstructed and used as reference pictures
+in the list.
+
+Since the CDEF filter results are not upscaled to the original size, the input
+pictures used in loop restoration should be downscaled to the same size as the
+reconstructed pictures using the same non-normative scaling method.
+
+### 2.2. Determination of the downscaling factor
+The downscaling factor is constrained to 8/9 ~ 8/16. Since the numerator of the
+factor is fixed to 8, only the denominator needs to be determined. There is
+also a special downscaling factor in dynamic mode, described in the mode
+descriptions below.
+
+Three modes are used to set the denominator, namely the Fixed, Random and
+Dynamic modes. The mode is set by the user. A brief description of how each of
+the above-mentioned modes works is included below.
+
+* Fixed: Two denominator values can be set by the user, one for Key-frames and
+  the other for non-key-frames. Downscaling can be applied to all pictures.
+* Random: The denominator is set randomly. Downscaling can be applied to all
+  pictures. This mode is mainly used for debugging/verification purposes.
+* Dynamic: The use of downscaling is decided by evaluating whether the rate
+  control buffer is about to underflow. Two downscaling stages can be applied
+  to new input frames after the scaling decision: the first stage downscales
+  width and height to three-quarters and the second stage to one-half. This
+  mode moves from the original resolution to stage 1 and then stage 2 when the
+  buffer underflows, and recovers from stage 2 to stage 1 and then the
+  original resolution when the QP level is better than the worst case.
+
+The following sections explain how these different modes are implemented in the
+SVT-AV1 encoder. The high-level dataflow of reference scaling is shown in Figure
+3.
+![ref_scale_modes_dataflow](./img/ref_scale_modes_dataflow.png)
+##### Figure 3. High-level encoder process dataflow with reference scaling feature.
+
+#### 2.2.1. Fixed and Random mode
+Setting the denominator value and the downscaling of the input picture are
+performed in the Picture Decision process. The Picture Decision process posts
+three types of tasks to the Motion Estimation process: first pass ME, TFME
+(temporal filter) and PAME. Reference scaling is not considered in the first
+pass of a multi-pass encoding process. Downscaling is performed after TFME and
+before PAME, i.e. TFME is applied to the full-size picture and downscaling is
+then applied to the TFME-filtered picture. PAME is performed using downscaled
+pictures. As shown in Figure 2, PAME requires the input source pictures and
+the corresponding reference pictures to be of the same size, so the references
+(pa reference pictures) are also downscaled or upscaled in advance.
+
+#### 2.2.2. Dynamic mode
+Dynamic mode only works in the 1-pass CBR low-delay mode. It uses the buffer
+level and QP (or qindex) to decide whether to keep the original resolution or
+to downscale in one of two stages. In the SVT-AV1 encoder, the picture buffer
+level and QP are determined in the Rate Control process, so pictures already
+in the pipeline keep their current resolution and the new scaling denominator
+is applied to new input pictures.
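+
+The following sketch condenses the two-stage decision described above; the
+stage encoding, thresholds and names are hypothetical. Stage 0 is the original
+size, stage 1 scales width and height to 3/4, and stage 2 to 1/2.
+
+```c
+/* Move one stage down when the buffer underflows, and recover one stage
+ * when there is quality headroom (QP better than the worst case). */
+static int decide_scale_stage(int stage, int buffer_underflow,
+                              int qp, int worst_qp) {
+    if (buffer_underflow && stage < 2)
+        return stage + 1; /* shrink further to save bits */
+    if (!buffer_underflow && qp < worst_qp && stage > 0)
+        return stage - 1; /* quality headroom: recover resolution */
+    return stage;
+}
+```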
+
+### 2.3. Other noticeable changes in code base
+In SVT-AV1, data structure pools are widely used. That means many data
+structures are used in a recycled manner, and when a data structure is
+acquired from a pool, its member variables may contain outdated values. This
+is not a problem when all pictures have the same size, but care should be
+taken when reference scaling is enabled. For example, variables related to
+size and geometry, like the super block count, or extra buffers allocated for
+downscaled pictures, need to be reset to default (full size) values and the
+memory safely freed. The most noticeable data structures are the parent PCS
+(PictureParentControlSet) and the child PCS (PictureControlSet). They are
+acquired in the Resource Coordination process and the Picture Manager process
+respectively.
+
+In the dynamic mode, frame size related information is reset when the newly
+scaled picture arrives in the Rate Control process; a new watch window for
+scaling decisions starts after this reset.
+
+### 2.4. Reference Scaling API
+Table 1 illustrates the usage of reference scaling functions. Only related
+processes are listed.
+![ref_scale_API](./img/ref_scale_API.png)
+##### Table 1. Reference Scaling API
+
+## 3. Optimization
+How reference scaling affects the coding speed: The current picture and its
+references are downscaled to the same size for motion estimation search.
+Considering its interaction with super-res, the current implementation uses
+the pa ref[sr_denom_idx][10] and recon ref[sr_denom_idx][10] arrays to hold
+downscaled reference pictures, to avoid duplicate downscaling of the same
+reference picture for different input pictures. The sr_denom_idx and the
+resize_denom_idx are calculated as scale_denom - scale_numerator. For example,
+when pictures with indices 4, 8 and 16 refer to picture 0, their downscaled
+reference frames for reference scaling combined with super-res are as in the
+following table:
+| pic index | ref scaling ratio | super-res ratio | scaling refs in array |                       |                      |
+|-----------|-------------------|-----------------|----------------------|-----------------------|----------------------|
+| 0         | 8/8               | 8/8             | N/A                  | N/A                   | N/A                  |
+| 4         | 8/9               | 8/12            | ref_pic_0->downscaled_reference_picture[4][1] | ref_pic_16->downscaled_reference_picture[4][1] | ref_pic_8->downscaled_reference_picture[4][1] |
+| 8         | 8/10              | 8/11            | ref_pic_0->downscaled_reference_picture[3][2] | ref_pic_16->downscaled_reference_picture[3][2] | N/A                  |
+| 16        | 8/9               | 8/12            | ref_pic_0->downscaled_reference_picture[4][1] | N/A                   | N/A                  |
+
+This way, the downscaling of ref_pic_0->downscaled_reference_picture[4][1] is performed only once.
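+
+As a concrete instance of the index arithmetic: with the numerator fixed at 8,
+a super-res ratio of 8/12 gives sr_denom_idx = 12 - 8 = 4 and a reference
+scaling ratio of 8/9 gives resize_denom_idx = 9 - 8 = 1, matching the [4][1]
+entries above. A trivial sketch (helper name ours):
+
+```c
+/* Denominators 8..16 map to indices 0..8; index 0 (ratio 8/8) is the
+ * unscaled picture. The resize array reserves one extra slot for the
+ * special 3/4 factor of dynamic mode. */
+static int denom_to_idx(int scale_denom) {
+    return scale_denom - 8; /* scale_numerator is fixed at 8 */
+}
+```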
+
+Reference Scaling also has an impact on memory usage: Extra buffers are
+allocated to hold downscaled pictures, including current coding picture, PA
+references and reconstructed references as shown in Figure 4.
+![ref_scale_downscaled_buffers](./img/ref_scale_downscaled_buffers.png)
+##### Figure 4. Buffers for downscaled pictures
+
+## 4. Usage recommendation
+The Random mode should be used for validation or testing only because it
+consumes more memory. Since reference scaling is required to interact with
+super-res, pa_ref and recon_ref are expanded to 2D arrays. There are up to 9
+super-res denom indices (mapping from denom 8 to denom 16) and up to 10 resize
+denom indices (mapping from denom 8 to denom 16, plus one special index for
+3/4 in dynamic mode). With Random mode the pa_ref and recon_ref arrays might
+be fully filled, and this calls for more CPU time to scale the same reference
+with different denominator values, while bringing no benefit compared to the
+other modes.
+
+The Fixed mode with a constant QP configuration can achieve the target of
+working within a narrow bandwidth with acceptable quality. Since video quality
+goes down with a lower scaling ratio, it is recommended to set a higher
+scaling ratio (8/9 or 8/10) for key frames and a lower scaling ratio (8/13 to
+8/16) for non-key frames; this balances bit rate and quality. Setting a lower
+ratio for key frames and a higher ratio for non-key frames is not recommended,
+because the smaller key frames will lose too much detail in scaling, yet they
+are always referenced by other frames.
+
+The Dynamic mode only works with a 1-pass CBR low-delay configuration; any
+settings in conflict with this will cause dynamic mode to be rejected. The new
+scaling denominator is applied to the next input pictures, so the expected
+bitrate improvement is delayed according to how many frames already exist in
+the encoding pipeline. According to internal tests, dynamic mode is typically
+activated when the target bitrate is set to about 60% of the average output
+bitrate obtained with common encoding settings.
+
+## 5. Notes
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Restoration-Filter.md 1.2.0+dfsg-2/Docs/Appendix-Restoration-Filter.md
--- 0.9.1+dfsg-1/Docs/Appendix-Restoration-Filter.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Restoration-Filter.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Restoration Filter
 
 The restoration filter is applied after the constrained directional
@@ -19,7 +21,7 @@ or switching between those three modes a
 
 ## I. Wiener Filter
 
-### 1.  Description of the algorithm [To be completed]
+### 1. Description of the algorithm [To be completed]
 
 The Wiener filter is a separable symmetric filter (7/5/3-tap
 filters), where only three, two or one coefficient(s) for the horizontal
@@ -43,7 +45,7 @@ The filter is designed over windows of s
 The filter taps are either 7, 5, or 3 for luma and 5 or 3 for
 chroma.
 
-### 2.  Implementation
+### 2. Implementation
 
 **Inputs to rest\_kernel**: Output frame of the CDEF filter.
 
@@ -82,8 +84,8 @@ Each restoration unit is split into rest
 Loop over all restoration units in each tile segment (```foreach_rest_unit_in_tile_seg```)
 
 - Determine the best filtering parameters for the restoration unit (```search_wiener_seg```)
-   - The initial Wiener filter coeff are computed.  See functions ```svt_av1_compute_stats```, ```wiener_decompose_sep_sym```, ```finalize_sym_filter```.  (This step may be skipped, see optimization section).
-     Check that the new filter params are an improvement over the identity filter (i.e. no filtering).  If not, exit Wiener filter search and do not use Wiener filter.  See function ```compute_score```.
+   - The initial Wiener filter coeffs are computed. See functions ```svt_av1_compute_stats```, ```wiener_decompose_sep_sym```, ```finalize_sym_filter```. (This step may be skipped; see the optimization section.)
+     Check that the new filter params are an improvement over the identity filter (i.e. no filtering). If not, exit the Wiener filter search and do not use the Wiener filter. See function ```compute_score```.
    - Refine the initially computed Wiener filter coeffs (see function ```finer_tile_search_wiener_seg```).
      Up to three refinement steps are performed (with step sizes 4,2,1).
      In each step, the filter coeffs are shifted according to the step size.
@@ -110,7 +112,7 @@ More details on ```try_restoration_unit_
 
 ## II. Self-Guided Restoration Filter with Subspace Projection (SGRPROJ)
 
-### 1.  Description of the algorithm
+### 1. Description of the algorithm
 
 The main objective behind using the SGRPROJ filter is to smooth the
 reconstructed image while preserving edges. The filter consists of two
@@ -152,7 +154,7 @@ The figure below illustrates the main id
 The derivation of the filter parameters is outlined below.
 
   - Compute the mean ![math](http://latex.codecogs.com/gif.latex?\mu) and the square of the variance ![math](http://latex.codecogs.com/gif.latex?\sigma^2) of a (2r+1)x(2r+1) window ***w*** around the sample ![math](http://latex.codecogs.com/gif.latex?\mathbf{p_r}) in the reconstructed image.
-  - Define ![math](./img/restoration_filter_math8.png) , ![math](http://latex.codecogs.com/gif.latex?\mathbf{g=(1-f)\mu}). The parameter ![math](http://latex.codecogs.com/gif.latex?\varepsilon) is used to tune the filter.
+  - Define ![math](./img/restoration_filter_math8.png), ![math](http://latex.codecogs.com/gif.latex?\mathbf{g=(1-f)\mu}). The parameter ![math](http://latex.codecogs.com/gif.latex?\varepsilon) is used to tune the filter.
   - Repeat the same computations above for every sample in the window
     ***w*** (or for a subset of those samples). Define **F** and **G** to be the averages of ![math](http://latex.codecogs.com/gif.latex?\mathbf{f}) and ![math](http://latex.codecogs.com/gif.latex?\mathbf{g}) computed for all samples in the window ***w*** (or for a subset of those samples), respectively.
   - Filtering: ![math](http://latex.codecogs.com/gif.latex?\mathbf{p_f=Fp_r+G\mu})
@@ -162,7 +164,7 @@ produce good quality reconstructed image
 restoration step is considered and involves the use of subspace
 projection.
 
-### 1.2.  Subspace Projection
+### 1.2. Subspace Projection
 
 The main idea behind subspace projection is as follows:
 
@@ -193,7 +195,7 @@ To illustrate the idea of subspace proje
   - ![math26](./img/restoration_filter_math26.png)
 
 
-### 2.  Implementation
+### 2. Implementation
 
 **Inputs to rest\_kernel**: Output frame of the CDEF filter.
 
@@ -320,7 +322,7 @@ the filter algorithm. See ```av1_selfgui
 selfguided_restoration_fast_internal and
 selfguided_restoration_internal``` for the C implementation, av1_selfguided_restoration_avx2, integral_images, calc_ab_fast, final_filter_fast, calc_ab, final_filter for the avx2 implementation.
 
-### 3.  Optimization of the algorithm
+### 3. Optimization of the algorithm
 
 Both the Wiener filter and the SGRPROJ filters involve, at the
 restoration unit level, a search procedure for the best Wiener filter
@@ -352,35 +354,42 @@ and chroma, as given in the table below.
 
 ### Filter Coeff Selection
 
-Generally, the Wiener filter coeffs for each restoration unit are computed; however,
-if the Wiener filter coeff values of ref frames are available, they can be used instead
-(and the computation can be skipped).  When enabled, ```cm->wn_filter_ctrls.use_prev_frame_coeffs```
-will set the initial coeff values to those chosen by the nearest list 0 reference frame for each
-corresponding restoration unit.  Refinement (if enabled – see next section) will then be performed.
+Generally, the Wiener filter coeffs for each restoration unit are computed;
+however, if the Wiener filter coeff values of ref frames are available, they
+can be used instead (and the computation can be skipped). When enabled,
+```cm->wn_filter_ctrls.use_prev_frame_coeffs``` will set the initial coeff
+values to those chosen by the nearest list 0 reference frame for each
+corresponding restoration unit. Refinement (if enabled – see next section) will
+then be performed.
 
 ### Filter Coeff Refinement
 
-After the initial filter coeff values are selected, a refinement search can be performed to
-improve the coeff values.  The refinement is performed iteratively, with 3 step sizes: 4, 2, 1.
-By enabling ```cm->wn_filter_ctrls.max_one_refinement_step``` only a step size of 4 is used in the refinement
-(smaller step sizes, which improve granularity of the coeff, and therefore accuracy, will be skipped).
-To disable the refinement and automatically use the computed coeffs without refinement, set ```cm->wn_filter_ctrls.use_refinement``` to 0.
+After the initial filter coeff values are selected, a refinement search can be
+performed to improve the coeff values. The refinement is performed iteratively,
+with 3 step sizes: 4, 2, 1. By enabling
+```cm->wn_filter_ctrls.max_one_refinement_step``` only a step size of 4 is used
+in the refinement (smaller step sizes, which improve granularity of the coeff,
+and therefore accuracy, will be skipped). To disable the refinement and
+automatically use the computed coeffs without refinement, set
+```cm->wn_filter_ctrls.use_refinement``` to 0.
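+
+The outline below captures the refinement schedule described above as a
+generic coordinate search; it is not the exact SVT-AV1 routine, and
+```eval_cost``` stands in for the actual rate-distortion evaluation.
+
+```c
+static void refine_wiener_coeffs(int *coeffs, int num_coeffs,
+                                 int use_refinement, int max_one_step,
+                                 long (*eval_cost)(const int *, int)) {
+    static const int steps[3] = { 4, 2, 1 };
+    if (!use_refinement)
+        return;                            /* keep the computed coeffs as-is */
+    int num_steps = max_one_step ? 1 : 3;  /* optionally stop after step 4   */
+    for (int s = 0; s < num_steps; s++) {
+        for (int c = 0; c < num_coeffs; c++) {
+            long best = eval_cost(coeffs, num_coeffs);
+            coeffs[c] += steps[s];         /* try shifting the coeff up      */
+            if (eval_cost(coeffs, num_coeffs) >= best) {
+                coeffs[c] -= 2 * steps[s]; /* no gain: try shifting down     */
+                if (eval_cost(coeffs, num_coeffs) >= best)
+                    coeffs[c] += steps[s]; /* still no gain: restore         */
+            }
+        }
+    }
+}
+```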
 
 
 **3.2 SGRPROJ filter search**
 
-The search for the best SGRPROJ filter is normally performed by
-evaluating the filter performance for each of the sixteen different
-![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) values in the interval \[0,15\], where
-![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) is used in the outline of SGRPROJ algorithm
-presented above. The algorithmic optimization of the filter search
-involves restricting the range of ![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) values in the
+The search for the best SGRPROJ filter is normally performed by evaluating the
+filter performance for each of the sixteen different
+![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) values in the
+interval \[0,15\], where
+![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) is used in the
+outline of SGRPROJ algorithm presented above. The algorithmic optimization of
+the filter search involves restricting the range of
+![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) values in the
 search operation. The parameter ```cm->sg_filter_mode``` is used to specify
 different level of search complexity, where a higher value of
 ```cm->sg_filter_mode``` would correspond to a wider interval of
-![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) values and a more costly search. The parameter
-step is used to control the width of the search interval, and is given
-in the following table.
+![epsilon](http://latex.codecogs.com/gif.latex?\varepsilon) values and a more
+costly search. The parameter step is used to control the width of the search
+interval, and is given in the following table.
 
 ##### Table 5. Step parameter as a function of the sg\_filter\_mode.
 
@@ -413,15 +422,15 @@ The optimization proceeds as follows:
   - The interval limits are given by:
     ```c
     start_ep = 0 if (sg_ref_frame_ep[0] < 0 && sg_ref_frame_ep[1] < 0), else start_ep = max(0, mid_ep - step)
-    end_ep = 8  if (sg_ref_frame_ep[0] < 0 && sg_ref_frame_ep[1] < 0), else end_ep = min(8, mid_ep + step)
+    end_ep = 8 if (sg_ref_frame_ep[0] < 0 && sg_ref_frame_ep[1] < 0), else end_ep = min(8, mid_ep + step)
     ```
-### 4.  Signaling
+### 4. Signaling
 
 ##### Table 7. Restoration filter signals.
 
 | **Signal**                        | **Description**                                                       |
 | --------------------------------- | --------------------------------------------------------------------- |
-| **At the frame level**            |                                                                |
+| **At the frame level**            |                                                                       |
 | frame\_restoration\_type          | RESTORE\_NONE, RESTORE\_WIENER, RESTORE\_SGRPROJ, RESTORE\_SWITCHABLE |
 | restoration\_unit\_size           | Size of restoration unit. For luma: 128x128 or 256x256
 | **At the restoration unit level** |                                                                       |
@@ -431,7 +440,11 @@ The optimization proceeds as follows:
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## References
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-SQ-Weight.md 1.2.0+dfsg-2/Docs/Appendix-SQ-Weight.md
--- 0.9.1+dfsg-1/Docs/Appendix-SQ-Weight.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-SQ-Weight.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,10 +1,14 @@
+[Top level](../README.md)
+
 # SQ\_Weight
 
 ## Description
 
-SQ\_WEIGHT is a cost scaling factor used to determine if the evaluation of the non-square (NSQ) shapes HA, HB, VA, VB, H4 and V4
-could be skipped based on the relative cost of the square (SQ), H and V shapes.
-The various shapes supported in the AV1 Bitstream & Decoding Process Specification are shown in Figure 1 below.
+SQ\_WEIGHT is a cost scaling factor used to determine if the evaluation of the
+non-square (NSQ) shapes HA, HB, VA, VB, H4 and V4 could be skipped based on the
+relative cost of the square (SQ), H and V shapes. The various shapes supported
+in the AV1 Bitstream & Decoding Process Specification are shown in Figure 1
+below.
 
 ![sq_weight_fig2](./img/sq_weight_fig2.png)
 
@@ -22,7 +26,7 @@ to determine if the HA, HB, VA, VB, H4 a
 
 where X_COST refers to the cost of the block with shape X. The lower the SQ_WEIGHT, the higher the chance to skip NSQ shapes.
 
-The SQ_WEIGHT is a scaling factor of the square shape cost,  and can be made more or less aggressive based on the preset and block characteristics.
+The SQ_WEIGHT is a scaling factor of the square shape cost, and can be made more or less aggressive based on the preset and block characteristics.
 The SQ_WEIGHT is derived as follows:
 
 where Base is a function of the encoder preset.
@@ -41,12 +45,17 @@ Final offset is between -10 and +5.
 
 ### H vs. V comparison
 
-The second part of the algorithm compares the cost of H block to the cost of the V block to determine if the HA, HB, VA, VB, H4 and V4 shapes could be skipped.
-The NSQ shapes are skipped as follows:
+The second part of the algorithm compares the cost of H block to the cost of
+the V block to determine if the HA, HB, VA, VB, H4 and V4 shapes could be
+skipped. The NSQ shapes are skipped as follows:
 
   - skip HA, HB and H4 if (H and V are valid shapes) and (H_COST > (110 * V_COST) / 100)
   - skip VA, VB and V4 if (H and V are valid shapes) and (V_COST > (110 * H_COST) / 100)
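+
+The two rules transcribe directly into code; the sketch below uses made-up
+names and assumes the costs are already available from MD.
+
+```c
+#include <stdbool.h>
+#include <stdint.h>
+
+static void hv_skip_decision(uint64_t h_cost, uint64_t v_cost,
+                             bool h_valid, bool v_valid,
+                             bool *skip_h_shapes, bool *skip_v_shapes) {
+    /* skip HA, HB and H4 when H costs more than 110% of V */
+    *skip_h_shapes = h_valid && v_valid && h_cost > (110 * v_cost) / 100;
+    /* skip VA, VB and V4 when V costs more than 110% of H */
+    *skip_v_shapes = h_valid && v_valid && v_cost > (110 * h_cost) / 100;
+}
+```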
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Super-Resolution.md 1.2.0+dfsg-2/Docs/Appendix-Super-Resolution.md
--- 0.9.1+dfsg-1/Docs/Appendix-Super-Resolution.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Super-Resolution.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,7 +1,33 @@
+[Top level](../README.md)
+
 # Super-resolution Appendix
 
 ## 1. Description of the algorithm
-The AV1 specification allows for the input video pictures to be coded at a lower resolution and then upscaled to the original resolution after reconstruction. This coding procedure, referred to as super-resolution, is useful in low bit rate streaming scenarios [1]. Figure 1 depicts the architecture of the processing pipeline when using super-resolution [2]. At the encoder side, encoding using the super-resolution feature involves the following step. First, the source video is downscaled using a non-normative process. Second, the downscaled version of the input video is encoded, followed by application of deblocking and CDEF to the reconstructed downscaled version of the input video. Third, the output pictures from CDEF are upscaled to the original resolution and loop restoration is applied to the full resolution pictures to recover part of lost high frequency information. The upscaling and loop restoration operations are referred to as the super-resolve steps and are normative. At the decoder side, the resulting bitstream is decoded, and deblocking and CDEF are then applied on the lower resolution pictures. The output pictures from CDEF are then super-resolved to original video resolution [2]. In order to reduce overheads associated with line-buffers in hardware implementations, the upscaling and downscaling operations are applied to the horizontal direction only [3]. The downscaling factor is constrained to 8/9 ~ 8/16, i.e, maximum 2x. The downscaling operation can be applied to some of the frames, especially for frames that are too complex to fit in the target bandwidth. Different frames can have different downscaling factors. The following sections outline how pictures with different sizes are processed in the different stages of the SVT-AV1 encoder pipeline and how the downscaling factor is determined.
+The AV1 specification allows for the input video pictures to be coded at a
+lower resolution and then upscaled to the original resolution after
+reconstruction. This coding procedure, referred to as super-resolution, is
+useful in low bit rate streaming scenarios [1]. Figure 1 depicts the
+architecture of the processing pipeline when using super-resolution [2]. At the
+encoder side, encoding using the super-resolution feature involves the
+following steps. First, the source video is downscaled using a non-normative
+process. Second, the downscaled version of the input video is encoded, followed
+by application of deblocking and CDEF to the reconstructed downscaled version
+of the input video. Third, the output pictures from CDEF are upscaled to the
+original resolution and loop restoration is applied to the full resolution
+pictures to recover part of lost high frequency information. The upscaling and
+loop restoration operations are referred to as the super-resolve steps and are
+normative. At the decoder side, the resulting bitstream is decoded, and
+deblocking and CDEF are then applied on the lower resolution pictures. The
+output pictures from CDEF are then super-resolved to original video resolution
+[2]. In order to reduce overheads associated with line-buffers in hardware
+implementations, the upscaling and downscaling operations are applied to the
+horizontal direction only [3]. The downscaling factor is constrained to 8/9 ~
+8/16, i.e., a maximum of 2x. The downscaling operation can be applied to some of the
+frames, especially for frames that are too complex to fit in the target
+bandwidth. Different frames can have different downscaling factors. The
+following sections outline how pictures with different sizes are processed in
+the different stages of the SVT-AV1 encoder pipeline and how the downscaling
+factor is determined.
 
 ![superres_pipeline](./img/superres_pipeline.png)
 ##### Figure 1. Processing pipeline when super-resolution is active.
@@ -13,73 +39,176 @@ Figure 2 illustrates how pictures with d
 ![superres_picture_size](./img/superres_picture_size.png)
 ##### Figure 2. Processing of the downscaled and full-size pictures in the encoder pipeline.
 
-In Figure 2, downscaled input refers to a downscaled version of the current input picture.
-
-In the Motion Estimation process (open-loop stage based on using source pictures as reference pictures), pa ref denotes a list of reference pictures consisting of input pictures at their original input resolution. When super-resolution is active, downscaled versions of reference and input pictures are used in this process to compute SAD distortion.
-
-In the Mode Decision process (in-loop stage), recon ref denotes reference picture list which contains the reconstructed pictures. When super-resolution is active, downscaled version of the recon references and input pictures are used in this process to compute SAD and other distortion measures.
+In Figure 2, downscaled input refers to a downscaled version of the current
+input picture.
 
-In the prediction process, when super-resolution is active, MotionVectors and ScaleFactors are used to create downscaled prediction picture from upscaled recon ref pictures via convolve_2d_scale() function. Such design follows AV1 spec “Motion vector scaling process”. Current downscaled input picture and downscaled prediction picture are used to create residual picture for transform and quantization process.
+In the Motion Estimation process (open-loop stage based on using source
+pictures as reference pictures), pa ref denotes a list of reference pictures
+consisting of input pictures at their original input resolution. When
+super-resolution is active, downscaled versions of reference and input pictures
+are used in this process to compute SAD distortion.
+
+In the Mode Decision process (in-loop stage), recon ref denotes the reference
+picture list, which contains the reconstructed pictures. When super-resolution
+is active, downscaled versions of the recon references and input pictures are
+used in this process to compute SAD and other distortion measures.
+
+In the prediction process, when super-resolution is active, MotionVectors and
+ScaleFactors are used to create the downscaled prediction picture from
+upscaled recon ref pictures via the convolve_2d_scale() function. This design
+follows the AV1 spec “Motion vector scaling process”. The current downscaled
+input picture and the downscaled prediction picture are used to create the
+residual picture for the transform and quantization processes.
 
-After deblocking and CDEF, current downscaled reconstructed picture is upscaled for next restoration process.
+After deblocking and CDEF, the current downscaled reconstructed picture is
+upscaled for the subsequent restoration process.
 
 After restoration, current upscaled reconstructed picture is added to the recon ref list.
 
 ### 2.2. Determination of the downscaling factor
-The downscaling factor is constrained to 8/9 ~ 8/16. Since the numerator of the factor is fixed to 8, only the denominator needs to be determined. Four modes are used to set the denominator namely Fixed, Random, QThreshold and Auto modes, respectively. The mode is set by the user. Auto mode has three search types: Auto-Solo, Auto-Dual and Auto-All and is set according to the encoder Preset. A brief description of how each of the above mentioned modes works is included below.
-
-* Fixed: Two denominator values can be set by the user, one for Key-frames and the other for non-key-frames. Downscaling can be applied to all pictures.
-* Random: The denominator is set randomly. Downscaling can be applied to all pictures. This mode is mainly used for debugging/verification purpose.
-* QThreshold: The use of super-resolution is decided by comparing the QP of the frame with a user-supplied QP threshold. Downscaling is applied to Key-frames and ARFs only.
-* Auto-Solo: It works similarly to QThreshold mode except that the QP threshold is fixed by the encoder.
-* Auto-Dual: Both downscaled (the denominator is determined by QP) and full size original input pictures are encoded. The output with the better rate-distortion cost is selected. Downscaling is applied to Key-frames and ARFs only.
-* Auto-All: Both downscaled with all possible denominator values (9~16) and full-size original input pictures are encoded. The output with best rate-distortion cost is selected. Downscaling is applied to Key-frames and ARFs only.
-
-The following sections explain how these different modes are implemented in the SVT-AV1 encoder. The high level dataflow of super-resolution is shown in Figure 3.
+The downscaling factor is constrained to 8/9 ~ 8/16. Since the numerator of the
+factor is fixed to 8, only the denominator needs to be determined. Four modes
+are used to set the denominator, namely the Fixed, Random, QThreshold and Auto
+modes. The mode is set by the user. Auto mode has three search types
+(Auto-Solo, Auto-Dual and Auto-All) which are set according to the encoder
+preset. A brief description of how each of the above-mentioned modes works is
+included below.
+
+* Fixed: Two denominator values can be set by the user, one for Key-frames and the other
+  for non-key-frames. Downscaling can be applied to all pictures.
+* Random: The denominator is set randomly. Downscaling can be applied to all pictures.
+  This mode is mainly used for debugging/verification purposes.
+* QThreshold: The use of super-resolution is decided by comparing the QP of the frame
+  with a user-supplied QP threshold. Downscaling is applied to Key-frames and ARFs only.
+* Auto-Solo: It works similarly to QThreshold mode except that the QP threshold is fixed
+  by the encoder.
+* Auto-Dual: Both downscaled (the denominator is determined by QP) and full size original
+  input pictures are encoded. The output with the better rate-distortion cost is selected.
+  Downscaling is applied to Key-frames and ARFs only.
+* Auto-All: Both downscaled with all possible denominator values (9~16) and full-size
+  original input pictures are encoded. The output with best rate-distortion cost is selected.
+  Downscaling is applied to Key-frames and ARFs only.
+
+The following sections explain how these different modes are implemented in the
+SVT-AV1 encoder. The high level dataflow of super-resolution is shown in Figure
+3.
 ![superres_new_modes_dataflow](./img/superres_new_modes_dataflow.png)
 ##### Figure 3. High-level encoder process dataflow with super-resolution feature.
 
 #### 2.2.1. Fixed and Random mode
-Setting the denominator value and the downscaling of the input picture are performed in the Picture Decision process. The Picture Decision process posts three types of tasks to Motion Estimation process. They are first pass ME, TFME (temporal filter) and PAME respectively. Super-resolution is not considered if it is the first pass of two-pass encoding process. Super-resolution downscaling is performed after TFME and before PAME, i.e. TFME is applied to the full size picture and then downscaling is applied to TFME filtered picture. PAME is performed using downscaled pictures. As shown in Figure 2, PAME requires the input source pictures and the corresponding reference pictures to be of the same size, so the references (pa reference pictures) are also downscaled in advance.
+Setting the denominator value and the downscaling of the input picture are
+performed in the Picture Decision process. The Picture Decision process posts
+three types of tasks to the Motion Estimation process: first pass ME, TFME
+(temporal filter) and PAME. Super-resolution is not considered in the first
+pass of a two-pass encoding process. Super-resolution downscaling is performed
+after TFME and before PAME, i.e. TFME is applied to the full-size picture and
+downscaling is then applied to the TFME-filtered picture. PAME is performed
+using downscaled pictures. As shown in Figure 2, PAME requires the input
+source pictures and the corresponding reference pictures to be of the same
+size, so the references (pa reference pictures) are also downscaled in
+advance.
 
 #### 2.2.2. QThreshold and Auto-Solo mode
-Both modes require QP (or qindex) to determine denominator. In SVT-AV1 encoder, picture level QP is determined in the Rate Control process. So the denominator can be decided only if the picture level QP is determined in the Rate Control process on the original resolution. Because the resolution is changed by new denominator, PAME must be done again at new resolution. As illustrated in Figure 3, a PAME task is posted after Rate Control and dataflow goes back to the Motion Estimation process.
+Both modes require the QP (or qindex) to determine the denominator. In the
+SVT-AV1 encoder, the picture level QP is determined in the Rate Control
+process, so the denominator can be decided only once the picture level QP has
+been determined in the Rate Control process at the original resolution.
+Because the resolution is changed by the new denominator, PAME must be done
+again at the new resolution. As illustrated in Figure 3, a PAME task is posted
+after Rate Control and the dataflow goes back to the Motion Estimation
+process.
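+
+A compact sketch of the QThreshold-style decision is shown below; the ramp
+from QP to denominator is invented for illustration, and only the comparison
+against the threshold follows the description above.
+
+```c
+/* Downscale only key frames and ARFs whose QP exceeds the threshold
+ * (user-supplied in QThreshold mode, encoder-fixed in Auto-Solo mode). */
+static int qthreshold_denom(int frame_qp, int qp_threshold, int is_kf_or_arf) {
+    if (!is_kf_or_arf || frame_qp <= qp_threshold)
+        return 8;                                  /* 8/8: no downscaling */
+    int denom = 8 + (frame_qp - qp_threshold) / 8; /* hypothetical ramp   */
+    return denom > 16 ? 16 : denom;                /* clamp at 2x (8/16)  */
+}
+```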
 
 #### 2.2.3. Auto-Dual and Auto-All mode
-The Auto-Dual and the Auto-All modes require going through the coding loop multiple times. The scaling factor denominator for each pass through the coding loop (including full resolution pass which is represented by special scaling factor denominator 8) is determined in the rate control process. Once all the passes through the coding loop are completed, the scaling factor denominator corresponding to the best rate-distortion cost is selected. The rate-distortion cost is derived based on the SSE and the rate corresponding to the coded picture. In SVT-AV1, the SSE is computed in the Restoration process, and the rate of the coded picture is computed in the Packetization process. So the rate-distortion cost is computed in the Packetization process. After the rate-distortion cost is acquired, a PAME task is posted to trigger the next coding loop as illustrated in Figure 3.
-
-When multiple coding loops are considered, feedback tasks such as RC feedback and reference list update tasks won’t be posted until the final pass through the coding loop is finished. The recon output is also delayed.
+The Auto-Dual and the Auto-All modes require going through the coding loop
+multiple times. The scaling factor denominator for each pass through the
+coding loop (including the full resolution pass, which is represented by the
+special scaling factor denominator 8) is determined in the rate control
+process. Once all the passes through the coding loop are completed, the
+scaling factor denominator corresponding to the best rate-distortion cost is
+selected. The rate-distortion cost is derived from the SSE and the rate
+corresponding to the coded picture. In SVT-AV1, the SSE is computed in the
+Restoration process, and the rate of the coded picture is computed in the
+Packetization process, so the rate-distortion cost is computed in the
+Packetization process. After the rate-distortion cost is acquired, a PAME task
+is posted to trigger the next coding loop as illustrated in Figure 3.
+
+When multiple coding loops are considered, feedback tasks such as RC feedback
+and reference list update tasks won’t be posted until the final pass through
+the coding loop is finished. The recon output is also delayed.
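+
+The search itself can be pictured as below: each pass returns an SSE and a
+rate, the rate-distortion costs are compared, and the winning denominator is
+kept. The function names and the simplified RD model are ours.
+
+```c
+#include <stdint.h>
+
+typedef struct { uint64_t sse; uint64_t rate; } PassResult;
+
+static uint64_t rd_cost(PassResult r, uint64_t lambda) {
+    return r.sse + lambda * r.rate; /* simplified RD model */
+}
+
+/* Auto-All style search: try denominators 16 down to 9, then the special
+ * full-size denominator 8 last; encode_pass() stands in for one trip
+ * through the coding loop. If the winner is not the final (full-size)
+ * pass, the caller re-encodes with the winning denominator. */
+static int pick_best_denom(PassResult (*encode_pass)(int denom),
+                           uint64_t lambda) {
+    int best_denom = 8;
+    uint64_t best_cost = UINT64_MAX;
+    for (int denom = 16; denom >= 8; denom--) {
+        uint64_t cost = rd_cost(encode_pass(denom), lambda);
+        if (cost < best_cost) {
+            best_cost = cost;
+            best_denom = denom;
+        }
+    }
+    return best_denom;
+}
+```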
 
 ### 2.3. Other noticeable changes in code base
-In SVT-AV1, data structure pool is widely used. That means many data structures are used in recycled manner and when a data structure is acquired from pool, its member variables may contain outdated values. This is not a problem when all pictures are in the same size, but care should be taken when super-resolution is enabled. For example, variables related to size and geometry, like super block count, or extra buffers allocated for downscaled pictures need to be reset to default (full size) values and memory to be safely freed. The most noticeable data structures are parent PCS (PictureParentControlSet) and child PCS (PictureControlSet). They are acquired in Resource Coordination process and Picture Manager process respectively.
+In SVT-AV1, data structure pools are widely used. That means many data
+structures are used in a recycled manner, and when a data structure is
+acquired from a pool, its member variables may contain outdated values. This
+is not a problem when all pictures have the same size, but care should be
+taken when super-resolution is enabled. For example, variables related to size
+and geometry, like the super block count, or extra buffers allocated for
+downscaled pictures, need to be reset to default (full size) values and the
+memory safely freed. The most noticeable data structures are the parent PCS
+(PictureParentControlSet) and the child PCS (PictureControlSet). They are
+acquired in the Resource Coordination process and the Picture Manager process
+respectively.
 
-When Auto-Dual or Auto-All mode is active, coding state needs to be reset before each coding loop (Similar to ‘do recode’ in Mode Decision process).
+When Auto-Dual or Auto-All mode is active, the coding state needs to be reset
+before each coding loop (similar to ‘do recode’ in the Mode Decision process).
 
 ### 2.4. Super-resolution API
-Table 1 illustrates the usage of super-resolution functions. Only related processes are listed.
+Table 1 illustrates the usage of super-resolution functions. Only related
+processes are listed.
 ![superres_API](./img/superres_API.png)
 ##### Table 1. Super-resolution API
 
 ## 3. Optimization
-Super-resolution affects the coding speed: The current picture and its references are downscaled to the same size for motion estimation search. In current implementation, pa ref[8] array and recon ref[8] array are used to hold downscaled reference pictures, to avoid duplicate downscaling on the same reference picture for different input pictures.
-
-When Auto-Dual or Auto-All is selected, each picture is encoded multiple times with different denominator values. A rate-distortion cost is produced by each coding pass. If the last rate-distortion cost is the best, the encoded bitstream will be used directly. If not, the current picture must be encoded again with the denominator value of the best rate-distortion cost. Mostly, full size coding has better rate-distortion cost than the downscaling ones, so full size coding pass is arranged in the last. Because only key frame and ARF may enable downscaling, an extra coding pass is barely needed in practice. The other possible solution is to eliminate the extra coding pass by saving coding states, but it requires extra memory and it is a bit more complicated to implement (E.g. At least need an extra special PCS to save all coding state for the best rate-distortion cost).
-
-Super-resolution also has an impact on memory usage: Extra buffers are allocated to hold downscaled pictures, including current coding picture, PA references and reconstructed references as shown in Figure 4.
+Super-resolution affects the coding speed: the current picture and its
+references are downscaled to the same size for the motion estimation search.
+In the current implementation, the pa ref[8] and recon ref[8] arrays are used
+to hold downscaled reference pictures, to avoid downscaling the same
+reference picture repeatedly for different input pictures.
+
+When Auto-Dual or Auto-All is selected, each picture is encoded multiple
+times with different denominator values. A rate-distortion cost is produced
+by each coding pass. If the last rate-distortion cost is the best, the
+encoded bitstream is used directly. If not, the current picture must be
+encoded again with the denominator value of the best rate-distortion cost.
+In most cases, full size coding has a better rate-distortion cost than the
+downscaled passes, so the full size coding pass is scheduled last. Because
+only key frames and ARFs may enable downscaling, an extra coding pass is
+rarely needed in practice. An alternative solution is to eliminate the extra
+coding pass by saving the coding state, but this requires extra memory and is
+more complicated to implement (e.g. at least an extra special PCS would be
+needed to save the complete coding state of the best rate-distortion pass).
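+
+The following is a hedged sketch of the recode loop described above
+(illustrative names, not the actual SVT-AV1 implementation; denominator 8 is
+assumed to mean no downscaling, since AV1 super-resolution scales by
+8/denominator):
+```
+#include <stdint.h>
+
+uint64_t encode_with_denom(int denom); /* one coding pass, returns RD cost */
+
+static void superres_auto_recode(const int *denoms, int num_denoms) {
+    uint64_t best_cost  = UINT64_MAX;
+    int      best_denom = denoms[0];
+    /* Full size (denom = 8) is placed last so that, in the common case
+     * where it wins, its bitstream can be used directly. */
+    for (int i = 0; i < num_denoms; i++) {
+        uint64_t cost = encode_with_denom(denoms[i]);
+        if (cost < best_cost) {
+            best_cost  = cost;
+            best_denom = denoms[i];
+        }
+    }
+    if (best_denom != denoms[num_denoms - 1]) /* last pass was not the best */
+        (void)encode_with_denom(best_denom);  /* extra pass with best denom */
+}
+```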
+
+Super-resolution also has an impact on memory usage: extra buffers are
+allocated to hold downscaled pictures, including the current coding picture,
+the PA references and the reconstructed references, as shown in Figure 4.
 ![superres_downscaled_buffers](./img/superres_downscaled_buffers.png)
 ##### Figure 4. Buffers for downscaled pictures
 
-Whether to enable super-resolution or not is up to the user. If the Auto mode is selected, the encoder may decide the search type according to specified encoder preset.
+Whether to enable super-resolution or not is up to the user. If the Auto mode
+is selected, the encoder may decide the search type according to the
+specified encoder preset.
 
 ## 4. Usage recommendation
-The Random mode is suitable for validation or testing only because it requires more memory (pa ref[8] and recon ref[8] are fully filled) and more CPU time (scaling the same reference on all different denominator values) but brings no benefit as compared to other modes.
-
-The Fixed mode with constant QP configuration can achieve less bandwidth requirement and acceptable quality.
-
-The Qthreshold or Auto mode with VBR configuration is expected to have better coding efficiency than other modes because for most natural videos and common bit rates, enabling super-resolution actually leads to a drop in compression efficiency. According to internal tests, coding gain is achieved only at very low bit rates with the Auto mode. With Qthreshold and Auto modes, super-resolution is only conducted on selected frames. The selection is based on the frame QP (Qthreshold mode) and rate-distortion cost (Auto mode).
+The Random mode is suitable for validation or testing only, because it
+requires more memory (pa ref[8] and recon ref[8] are fully filled) and more
+CPU time (the same reference is scaled for every denominator value) but
+brings no benefit compared to the other modes.
+
+The Fixed mode with a constant QP configuration can reduce the bandwidth
+requirement while maintaining acceptable quality.
+
+The Qthreshold or Auto mode with a VBR configuration is expected to have
+better coding efficiency than the other modes, because for most natural
+videos and common bit rates, enabling super-resolution on every frame
+actually leads to a drop in compression efficiency. According to internal
+tests, a coding gain is achieved only at very low bit rates with the Auto
+mode. With the Qthreshold and Auto modes, super-resolution is only applied to
+selected frames. The selection is based on the frame QP (Qthreshold mode) or
+the rate-distortion cost (Auto mode).
 
 ## 5. Notes
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 ## 6. References
 [1] Jingning Han, Bohan Li, Debargha Mukherjee, Ching-Han Chiang, Adrian Grange, Cheng Chen, Hui Su, Sarah Parker, Sai Deng, Urvang Joshi, Yue Chen, Yunqing Wang, Paul Wilkins, Yaowu Xu, James Bankoski, “A Technical Overview of AV1”
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-Switch-Frame.md 1.2.0+dfsg-2/Docs/Appendix-Switch-Frame.md
--- 0.9.1+dfsg-1/Docs/Appendix-Switch-Frame.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-Switch-Frame.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,142 @@
+[Top level](../README.md)
+
+# Switch Frame Appendix
+## 1. Introduction [1]
+In HTTP ABR (adaptive bitrate) streaming, a source video is encoded into
+multiple renditions at different bit rates to cope with varying network
+conditions. Each rendition is cut into multiple segments, each starting with
+a key frame. As a switch between renditions can only happen at the position
+of a key frame, there is a latency of quite a few seconds caused by decoding
+the current segment before the next key frame is reached. <br> To shorten the
+latency, a possible solution is to insert more key frames into the renditions
+so as to shorten the segment length. However, this inevitably increases the
+bit rate, as key frames need more bits to encode; alternatively, an obvious
+drop in video quality occurs if the original bit rate is to be kept.
+![switch-at-key-frames-only](./img/switch-at-key-frames-only.png)
+
+
+To solve this issue, AV1 introduces a new frame type, the Switch Frame
+(s-frame). Unlike a key frame, an s-frame requires fewer bits to encode and
+is therefore more affordable to insert into the segments. It can act like a
+key frame and divide the segments into smaller pieces at a low cost.
+Spreading s-frames shortens the latency to around 1 second, or even 0.25
+second (when the mini GOP size of a 30 fps video source is set to 8).
+![switch-at-switch-frames-or-key-frames](./img/switch-at-switch-frames-or-key-frames.png)
+
+## 2. Implementation
+According to the AV1 spec, the following requirements must be satisfied, so
+the corresponding changes are made in the SVT-AV1 encoder to support the
+s-frame feature.
+
+### At the sequence level
+  - max_frame_width and max_frame_height (refer to the AV1 spec 5.5.1.) written in the sequence header must be equal to or larger than the dimensions of the largest rendition to be used in the sequence.
+Two command line options, ‘--forced-max-frame-width’ and ‘--forced-max-frame-height’, are added to the encoder to allow the user to set these two parameters.
+  - frame_width_bits_minus_1 and frame_height_bits_minus_1 (refer to the AV1 spec 5.5.1.) must be equal across all renditions.
+The SVT-AV1 encoder derives the values of these two parameters from max_frame_width and max_frame_height.
+  - All renditions must have an identical mini GOP size.
+At the API level, the encoder disables adaptive mini GOP size to ensure the mini GOP is aligned across all renditions when the s-frame feature is on.
+  - All renditions must have an identical super block size.
+The super block size is fixed to 64 when the s-frame feature is on.
+
+### At the frame level
+  - Decide which frames are to be made into s-frames.<br>
+This is done in the Picture Decision process. Every sframe-dist frames
+(controlled by the command line option --sframe-dist), the encoder decides
+whether to make the frame into an s-frame. There are two modes for making the
+decision (controlled by the command line option --sframe-mode); a sketch of
+both modes follows at the end of this section:
+    * Mode 1. Strict mode<br>
+The considered frame is made into an s-frame only if it is a base layer inter
+frame. This mode is intended for debugging purposes only, since the s-frame
+interval could be much larger than expected, e.g. when the s-frame interval
+is set to 30 while the mini GOP size is set to 16.
+    * Mode 2. Nearest mode<br>
+The considered frame is made into an s-frame if it is a base layer inter
+frame. If it is not, the next base layer inter frame is made into an s-frame
+instead. This is the default mode.
+
+  - Set the s-frame’s refresh_frame_flags to ‘allFrames’. Frames after an s-frame do not use any reference frame that precedes the s-frame.<br>
+This is done in the Picture Decision process. After a frame is chosen to be
+made into an s-frame, its dependent lists are updated to ensure that frames
+after it do not use reference frames before it.
+
+  - Set error resilient flag in s-frames.<br>
+This flag is set in the Picture Decision process.
+
+  - Set s-frame’s primary_ref_frame to PRIMARY_REF_NONE.<br>
+The parameter is set in the Picture Manager process.
+
+  - Disable reference frame MVs in s-frames.<br>
+This parameter is set in the Mode Decision Configuration process. When
+reference frame MVs are enabled globally, the encoder will set the feature off
+for s-frames.
+
+  - Send reference frame order hints with s-frames. Refer to the AV1 spec 5.9.2.<br>
+The hints are updated in the Picture Decision process and written to bitstream
+in the Entropy Coding process.
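+
+As referenced above, the following is an illustrative sketch of the two
+s-frame insertion modes (hypothetical names, not the actual SVT-AV1 code); it
+is assumed to be called once per frame in decode order:
+```
+#include <stdint.h>
+
+typedef enum { SFRAME_STRICT_BASE = 1, SFRAME_NEAREST_BASE = 2 } SframeMode;
+
+static int sframe_pending = 0; /* an s-frame position was missed (mode 2) */
+
+static int is_sframe(uint64_t poc, int sframe_dist, SframeMode mode,
+                     int is_base_layer_inter_frame) {
+    if (sframe_dist <= 0 || poc == 0)
+        return 0; /* feature off, or the key frame at the start */
+    int due = (poc % (uint64_t)sframe_dist) == 0;
+    if (mode == SFRAME_STRICT_BASE)
+        /* Mode 1: only the frame exactly at the interval qualifies, and
+         * only if it happens to be a base layer inter frame. */
+        return due && is_base_layer_inter_frame;
+    /* Mode 2: if the frame at the interval is not a base layer inter
+     * frame, promote the next base layer inter frame instead. */
+    if (due)
+        sframe_pending = 1;
+    if (sframe_pending && is_base_layer_inter_frame) {
+        sframe_pending = 0;
+        return 1;
+    }
+    return 0;
+}
+```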
+
+## 3. Usage
+In the example below, three different resolutions of a video sequence are
+encoded with the switch frame feature on. The keyframe interval is set to
+150, the s-frame interval to 32, the prediction structure to low delay
+(--pred-struct 1), and the mini GOP size to 8 (--hierarchical-levels 3).
+
+##### Generate a rendition in 1080p
+```./SvtAv1EncApp --preset 8 --sframe-dist 32 --sframe-mode 2 --pred-struct 1 --hierarchical-levels 3 --forced-max-frame-width 1920 --forced-max-frame-height 1080 -q 32 --rc 0 --keyint 150 -i dota2_1080p60_600f.y4m -b dota2_1080p60_600f.ivf```
+
+##### Generate a rendition in 720p
+```./SvtAv1EncApp --preset 8 --sframe-dist 32 --sframe-mode 2 --pred-struct 1 --hierarchical-levels 3 --forced-max-frame-width 1920 --forced-max-frame-height 1080 -q 32 --rc 0 --keyint 150 -i dota2_720p60_600f.y4m -b dota2_720p60_600f.ivf```
+
+##### Generate a rendition in 360p
+```./SvtAv1EncApp --preset 8 --sframe-dist 32 --sframe-mode 2 --pred-struct 1 --hierarchical-levels 3 --forced-max-frame-width 1920 --forced-max-frame-height 1080 -q 32 --rc 0 --keyint 150 -i dota2_360p60_600f.y4m -b dota2_360p60_600f.ivf```
+<br><br>
+A few parameters in the above commands are important to make the renditions ‘switchable’ -- meaning an AV1-spec-compliant decoder can decode sub-segments starting with s-frames from multiple independently-encoded renditions:
+
+  - --sframe-dist<br>
+    S-frame interval. All renditions must have identical value.
+
+  - --sframe-mode<br>
+    S-frame insertion mode. All renditions must have identical value.
+
+  - --pred-struct<br>
+    Set the prediction structure type. The s-frame feature only supports low
+    delay. All renditions must have an identical value to align the s-frames’
+    reference frame lists between renditions.
+
+  - --hierarchical-levels<br>
+    Set the hierarchical levels. All renditions must have an identical value
+    to align the mini GOP size between renditions. The available values are
+    3, 4 and 5, corresponding to mini GOP sizes 8, 16 and 32 respectively.
+
+  - --forced-max-frame-width and --forced-max-frame-height<br>
+    Should be set to a value larger than or equal to the dimensions of the
+    largest rendition to be used in the sequence.
+
+  - --keyint<br>
+    Set the keyframe interval. All renditions must have an identical value to align keyframes.
+
+  - Switch between renditions<br>
+    According to the AV1 spec chapter “7.11.3.3. Motion vector scaling
+    process”, it is a requirement of bitstream conformance that all the
+    following conditions are satisfied:<br>
+    - 2 * FrameWidth >= RefUpscaledWidth[ refIdx ]<br>
+    - 2 * FrameHeight >= RefFrameHeight[ refIdx ]<br>
+    - FrameWidth <= 16 * RefUpscaledWidth[ refIdx ]<br>
+    - FrameHeight <= 16 * RefFrameHeight[ refIdx ]<br>
+    So when switching, dest_resolution / source_resolution must be between 1/2 and 16.
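+
+    As a small self-check, the four conditions above can be wrapped into a
+    helper (a sketch, not part of SVT-AV1):
+    ```
+    static int switch_is_conformant(int frame_w, int frame_h,
+                                    int ref_upscaled_w, int ref_h) {
+        return 2 * frame_w >= ref_upscaled_w && 2 * frame_h >= ref_h &&
+               frame_w <= 16 * ref_upscaled_w && frame_h <= 16 * ref_h;
+    }
+    ```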
+
+##### Stitch renditions
+The above three renditions can be stitched together to simulate the ABR
+player behavior mentioned in the introduction section. This requires an ivf
+parser to extract the frame type and stitch the renditions at s-frames.
+Several players already support such stitched bitstreams, e.g. the mpv
+player, FFmpeg and Chrome.
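+
+As a starting point for such a tool, the sketch below walks the frames of an
+IVF file (the IVF container is simple: a 32-byte file header followed by
+frames, each preceded by a 12-byte header holding a 4-byte little-endian
+payload size and an 8-byte PTS). Identifying which payloads are s-frames
+additionally requires parsing the AV1 OBU frame headers, which is omitted
+here:
+```
+#include <stdio.h>
+#include <stdint.h>
+#include <string.h>
+
+static uint32_t rd_le32(const uint8_t *p) {
+    return (uint32_t)p[0] | ((uint32_t)p[1] << 8) |
+           ((uint32_t)p[2] << 16) | ((uint32_t)p[3] << 24);
+}
+
+static void list_ivf_frames(FILE *f) {
+    uint8_t hdr[32], fh[12];
+    if (fread(hdr, 1, 32, f) != 32 || memcmp(hdr, "DKIF", 4) != 0)
+        return; /* not an IVF file */
+    for (unsigned n = 0; fread(fh, 1, 12, f) == 12; n++) {
+        uint32_t size = rd_le32(fh); /* frame payload size in bytes */
+        printf("frame %u: %u bytes\n", n, size);
+        if (fseek(f, (long)size, SEEK_CUR) != 0)
+            break; /* skip the payload, move to the next frame header */
+    }
+}
+```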
+
+## Notes
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
+
+## References
+[1] Tarek Amara (Twitch) - “S-Frame in AV1: Enabling better compression for low latency live streaming”. Presented in the Demuxed 2017.([Video link](https://www.youtube.com/watch?v=o5sJX6VA34o&list=PLkyaYNWEKcOfntbMd6KtHhF7qpL9hj6of&index=3&ab_channel=Demuxed))<br>
+[2] AV1 Bitstream & Decoding Process Specification v1.0.0
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-TPL.md 1.2.0+dfsg-2/Docs/Appendix-TPL.md
--- 0.9.1+dfsg-1/Docs/Appendix-TPL.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-TPL.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,40 +1,64 @@
+[Top level](../README.md)
+
 # Temporal Dependency Model
 
-## 1.  Description of the algorithm
+## 1. Description of the algorithm
 
 ### 1.1 Introduction
 
-The Temporal Dependency Model (TPL) algorithm represents an extension of the mb_tree algorithm from the x264 encoder. The main purpose of the algorithm is
-optimize the encoder settings to reduce the impact reference pictures have on the degradation in quality/rate in the pictures that reference them directly
-or indirectly. In the case of the algorithm presented in this document, the focus is on reducing the impact of base layer pictures on such degradations.
-The algorithm involves two main steps. In the first step, encoding is performed using an elementary encoder to collect prediction information.
-The second step involves using the collected prediction information to optimize the encoder settings that would be used in the final encoding of the input
-pictures. The affected encoder settings include QPS, QPM and the lambda parameter considered in the RD cost calculations.
+The Temporal Dependency Model (TPL) algorithm represents an extension of the
+mb_tree algorithm from the x264 encoder. The main purpose of the algorithm is
+to optimize the encoder settings so as to reduce the impact reference
+pictures have on the degradation in quality/rate in the pictures that
+reference them directly or indirectly. In the case of the algorithm presented
+in this document, the focus is on reducing the impact of base layer pictures
+on such degradations. The algorithm involves two main steps. In the first
+step, encoding is performed using an elementary encoder to collect prediction
+information. The second step involves using the collected prediction
+information to optimize the encoder settings that would be used in the final
+encoding of the input pictures. The affected encoder settings include QPS,
+QPM and the lambda parameter considered in the RD cost calculations.
 
 ### 1.2 High Level Idea
 
 The following concepts are introduced to define the high level operation of the TPL algorithm:
 
-- Degradation measure: This measure would concern both the distortion and the rate variables. The degradations is based on the difference between the distortion and rate that result from considering source samples as reference samples  and similar quantities when considering reconstructed samples as reference samples. There is an underlying assumption in the development of the algorithm that the recon distortion and rate are worse than those based on source samples.
-
-- TPL group of pictures: This represents the largest set of pictures that would be considered in collecting prediction data in the first step of the algorithm. The actual set of pictures used in the TPL algorithm could, for optimization purposes, be a subset of TPL group of pictures.
-
-The main idea of the TPL algorithm is to backpropagate the prediction degradation information back to the base layer pictures.
-Therefore, the accumulation of the backpropagated degradation information is performed by considering the pictures in the TPL group in reverse of the decode
-order, i.e. the last decoded picture is processed first, then the next to last picture… It should be noted that the prediction for a given block typically
-involves samples from multiple contiguous blocks in the reference picture. Therefore, the degradation measure is backpropagated from a given block to the
-blocks in the reference picture that contribute corresponding prediction samples in proportion to the following:
+- Degradation measure: This measure concerns both the distortion and the rate variables.
+  The degradation is based on the difference between the distortion and rate
+  that result from considering source samples as reference samples, and the
+  same quantities when considering reconstructed samples as reference
+  samples. There is an underlying assumption in the development of the
+  algorithm that the recon-based distortion and rate are worse than those
+  based on the source samples.
+
+- TPL group of pictures: This represents the largest set of pictures that would be considered
+  in collecting prediction data in the first step of the algorithm. The
+  actual set of pictures used in the TPL algorithm could, for optimization
+  purposes, be a subset of the TPL group of pictures.
+
+The main idea of the TPL algorithm is to backpropagate the prediction
+degradation information to the base layer pictures. Therefore, the
+accumulation of the backpropagated degradation information is performed by
+considering the pictures in the TPL group in reverse decode order, i.e. the
+last decoded picture is processed first, then the next-to-last picture, and
+so on. It should be noted that the prediction for a given block typically
+involves samples from multiple contiguous blocks in the reference picture.
+Therefore, the degradation measure is backpropagated from a given block to
+the blocks in the reference picture that contribute the corresponding
+prediction samples in proportion to the following:
 
 - The ratio of the (recon-based distortion – source-based distortion)/(recon-based distortion)
 
 - The overlap area of the prediction block with each of the blocks contributing prediction samples in the reference picture.
 
-A given reference picture would collect such backpropagated information from all pictures that use it as a reference picture.
-The cycle is then repeated again when the same operations are considered for all pictures that act as references to the reference picture that was just
-considered above. The process continues until all the degradation information is accumulated in the base layer pictures.
-The accumulated degradation information is then used to adjust encoder parameters for the final encoding of the pictures.
+A given reference picture collects such backpropagated information from all
+pictures that use it as a reference picture. The cycle is then repeated when
+the same operations are performed for all pictures that act as references to
+the reference picture that was just considered. The process continues until
+all the degradation information is accumulated in the base layer pictures.
+The accumulated degradation information is then used to adjust encoder
+parameters for the final encoding of the pictures.
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 ### 2.1 TPL inputs/outputs
 
@@ -69,9 +93,13 @@ The TPL feature data flow is summarized
 
 ### TPL Flow/Operations
 
-The TPL algorithm consists of three main components: A dispenser, a synthesizer and an optimizer. A high-level diagram of the algorithm is shown in the figure below. The functions of the three components are briefly summarized in the following.
+The TPL algorithm consists of three main components: a dispenser, a
+synthesizer and an optimizer. A high-level diagram of the algorithm is shown
+in the figure below. The functions of the three components are briefly
+summarized below.
 
-As illustrated in Figure 2 below, the TPL algorithm consists, for each base picture, of two picture-based loops:
+As illustrated in Figure 2 below, the TPL algorithm consists, for each base
+picture, of two picture-based loops:
 
 ![tpl_fig2](./img/tpl_fig2.png)
 
@@ -80,10 +108,14 @@ As illustrated in Figure 2 below, the TP
 
 ### TPL dispenser
 
-For a given TPL group of pictures, the function of the dispenser is to encode the pictures in the TPL group using a very simple encoder and collect
-prediction data. Pictures in the TPL group are divided into 16x16 blocks. The encoding of the blocks is performed using both source-based references and
-reconstructed references. The details of the operation of the dispenser are outlined in Appendix B. For each 16x16 block, the output of the dispenser
-consists of the following variables associated with the best prediction mode for the block, which could be either an intra or an inter mode:
+For a given TPL group of pictures, the function of the dispenser is to encode
+the pictures in the TPL group using a very simple encoder and collect
+prediction data. Pictures in the TPL group are divided into 16x16 blocks. The
+encoding of the blocks is performed using both source-based references and
+reconstructed references. The details of the operation of the dispenser are
+outlined in Appendix B. For each 16x16 block, the output of the dispenser
+consists of the following variables associated with the best prediction mode
+for the block, which could be either an intra or an inter mode:
 
 - srcrf_dist: Distortion based on the source samples.
 - recrf_dist: Distortion based on the reconstructed samples.
@@ -92,13 +124,14 @@ consists of the following variables asso
 - mv: Best motion vector.
 - ref_frame_poc: Picture number of the best reference picture
 
-The TPL Dispenser performs the processing on a 16x16 block or 32x32 block or 64x64 block basis.
-The resulting distortion and rate data listed above is then normalized (on a 4x4 block basis) and stored on the used block size basis.
+The TPL Dispenser performs the processing on a 16x16, 32x32 or 64x64 block
+basis. The resulting distortion and rate data listed above is then normalized
+(on a 4x4 block basis) and stored on the basis of the block size used.
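+
+Assuming the normalization simply spreads the block-level value evenly over
+its 4x4 units (a hedged reading of the statement above, not the exact code),
+it would amount to:
+```
+#include <stdint.h>
+
+static int64_t per_4x4(int64_t blk_value, int blk_w, int blk_h) {
+    int units = (blk_w >> 2) * (blk_h >> 2); /* number of 4x4 sub-blocks */
+    return units > 0 ? blk_value / units : 0;
+}
+```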
 
 Implementation notes:
 
 - When the best prediction mode is an intra mode, then srcrf_dist ← recrf_dist. This would imply that (recrf_dist - srcrf_dist) = 0.
-  It follows that the intra coded blocks would not have any impact on the computation of mc_dep_dist  in the synthesizer. Similarly,
+  It follows that the intra coded blocks would not have any impact on the computation of mc_dep_dist in the synthesizer. Similarly,
   srcrf_rate ← recrf_rate, i.e. recrf_rate - srcrf_rate = 0, and the intra coded blocks would not have any impact on the computation of mc_dep_rate
   in the synthesizer.
 
@@ -114,32 +147,49 @@ Implementation notes:
 
 ### TPL Synthesizer
 
-The main function of the synthesizer is to compute for each block in the TPL group of pictures two quantities:
-
-mc_dep_dist: For a given block, this quantity represents the total contribution of the block to the degradation in quality (i.e. distortion) in the blocks whose predictions are either directly or indirectly (through a chain or references) affected by the quality of the current block.
+The main function of the synthesizer is to compute for each block in the TPL
+group of pictures two quantities:
 
-mc_dep_rate: For a given block, this quantity represents the total contribution of the block to the increase in rate in the blocks that use the current block as a direct reference or indirect reference (through a chain or references).
-
-The computations are performed using the  prediction information provided by the dispenser. The main output of the synthesizer are the mc_dep_dist and mc_dep_rate quantities for the blocks in the base layer picture of interest in the TPL group.  . The processing in the Synthesizer is performed on a 16x16 block or 8x8 block basis.
+mc_dep_dist: For a given block, this quantity represents the total
+contribution of the block to the degradation in quality (i.e. distortion) in
+the blocks whose predictions are either directly or indirectly (through a
+chain of references) affected by the quality of the current block.
+
+mc_dep_rate: For a given block, this quantity represents the total
+contribution of the block to the increase in rate in the blocks that use the
+current block as a direct or indirect reference (through a chain of
+references).
+
+The computations are performed using the prediction information provided by
+the dispenser. The main outputs of the synthesizer are the mc_dep_dist and
+mc_dep_rate quantities for the blocks in the base layer picture of interest
+in the TPL group. The processing in the Synthesizer is performed on a 16x16
+or 8x8 block basis.
 
 ### TPL Optimizer
 
-The main function of the optimizer is to adjust if necessary the quality of the base layer picture of interest in the TPL group to minimize the impact that
-picture has on other pictures in the TPL group that use it either directly or indirectly as a reference. The TPL optimizer makes use of the mc_dep_dist,
-mc_dep_rate and dispenser prediction data for blocks in the base layer picture of interest in the TPL group to derive adjustment factors in a number of
-encoder setting for the base layer pictures. The adjustment factors are referred to as r0,
-beta and lambda factor and are associated mainly with QPS, QP modulation and Lambda adjustments, respectively.
+The main function of the optimizer is to adjust, if necessary, the quality of
+the base layer picture of interest in the TPL group to minimize the impact
+that picture has on the other pictures in the TPL group that use it either
+directly or indirectly as a reference. The TPL optimizer makes use of the
+mc_dep_dist, mc_dep_rate and dispenser prediction data for the blocks in the
+base layer picture of interest in the TPL group to derive adjustment factors
+for a number of encoder settings for the base layer pictures. The adjustment
+factors are referred to as r0, beta and the lambda factor, and are associated
+mainly with the QPS, QP modulation and lambda adjustments, respectively.
 
 ### Generation of r0, beta and tpl_rdmult_scaling_factors
 
-The TPL synthesizer generates for each block in each TPL group of pictures values for the distortion propagation variable (mc_dep_dist) and the rate
-propagation variable (mc_dep_rate). These two variables are then used to generate adjustment parameters for a number of encoder settings for base layer
-pictures, including QP scaling, QP modulation for the individual blocks and lambda parameter used in cost calculation.
+The TPL synthesizer generates, for each block in each TPL group of pictures,
+values for the distortion propagation variable (mc_dep_dist) and the rate
+propagation variable (mc_dep_rate). These two variables are then used to
+generate adjustment parameters for a number of encoder settings for base
+layer pictures, including QP scaling, QP modulation for the individual blocks
+and the lambda parameter used in the cost calculations.
 
 - r0: A frame-based parameter used to introduce adjustments in QPS. Define the following variables:
     - mc_dep_delta: Represents the RD cost associated with a given 16x16 or 8x8 block based on the corresponding distortion mc_dep_dist and corresponding rate mc_dep_rate.
     - intra_cost_base: Represents an accumulation over the whole frame of recrf_dist for all (16x16 or 8x8) blocks in the frame.
-    - mc_dep_cost_base:  Represents an accumulation over the whole frame of (recrf_dist + mc_dep_delta) for all (16x16 or 8x8)  blocks in the frame.
+    - mc_dep_cost_base: Represents an accumulation over the whole frame of (recrf_dist + mc_dep_delta) for all (16x16 or 8x8) blocks in the frame.
 
 The r0 value for the base layer picture is given by:
 ```
@@ -149,11 +199,15 @@ or
 
 ![tpl_math1](./img/tpl_math1.png)
 
-Based on the definitions above, r0 takes value between 0 and 1. A large r0 value implies ![tpl_math_sum](./img/tpl_math_sum.png) is small and that the base
-layer picture is of good quality and is not contributing much to the degradation in quality in the pictures they use it as a reference. Consequently,
-such picture may not need any adjustment in QP. On the other hand, small r0 values imply ![tpl_math_sum](./img/tpl_math_sum.png) is large and that the base
-layer picture is contributing significantly to the degradation in quality in the pictures that use it directly or indirectly as a reference.
-Consequently, such picture may need to have its QP reduced.
+Based on the definitions above, r0 takes values between 0 and 1. A large r0
+value implies ![tpl_math_sum](./img/tpl_math_sum.png) is small and that the
+base layer picture is of good quality and is not contributing much to the
+degradation in quality in the pictures that use it as a reference.
+Consequently, such a picture may not need any adjustment in QP. On the other
+hand, a small r0 value implies ![tpl_math_sum](./img/tpl_math_sum.png) is
+large and that the base layer picture is contributing significantly to the
+degradation in quality in the pictures that use it directly or indirectly as
+a reference. Consequently, such a picture may need to have its QP reduced.
 
 - beta: The beta parameter is used to adjust the QP per superblock. For the SB of interest in the base layer picture, beta is determined by performing the
   same computations performed above for r0 but restricted only to the SB of interest. An SB-based parameter rk is then computed as follows:
@@ -164,10 +218,13 @@ Consequently, such picture may need to h
   ```
   beta = r0/rk
   ```
-  Beta is a measure of how much better or worse rk is as compared to r0. For beta >> 1, rk is much smaller as compared to r0, implying that the corresponding
-  SB is of low quality as compared to the average quality of the frame, and would need a significant QP adjustment. For beta <<1, rk is much larger than r0,
-  implying that the corresponding SB is of good quality as compared to the average quality of the picture and may not need much in terms of QP adjustment
-  or could have its QP increased.
+  Beta is a measure of how much better or worse rk is compared to r0. For
+  beta >> 1, rk is much smaller than r0, implying that the corresponding SB
+  is of low quality compared to the average quality of the frame and needs a
+  significant QP adjustment. For beta << 1, rk is much larger than r0,
+  implying that the corresponding SB is of good quality compared to the
+  average quality of the picture and may not need much of a QP adjustment, or
+  could even have its QP increased.
 
 - tpl_rdmult_scaling_factors: The tpl_rdmult_scaling_factors is computed on a 16x16 block basis, regardless of the picture resolution. For each 16x16 block,
   calculations similar to those performed for beta are performed for the 16x16 block. The corresponding tpl_rdmult_scaling_factors is given by:
@@ -181,13 +238,19 @@ Consequently, such picture may need to h
 
 ### QP-Scaling Algorithm
 
-Ordinary QP scaling is used to improve the performance of a hierarchical prediction structure where lower quantization parameters (QP) are assigned to frames
-in the lower temporal layers, which serve as reference pictures for the higher temporal layer pictures. In the TPL algorithm, the propagation factor r0 is
-used to improve the base layer picture QP assignment.  The main idea is that the lower r0 is the more improvements the picture would need.
-
-The picture qindex in CRF mode is computed (in the cqp_qindex_calc_tpl_la() function) following different methods depending on the picture type, namely Intra,
-BASE, REF-NON-BASE and NON-REF. A summary of the QPS adjustment ideas is presented below.. In the following, qindex is 4xQP for most of the QP values and
-represents the quantization parameter the encoder works with internally instead of QP. The later is just an input parameter.
+Ordinary QP scaling is used to improve the performance of a hierarchical
+prediction structure, where lower quantization parameters (QP) are assigned
+to frames in the lower temporal layers, which serve as reference pictures for
+the higher temporal layer pictures. In the TPL algorithm, the propagation
+factor r0 is used to improve the base layer picture QP assignment. The main
+idea is that the lower r0 is, the more improvement the picture needs.
+
+The picture qindex in CRF mode is computed (in the cqp_qindex_calc_tpl_la()
+function) following different methods depending on the picture type, namely
+Intra, BASE, REF-NON-BASE and NON-REF. A summary of the QPS adjustment ideas
+is presented below. In the following, qindex is 4xQP for most QP values and
+represents the quantization parameter the encoder works with internally
+instead of QP; the latter is just an input parameter.
 
 - Intra pictures: The qindex for both Intra Key Frames (IDR) and non-Key frames (CRA) is generated using similar approaches with slightly different tuning.
   A lower qindex is assigned to the pictures with small r0 values. The main idea behind the adjustment of the qindex for a given picture is as follows:
@@ -219,12 +282,15 @@ represents the quantization parameter th
 
 ### SB-based QP-Modulation Algorithm
 
-In TPL, the parameter beta plays the same role at the SB level as that of r0 at the picture level. Therefore, a large beta for a given SB implies that quality of that SB should be improved.
-For each SB, the main idea in QP modulation is that a new QP value is determined based on the corresponding beta value using the following equation:
+In TPL, the parameter beta plays the same role at the SB level as r0 does at
+the picture level. Therefore, a large beta for a given SB implies that the
+quality of that SB should be improved. For each SB, the main idea of the QP
+modulation is that a new QP value is determined based on the corresponding
+beta value using the following equation:
 
 ![tpl_math2](./img/tpl_math2.png)
 
-where f = sqrt(.) for intra_picture or when beta < 1, and  f=sqrt(sqrt(.)) otherwise. The idea then behind the TPL QP modulation is as follows:
+where f = sqrt(.) for intra pictures or when beta < 1, and f = sqrt(sqrt(.)) otherwise. The idea behind the TPL QP modulation is then as follows:
 
 - If beta > 1 → rk<r0 → SB does not have a good quality as compared to average picture quality → Reduce QP for the SB, e.g. QP’=QP/sqrt(beta) or QP’=QP/sqrt(sqrt(beta)). Since beta > 1, QP’<QP.
 
@@ -259,11 +325,12 @@ i.e. better quality for the intra pictur
     ```
     where pic_full_lambda is the original lambda value based on the picture qindex.
 
-## 3.  Optimization of the algorithm
+## 3. Optimization of the algorithm
 
-Different quality-complexity trade offs of the TPL algorithm can be achieved by considering different settings for the flag tpl_level.
-The latter controls a set of parameters that are grouped under set_tpl_extended_controls () function.
-Table 2 describes the functionality of each of the  TPL control parameters.
+Different quality-complexity trade-offs of the TPL algorithm can be achieved
+by considering different settings of the flag tpl_level. The latter controls
+a set of parameters that are grouped under the set_tpl_extended_controls()
+function. Table 2 describes the functionality of each of the TPL control
+parameters.
 
 #### Table 2: Control and optimization flags associated with TPL.
 
@@ -291,15 +358,21 @@ Table 2 describes the functionality of e
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.1.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
 
 ## Appendix A: TPL Group
 
-The TPL group is a collection of N pictures (stored in decode order) that limits the domain of analysis and application of the TPL algorithm.
+The TPL group is a collection of N pictures (stored in decode order) that
+limits the domain of analysis and application of the TPL algorithm.
 
-The composition of the TPL group depends on the base layer picture of interest. To illustrate the construction of the TPL group,
-the case of a three-layer prediction structure shown below is considered.
+The composition of the TPL group depends on the base layer picture of interest.
+To illustrate the construction of the TPL group, the case of a three-layer
+prediction structure shown below is considered.
 
 ![tpl_fig3](./img/tpl_fig3.png)
 
@@ -309,10 +382,10 @@ the case of a three-layer prediction str
     - Example: IDR picture 0 → TPL group: 0,4,2,1,3.
   - B picture:
     - TPL group: B picture plus preceding pictures in the mini-GoP.
-    - Example: B picture 8 → TPL group:  8,6,5,7.
+    - Example: B picture 8 → TPL group: 8,6,5,7.
   - CRA picture aligned on the mini-GoP:
     - TPL group: Same as in the case of B picture.
-    - Example: CRA picture 24 → TPL group:  24,22,21,23
+    - Example: CRA picture 24 → TPL group: 24,22,21,23
   - CRA not aligned with mini-GoP:
     - TPL group: Same as in the case of an IDR (delayed intra) picture.
 
@@ -322,10 +395,10 @@ the case of a three-layer prediction str
     - Example: mg-lad = 1, IDR picture 0 → TPL group: 0,4,2,1,3,8,6,5,7.
   - B picture:
     - TPL group: B picture plus n mini-GoPs, including the next n base-layer pictures.
-    - Example: mg-lad = 1, B picture 8 → TPL group:  8,6,5,7,12,10,9,11.
+    - Example: mg-lad = 1, B picture 8 → TPL group: 8,6,5,7,12,10,9,11.
   - CRA picture aligned on the mini-GoP:
     - TPL group: Same as in the case of B picture.
+    - Example: mg-lad = 1, CRA picture 24 → TPL group: 24,22,21,23,28,26,25,27
+    - Example: mg-lad = 1, CRA picture 24 → TPL group: 24,22,21,23,28,26, 25,27
   - CRA not aligned with mini-GoP:
     - TPL group: Same as in the case of an IDR (delayed intra) picture.
 
@@ -334,7 +407,7 @@ the case of a three-layer prediction str
 
 For a given TPL group of pictures, the dispenser operates as follows:
 
-- For  each picture in the TPL group (from picture 0 to picture N-1 considered in decode order):
+- For each picture in the TPL group (from picture 0 to picture N-1 considered in decode order):
   - For each 64x64 super-block in the picture
     - For each 16x16 block in the super-block
       1. Source based operations
@@ -396,17 +469,23 @@ For a given TPL group of pictures, the d
 
 ## Appendix C: Example of Synthesizer Operations
 
-To illustrate the operations of the Synthesizer, consider the case of a three layer prediction structure and assume lad_mg = 0 and the TPL group size is 4.
-As an example, assume picture 8 is being processed by the TPL algorithm and that the Dispenser has completed its operations for the pictures prior to and
-including picture 8. The corresponding TPL group consists of pictures 8, 6, 5 and 7, listed in decode order.  At the Synthesizer, the pictures are processed
+To illustrate the operations of the Synthesizer, consider the case of a three
+layer prediction structure and assume lad_mg = 0 and the TPL group size is 4.
+As an example, assume picture 8 is being processed by the TPL algorithm and
+that the Dispenser has completed its operations for the pictures prior to and
+including picture 8. The corresponding TPL group consists of pictures 8, 6, 5
+and 7, listed in decode order. At the Synthesizer, the pictures are processed
 in reverse decode order: 7,5,6,8.
 
 ![tpl_fig4](./img/tpl_fig4.png)
 
-To illustrate the Synthesizer operations, it is assumed the picture size is 64x64. The TPL group associated with picture 8 is show below, where each picture is
-split into 16x16 blocks (in the code the block size would be set to 8x8 since the considered resolution <720p. the 16x16 block size is used here for
-illustration purposes). The blocks are indexed in raster scan order from 0 to 15 in each picture. A block is identified using the pair (i,j),
-where i is the picture number and j is the block index in picture i.
+To illustrate the Synthesizer operations, it is assumed the picture size is
+64x64. The TPL group associated with picture 8 is shown below, where each
+picture is split into 16x16 blocks (in the code, the block size would be set
+to 8x8 since the considered resolution is below 720p; the 16x16 block size is
+used here for illustration purposes). The blocks are indexed in raster scan
+order from 0 to 15 in each picture. A block is identified using the pair
+(i,j), where i is the picture number and j is the block index in picture i.
 
 ![tpl_fig5](./img/tpl_fig5.png)
 
@@ -418,31 +497,47 @@ The list of references to be considered
 | 5 | 6 |
 | 6 | 8 |
 
-The operations of the synthesizer relies on distortion and rate related quantities. Let Bc denote the index for the current block in the current picture Pc for which
-predictions samples are obtained from block Br in reference picture Pr. The three distortion related quantities needed in the Synthesizer are:
-
-- Distortion ratio DistRatio: For a given 16x16/8x8 block to be encoded, let  recrf_dist and srcrf_dist denote the recon-based and source-based prediction distortions, respectively. Then
+The operations of the synthesizer rely on distortion- and rate-related
+quantities. Let Bc denote the index of the current block in the current
+picture Pc, for which prediction samples are obtained from block Br in
+reference picture Pr. The three distortion-related quantities needed in the
+Synthesizer are:
+
+- Distortion ratio DistRatio: For a given 16x16/8x8 block to be encoded,
+  let recrf_dist and srcrf_dist denote the recon-based and source-based
+  prediction distortions, respectively. Then
   ```
   DistRatio(Pc,Bc) = (recrf_dist(Pc,Bc) - srcrf_dist(Pc,Bc)) / recrf_dist(Pc,Bc)
   ```
-  srcrf_dist could be assumed to be the smallest distortion we could have. Therefore (recrf_dist - srcrf_dist) is a measure how much worse the recon-based prediction is as compared to
-  the source based prediction. When DistRatio is close to zero (i.e. recrf_dist is very close to srcrf_dist), the recon based prediction is considered to be very accurate and would not
-  need to be improved. On the other hand, when DistRatio is close to 1, it implies the recon based prediction is very inaccurate and should be improved.
-
-  Note: For intra coded blocks, srcrf_dist was set at the Dispenser to be equal to recrf_dist. It follows that recrf_dist - srcrf_dist = 0 and DistRatio
-  for intra coded blocks is zero.  Similarly, for inter coded blocks where recrf_dist < srcrf_dist in the dispenser, srcrf_dist was set in the Dispenser to
-  be equal to recrf_dist resulting in DistRatio = 0.
-
-- Area ratio AreaRatio: For a given 16x16/8x8 block to be encoded, the corresponding prediction in the reference picture could involve samples from up to
-  four contiguous blocks, as in the case of the blue block in the picture above. Each of the up to four blocks in the reference picture contributing
+  srcrf_dist can be assumed to be the smallest distortion we could have.
+  Therefore (recrf_dist - srcrf_dist) is a measure of how much worse the
+  recon-based prediction is compared to the source-based prediction. When
+  DistRatio is close to zero (i.e. recrf_dist is very close to srcrf_dist),
+  the recon-based prediction is considered to be very accurate and does not
+  need to be improved. On the other hand, when DistRatio is close to 1, the
+  recon-based prediction is very inaccurate and should be improved.
+
+  Note: For intra coded blocks, srcrf_dist was set at the Dispenser to be equal
+  to recrf_dist. It follows that recrf_dist - srcrf_dist = 0 and DistRatio for
+  intra coded blocks is zero. Similarly, for inter coded blocks where
+  recrf_dist < srcrf_dist in the dispenser, srcrf_dist was set in the Dispenser
+  to be equal to recrf_dist resulting in DistRatio = 0.
+
+- Area ratio AreaRatio: For a given 16x16/8x8 block to be encoded, the
+  corresponding prediction in the reference picture could involve samples from
+  up to four contiguous blocks, as in the case of the blue block in the picture
+  above. Each of the up to four blocks in the reference picture contributing
   prediction samples has associated with it an area ratio defined as
   ```
   AreaRatio = overlap_area / number_of_samples_in_block
   ```
-  where the overlap_area is the overlap between the block in the reference picture and the prediction block.
+  where the overlap_area is the overlap between the block in the reference
+  picture and the prediction block.
 
-- Motion-compensation-dependent Distortion mc_dep_dist: For each 16x16/8x8 block in any given reference picture, mc_dep_dist is a measure of the quality of
-  the predictions generated based on that block. The mc_dep_dist variable for a given block Br in a reference picture Pr is computed based on:
+- Motion-compensation-dependent Distortion mc_dep_dist: For each 16x16/8x8
+  block in any given reference picture, mc_dep_dist is a measure of the quality of
+  the predictions generated based on that block. The mc_dep_dist variable for a
+  given block Br in a reference picture Pr is computed based on:
   - mc_dep_dist(Pc,Bc) for the block Bc that uses prediction samples from the block Br in the reference picture Pr. mc_dep_dist(Pc,Bc) is set to zero for all blocks in non-reference pictures.
   - The difference (recrf_dist(Pc,Bc) - srcrf_dist(Pc,Bc)) for the block Bc that uses prediction samples from the block Br in the reference picture Pr.
 
@@ -454,9 +549,12 @@ predictions samples are obtained from bl
                                                mc_dep_dist(Pc,Bc) x DistRatio(Pc,Bc)) x AreaRatio(Pr,Br)
                                           += (recrf_dist(Pc,Bc) + mc_dep_dist(Pc,Bc)) x DistRatio(Pc,Bc) x AreaRatio(Pr,Br)
   ```
-  In a given TPL group, we are ultimately interested in the impact each of the 16x16/8x8 blocks in the base layer picture has on the quality of predictions
-  for the other pictures in the TPL group, whether the base layer picture is used as a direct reference or as indirect reference. Therefore,
-  the backpropagation of mc_dep_dist is performed for all pictures in the TPL group, starting from the first picture in reverse decode order, to the base
+  In a given TPL group, we are ultimately interested in the impact each of
+  the 16x16/8x8 blocks in the base layer picture has on the quality of the
+  predictions for the other pictures in the TPL group, whether the base layer
+  picture is used as a direct or an indirect reference. Therefore, the
+  backpropagation of mc_dep_dist is performed for all pictures in the TPL
+  group, starting from the first picture in reverse decode order, to the base
   layer picture.
 
   The rate-related quantities are:
@@ -483,9 +581,13 @@ predictions samples are obtained from bl
 
     The backpropagation of mc_dep_rate is performed for all pictures in the TPL group, starting with the first picture in reverse decode order, to the base layer picture.
 
-The Synthesizer processes the pictures in the TPL group in reverse of the decode order, so that the impact of the base layer picture on the other pictures
-in the TPL group is evaluated through the chain of references starting with the last picture to be decoded, the next to last picture to be decoded and so on.
-To illustrate this process for the example presented above, consider the reverse of the decode order of the pictures in the TPL group, which is 7, 5, 6 and 8.
+The Synthesizer processes the pictures in the TPL group in reverse of the
+decode order, so that the impact of the base layer picture on the other
+pictures in the TPL group is evaluated through the chain of references starting
+with the last picture to be decoded, the next to last picture to be decoded and
+so on. To illustrate this process for the example presented above, consider the
+reverse of the decode order of the pictures in the TPL group, which is 7, 5, 6
+and 8.
 
 - Start with picture 7.
   - For block (7,0), assume picture 8 instead of picture 6 is the reference picture, since only unipred candidates are considered:
@@ -493,27 +595,32 @@ To illustrate this process for the examp
     - mc_dep_dist(8,i) += (recrf_dist(7,0) + mc_dep_dist(7,0))* DistRatio(7,0)* AreaRatio(8,i), i=0, 1, 4, 5. In this case the blue block in the Figure above is assumed to represent the prediction block for block (7,0). The prediction block overlaps with blocks (8,0), (8,1), (8,4) and (8,5). It should be noted that mc_dep_dist(7,0) is set to zero since picture 7 is a non-reference picture.
     - delta_rate(7,0) = recrf_rate(7,0) - srcrf_rate(7,0)
     - mc_dep_rate(7,0) = delta_rate_cost(mc_dep_rate(7,0), recrf_dist(7,0), srcrf_dist(7,0), number_of_samples_in_16x16_block)
-    - mc_dep_rate(8,i) += (delta_rate(7,0) + mc_dep_rate(7,0)) * AreaRatio(8,i) , i=0, 1, 4, 5.
+    - mc_dep_rate(8,i) += (delta_rate(7,0) + mc_dep_rate(7,0)) * AreaRatio(8,i), i=0, 1, 4, 5.
   - Repeat similar operations for blocks (7,i), i=1,…, 15. Depending on the block, the reference picture could be either picture 8 or picture 6.
 - Repeat the above with picture 5 and consider reference picture 6
 - Repeat the above with picture 6 and consider reference picture 8
 
-At the end of the process, each of the sixteen 16x16 blocks in base layer picture 8 will have associated with it an mc_dep_dist value and an mc_dep_rate
-value. A high value of mc_dep_dist indicates the block is not contributing high quality predictions for the pictures that reference it directly or indirectly
-and that the quality of the block should be improved. The opposite is true when mc_dep_dist is small. The same applies to mc_dep_rate.
+At the end of the process, each of the sixteen 16x16 blocks in base layer
+picture 8 will have associated with it an mc_dep_dist value and an mc_dep_rate
+value. A high value of mc_dep_dist indicates the block is not contributing high
+quality predictions for the pictures that reference it directly or indirectly
+and that the quality of the block should be improved. The opposite is true when
+mc_dep_dist is small. The same applies to mc_dep_rate.
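+
+The accumulation steps illustrated above can be summarized in the following
+sketch (illustrative types and names; the real code operates on SVT-AV1’s TPL
+statistics buffers, and mc_dep_rate additionally passes through the
+delta_rate_cost() mapping):
+```
+#include <stdint.h>
+
+typedef struct {
+    int64_t srcrf_dist, recrf_dist;   /* dispenser outputs */
+    int64_t srcrf_rate, recrf_rate;
+    int64_t mc_dep_dist, mc_dep_rate; /* synthesizer accumulators */
+} TplBlk;
+
+/* Push the degradation of current block bc back onto one of the up-to-four
+ * reference blocks br that contribute prediction samples to it. Called for
+ * every block of every picture, in reverse decode order. */
+static void backpropagate(const TplBlk *bc, TplBlk *br, double area_ratio) {
+    /* DistRatio(Pc,Bc) = (recrf_dist - srcrf_dist) / recrf_dist; zero for
+     * intra blocks since the dispenser set srcrf_dist = recrf_dist there. */
+    double dist_ratio = bc->recrf_dist
+        ? (double)(bc->recrf_dist - bc->srcrf_dist) / (double)bc->recrf_dist
+        : 0.0;
+    br->mc_dep_dist += (int64_t)((double)(bc->recrf_dist + bc->mc_dep_dist) *
+                                 dist_ratio * area_ratio);
+    /* delta_rate = recrf_rate - srcrf_rate */
+    br->mc_dep_rate += (int64_t)((double)(bc->recrf_rate - bc->srcrf_rate +
+                                          bc->mc_dep_rate) * area_ratio);
+}
+```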
 
 ## Appendix D: QP scaling based on r0
 
 ### 1. Case of Intra Pictures
 
-Main idea: The smaller the r0 value, the more improvements the picture would need. The kf_boost variable is inversely proportional to r0 and is used to
-indicate the level of the required improvement in the picture. The larger the kf_boost parameter is, the smaller the resulting qindex for the picture
-would be.
+Main idea: The smaller the r0 value, the more improvement the picture needs.
+The kf_boost variable is inversely proportional to r0 and is used to indicate
+the level of the required improvement in the picture. The larger the kf_boost
+parameter, the smaller the resulting qindex for the picture.
 
 - factor:
 
   For KEY_FRAME AND (intra_period_length = -1 OR intra_period_length > 64)
-  If r0 < 0.2 set factor  = 255/qindex, else factor = 1.
+  If r0 < 0.2 set factor = 255/qindex, else factor = 1.
 
 - r0:
   ```
@@ -522,7 +629,7 @@ would be.
   When factor is set to 255/qindex, r0 becomes smaller, implying a larger kf_boost as indicated below.
 
   Further adjustments in r0 are introduced to account for the prediction structure by dividing r0 by tpl_hl_islice_div_factor in the case of an I_SLICE,
-  or by tpl_hl_base_frame_div_factor in the case of a base layer picture, where tpl_hl_islice_div_factor  and tpl_hl_base_frame_div_factor are given in the
+  or by tpl_hl_base_frame_div_factor in the case of a base layer picture, where tpl_hl_islice_div_factor and tpl_hl_base_frame_div_factor are given in the
   table below.
 
   |**Hierarchical level**|**0**|**1**|**2**|**3**|**4**|**5**|
@@ -559,7 +666,7 @@ would be.
     ```
     adjustment_ratio = ((kf_boost_high – kf_boost)/ (kf_boost_high – kf_boost_low))
     ```
-    where kf_boost_high = 5000;  kf_boost_low = 400.
+    where kf_boost_high = 5000; kf_boost_low = 400.
 
   - Adjusted qindex:
     ```
@@ -577,7 +684,7 @@ would be.
 
 *r0, gfu_boost and arf_boost_factor*
 
-- Adjust r0: If base layer picture  AND (lad_mg > 0)  AND (r0_adjust_factor > 0) ( r0_adjust_factor depends on tpl_level, and if hierarchical_level < 4,
+- Adjust r0: If base layer picture AND (lad_mg > 0) AND (r0_adjust_factor > 0) (r0_adjust_factor depends on tpl_level, and if hierarchical_level < 4,
   it is set to 0.1), then
 
   ```
@@ -611,7 +718,7 @@ arfgf_high_motion_minq and arfgf_low_mot
   adjustment_ratio = ((gf_high_tpl_la – gfu_boost)/ (gf_high_tpl_la – gf_low_tpl_la))
   ```
 
-  where gf_high_tpl_la = 2400;  gf_low_tpl_la = 300.
+  where gf_high_tpl_la = 2400; gf_low_tpl_la = 300.
 
 - Initial adjusted qindex:
 
@@ -627,10 +734,14 @@ arfgf_high_motion_minq and arfgf_low_mot
 
 *Case of non-base-layer reference pictures*
 
-- Main idea: Assign a reference qindex to the current picture, then adjust the assigned qindex value closer to the original qindex of the picture depending
-  on the temporal layer difference between the current picture and it references. The farther the references are from the current picture in terms of temporal
-  layer distance, the smaller the difference between the adjusted qindex value and the input qindex value for the picture,
-  i.e. the adjusted value is moved farther away from the reference qindex value and closer to the input qindex value.
+- Main idea: Assign a reference qindex to the current picture, then adjust
+  the assigned qindex value closer to the original qindex of the picture
+  depending on the temporal layer difference between the current picture and
+  its references. The farther the references are from the current picture in
+  terms of temporal layer distance, the smaller the difference between the
+  adjusted qindex value and the input qindex value for the picture, i.e. the
+  adjusted value is moved farther away from the reference qindex value and
+  closer to the input qindex value.
 
 - Define:
   ```
@@ -655,9 +766,12 @@ arfgf_high_motion_minq and arfgf_low_mot
 
 *Adjust active_worst_quality*
 
-- Main idea: Applies only to base layer pictures and reference non-base-layer pictures. From the code “For alt_ref and GF frames (including internal arf
-  frames) adjust the worst allowed quality as well. This ensures that even on hard sections we do not clamp the Q at the same value for arf frames and leaf
-  (non arf) frames. This is important to the TPL model which assumes Q drops with each arf level.”
+- Main idea: Applies only to base layer pictures and reference non-base-layer
+  pictures. From the code “For alt_ref and GF frames (including internal arf
+  frames) adjust the worst allowed quality as well. This ensures that even on
+  hard sections we do not clamp the Q at the same value for arf frames and leaf
+  (non arf) frames. This is important to the TPL model which assumes Q drops
+  with each arf level.”
 
   ```
   active_worst_quality = (active_best_quality + (3 * active_worst_quality) + 2) / 4
@@ -668,7 +782,7 @@ arfgf_high_motion_minq and arfgf_low_mot
 - The qindex for the picture remains unchanged.
 
 
-### 3.  Final active_best_quality and active_worst_quality, qindex
+### 3. Final active_best_quality and active_worst_quality, qindex
 
 - Clamp active_best_quality to between 0 and 255.
 - Clamp active_worst_quality to between active_best_quality and 255.
@@ -680,5 +794,9 @@ qindex = active_best_quality
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
 
diff -pruN 0.9.1+dfsg-1/Docs/Appendix-TX-Search.md 1.2.0+dfsg-2/Docs/Appendix-TX-Search.md
--- 0.9.1+dfsg-1/Docs/Appendix-TX-Search.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Appendix-TX-Search.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Transform Type and Transform Size Search
 
 ## Transform Search
@@ -19,22 +21,22 @@ transform selection are available as sho
 
 | **Transform Type**     | **Vertical** | **Horizontal** |
 | ---------------------- | ------------ | -------------- |
-| DCT\_DCT           | DCT      | DCT        |
-| ADST\_DCT          | ADST     | DCT        |
-| DCT\_ADST          | DCT      | ADST       |
-| ADST\_ADST         | ADST     | ADST       |
-| FLIPADST\_DCT      | FLIPADST | DCT        |
-| DCT\_FLIPADST      | DCT      | FLIPADST   |
-| FLIPADST\_FLIPADST | FLIPADST | FLIPADST   |
-| ADST\_FLIPADST     | ADST     | FLIPADST   |
-| FLIPADST\_ADST     | FLIPADST | ADST       |
-| IDTX               | IDTX     | IDTX       |
-| V\_DCT             | DCT      | IDTX       |
-| H\_DCT             | IDTX     | DCT        |
-| V\_ADST            | ADST     | IDTX       |
-| H\_ADST            | IDTX     | ADST       |
-| V\_FLIPADST        | FLIPADST | IDTX       |
-| H\_FLIPADST        | IDTX     | FLIPADST   |
+| DCT\_DCT               | DCT          | DCT            |
+| ADST\_DCT              | ADST         | DCT            |
+| DCT\_ADST              | DCT          | ADST           |
+| ADST\_ADST             | ADST         | ADST           |
+| FLIPADST\_DCT          | FLIPADST     | DCT            |
+| DCT\_FLIPADST          | DCT          | FLIPADST       |
+| FLIPADST\_FLIPADST     | FLIPADST     | FLIPADST       |
+| ADST\_FLIPADST         | ADST         | FLIPADST       |
+| FLIPADST\_ADST         | FLIPADST     | ADST           |
+| IDTX                   | IDTX         | IDTX           |
+| V\_DCT                 | DCT          | IDTX           |
+| H\_DCT                 | IDTX         | DCT            |
+| V\_ADST                | ADST         | IDTX           |
+| H\_ADST                | IDTX         | ADST           |
+| V\_FLIPADST            | FLIPADST     | IDTX           |
+| H\_FLIPADST            | IDTX         | FLIPADST       |
 
 For best performance, all the applicable transform options would be
 evaluated for a given candidate prediction and the transform option that
@@ -51,7 +53,7 @@ considered in the following are listed b
 
 <!-- end list -->
 
-### 2.  Implementation of the algorithm
+### 2. Implementation of the algorithm
 
 **Inputs**: Prediction candidate.
 
@@ -67,33 +69,39 @@ considered in the following are listed b
 
 #### Details of the implementation
 
-TX type search happens in the function tx_type_search().  The luma transform for each TX type (or a subset of TX types – see optimization section) is tested
-and the associated cost is computed.  The TX type with the lowest cost is selected.
-
-For the looping, the transform types (see Table 1) are split into groups based on how likely each transform is to be selected (on average).  This allows for
-only certain groups of transforms to be tested (see optimization section).  All transforms in Group 0 will be tested, then all transforms in Group 1 will be
-tested, then all transforms in Group 2 will be tested, and so on. The groups area as follows:
+TX type search happens in the function tx_type_search(). The luma transform for
+each TX type (or a subset of TX types – see optimization section) is tested and
+the associated cost is computed. The TX type with the lowest cost is selected.
+
+For the looping, the transform types (see Table 1) are split into groups based
+on how likely each transform is to be selected (on average). This allows for
+only certain groups of transforms to be tested (see optimization section). All
+transforms in Group 0 will be tested, then all transforms in Group 1 will be
+tested, then all transforms in Group 2 will be tested, and so on. The groups
+are as follows:
 
 ##### Table 3. Groupings of transform options.
 
-| **Group**     | **TX Types in Non-SC Encoder** | **TX Types in the SC Encoder** |
-| ---------------------- | ------------ | -------------- |
-| 0          | DCT\_DCT      | DCT\_DCT, IDTX       |
-| 1          | V_DCT, H_DCT      | V_DCT, H_DCT      |
-| 2          | ADST_ADST      | ADST_ADST       |
-| 3          | ADST_DCT, DCT_ADST      | ADST_DCT, DCT_ADST       |
-| 4          | FLIPADST_FLIPADST, IDTX      | FLIPADST_FLIPADST       |
-| 5          | FLIPADST_DCT, DCT_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, V_ADST, H_ADST, V_FLIPADST, H_FLIPADST      | FLIPADST_DCT, DCT_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, V_ADST, H_ADST, V_FLIPADST, H_FLIPADST       |
+| **Group**              | **TX Types in Non-SC Encoder**                                                                   | **TX Types in the SC Encoder**                                                                   |
+| ---------------------- | ------------                                                                                     | --------------                                                                                   |
+| 0                      | DCT\_DCT                                                                                         | DCT\_DCT, IDTX                                                                                   |
+| 1                      | V_DCT, H_DCT                                                                                     | V_DCT, H_DCT                                                                                     |
+| 2                      | ADST_ADST                                                                                        | ADST_ADST                                                                                        |
+| 3                      | ADST_DCT, DCT_ADST                                                                               | ADST_DCT, DCT_ADST                                                                               |
+| 4                      | FLIPADST_FLIPADST, IDTX                                                                          | FLIPADST_FLIPADST                                                                                |
+| 5                      | FLIPADST_DCT, DCT_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, V_ADST, H_ADST, V_FLIPADST, H_FLIPADST | FLIPADST_DCT, DCT_FLIPADST, ADST_FLIPADST, FLIPADST_ADST, V_ADST, H_ADST, V_FLIPADST, H_FLIPADST |
 
 
-### 3.  Optimization of the algorithm
+### 3. Optimization of the algorithm
 
 #### Reducing Tested TX Types
 
 The search for the transform type is controlled by the **txt_level** signal.
 
-The txt_level signal controls the number of TX groups to test in the TX type search.  The number of TX groups to be tested are reduced based on block size
-and block type.  The search can exit early if the distortion and/or resulting number of coefficients is sufficiently low.
+The txt_level signal controls the number of TX groups to test in the TX type
+search. The number of TX groups to be tested is reduced based on block size
+and block type. The search can exit early if the distortion and/or resulting
+number of coefficients is sufficiently low.
 
 ##### Table 4. Groupings of transform options.
 
@@ -109,53 +117,61 @@ and block type.  The search can exit ear
 
 #### Tx Type Search Shortcuts Based on Block Characteristics
 
-This type of optimization is controlled by the **tx_shortcut_level** signal. Shortcuts to speedup the search can be used based on results from previous MD stages and
-neighbouring blocks.
+This type of optimization is controlled by the **tx_shortcut_level** signal.
+Shortcuts to speed up the search can be used based on results from previous MD
+stages and neighbouring blocks.
 
 ##### Table 5. Optimization signals for Tx type search controlled by the tx_shortcut_level.
 
-| **Signal**     | **Description** |
-| -------------- | --------------- |
-| bypass_tx_when_zcoeff | Skip TX at MD_Stage_3 if the MD_Stage_1 TX did not yield any non-zero coefficients |
-| apply_pf_on_coeffs | Apply partial frequency transform (PF) based on the number of coefficients |
+| **Signal**            | **Description**                                                                                                              |
+| --------------        | ---------------                                                                                                              |
+| bypass_tx_when_zcoeff | Skip TX at MD_Stage_3 if the MD_Stage_1 TX did not yield any non-zero coefficients                                           |
+| apply_pf_on_coeffs    | Apply partial frequency transform (PF) based on the number of coefficients                                                   |
 | use_mds3_shortcuts_th | if (best MD_Stage_0 distortion/QP < TH) use shortcuts for candidates at MD_Stage_3; 0: OFF, higher settings: More aggressive |
-| use_neighbour_info | if true, use info from neighbouring blocks to use more aggressive Thresholds/actions |
+| use_neighbour_info    | if true, use info from neighbouring blocks to use more aggressive Thresholds/actions                                         |
 
 #### Residual Subsampling
 
-The residuals can be subsampled before the transform is performed to reduce the computations spent in the transform loop.  When enabled, only every 2nd line
-(or 4th line, depending on settings) is used to compute the transform.  Subsampling will change the size of the TX according to the table below; subsampling
-more than every 4th line is not permitted since there is no corresponding TX size (e.g. no 8x64).
+The residuals can be subsampled before the transform is performed to reduce the
+computations spent in the transform loop. When enabled, only every 2nd line (or
+4th line, depending on settings) is used to compute the transform. Subsampling
+will change the size of the TX according to the table below; subsampling more
+than every 4th line is not permitted since there is no corresponding TX size
+(e.g. no 8x64).
 
 ##### Table 6. Tx block size for different subsampling levels.
 
-| **TX size (width x height)**     | **2x subsampled TX size** | **4x subsampled TX size** |
-| -------------------------------- | ------------------------- | ------------------------- |
-| 128x128 | Not supported | Not supported |
-| 64x64 | 64x32 | 64x16 |
-| 32x32 | 32x16 | 32x8 |
-| 16x16 | 16x8 | 16x4 |
-| 8x8 | 8x4 | Not allowed (no 8x2 TX size) |
-| 4x4 | Not allowed (no 4x2 TX size) | Not allowed (no 4x1 TX size) |
-
-When computing the reconstructed block from a subsampled TX, the rows that were skipped in the transform coefficient calculations will be filled with the copy
-of the previous line.  Subsampling cannot be used when computing a conformant transform because the reconstructed block will not be the correct size.
+| **TX size (width x height)**     | **2x subsampled TX size**    | **4x subsampled TX size**    |
+| -------------------------------- | -------------------------    | -------------------------    |
+| 128x128                          | Not supported                | Not supported                |
+| 64x64                            | 64x32                        | 64x16                        |
+| 32x32                            | 32x16                        | 32x8                         |
+| 16x16                            | 16x8                         | 16x4                         |
+| 8x8                              | 8x4                          | Not allowed (no 8x2 TX size) |
+| 4x4                              | Not allowed (no 4x2 TX size) | Not allowed (no 4x1 TX size) |
+
+When computing the reconstructed block from a subsampled TX, the rows that were
+skipped in the transform coefficient calculations will be filled with a copy
+of the previous line. Subsampling cannot be used when computing a conformant
+transform because the reconstructed block will not be the correct size.
 
 Subsampling the residual is controlled with the **subres_level** signal.
 
 #### Partial Frequency Transforms
 
-Instead of computing the transform for the entire block area, partial frequency transforms (PF) considers the computations necessary to generate only transform
-coefficients in an upper left area of the transform coefficient block.   Table 7 and Figure 1 below summarize the available PF sizes.
+Instead of computing the transform for the entire block area, partial frequency
+transforms (PF) perform only the computations necessary to generate the
+transform coefficients in an upper-left area of the coefficient block. Table 7
+and Figure 1 below summarize the available PF sizes.
 
 ##### Table 7. Description for PF shapes.
 
-| **PF Shape**     | **Block Size Used in TX** |
-| ---------------- | ------------------------- |
-| ```DEFAULT_SHAPE``` | Full Block (regular TX) |
-| ```N2_SHAPE``` | Quarter block (half width x half height) |
-| ```N4_SHAPE``` | Eighth block (quarter width x quarter height) |
-| ```ONLY_DC_SHAPE``` | DC position only – not recommended |
+| **PF Shape**        | **Block Size Used in TX**                     |
+| ----------------    | -------------------------                     |
+| ```DEFAULT_SHAPE``` | Full Block (regular TX)                       |
+| ```N2_SHAPE```      | Quarter block (half width x half height)      |
+| ```N4_SHAPE```      | Eighth block (quarter width x quarter height) |
+| ```ONLY_DC_SHAPE``` | DC position only – not recommended            |
 
 
 ![tx_search_new_fig1](./img/tx_search_new_fig1.png)
@@ -168,20 +184,23 @@ PF is controlled through the **pf_level*
 
 ### Description of the Algorithm
 
-For a given block, Tx size (TxS) search is used to determine the transform block size that yields the best rate-distortion cost for the block under
-consideration. Block transform size can be either the same as, one depth or two depths below the current block size.  The maximum allowable depth change for
-each block size is specified below, where a depth change of 2 means a TX size two depths more than the current block size, and 0 means the default TX size.
+For a given block, Tx size (TxS) search is used to determine the transform
+block size that yields the best rate-distortion cost for the block under
+consideration. The block transform size can be the same as the current block
+size, or one or two depths below it. The maximum allowable depth change for
+each block size is specified below, where a depth change of 2 means a TX size
+two depths below the current block size, and 0 means the default TX size.
 
 ##### Table 8. Maximum change in block depth in transform size search as a function of the block size.
 
-| **Block size**     | **Max transform block depth change beyond current block depth** |
-| ------------------ | --------------------------------------------------------------- |
-| 64X64, 32X32, 16X16, 64X32, 32X64, 16X32, 32X16, 16X8, 8X16, 64X16, 16X64, 32X8, 8X32, 16X4, 4X16 | 2 |
-| 8X8 | 1 |
+| **Block size**                                                                                    | **Max transform block depth change beyond current block depth** |
+| ------------------                                                                                | --------------------------------------------------------------- |
+| 64X64, 32X32, 16X16, 64X32, 32X64, 16X32, 32X16, 16X8, 8X16, 64X16, 16X64, 32X8, 8X32, 16X4, 4X16 | 2                                                               |
+| 8X8                                                                                               | 1                                                               |
 
 ### Implementation of the Algorithm
 
-## 2.  Implementation of the algorithm
+## 2. Implementation of the algorithm
 
 **Inputs**: Prediction candidate.
 
@@ -191,27 +210,36 @@ each block size is specified below, wher
 
 ##### Table 9. Control flags associated with Tx size search.
 
-| **Flag**               | **Level (sequence/Picture)** | **Description**                                                       |
-| ---------------------- | ---------------------------- | --------------------------------------------------------------------- |
-| tx_size_search_mode    | Picture                      | When set, it allows transform block size search.                      |
+| **Flag**               | **Level (sequence/Picture)** | **Description**                                                                    |
+| ---------------------- | ---------------------------- | ---------------------------------------------------------------------              |
+| tx_size_search_mode    | Picture                      | When set, it allows transform block size search.                                   |
 | frm_hdr->tx_mode       | Picture                      | Frame-based signal used to signal that TX size search is allowed in a given frame. |
-| txs_level              | Picture                      | Indicates the level of optimization to use in the TX size search. |
+| txs_level              | Picture                      | Indicates the level of optimization to use in the TX size search.                  |
 
 #### Details of the implementation
 
-The function tx_partitioning_path performs the Tx size search in MD. The flow of the evaluation depends on whether the block is an inter-coded block or an
+The function tx_partitioning_path performs the Tx size search in MD. The flow
+of the evaluation depends on whether the block is an inter-coded block or an
 intra-coded block, as outlined below.
 
-1.  In the case of an inter block (i.e. the candidate type is INTER or Intra Block Copy), the residual block can be computed for the whole block based on the already computed prediction (computed in MD_Stage_0 in the function full_loop_core) through the call to the function residual_kernel.
-2.  Loop over the depths to be evaluated (up to 2 depths, if block size allows – see optimization section for signals limiting number of TX depths to search).
-    1. The function tx_reset_neighbor_arrays is used to reset the neighbor arrays. Initialize the neighbor arrays using tx_initialize_neighbor_arrays.  The neighbour arrays are needed to store the data from all TX depths until a decision on the best depth can be made.
+1. In the case of an inter block (i.e. the candidate type is INTER or Intra
+   Block Copy), the residual block can be computed for the whole block based on
+   the already computed prediction (computed in MD_Stage_0 in the function
+   full_loop_core) through the call to the function residual_kernel.
+2. Loop over the depths to be evaluated (up to 2 depths, if block size allows – see
+   optimization section for signals limiting number of TX depths to search).
+    1. The function tx_reset_neighbor_arrays is used to reset the neighbor arrays. Initialize the neighbor arrays using tx_initialize_neighbor_arrays. The neighbour arrays are needed to store the data from all TX depths until a decision on the best depth can be made.
     2. Loop over the Tx blocks in the depth being evaluated.
        - If the block is an intra block, then:
          - Perform luma intra prediction in av1_intra_luma_prediction.
          - Compute the luma resulting residuals in residual_kernel.
        - Perform Tx search for the current Tx block in tx_type_search
-       - Perform Tx, quantization, inverse quantization, and if spatial SSE is needed, inverse transform. Compute the cost of the current transform type for the transform block size under consideration.
-       - If the block is an intra block, update both the recon sample neighbor array and the transform-related neighbor array tx_update_neighbor_array. Otherwise, update only the transform-related neighbor array in the same function.
+       - Perform Tx, quantization, inverse quantization, and if spatial SSE is needed,
+         inverse transform. Compute the cost of the current transform type for
+         the transform block size under consideration.
+       - If the block is an intra block, update both the recon sample neighbor array
+         and the transform-related neighbor array tx_update_neighbor_array. Otherwise,
+         update only the transform-related neighbor array in the same function.
     3. Estimate the rate associated with signaling the Tx size in get_tx_size_bits.
     4. Update best_cost_search and best_tx_depth based on the depths evaluated so far.
 
@@ -223,16 +251,20 @@ The optimization of the TX size search f
 
 ##### Table 10. Optimization signals for Tx size search.
 
-| **Signal**     | **Description** |
-| -------------- | --------------- |
-| enabled | 1: Ts size search enabled; 0: Tx size search disabled (use default TX size) |
-| prev_depth_coeff_exit | Skip current depth if previous depth has zero coeffs |
-| intra_class_max_depth | Max number of depth(s) for INTRA classes |
-| inter_class_max_depth | Max number of depth(s) for INTER classes |
+| **Signal**              | **Description**                                                                   |
+| --------------          | ---------------                                                                   |
+| enabled                 | 1: Tx size search enabled; 0: Tx size search disabled (use default TX size)       |
+| prev_depth_coeff_exit   | Skip current depth if previous depth has zero coeffs                              |
+| intra_class_max_depth   | Max number of depth(s) for INTRA classes                                          |
+| inter_class_max_depth   | Max number of depth(s) for INTER classes                                          |
 | depth1_txt_group_offset | Offset to be subtracted from default txt-group to derive the txt-group of depth-1 |
 | depth2_txt_group_offset | Offset to be subtracted from default txt-group to derive the txt-group of depth-2 |
-| min_sq_size | Min. square size to use Tx size for |
+| min_sq_size             | Minimum square block size to which Tx size search is applied                      |
 
 ## Notes
 
-The feature settings that are described in this document were compiled at v0.9.0 of the code and may not reflect the current status of the code. The description in this document represents an example showing  how features would interact with the SVT architecture. For the most up-to-date settings, it's recommended to review the section of the code implementing this feature.
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Build-Guide.md 1.2.0+dfsg-2/Docs/Build-Guide.md
--- 0.9.1+dfsg-1/Docs/Build-Guide.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Build-Guide.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,118 @@
+[Top level](../README.md)
+
+# Build and Install
+
+## Windows* Operating Systems (64-bit)
+
+- __Build Requirements__
+  - Visual Studio* 2017 (download [here](https://www.visualstudio.com/vs/older-downloads/)) or 2019 (download [here](https://visualstudio.microsoft.com/downloads/))
+  - CMake 3.5 or later (download [here](https://github.com/Kitware/CMake/releases/download/v3.14.5/cmake-3.14.5-win64-x64.msi))
+  - YASM Assembler version 1.2.0 or later
+    - Download the yasm exe from the following [link](http://www.tortall.net/projects/yasm/releases/yasm-1.3.0-win64.exe)
+    - Rename yasm-*-win64.exe to yasm.exe
+    - Copy yasm.exe into a location that is in the `PATH` environment variable
+
+- __Build Instructions__
+  - Build the project by following the steps below
+    - cd into `Build\windows`
+    - run `build.bat <2022|2019|2017|2015>` [This will generate the .sln files and build the project]
+
+- __Binaries Location__
+  - Binaries can be found under `<repo dir>/Bin/Release` or `<repo dir>/Bin/Debug`, depending on whether Release or Debug was selected as the build mode.
+
+- __Installation__
+
+  For the binaries to operate properly on your system, the following conditions have to be met:
+  - On any of the Windows* Operating Systems listed in the OS requirements section, install Visual Studio* 2015/2017/2019
+  - Once the installation is complete, copy the binaries to a location making sure that both the sample application `SvtAv1EncApp.exe` and library `SvtAv1Enc.dll` are in the same folder.
+  - Open the command prompt window at the chosen location and run the sample application to encode: `SvtAv1EncApp.exe -i [in.yuv] -w [width] -h [height] -b [out.ivf]`
+  - Sample application supports reading from pipe. E.g. `ffmpeg -i [input.mp4] -nostdin -f rawvideo -pix_fmt yuv420p - | SvtAv1EncApp.exe -i stdin -n [number_of_frames_to_encode] -w [width] -h [height]`
+
+## Linux* Operating Systems (64-bit)
+
+Note - a Dockerfile is provided to build the encoder into a tiny Alpine Linux Docker image.
+
+- __Build Requirements__
+  - GCC 5.4.0 or later
+  - CMake 3.5.1 or later
+  - YASM Assembler version 1.2.0 or later
+
+- __Build Instructions__
+  - `cd Build/linux`
+  - `./build.sh <release | debug>`
+
+- __Sample Binaries location__
+  - Binaries can be found under `Bin/Release` and/or `Bin/Debug`
+
+- __Clang usage__
+  - To install Clang-11 on Ubuntu 20.04 execute single command: `sudo apt install clang-11`
+  - To install Clang-11 on Ubuntu 18.04 execute commands:
+    - `wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -`
+    - `sudo apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main"`
+    - `sudo apt install clang-11`
+  - To build SVT-AV1 using Clang-11:
+    - `export CC="clang-11"`
+    - `export CXX="clang++-11"`
+    - `./build.sh <release | debug>`
+
+
+- __Installation__
+
+  For the binaries to operate properly on your system, the following conditions have to be met:
+
+  - On any of the Linux* Operating Systems listed above, copy the binaries under a location of your choice.
+  - Change the permissions on the sample application `SvtAv1EncApp` executable by running the command: `chmod +x SvtAv1EncApp`
+  - cd into your chosen location
+  - Run the sample application to encode: `./SvtAv1EncApp -i [in.yuv] -w [width] -h [height] -b [out.ivf]`
+  - Sample application supports reading from pipe. E.g. `ffmpeg -i [input.mp4] -nostdin -f rawvideo -pix_fmt yuv420p - | ./SvtAv1EncApp -i stdin -n [number_of_frames_to_encode] -w [width] -h [height]`
+
+# SVT-AV1 ffmpeg plugin installation
+
+## 1. Build and install SVT-AV1
+
+``` bash
+git clone --depth=1 https://gitlab.com/AOMediaCodec/SVT-AV1.git
+cd SVT-AV1
+cd Build
+cmake .. -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=Release
+make -j $(nproc)
+sudo make install
+```
+
+## 2. Enable libsvtav1 in FFmpeg
+
+NOTE: If you wish to use an FFmpeg tag or release before 4.4, please go
+[here](https://gitlab.com/AOMediaCodec/SVT-AV1/tree/v0.8.4/ffmpeg_plugin) and
+consult that page to properly patch ffmpeg for use with SVT-AV1.
+
+``` bash
+git clone --depth=1 https://github.com/FFmpeg/FFmpeg ffmpeg
+cd ffmpeg
+export LD_LIBRARY_PATH+=":/usr/local/lib"
+export PKG_CONFIG_PATH+=":/usr/local/lib/pkgconfig"
+./configure --enable-libsvtav1
+make -j $(nproc)
+```
+
+## 3. Verify that ffmpeg is working
+
+``` bash
+./ffmpeg -i input.mp4 -c:v libsvtav1 -y test.mp4
+```
+
+# How to evaluate by ready-to-run executables with docker
+
+Refer to the guide [here](https://github.com/OpenVisualCloud/Dockerfiles/blob/master/doc/svt.md#Evaluate-SVT).
+
+# Demo features and limitations
+
+- **Multi-instance support:** The multi-instance functionality is a demo
+  feature implemented in the SVT-AV1 Encoder sample application as an example
+  of one sample application using multiple encoding libraries. Encoding using
+  the multi-instance support is limited to only 6 simultaneous streams. For
+  example, to encode two channels on Windows: `SvtAv1EncApp.exe -nch 2 -c
+  firstchannel.cfg secondchannel.cfg`
+- **Features enabled:** The library will display an error message for any
+  feature combination that is not currently supported.
+
+
diff -pruN 0.9.1+dfsg-1/Docs/CommonQuestions.md 1.2.0+dfsg-2/Docs/CommonQuestions.md
--- 0.9.1+dfsg-1/Docs/CommonQuestions.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/CommonQuestions.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,310 @@
+[Top level](../README.md)
+
+# Common Questions and Topics of Interest
+
+[[_TOC_]]
+
+# What Presets Do
+
+Presets control how many efficiency features are used during the encoding
+process, and the intensity with which those features are used. Lower presets
+use more features and produce a more efficient file (smaller file, for a given
+visual quality). However, lower presets also require more compute time during the
+encode process. If a file is to be widely distributed, it can be worth it to
+use very low presets, while high presets allow fast encoding, such
+as for real-time applications.
+
+Generally speaking, presets 1-3 represent extremely high efficiency, for
+use when encode time is not important and quality/size of the resulting
+video file is critical. Presets 4-6 are commonly used by home enthusiasts
+as they represent a balance of efficiency and reasonable compute time. Presets
+between 7 and 12 are used for fast and real-time encoding. Preset 13 is even
+faster but not intended for direct human consumption--it can be used, for
+example, as a per-scene quality metric in VOD applications. One
+should use the lowest preset that is tolerable.
+
+The features enabled or changed by each preset are as follows
+
+| **Category**                | **Feature**                                 | **0** | **1** | **2** | **3** | **4** | **5** | **6** | **7** | **8** | **9** | **10** | **11** | **12** |
+| --------------------------- | ------------------------------------------  | ----  | ----  | ----  | ----  | ----  | ----  | ----  | ----  | ----  | ----  | -----  | -----  | -----  |
+| Prediction structure & RC   | Hierarchical levels                         | 5L    | 5L    | 5L    | 5L    | 5L    | 5L    | 5L    | 5L    | 5L    | 5L    | 5L     | 5L     | 5L     |
+|                             | aq-mode                                     | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | max reference frame count                   | 7     | 7     | 7     | 7     | 4     | 4     | 4     | 4     | 4     | 4     | 4      | 4      | 4      |
+| Motion Estimation           | Full pel Motion Estimation                  | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Hierarchical ME                             | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | subpel                                      | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+| Block Partitioning          | sb size                                     | 128   | 128   | 128   | 64    | 64    | 64    | 64    | 64    | 64    | 64    | 64     | 64     | 64     |
+|                             | min block size                              | 4     | 4     | 4     | 4     | 4     | 4     | 8     | 8     | 8     | 8     | 8      | 8      | 8      |
+|                             | Non-square partitions                       | ON    | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+| AV1 mode decision features  | DC                                          | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Smooth, Smooth_V, Smooth_H                  | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Directional Angular modes                   | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Paeth                                       | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | OFF    | OFF    | OFF    |
+|                             | Chroma from Luma (CfL)                      | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Filter intra                                | ON    | ON    | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Intra block copy (IBC) (SC)                 | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | OFF    | OFF    |
+|                             | Palette prediction (SC)                     | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Single-reference prediction                 | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Compound-reference prediction               | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Eighth-pel f(resolution, qindex)            | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | OFF    |
+|                             | Interpolation Filter Search                 | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Warped motion compensation                  | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Global motion compensation                  | ON    | ON    | ON    | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Motion Field Motion Vector (MFMV)           | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | OFF    | OFF    |
+|                             | Overlapped Block Motion Compensation (OBMC) | ON    | ON    | ON    | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Inter-Intra prediction                      | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Wedge prediction                            | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Difference-weighted prediction              | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Distance-weighted prediction                | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+| Transform                   | Transform type search                       | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Transform Size search                       | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | OFF    |
+| AV1 inloop filters          | Deblocking Filter                           | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | CDEF                                        | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON     | ON     | ON     |
+|                             | Restoration Filter - Wiener Filter          | ON    | ON    | ON    | ON    | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF    | OFF    | OFF    |
+|                             | Restoration Filter - SG Filter              | ON    | ON    | ON    | ON    | ON    | OFF   | OFF   | OFF   | OFF   | OFF   | OFF    | OFF    | OFF    |
+
+
+# Scene Change Detection
+
+Modern video files include key frames, which are intra-coded pictures, and
+inter-frames, which store only information changed relative to previously encoded
+reference frames. A scene change can be a reasonable time to insert a new key
+frame since the previous key frame may not have much in common with subsequent
+images. Insertion of key frames at scene changes is common practice but is not required.
+
+At present, SVT-AV1 does not insert key frames at scene changes, regardless of
+the `scd` parameter. It is, therefore, advisable to use a third-party splitting
+program to encode videos by chunks if key frame insertion at scene changes is
+desired.
+
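+If forcing key frames at detected scene changes is preferred over splitting,
+one possible sketch (assuming ffmpeg/ffprobe are available and that a scene
+threshold of 0.4 suits the source; file names are placeholders) is:
+
+``` bash
+# Collect the timestamps of detected scene cuts (threshold 0.4)
+times=$(ffprobe -f lavfi -i "movie=input.mkv,select=gt(scene\,0.4)" \
+    -show_entries frame=pts_time -of csv=p=0 | paste -sd, -)
+# Force key frames at those positions while encoding with SVT-AV1
+ffmpeg -i input.mkv -c:v libsvtav1 -crf 32 -force_key_frames "$times" output.mkv
+```
+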
+Note that not inserting a key frame at scene changes is not considered a bug
+or a missing feature by the SVT-AV1 team. AV1 is sufficiently flexible that when
+a scene change is detected, the encoder automatically relies less on temporally
+neighboring frames, which allows it to adapt to scene changes without affecting
+the GOP structure (frequency of key frames).
+
+# GOP Size Selection
+
+GOP stands for "Group Of Pictures." Modern video files include key frames,
+which save the entire intra-coded image and begin a new GOP, and delta frames,
+which store only information changed relative to previously encoded reference frames
+(through motion-compensated prediction and residual coding). The GOP size (governed by
+the `keyint` parameter) determines how frequently a key frame is inserted. In
+general, key frames are far larger, in terms of bits, than delta frames are, so
+using a larger GOP can significantly improve efficiency.
+
+However, frequent key frames can be helpful in two respects:
+
+1. They make seek times faster. Decode software must go to the nearest
+   previous key frame when a seek is requested, then it must process
+   all delta frames between the key frame and the desired seek point. When key
+   frames are far apart, seek performance can suffer.
+2. In video-on-demand applications, key frames provide improved resilience to
+   lost packets. Each key frame serves as a sort of reset for the reference
+   chain used by inter frames. If the key frames are too widely spaced, missed
+   packets can cause relatively long-lasting visual artifacts.
+
+For video on demand applications, it is common to use GOP sizes of
+approximately one second. For example, `keyint=24` provides a key frame once
+per second for a 24 fps video. Home users frequently prefer longer GOP sizes, often
+in the range of 5-10 seconds. One can specify the GOP length in seconds by appending
+the `s` character. For example, `keyint=5s` would have a GOP length of 5 seconds,
+regardless of the video frame rate. This usage works only with `keyint`, not ffmpeg's `-g`
+parameter.
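+
+For illustration, a 10-second GOP for a 24 fps source can be requested either
+through ffmpeg's `-g` or through the standalone application's `keyint` (a
+sketch; file names and the CRF value are placeholders):
+
+``` bash
+# ffmpeg: key frame every 240 frames (10 seconds at 24 fps)
+ffmpeg -i input.mkv -c:v libsvtav1 -crf 32 -g 240 output.mkv
+# Standalone app: the same GOP length expressed in seconds
+SvtAv1EncApp -i input.y4m --crf 32 --keyint 10s -b output.ivf
+```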
+
+# Threading and Efficiency
+
+SVT-AV1 is specifically designed to scale well across many logical processors.
+By default, SVT-AV1 only uses multi-threading techniques that do not cause a
+decrease in the resulting quality/efficiency. For example, it does not use
+tile-based threading, which is known to decrease quality--SVT-AV1 supports
+tiles but does not use them for parallelization by default. As a result of the
+reliance on threading techniques that do not degrade quality, the
+video output will be the same when using `--lp 1` as when using `--lp n` in the
+default CRF configuration.
+
+Anecdotally, SVT-AV1 is able to fairly efficiently use about 16 processor cores
+when encoding 1080p video on a preset in the 4-6 range using the default
+configuration. When using high core-count systems, SVT-AV1's ability to fully
+utilize all available threads drops off and additional cores provide less
+incremental encoding speed. For this reason, tools that split video into
+scene-based chunks can be useful if greater parallelization is desired. As
+resolution increases, threading capabilities go up as well.
+
+Note that the highest quality presets (0-3) use features that have a lot of
+dependencies and may lead to lower parallel CPU usage.
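+
+As a minimal sketch (the thread count and file names are placeholders), the
+number of logical processors the encoder may use can be capped with `--lp`, or
+with `lp` through `svtav1-params` in ffmpeg:
+
+``` bash
+# Standalone app: restrict the encoder to 8 logical processors
+SvtAv1EncApp -i input.y4m --preset 6 --crf 32 --lp 8 -b output.ivf
+# ffmpeg: the same cap through svtav1-params
+ffmpeg -i input.mkv -c:v libsvtav1 -preset 6 -crf 32 -svtav1-params lp=8 output.mkv
+```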
+
+# Practical Advice on Grain Synthesis
+
+The random pattern associated with film grain (or CCD noise) is notoriously difficult
+to compress. When grain is present in the original source, significant efficiency gains
+can be made by deleting film grain and replacing it with synthetic grain that
+does not take up significant space in the file. When `film-grain` is enabled, SVT-AV1
+denoises the image, compares the resulting image with the original, and analyzes
+the nature of the grain. It then inserts synthetic grain with similar
+properties. This can greatly improve efficiency while retaining visual quality and
+character.
+
+The process of removing grain can sometimes delete very fine detail from the
+original image, so it should not be used too aggressively. The level passed
+to the `film-grain` parameter controls how aggressively this procedure is
+employed. As a general rule, a `film-grain` level of around 8 is sufficient for
+live action video with a normal amount of grain. Noisier video benefits
+from higher levels in the 10-15 range. 2D animation typically has less grain,
+so a level of around 4 works well with standard (hand-drawn) animation. Grainy
+animation can benefit from higher levels up to around 10.
+
+If grain synthesis levels can be manually verified through subjective evaluation or
+high fidelity metrics, then passing `film-grain-denoise=0` may
+result in higher fidelity by disabling source denoising. In that case, the
+correct `film-grain` level is important because a more conservative smoothing
+process is used--too high a `film-grain` level can lead to noise stacking.
+
+More detail on film grain synthesis is available in the [appendix](Appendix-Film-Grain-Synthesis.md).
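+
+A hedged example of the two configurations discussed above (the CRF and grain
+levels are illustrative starting points, not recommendations):
+
+``` bash
+# Live-action with a normal amount of grain: synthesis with default denoising
+ffmpeg -i input.mkv -c:v libsvtav1 -crf 30 -svtav1-params film-grain=8 output.mkv
+# Manually verified grain level: disable the source denoising step
+ffmpeg -i input.mkv -c:v libsvtav1 -crf 30 -svtav1-params film-grain=8:film-grain-denoise=0 output.mkv
+```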
+
+# Improving Decoding Performance
+
+Although modern AV1 decoders (such as dav1d and hardware decoders) are
+extremely efficient, there can be cases where software decoders on slower
+hardware have a difficult time decoding AV1 streams without stuttering.
+
+Tips that may improve decoding performance include:
+
+* Use the `fast-decode=1` parameter
+* Reduce the bitrate used (higher CRF values result in lower bitrates)
+* Encode using tiles (`tile-columns=2`, for example)
+* Avoid the use of synthetic grain
+* Use higher presets (which do not use the more complex AV1 tools)
+* Provide the option of a lower resolution version of the video
+* Use 8-bit color depth instead of 10
+
+Note that each of these options has the potential to reduce the image
+quality to a greater or lesser degree, so they should be used with care.
+The performance gains may also depend on the decoding platform. For example,
+using tiles via the `tile-columns` and/or `tile-rows` options can lead
+to large improvements in encoding and decoding performance if both the encoder
+and decoder have sufficient threads available. However, if the target platform
+does not support multithreaded tile decoding, then no decoding gains will be
+realized. Tiling can lead to visible artifacts, especially if many
+tiles are used. The use of `fast-decode=1`, on the other hand, may provide
+decoding performance improvement even if the target decoder does not have
+multithreading. It can also affect video quality.
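+
+An illustrative sketch combining several of the tips above (the values shown
+are placeholders, not recommendations):
+
+``` bash
+# Favor decoding speed: fast-decode, two tile columns, higher preset/CRF, 8-bit
+ffmpeg -i input.mkv -c:v libsvtav1 -preset 10 -crf 38 -pix_fmt yuv420p \
+    -svtav1-params fast-decode=1:tile-columns=2 output.mkv
+```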
+
+Note that, for more advanced media players and streaming tool implementations,
+enabling GPU AV1 grain synthesis can actually increase decoding speeds and
+offset the slight decoding latency penalty.
+
+# Tuning for Animation
+
+There are two types of video that are called "animation": hand-drawn 2D
+and 3D animated. Both types tend to be easy to encode (meaning the resulting
+file will be small), but for different reasons. 2D animation often has large
+areas that do not move, so the difference between one frame and another is
+often small. In addition, it tends to have low levels of grain.
+Experience has shown that relatively high `crf` values with low levels of
+`film-grain` produce 2D animation results that are visually good.
+
+3D animation has much more detail and movement, but it sometimes has no grain
+whatsoever, or only small amounts that were purposely added to the image. If
+the original animated video has no grain, encoding without `film-grain` will
+increase encoding speed and avoid the possible loss of fine detail that can
+sometimes result from the denoising step of the synthetic grain process.
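+
+A hedged starting point for each case (the CRF and grain levels are
+illustrative and should be tuned per title):
+
+``` bash
+# Hand-drawn 2D animation: higher CRF with light grain synthesis
+ffmpeg -i anim2d.mkv -c:v libsvtav1 -preset 5 -crf 38 -svtav1-params tune=0:film-grain=4 out2d.mkv
+# Grain-free 3D animation: omit film-grain entirely
+ffmpeg -i anim3d.mkv -c:v libsvtav1 -preset 5 -crf 32 -svtav1-params tune=0 out3d.mkv
+```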
+
+# 8 or 10-bit Encoding
+
+Video may be encoded with either 8 or 10-bit color depth. 10-bit video
+can represent more shades of grey and colors and is less prone to certain artifacts, such as color
+banding and loss of detail in low luma areas. Most SDR sources come in 8-bit
+color and SVT-AV1, by default, will encode 8-bit video to 8-bit video or 10-bit
+to 10-bit.
+
+It is possible to encode 8-bit video to a 10-bit final result. This allows the
+encoder to use less rounding and can produce slightly better fidelity. There is
+a small cost in terms of resulting file size (~5%), and, with some encoders, an
+encoding-time cost. SVT-AV1 was carefully designed to be able to encode 10-bit
+color in a compute-efficient manner, so there should not be much of a
+encoding performance penalty associated with 10-bit except at very fast presets
+(11-13), where the slowdown may be more noticeable. One should be aware,
+however, that 10-bit *decoding* can also be more compute-intensive than 8-bit
+in some decoders.
+
+To force the final result to be 10-bit, specify `-pix_fmt yuv420p10le` in
+ffmpeg; to force 8-bit, use `-pix_fmt yuv420p`.
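+
+For example (a sketch assuming an 8-bit SDR source; file names are
+placeholders):
+
+``` bash
+# Encode an 8-bit source to a 10-bit AV1 output
+ffmpeg -i input.mkv -c:v libsvtav1 -crf 32 -pix_fmt yuv420p10le output10.mkv
+# Keep the output at 8-bit instead
+ffmpeg -i input.mkv -c:v libsvtav1 -crf 32 -pix_fmt yuv420p output8.mkv
+```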
+
+# HDR and SDR Video
+
+Some video sources now come designed for display on high dynamic range (HDR) hardware.
+SVT-AV1 can encode these sources correctly and embed the required metadata in the final
+output. HDR video typically involves a wide-gamut color space (such as BT.2020)
+and an associated set of transfer functions (such as PQ or HLG). HDR video is also
+typically encoded with 10-bit color. Detailed information on forcing these
+settings is available in the [full parameters description](parameters.md).
+
+Colors can be mapped from a wider space into a less wide space (such as the
+frequently used BT.709) using ffmpeg or other tools, but there are always
+losses, either in the accuracy of the color or the retention of detail in high
+and low brightness areas. As a rule, it is best to avoid conversion from HDR to
+SDR, if possible.
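+
+As a hedged illustration for an HDR10 (PQ/BT.2020) source, using ffmpeg's
+generic color-tagging options (see the [full parameters description](parameters.md)
+for the encoder-side equivalents; all values assume the source really is HDR10):
+
+``` bash
+# Encode 10-bit HDR10 and tag primaries, transfer function, and matrix
+ffmpeg -i hdr_input.mkv -c:v libsvtav1 -preset 6 -crf 28 -pix_fmt yuv420p10le \
+    -color_primaries bt2020 -color_trc smpte2084 -colorspace bt2020nc output.mkv
+```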
+
+# Options That Give the Best Encoding Bang-For-Buck
+
+The quality/filesize tradeoff is controlled by the `crf` parameter. Increasing
+this parameter can significantly reduce the file size. AV1 is very efficient at
+preserving the types of details that humans notice, so significant reduction in
+objective quality (PNSR and similar measures) can still result in a video with
+good subjective quality.
+
+[Film grain synthesis](#practical-advice-on-grain-synthesis) can also significantly
+reduce file size while retaining apparent visual quality. It is not enabled by
+default because not all sources have film grain or CCD noise. Additionally,
+the de-noising process used in this procedure can delete very fine details
+(such as small, fast-moving particles or skin imperfections), so the
+aggressiveness of the denoising/synthesis needs to be paired with the strength
+of the grain/noise in the original sample.
+
+The use of subjective mode (`--tune=0`) often results in an image with greater
+sharpness and is intended to produce a result that appears to humans
+to be of high quality (as opposed to doing well on basic objective measures, such as
+PSNR).
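+
+When in doubt about the right `crf`, a quick sweep over a short sample can make
+the size/quality trade-off concrete (a sketch; file names and values are
+placeholders):
+
+``` bash
+# Encode a one-minute excerpt at several CRF values and compare the results
+for crf in 25 30 35 40; do
+    ffmpeg -i input.mkv -t 60 -c:v libsvtav1 -preset 6 -crf "$crf" \
+        -svtav1-params tune=0 "crf${crf}.mkv"
+done
+```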
+
+# Multi-Pass Encoding
+
+Some encoder features benefit from or require the use of a multi-pass encoding
+approach. In SVT-AV1, in general, multi-pass encoding is useful for achieving a
+target bitrate when using VBR (variable bit rate) encoding, although both
+one-pass and multi-pass modes are supported.
+
+When using CRF (constant rate factor) mode, multi-pass encoding is
+designed to improve quality for corner case videos--it is particularly helpful
+in videos with high motion because it can adjust the prediction structure (to
+use closer references, for example). Multi-pass encoding, therefore, can be
+said to have an impact on quality in CRF mode, but is not critical in most
+situations. In general, multi-pass encoding is not as important for SVT-AV1 in
+CRF mode as it is for some other encoders.
+
+CBR (constant bit rate) encoding is always one-pass.
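+
+A minimal two-pass VBR sketch with the standalone application (this assumes
+the `--passes` option described in the [parameters description](parameters.md);
+the bitrate and file names are placeholders):
+
+``` bash
+# Two-pass VBR targeting 2000 kbps; both passes run from one invocation
+SvtAv1EncApp -i input.y4m --rc 1 --tbr 2000 --passes 2 -b output.ivf
+```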
+
+# Bitrate Control Modes
+
+SVT-AV1 supports three general approaches to controlling the bitrate.
+
+* In CBR (constant bit rate) mode, the target bitrate is forced at all times.
+This results in a predictable file size, but inefficient use of space as simple
+scenes get too many bits and complex scenes get too few. Use `--rc=2` for this mode.
+* In VBR (variable bit rate) mode, a target bitrate is set, but the effective bitrate
+can vary above and below the target. Use `--rc=1` to enable VBR mode and set
+the flexibility of the effective bitrate using `--bias-pct`. A value closer to
+zero makes the encode behave more like a CBR encode, while a value closer to
+100 gives it greater flexibility.
+* CRF (constant rate factor) mode targets a constant visual quality. This approach
+leads to a favorable visual quality for a given file size and is recommended
+for applications where a target bitrate is not necessary, such as in a home environment.
+Set `--rc=0` to use this method.
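+
+Hedged examples of the three modes with the standalone application (the
+bitrate and CRF values are placeholders):
+
+``` bash
+# CRF: constant visual quality
+SvtAv1EncApp -i input.y4m --rc 0 --crf 32 -b crf.ivf
+# VBR: average 2000 kbps, with flexibility controlled by --bias-pct
+SvtAv1EncApp -i input.y4m --rc 1 --tbr 2000 --bias-pct 50 -b vbr.ivf
+# CBR: hold 2000 kbps throughout
+SvtAv1EncApp -i input.y4m --rc 2 --tbr 2000 -b cbr.ivf
+```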
+
+## Notes
+
+The feature settings that are described in this document were compiled at
+v1.2.0 of the code and may not reflect the current status of the code. The
+description in this document represents an example showing how features would
+interact with the SVT architecture. For the most up-to-date settings, it's
+recommended to review the section of the code implementing this feature.
diff -pruN 0.9.1+dfsg-1/Docs/Contribute.md 1.2.0+dfsg-2/Docs/Contribute.md
--- 0.9.1+dfsg-1/Docs/Contribute.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Contribute.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,35 @@
+[Top level](../README.md)
+
+# How to Contribute to SVT-AV1
+
+We welcome community contributions to the SVT-AV1 Encoder and Decoder. Thank
+you for your time! By contributing to the project, you agree to the license,
+patent and copyright terms in the AOM License and Patent License and to the
+release of your contribution under these terms. See [LICENSE](LICENSE.md) and
+[PATENTS](PATENTS.md) for details.
+
+## Contributor agreement
+
+You will be required to execute the appropriate [contributor agreement](http://aomedia.org/license/)
+to ensure that the AOMedia Project has the right to distribute your changes.
+
+## Contribution process
+
+- Follow the [coding guidelines](STYLE.md) and the [contributing guidelines](CONTRIBUTING.md)
+
+- Validate that your changes do not break a build
+
+- Perform smoke tests and ensure they pass
+
+- Submit a pull request for review to the maintainer
+
+## How to Report Bugs and Provide Feedback
+
+Use the [Issues](https://gitlab.com/AOMediaCodec/SVT-AV1/issues) tab on GitLab.
+To avoid duplicate issues, please make sure you go through the existing issues
+before logging a new one. You can find previously asked questions
+[here](https://gitlab.com/AOMediaCodec/SVT-AV1/-/issues?sort=created_date&state=all&label_name[]=question).
+
+## IRC
+
+`#svt` on Libera.chat. Join via [Libera Webchat](https://web.libera.chat/?channel=#svt) or use your favorite IRC client.
diff -pruN 0.9.1+dfsg-1/Docs/Ffmpeg.md 1.2.0+dfsg-2/Docs/Ffmpeg.md
--- 0.9.1+dfsg-1/Docs/Ffmpeg.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Ffmpeg.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,126 @@
+[Top level](../README.md)
+
+# Using SVT-AV1 within ffmpeg
+
+## A Note on ffmpeg Versions
+
+Although ffmpeg has an SVT-AV1 wrapper, its functionality was severely limited
+prior to and including ffmpeg version 5.0.X. Any version starting with 5.1.0 will
+permit full SVT-AV1 functionality, including passing SVT-AV1 parameters directly
+via the `svtav1-params` keyword.
+
+If your ffmpeg version is 5.0.X or lower, we suggest you upgrade to a more recent version or
+use the [ffmpeg patch](../ffmpeg_plugin/README.md) included in the [SVT-AV1 repository](https://gitlab.com/AOMediaCodec/SVT-AV1/-/tree/master/ffmpeg_plugin).
+
+## Most Common Options
+
+The parameters that are most frequently changed from one encode to another are
+
+* `crf`. This parameter governs the quality/size trade-off. Higher CRF values will
+  result in a final output that takes less space, but begins to lose detail. Lower CRF
+  values retain more detail at the cost of larger file sizes. The possible range of CRF
+  in SVT-AV1 is 1-63. CRF values are not meant to be equivalent across different
+  encoders. A good starting point for 1080p video is `crf=30`.
+* `preset`. This parameter governs the efficiency/encode-time trade-off. Lower
+  presets will result in an output with better quality for a given file size, but
+  will take longer to encode. Higher presets can result in a very fast encode,
+  but will make some compromises on visual quality for a given crf value.
+* `-g` (in ffmpeg) or `keyint` (in SvtAv1EncApp). These parameters govern how
+  many frames will pass before the encoder will add a key frame. Key frames include
+  all information about a single image. Other (delta) frames store only
+  differences between one frame and another. Key frames are necessary for seeking
+  and for error-resilience (in VOD applications). More frequent key frames will
+  make the video quicker to seek and more robust, but they will also increase the
+  file size. For VOD, setting a key frame once per second or so is a common
+  choice. In other contexts, less frequent key frames (such as 5 or 10 seconds)
+  are preferred.
+* `film-grain`. Because of its random nature, film grain and CCD noise are
+  very difficult to compress. The AV1 specification provides the
+  capability to produce synthetic noise of varying intensity. It is therefore
+  possible for SVT-AV1 to delete film grain and CCD noise and replace it with
+  [synthetic grain](Appendix-Film-Grain-Synthesis.md) of the same character, resulting in good bitrate savings while
+  retaining subjective visual quality and character. The `film-grain` parameter enables this
+  behavior. Setting it to a higher level does so more aggressively. Very high
+  levels of denoising can result in the loss of some high-frequency detail, however.
+* `pix_fmt`. This parameter can be used to force encoding to 10 or 8 bit color depth. By default
+  SVT-AV1 will encode 10-bit sources to 10-bit outputs and 8-bit to 8-bit.
+* `tune`. This parameter changes some encoder settings to produce a result
+  that is optimized for subjective quality (`tune=0`) or PSNR (`tune=1`). Tuning
+  for subjective quality can result in a sharper image and higher psycho-visual fidelity.
+
+The following are some examples of common use cases that utilize ffmpeg.
+
+## Example 1: Fast/Realtime Encoding
+
+For fast encoding, the preset must be sufficiently high that your CPU can
+encode without stuttering. Higher presets are faster but less efficient. The
+highest preset is 13 (the highest preset intended for human use is 12).
+
+    ffmpeg -i infile.mkv -c:v libsvtav1 -preset 10 -crf 35 -c:a copy outfile.mkv
+
+Since SVT-AV1 is designed to scale well across cores/processors, fast encoding is
+best performed on machines with a sufficient number of threads.
+
+## Example 2: Encoding for Personal Use
+
+When encoding for personal use, such as a media server or HTPC, higher efficiency
+and *reasonable* encoding times are desirable.
+
+    ffmpeg -i infile.mkv -c:v libsvtav1 -preset 5 -crf 32 -g 240 -pix_fmt yuv420p10le -svtav1-params tune=0:film-grain=8 -c:a copy outfile.mkv
+
+Presets between 4 and 6 offer what many people consider a reasonable trade-off
+between quality and encoding time. Encoding with 10-bit depth results in more
+accurate colors and fewer artifacts with minimal increase in file size, though the
+resulting file may be somewhat more computationally intensive to decode for a given
+bitrate.
+
+If higher decoding performance is required, using 10-bit YCbCr encoding will
+increase efficiency, so a lower average bitrate can be used, which in turn
+improves decoding performance. In addition, passing the parameter
+`fast-decode=1` can help (this parameter does not have an effect for all
+presets, so check the [parameter description](Parameters.md) for your preset).
+Last, for a given bitrate, 8-bit `yuv420p` can sometimes be faster to encode,
+albeit at the cost of some fidelity.
+
+The `tune=0` parameter optimizes the encode for subjective visual quality (with higher sharpness),
+instead of objective quality (PSNR).
+
+The `film-grain` parameter allows SVT-AV1 to detect and delete film grain from the original video,
+and replace it with synthetic grain of the same character, resulting in significant bitrate savings. A
+value of 8 is a reasonable starting point for live-action video with a normal amount of grain. Higher
+values in the range of 10-15 enable more aggressive use of this technique for video with lots of natural
+grain. For 2D animation, lower values in the range of 4-6 are often appropriate. If the original
+video does not have natural grain, this parameter can be omitted.
+
+Note that the `crf` range for SVT-AV1 is 1-63, which is a wider range than is found on some popular
+open-source encoders. As a result, `crf` values that approximate the visual quality in those encoders
+will tend to be higher in SVT-AV1.
+
+Using a larger GOP size via the `-g` ffmpeg parameter results in a more
+efficient encode in terms of quality per bitrate, at the cost of seeking
+performance. A common rule-of-thumb among hobbyists is to use ten times the
+framerate of the video, but not more than 300.
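+
+For instance, under that rule of thumb, 30 fps content would use `-g 300`
+(10 × 30, which also hits the suggested cap of 300):
+
+    ffmpeg -i infile.mkv -c:v libsvtav1 -preset 5 -crf 32 -g 300 -c:a copy outfile.mkv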
+
+## Example 3: Encoding for Video On Demand
+
+For professional VOD applications, the best possible efficiency is often
+desired and videos are often split by scenes using third-party tools.
+
+A short GOP size (the `-g` parameter) results in better seeking performance and fault-tolerance.
+
+An example use of a single-scene video:
+
+    ffmpeg -i infile.mkv -c:v libsvtav1 -preset 2 -crf 25 -g 24 -pix_fmt yuv420p10le -svtav1-params tune=0:film-grain=8 -c:a copy outfile.mkv
+
+Note that using 8-bit instead may increase decode performance and compatibility.
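+
+A variant of the command above with only `pix_fmt` changed (a sketch) would
+produce 8-bit output:
+
+    ffmpeg -i infile.mkv -c:v libsvtav1 -preset 2 -crf 25 -g 24 -pix_fmt yuv420p -svtav1-params tune=0:film-grain=8 -c:a copy outfile.mkv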
+
+# Piping from ffmpeg into the standalone encoder
+
+If you are unable to use a version of ffmpeg that is recent enough to pass all required parameters
+to SVT-AV1 (or if you want to use a specific version of SVT-AV1 rather than the one included with your version
+of ffmpeg), you can use ffmpeg to decode the video and pipe the result to the standalone app
+for encoding. Then you can add the audio and video into a final file.
+
+    ffmpeg -i infile.mkv -map 0:v:0 -pix_fmt yuv420p10le -f yuv4mpegpipe -strict -1 - | SvtAv1EncApp -i stdin --preset 5 --keyint 240 --input-depth 10 --crf 32 --film-grain 8 --tune 0 -b outfile.ivf
+    ffmpeg -i outfile.ivf -i infile.mkv -map 0:v -map 1:a:0 -c:v copy -c:a copy outfile.mkv
+
Binary files 0.9.1+dfsg-1/Docs/img/rc_figure7.PNG and 1.2.0+dfsg-2/Docs/img/rc_figure7.PNG differ
Binary files 0.9.1+dfsg-1/Docs/img/rc_math3.PNG and 1.2.0+dfsg-2/Docs/img/rc_math3.PNG differ
Binary files 0.9.1+dfsg-1/Docs/img/reference_scaling_picture_size.png and 1.2.0+dfsg-2/Docs/img/reference_scaling_picture_size.png differ
Binary files 0.9.1+dfsg-1/Docs/img/reference_scaling_pipeline.png and 1.2.0+dfsg-2/Docs/img/reference_scaling_pipeline.png differ
Binary files 0.9.1+dfsg-1/Docs/img/ref_scale_API.png and 1.2.0+dfsg-2/Docs/img/ref_scale_API.png differ
Binary files 0.9.1+dfsg-1/Docs/img/ref_scale_downscaled_buffers.png and 1.2.0+dfsg-2/Docs/img/ref_scale_downscaled_buffers.png differ
Binary files 0.9.1+dfsg-1/Docs/img/ref_scale_modes_dataflow.png and 1.2.0+dfsg-2/Docs/img/ref_scale_modes_dataflow.png differ
Binary files 0.9.1+dfsg-1/Docs/img/switch-at-key-frames-only.png and 1.2.0+dfsg-2/Docs/img/switch-at-key-frames-only.png differ
Binary files 0.9.1+dfsg-1/Docs/img/switch-at-switch-frames-or-key-frames.png and 1.2.0+dfsg-2/Docs/img/switch-at-switch-frames-or-key-frames.png differ
diff -pruN 0.9.1+dfsg-1/Docs/Parameters.md 1.2.0+dfsg-2/Docs/Parameters.md
--- 0.9.1+dfsg-1/Docs/Parameters.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/Parameters.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,482 @@
+[Top level](../README.md)
+
+# SVT-AV1 Parameters
+
+
+## Configuration File Parameters
+
+The encoder parameters are listed in the tables below along with their status
+of support, command line option, and the range of values each parameter can
+take. Any parameter below that has a non-empty `Configuration file parameter`
+field can be set by adding it to the `Sample.cfg` file.
+
+### Options
+
+| **Configuration file parameter**   | **Command line**     | **Range**    | **Default**   | **Description**                                                                                                   |
+| ---------------------------------- | -------------------- | ------------ | ------------- | ----------------------------------------------------------------------------------------------------------------- |
+|                                    | --help               |              |               | Shows the command line options currently available                                                                |
+|                                    | --version            |              |               | Shows the version of the library that the application is linked against                                           |
+| **InputFile**                      | -i                   | any string   | None          | Input raw video (y4m and yuv) file path, use `stdin` to read from pipe                                            |
+| **StreamFile**                     | -b                   | any string   | None          | Output compressed (ivf) file path, use `stdout` to write to pipe                                                  |
+|                                    | -c                   | any string   | None          | Configuration file path                                                                                           |
+| **ErrorFile**                      | --errlog             | any string   | `stderr`      | Error file path                                                                                                   |
+| **ReconFile**                      | -o                   | any string   | None          | Reconstructed yuv file path                                                                                       |
+| **StatFile**                       | --stat-file          | any string   | None          | PSNR / SSIM per picture stat output file path, requires `--enable-stat-report 1`                                  |
+| **PredStructFile**                 | --pred-struct-file   | any string   | None          | Manual prediction structure file path                                                                             |
+| **Progress**                       | --progress           | [0-2]        | 1             | Verbosity of the output [0: no progress is printed, 2: aomenc style output]                                       |
+| **NoProgress**                     | --no-progress        | [0-1]        | 0             | Do not print out progress [1: `--progress 0`, 0: `--progress 1`]                                                  |
+| **EncoderMode**                    | --preset             | [-2-13]      | 10            | Encoder preset; presets < 0 are for debugging. Higher presets mean faster encodes, but with a quality tradeoff    |
+| **SvtAv1Params**                   | --svtav1-params      | any string   | None          | Colon-separated list of `key=value` pairs of parameters with keys based on command line options without `--`      |
+|                                    | --nch                | [1-6]        | 1             | Number of channels (library instance) that will be instantiated                                                   |
+
+#### Usage of **SvtAv1Params**
+
+To use the `--svtav1-params` option, the syntax is `--svtav1-params option1=value1:option2=value2...`.
+
+An example is:
+
+```bash
+SvtAv1EncApp \
+  -i input.y4m \
+  -b output.ivf \
+  --svtav1-params \
+  "preset=10:crf=30:irefresh-type=kf:matrix-coefficients=bt709:mastering-display=G(0.2649,0.6900)B(0.1500,0.0600)R(0.6800,0.3200)WP(0.3127,0.3290)L(1000.0,1)"
+```
+
+This will set `--preset` to 10 and `--crf` to 30 inside the API along with some other parameters.
+
+Note, however, that error checking is done only for invalid keys or values, not for duplicate keys.
+
+For more information on valid values for specific keys, refer to the [EbEncSettings](../Source/Lib/Encoder/Globals/EbEncSettings.c) file.
+
+## Encoder Global Options
+
+| **Configuration file parameter** | **Command line**            | **Range**                      | **Default** | **Description**                                                                                               |
+|----------------------------------|-----------------------------|--------------------------------|-------------|---------------------------------------------------------------------------------------------------------------|
+| **SourceWidth**                  | -w                          | [64-16384]                     | None        | Frame width in pixels, inferred if y4m.                                                                       |
+| **SourceHeight**                 | -h                          | [64-8704]                      | None        | Frame height in pixels, inferred if y4m.                                                                      |
+| **ForcedMaximumFrameWidth**      | --forced-max-frame-width    | [64-16384]                     | None        | Maximum frame width value to force.                                                                           |
+| **ForcedMaximumFrameHeight**     | --forced-max-frame-height   | [64-8704]                      | None        | Maximum frame height value to force.                                                                          |
+| **FrameToBeEncoded**             | -n                          | [0-`(2^63)-1`]                 | 0           | Number of frames to encode. If `n` is larger than the input, the encoder will loop back and continue encoding |
+| **BufferedInput**                | --nb                        | [-1, 1-`(2^31)-1`]             | -1          | Buffer `n` input frames into memory and use them to encode                                                    |
+| **EncoderColorFormat**           | --color-format              | [0-3]                          | 1           | Color format, only yuv420 is supported at this time [0: yuv400, 1: yuv420, 2: yuv422, 3: yuv444]              |
+| **Profile**                      | --profile                   | [0-2]                          | 0           | Bitstream profile [0: main, 1: high, 2: professional]                                                         |
+| **Level**                        | --level                     | [0,2.0-7.3]                    | 0           | Bitstream level, defined in A.3 of the av1 spec [0: auto]                                                     |
+| **HighDynamicRangeInput**        | --enable-hdr                | [0-1]                          | 0           | Enable writing of HDR metadata in the bitstream                                                               |
+| **FrameRate**                    | --fps                       | [1-240]                        | 60          | Input video frame rate, integer values only, inferred if y4m                                                  |
+| **FrameRateNumerator**           | --fps-num                   | [0-2^32-1]                     | 60000       | Input video frame rate numerator                                                                              |
+| **FrameRateDenominator**         | --fps-denom                 | [0-2^32-1]                     | 1000        | Input video frame rate denominator                                                                            |
+| **EncoderBitDepth**              | --input-depth               | [8, 10]                        | 8           | Input video file and output bitstream bit-depth                                                               |
+| **CompressedTenBitFormat**       | --compressed-ten-bit-format | [0-1]                          | 0           | Pack 10bit video, handled between the app and library                                                         |
+| **Injector**                     | --inj                       | [0-1]                          | 0           | Inject pictures to the library at defined frame rate                                                          |
+| **InjectorFrameRate**            | --inj-frm-rt                | [0-240]                        | 60          | Set injector frame rate, only applicable with `--inj 1`                                                       |
+| **StatReport**                   | --enable-stat-report        | [0-1]                          | 0           | Calculates and outputs PSNR / SSIM metrics at the end of encoding                                             |
+| **Asm**                          | --asm                       | [0-11, c-max]                  | max         | Limit assembly instruction set [c, mmx, sse, sse2, sse3, ssse3, sse4_1, sse4_2, avx, avx2, avx512, max]       |
+| **LogicalProcessors**            | --lp                        | [0, core count of the machine] | 0           | Target (best effort) number of logical cores to be used. 0 means all. Refer to Appendix A.1                   |
+| **PinnedExecution**              | --pin                       | [0-1]                          | 0           | Pin the execution to the first --lp cores. Overwritten to 0 when `--ss` is set. Refer to Appendix A.1         |
+| **TargetSocket**                 | --ss                        | [-1,1]                         | -1          | Specifies which socket to run on, assumes a max of two sockets. Refer to Appendix A.1                         |
+| **FastDecode**                   | --fast-decode               | [0,1]                          | 0           | Tune settings to output bitstreams that can be decoded faster, [0 = OFF, 1 = ON]                              |
+| **Tune**                         | --tune                      | [0,1]                          | 1           | Specifies whether to use PSNR or VQ as the tuning metric [0 = VQ, 1 = PSNR]                                   |
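+
+For example (a sketch; file names are placeholders), a raw `.yuv` input needs
+its dimensions, frame rate, and bit depth specified explicitly, while a `.y4m`
+input carries them in its header:
+
+```bash
+# Raw yuv input: width, height and fps cannot be inferred
+SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 30 --input-depth 8 -b output.ivf
+```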
+
+## Rate Control Options
+
+| **Configuration file parameter**   | **Command line**                   | **Range**        | **Default**       | **Description**                                                                                                        |
+| ---------------------------------- | ---------------------------------- | ---------------- | ----------------- | ---------------------------------------------------------------------------------------------------------------------- |
+| **RateControlMode**                | --rc                               | [0-2]            | 0                 | Rate control mode [0: CRF or CQP (if `--aq-mode` is 0) [Default], 1: VBR, 2: CBR]                                      |
+| **QP**                             | --qp                               | [1-63]           | 35                | Initial QP level value                                                                                                 |
+| **CRF**                            | --crf                              | [1-63]           | 35                | Constant Rate Factor value, setting this value is equal to `--rc 0 --aq-mode 2 --qp x`                                 |
+| **TargetBitRate**                  | --tbr                              | [1-100000]       | 2000              | Target Bitrate (kbps), only applicable for VBR and CBR encoding, also accepts `b`, `k`, and `m` suffixes               |
+| **MaxBitRate**                     | --mbr                              | [1-100000]       | 0                 | Maximum Bitrate (kbps) only applicable for CRF encoding, also accepts `b`, `k`, and `m` suffixes                       |
+| **UseQpFile**                      | --use-q-file                       | [0-1]            | 0                 | Overwrite the encoder default picture based QP assignments and use QP values from `--qp-file`                          |
+| **QpFile**                         | --qpfile                           | any string       | Null              | Path to a file containing per picture QP value                                                                         |
+| **MaxQpAllowed**                   | --max-qp                           | [1-63]           | 63                | Maximum (highest) quantizer, only applicable for VBR and CBR                                                           |
+| **MinQpAllowed**                   | --min-qp                           | [1-62]           | 1                 | Minimum (lowest) quantizer with the max value being max QP value allowed - 1, only applicable for VBR and CBR          |
+| **AdaptiveQuantization**           | --aq-mode                          | [0-2]            | 2                 | Set adaptive QP level [0: off, 1: variance base using AV1 segments, 2: deltaq pred efficiency]                         |
+| **UseFixedQIndexOffsets**          | --use-fixed-qindex-offsets         | [0-2]            | 0                 | Overwrite the encoder default hierarchical layer based QP assignment and use fixed Q index offsets                     |
+| **KeyFrameQIndexOffset**           | --key-frame-qindex-offset          | [-256-255]       | 0                 | Overwrite the encoder default keyframe Q index assignment                                                              |
+| **KeyFrameChromaQIndexOffset**     | --key-frame-chroma-qindex-offset   | [-256-255]       | 0                 | Overwrite the encoder default chroma keyframe Q index assignment                                                       |
+| **LumaYDCQindexOffset**            | --luma-y-dc-qindex-offset          | [-64-63]         | 0                 | Overwrite the encoder default dc Q index offset for luma plane                                                         |
+| **ChromaUDCQindexOffset**          | --chroma-u-dc-qindex-offset        | [-64-63]         | 0                 | Overwrite the encoder default dc Q index offset for chroma Cb plane                                                    |
+| **ChromaUACQindexOffset**          | --chroma-u-ac-qindex-offset        | [-64-63]         | 0                 | Overwrite the encoder default ac Q index offset for chroma Cb plane                                                    |
+| **ChromaVDCQindexOffset**          | --chroma-v-dc-qindex-offset        | [-64-63]         | 0                 | Overwrite the encoder default dc Q index offset for chroma Cr plane                                                    |
+| **ChromaVACQindexOffset**          | --chroma-v-ac-qindex-offset        | [-64-63]         | 0                 | Overwrite the encoder default ac Q index offset for chroma Cr plane                                                    |
+| **QIndexOffsets**                  | --qindex-offsets                   | any string       | `0,0,..,0`        | list of luma Q index offsets per hierarchical layer, separated by `,` with each offset in the range of [-256-255]      |
+| **ChromaQIndexOffsets**            | --chroma-qindex-offsets            | any string       | `0,0,..,0`        | list of chroma Q index offsets per hierarchical layer, separated by `,` with each offset in the range of [-256-255]    |
+| **UnderShootPct**                  | --undershoot-pct                   | [0-100]          | 25                | Allowable datarate undershoot (min) target (%), default depends on the rate control mode                               |
+| **OverShootPct**                   | --overshoot-pct                    | [0-100]          | 25                | Allowable datarate overshoot (max) target (%), default depends on the rate control mode                                |
+| **BufSz**                          | --buf-sz                           | [20-10000]       | 6000              | Client buffer size (ms), only applicable for CBR                                                                       |
+| **BufInitialSz**                   | --buf-initial-sz                   | [20-10000]       | 4000              | Client initial buffer size (ms), only applicable for CBR                                                               |
+| **BufOptimalSz**                   | --buf-optimal-sz                   | [20-10000]       | 5000              | Client optimal buffer size (ms), only applicable for CBR                                                               |
+| **RecodeLoop**                     | --recode-loop                      | [0-4]            | 4                 | Recode loop level, look at the "Recode loop level table" in the user's guide for more info [0: off, 4: preset based]   |
+| **VBRBiasPct**                     | --bias-pct                         | [0-100]          | 100               | CBR/VBR bias [0: CBR-like, 100: VBR-like]                                                                              |
+| **MinSectionPct**                  | --minsection-pct                   | [0-100]          | 0                 | GOP min bitrate (expressed as a percentage of the target rate)                                                         |
+| **MaxSectionPct**                  | --maxsection-pct                   | [0-10000]        | 2000              | GOP max bitrate (expressed as a percentage of the target rate)                                                         |
+| **EnableQM**                       | --enable-qm                        | [0-1]            | 0                 | Enable quantization matrices                                                                                            |
+| **MinQmLevel**                     | --qm-min                           | [0-15]           | 8                 | Min quant matrix flatness                                                                                              |
+| **MaxQmLevel**                     | --qm-max                           | [0-15]           | 15                | Max quant matrix flatness                                                                                              |
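+
+As a brief illustration of the options above (a sketch; the bitrate and buffer
+values are placeholders), a 1-pass CBR encode with an explicit client buffer
+model could look like:
+
+```bash
+# CBR at 3000 kbps; buffer sizes are in milliseconds
+SvtAv1EncApp -i input.y4m -b output.ivf --rc 2 --tbr 3000 --buf-initial-sz 4000 --buf-optimal-sz 5000 --buf-sz 6000
+```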
+
+### **UseFixedQIndexOffsets** and more information
+
+`UseFixedQIndexOffsets` and its associated arguments (`HierarchicalLevels`,
+`QIndexOffsets`, `ChromaQIndexOffsets`, `KeyFrameQIndexOffset`,
+`KeyFrameChromaQIndexOffset`) are used together to specify the qindex offsets
+based on frame type and temporal layer when rc is set to 0.
+
+The QP value specified by the `--qp` argument is assigned to the pictures at
+the highest temporal layer. It is first converted to a qindex, then the
+corresponding qindex offsets are added on top of it based on the frame type
+(Key/Inter) and temporal layer id.
+
+Qindex offsets can be negative. The final qindex value will be clamped to the
+valid min/max qindex range.
+
+For the chroma planes, after the qindex for the luma plane has been decided,
+the corresponding chroma qindex offsets are added on top of the luma plane
+qindex based on the frame type and temporal layer id.
+
+`--qindex-offsets` and `--chroma-qindex-offsets` must be given after the
+`--hierarchical-levels` parameter. The number of qindex offsets should be
+`HierarchicalLevels` plus 1, and the list can be enclosed in `[]`.
+
+An example command line is:
+
+```bash
+SvtAv1EncApp -i in.y4m -b out.ivf --rc 0 -q 42 --hierarchical-levels 3 --use-fixed-qindex-offsets 1 --qindex-offsets [-12,-8,-4,0] --key-frame-qindex-offset -20 --key-frame-chroma-qindex-offset -6 --chroma-qindex-offsets [-6,0,12,24]
+```
+
+For this command line, corresponding qindex values are:
+
+| **Frame Type**   | **Luma qindex** | **Chroma qindex** |
+|------------------|-----------------|-------------------|
+| **Key Frame**    | 148 (42x4 - 20) | 142 (148 - 6)     |
+| **Layer0 Frame** | 156 (42x4 - 12) | 150 (156 - 6)     |
+| **Layer1 Frame** | 160 (42x4 - 8)  | 160 (160 + 0)     |
+| **Layer2 Frame** | 164 (42x4 - 4)  | 176 (164 + 12)    |
+| **Layer3 Frame** | 168 (42x4 + 0)  | 192 (168 + 24)    |
+
+### **EnableQM** and more information
+
+With `EnableQM`, `MinQmLevel` and `MaxQmLevel`, the user can customize the
+quantization matrix used in the quantization procedure instead of using the
+default one. With the default quantization matrix, all coefficients share the
+same weight, whereas with non-default ones, coefficients can be weighted
+differently according to the user's settings. The deviation of weight (or,
+equivalently, flatness) is controlled by the `MinQmLevel` and `MaxQmLevel`
+arguments. There are sixteen quantization matrix levels, ranging from level 0
+to level 15. The lower the level, the larger the deviation of weight the
+quantization matrix provides. Level 15 is fully flat in weight and is the
+default quantization matrix. A lower level quantization matrix typically
+results in bitstreams with a lower bitrate and slightly worse quality in CRF
+rate control mode. The reduction in bitrate is more pronounced at low CRF
+values than at high ones.
+
+The quantization matrix feature is signaled at the frame level. When the
+feature is enabled, the encoder decides each frame's quantization matrix level
+by normalizing its qindex to the user-specified quantization matrix level
+range (from `MinQmLevel` to `MaxQmLevel`).
+
+An example command line is:
+
+```bash
+SvtAv1EncApp -i in.y4m -b out.ivf --keyint -1 --enable-qm 1 --qm-min 0 --qm-max 15
+```
+
+### Recode loop level table
+
+| level | description                                                                     |
+|-------|---------------------------------------------------------------------------------|
+| 0     | Off                                                                             |
+| 1     | Allow recode for KF and exceeding maximum frame bandwidth                       |
+| 2     | Allow recode only for key frames, alternate reference frames, and Golden frames |
+| 3     | Allow recode for all frame types based on bitrate constraints                   |
+| 4     | Preset based decision                                                           |
+
+
+### Multi-pass Options
+
+| **Configuration file parameter** | **Command line** | **Range**      | **Default**        | **Description**                                                                                   |
+|----------------------------------|------------------|----------------|--------------------|---------------------------------------------------------------------------------------------------|
+| **Pass**                         | --pass           | [0-3]          | 0                  | Multi-pass selection [0: single pass encode, 1: first pass, 2: second pass, 3: third pass]        |
+| **Stats**                        | --stats          | any string     | "svtav1_2pass.log" | Filename for multi-pass encoding                                                                  |
+| **Passes**                       | --passes         | [1-2]          | 1                  | Number of encoding passes, default is preset dependent [1: one pass encode, 2: multi-pass encode] |
+
+#### **Pass** information
+
+| **Pass** | **Stats** file open mode |
+|----------|-------------------------|
+| 0        | ""                      |
+| 1        | "w"                     |
+| 2        | "rw" if 3-pass else "r" |
+| 3        | "r"                     |
+
+`--pass 3` is only available for non-CRF modes, and all passes except single-pass require the `--stats` parameter to point to a valid path.
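+
+A minimal sketch of a two-pass VBR encode (the bitrate and file names are
+placeholders; pass 1 writes the stats file that pass 2 reads):
+
+```bash
+SvtAv1EncApp -i input.y4m --rc 1 --tbr 2000 --pass 1 --stats svtav1_2pass.log -b /dev/null
+SvtAv1EncApp -i input.y4m --rc 1 --tbr 2000 --pass 2 --stats svtav1_2pass.log -b output.ivf
+```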
+
+### GOP size and type Options
+
+| **Configuration file parameter** | **Command line**      | **Range**       | **Default** | **Description**                                                                                                           |
+|----------------------------------|-----------------------|-----------------|-------------|---------------------------------------------------------------------------------------------------------------------------|
+| **Keyint**                       | --keyint              | [-2-`(2^31)-1`] | -2          | GOP size (frames), use `s` suffix for seconds (SvtAv1EncApp only) [-2: ~5 seconds, -1: "infinite" only for CRF, 0: == -1] |
+| **IntraRefreshType**             | --irefresh-type       | [1-2]           | 2           | Intra refresh type [1: FWD Frame (Open GOP), 2: KEY Frame (Closed GOP)]                                                   |
+| **SceneChangeDetection**         | --scd                 | [0-1]           | 0           | Scene change detection control                                                                                            |
+| **Lookahead**                    | --lookahead           | [-1,0-120]      | -1          | Number of frames in the future to look ahead, beyond minigop, temporal filtering, and rate control [-1: auto]             |
+| **HierarchicalLevels**           | --hierarchical-levels | [3-5]           | 4           | Set hierarchical levels beyond the base layer [3: 4 temporal layers, 5: 6 temporal layers]                                |
+| **PredStructure**                | --pred-struct         | [1-2]           | 2           | Set prediction structure [1: low delay, 2: random access]                                                                 |
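+
+For example (a sketch), a closed-GOP encode of 30 fps content with a key frame
+every 5 seconds can be written in either of two equivalent forms:
+
+```bash
+SvtAv1EncApp -i input.y4m -b output.ivf --keyint 150 --irefresh-type 2
+SvtAv1EncApp -i input.y4m -b output.ivf --keyint 5s --irefresh-type 2
+```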
+
+### AV1 Specific Options
+
+| **Configuration file parameter**   | **Command line**       | **Range**        | **Default**   | **Description**                                                                                                                                                         |
+| ---------------------------------- | ---------------------- | ---------------- | ------------- | ----------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
+| **TileRow**                        | --tile-rows            | [0-6]            | 0             | Log2 of the number of tile rows to use, default changes per resolution                                                                                                  |
+| **TileCol**                        | --tile-columns         | [0-4]            | 0             | Log2 of the number of tile columns to use, default changes per resolution                                                                                               |
+| **LoopFilterEnable**               | --enable-dlf           | [0-1]            | 1             | Deblocking loop filter control                                                                                                                                          |
+| **CDEFLevel**                      | --enable-cdef          | [0-1]            | 1             | Enable Constrained Directional Enhancement Filter                                                                                                                       |
+| **EnableRestoration**              | --enable-restoration   | [0-1]            | 1             | Enable loop restoration filter                                                                                                                                          |
+| **EnableTPLModel**                 | --enable-tpl-la        | [0-1]            | 1             | Temporal Dependency model control, currently forced on library side, only applicable for CRF/CQP                                                                        |
+| **Mfmv**                           | --enable-mfmv          | [-1-1]           | -1            | Motion Field Motion Vector control [-1: auto]                                                                                                                           |
+| **EnableTF**                       | --enable-tf            | [0-1]            | 1             | Enable ALT-REF (temporally filtered) frames                                                                                                                             |
+| **EnableOverlays**                 | --enable-overlays      | [0-1]            | 0             | Enable the insertion of overlay pictures which will be used as an additional reference frame for the base layer picture                                                 |
+| **ScreenContentMode**              | --scm                  | [0-2]            | 2             | Set screen content detection level [0: off, 1: on, 2: content adaptive]                                                                                                 |
+| **RestrictedMotionVector**         | --rmv                  | [0-1]            | 0             | Restrict motion vectors from reaching outside the picture boundary                                                                                                      |
+| **FilmGrain**                      | --film-grain           | [0-50]           | 0             | Enable film grain [0: off, 1-50: level of denoising for film grain]                                                                                                     |
+| **FilmGrainDenoise**               | --film-grain-denoise   | [0-1]            | 1             | Apply denoising when film grain is ON, default is 1 [0: no denoising, film grain data sent in frame header, 1: level of denoising is set by the film-grain parameter]   |
+| **SuperresMode**                   | --superres-mode        | [0-4]            | 0             | Enable super-resolution mode, refer to the super-resolution section below for more info                                                                                 |
+| **SuperresDenom**                  | --superres-denom       | [8-16]           | 8             | Super-resolution denominator, only applicable for mode == 1 [8: no scaling, 16: half-scaling]                                                                           |
+| **SuperresKfDenom**                | --superres-kf-denom    | [8-16]           | 8             | Super-resolution denominator for key frames, only applicable for mode == 1 [8: no scaling, 16: half-scaling]                                                            |
+| **SuperresQthres**                 | --superres-qthres      | [0-63]           | 43            | Super-resolution q-threshold, only applicable for mode == 3                                                                                                             |
+| **SuperresKfQthres**               | --superres-kf-qthres   | [0-63]           | 43            | Super-resolution q-threshold for key frames, only applicable for mode == 3                                                                                              |
+| **SframeInterval**                 | --sframe-dist          | [0-`(2^31)-1`]   | 0             | S-Frame interval (frames) [0: OFF, > 0: ON]                                                                                                                             |
+| **SframeMode**                     | --sframe-mode          | [1-2]            | 2             | S-Frame insertion mode [1: the considered frame will be made into an S-Frame only if it is an altref frame, 2: the next altref frame will be made into an S-Frame]      |
+| **ResizeMode**                     | --resize-mode          | [0-4]            | 0             | Enable reference scaling mode                                                                                                                                           |
+| **ResizeDenom**                    | --resize-denom         | [8-16]           | 8             | Reference scaling denominator, only applicable for mode == 1 [8: no scaling, 16: half-scaling]                                                                          |
+| **ResizeKfDenom**                  | --resize-kf-denom      | [8-16]           | 8             | Reference scaling denominator for key frames, only applicable for mode == 1 [8: no scaling, 16: half-scaling]                                                           |
+
+#### **Super-Resolution**
+
+Super resolution is described in more detail in [the Super-Resolution documentation](./Appendix-Super-Resolution.md);
+in short, it allows frames to be encoded at a lower horizontal resolution and
+then upscaled back to the original resolution by the decoder.
+
+| **SuperresMode** | **Value**                                                                                                                   |
+|------------------|-----------------------------------------------------------------------------------------------------------------------------|
+| 0                | None, no frame super-resolution allowed                                                                                     |
+| 1                | All frames are encoded at the specified scale of 8/`denom`, thus a `denom` of 8 means no scaling, and 16 means half-scaling |
+| 2                | All frames are coded at a random scale                                                                                      |
+| 3                | Super-resolution scale for a frame is determined based on the q_index, a qthreshold of 63 means no scaling                  |
+| 4                | Automatically select the super-resolution mode for appropriate frames                                                       |
+
+The performance of the encoder will be affected for all modes other than mode
+0. For mode 4, note that the encoder will run at least twice, once with
+downscaling and once with no scaling, and then choose the better result for
+each of the appropriate frames.
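+
+A minimal sketch of fixed-scale super-resolution (mode 1): with `denom` set to
+12, all frames are encoded at 8/12 (two thirds) of the original width:
+
+```bash
+SvtAv1EncApp -i input.y4m -b output.ivf --superres-mode 1 --superres-denom 12 --superres-kf-denom 12
+```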
+
+For more information on the decision-making process,
+please look at [section 2.2 of the super-resolution doc](./Appendix-Super-Resolution.md#22-determination-of-the-downscaling-factor)
+
+#### **Reference Scaling**
+
+Reference Scaling is described in more detail in [the reference scaling documentation](./Appendix-Reference-Scaling.md);
+in short, it allows the input to be encoded, and output, at a lower
+resolution, with the scaling ratio applied both horizontally and vertically.
+
+| **ResizeMode** | **Value**                                                                                                                   |
+|------------------|-----------------------------------------------------------------------------------------------------------------------------|
+| 0                | None, no frame resize allowed                                                                                            |
+| 1                | Fixed mode, all frames are encoded at the specified scale of 8/`denom`, thus a `denom` of 8 means no scaling, and 16 means half-scaling |
+| 2                | Random mode, all frames are coded at a random scale, the scaling `denom` can be picked from 8 to 16                        |
+| 3                | Dynamic mode, scale for a frame is determined based on buffer level and average qp in rate control, scaling ratio can be 3/4 or 1/2. This mode can only work in 1-pass CBR low-delay mode                  |
+| 4                | Random access mode, scaling is controlled by scale events, which determine scaling in a specified scaling `denom` or recover to original resolution                                                       |
+
+An example CLI for reference scaling dynamic mode:
+> SvtAv1EncApp -i input.yuv -b output.ivf --resize-mode 3 --rc 2 --pred-struct 1 --tbr 1000
+
+TODO: Random access mode is not available until the scaling event parameter is
+supported. An example will be added here to guide the use of random access mode.
+
+### Color Description Options
+
+| **Configuration file parameter**   | **Command line**             | **Range**    | **Default**   | **Description**                                                                                                                            |
+| ---------------------------------- | ---------------------------- | ------------ | ------------- | ------------------------------------------------------------------------------------------------------------------------------------------ |
+| **ColorPrimaries**                 | --color-primaries            | [0-12, 22]   | 2             | Color primaries, refer to the user guide Appendix A.2 for full details                                                                     |
+| **TransferCharacteristics**        | --transfer-characteristics   | [0-22]       | 2             | Transfer characteristics, refer to the user guide Appendix A.2 for full details                                                            |
+| **MatrixCoefficients**             | --matrix-coefficients        | [0-14]       | 2             | Matrix coefficients, refer to the user guide Appendix A.2 for full details                                                                 |
+| **ColorRange**                     | --color-range                | [0-1]        | 0             | Color range [0: Studio, 1: Full]                                                                                                           |
+| **ChromaSamplePosition**           | --chroma-sample-position     | any string   | unknown       | Chroma sample position ['unknown', 'vertical'/'left', 'colocated'/'topleft']                                                               |
+| **MasteringDisplay**               | --mastering-display          | any string   | none          | Mastering display metadata in the format of "G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)", refer to the user guide Appendix A.2 for full details   |
+| **ContentLightLevel**              | --content-light              | any string   | none          | Set content light level in the format of "max_cll,max_fall", refer to the user guide Appendix A.2 for full details                         |
+
+## Appendix A Encoder Parameters
+
+### 1. Thread management parameters
+
+The `LogicalProcessors` (`--lp`) and `TargetSocket` (`--ss`) parameters are
+used to manage thread affinity on Windows and Ubuntu. Below are some examples
+of how to use them together.
+
+If `LogicalProcessors` and `TargetSocket` are not set, threads are managed by
+the OS thread scheduler.
+
+`SvtAv1EncApp.exe -i in.yuv -w 3840 -h 2160 --lp 40`
+
+If only `LogicalProcessors` is set, threads run on 40 logical processors.
+Threads may run across both sockets if 40 is larger than the number of logical
+processors on a single socket.
+
+NOTE: On Windows, thread affinity can only be set per processor group on
+systems with more than 64 logical processors. So, if 40 is larger than the
+number of logical processors on a single socket, threads will run on all the
+logical processors of both sockets.
+
+`SvtAv1EncApp.exe -i in.yuv -w 3840 -h 2160 --ss 1`
+
+If only `TargetSocket` is set, threads run on all the logical processors of
+socket 1.
+
+`SvtAv1EncApp.exe -i in.yuv -w 3840 -h 2160 --lp 20 --ss 0`
+
+If both `LogicalProcessors` and `TargetSocket` are set, threads run on 20
+logical processors of socket 0. Threads are guaranteed to run only on socket
+0, even if 20 is larger than the number of logical processors on socket 0.
+
+The (`--pin`) option allows the user to pin/unpin the execution to/from a
+specific number of cores.
+
+Using (`--pin 0`) together with (`--lp`) reduces the memory allocation while
+still allowing the execution to run on any of the cores rather than being
+restricted to specific cores.
+
+This is an example of how to use them together:
+
+`--lp 4` with `--pin 1` restricts the encoder to cpu0-3 and reduces the
+resource allocation to only what is needed for 4 cores. `--lp 4` with
+`--pin 0` also reduces the allocation to what is needed for 4 cores, but does
+not restrict the encoder to cpu0-3; in this case the encoder might end up
+using more than 4 cores due to its multi-threaded nature, but this at least
+allows multiple `--lp 4` encodes to run on the same machine without all of
+them being restricted to cpu0-3 or overflowing the memory usage.
+
+Example: a 72-core machine:
+
+72 jobs x --lp 1 --pin 0 (to maximize CPU utilization, 72 jobs are run simultaneously, with each job utilizing 1 core without being pinned to a specific core)
+
+36 jobs x --lp 2 --pin 1
+
+18 jobs x --lp 4 --pin 1
+
+(`--ss`) and (`--pin 0`) is not a valid combination: (`--pin`) is overwritten to 1 when (`--ss`) is used.
+
+### 2. AV1 metadata
+
+Please see subsections 6.4.2, 6.7.3, and 6.7.4 of the [AV1 Bitstream & Decoding Process Specification](https://aomediacodec.github.io/av1-spec/av1-spec.pdf) for more details on some expected values.
+
+The available options for `ColorPrimaries` (`--color-primaries`) are:
+
+- 1: `bt709`, BT.709
+- 2: unspecified, default
+- 4: `bt470m`, BT.470 System M (historical)
+- 5: `bt470bg`, BT.470 System B, G (historical)
+- 6: `bt601`, BT.601
+- 7: `smpte240`, SMPTE 240
+- 8: `film`, Generic film (color filters using illuminant C)
+- 9: `bt2020`, BT.2020, BT.2100
+- 10: `xyz`, SMPTE 428 (CIE 1931 XYZ)
+- 11: `smpte431`, SMPTE RP 431-2
+- 12: `smpte432`, SMPTE EG 432-1
+- 22: `ebu3213`, EBU Tech. 3213-E
+
+The available options for `TransferCharacteristics` (`--transfer-characteristics`) are:
+
+- 1: `bt709`, BT.709
+- 2: unspecified, default
+- 4: `bt470m`, BT.470 System M (historical)
+- 5: `bt470bg`, BT.470 System B, G (historical)
+- 6: `bt601`, BT.601
+- 7: `smpte240`, SMPTE 240 M
+- 8: `linear`, Linear
+- 9: `log100`, Logarithmic (100 : 1 range)
+- 10: `log100-sqrt10`, Logarithmic (100 * Sqrt(10) : 1 range)
+- 11: `iec61966`, IEC 61966-2-4
+- 12: `bt1361`, BT.1361
+- 13: `srgb`, sRGB or sYCC
+- 14: `bt2020-10`, BT.2020 10-bit systems
+- 15: `bt2020-12`, BT.2020 12-bit systems
+- 16: `smpte2084`, SMPTE ST 2084, ITU BT.2100 PQ
+- 17: `smpte428`, SMPTE ST 428
+- 18: `hlg`, BT.2100 HLG, ARIB STD-B67
+
+The available options for `MatrixCoefficients` (`--matrix-coefficients`) are:
+
+- 0: `identity`, Identity matrix
+- 1: `bt709`, BT.709
+- 2: unspecified, default
+- 4: `fcc`, US FCC 73.628
+- 5: `bt470bg`, BT.470 System B, G (historical)
+- 6: `bt601`, BT.601
+- 7: `smpte240`, SMPTE 240 M
+- 8: `ycgco`, YCgCo
+- 9: `bt2020-ncl`, BT.2020 non-constant luminance, BT.2100 YCbCr
+- 10: `bt2020-cl`, BT.2020 constant luminance
+- 11: `smpte2085`, SMPTE ST 2085 YDzDx
+- 12: `chroma-ncl`, Chromaticity-derived non-constant luminance
+- 13: `chroma-cl`, Chromaticity-derived constant luminance
+- 14: `ictcp`, BT.2100 ICtCp
+
+The available options for `ColorRange` (`--color-range`) are:
+
+- 0: `studio`, default
+- 1: `full`
+
+The available options for `ChromaSamplePosition` (`--chroma-sample-position`) are:
+
+- 0: `unknown`, default
+- 1: `vertical`/`left`, horizontally co-located with luma samples, vertical position in
+the middle between two luma samples
+- 2: `colocated`/`topleft`, co-located with luma samples
+
+`MasteringDisplay` (`--mastering-display`) and `ContentLightLevel` (`--content-light`) parameters are used to set the mastering display and content light level in the AV1 bitstream.
+
+`MasteringDisplay` takes the format of `G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)` where
+
+- `G(x,y)` is the green channel of the mastering display
+- `B(x,y)` is the blue channel of the mastering display
+- `R(x,y)` is the red channel of the mastering display
+- `WP(x,y)` is the white point of the mastering display
+- `L(max,min)` is the light level of the mastering display
+
+The `x` and `y` values are coordinates from 0.0 to 1.0, as specified in CIE
+1931, while the `max,min` values are floating point values representing
+candelas per square meter, or nits.
+The `max,min` values are generally specified in the range of 0.0 to 1.0, but
+there are no constraints on the provided values; invalid values will be
+clipped accordingly.
+
+`ContentLightLevel` takes the format of `max_cll,max_fall` where both values are integers clipped into a range of 0 to 65535.
+
+Examples:
+
+```bash
+SvtAv1EncApp -i in.y4m -b out.ivf \
+    --mastering-display "G(0.2649,0.6900)B(0.1500,0.0600)R(0.6800,0.3200)WP(0.3127,0.3290)L(1000.0,1)" \
+    --content-light 100,50 \
+    --color-primaries bt2020 \
+    --transfer-characteristics smpte2084 \
+    --matrix-coefficients bt2020-ncl \
+    --chroma-sample-position topleft
+    # Color primary is BT.2020, BT.2100
+    # Transfer characteristic is SMPTE ST 2084, ITU BT.2100 PQ
+    # matrix coefficients is BT.2020 non-constant luminance, BT.2100 YCbCr
+
+# or
+
+ffmpeg -y -i in.mp4 \
+  -strict -2 \
+  -c:a opus \
+  -c:v libsvtav1 \
+  -color_primaries:v bt2020 \
+  -color_trc:v smpte2084 \
+  -color_range:v pc \
+  -chroma_sample_location:v topleft \
+  -svtav1-params \
+    "mastering-display=G(0.2649,0.6900)B(0.1500,0.0600)R(0.6800,0.3200)WP(0.3127,0.3290)L(1000.0,1):\
+    content-light=100,50:\
+    matrix-coefficients=bt2020-ncl:\
+    chroma-sample-position=topleft" \
+  out.mp4
+# chroma-sample-position needs to be repeated because it currently isn't set on ffmpeg's side
+```
diff -pruN 0.9.1+dfsg-1/Docs/README.md 1.2.0+dfsg-2/Docs/README.md
--- 0.9.1+dfsg-1/Docs/README.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/README.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,10 +1,22 @@
-# SVT-AV1 Documentation
+[Top level](../README.md)
 
-**User Guides**:
-  * [SVT-AV1 Encoder User Guide](svt-av1_encoder_user_guide.md)
-  * [SVT-AV1 Decoder User Guide](svt-av1_decoder_user_guide.md)
+# Documentation
 
-**Detailed Encoder Documentation**:
+## Guides
+- [System Requirements](System-Requirements.md)
+- [How to run SVT-AV1 within ffmpeg](Ffmpeg.md)
+- [Standalone Encoder Usage](svt-av1_encoder_user_guide.md)
+- [Decoder Usage](svt-av1_decoder_user_guide.md)
+- [List of All Parameters](Parameters.md)
+- [Build Guide](Build-Guide.md)
+- [SVT-AV1 Contribution Guide](Contribute.md)
+- [Common Questions/Issues](CommonQuestions.md)
+
+## Design Documents
+- [Encoder Design](svt-av1-encoder-design.md)
+- [Decoder Design](svt-av1-decoder-design.md)
+
+## Technical Appendices
 - [Adaptive Prediction Structure Appendix](Appendix-Adaptive-Prediction-Structure.md)
 - [Altref and Overlay Pictures Appendix](Appendix-Alt-Refs.md)
 - [CDEF Appendix](Appendix-CDEF.md)
@@ -28,3 +40,4 @@
 - [Super-resolution Appendix](Appendix-Super-Resolution.md)
 - [Temporal Dependency Model](Appendix-TPL.md)
 - [Transform Search Appendix](Appendix-TX-Search.md)
+- [Reference Scaling Appendix](Appendix-Reference-Scaling.md)
diff -pruN 0.9.1+dfsg-1/Docs/svt-av1-decoder-design.md 1.2.0+dfsg-2/Docs/svt-av1-decoder-design.md
--- 0.9.1+dfsg-1/Docs/svt-av1-decoder-design.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/svt-av1-decoder-design.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Decoder Design for SVT-AV1 (Scalable Video Technology for AV1)
 
 ## Table of Contents
@@ -45,11 +47,20 @@
 
 ## Introduction
 
-This document describes the Intel SVT-AV1 decoder design. In particular, the decoder block diagram and multi-threading aspects are described. Besides, the document contains brief descriptions of the SVT-AV1 decoder modules such as parse, reconstruction, etc. This document is meant to be an accompanying document to the &quot;C Model&quot; source code, which contains the more specific details of the inner working of the decoder.
+This document describes the Intel SVT-AV1 decoder design. In particular, the
+decoder block diagram and multi-threading aspects are described. In addition,
+the document contains brief descriptions of the SVT-AV1 decoder modules, such
+as parse, reconstruction, etc. This document is meant to be an accompanying
+document to the &quot;C Model&quot; source code, which contains the more
+specific details of the inner workings of the decoder.
 
 ## High-level decoder architecture
 
-The high-level decoder pipeline is shown in Figure 1. Further details on individual stages are given in subsequent sections. The multi-threading aspect of the decoder is also explained in detail in a separate section. The details of high-level data structures and frame buffers used in the decoder are also covered briefly later.
+The high-level decoder pipeline is shown in Figure 1. Further details on
+individual stages are given in subsequent sections. The multi-threading aspect
+of the decoder is also explained in detail in a separate section. The details
+of high-level data structures and frame buffers used in the decoder are also
+covered briefly later.
 
 The major modules of the SVT-AV1 decoder are:
 
@@ -65,9 +76,14 @@ The major modules of the SVT-AV1 decoder
 
 ### Bit-Stream Parse
 
-The Parse stage does all tasks related to OBU reading, arithmetic decoding and related mv prediction (such as find\_warp\_samples, etc.) that produce the necessary mode info and residual data info.
-
-The header level parsing of sequence parameters and frame parameters happens separately via read\_sequence\_header\_obu andread\_frame\_header\_obufunctions. Parse Tile module does the parsing of tile group obu data. Figure 2 shows a typical flow of the parse stage.
+The Parse stage does all tasks related to OBU reading, arithmetic decoding and
+related mv prediction (such as find\_warp\_samples, etc.) that produce the
+necessary mode info and residual data info.
+
+The header-level parsing of sequence parameters and frame parameters happens
+separately via the read\_sequence\_header\_obu and read\_frame\_header\_obu
+functions. The Parse Tile module does the parsing of tile group obu data.
+Figure 2 shows a typical flow of the parse stage.
 
 **Input** : Bitstream buffer
 
@@ -79,13 +95,19 @@ The header level parsing of sequence par
 <a name = "figure-2"></a>
 ##### Figure 2: Parse stage Flow
 
+parse\_frame\_tiles() is the function that triggers the parsing module in the
+SVT-AV1 decoder. Parsing for each tile is started by start\_parse\_tile().
+Then parsing happens for each superblock in a tile by calling the function
+parse\_super\_block().
 
 Note: The prediction for palette mode happens during the Parse stage itself.
 
 ### Reconstruction
 
-This stage performs prediction, coefficient inverse scan, residual inverse quantization, inverse transform and finally generated the reconstructed data without applying post-processing filters. Figure 3 shows a typical flow of the reconstruction stage.
+This stage performs prediction, coefficient inverse scan, residual inverse
+quantization and inverse transform, and finally generates the reconstructed
+data without applying the post-processing filters. Figure 3 shows a typical
+flow of the reconstruction stage.
 
 **Input** : ModeInfo buffer, TransformInfo buffer, Coeff buffer
 
@@ -93,19 +115,27 @@ This stage performs prediction, coeffici
 
 ![image3](./img/decoder_reconstruction_stage.png)
 <a name = "figure-3"></a>
-##### Figure 3: Reconstruction stage Flowdecode\_frame\_tiles() function starts reconstruction at frame level. Then decode\_tile\_job()  is called for each tile.
+##### Figure 3: Reconstruction stage Flow
+
+The decode\_frame\_tiles() function starts reconstruction at the frame level. Then decode\_tile\_job() is called for each tile.
 
 For each Superblock in a tile, decode\_super\_block() function will be called.
 
-The total number of blocks inside a superblock and their corresponding **mode\_info** structure are stored while parsing. This helps to avoid calling of decode\_block() function recursively.
+The total number of blocks inside a superblock and their corresponding
+**mode\_info** structures are stored while parsing. This avoids having to call
+the decode\_block() function recursively.
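+
+Because the block count and the BlockModeInfo entries are recorded at parse
+time, reconstruction can walk them linearly. A minimal sketch, assuming a
+hypothetical per-SB layout:
+
+```c
+/* Illustrative layout: parsing stores one BlockModeInfo per coded block,
+ * contiguously, together with the number of blocks in the SB. */
+typedef struct { int bsize; /* ...mode data... */ } BlockModeInfoSketch;
+typedef struct { BlockModeInfoSketch *mode_info; int num_blocks; } SbInfoSketch;
+
+void decode_one_block(const BlockModeInfoSketch *mi); /* predict + IQ + IT */
+
+void decode_super_block_sketch(const SbInfoSketch *sb) {
+    /* Linear walk over the blocks recorded during parsing; no recursive
+     * re-traversal of the partition tree is needed. */
+    for (int i = 0; i < sb->num_blocks; ++i)
+        decode_one_block(&sb->mode_info[i]);
+}
+```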
 
 Note: The prediction for palette mode happens during the Parse stage itself.
 
-Note: In single thread mode, decode\_super\_block() will be called immediately after every parse\_super\_block() for better cache efficiency. In this mode decode\_frame\_tiles() will be completely avoided.
+Note: In single-thread mode, decode\_super\_block() is called immediately
+after every parse\_super\_block() for better cache efficiency. In this mode
+decode\_frame\_tiles() is skipped entirely.
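+
+A minimal sketch of this single-thread interleaving, with hypothetical helper
+names:
+
+```c
+/* Illustrative per-SB helpers; not the actual SVT-AV1 signatures. */
+void parse_one_sb(void *ctx, int sb_idx);
+void recon_one_sb(void *ctx, int sb_idx);
+
+/* Single-thread mode: parse and recon are interleaved per SB so the
+ * just-parsed data is consumed while still hot in the cache. */
+void st_decode_tile_sketch(void *ctx, int num_sbs) {
+    for (int i = 0; i < num_sbs; ++i) {
+        parse_one_sb(ctx, i);
+        recon_one_sb(ctx, i);
+    }
+}
+```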
 
 ### Loop Filter
 
-The loop filter function is to eliminate (or at least reduce) visually objectionable artifacts associated with the semi-independence of the coding of super blocks and their constituent sub-blocks as per section 7.14 of AV1 spec. This stage applies the loop filter for the entire frame. Flow diagram for Loop Filter is below in Figure 4.
+The purpose of the loop filter is to eliminate (or at least reduce) visually
+objectionable artifacts associated with the semi-independent coding of super
+blocks and their constituent sub-blocks, as per section 7.14 of the AV1 spec.
+This stage applies the loop filter to the entire frame. The flow diagram for
+the loop filter is below in Figure 4.
 
 ![image4](./img/decoder_loop_filter_stage.png)
 <a name = "figure-4"></a>
@@ -115,7 +145,7 @@ The loop filter function is to eliminate
 
 **Output** : Loop Filtered frame.
 
-1. eb\_av1\_loop\_filter\_frame\_init() Initialization of loop filter parameters is performed  here.
+1. eb\_av1\_loop\_filter\_frame\_init(): initialization of the loop filter parameters is performed here.
 2. Dec\_loop\_filter\_sb()
   1. Apply dec\_av1\_filter\_block\_plane\_vert()
     * Loop through each block in SB.
@@ -139,7 +169,9 @@ The loop filter function is to eliminate
 
 ### CDEF
 
-The CDEF performs deringing based on the detected direction of blocks as per section 7.15 of AV1 spec. This stage applies the CDEF for the entire frame. The flow diagram for the CDEF is shown in Figure 7.
+CDEF performs deringing based on the detected direction of blocks, as per
+section 7.15 of the AV1 spec. This stage applies CDEF to the entire frame. The
+flow diagram for CDEF is shown in Figure 7.
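+
+A rough sketch of this per-frame flow follows; the helper names are
+illustrative stand-ins (the real work happens in functions such as
+eb\_cdef\_filter\_fb() and eb\_cdef\_filter\_block\_c()):
+
+```c
+/* Sketch of the CDEF frame loop: direction search and filtering on each
+ * non-skip 8x8 block of every 64x64 filter block. Helper names are
+ * illustrative, and skip handling is omitted. */
+int  find_dir_8x8(const unsigned char *blk, int stride);          /* 0..7 */
+void filter_8x8(unsigned char *blk, int stride, int dir,
+                int pri_strength, int sec_strength);
+
+void cdef_frame_sketch(unsigned char *frame, int stride, int w, int h,
+                       int pri_strength, int sec_strength) {
+    for (int y = 0; y < h; y += 64)              /* each 64x64 filter block */
+        for (int x = 0; x < w; x += 64)
+            for (int by = y; by < y + 64 && by < h; by += 8)
+                for (int bx = x; bx < x + 64 && bx < w; bx += 8) {
+                    unsigned char *blk = frame + by * stride + bx;
+                    int dir = find_dir_8x8(blk, stride);
+                    filter_8x8(blk, stride, dir, pri_strength, sec_strength);
+                }
+}
+```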
 
 ![image7](./img/decoder_cdef_stage.png)
 <a name = "figure-7"></a>
@@ -152,10 +184,10 @@ The CDEF performs deringing based on the
 Steps involved in CDEF:
 
 1. svt\_cdef\_frame() function will be called to start CDEF for a frame.
-2.  For each 64x64 superblock function svt\_cdef\_block()will be called.
+2. For each 64x64 superblock, the function svt\_cdef\_block() will be called.
   1. The number of non-skip 8x8 blocks is calculated.
   2. Store the 3-pixel rows of the next SBs in the line and column buffers so that CDEF uses the original pixels.
-  3. Call eb\_cdef\_filter\_fb()  for each 8x8 non-skip block
+  3. Call eb\_cdef\_filter\_fb() for each 8x8 non-skip block
     * Find the direction of each 8x8 block.
     * Filter the 8x8 block according to the identified direction using eb\_cdef\_filter\_block\_c().
     * Store the results in the destination buffer
@@ -167,7 +199,9 @@ Steps involved in CDEF:
 
 ### Loop Restoration
 
-This stage applies the Loop Restoration for the entire frame and the process is defined as per section 7.17 of AV1 spec. The flow diagram for the LR is shown in Figure 9.
+This stage applies Loop Restoration to the entire frame; the process is
+defined in section 7.17 of the AV1 spec. The flow diagram for LR is shown in
+Figure 9.
 
 ![image9](./img/decoder_loop_restoration_stage.png)
 <a name = "figure-9"></a>
@@ -182,16 +216,23 @@ Loop Restoration for a frame starts from
 The steps involved are:
 
 1. Call dec\_av1\_loop\_restoration\_filter\_row() for each row of height sb\_size.
-  * call eb\_dec\_av1\_loop\_restoration\_filter\_unit()  for each LR unit of size 64x64
-    1. Use the  stored CDEF/ LF above/below boundaries form neighbor block based on processing row is outer or inner row respectively by calling the function setup\_processing\_stripe\_boundary() .
+  * Call eb\_dec\_av1\_loop\_restoration\_filter\_unit() for each LR unit of size 64x64 (a sketch of this loop follows the list).
+    1. Set up the processing stripe from the stored CDEF/LF above/below boundaries of the neighboring block, depending on whether the row being processed is an outer or inner row, by calling setup\_processing\_stripe\_boundary().
     2. Apply the LR filter (stripe\_filter) based on the type of unit\_lrtype.
     3. Restore the LR filtered data back to stripe\_buffer by function restore\_processing\_stripe\_boundary().
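+
+A minimal sketch of this row/unit walk, assuming hypothetical helper names:
+
+```c
+/* Illustrative stand-ins for the boundary and filter helpers. */
+void setup_stripe_boundary(void *ctx, int row, int col);
+void apply_stripe_filter(void *ctx, int row, int col);  /* per unit_lrtype */
+void restore_stripe_boundary(void *ctx, int row, int col);
+
+/* One LR job: filter every 64x64 LR unit in a row of height sb_size. */
+void lr_filter_row_sketch(void *ctx, int row, int frame_width) {
+    for (int col = 0; col < frame_width; col += 64) {
+        setup_stripe_boundary(ctx, row, col);   /* stored CDEF/LF boundaries */
+        apply_stripe_filter(ctx, row, col);
+        restore_stripe_boundary(ctx, row, col); /* back to stripe_buffer */
+    }
+}
+```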
 
 ## Multi-Threaded Architecture
 
-Parallelism in the decoder could be achieved at multiple levels. Each thread could, for example, be performing a different task in the decoding pipeline. The decoder will use tile level parallelism for tile parsing jobs. Decoder reconstruction jobs will use tile row-level parallelism, whereas all the post-processing filter jobs will use frame row-level parallelism.
-
-Let **N** be the number of threads configured for the decoder. The decoder library created **(N-1)** threads, which are called **worker threads**. The application thread which calls the decode process is called in the **main thread**. Together the decoder will have **N** working threads.
+Parallelism in the decoder can be achieved at multiple levels. Each thread
+could, for example, be performing a different task in the decoding pipeline.
+The decoder uses tile-level parallelism for tile parsing jobs. Decoder
+reconstruction jobs use tile row-level parallelism, whereas all the
+post-processing filter jobs use frame row-level parallelism.
+
+Let **N** be the number of threads configured for the decoder. The decoder
+library creates **(N-1)** threads, which are called **worker threads**. The
+application thread that calls the decode process is called the **main
+thread**. Together the decoder has **N** working threads.
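+
+A minimal sketch of this thread setup, assuming POSIX threads and a
+hypothetical worker entry point:
+
+```c
+#include <pthread.h>
+
+void *dec_worker_entry(void *dec_ctx); /* picks jobs until the frame is done */
+
+/* With N configured threads, the library spawns N-1 workers; the calling
+ * (main) thread then joins the job loop itself as the Nth working thread. */
+int spawn_workers_sketch(pthread_t *workers, int n_threads, void *dec_ctx) {
+    for (int i = 0; i < n_threads - 1; ++i)
+        if (pthread_create(&workers[i], NULL, dec_worker_entry, dec_ctx) != 0)
+            return -1; /* creation failed */
+    return 0;
+}
+```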
 
 The main thread will perform the following inside the decoder:
 
@@ -236,9 +277,14 @@ Figure 11 shows the flow chart of the wo
 
 ### Tile level Parallelism
 
-The decoder will use tile level parallelism for tile parsing jobs. Let **T** be the number of tiles present in Frame\_i and let **L** be the number of threads working on this frame. Each thread will try to pick up a tile parsing job and execute it as shown in Figure 12 and Figure 13 below.
-
-Please note that the thread number and tile number need not match. Each thread can pick any tile based on job availability. The pictures are just for understanding purpose only.
+The decoder will use tile level parallelism for tile parsing jobs. Let **T** be
+the number of tiles present in Frame\_i and let **L** be the number of threads
+working on this frame. Each thread will try to pick up a tile parsing job and
+execute it as shown in Figure 12 and Figure 13 below.
+
+Please note that the thread number and tile number need not match. Each thread
+can pick any tile based on job availability. The figures are for illustration
+purposes only.
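+
+The job pickup itself can be sketched as a shared counter guarded by a mutex
+(an illustrative simplification of the DecMt* structures):
+
+```c
+#include <pthread.h>
+
+typedef struct {
+    pthread_mutex_t lock;
+    int next_tile;  /* next unassigned tile parsing job */
+    int num_tiles;
+} TileJobQueueSketch;
+
+/* Any thread grabs the next available tile, so thread i need not map to
+ * tile i. Returns -1 when no tile parsing job is left for this frame. */
+int pick_parse_job_sketch(TileJobQueueSketch *q) {
+    int tile = -1;
+    pthread_mutex_lock(&q->lock);
+    if (q->next_tile < q->num_tiles)
+        tile = q->next_tile++;
+    pthread_mutex_unlock(&q->lock);
+    return tile;
+}
+```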
 
 ![image12](./img/decoder_tile_parallel_l_gt_t.png)
 <a name = "figure-12"></a>
@@ -250,9 +296,17 @@ Please note that the thread number and t
 
 ### Tile Row-level Parallelism
 
-Decoder reconstruction uses tile row-level parallelism. Wavefront Processing (WPP) will be used to handle data dependencies. Figure 14 shows 9 threads reconstructing 4 Tiles in Frame\_i with Tile Row-level parallelism. Each thread picks a Tile row MT job and works in a WPP manner.
-
-Each thread will try to pick a unique tile that has not yet processed any row and continues to pick the tile-row jobs from the same tile until no more jobs are present in the same tile. If all the jobs in current tile are picked, it switches to the new tile with maximum number of jobs to be processed. If a unique tile that has not yet processed any row is not found, it picks the tile with maximum number of jobs to be processed.
+Decoder reconstruction uses tile row-level parallelism. Wavefront Processing
+(WPP) will be used to handle data dependencies. Figure 14 shows 9 threads
+reconstructing 4 Tiles in Frame\_i with Tile Row-level parallelism. Each thread
+picks a Tile row MT job and works in a WPP manner.
+
+Each thread first tries to pick a unique tile in which no row has yet been
+processed, and it continues to pick tile-row jobs from that tile until no more
+jobs are present in it. If all the jobs in the current tile are picked, the
+thread switches to the tile with the maximum number of jobs still to be
+processed. If no unique tile with unprocessed rows is found, it likewise picks
+the tile with the maximum number of jobs still to be processed, as sketched
+below.
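+
+A minimal sketch of this selection policy (illustrative only; the real
+DecMt* bookkeeping is richer and runs under the job mutex):
+
+```c
+typedef struct { int rows_started, rows_total; } TileRowJobsSketch;
+
+/* Returns the tile to work on next, or -1 if no tile-row job remains.
+ * The caller is assumed to hold the job-selection mutex. */
+int pick_recon_tile_sketch(const TileRowJobsSketch *t, int num_tiles) {
+    for (int i = 0; i < num_tiles; ++i)          /* prefer an untouched tile */
+        if (t[i].rows_started == 0 && t[i].rows_total > 0)
+            return i;
+    int best = -1, best_remaining = 0;           /* else: most work remaining */
+    for (int i = 0; i < num_tiles; ++i) {
+        int remaining = t[i].rows_total - t[i].rows_started;
+        if (remaining > best_remaining) {
+            best_remaining = remaining;
+            best = i;
+        }
+    }
+    return best;
+}
+```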
 
 ![image14](./img/decoder_TileRowMT.png)
 <a name = "figure-14"></a>
@@ -260,53 +314,101 @@ Each thread will try to pick a unique ti
 
 ### Frame Row-level Parallelism
 
-All the post-processing filter jobs will use frame row-level parallelism. Wavefront Processing (WPP) will be used to handle data dependencies if required. LF, CDEF, and LR may work with different unit sizes depending on the available parallelism unit instead of SB.
+All the post-processing filter jobs use frame row-level parallelism. Wavefront
+Processing (WPP) is used to handle data dependencies where required. LF, CDEF
+and LR may work with different unit sizes, depending on the available
+parallelism unit, instead of the SB.
 
 ![image15](./img/decoder_5_threads.png)
 <a name = "figure-15"></a>
 ##### Figure 15: Frame\_i with 5 Threads
 
-Figure 15 shows 5 threads applying post-processing filters on Frame\_i. Each thread picks a Frame row MT job and works in a WPP manner.
+Figure 15 shows 5 threads applying post-processing filters on Frame\_i. Each
+thread picks a Frame row MT job and works in a WPP manner.
 
 ### Job Selection and Sync Points in MT
 
-The job selection is controlled using shared memory and mutex. DecMtRowInfo (for Parse Tile, Recon Tile, CDEF Frame, LR Frame), DecMtMotionProjInfo (for Motion Projection), DecMtParseReconTileInfo (for Frame Recon) and DecMtlfFrameInfo (for LF Frame) data structures hold these memory for job selection.
-
-The sync points are controlled using shared memory and mutex. The following are the shared memory used for various syncs inside the decoder stages, like top-right sync.
-
-1. sb\_recon\_row\_parsed: Array to store SB Recon rows in the Tile that have completed the parsing. This will be used for sb decode row start processing. It will be updated after the parsing of each SB row in a tile finished. If the value of this variable is set, the recon of an SB row starts. This check is done before decoding of an SB row in a tile starts inside decode\_tile().
-2. sb\_recon\_completed\_in\_row: Array to store SBs completed in every SB row of Recon stage. Used for top-right sync. It will be updated with number of SBs being reconstructed after a recon of SB finished. If recon of &#39;Top SB&#39; and &#39;top right SB&#39; is done in the previous row, then only decoding of current SB starts. This check is done before the decoding of SB starts inside the function decode\_tile\_row().
-3. sb\_recon\_row\_map: This map is used to store whether the recon of SB row of a tile is finished. Its value is updated after recon of a tile row is done inside decode\_tile() function.  If the recon of &#39;top, top right, current and bottom  SB row&#39; is done, then only LF  of current row starts. This check is done before starting LF inside the function dec\_av1\_loop\_filter\_frame\_mt().
-4. lf\_row\_map: This is an array variable of  SB rows to store whether the LF of the current row is done or not. It will be set after the LF of the current row is done. If the LF of the current and next row is done, then only we start CDEF of the current row. This check is done before CDEF of current row starts inside the function svt\_cdef\_frame\_mt().
-5. cdef\_completed\_for\_row\_map: Array to store whether CDEF of the current row is done or not. It will be set after the CDEF of the current row is done. If the CDEF of current is done, then only we start LR of the current row. This check is done before LR of current row starts inside the function dec\_av1\_loop\_restoration\_filter\_frame\_mt().
-6. Hard-Syncs: The Following are the points where hard syncs, where all threads wait for the completion of the particular stage before going to the next stage, are happening in the decoder.
-  1. Hard Sync after **MV Projection**. svt\_setup\_motion\_field() is the function where this hard-sync happens.
-  2. Hard Sync after **CDEF** only when the upscaling flag is present. svt\_cdef\_frame\_mt() is the function where this hard-sync happens.
-  3. Hard Sync after **LR**. Function where this hard-sync happens is dec\_av1\_loop\_restoration\_filter\_frame\_mt().
+Job selection is controlled using shared memory and a mutex. The DecMtRowInfo
+(for Parse Tile, Recon Tile, CDEF Frame and LR Frame), DecMtMotionProjInfo
+(for Motion Projection), DecMtParseReconTileInfo (for Frame Recon) and
+DecMtlfFrameInfo (for LF Frame) data structures hold this memory for job
+selection.
+
+The sync points are controlled using shared memory and a mutex. The following
+shared memory is used for the various syncs inside the decoder stages, such as
+the top-right sync.
+
+1. sb\_recon\_row\_parsed: Array that flags the SB rows in the tile whose
+   parsing has completed. This is used to decide when the recon of an SB row
+   may start. It is updated after the parsing of each SB row in a tile
+   finishes. If the value for a row is set, the recon of that SB row starts.
+   This check is done before the decoding of an SB row in a tile starts,
+   inside decode\_tile().
+2. sb\_recon\_completed\_in\_row: Array that stores the number of SBs
+   completed in every SB row of the Recon stage. Used for the top-right sync.
+   It is updated with the number of SBs reconstructed once the recon of an SB
+   finishes. Decoding of the current SB starts only if the recon of the 'top
+   SB' and 'top-right SB' in the previous row is done. This check is done
+   before the decoding of an SB starts, inside the function
+   decode\_tile\_row() (see the sketch after this list).
+3. sb\_recon\_row\_map: This map stores whether the recon of an SB row of a
+   tile is finished. Its value is updated after the recon of a tile row is
+   done, inside the decode\_tile() function. Only if the recon of the 'top,
+   top-right, current and bottom SB rows' is done does the LF of the current
+   row start. This check is done before starting LF, inside the function
+   dec\_av1\_loop\_filter\_frame\_mt().
+4. lf\_row\_map: An array, one entry per SB row, that stores whether the LF of
+   a row is done. It is set after the LF of the row is done. Only when the LF
+   of the current and next rows is done does the CDEF of the current row
+   start. This check is done before the CDEF of the current row starts, inside
+   the function svt\_cdef\_frame\_mt().
+5. cdef\_completed\_for\_row\_map: Array that stores whether the CDEF of a row
+   is done. It is set after the CDEF of the row is done. Only when the CDEF of
+   the current row is done does the LR of the current row start. This check is
+   done before the LR of the current row starts, inside the function
+   dec\_av1\_loop\_restoration\_filter\_frame\_mt().
+6. Hard-Syncs: Hard syncs are points where all threads wait for the completion
+   of a particular stage before moving on to the next stage. They happen at
+   the following points in the decoder.
+  1. Hard sync after **MV Projection**. svt\_setup\_motion\_field() is the
+     function where this hard sync happens.
+  2. Hard sync after **CDEF**, only when the upscaling flag is present.
+     svt\_cdef\_frame\_mt() is the function where this hard sync happens.
+  3. Hard sync after **LR**. dec\_av1\_loop\_restoration\_filter\_frame\_mt()
+     is the function where this hard sync happens.
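+
+As an illustration, the top-right sync on sb\_recon\_completed\_in\_row can be
+sketched as follows; the busy-wait stands in for the mutex-based waiting in
+the real decoder:
+
+```c
+/* Before reconstructing SB (row, col), wait until the row above has
+ * completed at least col + 2 SBs, i.e. the top and top-right neighbors. */
+void wait_top_right_sketch(volatile const int *sb_recon_completed_in_row,
+                           int row, int col, int sb_cols) {
+    if (row == 0)
+        return;                        /* the first row has no dependency */
+    int needed = (col + 2 > sb_cols) ? sb_cols : col + 2;
+    while (sb_recon_completed_in_row[row - 1] < needed)
+        ;                              /* real code waits on a mutex instead */
+}
+```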
 
 ## Frame Level Buffers
 
 The following are some important buffers used in the decoder.
 
-| **Structure Description** | **Granularity** |
-| --- | --- |
-| BlockModeInfo | 4x4 |
-| SB info | SB |
-| TransformInfo | 4x4 |
-| Coeff | 4x4 |
-| Delta Q &amp; Delta LF Params | SB |
-| cdef\_strength | 64x64 |
-| p\_mi\_offset | 4x4 |
+| **Structure Description**     | **Granularity** |
+| ---                           | ---             |
+| BlockModeInfo                 | 4x4             |
+| SB info                       | SB              |
+| TransformInfo                 | 4x4             |
+| Coeff                         | 4x4             |
+| Delta Q & Delta LF Params     | SB              |
+| cdef\_strength                | 64x64           |
+| p\_mi\_offset                 | 4x4             |
 
 Table 1 Important Frame level buffers
 
 ### BlockModeInfo
 
-This buffer contains block info required for **Recon**. It is allocated for worst-case every 4x4 block for the entire frame.
-
-Even though the buffer is allocated for every 4x4 in the frame, the structure is not replicated for every 4x4 block. Instead, each block has associated with only one structure even if the block size is more than 4x4. A map with an offset from the start is used for neighbor access purposes. This reduced the need for replication of data structure and better cache efficient usage.
+This buffer contains the block info required for **Recon**. It is allocated,
+in the worst case, for every 4x4 block in the entire frame.
 
-Figure 15 shows a sample superblock split to multiple blocks, numbered from 0 to 18. So 19 BlockModeInfo structures are **continuously populated** from SB start location, corresponding to each block (Instead of replicating the structures for all the 1024 4x4 blocks).  Assume this is the first SB in the picture, then Figure 16 shows the map with offset for each location in the SB and stored in p\_mi\_offset buffer. This map will be used for deriving neighbor BlockModeInfo structure at any location if needed.
+Even though the buffer is allocated for every 4x4 in the frame, the structure
+is not replicated for every 4x4 block. Instead, each block is associated with
+only one structure, even if the block size is larger than 4x4. A map with an
+offset from the start is used for neighbor access. This reduces the need for
+replication of the data structure and makes cache usage more efficient.
+
+Figure 15 shows a sample superblock split into multiple blocks, numbered from
+0 to 18. So 19 BlockModeInfo structures are **contiguously populated** from
+the SB start location, one corresponding to each block (instead of replicating
+the structures for all 1024 4x4 blocks). Assuming this is the first SB in the
+picture, Figure 16 shows the map with the offset for each location in the SB,
+stored in the p\_mi\_offset buffer. This map is used for deriving the neighbor
+BlockModeInfo structure at any location when needed, as sketched below.
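+
+A minimal sketch of such a lookup, assuming hypothetical names:
+
+```c
+typedef struct { int bsize; /* ...mode data... */ } ModeInfoSketch;
+
+/* p_mi_offset stores, for every 4x4 position, the index of the single
+ * BlockModeInfo structure covering that position. */
+const ModeInfoSketch *get_mi_sketch(const ModeInfoSketch *mi_pool,
+                                    const int *p_mi_offset, int mi_stride,
+                                    int mi_row, int mi_col) {
+    return &mi_pool[p_mi_offset[mi_row * mi_stride + mi_col]];
+}
+
+/* Example: the left neighbor of the block at (mi_row, mi_col) is
+ * get_mi_sketch(pool, map, stride, mi_row, mi_col - 1), if it exists. */
+```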
 
 ![image16](./img/decoder_Nbr.png)
 <a name = "figure-16"></a>
@@ -318,19 +420,24 @@ Figure 15 shows a sample superblock spli
 
 ### SB info
 
-This buffer stores SB related data. It is allocated for each SB for the entire frame.
+This buffer stores SB-related data. It is allocated for each SB in the entire
+frame.
 
 ### TransformInfo
 
-Transform info of a TU unit is stored in this buffer. It is allocated for each TU unit, the worst case for each 4x4 in a frame.
+The transform info of a TU is stored in this buffer. It is allocated for each
+TU; in the worst case, one for each 4x4 in a frame.
 
 ### Coeff
 
-This buffer contains coeff of each mi\_unit (4x4). Each mi\_unit contains 16 coeffs. For ST, it is allocated for each 4x4 unit for an SB, whereas for MT it is at each 4x4 for the entire frame.
+This buffer contains the coefficients of each mi\_unit (4x4). Each mi\_unit
+contains 16 coefficients. For ST, it is allocated for each 4x4 unit of one SB,
+whereas for MT it is allocated for each 4x4 of the entire frame.
 
 ### delta\_q
 
- This buffer is used to store delat\_q params and is allocated at the SB level for the entire frame.
+This buffer is used to store delta\_q params and is allocated at the SB level
+for the entire frame.
 
 ### Delta\_lf
 
@@ -394,7 +501,7 @@ The following are the high-level data st
     + 1: Indicates that the allow\_warped\_motion syntax element may be present
     + 0: Indicates that the allow\_warped\_motion syntax element will not be present
   * **uint8\_t film\_grain\_params\_present** Specifies whether film grain parameters are present in the coded video sequence
-  * **uint8\_t frame\_height\_bits**  Specifies the number of bits minus 1 used for transmitting the frame height syntax elements
+  * **uint8\_t frame\_height\_bits** Specifies the number of bits minus 1 used for transmitting the frame height syntax elements
   * **uint8\_t frame\_id\_length** Used to calculate the number of bits used to encode the frame\_id syntax element.
   * **uint8\_t frame\_id\_numbers\_present\_flag** Specifies whether frame id numbers are present in the coded video sequence
   * **uint8\_t frame\_width\_bits** Specifies the number of bits minus 1 used for transmitting the frame width syntax elements
diff -pruN 0.9.1+dfsg-1/Docs/svt-av1_decoder_user_guide.md 1.2.0+dfsg-2/Docs/svt-av1_decoder_user_guide.md
--- 0.9.1+dfsg-1/Docs/svt-av1_decoder_user_guide.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/svt-av1_decoder_user_guide.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Scalable Video Technology for AV1 Decoder (SVT-AV1 Decoder) User Guide
 
 ## Table of Contents
@@ -8,15 +10,22 @@
 
 ## Introduction
 
-This document describes how to use the Scalable Video Technology for AV1 Decoder (SVT-AV1). In particular, this user guide describes how to run the sample application with the respective dynamically linked library.
+This document describes how to use the Scalable Video Technology for AV1
+Decoder (SVT-AV1). In particular, this user guide describes how to run the
+sample application with the respective dynamically linked library.
 
 ## Sample Application Guide
 
-This section describes how to run the sample decoder application that uses the SVT-AV1 Decoder library. It describes the command line input parameters and the resulting outputs.
+This section describes how to run the sample decoder application that uses the
+SVT-AV1 Decoder library. It describes the command line input parameters and the
+resulting outputs.
 
 ### Running the decoder
 
-This section describes how to run the sample decoder application `SvtAv1DecApp.exe` (on Windows\*) or `SvtAv1DecApp` (on Linux\*) from the command line, including descriptions of the most commonly used input parameters and outputs.
+This section describes how to run the sample decoder application
+`SvtAv1DecApp.exe` (on Windows\*) or `SvtAv1DecApp` (on Linux\*) from the
+command line, including descriptions of the most commonly used input parameters
+and outputs.
 
 The sample application typically takes the following command line parameters:
 
diff -pruN 0.9.1+dfsg-1/Docs/svt-av1-encoder-design.md 1.2.0+dfsg-2/Docs/svt-av1-encoder-design.md
--- 0.9.1+dfsg-1/Docs/svt-av1-encoder-design.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/svt-av1-encoder-design.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,5 @@
+[Top level](../README.md)
+
 # Encoder Design for SVT-AV1 (Scalable Video Technology for AV1 Encoder)
 
 ## Table of Contents
@@ -74,7 +76,13 @@
 
 # Introduction
 
-This document describes the Intel SVT-AV1 encoder design. In particular, the encoder block diagram and the system resource manager are described. A brief description of the various processes involved in the encoder pipeline is also presented. Details on the encoder processes are included in appendices. This document is meant to be an accompanying document to the “C Model” source code, which contains the more specific details of the inner workings of each algorithm.
+This document describes the Intel SVT-AV1 encoder design. In particular, the
+encoder block diagram and the system resource manager are described. A brief
+description of the various processes involved in the encoder pipeline is also
+presented. Details on the encoder processes are included in appendices. This
+document is meant to be an accompanying document to the “C Model” source code,
+which contains the more specific details of the inner workings of each
+algorithm.
 
 # Definitions
 
@@ -82,34 +90,38 @@ This section contains definitions used t
 
 ### General Definitions
 
-|  **Term** |  **Definition** |
-|---|---|
-| Picture  |  Collection of luma and chroma samples assembled into rectangular regions with a width, height and sample bit-depth. |
-|  Super block (SB) |  A square block of luma and chroma samples defined to have a size of either 64x64 or 128x128 luma samples. |
-|Block|  A square or rectangular region of data that is part of a SB and that is obtained through the partitioning of the SB. |
-| Transform block  | A square or rectangular region of data whose size is the same as or smaller than the size of the corresponding block. |
-| Bitstream  | A collection of bits corresponding to entropy coded data. |
-| Syntax elements  | Pre-entropy coder encoded symbols used during the decoding reconstruction process.|
-| Tiles  |  A rectangular collection of SBs which are independently decodable. |
-| Groups-of-Pictures (GoP)  | A collection of pictures with a particular referencing structure. |
-| SAD  | Sum of absolute differences, representing the sum of absolute values of sample differences; distortion measurement. |
-| SSE  | Sum of squared sample error; distortion measurement. |
+| **Term**                 | **Definition**                                                                                                        |
+| ---                      | ---                                                                                                                   |
+| Picture                  | Collection of luma and chroma samples assembled into rectangular regions with a width, height and sample bit-depth.   |
+| Super block (SB)         | A square block of luma and chroma samples defined to have a size of either 64x64 or 128x128 luma samples.             |
+| Block                    | A square or rectangular region of data that is part of an SB and that is obtained through the partitioning of the SB. |
+| Transform block          | A square or rectangular region of data whose size is the same as or smaller than the size of the corresponding block. |
+| Bitstream                | A collection of bits corresponding to entropy coded data.                                                             |
+| Syntax elements          | Pre-entropy coder encoded symbols used during the decoding reconstruction process.                                    |
+| Tiles                    | A rectangular collection of SBs which are independently decodable.                                                    |
+| Groups-of-Pictures (GoP) | A collection of pictures with a particular referencing structure.                                                     |
+| SAD                      | Sum of absolute differences, representing the sum of absolute values of sample differences; distortion measurement.   |
+| SSE                      | Sum of squared sample error; distortion measurement.                                                                  |
 
 
 ### Source Partitioning
 
-The source video is partitioned into various groupings of various spatial and temporal divisions.
-The following partitions and nomenclature are used extensively within this document and the source code.
-Furthermore, the following partitioning scheme determines data flow and influences algorithmic designs.
-At the highest level, pictures in the source video are grouped into groups of pictures (GoPs)
-that are defined according the prediction structure. Figure 1 shows an example of the relationship between
-pictures contained in a five-layer prediction structure where each frame references only one picture in
-each direction. In a prediction structure, each picture is of a particular prediction type and belongs to
-a specific temporal layer. Also, each picture might reference other pictures depending on its picture type
-and it might be referenced itself multiple times or not at all depending on the Prediction Structure used
-and the picture’s relative position within the period. In the example shown in Figure 1, Pictures 0 and 16 are
-said to belong to temporal layer 0 or base layer, whereas pictures 1, 3, 5, 7, 9, 11, 13 and 15 are said to
-belong to the non-reference layer or temporal layer 4.
+The source video is partitioned into various groupings of various spatial and
+temporal divisions. The following partitions and nomenclature are used
+extensively within this document and the source code. Furthermore, the
+following partitioning scheme determines data flow and influences algorithmic
+designs. At the highest level, pictures in the source video are grouped into
+groups of pictures (GoPs) that are defined according to the prediction structure.
+Figure 1 shows an example of the relationship between pictures contained in a
+five-layer prediction structure where each frame references only one picture in
+each direction. In a prediction structure, each picture is of a particular
+prediction type and belongs to a specific temporal layer. Also, each picture
+might reference other pictures depending on its picture type and it might be
+referenced itself multiple times or not at all depending on the Prediction
+Structure used and the picture’s relative position within the period. In the
+example shown in Figure 1, Pictures 0 and 16 are said to belong to temporal
+layer 0 or base layer, whereas pictures 1, 3, 5, 7, 9, 11, 13 and 15 are said
+to belong to the non-reference layer or temporal layer 4.
 
 ![image1](./img/image1.png)
 <a name = "figure-1"></a>
@@ -194,28 +206,34 @@ processing is shown in Figure 5.
 
 ## Inter-process data and control management
 
-*System resource managers* perform inter-process data and control
-management. They manage *objects* and connect processes to one another
-by controlling how objects are passed. Objects encapsulate data and
-control information and are organized into four types: results, sequence
-control sets, picture control sets, and picture descriptors. Objects are
-described later in this section.
-
-Figure 6 shows a block diagram of a system resource manager. As depicted
-in the diagram, the empty object path begins when an empty object from the
-empty object FIFO is assigned to one of N producer processes. The producer
-process fills the empty object with data and control information and queues
-the now full object onto the full object FIFO. In a similar manner, the full object path begins when a full object from
-the full object FIFO is assigned to one of M consumer processes. The consumer
-process uses the information in the full object and completes the data path by
-queuing the now empty object back onto the original empty object FIFO. To better
-understand how the encoder block diagram in Figure 4 and the system resource manager
-block diagram in Figure 6 relate to one another, we have used matching line colors to
-indicate corresponding object flow. It is important to note that each encoder process
-acts as both a producer and consumer of objects to processes occurring later, and
-respectively, earlier in the encoder pipeline.
-
-The system resource manager dynamically assigns objects to processes to minimize idle process time. In addition, separate coordination of the empty and full object paths allows a great deal of configuration flexibility. This flexibility is important when, for example, producer and consumer processes require differing amounts of computational resources. In this case, a system resource manager may have N producers and M consumers where N is not equal to M.
+*System resource managers* perform inter-process data and control management.
+They manage *objects* and connect processes to one another by controlling how
+objects are passed. Objects encapsulate data and control information and are
+organized into four types: results, sequence control sets, picture control
+sets, and picture descriptors. Objects are described later in this section.
+
+Figure 6 shows a block diagram of a system resource manager. As depicted in the
+diagram, the empty object path begins when an empty object from the empty
+object FIFO is assigned to one of N producer processes. The producer process
+fills the empty object with data and control information and queues the now
+full object onto the full object FIFO. In a similar manner, the full object
+path begins when a full object from the full object FIFO is assigned to one of
+M consumer processes. The consumer process uses the information in the full
+object and completes the data path by queuing the now empty object back onto
+the original empty object FIFO. To better understand how the encoder block
+diagram in Figure 4 and the system resource manager block diagram in Figure 6
+relate to one another, we have used matching line colors to indicate
+corresponding object flow. It is important to note that each encoder process
+acts both as a producer of objects for processes occurring later in the
+encoder pipeline and as a consumer of objects from processes occurring earlier.
+
+The system resource manager dynamically assigns objects to processes to
+minimize idle process time. In addition, separate coordination of the empty and
+full object paths allows a great deal of configuration flexibility. This
+flexibility is important when, for example, producer and consumer processes
+require differing amounts of computational resources. In this case, a system
+resource manager may have N producers and M consumers where N is not equal to
+M.
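+
+A minimal sketch of the empty/full object round trip, using POSIX threads
+(illustrative only; the actual system resource manager APIs differ):
+
+```c
+#include <pthread.h>
+
+/* Tiny blocking FIFO; capacity checks are omitted for brevity. */
+typedef struct {
+    void *slots[64];
+    int head, count;
+    pthread_mutex_t lock;
+    pthread_cond_t nonempty;
+} FifoSketch;
+
+void *fifo_get(FifoSketch *f) {          /* blocks until an object arrives */
+    pthread_mutex_lock(&f->lock);
+    while (f->count == 0)
+        pthread_cond_wait(&f->nonempty, &f->lock);
+    void *obj = f->slots[f->head];
+    f->head = (f->head + 1) % 64;
+    f->count--;
+    pthread_mutex_unlock(&f->lock);
+    return obj;
+}
+
+void fifo_put(FifoSketch *f, void *obj) {
+    pthread_mutex_lock(&f->lock);
+    f->slots[(f->head + f->count) % 64] = obj;
+    f->count++;
+    pthread_cond_signal(&f->nonempty);
+    pthread_mutex_unlock(&f->lock);
+}
+
+/* Producer: empty FIFO -> fill -> full FIFO. Consumer: full FIFO -> use ->
+ * original empty FIFO, completing the object's round trip. */
+void producer_step(FifoSketch *empty, FifoSketch *full) { fifo_put(full, fifo_get(empty)); }
+void consumer_step(FifoSketch *full, FifoSketch *empty) { fifo_put(empty, fifo_get(full)); }
+```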
 
 ![image6](./img/image6.png)
 <a name = "figure-6"></a>
@@ -449,13 +467,15 @@ sequence. The relevant data structure is
 ### Picture Control Set (PCS)
 
 The Picture Control Set contains the information for individually coded
-pictures. The information is split between Picture Parent Control Set
-and Picture Control Set. Picture Parent Control Set is used in the first few processes
-in the pipeline (from Resource Coordination process to Source-Based Operations process) and lasts the whole time the picture is in the encoder pipeline. The Picture
-control set includes a pointer to the Picture Parent Control Set and
-includes also additional information needed in the subsequent processes
-starting at the Picture Manager process (i.e. in the closed loop). The relevant data structures
-are PictureParentControlSet\_s and PictureControlSet\_s.
+pictures. The information is split between Picture Parent Control Set and
+Picture Control Set. Picture Parent Control Set is used in the first few
+processes in the pipeline (from Resource Coordination process to Source-Based
+Operations process) and lasts the whole time the picture is in the encoder
+pipeline. The Picture Control Set includes a pointer to the Picture Parent
+Control Set and also includes additional information needed in the subsequent
+processes, starting at the Picture Manager process (i.e. in the closed loop).
+The relevant data structures are PictureParentControlSet\_s and
+PictureControlSet\_s.
 
 ##### <a name = "table-3"> Table 3: Examples of Picture Parent Control Set Members </a>
 
@@ -573,7 +593,12 @@ flags, as shown in the table below.
 
 ### Motion Estimation Process
 
-The Motion Estimation (ME) process performs motion estimation.  This process has access to the current input picture as well as to the input pictures the current picture uses as references according to the prediction structure pattern. The Motion Estimation process is multithreaded, so pictures can be processed out of order as long as all inputs are available. More details are available in the motion estimation appendix.
+The Motion Estimation (ME) process performs motion estimation. This process has
+access to the current input picture as well as to the input pictures the
+current picture uses as references according to the prediction structure
+pattern. The Motion Estimation process is multithreaded, so pictures can be
+processed out of order as long as all inputs are available. More details are
+available in the motion estimation appendix.
 
 ### Initial Rate Control Process
 
@@ -593,7 +618,9 @@ in the Initial Rate Control process is i
 ### Source-based Operations Process
 
 Source-based operations process involves several analysis algorithms to
-identify spatiotemporal characteristics of the input pictures. Additional analysis is performed using the Temporal Dependency Model algorithm discussed in the TPL appendix.
+identify spatiotemporal characteristics of the input pictures. Additional
+analysis is performed using the Temporal Dependency Model algorithm discussed
+in the TPL appendix.
 
 ### Picture Manager Process
 
@@ -639,18 +666,28 @@ Picture into the pipeline.
 
 ### Rate Control Process
 
-The Rate Control process uses the distortion and image statistics
-generated in previous processes, the current picture’s bit budget, and
-previous picture statistics to set the QP and the bit budget for each
-picture. The encoder currently supports VBR -type of rate control. Details of the rate control algorithm are discussed in the rate control appendix.
+The Rate Control process uses the distortion and image statistics generated in
+previous processes, the current picture’s bit budget, and previous picture
+statistics to set the QP and the bit budget for each picture. The encoder
+currently supports VBR-type rate control. Details of the rate control
+algorithm are discussed in the rate control appendix.
 
 ### Mode Decision Configuration Process
 
-The Mode Decision Configuration Process operates at the picture-level, and involves several initialization steps, such as setting flags for some features (e.g. OBMC, warped motion, etc.). Examples of the initializations include initializations for picture chroma QP offsets, CDEF strength, self-guided restoration filter parameters, quantization parameters, lambda arrays, and syntax, mv and coefficient rate estimation arrays.
+The Mode Decision Configuration Process operates at the picture-level, and
+involves several initialization steps, such as setting flags for some features
+(e.g. OBMC, warped motion, etc.). Examples of the initializations include
+initializations for picture chroma QP offsets, CDEF strength, self-guided
+restoration filter parameters, quantization parameters, lambda arrays, and
+syntax, mv and coefficient rate estimation arrays.
 
 ### Mode Decision
 
-The mode decision (MD) process involves selecting the partitioning and coding modes for each SB in the picture.  The process acts on each SB and produces AV1 conformant reconstructed samples for the picture, as well as all mode information used for coding each block. More details on the Mode Decision process are included in the mode decision appendix.
+The mode decision (MD) process involves selecting the partitioning and coding
+modes for each SB in the picture. The process acts on each SB and produces AV1
+conformant reconstructed samples for the picture, as well as all mode
+information used for coding each block. More details on the Mode Decision
+process are included in the mode decision appendix.
 
 ### Deblocking Loop Filter Process
 
@@ -662,7 +699,7 @@ all horizontal edges.
 
 The steps involved in the deblocking filter are as follows:
 
-1.  Determine the loopfilter level and sharpness. Both are frame level parameters.
+1. Determine the loopfilter level and sharpness. Both are frame level parameters.
     * The level takes value in \[0, 63\] and can be set using different methods:
 
       * 0 to disable filtering,
@@ -673,17 +710,17 @@ The steps involved in the deblocking fil
 
     * The sharpness takes value in \[0, 7\]. For keyframes, sharpness=0, else sharpness is set to the input sharpness setting.
 
-2.  Identify edges to filter.
+2. Identify edges to filter.
 
-3.  Determine adaptive filter strength parameters: lvl, limit, blimit
+3. Determine adaptive filter strength parameters: lvl, limit, blimit
     and thresh. These are block level properties. They build on the
     frame level settings and include refinements based on segmentation,
     coding mode, reference picture and loop filter data.
 
-4.  Determine filter masks: High edge variance mask (hevMask),
+4. Determine filter masks: High edge variance mask (hevMask),
     fiterMask, flatMask and flatMask2.
 
-5.  Select and apply filters.
+5. Select and apply filters.
 
 A more detailed description of the deblocking loop filter is presented in the Appendix.
 
@@ -698,7 +735,10 @@ The filtering is applied on an 8x8 block
 
 The filtering algorithm involves the following steps:
 
-  1. Identify the direction of the block (i.e. direction of edges). Eight directions (0 to 7) could be identified. The search is performed on an 8x8 block basis. The search is performed for the luma component and the direction is assumed to be the same for the chroma components.
+  1. Identify the direction of the block (i.e. direction of edges). Eight
+     directions (0 to 7) could be identified. The search is performed on an 8x8
+     block basis. The search is performed for the luma component and the
+     direction is assumed to be the same for the chroma components.
 
   2. Apply a nonlinear filter along the edge in the identified direction.
       * Primary filtering: Filter taps are aligned in the direction of the block. The main goal is to address ringing artifacts.
@@ -748,7 +788,8 @@ picture-decoding order.
 
 ## Detailed Feature Implementation Design Appendices
 
-The following appendices highlight the design and implementation of features in much greater detail than this document.
+The following appendices highlight the design and implementation of features in
+much greater detail than this document.
 
 - [Adaptive Prediction Structure Appendix](Appendix-Adaptive-Prediction-Structure.md)
 - [Altref and Overlay Pictures Appendix](Appendix-Alt-Refs.md)
@@ -773,8 +814,12 @@ The following appendices highlight the d
 - [Super-resolution Appendix](Appendix-Super-Resolution.md)
 - [Temporal Dependency Model](Appendix-TPL.md)
 - [Transform Search Appendix](Appendix-TX-Search.md)
+- [Reference Scaling Appendix](Appendix-Reference-Scaling.md)
 
 
 ## Notes
 
-The information in this document was compiled at <mark>v0.9.0</mark> may not reflect the latest status of the encoder design. For the most up-to-date settings and implementation, it's recommended to visit the section of the code implementing the feature / section in question.
+The information in this document was compiled at <mark>v1.2.0</mark> and may
+not reflect the latest status of the encoder design. For the most up-to-date
+settings and implementation, it's recommended to visit the section of the code
+implementing the feature / section in question.
diff -pruN 0.9.1+dfsg-1/Docs/svt-av1_encoder_user_guide.md 1.2.0+dfsg-2/Docs/svt-av1_encoder_user_guide.md
--- 0.9.1+dfsg-1/Docs/svt-av1_encoder_user_guide.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/svt-av1_encoder_user_guide.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,3 +1,7 @@
+[Top level](../README.md)
+
+See also the [list of all parameters](Parameters.md).
+
 # Scalable Video Technology for AV1 Encoder (SVT-AV1 Encoder) User Guide
 
 ## Table of Contents
@@ -8,15 +12,18 @@
     - [Compressed 10-bit format](#compressed-10-bit-format)
     - [Running the encoder](#running-the-encoder)
     - [Sample command lines](#sample-command-lines)
-    - [List of all configuration parameters](#list-of-all-configuration-parameters)
 
 ## Introduction
 
-This document describes how to use the Scalable Video Technology for AV1 Encoder (SVT-AV1). In particular, this user guide describes how to run the sample application with the respective dynamically linked library.
+This document describes how to use the Scalable Video Technology for AV1
+Encoder (SVT-AV1). In particular, this user guide describes how to run the
+sample application with the respective dynamically linked library.
 
 ## Sample Application Guide
 
-This section describes how to run the sample encoder application that uses the SVT-AV1 Encoder library. It describes the input video format, the command line input parameters and the resulting outputs.
+This section describes how to run the sample encoder application that uses the
+SVT-AV1 Encoder library. It describes the input video format, the command line
+input parameters and the resulting outputs.
 
 ### Input Video Format
 
@@ -30,25 +37,40 @@ _10-bit yuv420p10le_\
 
 ### Compressed 10-bit format
 
-In order to reduce the size of the input original YUV file, the SVT-AV1 Encoder uses a compressed 10-bit format allowing the software to achieve a higher speed and channel density levels. The conversion between the 10-bit yuv420p10le and the compressed 10-bit format is a lossless operation and is performed using the following steps.
+In order to reduce the size of the input original YUV file, the SVT-AV1 Encoder
+uses a compressed 10-bit format that allows the software to achieve higher
+speed and channel density levels. The conversion between the 10-bit yuv420p10le
+and the compressed 10-bit format is a lossless operation and is performed using
+the following steps.
 
 #### Unpack the 10-bit picture
 
-This step consists of separating the 10 bit video samples into 8 bit and 2 bit planes so that each 10-bit picture will be represented as two separate pictures as shown in the figure below. As a result of the operation, the 2 least significant bits of the 10 bits will be written into a full byte.
+This step consists of separating the 10-bit video samples into 8-bit and 2-bit
+planes so that each 10-bit picture will be represented as two separate pictures
+as shown in the figure below. As a result of the operation, the 2 least
+significant bits of each 10-bit sample will be written into a full byte.
 
 _10-bit yuv420p10le unpacked_\
 ![10-bit yuv420p10le unpacked](img/10bit_unpacked.png "10-bit yuv420p10le unpacked")
 
 #### Compress the 2 bit Plane
 
-The unpacking steps separates the 10bits into a group of 8 bits and a group of 2 bits, where the 2 bits are stored in a byte. In this step, every group of consecutive 4 bytes, each containing 2bits from the unpacking step, are compressed into one byte. As a result, each 10bit picture will be represented as two separate pictures as shown in the figure below.
+The unpacking step separates the 10 bits into a group of 8 bits and a group of
+2 bits, where the 2 bits are stored in a byte. In this step, every group of 4
+consecutive bytes, each containing 2 bits from the unpacking step, is
+compressed into one byte. As a result, each 10-bit picture will be represented
+as two separate pictures as shown in the figure below.
 
 _10-bit yuv420p10le compressed_\
 ![10-bit yuv420p10le compressed](img/10bit_packed.png "10-bit yuv420p10le compressed")
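+
+A minimal sketch of the split-and-pack operation; the bit order within the
+packed byte is an assumption for illustration:
+
+```c
+#include <stdint.h>
+
+/* Each 10-bit sample is split into an 8-bit MSB byte and 2 LSBs; four 2-bit
+ * values are then packed into one byte. The operation is lossless. */
+void pack_2bit_plane_sketch(const uint16_t *src10, uint8_t *msb8,
+                            uint8_t *lsb2_packed,
+                            int num_samples /* multiple of 4 assumed */) {
+    for (int i = 0; i < num_samples; i += 4) {
+        uint8_t packed = 0;
+        for (int j = 0; j < 4; ++j) {
+            uint16_t s = src10[i + j];
+            msb8[i + j] = (uint8_t)(s >> 2);                  /* top 8 bits */
+            packed |= (uint8_t)((s & 0x3) << (6 - 2 * j));    /* 2 LSBs     */
+        }
+        lsb2_packed[i / 4] = packed;
+    }
+}
+```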
 
 #### Unroll the 64x64
 
-Now for a faster read of the samples, every 64x64 block of the 2 bit picture should be written into a one dimensional array. Therefore, the top left 64x64 sample block which is now written into a 16 bytes x 64 bytes after the compression of the 2bit samples, will be written into a 1024 bytes x 1 byte array as shown in the picture below.
+Now, for a faster read of the samples, every 64x64 block of the 2-bit picture
+should be written into a one-dimensional array. Therefore, the top-left 64x64
+sample block, which after the compression of the 2-bit samples occupies 16
+bytes x 64 rows, will be written into a 1024 bytes x 1 byte array as shown in
+the picture below.
 
 _64x64 block after 2 bit compression_\
 ![64x64 block after 2 bit compression](img/64x64_after_2bit_compression.png "64x64 block after 2 bit compression")
@@ -58,21 +80,31 @@ _64x64 block after unrolling_\
 
 ### Running the encoder
 
-This section describes how to run the sample encoder application `SvtAv1EncApp.exe` (on Windows\*) or `SvtAv1EncApp` (on Linux\*) from the command line, including descriptions of the most commonly used input parameters and outputs.
+This section describes how to run the sample encoder application
+`SvtAv1EncApp.exe` (on Windows\*) or `SvtAv1EncApp` (on Linux\*) from the
+command line, including descriptions of the most commonly used input parameters
+and outputs.
 
 The sample application typically takes the following command line parameters:
 
 `-c filename` **[Optional]**
 
-A text file that contains encoder parameters such as input file name, quantization parameter etc. Refer to the comments in the Config/Sample.cfg for specific details. The list of encoder parameters are also listed below. Note that command line parameters take precedence over the parameters included in the configuration file when there is a conflict.
+A text file that contains encoder parameters such as the input file name,
+quantization parameter, etc. Refer to the comments in Config/Sample.cfg for
+specific details. The encoder parameters are also listed below. Note that
+command line parameters take precedence over the parameters included in the
+configuration file when there is a conflict.
 
 `-i filename` **[Required]**
 
-A YUV file (e.g. 8 bit 4:2:0 planar) containing the video sequence that will be encoded. The dimensions of each image are specified by `-w` and `-h` as indicated below.
+A YUV file (e.g. 8 bit 4:2:0 planar) containing the video sequence that will be
+encoded. The dimensions of each image are specified by `-w` and `-h` as
+indicated below.
 
 `-b filename` **[Optional]**
 
-The resulting encoded bit stream file in binary format. If none specified, no output bit stream will be produced by the encoder.
+The resulting encoded bit stream file in binary format. If none is specified,
+no output bit stream will be produced by the encoder.
 
 `-w integer` **[Required]**
 
@@ -84,362 +116,69 @@ The height of each input image in units
 
 `-n integer` **[Optional]**
 
-The number of frames of the sequence to encode. e.g. 100. If the input frame count is larger than the number of frames in the input video, the encoder will loopback to the first frame when it is done.
+The number of frames of the sequence to encode, e.g. 100. If the input frame
+count is larger than the number of frames in the input video, the encoder will
+loop back to the first frame when it is done.
 
 `--keyint integer` **[Optional]**
 
-The intra period defines the interval of frames after which you insert an Intra refresh. It is strongly recommended to use (multiple of 8) -1 the closest to 1 second (e.g. 55, 47, 31, 23 should be used for 60, 50, 30, (24 or 25) respectively). When using closed gop (-irefresh-type 2) add 1 to the value above (e.g. 56 instead of 55).
-
-`--rc integer` **[Optional]**
+The keyint defines the display-order location at which the encoder inserts a
+keyframe. It is recommended to use a value that is a multiple of the mini-GOP
+size (default 16) plus 1, so that the keyframe does not break a mini-GOP
+formation; for example, with the default mini-GOP size of 16, keyint values
+such as 65 (16 x 4 + 1) keep mini-GOPs intact. When using forward frames, it
+is recommended that the keyint value be placed at a multiple of the mini-GOP
+size. The mini-GOP size is given by `1 << hierarchical-levels`.
 
-This token sets the bitrate control encoding mode [1: Variable Bitrate, 0: Constant QP OR Constant Rate Factor]. When `--rc` is set to 1.
+SvtAv1EncApp only: optionally accepts an `s` suffix, indicating that `keyint *
+frame-rate` should be used as the keyint value, e.g. `--keyint 5s` for 5 seconds.
 
-With `--rc` set to 0, if `--crf` is used then enable-tpl-la is forced to 1, however, if `-q`/`--qp` is used then the encoder will work in CRF mode if `--enable-tpl-la` is set to 1 and in CQP mode (fixed qp offsets regardless of the content) when `--enable-tpl-la` is set to 0.
-
-If a qp/crf value is not specified, a default value is assigned (50).
+`--rc integer` **[Optional]**
 
-For example, the following command encodes 100 frames of the YUV video sequence into the bin bit stream file. The picture is 1920 luma pixels wide and 1080 pixels high using the `Sample.cfg` configuration. The QP equals 30 and the md5 checksum is not included in the bit stream.
+The rc token sets the bitrate control encoding mode [0: Constant QP OR Constant
+Rate Factor, 1: Variable Bitrate, 2: Constant Bitrate].
 
-`SvtAv1EncApp.exe -c Sample.cfg -i CrowdRun_1920x1080.yuv -w 1920 -h 1080 -n 100 -q 30 --keyint 31 -b CrowdRun_1920x1080_qp30.bin`
+With `--rc` set to 0, CQP mode (fixed qp offsets regardless of the content) is
+enabled by using `--aq-mode 0`; otherwise CRF mode (the default) is used. If
+`--crf` is set, aq-mode is forced to 2; however, if `-q`/`--qp` is set, the
+encoder uses whatever is set for aq-mode.
 
-It should be noted that not all the encoder parameters present in the `Sample.cfg` can be changed using the command line.
+If a qp/crf value is not specified, a default value is assigned (35).
 
 ### Sample command lines
 
 Here are some sample encode command lines
 
 #### 1 pass CRF at maximum speed from 24fps yuv 1920x1080 input
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 8 -b output.ivf`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 12 -b output.ivf`
+
+#### Multi-pass CRF at maximum quality from 24fps yuv 1920x1080 input
+Single command line:
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 0 --passes 2 -b output.ivf`
+
+or
+
+Multiple command lines:
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 0 --pass 1 --stats stat_file.stat`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 0 --pass 2 --stats stat_file.stat -b output.ivf`
 
 #### 1 pass VBR 1000 Kbps at medium speed from 24fps yuv 1920x1080 input
 `SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 5 -b output.ivf`
 
-#### 2 pass VBR 1000 Kbps at maximum quality from 24fps yuv 1920x1080 input
-1 command line :
+#### Multi-pass VBR 1000 Kbps at maximum quality from 24fps yuv 1920x1080 input
+Single command line:
 
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 0 --irefresh-type 2 --passes 2 --stats stat_file.stat -b output.ivf`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 0 --passes 2 --stats stat_file.stat -b output.ivf`
 
 or
 
-2 command lines :
+Multiple command lines:
 
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 8 --irefresh-type 2 --pass 1 --stats stat_file.stat`
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 0 --irefresh-type 2 --pass 2 --stats stat_file.stat -b output.ivf`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 0 --pass 1 --stats stat_file.stat`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 0 --pass 2 --stats stat_file.stat`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --rc 1 --tbr 1000 --preset 0 --pass 3 --stats stat_file.stat -b output.ivf`
 
 #### 1 pass CRF at maximum speed from 24fps yuv 1920x1080 input with full range video signal
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 8 --color-range 1 -b output.ivf`
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 12 --color-range full -b output.ivf`
 
 #### 1 pass CRF at maximum speed from 24fps yuv 1920x1080 input with colorimetry set to BT.709
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 8 --color-primaries 1 --transfer-characteristics 1 --matrix-coefficients 1 -b output.ivf`
-
-### List of all configuration parameters
-
-The encoder parameters present in the `Sample.cfg` file are listed in this table below along with their status of support, command line parameter and the range of values that the parameters can take.
-
-#### Options
-
-| **Configuration file parameter** | **Command line**   | **Range**  | **Default** | **Description**                                                                                                 |
-|----------------------------------|--------------------|------------|-------------|-----------------------------------------------------------------------------------------------------------------|
-|                                  | --help             |            |             | Shows the command line options currently available                                                              |
-|                                  | --version          |            |             | Shows the version of the library that's linked to the library                                                   |
-| **InputFile**                    | -i                 | any string | None        | Input raw video (y4m and yuv) file path, use `stdin` to read from pipe                                          |
-| **StreamFile**                   | -b                 | any string | None        | Output compressed (ivf) file path, use `stdout` to write to pipe                                                |
-|                                  | -c                 | any string | None        | Configuration file path                                                                                         |
-| **ErrorFile**                    | --errlog           | any string | `stderr`    | Error file path                                                                                                 |
-| **ReconFile**                    | -o                 | any string | None        | Reconstructed yuv file path                                                                                     |
-| **StatFile**                     | --stat-file        | any string | None        | PSNR / SSIM per picture stat output file path, requires `--enable-stat-report 1`                                |
-| **PredStructFile**               | --pred-struct-file | any string | None        | Manual prediction structure file path                                                                           |
-| **Progress**                     | --progress         | [0-2]      | 1           | Verbosity of the output [0: no progress is printed, 2: aomenc style output]                                     |
-| **NoProgress**                   | --no-progress      | [0-1]      | 0           | Do not print out progress [1: `--progress 0`, 0: `--progress 1`]                                                |
-| **EncoderMode**                  | --preset           | [-2-13]    | 12          | Encoder preset, presets < 0 are for debugging. Higher presets mean faster encodes, but with a quality tradeoff  |
-| **SvtAv1Params**                 | --svtav1-params    | any string | None        | Colon-separated list of `key=value` pairs of parameters with keys based on command line options without `--`    |
-|                                  | --nch              | [1-6]      | 1           | Number of channels (library instance) that will be instantiated                                                 |
-
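-For example, to write per-picture PSNR / SSIM stats to a file while sending errors to a separate log (the file names here are placeholders):
-
-`SvtAv1EncApp -i input.y4m -b output.ivf --enable-stat-report 1 --stat-file stats.log --errlog errors.log`
-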
-##### Usage of **SvtAv1Params**
-
-To use the `--svtav1-params` option, the syntax is `--svtav1-params option1=value1:option2=value2...`.
-
-An example is:
-
-```bash
-SvtAv1EncApp \
-  -i input.y4m \
-  -b output.ivf \
-  --svtav1-params \
-  "preset=10:crf=30:irefresh-type=kf:matrix-coefficients=bt709:mastering-display=G(0.2649,0.6900)B(0.1500,0.0600)R(0.6800,0.3200)WP(0.3127,0.3290)L(1000.0,1)"
-```
-
-This will set `--preset` to 10 and `--crf` to 30 inside the API along with some other parameters.
-
-Do note, however, that error checking exists only for invalid keys or values, not for duplicate keys.
-
-For more information on valid values for specific keys, refer to the [EbEncSettings](../Source/Lib/Encoder/Globals/EbEncSettings.c) file.
-
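-Assuming an ffmpeg build that carries the `svtav1-params` wrapper patches included later in this diff, a rough equivalent through ffmpeg would be:
-
-```bash
-ffmpeg -i input.mp4 -c:v libsvtav1 \
-  -svtav1-params "preset=10:crf=30:irefresh-type=kf" \
-  output.mkv
-```
-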
-#### Encoder Global Options
-
-| **Configuration file parameter** | **Command line**            | **Range**                      | **Default** | **Description**                                                                                               |
-|----------------------------------|-----------------------------|--------------------------------|-------------|---------------------------------------------------------------------------------------------------------------|
-| **SourceWidth**                  | -w                          | [64-16384]                     | None        | Frame width in pixels, inferred if y4m.                                                                       |
-| **SourceHeight**                 | -h                          | [64-8704]                      | None        | Frame height in pixels, inferred if y4m.                                                                      |
-| **FrameToBeEncoded**             | -n                          | [0-`(2^63)-1`]                 | 0           | Number of frames to encode. If `n` is larger than the input, the encoder will loop back and continue encoding |
-| **BufferedInput**                | --nb                        | [-1, 1-`(2^31)-1`]             | -1          | Buffer `n` input frames into memory and use them to encode                                                    |
-| **EncoderColorFormat**           | --color-format              | [0-3]                          | 1           | Color format, only yuv420 is supported at this time [0: yuv400, 1: yuv420, 2: yuv422, 3: yuv444]              |
-| **Profile**                      | --profile                   | [0-2]                          | 0           | Bitstream profile [0: main, 1: high, 2: professional]                                                         |
-| **Level**                        | --level                     | [0,2.0-7.3]                    | 0           | Bitstream level, defined in A.3 of the av1 spec [0: auto]                                                     |
-| **HighDynamicRangeInput**        | --enable-hdr                | [0-1]                          | 0           | Enable writing of HDR metadata in the bitstream                                                               |
-| **FrameRate**                    | --fps                       | [1-240]                        | 25          | Input video frame rate, integer values only, inferred if y4m                                                  |
-| **FrameRateNumerator**           | --fps-num                   | [0-2^32-1]                     | 25000       | Input video frame rate numerator                                                                              |
-| **FrameRateDenominator**         | --fps-denom                 | [0-2^32-1]                     | 1000        | Input video frame rate denominator                                                                            |
-| **EncoderBitDepth**              | --input-depth               | [8, 10]                        | 8           | Input video file and output bitstream bit-depth                                                               |
-| **CompressedTenBitFormat**       | --compressed-ten-bit-format | [0-1]                          | 0           | Pack 10bit video, handled between the app and library                                                         |
-| **Injector**                     | --inj                       | [0-1]                          | 0           | Inject pictures to the library at defined frame rate                                                          |
-| **InjectorFrameRate**            | --inj-frm-rt                | [0-240]                        | 60          | Set injector frame rate, only applicable with `--inj 1`                                                       |
-| **StatReport**                   | --enable-stat-report        | [0-1]                          | 0           | Calculates and outputs PSNR SSIM metrics at the end of encoding                                               |
-| **Asm**                          | --asm                       | [0-11, c-max]                  | max         | Limit assembly instruction set [c, mmx, sse, sse2, sse3, ssse3, sse4_1, sse4_2, avx, avx2, avx512, max]       |
-| **LogicalProcessors**            | --lp                        | [0, core count of the machine] | 0           | Target (best effort) number of logical cores to be used. 0 means all. Refer to Appendix A.1                   |
-| **PinnedExecution**              | --pin                       | [0-1]                          | 0           | Pin the execution to the first --lp cores. Overwritten to 0 when `--ss` is set. Refer to Appendix A.1         |
-| **TargetSocket**                 | --ss                        | [-1,1]                         | -1          | Specifies which socket to run on, assumes a max of two sockets. Refer to Appendix A.1                         |
-| **FastDecode**                   | --fast-decode               | [0,3]                          | 0           | Tune settings to output bitstreams that can be decoded faster, higher values for faster decoding              |
-| **Tune**                         | --tune                      | [0,1]                          | 1           | Specifies whether to use PSNR or VQ as the tuning metric [0 = VQ, 1 = PSNR]                                   |
-
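-As a brief sketch of a few of the global options above (the values are illustrative only), a 10-bit encode limited to 8 logical cores:
-
-`SvtAv1EncApp -i input10.yuv -w 1920 -h 1080 --fps 24 --input-depth 10 --lp 8 --crf 30 -b output.ivf`
-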
-#### Rate Control Options
-
-| **Configuration file parameter** | **Command line**                 | **Range**      | **Default**     | **Description**                                                                                                      |
-|----------------------------------|----------------------------------|----------------|-----------------|----------------------------------------------------------------------------------------------------------------------|
-| **RateControlMode**              | --rc                             | [0-2]          | 0               | Rate control mode [0: CRF or CQP (if `--enable-tpl-la` is 0) [Default], 1: VBR, 2: CBR]                              |
-| **QP**                           | --qp                             | [1-63]         | 50              | Initial QP level value                                                                                               |
-| **CRF**                          | --crf                            | [1-63]         | 50              | Constant Rate Factor value, setting this value is equal to `--rc 0 --enable-tpl-la 1 --qp x`                         |
-| **TargetBitRate**                | --tbr                            | [1-4294967]    | 2000            | Target Bitrate (kbps), only applicable for VBR and CBR encoding                                                      |
-| **MaxBitRate**                   | --mbr                            | [1-4294967]    | 0               | Maximum Bitrate (kbps) only applicable for CRF and VBR encoding                                                      |
-| **UseQpFile**                    | --use-q-file                     | [0-1]          | 0               | Overwrite the encoder default picture based QP assignments and use QP values from `--qp-file`                        |
-| **QpFile**                       | --qpfile                         | any string     | Null            | Path to a file containing per picture QP value                                                                       |
-| **MaxQpAllowed**                 | --max-qp                         | [1-63]         | 63              | Maximum (highest) quantizer, only applicable for VBR and CBR                                                         |
-| **MinQpAllowed**                 | --min-qp                         | [1-63]         | 1               | Minimum (lowest) quantizer, only applicable for VBR and CBR                                                          |
-| **AdaptiveQuantization**         | --aq-mode                        | [0-2]          | 2               | Set adaptive QP level [0: off, 1: variance base using AV1 segments, 2: deltaq pred efficiency]                       |
-| **VBVBufSize**                   | --vbv-bufsize                    | [1-4294967]    | `TargetBitRate` | VBV buffer size.                                                                                                     |
-| **UseFixedQIndexOffsets**        | --use-fixed-qindex-offsets       | [0-1]          | 0               | Overwrite the encoder default hierarchical layer based QP assignment and use fixed Q index offsets                   |
-| **KeyFrameQIndexOffset**         | --key-frame-qindex-offset        | [-256-255]     | 0               | Overwrite the encoder default keyframe Q index assignment                                                            |
-| **KeyFrameChromaQIndexOffset**   | --key-frame-chroma-qindex-offset | [-256-255]     | 0               | Overwrite the encoder default chroma keyframe Q index assignment                                                     |
-| **QIndexOffsets**                | --qindex-offsets                 | any string     | `0,0,..,0`      | list of luma Q index offsets per hierarchical layer, separated by `,` with each offset in the range of [-256-255]    |
-| **ChromaQIndexOffsets**          | --chroma-qindex-offsets          | any string     | `0,0,..,0`      | list of chroma Q index offsets per hierarchical layer, separated by `,` with each offset in the range of [-256-255]  |
-| **UnderShootPct**                | --undershoot-pct                 | [0-100]        | 25              | Allowable datarate undershoot (min) target (%), default depends on the rate control mode                             |
-| **OverShootPct**                 | --overshoot-pct                  | [0-100]        | 25              | Allowable datarate overshoot (max) target (%), default depends on the rate control mode                              |
-| **BufSz**                        | --buf-sz                         | [0-`(2^63)-1`] | 6000            | Client buffer size (ms), only applicable for CBR                                                                     |
-| **BufInitialSz**                 | --buf-initial-sz                 | [0-`(2^63)-1`] | 4000            | Client initial buffer size (ms), only applicable for CBR                                                             |
-| **BufOptimalSz**                 | --buf-optimal-sz                 | [0-`(2^63)-1`] | 5000            | Client optimal buffer size (ms), only applicable for CBR                                                             |
-| **RecodeLoop**                   | --recode-loop                    | [0-4]          | 4               | Recode loop level, look at the "Recode loop level table" in the user's guide for more info [0: off, 4: preset based] |
-| **VBRBiasPct**                   | --bias-pct                       | [0-100]        | 50              | CBR/VBR bias [0: CBR-like, 100: VBR-like]                                                                            |
-| **MinSectionPct**                | --minsection-pct                 | [0-`(2^32)-1`] | 0               | GOP min bitrate (expressed as a percentage of the target rate)                                                       |
-| **MaxSectionPct**                | --maxsection-pct                 | [0-`(2^32)-1`] | 2000            | GOP max bitrate (expressed as a percentage of the target rate)                                                       |
-
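-As an illustrative sketch of the CBR-specific options above (the bitrate and buffer values are placeholders, not tuned recommendations):
-
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 30 --rc 2 --tbr 3000 --pred-struct 1 --buf-initial-sz 4000 --buf-sz 6000 --buf-optimal-sz 5000 -b output.ivf`
-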
-##### **UseFixedQIndexOffsets** and more information
-
-`UseFixedQIndexOffsets` and its associated arguments (`HierarchicalLevels`, `QIndexOffsets`, `ChromaQIndexOffsets`, `KeyFrameQIndexOffset`, `KeyFrameChromaQIndexOffset`)
-are used together to specify the qindex offsets based on frame type and temporal layer when rc is set to 0.
-
-QP value specified by the `--qp` argument is assigned to the pictures at the highest temporal layer.
-It is first converted to a qindex, then the corresponding qindex offsets are added on top of it based on the frame types (Key/Inter) and temporal layer id.
-
-Qindex offset can be negative.
-The final qindex value will be clamped within the valid min/max qindex range.
-
-For the chroma plane, after deciding the qindex for the luma plane, the corresponding chroma qindex offsets are added on top of the luma plane qindex based on frame type and temporal layer id.
-
-`--qindex-offsets` and `--chroma-qindex-offsets` have to be used after the `--hierarchical-levels` parameter.
-The number of qindex offsets should be `HierarchicalLevels` plus 1, and the list may be enclosed in `[]`.
-
-An example command line is:
-
-```bash
-SvtAv1EncApp -i in.y4m -b out.ivf --rc 0 -q 42 --hierarchical-levels 3 --use-fixed-qindex-offsets 1 --qindex-offsets [-12,-8,-4,0] --key-frame-qindex-offset -20 --key-frame-chroma-qindex-offset -6 --chroma-qindex-offsets [-6,0,12,24]
-```
-
-For this command line, corresponding qindex values are:
-
-| **Frame Type**   | **Luma qindex** | **Chroma qindex** |
-|------------------|-----------------|-------------------|
-| **Key Frame**    | 148 (42x4 - 20) | 142 (148 - 6)     |
-| **Layer0 Frame** | 156 (42x4 - 12) | 150 (156 - 6)     |
-| **Layer1 Frame** | 160 (42x4 - 8)  | 160 (160 + 0)     |
-| **Layer2 Frame** | 164 (42x4 - 4)  | 176 (164 + 12)    |
-| **Layer3 Frame** | 168 (42x4 + 0)  | 192 (168 + 24)    |
-
-##### Recode loop level table
-
-| level | description                                                                     |
-|-------|---------------------------------------------------------------------------------|
-| 0     | Off                                                                             |
-| 1     | Allow recode for KF and exceeding maximum frame bandwidth                       |
-| 2     | Allow recode only for key frames, alternate reference frames, and Golden frames |
-| 3     | Allow recode for all frame types based on bitrate constraints                   |
-| 4     | Preset based decision                                                           |
-
-
-#### Multi-pass Options
-
-| **Configuration file parameter** | **Command line** | **Range**      | **Default**        | **Description**                                                                                   |
-|----------------------------------|------------------|----------------|--------------------|---------------------------------------------------------------------------------------------------|
-| **Pass**                         | --pass           | [0-3]          | 0                  | Multi-pass selection [0: single pass encode, 1: first pass, 2: second pass, 3: third pass]        |
-| **Stats**                        | --stats          | any string     | "svtav1_2pass.log" | Filename for multi-pass encoding                                                                  |
-| **Passes**                       | --passes         | [1-2]          | 1                  | Number of encoding passes, default is preset dependent [1: one pass encode, 2: multi-pass encode] |
-
-##### **Pass** information
-
-| **Pass** | **Stats** file open mode |
-|----------|--------------------------|
-| 0        | ""                      |
-| 1        | "w"                     |
-| 2        | "rw" if 3-pass else "r" |
-| 3        | "r"                     |
-
-`--pass 3` is only available for non-CRF modes, and all passes except single-pass require the `--stats` parameter to point to a valid path.
-
-#### GOP size and type Options
-
-| **Configuration file parameter** | **Command line**      | **Range**       | **Default** | **Description**                                                                                                 |
-|----------------------------------|-----------------------|-----------------|-------------|-----------------------------------------------------------------------------------------------------------------|
-| **Keyint**                       | --keyint              | [-2-`(2^31)-1`] | -2          | GOP size (frames) [-2: ~2 seconds, -1: "infinite" and only applicable for CRF, 0: same as -1]                   |
-| **IntraRefreshType**             | --irefresh-type       | [1-2]           | 2           | Intra refresh type [1: FWD Frame (Open GOP), 2: KEY Frame (Closed GOP)]                                         |
-| **SceneChangeDetection**         | --scd                 | [0-1]           | 0           | Scene change detection control                                                                                  |
-| **Lookahead**                    | --lookahead           | [-1,0-120]      | -1          | Number of frames in the future to look ahead, beyond minigop, temporal filtering, and rate control [-1: auto]   |
-| **HierarchicalLevels**           | --hierarchical-levels | [3-5]           | 4           | Set hierarchical levels beyond the base layer [3: 4 temporal layers, 5: 6 temporal layers]                      |
-| **PredStructure**                | --pred-struct         | [0-2]           | 2           | Set prediction structure [0: low delay P-frames, 1: low delay B-frames, 2: random access]                       |
-
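-For illustration, an open-GOP CRF encode with a fixed GOP of 120 frames (~5 seconds at 24 fps; the values are placeholders):
-
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --keyint 120 --irefresh-type 1 -b output.ivf`
-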
-#### AV1 Specific Options
-
-| **Configuration file parameter** | **Command line**     | **Range** | **Default** | **Description**                                                                                                           |
-|----------------------------------|----------------------|-----------|-------------|---------------------------------------------------------------------------------------------------------------------------|
-| **TileRow**                      | --tile-rows          | [0-6]     | 0           | Number of tile rows to use, `TileRow == log2(x)`, default changes per resolution                                          |
-| **TileCol**                      | --tile-columns       | [0-4]     | 0           | Number of tile columns to use, `TileCol == log2(x)`, default changes per resolution                                       |
-| **LoopFilterEnable**             | --enable-dlf         | [0-1]     | 1           | Deblocking loop filter control                                                                                            |
-| **CDEFLevel**                    | --enable-cdef        | [0-1]     | 1           | Enable Constrained Directional Enhancement Filter                                                                         |
-| **EnableRestoration**            | --enable-restoration | [0-1]     | 1           | Enable loop restoration filter                                                                                            |
-| **EnableTPLModel**               | --enable-tpl-la      | [0-1]     | 1           | Temporal Dependency model control, currently forced on library side, only applicable for CRF/CQP                          |
-| **Mfmv**                         | --enable-mfmv        | [-1-1]    | -1          | Motion Field Motion Vector control [-1: auto]                                                                             |
-| **EnableTF**                     | --enable-tf          | [0-1]     | 1           | Enable ALT-REF (temporally filtered) frames                                                                               |
-| **EnableOverlays**               | --enable-overlays    | [0-1]     | 0           | Enable the insertion of overlay pictures which will be used as an additional reference frame for the base layer picture    |
-| **ScreenContentMode**            | --scm                | [0-2]     | 2           | Set screen content detection level [0: off, 1: on, 2: content adaptive]                                                   |
-| **RestrictedMotionVector**       | --rmv                | [0-1]     | 0           | Restrict motion vectors from reaching outside the picture boundary                                                        |
-| **FilmGrain**                    | --film-grain         | [0-50]    | 0           | Enable film grain [0: off, 1-50: level of denoising for film grain]                                                       |
-| **SuperresMode**                 | --superres-mode      | [0-4]     | 0           | Enable super-resolution mode, refer to the super-resolution section below for more info                                   |
-| **SuperresDenom**                | --superres-denom     | [8-16]    | 8           | Super-resolution denominator, only applicable for mode == 1 [8: no scaling, 16: half-scaling]                             |
-| **SuperresKfDenom**              | --superres-kf-denom  | [8-16]    | 8           | Super-resolution denominator for key frames, only applicable for mode == 1 [8: no scaling, 16: half-scaling]              |
-| **SuperresQthres**               | --superres-qthres    | [0-63]    | 43          | Super-resolution q-threshold, only applicable for mode == 3                                                               |
-| **SuperresKfQthres**             | --superres-kf-qthres | [0-63]    | 43          | Super-resolution q-threshold for key frames, only applicable for mode == 3                                                |
-
-##### **Super-Resolution**
-
-Super-resolution is described in more detail in [the Super-Resolution documentation](./Appendix-Super-Resolution.md); in short, it
-allows the input to be encoded at a lower horizontal resolution and then
-upscaled back to the original resolution by the decoder.
-
-| **SuperresMode** | **Value**                                                                                                                   |
-|------------------|-----------------------------------------------------------------------------------------------------------------------------|
-| 0                | None, no frame super-resolution allowed                                                                                     |
-| 1                | All frames are encoded at the specified scale of 8/`denom`, thus a `denom` of 8 means no scaling, and 16 means half-scaling |
-| 2                | All frames are coded at a random scale                                                                                      |
-| 3                | Super-resolution scale for a frame is determined based on the q_index, a qthreshold of 63 means no scaling                  |
-| 4                | Automatically select the super-resolution mode for appropriate frames                                                       |
-
-Encoder performance is affected for all modes other than mode 0. For mode 4, note that
-the encoder will run at least twice, once with downscaling and once with no scaling,
-and then choose the better result for each of the appropriate frames.
-
-For more information on the decision-making process,
-please look at [section 2.2 of the super-resolution doc](./Appendix-Super-Resolution.md#22-determination-of-the-downscaling-factor)
-
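-As a sketch of the mode 1 options above, the following encodes every frame at 8/12 (two thirds) of the original width (the values are illustrative):
-
-`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --superres-mode 1 --superres-denom 12 --superres-kf-denom 12 -b output.ivf`
-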
-#### Color Description Options
-
-| **Configuration file parameter** | **Command line**           | **Range**  | **Default** | **Description**                                                                                                                          |
-|----------------------------------|----------------------------|------------|-------------|------------------------------------------------------------------------------------------------------------------------------------------|
-| **ColorPrimaries**               | --color-primaries          | [0-12, 22] | 2           | Color primaries, refer to the user guide Appendix A.2 for full details                                                                   |
-| **TransferCharacteristics**      | --transfer-characteristics | [0-22]     | 2           | Transfer characteristics, refer to the user guide Appendix A.2 for full details                                                          |
-| **MatrixCoefficients**           | --matrix-coefficients      | [0-14]     | 2           | Matrix coefficients, refer to the user guide Appendix A.2 for full details                                                               |
-| **ColorRange**                   | --color-range              | [0-1]      | 0           | Color range [0: Studio, 1: Full]                                                                                                         |
-| **MasteringDisplay**             | --mastering-display        | any string | none        | Mastering display metadata in the format of "G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)", refer to the user guide Appendix A.2 for full details |
-| **ContentLightLevel**            | --content-light            | any string | none        | Set content light level in the format of "max_cll,max_fall", refer to the user guide Appendix A.2 for full details                       |
-
-## Appendix A Encoder Parameters
-
-### 1. Thread management parameters
-
-The `LogicalProcessors` (`--lp`) and `TargetSocket` (`--ss`) parameters are used to manage thread affinity on Windows and Linux. Below are some examples of how to use them together.
-
-If `LogicalProcessors` and `TargetSocket` are not set, threads are managed by the OS thread scheduler.
-
-`SvtAv1EncApp.exe -i in.yuv -w 3840 -h 2160 --lp 40`
-
-If only `LogicalProcessors` is set, threads run on 40 logical processors. Threads may run across both sockets if 40 is larger than the number of logical processors on a single socket.
-
-NOTE: On Windows, thread affinity can be set only by processor group on systems with more than 64 logical processors. So, if 40 is larger than the number of logical processors on a single socket, threads run on all logical processors of both sockets.
-
-`SvtAv1EncApp.exe -i in.yuv -w 3840 -h 2160 --ss 1`
-
-If only `TargetSocket` is set, threads run on all the logical processors of socket 1.
-
-`SvtAv1EncApp.exe -i in.yuv -w 3840 -h 2160 --lp 20 --ss 0`
-
-If both `LogicalProcessors` and `TargetSocket` are set, threads run on 20 logical processors of socket 0. Threads are guaranteed to run only on socket 0, even if 20 is larger than the number of logical processors on socket 0.
-
-The (`--pin`) option allows the user to pin or unpin the execution to or from a specific number of cores.
-
-Using (`--pin 0`) together with (`--lp`) reduces the memory allocation to what is needed for the given number of cores while still allowing the execution to run on any core rather than being restricted to specific cores.
-
-The following example shows how the two options interact.
-
-`--lp 4` with `--pin 1` restricts the encoder to cpu 0-3 and reduces the resource allocation to only what is needed for 4 cores. `--lp 4` with `--pin 0` also reduces the allocation to what is needed for 4 cores but does not restrict the encoder to cpu 0-3; in this case the encoder might end up using more than 4 cores due to its multi-threaded nature, but this at least allows multiple `--lp 4` encodes to run on the same machine without all of them being restricted to cpu 0-3 or overflowing the memory usage.
-
-Example: 72 core machine:
-
-72 jobs x --lp 1 --pin 0 (to maximize CPU utilization, 72 jobs are run simultaneously, each utilizing 1 core without being pinned to a specific core)
-
-36 jobs x --lp 2 --pin 1
-
-18 jobs x --lp 4 --pin 1
-
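-A minimal shell sketch of the first configuration above (the input file pattern and output names are placeholders):
-
-```bash
-# Launch one single-core, unpinned encode per input file.
-for f in in_*.y4m; do
-    SvtAv1EncApp -i "$f" --lp 1 --pin 0 -b "${f%.y4m}.ivf" &
-done
-wait
-```
-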
-(`--ss`) combined with (`--pin 0`) is not a valid combination. (`--pin`) is overwritten to 1 when (`--ss`) is used.
-
-### 2. AV1 metadata
-
-Please see subsections 6.4.2, 6.7.3, and 6.7.4 of the [AV1 Bitstream & Decoding Process Specification](https://aomediacodec.github.io/av1-spec/av1-spec.pdf) for more details on some expected values.
-
-`MasteringDisplay` (`--mastering-display`) and `ContentLightLevel` (`--content-light`) parameters are used to set the mastering display and content light level in the AV1 bitstream.
-
-`MasteringDisplay` takes the format of `G(x,y)B(x,y)R(x,y)WP(x,y)L(max,min)` where
-
-- `G(x,y)` is the green channel of the mastering display
-- `B(x,y)` is the blue channel of the mastering display
-- `R(x,y)` is the red channel of the mastering display
-- `WP(x,y)` is the white point of the mastering display
-- `L(max,min)` is the light level of the mastering display
-
-The `x` and `y` values are coordinates from 0.0 to 1.0, as specified in CIE 1931, while the `max` and `min` values are floating point values representing candelas per square meter (nits).
-The `max,min` values are generally specified in the range of 0.0 to 1.0, but there are no constraints on the provided values.
-Invalid values will be clipped accordingly.
-
-`ContentLightLevel` takes the format of `max_cll,max_fall` where both values are integers clipped into a range of 0 to 65535.
-
-Examples:
-
-```bash
-SvtAv1EncApp -i in.y4m -b out.ivf \
-    --mastering-display "G(0.2649,0.6900)B(0.1500,0.0600)R(0.6800,0.3200)WP(0.3127,0.3290)L(1000.0,1)" \
-    --content-light 100,50 \
-    --color-primaries 9 \
-    --transfer-characteristics 16 \
-    --matrix-coefficients 9
-    # Color primary 9 is BT.2020, BT.2100
-    # Transfer characteristic 16 is SMPTE ST 2084, ITU BT.2100 PQ
-    # matrix coefficients 9 is BT.2020 non-constant luminance, BT.2100 YCbCr
-
-# or
-
-ffmpeg -i in.mp4 -strict -1 -f yuv4mpegpipe - |
-  SvtAv1EncApp -i stdin -b stdout \
-    --mastering-display "G(0.2649,0.6900)B(0.1500,0.0600)R(0.6800,0.3200)WP(0.3127,0.3290)L(1000.0,1)" \
-    --content-light 100,50 \
-    --color-primaries 9 \
-    --transfer-characteristics 16 \
-    --matrix-coefficients 9 |
-  ffmpeg -y -i - -i audio.ogg -c copy out.mp4
-```
+`SvtAv1EncApp -i input.yuv -w 1920 -h 1080 --fps 24 --crf 30 --preset 12 --color-primaries bt709 --transfer-characteristics bt709 --matrix-coefficients bt709 -b output.ivf`
diff -pruN 0.9.1+dfsg-1/Docs/System-Requirements.md 1.2.0+dfsg-2/Docs/System-Requirements.md
--- 0.9.1+dfsg-1/Docs/System-Requirements.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Docs/System-Requirements.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,40 @@
+[Top level](../README.md)
+
+# System Requirements
+
+## Operating System
+
+The SVT-AV1 Encoder has been validated to run on 64-bit Windows and Linux
+operating systems. The list below represents the specific OS configurations that
+the encoder application and library were tested and validated on:
+
+- __Windows (64-bit):__
+  - Windows Server 2016
+- __Linux (64-bit):__
+  - Ubuntu 16.04 Server LTS
+  - Ubuntu 18.04 Server LTS
+  - Ubuntu 20.04 Server LTS
+- __Other Unix-like (64-bit):__
+  - MacOS
+
+## Hardware
+
+The SVT-AV1 Encoder library primarily supports the x86 architecture with handwritten
+SIMD assembly code; however, the encoder can be compiled and run on any architecture
+that a valid C99 compiler can target, with limited support for non-x86 CPUs.
+
+- __CPU Requirements__
+
+  To achieve the performance targeted by the SVT-AV1 Encoder, a modern x86-64
+  CPU with at least AVX2 support is highly recommended; however, SIMD support
+  extends all the way back to SSE2.
+
+- __RAM Requirements__
+
+  The SVT-AV1 Encoder will adapt itself to the system on which it is being run,
+  albeit only to a slight degree. The memory requirements for encoding depend
+  primarily on the number of cores the encoder will target (`--lp`), the input
+  resolution and bit-depth (`-w`, `-h`, and `--input-depth`), the lookahead
+  distance (`--lookahead`), and the hierarchical levels (`--hierarchical-levels`).
+  Prior to the start of the encode, the encoder will display an error if the
+  system lacks sufficient RAM.
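+
+  As an illustrative way to lower the memory footprint on a constrained system
+  (the values are placeholders, not tuned recommendations):
+
+  `SvtAv1EncApp -i input.y4m --lp 2 --lookahead 0 --hierarchical-levels 3 -b output.ivf`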
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0001-avcodec-libsvtav1-Fix-duplicate-definition-of-caps_i.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0001-avcodec-libsvtav1-Fix-duplicate-definition-of-caps_i.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0001-avcodec-libsvtav1-Fix-duplicate-definition-of-caps_i.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0001-avcodec-libsvtav1-Fix-duplicate-definition-of-caps_i.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,36 @@
+From 6b1121f90508fa02739fb8fa698e9e47d9949cf5 Mon Sep 17 00:00:00 2001
+From: Limin Wang <lance.lmwang@gmail.com>
+Date: Tue, 24 Aug 2021 18:27:07 +0800
+Subject: [PATCH 01/14] avcodec/libsvtav1: Fix duplicate definition of
+ caps_internal
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reviewed-by: Jan Ekström <jeebjp@gmail.com>
+Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
+---
+ libavcodec/libsvtav1.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index cfd93a2484..138241b736 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -561,12 +561,11 @@ AVCodec ff_libsvtav1_encoder = {
+     .receive_packet = eb_receive_packet,
+     .close          = eb_enc_close,
+     .capabilities   = AV_CODEC_CAP_DELAY | AV_CODEC_CAP_OTHER_THREADS,
+-    .caps_internal  = FF_CODEC_CAP_AUTO_THREADS,
++    .caps_internal  = FF_CODEC_CAP_AUTO_THREADS | FF_CODEC_CAP_INIT_CLEANUP,
+     .pix_fmts       = (const enum AVPixelFormat[]){ AV_PIX_FMT_YUV420P,
+                                                     AV_PIX_FMT_YUV420P10,
+                                                     AV_PIX_FMT_NONE },
+     .priv_class     = &class,
+     .defaults       = eb_enc_defaults,
+-    .caps_internal  = FF_CODEC_CAP_INIT_CLEANUP,
+     .wrapper_name   = "libsvtav1",
+ };
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0002-avcodec-libsvtav1-make-coded-GOP-type-configurable.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0002-avcodec-libsvtav1-make-coded-GOP-type-configurable.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0002-avcodec-libsvtav1-make-coded-GOP-type-configurable.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0002-avcodec-libsvtav1-make-coded-GOP-type-configurable.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,39 @@
+From 46059c7636a8e16a61d41060fa13b15bbe283ad4 Mon Sep 17 00:00:00 2001
+From: Limin Wang <lance.lmwang@gmail.com>
+Date: Fri, 17 Sep 2021 10:02:02 +0800
+Subject: [PATCH 02/14] avcodec/libsvtav1: make coded GOP type configurable
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reviewed-by: Jan Ekström <jeebjp@gmail.com>
+Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
+---
+ libavcodec/libsvtav1.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 138241b736..f57cdba63a 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -210,7 +210,8 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->min_qp_allowed       = avctx->qmin;
+     }
+ 
+-    param->intra_refresh_type       = 2; /* Real keyframes only */
++    /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
++    param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
+ 
+     if (svt_enc->la_depth >= 0)
+         param->look_ahead_distance  = svt_enc->la_depth;
+@@ -545,6 +546,7 @@ static const AVClass class = {
+ 
+ static const AVCodecDefault eb_enc_defaults[] = {
+     { "b",         "7M"    },
++    { "flags",     "+cgop" },
+     { "g",         "-1"    },
+     { "qmin",      "0"     },
+     { "qmax",      "63"    },
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0003-avcodec-libsvtav1-Fix-value-range-for-rc-mode.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0003-avcodec-libsvtav1-Fix-value-range-for-rc-mode.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0003-avcodec-libsvtav1-Fix-value-range-for-rc-mode.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0003-avcodec-libsvtav1-Fix-value-range-for-rc-mode.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,30 @@
+From b8357cd00de896c7f5826544760f7b17ea9ee677 Mon Sep 17 00:00:00 2001
+From: Limin Wang <lance.lmwang@gmail.com>
+Date: Sat, 18 Sep 2021 08:24:17 +0800
+Subject: [PATCH 03/14] avcodec/libsvtav1: Fix value range for rc mode
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Reviewed-by: Jan Ekström <jeebjp@gmail.com>
+Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
+---
+ libavcodec/libsvtav1.c | 2 +-
+ 1 file changed, 1 insertion(+), 1 deletion(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index f57cdba63a..173979756d 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -520,7 +520,7 @@ static const AVOption options[] = {
+ #undef LEVEL
+ 
+     { "rc", "Bit rate control mode", OFFSET(rc_mode),
+-      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 3, VE , "rc"},
++      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, VE , "rc"},
+         { "cqp", "Constant quantizer", 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "rc" },
+         { "vbr", "Variable Bit Rate, use a target bitrate for the entire stream", 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "rc" },
+         { "cvbr", "Constrained Variable Bit Rate, use a target bitrate for each GOP", 0, AV_OPT_TYPE_CONST,{ .i64 = 2 },  INT_MIN, INT_MAX, VE, "rc" },
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0004-avcodec-libsvtav1-properly-enforce-CQP-mode-when-set.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0004-avcodec-libsvtav1-properly-enforce-CQP-mode-when-set.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0004-avcodec-libsvtav1-properly-enforce-CQP-mode-when-set.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0004-avcodec-libsvtav1-properly-enforce-CQP-mode-when-set.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,29 @@
+From 5162294f8af44787c387984749efec887b350335 Mon Sep 17 00:00:00 2001
+From: Limin Wang <lance.lmwang@gmail.com>
+Date: Sat, 25 Sep 2021 22:09:05 +0800
+Subject: [PATCH 04/14] avcodec/libsvtav1: properly enforce CQP mode when set
+ in wrapper
+
+SVT-AV1 seems to have switched their default from CQP to CRF in February,
+so enforce the controlling option accordingly.
+
+Signed-off-by: Limin Wang <lance.lmwang@gmail.com>
+---
+ libavcodec/libsvtav1.c | 1 +
+ 1 file changed, 1 insertion(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 173979756d..24800d1eaf 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -205,6 +205,7 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
+     }
+ 
++    param->enable_tpl_la = !!param->rate_control_mode;
+     if (param->rate_control_mode) {
+         param->max_qp_allowed       = avctx->qmax;
+         param->min_qp_allowed       = avctx->qmin;
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0005-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0005-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0005-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0005-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,121 @@
+From 2ea8a4b4ae29f382ec06c03ce050158af623193d Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Wed, 16 Feb 2022 10:37:02 -0300
+Subject: [PATCH 05/14] avcodec/libsvtav1: add a svtav1-params option to pass a
+ list of key=value parameters
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ doc/encoders.texi      |  4 ++++
+ libavcodec/libsvtav1.c | 52 ++++++++++++++++++++++++++++++------------
+ 2 files changed, 42 insertions(+), 14 deletions(-)
+
+diff --git a/doc/encoders.texi b/doc/encoders.texi
+index a92eb0eb2f..d6d0dc7080 100644
+--- a/doc/encoders.texi
++++ b/doc/encoders.texi
+@@ -1795,6 +1795,10 @@ Set log2 of the number of rows of tiles to use (0-6).
+ @item tile_columns
+ Set log2 of the number of columns of tiles to use (0-4).
+ 
++@item svtav1-params
++Set SVT-AV1 options using a list of @var{key}=@var{value} pairs separated
++by ":". See the SVT-AV1 encoder user guide for a list of accepted parameters.
++
+ @end table
+ 
+ @section libkvazaar
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 24800d1eaf..7c0b034d92 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -60,6 +60,7 @@ typedef struct SvtContext {
+     EOS_STATUS eos_flag;
+ 
+     // User options.
++    AVDictionary *svtav1_opts;
+     int hierarchical_level;
+     int la_depth;
+     int enc_mode;
+@@ -151,6 +152,41 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+ {
+     SvtContext *svt_enc = avctx->priv_data;
+     const AVPixFmtDescriptor *desc;
++    AVDictionaryEntry *en = NULL;
++
++    // Update param from options
++    param->hierarchical_levels      = svt_enc->hierarchical_level;
++    param->enc_mode                 = svt_enc->enc_mode;
++    param->tier                     = svt_enc->tier;
++    param->rate_control_mode        = svt_enc->rc_mode;
++    param->scene_change_detection   = svt_enc->scd;
++    param->qp                       = svt_enc->qp;
++
++    if (svt_enc->la_depth >= 0)
++        param->look_ahead_distance  = svt_enc->la_depth;
++
++    param->tile_columns = svt_enc->tile_columns;
++    param->tile_rows    = svt_enc->tile_rows;
++
++#if SVT_AV1_CHECK_VERSION(0, 9, 1)
++    while ((en = av_dict_get(svt_enc->svtav1_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
++        EbErrorType ret = svt_av1_enc_parse_parameter(param, en->key, en->value);
++        if (ret != EB_ErrorNone) {
++            int level = (avctx->err_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
++            av_log(avctx, level, "Error parsing option %s: %s.\n", en->key, en->value);
++            if (avctx->err_recognition & AV_EF_EXPLODE)
++                return AVERROR(EINVAL);
++        }
++    }
++#else
++    if ((en = av_dict_get(svt_enc->svtav1_opts, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
++        int level = (avctx->err_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
++        av_log(avctx, level, "svt-params needs libavcodec to be compiled with SVT-AV1 "
++                             "headers >= 0.9.1.\n");
++        if (avctx->err_recognition & AV_EF_EXPLODE)
++            return AVERROR(ENOSYS);
++    }
++#endif
+ 
+     param->source_width     = avctx->width;
+     param->source_height    = avctx->height;
+@@ -184,14 +220,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    // Update param from options
+-    param->hierarchical_levels      = svt_enc->hierarchical_level;
+-    param->enc_mode                 = svt_enc->enc_mode;
+-    param->tier                     = svt_enc->tier;
+-    param->rate_control_mode        = svt_enc->rc_mode;
+-    param->scene_change_detection   = svt_enc->scd;
+-    param->qp                       = svt_enc->qp;
+-
+     param->target_bit_rate          = avctx->bit_rate;
+ 
+     if (avctx->gop_size > 0)
+@@ -214,12 +242,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
+     param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
+ 
+-    if (svt_enc->la_depth >= 0)
+-        param->look_ahead_distance  = svt_enc->la_depth;
+-
+-    param->tile_columns = svt_enc->tile_columns;
+-    param->tile_rows    = svt_enc->tile_rows;
+-
+     return 0;
+ }
+ 
+@@ -535,6 +557,8 @@ static const AVOption options[] = {
+     { "tile_columns", "Log2 of number of tile columns to use", OFFSET(tile_columns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, VE},
+     { "tile_rows", "Log2 of number of tile rows to use", OFFSET(tile_rows), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 6, VE},
+ 
++    { "svtav1-params", "Set the SVT-AV1 configuration using a :-separated list of key=value parameters", OFFSET(svtav1_opts), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
++
+     {NULL},
+ };
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0006-avcodec-libsvtav1-update-some-options-and-defaults.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0006-avcodec-libsvtav1-update-some-options-and-defaults.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0006-avcodec-libsvtav1-update-some-options-and-defaults.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0006-avcodec-libsvtav1-update-some-options-and-defaults.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,189 @@
+From c366190368195cd20a36d8cc3b062352681c30ff Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Thu, 24 Feb 2022 20:36:46 -0300
+Subject: [PATCH 06/14] avcodec/libsvtav1: update some options and defaults
+
+And bump the minimum required version to 0.9.0
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ configure              |  2 +-
+ doc/encoders.texi      | 23 ++++--------------
+ libavcodec/libsvtav1.c | 55 +++++++++++++++++++++++++++---------------
+ 3 files changed, 41 insertions(+), 39 deletions(-)
+
+diff --git a/configure b/configure
+index d7a3f507e8..467b529e46 100755
+--- a/configure
++++ b/configure
+@@ -6429,7 +6429,7 @@ enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
+ enabled libssh            && require_pkg_config libssh libssh libssh/sftp.h sftp_init
+ enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
+ enabled libsrt            && require_pkg_config libsrt "srt >= 1.3.0" srt/srt.h srt_socket
+-enabled libsvtav1         && require_pkg_config libsvtav1 "SvtAv1Enc >= 0.8.4" EbSvtAv1Enc.h svt_av1_enc_init_handle
++enabled libsvtav1         && require_pkg_config libsvtav1 "SvtAv1Enc >= 0.9.0" EbSvtAv1Enc.h svt_av1_enc_init_handle
+ enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
+ enabled libtesseract      && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate
+ enabled libtheora         && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
+diff --git a/doc/encoders.texi b/doc/encoders.texi
+index d6d0dc7080..8d11cb2d68 100644
+--- a/doc/encoders.texi
++++ b/doc/encoders.texi
+@@ -1754,28 +1754,15 @@ Set the operating point level.
+ @item tier
+ Set the operating point tier.
+ 
+-@item rc
+-Set the rate control mode to use.
+-
+-Possible modes:
+-@table @option
+-@item cqp
+-Constant quantizer: use fixed values of qindex (dependent on the frame type)
+-throughout the stream.  This mode is the default.
+-
+-@item vbr
+-Variable bitrate: use a target bitrate for the whole stream.
+-
+-@item cvbr
+-Constrained variable bitrate: use a target bitrate for each GOP.
+-@end table
+-
+ @item qmax
+ Set the maximum quantizer to use when using a bitrate mode.
+ 
+ @item qmin
+ Set the minimum quantizer to use when using a bitrate mode.
+ 
++@item crf
++Constant rate factor value used in crf rate control mode (0-63).
++
+ @item qp
+ Set the quantizer used in cqp rate control mode (0-63).
+ 
+@@ -1786,8 +1773,8 @@ Enable scene change detection.
+ Set number of frames to look ahead (0-120).
+ 
+ @item preset
+-Set the quality-speed tradeoff, in the range 0 to 8.  Higher values are
+-faster but lower quality.  Defaults to 8 (highest speed).
++Set the quality-speed tradeoff, in the range 0 to 13.  Higher values are
++faster but lower quality.
+ 
+ @item tile_rows
+ Set log2 of the number of rows of tiles to use (0-6).
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 7c0b034d92..5b6fe90bb2 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -64,7 +64,7 @@ typedef struct SvtContext {
+     int hierarchical_level;
+     int la_depth;
+     int enc_mode;
+-    int rc_mode;
++    int crf;
+     int scd;
+     int qp;
+ 
+@@ -156,11 +156,32 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+ 
+     // Update param from options
+     param->hierarchical_levels      = svt_enc->hierarchical_level;
+-    param->enc_mode                 = svt_enc->enc_mode;
++
++    if (svt_enc->enc_mode >= 0)
++        param->enc_mode             = svt_enc->enc_mode;
++
+     param->tier                     = svt_enc->tier;
+-    param->rate_control_mode        = svt_enc->rc_mode;
++
++    if (avctx->bit_rate) {
++        param->target_bit_rate      = avctx->bit_rate;
++        if (avctx->rc_max_rate != avctx->bit_rate)
++            param->rate_control_mode = 1;
++        else
++            param->rate_control_mode = 2;
++    }
++    param->max_bit_rate             = avctx->rc_max_rate;
++    param->vbv_bufsize              = avctx->rc_buffer_size;
++
++    if (svt_enc->crf > 0) {
++        param->qp                   = svt_enc->crf;
++        param->rate_control_mode    = 0;
++        param->enable_tpl_la        = 1;
++    } else if (svt_enc->qp > 0) {
++        param->qp                   = svt_enc->qp;
++        param->rate_control_mode    = 0;
++        param->enable_tpl_la        = 0;
++    }
+     param->scene_change_detection   = svt_enc->scd;
+-    param->qp                       = svt_enc->qp;
+ 
+     if (svt_enc->la_depth >= 0)
+         param->look_ahead_distance  = svt_enc->la_depth;
+@@ -220,8 +241,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    param->target_bit_rate          = avctx->bit_rate;
+-
+     if (avctx->gop_size > 0)
+         param->intra_period_length  = avctx->gop_size - 1;
+ 
+@@ -233,8 +252,8 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
+     }
+ 
+-    param->enable_tpl_la = !!param->rate_control_mode;
+-    if (param->rate_control_mode) {
++    avctx->bit_rate                 = param->target_bit_rate;
++    if (avctx->bit_rate) {
+         param->max_qp_allowed       = avctx->qmax;
+         param->min_qp_allowed       = avctx->qmin;
+     }
+@@ -504,8 +523,8 @@ static const AVOption options[] = {
+     { "la_depth", "Look ahead distance [0, 120]", OFFSET(la_depth),
+       AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 120, VE },
+ 
+-    { "preset", "Encoding preset [0, 8]",
+-      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = MAX_ENC_PRESET }, 0, MAX_ENC_PRESET, VE },
++    { "preset", "Encoding preset",
++      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_ENC_PRESET, VE },
+ 
+     { "tier", "Set operating point tier", OFFSET(tier),
+       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE, "tier" },
+@@ -542,14 +561,10 @@ static const AVOption options[] = {
+         { LEVEL("7.3", 73) },
+ #undef LEVEL
+ 
+-    { "rc", "Bit rate control mode", OFFSET(rc_mode),
+-      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, VE , "rc"},
+-        { "cqp", "Constant quantizer", 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "rc" },
+-        { "vbr", "Variable Bit Rate, use a target bitrate for the entire stream", 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "rc" },
+-        { "cvbr", "Constrained Variable Bit Rate, use a target bitrate for each GOP", 0, AV_OPT_TYPE_CONST,{ .i64 = 2 },  INT_MIN, INT_MAX, VE, "rc" },
+-
+-    { "qp", "Quantizer to use with cqp rate control mode", OFFSET(qp),
+-      AV_OPT_TYPE_INT, { .i64 = 50 }, 0, 63, VE },
++    { "crf", "Constant Rate Factor value", OFFSET(crf),
++      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
++    { "qp", "Initial Quantizer level value", OFFSET(qp),
++      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
+ 
+     { "sc_detection", "Scene change detection", OFFSET(scd),
+       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+@@ -570,10 +585,10 @@ static const AVClass class = {
+ };
+ 
+ static const AVCodecDefault eb_enc_defaults[] = {
+-    { "b",         "7M"    },
++    { "b",         "0"    },
+     { "flags",     "+cgop" },
+     { "g",         "-1"    },
+-    { "qmin",      "0"     },
++    { "qmin",      "1"     },
+     { "qmax",      "63"    },
+     { NULL },
+ };
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0007-avcodec-libsvtav1-deprecate-some-options.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0007-avcodec-libsvtav1-deprecate-some-options.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0007-avcodec-libsvtav1-deprecate-some-options.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0007-avcodec-libsvtav1-deprecate-some-options.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,142 @@
+From 8eab6f7e701cf178731b95dbe9ada29a0b8bf9d0 Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Sun, 13 Mar 2022 15:16:29 -0300
+Subject: [PATCH 07/14] avcodec/libsvtav1: deprecate some options
+
+svtav1-params should be used for these.
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 56 ++++++++++++++++++++++--------------------
+ libavcodec/version.h   |  1 +
+ 2 files changed, 31 insertions(+), 26 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 5b6fe90bb2..00be46e22b 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -61,17 +61,19 @@ typedef struct SvtContext {
+ 
+     // User options.
+     AVDictionary *svtav1_opts;
++#if FF_API_SVTAV1_OPTS
+     int hierarchical_level;
+     int la_depth;
+-    int enc_mode;
+-    int crf;
+     int scd;
+-    int qp;
+ 
+     int tier;
+ 
+     int tile_columns;
+     int tile_rows;
++#endif
++    int enc_mode;
++    int crf;
++    int qp;
+ } SvtContext;
+ 
+ static const struct {
+@@ -155,13 +157,20 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     AVDictionaryEntry *en = NULL;
+ 
+     // Update param from options
++#if FF_API_SVTAV1_OPTS
+     param->hierarchical_levels      = svt_enc->hierarchical_level;
++    param->tier                     = svt_enc->tier;
++    param->scene_change_detection   = svt_enc->scd;
++    param->tile_columns             = svt_enc->tile_columns;
++    param->tile_rows                = svt_enc->tile_rows;
++
++    if (svt_enc->la_depth >= 0)
++        param->look_ahead_distance  = svt_enc->la_depth;
++#endif
+ 
+     if (svt_enc->enc_mode >= 0)
+         param->enc_mode             = svt_enc->enc_mode;
+ 
+-    param->tier                     = svt_enc->tier;
+-
+     if (avctx->bit_rate) {
+         param->target_bit_rate      = avctx->bit_rate;
+         if (avctx->rc_max_rate != avctx->bit_rate)
+@@ -181,13 +190,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->rate_control_mode    = 0;
+         param->enable_tpl_la        = 0;
+     }
+-    param->scene_change_detection   = svt_enc->scd;
+-
+-    if (svt_enc->la_depth >= 0)
+-        param->look_ahead_distance  = svt_enc->la_depth;
+-
+-    param->tile_columns = svt_enc->tile_columns;
+-    param->tile_rows    = svt_enc->tile_rows;
+ 
+ #if SVT_AV1_CHECK_VERSION(0, 9, 1)
+     while ((en = av_dict_get(svt_enc->svtav1_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
+@@ -515,21 +517,22 @@ static av_cold int eb_enc_close(AVCodecContext *avctx)
+ #define OFFSET(x) offsetof(SvtContext, x)
+ #define VE AV_OPT_FLAG_VIDEO_PARAM | AV_OPT_FLAG_ENCODING_PARAM
+ static const AVOption options[] = {
+-    { "hielevel", "Hierarchical prediction levels setting", OFFSET(hierarchical_level),
+-      AV_OPT_TYPE_INT, { .i64 = 4 }, 3, 4, VE , "hielevel"},
++#if FF_API_SVTAV1_OPTS
++    { "hielevel", "Hierarchical prediction levels setting (Deprecated, use svtav1-params)", OFFSET(hierarchical_level),
++      AV_OPT_TYPE_INT, { .i64 = 4 }, 3, 4, VE | AV_OPT_FLAG_DEPRECATED , "hielevel"},
+         { "3level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 3 },  INT_MIN, INT_MAX, VE, "hielevel" },
+         { "4level", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 4 },  INT_MIN, INT_MAX, VE, "hielevel" },
+ 
+-    { "la_depth", "Look ahead distance [0, 120]", OFFSET(la_depth),
+-      AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 120, VE },
+-
+-    { "preset", "Encoding preset",
+-      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_ENC_PRESET, VE },
++    { "la_depth", "Look ahead distance [0, 120] (Deprecated, use svtav1-params)", OFFSET(la_depth),
++      AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 120, VE | AV_OPT_FLAG_DEPRECATED },
+ 
+-    { "tier", "Set operating point tier", OFFSET(tier),
+-      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE, "tier" },
++    { "tier", "Set operating point tier (Deprecated, use svtav1-params)", OFFSET(tier),
++      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE | AV_OPT_FLAG_DEPRECATED, "tier" },
+         { "main", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 0 }, 0, 0, VE, "tier" },
+         { "high", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = 1 }, 0, 0, VE, "tier" },
++#endif
++    { "preset", "Encoding preset",
++      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_ENC_PRESET, VE },
+ 
+     FF_AV1_PROFILE_OPTS
+ 
+@@ -565,12 +568,13 @@ static const AVOption options[] = {
+       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
+     { "qp", "Initial Quantizer level value", OFFSET(qp),
+       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
++#if FF_API_SVTAV1_OPTS
++    { "sc_detection", "Scene change detection (Deprecated, use svtav1-params)", OFFSET(scd),
++      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE | AV_OPT_FLAG_DEPRECATED },
+ 
+-    { "sc_detection", "Scene change detection", OFFSET(scd),
+-      AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+-
+-    { "tile_columns", "Log2 of number of tile columns to use", OFFSET(tile_columns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, VE},
+-    { "tile_rows", "Log2 of number of tile rows to use", OFFSET(tile_rows), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 6, VE},
++    { "tile_columns", "Log2 of number of tile columns to use (Deprecated, use svtav1-params)", OFFSET(tile_columns), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 4, VE | AV_OPT_FLAG_DEPRECATED },
++    { "tile_rows", "Log2 of number of tile rows to use (Deprecated, use svtav1-params)", OFFSET(tile_rows), AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 6, VE | AV_OPT_FLAG_DEPRECATED },
++#endif
+ 
+     { "svtav1-params", "Set the SVT-AV1 configuration using a :-separated list of key=value parameters", OFFSET(svtav1_opts), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
+ 
+diff --git a/libavcodec/version.h b/libavcodec/version.h
+index cfdde46960..6987584670 100644
+--- a/libavcodec/version.h
++++ b/libavcodec/version.h
+@@ -168,5 +168,6 @@
+ #ifndef FF_API_INIT_PACKET
+ #define FF_API_INIT_PACKET         (LIBAVCODEC_VERSION_MAJOR < 60)
+ #endif
++#define FF_API_SVTAV1_OPTS         (LIBAVCODEC_VERSION_MAJOR < 60)
+ 
+ #endif /* AVCODEC_VERSION_H */
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0008-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0008-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0008-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0008-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,32 @@
+From abce9bfb144acc173a7d7d403fcecff915a6e2f1 Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Sun, 13 Mar 2022 15:17:40 -0300
+Subject: [PATCH 08/14] avcodec/libsvtav1: fine tune qp mode settings
+
+As requested in https://gitlab.com/AOMediaCodec/SVT-AV1/-/issues/1829
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 00be46e22b..8c54372065 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -184,11 +184,10 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     if (svt_enc->crf > 0) {
+         param->qp                   = svt_enc->crf;
+         param->rate_control_mode    = 0;
+-        param->enable_tpl_la        = 1;
+     } else if (svt_enc->qp > 0) {
+         param->qp                   = svt_enc->qp;
+         param->rate_control_mode    = 0;
+-        param->enable_tpl_la        = 0;
++        param->enable_adaptive_quantization = 0;
+     }
+ 
+ #if SVT_AV1_CHECK_VERSION(0, 9, 1)
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0009-avcodec-libsvtav1-pass-color-description-info.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0009-avcodec-libsvtav1-pass-color-description-info.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0009-avcodec-libsvtav1-pass-color-description-info.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0009-avcodec-libsvtav1-pass-color-description-info.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,34 @@
+From 695bc602ff1fe5d2bf508431b12204e86fb56644 Mon Sep 17 00:00:00 2001
+From: Christopher Degawa <ccom@randomderp.com>
+Date: Thu, 22 Jul 2021 21:02:10 -0500
+Subject: [PATCH 09/14] avcodec/libsvtav1: pass color description info
+
+Signed-off-by: Christopher Degawa <ccom@randomderp.com>
+---
+ libavcodec/libsvtav1.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 8c54372065..5cc2a6e023 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -227,6 +227,16 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         return AVERROR(EINVAL);
+     }
+ 
++    param->color_primaries          = avctx->color_primaries;
++    param->matrix_coefficients      = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
++                                      AVCOL_SPC_RGB : avctx->colorspace;
++    param->transfer_characteristics = avctx->color_trc;
++
++    if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
++        param->color_range = avctx->color_range == AVCOL_RANGE_JPEG;
++    else
++        param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
++
+     if (avctx->profile != FF_PROFILE_UNKNOWN)
+         param->profile = avctx->profile;
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0010-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0010-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0010-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0010-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,129 @@
+From 690f3b794f44d19eaf54fc33dc17fdee9317252d Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Sun, 27 Mar 2022 20:06:51 -0300
+Subject: [PATCH 10/14] avcodec/libsvtav1: give svtav1-params priority over
+ avctx values
+
+If the svt equivalent option to an avctx AVOption is passed by the user
+then it should have priority. The exceptions are fields like dimensions, bitdepth
+and pixel format, which must match what lavc will feed the encoder after init.
+
+This addresses libsvt-av1 issue #1858.
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 69 +++++++++++++++++++++---------------------
+ 1 file changed, 34 insertions(+), 35 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 5cc2a6e023..ef9ae00235 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -177,6 +177,9 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+             param->rate_control_mode = 1;
+         else
+             param->rate_control_mode = 2;
++
++        param->max_qp_allowed       = avctx->qmax;
++        param->min_qp_allowed       = avctx->qmin;
+     }
+     param->max_bit_rate             = avctx->rc_max_rate;
+     param->vbv_bufsize              = avctx->rc_buffer_size;
+@@ -190,6 +193,37 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->enable_adaptive_quantization = 0;
+     }
+ 
++    desc = av_pix_fmt_desc_get(avctx->pix_fmt);
++    param->color_primaries          = avctx->color_primaries;
++    param->matrix_coefficients      = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
++                                      AVCOL_SPC_RGB : avctx->colorspace;
++    param->transfer_characteristics = avctx->color_trc;
++
++    if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
++        param->color_range = avctx->color_range == AVCOL_RANGE_JPEG;
++    else
++        param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
++
++    if (avctx->profile != FF_PROFILE_UNKNOWN)
++        param->profile = avctx->profile;
++
++    if (avctx->level != FF_LEVEL_UNKNOWN)
++        param->level = avctx->level;
++
++    if (avctx->gop_size > 0)
++        param->intra_period_length  = avctx->gop_size - 1;
++
++    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
++        param->frame_rate_numerator   = avctx->framerate.num;
++        param->frame_rate_denominator = avctx->framerate.den;
++    } else {
++        param->frame_rate_numerator   = avctx->time_base.den;
++        param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
++    }
++
++    /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
++    param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
++
+ #if SVT_AV1_CHECK_VERSION(0, 9, 1)
+     while ((en = av_dict_get(svt_enc->svtav1_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
+         EbErrorType ret = svt_av1_enc_parse_parameter(param, en->key, en->value);
+@@ -213,7 +247,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     param->source_width     = avctx->width;
+     param->source_height    = avctx->height;
+ 
+-    desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+     param->encoder_bit_depth = desc->comp[0].depth;
+ 
+     if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1)
+@@ -227,22 +260,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         return AVERROR(EINVAL);
+     }
+ 
+-    param->color_primaries          = avctx->color_primaries;
+-    param->matrix_coefficients      = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
+-                                      AVCOL_SPC_RGB : avctx->colorspace;
+-    param->transfer_characteristics = avctx->color_trc;
+-
+-    if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
+-        param->color_range = avctx->color_range == AVCOL_RANGE_JPEG;
+-    else
+-        param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+-
+-    if (avctx->profile != FF_PROFILE_UNKNOWN)
+-        param->profile = avctx->profile;
+-
+-    if (avctx->level != FF_LEVEL_UNKNOWN)
+-        param->level = avctx->level;
+-
+     if ((param->encoder_color_format == EB_YUV422 || param->encoder_bit_depth > 10)
+          && param->profile != FF_PROFILE_AV1_PROFESSIONAL ) {
+         av_log(avctx, AV_LOG_WARNING, "Forcing Professional profile\n");
+@@ -252,25 +269,7 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    if (avctx->gop_size > 0)
+-        param->intra_period_length  = avctx->gop_size - 1;
+-
+-    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+-        param->frame_rate_numerator   = avctx->framerate.num;
+-        param->frame_rate_denominator = avctx->framerate.den;
+-    } else {
+-        param->frame_rate_numerator   = avctx->time_base.den;
+-        param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
+-    }
+-
+     avctx->bit_rate                 = param->target_bit_rate;
+-    if (avctx->bit_rate) {
+-        param->max_qp_allowed       = avctx->qmax;
+-        param->min_qp_allowed       = avctx->qmin;
+-    }
+-
+-    /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
+-    param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
+ 
+     return 0;
+ }
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0011-avcodec-libsvtav1-pass-pict_type-to-library.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0011-avcodec-libsvtav1-pass-pict_type-to-library.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0011-avcodec-libsvtav1-pass-pict_type-to-library.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0011-avcodec-libsvtav1-pass-pict_type-to-library.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,41 @@
+From 409bc75030c577b79c5655e5ba1a0ba6d2526327 Mon Sep 17 00:00:00 2001
+From: Christopher Degawa <ccom@randomderp.com>
+Date: Mon, 25 Apr 2022 17:54:38 -0500
+Subject: [PATCH 11/14] avcodec/libsvtav1: pass pict_type to library
+
+match the behavior of SvtAv1EncApp to ensure pic_type is always set
+before passing it to the library.
+
+The other options for pic_type aren't currently used inside the library,
+so they aren't introduced in this patch.
+
+Signed-off-by: Christopher Degawa <ccom@randomderp.com>
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index ef9ae00235..333b22ca4c 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -403,6 +403,16 @@ static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+     headerPtr->p_app_private = NULL;
+     headerPtr->pts           = frame->pts;
+ 
++    switch (frame->pict_type) {
++    case AV_PICTURE_TYPE_I:
++        headerPtr->pic_type = EB_AV1_KEY_PICTURE;
++        break;
++    default:
++        // Actually means auto, or default.
++        headerPtr->pic_type = EB_AV1_INVALID_PICTURE;
++        break;
++    }
++
+     svt_av1_enc_send_picture(svt_enc->svt_handle, headerPtr);
+ 
+     return 0;
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0012-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0012-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0012-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0012-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,52 @@
+From bb022d8d110bbd04d840d16fb313a5c664ac5d0b Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
+Date: Mon, 25 Apr 2022 23:35:17 +0300
+Subject: [PATCH 12/14] avcodec/libsvtav1: add support for setting chroma
+ sample location
+
+Support for configuring this was added with version 1.0.0.
+---
+ libavcodec/libsvtav1.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 333b22ca4c..caff1299b8 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -204,6 +204,33 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     else
+         param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+ 
++#if SVT_AV1_CHECK_VERSION(1, 0, 0)
++    if (avctx->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) {
++        const char *name =
++            av_chroma_location_name(avctx->chroma_sample_location);
++
++        switch (avctx->chroma_sample_location) {
++        case AVCHROMA_LOC_LEFT:
++            param->chroma_sample_position = EB_CSP_VERTICAL;
++            break;
++        case AVCHROMA_LOC_TOPLEFT:
++            param->chroma_sample_position = EB_CSP_COLOCATED;
++            break;
++        default:
++            if (!name)
++                break;
++
++            av_log(avctx, AV_LOG_WARNING,
++                   "Specified chroma sample location %s is unsupported "
++                   "on the AV1 bit stream level. Usage of a container that "
++                   "allows passing this information - such as Matroska - "
++                   "is recommended.\n",
++                   name);
++            break;
++        }
++    }
++#endif
++
+     if (avctx->profile != FF_PROFILE_UNKNOWN)
+         param->profile = avctx->profile;
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0013-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0013-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0013-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0013-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,30 @@
+From d668abda52fca482f6c74bc0441fbe1691cf5efa Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
+Date: Mon, 9 May 2022 21:21:33 +0300
+Subject: [PATCH 13/14] avcodec/libsvtav1: update avctx bit rate according to
+ RC mode
+
+This way we can filter out the default value for this member, which
+is nonzero. This is based on the current state of affairs, in which
+bit-rate-based rate control modes are nonzero in SVT-AV1.
+---
+ libavcodec/libsvtav1.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index caff1299b8..8fc506c29d 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -296,7 +296,8 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    avctx->bit_rate                 = param->target_bit_rate;
++    avctx->bit_rate = param->rate_control_mode > 0 ?
++                      param->target_bit_rate : 0;
+ 
+     return 0;
+ }
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0014-avcodec-libsvtav1-signal-CPB-properties-through-side.patch 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0014-avcodec-libsvtav1-signal-CPB-properties-through-side.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n4.4/0014-avcodec-libsvtav1-signal-CPB-properties-through-side.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n4.4/0014-avcodec-libsvtav1-signal-CPB-properties-through-side.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,46 @@
+From ac755bb2189e7f471c3c863082790324274f1b8a Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
+Date: Mon, 9 May 2022 21:29:44 +0300
+Subject: [PATCH 14/14] avcodec/libsvtav1: signal CPB properties through side
+ data
+
+This way values such as maxrate/bufsize can be utilized further
+down the chain.
+
+First, syncs up the max_rate and buffer_size from SVT-AV1 back to
+avctx, and then in case at least one of the utilized values is
+nonzero, adds the CPB properties side data.
+---
+ libavcodec/libsvtav1.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 8fc506c29d..7ed1dd14a5 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -296,8 +296,20 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    avctx->bit_rate = param->rate_control_mode > 0 ?
+-                      param->target_bit_rate : 0;
++    avctx->bit_rate       = param->rate_control_mode > 0 ?
++                            param->target_bit_rate : 0;
++    avctx->rc_max_rate    = param->max_bit_rate;
++    avctx->rc_buffer_size = param->vbv_bufsize;
++
++    if (avctx->bit_rate || avctx->rc_max_rate || avctx->rc_buffer_size) {
++        AVCPBProperties *cpb_props = ff_add_cpb_side_data(avctx);
++        if (!cpb_props)
++            return AVERROR(ENOMEM);
++
++        cpb_props->buffer_size = avctx->rc_buffer_size;
++        cpb_props->max_bitrate = avctx->rc_max_rate;
++        cpb_props->avg_bitrate = avctx->bit_rate;
++    }
+ 
+     return 0;
+ }
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0001-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0001-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0001-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0001-avcodec-libsvtav1-add-a-svtav1-params-option-to-pass.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,121 @@
+From 069b42f6dbdd65ee1409d0a0315d39d83759e16f Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Wed, 16 Feb 2022 10:37:02 -0300
+Subject: [PATCH 1/9] avcodec/libsvtav1: add a svtav1-params option to pass a
+ list of key=value parameters
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ doc/encoders.texi      |  4 ++++
+ libavcodec/libsvtav1.c | 52 ++++++++++++++++++++++++++++++------------
+ 2 files changed, 42 insertions(+), 14 deletions(-)
+
+diff --git a/doc/encoders.texi b/doc/encoders.texi
+index e3b61de5a1..096ee8625b 100644
+--- a/doc/encoders.texi
++++ b/doc/encoders.texi
+@@ -1816,6 +1816,10 @@ Set log2 of the number of rows of tiles to use (0-6).
+ @item tile_columns
+ Set log2 of the number of columns of tiles to use (0-4).
+ 
++@item svtav1-params
++Set SVT-AV1 options using a list of @var{key}=@var{value} pairs separated
++by ":". See the SVT-AV1 encoder user guide for a list of accepted parameters.
++
+ @end table
+ 
+ @section libkvazaar
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 6196da25e6..ce3b0ba7ac 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -60,6 +60,7 @@ typedef struct SvtContext {
+     EOS_STATUS eos_flag;
+ 
+     // User options.
++    AVDictionary *svtav1_opts;
+     int hierarchical_level;
+     int la_depth;
+     int enc_mode;
+@@ -151,6 +152,41 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+ {
+     SvtContext *svt_enc = avctx->priv_data;
+     const AVPixFmtDescriptor *desc;
++    AVDictionaryEntry *en = NULL;
++
++    // Update param from options
++    param->hierarchical_levels      = svt_enc->hierarchical_level;
++    param->enc_mode                 = svt_enc->enc_mode;
++    param->tier                     = svt_enc->tier;
++    param->rate_control_mode        = svt_enc->rc_mode;
++    param->scene_change_detection   = svt_enc->scd;
++    param->qp                       = svt_enc->qp;
++
++    if (svt_enc->la_depth >= 0)
++        param->look_ahead_distance  = svt_enc->la_depth;
++
++    param->tile_columns = svt_enc->tile_columns;
++    param->tile_rows    = svt_enc->tile_rows;
++
++#if SVT_AV1_CHECK_VERSION(0, 9, 1)
++    while ((en = av_dict_get(svt_enc->svtav1_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
++        EbErrorType ret = svt_av1_enc_parse_parameter(param, en->key, en->value);
++        if (ret != EB_ErrorNone) {
++            int level = (avctx->err_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
++            av_log(avctx, level, "Error parsing option %s: %s.\n", en->key, en->value);
++            if (avctx->err_recognition & AV_EF_EXPLODE)
++                return AVERROR(EINVAL);
++        }
++    }
++#else
++    if ((en = av_dict_get(svt_enc->svtav1_opts, "", NULL, AV_DICT_IGNORE_SUFFIX))) {
++        int level = (avctx->err_recognition & AV_EF_EXPLODE) ? AV_LOG_ERROR : AV_LOG_WARNING;
++        av_log(avctx, level, "svt-params needs libavcodec to be compiled with SVT-AV1 "
++                             "headers >= 0.9.1.\n");
++        if (avctx->err_recognition & AV_EF_EXPLODE)
++            return AVERROR(ENOSYS);
++    }
++#endif
+ 
+     param->source_width     = avctx->width;
+     param->source_height    = avctx->height;
+@@ -184,14 +220,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    // Update param from options
+-    param->hierarchical_levels      = svt_enc->hierarchical_level;
+-    param->enc_mode                 = svt_enc->enc_mode;
+-    param->tier                     = svt_enc->tier;
+-    param->rate_control_mode        = svt_enc->rc_mode;
+-    param->scene_change_detection   = svt_enc->scd;
+-    param->qp                       = svt_enc->qp;
+-
+     param->target_bit_rate          = avctx->bit_rate;
+ 
+     if (avctx->gop_size > 0)
+@@ -214,12 +242,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
+     param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
+ 
+-    if (svt_enc->la_depth >= 0)
+-        param->look_ahead_distance  = svt_enc->la_depth;
+-
+-    param->tile_columns = svt_enc->tile_columns;
+-    param->tile_rows    = svt_enc->tile_rows;
+-
+     return 0;
+ }
+ 
+@@ -535,6 +557,8 @@ static const AVOption options[] = {
+     { "tile_columns", "Log2 of number of tile columns to use", OFFSET(tile_columns), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 4, VE},
+     { "tile_rows", "Log2 of number of tile rows to use", OFFSET(tile_rows), AV_OPT_TYPE_INT, {.i64 = 0}, 0, 6, VE},
+ 
++    { "svtav1-params", "Set the SVT-AV1 configuration using a :-separated list of key=value parameters", OFFSET(svtav1_opts), AV_OPT_TYPE_DICT, { 0 }, 0, 0, VE },
++
+     {NULL},
+ };
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0002-avcodec-libsvtav1-update-some-options-and-defaults.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0002-avcodec-libsvtav1-update-some-options-and-defaults.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0002-avcodec-libsvtav1-update-some-options-and-defaults.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0002-avcodec-libsvtav1-update-some-options-and-defaults.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,189 @@
+From 3c99da58276c6d947caf27dc3427290a42d18ceb Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Thu, 24 Feb 2022 20:36:46 -0300
+Subject: [PATCH 2/9] avcodec/libsvtav1: update some options and defaults
+
+And bump the minimum required version to 0.9.0
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ configure              |  2 +-
+ doc/encoders.texi      | 23 ++++--------------
+ libavcodec/libsvtav1.c | 55 +++++++++++++++++++++++++++---------------
+ 3 files changed, 41 insertions(+), 39 deletions(-)
+
+diff --git a/configure b/configure
+index 6b5ef6332e..a1d74b9ed6 100755
+--- a/configure
++++ b/configure
+@@ -6604,7 +6604,7 @@ enabled libsoxr           && require libsoxr soxr.h soxr_create -lsoxr
+ enabled libssh            && require_pkg_config libssh libssh libssh/sftp.h sftp_init
+ enabled libspeex          && require_pkg_config libspeex speex speex/speex.h speex_decoder_init
+ enabled libsrt            && require_pkg_config libsrt "srt >= 1.3.0" srt/srt.h srt_socket
+-enabled libsvtav1         && require_pkg_config libsvtav1 "SvtAv1Enc >= 0.8.4" EbSvtAv1Enc.h svt_av1_enc_init_handle
++enabled libsvtav1         && require_pkg_config libsvtav1 "SvtAv1Enc >= 0.9.0" EbSvtAv1Enc.h svt_av1_enc_init_handle
+ enabled libtensorflow     && require libtensorflow tensorflow/c/c_api.h TF_Version -ltensorflow
+ enabled libtesseract      && require_pkg_config libtesseract tesseract tesseract/capi.h TessBaseAPICreate
+ enabled libtheora         && require libtheora theora/theoraenc.h th_info_init -ltheoraenc -ltheoradec -logg
+diff --git a/doc/encoders.texi b/doc/encoders.texi
+index 096ee8625b..dce61b4938 100644
+--- a/doc/encoders.texi
++++ b/doc/encoders.texi
+@@ -1775,28 +1775,15 @@ This is the default.
+ @item high
+ @end table
+ 
+-@item rc
+-Set the rate control mode to use.
+-
+-Possible modes:
+-@table @option
+-@item cqp
+-Constant quantizer: use fixed values of qindex (dependent on the frame type)
+-throughout the stream.  This mode is the default.
+-
+-@item vbr
+-Variable bitrate: use a target bitrate for the whole stream.
+-
+-@item cvbr
+-Constrained variable bitrate: use a target bitrate for each GOP.
+-@end table
+-
+ @item qmax
+ Set the maximum quantizer to use when using a bitrate mode.
+ 
+ @item qmin
+ Set the minimum quantizer to use when using a bitrate mode.
+ 
++@item crf
++Constant rate factor value used in crf rate control mode (0-63).
++
+ @item qp
+ Set the quantizer used in cqp rate control mode (0-63).
+ 
+@@ -1807,8 +1794,8 @@ Enable scene change detection.
+ Set number of frames to look ahead (0-120).
+ 
+ @item preset
+-Set the quality-speed tradeoff, in the range 0 to 8.  Higher values are
+-faster but lower quality.  Defaults to 8 (highest speed).
++Set the quality-speed tradeoff, in the range 0 to 13.  Higher values are
++faster but lower quality.
+ 
+ @item tile_rows
+ Set log2 of the number of rows of tiles to use (0-6).
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index ce3b0ba7ac..fecb9f4aaa 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -64,7 +64,7 @@ typedef struct SvtContext {
+     int hierarchical_level;
+     int la_depth;
+     int enc_mode;
+-    int rc_mode;
++    int crf;
+     int scd;
+     int qp;
+ 
+@@ -156,11 +156,32 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+ 
+     // Update param from options
+     param->hierarchical_levels      = svt_enc->hierarchical_level;
+-    param->enc_mode                 = svt_enc->enc_mode;
++
++    if (svt_enc->enc_mode >= 0)
++        param->enc_mode             = svt_enc->enc_mode;
++
+     param->tier                     = svt_enc->tier;
+-    param->rate_control_mode        = svt_enc->rc_mode;
++
++    if (avctx->bit_rate) {
++        param->target_bit_rate      = avctx->bit_rate;
++        if (avctx->rc_max_rate != avctx->bit_rate)
++            param->rate_control_mode = 1;
++        else
++            param->rate_control_mode = 2;
++    }
++    param->max_bit_rate             = avctx->rc_max_rate;
++    param->vbv_bufsize              = avctx->rc_buffer_size;
++
++    if (svt_enc->crf > 0) {
++        param->qp                   = svt_enc->crf;
++        param->rate_control_mode    = 0;
++        param->enable_tpl_la        = 1;
++    } else if (svt_enc->qp > 0) {
++        param->qp                   = svt_enc->qp;
++        param->rate_control_mode    = 0;
++        param->enable_tpl_la        = 0;
++    }
+     param->scene_change_detection   = svt_enc->scd;
+-    param->qp                       = svt_enc->qp;
+ 
+     if (svt_enc->la_depth >= 0)
+         param->look_ahead_distance  = svt_enc->la_depth;
+@@ -220,8 +241,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    param->target_bit_rate          = avctx->bit_rate;
+-
+     if (avctx->gop_size > 0)
+         param->intra_period_length  = avctx->gop_size - 1;
+ 
+@@ -233,8 +252,8 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
+     }
+ 
+-    param->enable_tpl_la = !!param->rate_control_mode;
+-    if (param->rate_control_mode) {
++    avctx->bit_rate                 = param->target_bit_rate;
++    if (avctx->bit_rate) {
+         param->max_qp_allowed       = avctx->qmax;
+         param->min_qp_allowed       = avctx->qmin;
+     }
+@@ -504,8 +523,8 @@ static const AVOption options[] = {
+     { "la_depth", "Look ahead distance [0, 120]", OFFSET(la_depth),
+       AV_OPT_TYPE_INT, { .i64 = -1 }, -1, 120, VE },
+ 
+-    { "preset", "Encoding preset [0, 8]",
+-      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = MAX_ENC_PRESET }, 0, MAX_ENC_PRESET, VE },
++    { "preset", "Encoding preset",
++      OFFSET(enc_mode), AV_OPT_TYPE_INT, { .i64 = -1 }, -1, MAX_ENC_PRESET, VE },
+ 
+     { "tier", "Set operating point tier", OFFSET(tier),
+       AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 1, VE, "tier" },
+@@ -542,14 +561,10 @@ static const AVOption options[] = {
+         { LEVEL("7.3", 73) },
+ #undef LEVEL
+ 
+-    { "rc", "Bit rate control mode", OFFSET(rc_mode),
+-      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 2, VE , "rc"},
+-        { "cqp", "Constant quantizer", 0, AV_OPT_TYPE_CONST, { .i64 = 0 },  INT_MIN, INT_MAX, VE, "rc" },
+-        { "vbr", "Variable Bit Rate, use a target bitrate for the entire stream", 0, AV_OPT_TYPE_CONST, { .i64 = 1 },  INT_MIN, INT_MAX, VE, "rc" },
+-        { "cvbr", "Constrained Variable Bit Rate, use a target bitrate for each GOP", 0, AV_OPT_TYPE_CONST,{ .i64 = 2 },  INT_MIN, INT_MAX, VE, "rc" },
+-
+-    { "qp", "Quantizer to use with cqp rate control mode", OFFSET(qp),
+-      AV_OPT_TYPE_INT, { .i64 = 50 }, 0, 63, VE },
++    { "crf", "Constant Rate Factor value", OFFSET(crf),
++      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
++    { "qp", "Initial Quantizer level value", OFFSET(qp),
++      AV_OPT_TYPE_INT, { .i64 = 0 }, 0, 63, VE },
+ 
+     { "sc_detection", "Scene change detection", OFFSET(scd),
+       AV_OPT_TYPE_BOOL, { .i64 = 0 }, 0, 1, VE },
+@@ -570,10 +585,10 @@ static const AVClass class = {
+ };
+ 
+ static const AVCodecDefault eb_enc_defaults[] = {
+-    { "b",         "7M"    },
++    { "b",         "0"    },
+     { "flags",     "+cgop" },
+     { "g",         "-1"    },
+-    { "qmin",      "0"     },
++    { "qmin",      "1"     },
+     { "qmax",      "63"    },
+     { NULL },
+ };
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0003-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0003-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0003-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0003-avcodec-libsvtav1-fine-tune-qp-mode-settings.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,32 @@
+From 50e1556c45f6e2f8347d66157d8e729b50b6e190 Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Sun, 13 Mar 2022 15:17:40 -0300
+Subject: [PATCH 3/9] avcodec/libsvtav1: fine tune qp mode settings
+
+As requested in https://gitlab.com/AOMediaCodec/SVT-AV1/-/issues/1829
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 3 +--
+ 1 file changed, 1 insertion(+), 2 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index fecb9f4aaa..826a3c084b 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -175,11 +175,10 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     if (svt_enc->crf > 0) {
+         param->qp                   = svt_enc->crf;
+         param->rate_control_mode    = 0;
+-        param->enable_tpl_la        = 1;
+     } else if (svt_enc->qp > 0) {
+         param->qp                   = svt_enc->qp;
+         param->rate_control_mode    = 0;
+-        param->enable_tpl_la        = 0;
++        param->enable_adaptive_quantization = 0;
+     }
+     param->scene_change_detection   = svt_enc->scd;
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0004-avcodec-libsvtav1-pass-color-description-info.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0004-avcodec-libsvtav1-pass-color-description-info.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0004-avcodec-libsvtav1-pass-color-description-info.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0004-avcodec-libsvtav1-pass-color-description-info.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,34 @@
+From c9fdbcfa78118a868ac51301fc34e2b1ce371fd2 Mon Sep 17 00:00:00 2001
+From: Christopher Degawa <ccom@randomderp.com>
+Date: Thu, 22 Jul 2021 21:02:10 -0500
+Subject: [PATCH 4/9] avcodec/libsvtav1: pass color description info
+
+Signed-off-by: Christopher Degawa <ccom@randomderp.com>
+---
+ libavcodec/libsvtav1.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 826a3c084b..49d3178fab 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -225,6 +225,16 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         return AVERROR(EINVAL);
+     }
+ 
++    param->color_primaries          = avctx->color_primaries;
++    param->matrix_coefficients      = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
++                                      AVCOL_SPC_RGB : avctx->colorspace;
++    param->transfer_characteristics = avctx->color_trc;
++
++    if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
++        param->color_range = avctx->color_range == AVCOL_RANGE_JPEG;
++    else
++        param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
++
+     if (avctx->profile != FF_PROFILE_UNKNOWN)
+         param->profile = avctx->profile;
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0005-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0005-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0005-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0005-avcodec-libsvtav1-give-svtav1-params-priority-over-a.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,129 @@
+From f579c1aca11da1664c4cebc9e2baa84722725681 Mon Sep 17 00:00:00 2001
+From: James Almer <jamrial@gmail.com>
+Date: Sun, 27 Mar 2022 20:06:51 -0300
+Subject: [PATCH 5/9] avcodec/libsvtav1: give svtav1-params priority over avctx
+ values
+
+If the svt equivalent option to an avctx AVOption is passed by the user
+then it should have priority. The exceptions are fields like dimensions, bitdepth
+and pixel format, which must match what lavc will feed the encoder after init.
+
+This addresses libsvt-av1 issue #1858.
+
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 69 +++++++++++++++++++++---------------------
+ 1 file changed, 34 insertions(+), 35 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 49d3178fab..dc2da06e80 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -168,6 +168,9 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+             param->rate_control_mode = 1;
+         else
+             param->rate_control_mode = 2;
++
++        param->max_qp_allowed       = avctx->qmax;
++        param->min_qp_allowed       = avctx->qmin;
+     }
+     param->max_bit_rate             = avctx->rc_max_rate;
+     param->vbv_bufsize              = avctx->rc_buffer_size;
+@@ -188,6 +191,37 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     param->tile_columns = svt_enc->tile_columns;
+     param->tile_rows    = svt_enc->tile_rows;
+ 
++    desc = av_pix_fmt_desc_get(avctx->pix_fmt);
++    param->color_primaries          = avctx->color_primaries;
++    param->matrix_coefficients      = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
++                                      AVCOL_SPC_RGB : avctx->colorspace;
++    param->transfer_characteristics = avctx->color_trc;
++
++    if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
++        param->color_range = avctx->color_range == AVCOL_RANGE_JPEG;
++    else
++        param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
++
++    if (avctx->profile != FF_PROFILE_UNKNOWN)
++        param->profile = avctx->profile;
++
++    if (avctx->level != FF_LEVEL_UNKNOWN)
++        param->level = avctx->level;
++
++    if (avctx->gop_size > 0)
++        param->intra_period_length  = avctx->gop_size - 1;
++
++    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
++        param->frame_rate_numerator   = avctx->framerate.num;
++        param->frame_rate_denominator = avctx->framerate.den;
++    } else {
++        param->frame_rate_numerator   = avctx->time_base.den;
++        param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
++    }
++
++    /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
++    param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
++
+ #if SVT_AV1_CHECK_VERSION(0, 9, 1)
+     while ((en = av_dict_get(svt_enc->svtav1_opts, "", en, AV_DICT_IGNORE_SUFFIX))) {
+         EbErrorType ret = svt_av1_enc_parse_parameter(param, en->key, en->value);
+@@ -211,7 +245,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     param->source_width     = avctx->width;
+     param->source_height    = avctx->height;
+ 
+-    desc = av_pix_fmt_desc_get(avctx->pix_fmt);
+     param->encoder_bit_depth = desc->comp[0].depth;
+ 
+     if (desc->log2_chroma_w == 1 && desc->log2_chroma_h == 1)
+@@ -225,22 +258,6 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         return AVERROR(EINVAL);
+     }
+ 
+-    param->color_primaries          = avctx->color_primaries;
+-    param->matrix_coefficients      = (desc->flags & AV_PIX_FMT_FLAG_RGB) ?
+-                                      AVCOL_SPC_RGB : avctx->colorspace;
+-    param->transfer_characteristics = avctx->color_trc;
+-
+-    if (avctx->color_range != AVCOL_RANGE_UNSPECIFIED)
+-        param->color_range = avctx->color_range == AVCOL_RANGE_JPEG;
+-    else
+-        param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+-
+-    if (avctx->profile != FF_PROFILE_UNKNOWN)
+-        param->profile = avctx->profile;
+-
+-    if (avctx->level != FF_LEVEL_UNKNOWN)
+-        param->level = avctx->level;
+-
+     if ((param->encoder_color_format == EB_YUV422 || param->encoder_bit_depth > 10)
+          && param->profile != FF_PROFILE_AV1_PROFESSIONAL ) {
+         av_log(avctx, AV_LOG_WARNING, "Forcing Professional profile\n");
+@@ -250,25 +267,7 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    if (avctx->gop_size > 0)
+-        param->intra_period_length  = avctx->gop_size - 1;
+-
+-    if (avctx->framerate.num > 0 && avctx->framerate.den > 0) {
+-        param->frame_rate_numerator   = avctx->framerate.num;
+-        param->frame_rate_denominator = avctx->framerate.den;
+-    } else {
+-        param->frame_rate_numerator   = avctx->time_base.den;
+-        param->frame_rate_denominator = avctx->time_base.num * avctx->ticks_per_frame;
+-    }
+-
+     avctx->bit_rate                 = param->target_bit_rate;
+-    if (avctx->bit_rate) {
+-        param->max_qp_allowed       = avctx->qmax;
+-        param->min_qp_allowed       = avctx->qmin;
+-    }
+-
+-    /* 2 = IDR, closed GOP, 1 = CRA, open GOP */
+-    param->intra_refresh_type = avctx->flags & AV_CODEC_FLAG_CLOSED_GOP ? 2 : 1;
+ 
+     return 0;
+ }
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0006-avcodec-libsvtav1-pass-pict_type-to-library.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0006-avcodec-libsvtav1-pass-pict_type-to-library.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0006-avcodec-libsvtav1-pass-pict_type-to-library.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0006-avcodec-libsvtav1-pass-pict_type-to-library.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,41 @@
+From e10d57e825aabde0a1c4e708589359ba1ee5bee4 Mon Sep 17 00:00:00 2001
+From: Christopher Degawa <ccom@randomderp.com>
+Date: Mon, 25 Apr 2022 17:54:38 -0500
+Subject: [PATCH 6/9] avcodec/libsvtav1: pass pict_type to library
+
+match the behavior of SvtAv1EncApp to ensure pic_type is always set
+before passing it to the library.
+
+The other options for pic_type aren't currently used inside the library,
+so they aren't introduced in this patch.
+
+Signed-off-by: Christopher Degawa <ccom@randomderp.com>
+Signed-off-by: James Almer <jamrial@gmail.com>
+---
+ libavcodec/libsvtav1.c | 10 ++++++++++
+ 1 file changed, 10 insertions(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index dc2da06e80..ccb05cddfe 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -401,6 +401,16 @@ static int eb_send_frame(AVCodecContext *avctx, const AVFrame *frame)
+     headerPtr->p_app_private = NULL;
+     headerPtr->pts           = frame->pts;
+ 
++    switch (frame->pict_type) {
++    case AV_PICTURE_TYPE_I:
++        headerPtr->pic_type = EB_AV1_KEY_PICTURE;
++        break;
++    default:
++        // Actually means auto, or default.
++        headerPtr->pic_type = EB_AV1_INVALID_PICTURE;
++        break;
++    }
++
+     svt_av1_enc_send_picture(svt_enc->svt_handle, headerPtr);
+ 
+     return 0;
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0007-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0007-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0007-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0007-avcodec-libsvtav1-add-support-for-setting-chroma-sam.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,52 @@
+From d585bfeba9ba746f7ead27dc761a1b2a0967d55d Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
+Date: Mon, 25 Apr 2022 23:35:17 +0300
+Subject: [PATCH 7/9] avcodec/libsvtav1: add support for setting chroma sample
+ location
+
+Support for configuring this was added with version 1.0.0.
+---
+ libavcodec/libsvtav1.c | 27 +++++++++++++++++++++++++++
+ 1 file changed, 27 insertions(+)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index ccb05cddfe..8eb3d07843 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -202,6 +202,33 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+     else
+         param->color_range = !!(desc->flags & AV_PIX_FMT_FLAG_RGB);
+ 
++#if SVT_AV1_CHECK_VERSION(1, 0, 0)
++    if (avctx->chroma_sample_location != AVCHROMA_LOC_UNSPECIFIED) {
++        const char *name =
++            av_chroma_location_name(avctx->chroma_sample_location);
++
++        switch (avctx->chroma_sample_location) {
++        case AVCHROMA_LOC_LEFT:
++            param->chroma_sample_position = EB_CSP_VERTICAL;
++            break;
++        case AVCHROMA_LOC_TOPLEFT:
++            param->chroma_sample_position = EB_CSP_COLOCATED;
++            break;
++        default:
++            if (!name)
++                break;
++
++            av_log(avctx, AV_LOG_WARNING,
++                   "Specified chroma sample location %s is unsupported "
++                   "on the AV1 bit stream level. Usage of a container that "
++                   "allows passing this information - such as Matroska - "
++                   "is recommended.\n",
++                   name);
++            break;
++        }
++    }
++#endif
++
+     if (avctx->profile != FF_PROFILE_UNKNOWN)
+         param->profile = avctx->profile;
+ 
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0008-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0008-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0008-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0008-avcodec-libsvtav1-update-avctx-bit-rate-according-to.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,30 @@
+From 09400412776e25151cb82909ee65479ee9010f7c Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
+Date: Mon, 9 May 2022 21:21:33 +0300
+Subject: [PATCH 8/9] avcodec/libsvtav1: update avctx bit rate according to RC
+ mode
+
+This way we can filter out the default value for this member, which
+is nonzero. This is based on the current state of affairs, in which
+bit-rate-based rate control modes are nonzero in SVT-AV1.
+---
+ libavcodec/libsvtav1.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 8eb3d07843..691bc27133 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -294,7 +294,8 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    avctx->bit_rate                 = param->target_bit_rate;
++    avctx->bit_rate = param->rate_control_mode > 0 ?
++                      param->target_bit_rate : 0;
+ 
+     return 0;
+ }
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0009-avcodec-libsvtav1-signal-CPB-properties-through-side.patch 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0009-avcodec-libsvtav1-signal-CPB-properties-through-side.patch
--- 0.9.1+dfsg-1/ffmpeg_plugin/n5.0/0009-avcodec-libsvtav1-signal-CPB-properties-through-side.patch	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/n5.0/0009-avcodec-libsvtav1-signal-CPB-properties-through-side.patch	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,46 @@
+From 013b64acf7e59a76f0606d00011733402404f205 Mon Sep 17 00:00:00 2001
+From: =?UTF-8?q?Jan=20Ekstr=C3=B6m?= <jeebjp@gmail.com>
+Date: Mon, 9 May 2022 21:29:44 +0300
+Subject: [PATCH 9/9] avcodec/libsvtav1: signal CPB properties through side
+ data
+
+This way values such as maxrate/bufsize can be utilized further
+down the chain.
+
+First, syncs up the max_rate and buffer_size from SVT-AV1 back to
+avctx, and then in case at least one of the utilized values is
+nonzero, adds the CPB properties side data.
+---
+ libavcodec/libsvtav1.c | 16 ++++++++++++++--
+ 1 file changed, 14 insertions(+), 2 deletions(-)
+
+diff --git a/libavcodec/libsvtav1.c b/libavcodec/libsvtav1.c
+index 691bc27133..309ec97f31 100644
+--- a/libavcodec/libsvtav1.c
++++ b/libavcodec/libsvtav1.c
+@@ -294,8 +294,20 @@ static int config_enc_params(EbSvtAv1EncConfiguration *param,
+         param->profile = FF_PROFILE_AV1_HIGH;
+     }
+ 
+-    avctx->bit_rate = param->rate_control_mode > 0 ?
+-                      param->target_bit_rate : 0;
++    avctx->bit_rate       = param->rate_control_mode > 0 ?
++                            param->target_bit_rate : 0;
++    avctx->rc_max_rate    = param->max_bit_rate;
++    avctx->rc_buffer_size = param->vbv_bufsize;
++
++    if (avctx->bit_rate || avctx->rc_max_rate || avctx->rc_buffer_size) {
++        AVCPBProperties *cpb_props = ff_add_cpb_side_data(avctx);
++        if (!cpb_props)
++            return AVERROR(ENOMEM);
++
++        cpb_props->buffer_size = avctx->rc_buffer_size;
++        cpb_props->max_bitrate = avctx->rc_max_rate;
++        cpb_props->avg_bitrate = avctx->bit_rate;
++    }
+ 
+     return 0;
+ }
+-- 
+2.36.1
+
diff -pruN 0.9.1+dfsg-1/ffmpeg_plugin/README.md 1.2.0+dfsg-2/ffmpeg_plugin/README.md
--- 0.9.1+dfsg-1/ffmpeg_plugin/README.md	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/ffmpeg_plugin/README.md	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,100 @@
+# FFmpeg patches for SVT-AV1
+
+This directory contains patches and backported commits that might be of interest
+to users of libsvtav1, each provided as a single patch file.
+
+For the original commits these patches were generated from, see <https://gitlab.com/1480c1/FFmpeg/> and look at the `svt-av1/n*` branches.
+
+## Changes
+
+Notes:
+
+- The patches for n4.4 have been tested to apply cleanly to n4.4.1 and n4.4.2.
+- The patches for n5.0 have been tested to apply cleanly to n5.0.1.
+
+### n5.0 - [svt-av1/n5.0](https://gitlab.com/1480c1/FFmpeg/-/tree/svt-av1/n5.0)
+
+#### Using SVT-AV1 v1.0?
+
+- [c33b404885](https://gitlab.com/1480c1/FFmpeg/-/commit/c33b404885): Backport `-svtav1-params:v`
+- [1dddb930aa](https://gitlab.com/1480c1/FFmpeg/-/commit/1dddb930aa): Backport `-crf:v`, remove `-rc:v` and instead use `-b:v`, `-maxrate:v`, `-crf:v`, and `-qp:v` to set rc mode
+- [50bc872635](https://gitlab.com/1480c1/FFmpeg/-/commit/50bc872635): Backport patch for using aq-mode to determine crf or qp
+- [51c0b9e829](https://gitlab.com/1480c1/FFmpeg/-/commit/51c0b9e829): Backport patch for passing color description info
+- [e3c4442b24](https://gitlab.com/1480c1/FFmpeg/-/commit/e3c4442b24): Backport patch for parsing svtav1-params last
+- [ded0334d21](https://gitlab.com/1480c1/FFmpeg/-/commit/ded0334d21): Backport patch for chroma-sample-location
+- [70887d44ff](https://gitlab.com/1480c1/FFmpeg/-/commit/70887d44ff): Backport patch for not setting tbr if it's not needed
+- [fe100bc556](https://gitlab.com/1480c1/FFmpeg/-/commit/fe100bc556): Backport patch for passing bitrate properties through cpb side data
+
+#### Using SVT-AV1 v1.1?
+
+- [6fd1533057](https://gitlab.com/1480c1/FFmpeg/-/commit/6fd1533057): Backport patch for passing pict_type to libsvtav1 (for force key frame feature)
+
+---
+
+### n4.4 - [svt-av1/n4.4](https://gitlab.com/1480c1/FFmpeg/-/tree/svt-av1/n4.4)
+
+#### Using SVT-AV1 v1.0?
+
+- [04b89e8ae3](https://gitlab.com/1480c1/FFmpeg/-/commit/04b89e8ae3): Backport fix for caps_internal
+- [64e2fb3f9d](https://gitlab.com/1480c1/FFmpeg/-/commit/64e2fb3f9d): Backport change for gop type
+- [0463f5d6d5](https://gitlab.com/1480c1/FFmpeg/-/commit/0463f5d6d5): Backport fix for rc range
+- [c5f3143090](https://gitlab.com/1480c1/FFmpeg/-/commit/c5f3143090): Backport fix for CQP mode, left in to allow later patches to apply cleanly
+- [c33b404885](https://gitlab.com/1480c1/FFmpeg/-/commit/c33b404885): Backport `-svtav1-params:v`
+- [1dddb930aa](https://gitlab.com/1480c1/FFmpeg/-/commit/1dddb930aa): Backport `-crf:v`, remove `-rc:v` and instead use `-b:v`, `-maxrate:v`, `-crf:v`, and `-qp:v` to set rc mode
+- [50bc872635](https://gitlab.com/1480c1/FFmpeg/-/commit/50bc872635): Backport patch for using aq-mode to determine crf or qp
+- [51c0b9e829](https://gitlab.com/1480c1/FFmpeg/-/commit/51c0b9e829): Backport patch for passing color description info
+- [e3c4442b24](https://gitlab.com/1480c1/FFmpeg/-/commit/e3c4442b24): Backport patch for parsing svtav1-params last
+- [ded0334d21](https://gitlab.com/1480c1/FFmpeg/-/commit/ded0334d21): Backport patch for chroma-sample-location
+- [70887d44ff](https://gitlab.com/1480c1/FFmpeg/-/commit/70887d44ff): Backport patch for not setting tbr if it's not needed
+- [fe100bc556](https://gitlab.com/1480c1/FFmpeg/-/commit/fe100bc556): Backport patch for passing bitrate properties through cpb side data
+
+#### Using SVT-AV1 v1.1?
+
+- [6fd1533057](https://gitlab.com/1480c1/FFmpeg/-/commit/6fd1533057): Backport patch for passing pict_type to libsvtav1 (for force key frame feature)
+
+## How to build
+
+The following assumes `$PWD` is the root of your SVT-AV1 clone, that you have
+already built and installed SVT-AV1, and that your `PKG_CONFIG_PATH`
+environment variable is set up so that `pkg-config --libs SvtAv1Enc` works
+properly; this may require pointing `PKG_CONFIG_PATH` at
+`/usr/local/lib/pkgconfig` or wherever your install prefix lives.
+
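+A minimal sketch of that environment setup, assuming the default `/usr/local`
+install prefix (adjust the path if you configured a different
+`CMAKE_INSTALL_PREFIX`):
+
+```bash
+# Prepend the SVT-AV1 pkg-config directory, keeping any existing entries.
+export PKG_CONFIG_PATH="/usr/local/lib/pkgconfig${PKG_CONFIG_PATH:+:$PKG_CONFIG_PATH}"
+# Sanity check: should print the linker flags, e.g. "-lSvtAv1Enc".
+pkg-config --libs SvtAv1Enc
+```
+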
+For n5.0:
+
+```bash
+git clone --branch n5.0 https://github.com/FFmpeg/FFmpeg.git
+git -C FFmpeg am "$PWD/ffmpeg_plugin/n5.0"/*.patch
+```
+
+For tags n4.4*, follow the above steps but replace `n5.0` with `n4.4` or whichever tag you want to use.
+
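+For example, for n4.4 (the same commands with the branch and patch directory
+swapped):
+
+```bash
+git clone --branch n4.4 https://github.com/FFmpeg/FFmpeg.git
+git -C FFmpeg am "$PWD/ffmpeg_plugin/n4.4"/*.patch
+```
+
+Then configure and build:
+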
+```bash
+mkdir -p ffmpeg-build
+(
+    cd ffmpeg-build
+    ../FFmpeg/configure --enable-libsvtav1 # Append other options as needed
+)
+make -C ffmpeg-build -j$(($(nproc) + 2))
+```
+
+Adapt as needed depending on your setup.
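+
+As a quick sanity check, the freshly built binary should list the encoder
+(a sketch; the binary path assumes the out-of-tree build above):
+
+```bash
+ffmpeg-build/ffmpeg -hide_banner -encoders | grep libsvtav1
+```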
+
+## Sample command lines
+
+Basic ffmpeg line
+
+```bash
+ffmpeg -y -i input.mkv -c:v libsvtav1 -crf 30 output.webm
+```
+
+FFmpeg line with crf+maxrate for capped CRF
+
+```bash
+ffmpeg -y -i input.mkv -c:v libsvtav1 -crf 30 -maxrate 6M output.webm
+```
+
+FFmpeg line with svtav1-params setting lp and asm
+
+```bash
+ffmpeg -y -i input.mkv -c:v libsvtav1 -crf 30 -svtav1-params lp=4:asm=sse4_1 output.webm
+```
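+
+With the force-keyframe support noted above (SVT-AV1 v1.1 and later), key
+frames can also be requested at explicit points through FFmpeg's generic
+`-force_key_frames` option; a sketch with illustrative timestamps:
+
+```bash
+ffmpeg -y -i input.mkv -c:v libsvtav1 -crf 30 -force_key_frames 00:00:02,00:00:10 output.webm
+```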
diff -pruN 0.9.1+dfsg-1/.gitlab/workflows/bsd/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab/workflows/bsd/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab/workflows/bsd/.gitlab-ci.yml	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab/workflows/bsd/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,51 @@
+#
+# BSD-like CI jobs
+#
+
+#
+# Anchors
+#
+
+.bsd-compiler-base:
+  extends: .common-ci-base
+  stage: compile
+  tags:
+    - gitlab-org
+  image: registry.gitlab.com/aomediacodec/aom-testing/ubuntu2004-bsd
+  before_script:
+    - |
+      case $CROSS_TARGET in
+      freebsd) EXTRA_CMAKE_FLAGS="--toolchain=/opt/cross-freebsd/toolchain.cmake${EXTRA_CMAKE_FLAGS:+ $EXTRA_CMAKE_FLAGS}" ;;
+      openbsd) EXTRA_CMAKE_FLAGS="--toolchain=/opt/cross-openbsd/toolchain.cmake${EXTRA_CMAKE_FLAGS:+ $EXTRA_CMAKE_FLAGS}" ;;
+      esac
+  script: !reference [.compiler-script]
+
+FreeBSD (Clang):
+  extends: .bsd-compiler-base
+  variables:
+    GIT_DEPTH: 0
+    CROSS_TARGET: freebsd
+  parallel:
+    matrix:
+      - CMAKE_BUILD_TYPE: [Release, Debug]
+  artifacts:
+    untracked: false
+    expire_in: 30 days
+    paths:
+      - Bin/*/SvtAv1EncApp
+      - Bin/*/SvtAv1DecApp
+
+OpenBSD (Clang):
+  extends: .bsd-compiler-base
+  variables:
+    GIT_DEPTH: 0
+    CROSS_TARGET: openbsd
+  parallel:
+    matrix:
+      - CMAKE_BUILD_TYPE: [Release, Debug]
+  artifacts:
+    untracked: false
+    expire_in: 30 days
+    paths:
+      - Bin/*/SvtAv1EncApp
+      - Bin/*/SvtAv1DecApp
diff -pruN 0.9.1+dfsg-1/.gitlab/workflows/common/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab/workflows/common/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab/workflows/common/.gitlab-ci.yml	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab/workflows/common/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -13,23 +13,63 @@
     - "**/*.h"
     - "**/*.in"
     - "**/.gitlab-ci.yml"
+    - "**/*.patch"
 
-.compiler-variables:
-  CFLAGS: -Werror -Wshadow -pipe $EXTRA_CFLAGS
-  CXXFLAGS: -Werror -Wshadow -pipe $EXTRA_CXXFLAGS
-  LDFLAGS: -Werror -Wshadow -pipe $EXTRA_LDFLAGS
+.compiler-variables: &compiler-variables
+  CFLAGS: -Werror -Wshadow -pipe
+  CXXFLAGS: -Werror -Wshadow -pipe
+  LDFLAGS: -Werror -Wshadow -pipe
   CMAKE_BUILD_TYPE: Release
   BUILD_SHARED_LIBS: "OFF"
   PREFIX_DIR: /usr/local
+  FFMPEG_CONFIG_FLAGS: ""
 
 .compiler-script:
+  - export CFLAGS="$CFLAGS${EXTRA_CFLAGS:+ $EXTRA_CFLAGS}"
+  - export CXXFLAGS="$CXXFLAGS${EXTRA_CXXFLAGS:+ $EXTRA_CXXFLAGS}"
+  - export LDFLAGS="$LDFLAGS${EXTRA_LDFLAGS:+ $EXTRA_LDFLAGS}"
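+  # (the ${VAR:+ $VAR} expansions above append the EXTRA_* flags only when they are set)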
+
+  - |
+    cat << EOF
+    Configuration
+    ============
+    CFLAGS="$CFLAGS"
+    CXXFLAGS="$CXXFLAGS"
+    LDFLAGS="$LDFLAGS"
+    CMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE"
+    BUILD_SHARED_LIBS="$BUILD_SHARED_LIBS"
+    PREFIX_DIR="$PREFIX_DIR"
+    ============
+    Running
+    ============
+    cmake -B Build \\
+      -DREPRODUCIBLE_BUILDS=ON \\
+      -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE" \\
+      -DBUILD_SHARED_LIBS="$BUILD_SHARED_LIBS" \\
+      -DCMAKE_INSTALL_PREFIX="$PREFIX_DIR" \\
+      -DCMAKE_C_FLAGS="$CFLAGS" \\
+      -DCMAKE_CXX_FLAGS="$CXXFLAGS" \\
+      -DCMAKE_EXE_LINKER_FLAGS="$LDFLAGS" \\
+      -DCMAKE_SHARED_LINKER_FLAGS="$LDFLAGS" \\
+      -DCMAKE_MODULE_LINKER_FLAGS="$LDFLAGS" \\
+      $EXTRA_CMAKE_FLAGS
+    EOF
+
   - eval cmake
     -B Build
     -DREPRODUCIBLE_BUILDS=ON
-    -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE"
-    -DBUILD_SHARED_LIBS="$BUILD_SHARED_LIBS"
-    -DCMAKE_INSTALL_PREFIX="$PREFIX_DIR"
-    $EXTRA_CMAKE_FLAGS
+    -DCMAKE_BUILD_TYPE=\""$CMAKE_BUILD_TYPE"\"
+    -DBUILD_SHARED_LIBS=\""$BUILD_SHARED_LIBS"\"
+    -DCMAKE_INSTALL_PREFIX=\""$PREFIX_DIR"\"
+    -DCMAKE_C_FLAGS=\""$CFLAGS"\"
+    -DCMAKE_CXX_FLAGS=\""$CXXFLAGS"\"
+    -DCMAKE_EXE_LINKER_FLAGS=\""$LDFLAGS"\"
+    -DCMAKE_SHARED_LINKER_FLAGS=\""$LDFLAGS"\"
+    -DCMAKE_MODULE_LINKER_FLAGS=\""$LDFLAGS"\"
+    $EXTRA_CMAKE_FLAGS || {
+      cat Build/CMakeFiles/CMakeError.log;
+      exit 1;
+    }
   - cmake --build Build --config "$CMAKE_BUILD_TYPE" ${TARGET:+--target $TARGET}
 
 .linux-extract-videos:
@@ -47,7 +87,7 @@
   untracked: false
   expire_in: 1 days
   paths:
-    - "test-*-*-*bit-m*.ivf"
+    - "*.ivf"
 
 .enc-test-script: |
   for CMAKE_BUILD_TYPE in Debug Release; do
@@ -56,6 +96,27 @@
     ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp --preset 8 -i "$SVT_ENCTEST_FILENAME" -n 120 -b "test-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
   done
 
+.enc-ext-test-script: |
+  for CMAKE_BUILD_TYPE in Debug Release; do
+    test -e "./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp" || continue
+
+    # 1-pass VBR
+    ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp -i "$SVT_ENCTEST_FILENAME" --rc 1 --tbr 100 --lp 1 -b "vbr1pass-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m8.ivf" --preset 8
+    ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp -i "$SVT_ENCTEST_FILENAME" --rc 1 --tbr 100 --lp 1 -b "vbr1pass-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m12.ivf" --preset 12
+
+    # 2-pass VBR
+    ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp -i "$SVT_ENCTEST_FILENAME" --rc 1 --tbr 100 --lp 1 -b "vbr2pass-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m8.ivf" --passes 2 --preset 8
+    ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp -i "$SVT_ENCTEST_FILENAME" --rc 1 --tbr 100 --lp 1 -b "vbr2pass-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m12.ivf" --passes 2 --preset 12
+
+    # 1-pass CBR
+    ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp -i "$SVT_ENCTEST_FILENAME" --rc 2 --tbr 100 --lp 1 -b "cbr-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m8.ivf" --pred-struct 1 --preset 8
+    ./Bin/$CMAKE_BUILD_TYPE/SvtAv1EncApp -i "$SVT_ENCTEST_FILENAME" --rc 2 --tbr 100 --lp 1 -b "cbr-${BRANCH:-pr}-$(uname)-${CMAKE_BUILD_TYPE}-${SVT_ENCTEST_BITNESS}bit-m12.ivf" --pred-struct 1 --preset 12
+  done
+
+.ffmpeg-before-script-git-setup:
+  - git config --global user.email "ci@gitlab.com"
+  - git config --global user.name "GitLab CI"
+
 .ffmpeg-before-script-clone:
   - git clone $PWD svtav1-src
   - git clone https://aomedia.googlesource.com/aom aom-src
@@ -67,6 +128,7 @@
 .ffmpeg-before-script-export:
   - true "${CMAKE_BUILD_TYPE:=Release}" "${BUILD_SHARED_LIBS:=OFF}" "${PREFIX_DIR:=/usr/local}"
   - export PKG_CONFIG_PATH=$PREFIX_DIR/lib/pkgconfig${PKG_CONFIG_PATH:+:PKG_CONFIG_PATH}
+  - export PATH="${PREFIX_DIR}/bin:${PATH}"
 
 # SVT-AV1
 .ffmpeg-svtav1-script:
@@ -80,6 +142,7 @@
     -DBUILD_DEC=OFF
     -DREPRODUCIBLE_BUILDS=ON
   - cmake --build svtav1-build --config Release --target install
+  - FFMPEG_CONFIG_FLAGS="${FFMPEG_CONFIG_FLAGS:+$FFMPEG_CONFIG_FLAGS }--enable-libsvtav1"
 # aom
 .ffmpeg-aom-script:
   - cmake
@@ -94,6 +157,7 @@
     -DENABLE_TESTDATA=0
     -DENABLE_TOOLS=0
   - cmake --build aom-build --config Release --target install
+  - FFMPEG_CONFIG_FLAGS="${FFMPEG_CONFIG_FLAGS:+$FFMPEG_CONFIG_FLAGS }--enable-libaom"
 # libvpx
 .ffmpeg-libvpx-script:
   - mkdir vpx-build
@@ -116,6 +180,7 @@
     --prefix="${PREFIX_DIR}"
   - make -j $(getconf _NPROCESSORS_ONLN 2>/dev/null || sysctl -n hw.ncpu) install
   - cd -
+  - FFMPEG_CONFIG_FLAGS="${FFMPEG_CONFIG_FLAGS:+$FFMPEG_CONFIG_FLAGS }--enable-libvpx"
 # dav1d
 .ffmpeg-dav1d-script:
   - meson setup
@@ -128,6 +193,7 @@
     -Denable_tools=false
     dav1d-build dav1d-src
   - meson install -C dav1d-build
+  - FFMPEG_CONFIG_FLAGS="${FFMPEG_CONFIG_FLAGS:+$FFMPEG_CONFIG_FLAGS }--enable-libdav1d"
 # vmaf
 .ffmpeg-vmaf-script:
   - meson setup
@@ -141,6 +207,7 @@
     -Denable_float=true
     vmaf-build vmaf-src/libvmaf
   - meson install -C vmaf-build
+  - FFMPEG_CONFIG_FLAGS="${FFMPEG_CONFIG_FLAGS:+$FFMPEG_CONFIG_FLAGS }--enable-libvmaf"
 # FFmpeg
 # Uses ld=CXX for libvmaf to autolink the stdc++ library
 .ffmpeg-ffmpeg-script:
@@ -154,11 +221,7 @@
     --ld="${CXX:-ccache g++}"
     --enable-gpl --enable-static
     --prefix="${PREFIX_DIR}"
-    --enable-libaom
-    --enable-libdav1d
-    --enable-libsvtav1
-    --enable-libvmaf
-    --enable-libvpx
+    ${FFMPEG_CONFIG_FLAGS}
     --disable-shared || { less ffbuild/config.log; exit 1; }
   - make -j $(getconf _NPROCESSORS_ONLN 2>/dev/null || sysctl -n hw.ncpu) install
   - cp ./ffmpeg $CI_PROJECT_DIR
@@ -168,4 +231,4 @@
 .common-ci-base:
   rules:
     - !reference [.source_code_rules]
-
+  variables: *compiler-variables
diff -pruN 0.9.1+dfsg-1/.gitlab/workflows/linux/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab/workflows/linux/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab/workflows/linux/.gitlab-ci.yml	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab/workflows/linux/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -12,7 +12,6 @@
   tags:
     - gitlab-org
   image: registry.gitlab.com/aomediacodec/aom-testing/ubuntu2004
-  variables: !reference [.compiler-variables]
   script: !reference [.compiler-script]
 
 .linux-test-base:
@@ -24,16 +23,34 @@
   before_script:
     - !reference [.linux-extract-videos]
 
-.linux-unit-test-base:
+.linux-enc-test-base:
+  extends: .linux-test-base
+  script:
+    - !reference [.enc-test-script]
+  parallel: !reference [.enc-test-parallel]
+  artifacts: !reference [.enc-test-artifacts]
+  needs:
+    - Linux (GCC 11, Static)
+
+.linux-default-test-base:
   extends: .linux-test-base
   variables:
-    GTEST_OUTPUT: xml:report.xml
+    BRANCH: default
+  script:
+    - !reference [.enc-test-script]
+  parallel: !reference [.enc-test-parallel]
+  artifacts: !reference [.enc-test-artifacts]
+  needs:
+    - Linux (GCC 11, Default, Static)
+
+.linux-unit-test-base:
+  extends: .linux-test-base
   artifacts:
     when: always
     reports:
-      junit: report.xml
+      junit: reports/*.xml
   needs:
-    - Linux (GCC 10, Tests, Static)
+    - Linux (GCC 11, Tests, Static)
 
 #
 # Compile jobs
@@ -53,14 +70,27 @@ Linux (Valgrind):
     paths:
       - valgrind/
 
+Linux (Alpine, musl):
+  extends: .linux-compiler-base
+  image: registry.gitlab.com/aomediacodec/aom-testing/alpine3-cc
+  variables:
+    EXTRA_LDFLAGS: -static -static-libgcc
+    EXTRA_CMAKE_FLAGS: -DCMAKE_OUTPUT_DIRECTORY=musl
+  artifacts:
+    untracked: false
+    expire_in: 30 days
+    paths:
+      - musl/SvtAv1EncApp
+      - musl/SvtAv1DecApp
+
 Linux (Clang):
   extends: .linux-compiler-base
   parallel:
     matrix:
       - CC: clang-6.0
         CXX: clang++-6.0
-      - CC: clang-10
-        CXX: clang++-10
+      - CC: clang-12
+        CXX: clang++-12
 
 Linux (GCC 4):
   extends: .linux-compiler-base
@@ -76,8 +106,8 @@ Linux (GCC):
         CXX: g++-8
       - CC: gcc-9
         CXX: g++-9
-      - CC: gcc-10
-        CXX: g++-10
+      - CC: gcc-11
+        CXX: g++-11
         EXTRA_CMAKE_FLAGS: -DENABLE_AVX512=ON
       - CC: aarch64-linux-gnu-gcc
         CXX: aarch64-linux-gnu-g++
@@ -98,11 +128,11 @@ Linux (GCC):
         ;;
       esac
 
-Linux (GCC 10, Static):
+Linux (GCC 11, Static):
   extends: .linux-compiler-base
   variables:
-    CC: gcc-10
-    CXX: g++-10
+    CC: gcc-11
+    CXX: g++-11
     EXTRA_LDFLAGS: -static -static-libgcc -static-libstdc++
     GIT_DEPTH: 0
   parallel:
@@ -115,11 +145,11 @@ Linux (GCC 10, Static):
       - Bin/*/SvtAv1EncApp
       - Bin/*/SvtAv1DecApp
 
-Linux (GCC 10, Tests, Static):
+Linux (GCC 11, Tests, Static):
   extends: .linux-compiler-base
   variables:
-    CC: gcc-10
-    CXX: g++-10
+    CC: gcc-11
+    CXX: g++-11
     EXTRA_CFLAGS: -g
     EXTRA_CXXFLAGS: -g
     EXTRA_LDFLAGS: -static -static-libgcc -static-libstdc++
@@ -132,37 +162,44 @@ Linux (GCC 10, Tests, Static):
       - Bin/Release/SvtAv1ApiTests
       - Bin/Release/SvtAv1E2ETests
 
-Linux Sanitizer Compile:
+Linux (GCC 11, Default, Static):
   extends: .linux-compiler-base
   variables:
-    CC: clang-10
-    CXX: clang++-10
-    CMAKE_BUILD_TYPE: Debug
-    EXTRA_CMAKE_FLAGS: -DCMAKE_OUTPUT_DIRECTORY=$SANITIZER -DSANITIZER=$SANITIZER
+    CC: gcc-11
+    CXX: g++-11
+    EXTRA_LDFLAGS: -static -static-libgcc -static-libstdc++
+    GIT_DEPTH: 0
   parallel:
     matrix:
-      - SANITIZER: [address, memory, thread]
+      - CMAKE_BUILD_TYPE: [Release, Debug]
+  before_script:
+    - git fetch ${CI_MERGE_REQUEST_PROJECT_URL:-https://gitlab.com/AOMediaCodec/SVT-AV1.git} "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-HEAD}"
+    - git checkout FETCH_HEAD
   artifacts:
     untracked: false
-    expire_in: 1 days
+    expire_in: 30 days
     paths:
-      - address/
-      - memory/
-      - thread/
+      - Bin/*/SvtAv1EncApp
+      - Bin/*/SvtAv1DecApp
 
-Linux Integer Overflow Compile:
+Linux Sanitizer Compile:
   extends: .linux-compiler-base
   variables:
-    CC: clang-10
-    CXX: clang++-10
+    CC: clang-12
+    CXX: clang++-12
     CMAKE_BUILD_TYPE: Debug
-    EXTRA_CFLAGS: -fsanitize=signed-integer-overflow,unsigned-integer-overflow
-    EXTRA_CXXFLAGS: -fsanitize=signed-integer-overflow,unsigned-integer-overflow
+    EXTRA_CMAKE_FLAGS: -DCMAKE_OUTPUT_DIRECTORY=$SANITIZER -DSANITIZER=$SANITIZER
+  parallel:
+    matrix:
+      - SANITIZER: [address, memory, thread, "signed-integer-overflow,unsigned-integer-overflow"]
   artifacts:
     untracked: false
     expire_in: 1 days
     paths:
-      - Bin/Debug/SvtAv1EncApp
+      - address/
+      - memory/
+      - thread/
+      - signed-integer-overflow,unsigned-integer-overflow/
 
 #
 # Test jobs
@@ -174,10 +211,14 @@ Valgrind:
   parallel:
     matrix:
       - PASSES: 2
-      - PASS: 1
+      - PASS: [1, ]
+        RC: [--rc 1 --tbr 500, -q 35]
+        REF_SCALE: [--resize-mode 0, --resize-mode 1, --resize-mode 2, --resize-mode 3, --resize-mode 4]
   script:
     # --error-limit=no --leak-check=full --show-leak-kinds=all makes the log very huge and takes around 16 minutes
-    - valgrind --error-exitcode=1 --track-origins=yes --suppressions=/usr/lib/valgrind/debian.supp -- ./valgrind/SvtAv1EncApp --preset 6 ${PASS:+--pass $PASS} -i akiyo_cif.y4m --rc 1 --tbr 500 -n 10 --lp 7 -b test1.ivf
+    - printf 'testing:\n'
+    - echo ./valgrind/SvtAv1EncApp --preset 6 ${PASS:+--pass $PASS} -i akiyo_cif.y4m ${RC} -n 10 --lp 7 -b test1.ivf ${REF_SCALE}
+    - valgrind --error-exitcode=1 --track-origins=yes --suppressions=/usr/lib/valgrind/debian.supp -- ./valgrind/SvtAv1EncApp --preset 6 ${PASS:+--pass $PASS} -i akiyo_cif.y4m ${RC} -n 10 --lp 7 -b test1.ivf ${REF_SCALE}
   needs:
     - Linux (Valgrind)
 
@@ -197,37 +238,36 @@ Linux Sanitizer Test:
       - SANITIZER: memory
         svt_asm: 0
       - SANITIZER: thread
+      - SANITIZER: "signed-integer-overflow,unsigned-integer-overflow"
   script:
     - |
-      command="$PWD/$SANITIZER/SvtAv1EncApp -i Chimera-Aerial_480x264_2997fps_10bit_420_150frames.y4m ${svt_asm:+--asm $svt_asm} -n 60 --preset 8 -b output.ivf"
+      command="$PWD/$SANITIZER/SvtAv1EncApp -i Chimera-Aerial_480x264_2997fps_10bit_420_150frames.y4m ${svt_asm:+--asm $svt_asm} -n 60 --preset 8 -b output.ivf --progress 0"
       case $SANITIZER in
-      address|memory) $command ;;
-      *) gdb -q -ex "handle SIG32 nostop" -ex r -ex bt -ex q --return-child-result --args $command ;;
+      thread) gdb -q -ex "handle SIG32 nostop" -ex r -ex bt -ex q --return-child-result --args $command ;;
+      *) $command ;;
       esac
   needs:
     - Linux Sanitizer Compile
 
 Linux Unit Tests:
   extends: .linux-unit-test-base
-  parallel:
-    matrix:
-      - UNIT_TEST: [SvtAv1UnitTests, SvtAv1E2ETests]
-        GTEST_TOTAL_SHARDS: 10
-        GTEST_SHARD_INDEX: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
   variables:
     SVT_AV1_TEST_VECTOR_PATH: $CI_PROJECT_DIR/test/vectors
+    TOTAL_SHARDS: 20
+  parallel:
+    matrix:
+      - SHARD_INDEX: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19]
   cache:
     key: testvectors
     paths:
       - test/vectors
     policy: pull-push
+  before_script:
+    - curl -Ls "https://raw.githubusercontent.com/1480c1/gtest-parallel/unique_id/gtest_parallel.py" > gtest_parallel.py
   script:
-    - |
-      if [ -n "$SVT_AV1_TEST_VECTOR_PATH" ]; then
-        cmake -B Build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON
-        cmake --build Build --target TestVectors
-      fi
-    - ./Bin/Release/$UNIT_TEST
+    - cmake -B Build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON
+    - cmake --build Build --target TestVectors
+    - python3 gtest_parallel.py --print_test_times --shard_index=$SHARD_INDEX --shard_count=$TOTAL_SHARDS ./Bin/Release/SvtAv1UnitTests ./Bin/Release/SvtAv1E2ETests -- --gtest_output="xml:reports/{#}.xml"
 
 Linux RC Test:
   extends: .linux-test-base
@@ -258,86 +298,57 @@ Linux RC Test:
       done
       $ret
   needs:
-    - Linux (GCC 10, Static)
+    - Linux (GCC 11, Static)
 
 Linux Enc Test:
+  extends: .linux-enc-test-base
+
+Linux Enc Musl Test:
   extends: .linux-test-base
-  stage: test
   script:
-    - !reference [.enc-test-script]
-  parallel: !reference [.enc-test-parallel]
-  artifacts: !reference [.enc-test-artifacts]
+    - ./musl/SvtAv1EncApp -i akiyo_cif.y4m -b test1.ivf
   needs:
-    - Linux (GCC 10, Static)
+    - Linux (Alpine, musl)
+
+Linux Default Test:
+  extends: .linux-default-test-base
+
+Linux Enc CBRVBR Test:
+  extends: .linux-enc-test-base
+  script:
+    - !reference [.enc-ext-test-script]
+
+Linux Default CBRVBR Test:
+  extends: .linux-default-test-base
+  script:
+    - !reference [.enc-ext-test-script]
 
 Linux Intra Enc Test:
-  extends: .linux-test-base
-  stage: test
+  extends: .linux-enc-test-base
   script:
     - |
       for INTRA_PERIOD in -1 200; do
         ./Bin/Release/SvtAv1EncApp --preset 2 --keyint $INTRA_PERIOD -i "$SVT_ENCTEST_FILENAME" -n 17 -b "test-${BRANCH:-pr}-$(uname)-intra${INTRA_PERIOD}-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
         ./Bin/Release/SvtAv1EncApp --preset 8 --keyint $INTRA_PERIOD -i "$SVT_ENCTEST_FILENAME" -n 120 -b "test-${BRANCH:-pr}-$(uname)-intra${INTRA_PERIOD}-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
       done
-  parallel: !reference [.enc-test-parallel]
-  artifacts: !reference [.enc-test-artifacts]
-  needs:
-    - Linux (GCC 10, Static)
-
-Linux Integer Overflow Test:
-  extends:
-    - .linux-test-base
-  stage: test
-  allow_failure: false
-  script:
-    - |
-      ret=true
-      ./Bin/Debug/SvtAv1EncApp --preset 2 -i "$SVT_ENCTEST_FILENAME" -n 17 -b "test-int-overflow-${BRANCH:-pr}-$(uname)-${SVT_ENCTEST_BITNESS}bit-m2.ivf" 2> "test-int-overflow-${BRANCH:-pr}-$(uname)-${SVT_ENCTEST_BITNESS}bit-m2.log"
-      ./Bin/Debug/SvtAv1EncApp --preset 8 -i "$SVT_ENCTEST_FILENAME" -n 120 -b "test-int-overflow-${BRANCH:-pr}-$(uname)-${SVT_ENCTEST_BITNESS}bit-m8.ivf" 2> "test-int-overflow-${BRANCH:-pr}-$(uname)-${SVT_ENCTEST_BITNESS}bit-m8.log"
-      num_errors=$(sort -u test-int-overflow-${BRANCH:-pr}-$(uname)-${SVT_ENCTEST_BITNESS}bit-m*.log | awk 'BEGIN{count=0}/error/ {print $0 > "/dev/stderr"; count=count+1} END{print count}')
-
-      if [[ $num_errors > 0 ]]; then
-        ret=false
-        echo "Found $num_errors integer overflows"
-      fi
-      $ret
-  parallel: !reference [.enc-test-parallel]
-  needs:
-    - Linux Integer Overflow Compile
 
 Linux Passes vs Passes Test:
-  extends: .linux-test-base
-  stage: test
-  script:
-      - |
-         ./Bin/Release/SvtAv1EncApp --preset 2 --passes 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 -b "test-${BRANCH:-pr}-$(uname)-passes=2-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
-         ./Bin/Release/SvtAv1EncApp --preset 8 --passes 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 -b "test-${BRANCH:-pr}-$(uname)-passes=2-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
-         ./Bin/Release/SvtAv1EncApp --preset 2 --pass 1 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m2.stat"
-         ./Bin/Release/SvtAv1EncApp --preset 2 --pass 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m2.stat" -b "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
-         ./Bin/Release/SvtAv1EncApp --preset 8 --pass 1 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m8.stat"
-         ./Bin/Release/SvtAv1EncApp --preset 8 --pass 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m8.stat" -b "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
-
-  parallel: !reference [.enc-test-parallel]
-  artifacts: !reference [.enc-test-artifacts]
-  needs:
-    - Linux (GCC 10, Static)
-
-Linux Default Branch:
-  extends:
-    - .linux-compiler-base
-    - .linux-test-base
-  stage: compile
-  variables:
-    BRANCH: default
-  before_script:
-    - git fetch ${CI_MERGE_REQUEST_PROJECT_URL:-https://gitlab.com/AOMediaCodec/SVT-AV1.git} "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-master}"
-    - git checkout FETCH_HEAD
-    - !reference [.linux-extract-videos]
+  extends: .linux-enc-test-base
   script:
-    - !reference [.compiler-script]
-    - !reference [.enc-test-script]
-  parallel: !reference [.enc-test-parallel]
-  artifacts: !reference [.enc-test-artifacts]
+    - ./Bin/Release/SvtAv1EncApp --preset 2 --passes 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 -b "test-${BRANCH:-pr}-$(uname)-passes=2-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
+    - ./Bin/Release/SvtAv1EncApp --preset 8 --passes 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 -b "test-${BRANCH:-pr}-$(uname)-passes=2-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
+    - ./Bin/Release/SvtAv1EncApp --preset 2 --pass 1 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m2.stat"
+    - ./Bin/Release/SvtAv1EncApp --preset 2 --pass 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m2.stat" -b "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
+    - ./Bin/Release/SvtAv1EncApp --preset 8 --pass 1 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m8.stat"
+    - ./Bin/Release/SvtAv1EncApp --preset 8 --pass 2 --irefresh-type 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 --stats "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m8.stat" -b "test-${BRANCH:-pr}-$(uname)-pass=2-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
+
+Linux Multiple Channels Test:
+  extends: .linux-enc-test-base
+  script:
+    - ./Bin/Release/SvtAv1EncApp --preset 2 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 17 -b "test-${BRANCH:-pr}-$(uname)-nch=1-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
+    - ./Bin/Release/SvtAv1EncApp --preset 8 --keyint -1 -i "$SVT_ENCTEST_FILENAME" -n 120 -b "test-${BRANCH:-pr}-$(uname)-nch=1-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
+    - ./Bin/Release/SvtAv1EncApp --nch 2 --preset 2 2 --keyint -1 -1 -i "$SVT_ENCTEST_FILENAME" "$SVT_ENCTEST_FILENAME" -n 17 17 -b "test-${BRANCH:-pr}-$(uname)-nch=2-1-${SVT_ENCTEST_BITNESS}bit-m2.ivf" "test-${BRANCH:-pr}-$(uname)-nch=2-2-${SVT_ENCTEST_BITNESS}bit-m2.ivf"
+    - ./Bin/Release/SvtAv1EncApp --nch 2 --preset 8 8 --keyint -1 -1 -i "$SVT_ENCTEST_FILENAME" "$SVT_ENCTEST_FILENAME" -n 120 120 -b "test-${BRANCH:-pr}-$(uname)-nch=2-1-${SVT_ENCTEST_BITNESS}bit-m8.ivf" "test-${BRANCH:-pr}-$(uname)-nch=2-2-${SVT_ENCTEST_BITNESS}bit-m8.ivf"
 
 Enc Default Diff test:
   extends: .common-ci-base
@@ -345,21 +356,37 @@ Enc Default Diff test:
   image: registry.gitlab.com/aomediacodec/aom-testing/alpine3
   allow_failure: true
   script:
+    - success=true
     - |
-      success=true
-      for dist in *.ivf; do
-          case $dist in
-          test-pr-Linux-Release-*bit-*.ivf) continue ;;
-          *-8bit-m2.ivf) diff -q test-pr-Linux-Release-8bit-m2.ivf "$dist" || success=false ;;
-          *-8bit-m8.ivf) diff -q test-pr-Linux-Release-8bit-m8.ivf "$dist" || success=false ;;
-          *-10bit-m2.ivf) diff -q test-pr-Linux-Release-10bit-m2.ivf "$dist" || success=false ;;
-          *-10bit-m8.ivf) diff -q test-pr-Linux-Release-10bit-m8.ivf "$dist" || success=false ;;
-          esac
+      for dist in test-*-*-*-*bit-*.ivf; do
+        case $dist in *-default-Linux-Release-*bit-*.ivf) continue;; esac
+        printf 'Testing %s\n' "$dist"
+        case $dist in
+        test-*-8bit-m2.ivf) diff -q test-default-Linux-Release-8bit-m2.ivf "$dist" || success=false ;;
+        test-*-8bit-m8.ivf) diff -q test-default-Linux-Release-8bit-m8.ivf "$dist" || success=false ;;
+        test-*-10bit-m2.ivf) diff -q test-default-Linux-Release-10bit-m2.ivf "$dist" || success=false ;;
+        test-*-10bit-m8.ivf) diff -q test-default-Linux-Release-10bit-m8.ivf "$dist" || success=false ;;
+
+        vbr1pass-*-8bit-m2.ivf) diff -q vbr1pass-default-Linux-Release-8bit-m2.ivf "$dist" || success=false ;;
+        vbr1pass-*-8bit-m8.ivf) diff -q vbr1pass-default-Linux-Release-8bit-m8.ivf "$dist" || success=false ;;
+        vbr1pass-*-10bit-m2.ivf) diff -q vbr1pass-default-Linux-Release-10bit-m2.ivf "$dist" || success=false ;;
+        vbr1pass-*-10bit-m8.ivf) diff -q vbr1pass-default-Linux-Release-10bit-m8.ivf "$dist" || success=false ;;
+        vbr2pass-*-8bit-m2.ivf) diff -q vbr2pass-default-Linux-Release-8bit-m2.ivf "$dist" || success=false ;;
+        vbr2pass-*-8bit-m8.ivf) diff -q vbr2pass-default-Linux-Release-8bit-m8.ivf "$dist" || success=false ;;
+        vbr2pass-*-10bit-m2.ivf) diff -q vbr2pass-default-Linux-Release-10bit-m2.ivf "$dist" || success=false ;;
+        vbr2pass-*-10bit-m8.ivf) diff -q vbr2pass-default-Linux-Release-10bit-m8.ivf "$dist" || success=false ;;
+        cbr-*-8bit-m2.ivf) diff -q cbr-default-Linux-Release-8bit-m2.ivf "$dist" || success=false ;;
+        cbr-*-8bit-m8.ivf) diff -q cbr-default-Linux-Release-8bit-m8.ivf "$dist" || success=false ;;
+        cbr-*-10bit-m2.ivf) diff -q cbr-default-Linux-Release-10bit-m2.ivf "$dist" || success=false ;;
+        cbr-*-10bit-m8.ivf) diff -q cbr-default-Linux-Release-10bit-m8.ivf "$dist" || success=false ;;
+        esac
       done
-      $success
+    - $success
   needs:
     - Linux Enc Test
-    - Linux Default Branch
+    - Linux Default Test
+    - Linux Enc CBRVBR Test
+    - Linux Default CBRVBR Test
 
 Enc Intra Diff Test:
   extends: .common-ci-base
@@ -384,8 +411,8 @@ Linux Gstreamer (Static):
   extends: .linux-test-base
   stage: compile
   variables:
-    CC: gcc-10
-    CXX: g++-10
+    CC: gcc-11
+    CXX: g++-11
     CFLAGS: -pipe
     CXXFLAGS: -pipe
     LDFLAGS: -pipe
@@ -403,17 +430,53 @@ Linux Gstreamer (Static):
         ! webmmux \
         ! filesink location=akiyo.mkv
 
+Linux libavif (Static):
+  extends: .linux-test-base
+  stage: compile
+  variables:
+    CC: gcc-11
+    CXX: g++-11
+    CFLAGS: -pipe
+    CXXFLAGS: -pipe
+    LDFLAGS: -pipe
+    PKG_CONFIG_PATH: /usr/local/lib/pkgconfig
+    GIT_DEPTH: 0
+    CMAKE_BUILD_TYPE: Debug
+  before_script:
+    - !reference [.ffmpeg-before-script-git-setup]
+    - git clone $PWD svtav1-src
+    - git clone https://github.com/AOMediaCodec/libavif.git libavif-src
+    - git clone https://code.videolan.org/videolan/dav1d.git dav1d-src
+    - !reference [.ffmpeg-before-script-export]
+    - apt update && apt install -yyy libpng-dev libjpeg-dev
+  script:
+    - !reference [.ffmpeg-svtav1-script]
+    - !reference [.ffmpeg-dav1d-script]
+    - cmake
+      -S libavif-src
+      -B libavif-build
+      -DCMAKE_BUILD_TYPE="$CMAKE_BUILD_TYPE"
+      -DBUILD_SHARED_LIBS="$BUILD_SHARED_LIBS"
+      -DCMAKE_INSTALL_PREFIX="$PREFIX_DIR"
+      -DAVIF_CODEC_DAV1D=ON
+      -DAVIF_CODEC_SVT=ON
+      -DAVIF_BUILD_EXAMPLES=ON
+      -DAVIF_BUILD_APPS=ON
+    - cmake --build libavif-build --config Release --target install
+    - valgrind --error-exitcode=1 --track-origins=yes --suppressions=/usr/lib/valgrind/debian.supp -- avifenc -c svt -d 8 -y 420 -r limited -j 2 ./Docs/img/me_fig_new.png out.avif
+
 Linux FFmpeg (Static):
   extends: .linux-test-base
   stage: compile
   variables:
-    CC: gcc-10
-    CXX: g++-10
+    CC: gcc-11
+    CXX: g++-11
     CFLAGS: -pipe
     CXXFLAGS: -pipe
     LDFLAGS: -pipe -static -static-libgcc -static-libstdc++
     GIT_DEPTH: 0
   before_script:
+    - !reference [.ffmpeg-before-script-git-setup]
     - !reference [.ffmpeg-before-script-clone]
     - !reference [.ffmpeg-before-script-export]
   script:
@@ -442,6 +505,40 @@ Linux FFmpeg (Static):
     paths:
       - ffmpeg
 
+Linux FFmpeg (n5.0, Static):
+  extends: .linux-test-base
+  stage: compile
+  variables:
+    CC: gcc-11
+    CXX: g++-11
+    CFLAGS: -pipe
+    CXXFLAGS: -pipe
+    LDFLAGS: -pipe -static -static-libgcc -static-libstdc++
+    GIT_DEPTH: 0
+    CMAKE_BUILD_TYPE: Debug
+  before_script:
+    - !reference [.ffmpeg-before-script-git-setup]
+    - !reference [.ffmpeg-before-script-clone]
+    - git -C ffmpeg-src checkout n5.0
+    - git -C ffmpeg-src am -3 "$PWD/ffmpeg_plugin/n5.0/"*.patch
+    - !reference [.ffmpeg-before-script-export]
+  script:
+    - !reference [.ffmpeg-svtav1-script]
+    - !reference [.ffmpeg-ffmpeg-script]
+    - ccache -s
+  after_script:
+    - ./ffmpeg -y -s:v 64x64 -f rawvideo -i /dev/zero
+      -vframes 10 -c:v libsvtav1 test.mkv
+      -vframes 10 -c:v libsvtav1 test.mp4
+      -vframes 10 -c:v libsvtav1 test.webm
+      -vframes 10 -c:v libsvtav1 test.ivf
+      -vframes 10 -c:v libsvtav1 test.obu
+  artifacts:
+    untracked: false
+    expire_in: 30 days
+    paths:
+      - ffmpeg
+
 Decoder Test:
   extends: .linux-test-base
   stage: .post
@@ -461,5 +558,5 @@ Decoder Test:
     - diff decout10.yuv ffmpegout10.yuv
   needs:
     - Linux Enc Test
-    - Linux (GCC 10, Static)
+    - Linux (GCC 11, Static)
     - Linux FFmpeg (Static)
diff -pruN 0.9.1+dfsg-1/.gitlab/workflows/macos/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab/workflows/macos/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab/workflows/macos/.gitlab-ci.yml	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab/workflows/macos/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -28,12 +28,10 @@
 
 .macos-unit-test-base:
   extends: .macos-test-base
-  variables:
-    GTEST_OUTPUT: xml:report.xml
   artifacts:
     when: always
     reports:
-      junit: report.xml
+      junit: reports/*.xml
   needs:
     - macOS (Static, Tests)
 
@@ -68,25 +66,23 @@ macOS (Static, Tests):
 
 macOS Unit Tests:
   extends: .macos-unit-test-base
+  variables:
+    SVT_AV1_TEST_VECTOR_PATH: $CI_PROJECT_DIR/test/vectors
+    TOTAL_SHARDS: 4
   parallel:
     matrix:
-      - UNIT_TEST: SvtAv1UnitTests
-        GTEST_TOTAL_SHARDS: 4
-        GTEST_SHARD_INDEX: [0, 1, 2, 3]
-      - UNIT_TEST: SvtAv1E2ETests
-        SVT_AV1_TEST_VECTOR_PATH: $CI_PROJECT_DIR/test/vectors
+      - SHARD_INDEX: [0, 1, 2, 3]
   cache:
     key: testvectors
     paths:
       - test/vectors
     policy: pull-push
+  before_script:
+    - curl -Ls "https://raw.githubusercontent.com/1480c1/gtest-parallel/unique_id/gtest_parallel.py" > gtest_parallel.py
   script:
-    - |
-      if [ -n "$SVT_AV1_TEST_VECTOR_PATH" ]; then
-        cmake -B Build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON
-        cmake --build Build --target TestVectors
-      fi
-    - ./Bin/Release/$UNIT_TEST
+    - cmake -B Build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON
+    - cmake --build Build --target TestVectors
+    - python3 gtest_parallel.py --print_test_times --shard_index=$SHARD_INDEX --shard_count=$TOTAL_SHARDS ./Bin/Release/SvtAv1UnitTests ./Bin/Release/SvtAv1E2ETests -- --gtest_output="xml:reports/{#}.xml"
 
 macOS Enc Test:
   extends: .macos-test-base
diff -pruN 0.9.1+dfsg-1/.gitlab/workflows/standard/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab/workflows/standard/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab/workflows/standard/.gitlab-ci.yml	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab/workflows/standard/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -6,6 +6,7 @@ include:
   - local: /.gitlab/workflows/windows/.gitlab-ci.yml
   - local: /.gitlab/workflows/macos/.gitlab-ci.yml
   - local: /.gitlab/workflows/linux/.gitlab-ci.yml
+  - local: /.gitlab/workflows/bsd/.gitlab-ci.yml
 
 Enc OS Diff Test:
   extends: .common-ci-base
diff -pruN 0.9.1+dfsg-1/.gitlab/workflows/windows/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab/workflows/windows/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab/workflows/windows/.gitlab-ci.yml	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab/workflows/windows/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -33,12 +33,10 @@
 
 .windows-unit-test-base:
   extends: .windows-test-base
-  variables:
-    GTEST_OUTPUT: xml:report.xml
   artifacts:
     when: always
     reports:
-      junit: report.xml
+      junit: reports/*.xml
   needs:
     - Win64 (MSVC, Tests)
 
@@ -49,8 +47,8 @@
 Win64 (MSVC, Tests):
   extends: .windows-compiler-base
   variables:
-    CFLAGS: /WX
-    CXXFLAGS: /WX
+    CFLAGS: /WX /D_CRT_SECURE_NO_WARNINGS=1
+    CXXFLAGS: /WX /D_CRT_SECURE_NO_WARNINGS=1
     CMAKE_GENERATOR: Visual Studio 16 2019
   parallel:
     matrix:
@@ -69,22 +67,23 @@ Win64 (MSVC, Tests):
 
 Win64 Unit Tests:
   extends: .windows-unit-test-base
+  variables:
+    SVT_AV1_TEST_VECTOR_PATH: $CI_PROJECT_DIR/test/vectors
+    TOTAL_SHARDS: 4
   parallel:
     matrix:
-      - UNIT_TEST: SvtAv1UnitTests
-        GTEST_TOTAL_SHARDS: 3
-        GTEST_SHARD_INDEX: [0, 1, 2]
-      - UNIT_TEST: SvtAv1E2ETests
-        SVT_AV1_TEST_VECTOR_PATH: $CI_PROJECT_DIR/test/vectors
+      - SHARD_INDEX: [0, 1, 2, 3]
   cache:
     key: testvectors
     paths:
       - test/vectors
     policy: pull-push
+  before_script:
+    - Invoke-WebRequest -OutFile gtest_parallel.py -Uri https://raw.githubusercontent.com/1480c1/gtest-parallel/unique_id/gtest_parallel.py
   script:
     - cmake -B Build -DCMAKE_BUILD_TYPE=Release -DBUILD_TESTING=ON
     - cmake --build Build --target TestVectors
-    - "& ./Bin/Release/$env:UNIT_TEST --gtest_filter=-*FFT*"
+    - python gtest_parallel.py --print_test_times --shard_index=$env:SHARD_INDEX --shard_count=$env:TOTAL_SHARDS ./Bin/Release/SvtAv1UnitTests ./Bin/Release/SvtAv1E2ETests -- --gtest_output="xml:reports/{#}.xml"
 
 Win64 Enc Test:
   extends: .windows-test-base
diff -pruN 0.9.1+dfsg-1/.gitlab-ci.yml 1.2.0+dfsg-2/.gitlab-ci.yml
--- 0.9.1+dfsg-1/.gitlab-ci.yml	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/.gitlab-ci.yml	2022-08-01 19:12:00.000000000 +0000
@@ -8,6 +8,8 @@ workflow:
 stages:
   - compile
   - test
+  - test-2
+  - test-3
 
 variables:
   CMAKE_GENERATOR: Ninja
@@ -17,14 +19,14 @@ default:
   interruptible: true
   retry: 2
   cache:
-    key: ${CI_JOB_NAME}
+    key: "$CI_COMMIT_REF_SLUG"
     paths:
       - .ccache
     policy: pull-push
 
 include:
   - local: /.gitlab/workflows/common/.gitlab-ci.yml
-  - local: /.gitlab/workflows/nightly/.gitlab-ci.yml
+  - remote: 'https://gitlab.com/AOMediaCodec/aom-testing/-/raw/ci-build-testing/.gitlab/workflows/nightly/.gitlab-ci.yml'
     rules:
       - if: $NIGHTLY != null
   - local: /.gitlab/workflows/standard/.gitlab-ci.yml
@@ -40,6 +42,19 @@ Style check:
   image: registry.gitlab.com/aomediacodec/aom-testing/alpine3
   script:
     - ./test/stylecheck.sh
+    - git fetch ${CI_MERGE_REQUEST_PROJECT_URL:-https://gitlab.com/AOMediaCodec/SVT-AV1.git} "${CI_MERGE_REQUEST_TARGET_BRANCH_NAME:-HEAD}"
+    - d=$(git diff FETCH_HEAD | /usr/share/clang/clang-format-diff.py -p1)
+    - |
+      if [ -n "$d" ]; then
+        cat <<FOE
+      clang-format check failed!
+      Please run inside a posix compatible shell with git and amend or commit the result
+      git apply -p0 <<EOF
+      $d
+      EOF
+      FOE
+        exit 1
+      fi
 
 Static analysis (cppcheck):
   rules:
diff -pruN 0.9.1+dfsg-1/gstreamer-plugin/gstsvtav1enc.c 1.2.0+dfsg-2/gstreamer-plugin/gstsvtav1enc.c
--- 0.9.1+dfsg-1/gstreamer-plugin/gstsvtav1enc.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/gstreamer-plugin/gstsvtav1enc.c	2022-08-01 19:12:00.000000000 +0000
@@ -69,7 +69,6 @@ static gboolean gst_svtav1enc_propose_al
 static gboolean gst_svtav1enc_flush (GstVideoEncoder * encoder);
 
 /* helpers */
-void set_default_svt_configuration (EbSvtAv1EncConfiguration * svt_config);
 gint compare_video_code_frame_and_pts (const void *video_codec_frame_ptr,
     const void *pts_ptr);
 
@@ -309,7 +308,6 @@ gst_svtav1enc_init (GstSvtAv1Enc * svtav
     return;
   }
   /* setting configuration here since svt_av1_enc_init_handle overrides it */
-  set_default_svt_configuration (svtav1enc->svt_config);
   GST_OBJECT_UNLOCK (svtav1enc);
 }
 
@@ -491,6 +489,7 @@ gst_svtav1enc_allocate_svt_buffers (GstS
   svtav1enc->input_buf->size = sizeof (EbBufferHeaderType);
   svtav1enc->input_buf->p_app_private = NULL;
   svtav1enc->input_buf->pic_type = EB_AV1_INVALID_PICTURE;
+  svtav1enc->input_buf->metadata = NULL;
 
   return TRUE;
 }
@@ -521,15 +520,11 @@ gst_svtav1enc_configure_svt (GstSvtAv1En
   svtav1enc->svt_config->source_height = GST_VIDEO_INFO_HEIGHT (info);
   svtav1enc->svt_config->frame_rate_numerator = GST_VIDEO_INFO_FPS_N (info)> 0 ? GST_VIDEO_INFO_FPS_N (info) : 1;
   svtav1enc->svt_config->frame_rate_denominator = GST_VIDEO_INFO_FPS_D (info) > 0 ? GST_VIDEO_INFO_FPS_D (info) : 1;
-  svtav1enc->svt_config->frame_rate =
-      svtav1enc->svt_config->frame_rate_numerator /
-      svtav1enc->svt_config->frame_rate_denominator;
-
-  if (svtav1enc->svt_config->frame_rate < 1000) {
-      svtav1enc->svt_config->frame_rate = svtav1enc->svt_config->frame_rate << 16;
-  }
-
-  GST_LOG_OBJECT(svtav1enc, "width %d, height %d, framerate %d", svtav1enc->svt_config->source_width, svtav1enc->svt_config->source_height, svtav1enc->svt_config->frame_rate);
+  GST_LOG_OBJECT(svtav1enc,
+                "width %d, height %d, framerate %d",
+                svtav1enc->svt_config->source_width,
+                svtav1enc->svt_config->source_height,
+                svtav1enc->svt_config->frame_rate_numerator / svtav1enc->svt_config->frame_rate_denominator);
 
   /* TODO: better handle HDR metadata when GStreamer will have such support
    * https://gitlab.freedesktop.org/gstreamer/gst-plugins-base/issues/400 */
@@ -561,80 +556,6 @@ gst_svtav1enc_start_svt (GstSvtAv1Enc *
   return TRUE;
 }
 
-void
-set_default_svt_configuration (EbSvtAv1EncConfiguration * svt_config)
-{
-  memset(svt_config, 0, sizeof(EbSvtAv1EncConfiguration));
-  svt_config->source_width = 0;
-  svt_config->source_height = 0;
-  svt_config->intra_period_length = PROP_GOP_SIZE_DEFAULT - 1;
-  svt_config->intra_refresh_type = PROP_INTRA_REFRESH_DEFAULT;
-  svt_config->enc_mode = PROP_ENCMODE_DEFAULT;
-  svt_config->frame_rate = 25;
-  svt_config->frame_rate_denominator = 1;
-  svt_config->frame_rate_numerator = 25;
-  svt_config->hierarchical_levels = PROP_HIERARCHICAL_LEVEL_DEFAULT;
-  svt_config->pred_structure = PROP_PRED_STRUCTURE_DEFAULT;
-  svt_config->scene_change_detection = PROP_SCD_DEFAULT;
-  svt_config->rate_control_mode = PROP_RC_MODE_DEFAULT; // todo: add CVBR
-  svt_config->target_bit_rate = PROP_BITRATE_DEFAULT;
-  svt_config->max_qp_allowed = PROP_QP_MAX_DEFAULT;
-  svt_config->min_qp_allowed = PROP_QP_MIN_DEFAULT;
-  svt_config->screen_content_mode = FALSE;
-  svt_config->enable_adaptive_quantization = FALSE;
-  svt_config->qp = PROP_QP_DEFAULT;
-  svt_config->use_qp_file = FALSE;
-  svt_config->enable_dlf_flag = (PROP_DEBLOCKING_DEFAULT == TRUE);
-  svt_config->film_grain_denoise_strength = FALSE;
-  svt_config->cdef_level = -1;
-  svt_config->enable_restoration_filtering = -1;
-  svt_config->enable_mfmv = -1;
-  // HME parameters
-  svt_config->channel_id = 0;
-  svt_config->active_channel_count = 1;
-  svt_config->recon_enabled = FALSE;
-
-  // thread affinity
-  svt_config->logical_processors = PROP_CORES_DEFAULT;
-  svt_config->target_socket = PROP_SOCKET_DEFAULT;
-  svt_config->pin_threads = 0;
-
-  // tile based encoding
-  svt_config->tile_columns = 0;
-  svt_config->tile_rows = 0;
-  svt_config->restricted_motion_vector = FALSE;
-
-  // alt-ref
-  svt_config->enable_tf = TRUE;
-  svt_config->enable_overlays = FALSE;
-
-  // super resolution
-  svt_config->superres_mode = FALSE; // SUPERRES_NONE
-  svt_config->superres_denom = 8;
-  svt_config->superres_kf_denom = 8;
-  svt_config->superres_qthres = 43;
-
-  // latency
-
-  // Annex A
-  svt_config->profile = 0;
-  svt_config->tier = 0;
-  svt_config->level = 0;
-
-  svt_config->stat_report = FALSE;
-  svt_config->high_dynamic_range_input = FALSE;
-  svt_config->encoder_bit_depth = 8;
-  svt_config->encoder_color_format = 1; // todo. Only 420 for now.
-  svt_config->compressed_ten_bit_format = FALSE;
-  svt_config->use_cpu_flags = CPU_FLAGS_ALL;
-
-  // color description
-  svt_config->color_range = 0;
-  svt_config->color_primaries = 2;
-  svt_config->transfer_characteristics = 2;
-  svt_config->matrix_coefficients = 2;
-}
-
 GstFlowReturn
 gst_svtav1enc_encode (GstSvtAv1Enc * svtav1enc, GstVideoCodecFrame * frame)
 {
@@ -644,6 +565,7 @@ gst_svtav1enc_encode (GstSvtAv1Enc * svt
   EbSvtIOFormat *input_picture_buffer =
       (EbSvtIOFormat *) svtav1enc->input_buf->p_buffer;
   GstVideoFrame video_frame;
+  EbPrivDataNode private_data;
 
   if (!gst_video_frame_map (&video_frame, &svtav1enc->state->info,
           frame->input_buffer, GST_MAP_READ)) {
@@ -669,7 +591,12 @@ gst_svtav1enc_encode (GstSvtAv1Enc * svt
 
   /* Fill in Buffers Header control data */
   input_buffer->flags = 0;
-  input_buffer->p_app_private = (void *) frame;
+  // private data is copied in svt_av1_enc_send_picture
+  private_data.node_type = PRIVATE_DATA;
+  private_data.size = sizeof (GstVideoCodecFrame);
+  private_data.data = (void *) frame;
+  private_data.next = NULL;
+  input_buffer->p_app_private = (void *) &private_data;
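+  // (safe even though private_data lives on the stack: the node is copied
+  // inside svt_av1_enc_send_picture, per the note above)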
   input_buffer->pts = frame->pts;
   input_buffer->pic_type = EB_AV1_INVALID_PICTURE;
 
@@ -677,6 +604,8 @@ gst_svtav1enc_encode (GstSvtAv1Enc * svt
     input_buffer->pic_type = EB_AV1_KEY_PICTURE;
   }
 
+  input_buffer->metadata = NULL;
+
   res = svt_av1_enc_send_picture(svtav1enc->svt_encoder, input_buffer);
   if (res != EB_ErrorNone) {
     GST_ELEMENT_ERROR (svtav1enc, LIBRARY, ENCODE, (NULL), ("error in sending picture to encoder"));
@@ -699,6 +628,7 @@ gst_svtav1enc_send_eos (GstSvtAv1Enc * s
   input_buffer.p_app_private = NULL;
   input_buffer.flags = EB_BUFFERFLAG_EOS;
   input_buffer.p_buffer = NULL;
+  input_buffer.metadata = NULL;
 
   ret = svt_av1_enc_send_picture(svtav1enc->svt_encoder, &input_buffer);
 
@@ -758,7 +688,8 @@ gst_svtav1enc_dequeue_encoded_frames (Gs
        * it's not currently the case with SVT-AV1
        * so we fallback on using its PTS to find it back */
       if (output_buf->p_app_private) {
-        frame = (GstVideoCodecFrame *) output_buf->p_app_private;
+        EbPrivDataNode *private_data = (EbPrivDataNode *) output_buf->p_app_private;
+        frame = (GstVideoCodecFrame *) private_data->data;
       } else {
         pending_frames = gst_video_encoder_get_frames (GST_VIDEO_ENCODER
             (svtav1enc));
@@ -907,17 +838,19 @@ gst_svtav1enc_set_format (GstVideoEncode
   gst_svtav1enc_allocate_svt_buffers (svtav1enc);
   gst_svtav1enc_start_svt (svtav1enc);
 
-  uint32_t fps = (uint32_t)((svtav1enc->svt_config->frame_rate > 1000) ?
-      svtav1enc->svt_config->frame_rate >> 16 : svtav1enc->svt_config->frame_rate);
-  fps = fps > 120 ? 120 : fps;
-  fps = fps < 24 ? 24 : fps;
+  uint32_t fps = svtav1enc->svt_config->frame_rate_numerator /
+                    svtav1enc->svt_config->frame_rate_denominator;
+  fps          = fps > 120 ? 120 : fps;
+  fps          = fps < 24 ? 24 : fps;
 
-  min_latency_frames =  ((fps * 5) >> 2);
+  min_latency_frames = ((fps * 5) >> 2);
 
   /* TODO: find a better value for max_latency */
-  gst_video_encoder_set_latency (encoder,
-      min_latency_frames * GST_SECOND / svtav1enc->svt_config->frame_rate,
-      3 * GST_SECOND);
+  gst_video_encoder_set_latency(encoder,
+                                min_latency_frames * GST_SECOND /
+                                    (svtav1enc->svt_config->frame_rate_numerator /
+                                     svtav1enc->svt_config->frame_rate_denominator),
+                                3 * GST_SECOND);
 
   src_caps =
       gst_static_pad_template_get_caps (&gst_svtav1enc_src_pad_template);
diff -pruN 0.9.1+dfsg-1/README.md 1.2.0+dfsg-2/README.md
--- 0.9.1+dfsg-1/README.md	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/README.md	2022-08-01 19:12:00.000000000 +0000
@@ -1,186 +1,91 @@
 # Scalable Video Technology for AV1 (SVT-AV1 Encoder and Decoder)
 
-The Scalable Video Technology for AV1 (SVT-AV1 Encoder and Decoder) is an AV1-compliant encoder/decoder library core. The SVT-AV1 encoder development is a work-in-progress targeting performance levels applicable to both VOD and Live encoding / transcoding video applications. The SVT-AV1 decoder implementation is targeting future codec research activities.
+The Scalable Video Technology for AV1 (SVT-AV1 Encoder and Decoder) is an
+AV1-compliant software encoder/decoder library. The work on the SVT-AV1 encoder
+targets the development of a production-quality AV1-encoder with performance
+levels applicable to a wide range of applications, from premium VOD to
+real-time and live encoding/transcoding. The SVT-AV1 decoder implementation
+targets future codec research activities.
+
+The SVT-AV1 project was initially founded by Intel in partnership with Netflix,
+and was then [adopted](https://aomedia.org/press%20releases/aomedia-software-implementation-working-group-to-bring-av1-to-more-video-platforms/)
+by the Alliance for Open Media (AOM) Software Implementation Working Group
+(SIWG), in August 2020, to carry on the group's mission.
+
+The canonical URL for this project is at <https://gitlab.com/AOMediaCodec/SVT-AV1>
 
 ## License
 
-Up to v0.8.7, SVT-AV1 is licensed under the BSD-2-clause license and the Alliance for Open Media Patent License 1.0. See [LICENSE](LICENSE-BSD2.md) and [PATENTS](PATENTS.md) for details.
-Starting from v0.8.8, SVT-AV1 is licensed under the BSD-3-clause clear license and the Alliance for Open Media Patent License 1.0. See [LICENSE](LICENSE.md) and [PATENTS](PATENTS.md) for details.
+Up to v0.8.7, SVT-AV1 is licensed under the BSD-2-clause license and the
+Alliance for Open Media Patent License 1.0. See [LICENSE](LICENSE-BSD2.md) and
+[PATENTS](PATENTS.md) for details. Starting from v0.9, SVT-AV1 is licensed
+under the BSD-3-clause clear license and the Alliance for Open Media Patent
+License 1.0. See [LICENSE](LICENSE.md) and [PATENTS](PATENTS.md) for details.
 
 ## Documentation
 
-More details about the SVT-AV1 usage and implementation can be found under:
-
-- [svt-av1-encoder-user-guide](Docs/svt-av1_encoder_user_guide.md)
-- [svt-av1-decoder-user-guide](Docs/svt-av1_decoder_user_guide.md)
-- [svt-av1-documentation-page](Docs/README.md)
-
-## System Requirements
-
-### Operating System
-
-SVT-AV1 Encoder may run on any Windows* or Linux* 64 bit operating systems. The list below represents the operating systems that the encoder application and library were tested and validated on:
-
-- __Windows* Operating Systems (64-bit):__
-  - Windows* Server 2016
-- __Linux* Operating Systems (64-bit):__
-  - Ubuntu* 16.04 Server LTS
-  - Ubuntu* 18.04 Server LTS
-- __Unix* Operating Systems (64-bit):__
-  - MacOS
-
-### Hardware
-
-The SVT-AV1 Encoder library supports the x86 architecture
-
-- __CPU Requirements__
-
-    In order to achieve the performance targeted by the SVT-AV1 Encoder, the specific CPU model listed above would need to be used when running the encoder. Otherwise, the encoder runs on any 5th Generation Intel® Core™ processor, (Intel® Xeon® CPUs, E5-v4 or newer).
-
-- __RAM Requirements__
-
-    The SVT-AV1 Encoder adapts to the system on which it is being run. The memory requirements depend on the number of cores the system contains, the input frame rate of the input sequence (`-fps`) and the look ahead distance passed to the encoder (`-lad`). The SVT-AV1 Encoder application will display an error if the system does not have enough RAM to support the encode prior to the start of the encode. The following table shows the minimum amount of RAM required for some standard resolutions of 10bit video per stream:
-
-|       Resolution      | 8-vCPU Commit Size (GB)| 40-vCPU Commit Size (GB)|
-|-----------------------|------------------------|-------------------------|
-|       4k              |           14           |           24            |
-|       1080p           |            6           |           10            |
-|       720p            |            4           |            7            |
-|       480p            |            3           |            5            |
-
-## Build and Install
-
-### Windows* Operating Systems (64-bit)
-
-- __Build Requirements__
-  - Visual Studio* 2017 (download [here](https://www.visualstudio.com/vs/older-downloads/)) or 2019 (download [here](https://visualstudio.microsoft.com/downloads/))
-  - CMake 3.5 or later (download [here](https://github.com/Kitware/CMake/releases/download/v3.14.5/cmake-3.14.5-win64-x64.msi))
-  - YASM Assembler version 1.2.0 or later
-    - Download the yasm exe from the following [link](http://www.tortall.net/projects/yasm/releases/yasm-1.3.0-win64.exe)
-    - Rename yasm-*-win64.exe to yasm.exe
-    - Copy yasm.exe into a location that is in the `PATH` environment variable
-
-- __Build Instructions__
-  - Build the project by following the steps below
-    - cd into `Build\windows`
-    - run `build.bat <2019|2017|2015>` [This will generate the .sln files and build the project]
-
-- __Binaries Location__
-  - Binaries can be found under `<repo dir>/Bin/Release` or `<repo dir>/Bin/Debug`, depending on whether Debug or Release were selected in the build mode.
-
-- __Installation__
-
-  For the binaries to operate properly on your system, the following conditions have to be met:
-  - On any of the Windows* Operating Systems listed in the OS requirements section, install Visual Studio* 2015/2017/2019
-  - Once the installation is complete, copy the binaries to a location making sure that both the sample application `SvtAv1EncApp.exe` and library `SvtAv1Enc.dll` are in the same folder.
-  - Open the command prompt window at the chosen location and run the sample application to encode: `SvtAV1EncApp.exe -i [in.yuv] -w [width] -h [height] -b [out.ivf]`
-  - Sample application supports reading from pipe. E.g. `ffmpeg -i [input.mp4] -nostdin -f rawvideo -pix_fmt yuv420p - | SvtAv1EncApp.exe -i stdin -n [number_of_frames_to_encode] -w [width] -h [height]`
-
-### Linux* Operating Systems (64-bit)
-
-Note - a Dockerfile is provided to build the encoder into a tiny Alpine Linux Docker image.
-
-- __Build Requirements__
-  - GCC 5.4.0 or later
-  - CMake 3.5.1 or later
-  - YASM Assembler version 1.2.0 or later
-
-- __Build Instructions__
-  - `cd Build/linux`
-  - `./build.sh <release | debug>`
-
-- __Sample Binaries location__
-  - Binaries can be found under `Bin/Release` and/or `Bin/Debug`
-
-- __Clang usage__
-  - To install Clang-11 on Ubuntu 20.04 execute single command: `sudo apt install clang-11`
-  - To install Clang-11 on Ubuntu 18.04 execute commands:
-    - `wget -O - https://apt.llvm.org/llvm-snapshot.gpg.key | sudo apt-key add -`
-    - `sudo apt-add-repository "deb http://apt.llvm.org/bionic/ llvm-toolchain-bionic-11 main"`
-    - `sudo apt install clang-11`
-  - To build SVT-AV1 using Clang-11:
-    - `export CC="clang-11"`
-    - `export CXX="clang++-11"`
-    - `./build.sh <release | debug>`
-
-
-- __Installation__
-
-  For the binaries to operate properly on your system, the following conditions have to be met:
-
-  - On any of the Linux* Operating Systems listed above, copy the binaries under a location of your choice.
-  - Change the permissions on the sample application `SvtAV1EncApp` executable by running the command: `chmod +x SvtAv1EncApp`
-  - cd into your chosen location
-  - Run the sample application to encode: `./SvtAv1EncApp -i [in.yuv] -w [width] -h [height] -b [out.ivf]`
-  - Sample application supports reading from pipe. E.g. `ffmpeg -i [input.mp4] -nostdin -f rawvideo -pix_fmt yuv420p - | ./SvtAv1EncApp -i stdin -n [number_of_frames_to_encode] -w [width] -h [height]`
-
-## SVT-AV1 ffmpeg plugin installation
-
-### 1. Build and install SVT-AV1
-
-``` bash
-git clone --depth=1 https://gitlab.com/AOMediaCodec/SVT-AV1.git
-cd SVT-AV1
-cd Build
-cmake .. -G"Unix Makefiles" -DCMAKE_BUILD_TYPE=Release
-make -j $(nproc)
-sudo make install
-```
-
-###  2. Enable libsvtav1 in FFmpeg
-
-NOTE: If you wish to use an FFmpeg tag or release before 4.4, please go [here](https://gitlab.com/AOMediaCodec/SVT-AV1/tree/v0.8.4/ffmpeg_plugin) and consult that page to properly patch ffmpeg for use with SVT-AV1.
-
-``` bash
-   git clone --depth=1 https://github.com/FFmpeg/FFmpeg ffmpeg
-   cd ffmpeg
-   export LD_LIBRARY_PATH+=":/usr/local/lib"
-   export PKG_CONFIG_PATH+=":/usr/local/lib/pkgconfig"
-   ./configure --enable-libsvtav1
-   make -j $(nproc)
-```
-
-###  3. Verify that ffmpeg is working
-
-``` bash
-./ffmpeg -i input.mp4 -c:v libsvtav1 -y test.mp4
-```
-
-## How to evaluate by ready-to-run executables with docker
-
-Refer to the guide [here](https://github.com/OpenVisualCloud/Dockerfiles/blob/master/doc/svt.md#Evaluate-SVT).
-
-## Demo features and limitations
-
-- **Multi-instance support:** The multi-instance functionality is a demo feature implemented in the SVT-AV1 Encoder sample application as an example of one sample application using multiple encoding libraries. Encoding using the multi-instance support is limited to only 6 simultaneous streams. For example two channels encoding on Windows: `SvtAV1EncApp.exe -nch 2 -c firstchannel.cfg secondchannel.cfg`
-- **Features enabled:** The library will display an error message any feature combination that is not currently supported.
-
-## How to Contribute
-
-We welcome community contributions to the SVT-AV1 Encoder and Decoder. Thank you for your time! By contributing to the project, you agree to the license, patent and copyright terms in the AOM License and Patent License  and to the release of your contribution under these terms. See [LICENSE](LICENSE.md) and [PATENTS](PATENTS.md) for details.
-
-## Contributor agreement
-
-You will be required to execute the appropriate [contributor agreement](http://aomedia.org/license/) to ensure that the AOMedia Project has the right to distribute your changes.
-
-### Contribution process
-
-- Follow the [coding guidelines](STYLE.md) and the [contributing guidelines](CONTRIBUTING.md)
-
-- Validate that your changes do not break a build
-
-- Perform smoke tests and ensure they pass
-
-- Submit a pull request for review to the maintainer
-
-### How to Report Bugs and Provide Feedback
-
-Use the [Issues](https://gitlab.com/AOMediaCodec/SVT-AV1/issues) tab on GitLab. To avoid duplicate issues, please make sure you go through the existing issues before logging a new one.
-
-## IRC
-
-`#svt` on Libera.chat. Join via [Libera Webchat](https://web.libera.chat/?channel=#svt) or use your favorite IRC client.
+**Guides**
+- [System Requirements](Docs/System-Requirements.md)
+- [How to run SVT-AV1 within ffmpeg](Docs/Ffmpeg.md)
+- [Standalone Encoder Usage](Docs/svt-av1_encoder_user_guide.md)
+- [Decoder Usage](Docs/svt-av1_decoder_user_guide.md)
+- [List of All Parameters](Docs/Parameters.md)
+- [Build Guide](Docs/Build-Guide.md)
+
+**Common Questions/Issues**
+- [What presets do](Docs/CommonQuestions.md#what-presets-do)
+- [Scene change detection](Docs/CommonQuestions.md#scene-change-detection)
+- [GOP size selection](Docs/CommonQuestions.md#gop-size-selection)
+- [Threading and efficiency](Docs/CommonQuestions.md#threading-and-efficiency)
+- [Practical advice about grain synthesis](Docs/CommonQuestions.md#practical-advice-about-grain-synthesis)
+- [Improving decoding performance](Docs/CommonQuestions.md#improving-decoding-performance)
+- [Tuning for animation](Docs/CommonQuestions.md#tuning-for-animation)
+- [8 vs. 10-bit encoding](Docs/CommonQuestions.md#8-or-10-bit-encoding)
+- [HDR and SDR video](Docs/CommonQuestions.md#hdr-and-sdr)
+- [Options that give the best encoding bang-for-buck](Docs/CommonQuestions.md#options-that-give-the-best-encoding-bang-for-buck)
+- [Multi-pass encoding](Docs/CommonQuestions.md#multi-pass-encoding)
+- [CBR, VBR, and CRF modes](Docs/CommonQuestions.md#bitrate-control-modes)
+
+**Presentations**
+- [Big Apple Video 2019](https://www.youtube.com/watch?v=lXqOaYNo8m0)
+- [Video @ Scale 2021](https://atscaleconference.com/videos/highly-efficient-svt-av1-based-solutions-for-vod-applications/?contact-form-id=124119&contact-form-sent=163268&contact-form-hash=d4bb3fd420fae91cd39c11bdb69f970a05a152a9&_wpnonce=bba8096d24#contact-form-124119)
+
+**Papers and Blogs**
+- [Netflix Blog 2020](https://netflixtechblog.com/svt-av1-an-open-source-av1-encoder-and-decoder-ad295d9b5ca2)
+- [SPIE 2020](https://www.spiedigitallibrary.org/conference-proceedings-of-spie/11510/1151021/The-SVT-AV1-encoder--overview-features-and-speed-quality/10.1117/12.2569270.full)
+- [SPIE 2021](https://www.spiedigitallibrary.org/conference-proceedings-of-spie/11842/118420T/Towards-much-better-SVT-AV1-quality-cycles-tradeoffs-for-VOD/10.1117/12.2595598.full)
+- [SVT-AV1 - Tech Blog 2022](https://networkbuilders.intel.com/blog/svt-av1-enables-highly-efficient-large-scale-video-on-demand-vod-services)
+
+**Design Documents**
+- [Encoder Design](Docs/svt-av1-encoder-design.md)
+- [Decoder Design](Docs/svt-av1-decoder-design.md)
+
+**Technical Appendices**
+- [Adaptive Prediction Structure Appendix](Docs/Appendix-Adaptive-Prediction-Structure.md)
+- [Altref and Overlay Pictures Appendix](Docs/Appendix-Alt-Refs.md)
+- [CDEF Appendix](Docs/Appendix-CDEF.md)
+- [CfL Appendix](Docs/Appendix-CfL.md)
+- [Compliant Subpel Interpolation Filter Search Appendix](Docs/Appendix-Compliant-Subpel-Interpolation-Filter-Search.md)
+- [Compound Mode Prediction Appendix](Docs/Appendix-Compound-Mode-Prediction.md)
+- [Deblocking Loop Filter (LF) Appendix](Docs/Appendix-DLF.md)
+- [Film Grain Synthesis](Docs/Appendix-Film-Grain-Synthesis.md)
+- [Global Motion Appendix](Docs/Appendix-Global-Motion.md)
+- [Intra Block Copy Appendix](Docs/Appendix-Intra-Block-Copy.md)
+- [IPP Pass Appendix](Docs/Appendix-IPP-Pass.md)
+- [Local Warped Motion appendix](Docs/Appendix-Local-Warped-Motion.md)
+- [Mode Decision Appendix](Docs/Appendix-Mode-Decision.md)
+- [Motion Estimation Appendix](Docs/Appendix-Open-Loop-Motion-Estimation.md)
+- [Overlapped Block Motion Compensation Appendix](Docs/Appendix-Overlapped-Block-Motion-Compensation.md)
+- [Palette Prediction Appendix](Docs/Appendix-Palette-Prediction.md)
+- [Rate Control Appendix](Docs/Appendix-Rate-Control.md)
+- [Recursive Intra Appendix](Docs/Appendix-Recursive-Intra.md)
+- [Restoration Filter Appendix](Docs/Appendix-Restoration-Filter.md)
+- [SQ Weight Appendix](Docs/Appendix-SQ-Weight.md)
+- [Super-resolution Appendix](Docs/Appendix-Super-Resolution.md)
+- [Temporal Dependency Model](Docs/Appendix-TPL.md)
+- [Transform Search Appendix](Docs/Appendix-TX-Search.md)
+- [Reference Scaling Appendix](Docs/Appendix-Reference-Scaling.md)
 
-## Performance tracker
+**How Can I Contribute?**
+- [SVT-AV1 Contribution Guide](Docs/Contribute.md)
 
-Performance and raw data files available at <http://videocodectracker.dev>
diff -pruN 0.9.1+dfsg-1/Source/API/EbDebugMacros.h 1.2.0+dfsg-2/Source/API/EbDebugMacros.h
--- 0.9.1+dfsg-1/Source/API/EbDebugMacros.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/API/EbDebugMacros.h	2022-08-01 19:12:00.000000000 +0000
@@ -41,6 +41,7 @@ extern "C" {
 #define DEBUG_TPL               0 // Prints to debug TPL
 #define DETAILED_FRAME_OUTPUT   0 // Prints detailed frame output from the library for debugging
 #define TUNE_CHROMA_SSIM        0 // Allows for Chroma and SSIM BDR-based Tuning
+#define TUNE_CQP_CHROMA_SSIM    0 // Tune CQP qp scaling towards improved chroma and SSIM BDR
 
 #define MIN_PIC_PARALLELIZATION 0 // Use the minimum amount of picture parallelization
 #define SRM_REPORT              0 // Report SRM status
@@ -52,7 +53,6 @@ extern "C" {
 #else
 #define REMOVE_LP1_LPN_DIFF     0 // Disallow single-thread/multi-thread differences
 #endif
-#define FIX_1PVBR               1 // Derive initial qp based on target bitrate
 // Super-resolution debugging code
 #define DEBUG_SCALING           0
 #define DEBUG_TF                0
@@ -61,6 +61,11 @@ extern "C" {
 #define DEBUG_SUPERRES_ENERGY   0
 #define DEBUG_RC_CAP_LOG        0 // Prints for RC cap
 
+// Switch frame debugging code
+#define DEBUG_SFRAME            0
+// Quantization matrices
+#define DEBUG_QM_LEVEL          0
+
 #ifdef __cplusplus
 }
 #endif // __cplusplus
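
[Editor's note] These switches are compile-time 0/1 gates: flipping one to 1 compiles the matching diagnostics in, and at 0 they cost nothing at runtime. A minimal sketch of how such a gate is typically consumed (the SFRAME_LOG macro and mark_sframe() below are illustrative, not part of the library):

#include <stdio.h>

#define DEBUG_SFRAME 0 /* as in EbDebugMacros.h; set to 1 to compile the prints in */

#if DEBUG_SFRAME
#define SFRAME_LOG(...) fprintf(stderr, __VA_ARGS__)
#else
#define SFRAME_LOG(...) ((void)0) /* expands to a no-op when the gate is 0 */
#endif

static void mark_sframe(int poc) { SFRAME_LOG("POC %d coded as S-Frame\n", poc); }
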
diff -pruN 0.9.1+dfsg-1/Source/API/EbSvtAv1Dec.h 1.2.0+dfsg-2/Source/API/EbSvtAv1Dec.h
--- 0.9.1+dfsg-1/Source/API/EbSvtAv1Dec.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/API/EbSvtAv1Dec.h	2022-08-01 19:12:00.000000000 +0000
@@ -39,10 +39,10 @@ typedef struct EbAV1StreamInfo {
     EbColorConfig color_config;
 
     /* Film Grain Synthesis Present */
-    EbBool film_grain_params_present;
+    Bool film_grain_params_present;
 
     /* The stream is in annex_b format */
-    EbBool is_annex_b;
+    Bool is_annex_b;
 } EbAV1StreamInfo;
 
 typedef struct EbAV1FrameInfo {
@@ -70,7 +70,7 @@ typedef struct EbSvtAv1DecConfiguration
     /* Skip film grain synthesis if it is present in the bitstream. Can be used for debugging purpose.
      *
      * Default is 0 */
-    EbBool skip_film_grain;
+    Bool skip_film_grain;
 
     /* Skip N output frames in the display order.
      *
@@ -96,7 +96,7 @@ typedef struct EbSvtAv1DecConfiguration
      *
      * Default is 0. */
 
-    EbBool eight_bit_output;
+    Bool eight_bit_output;
 
     /* Picture parameters */
     uint32_t max_picture_width;
@@ -139,7 +139,7 @@ typedef struct EbSvtAv1DecConfiguration
     /* Decoder internal bit-depth is set to 16-bit even if the bitstream is 8-bit
  *
  * Default is 0. */
-    EbBool is_16bit_pipeline;
+    Bool is_16bit_pipeline;
 } EbSvtAv1DecConfiguration;
 
 /* STEP 1: Call the library to construct a Component Handle.
diff -pruN 0.9.1+dfsg-1/Source/API/EbSvtAv1Enc.h 1.2.0+dfsg-2/Source/API/EbSvtAv1Enc.h
--- 0.9.1+dfsg-1/Source/API/EbSvtAv1Enc.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/API/EbSvtAv1Enc.h	2022-08-01 19:12:00.000000000 +0000
@@ -21,14 +21,47 @@ extern "C" {
 #include <stdlib.h>
 #include <stdio.h>
 
+/**
+ * @brief SVT-AV1 encoder ABI version
+ *
+ * Should be increased by 1 every time a public struct in the encoder
+ * has been modified, and reset whenever the major API version is
+ * changed. Used to keep track of whether a field has been added.
+ */
+#define SVT_AV1_ENC_ABI_VERSION 4
+
 //***HME***
 
 #define MAX_HIERARCHICAL_LEVEL 6
 #define REF_LIST_MAX_DEPTH 4
-#define MAX_ENC_PRESET 13
-#define NUM_MV_COMPONENTS 2
-#define NUM_MV_HIST 2
-#define MAX_MV_HIST_SIZE 2 * REF_LIST_MAX_DEPTH *NUM_MV_COMPONENTS *NUM_MV_HIST
+/*!\brief Decorator indicating that given struct/union/enum is packed */
+#ifndef ATTRIBUTE_PACKED
+#if defined(__GNUC__) && __GNUC__
+#define ATTRIBUTE_PACKED __attribute__((packed))
+#else
+#define ATTRIBUTE_PACKED
+#endif
+#endif /* ATTRIBUTE_PACKED */
+typedef enum ATTRIBUTE_PACKED {
+    ENC_MRS        = -2, // Highest quality research mode (slowest)
+    ENC_MR         = -1, // Research mode with higher quality than M0
+    ENC_M0         = 0,
+    ENC_M1         = 1,
+    ENC_M2         = 2,
+    ENC_M3         = 3,
+    ENC_M4         = 4,
+    ENC_M5         = 5,
+    ENC_M6         = 6,
+    ENC_M7         = 7,
+    ENC_M8         = 8,
+    ENC_M9         = 9,
+    ENC_M10        = 10,
+    ENC_M11        = 11,
+    ENC_M12        = 12,
+    ENC_M13        = 13,
+    MAX_ENC_PRESET = ENC_M13
+} EncMode;
+
 #define DEFAULT -1
 
 #define EB_BUFFERFLAG_EOS 0x00000001 // signals the last packet of the stream
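
[Editor's note] With presets now a packed enum (and MAX_ENC_PRESET folded into it as ENC_M13), applications get named constants instead of bare integers, and SVT_AV1_ENC_ABI_VERSION gives a compile-time handle on public-struct changes. A hedged sketch; only enc_mode, the enum, and the macro are taken from this header:

#include "EbSvtAv1Enc.h"

#if SVT_AV1_ENC_ABI_VERSION < 4
#error "these headers predate the 1.2.0 public-struct layout"
#endif

static void pick_preset(EbSvtAv1EncConfiguration *cfg) {
    /* ENC_M0 (slowest) .. ENC_M13 (fastest); ENC_MRS/ENC_MR are research presets */
    cfg->enc_mode = ENC_M8;
}
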
@@ -102,6 +135,16 @@ typedef enum {
     SUPERRES_AUTO_SEARCH_TYPES
 } SUPERRES_AUTO_SEARCH_TYPE;
 
+// reference scaling modes
+typedef enum {
+    RESIZE_NONE, // No frame resize allowed.
+    RESIZE_FIXED, // All frames are coded at the specified scale.
+    RESIZE_RANDOM, // All frames are coded at a random scale.
+    RESIZE_DYNAMIC, // Resize scale for each frame is determined dynamically.
+    RESIZE_RANDOM_ACCESS, // Resize scale is set per frame by events at random access points.
+    RESIZE_MODES
+} RESIZE_MODE;
+
 /** The SvtAv1IntraRefreshType is used to describe the intra refresh type.
 */
 typedef enum SvtAv1IntraRefreshType {
@@ -125,11 +168,18 @@ typedef struct SvtAv1FixedBuf {
     uint64_t sz; /**< Length of the buffer, in chars */
 } SvtAv1FixedBuf; /**< alias for struct aom_fixed_buf */
 
+/** Indicates how an S-Frame should be inserted.
+*/
+typedef enum EbSFrameMode {
+    SFRAME_STRICT_BASE =
+        1, /**< The considered frame will be made into an S-Frame only if it is a base layer inter frame */
+    SFRAME_NEAREST_BASE =
+        2, /**< If the considered frame is not an altref frame, the next base layer inter frame will be made into an S-Frame */
+} EbSFrameMode;
+
 // Will contain the EbEncApi which will live in the EncHandle class
 // Only modifiable during config-time.
 typedef struct EbSvtAv1EncConfiguration {
-    // Encoding preset
-
     /**
      * @brief Encoder preset used.
      * -2 and -1 are for debug purposes and should not be used.
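
[Editor's note] The EbSFrameMode enum above pairs with the sframe_dist/sframe_mode fields added further down in this struct. A minimal sketch of turning switch frames on (the 120-frame interval is an arbitrary illustration):

#include "EbSvtAv1Enc.h"

static void enable_switch_frames(EbSvtAv1EncConfiguration *cfg) {
    cfg->sframe_dist = 120;                 /* an S-Frame may be inserted every 120 frames */
    cfg->sframe_mode = SFRAME_NEAREST_BASE; /* promote the next base layer inter frame */
}
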
@@ -146,14 +196,17 @@ typedef struct EbSvtAv1EncConfiguration
 
     /* The intra period defines the interval of frames after which you insert an
      * Intra refresh. It is strongly recommended to set the value to multiple of
-     * 8 minus 1 the closest to 1 second (e.g. 55, 47, 31, 23 should be used for
-     * 60, 50, 30, (24 or 25) respectively.
+     * 2^(hierarchical_levels), subtracting one if using open GOP (intra_refresh_type == 1).
+     * For instance, to get a 5-second GOP (default being >=5 seconds)
+     * with hierarchical_levels = 3 and open GOP you could use 319, 279, 159
+     * for 60, 50, or 30 fps respectively.
      *
      * -1 = no intra update.
      * -2 = auto.
      *
      * Default is -2. */
     int32_t intra_period_length;
+
     /* Random access.
      *
      * 1 = CRA, open GOP.
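
[Editor's note] The example values check out against the rule: with hierarchical_levels = 3 the mini-GOP is 2^3 = 8 frames, and 319 = 40*8 - 1 (about 5.3 s at 60 fps), 279 = 35*8 - 1 (about 5.6 s at 50 fps), 159 = 20*8 - 1 (about 5.3 s at 30 fps). A hedged helper applying the rule; note the header's own examples round up somewhat past an exact 5 s:

#include <stdint.h>

static int32_t keyint_for(uint32_t fps, uint32_t seconds, uint32_t hierarchical_levels, int open_gop) {
    uint32_t mg     = 1u << hierarchical_levels;     /* mini-GOP length, e.g. 8 for 3 levels */
    uint32_t frames = fps * seconds;                 /* target GOP length in frames */
    uint32_t keyint = ((frames + mg - 1) / mg) * mg; /* round up to a multiple of mg */
    return (int32_t)keyint - (open_gop ? 1 : 0);     /* open GOP (intra_refresh_type == 1) drops one */
}
/* keyint_for(30, 5, 3, 1) == 151; the header's 159 simply chooses the next larger multiple. */
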
@@ -205,15 +258,13 @@ typedef struct EbSvtAv1EncConfiguration
      */
     uint32_t source_height;
 
-    /* The frequecy of images being displayed. If the number is less than 1000,
-     * the input frame rate is an integer number between 1 and 60, else the input
-     * number is in Q16 format, shifted by 16 bits, where max allowed is 240 fps.
-     * If FrameRateNumerator and FrameRateDenominator are both not equal to zero,
-     * the encoder will ignore this parameter.
-     *
-     * Default is 25. */
-    uint32_t frame_rate;
-
+    /* Specifies the maximum frame width/height for the frames represented by the sequence header
+     * (max_frame_width_minus_1 and max_frame_height_minus_1, spec 5.5.1).
+     * The actual frame width/height can be equal to or less than these values, e.g. use them
+     * to indicate the maximum dimensions among renditions when the switch frame feature is on.
+     */
+    uint32_t forced_max_frame_width;
+    uint32_t forced_max_frame_height;
     /* Frame rate numerator. When zero, the encoder will use -fps if
      * FrameRateDenominator is also zero, otherwise an error is returned.
      *
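
[Editor's note] With the integer frame_rate field removed, the rate is always expressed as a rational pair. A one-line sketch for an NTSC-style rate, which the old sub-1000/Q16 convention could not express exactly:

#include "EbSvtAv1Enc.h"

static void set_ntsc_rate(EbSvtAv1EncConfiguration *cfg) {
    cfg->frame_rate_numerator   = 30000; /* 30000/1001 = 29.97 fps */
    cfg->frame_rate_denominator = 1001;
}
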
@@ -246,107 +297,124 @@ typedef struct EbSvtAv1EncConfiguration
      * Default is 0. */
     uint32_t compressed_ten_bit_format;
 
-    /* Instruct the library to calculate the recon to source for PSNR calculation
-    *
-    * Default is 0.*/
-    uint32_t stat_report;
-
-    // Quantization
-    /* Initial quantization parameter for the Intra pictures used under constant
-     * qp rate control mode.
+    /**
+     * @brief Enable writing of HDR metadata in the bitstream
      *
-     * Default is 50. */
-    uint32_t qp;
-
-    /* force qp values for every picture that are passed in the header pointer
-    *
-    * Default is 0.*/
-    EbBool use_qp_file;
-
-    /* use fixed qp offset for every picture based on temporal layer index
-    *
-    * Default is 0.*/
-    EbBool  use_fixed_qindex_offsets;
-    int32_t qindex_offsets[EB_MAX_TEMPORAL_LAYERS];
-    int32_t key_frame_chroma_qindex_offset;
-    int32_t key_frame_qindex_offset;
-    int32_t chroma_qindex_offsets[EB_MAX_TEMPORAL_LAYERS];
-
-    // input / output buffer to be used for multi-pass encoding
-    SvtAv1FixedBuf rc_stats_buffer;
-    int            pass;
+     * Default is false.
+     */
+    Bool high_dynamic_range_input;
 
-    // Deblock Filter
+    /**
+     * @brief Bitstream profile to use.
+     * 0: main, 1: high, 2: professional.
+     *
+     * Min is MAIN_PROFILE.
+     * Max is PROFESSIONAL_PROFILE.
+     * Default is MAIN_PROFILE.
+     */
+    EbAv1SeqProfile profile;
+    /* Constraints for bitstream in terms of max bitrate and max buffer size.
+     *
+     * 0 = Main, for most applications.
+     * 1 = High, for demanding applications.
+     *
+     * Default is 0. */
+    uint32_t tier;
 
     /**
-     * @brief Deblocking loop filter control
+     * @brief Bitstream level.
+     * 0: autodetect from bitstream, 20: level 2.0, 63: level 6.3, only levels 2.0-6.3 are properly defined.
+     * The levels are defined at https://aomediacodec.github.io/av1-spec/av1-spec.pdf
+     * under "A.3. Levels".
      *
-     * Default is true.
+     * Min is 0.
+     * Max is 73.
+     * Default is 0.
      */
-    EbBool enable_dlf_flag;
+    uint32_t level;
 
-    /* Film grain denoising the input picture
-    * Flag to enable the denoising
+    /* Color description present flag
     *
-    * Default is 0. */
-    uint32_t film_grain_denoise_strength;
-
-    /* CDEF Level
+    * It is not necessary to set this parameter manually.
+    * It is set internally to true once one of the color_primaries, transfer_characteristics or
+    * matrix coefficients is set to non-default value.
     *
-    * Default is -1. */
-    int cdef_level;
+    Default is false. */
+    Bool color_description_present_flag;
+    /* Color primaries
+    * values are from EbColorPrimaries
+    Default is 2 (CP_UNSPECIFIED). */
+    EbColorPrimaries color_primaries;
+    /* Transfer characteristics
+    * values are from EbTransferCharacteristics
+    Default is 2 (TC_UNSPECIFIED). */
+    EbTransferCharacteristics transfer_characteristics;
+    /* Matrix coefficients
+    * values are from EbMatrixCoefficients
+    Default is 2 (MC_UNSPECIFIED). */
+    EbMatrixCoefficients matrix_coefficients;
+    /* Color range
+    * values are from EbColorRange
+    * 0: studio swing.
+    * 1: full swing.
+    Default is 0. */
+    EbColorRange color_range;
+    /* Mastering display metadata
+    * values are set using svt_aom_parse_mastering_display()
+    */
+    struct EbSvtAv1MasteringDisplayInfo mastering_display;
+    /* Content light level
+    * values are set using svt_aom_parse_content_light_level()
+    */
+    struct EbContentLightLevel content_light_level;
 
-    /* Restoration filtering
-    *  enable/disable
-    *  set Self-Guided (sg) mode
-    *  set Wiener (wn) mode
-    *
-    * Default is -1. */
-    int enable_restoration_filtering;
-    /* motion field motion vector
-    *
-    *  Default is -1. */
-    int enable_mfmv;
+    /* Chroma sample position
+     * Values as per 6.4.2 of the specification:
+     * EB_CSP_UNKNOWN:   default
+     * EB_CSP_VERTICAL:  value 0 from H.273 AKA "left"
+     * EB_CSP_COLOCATED: value 2 from H.273 AKA "top left"
+     */
+    EbChromaSamplePosition chroma_sample_position;
 
-    // Rate Control
+    // End input info
 
     /* Rate control mode.
      *
      * 0 = Constant QP.
      * 1 = Variable Bit Rate, achieve the target bitrate at entire stream.
-     * 2 = Constrained Variable Bit Rate, achieve the target bitrate at each gop
+     * 2 = Constant Bit Rate, achieve the target bitrate
      * Default is 0. */
     uint32_t rate_control_mode;
-    /* Flag to enable the scene change detection algorithm.
-     *
-     * Default is 1. */
-    uint32_t scene_change_detection;
 
-    /* When RateControlMode is set to 1 it's best to set this parameter to be
-     * equal to the Intra period value (such is the default set by the encoder).
-     * When CQP is chosen, then a (2 * minigopsize +1) look ahead is recommended.
+    // Rate control tuning
+
+    // Quantization
+    /* Initial quantization parameter for the Intra pictures used under constant
+     * qp rate control mode.
      *
-     * Default depends on rate control mode.*/
-    uint32_t look_ahead_distance;
+     * Default is 50. */
+    uint32_t qp;
 
-    /* Enable TPL in look ahead
-     * 0 = disable TPL in look ahead
-     * 1 = enable TPL in look ahead
-     * Default is 0  */
-    uint8_t enable_tpl_la;
+    /* force qp values for every picture that are passed in the header pointer
+    *
+    * Default is 0.*/
+    Bool use_qp_file;
 
-    /* Target bitrate in bits/second, only apllicable when rate control mode is
-     * set to 2 or 3.
+    /* Target bitrate in bits/second, only applicable when rate control mode is
+     * set to 1 (VBR) or 2 (CBR).
      *
-     * Default is 7000000. */
+     * Default is 2000000. */
     uint32_t target_bit_rate;
     /* maximum bitrate in bits/second, only applicable when rate control mode is
      * set to 0.
      *
      * Default is 0. */
     uint32_t max_bit_rate;
-    /* VBV Buffer size */
+
+#if !SVT_AV1_CHECK_VERSION(1, 3, 0)
+    /* DEPRECATED: to be removed in 1.3.0. */
     uint32_t vbv_bufsize;
+#endif
 
     /* Maximum QP value allowed for rate control use, only applicable when rate
      * control mode is set to 1. It has to be greater than or equal to minQpAllowed.
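
[Editor's note] The relocated color description block takes H.273 CICP code points directly, and color_description_present_flag is derived internally. A hedged BT.709 example written with raw CICP codes (1/1/1), since only the enum type names appear in this excerpt:

#include "EbSvtAv1Enc.h"

static void tag_bt709(EbSvtAv1EncConfiguration *cfg) {
    cfg->color_primaries          = (EbColorPrimaries)1;          /* H.273 code 1 = BT.709 */
    cfg->transfer_characteristics = (EbTransferCharacteristics)1; /* H.273 code 1 = BT.709 */
    cfg->matrix_coefficients      = (EbMatrixCoefficients)1;      /* H.273 code 1 = BT.709 */
    cfg->color_range              = (EbColorRange)0;              /* studio swing */
    /* color_description_present_flag flips to true internally once any value above is non-default */
}
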
@@ -359,7 +427,7 @@ typedef struct EbSvtAv1EncConfiguration
      * Default is 0. */
     uint32_t min_qp_allowed;
 
-    /* TWO PASS DATARATE CONTROL OPTIONS.
+    /* DATARATE CONTROL OPTIONS.
      * Indicates the bias (expressed on a scale of 0 to 100) for determining
      * target size for the current frame. The value 0 indicates the optimal CBR
      * mode value should be used, and 100 indicates the optimal VBR mode value
@@ -394,6 +462,101 @@ typedef struct EbSvtAv1EncConfiguration
      * application, and is expressed in units of time (milliseconds).*/
     int64_t maximum_buffer_size_ms;
 
+    // input / output buffer to be used for multi-pass encoding
+    SvtAv1FixedBuf rc_stats_buffer;
+    int            pass;
+
+    // End rate control tuning
+
+    // Individual tuning flags
+    /* use fixed qp offset for every picture based on temporal layer index
+    * 0: off (use the auto mode QP)
+    * 1: on (the offset is applied on top of the user QP)
+    * 2: on (the offset is applied on top of the auto mode QP)
+    *
+    * Default is 0.*/
+    uint8_t use_fixed_qindex_offsets;
+    int32_t qindex_offsets[EB_MAX_TEMPORAL_LAYERS];
+    int32_t key_frame_chroma_qindex_offset;
+    int32_t key_frame_qindex_offset;
+    int32_t chroma_qindex_offsets[EB_MAX_TEMPORAL_LAYERS];
+
+    int32_t luma_y_dc_qindex_offset;
+    int32_t chroma_u_dc_qindex_offset;
+    int32_t chroma_u_ac_qindex_offset;
+    int32_t chroma_v_dc_qindex_offset;
+    int32_t chroma_v_ac_qindex_offset;
+
+    /**
+     * @brief Deblocking loop filter control
+     *
+     * Default is true.
+     */
+    Bool enable_dlf_flag;
+
+    /* Film grain denoising the input picture
+    * Flag to enable the denoising
+    *
+    * Default is 0. */
+    uint32_t film_grain_denoise_strength;
+
+    /**
+    * @brief Determines how much denoising is used.
+    * Only applicable when film grain is ON.
+    *
+    * 0 is no denoising
+    * 1 is full denoising
+    */
+    uint8_t film_grain_denoise_apply;
+
+    /* CDEF Level
+    *
+    * Default is -1. */
+    int cdef_level;
+
+    /* Restoration filtering
+    *  enable/disable
+    *  set Self-Guided (sg) mode
+    *  set Wiener (wn) mode
+    *
+    * Default is -1. */
+    int enable_restoration_filtering;
+    /* motion field motion vector
+    *
+    *  Default is -1. */
+    int enable_mfmv;
+
+    /* Flag to enable the scene change detection algorithm.
+     *
+     * Default is 1. */
+    uint32_t scene_change_detection;
+
+    /**
+     * @brief API signal to constrain motion vectors.
+     *
+     * Default is false.
+     */
+    Bool restricted_motion_vector;
+
+    /* Log2 of tile rows and columns. 0 means no tiling, 1 means the dimension is split
+     * into 2.
+     * Default is 0. */
+    int32_t tile_columns;
+    int32_t tile_rows;
+
+    /* When RateControlMode is set to 1 it's best to set this parameter to be
+     * equal to the Intra period value (such is the default set by the encoder).
+     * When CQP is chosen, then a (2 * minigopsize +1) look ahead is recommended.
+     *
+     * Default depends on rate control mode.*/
+    uint32_t look_ahead_distance;
+
+    /* Enable TPL in look ahead
+     * 0 = disable TPL in look ahead
+     * 1 = enable TPL in look ahead
+     * Default is 0  */
+    uint8_t enable_tpl_la;
+
     /* recode_loop indicates the recode levels,
      * DISALLOW_RECODE = 0, No recode.
      * ALLOW_RECODE_KFMAXBW = 1, Allow recode for KF and exceeding maximum frame bandwidth.
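
[Editor's note] The five new DC/AC offsets above are qindex deltas layered on top of the frame-level CRF assignment (the app exposes them as --luma-y-dc-qindex-offset and friends, defined later in EbAppConfig.c). A sketch with illustrative values that spend more bits on luma DC and fewer on chroma AC:

#include "EbSvtAv1Enc.h"

static void bias_planes(EbSvtAv1EncConfiguration *cfg) {
    cfg->luma_y_dc_qindex_offset   = -4; /* finer luma DC quantization */
    cfg->chroma_u_ac_qindex_offset = 6;  /* coarser chroma AC quantization */
    cfg->chroma_v_ac_qindex_offset = 6;
}
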
@@ -412,50 +575,76 @@ typedef struct EbSvtAv1EncConfiguration
 
     /* Enable adaptive quantization within a frame using segmentation.
      *
+     * For rate control mode 0, setting this to 0 will use CQP mode, else CRF mode will be used.
      * Default is 2. */
     uint8_t enable_adaptive_quantization;
 
-    // Tresholds
-
     /**
-     * @brief Enable writing of HDR metadata in the bitstream
+     * @brief Enable use of ALT-REF (temporally filtered) frames.
      *
-     * Default is false.
+     * Default is true.
      */
-    EbBool high_dynamic_range_input;
+    Bool enable_tf;
 
+    Bool enable_overlays;
     /**
-     * @brief Bitstream profile to use.
-     * 0: main, 1: high, 2: professional.
+     * @brief Tune for a particular metric; 0: VQ, 1: PSNR
      *
-     * Min is MAIN_PROFILE.
-     * Max is PROFESSIONAL_PROFILE.
-     * Default is MAIN_PROFILE.
+     * Default is 1.
      */
-    EbAv1SeqProfile profile;
-    /* Constraints for bitstream in terms of max bitrate and max buffer size.
-     *
-     * 0 = Main, for most applications.
-     * 1 = High, for demanding applications.
+    uint8_t tune;
+
+    // super-resolution parameters
+    uint8_t superres_mode;
+    uint8_t superres_denom;
+    uint8_t superres_kf_denom;
+    uint8_t superres_qthres;
+    uint8_t superres_kf_qthres;
+    uint8_t superres_auto_search_type;
+
+    /**
+     * @brief API signal containing the manual prediction structure parameters.
+     * Only used when enable_manual_pred_struct is enabled. This list is copied
+     * into internal buffers after svt_av1_enc_set_parameter().
+     */
+    PredictionStructureConfigEntry pred_struct[1 << (MAX_HIERARCHICAL_LEVEL - 1)];
+
+    /**
+     * @brief API signal to overwrite the encoder's default prediction structure.
      *
-     * Default is 0. */
-    uint32_t tier;
+     * Default is false.
+     */
+    Bool enable_manual_pred_struct;
 
     /**
-     * @brief Bitstream level.
-     * 0: autodetect from bitstream, 20: level 2.0, 63: level 6.3, only levels 2.0-6.3 are properly defined.
-     * The levels are defined at https://aomediacodec.github.io/av1-spec/av1-spec.pdf
-     * under "A.3. Levels".
+     * @brief API signal specifying the size (number of entries) of the manual prediction structure buffer.
+     * Only checked and used when enable_manual_pred_struct is enabled.
      *
-     * Min is 0.
-     * Max is 73.
+     * Min is 1.
+     * Max is 32.
      * Default is 0.
      */
-    uint32_t level;
+    int32_t manual_pred_struct_entry_num;
 
-    /* CPU FLAGS to limit assembly instruction set used by encoder.
-    * Default is CPU_FLAGS_ALL. */
-    CPU_FLAGS use_cpu_flags;
+    /* Decoder-speed-targeted encoder optimization level (produce bitstreams that can be decoded faster).
+    * 0: No decoder speed optimization
+    * 1: Decoder speed optimization enabled (fast decode)
+    */
+    Bool fast_decode;
+
+    /* S-Frame interval (frames)
+    * 0: S-Frame off
+    * >0: S-Frame on and indicates the number of frames after which a frame may be coded as an S-Frame
+    */
+    int32_t sframe_dist;
+    /* Indicates how an S-Frame should be inserted
+    * values are from EbSFrameMode
+    * SFRAME_STRICT_BASE: the considered frame will be made into an S-Frame only if it is a base layer inter frame
+    * SFRAME_NEAREST_BASE: if the considered frame is not an altref frame, the next base layer inter frame will be made into an S-Frame
+    */
+    EbSFrameMode sframe_mode;
+
+    // End of individual tuning flags
 
     // Application Specific parameters
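
[Editor's note] Worth noting while reading this hunk: fast_decode shrank from the removed 0-3 level scheme (see the deleted comment later in this file) to a single Bool, and tune picks the target metric. A two-line sketch:

#include "EbSvtAv1Enc.h"

static void favor_playback(EbSvtAv1EncConfiguration *cfg) {
    cfg->fast_decode = TRUE; /* 1.x: on/off; 0.9.x exposed levels 0-3 */
    cfg->tune        = 0;    /* 0 = VQ (subjective quality), 1 = PSNR (default) */
}
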
 
@@ -477,13 +666,6 @@ typedef struct EbSvtAv1EncConfiguration
      */
     uint32_t active_channel_count;
 
-    /**
-     * @brief API signal to constrain motion vectors.
-     *
-     * Default is false.
-     */
-    EbBool restricted_motion_vector;
-
     // Threads management
 
     /* The number of logical processor which encoder threads run on. If
@@ -509,7 +691,15 @@ typedef struct EbSvtAv1EncConfiguration
      * Default is -1. */
     int32_t target_socket;
 
+    /* CPU FLAGS to limit assembly instruction set used by encoder.
+    * Default is EB_CPU_FLAGS_ALL. */
+    EbCpuFlags use_cpu_flags;
+
     // Debug tools
+    /* Instruct the library to calculate the recon to source for PSNR calculation
+    *
+    * Default is 0.*/
+    uint32_t stat_report;
 
     /**
      * @brief API Signal to output reconstructed yuv used for debug purposes.
@@ -517,102 +707,59 @@ typedef struct EbSvtAv1EncConfiguration
      *
      * Default is false.
      */
-    EbBool recon_enabled;
-
-    /* Log 2 Tile Rows and colums . 0 means no tiling,1 means that we split the dimension
-        * into 2
-        * Default is 0. */
-    int32_t tile_columns;
-    int32_t tile_rows;
+    Bool recon_enabled;
+    // 1.0.0: Any additional fields shall go after here
 
     /**
-     * @brief Enable use of ALT-REF (temporally filtered) frames.
+     * @brief Signal that force-key-frames is enabled.
      *
-     * Default is true.
      */
-    EbBool enable_tf;
+    Bool force_key_frames;
 
-    EbBool enable_overlays;
     /**
-     * @brief Tune for a particular metric; 0: VQ, 1: PSNR
-     *
-     * Default is 1.
+     * @brief Signal to the library to treat intra_period_length as seconds and
+     * multiply by fps_num/fps_den.
      */
-    uint8_t tune;
-    // super-resolution parameters
-    uint8_t superres_mode;
-    uint8_t superres_denom;
-    uint8_t superres_kf_denom;
-    uint8_t superres_qthres;
-    uint8_t superres_kf_qthres;
-    uint8_t superres_auto_search_type;
+    Bool multiply_keyint;
 
+    // reference scaling parameters
     /**
-     * @brief API signal containing the manual prediction structure parameters.
-     * Only used when enable_manual_pred_struct is enabled. This list is copied
-     * into internal buffers after svt_av1_enc_set_parameter().
+     * @brief Reference scaling mode
+     * the available modes are defined in RESIZE_MODE
      */
-    PredictionStructureConfigEntry pred_struct[1 << (MAX_HIERARCHICAL_LEVEL - 1)];
-
+    uint8_t resize_mode;
     /**
-     * @brief API signal to overwrite the encoder's default prediction structure.
+     * @brief Resize denominator
+     * this value can be from 8 to 16, meaning the frames are downscaled to between 8/8 and
+     * 8/16 of the original resolution in both width and height
+     */
+    uint8_t resize_denom;
+    /**
+     * @brief Resize denominator of key frames
+     * this value can be from 8 to 16, meaning the frames are downscaled to between 8/8 and
+     * 8/16 of the original resolution in both width and height
+     */
+    uint8_t resize_kf_denom;
+    /**
+     * @brief Signal to the library to enable quantization matrices
      *
      * Default is false.
      */
-    EbBool enable_manual_pred_struct;
-
+    Bool enable_qm;
     /**
-     * @brief API signal specifying the size (number of entries) of the manual prediction structure buffer.
-     * Only checked and used when enable_manual_pred_struct is enabled.
-     *
-     * Min is 1.
-     * Max is 32.
-     * Default is 0.
+     * @brief Min quant matrix flatness. Applicable when enable_qm is true.
+     * Min value is 0.
+     * Max value is 15.
+     * Default is 8.
      */
-    int32_t manual_pred_struct_entry_num;
-
-    // Color description
-    /* Color description present flag
-    *
-    * It is not necessary to set this parameter manually.
-    * It is set internally to true once one of the color_primaries, transfer_characteristics or
-    * matrix coefficients is set to non-default value.
-    *
-    Default is false. */
-    EbBool color_description_present_flag;
-    /* Color primaries
-    * values are from EbColorPrimaries
-    Default is 2 (CP_UNSPECIFIED). */
-    uint8_t color_primaries;
-    /* Transfer characteristics
-    * values are from EbTransferCharacteristics
-    Default is 2 (TC_UNSPECIFIED). */
-    uint8_t transfer_characteristics;
-    /* Matrix coefficients
-    * values are from EbMatrixCoefficients
-    Default is 2 (MC_UNSPECIFIED). */
-    uint8_t matrix_coefficients;
-    /* Color range
-    * values are from EbColorRange
-    * 0: studio swing.
-    * 1: full swing.
-    Default is 0. */
-    uint8_t color_range;
-    /* Mastering display metadata
-    * values are from set using svt_aom_parse_mastering_display()
-    */
-    struct EbSvtAv1MasteringDisplayInfo mastering_display;
-    /* Content light level
-    * values are from set using svt_aom_parse_content_light_level()
-    */
-    struct EbContentLightLevel content_light_level;
-    /* Decoder speed optimization level
-    * 0: No decoder speed optimization
-    * 1: Low-level decoder speed optimization (fast decode)
-    * 2: Medium-level decoder speed optimization (faster decode)
-    * 3: High-level decoder speed optimization (fastest decode)
-    */
-    uint8_t fast_decode;
+    uint8_t min_qm_level;
+    /**
+     * @brief Max quant matrix flatness. Applicable when enable_qm is true.
+     * Min value is 0.
+     * Max value is 15.
+     * Default is 15.
+     */
+    uint8_t max_qm_level;
 } EbSvtAv1EncConfiguration;
 
 /**
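
[Editor's note] Reference scaling and quantization matrices are the two headline additions to the tail of this struct. A hedged sketch enabling both, with denominators inside the documented 8-16 range:

#include "EbSvtAv1Enc.h"

static void enable_scaling_and_qm(EbSvtAv1EncConfiguration *cfg) {
    cfg->resize_mode     = RESIZE_FIXED; /* code every frame at the specified scale */
    cfg->resize_denom    = 12;           /* inter frames at 8/12 = 2/3 of the input size */
    cfg->resize_kf_denom = 8;            /* key frames at full size (8/8) */
    cfg->enable_qm       = TRUE;
    cfg->min_qm_level    = 8;            /* flatness bounds, 0..15 */
    cfg->max_qm_level    = 15;
}
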
diff -pruN 0.9.1+dfsg-1/Source/API/EbSvtAv1Formats.h 1.2.0+dfsg-2/Source/API/EbSvtAv1Formats.h
--- 0.9.1+dfsg-1/Source/API/EbSvtAv1Formats.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/API/EbSvtAv1Formats.h	2022-08-01 19:12:00.000000000 +0000
@@ -98,7 +98,14 @@ typedef enum EbColorRange {
 } EbColorRange; /**< alias for enum aom_color_range */
 
 /* AV1 bit depth */
-typedef enum EbBitDepth { EB_EIGHT_BIT = 8, EB_TEN_BIT = 10, EB_TWELVE_BIT = 12 } EbBitDepth;
+typedef enum EbBitDepth {
+    EB_EIGHT_BIT     = 8,
+    EB_TEN_BIT       = 10,
+    EB_TWELVE_BIT    = 12,
+    EB_FOURTEEN_BIT  = 14, // Not supported
+    EB_SIXTEEN_BIT   = 16, // Not supported
+    EB_THIRTYTWO_BIT = 32, // Not supported
+} EbBitDepth;
 
 /* AV1 Chroma Format */
 typedef enum EbColorFormat { EB_YUV400, EB_YUV420, EB_YUV422, EB_YUV444 } EbColorFormat;
diff -pruN 0.9.1+dfsg-1/Source/API/EbSvtAv1.h 1.2.0+dfsg-2/Source/API/EbSvtAv1.h
--- 0.9.1+dfsg-1/Source/API/EbSvtAv1.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/API/EbSvtAv1.h	2022-08-01 19:12:00.000000000 +0000
@@ -23,15 +23,9 @@ extern "C" {
 struct SvtMetadataArray;
 
 // API Version
-#define SVT_AV1_VERSION_MAJOR 0
-#define SVT_AV1_VERSION_MINOR 9
-#define SVT_AV1_VERSION_PATCHLEVEL 1
-
-#ifndef SVT_VERSION_MAJOR
-#define SVT_VERSION_MAJOR SVT_AV1_VERSION_MAJOR
-#define SVT_VERSION_MINOR SVT_AV1_VERSION_MINOR
-#define SVT_VERSION_PATCHLEVEL SVT_AV1_VERSION_PATCHLEVEL
-#endif
+#define SVT_AV1_VERSION_MAJOR 1
+#define SVT_AV1_VERSION_MINOR 2
+#define SVT_AV1_VERSION_PATCHLEVEL 0
 
 #define SVT_AV1_CHECK_VERSION(major, minor, patch)                            \
     (SVT_AV1_VERSION_MAJOR > (major) ||                                       \
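
[Editor's note] SVT_AV1_CHECK_VERSION is what the version bump above leans on; EbSvtAv1Enc.h in this same patch guards the deprecated vbv_bufsize with #if !SVT_AV1_CHECK_VERSION(1, 3, 0). Applications can straddle releases the same way:

#include "EbSvtAv1.h"

#if SVT_AV1_CHECK_VERSION(1, 2, 0)
/* safe to touch 1.2.0 additions, e.g. the quantization-matrix fields */
#else
/* fall back when building against 0.9.x-1.1.x headers */
#endif
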
@@ -83,14 +77,13 @@ typedef enum EbAv1PictureType {
     EB_AV1_INVALID_PICTURE       = 0xFF
 } EbAv1PictureType;
 
-/** The EbBool type is intended to be used to represent a true or a false
+/** The Bool type is intended to be used to represent a true or a false
 value when passing parameters to and from the eBrisk API.  The
-EbBool is a 32 bit quantity and is aligned on a 32 bit word boundary.
+Bool is an 8-bit quantity (a typedef of uint8_t).
 */
-
-#define EbBool uint8_t
-#define EB_FALSE 0
-#define EB_TRUE 1
+typedef uint8_t Bool;
+#define FALSE 0
+#define TRUE 1
 
 typedef struct EbBufferHeaderType {
     // EbBufferHeaderType size
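
[Editor's note] The EbBool/EB_TRUE/EB_FALSE to Bool/TRUE/FALSE rename is source-breaking for applications. A hedged compatibility shim for code that must build against both header generations (both spellings ultimately name uint8_t):

#include "EbSvtAv1.h"

#if !SVT_AV1_CHECK_VERSION(1, 0, 0)
typedef EbBool Bool; /* pre-1.0: EbBool is #defined to uint8_t, so the alias is exact */
#ifndef TRUE
#define TRUE EB_TRUE
#define FALSE EB_FALSE
#endif
#endif
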
@@ -111,11 +104,11 @@ typedef struct EbBufferHeaderType {
     int64_t  pts;
 
     // pic info
-    uint32_t qp;
-    uint32_t pic_type;
-    uint64_t luma_sse;
-    uint64_t cr_sse;
-    uint64_t cb_sse;
+    uint32_t         qp;
+    EbAv1PictureType pic_type;
+    uint64_t         luma_sse;
+    uint64_t         cr_sse;
+    uint64_t         cb_sse;
     // pic flags
     uint32_t flags;
 
@@ -160,12 +153,6 @@ typedef enum EbAv1SeqProfile {
     PROFESSIONAL_PROFILE = 2
 } EbAv1SeqProfile;
 
-typedef enum AomBitDepth {
-    AOM_BITS_8  = 8, /**<  8 bits */
-    AOM_BITS_10 = 10, /**< 10 bits */
-    AOM_BITS_12 = 12, /**< 12 bits */
-} AomBitDepth;
-
 // For 8-bit and 10-bit packed inputs and outputs, the luma, cb, and cr fields should be used
 //   for the three input picture planes.  However, for 10-bit unpacked planes the
 //   lumaExt, cbExt, and crExt fields should be used hold the extra 2-bits of
@@ -196,11 +183,6 @@ typedef struct EbSvtIOFormat //former Eb
     EbBitDepth    bit_depth;
 } EbSvtIOFormat;
 
-typedef struct BitstreamLevel {
-    uint8_t major;
-    uint8_t minor;
-} BitstreamLevel;
-
 typedef struct EbOperatingParametersInfo {
     /*!<Specifies the time interval between the arrival of the first bit in the
      * smoothing buffer and the subsequent removal of the data that belongs to
@@ -239,11 +221,11 @@ typedef struct EbAV1OperatingPoint {
 
 typedef struct EbColorConfig {
     /*!< bit depth */
-    uint32_t bit_depth;
+    EbBitDepth bit_depth;
 
     /*!< 1: Indicates that the video does not contain U and V color planes.
      *   0: Indicates that the video contains Y, U, and V color planes. */
-    EbBool mono_chrome;
+    Bool mono_chrome;
 
     /*!< Specify the chroma subsampling format */
     uint8_t subsampling_x;
@@ -255,7 +237,7 @@ typedef struct EbColorConfig {
             matrix_coefficients are present. color_description_present_flag
      *   0: Specifies that color_primaries, transfer_characteristics and
             matrix_coefficients are not present */
-    EbBool color_description_present_flag;
+    Bool color_description_present_flag;
 
     /*!< An integer that is defined by the "Color primaries" section of
      * ISO/IEC 23091-4/ITU-T H.273 */
@@ -279,13 +261,13 @@ typedef struct EbColorConfig {
     /*!< 1: Indicates that the U and V planes may have separate delta quantizer
      *   0: Indicates that the U and V planes will share the same delta
             quantizer value */
-    EbBool separate_uv_delta_q;
+    Bool separate_uv_delta_q;
 
 } EbColorConfig;
 
 typedef struct EbTimingInfo {
     /*!< Timing info present flag */
-    EbBool timing_info_present;
+    Bool timing_info_present;
 
     /*!< Number of time units of a clock operating at the frequency time_scale
      * Hz that corresponds to one increment of a clock tick counter*/
@@ -306,28 +288,50 @@ typedef struct EbTimingInfo {
 
 } EbTimingInfo;
 
+// structure to be allocated by the sample application and passed to the library
+// on a per-picture basis through the p_app_private field in the EbBufferHeaderType structure
+// this structure and the data inside would be cast, validated, then copied at the
+// svt_av1_enc_send_picture API call
+typedef enum {
+    PRIVATE_DATA, // data to be passed through and written to the bitstream
+    //FILM_GRAIN_PARAM,        // passing film grain parameters per picture
+    REF_FRAME_SCALING_EVENT, // reference frame scaling data per picture
+    PRIVATE_DATA_TYPES // end of private data types
+} PrivDataType;
+typedef struct EbPrivDataNode {
+    PrivDataType           node_type;
+    void                  *data; // pointer to data structure e.g. EbRefFrameScale or AomFilmGrain
+    uint32_t               size; // size of data being sent for the library to know how much to copy
+    struct EbPrivDataNode *next; // pointer to the next node, NULL if done.
+} EbPrivDataNode;
+typedef struct EbRefFrameScale {
+    uint8_t  scale_mode; // scaling mode; supports RESIZE_NONE, RESIZE_FIXED and RESIZE_RANDOM
+    uint32_t scale_denom; // scaling denominator for non-key frame, from 8~16
+    uint32_t scale_kf_denom; // scaling denominator for key frame, from 8~16
+} EbRefFrameScale;
+
 /**
 CPU FLAGS
 */
-typedef uint64_t CPU_FLAGS;
-#define CPU_FLAGS_MMX (1 << 0)
-#define CPU_FLAGS_SSE (1 << 1)
-#define CPU_FLAGS_SSE2 (1 << 2)
-#define CPU_FLAGS_SSE3 (1 << 3)
-#define CPU_FLAGS_SSSE3 (1 << 4)
-#define CPU_FLAGS_SSE4_1 (1 << 5)
-#define CPU_FLAGS_SSE4_2 (1 << 6)
-#define CPU_FLAGS_AVX (1 << 7)
-#define CPU_FLAGS_AVX2 (1 << 8)
-#define CPU_FLAGS_AVX512F (1 << 9)
-#define CPU_FLAGS_AVX512CD (1 << 10)
-#define CPU_FLAGS_AVX512DQ (1 << 11)
-#define CPU_FLAGS_AVX512ER (1 << 12)
-#define CPU_FLAGS_AVX512PF (1 << 13)
-#define CPU_FLAGS_AVX512BW (1 << 14)
-#define CPU_FLAGS_AVX512VL (1 << 15)
-#define CPU_FLAGS_ALL ((CPU_FLAGS_AVX512VL << 1) - 1)
-#define CPU_FLAGS_INVALID (1ULL << (sizeof(CPU_FLAGS) * 8ULL - 1ULL))
+typedef uint64_t EbCpuFlags;
+#define EB_CPU_FLAGS_MMX (1 << 0)
+#define EB_CPU_FLAGS_SSE (1 << 1)
+#define EB_CPU_FLAGS_SSE2 (1 << 2)
+#define EB_CPU_FLAGS_SSE3 (1 << 3)
+#define EB_CPU_FLAGS_SSSE3 (1 << 4)
+#define EB_CPU_FLAGS_SSE4_1 (1 << 5)
+#define EB_CPU_FLAGS_SSE4_2 (1 << 6)
+#define EB_CPU_FLAGS_AVX (1 << 7)
+#define EB_CPU_FLAGS_AVX2 (1 << 8)
+#define EB_CPU_FLAGS_AVX512F (1 << 9)
+#define EB_CPU_FLAGS_AVX512CD (1 << 10)
+#define EB_CPU_FLAGS_AVX512DQ (1 << 11)
+#define EB_CPU_FLAGS_AVX512ER (1 << 12)
+#define EB_CPU_FLAGS_AVX512PF (1 << 13)
+#define EB_CPU_FLAGS_AVX512BW (1 << 14)
+#define EB_CPU_FLAGS_AVX512VL (1 << 15)
+#define EB_CPU_FLAGS_ALL ((EB_CPU_FLAGS_AVX512VL << 1) - 1)
+#define EB_CPU_FLAGS_INVALID (1ULL << (sizeof(EbCpuFlags) * 8ULL - 1ULL))
 
 #ifdef __cplusplus
 }
diff -pruN 0.9.1+dfsg-1/Source/API/EbSvtAv1Metadata.h 1.2.0+dfsg-2/Source/API/EbSvtAv1Metadata.h
--- 0.9.1+dfsg-1/Source/API/EbSvtAv1Metadata.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/API/EbSvtAv1Metadata.h	2022-08-01 19:12:00.000000000 +0000
@@ -30,6 +30,7 @@ typedef enum EbAv1MetadataType {
     EB_AV1_METADATA_TYPE_SCALABILITY    = 3,
     EB_AV1_METADATA_TYPE_ITUT_T35       = 4,
     EB_AV1_METADATA_TYPE_TIMECODE       = 5,
+    EB_AV1_METADATA_TYPE_FRAME_SIZE     = 6,
 } EbAv1MetadataType;
 
 /*!\brief Metadata payload. */
@@ -45,6 +46,17 @@ typedef struct SvtMetadataArray {
     SvtMetadataT **metadata_array; /* Array of metadata structs */
 } SvtMetadataArrayT;
 
+/*!\brief Frame size struct in metadata. */
+typedef struct SvtMetadataFrameSize {
+    uint16_t width; /**< pixel width of frame */
+    uint16_t height; /**< pixel height of frame */
+    uint16_t disp_width; /**< display pixel width of frame */
+    uint16_t disp_height; /**< display pixel height of frame */
+    uint16_t stride; /**< pixel stride of frame */
+    uint16_t subsampling_x; /**< subsampling of Cb/Cr in width */
+    uint16_t subsampling_y; /**< subsampling of Cb/Cr in height */
+} SvtMetadataFrameSizeT;
+
 /*!\brief Allocate memory for SvtMetadataT struct.
  *
  * Allocates storage for the metadata payload, sets its type and copies the
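
[Editor's note] The new EB_AV1_METADATA_TYPE_FRAME_SIZE type pairs with SvtMetadataFrameSizeT to describe per-frame coded vs. display dimensions (the reference-scaling plumbing). A hedged sketch of filling the payload for a 1080p stream coded at 2/3 scale; wrapping it in a SvtMetadataT is done by the allocation helper declared just below this excerpt:

#include "EbSvtAv1Metadata.h"

static SvtMetadataFrameSizeT scaled_1080p(void) {
    SvtMetadataFrameSizeT fs = {0};
    fs.width         = 1280; /* coded luma width  (1920 * 8/12) */
    fs.height        = 720;  /* coded luma height (1080 * 8/12) */
    fs.disp_width    = 1920; /* display size stays at full resolution */
    fs.disp_height   = 1080;
    fs.stride        = 1280; /* illustrative: stride equal to coded width */
    fs.subsampling_x = 1;    /* 4:2:0 chroma */
    fs.subsampling_y = 1;
    return fs;
}
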
diff -pruN 0.9.1+dfsg-1/Source/App/DecApp/EbDecParamParser.c 1.2.0+dfsg-2/Source/App/DecApp/EbDecParamParser.c
--- 0.9.1+dfsg-1/Source/App/DecApp/EbDecParamParser.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/DecApp/EbDecParamParser.c	2022-08-01 19:12:00.000000000 +0000
@@ -38,7 +38,7 @@ static void set_bit_depth(const char *va
     cfg->max_bit_depth = strtoul(value, NULL, 0);
 };
 static void set_decoder_16bit_pipeline(const char *value, EbSvtAv1DecConfiguration *cfg) {
-    cfg->is_16bit_pipeline = (EbBool)strtoul(value, NULL, 0);
+    cfg->is_16bit_pipeline = (Bool)strtoul(value, NULL, 0);
     if (cfg->is_16bit_pipeline != 1 && cfg->is_16bit_pipeline != 0) {
         fprintf(stderr, "Warning : Invalid value for is_16bit_pipeline, setting value to 0. \n");
         cfg->is_16bit_pipeline = 0;
@@ -102,9 +102,9 @@ static void show_help() {
     H0(" -parallel-frames <arg>    Number of frames to be processed in parallel \n");
     H0(" -md5                      MD5 support flag \n");
     H0(" -fps-frm                  Show fps after each frame decoded\n");
-    H0(" -fps-summary              Show fps summary");
-    H0(" -skip-film-grain          Disable Film Grain");
-    H0(" -16bit-pipeline           Enable 16b pipeline. [1 - enable, 0 - disable]");
+    H0(" -fps-summary              Show fps summary\n");
+    H0(" -skip-film-grain          Disable Film Grain\n");
+    H0(" -16bit-pipeline           Enable 16b pipeline. [1 - enable, 0 - disable]\n");
 
     exit(1);
 }
diff -pruN 0.9.1+dfsg-1/Source/App/DecApp/EbDecParamParser.h 1.2.0+dfsg-2/Source/App/DecApp/EbDecParamParser.h
--- 0.9.1+dfsg-1/Source/App/DecApp/EbDecParamParser.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/DecApp/EbDecParamParser.h	2022-08-01 19:12:00.000000000 +0000
@@ -59,7 +59,7 @@
 typedef struct ConfigEntry {
     const char *token;
     const char *name;
-    EbBool      value_required;
+    Bool        value_required;
     void (*scf)(const char *, EbSvtAv1DecConfiguration *);
 } ConfigEntry;
 
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppConfig.c 1.2.0+dfsg-2/Source/App/EncApp/EbAppConfig.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppConfig.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppConfig.c	2022-08-01 19:12:00.000000000 +0000
@@ -12,6 +12,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 #include <sys/stat.h>
 
 #include "EbSvtAv1Metadata.h"
@@ -26,6 +27,8 @@
 #include <sys/file.h>
 #endif
 
+#include "EbAppOutputivf.h"
+
 #if !defined(_WIN32) || !defined(HAVE_STRNLEN_S)
 #include "third_party/safestringlib/safe_str_lib.h"
 #endif
@@ -64,6 +67,11 @@
 #define KEY_FRAME_QINDEX_OFFSET_TOKEN "--key-frame-qindex-offset"
 #define KEY_FRAME_CHROMA_QINDEX_OFFSET_TOKEN "--key-frame-chroma-qindex-offset"
 #define CHROMA_QINDEX_OFFSETS_TOKEN "--chroma-qindex-offsets"
+#define LUMA_Y_DC_QINDEX_OFFSET_TOKEN "--luma-y-dc-qindex-offset"
+#define CHROMA_U_DC_QINDEX_OFFSET_TOKEN "--chroma-u-dc-qindex-offset"
+#define CHROMA_U_AC_QINDEX_OFFSET_TOKEN "--chroma-u-ac-qindex-offset"
+#define CHROMA_V_DC_QINDEX_OFFSET_TOKEN "--chroma-v-dc-qindex-offset"
+#define CHROMA_V_AC_QINDEX_OFFSET_TOKEN "--chroma-v-ac-qindex-offset"
 
 #define FRAME_RATE_TOKEN "--fps"
 #define FRAME_RATE_NUMERATOR_TOKEN "--fps-num"
@@ -77,6 +85,7 @@
 #define TIER_TOKEN "--tier"
 #define LEVEL_TOKEN "--level"
 #define FILM_GRAIN_TOKEN "--film-grain"
+#define FILM_GRAIN_DENOISE_APPLY_TOKEN "--film-grain-denoise"
 #define INTRA_REFRESH_TYPE_TOKEN "--irefresh-type" // no Eval
 #define CDEF_ENABLE_TOKEN "--enable-cdef"
 #define SCREEN_CONTENT_TOKEN "--scm"
@@ -92,11 +101,15 @@
 #define SUPERRES_QTHRES "--superres-qthres"
 #define SUPERRES_KF_QTHRES "--superres-kf-qthres"
 // --- end: SUPER-RESOLUTION SUPPORT
+// --- start: REFERENCE SCALING SUPPORT
+#define RESIZE_MODE_INPUT "--resize-mode"
+#define RESIZE_DENOM "--resize-denom"
+#define RESIZE_KF_DENOM "--resize-kf-denom"
+// --- end: REFERENCE SCALING SUPPORT
 #define RATE_CONTROL_ENABLE_TOKEN "--rc"
 #define TARGET_BIT_RATE_TOKEN "--tbr"
 #define MAX_BIT_RATE_TOKEN "--mbr"
 #define MAX_QP_TOKEN "--max-qp"
-#define VBV_BUFSIZE_TOKEN "--vbv-bufsize"
 #define MIN_QP_TOKEN "--min-qp"
 #define VBR_BIAS_PCT_TOKEN "--bias-pct"
 #define VBR_MIN_SECTION_PCT_TOKEN "--minsection-pct"
@@ -158,56 +171,49 @@
 #define QP_LONG_TOKEN "--qp"
 #define CRF_LONG_TOKEN "--crf"
 #define LOOP_FILTER_ENABLE "--enable-dlf"
+#define FORCED_MAX_FRAME_WIDTH_TOKEN "--forced-max-frame-width"
+#define FORCED_MAX_FRAME_HEIGHT_TOKEN "--forced-max-frame-height"
 
 #define COLOR_PRIMARIES_NEW_TOKEN "--color-primaries"
 #define TRANSFER_CHARACTERISTICS_NEW_TOKEN "--transfer-characteristics"
 #define MATRIX_COEFFICIENTS_NEW_TOKEN "--matrix-coefficients"
 #define COLOR_RANGE_NEW_TOKEN "--color-range"
+#define CHROMA_SAMPLE_POSITION_TOKEN "--chroma-sample-position"
 #define MASTERING_DISPLAY_TOKEN "--mastering-display"
 #define CONTENT_LIGHT_LEVEL_TOKEN "--content-light"
 
-#define ENC_MRS -2 // Highest quality research mode (slowest)
-#define ENC_MR -1 //Research mode with higher quality than M0
-#define ENC_M0 0
-#define ENC_M1 1
-#define ENC_M2 2
-#define ENC_M3 3
-#define ENC_M4 4
-#define ENC_M5 5
-#define ENC_M6 6
-#define ENC_M7 7
-#define ENC_M8 8
-#define ENC_M9 9
-#define ENC_M10 10
-#define ENC_M11 11
-#define ENC_M12 12
-#define ENC_M13 13
+#define SFRAME_DIST_TOKEN "--sframe-dist"
+#define SFRAME_MODE_TOKEN "--sframe-mode"
+
+#define ENABLE_QM_TOKEN "--enable-qm"
+#define MIN_QM_LEVEL_TOKEN "--qm-min"
+#define MAX_QM_LEVEL_TOKEN "--qm-max"
 #ifdef _WIN32
 static HANDLE get_file_handle(FILE *fp) { return (HANDLE)_get_osfhandle(_fileno(fp)); }
 #endif
 
-static EbBool fopen_and_lock(FILE **file, const char *name, EbBool write) {
+static Bool fopen_and_lock(FILE **file, const char *name, Bool write) {
     if (!file || !name)
-        return EB_FALSE;
+        return FALSE;
 
     const char *mode = write ? "wb" : "rb";
     FOPEN(*file, name, mode);
     if (!*file)
-        return EB_FALSE;
+        return FALSE;
 
 #ifdef _WIN32
     HANDLE handle = get_file_handle(*file);
     if (handle == INVALID_HANDLE_VALUE)
-        return EB_FALSE;
+        return FALSE;
     if (LockFile(handle, 0, 0, MAXDWORD, MAXDWORD))
-        return EB_TRUE;
+        return TRUE;
 #else
     int fd = fileno(*file);
     if (flock(fd, LOCK_EX | LOCK_NB) == 0)
-        return EB_TRUE;
+        return TRUE;
 #endif
     fprintf(stderr, "ERROR: locking %s failed, is it used by other encoder?\n", name);
-    return EB_FALSE;
+    return FALSE;
 }
 
 /**********************************
@@ -224,7 +230,7 @@ static void set_cfg_input_file(const cha
 
     if (!strcmp(filename, "stdin")) {
         cfg->input_file         = stdin;
-        cfg->input_file_is_fifo = EB_TRUE;
+        cfg->input_file_is_fifo = TRUE;
     } else
         FOPEN(cfg->input_file, filename, "rb");
 
@@ -256,7 +262,7 @@ static void set_pred_struct_file(const c
 #else
     cfg->input_pred_struct_filename = _strdup(value);
 #endif
-    cfg->config.enable_manual_pred_struct = EB_TRUE;
+    cfg->config.enable_manual_pred_struct = TRUE;
 }
 
 static void set_cfg_stream_file(const char *value, EbConfig *cfg) {
@@ -320,6 +326,12 @@ static void set_cfg_source_width(const c
 static void set_cfg_source_height(const char *value, EbConfig *cfg) {
     cfg->config.source_height = strtoul(value, NULL, 0);
 };
+static void set_cfg_forced_max_frame_width(const char *value, EbConfig *cfg) {
+    cfg->config.forced_max_frame_width = strtoul(value, NULL, 0);
+}
+static void set_cfg_forced_max_frame_height(const char *value, EbConfig *cfg) {
+    cfg->config.forced_max_frame_height = strtoul(value, NULL, 0);
+}
 static void set_cfg_frames_to_be_encoded(const char *value, EbConfig *cfg) {
     cfg->frames_to_be_encoded = strtol(value, NULL, 0);
 };
@@ -340,15 +352,8 @@ static void set_progress(const char *val
     }
 }
 static void set_frame_rate(const char *value, EbConfig *cfg) {
-    cfg->config.frame_rate = strtoul(value, NULL, 0);
-    if (cfg->config.frame_rate <= 1000) {
-        cfg->config.frame_rate_numerator   = cfg->config.frame_rate * 1000;
-        cfg->config.frame_rate_denominator = 1000;
-        cfg->config.frame_rate <<= 16;
-    } else {
-        cfg->config.frame_rate_numerator   = (cfg->config.frame_rate >> 16) * 1000;
-        cfg->config.frame_rate_denominator = 1000;
-    }
+    cfg->config.frame_rate_numerator   = strtoul(value, NULL, 0);
+    cfg->config.frame_rate_denominator = 1;
 }
 
 static void set_frame_rate_numerator(const char *value, EbConfig *cfg) {
@@ -369,7 +374,15 @@ static void set_compressed_ten_bit_forma
 static void set_enc_mode(const char *value, EbConfig *cfg) {
     cfg->config.enc_mode = (uint8_t)strtoul(value, NULL, 0);
 };
-
+static void set_enable_qm(const char *value, EbConfig *cfg) {
+    cfg->config.enable_qm = !!strtoul(value, NULL, 0);
+};
+static void set_min_qm_level(const char *value, EbConfig *cfg) {
+    cfg->config.min_qm_level = (uint8_t)strtoul(value, NULL, 0);
+};
+static void set_max_qm_level(const char *value, EbConfig *cfg) {
+    cfg->config.max_qm_level = (uint8_t)strtoul(value, NULL, 0);
+};
 /**
  * @brief split colon separated string into key=value pairs
  *
@@ -433,10 +446,8 @@ static void parse_svtav1_params(const ch
 static void set_cfg_intra_period(const char *value, EbConfig *cfg) {
     cfg->config.intra_period_length = strtol(value, NULL, 0);
 };
-// --keyint 0 == --keyint -1
 static void set_keyint(const char *value, EbConfig *cfg) {
-    const long keyint               = strtol(value, NULL, 0);
-    cfg->config.intra_period_length = keyint < 0 ? keyint : keyint - 1;
+    svt_av1_enc_parse_parameter(&cfg->config, "keyint", value);
 }
 static void set_cfg_intra_refresh_type(const char *value, EbConfig *cfg) {
     switch (strtol(value, NULL, 0)) {
@@ -457,16 +468,16 @@ static void set_cfg_qp(const char *value
     cfg->config.qp = strtoul(value, NULL, 0);
 };
 static void set_cfg_crf(const char *value, EbConfig *cfg) {
-    cfg->config.qp                = strtoul(value, NULL, 0);
-    cfg->config.rate_control_mode = 0;
-    cfg->config.enable_tpl_la     = 1;
+    cfg->config.qp                           = strtoul(value, NULL, 0);
+    cfg->config.rate_control_mode            = 0;
+    cfg->config.enable_adaptive_quantization = 2;
 }
 static void set_cfg_use_qp_file(const char *value, EbConfig *cfg) {
-    cfg->config.use_qp_file = (EbBool)strtol(value, NULL, 0);
+    cfg->config.use_qp_file = (Bool)strtol(value, NULL, 0);
 };
 
 static void set_cfg_use_fixed_qindex_offsets(const char *value, EbConfig *cfg) {
-    cfg->config.use_fixed_qindex_offsets = (EbBool)strtol(value, NULL, 0);
+    cfg->config.use_fixed_qindex_offsets = (Bool)strtol(value, NULL, 0);
 }
 
 static void set_cfg_key_frame_qindex_offset(const char *value, EbConfig *cfg) {
@@ -477,11 +488,32 @@ static void set_cfg_key_frame_chroma_qin
     cfg->config.key_frame_chroma_qindex_offset = (int32_t)strtol(value, NULL, 0);
 }
 
+static void set_cfg_luma_y_dc_qindex_offset(const char *value, EbConfig *cfg) {
+    cfg->config.luma_y_dc_qindex_offset = (int32_t)strtol(value, NULL, 0);
+}
+
+static void set_cfg_chroma_u_dc_qindex_offset(const char *value, EbConfig *cfg) {
+    cfg->config.chroma_u_dc_qindex_offset = (int32_t)strtol(value, NULL, 0);
+}
+
+static void set_cfg_chroma_u_ac_qindex_offset(const char *value, EbConfig *cfg) {
+    cfg->config.chroma_u_ac_qindex_offset = (int32_t)strtol(value, NULL, 0);
+}
+
+static void set_cfg_chroma_v_dc_qindex_offset(const char *value, EbConfig *cfg) {
+    cfg->config.chroma_v_dc_qindex_offset = (int32_t)strtol(value, NULL, 0);
+}
+
+static void set_cfg_chroma_v_ac_qindex_offset(const char *value, EbConfig *cfg) {
+    cfg->config.chroma_v_ac_qindex_offset = (int32_t)strtol(value, NULL, 0);
+}
+
 //assume the input list of values is in the format of "[v1,v2,v3,...]"
 int arg_parse_list(const char *value, int *list, int n) {
     const char *ptr = value;
     char       *endptr;
     int         i = 0;
+    memset(list, 0, n * sizeof(*list)); // zero all n entries, not just n bytes
     while (ptr[0] != '\0') {
         if (ptr[0] == '[' || ptr[0] == ']') {
             ptr++;
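
[Editor's note] With the hierarchical_levels ordering requirement dropped, these parsers always pass a fixed EB_MAX_TEMPORAL_LAYERS-sized buffer, and the memset guarantees trailing entries the user omitted stay zero. Usage, per the bracketed-list format documented above:

#include "EbSvtAv1Enc.h" /* for EB_MAX_TEMPORAL_LAYERS */

int arg_parse_list(const char *value, int *list, int n); /* defined above in EbAppConfig.c */

static void example_offsets(void) {
    int offsets[EB_MAX_TEMPORAL_LAYERS];
    /* "--qindex-offsets [0,2,4,6]" arrives here as the string literal below;
       entries 4 and 5 end up zero via the memset at the top of arg_parse_list() */
    arg_parse_list("[0,2,4,6]", offsets, EB_MAX_TEMPORAL_LAYERS);
}
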
@@ -505,26 +537,19 @@ int arg_parse_list(const char *value, in
 }
 
 static void set_cfg_qindex_offsets(const char *value, EbConfig *cfg) {
-    if (cfg->config.hierarchical_levels == 0) {
-        fprintf(stderr,
-                "qindex offsets parameter should be specificied after hierachical_levels\n");
-        exit(1);
-    }
-    arg_parse_list(value, cfg->config.qindex_offsets, cfg->config.hierarchical_levels + 1);
+    arg_parse_list(value, cfg->config.qindex_offsets, EB_MAX_TEMPORAL_LAYERS);
 }
 
 static void set_cfg_chroma_qindex_offsets(const char *value, EbConfig *cfg) {
-    if (cfg->config.hierarchical_levels == 0) {
-        fprintf(stderr,
-                "chroma qindex offsets parameter should be specificied after hierachical_levels\n");
-        exit(1);
-    }
-    arg_parse_list(value, cfg->config.chroma_qindex_offsets, cfg->config.hierarchical_levels + 1);
+    arg_parse_list(value, cfg->config.chroma_qindex_offsets, EB_MAX_TEMPORAL_LAYERS);
 }
 
 static void set_cfg_film_grain(const char *value, EbConfig *cfg) {
     cfg->config.film_grain_denoise_strength = strtol(value, NULL, 0);
 }; //not bool to enable possible algorithm extension in the future
+static void set_cfg_film_grain_denoise_apply(const char *value, EbConfig *cfg) {
+    cfg->config.film_grain_denoise_apply = (Bool)strtol(value, NULL, 0);
+};
 static void set_enable_dlf_flag(const char *value, EbConfig *cfg) {
     cfg->config.enable_dlf_flag = !!strtoul(value, NULL, 0);
 }
@@ -533,13 +558,14 @@ static void set_cdef_enable(const char *
     cfg->config.cdef_level = -!!strtoul(value, NULL, 0);
 };
 static void set_enable_restoration_flag(const char *value, EbConfig *cfg) {
-    cfg->config.enable_restoration_filtering = -!!strtoul(value, NULL, 0);
+    // Set loop restoration to either DEFAULT(1) or 0
+    cfg->config.enable_restoration_filtering = !!strtoul(value, NULL, 0);
 };
 static void set_enable_mfmv_flag(const char *value, EbConfig *cfg) {
     cfg->config.enable_mfmv = strtol(value, NULL, 0);
 };
 static void set_fast_decode_flag(const char *value, EbConfig *cfg) {
-    cfg->config.fast_decode = (uint8_t)strtol(value, NULL, 0);
+    cfg->config.fast_decode = (Bool)strtol(value, NULL, 0);
 };
 static void set_tile_row(const char *value, EbConfig *cfg) {
     cfg->config.tile_rows = strtoul(value, NULL, 0);
@@ -557,13 +583,12 @@ static void set_rate_control_mode(const
     cfg->config.rate_control_mode = strtoul(value, NULL, 0);
 };
 static void set_target_bit_rate(const char *value, EbConfig *cfg) {
-    cfg->config.target_bit_rate = 1000 * strtoul(value, NULL, 0);
+    if (svt_av1_enc_parse_parameter(&cfg->config, "tbr", value) != EB_ErrorNone)
+        fprintf(stderr, "Invalid value for target bit rate\n");
 };
 static void set_max_bit_rate(const char *value, EbConfig *cfg) {
-    cfg->config.max_bit_rate = 1000 * strtoul(value, NULL, 0);
-};
-static void set_vbv_buf_size(const char *value, EbConfig *cfg) {
-    cfg->config.vbv_bufsize = 1000 * strtoul(value, NULL, 0);
+    if (svt_av1_enc_parse_parameter(&cfg->config, "mbr", value) != EB_ErrorNone)
+        fprintf(stderr, "Invalid value for max bit rate\n");
 };
 static void set_max_qp_allowed(const char *value, EbConfig *cfg) {
     cfg->config.max_qp_allowed = strtoul(value, NULL, 0);
@@ -600,7 +625,7 @@ static void set_recode_loop(const char *
     cfg->config.recode_loop = strtoul(value, NULL, 0);
 };
 static void set_adaptive_quantization(const char *value, EbConfig *cfg) {
-    cfg->config.enable_adaptive_quantization = (EbBool)strtol(value, NULL, 0);
+    cfg->config.enable_adaptive_quantization = (Bool)strtol(value, NULL, 0);
 };
 static void set_screen_content_mode(const char *value, EbConfig *cfg) {
     cfg->config.screen_content_mode = strtoul(value, NULL, 0);
@@ -611,11 +636,11 @@ static void set_enable_tf(const char *va
 };
 
 static void set_enable_overlays(const char *value, EbConfig *cfg) {
-    cfg->config.enable_overlays = (EbBool)strtoul(value, NULL, 0);
+    cfg->config.enable_overlays = (Bool)strtoul(value, NULL, 0);
 };
 
 static void set_tune(const char *value, EbConfig *cfg) {
-    cfg->config.tune = (EbBool)strtoul(value, NULL, 0);
+    cfg->config.tune = (Bool)strtoul(value, NULL, 0);
 };
 // --- end: ALTREF_FILTERING_SUPPORT
 // --- start: SUPER-RESOLUTION SUPPORT
@@ -635,6 +660,17 @@ static void set_superres_kf_qthres(const
     cfg->config.superres_kf_qthres = (uint8_t)strtoul(value, NULL, 0);
 };
 // --- end: SUPER-RESOLUTION SUPPORT
+// --- start: REFERENCE SCALING SUPPORT
+static void set_resize_mode(const char *value, EbConfig *cfg) {
+    cfg->config.resize_mode = (RESIZE_MODE)strtoul(value, NULL, 0);
+};
+static void set_resize_denom(const char *value, EbConfig *cfg) {
+    cfg->config.resize_denom = (uint8_t)strtoul(value, NULL, 0);
+};
+static void set_resize_kf_denom(const char *value, EbConfig *cfg) {
+    cfg->config.resize_kf_denom = (uint8_t)strtoul(value, NULL, 0);
+};
+// --- end: REFERENCE SCALING SUPPORT
 static void set_high_dynamic_range_input(const char *value, EbConfig *cfg) {
     cfg->config.high_dynamic_range_input = !!strtol(value, NULL, 0);
 };
@@ -664,32 +700,32 @@ static void set_injector_frame_rate(cons
 static void set_asm_type(const char *value, EbConfig *cfg) {
     const struct {
         const char *name;
-        CPU_FLAGS   flags;
+        EbCpuFlags  flags;
     } param_maps[] = {
         {"c", 0},
         {"0", 0},
-        {"mmx", (CPU_FLAGS_MMX << 1) - 1},
-        {"1", (CPU_FLAGS_MMX << 1) - 1},
-        {"sse", (CPU_FLAGS_SSE << 1) - 1},
-        {"2", (CPU_FLAGS_SSE << 1) - 1},
-        {"sse2", (CPU_FLAGS_SSE2 << 1) - 1},
-        {"3", (CPU_FLAGS_SSE2 << 1) - 1},
-        {"sse3", (CPU_FLAGS_SSE3 << 1) - 1},
-        {"4", (CPU_FLAGS_SSE3 << 1) - 1},
-        {"ssse3", (CPU_FLAGS_SSSE3 << 1) - 1},
-        {"5", (CPU_FLAGS_SSSE3 << 1) - 1},
-        {"sse4_1", (CPU_FLAGS_SSE4_1 << 1) - 1},
-        {"6", (CPU_FLAGS_SSE4_1 << 1) - 1},
-        {"sse4_2", (CPU_FLAGS_SSE4_2 << 1) - 1},
-        {"7", (CPU_FLAGS_SSE4_2 << 1) - 1},
-        {"avx", (CPU_FLAGS_AVX << 1) - 1},
-        {"8", (CPU_FLAGS_AVX << 1) - 1},
-        {"avx2", (CPU_FLAGS_AVX2 << 1) - 1},
-        {"9", (CPU_FLAGS_AVX2 << 1) - 1},
-        {"avx512", (CPU_FLAGS_AVX512VL << 1) - 1},
-        {"10", (CPU_FLAGS_AVX512VL << 1) - 1},
-        {"max", CPU_FLAGS_ALL},
-        {"11", CPU_FLAGS_ALL},
+        {"mmx", (EB_CPU_FLAGS_MMX << 1) - 1},
+        {"1", (EB_CPU_FLAGS_MMX << 1) - 1},
+        {"sse", (EB_CPU_FLAGS_SSE << 1) - 1},
+        {"2", (EB_CPU_FLAGS_SSE << 1) - 1},
+        {"sse2", (EB_CPU_FLAGS_SSE2 << 1) - 1},
+        {"3", (EB_CPU_FLAGS_SSE2 << 1) - 1},
+        {"sse3", (EB_CPU_FLAGS_SSE3 << 1) - 1},
+        {"4", (EB_CPU_FLAGS_SSE3 << 1) - 1},
+        {"ssse3", (EB_CPU_FLAGS_SSSE3 << 1) - 1},
+        {"5", (EB_CPU_FLAGS_SSSE3 << 1) - 1},
+        {"sse4_1", (EB_CPU_FLAGS_SSE4_1 << 1) - 1},
+        {"6", (EB_CPU_FLAGS_SSE4_1 << 1) - 1},
+        {"sse4_2", (EB_CPU_FLAGS_SSE4_2 << 1) - 1},
+        {"7", (EB_CPU_FLAGS_SSE4_2 << 1) - 1},
+        {"avx", (EB_CPU_FLAGS_AVX << 1) - 1},
+        {"8", (EB_CPU_FLAGS_AVX << 1) - 1},
+        {"avx2", (EB_CPU_FLAGS_AVX2 << 1) - 1},
+        {"9", (EB_CPU_FLAGS_AVX2 << 1) - 1},
+        {"avx512", (EB_CPU_FLAGS_AVX512VL << 1) - 1},
+        {"10", (EB_CPU_FLAGS_AVX512VL << 1) - 1},
+        {"max", EB_CPU_FLAGS_ALL},
+        {"11", EB_CPU_FLAGS_ALL},
     };
     const uint32_t para_map_size = sizeof(param_maps) / sizeof(param_maps[0]);
     uint32_t       i;
@@ -701,7 +737,7 @@ static void set_asm_type(const char *val
         }
     }
 
-    cfg->config.use_cpu_flags = CPU_FLAGS_INVALID;
+    cfg->config.use_cpu_flags = EB_CPU_FLAGS_INVALID;
 };
 static void set_logical_processors(const char *value, EbConfig *cfg) {
     cfg->config.logical_processors = (uint32_t)strtoul(value, NULL, 0);
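
A note on the renamed CPU-flag table above: for a single-bit flag f, (f << 1) - 1 sets f and every bit below it, so selecting one ISA level implicitly enables all older levels as well. A self-contained illustration (the bit positions are made up; the real values are the library's EbCpuFlags constants):

#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint64_t flag_sse2 = 1u << 2; // illustrative positions only
    const uint64_t flag_avx2 = 1u << 8;
    // (f << 1) - 1 turns a single bit into "this level and everything below it"
    printf("sse2 mask: %#llx\n", (unsigned long long)((flag_sse2 << 1) - 1)); // 0x7
    printf("avx2 mask: %#llx\n", (unsigned long long)((flag_avx2 << 1) - 1)); // 0x1ff
    return 0;
}
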
@@ -716,16 +752,19 @@ static void set_restricted_motion_vector
     cfg->config.restricted_motion_vector = !!strtol(value, NULL, 0);
 };
 static void set_cfg_color_primaries(const char *value, EbConfig *cfg) {
-    cfg->config.color_primaries = (uint8_t)strtoul(value, NULL, 0);
+    svt_av1_enc_parse_parameter(&cfg->config, "color-primaries", value);
 }
 static void set_cfg_transfer_characteristics(const char *value, EbConfig *cfg) {
-    cfg->config.transfer_characteristics = (uint8_t)strtoul(value, NULL, 0);
+    svt_av1_enc_parse_parameter(&cfg->config, "transfer-characteristics", value);
 }
 static void set_cfg_matrix_coefficients(const char *value, EbConfig *cfg) {
-    cfg->config.matrix_coefficients = (uint8_t)strtoul(value, NULL, 0);
+    svt_av1_enc_parse_parameter(&cfg->config, "matrix-coefficients", value);
 }
 static void set_cfg_color_range(const char *value, EbConfig *cfg) {
-    cfg->config.color_range = (uint8_t)strtoul(value, NULL, 0);
+    svt_av1_enc_parse_parameter(&cfg->config, "color-range", value);
+}
+static void set_cfg_chroma_sample_position(const char *value, EbConfig *cfg) {
+    svt_av1_enc_parse_parameter(&cfg->config, "chroma-sample-position", value);
 }
 static void set_cfg_mastering_display(const char *value, EbConfig *cfg) {
     if (!svt_aom_parse_mastering_display(&cfg->config.mastering_display, value))
@@ -735,6 +774,12 @@ static void set_cfg_content_light(const
     if (!svt_aom_parse_content_light_level(&cfg->config.content_light_level, value))
         fprintf(stderr, "Failed to parse content light level info properly\n");
 }
+static void set_cfg_sframe_dist(const char *value, EbConfig *cfg) {
+    cfg->config.sframe_dist = (int32_t)strtol(value, NULL, 0);
+}
+static void set_cfg_sframe_mode(const char *value, EbConfig *cfg) {
+    cfg->config.sframe_mode = (EbSFrameMode)strtoul(value, NULL, 0);
+}
 
 enum CfgType {
     SINGLE_INPUT, // Configuration parameters that have only 1 value input
@@ -812,7 +857,7 @@ ConfigEntry config_entry_options[] = {
     {SINGLE_INPUT,
      PRESET_TOKEN,
      "Encoder preset, presets < 0 are for debugging. Higher presets means faster encodes, but with "
-     "a quality tradeoff, default is 12 [-2-13]",
+     "a quality tradeoff, default is 10 [-2-13]",
      set_enc_mode},
 
     {SINGLE_INPUT,
@@ -843,6 +888,16 @@ ConfigEntry config_entry_global_options[
      set_cfg_source_height},
 
     {SINGLE_INPUT,
+     FORCED_MAX_FRAME_WIDTH_TOKEN,
+     "Maximum frame width value to force, default is 0 [64-16384]",
+     set_cfg_forced_max_frame_width},
+
+    {SINGLE_INPUT,
+     FORCED_MAX_FRAME_HEIGHT_TOKEN,
+     "Maximum frame height value to force, default is 0 [64-8704]",
+     set_cfg_forced_max_frame_height},
+
+    {SINGLE_INPUT,
      NUMBER_OF_PICTURES_TOKEN,
      "Number of frames to encode. If `n` is larger than the input, the encoder will loop back and "
      "continue encoding, default is 0 [0: until EOF, 1-`(2^63)-1`]",
@@ -878,11 +933,11 @@ ConfigEntry config_entry_global_options[
      set_high_dynamic_range_input},
     {SINGLE_INPUT,
      FRAME_RATE_TOKEN,
-     "Input video frame rate, integer values only, inferred if y4m, default is 25 [1-240]",
+     "Input video frame rate, integer values only, inferred if y4m, default is 60 [1-240]",
      set_frame_rate},
     {SINGLE_INPUT,
      FRAME_RATE_NUMERATOR_TOKEN,
-     "Input video frame rate numerator, default is 25000 [0-2^32-1]",
+     "Input video frame rate numerator, default is 60000 [0-2^32-1]",
      set_frame_rate_numerator},
     {SINGLE_INPUT,
      FRAME_RATE_DENOMINATOR_TOKEN,
@@ -940,23 +995,23 @@ ConfigEntry config_entry_rc[] = {
     // Rate Control
     {SINGLE_INPUT,
      RATE_CONTROL_ENABLE_TOKEN,
-     "Rate control mode, default is 0 [0: CRF or CQP (if `--enable-tpl-la` is 0), 1: VBR, 2: CBR]",
+     "Rate control mode, default is 0 [0: CRF or CQP (if `--aq-mode` is 0), 1: VBR, 2: CBR]",
      set_rate_control_mode},
-    {SINGLE_INPUT, QP_TOKEN, "Initial QP level value, default is 50 [1-63]", set_cfg_qp},
-    {SINGLE_INPUT, QP_LONG_TOKEN, "Initial QP level value, default is 50 [1-63]", set_cfg_qp},
+    {SINGLE_INPUT, QP_TOKEN, "Initial QP level value, default is 35 [1-63]", set_cfg_qp},
+    {SINGLE_INPUT, QP_LONG_TOKEN, "Initial QP level value, default is 35 [1-63]", set_cfg_qp},
     {SINGLE_INPUT,
      CRF_LONG_TOKEN,
-     "Constant Rate Factor value, setting this value is equal to `--rc 0 --enable-tpl-la 1 --qp "
-     "x`, default is 50 [1-63]",
+     "Constant Rate Factor value, setting this value is equal to `--rc 0 --aq-mode 2 --qp "
+     "x`, default is 35 [1-63]",
      set_cfg_crf},
 
     {SINGLE_INPUT,
      TARGET_BIT_RATE_TOKEN,
-     "Target Bitrate (kbps), only applicable for VBR and CBR encoding, default is 7000 [1-4294967]",
+     "Target Bitrate (kbps), only applicable for VBR and CBR encoding, default is 7000 [1-100000]",
      set_target_bit_rate},
     {SINGLE_INPUT,
      MAX_BIT_RATE_TOKEN,
-     "Maximum Bitrate (kbps) only applicable for CRF and VBR encoding, default is 0 [1-4294967]",
+     "Maximum Bitrate (kbps) only applicable for CRF encoding, default is 0 [1-100000]",
      set_max_bit_rate},
     {SINGLE_INPUT,
      USE_QP_FILE_TOKEN,
@@ -981,12 +1036,6 @@ ConfigEntry config_entry_rc[] = {
      "Set adaptive QP level, default is 2 [0: off, 1: variance base using AV1 segments, 2: deltaq "
      "pred efficiency]",
      set_adaptive_quantization},
-
-    {SINGLE_INPUT,
-     VBV_BUFSIZE_TOKEN,
-     "VBV buffer size, default is the value of `--tbr` [1-4294967]",
-     set_vbv_buf_size},
-
     {SINGLE_INPUT,
      USE_FIXED_QINDEX_OFFSETS_TOKEN,
      "Overwrite the encoder default hierarchical layer based QP assignment and use fixed Q index "
@@ -1010,7 +1059,26 @@ ConfigEntry config_entry_rc[] = {
      "list of chroma Q index offsets per hierarchical layer, separated by `,` with each offset in "
      "the range of [-256-255], default is `0,0,..,0`",
      set_cfg_chroma_qindex_offsets},
-
+    {SINGLE_INPUT,
+     LUMA_Y_DC_QINDEX_OFFSET_TOKEN,
+     "Luma Y DC Qindex Offset",
+     set_cfg_luma_y_dc_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_U_DC_QINDEX_OFFSET_TOKEN,
+     "Chroma U DC Qindex Offset",
+     set_cfg_chroma_u_dc_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_U_AC_QINDEX_OFFSET_TOKEN,
+     "Chroma U AC Qindex Offset",
+     set_cfg_chroma_u_ac_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_V_DC_QINDEX_OFFSET_TOKEN,
+     "Chroma V DC Qindex Offset",
+     set_cfg_chroma_v_dc_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_V_AC_QINDEX_OFFSET_TOKEN,
+     "Chroma V AC Qindex Offset",
+     set_cfg_chroma_v_ac_qindex_offset},
     {SINGLE_INPUT,
      UNDER_SHOOT_PCT_TOKEN,
      "Allowable datarate undershoot (min) target (percentage), default is 25, but can change based "
@@ -1023,15 +1091,15 @@ ConfigEntry config_entry_rc[] = {
      set_over_shoot_pct},
     {SINGLE_INPUT,
      BUFFER_SIZE_TOKEN,
-     "Client buffer size (ms), only applicable for CBR, default is 6000 [0-`(2^63)-1`]",
+     "Client buffer size (ms), only applicable for CBR, default is 6000 [0-10000]",
      set_buf_sz},
     {SINGLE_INPUT,
      BUFFER_INITIAL_SIZE_TOKEN,
-     "Client initial buffer size (ms), only applicable for CBR, default is 4000 [0-`(2^63)-1`]",
+     "Client initial buffer size (ms), only applicable for CBR, default is 4000 [0-10000]",
      set_buf_initial_sz},
     {SINGLE_INPUT,
      BUFFER_OPTIMAL_SIZE_TOKEN,
-     "Client optimal buffer size (ms), only applicable for CBR, default is 5000 [0-`(2^63)-1`]",
+     "Client optimal buffer size (ms), only applicable for CBR, default is 5000 [0-10000]",
      set_buf_optimal_sz},
     {SINGLE_INPUT,
      RECODE_LOOP_TOKEN,
@@ -1044,13 +1112,24 @@ ConfigEntry config_entry_rc[] = {
      set_vbr_bias_pct},
     {SINGLE_INPUT,
      VBR_MIN_SECTION_PCT_TOKEN,
-     "GOP min bitrate (expressed as a percentage of the target rate), default is 0 [0-`(2^32)-1`]",
+     "GOP min bitrate (expressed as a percentage of the target rate), default is 0 [0-100]",
      set_vbr_min_section_pct},
     {SINGLE_INPUT,
      VBR_MAX_SECTION_PCT_TOKEN,
-     "GOP max bitrate (expressed as a percentage of the target rate), default is 2000 "
-     "[0-`(2^32)-1`]",
+     "GOP max bitrate (expressed as a percentage of the target rate), default is 2000 [0-10000]",
      set_vbr_max_section_pct},
+    {SINGLE_INPUT,
+     ENABLE_QM_TOKEN,
+     "Enable quantisation matrices, default is 0 [0-1]",
+     set_enable_qm},
+    {SINGLE_INPUT,
+     MIN_QM_LEVEL_TOKEN,
+     "Min quant matrix flatness, default is 8 [0-15]",
+     set_min_qm_level},
+    {SINGLE_INPUT,
+     MAX_QM_LEVEL_TOKEN,
+     "Max quant matrix flatness, default is 15 [0-15]",
+     set_max_qm_level},
     // Termination
     {SINGLE_INPUT, NULL, NULL, NULL}};
 
@@ -1076,7 +1155,7 @@ ConfigEntry config_entry_2p[] = {
 ConfigEntry config_entry_intra_refresh[] = {
     {SINGLE_INPUT,
      KEYINT_TOKEN,
-     "GOP size (frames), default is -2 [-2: ~2 seconds, -1: \"infinite\" and only applicable for "
+     "GOP size (frames), default is -2 [-2: ~5 seconds, -1: \"infinite\" and only applicable for "
      "CRF, 0: same as -1]",
      set_keyint},
     {SINGLE_INPUT,
@@ -1100,7 +1179,7 @@ ConfigEntry config_entry_intra_refresh[]
      set_hierarchical_levels},
     {SINGLE_INPUT,
      PRED_STRUCT_TOKEN,
-     "Set prediction structure, default is 2 [0: low delay P-frames, 1: low delay B-frames, 2: "
+     "Set prediction structure, default is 2 [1: low delay frames, 2: "
      "random access]",
      set_cfg_pred_structure},
     // Termination
@@ -1144,7 +1223,7 @@ ConfigEntry config_entry_specific[] = {
      set_enable_mfmv_flag},
     {SINGLE_INPUT,
      FAST_DECODE_TOKEN,
-     "Fast Decoder levels, default is 0 [0-3]",
+     "Fast Decoder levels, default is 0 [0-1]",
      set_fast_decode_flag},
     // --- start: ALTREF_FILTERING_SUPPORT
     {SINGLE_INPUT,
@@ -1180,6 +1259,13 @@ ConfigEntry config_entry_specific[] = {
      "Enable film grain, default is 0 [0: off, 1-50: level of denoising for film grain]",
      set_cfg_film_grain},
 
+    {SINGLE_INPUT,
+     FILM_GRAIN_DENOISE_APPLY_TOKEN,
+     "Apply denoising when film grain is ON, default is 1 [0: no denoising, film grain data is "
+     "still in frame header, "
+     "1: level of denoising is set by the film-grain parameter]",
+     set_cfg_film_grain_denoise_apply},
+
     // --- start: SUPER-RESOLUTION SUPPORT
     {SINGLE_INPUT,
      SUPERRES_MODE_INPUT,
@@ -1206,6 +1292,35 @@ ConfigEntry config_entry_specific[] = {
      "[0-63]",
      set_superres_kf_qthres},
     // --- end: SUPER-RESOLUTION SUPPORT
+
+    // --- start: SWITCH_FRAME SUPPORT
+    {SINGLE_INPUT,
+     SFRAME_DIST_TOKEN,
+     "S-Frame interval (frames) (0: OFF[default], > 0: ON)",
+     set_cfg_sframe_dist},
+    {SINGLE_INPUT,
+     SFRAME_MODE_TOKEN,
+     "S-Frame insertion mode ([1-2], 1: the considered frame will be made into an S-Frame only if "
+     "it is an altref frame,"
+     " 2: the next altref frame will be made into an S-Frame[default])",
+     set_cfg_sframe_mode},
+    // --- end: SWITCH_FRAME SUPPORT
+    // --- start: REFERENCE SCALING SUPPORT
+    {SINGLE_INPUT,
+     RESIZE_MODE_INPUT,
+     "Enable resize mode [0: none, 1: fixed scale, 2: random scale, 3: dynamic scale, 4: random "
+     "access]",
+     set_resize_mode},
+    {SINGLE_INPUT,
+     RESIZE_DENOM,
+     "Resize denominator, only applicable for mode == 1 [8-16]",
+     set_resize_denom},
+    {SINGLE_INPUT,
+     RESIZE_KF_DENOM,
+     "Resize denominator for key frames, only applicable for mode == 1 [8-16]",
+     set_resize_kf_denom},
+    // --- end: REFERENCE SCALING SUPPORT
+
     // Termination
     {SINGLE_INPUT, NULL, NULL, NULL}};
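
For the fixed-scale resize mode, the denominators above plausibly follow AV1's reference-scaling convention of a fixed numerator of 8, so denom == 8 means no scaling and denom == 16 halves the coded dimension. A sketch under that assumption (the library's exact rounding may differ):

#include <stdint.h>

// Assumed scaling rule for illustration: scaled = dim * 8 / denom, rounded.
static uint32_t resized_dim(uint32_t dim, uint8_t denom) {
    return (dim * 8u + denom / 2u) / denom;
}
// e.g. resized_dim(1920, 12) == 1280, resized_dim(1920, 16) == 960
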
 
@@ -1227,6 +1342,11 @@ ConfigEntry config_entry_color_descripti
      COLOR_RANGE_NEW_TOKEN,
      "Color range, default is 0 [0: Studio, 1: Full]",
      set_cfg_color_range},
+    {SINGLE_INPUT,
+     CHROMA_SAMPLE_POSITION_TOKEN,
+     "Chroma sample position, default is 'unknown' ['unknown', 'vertical'/'left', "
+     "'colocated'/'topleft']",
+     set_cfg_chroma_sample_position},
 
     {SINGLE_INPUT,
      MASTERING_DISPLAY_TOKEN,
@@ -1264,7 +1384,15 @@ ConfigEntry config_entry[] = {
     {SINGLE_INPUT, WIDTH_LONG_TOKEN, "SourceWidth", set_cfg_source_width},
     {SINGLE_INPUT, HEIGHT_TOKEN, "SourceHeight", set_cfg_source_height},
     {SINGLE_INPUT, HEIGHT_LONG_TOKEN, "SourceHeight", set_cfg_source_height},
-
+    {SINGLE_INPUT,
+     FORCED_MAX_FRAME_WIDTH_TOKEN,
+     "ForcedMaximumFrameWidth",
+     set_cfg_forced_max_frame_width},
+    {SINGLE_INPUT,
+     FORCED_MAX_FRAME_HEIGHT_TOKEN,
+     "ForcedMaximumFrameHeight",
+     set_cfg_forced_max_frame_height},
+    // Prediction Structure
     {SINGLE_INPUT, NUMBER_OF_PICTURES_TOKEN, "FrameToBeEncoded", set_cfg_frames_to_be_encoded},
     {SINGLE_INPUT, NUMBER_OF_PICTURES_LONG_TOKEN, "FrameToBeEncoded", set_cfg_frames_to_be_encoded},
     {SINGLE_INPUT, BUFFERED_INPUT_TOKEN, "BufferedInput", set_buffered_input},
@@ -1322,8 +1450,6 @@ ConfigEntry config_entry[] = {
 
     {SINGLE_INPUT, ADAPTIVE_QP_ENABLE_NEW_TOKEN, "AdaptiveQuantization", set_adaptive_quantization},
 
-    {SINGLE_INPUT, VBV_BUFSIZE_TOKEN, "VBVBufSize", set_vbv_buf_size},
-
     //   qindex offsets
     {SINGLE_INPUT,
      USE_FIXED_QINDEX_OFFSETS_TOKEN,
@@ -1342,7 +1468,26 @@ ConfigEntry config_entry[] = {
      CHROMA_QINDEX_OFFSETS_TOKEN,
      "ChromaQIndexOffsets",
      set_cfg_chroma_qindex_offsets},
-
+    {SINGLE_INPUT,
+     LUMA_Y_DC_QINDEX_OFFSET_TOKEN,
+     "LumaYDCQindexOffset",
+     set_cfg_luma_y_dc_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_U_DC_QINDEX_OFFSET_TOKEN,
+     "ChromaUDCQindexOffset",
+     set_cfg_chroma_u_dc_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_U_AC_QINDEX_OFFSET_TOKEN,
+     "ChromaUACQindexOffset",
+     set_cfg_chroma_u_ac_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_V_DC_QINDEX_OFFSET_TOKEN,
+     "ChromaVDCQindexOffset",
+     set_cfg_chroma_v_dc_qindex_offset},
+    {SINGLE_INPUT,
+     CHROMA_V_AC_QINDEX_OFFSET_TOKEN,
+     "ChromaVACQindexOffset",
+     set_cfg_chroma_v_ac_qindex_offset},
     {SINGLE_INPUT, UNDER_SHOOT_PCT_TOKEN, "UnderShootPct", set_under_shoot_pct},
     {SINGLE_INPUT, OVER_SHOOT_PCT_TOKEN, "OverShootPct", set_over_shoot_pct},
     {SINGLE_INPUT, BUFFER_SIZE_TOKEN, "BufSz", set_buf_sz},
@@ -1390,6 +1535,10 @@ ConfigEntry config_entry[] = {
      "RestrictedMotionVector",
      set_restricted_motion_vector},
     {SINGLE_INPUT, FILM_GRAIN_TOKEN, "FilmGrain", set_cfg_film_grain},
+    {SINGLE_INPUT,
+     FILM_GRAIN_DENOISE_APPLY_TOKEN,
+     "FilmGrainDenoise",
+     set_cfg_film_grain_denoise_apply},
 
     //   Super-resolution support
     {SINGLE_INPUT, SUPERRES_MODE_INPUT, "SuperresMode", set_superres_mode},
@@ -1398,6 +1547,14 @@ ConfigEntry config_entry[] = {
     {SINGLE_INPUT, SUPERRES_QTHRES, "SuperresQthres", set_superres_qthres},
     {SINGLE_INPUT, SUPERRES_KF_QTHRES, "SuperresKfQthres", set_superres_kf_qthres},
 
+    // Switch frame support
+    {SINGLE_INPUT, SFRAME_DIST_TOKEN, "SframeInterval", set_cfg_sframe_dist},
+    {SINGLE_INPUT, SFRAME_MODE_TOKEN, "SframeMode", set_cfg_sframe_mode},
+    // Reference Scaling support
+    {SINGLE_INPUT, RESIZE_MODE_INPUT, "ResizeMode", set_resize_mode},
+    {SINGLE_INPUT, RESIZE_DENOM, "ResizeDenom", set_resize_denom},
+    {SINGLE_INPUT, RESIZE_KF_DENOM, "ResizeKfDenom", set_resize_kf_denom},
+
     // Color Description Options
     {SINGLE_INPUT, COLOR_PRIMARIES_NEW_TOKEN, "ColorPrimaries", set_cfg_color_primaries},
     {SINGLE_INPUT,
@@ -1409,8 +1566,17 @@ ConfigEntry config_entry[] = {
      "MatrixCoefficients",
      set_cfg_matrix_coefficients},
     {SINGLE_INPUT, COLOR_RANGE_NEW_TOKEN, "ColorRange", set_cfg_color_range},
+    {SINGLE_INPUT,
+     CHROMA_SAMPLE_POSITION_TOKEN,
+     "ChromaSamplePosition",
+     set_cfg_chroma_sample_position},
     {SINGLE_INPUT, MASTERING_DISPLAY_TOKEN, "MasteringDisplay", set_cfg_mastering_display},
     {SINGLE_INPUT, CONTENT_LIGHT_LEVEL_TOKEN, "ContentLightLevel", set_cfg_content_light},
+
+    // QM
+    {SINGLE_INPUT, ENABLE_QM_TOKEN, "EnableQM", set_enable_qm},
+    {SINGLE_INPUT, MIN_QM_LEVEL_TOKEN, "MinQmLevel", set_min_qm_level},
+    {SINGLE_INPUT, MAX_QM_LEVEL_TOKEN, "MaxQmLevel", set_max_qm_level},
     // Termination
     {SINGLE_INPUT, NULL, NULL, NULL}};
 
@@ -1436,11 +1602,6 @@ void svt_config_dtor(EbConfig *config_pt
     if (!config_ptr)
         return;
     // Close any files that are open
-    if (config_ptr->config_file) {
-        fclose(config_ptr->config_file);
-        config_ptr->config_file = (FILE *)NULL;
-    }
-
     if (config_ptr->input_file) {
         if (!config_ptr->input_file_is_fifo)
             fclose(config_ptr->input_file);
@@ -1448,6 +1609,8 @@ void svt_config_dtor(EbConfig *config_pt
     }
 
     if (config_ptr->bitstream_file) {
+        if (!fseek(config_ptr->bitstream_file, 0, SEEK_SET))
+            write_ivf_stream_header(config_ptr, config_ptr->frames_encoded);
         fclose(config_ptr->bitstream_file);
         config_ptr->bitstream_file = (FILE *)NULL;
     }
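
This hunk is where the changelog's "Add length info to ivf header" lands in the app: at teardown the bitstream file is rewound and write_ivf_stream_header is re-run with the final frame count, which is only known once encoding ends. The fixup it performs looks roughly like this (a sketch; the helper name is hypothetical, and offset 24 follows the common 32-byte IVF header layout):

#include <stdint.h>
#include <stdio.h>

// Patch the 32-bit little-endian frame-count field of an already written
// IVF stream header; in the common layout it sits at byte offset 24.
static void patch_ivf_frame_count(FILE *f, uint32_t n_frames) {
    const uint8_t le[4] = {(uint8_t)(n_frames & 0xff),
                           (uint8_t)((n_frames >> 8) & 0xff),
                           (uint8_t)((n_frames >> 16) & 0xff),
                           (uint8_t)((n_frames >> 24) & 0xff)};
    if (!fseek(f, 24, SEEK_SET))
        fwrite(le, 1, sizeof le, f);
}
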
@@ -1457,11 +1620,6 @@ void svt_config_dtor(EbConfig *config_pt
         config_ptr->recon_file = (FILE *)NULL;
     }
 
-    if (config_ptr->input_pred_struct_file) {
-        fclose(config_ptr->input_pred_struct_file);
-        config_ptr->input_pred_struct_file = (FILE *)NULL;
-    }
-
     if (config_ptr->input_pred_struct_filename) {
         free(config_ptr->input_pred_struct_filename);
         config_ptr->input_pred_struct_filename = NULL;
@@ -1481,6 +1639,12 @@ void svt_config_dtor(EbConfig *config_pt
         fclose(config_ptr->stat_file);
         config_ptr->stat_file = (FILE *)NULL;
     }
+
+    if (config_ptr->output_stat_file) {
+        fclose(config_ptr->output_stat_file);
+        config_ptr->output_stat_file = (FILE *)NULL;
+    }
+
     free((void *)config_ptr->stats);
     free(config_ptr);
     return;
@@ -1497,7 +1661,7 @@ EbErrorType enc_channel_ctor(EncChannel
     c->exit_cond_output = APP_ExitConditionError;
     c->exit_cond_recon  = APP_ExitConditionError;
     c->exit_cond_input  = APP_ExitConditionError;
-    c->active           = EB_FALSE;
+    c->active           = FALSE;
     return svt_av1_enc_init_handle(
         &c->app_callback->svt_encoder_handle, c->app_callback, &c->config->config);
 }
@@ -1689,16 +1853,18 @@ static int32_t find_token(int32_t argc,
 static int32_t read_config_file(EbConfig *config, char *config_path, uint32_t instance_idx) {
     int32_t return_error = 0;
 
+    FILE *config_file;
+
     // Open the config file
-    FOPEN(config->config_file, config_path, "rb");
+    FOPEN(config_file, config_path, "rb");
 
-    if (config->config_file != (FILE *)NULL) {
-        int32_t config_file_size   = find_file_size(config->config_file);
+    if (config_file) {
+        int32_t config_file_size   = find_file_size(config_file);
         char   *config_file_buffer = (char *)malloc(config_file_size);
 
-        if (config_file_buffer != (char *)NULL) {
+        if (config_file_buffer) {
             int32_t result_size = (int32_t)fread(
-                config_file_buffer, 1, config_file_size, config->config_file);
+                config_file_buffer, 1, config_file_size, config_file);
 
             if (result_size == config_file_size) {
                 parse_config_file(config, config_file_buffer, config_file_size);
@@ -1712,8 +1878,7 @@ static int32_t read_config_file(EbConfig
         }
 
         free(config_file_buffer);
-        fclose(config->config_file);
-        config->config_file = (FILE *)NULL;
+        fclose(config_file);
     } else {
         fprintf(stderr,
                 "Error channel %u: Couldn't open Config File: %s\n",
@@ -1726,7 +1891,7 @@ static int32_t read_config_file(EbConfig
 }
 
 /* get config->rc_stats_buffer from config->input_stat_file */
-EbBool load_twopass_stats_in(EbConfig *cfg) {
+Bool load_twopass_stats_in(EbConfig *cfg) {
     EbSvtAv1EncConfiguration *config = &cfg->config;
 #ifdef _WIN32
     int          fd = _fileno(cfg->input_stat_file);
@@ -1738,17 +1903,17 @@ EbBool load_twopass_stats_in(EbConfig *c
     int         ret         = fstat(fd, &file_stat);
 #endif
     if (ret) {
-        return EB_FALSE;
+        return FALSE;
     }
     config->rc_stats_buffer.buf = malloc(file_stat.st_size);
     if (config->rc_stats_buffer.buf) {
         config->rc_stats_buffer.sz = (uint64_t)file_stat.st_size;
         if (fread(config->rc_stats_buffer.buf, 1, file_stat.st_size, cfg->input_stat_file) !=
             (size_t)file_stat.st_size) {
-            return EB_FALSE;
+            return FALSE;
         }
         if (file_stat.st_size == 0) {
-            return EB_FALSE;
+            return FALSE;
         }
     }
     return config->rc_stats_buffer.buf != NULL;
@@ -1759,7 +1924,7 @@ EbErrorType handle_stats_file(EbConfig *
     case ENC_SINGLE_PASS: {
         const char *stats = config->stats ? config->stats : "svtav1_2pass.log";
         if (config->config.pass == 1) {
-            if (!fopen_and_lock(&config->output_stat_file, stats, EB_TRUE)) {
+            if (!fopen_and_lock(&config->output_stat_file, stats, TRUE)) {
                 fprintf(config->error_log_file,
                         "Error instance %u: can't open stats file %s for write \n",
                         channel_number + 1,
@@ -1772,7 +1937,7 @@ EbErrorType handle_stats_file(EbConfig *
         // In this pass, data is read from the file, copied to memory, updated and
         // written back to the same file
         else if (config->config.pass == 2 && config->config.rate_control_mode == 1) {
-            if (!fopen_and_lock(&config->input_stat_file, stats, EB_FALSE)) {
+            if (!fopen_and_lock(&config->input_stat_file, stats, FALSE)) {
                 fprintf(config->error_log_file,
                         "Error instance %u: can't read stats file %s for read\n",
                         channel_number + 1,
@@ -1793,7 +1958,7 @@ EbErrorType handle_stats_file(EbConfig *
                 config->input_stat_file = (FILE *)NULL;
             }
             // Open the file in write mode
-            if (!fopen_and_lock(&config->output_stat_file, stats, EB_TRUE)) {
+            if (!fopen_and_lock(&config->output_stat_file, stats, TRUE)) {
                 fprintf(config->error_log_file,
                         "Error instance %u: can't open stats file %s for write \n",
                         channel_number + 1,
@@ -1804,7 +1969,7 @@ EbErrorType handle_stats_file(EbConfig *
         // Final pass: pass = 2 for CRF and pass = 3 for VBR
         else if ((config->config.pass == 2 && config->config.rate_control_mode == 0) ||
                  (config->config.pass == 3 && config->config.rate_control_mode == 1)) {
-            if (!fopen_and_lock(&config->input_stat_file, stats, EB_FALSE)) {
+            if (!fopen_and_lock(&config->input_stat_file, stats, FALSE)) {
                 fprintf(config->error_log_file,
                         "Error instance %u: can't read stats file %s for read\n",
                         channel_number + 1,
@@ -1826,7 +1991,7 @@ EbErrorType handle_stats_file(EbConfig *
         // for combined two passes,
         // we only output first-pass stats when the user explicitly sets --stats
         if (config->stats) {
-            if (!fopen_and_lock(&config->output_stat_file, config->stats, EB_TRUE)) {
+            if (!fopen_and_lock(&config->output_stat_file, config->stats, TRUE)) {
                 fprintf(config->error_log_file,
                         "Error instance %u: can't open stats file %s for write \n",
                         channel_number + 1,
@@ -1886,10 +2051,24 @@ static EbErrorType app_verify_config(EbC
         return_error = EB_ErrorBadParameter;
     }
 
+    if (config->buffered_input == 0) {
+        fprintf(config->error_log_file,
+                "Error instance %u: Buffered Input cannot be 0\n",
+                channel_number + 1);
+        return_error = EB_ErrorBadParameter;
+    }
+
     if (config->buffered_input < -1) {
         fprintf(config->error_log_file,
-                "Error instance %u: Invalid buffered_input. buffered_input must greater or equal "
-                "to -1\n",
+                "Error instance %u: Invalid buffered_input. buffered_input must be -1 or greater "
+                "than or equal to 1\n",
+                channel_number + 1);
+        return_error = EB_ErrorBadParameter;
+    }
+
+    if (config->buffered_input != -1 && config->y4m_input) {
+        fprintf(config->error_log_file,
+                "Error instance %u: Buffered input is currently not available with y4m inputs\n",
                 channel_number + 1);
         return_error = EB_ErrorBadParameter;
     }
@@ -1902,7 +2081,7 @@ static EbErrorType app_verify_config(EbC
         return_error = EB_ErrorBadParameter;
     }
 
-    if (config->config.use_qp_file == EB_TRUE && config->qp_file == NULL) {
+    if (config->config.use_qp_file == TRUE && config->qp_file == NULL) {
         fprintf(config->error_log_file,
                 "Error instance %u: Could not find QP file, UseQpFile is set to 1\n",
                 channel_number + 1);
@@ -1929,6 +2108,18 @@ static EbErrorType app_verify_config(EbC
                 channel_number + 1);
         return_error = EB_ErrorBadParameter;
     }
+    if (config->config.frame_rate_numerator == 0 || config->config.frame_rate_denominator == 0) {
+        fprintf(config->error_log_file,
+                "Error Instance %u: The frame_rate_numerator and frame_rate_denominator should be "
+                "greater than 0\n",
+                channel_number + 1);
+        return_error = EB_ErrorBadParameter;
+    } else if (config->config.frame_rate_numerator / config->config.frame_rate_denominator > 240) {
+        fprintf(config->error_log_file,
+                "Error Instance %u: The maximum allowed frame_rate is 240 fps\n",
+                channel_number + 1);
+        return_error = EB_ErrorBadParameter;
+    }
 
     return return_error;
 }
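
The two new frame-rate checks compose: the zero test runs first, so the integer division in the 240 fps cap can never divide by zero. Folded into one predicate (a sketch of the same logic as above):

#include <stdint.h>

// 1 when the rational frame rate passes both checks made above:
// nonzero terms, and at most 240 fps using the same integer division.
static int frame_rate_ok(uint32_t num, uint32_t den) {
    if (num == 0 || den == 0)
        return 0;
    return num / den <= 240;
}
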
@@ -2154,7 +2345,7 @@ uint32_t get_number_of_channels(int32_t
     return 1;
 }
 
-static EbBool check_two_pass_conflicts(int32_t argc, char *const argv[]) {
+static Bool check_two_pass_conflicts(int32_t argc, char *const argv[]) {
     char        config_string[COMMAND_LINE_MAX_SIZE];
     const char *conflicts[] = {
         PASS_TOKEN,
@@ -2166,11 +2357,11 @@ static EbBool check_two_pass_conflicts(i
         if (find_token(argc, argv, token, config_string) == 0) {
             fprintf(
                 stderr, "[SVT-Error]: --passes is not accepted in combination with %s\n", token);
-            return EB_TRUE;
+            return TRUE;
         }
         i++;
     }
-    return EB_FALSE;
+    return FALSE;
 }
 /*
 * Returns the number of passes, multi_pass_mode
@@ -2190,11 +2381,6 @@ uint32_t get_passes(int32_t argc, char *
             fprintf(stderr, "Error: The rate control mode must be [0 - 2] \n");
             return 0;
         }
-        if (rc_mode == 2) {
-            // this is covered in the library
-            //fprintf(stderr, "[SVT-Warning]: CBR Rate control is currently not supported, switching to VBR \n");
-            rc_mode = 1;
-        }
     }
 
     int32_t passes     = -1;
@@ -2214,11 +2400,7 @@ uint32_t get_passes(int32_t argc, char *
             }
 #else
             struct stat st;
-            if (!stat(config_string, &st)) {
-                if (S_ISFIFO(st.st_mode)) {
-                    using_fifo = 1;
-                }
-            }
+            using_fifo = !stat(config_string, &st) && S_ISFIFO(st.st_mode);
 #endif
         }
     }
@@ -2230,8 +2412,7 @@ uint32_t get_passes(int32_t argc, char *
         }
     }
 
-    if ((!find_token(argc, argv, INTRA_PERIOD_TOKEN, NULL) ||
-         !find_token(argc, argv, "-" INTRA_PERIOD_TOKEN, NULL)) &&
+    if (!find_token(argc, argv, INTRA_PERIOD_TOKEN, NULL) &&
         !find_token(argc, argv, KEYINT_TOKEN, NULL)) {
         fprintf(stderr,
                 "[SVT-Warning]: --keyint and --intra-period specified, --keyint will take "
@@ -2239,19 +2420,18 @@ uint32_t get_passes(int32_t argc, char *
     }
 
     if (find_token(argc, argv, INTRA_PERIOD_TOKEN, config_string) == 0 ||
-        find_token(argc, argv, "-" INTRA_PERIOD_TOKEN, config_string) == 0 ||
         find_token(argc, argv, KEYINT_TOKEN, config_string) == 0) {
         ip = strtol(config_string, NULL, 0);
         if (find_token(argc, argv, KEYINT_TOKEN, NULL) == 0) {
             fprintf(stderr, "[SVT-Warning]: --keyint is now intra-period + 1!\n");
-            --ip;
+            ip = ip < 0 ? ip : ip - 1;
         } else
             fprintf(stderr, "[SVT-Warning]: --intra-period is deprecated for --keyint\n");
         if ((ip < -2 || ip > 2 * ((1 << 30) - 1)) && rc_mode == 0) {
-            fprintf(stderr, "[SVT-Error]: The intra period must be [-2, 2^31-2]  \n");
+            fprintf(stderr, "[SVT-Error]: The intra period must be [-2, 2^31-2], input %d\n", ip);
             return 0;
         }
-        if ((ip < 0) && rc_mode >= 1) {
+        if ((ip < 0) && rc_mode == 1) {
             fprintf(stderr,
                     "[SVT-Error]: The intra period must be > 0 for RateControlMode %d \n",
                     rc_mode);
@@ -2311,6 +2491,14 @@ uint32_t get_passes(int32_t argc, char *
                 *multi_pass_mode = THREE_PASS_IPP_SAMEPRED_FINAL;
             }
         }
+    } else {
+        if (passes > 1) {
+            fprintf(
+                stderr,
+                "[SVT-Warning]: Multipass CBR is not supported. Switching to 1-pass encoding\n\n");
+            passes = 1;
+        }
+        *multi_pass_mode = SINGLE_PASS;
     }
 
     // Set the settings for each pass based on multi_pass_mode
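
The `ip = ip < 0 ? ip : ip - 1` change above keeps the sentinel values intact: only genuine frame counts are shifted by the keyint-equals-intra-period-plus-one rule. As a standalone function (a sketch of the logic above):

// Map a --keyint value to an intra period: N frames become N - 1, while
// the negative modes (-1 "infinite", -2 default) pass through unchanged.
static long keyint_to_intra_period(long keyint) {
    return keyint < 0 ? keyint : keyint - 1;
}
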
@@ -2339,7 +2527,7 @@ void mark_token_as_read(const char *toke
     }
 }
 
-static EbBool is_negative_number(const char *string) {
+static Bool is_negative_number(const char *string) {
     char *end;
     return strtol(string, &end, 10) < 0 && *end == '\0';
 }
@@ -2508,15 +2696,17 @@ static int32_t read_pred_struct_file(EbC
                                      uint32_t instance_idx) {
     int32_t return_error = 0;
 
-    FOPEN(config->input_pred_struct_file, PredStructPath, "rb");
+    FILE *input_pred_struct_file;
 
-    if (config->input_pred_struct_file != (FILE *)NULL) {
-        int32_t config_file_size   = find_file_size(config->input_pred_struct_file);
+    FOPEN(input_pred_struct_file, PredStructPath, "rb");
+
+    if (input_pred_struct_file) {
+        int32_t config_file_size   = find_file_size(input_pred_struct_file);
         char   *config_file_buffer = (char *)malloc(config_file_size);
 
-        if (config_file_buffer != (char *)NULL) {
+        if (config_file_buffer) {
             int32_t result_size = (int32_t)fread(
-                config_file_buffer, 1, config_file_size, config->input_pred_struct_file);
+                config_file_buffer, 1, config_file_size, input_pred_struct_file);
 
             if (result_size == config_file_size) {
                 parse_pred_struct_file(config, config_file_buffer, config_file_size);
@@ -2530,8 +2720,7 @@ static int32_t read_pred_struct_file(EbC
         }
 
         free(config_file_buffer);
-        fclose(config->input_pred_struct_file);
-        config->input_pred_struct_file = (FILE *)NULL;
+        fclose(input_pred_struct_file);
     } else {
         fprintf(stderr,
                 "Error channel %u: Couldn't open Manual Prediction Structure File: %s\n",
@@ -2543,7 +2732,7 @@ static int32_t read_pred_struct_file(EbC
     return return_error;
 }
 
-static EbBool warn_legacy_token(const char *const token) {
+static Bool warn_legacy_token(const char *const token) {
     static struct warn_set {
         const char *old_token;
         const char *new_token;
@@ -2566,9 +2755,9 @@ static EbBool warn_legacy_token(const ch
                 "[SVT-Error]: %s has been removed, use %s instead\n",
                 tok->old_token,
                 tok->new_token);
-        return EB_TRUE;
+        return TRUE;
     }
-    return EB_FALSE;
+    return FALSE;
 }
 
 /******************************************
@@ -2580,19 +2769,31 @@ EbErrorType read_command_line(int32_t ar
     char        config_string[COMMAND_LINE_MAX_SIZE]; // for one input options
     char       *config_strings[MAX_CHANNEL_NUMBER]; // for multiple input options
     char       *cmd_copy[MAX_NUM_TOKENS]; // keep track of extra tokens
+    char       *arg_copy[MAX_NUM_TOKENS]; // keep track of extra arguments
     uint32_t    index         = 0;
     int32_t     cmd_token_cnt = 0; // total number of tokens
+    int32_t     cmd_arg_cnt   = 0; // total number of arguments
     int32_t     ret_y4m;
 
     for (index = 0; index < MAX_CHANNEL_NUMBER; ++index)
         config_strings[index] = (char *)malloc(sizeof(char) * COMMAND_LINE_MAX_SIZE);
     // Copy tokens (except for CHANNEL_NUMBER_TOKEN and PASSES_TOKEN ) into a temp token buffer hosting all tokens that are passed through the command line
-    size_t len = COMMAND_LINE_MAX_SIZE;
+    size_t len                = COMMAND_LINE_MAX_SIZE;
+    Bool   process_prev_token = 1;
     for (int32_t token_index = 0; token_index < argc; ++token_index) {
-        if ((argv[token_index][0] == '-') &&
-            strncmp(argv[token_index], CHANNEL_NUMBER_TOKEN, len) &&
-            strncmp(argv[token_index], PASSES_TOKEN, len) && !is_negative_number(argv[token_index]))
-            cmd_copy[cmd_token_cnt++] = argv[token_index];
+        if (strncmp(argv[token_index], CHANNEL_NUMBER_TOKEN, len) &&
+            strncmp(argv[token_index], PASSES_TOKEN, len)) {
+            if (!is_negative_number(argv[token_index]) && process_prev_token) {
+                if (argv[token_index][0] == '-')
+                    cmd_copy[cmd_token_cnt++] = argv[token_index];
+                else if (token_index)
+                    arg_copy[cmd_arg_cnt++] = argv[token_index];
+            } else {
+                process_prev_token = 1;
+            }
+        } else {
+            process_prev_token = 0;
+        }
     }
 
     /***************************************************************************************************/
@@ -2604,7 +2805,8 @@ EbErrorType read_command_line(int32_t ar
         mark_token_as_read(CONFIG_FILE_TOKEN, cmd_copy, &cmd_token_cnt);
         // Parse the config file
         for (index = 0; index < num_channels; ++index) {
-            EncChannel *c   = channels + index;
+            EncChannel *c = channels + index;
+            mark_token_as_read(config_strings[index], arg_copy, &cmd_arg_cnt);
             c->return_error = (EbErrorType)read_config_file(
                 c->config, config_strings[index], index);
             return_error = (EbErrorType)(return_error & c->return_error);
@@ -2614,7 +2816,8 @@ EbErrorType read_command_line(int32_t ar
         mark_token_as_read(CONFIG_FILE_LONG_TOKEN, cmd_copy, &cmd_token_cnt);
         // Parse the config file
         for (index = 0; index < num_channels; ++index) {
-            EncChannel *c   = channels + index;
+            EncChannel *c = channels + index;
+            mark_token_as_read(config_strings[index], arg_copy, &cmd_arg_cnt);
             c->return_error = (EbErrorType)read_config_file(
                 c->config, config_strings[index], index);
             return_error = (EbErrorType)(return_error & c->return_error);
@@ -2632,17 +2835,27 @@ EbErrorType read_command_line(int32_t ar
     /********************************************************************************************************/
 
     // Check tokens for invalid tokens
-    for (char *const *indx = argv + 1; *indx; ++indx) {
-        // stop at --
-        if (!strcmp(*indx, "--"))
-            break;
-        // Check removed tokens
-        if (warn_legacy_token(*indx))
-            return EB_ErrorBadParameter;
-        // exclude single letter tokens
-        if ((*indx)[0] == '-' && (*indx)[1] != '-' && (*indx)[2] != '\0') {
-            fprintf(stderr, "[SVT-Error]: single dash long tokens have been removed!\n");
-            return EB_ErrorBadParameter;
+    {
+        bool next_is_value = false;
+        for (char *const *indx = argv + 1; *indx; ++indx) {
+            // stop at --
+            if (!strcmp(*indx, "--"))
+                break;
+            // skip this entry if the previous one was a token: it is that token's value
+            // assumes the command line strictly alternates between tokens and values
+            if (next_is_value) {
+                next_is_value = false;
+                continue;
+            }
+            // Check removed tokens
+            if (warn_legacy_token(*indx))
+                return EB_ErrorBadParameter;
+            // exclude single letter tokens
+            if ((*indx)[0] == '-' && (*indx)[1] != '-' && (*indx)[2] != '\0') {
+                fprintf(stderr, "[SVT-Error]: single dash long tokens have been removed!\n");
+                return EB_ErrorBadParameter;
+            }
+            next_is_value = true;
         }
     }
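
The rewritten validation loop assumes a strict token/value cadence, so a value is never inspected as if it were a token. The same scan, reduced to a self-contained helper (simplified for illustration):

#include <string.h>

// Count option tokens in a null-terminated argv-style array, skipping each
// token's value -- the flip-flop assumption used by the loop above.
static int count_option_tokens(char *const argv[]) {
    int tokens = 0, next_is_value = 0;
    for (char *const *p = argv + 1; *p; ++p) {
        if (!strcmp(*p, "--"))
            break; // everything after "--" is left alone
        if (next_is_value) {
            next_is_value = 0; // this entry is the previous token's value
            continue;
        }
        ++tokens;
        next_is_value = 1;
    }
    return tokens;
}
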
 
@@ -2658,6 +2871,8 @@ EbErrorType read_command_line(int32_t ar
         for (uint32_t chan = 0; chan < num_channels; ++chan) {
             if (!strcmp(config_strings[chan], " "))
                 break;
+            // Mark the value as found in the temp argument buffer
+            mark_token_as_read(config_strings[chan], arg_copy, &cmd_arg_cnt);
             (entry->scf)(config_strings[chan], channels[chan].config);
         }
     }
@@ -2669,7 +2884,7 @@ EbErrorType read_command_line(int32_t ar
 
     for (index = 0; index < num_channels; ++index) {
         EncChannel *c = channels + index;
-        if (c->config->y4m_input == EB_TRUE) {
+        if (c->config->y4m_input == TRUE) {
             ret_y4m = read_y4m_header(c->config);
             if (ret_y4m == EB_ErrorBadParameter) {
                 fprintf(stderr, "Error found when reading the y4m file parameters.\n");
@@ -2683,7 +2898,7 @@ EbErrorType read_command_line(int32_t ar
     for (index = 0; index < num_channels; ++index) {
         EncChannel *c      = channels + index;
         EbConfig   *config = c->config;
-        if (config->config.enable_manual_pred_struct == EB_TRUE) {
+        if (config->config.enable_manual_pred_struct == TRUE) {
             c->return_error = (EbErrorType)read_pred_struct_file(
                 config, config->input_pred_struct_filename, index);
             return_error = (EbErrorType)(return_error & c->return_error);
@@ -2741,6 +2956,20 @@ EbErrorType read_command_line(int32_t ar
         return_error = EB_ErrorBadParameter;
     }
 
+    if (cmd_arg_cnt > 0) {
+        int32_t arg_copy_index, maybe_token = 0;
+        fprintf(stderr, "Unprocessed arguments: ");
+        for (arg_copy_index = 0; arg_copy_index < cmd_arg_cnt; ++arg_copy_index) {
+            maybe_token |= !!strchr(arg_copy[arg_copy_index], '-');
+            fprintf(stderr, " %s ", arg_copy[arg_copy_index]);
+        }
+        if (maybe_token)
+            fprintf(stderr, "\nMissing spacing between tokens");
+        fprintf(stderr, "\n\n");
+        return_error = EB_ErrorBadParameter;
+    }
+
     for (index = 0; index < MAX_CHANNEL_NUMBER; ++index) free(config_strings[index]);
+
     return return_error;
 }
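
read_command_line now keeps two scratch lists, cmd_copy for tokens and arg_copy for their values, and whatever survives parsing is reported as unprocessed. mark_token_as_read is defined outside this excerpt; a simplified stand-in with the same shape (a sketch, not the project's implementation):

#include <stdint.h>
#include <string.h>

// Remove one matching entry from a scratch list of unconsumed strings by
// swapping in the last element; leftovers are later reported as errors.
static void mark_read(const char *token, char *list[], int32_t *count) {
    for (int32_t i = 0; i < *count; ++i) {
        if (!strcmp(list[i], token)) {
            list[i] = list[--*count];
            return;
        }
    }
}
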
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppConfig.h 1.2.0+dfsg-2/Source/App/EncApp/EbAppConfig.h
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppConfig.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppConfig.h	2022-08-01 19:12:00.000000000 +0000
@@ -176,23 +176,20 @@ typedef struct EbConfig {
     /****************************************
      * File I/O
      ****************************************/
-    FILE      *config_file;
     FILE      *input_file;
     MemMapFile mmap; //memory mapped file handler
-    EbBool     input_file_is_fifo;
+    Bool       input_file_is_fifo;
     FILE      *bitstream_file;
     FILE      *recon_file;
     FILE      *error_log_file;
     FILE      *stat_file;
-    FILE      *buffer_file;
     FILE      *qp_file;
     /* two pass */
     const char   *stats;
     FILE         *input_stat_file;
     FILE         *output_stat_file;
-    FILE         *input_pred_struct_file;
     char         *input_pred_struct_filename;
-    EbBool        y4m_input;
+    Bool          y4m_input;
     unsigned char y4m_buf[9];
 
     uint8_t progress; // 0 = no progress output, 1 = normal, 2 = aomenc style verbose progress
@@ -214,23 +211,16 @@ typedef struct EbConfig {
     uint32_t injector;
     uint32_t speed_control_flag;
 
-    uint32_t hme_level0_column_index;
-    uint32_t hme_level0_row_index;
-    uint32_t hme_level1_column_index;
-    uint32_t hme_level1_row_index;
-    uint32_t hme_level2_column_index;
-    uint32_t hme_level2_row_index;
-    EbBool   stop_encoder; // to signal CTRL+C Event, need to stop encoding.
+    Bool stop_encoder; // set on a CTRL+C event to signal that encoding must stop.
 
     uint64_t processed_frame_count;
     uint64_t processed_byte_count;
 
-    uint64_t byte_count_since_ivf;
     uint64_t ivf_count;
     /****************************************
      * On-the-fly Testing
      ****************************************/
-    EbBool eos_flag;
+    Bool eos_flag;
 
     EbSvtAv1EncConfiguration config;
 } EbConfig;
@@ -243,7 +233,7 @@ typedef struct EncChannel {
     AppExitConditionType exit_cond_recon; // Processing loop exit condition
     AppExitConditionType exit_cond_input; // Processing loop exit condition
     AppExitConditionType exit_cond; // Processing loop exit condition
-    EbBool               active;
+    Bool                 active;
 } EncChannel;
 
 typedef enum MultiPassModes {
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppContext.c 1.2.0+dfsg-2/Source/App/EncApp/EbAppContext.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppContext.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppContext.c	2022-08-01 19:12:00.000000000 +0000
@@ -205,6 +205,7 @@ EbErrorType allocate_output_recon_buffer
 
     callback_data->recon_buffer->n_alloc_len   = (uint32_t)frame_size;
     callback_data->recon_buffer->p_app_private = NULL;
+    callback_data->recon_buffer->metadata      = NULL;
 
     return EB_ErrorNone;
 }
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppInputy4m.c 1.2.0+dfsg-2/Source/App/EncApp/EbAppInputy4m.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppInputy4m.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppInputy4m.c	2022-08-01 19:12:00.000000000 +0000
@@ -205,7 +205,6 @@ int32_t read_y4m_header(EbConfig *cfg) {
     cfg->config.source_height          = height;
     cfg->config.frame_rate_numerator   = fr_n;
     cfg->config.frame_rate_denominator = fr_d;
-    cfg->config.frame_rate             = fr_n / fr_d;
     cfg->config.encoder_bit_depth      = bitdepth;
     cfg->mmap.y4m_seq_hdr              = ftell(cfg->input_file);
 
@@ -258,11 +257,11 @@ void read_and_compute_y4m_frame_delimite
     *frame_hdr = i + 1;
 }
 /* check if the input file is in YUV4MPEG2 (y4m) format */
-EbBool check_if_y4m(EbConfig *cfg) {
+Bool check_if_y4m(EbConfig *cfg) {
 #define YUV4MPEG2_IND_SIZE 9
     char buf[YUV4MPEG2_IND_SIZE + 1] = {0};
     if (fread(buf, YUV4MPEG2_IND_SIZE, 1, cfg->input_file) != 1)
-        return EB_FALSE;
+        return FALSE;
     if (cfg->input_file != stdin && !cfg->input_file_is_fifo)
         fseek(cfg->input_file, 0, SEEK_SET);
     else
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppInputy4m.h 1.2.0+dfsg-2/Source/App/EncApp/EbAppInputy4m.h
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppInputy4m.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppInputy4m.h	2022-08-01 19:12:00.000000000 +0000
@@ -24,4 +24,4 @@ void read_and_compute_y4m_frame_delimite
                                           uint32_t *frame_hdr);
 void read_y4m_frame_delimiter(FILE *input_file, FILE *error_log_file);
 
-EbBool check_if_y4m(EbConfig *cfg);
+Bool check_if_y4m(EbConfig *cfg);
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppMain.c 1.2.0+dfsg-2/Source/App/EncApp/EbAppMain.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppMain.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppMain.c	2022-08-01 19:12:00.000000000 +0000
@@ -173,7 +173,7 @@ static EbErrorType enc_context_ctor(EncA
             EbConfig* config                    = c->config;
             config->config.active_channel_count = num_channels;
             config->config.channel_id           = inst_cnt;
-            config->config.recon_enabled        = config->recon_file ? EB_TRUE : EB_FALSE;
+            config->config.recon_enabled        = config->recon_file ? TRUE : FALSE;
 
             init_memory_file_map(config);
 
@@ -190,7 +190,7 @@ static EbErrorType enc_context_ctor(EncA
             }
             return_error = (EbErrorType)(return_error | c->return_error);
         } else
-            c->active = EB_FALSE;
+            c->active = FALSE;
     }
     return return_error;
 }
@@ -225,84 +225,68 @@ static void print_summary(const EncConte
             double max_chroma_sse = (double)max_luma_value * max_luma_value *
                 (config->config.source_width / 2 * config->config.source_height / 2);
 
-            if ((config->config.frame_rate_numerator != 0 &&
-                 config->config.frame_rate_denominator != 0) ||
-                config->config.frame_rate != 0) {
-                double frame_rate = config->config.frame_rate_numerator &&
-                        config->config.frame_rate_denominator
-                    ? (double)config->config.frame_rate_numerator /
-                        (double)config->config.frame_rate_denominator
-                    : config->config.frame_rate > 1000
-                    // Correct for 16-bit fixed-point fractional precision
-                    ? (double)config->config.frame_rate / (1 << 16)
-                    : (double)config->config.frame_rate;
-
-                if (config->config.stat_report) {
-                    if (config->stat_file) {
-                        fprintf(config->stat_file,
-                                "\nSUMMARY "
-                                "------------------------------------------------------"
-                                "---------------\n");
-                        fprintf(config->stat_file,
-                                "\n\t\t\t\tAverage PSNR (using per-frame "
-                                "PSNR)\t\t|\tOverall PSNR (using per-frame MSE)\t\t|"
-                                "\tAverage SSIM\n");
-                        fprintf(config->stat_file,
-                                "Total Frames\tAverage QP  \tY-PSNR   \tU-PSNR   "
-                                "\tV-PSNR\t\t| \tY-PSNR   \tU-PSNR   \tV-PSNR   \t|"
-                                "\tY-SSIM   \tU-SSIM   \tV-SSIM   "
-                                "\t|\tBitrate\n");
-                        fprintf(
-                            config->stat_file,
-                            "%10ld  \t   %2.2f    \t%3.2f dB\t%3.2f dB\t%3.2f dB  "
-                            "\t|\t%3.2f dB\t%3.2f dB\t%3.2f dB \t|\t%1.5f \t%1.5f "
-                            "\t%1.5f\t\t|\t%.2f kbps\n",
-                            (long int)frame_count,
-                            (float)config->performance_context.sum_qp / frame_count,
-                            (float)config->performance_context.sum_luma_psnr / frame_count,
-                            (float)config->performance_context.sum_cb_psnr / frame_count,
-                            (float)config->performance_context.sum_cr_psnr / frame_count,
-                            (float)(get_psnr(
-                                (config->performance_context.sum_luma_sse / frame_count),
-                                max_luma_sse)),
-                            (float)(get_psnr((config->performance_context.sum_cb_sse / frame_count),
-                                             max_chroma_sse)),
-                            (float)(get_psnr((config->performance_context.sum_cr_sse / frame_count),
-                                             max_chroma_sse)),
-                            (float)config->performance_context.sum_luma_ssim / frame_count,
-                            (float)config->performance_context.sum_cb_ssim / frame_count,
-                            (float)config->performance_context.sum_cr_ssim / frame_count,
-                            ((double)(config->performance_context.byte_count << 3) * frame_rate /
-                             (config->frames_encoded * 1000)));
-                    }
-                }
+            const double frame_rate = (double)config->config.frame_rate_numerator /
+                (double)config->config.frame_rate_denominator;
 
-                fprintf(stderr,
-                        "\nSUMMARY --------------------------------- Channel %u  "
-                        "--------------------------------\n",
-                        inst_cnt + 1);
-                {
-                    fprintf(stderr, "Total Frames\t\tFrame Rate\t\tByte Count\t\tBitrate\n");
-                    fprintf(stderr,
-                            "%12d\t\t%4.2f fps\t\t%10.0f\t\t%5.2f kbps\n",
-                            (int32_t)frame_count,
-                            (double)frame_rate,
-                            (double)config->performance_context.byte_count,
-                            ((double)(config->performance_context.byte_count << 3) * frame_rate /
-                             (config->frames_encoded * 1000)));
-                }
+            if (config->config.stat_report && config->stat_file) {
+                fprintf(config->stat_file,
+                        "\nSUMMARY "
+                        "------------------------------------------------------"
+                        "---------------\n");
+                fprintf(config->stat_file,
+                        "\n\t\t\t\tAverage PSNR (using per-frame "
+                        "PSNR)\t\t|\tOverall PSNR (using per-frame MSE)\t\t|"
+                        "\tAverage SSIM\n");
+                fprintf(config->stat_file,
+                        "Total Frames\tAverage QP  \tY-PSNR   \tU-PSNR   "
+                        "\tV-PSNR\t\t| \tY-PSNR   \tU-PSNR   \tV-PSNR   \t|"
+                        "\tY-SSIM   \tU-SSIM   \tV-SSIM   "
+                        "\t|\tBitrate\n");
+                fprintf(config->stat_file,
+                        "%10ld  \t   %2.2f    \t%3.2f dB\t%3.2f dB\t%3.2f dB  "
+                        "\t|\t%3.2f dB\t%3.2f dB\t%3.2f dB \t|\t%1.5f \t%1.5f "
+                        "\t%1.5f\t\t|\t%.2f kbps\n",
+                        (long int)frame_count,
+                        (float)config->performance_context.sum_qp / frame_count,
+                        (float)config->performance_context.sum_luma_psnr / frame_count,
+                        (float)config->performance_context.sum_cb_psnr / frame_count,
+                        (float)config->performance_context.sum_cr_psnr / frame_count,
+                        (float)(get_psnr((config->performance_context.sum_luma_sse / frame_count),
+                                         max_luma_sse)),
+                        (float)(get_psnr((config->performance_context.sum_cb_sse / frame_count),
+                                         max_chroma_sse)),
+                        (float)(get_psnr((config->performance_context.sum_cr_sse / frame_count),
+                                         max_chroma_sse)),
+                        (float)config->performance_context.sum_luma_ssim / frame_count,
+                        (float)config->performance_context.sum_cb_ssim / frame_count,
+                        (float)config->performance_context.sum_cr_ssim / frame_count,
+                        ((double)(config->performance_context.byte_count << 3) * frame_rate /
+                         (config->frames_encoded * 1000)));
+            }
 
-                if (config->config.stat_report) {
-                    fprintf(stderr,
-                            "\n\t\tAverage PSNR (using per-frame "
-                            "PSNR)\t\t|\tOverall PSNR (using per-frame MSE)\t\t|\t"
-                            "Average SSIM\n");
-                    fprintf(stderr,
-                            "Average "
-                            "QP\tY-PSNR\t\tU-PSNR\t\tV-PSNR\t\t|\tY-PSNR\t\tU-"
-                            "PSNR\t\tV-PSNR\t\t|\tY-SSIM\tU-SSIM\tV-SSIM\n");
-                    fprintf(
-                        stderr,
+            fprintf(stderr,
+                    "\nSUMMARY --------------------------------- Channel %u  "
+                    "--------------------------------\n",
+                    inst_cnt + 1);
+            fprintf(stderr, "Total Frames\t\tFrame Rate\t\tByte Count\t\tBitrate\n");
+            fprintf(stderr,
+                    "%12d\t\t%4.2f fps\t\t%10.0f\t\t%5.2f kbps\n",
+                    (int32_t)frame_count,
+                    frame_rate,
+                    (double)config->performance_context.byte_count,
+                    ((double)(config->performance_context.byte_count << 3) * frame_rate /
+                     (config->frames_encoded * 1000)));
+
+            if (config->config.stat_report) {
+                fprintf(stderr,
+                        "\n\t\tAverage PSNR (using per-frame "
+                        "PSNR)\t\t|\tOverall PSNR (using per-frame MSE)\t\t|\t"
+                        "Average SSIM\n");
+                fprintf(stderr,
+                        "Average "
+                        "QP\tY-PSNR\t\tU-PSNR\t\tV-PSNR\t\t|\tY-PSNR\t\tU-"
+                        "PSNR\t\tV-PSNR\t\t|\tY-SSIM\tU-SSIM\tV-SSIM\n");
+                fprintf(stderr,
                         "%11.2f\t%4.2f dB\t%4.2f dB\t%4.2f dB\t|\t%4.2f "
                         "dB\t%4.2f dB\t%4.2f dB\t|\t%1.5f\t%1.5f\t%1.5f\n",
                         (float)config->performance_context.sum_qp / frame_count,
@@ -318,10 +302,9 @@ static void print_summary(const EncConte
                         (float)config->performance_context.sum_luma_ssim / frame_count,
                         (float)config->performance_context.sum_cb_ssim / frame_count,
                         (float)config->performance_context.sum_cr_ssim / frame_count);
-                }
-
-                fflush(stdout);
             }
+
+            fflush(stdout);
         }
     }
     fprintf(stderr, "\n");
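
The kbps figure printed by the summary is just bits over milliseconds of content: (byte_count << 3) bits, scaled by the frame rate and divided by frames * 1000. A minimal standalone sketch of the same arithmetic (the names are illustrative, not from the app):

    #include <stdint.h>

    /* Average bitrate in kbps, matching the summary's arithmetic:
       (byte_count << 3) bits, times fps, divided by (frames * 1000). */
    static double avg_kbps(uint64_t byte_count, double fps, uint64_t frames) {
        return (double)(byte_count << 3) * fps / ((double)frames * 1000.0);
    }

For example, 1,500,000 bytes over 240 frames at 24 fps gives 1500000 * 8 * 24 / (240 * 1000) = 1200 kbps.
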
@@ -333,7 +316,7 @@ static void print_performance(const EncC
         const EncChannel* c = enc_context->channels + inst_cnt;
         if (c->exit_cond == APP_ExitConditionFinished && c->return_error == EB_ErrorNone) {
             EbConfig* config = c->config;
-            if (config->stop_encoder == EB_FALSE) {
+            if (config->stop_encoder == FALSE) {
                 if ((config->config.pass == 0 ||
                      (config->config.pass == 2 && config->config.rate_control_mode == 0) ||
                      config->config.pass == 3))
@@ -369,15 +352,15 @@ static void print_warnnings(const EncCon
     }
 }
 
-static EbBool is_active(const EncChannel* c) { return c->active; }
+static Bool is_active(const EncChannel* c) { return c->active; }
 
-static EbBool has_active_channel(const EncContext* const enc_context) {
+static Bool has_active_channel(const EncContext* const enc_context) {
     // check if all channels are inactive
     for (uint32_t inst_cnt = 0; inst_cnt < enc_context->num_channels; ++inst_cnt) {
         if (is_active(enc_context->channels + inst_cnt))
-            return EB_TRUE;
+            return TRUE;
     }
-    return EB_FALSE;
+    return FALSE;
 }
 
 static void enc_channel_step(EncChannel* c, EncApp* enc_app, EncContext* enc_context) {
@@ -392,7 +375,7 @@ static void enc_channel_step(EncChannel*
         ((c->exit_cond_recon == APP_ExitConditionError && config->recon_file) ||
          c->exit_cond_output == APP_ExitConditionError ||
          c->exit_cond_input == APP_ExitConditionError)) {
-        c->active = EB_FALSE;
+        c->active = FALSE;
         if (config->recon_file)
             c->exit_cond = (AppExitConditionType)(c->exit_cond_recon | c->exit_cond_output |
                                                   c->exit_cond_input);
@@ -423,7 +406,7 @@ static void enc_channel_start(EncChannel
         c->exit_cond_output = APP_ExitConditionNone;
         c->exit_cond_recon  = config->recon_file ? APP_ExitConditionNone : APP_ExitConditionError;
         c->exit_cond_input  = APP_ExitConditionNone;
-        c->active           = EB_TRUE;
+        c->active           = TRUE;
         app_svt_av1_get_time(&config->performance_context.encode_start_time[0],
                              &config->performance_context.encode_start_time[1]);
     }
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppOutputivf.c 1.2.0+dfsg-2/Source/App/EncApp/EbAppOutputivf.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppOutputivf.c	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppOutputivf.c	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,61 @@
+/*
+* Copyright(c) 2022 Intel Corporation
+*
+* This source code is subject to the terms of the BSD 3-Clause Clear License and
+* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
+* was not distributed with this source code in the LICENSE file, you can
+* obtain it at https://www.aomedia.org/license. If the Alliance for Open
+* Media Patent License 1.0 was not distributed with this source code in the
+* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
+*/
+
+#include <stdint.h>
+#include <stdio.h>
+
+#include "EbAppConfig.h"
+#include "EbAppOutputivf.h"
+
+#define AV1_FOURCC 0x31305641 // used for ivf header
+#define IVF_STREAM_HEADER_SIZE 32
+#define IVF_FRAME_HEADER_SIZE 12
+
+static __inline void mem_put_le32(void *vmem, int32_t val) {
+    uint8_t *mem = (uint8_t *)vmem;
+
+    mem[0] = (uint8_t)((val >> 0) & 0xff);
+    mem[1] = (uint8_t)((val >> 8) & 0xff);
+    mem[2] = (uint8_t)((val >> 16) & 0xff);
+    mem[3] = (uint8_t)((val >> 24) & 0xff);
+}
+
+static __inline void mem_put_le16(void *vmem, int32_t val) {
+    uint8_t *mem = (uint8_t *)vmem;
+
+    mem[0] = (uint8_t)((val >> 0) & 0xff);
+    mem[1] = (uint8_t)((val >> 8) & 0xff);
+}
+
+void write_ivf_stream_header(EbConfig *config, int32_t length) {
+    char header[IVF_STREAM_HEADER_SIZE] = {'D', 'K', 'I', 'F'};
+    mem_put_le16(header + 4, 0); // version
+    mem_put_le16(header + 6, 32); // header size
+    mem_put_le32(header + 8, AV1_FOURCC); // fourcc
+    mem_put_le16(header + 12, config->input_padded_width); // width
+    mem_put_le16(header + 14, config->input_padded_height); // height
+    mem_put_le32(header + 16, config->config.frame_rate_numerator); // rate
+    mem_put_le32(header + 20, config->config.frame_rate_denominator); // scale
+    mem_put_le32(header + 24, length); // length
+    mem_put_le32(header + 28, 0); // unused
+    fwrite(header, 1, IVF_STREAM_HEADER_SIZE, config->bitstream_file);
+}
+
+void write_ivf_frame_header(EbConfig *config, uint32_t byte_count) {
+    char header[IVF_FRAME_HEADER_SIZE];
+
+    mem_put_le32(&header[0], (int32_t)byte_count);
+    mem_put_le32(&header[4], (int32_t)(config->ivf_count & 0xFFFFFFFF));
+    mem_put_le32(&header[8], (int32_t)(config->ivf_count >> 32));
+
+    config->ivf_count++;
+    fwrite(header, 1, IVF_FRAME_HEADER_SIZE, config->bitstream_file);
+}
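
EbAppOutputivf.c pins down the 32-byte IVF stream header: 'DKIF', version 0, header size 32, the AV1 fourcc, dimensions, rate/scale, the new frame-count ("length") field at offset 24, and an unused word, all little-endian. A minimal reader for that layout, written against this file rather than taken from the repository:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    static uint32_t rd_le32(const uint8_t *p) {
        return (uint32_t)p[0] | (uint32_t)p[1] << 8 | (uint32_t)p[2] << 16 |
               (uint32_t)p[3] << 24;
    }

    /* Returns 0 on success and fills *frames with the 'length' field
       written at offset 24 by write_ivf_stream_header(). */
    static int read_ivf_frame_count(FILE *f, uint32_t *frames) {
        uint8_t hdr[32];
        if (fread(hdr, 1, sizeof(hdr), f) != sizeof(hdr))
            return -1;
        if (memcmp(hdr, "DKIF", 4) != 0)
            return -1;
        *frames = rd_le32(hdr + 24);
        return 0;
    }
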
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppOutputivf.h 1.2.0+dfsg-2/Source/App/EncApp/EbAppOutputivf.h
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppOutputivf.h	1970-01-01 00:00:00.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppOutputivf.h	2022-08-01 19:12:00.000000000 +0000
@@ -0,0 +1,22 @@
+/*
+* Copyright(c) 2022 Intel Corporation
+*
+* This source code is subject to the terms of the BSD 3-Clause Clear License and
+* the Alliance for Open Media Patent License 1.0. If the BSD 3-Clause Clear License
+* was not distributed with this source code in the LICENSE file, you can
+* obtain it at https://www.aomedia.org/license. If the Alliance for Open
+* Media Patent License 1.0 was not distributed with this source code in the
+* PATENTS file, you can obtain it at https://www.aomedia.org/license/patent-license.
+*/
+
+#ifndef EbAppOutputivf_h
+#define EbAppOutputivf_h
+
+#include <stdint.h>
+
+#include "EbAppConfig.h"
+
+void write_ivf_stream_header(EbConfig *config, int32_t length);
+void write_ivf_frame_header(EbConfig *config, uint32_t byte_count);
+
+#endif
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbAppProcessCmd.c 1.2.0+dfsg-2/Source/App/EncApp/EbAppProcessCmd.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbAppProcessCmd.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbAppProcessCmd.c	2022-08-01 19:12:00.000000000 +0000
@@ -27,6 +27,8 @@
 #include <sys/mman.h>
 #endif
 
+#include "EbAppOutputivf.h"
+
 /***************************************
  * Macros
  ***************************************/
@@ -198,13 +200,13 @@ void read_input_frames(EbConfig *config,
             header_ptr->n_filled_len = 0;
 
             if (config->mmap.enable) {
-                if (config->y4m_input == EB_TRUE && config->processed_frame_count == 0) {
+                if (config->y4m_input == TRUE && config->processed_frame_count == 0) {
                     read_and_compute_y4m_frame_delimiter(
                         config->input_file, config->error_log_file, &config->mmap.y4m_frm_hdr);
                 }
             } else {
                 /* if input is a y4m file, read next line which contains "FRAME" */
-                if (config->y4m_input == EB_TRUE)
+                if (config->y4m_input == TRUE)
                     read_y4m_frame_delimiter(config->input_file, config->error_log_file);
             }
             uint64_t luma_read_size = (uint64_t)input_padded_width * input_padded_height
@@ -263,7 +265,7 @@ void read_input_frames(EbConfig *config,
                     header_ptr->n_filled_len += (input_ptr->cr ? chroma_read_size : 0);
                 } else if (!config->input_file_is_fifo) {
                     fseek(input_file, 0, SEEK_SET);
-                    if (config->y4m_input == EB_TRUE) {
+                    if (config->y4m_input == TRUE) {
                         read_and_skip_y4m_header(config->input_file);
                         read_y4m_frame_delimiter(config->input_file, config->error_log_file);
                     }
@@ -479,7 +481,7 @@ static long get_next_qp_from_qp_file(FIL
     return qp;
 }
 
-static unsigned char send_qp_on_the_fly(FILE *const qp_file, EbBool *use_qp_file) {
+static unsigned char send_qp_on_the_fly(FILE *const qp_file, Bool *use_qp_file) {
     long tmp_qp            = 0;
     int  qp_read_from_file = 0;
 
@@ -488,7 +490,7 @@ static unsigned char send_qp_on_the_fly(
         tmp_qp = get_next_qp_from_qp_file(qp_file, &qp_read_from_file);
 
     if (tmp_qp == -1) {
-        *use_qp_file = EB_FALSE;
+        *use_qp_file = FALSE;
         fprintf(stderr, "\nWarning: QP File did not contain any valid QPs");
     }
     return (unsigned)CLIP3(0, 63, tmp_qp);
@@ -562,7 +564,7 @@ void process_input_buffer(EncChannel *ch
         : total_bytes_to_process_count - (int64_t)config->processed_byte_count;
 
     // If there are bytes left to encode, configure the header
-    if (remaining_byte_count != 0 && config->stop_encoder == EB_FALSE) {
+    if (remaining_byte_count != 0 && config->stop_encoder == FALSE) {
         read_input_frames(config, is_16bit, header_ptr);
         if (header_ptr->n_filled_len) {
             // Update the context parameters
@@ -577,7 +579,7 @@ void process_input_buffer(EncChannel *ch
                 header_ptr->qp = send_qp_on_the_fly(config->qp_file, &config->config.use_qp_file);
 
             if (keep_running == 0 && !config->stop_encoder)
-                config->stop_encoder = EB_TRUE;
+                config->stop_encoder = TRUE;
             // Fill in Buffers Header control data
             header_ptr->pts      = config->processed_frame_count - 1;
             header_ptr->pic_type = EB_AV1_INVALID_PICTURE;
@@ -613,77 +615,11 @@ void process_input_buffer(EncChannel *ch
 #define LONG_ENCODE_FRAME_ENCODE 4000
 #define SPEED_MEASUREMENT_INTERVAL 2000
 #define START_STEADY_STATE 1000
-#define AV1_FOURCC 0x31305641 // used for ivf header
-#define IVF_STREAM_HEADER_SIZE 32
-#define IVF_FRAME_HEADER_SIZE 12
 #define OBU_FRAME_HEADER_SIZE 3
 #define TD_SIZE 2
-static __inline void mem_put_le32(void *vmem, int32_t val) {
-    uint8_t *mem = (uint8_t *)vmem;
 
-    mem[0] = (uint8_t)((val >> 0) & 0xff);
-    mem[1] = (uint8_t)((val >> 8) & 0xff);
-    mem[2] = (uint8_t)((val >> 16) & 0xff);
-    mem[3] = (uint8_t)((val >> 24) & 0xff);
-}
 #define MEM_VALUE_T_SZ_BITS (sizeof(MEM_VALUE_T) << 3)
 
-static __inline void mem_put_le16(void *vmem, int32_t val) {
-    uint8_t *mem = (uint8_t *)vmem;
-
-    mem[0] = (uint8_t)((val >> 0) & 0xff);
-    mem[1] = (uint8_t)((val >> 8) & 0xff);
-}
-
-static void write_ivf_stream_header(EbConfig *config) {
-    char header[IVF_STREAM_HEADER_SIZE];
-    header[0] = 'D';
-    header[1] = 'K';
-    header[2] = 'I';
-    header[3] = 'F';
-    mem_put_le16(header + 4, 0); // version
-    mem_put_le16(header + 6, 32); // header size
-    mem_put_le32(header + 8, AV1_FOURCC); // fourcc
-    mem_put_le16(header + 12, config->input_padded_width); // width
-    mem_put_le16(header + 14, config->input_padded_height); // height
-    if (config->config.frame_rate_denominator != 0 && config->config.frame_rate_numerator != 0) {
-        mem_put_le32(header + 16, config->config.frame_rate_numerator); // rate
-        mem_put_le32(header + 20, config->config.frame_rate_denominator); // scale
-            //mem_put_le32(header + 16, config->frame_rate_denominator);  // rate
-            //mem_put_le32(header + 20, config->frame_rate_numerator);  // scale
-    } else {
-        mem_put_le32(header + 16, (config->config.frame_rate >> 16) * 1000); // rate
-        mem_put_le32(header + 20, 1000); // scale
-            //mem_put_le32(header + 16, config->frame_rate_denominator);  // rate
-            //mem_put_le32(header + 20, config->frame_rate_numerator);  // scale
-    }
-    mem_put_le32(header + 24, 0); // length
-    mem_put_le32(header + 28, 0); // unused
-    //config->performance_context.byte_count += 32;
-    if (config->bitstream_file)
-        fwrite(header, 1, IVF_STREAM_HEADER_SIZE, config->bitstream_file);
-
-    return;
-}
-
-static void write_ivf_frame_header(EbConfig *config, uint32_t byte_count) {
-    char    header[IVF_FRAME_HEADER_SIZE];
-    int32_t write_location = 0;
-
-    mem_put_le32(&header[write_location], (int32_t)byte_count);
-    write_location = write_location + 4;
-    mem_put_le32(&header[write_location], (int32_t)((config->ivf_count) & 0xFFFFFFFF));
-    write_location = write_location + 4;
-    mem_put_le32(&header[write_location], (int32_t)((config->ivf_count) >> 32));
-
-    config->byte_count_since_ivf = (byte_count);
-
-    config->ivf_count++;
-    fflush(stdout);
-
-    if (config->bitstream_file)
-        fwrite(header, 1, IVF_FRAME_HEADER_SIZE, config->bitstream_file);
-}
 double get_psnr(double sse, double max) {
     double psnr;
     if (sse == 0)
@@ -828,7 +764,10 @@ void process_output_stream_buffer(EncCha
             if (stream_file) {
                 if (config->performance_context.frame_count == 1 &&
                     !(flags & EB_BUFFERFLAG_IS_ALT_REF)) {
-                    write_ivf_stream_header(config);
+                    write_ivf_stream_header(config,
+                                            config->frames_to_be_encoded == -1
+                                                ? 0
+                                                : (int32_t)config->frames_to_be_encoded);
                 }
                 write_ivf_frame_header(config, header_ptr->n_filled_len);
                 fwrite(header_ptr->p_buffer, 1, header_ptr->n_filled_len, stream_file);
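
The caller writes 0 into the length field when frames_to_be_encoded is -1, i.e. when the total is unknown up front. On a seekable output, a wrapper could backfill the field once encoding finishes; this is a hypothetical post-pass, not something the app itself does:

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical post-pass: backfill the IVF 'length' field at offset 24
       once the final frame count is known; assumes f is seekable. */
    static void patch_ivf_frame_count(FILE *f, uint32_t n) {
        uint8_t le[4] = {(uint8_t)n, (uint8_t)(n >> 8),
                         (uint8_t)(n >> 16), (uint8_t)(n >> 24)};
        fseek(f, 24, SEEK_SET);
        fwrite(le, 1, 4, f);
    }
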
@@ -881,14 +820,8 @@ void process_output_stream_buffer(EncCha
             }
             ++*frame_count;
             const double fps = (double)*frame_count / config->performance_context.total_encode_time;
-            const double frame_rate = config->config.frame_rate_numerator &&
-                    config->config.frame_rate_denominator
-                ? (double)config->config.frame_rate_numerator /
-                    (double)config->config.frame_rate_denominator
-                : config->config.frame_rate > 1000
-                // Correct for 16-bit fixed-point fractional precision
-                ? (double)config->config.frame_rate / (1 << 16)
-                : (double)config->config.frame_rate;
+            const double frame_rate = (double)config->config.frame_rate_numerator /
+                (double)config->config.frame_rate_denominator;
             switch (config->progress) {
             case 0: break;
             case 1:
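
The fallback for a bare 16.16 fixed-point frame_rate is gone; the progress code now trusts frame_rate_numerator / frame_rate_denominator to be populated (presumably the config layer always derives the rational form now). For a caller still holding the fixed-point value, the conversion is trivial; a sketch under that assumption:

    #include <stdint.h>

    /* Convert a 16.16 fixed-point fps (e.g. 24 << 16) into the rational
       form the progress code now assumes is always present. */
    static void fps_fixed_to_rational(uint32_t fps_q16, uint32_t *num, uint32_t *den) {
        *num = fps_q16;
        *den = 1 << 16;
    }

E.g. 24 fps stored as 24 << 16 becomes num = 1572864 over den = 65536, which is exactly 24.
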
diff -pruN 0.9.1+dfsg-1/Source/App/EncApp/EbTime.c 1.2.0+dfsg-2/Source/App/EncApp/EbTime.c
--- 0.9.1+dfsg-1/Source/App/EncApp/EbTime.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/App/EncApp/EbTime.c	2022-08-01 19:12:00.000000000 +0000
@@ -52,7 +52,7 @@ void app_svt_av1_get_time(uint64_t *cons
     struct _timeb curr_time;
     _ftime_s(&curr_time);
     *seconds  = curr_time.time;
-    *useconds = curr_time.millitm;
+    *useconds = curr_time.millitm * 1000;
 #elif defined(CLOCK_MONOTONIC) && !defined(OLD_MACOS)
     struct timespec curr_time;
     clock_gettime(CLOCK_MONOTONIC, &curr_time);
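
Before this fix the Windows branch stored milliseconds in a field the rest of the code treats as microseconds; multiplying millitm by 1000 aligns the units across platforms. A sketch of the downstream duration math this enables, assuming a (seconds, microseconds) pair as produced here:

    #include <stdint.h>

    /* Elapsed milliseconds between two (seconds, microseconds) samples,
       assuming both use the now-uniform microsecond unit. */
    static double elapsed_ms(uint64_t s0, uint64_t u0, uint64_t s1, uint64_t u1) {
        return (double)(s1 - s0) * 1000.0 + ((double)u1 - (double)u0) / 1000.0;
    }
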
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/av1_inv_txfm_avx2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/av1_inv_txfm_avx2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/av1_inv_txfm_avx2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/av1_inv_txfm_avx2.c	2022-08-01 19:12:00.000000000 +0000
@@ -1919,7 +1919,8 @@ void svt_av1_lowbd_inv_txfm2d_add_avx2(c
 void svt_av1_inv_txfm_add_avx2(const TranLow *dqcoeff, uint8_t *dst_r, int32_t stride_r,
                                uint8_t *dst_w, int32_t stride_w, const TxfmParam *txfm_param) {
     const TxType tx_type = txfm_param->tx_type;
-    if (!txfm_param->lossless)
+    if (!txfm_param->lossless) {
+        assert(txfm_param->bd == 8);
         svt_av1_lowbd_inv_txfm2d_add_avx2(dqcoeff,
                                           dst_r,
                                           stride_r,
@@ -1928,6 +1929,6 @@ void svt_av1_inv_txfm_add_avx2(const Tra
                                           tx_type,
                                           txfm_param->tx_size,
                                           txfm_param->eob);
-    else
+    } else
         svt_av1_inv_txfm_add_c(dqcoeff, dst_r, stride_r, dst_w, stride_w, txfm_param);
 }
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/cdef_block_avx2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/cdef_block_avx2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/cdef_block_avx2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/cdef_block_avx2.c	2022-08-01 19:12:00.000000000 +0000
@@ -47,6 +47,39 @@ static INLINE __m256i fold_mul_and_sum(_
     return partial;
 }
 
+// Mask used to shuffle the elements within a 256-bit register.
+const int svt_shuffle_reg_256bit[8] = {
+    0x0b0a0d0c, 0x07060908, 0x03020504, 0x0f0e0100, 0x0b0a0d0c, 0x07060908, 0x03020504, 0x0f0e0100};
+
+/* partial A is a 16-bit vector of the form:
+[x8 - - x1 | x16 - - x9] and partial B has the form:
+[0  y1 - y7 | 0 y9 - y15].
+This function computes (x1^2+y1^2)*C1 + (x2^2+y2^2)*C2 + ...
+(x7^2+y7^2)*C7 + (x8^2+0^2)*C8 on each 128-bit lane. Here the C1..C8 constants
+are in const1 and const2. */
+static INLINE __m256i fold_mul_and_sum_dual(__m256i *partiala, __m256i *partialb,
+                                            const __m256i *const1, const __m256i *const2) {
+    __m256i tmp;
+    /* Reverse partial B. */
+    *partialb = _mm256_shuffle_epi8(*partialb,
+                                    _mm256_loadu_si256((const __m256i *)svt_shuffle_reg_256bit));
+
+    /* Interleave the x and y values of identical indices and pair x8 with 0. */
+    tmp       = *partiala;
+    *partiala = _mm256_unpacklo_epi16(*partiala, *partialb);
+    *partialb = _mm256_unpackhi_epi16(tmp, *partialb);
+
+    /* Square and add the corresponding x and y values. */
+    *partiala = _mm256_madd_epi16(*partiala, *partiala);
+    *partialb = _mm256_madd_epi16(*partialb, *partialb);
+    /* Multiply by constant. */
+    *partiala = _mm256_mullo_epi32(*partiala, *const1);
+    *partialb = _mm256_mullo_epi32(*partialb, *const2);
+    /* Sum all results. */
+    *partiala = _mm256_add_epi32(*partiala, *partialb);
+    return *partiala;
+}
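
Stripped of the shuffle/unpack plumbing, this sequence squares each (x, y) pair, weights it, and accumulates. A scalar model of one 128-bit lane, reflecting the comment above, once the four 32-bit partials are horizontally summed (hsum4_dual does that later); C[] stands in for the const1/const2 weights:

    #include <stdint.h>

    /* Scalar model of one lane of fold_mul_and_sum_dual():
       x[] from partial A, y[] from the reversed partial B, with y[7] = 0. */
    static int32_t fold_mul_and_sum_lane(const int16_t x[8], const int16_t y[8],
                                         const int32_t C[8]) {
        int32_t acc = 0;
        for (int i = 0; i < 8; i++)
            acc += ((int32_t)x[i] * x[i] + (int32_t)y[i] * y[i]) * C[i];
        return acc;
    }
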
+
 static INLINE __m128i hsum4(__m128i x0, __m128i x1, __m128i x2, __m128i x3) {
     __m128i t0, t1, t2, t3;
     t0 = _mm_unpacklo_epi32(x0, x1);
@@ -60,6 +93,19 @@ static INLINE __m128i hsum4(__m128i x0,
     return _mm_add_epi32(_mm_add_epi32(x0, x1), _mm_add_epi32(x2, x3));
 }
 
+static INLINE __m256i hsum4_dual(__m256i *x0, __m256i *x1, __m256i *x2, __m256i *x3) {
+    const __m256i t0 = _mm256_unpacklo_epi32(*x0, *x1);
+    const __m256i t1 = _mm256_unpacklo_epi32(*x2, *x3);
+    const __m256i t2 = _mm256_unpackhi_epi32(*x0, *x1);
+    const __m256i t3 = _mm256_unpackhi_epi32(*x2, *x3);
+
+    *x0 = _mm256_unpacklo_epi64(t0, t1);
+    *x1 = _mm256_unpackhi_epi64(t0, t1);
+    *x2 = _mm256_unpacklo_epi64(t2, t3);
+    *x3 = _mm256_unpackhi_epi64(t2, t3);
+    return _mm256_add_epi32(_mm256_add_epi32(*x0, *x1), _mm256_add_epi32(*x2, *x3));
+}
+
 /* Computes cost for directions 0, 5, 6 and 7. We can call this function again
 to compute the remaining directions. */
 static INLINE void compute_directions(__m128i lines[8], int32_t tmp_cost1[4]) {
@@ -159,8 +205,9 @@ static INLINE void array_reverse_transpo
     res[1] = _mm_unpacklo_epi64(tr1_6, tr1_7);
     res[0] = _mm_unpackhi_epi64(tr1_6, tr1_7);
 }
-uint8_t svt_cdef_find_dir_avx2(const uint16_t *img, int32_t stride, int32_t *var,
-                               int32_t coeff_shift) {
+
+uint8_t svt_aom_cdef_find_dir_avx2(const uint16_t *img, int32_t stride, int32_t *var,
+                                   int32_t coeff_shift) {
     int32_t cost[8];
     int32_t best_cost = 0;
     uint8_t i;
@@ -197,6 +244,179 @@ uint8_t svt_cdef_find_dir_avx2(const uin
     return best_dir;
 }
 
+/* Computes cost for directions 0, 5, 6 and 7. We can call this function again
+to compute the remaining directions. */
+static INLINE __m256i compute_directions_dual(__m256i *lines, int32_t cost_first_8x8[4],
+                                              int32_t cost_second_8x8[4]) {
+    __m256i partial4a, partial4b, partial5a, partial5b, partial7a, partial7b;
+    __m256i partial6;
+    __m256i tmp;
+    /* Partial sums for lines 0 and 1. */
+    partial4a = _mm256_slli_si256(lines[0], 14);
+    partial4b = _mm256_srli_si256(lines[0], 2);
+    partial4a = _mm256_add_epi16(partial4a, _mm256_slli_si256(lines[1], 12));
+    partial4b = _mm256_add_epi16(partial4b, _mm256_srli_si256(lines[1], 4));
+    tmp       = _mm256_add_epi16(lines[0], lines[1]);
+    partial5a = _mm256_slli_si256(tmp, 10);
+    partial5b = _mm256_srli_si256(tmp, 6);
+    partial7a = _mm256_slli_si256(tmp, 4);
+    partial7b = _mm256_srli_si256(tmp, 12);
+    partial6  = tmp;
+
+    /* Partial sums for lines 2 and 3. */
+    partial4a = _mm256_add_epi16(partial4a, _mm256_slli_si256(lines[2], 10));
+    partial4b = _mm256_add_epi16(partial4b, _mm256_srli_si256(lines[2], 6));
+    partial4a = _mm256_add_epi16(partial4a, _mm256_slli_si256(lines[3], 8));
+    partial4b = _mm256_add_epi16(partial4b, _mm256_srli_si256(lines[3], 8));
+    tmp       = _mm256_add_epi16(lines[2], lines[3]);
+    partial5a = _mm256_add_epi16(partial5a, _mm256_slli_si256(tmp, 8));
+    partial5b = _mm256_add_epi16(partial5b, _mm256_srli_si256(tmp, 8));
+    partial7a = _mm256_add_epi16(partial7a, _mm256_slli_si256(tmp, 6));
+    partial7b = _mm256_add_epi16(partial7b, _mm256_srli_si256(tmp, 10));
+    partial6  = _mm256_add_epi16(partial6, tmp);
+
+    /* Partial sums for lines 4 and 5. */
+    partial4a = _mm256_add_epi16(partial4a, _mm256_slli_si256(lines[4], 6));
+    partial4b = _mm256_add_epi16(partial4b, _mm256_srli_si256(lines[4], 10));
+    partial4a = _mm256_add_epi16(partial4a, _mm256_slli_si256(lines[5], 4));
+    partial4b = _mm256_add_epi16(partial4b, _mm256_srli_si256(lines[5], 12));
+    tmp       = _mm256_add_epi16(lines[4], lines[5]);
+    partial5a = _mm256_add_epi16(partial5a, _mm256_slli_si256(tmp, 6));
+    partial5b = _mm256_add_epi16(partial5b, _mm256_srli_si256(tmp, 10));
+    partial7a = _mm256_add_epi16(partial7a, _mm256_slli_si256(tmp, 8));
+    partial7b = _mm256_add_epi16(partial7b, _mm256_srli_si256(tmp, 8));
+    partial6  = _mm256_add_epi16(partial6, tmp);
+
+    /* Partial sums for lines 6 and 7. */
+    partial4a = _mm256_add_epi16(partial4a, _mm256_slli_si256(lines[6], 2));
+    partial4b = _mm256_add_epi16(partial4b, _mm256_srli_si256(lines[6], 14));
+    partial4a = _mm256_add_epi16(partial4a, lines[7]);
+    tmp       = _mm256_add_epi16(lines[6], lines[7]);
+    partial5a = _mm256_add_epi16(partial5a, _mm256_slli_si256(tmp, 4));
+    partial5b = _mm256_add_epi16(partial5b, _mm256_srli_si256(tmp, 12));
+    partial7a = _mm256_add_epi16(partial7a, _mm256_slli_si256(tmp, 10));
+    partial7b = _mm256_add_epi16(partial7b, _mm256_srli_si256(tmp, 6));
+    partial6  = _mm256_add_epi16(partial6, tmp);
+
+    const __m256i const_reg_1 = _mm256_set_epi32(210, 280, 420, 840, 210, 280, 420, 840);
+    const __m256i const_reg_2 = _mm256_set_epi32(105, 120, 140, 168, 105, 120, 140, 168);
+    const __m256i const_reg_3 = _mm256_set_epi32(210, 420, 0, 0, 210, 420, 0, 0);
+    const __m256i const_reg_4 = _mm256_set_epi32(105, 105, 105, 140, 105, 105, 105, 140);
+
+    /* Compute costs in terms of partial sums. */
+    partial4a = fold_mul_and_sum_dual(&partial4a, &partial4b, &const_reg_1, &const_reg_2);
+    partial7a = fold_mul_and_sum_dual(&partial7a, &partial7b, &const_reg_3, &const_reg_4);
+    partial5a = fold_mul_and_sum_dual(&partial5a, &partial5b, &const_reg_3, &const_reg_4);
+    partial6  = _mm256_madd_epi16(partial6, partial6);
+    partial6  = _mm256_mullo_epi32(partial6, _mm256_set1_epi32(105));
+
+    partial4a = hsum4_dual(&partial4a, &partial5a, &partial6, &partial7a);
+    _mm_storeu_si128((__m128i *)cost_first_8x8, _mm256_castsi256_si128(partial4a));
+    _mm_storeu_si128((__m128i *)cost_second_8x8, _mm256_extractf128_si256(partial4a, 1));
+
+    return partial4a;
+}
+
+/* transpose and reverse the order of the lines -- equivalent to a 90-degree
+counter-clockwise rotation of the pixels. */
+static INLINE void array_reverse_transpose_8x8_dual(__m256i *in, __m256i *res) {
+    const __m256i tr0_0 = _mm256_unpacklo_epi16(in[0], in[1]);
+    const __m256i tr0_1 = _mm256_unpacklo_epi16(in[2], in[3]);
+    const __m256i tr0_2 = _mm256_unpackhi_epi16(in[0], in[1]);
+    const __m256i tr0_3 = _mm256_unpackhi_epi16(in[2], in[3]);
+    const __m256i tr0_4 = _mm256_unpacklo_epi16(in[4], in[5]);
+    const __m256i tr0_5 = _mm256_unpacklo_epi16(in[6], in[7]);
+    const __m256i tr0_6 = _mm256_unpackhi_epi16(in[4], in[5]);
+    const __m256i tr0_7 = _mm256_unpackhi_epi16(in[6], in[7]);
+
+    const __m256i tr1_0 = _mm256_unpacklo_epi32(tr0_0, tr0_1);
+    const __m256i tr1_1 = _mm256_unpacklo_epi32(tr0_4, tr0_5);
+    const __m256i tr1_2 = _mm256_unpackhi_epi32(tr0_0, tr0_1);
+    const __m256i tr1_3 = _mm256_unpackhi_epi32(tr0_4, tr0_5);
+    const __m256i tr1_4 = _mm256_unpacklo_epi32(tr0_2, tr0_3);
+    const __m256i tr1_5 = _mm256_unpacklo_epi32(tr0_6, tr0_7);
+    const __m256i tr1_6 = _mm256_unpackhi_epi32(tr0_2, tr0_3);
+    const __m256i tr1_7 = _mm256_unpackhi_epi32(tr0_6, tr0_7);
+
+    res[7] = _mm256_unpacklo_epi64(tr1_0, tr1_1);
+    res[6] = _mm256_unpackhi_epi64(tr1_0, tr1_1);
+    res[5] = _mm256_unpacklo_epi64(tr1_2, tr1_3);
+    res[4] = _mm256_unpackhi_epi64(tr1_2, tr1_3);
+    res[3] = _mm256_unpacklo_epi64(tr1_4, tr1_5);
+    res[2] = _mm256_unpackhi_epi64(tr1_4, tr1_5);
+    res[1] = _mm256_unpacklo_epi64(tr1_6, tr1_7);
+    res[0] = _mm256_unpackhi_epi64(tr1_6, tr1_7);
+}
+
+void svt_aom_cdef_find_dir_dual_avx2(const uint16_t *img1, const uint16_t *img2, int stride,
+                                     int32_t *var_out_1st, int32_t *var_out_2nd,
+                                     int32_t coeff_shift, uint8_t *out_dir_1st_8x8,
+                                     uint8_t *out_dir_2nd_8x8) {
+    int32_t cost_first_8x8[8];
+    int32_t cost_second_8x8[8];
+    // Used to store the best cost for the two 8x8 blocks.
+    int32_t best_cost[2] = {0};
+    // Best directions for the two 8x8 blocks.
+    uint8_t best_dir[2] = {0};
+
+    const __m128i const_coeff_shift_reg = _mm_cvtsi32_si128(coeff_shift);
+    const __m256i const_128_reg         = _mm256_set1_epi16(128);
+    __m256i       lines[8];
+    for (int i = 0; i < 8; i++) {
+        const __m128i src_1 = _mm_loadu_si128((const __m128i *)&img1[i * stride]);
+        const __m128i src_2 = _mm_loadu_si128((const __m128i *)&img2[i * stride]);
+
+        lines[i] = _mm256_insertf128_si256(_mm256_castsi128_si256(src_1), src_2, 1);
+        lines[i] = _mm256_sub_epi16(_mm256_sra_epi16(lines[i], const_coeff_shift_reg),
+                                    const_128_reg);
+    }
+
+    /* Compute "mostly vertical" directions. */
+    const __m256i dir47 = compute_directions_dual(lines, cost_first_8x8 + 4, cost_second_8x8 + 4);
+
+    /* Transpose and reverse the order of the lines. */
+    array_reverse_transpose_8x8_dual(lines, lines);
+
+    /* Compute "mostly horizontal" directions. */
+    const __m256i dir03 = compute_directions_dual(lines, cost_first_8x8, cost_second_8x8);
+
+    __m256i max = _mm256_max_epi32(dir03, dir47);
+    max         = _mm256_max_epi32(
+        max, _mm256_or_si256(_mm256_srli_si256(max, 8), _mm256_slli_si256(max, 16 - (8))));
+    max = _mm256_max_epi32(
+        max, _mm256_or_si256(_mm256_srli_si256(max, 4), _mm256_slli_si256(max, 16 - (4))));
+
+    const __m128i first_8x8_output  = _mm256_castsi256_si128(max);
+    const __m128i second_8x8_output = _mm256_extractf128_si256(max, 1);
+    const __m128i cmpeg_res_00 = _mm_cmpeq_epi32(first_8x8_output, _mm256_castsi256_si128(dir47));
+    const __m128i cmpeg_res_01 = _mm_cmpeq_epi32(first_8x8_output, _mm256_castsi256_si128(dir03));
+    const __m128i cmpeg_res_10 = _mm_cmpeq_epi32(second_8x8_output,
+                                                 _mm256_extractf128_si256(dir47, 1));
+    const __m128i cmpeg_res_11 = _mm_cmpeq_epi32(second_8x8_output,
+                                                 _mm256_extractf128_si256(dir03, 1));
+    const __m128i t_first_8x8  = _mm_packs_epi32(cmpeg_res_01, cmpeg_res_00);
+    const __m128i t_second_8x8 = _mm_packs_epi32(cmpeg_res_11, cmpeg_res_10);
+
+    best_cost[0] = _mm_cvtsi128_si32(_mm256_castsi256_si128(max));
+    best_cost[1] = _mm_cvtsi128_si32(second_8x8_output);
+    best_dir[0]  = _mm_movemask_epi8(_mm_packs_epi16(t_first_8x8, t_first_8x8));
+    best_dir[0]  = get_msb(best_dir[0] ^ (best_dir[0] - 1)); // Count trailing zeros
+    best_dir[1]  = _mm_movemask_epi8(_mm_packs_epi16(t_second_8x8, t_second_8x8));
+    best_dir[1]  = get_msb(best_dir[1] ^ (best_dir[1] - 1)); // Count trailing zeros
+
+    /* Difference between the optimal variance and the variance along the
+       orthogonal direction. Again, the sum(x^2) terms cancel out. */
+    *var_out_1st = best_cost[0] - cost_first_8x8[(best_dir[0] + 4) & 7];
+    *var_out_2nd = best_cost[1] - cost_second_8x8[(best_dir[1] + 4) & 7];
+
+    /* We'd normally divide by 840, but dividing by 1024 is close enough
+    for what we're going to do with this. */
+    *var_out_1st >>= 10;
+    *var_out_2nd >>= 10;
+    *out_dir_1st_8x8 = best_dir[0];
+    *out_dir_2nd_8x8 = best_dir[1];
+}
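
The winning direction falls out of a byte mask: packing the equality results yields a bitmask whose lowest set bit marks the best lane, and get_msb(best_dir ^ (best_dir - 1)) extracts that bit's index, i.e. a trailing-zero count. A scalar demonstration of the identity:

    /* For m != 0: m ^ (m - 1) sets every bit up to and including the lowest
       set bit of m, so that value's MSB index is m's trailing-zero count. */
    static int trailing_zeros(unsigned m) {
        unsigned v = m ^ (m - 1);
        int msb = 0;
        while (v >>= 1) msb++;
        return msb;
    }

E.g. m = 40 (binary 101000): m - 1 = 100111, m ^ (m - 1) = 001111, whose MSB index is 3, which is indeed the trailing-zero count of 40.
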
+
 // sign(a-b) * min(abs(a-b), max(0, threshold - (abs(a-b) >> adjdamp)))
 static INLINE __m256i constrain16(const __m256i in0, const __m256i in1, const __m256i threshold,
                                   const uint32_t adjdamp) {
@@ -272,10 +492,10 @@ void svt_cdef_filter_block_8xn_16_avx2(c
                                        uint8_t subsampling_factor) {
     const int32_t po1  = eb_cdef_directions[dir][0];
     const int32_t po2  = eb_cdef_directions[dir][1];
-    const int32_t s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    const int32_t s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    const int32_t s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    const int32_t s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
     // SSE CHKN
     const int32_t *pri_taps = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int32_t *sec_taps = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
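
Dropping the & 7 wrap on the secondary-tap indices (here and in the 4xn/8xn variants below) only stays in bounds if eb_cdef_directions gained two rows of padding on each side, as in libaom's padded-table scheme. That table change is outside this excerpt, so the sketch below is an assumed layout, not the repository's definition:

    /* Assumed layout, values elided: rows -2/-1 and 8/9 duplicate the
       wrapped entries, so eb_cdef_directions[dir + 2] and [dir - 2]
       stay in bounds for dir in 0..7 without masking. */
    static const int cdef_directions_padded_demo[12][2] = {{0, 0}};
    static const int (*const eb_cdef_directions_demo)[2] = cdef_directions_padded_demo + 2;
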
@@ -370,15 +590,14 @@ static void svt_cdef_filter_block_4xn_16
                                               int32_t dir, int32_t pri_damping, int32_t sec_damping,
                                               int32_t coeff_shift, uint8_t height,
                                               uint8_t subsampling_factor) {
-    __m256i p0, p1, p2, p3, sum, row, res;
-    __m256i max, min, large = _mm256_set1_epi16(CDEF_VERY_LARGE);
-    int32_t po1  = eb_cdef_directions[dir][0];
-    int32_t po2  = eb_cdef_directions[dir][1];
-    int32_t s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int32_t s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int32_t s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int32_t s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
-
+    __m256i        p0, p1, p2, p3, sum, row, res;
+    __m256i        max, min, large = _mm256_set1_epi16(CDEF_VERY_LARGE);
+    const int32_t  po1              = eb_cdef_directions[dir][0];
+    const int32_t  po2              = eb_cdef_directions[dir][1];
+    const int32_t  s1o1             = eb_cdef_directions[(dir + 2)][0];
+    const int32_t  s1o2             = eb_cdef_directions[(dir + 2)][1];
+    const int32_t  s2o1             = eb_cdef_directions[(dir - 2)][0];
+    const int32_t  s2o2             = eb_cdef_directions[(dir - 2)][1];
     const int32_t *pri_taps         = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int32_t *sec_taps         = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
     __m256i        pri_strength_256 = _mm256_set1_epi16(pri_strength);
@@ -546,14 +765,14 @@ static void svt_cdef_filter_block_4xn_8_
                                              int32_t dir, int32_t pri_damping, int32_t sec_damping,
                                              int32_t coeff_shift, uint8_t height,
                                              uint8_t subsampling_factor) {
-    __m256i p0, p1, p2, p3, sum, row, res;
-    __m256i max, min, large = _mm256_set1_epi16(CDEF_VERY_LARGE);
-    int32_t po1  = eb_cdef_directions[dir][0];
-    int32_t po2  = eb_cdef_directions[dir][1];
-    int32_t s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int32_t s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int32_t s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int32_t s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    __m256i       p0, p1, p2, p3, sum, row, res;
+    __m256i       max, min, large = _mm256_set1_epi16(CDEF_VERY_LARGE);
+    const int32_t po1  = eb_cdef_directions[dir][0];
+    const int32_t po2  = eb_cdef_directions[dir][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
 
     const int32_t *pri_taps         = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int32_t *sec_taps         = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
@@ -724,15 +943,15 @@ static void svt_cdef_filter_block_8xn_8_
                                              int32_t dir, int32_t pri_damping, int32_t sec_damping,
                                              int32_t coeff_shift, uint8_t height,
                                              uint8_t subsampling_factor) {
-    int32_t i;
-    __m256i sum, p0, p1, p2, p3, row, res;
-    __m256i max, min, large = _mm256_set1_epi16(CDEF_VERY_LARGE);
-    int32_t po1  = eb_cdef_directions[dir][0];
-    int32_t po2  = eb_cdef_directions[dir][1];
-    int32_t s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int32_t s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int32_t s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int32_t s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    int32_t       i;
+    __m256i       sum, p0, p1, p2, p3, row, res;
+    __m256i       max, min, large = _mm256_set1_epi16(CDEF_VERY_LARGE);
+    const int32_t po1  = eb_cdef_directions[dir][0];
+    const int32_t po2  = eb_cdef_directions[dir][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
     // SSE CHKN
     const int32_t *pri_taps         = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int32_t *sec_taps         = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
@@ -929,17 +1148,31 @@ void svt_cdef_filter_block_avx2(uint8_t
         }
     } else {
         if (bsize == BLOCK_8X8) {
-            svt_cdef_filter_block_8xn_16(in,
-                                         pri_strength,
-                                         sec_strength,
-                                         dir,
-                                         pri_damping,
-                                         sec_damping,
-                                         coeff_shift,
-                                         dst16,
-                                         dstride,
-                                         8,
-                                         subsampling_factor);
+            // When subsampling_factor is 4 we cannot use the AVX-512 kernel, because it loads 4 lines at a time (block height 16 in this case)
+            if (subsampling_factor == 4)
+                svt_cdef_filter_block_8xn_16_avx2(in,
+                                                  pri_strength,
+                                                  sec_strength,
+                                                  dir,
+                                                  pri_damping,
+                                                  sec_damping,
+                                                  coeff_shift,
+                                                  dst16,
+                                                  dstride,
+                                                  8,
+                                                  subsampling_factor);
+            else
+                svt_cdef_filter_block_8xn_16(in,
+                                             pri_strength,
+                                             sec_strength,
+                                             dir,
+                                             pri_damping,
+                                             sec_damping,
+                                             coeff_shift,
+                                             dst16,
+                                             dstride,
+                                             8,
+                                             subsampling_factor);
         } else if (bsize == BLOCK_4X8) {
             svt_cdef_filter_block_4xn_16_avx2(dst16,
                                               dstride,
@@ -981,3 +1214,38 @@ void svt_cdef_filter_block_avx2(uint8_t
         }
     }
 }
+
+void svt_aom_copy_rect8_8bit_to_16bit_avx2(uint16_t *dst, int32_t dstride, const uint8_t *src,
+                                           int32_t sstride, int32_t v, int32_t h) {
+    int i = 0, j = 0;
+    int remaining_width = h;
+
+    // Process 16 pixels at a time.
+    if (h > 15) {
+        for (i = 0; i < v; i++) {
+            for (j = 0; j < h - 15; j += 16) {
+                __m128i row = _mm_loadu_si128((__m128i *)&src[i * sstride + j]);
+                _mm256_storeu_si256((__m256i *)&dst[i * dstride + j], _mm256_cvtepu8_epi16(row));
+            }
+        }
+        remaining_width = h & 0xe;
+    }
+
+    // Process 8 pixels at a time.
+    if (remaining_width > 7) {
+        for (i = 0; i < v; i++) {
+            __m128i row = _mm_loadl_epi64((__m128i *)&src[i * sstride + j]);
+            _mm_storeu_si128((__m128i *)&dst[i * dstride + j],
+                             _mm_unpacklo_epi8(row, _mm_setzero_si128()));
+        }
+        remaining_width = h & 0x7;
+        j += 8;
+    }
+
+    // Process the remaining pixels.
+    if (remaining_width) {
+        for (i = 0; i < v; i++) {
+            for (int k = j; k < h; k++) { dst[i * dstride + k] = src[i * sstride + k]; }
+        }
+    }
+}
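
The three passes (16-wide zero-extend, 8-wide unpack, scalar tail) implement one widening copy; the h & 0xe and h & 0x7 tail masks appear tuned to the even widths CDEF actually passes, since a width congruent to 1 mod 16 would slip past both tail passes. The plain-C reference the SIMD paths must match:

    #include <stdint.h>

    /* Plain-C model of the widening copy: v rows of h bytes into uint16_t. */
    static void copy_rect8_8bit_to_16bit_scalar(uint16_t *dst, int32_t dstride,
                                                const uint8_t *src, int32_t sstride,
                                                int32_t v, int32_t h) {
        for (int32_t i = 0; i < v; i++)
            for (int32_t j = 0; j < h; j++)
                dst[i * dstride + j] = src[i * sstride + j];
    }
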
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/convolve_avx2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/convolve_avx2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/convolve_avx2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/convolve_avx2.c	2022-08-01 19:12:00.000000000 +0000
@@ -17,6 +17,7 @@
 #include "EbInterPrediction.h"
 #include "EbMemory_AVX2.h"
 #include "synonyms.h"
+#include "synonyms_avx2.h"
 
 static INLINE void sr_y_round_store_32_avx2(const __m256i res[2], uint8_t *const dst) {
     __m256i r[2];
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/convolve_avx2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/convolve_avx2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/convolve_avx2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/convolve_avx2.h	2022-08-01 19:12:00.000000000 +0000
@@ -18,7 +18,6 @@
 #include "EbMemory_AVX2.h"
 #include "EbMemory_SSE4_1.h"
 #include "synonyms.h"
-#include "synonyms_avx2.h"
 
 #define LEFT_SHIFT (2 * FILTER_BITS - 3 - COMPOUND_ROUND1_BITS)
 
@@ -67,16 +66,16 @@ void jnt_convolve_2d_ver_4tap_avx2(const
                                    const ConvolveParams *const conv_params, uint8_t *dst8,
                                    const int32_t dst8_stride);
 
-static INLINE EbBool is_convolve_2tap(const int16_t *const filter) {
+static INLINE Bool is_convolve_2tap(const int16_t *const filter) {
     return (const void *)filter == (const void *)bilinear_filters;
 }
 
-static INLINE EbBool is_convolve_4tap(const int16_t *const filter) {
+static INLINE Bool is_convolve_4tap(const int16_t *const filter) {
     return (const void *)filter == (const void *)sub_pel_filters_4 ||
         (const void *)filter == (const void *)sub_pel_filters_4smooth;
 }
 
-static INLINE EbBool is_convolve_6tap(const int16_t *const filter) {
+static INLINE Bool is_convolve_6tap(const int16_t *const filter) {
     return (const void *)filter == (const void *)sub_pel_filters_8 ||
         (const void *)filter == (const void *)sub_pel_filters_8smooth;
 }
@@ -686,7 +685,7 @@ static INLINE void xy_x_round_store_32_a
 
     r[0]             = xy_x_round_avx2(res[0]);
     r[1]             = xy_x_round_avx2(res[1]);
-    const __m256i d0 = _mm256_inserti128_si256(r[0], _mm256_extracti128_si256(r[1], 0), 1);
+    const __m256i d0 = _mm256_inserti128_si256(r[0], _mm256_castsi256_si128(r[1]), 1);
     const __m256i d1 = _mm256_inserti128_si256(r[1], _mm256_extracti128_si256(r[0], 1), 0);
     _mm256_storeu_si256((__m256i *)dst, d0);
     _mm256_storeu_si256((__m256i *)(dst + 16), d1);
@@ -910,7 +909,7 @@ static INLINE void sr_x_2tap_32_avg_avx2
 
 static INLINE void jnt_no_avg_store_16x2_avx2(const __m256i src0, const __m256i src1,
                                               ConvBufType *const dst, const ptrdiff_t stride) {
-    const __m256i d0 = _mm256_inserti128_si256(src0, _mm256_extracti128_si256(src1, 0), 1);
+    const __m256i d0 = _mm256_inserti128_si256(src0, _mm256_castsi256_si128(src1), 1);
     const __m256i d1 = _mm256_inserti128_si256(src1, _mm256_extracti128_si256(src0, 1), 0);
     _mm256_storeu_si256((__m256i *)dst, d0);
     _mm256_storeu_si256((__m256i *)(dst + stride), d1);
@@ -2118,9 +2117,8 @@ static INLINE __m256i jnt_copy_load_src_
 }
 
 static INLINE void jnt_copy_load_src_32_avx2(const uint8_t *const src, __m256i s_256[2]) {
-    const __m256i s8     = _mm256_loadu_si256((__m256i *)src);
-    const __m128i s8_lo  = _mm256_castsi256_si128(s8);
-    const __m128i s8_hi  = _mm256_extracti128_si256(s8, 1);
+    const __m128i s8_lo  = _mm_loadu_si128((__m128i *)src);
+    const __m128i s8_hi  = _mm_loadu_si128((__m128i *)(src + 16));
     const __m256i s16_lo = _mm256_cvtepu8_epi16(s8_lo);
     const __m256i s16_hi = _mm256_cvtepu8_epi16(s8_hi);
     s_256[0]             = _mm256_slli_epi16(s16_lo, LEFT_SHIFT);
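
_mm256_cvtepu8_epi16 takes a __m128i, so loading the two 16-byte halves directly drops the 256-bit load plus lane extract of the old code while producing identical values. In scalar terms, both variants compute:

    #include <stdint.h>

    /* Scalar equivalent of jnt_copy_load_src_32_avx2: widen 32 bytes to
       16-bit and pre-shift by LEFT_SHIFT (defined earlier in this header). */
    static void jnt_copy_load_src_32_scalar(const uint8_t *src, uint16_t s16[32],
                                            int left_shift) {
        for (int i = 0; i < 32; i++)
            s16[i] = (uint16_t)(src[i] << left_shift);
    }
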
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbHighbdIntraPrediction_AVX2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbHighbdIntraPrediction_AVX2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbHighbdIntraPrediction_AVX2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbHighbdIntraPrediction_AVX2.c	2022-08-01 19:12:00.000000000 +0000
@@ -21,7 +21,7 @@
 
 // Handle number of elements: up to 64.
 static INLINE __m128i dc_sum_large(const __m256i src) {
-    const __m128i s_lo = _mm256_extracti128_si256(src, 0);
+    const __m128i s_lo = _mm256_castsi256_si128(src);
     const __m128i s_hi = _mm256_extracti128_si256(src, 1);
     __m128i       sum, sum_hi;
     sum    = _mm_add_epi16(s_lo, s_hi);
@@ -35,7 +35,7 @@ static INLINE __m128i dc_sum_large(const
 
 // Handle number of elements: 65 to 128.
 static INLINE __m128i dc_sum_larger(const __m256i src) {
-    const __m128i s_lo = _mm256_extracti128_si256(src, 0);
+    const __m128i s_lo = _mm256_castsi256_si128(src);
     const __m128i s_hi = _mm256_extracti128_si256(src, 1);
     __m128i       sum, sum_hi;
     sum = _mm_add_epi16(s_lo, s_hi);
@@ -49,7 +49,7 @@ static INLINE __m128i dc_sum_larger(cons
 
 static INLINE __m128i dc_sum_16(const uint16_t *const src) {
     const __m256i s    = _mm256_loadu_si256((const __m256i *)src);
-    const __m128i s_lo = _mm256_extracti128_si256(s, 0);
+    const __m128i s_lo = _mm256_castsi256_si128(s);
     const __m128i s_hi = _mm256_extracti128_si256(s, 1);
     const __m128i sum  = _mm_add_epi16(s_lo, s_hi);
     return dc_sum_8x16bit(sum);
@@ -76,7 +76,7 @@ static INLINE __m128i dc_sum_64(const ui
 static INLINE __m128i dc_sum_4_16(const uint16_t *const src_4, const uint16_t *const src_16) {
     const __m128i s_4         = _mm_loadl_epi64((const __m128i *)src_4);
     const __m256i s_16        = _mm256_loadu_si256((const __m256i *)src_16);
-    const __m128i s_lo        = _mm256_extracti128_si256(s_16, 0);
+    const __m128i s_lo        = _mm256_castsi256_si128(s_16);
     const __m128i s_hi        = _mm256_extracti128_si256(s_16, 1);
     const __m128i s_16_sum0   = _mm_add_epi16(s_lo, s_hi);
     const __m128i s_16_sum_hi = _mm_srli_si128(s_16_sum0, 8);
@@ -88,7 +88,7 @@ static INLINE __m128i dc_sum_4_16(const
 static INLINE __m128i dc_sum_8_16(const uint16_t *const src_8, const uint16_t *const src_16) {
     const __m128i s_8      = _mm_loadu_si128((const __m128i *)src_8);
     const __m256i s_16     = _mm256_loadu_si256((const __m256i *)src_16);
-    const __m128i s_lo     = _mm256_extracti128_si256(s_16, 0);
+    const __m128i s_lo     = _mm256_castsi256_si128(s_16);
     const __m128i s_hi     = _mm256_extracti128_si256(s_16, 1);
     const __m128i s_16_sum = _mm_add_epi16(s_lo, s_hi);
     const __m128i sum      = _mm_add_epi16(s_16_sum, s_8);
@@ -100,7 +100,7 @@ static INLINE __m128i dc_sum_8_32(const
     const __m256i s_32_0   = _mm256_loadu_si256((const __m256i *)(src_32 + 0x00));
     const __m256i s_32_1   = _mm256_loadu_si256((const __m256i *)(src_32 + 0x10));
     const __m256i s_32     = _mm256_add_epi16(s_32_0, s_32_1);
-    const __m128i s_lo     = _mm256_extracti128_si256(s_32, 0);
+    const __m128i s_lo     = _mm256_castsi256_si128(s_32);
     const __m128i s_hi     = _mm256_extracti128_si256(s_32, 1);
     const __m128i s_16_sum = _mm_add_epi16(s_lo, s_hi);
     const __m128i sum      = _mm_add_epi16(s_8, s_16_sum);
@@ -1444,7 +1444,7 @@ static INLINE void smooth_pred_8x2(const
                                    uint16_t **const dst, const ptrdiff_t stride) {
     // 00 01 02 03 04 05 06 07  10 11 12 13 14 15 16 17
     const __m256i d = smooth_pred_kernel(weights_w, weights_h, rep, ab, lr);
-    _mm_storeu_si128((__m128i *)*dst, _mm256_extracti128_si256(d, 0));
+    _mm_storeu_si128((__m128i *)*dst, _mm256_castsi256_si128(d));
     *dst += stride;
     _mm_storeu_si128((__m128i *)*dst, _mm256_extracti128_si256(d, 1));
     *dst += stride;
@@ -1958,7 +1958,7 @@ static INLINE void smooth_h_pred_8x2(con
     const __m256i t = _mm256_shuffle_epi8(*lr, rep); // 0 0 0 0  1 1 1 1
     // 00 01 02 03 04 05 06 07  10 11 12 13 14 15 16 17
     const __m256i d = smooth_h_pred_kernel(weights, t);
-    _mm_storeu_si128((__m128i *)*dst, _mm256_extracti128_si256(d, 0));
+    _mm_storeu_si128((__m128i *)*dst, _mm256_castsi256_si128(d));
     *dst += stride;
     _mm_storeu_si128((__m128i *)*dst, _mm256_extracti128_si256(d, 1));
     *dst += stride;
@@ -2348,7 +2348,7 @@ static INLINE void smooth_v_pred_8x2(con
                                      const ptrdiff_t stride) {
     // 00 01 02 03 04 05 06 07  10 11 12 13 14 15 16 17
     const __m256i d = smooth_v_pred_kernel(weights, rep, ab);
-    _mm_storeu_si128((__m128i *)*dst, _mm256_extracti128_si256(d, 0));
+    _mm_storeu_si128((__m128i *)*dst, _mm256_castsi256_si128(d));
     *dst += stride;
     _mm_storeu_si128((__m128i *)*dst, _mm256_extracti128_si256(d, 1));
     *dst += stride;
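
These swaps are behavior-preserving: _mm256_castsi256_si128 reinterprets the low lane at no cost, whereas _mm256_extracti128_si256(x, 0) is a real extract that the compiler may or may not elide. A one-function illustration:

    #include <immintrin.h>

    /* Both forms yield the low 128 bits of v; the cast is a pure reinterpret
       and generates no instruction, while extract with immediate 0 may. */
    static __m128i low_lane(__m256i v) {
        return _mm256_castsi256_si128(v);
    }
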
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbIntraPrediction_Intrinsic_AVX2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbIntraPrediction_Intrinsic_AVX2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbIntraPrediction_Intrinsic_AVX2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbIntraPrediction_Intrinsic_AVX2.c	2022-08-01 19:12:00.000000000 +0000
@@ -2117,7 +2117,7 @@ static void dr_prediction_z2_hxw_avx2(in
 
                     __m256i base_y_offset    = _mm256_sub_epi16(base_y_c256, min_y256);
                     __m128i base_y_offset128 = _mm_packs_epi16(
-                        _mm256_extracti128_si256(base_y_offset, 0),
+                        _mm256_castsi256_si128(base_y_offset),
                         _mm256_extracti128_si256(base_y_offset, 1));
 
                     __m128i a0_y128 = _mm_maskload_epi32((int *)(left + min_y),
@@ -4950,7 +4950,7 @@ void svt_aom_highbd_paeth_predictor_8x4_
         l16 = _mm256_setr_m128i(_mm_set1_epi16(left[i]), _mm_set1_epi16(left[i + 1]));
 
         row = paeth_pred(&l16, &t0, &tl);
-        _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 0));
+        _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(row));
         dst += stride;
         _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 1));
         dst += stride;
@@ -4970,7 +4970,7 @@ void svt_aom_highbd_paeth_predictor_8x8_
         l16 = _mm256_setr_m128i(_mm_set1_epi16(left[i]), _mm_set1_epi16(left[i + 1]));
 
         row = paeth_pred(&l16, &t0, &tl);
-        _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 0));
+        _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(row));
         dst += stride;
         _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 1));
         dst += stride;
@@ -4990,7 +4990,7 @@ void svt_aom_highbd_paeth_predictor_8x16
         l16 = _mm256_setr_m128i(_mm_set1_epi16(left[i]), _mm_set1_epi16(left[i + 1]));
 
         row = paeth_pred(&l16, &t0, &tl);
-        _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 0));
+        _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(row));
         dst += stride;
         _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 1));
         dst += stride;
@@ -5010,7 +5010,7 @@ void svt_aom_highbd_paeth_predictor_8x32
         l16 = _mm256_setr_m128i(_mm_set1_epi16(left[i]), _mm_set1_epi16(left[i + 1]));
 
         row = paeth_pred(&l16, &t0, &tl);
-        _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 0));
+        _mm_storeu_si128((__m128i *)dst, _mm256_castsi256_si128(row));
         dst += stride;
         _mm_storeu_si128((__m128i *)dst, _mm256_extractf128_si256(row, 1));
         dst += stride;
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbMemory_AVX2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbMemory_AVX2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbMemory_AVX2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbMemory_AVX2.h	2022-08-01 19:12:00.000000000 +0000
@@ -12,7 +12,9 @@
 #ifndef EbMemory_AVX2_h
 #define EbMemory_AVX2_h
 
-#include "synonyms.h"
+#include <immintrin.h>
+#include "EbDefinitions.h"
+#include "common_dsp_rtcd.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -31,6 +33,13 @@ extern "C" {
 #define _mm256_cvtsi256_si32(a) _mm_cvtsi128_si32(_mm256_castsi256_si128(a))
 #endif
 
+static INLINE __m256i load_u8_4x2_avx2(const uint8_t *const src, const ptrdiff_t stride) {
+    __m128i src01;
+    src01 = _mm_cvtsi32_si128(*(int32_t *)(src + 0 * stride));
+    src01 = _mm_insert_epi32(src01, *(int32_t *)(src + 1 * stride), 1);
+    return _mm256_setr_m128i(src01, _mm_setzero_si128());
+}
+
 static INLINE __m256i load_u8_4x4_avx2(const uint8_t *const src, const ptrdiff_t stride) {
     __m128i src01, src23;
     src01 = _mm_cvtsi32_si128(*(int32_t *)(src + 0 * stride));
@@ -57,12 +66,6 @@ static INLINE __m256i load_u8_8x4_avx2(c
     return _mm256_setr_m128i(src01, src23);
 }
 
-static INLINE __m256i load_u8_16x2_avx2(const uint8_t *const src, const ptrdiff_t stride) {
-    const __m128i src0 = _mm_loadu_si128((__m128i *)(src + 0 * stride));
-    const __m128i src1 = _mm_loadu_si128((__m128i *)(src + 1 * stride));
-    return _mm256_setr_m128i(src0, src1);
-}
-
 static INLINE __m256i loadu_8bit_16x2_avx2(const void *const src, const ptrdiff_t strideInByte) {
     const __m128i src0 = _mm_loadu_si128((__m128i *)src);
     const __m128i src1 = _mm_loadu_si128((__m128i *)((uint8_t *)src + strideInByte));
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbPackUnPack_Intrinsic_AVX2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbPackUnPack_Intrinsic_AVX2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbPackUnPack_Intrinsic_AVX2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbPackUnPack_Intrinsic_AVX2.c	2022-08-01 19:12:00.000000000 +0000
@@ -286,3 +286,378 @@ void svt_enc_un_pack8_bit_data_avx2_intr
         }
     }
 }
+
+void svt_enc_msb_un_pack2d_avx2_intrin(uint16_t *in16_bit_buffer, uint32_t in_stride,
+                                       uint8_t *out8_bit_buffer, uint8_t *outn_bit_buffer,
+                                       uint32_t out8_stride, uint32_t outn_stride, uint32_t width,
+                                       uint32_t height) {
+    uint32_t x, y;
+
+    __m128i in_pixel0, in_pixel1, temp_pixel0, temp_pixel1, in_pixel1_shft_r_2_u8,
+        in_pixel0_shft_r_2_u8, in_pixel0_shft_r_2, in_pixel1_shft_r_2, temp_pixel0_u8,
+        temp_pixel1_u8;
+
+    __m128i xmm_3    = _mm_set1_epi16(0x0003);
+    __m128i xmm_00ff = _mm_set1_epi16(0x00FF);
+    __m256i ymm_3    = _mm256_set1_epi16(0x0003);
+    __m256i ymm_00ff = _mm256_set1_epi16(0x00FF);
+
+    if (width == 4) {
+        for (y = 0; y < height; y += 2) {
+            in_pixel0 = _mm_loadl_epi64((__m128i *)in16_bit_buffer);
+            in_pixel1 = _mm_loadl_epi64((__m128i *)(in16_bit_buffer + in_stride));
+
+            if (outn_bit_buffer) {
+                temp_pixel0                  = _mm_slli_epi16(_mm_and_si128(in_pixel0, xmm_3), 6);
+                temp_pixel1                  = _mm_slli_epi16(_mm_and_si128(in_pixel1, xmm_3), 6);
+                temp_pixel0_u8               = _mm_packus_epi16(temp_pixel0, temp_pixel0);
+                temp_pixel1_u8               = _mm_packus_epi16(temp_pixel1, temp_pixel1);
+                *(uint32_t *)outn_bit_buffer = _mm_cvtsi128_si32(temp_pixel0_u8);
+                *(uint32_t *)(outn_bit_buffer + outn_stride) = _mm_cvtsi128_si32(temp_pixel1_u8);
+                outn_bit_buffer += 2 * outn_stride;
+            }
+
+            in_pixel0_shft_r_2           = _mm_and_si128(_mm_srli_epi16(in_pixel0, 2), xmm_00ff);
+            in_pixel1_shft_r_2           = _mm_and_si128(_mm_srli_epi16(in_pixel1, 2), xmm_00ff);
+            in_pixel0_shft_r_2_u8        = _mm_packus_epi16(in_pixel0_shft_r_2, in_pixel0_shft_r_2);
+            in_pixel1_shft_r_2_u8        = _mm_packus_epi16(in_pixel1_shft_r_2, in_pixel1_shft_r_2);
+            *(uint32_t *)out8_bit_buffer = _mm_cvtsi128_si32(in_pixel0_shft_r_2_u8);
+            *(uint32_t *)(out8_bit_buffer + out8_stride) = _mm_cvtsi128_si32(in_pixel1_shft_r_2_u8);
+
+            out8_bit_buffer += 2 * out8_stride;
+            in16_bit_buffer += 2 * in_stride;
+        }
+    } else if (width == 8) {
+        for (y = 0; y < height; y += 2) {
+            in_pixel0 = _mm_loadu_si128((__m128i *)in16_bit_buffer);
+            in_pixel1 = _mm_loadu_si128((__m128i *)(in16_bit_buffer + in_stride));
+
+            if (outn_bit_buffer) {
+                temp_pixel0    = _mm_slli_epi16(_mm_and_si128(in_pixel0, xmm_3), 6);
+                temp_pixel1    = _mm_slli_epi16(_mm_and_si128(in_pixel1, xmm_3), 6);
+                temp_pixel0_u8 = _mm_packus_epi16(temp_pixel0, temp_pixel0);
+                temp_pixel1_u8 = _mm_packus_epi16(temp_pixel1, temp_pixel1);
+                _mm_storel_epi64((__m128i *)outn_bit_buffer, temp_pixel0_u8);
+                _mm_storel_epi64((__m128i *)(outn_bit_buffer + outn_stride), temp_pixel1_u8);
+                outn_bit_buffer += 2 * outn_stride;
+            }
+
+            in_pixel0_shft_r_2    = _mm_and_si128(_mm_srli_epi16(in_pixel0, 2), xmm_00ff);
+            in_pixel1_shft_r_2    = _mm_and_si128(_mm_srli_epi16(in_pixel1, 2), xmm_00ff);
+            in_pixel0_shft_r_2_u8 = _mm_packus_epi16(in_pixel0_shft_r_2, in_pixel0_shft_r_2);
+            in_pixel1_shft_r_2_u8 = _mm_packus_epi16(in_pixel1_shft_r_2, in_pixel1_shft_r_2);
+            _mm_storel_epi64((__m128i *)out8_bit_buffer, in_pixel0_shft_r_2_u8);
+            _mm_storel_epi64((__m128i *)(out8_bit_buffer + out8_stride), in_pixel1_shft_r_2_u8);
+
+            out8_bit_buffer += 2 * out8_stride;
+            in16_bit_buffer += 2 * in_stride;
+        }
+    } else if (width == 16) {
+        __m256i in_pixel_0, in_pixel_1, in_pixel_0_shft_r_2_u8, temp_pixel_0_u8;
+
+        for (y = 0; y < height; y += 2) {
+            in_pixel_0 = _mm256_loadu_si256((__m256i *)in16_bit_buffer);
+            in_pixel_1 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + in_stride));
+
+            if (outn_bit_buffer) {
+                temp_pixel_0_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_0, ymm_3),
+                                                      _mm256_and_si256(in_pixel_1, ymm_3));
+                temp_pixel_0_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(temp_pixel_0_u8, 0xd8),
+                                                    6);
+                _mm_storeu_si128((__m128i *)outn_bit_buffer,
+                                 _mm256_castsi256_si128(temp_pixel_0_u8));
+                _mm_storeu_si128((__m128i *)(outn_bit_buffer + outn_stride),
+                                 _mm256_extracti128_si256(temp_pixel_0_u8, 1));
+                outn_bit_buffer += 2 * outn_stride;
+            }
+
+            in_pixel_0_shft_r_2_u8 = _mm256_packus_epi16(
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_0, 2), ymm_00ff),
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_1, 2), ymm_00ff));
+            in_pixel_0_shft_r_2_u8 = _mm256_permute4x64_epi64(in_pixel_0_shft_r_2_u8, 0xd8);
+            _mm_storeu_si128((__m128i *)out8_bit_buffer,
+                             _mm256_castsi256_si128(in_pixel_0_shft_r_2_u8));
+            _mm_storeu_si128((__m128i *)(out8_bit_buffer + out8_stride),
+                             _mm256_extracti128_si256(in_pixel_0_shft_r_2_u8, 1));
+
+            out8_bit_buffer += 2 * out8_stride;
+            in16_bit_buffer += 2 * in_stride;
+        }
+    } else if (width == 32) {
+        __m256i in_pixel_0, in_pixel_1, in_pixel_2, in_pixel_3;
+        __m256i outn0_u8, outn1_u8;
+        __m256i out8_0_u8, out8_1_u8;
+
+        for (y = 0; y < height; y += 2) {
+            in_pixel_0 = _mm256_loadu_si256((__m256i *)in16_bit_buffer);
+            in_pixel_1 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + 16));
+            in_pixel_2 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + in_stride));
+            in_pixel_3 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + in_stride + 16));
+
+            if (outn_bit_buffer) {
+                outn0_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_0, ymm_3),
+                                               _mm256_and_si256(in_pixel_1, ymm_3));
+                outn1_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_2, ymm_3),
+                                               _mm256_and_si256(in_pixel_3, ymm_3));
+                outn0_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn0_u8, 0xd8), 6);
+                outn1_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn1_u8, 0xd8), 6);
+                _mm256_storeu_si256((__m256i *)outn_bit_buffer, outn0_u8);
+                _mm256_storeu_si256((__m256i *)(outn_bit_buffer + outn_stride), outn1_u8);
+                outn_bit_buffer += 2 * outn_stride;
+            }
+
+            out8_0_u8 = _mm256_packus_epi16(
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_0, 2), ymm_00ff),
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_1, 2), ymm_00ff));
+            out8_1_u8 = _mm256_packus_epi16(
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_2, 2), ymm_00ff),
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_3, 2), ymm_00ff));
+            out8_0_u8 = _mm256_permute4x64_epi64(out8_0_u8, 0xd8);
+            out8_1_u8 = _mm256_permute4x64_epi64(out8_1_u8, 0xd8);
+            _mm256_storeu_si256((__m256i *)out8_bit_buffer, out8_0_u8);
+            _mm256_storeu_si256((__m256i *)(out8_bit_buffer + out8_stride), out8_1_u8);
+
+            out8_bit_buffer += 2 * out8_stride;
+            in16_bit_buffer += 2 * in_stride;
+        }
+    } else if (width == 64) {
+        __m256i in_pixel_0, in_pixel_1, in_pixel_2, in_pixel_3;
+        __m256i outn0_u8, outn1_u8;
+        __m256i out8_0_u8, out8_1_u8;
+
+        for (y = 0; y < height; ++y) {
+            in_pixel_0 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + y * in_stride));
+            in_pixel_1 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + y * in_stride + 16));
+            in_pixel_2 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + y * in_stride + 32));
+            in_pixel_3 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + y * in_stride + 48));
+
+            if (outn_bit_buffer) {
+                outn0_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_0, ymm_3),
+                                               _mm256_and_si256(in_pixel_1, ymm_3));
+                outn1_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_2, ymm_3),
+                                               _mm256_and_si256(in_pixel_3, ymm_3));
+                outn0_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn0_u8, 0xd8), 6);
+                outn1_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn1_u8, 0xd8), 6);
+                _mm256_storeu_si256((__m256i *)(outn_bit_buffer + y * outn_stride), outn0_u8);
+                _mm256_storeu_si256((__m256i *)(outn_bit_buffer + y * outn_stride + 32), outn1_u8);
+            }
+
+            out8_0_u8 = _mm256_packus_epi16(
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_0, 2), ymm_00ff),
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_1, 2), ymm_00ff));
+            out8_1_u8 = _mm256_packus_epi16(
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_2, 2), ymm_00ff),
+                _mm256_and_si256(_mm256_srli_epi16(in_pixel_3, 2), ymm_00ff));
+            out8_0_u8 = _mm256_permute4x64_epi64(out8_0_u8, 0xd8);
+            out8_1_u8 = _mm256_permute4x64_epi64(out8_1_u8, 0xd8);
+            _mm256_storeu_si256((__m256i *)(out8_bit_buffer + y * out8_stride), out8_0_u8);
+            _mm256_storeu_si256((__m256i *)(out8_bit_buffer + y * out8_stride + 32), out8_1_u8);
+        }
+
+    } else {
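+        // Generic path for widths without a dedicated fast-path branch: the
+        // *_diff counters advance each pointer past the row pair just
+        // processed, while the *_diff64 counters serve the single-row loop
+        // used for multiples of 64.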
+        uint32_t in_stride_diff    = (2 * in_stride) - width;
+        uint32_t out8_stride_diff  = (2 * out8_stride) - width;
+        uint32_t out_n_stride_diff = (2 * outn_stride) - width;
+
+        uint32_t in_stride_diff64    = in_stride - width;
+        uint32_t out8_stride_diff64  = out8_stride - width;
+        uint32_t out_n_stride_diff64 = outn_stride - width;
+
+        if (!(width & 63)) {
+            __m256i in_pixel_0, in_pixel_1, in_pixel_2, in_pixel_3;
+            __m256i outn0_u8, outn1_u8;
+            __m256i out8_0_u8, out8_1_u8;
+
+            for (x = 0; x < height; x += 1) {
+                for (y = 0; y < width; y += 64) {
+                    in_pixel_0 = _mm256_loadu_si256((__m256i *)in16_bit_buffer);
+                    in_pixel_1 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + 16));
+                    in_pixel_2 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + 32));
+                    in_pixel_3 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + 48));
+
+                    if (outn_bit_buffer) {
+                        outn0_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_0, ymm_3),
+                                                       _mm256_and_si256(in_pixel_1, ymm_3));
+                        outn1_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_2, ymm_3),
+                                                       _mm256_and_si256(in_pixel_3, ymm_3));
+                        outn0_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn0_u8, 0xd8), 6);
+                        outn1_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn1_u8, 0xd8), 6);
+                        _mm256_storeu_si256((__m256i *)outn_bit_buffer, outn0_u8);
+                        _mm256_storeu_si256((__m256i *)(outn_bit_buffer + 32), outn1_u8);
+                        outn_bit_buffer += 64;
+                    }
+
+                    out8_0_u8 = _mm256_packus_epi16(
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_0, 2), ymm_00ff),
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_1, 2), ymm_00ff));
+                    out8_1_u8 = _mm256_packus_epi16(
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_2, 2), ymm_00ff),
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_3, 2), ymm_00ff));
+                    out8_0_u8 = _mm256_permute4x64_epi64(out8_0_u8, 0xd8);
+                    out8_1_u8 = _mm256_permute4x64_epi64(out8_1_u8, 0xd8);
+                    _mm256_storeu_si256((__m256i *)out8_bit_buffer, out8_0_u8);
+                    _mm256_storeu_si256((__m256i *)(out8_bit_buffer + 32), out8_1_u8);
+
+                    out8_bit_buffer += 64;
+                    in16_bit_buffer += 64;
+                }
+                in16_bit_buffer += in_stride_diff64;
+                if (outn_bit_buffer)
+                    outn_bit_buffer += out_n_stride_diff64;
+                out8_bit_buffer += out8_stride_diff64;
+            }
+        } else if (!(width & 31)) {
+            __m256i in_pixel_0, in_pixel_1, in_pixel_2, in_pixel_3;
+            __m256i outn0_u8, outn1_u8;
+            __m256i out8_0_u8, out8_1_u8;
+
+            for (x = 0; x < height; x += 2) {
+                for (y = 0; y < width; y += 32) {
+                    in_pixel_0 = _mm256_loadu_si256((__m256i *)in16_bit_buffer);
+                    in_pixel_1 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + 16));
+                    in_pixel_2 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + in_stride));
+                    in_pixel_3 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + in_stride + 16));
+
+                    if (outn_bit_buffer) {
+                        outn0_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_0, ymm_3),
+                                                       _mm256_and_si256(in_pixel_1, ymm_3));
+                        outn1_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_2, ymm_3),
+                                                       _mm256_and_si256(in_pixel_3, ymm_3));
+                        outn0_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn0_u8, 0xd8), 6);
+                        outn1_u8 = _mm256_slli_epi16(_mm256_permute4x64_epi64(outn1_u8, 0xd8), 6);
+                        _mm256_storeu_si256((__m256i *)outn_bit_buffer, outn0_u8);
+                        _mm256_storeu_si256((__m256i *)(outn_bit_buffer + outn_stride), outn1_u8);
+                        outn_bit_buffer += 32;
+                    }
+
+                    out8_0_u8 = _mm256_packus_epi16(
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_0, 2), ymm_00ff),
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_1, 2), ymm_00ff));
+                    out8_1_u8 = _mm256_packus_epi16(
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_2, 2), ymm_00ff),
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_3, 2), ymm_00ff));
+                    out8_0_u8 = _mm256_permute4x64_epi64(out8_0_u8, 0xd8);
+                    out8_1_u8 = _mm256_permute4x64_epi64(out8_1_u8, 0xd8);
+                    _mm256_storeu_si256((__m256i *)out8_bit_buffer, out8_0_u8);
+                    _mm256_storeu_si256((__m256i *)(out8_bit_buffer + out8_stride), out8_1_u8);
+
+                    out8_bit_buffer += 32;
+                    in16_bit_buffer += 32;
+                }
+                in16_bit_buffer += in_stride_diff;
+                if (outn_bit_buffer)
+                    outn_bit_buffer += out_n_stride_diff;
+                out8_bit_buffer += out8_stride_diff;
+            }
+        } else if (!(width & 15)) {
+            __m256i in_pixel_0, in_pixel_1, in_pixel_0_shft_r_2_u8, temp_pixel_0_u8;
+
+            for (x = 0; x < height; x += 2) {
+                for (y = 0; y < width; y += 16) {
+                    in_pixel_0 = _mm256_loadu_si256((__m256i *)in16_bit_buffer);
+                    in_pixel_1 = _mm256_loadu_si256((__m256i *)(in16_bit_buffer + in_stride));
+
+                    if (outn_bit_buffer) {
+                        temp_pixel_0_u8 = _mm256_packus_epi16(_mm256_and_si256(in_pixel_0, ymm_3),
+                                                              _mm256_and_si256(in_pixel_1, ymm_3));
+                        temp_pixel_0_u8 = _mm256_slli_epi16(
+                            _mm256_permute4x64_epi64(temp_pixel_0_u8, 0xd8), 6);
+                        _mm_storeu_si128((__m128i *)outn_bit_buffer,
+                                         _mm256_castsi256_si128(temp_pixel_0_u8));
+                        _mm_storeu_si128((__m128i *)(outn_bit_buffer + outn_stride),
+                                         _mm256_extracti128_si256(temp_pixel_0_u8, 1));
+                        outn_bit_buffer += 16;
+                    }
+
+                    in_pixel_0_shft_r_2_u8 = _mm256_packus_epi16(
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_0, 2), ymm_00ff),
+                        _mm256_and_si256(_mm256_srli_epi16(in_pixel_1, 2), ymm_00ff));
+                    in_pixel_0_shft_r_2_u8 = _mm256_permute4x64_epi64(in_pixel_0_shft_r_2_u8, 0xd8);
+                    _mm_storeu_si128((__m128i *)out8_bit_buffer,
+                                     _mm256_castsi256_si128(in_pixel_0_shft_r_2_u8));
+                    _mm_storeu_si128((__m128i *)(out8_bit_buffer + out8_stride),
+                                     _mm256_extracti128_si256(in_pixel_0_shft_r_2_u8, 1));
+
+                    out8_bit_buffer += 16;
+                    in16_bit_buffer += 16;
+                }
+                in16_bit_buffer += in_stride_diff;
+                if (outn_bit_buffer)
+                    outn_bit_buffer += out_n_stride_diff;
+                out8_bit_buffer += out8_stride_diff;
+            }
+        } else if (!(width & 7)) {
+            for (x = 0; x < height; x += 2) {
+                for (y = 0; y < width; y += 8) {
+                    in_pixel0 = _mm_loadu_si128((__m128i *)in16_bit_buffer);
+                    in_pixel1 = _mm_loadu_si128((__m128i *)(in16_bit_buffer + in_stride));
+
+                    if (outn_bit_buffer) {
+                        temp_pixel0    = _mm_slli_epi16(_mm_and_si128(in_pixel0, xmm_3), 6);
+                        temp_pixel1    = _mm_slli_epi16(_mm_and_si128(in_pixel1, xmm_3), 6);
+                        temp_pixel0_u8 = _mm_packus_epi16(temp_pixel0, temp_pixel0);
+                        temp_pixel1_u8 = _mm_packus_epi16(temp_pixel1, temp_pixel1);
+                        _mm_storel_epi64((__m128i *)outn_bit_buffer, temp_pixel0_u8);
+                        _mm_storel_epi64((__m128i *)(outn_bit_buffer + outn_stride),
+                                         temp_pixel1_u8);
+                        outn_bit_buffer += 8;
+                    }
+
+                    in_pixel0_shft_r_2 = _mm_and_si128(_mm_srli_epi16(in_pixel0, 2), xmm_00ff);
+                    in_pixel1_shft_r_2 = _mm_and_si128(_mm_srli_epi16(in_pixel1, 2), xmm_00ff);
+
+                    in_pixel0_shft_r_2_u8 = _mm_packus_epi16(in_pixel0_shft_r_2,
+                                                             in_pixel0_shft_r_2);
+                    in_pixel1_shft_r_2_u8 = _mm_packus_epi16(in_pixel1_shft_r_2,
+                                                             in_pixel1_shft_r_2);
+                    _mm_storel_epi64((__m128i *)out8_bit_buffer, in_pixel0_shft_r_2_u8);
+                    _mm_storel_epi64((__m128i *)(out8_bit_buffer + out8_stride),
+                                     in_pixel1_shft_r_2_u8);
+
+                    out8_bit_buffer += 8;
+                    in16_bit_buffer += 8;
+                }
+                in16_bit_buffer += in_stride_diff;
+                if (outn_bit_buffer)
+                    outn_bit_buffer += out_n_stride_diff;
+                out8_bit_buffer += out8_stride_diff;
+            }
+        } else {
+            for (x = 0; x < height; x += 2) {
+                for (y = 0; y < width; y += 4) {
+                    in_pixel0 = _mm_loadl_epi64((__m128i *)in16_bit_buffer);
+                    in_pixel1 = _mm_loadl_epi64((__m128i *)(in16_bit_buffer + in_stride));
+
+                    if (outn_bit_buffer) {
+                        temp_pixel0    = _mm_slli_epi16(_mm_and_si128(in_pixel0, xmm_3), 6);
+                        temp_pixel1    = _mm_slli_epi16(_mm_and_si128(in_pixel1, xmm_3), 6);
+                        temp_pixel0_u8 = _mm_packus_epi16(temp_pixel0, temp_pixel0);
+                        temp_pixel1_u8 = _mm_packus_epi16(temp_pixel1, temp_pixel1);
+                        *(uint32_t *)outn_bit_buffer = _mm_cvtsi128_si32(temp_pixel0_u8);
+                        *(uint32_t *)(outn_bit_buffer +
+                                      outn_stride)   = _mm_cvtsi128_si32(temp_pixel1_u8);
+                        outn_bit_buffer += 4;
+                    }
+
+                    in_pixel0_shft_r_2    = _mm_and_si128(_mm_srli_epi16(in_pixel0, 2), xmm_00ff);
+                    in_pixel1_shft_r_2    = _mm_and_si128(_mm_srli_epi16(in_pixel1, 2), xmm_00ff);
+                    in_pixel0_shft_r_2_u8 = _mm_packus_epi16(in_pixel0_shft_r_2,
+                                                             in_pixel0_shft_r_2);
+                    in_pixel1_shft_r_2_u8 = _mm_packus_epi16(in_pixel1_shft_r_2,
+                                                             in_pixel1_shft_r_2);
+                    *(uint32_t *)out8_bit_buffer = _mm_cvtsi128_si32(in_pixel0_shft_r_2_u8);
+                    *(uint32_t *)(out8_bit_buffer +
+                                  out8_stride)   = _mm_cvtsi128_si32(in_pixel1_shft_r_2_u8);
+
+                    out8_bit_buffer += 4;
+                    in16_bit_buffer += 4;
+                }
+                in16_bit_buffer += in_stride_diff;
+                if (outn_bit_buffer)
+                    outn_bit_buffer += out_n_stride_diff;
+                out8_bit_buffer += out8_stride_diff;
+            }
+        }
+    }
+    return;
+}
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Inline_AVX2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Inline_AVX2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Inline_AVX2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Inline_AVX2.h	2022-08-01 19:12:00.000000000 +0000
@@ -16,6 +16,7 @@
 #include "EbDefinitions.h"
 #include "EbMemory_AVX2.h"
 #include "EbPictureOperators_SSE2.h"
+#include "synonyms.h"
 
 #ifdef __cplusplus
 extern "C" {
@@ -188,7 +189,7 @@ static INLINE void spatial_full_distorti
 }
 
 static INLINE int32_t hadd32_avx2_intrin(const __m256i src) {
-    const __m128i src_l = _mm256_extracti128_si256(src, 0);
+    const __m128i src_l = _mm256_castsi256_si128(src);
     const __m128i src_h = _mm256_extracti128_si256(src, 1);
     const __m128i sum   = _mm_add_epi32(src_l, src_h);
 
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Intrinsic_AVX2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Intrinsic_AVX2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Intrinsic_AVX2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/EbPictureOperators_Intrinsic_AVX2.c	2022-08-01 19:12:00.000000000 +0000
@@ -390,12 +390,11 @@ void svt_c_pack_avx2_intrin(const uint8_
                 if ((y & 3) == 3) {
                     __m256i c0 = _mm256_loadu_si256((__m256i *)(local_ptr));
                     __m256i c1 = _mm256_loadu_si256((__m256i *)(local_ptr + 32));
-                    _mm_storeu_si128((__m128i *)(in_compn_bit_buffer),
-                                     _mm256_extractf128_si256(c0, 0));
+                    _mm_storeu_si128((__m128i *)(in_compn_bit_buffer), _mm256_castsi256_si128(c0));
                     _mm_storeu_si128((__m128i *)(in_compn_bit_buffer + out_stride),
                                      _mm256_extractf128_si256(c0, 1));
                     _mm_storeu_si128((__m128i *)(in_compn_bit_buffer + 2 * out_stride),
-                                     _mm256_extractf128_si256(c1, 0));
+                                     _mm256_castsi256_si128(c1));
                     _mm_storeu_si128((__m128i *)(in_compn_bit_buffer + 3 * out_stride),
                                      _mm256_extractf128_si256(c1, 1));
                     in_compn_bit_buffer += 4 * out_stride;
@@ -513,6 +512,40 @@ void svt_enc_msb_pack2d_avx2_intrin_al(u
             inn_bit_buffer += inn_stride << 1;
             out16_bit_buffer += out_stride << 1;
         }
+    } else if (width == 24) {
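+        // 24-pixel rows: pack the first 16 pixels with full 128-bit loads and
+        // the remaining 8 with 64-bit loads.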
+        __m128i in_n_bit, in_8_bit, in_n_bit_stride, in_8bit_stride, out2, out3, out4, out5;
+
+        for (y = 0; y < height; y += 2) {
+            in_n_bit        = _mm_loadu_si128((__m128i *)inn_bit_buffer);
+            in_8_bit        = _mm_loadu_si128((__m128i *)in8_bit_buffer);
+            in_n_bit_stride = _mm_loadu_si128((__m128i *)(inn_bit_buffer + inn_stride));
+            in_8bit_stride  = _mm_loadu_si128((__m128i *)(in8_bit_buffer + in8_stride));
+
+            out0 = _mm_srli_epi16(_mm_unpacklo_epi8(in_n_bit, in_8_bit), 6);
+            out1 = _mm_srli_epi16(_mm_unpackhi_epi8(in_n_bit, in_8_bit), 6);
+            out2 = _mm_srli_epi16(_mm_unpacklo_epi8(in_n_bit_stride, in_8bit_stride), 6);
+            out3 = _mm_srli_epi16(_mm_unpackhi_epi8(in_n_bit_stride, in_8bit_stride), 6);
+
+            out4 = _mm_srli_epi16(
+                _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(inn_bit_buffer + 16)),
+                                  _mm_loadl_epi64((__m128i *)(in8_bit_buffer + 16))),
+                6);
+            out5 = _mm_srli_epi16(
+                _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i *)(inn_bit_buffer + inn_stride + 16)),
+                                  _mm_loadl_epi64((__m128i *)(in8_bit_buffer + in8_stride + 16))),
+                6);
+
+            _mm_storeu_si128((__m128i *)out16_bit_buffer, out0);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + 8), out1);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride), out2);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride + 8), out3);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + 16), out4);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride + 16), out5);
+
+            in8_bit_buffer += in8_stride << 1;
+            inn_bit_buffer += inn_stride << 1;
+            out16_bit_buffer += out_stride << 1;
+        }
     } else if (width == 32) {
         __m256i in_n_bit, in_8_bit, in_n_bit_stride, in_8bit_stride, concat0, concat1, concat2,
             concat3;
@@ -542,8 +575,54 @@ void svt_enc_msb_pack2d_avx2_intrin_al(u
             _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride + 16), out_s16_s31);
 
             in8_bit_buffer += in8_stride << 1;
-            //inn_bit_buffer += inn_stride << 1;
-            inn_bit_buffer += inn_stride * 2;
+            inn_bit_buffer += inn_stride << 1;
+            out16_bit_buffer += out_stride << 1;
+        }
+    } else if (width == 48) {
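+        // 48-pixel rows: pack 32 pixels with 256-bit operations and the
+        // remaining 16 with a 128-bit tail.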
+        __m128i xx_in_n_bit, xx_in_8_bit, xx_in_n_bit_stride, xx_in_8bit_stride, out2, out3;
+        __m256i in_n_bit, in_8_bit, in_n_bit_stride, in_8bit_stride, concat0, concat1, concat2,
+            concat3;
+        __m256i out0_15, out16_31, out_s0_s15, out_s16_s31;
+
+        for (y = 0; y < height; y += 2) {
+            in_n_bit           = _mm256_loadu_si256((__m256i *)inn_bit_buffer);
+            in_8_bit           = _mm256_loadu_si256((__m256i *)in8_bit_buffer);
+            in_n_bit_stride    = _mm256_loadu_si256((__m256i *)(inn_bit_buffer + inn_stride));
+            in_8bit_stride     = _mm256_loadu_si256((__m256i *)(in8_bit_buffer + in8_stride));
+            xx_in_n_bit        = _mm_loadu_si128((__m128i *)(inn_bit_buffer + 32));
+            xx_in_8_bit        = _mm_loadu_si128((__m128i *)(in8_bit_buffer + 32));
+            xx_in_n_bit_stride = _mm_loadu_si128((__m128i *)(inn_bit_buffer + inn_stride + 32));
+            xx_in_8bit_stride  = _mm_loadu_si128((__m128i *)(in8_bit_buffer + in8_stride + 32));
+
+            //(out_pixel | n_bit_pixel) concatenation is done with unpacklo_epi8 and unpackhi_epi8
+            concat0 = _mm256_srli_epi16(_mm256_unpacklo_epi8(in_n_bit, in_8_bit), 6);
+            concat1 = _mm256_srli_epi16(_mm256_unpackhi_epi8(in_n_bit, in_8_bit), 6);
+            concat2 = _mm256_srli_epi16(_mm256_unpacklo_epi8(in_n_bit_stride, in_8bit_stride), 6);
+            concat3 = _mm256_srli_epi16(_mm256_unpackhi_epi8(in_n_bit_stride, in_8bit_stride), 6);
+
+            out0 = _mm_srli_epi16(_mm_unpacklo_epi8(xx_in_n_bit, xx_in_8_bit), 6);
+            out1 = _mm_srli_epi16(_mm_unpackhi_epi8(xx_in_n_bit, xx_in_8_bit), 6);
+            out2 = _mm_srli_epi16(_mm_unpacklo_epi8(xx_in_n_bit_stride, xx_in_8bit_stride), 6);
+            out3 = _mm_srli_epi16(_mm_unpackhi_epi8(xx_in_n_bit_stride, xx_in_8bit_stride), 6);
+
+            //Re-organize the packing for writing to the out buffer
+            out0_15     = _mm256_inserti128_si256(concat0, _mm256_castsi256_si128(concat1), 1);
+            out16_31    = _mm256_inserti128_si256(concat1, _mm256_extracti128_si256(concat0, 1), 0);
+            out_s0_s15  = _mm256_inserti128_si256(concat2, _mm256_castsi256_si128(concat3), 1);
+            out_s16_s31 = _mm256_inserti128_si256(concat3, _mm256_extracti128_si256(concat2, 1), 0);
+
+            _mm256_storeu_si256((__m256i *)out16_bit_buffer, out0_15);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + 16), out16_31);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride), out_s0_s15);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride + 16), out_s16_s31);
+
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + 32), out0);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + 40), out1);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride + 32), out2);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride + 40), out3);
+
+            in8_bit_buffer += in8_stride << 1;
+            inn_bit_buffer += inn_stride << 1;
             out16_bit_buffer += out_stride << 1;
         }
     } else if (width == 64) {
@@ -595,8 +674,74 @@ void svt_enc_msb_pack2d_avx2_intrin_al(u
             _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride + 48), out_s48_s63);
 
             in8_bit_buffer += in8_stride << 1;
-            //inn_bit_buffer += inn_stride << 1;
-            inn_bit_buffer += inn_stride * 2;
+            inn_bit_buffer += inn_stride << 1;
+            out16_bit_buffer += out_stride << 1;
+        }
+    } else if (width == 80) {
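+        // 80-pixel rows: pack 64 pixels with 256-bit operations and the
+        // remaining 16 with a 128-bit tail.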
+        __m128i xx_in_n_bit, xx_in_8_bit, xx_in_n_bit_stride, xx_in_8bit_stride, out2, out3;
+        __m256i in_n_bit, in_8_bit, in_n_bit_stride, in_8bit_stride, in_n_bit32, in_8_bit32,
+            in_n_bit_stride32, in_8bit_stride32;
+        __m256i concat0, concat1, concat2, concat3, concat4, concat5, concat6, concat7;
+        __m256i out_0_15, out16_31, out32_47, out_48_63, out_s0_s15, out_s16_s31, out_s32_s47,
+            out_s48_s63;
+
+        for (y = 0; y < height; y += 2) {
+            in_n_bit           = _mm256_loadu_si256((__m256i *)inn_bit_buffer);
+            in_8_bit           = _mm256_loadu_si256((__m256i *)in8_bit_buffer);
+            in_n_bit32         = _mm256_loadu_si256((__m256i *)(inn_bit_buffer + 32));
+            in_8_bit32         = _mm256_loadu_si256((__m256i *)(in8_bit_buffer + 32));
+            in_n_bit_stride    = _mm256_loadu_si256((__m256i *)(inn_bit_buffer + inn_stride));
+            in_8bit_stride     = _mm256_loadu_si256((__m256i *)(in8_bit_buffer + in8_stride));
+            in_n_bit_stride32  = _mm256_loadu_si256((__m256i *)(inn_bit_buffer + inn_stride + 32));
+            in_8bit_stride32   = _mm256_loadu_si256((__m256i *)(in8_bit_buffer + in8_stride + 32));
+            xx_in_n_bit        = _mm_loadu_si128((__m128i *)(inn_bit_buffer + 64));
+            xx_in_8_bit        = _mm_loadu_si128((__m128i *)(in8_bit_buffer + 64));
+            xx_in_n_bit_stride = _mm_loadu_si128((__m128i *)(inn_bit_buffer + inn_stride + 64));
+            xx_in_8bit_stride  = _mm_loadu_si128((__m128i *)(in8_bit_buffer + in8_stride + 64));
+            //(out_pixel | n_bit_pixel) concatenation is done with unpacklo_epi8 and unpackhi_epi8
+            concat0 = _mm256_srli_epi16(_mm256_unpacklo_epi8(in_n_bit, in_8_bit), 6);
+            concat1 = _mm256_srli_epi16(_mm256_unpackhi_epi8(in_n_bit, in_8_bit), 6);
+            concat2 = _mm256_srli_epi16(_mm256_unpacklo_epi8(in_n_bit32, in_8_bit32), 6);
+            concat3 = _mm256_srli_epi16(_mm256_unpackhi_epi8(in_n_bit32, in_8_bit32), 6);
+            concat4 = _mm256_srli_epi16(_mm256_unpacklo_epi8(in_n_bit_stride, in_8bit_stride), 6);
+            concat5 = _mm256_srli_epi16(_mm256_unpackhi_epi8(in_n_bit_stride, in_8bit_stride), 6);
+            concat6 = _mm256_srli_epi16(_mm256_unpacklo_epi8(in_n_bit_stride32, in_8bit_stride32),
+                                        6);
+            concat7 = _mm256_srli_epi16(_mm256_unpackhi_epi8(in_n_bit_stride32, in_8bit_stride32),
+                                        6);
+
+            out0 = _mm_srli_epi16(_mm_unpacklo_epi8(xx_in_n_bit, xx_in_8_bit), 6);
+            out1 = _mm_srli_epi16(_mm_unpackhi_epi8(xx_in_n_bit, xx_in_8_bit), 6);
+            out2 = _mm_srli_epi16(_mm_unpacklo_epi8(xx_in_n_bit_stride, xx_in_8bit_stride), 6);
+            out3 = _mm_srli_epi16(_mm_unpackhi_epi8(xx_in_n_bit_stride, xx_in_8bit_stride), 6);
+
+            //Re-organize the packing for writing to the out buffer
+            out_0_15    = _mm256_inserti128_si256(concat0, _mm256_castsi256_si128(concat1), 1);
+            out16_31    = _mm256_inserti128_si256(concat1, _mm256_extracti128_si256(concat0, 1), 0);
+            out32_47    = _mm256_inserti128_si256(concat2, _mm256_castsi256_si128(concat3), 1);
+            out_48_63   = _mm256_inserti128_si256(concat3, _mm256_extracti128_si256(concat2, 1), 0);
+            out_s0_s15  = _mm256_inserti128_si256(concat4, _mm256_castsi256_si128(concat5), 1);
+            out_s16_s31 = _mm256_inserti128_si256(concat5, _mm256_extracti128_si256(concat4, 1), 0);
+            out_s32_s47 = _mm256_inserti128_si256(concat6, _mm256_castsi256_si128(concat7), 1);
+            out_s48_s63 = _mm256_inserti128_si256(concat7, _mm256_extracti128_si256(concat6, 1), 0);
+
+            _mm256_storeu_si256((__m256i *)out16_bit_buffer, out_0_15);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + 16), out16_31);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + 32), out32_47);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + 48), out_48_63);
+
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride), out_s0_s15);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride + 16), out_s16_s31);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride + 32), out_s32_s47);
+            _mm256_storeu_si256((__m256i *)(out16_bit_buffer + out_stride + 48), out_s48_s63);
+
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + 64), out0);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + 72), out1);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride + 64), out2);
+            _mm_storeu_si128((__m128i *)(out16_bit_buffer + out_stride + 72), out3);
+
+            in8_bit_buffer += in8_stride << 1;
+            inn_bit_buffer += inn_stride << 1;
             out16_bit_buffer += out_stride << 1;
         }
     } else {
@@ -1093,7 +1238,7 @@ void svt_unpack_avg_avx2_intrin(uint16_t
 
 void svt_unpack_avg_safe_sub_avx2_intrin(uint16_t *ref16_l0, uint32_t ref_l0_stride,
                                          uint16_t *ref16_l1, uint32_t ref_l1_stride,
-                                         uint8_t *dst_ptr, uint32_t dst_stride, EbBool sub_pred,
+                                         uint8_t *dst_ptr, uint32_t dst_stride, Bool sub_pred,
                                          uint32_t width, uint32_t height) {
     uint32_t y;
     __m128i  in_pixel0, in_pixel1;
@@ -2201,19 +2346,159 @@ static INLINE void svt_unpack_and_2bcomp
     }
 }
 
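+// Byte-transpose of four 32-byte rows: out[k] gathers every fourth byte,
+// starting at offset k, from all four input rows (see the layout comments
+// below). This lines up the 2-bit residuals of four horizontally adjacent
+// pixels so they can be OR-ed into a single byte.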
+static INLINE void transpose(__m256i out[4], __m256i in[4]) {
+    const __m256i shuffle_transpose_128 = _mm256_setr_epi8(
+        0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15,
+        0, 4, 8, 12, 1, 5, 9, 13, 2, 6, 10, 14, 3, 7, 11, 15);
+    //in[0] = 00 01 02 03 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F  10 11 12 13 14 15 16 17  18 19 1A 1B 1C 1D 1E 1F
+    //in[1] = 20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F  30 31 32 33 34 35 36 37  38 39 3A 3B 3C 3D 3E 3F
+    //in[2] = 40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F  50 51 52 53 54 55 56 57  58 59 5A 5B 5C 5D 5E 5F
+    //in[3] = 60 61 62 63 64 65 66 67  68 69 6A 6B 6C 6D 6E 6F  70 71 72 73 74 75 76 77  78 79 7A 7B 7C 7D 7E 7F
+
+    __m256i A = _mm256_shuffle_epi8(in[0], shuffle_transpose_128);
+    __m256i B = _mm256_shuffle_epi8(in[1], shuffle_transpose_128);
+    __m256i C = _mm256_shuffle_epi8(in[2], shuffle_transpose_128);
+    __m256i D = _mm256_shuffle_epi8(in[3], shuffle_transpose_128);
+
+    A = _mm256_permute4x64_epi64(A, 0xd8); //ACBD:ABCD
+    B = _mm256_permute4x64_epi64(B, 0xd8); //ACBD:ABCD
+    C = _mm256_permute4x64_epi64(C, 0xd8); //ACBD:ABCD
+    D = _mm256_permute4x64_epi64(D, 0xd8); //ACBD:ABCD
+
+    A = _mm256_shuffle_epi32(A, 0xd8); //ACBDEGFH:ABCDEFGH
+    B = _mm256_shuffle_epi32(B, 0xd8); //ACBDEGFH:ABCDEFGH
+    C = _mm256_shuffle_epi32(C, 0xd8); //ACBDEGFH:ABCDEFGH
+    D = _mm256_shuffle_epi32(D, 0xd8); //ACBDEGFH:ABCDEFGH
+
+    __m256i t0 = _mm256_unpacklo_epi64(A, B);
+    __m256i t1 = _mm256_unpackhi_epi64(A, B);
+    __m256i t2 = _mm256_unpacklo_epi64(C, D);
+    __m256i t3 = _mm256_unpackhi_epi64(C, D);
+
+    //out[0] = 00 04 08 0C 10 14 18 1C  20 24 28 2C 30 34 38 3C  40 44 48 4C 50 54 58 5C  60 64 68 6C 70 74 78 7C
+    //out[1] = 01 05 09 0D 11 15 19 1D  21 25 29 2D 31 35 39 3D  41 45 49 4D 51 55 59 5D  61 65 69 6D 71 75 79 7D
+    //out[2] = 02 06 0A 0E 12 16 1A 1E  22 26 2A 2E 32 36 3A 3E  42 46 4A 4E 52 56 5A 5E  62 66 6A 6E 72 76 7A 7E
+    //out[3] = 03 07 0B 0F 13 17 1B 1F  23 27 2B 2F 33 37 3B 3F  43 47 4B 4F 53 57 5B 5F  63 67 6B 6F 73 77 7B 7F
+    out[0] = _mm256_permute2x128_si256(t0, t2, 0x20); //low halves of t0 and t2
+    out[1] = _mm256_permute2x128_si256(t1, t3, 0x20); //low halves of t1 and t3
+    out[2] = _mm256_permute2x128_si256(t0, t2, 0x31); //high halves of t0 and t2
+    out[3] = _mm256_permute2x128_si256(t1, t3, 0x31); //high halves of t1 and t3
+}
+
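+// Compress a 32x4 tile: the 8 MSBs of each (assumed 10-bit) pixel go to
+// out8b_buffer, and the 2 LSBs of four horizontally adjacent pixels are
+// packed into one byte of out2b_buffer (8 bytes per row). Scalar sketch:
+//   out2b[r][c] = (p0 << 6) | (p1 << 4) | (p2 << 2) | p3,
+//   where pk = in16b[r][4 * c + k] & 3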
+static INLINE void unpack_and_2bcompress_32x4(uint16_t *in16b_buffer, uint8_t *out8b_buffer,
+                                              uint8_t *out2b_buffer, uint32_t in16_stride,
+                                              uint32_t out8_stride, uint32_t out2_stride) {
+    __m256i ymm_00ff = _mm256_set1_epi16(0x00FF);
+    __m256i msk_2b   = _mm256_set1_epi16(0x0003); //0000.0000.0000.0011
+    __m256i in0, in1;
+    __m256i in_buff[4];
+    __m256i tmp0, tmp1;
+    __m128i out0, out1;
+
+    for (int i = 0; i < 4; i++) {
+        //load 16b input
+        in0 = _mm256_loadu_si256((__m256i *)(in16b_buffer + i * in16_stride));
+        in1 = _mm256_loadu_si256((__m256i *)(in16b_buffer + i * in16_stride + 16));
+        //extract 8 most significant bits
+        tmp0 = _mm256_and_si256(_mm256_srli_epi16(in0, 2), ymm_00ff);
+        tmp1 = _mm256_and_si256(_mm256_srli_epi16(in1, 2), ymm_00ff);
+        //convert 16bit values to 8bit
+        out0 = _mm_packus_epi16(_mm256_castsi256_si128(tmp0), _mm256_extracti128_si256(tmp0, 1));
+        out1 = _mm_packus_epi16(_mm256_castsi256_si128(tmp1), _mm256_extracti128_si256(tmp1, 1));
+        //store 8bit buffer
+        _mm_storeu_si128((__m128i *)(out8b_buffer + i * out8_stride), out0);
+        _mm_storeu_si128((__m128i *)(out8b_buffer + i * out8_stride + 16), out1);
+
+        //extract 2 least significant bits
+        in0 = _mm256_and_si256(in0, msk_2b);
+        in1 = _mm256_and_si256(in1, msk_2b);
+
+        in_buff[i] = _mm256_permute4x64_epi64(_mm256_packs_epi16(in0, in1), 0xd8);
+    }
+
+    transpose(in_buff, in_buff);
+
+    in_buff[0] = _mm256_slli_epi16(in_buff[0], 6);
+    in_buff[1] = _mm256_slli_epi16(in_buff[1], 4);
+    in_buff[2] = _mm256_slli_epi16(in_buff[2], 2);
+
+    tmp0 = _mm256_or_si256(_mm256_or_si256(in_buff[0], in_buff[1]),
+                           _mm256_or_si256(in_buff[2], in_buff[3]));
+
+    _mm_storel_epi64((__m128i *)(out2b_buffer), _mm256_castsi256_si128(tmp0));
+    _mm_storeh_epi64((__m128i *)(out2b_buffer + out2_stride), _mm256_castsi256_si128(tmp0));
+    _mm_storel_epi64((__m128i *)(out2b_buffer + 2 * out2_stride),
+                     _mm256_extracti128_si256(tmp0, 1));
+    _mm_storeh_epi64((__m128i *)(out2b_buffer + 3 * out2_stride),
+                     _mm256_extracti128_si256(tmp0, 1));
+}
+
 void svt_unpack_and_2bcompress_avx2(uint16_t *in16b_buffer, uint32_t in16b_stride,
                                     uint8_t *out8b_buffer, uint32_t out8b_stride,
                                     uint8_t *out2b_buffer, uint32_t out2b_stride, uint32_t width,
                                     uint32_t height) {
+    uint32_t leftover_h4 = height & 3;
+    uint32_t h           = 0;
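+    // Rows are processed four at a time with the 32x4 kernel above; any
+    // leftover rows (height % 4) fall back to the single-row path.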
     if (width == 32) {
-        for (uint32_t h = 0; h < height; h++) {
+        for (; h < height - leftover_h4; h += 4) {
+            unpack_and_2bcompress_32x4(in16b_buffer + h * in16b_stride,
+                                       out8b_buffer + h * out8b_stride,
+                                       out2b_buffer + h * out2b_stride,
+                                       in16b_stride,
+                                       out8b_stride,
+                                       out2b_stride);
+        }
+        for (; h < height; h++) {
             unpack_and_2bcompress_32(in16b_buffer + h * in16b_stride,
                                      out8b_buffer + h * out8b_stride,
                                      out2b_buffer + h * out2b_stride,
                                      1);
         }
     } else if (width == 64) {
-        for (uint32_t h = 0; h < height; h++) {
+        for (; h < height - leftover_h4; h += 4) {
+            unpack_and_2bcompress_32x4(in16b_buffer + h * in16b_stride,
+                                       out8b_buffer + h * out8b_stride,
+                                       out2b_buffer + h * out2b_stride,
+                                       in16b_stride,
+                                       out8b_stride,
+                                       out2b_stride);
+            unpack_and_2bcompress_32x4(in16b_buffer + h * in16b_stride + 32,
+                                       out8b_buffer + h * out8b_stride + 32,
+                                       out2b_buffer + h * out2b_stride + 8,
+                                       in16b_stride,
+                                       out8b_stride,
+                                       out2b_stride);
+        }
+        for (; h < height; h++) {
             unpack_and_2bcompress_32(in16b_buffer + h * in16b_stride,
                                      out8b_buffer + h * out8b_stride,
                                      out2b_buffer + h * out2b_stride,
@@ -2223,7 +2508,24 @@ void svt_unpack_and_2bcompress_avx2(uint
         uint32_t offset_rem   = width & 0xffffffe0;
         uint32_t offset2b_rem = offset_rem >> 2;
         uint32_t remainder    = width & 0x1f;
-        for (uint32_t h = 0; h < height; h++) {
+        for (; h < height - leftover_h4; h += 4) {
+            for (uint32_t w = 0; w < (width >> 5); w++)
+                unpack_and_2bcompress_32x4(in16b_buffer + h * in16b_stride + w * 32,
+                                           out8b_buffer + h * out8b_stride + w * 32,
+                                           out2b_buffer + h * out2b_stride + w * 8,
+                                           in16b_stride,
+                                           out8b_stride,
+                                           out2b_stride);
+            if (remainder) {
+                for (uint32_t hh = 0; hh < 4; hh++)
+                    svt_unpack_and_2bcompress_remainder(
+                        in16b_buffer + (h + hh) * in16b_stride + offset_rem,
+                        out8b_buffer + (h + hh) * out8b_stride + offset_rem,
+                        out2b_buffer + (h + hh) * out2b_stride + offset2b_rem,
+                        remainder);
+            }
+        }
+        for (; h < height; h++) {
             unpack_and_2bcompress_32(in16b_buffer + h * in16b_stride,
                                      out8b_buffer + h * out8b_stride,
                                      out2b_buffer + h * out2b_stride,
@@ -2236,3 +2538,207 @@ void svt_unpack_and_2bcompress_avx2(uint
         }
     }
 }
+
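+// Widen 16 packed int16 coefficients to int32 and store them: mulhi/mullo
+// against 1 yield the high/low 16-bit halves of each sign-extended value,
+// which the unpacks then interleave (per 128-bit lane).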
+static INLINE void store_tran_low(__m256i a, int32_t *b) {
+    const __m256i one  = _mm256_set1_epi16(1);
+    const __m256i a_hi = _mm256_mulhi_epi16(a, one);
+    const __m256i a_lo = _mm256_mullo_epi16(a, one);
+    const __m256i a_1  = _mm256_unpacklo_epi16(a_lo, a_hi);
+    const __m256i a_2  = _mm256_unpackhi_epi16(a_lo, a_hi);
+    _mm256_storeu_si256((__m256i *)b, a_1);
+    _mm256_storeu_si256((__m256i *)(b + 8), a_2);
+}
+
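+// One pass of an 8-point Hadamard butterfly applied to two 8x8 blocks at
+// once (one per 128-bit lane). On iter 0 the result is also transposed so
+// that the second pass operates on columns.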
+static void hadamard_col8x2_avx2(__m256i *in, int iter) {
+    __m256i a0 = in[0];
+    __m256i a1 = in[1];
+    __m256i a2 = in[2];
+    __m256i a3 = in[3];
+    __m256i a4 = in[4];
+    __m256i a5 = in[5];
+    __m256i a6 = in[6];
+    __m256i a7 = in[7];
+
+    __m256i b0 = _mm256_add_epi16(a0, a1);
+    __m256i b1 = _mm256_sub_epi16(a0, a1);
+    __m256i b2 = _mm256_add_epi16(a2, a3);
+    __m256i b3 = _mm256_sub_epi16(a2, a3);
+    __m256i b4 = _mm256_add_epi16(a4, a5);
+    __m256i b5 = _mm256_sub_epi16(a4, a5);
+    __m256i b6 = _mm256_add_epi16(a6, a7);
+    __m256i b7 = _mm256_sub_epi16(a6, a7);
+
+    a0 = _mm256_add_epi16(b0, b2);
+    a1 = _mm256_add_epi16(b1, b3);
+    a2 = _mm256_sub_epi16(b0, b2);
+    a3 = _mm256_sub_epi16(b1, b3);
+    a4 = _mm256_add_epi16(b4, b6);
+    a5 = _mm256_add_epi16(b5, b7);
+    a6 = _mm256_sub_epi16(b4, b6);
+    a7 = _mm256_sub_epi16(b5, b7);
+
+    if (iter == 0) {
+        b0 = _mm256_add_epi16(a0, a4);
+        b7 = _mm256_add_epi16(a1, a5);
+        b3 = _mm256_add_epi16(a2, a6);
+        b4 = _mm256_add_epi16(a3, a7);
+        b2 = _mm256_sub_epi16(a0, a4);
+        b6 = _mm256_sub_epi16(a1, a5);
+        b1 = _mm256_sub_epi16(a2, a6);
+        b5 = _mm256_sub_epi16(a3, a7);
+
+        a0 = _mm256_unpacklo_epi16(b0, b1);
+        a1 = _mm256_unpacklo_epi16(b2, b3);
+        a2 = _mm256_unpackhi_epi16(b0, b1);
+        a3 = _mm256_unpackhi_epi16(b2, b3);
+        a4 = _mm256_unpacklo_epi16(b4, b5);
+        a5 = _mm256_unpacklo_epi16(b6, b7);
+        a6 = _mm256_unpackhi_epi16(b4, b5);
+        a7 = _mm256_unpackhi_epi16(b6, b7);
+
+        b0 = _mm256_unpacklo_epi32(a0, a1);
+        b1 = _mm256_unpacklo_epi32(a4, a5);
+        b2 = _mm256_unpackhi_epi32(a0, a1);
+        b3 = _mm256_unpackhi_epi32(a4, a5);
+        b4 = _mm256_unpacklo_epi32(a2, a3);
+        b5 = _mm256_unpacklo_epi32(a6, a7);
+        b6 = _mm256_unpackhi_epi32(a2, a3);
+        b7 = _mm256_unpackhi_epi32(a6, a7);
+
+        in[0] = _mm256_unpacklo_epi64(b0, b1);
+        in[1] = _mm256_unpackhi_epi64(b0, b1);
+        in[2] = _mm256_unpacklo_epi64(b2, b3);
+        in[3] = _mm256_unpackhi_epi64(b2, b3);
+        in[4] = _mm256_unpacklo_epi64(b4, b5);
+        in[5] = _mm256_unpackhi_epi64(b4, b5);
+        in[6] = _mm256_unpacklo_epi64(b6, b7);
+        in[7] = _mm256_unpackhi_epi64(b6, b7);
+    } else {
+        in[0] = _mm256_add_epi16(a0, a4);
+        in[7] = _mm256_add_epi16(a1, a5);
+        in[3] = _mm256_add_epi16(a2, a6);
+        in[4] = _mm256_add_epi16(a3, a7);
+        in[2] = _mm256_sub_epi16(a0, a4);
+        in[6] = _mm256_sub_epi16(a1, a5);
+        in[1] = _mm256_sub_epi16(a2, a6);
+        in[5] = _mm256_sub_epi16(a3, a7);
+    }
+}
+
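+// Hadamard-transform two side-by-side 8x8 blocks of residuals: each 256-bit
+// load holds one row of both blocks, and the final permutes de-interleave
+// the two blocks into consecutive 64-coefficient groups.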
+static void hadamard_8x8x2_avx2(const int16_t *src_diff, ptrdiff_t src_stride, int16_t *coeff) {
+    __m256i src[8];
+    src[0] = _mm256_loadu_si256((const __m256i *)src_diff);
+    src[1] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+    src[2] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+    src[3] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+    src[4] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+    src[5] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+    src[6] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+    src[7] = _mm256_loadu_si256((const __m256i *)(src_diff += src_stride));
+
+    hadamard_col8x2_avx2(src, 0);
+    hadamard_col8x2_avx2(src, 1);
+
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[0], src[1], 0x20));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[2], src[3], 0x20));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[4], src[5], 0x20));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[6], src[7], 0x20));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[0], src[1], 0x31));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[2], src[3], 0x31));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[4], src[5], 0x31));
+    coeff += 16;
+    _mm256_storeu_si256((__m256i *)coeff, _mm256_permute2x128_si256(src[6], src[7], 0x31));
+}
+
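+// 16x16 Hadamard built from four 8x8 transforms (computed two at a time),
+// followed by a cross-combine stage with a >>1 normalization; is_final
+// selects int32 output via store_tran_low versus a packed int16 buffer for
+// reuse by the 32x32 transform.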
+static INLINE void hadamard_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride,
+                                       int32_t *coeff, int is_final) {
+    DECLARE_ALIGNED(32, int16_t, temp_coeff[16 * 16]);
+    int16_t *t_coeff = temp_coeff;
+    int16_t *coeff16 = (int16_t *)coeff;
+    int      idx;
+    for (idx = 0; idx < 2; ++idx) {
+        const int16_t *src_ptr = src_diff + idx * 8 * src_stride;
+        hadamard_8x8x2_avx2(src_ptr, src_stride, t_coeff + (idx * 64 * 2));
+    }
+
+    for (idx = 0; idx < 64; idx += 16) {
+        const __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff);
+        const __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 64));
+        const __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 128));
+        const __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 192));
+
+        __m256i b0 = _mm256_add_epi16(coeff0, coeff1);
+        __m256i b1 = _mm256_sub_epi16(coeff0, coeff1);
+        __m256i b2 = _mm256_add_epi16(coeff2, coeff3);
+        __m256i b3 = _mm256_sub_epi16(coeff2, coeff3);
+
+        b0 = _mm256_srai_epi16(b0, 1);
+        b1 = _mm256_srai_epi16(b1, 1);
+        b2 = _mm256_srai_epi16(b2, 1);
+        b3 = _mm256_srai_epi16(b3, 1);
+        if (is_final) {
+            store_tran_low(_mm256_add_epi16(b0, b2), coeff);
+            store_tran_low(_mm256_add_epi16(b1, b3), coeff + 64);
+            store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 128);
+            store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 192);
+            coeff += 16;
+        } else {
+            _mm256_storeu_si256((__m256i *)coeff16, _mm256_add_epi16(b0, b2));
+            _mm256_storeu_si256((__m256i *)(coeff16 + 64), _mm256_add_epi16(b1, b3));
+            _mm256_storeu_si256((__m256i *)(coeff16 + 128), _mm256_sub_epi16(b0, b2));
+            _mm256_storeu_si256((__m256i *)(coeff16 + 192), _mm256_sub_epi16(b1, b3));
+            coeff16 += 16;
+        }
+        t_coeff += 16;
+    }
+}
+
+void svt_aom_hadamard_16x16_avx2(const int16_t *src_diff, ptrdiff_t src_stride, int32_t *coeff) {
+    hadamard_16x16_avx2(src_diff, src_stride, coeff, 1);
+}
+
+void svt_aom_hadamard_32x32_avx2(const int16_t *src_diff, ptrdiff_t src_stride, int32_t *coeff) {
+    // For high bitdepths, it is unnecessary to store_tran_low
+    // (mult/unpack/store), then load_tran_low (load/pack) the same memory in the
+    // next stage.  Output to an intermediate buffer first, then store_tran_low()
+    // in the final stage.
+    DECLARE_ALIGNED(32, int16_t, temp_coeff[32 * 32]);
+    int16_t *t_coeff = temp_coeff;
+    int      idx;
+    for (idx = 0; idx < 4; ++idx) {
+        // src_diff: 9 bit, dynamic range [-255, 255]
+        const int16_t *src_ptr = src_diff + (idx >> 1) * 16 * src_stride + (idx & 0x01) * 16;
+        hadamard_16x16_avx2(src_ptr, src_stride, (int32_t *)(t_coeff + idx * 256), 0);
+    }
+
+    for (idx = 0; idx < 256; idx += 16) {
+        const __m256i coeff0 = _mm256_loadu_si256((const __m256i *)t_coeff);
+        const __m256i coeff1 = _mm256_loadu_si256((const __m256i *)(t_coeff + 256));
+        const __m256i coeff2 = _mm256_loadu_si256((const __m256i *)(t_coeff + 512));
+        const __m256i coeff3 = _mm256_loadu_si256((const __m256i *)(t_coeff + 768));
+
+        __m256i b0 = _mm256_add_epi16(coeff0, coeff1);
+        __m256i b1 = _mm256_sub_epi16(coeff0, coeff1);
+        __m256i b2 = _mm256_add_epi16(coeff2, coeff3);
+        __m256i b3 = _mm256_sub_epi16(coeff2, coeff3);
+
+        b0 = _mm256_srai_epi16(b0, 2);
+        b1 = _mm256_srai_epi16(b1, 2);
+        b2 = _mm256_srai_epi16(b2, 2);
+        b3 = _mm256_srai_epi16(b3, 2);
+
+        store_tran_low(_mm256_add_epi16(b0, b2), coeff);
+        store_tran_low(_mm256_add_epi16(b1, b3), coeff + 256);
+        store_tran_low(_mm256_sub_epi16(b0, b2), coeff + 512);
+        store_tran_low(_mm256_sub_epi16(b1, b3), coeff + 768);
+
+        coeff += 16;
+        t_coeff += 16;
+    }
+}
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/highbd_warp_affine_avx2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/highbd_warp_affine_avx2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/highbd_warp_affine_avx2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/highbd_warp_affine_avx2.c	2022-08-01 19:12:00.000000000 +0000
@@ -169,10 +169,8 @@ void dec_svt_av1_highbd_warp_affine_avx2
                         __m256i v_ref = _mm256_permute4x64_epi64(v_refl, 0xEE);
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
-                        v_refl         = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(
@@ -235,10 +233,8 @@ void dec_svt_av1_highbd_warp_affine_avx2
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
 
-                        v_refl = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(v_c01,
@@ -338,10 +334,8 @@ void dec_svt_av1_highbd_warp_affine_avx2
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
 
-                        v_refl = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(
@@ -461,10 +455,8 @@ void dec_svt_av1_highbd_warp_affine_avx2
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
 
-                        v_refl = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(v_c01,
@@ -602,11 +594,11 @@ void dec_svt_av1_highbd_warp_affine_avx2
                         __m256i v_sum16 = _mm256_packus_epi32(v_sum1, v_sum1);
                         v_sum16         = _mm256_permute4x64_epi64(v_sum16, 0xD8);
                         v_sum16         = _mm256_min_epi16(v_sum16, clip_pixel);
-                        _mm_storeu_si128(dst16, _mm256_extracti128_si256(v_sum16, 0));
+                        _mm_storeu_si128(dst16, _mm256_castsi256_si128(v_sum16));
                     } else {
                         v_sum           = _mm256_packus_epi32(v_sum, v_sum);
                         __m256i v_sum16 = _mm256_permute4x64_epi64(v_sum, 0xD8);
-                        _mm_storeu_si128(p, _mm256_extracti128_si256(v_sum16, 0));
+                        _mm_storeu_si128(p, _mm256_castsi256_si128(v_sum16));
                     }
                 } else {
                     // Round and pack into 8 bits
@@ -625,7 +617,7 @@ void dec_svt_av1_highbd_warp_affine_avx2
 
                     __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
 
-                    _mm_storeu_si128(p, _mm256_extracti128_si256(v_sum16, 0));
+                    _mm_storeu_si128(p, _mm256_castsi256_si128(v_sum16));
                 }
             }
         }
@@ -773,10 +765,8 @@ void svt_av1_highbd_warp_affine_avx2(con
                         __m256i v_ref  = _mm256_permute4x64_epi64(v_refl, 0xEE);
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
-                        v_refl         = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(
@@ -833,10 +823,8 @@ void svt_av1_highbd_warp_affine_avx2(con
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
 
-                        v_refl = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(v_c01,
@@ -930,10 +918,8 @@ void svt_av1_highbd_warp_affine_avx2(con
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
 
-                        v_refl = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(
@@ -1046,10 +1032,8 @@ void svt_av1_highbd_warp_affine_avx2(con
 
                         __m256i v_refu = _mm256_alignr_epi8(v_ref, v_refl, 2); // R8R15R14...R2R1
 
-                        v_refl = _mm256_inserti128_si256(
-                            v_refl, _mm256_extracti128_si256(v_refu, 0), 1);
-                        v_refu = _mm256_inserti128_si256(
-                            v_refu, _mm256_extracti128_si256(v_ref, 0), 0);
+                        v_refl = _mm256_inserti128_si256(v_refl, _mm256_castsi256_si128(v_refu), 1);
+                        v_refu = _mm256_inserti128_si256(v_refu, _mm256_castsi256_si128(v_ref), 0);
 
                         __m256i v_sum  = _mm256_set1_epi32(ohoriz);
                         __m256i parsum = _mm256_madd_epi16(v_c01,
@@ -1187,11 +1171,11 @@ void svt_av1_highbd_warp_affine_avx2(con
                         __m256i v_sum16 = _mm256_packus_epi32(v_sum1, v_sum1);
                         v_sum16         = _mm256_permute4x64_epi64(v_sum16, 0xD8);
                         v_sum16         = _mm256_min_epi16(v_sum16, clip_pixel);
-                        _mm_storeu_si128(dst16, _mm256_extracti128_si256(v_sum16, 0));
+                        _mm_storeu_si128(dst16, _mm256_castsi256_si128(v_sum16));
                     } else {
                         v_sum           = _mm256_packus_epi32(v_sum, v_sum);
                         __m256i v_sum16 = _mm256_permute4x64_epi64(v_sum, 0xD8);
-                        _mm_storeu_si128(p, _mm256_extracti128_si256(v_sum16, 0));
+                        _mm_storeu_si128(p, _mm256_castsi256_si128(v_sum16));
                     }
                 } else {
                     // Round and pack into 8 bits
@@ -1210,7 +1194,7 @@ void svt_av1_highbd_warp_affine_avx2(con
 
                     __m128i *const p = (__m128i *)&pred[(i + k + 4) * p_stride + j];
 
-                    _mm_storeu_si128(p, _mm256_extracti128_si256(v_sum16, 0));
+                    _mm_storeu_si128(p, _mm256_castsi256_si128(v_sum16));
                 }
             }
         }
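
Note: the recurring rewrite in this file replaces _mm256_extracti128_si256(v, 0) with _mm256_castsi256_si128(v). Both return the low 128-bit lane, but the cast is a compile-time reinterpretation that emits no instruction, while the extract form can emit a vextracti128. A minimal sketch of the equivalence (helper names are illustrative, not from the patch):

#include <immintrin.h>

/* Both return the low 128 bits of a 256-bit vector.
 * low_lane_cast() is free: it only reinterprets the register.
 * low_lane_extract() may cost one vextracti128 instruction. */
static __m128i low_lane_cast(__m256i v) { return _mm256_castsi256_si128(v); }
static __m128i low_lane_extract(__m256i v) { return _mm256_extracti128_si256(v, 0); }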
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/synonyms_avx2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/synonyms_avx2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/synonyms_avx2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/synonyms_avx2.h	2022-08-01 19:12:00.000000000 +0000
@@ -36,14 +36,6 @@ static INLINE __m256i yy_loadu_256(const
     return _mm256_loadu_si256((const __m256i *)a);
 }
 
-static INLINE void yy_store_256(void *const a, const __m256i v) {
-#ifdef EB_TEST_SIMD_ALIGN
-    if ((intptr_t)a % 32)
-        SVT_LOG("\n yy_store_256() NOT 32-byte aligned!!!\n");
-#endif
-    _mm256_storeu_si256((__m256i *)a, v);
-}
-
 static INLINE void yy_storeu_256(void *const a, const __m256i v) {
     _mm256_storeu_si256((__m256i *)a, v);
 }
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/transpose_avx2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/transpose_avx2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/transpose_avx2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/transpose_avx2.h	2022-08-01 19:12:00.000000000 +0000
@@ -17,7 +17,7 @@
 #include "EbDefinitions.h"
 
 static INLINE __m256i _mm256_unpacklo_epi128(const __m256i in0, const __m256i in1) {
-    return _mm256_inserti128_si256(in0, _mm256_extracti128_si256(in1, 0), 1);
+    return _mm256_inserti128_si256(in0, _mm256_castsi256_si128(in1), 1);
 }
 
 static INLINE __m256i _mm256_unpackhi_epi128(const __m256i in0, const __m256i in1) {
@@ -110,8 +110,8 @@ static INLINE void transpose_64bit_4x4_a
     // out[1]: 01 11 21 31
     // out[2]: 02 12 22 32
     // out[3]: 03 13 23 33
-    out[0] = _mm256_inserti128_si256(a0, _mm256_extracti128_si256(a1, 0), 1);
-    out[1] = _mm256_inserti128_si256(a2, _mm256_extracti128_si256(a3, 0), 1);
+    out[0] = _mm256_inserti128_si256(a0, _mm256_castsi256_si128(a1), 1);
+    out[1] = _mm256_inserti128_si256(a2, _mm256_castsi256_si128(a3), 1);
     out[2] = _mm256_inserti128_si256(a1, _mm256_extracti128_si256(a0, 1), 0);
     out[3] = _mm256_inserti128_si256(a3, _mm256_extracti128_si256(a2, 1), 0);
 }
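
Note: _mm256_unpacklo_epi128 is a lane-level analogue of the unpacklo family: it concatenates the low 128-bit lanes of its two inputs. A standalone sketch of the semantics (my example, assuming AVX2):

#include <immintrin.h>
#include <stdio.h>

/* Demonstrates the lane combination performed by _mm256_unpacklo_epi128:
 * result = { low128(a), low128(b) }. Prints: 0 1 2 3 8 9 10 11 */
int main(void) {
    const __m256i a  = _mm256_setr_epi32(0, 1, 2, 3, 4, 5, 6, 7);
    const __m256i b  = _mm256_setr_epi32(8, 9, 10, 11, 12, 13, 14, 15);
    const __m256i lo = _mm256_inserti128_si256(a, _mm256_castsi256_si128(b), 1);
    int32_t out[8];
    _mm256_storeu_si256((__m256i *)out, lo);
    for (int i = 0; i < 8; i++) printf("%d ", out[i]);
    printf("\n");
    return 0;
}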
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/txfm_common_avx2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/txfm_common_avx2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/txfm_common_avx2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/txfm_common_avx2.h	2022-08-01 19:12:00.000000000 +0000
@@ -62,13 +62,6 @@ static INLINE void btf_16_adds_subs_avx2
     *in1               = _mm256_subs_epi16(_in0, _in1);
 }
 
-static INLINE void btf_32_add_sub_avx2(__m256i *in0, __m256i *in1) {
-    const __m256i _in0 = *in0;
-    const __m256i _in1 = *in1;
-    *in0               = _mm256_add_epi32(_in0, _in1);
-    *in1               = _mm256_sub_epi32(_in0, _in1);
-}
-
 static INLINE void btf_16_adds_subs_out_avx2(__m256i *out0, __m256i *out1, __m256i in0,
                                              __m256i in1) {
     const __m256i _in0 = in0;
@@ -77,29 +70,6 @@ static INLINE void btf_16_adds_subs_out_
     *out1              = _mm256_subs_epi16(_in0, _in1);
 }
 
-static INLINE void btf_32_add_sub_out_avx2(__m256i *out0, __m256i *out1, __m256i in0, __m256i in1) {
-    const __m256i _in0 = in0;
-    const __m256i _in1 = in1;
-    *out0              = _mm256_add_epi32(_in0, _in1);
-    *out1              = _mm256_sub_epi32(_in0, _in1);
-}
-
-static INLINE __m256i load_16bit_to_16bit_avx2(const int16_t *a) {
-    return _mm256_loadu_si256((const __m256i *)a);
-}
-
-static INLINE void load_buffer_16bit_to_16bit_avx2(const int16_t *in, int stride, __m256i *out,
-                                                   int out_size) {
-    for (int i = 0; i < out_size; ++i) { out[i] = load_16bit_to_16bit_avx2(in + i * stride); }
-}
-
-static INLINE void load_buffer_16bit_to_16bit_flip_avx2(const int16_t *in, int stride, __m256i *out,
-                                                        int out_size) {
-    for (int i = 0; i < out_size; ++i) {
-        out[out_size - i - 1] = load_16bit_to_16bit_avx2(in + i * stride);
-    }
-}
-
 static INLINE __m256i load_32bit_to_16bit_w16_avx2(const int32_t *a) {
     const __m256i a_low  = _mm256_lddqu_si256((const __m256i *)a);
     const __m256i a_high = _mm256_lddqu_si256((const __m256i *)(a + 8));
@@ -165,35 +135,6 @@ static INLINE void transpose_16bit_16x16
     out[7 + 8] = _mm256_permute2x128_si256(c[12 + 2], c[13 + 2], 0x31);
 }
 
-static INLINE void transpose_16bit_16x8_avx2(const __m256i *const in, __m256i *const out) {
-    const __m256i a0 = _mm256_unpacklo_epi16(in[0], in[1]);
-    const __m256i a1 = _mm256_unpacklo_epi16(in[2], in[3]);
-    const __m256i a2 = _mm256_unpacklo_epi16(in[4], in[5]);
-    const __m256i a3 = _mm256_unpacklo_epi16(in[6], in[7]);
-    const __m256i a4 = _mm256_unpackhi_epi16(in[0], in[1]);
-    const __m256i a5 = _mm256_unpackhi_epi16(in[2], in[3]);
-    const __m256i a6 = _mm256_unpackhi_epi16(in[4], in[5]);
-    const __m256i a7 = _mm256_unpackhi_epi16(in[6], in[7]);
-
-    const __m256i b0 = _mm256_unpacklo_epi32(a0, a1);
-    const __m256i b1 = _mm256_unpacklo_epi32(a2, a3);
-    const __m256i b2 = _mm256_unpacklo_epi32(a4, a5);
-    const __m256i b3 = _mm256_unpacklo_epi32(a6, a7);
-    const __m256i b4 = _mm256_unpackhi_epi32(a0, a1);
-    const __m256i b5 = _mm256_unpackhi_epi32(a2, a3);
-    const __m256i b6 = _mm256_unpackhi_epi32(a4, a5);
-    const __m256i b7 = _mm256_unpackhi_epi32(a6, a7);
-
-    out[0] = _mm256_unpacklo_epi64(b0, b1);
-    out[1] = _mm256_unpackhi_epi64(b0, b1);
-    out[2] = _mm256_unpacklo_epi64(b4, b5);
-    out[3] = _mm256_unpackhi_epi64(b4, b5);
-    out[4] = _mm256_unpacklo_epi64(b2, b3);
-    out[5] = _mm256_unpackhi_epi64(b2, b3);
-    out[6] = _mm256_unpacklo_epi64(b6, b7);
-    out[7] = _mm256_unpackhi_epi64(b6, b7);
-}
-
 static INLINE void flip_buf_avx2(__m256i *in, __m256i *out, int size) {
     for (int i = 0; i < size; ++i) { out[size - i - 1] = in[i]; }
 }
@@ -211,13 +152,6 @@ static INLINE void round_shift_16bit_w16
     }
 }
 
-static INLINE __m256i av1_round_shift_32_avx2(__m256i vec, int bit) {
-    __m256i tmp, round;
-    round = _mm256_set1_epi32(1 << (bit - 1));
-    tmp   = _mm256_add_epi32(vec, round);
-    return _mm256_srai_epi32(tmp, bit);
-}
-
 static INLINE void av1_round_shift_array_32_avx2(__m256i *input, __m256i *output,
                                                  const int32_t size, const int32_t bit) {
     int32_t i;
@@ -260,62 +194,6 @@ static INLINE void av1_round_shift_rect_
     }
 }
 
-static INLINE __m256i scale_round_avx2(const __m256i a, const int scale) {
-    const __m256i scale_rounding = pair_set_w16_epi16(scale, 1 << (new_sqrt2_bits - 1));
-    const __m256i b              = _mm256_madd_epi16(a, scale_rounding);
-    return _mm256_srai_epi32(b, new_sqrt2_bits);
-}
-
-static INLINE void store_rect_16bit_to_32bit_w8_avx2(const __m256i a, int32_t *const b) {
-    const __m256i one  = _mm256_set1_epi16(1);
-    const __m256i a_lo = _mm256_unpacklo_epi16(a, one);
-    const __m256i a_hi = _mm256_unpackhi_epi16(a, one);
-    const __m256i b_lo = scale_round_avx2(a_lo, new_sqrt2);
-    const __m256i b_hi = scale_round_avx2(a_hi, new_sqrt2);
-    const __m256i temp = _mm256_permute2f128_si256(b_lo, b_hi, 0x31);
-    _mm_storeu_si128((__m128i *)b, _mm256_castsi256_si128(b_lo));
-    _mm_storeu_si128((__m128i *)(b + 4), _mm256_castsi256_si128(b_hi));
-    _mm256_storeu_si256((__m256i *)(b + 64), temp);
-}
-
-static INLINE void store_rect_buffer_16bit_to_32bit_w8_avx2(const __m256i *const in,
-                                                            int32_t *const out, const int stride,
-                                                            const int out_size) {
-    for (int i = 0; i < out_size; ++i) {
-        store_rect_16bit_to_32bit_w8_avx2(in[i], out + i * stride);
-    }
-}
-
-static INLINE void pack_reg(const __m128i *in1, const __m128i *in2, __m256i *out) {
-    out[0] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[0]), in2[0], 0x1);
-    out[1] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[1]), in2[1], 0x1);
-    out[2] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[2]), in2[2], 0x1);
-    out[3] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[3]), in2[3], 0x1);
-    out[4] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[4]), in2[4], 0x1);
-    out[5] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[5]), in2[5], 0x1);
-    out[6] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[6]), in2[6], 0x1);
-    out[7] = _mm256_insertf128_si256(_mm256_castsi128_si256(in1[7]), in2[7], 0x1);
-}
-
-static INLINE void extract_reg(const __m256i *in, __m128i *out1) {
-    out1[0] = _mm256_castsi256_si128(in[0]);
-    out1[1] = _mm256_castsi256_si128(in[1]);
-    out1[2] = _mm256_castsi256_si128(in[2]);
-    out1[3] = _mm256_castsi256_si128(in[3]);
-    out1[4] = _mm256_castsi256_si128(in[4]);
-    out1[5] = _mm256_castsi256_si128(in[5]);
-    out1[6] = _mm256_castsi256_si128(in[6]);
-    out1[7] = _mm256_castsi256_si128(in[7]);
-
-    out1[8]  = _mm256_extracti128_si256(in[0], 0x01);
-    out1[9]  = _mm256_extracti128_si256(in[1], 0x01);
-    out1[10] = _mm256_extracti128_si256(in[2], 0x01);
-    out1[11] = _mm256_extracti128_si256(in[3], 0x01);
-    out1[12] = _mm256_extracti128_si256(in[4], 0x01);
-    out1[13] = _mm256_extracti128_si256(in[5], 0x01);
-    out1[14] = _mm256_extracti128_si256(in[6], 0x01);
-    out1[15] = _mm256_extracti128_si256(in[7], 0x01);
-}
 #ifdef __cplusplus
 }
 #endif
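
Note: the helpers deleted from this header (btf_32_add_sub_avx2, av1_round_shift_32_avx2, the 16-bit load/transpose variants, pack_reg/extract_reg) duplicate functionality that survives elsewhere, so this reads as a dead-code sweep rather than a behavior change. The rounding shift they vectorized is simple; a scalar reference (my sketch, not library code):

#include <stdint.h>

/* Rounding arithmetic right shift, the scalar form of the removed
 * av1_round_shift_32_avx2: add half the divisor, then shift. */
static int32_t round_shift_32(int32_t x, int bit) {
    return (x + (1 << (bit - 1))) >> bit;
}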
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.c	2022-08-01 19:12:00.000000000 +0000
@@ -934,7 +934,7 @@ void svt_av1_highbd_wiener_convolve_add_
                     yy_storeu_256(dst16 + i * dst_stride + j, res_16bit_clamped);
                 } else {
                     _mm_storeu_si128((__m128i*)(dst16 + i * dst_stride + j),
-                                     _mm256_extracti128_si256(res_16bit_clamped, 0));
+                                     _mm256_castsi256_si128(res_16bit_clamped));
                 }
             }
         }
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX2/wiener_convolve_avx2.h	2022-08-01 19:12:00.000000000 +0000
@@ -17,8 +17,6 @@
 #include "convolve.h"
 #include "convolve_avx2.h"
 #include "EbDefinitions.h"
-#include "synonyms.h"
-#include "synonyms_avx2.h"
 
 DECLARE_ALIGNED(64, static const uint8_t, filt_center_tap7_global_avx[64]) = {
     3, 255, 4,  255, 5, 255, 6, 255, 7,  255, 8, 255, 9, 255, 10, 255, 3, 255, 4,  255, 5, 255,
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/cdef_block_avx512.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/cdef_block_avx512.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/cdef_block_avx512.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/cdef_block_avx512.c	2022-08-01 19:12:00.000000000 +0000
@@ -42,12 +42,12 @@ static INLINE void cdef_filter_block_8xn
                                                        const __m512i pri_taps, __m512i *const max,
                                                        __m512i *const min, __m512i *const sum,
                                                        uint8_t subsampling_factor) {
-    const __m512i mask = _mm512_set1_epi16(0x3FFF);
-    const __m512i p0   = loadu_u16_8x4_avx512(in + po, subsampling_factor * CDEF_BSTRIDE);
-    const __m512i p1   = loadu_u16_8x4_avx512(in - po, subsampling_factor * CDEF_BSTRIDE);
+    const __m512i large = _mm512_set1_epi16(CDEF_VERY_LARGE);
+    const __m512i p0    = loadu_u16_8x4_avx512(in + po, subsampling_factor * CDEF_BSTRIDE);
+    const __m512i p1    = loadu_u16_8x4_avx512(in - po, subsampling_factor * CDEF_BSTRIDE);
 
-    *max = _mm512_max_epi16(*max, _mm512_and_si512(p0, mask));
-    *max = _mm512_max_epi16(*max, _mm512_and_si512(p1, mask));
+    *max = _mm512_mask_max_epi16(*max, _mm512_cmpneq_epi16_mask(p0, large), p0, *max);
+    *max = _mm512_mask_max_epi16(*max, _mm512_cmpneq_epi16_mask(p1, large), p1, *max);
     *min = _mm512_min_epi16(*min, p0);
     *min = _mm512_min_epi16(*min, p1);
 
@@ -62,16 +62,16 @@ static INLINE void cdef_filter_block_8xn
     const uint16_t *const in, const __m128i damping, const int32_t so1, const int32_t so2,
     const __m512i row, const __m512i strength, const __m512i sec_taps, __m512i *const max,
     __m512i *const min, __m512i *const sum, uint8_t subsampling_factor) {
-    const __m512i mask = _mm512_set1_epi16(0x3FFF);
-    const __m512i p0   = loadu_u16_8x4_avx512(in + so1, subsampling_factor * CDEF_BSTRIDE);
-    const __m512i p1   = loadu_u16_8x4_avx512(in - so1, subsampling_factor * CDEF_BSTRIDE);
-    const __m512i p2   = loadu_u16_8x4_avx512(in + so2, subsampling_factor * CDEF_BSTRIDE);
-    const __m512i p3   = loadu_u16_8x4_avx512(in - so2, subsampling_factor * CDEF_BSTRIDE);
-
-    *max = _mm512_max_epi16(*max, _mm512_and_si512(p0, mask));
-    *max = _mm512_max_epi16(*max, _mm512_and_si512(p1, mask));
-    *max = _mm512_max_epi16(*max, _mm512_and_si512(p2, mask));
-    *max = _mm512_max_epi16(*max, _mm512_and_si512(p3, mask));
+    const __m512i large = _mm512_set1_epi16(CDEF_VERY_LARGE);
+    const __m512i p0    = loadu_u16_8x4_avx512(in + so1, subsampling_factor * CDEF_BSTRIDE);
+    const __m512i p1    = loadu_u16_8x4_avx512(in - so1, subsampling_factor * CDEF_BSTRIDE);
+    const __m512i p2    = loadu_u16_8x4_avx512(in + so2, subsampling_factor * CDEF_BSTRIDE);
+    const __m512i p3    = loadu_u16_8x4_avx512(in - so2, subsampling_factor * CDEF_BSTRIDE);
+
+    *max = _mm512_mask_max_epi16(*max, _mm512_cmpneq_epi16_mask(p0, large), p0, *max);
+    *max = _mm512_mask_max_epi16(*max, _mm512_cmpneq_epi16_mask(p1, large), p1, *max);
+    *max = _mm512_mask_max_epi16(*max, _mm512_cmpneq_epi16_mask(p2, large), p2, *max);
+    *max = _mm512_mask_max_epi16(*max, _mm512_cmpneq_epi16_mask(p3, large), p3, *max);
     *min = _mm512_min_epi16(*min, p0);
     *min = _mm512_min_epi16(*min, p1);
     *min = _mm512_min_epi16(*min, p2);
@@ -99,10 +99,10 @@ void svt_cdef_filter_block_8xn_16_avx512
                                          uint8_t subsampling_factor) {
     const int32_t  po1              = eb_cdef_directions[dir][0];
     const int32_t  po2              = eb_cdef_directions[dir][1];
-    const int32_t  s1o1             = eb_cdef_directions[(dir + 2) & 7][0];
-    const int32_t  s1o2             = eb_cdef_directions[(dir + 2) & 7][1];
-    const int32_t  s2o1             = eb_cdef_directions[(dir + 6) & 7][0];
-    const int32_t  s2o2             = eb_cdef_directions[(dir + 6) & 7][1];
+    const int32_t  s1o1             = eb_cdef_directions[(dir + 2)][0];
+    const int32_t  s1o2             = eb_cdef_directions[(dir + 2)][1];
+    const int32_t  s2o1             = eb_cdef_directions[(dir - 2)][0];
+    const int32_t  s2o2             = eb_cdef_directions[(dir - 2)][1];
     const int32_t *pri_taps         = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int32_t *sec_taps         = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
     const __m512i  pri_taps_0       = _mm512_set1_epi16(pri_taps[0]);
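
Note: two behavioral points in this hunk. The old max computation ANDed each sample with 0x3FFF so that CDEF_VERY_LARGE padding could not win the max; the new code instead skips such lanes outright with a compare mask and _mm512_mask_max_epi16, leaving *max untouched wherever a padding pixel was loaded. Separately, dropping the "& 7" wraps on the secondary-direction lookups presumably relies on an extended or re-based eb_cdef_directions table (as in libaom's padded direction table) so that dir + 2 and dir - 2 index valid rows; the table itself is not shown in this diff, so that is an inference. A scalar sketch of the per-lane masked-max semantics (sentinel value assumed):

#include <stdint.h>

#define CDEF_VERY_LARGE 0x4000 /* assumed sentinel for unavailable pixels */

/* Per-lane behavior of the masked max above: sentinel lanes leave the
 * running max unchanged; real samples update it normally. */
static int16_t cdef_max_step(int16_t cur_max, int16_t p) {
    if (p == CDEF_VERY_LARGE)
        return cur_max;
    return p > cur_max ? p : cur_max;
}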
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/convolve_avx512.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/convolve_avx512.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/convolve_avx512.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/convolve_avx512.h	2022-08-01 19:12:00.000000000 +0000
@@ -17,8 +17,6 @@
 #include "EbInterPrediction.h"
 #include "EbMemory_AVX2.h"
 #include "EbMemory_SSE4_1.h"
-#include "synonyms.h"
-#include "synonyms_avx2.h"
 #include "synonyms_avx512.h"
 
 #if EN_AVX512_SUPPORT
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/EbHighbdIntraPrediction_AVX512.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/EbHighbdIntraPrediction_AVX512.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/EbHighbdIntraPrediction_AVX512.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/EbHighbdIntraPrediction_AVX512.c	2022-08-01 19:12:00.000000000 +0000
@@ -198,7 +198,7 @@ static const uint16_t sm_weights_64[128]
 
 // Handle number of elements: up to 64.
 static INLINE __m128i dc_sum_large(const __m256i src) {
-    const __m128i s_lo = _mm256_extracti128_si256(src, 0);
+    const __m128i s_lo = _mm256_castsi256_si128(src);
     const __m128i s_hi = _mm256_extracti128_si256(src, 1);
     __m128i       sum, sum_hi;
     sum    = _mm_add_epi16(s_lo, s_hi);
@@ -241,7 +241,7 @@ static INLINE void dc_common_predictor_6
 
 static INLINE __m128i dc_sum_16(const uint16_t *const src) {
     const __m256i s    = _mm256_loadu_si256((const __m256i *)src);
-    const __m128i s_lo = _mm256_extracti128_si256(s, 0);
+    const __m128i s_lo = _mm256_castsi256_si128(s);
     const __m128i s_hi = _mm256_extracti128_si256(s, 1);
     const __m128i sum  = _mm_add_epi16(s_lo, s_hi);
     return dc_sum_8x16bit(sum);
@@ -460,7 +460,7 @@ static INLINE __m128i dc_sum_8_32(const
     const __m256i s_32_0   = _mm512_castsi512_si256(s32_01);
     const __m256i s_32_1   = _mm512_extracti64x4_epi64(s32_01, 1);
     const __m256i s_32     = _mm256_add_epi16(s_32_0, s_32_1);
-    const __m128i s_lo     = _mm256_extracti128_si256(s_32, 0);
+    const __m128i s_lo     = _mm256_castsi256_si128(s_32);
     const __m128i s_hi     = _mm256_extracti128_si256(s_32, 1);
     const __m128i s_16_sum = _mm_add_epi16(s_lo, s_hi);
     const __m128i sum      = _mm_add_epi16(s_8, s_16_sum);
@@ -479,7 +479,7 @@ static INLINE __m128i dc_sum_16_32(const
 
 // Handle number of elements: 65 to 128.
 static INLINE __m128i dc_sum_larger(const __m256i src) {
-    const __m128i s_lo = _mm256_extracti128_si256(src, 0);
+    const __m128i s_lo = _mm256_castsi256_si128(src);
     const __m128i s_hi = _mm256_extracti128_si256(src, 1);
     __m128i       sum, sum_hi;
     sum = _mm_add_epi16(s_lo, s_hi);
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/EbPictureOperators_Intrinsic_AVX512.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/EbPictureOperators_Intrinsic_AVX512.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/EbPictureOperators_Intrinsic_AVX512.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/EbPictureOperators_Intrinsic_AVX512.c	2022-08-01 19:12:00.000000000 +0000
@@ -150,14 +150,6 @@ void svt_residual_kernel8bit_avx512(uint
     }
 }
 
-static INLINE int32_t Hadd32_AVX512_INTRIN(const __m512i src) {
-    const __m256i src_l = _mm512_castsi512_si256(src);
-    const __m256i src_h = _mm512_extracti64x4_epi64(src, 1);
-    const __m256i sum   = _mm256_add_epi32(src_l, src_h);
-
-    return hadd32_avx2_intrin(sum);
-}
-
 static INLINE void Distortion_AVX512_INTRIN(const __m256i input, const __m256i recon,
                                             __m512i *const sum) {
     const __m512i in   = _mm512_cvtepu8_epi16(input);
@@ -223,7 +215,7 @@ uint64_t svt_spatial_full_distortion_ker
             } while (h);
 
             if (area_width == 4) {
-                sum_l              = _mm256_extracti128_si256(sum, 0);
+                sum_l              = _mm256_castsi256_si128(sum);
                 sum_h              = _mm256_extracti128_si256(sum, 1);
                 s                  = _mm_add_epi32(sum_l, sum_h);
                 s                  = _mm_add_epi32(s, _mm_srli_si128(s, 4));
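
Note: with Hadd32_AVX512_INTRIN removed, the surviving reduction folds a __m128i of four 32-bit partial sums down to one scalar with two shift-and-add steps. The idiom in isolation (illustrative helper, mirroring the lines above):

#include <immintrin.h>
#include <stdint.h>

/* Horizontal sum of four int32 lanes: fold the upper half onto the
 * lower half twice, then read lane 0. */
static int32_t hsum_epi32(__m128i v) {
    v = _mm_add_epi32(v, _mm_srli_si128(v, 8)); /* lanes {0+2, 1+3, x, x} */
    v = _mm_add_epi32(v, _mm_srli_si128(v, 4)); /* lane 0 = 0+1+2+3       */
    return _mm_cvtsi128_si32(v);
}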
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/synonyms_avx512.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/synonyms_avx512.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_AVX512/synonyms_avx512.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_AVX512/synonyms_avx512.h	2022-08-01 19:12:00.000000000 +0000
@@ -13,7 +13,6 @@
 #define AOM_DSP_X86_SYNONYMS_AVX512_H_
 
 #include <immintrin.h>
-#include "synonyms.h"
 
 #if EN_AVX512_SUPPORT
 
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE2/av1_txfm_sse2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE2/av1_txfm_sse2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE2/av1_txfm_sse2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE2/av1_txfm_sse2.h	2022-08-01 19:12:00.000000000 +0000
@@ -19,22 +19,6 @@ extern "C" {
 
 #define pair_set_epi16(a, b) _mm_set1_epi32((int32_t)(((uint16_t)(a)) | (((uint32_t)(b)) << 16)))
 
-static INLINE void btf_16_w4_sse2(const __m128i *const w0, const __m128i *const w1,
-                                  const __m128i __rounding, const int8_t cos_bit,
-                                  const __m128i *const in0, const __m128i *const in1,
-                                  __m128i *const out0, __m128i *const out1) {
-    const __m128i t0 = _mm_unpacklo_epi16(*in0, *in1);
-    const __m128i u0 = _mm_madd_epi16(t0, *w0);
-    const __m128i v0 = _mm_madd_epi16(t0, *w1);
-    const __m128i a0 = _mm_add_epi32(u0, __rounding);
-    const __m128i b0 = _mm_add_epi32(v0, __rounding);
-    const __m128i c0 = _mm_srai_epi32(a0, cos_bit);
-    const __m128i d0 = _mm_srai_epi32(b0, cos_bit);
-
-    *out0 = _mm_packs_epi32(c0, c0);
-    *out1 = _mm_packs_epi32(d0, c0);
-}
-
 #define btf_16_4p_sse2(w0, w1, in0, in1, out0, out1, __rounding) \
     {                                                            \
         __m128i t0 = _mm_unpacklo_epi16(in0, in1);               \
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE2/EbPictureOperators_Intrinsic_SSE2.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE2/EbPictureOperators_Intrinsic_SSE2.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE2/EbPictureOperators_Intrinsic_SSE2.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE2/EbPictureOperators_Intrinsic_SSE2.c	2022-08-01 19:12:00.000000000 +0000
@@ -287,4 +287,147 @@ extern void svt_memcpy_intrin_sse(void *
     else
         svt_memcpy_small(dst_ptr, src_ptr, size);
 }
+
+// Store 8 16 bit values. If the destination is 32 bits then sign extend the
+// values by multiplying by 1.
+static INLINE void store_tran_low(__m128i a, int32_t *b) {
+    const __m128i one  = _mm_set1_epi16(1);
+    const __m128i a_hi = _mm_mulhi_epi16(a, one);
+    const __m128i a_lo = _mm_mullo_epi16(a, one);
+    const __m128i a_1  = _mm_unpacklo_epi16(a_lo, a_hi);
+    const __m128i a_2  = _mm_unpackhi_epi16(a_lo, a_hi);
+    _mm_store_si128((__m128i *)(b), a_1);
+    _mm_store_si128((__m128i *)(b + 4), a_2);
+}
+
+static INLINE void hadamard_col8_sse2(__m128i *in, int iter) {
+    __m128i a0 = in[0];
+    __m128i a1 = in[1];
+    __m128i a2 = in[2];
+    __m128i a3 = in[3];
+    __m128i a4 = in[4];
+    __m128i a5 = in[5];
+    __m128i a6 = in[6];
+    __m128i a7 = in[7];
+
+    __m128i b0 = _mm_add_epi16(a0, a1);
+    __m128i b1 = _mm_sub_epi16(a0, a1);
+    __m128i b2 = _mm_add_epi16(a2, a3);
+    __m128i b3 = _mm_sub_epi16(a2, a3);
+    __m128i b4 = _mm_add_epi16(a4, a5);
+    __m128i b5 = _mm_sub_epi16(a4, a5);
+    __m128i b6 = _mm_add_epi16(a6, a7);
+    __m128i b7 = _mm_sub_epi16(a6, a7);
+
+    a0 = _mm_add_epi16(b0, b2);
+    a1 = _mm_add_epi16(b1, b3);
+    a2 = _mm_sub_epi16(b0, b2);
+    a3 = _mm_sub_epi16(b1, b3);
+    a4 = _mm_add_epi16(b4, b6);
+    a5 = _mm_add_epi16(b5, b7);
+    a6 = _mm_sub_epi16(b4, b6);
+    a7 = _mm_sub_epi16(b5, b7);
+
+    if (iter == 0) {
+        b0 = _mm_add_epi16(a0, a4);
+        b7 = _mm_add_epi16(a1, a5);
+        b3 = _mm_add_epi16(a2, a6);
+        b4 = _mm_add_epi16(a3, a7);
+        b2 = _mm_sub_epi16(a0, a4);
+        b6 = _mm_sub_epi16(a1, a5);
+        b1 = _mm_sub_epi16(a2, a6);
+        b5 = _mm_sub_epi16(a3, a7);
+
+        a0 = _mm_unpacklo_epi16(b0, b1);
+        a1 = _mm_unpacklo_epi16(b2, b3);
+        a2 = _mm_unpackhi_epi16(b0, b1);
+        a3 = _mm_unpackhi_epi16(b2, b3);
+        a4 = _mm_unpacklo_epi16(b4, b5);
+        a5 = _mm_unpacklo_epi16(b6, b7);
+        a6 = _mm_unpackhi_epi16(b4, b5);
+        a7 = _mm_unpackhi_epi16(b6, b7);
+
+        b0 = _mm_unpacklo_epi32(a0, a1);
+        b1 = _mm_unpacklo_epi32(a4, a5);
+        b2 = _mm_unpackhi_epi32(a0, a1);
+        b3 = _mm_unpackhi_epi32(a4, a5);
+        b4 = _mm_unpacklo_epi32(a2, a3);
+        b5 = _mm_unpacklo_epi32(a6, a7);
+        b6 = _mm_unpackhi_epi32(a2, a3);
+        b7 = _mm_unpackhi_epi32(a6, a7);
+
+        in[0] = _mm_unpacklo_epi64(b0, b1);
+        in[1] = _mm_unpackhi_epi64(b0, b1);
+        in[2] = _mm_unpacklo_epi64(b2, b3);
+        in[3] = _mm_unpackhi_epi64(b2, b3);
+        in[4] = _mm_unpacklo_epi64(b4, b5);
+        in[5] = _mm_unpackhi_epi64(b4, b5);
+        in[6] = _mm_unpacklo_epi64(b6, b7);
+        in[7] = _mm_unpackhi_epi64(b6, b7);
+    } else {
+        in[0] = _mm_add_epi16(a0, a4);
+        in[7] = _mm_add_epi16(a1, a5);
+        in[3] = _mm_add_epi16(a2, a6);
+        in[4] = _mm_add_epi16(a3, a7);
+        in[2] = _mm_sub_epi16(a0, a4);
+        in[6] = _mm_sub_epi16(a1, a5);
+        in[1] = _mm_sub_epi16(a2, a6);
+        in[5] = _mm_sub_epi16(a3, a7);
+    }
+}
+
+static INLINE void hadamard_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride, int32_t *coeff,
+                                     int is_final) {
+    __m128i src[8];
+    src[0] = _mm_load_si128((const __m128i *)src_diff);
+    src[1] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+    src[2] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+    src[3] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+    src[4] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+    src[5] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+    src[6] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+    src[7] = _mm_load_si128((const __m128i *)(src_diff += src_stride));
+
+    hadamard_col8_sse2(src, 0);
+    hadamard_col8_sse2(src, 1);
+
+    if (is_final) {
+        store_tran_low(src[0], coeff);
+        coeff += 8;
+        store_tran_low(src[1], coeff);
+        coeff += 8;
+        store_tran_low(src[2], coeff);
+        coeff += 8;
+        store_tran_low(src[3], coeff);
+        coeff += 8;
+        store_tran_low(src[4], coeff);
+        coeff += 8;
+        store_tran_low(src[5], coeff);
+        coeff += 8;
+        store_tran_low(src[6], coeff);
+        coeff += 8;
+        store_tran_low(src[7], coeff);
+    } else {
+        int16_t *coeff16 = (int16_t *)coeff;
+        _mm_store_si128((__m128i *)coeff16, src[0]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[1]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[2]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[3]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[4]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[5]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[6]);
+        coeff16 += 8;
+        _mm_store_si128((__m128i *)coeff16, src[7]);
+    }
+}
+
+void svt_aom_hadamard_8x8_sse2(const int16_t *src_diff, ptrdiff_t src_stride, int32_t *coeff) {
+    hadamard_8x8_sse2(src_diff, src_stride, coeff, 1);
+}
 #endif
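
Note: the code added to this file computes an 8x8 Hadamard transform: hadamard_col8_sse2 runs the 8-point butterflies, with the 8x8 transpose fused into the iter == 0 pass, and store_tran_low widens int16 results to int32 via the mulhi/mullo-by-one sign-extension trick. For intuition, a plain scalar Walsh-Hadamard reference (my sketch; coefficient ordering differs from the SIMD path, which permutes rows internally):

#include <stddef.h>
#include <stdint.h>

/* In-place unnormalized 8-point fast Walsh-Hadamard transform. */
static void fwht8(int32_t v[8]) {
    for (int len = 1; len < 8; len <<= 1) {
        for (int i = 0; i < 8; i += 2 * len) {
            for (int j = i; j < i + len; j++) {
                const int32_t a = v[j], b = v[j + len];
                v[j]       = a + b;
                v[j + len] = a - b;
            }
        }
    }
}

/* Reference 8x8 Hadamard: transform rows, then columns. */
static void hadamard_8x8_ref(const int16_t *src, ptrdiff_t stride, int32_t coeff[64]) {
    int32_t m[64];
    for (int r = 0; r < 8; r++)
        for (int c = 0; c < 8; c++) m[r * 8 + c] = src[r * stride + c];
    for (int r = 0; r < 8; r++) fwht8(&m[r * 8]);
    for (int c = 0; c < 8; c++) {
        int32_t col[8];
        for (int r = 0; r < 8; r++) col[r] = m[r * 8 + c];
        fwht8(col);
        for (int r = 0; r < 8; r++) coeff[r * 8 + c] = col[r];
    }
}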
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE2/transpose_sse2.h 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE2/transpose_sse2.h
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE2/transpose_sse2.h	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE2/transpose_sse2.h	2022-08-01 19:12:00.000000000 +0000
@@ -24,82 +24,6 @@ extern "C" {
 }
 #endif
 
-static INLINE __m128i transpose_8bit_4x4(const __m128i *const in) {
-    // Unpack 16 bit elements. Goes from:
-    // in[0]: 00 01 02 03
-    // in[1]: 10 11 12 13
-    // in[2]: 20 21 22 23
-    // in[3]: 30 31 32 33
-    // to:
-    // a0:    00 10 01 11  02 12 03 13
-    // a1:    20 30 21 31  22 32 23 33
-    const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]);
-    const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]);
-
-    // Unpack 32 bit elements resulting in:
-    // 00 10 20 30  01 11 21 31  02 12 22 32  03 13 23 33
-    return _mm_unpacklo_epi16(a0, a1);
-}
-
-static INLINE void transpose_8bit_8x8(const __m128i *const in, __m128i *const out) {
-    // Unpack 8 bit elements. Goes from:
-    // in[0]: 00 01 02 03 04 05 06 07
-    // in[1]: 10 11 12 13 14 15 16 17
-    // in[2]: 20 21 22 23 24 25 26 27
-    // in[3]: 30 31 32 33 34 35 36 37
-    // in[4]: 40 41 42 43 44 45 46 47
-    // in[5]: 50 51 52 53 54 55 56 57
-    // in[6]: 60 61 62 63 64 65 66 67
-    // in[7]: 70 71 72 73 74 75 76 77
-    // to:
-    // a0:    00 10 01 11 02 12 03 13  04 14 05 15 06 16 07 17
-    // a1:    20 30 21 31 22 32 23 33  24 34 25 35 26 36 27 37
-    // a2:    40 50 41 51 42 52 43 53  44 54 45 55 46 56 47 57
-    // a3:    60 70 61 71 62 72 63 73  64 74 65 75 66 76 67 77
-    const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]);
-    const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]);
-    const __m128i a2 = _mm_unpacklo_epi8(in[4], in[5]);
-    const __m128i a3 = _mm_unpacklo_epi8(in[6], in[7]);
-
-    // Unpack 16 bit elements resulting in:
-    // b0: 00 10 20 30 01 11 21 31  02 12 22 32 03 13 23 33
-    // b1: 40 50 60 70 41 51 61 71  42 52 62 72 43 53 63 73
-    // b2: 04 14 24 34 05 15 25 35  06 16 26 36 07 17 27 37
-    // b3: 44 54 64 74 45 55 65 75  46 56 66 76 47 57 67 77
-    const __m128i b0 = _mm_unpacklo_epi16(a0, a1);
-    const __m128i b1 = _mm_unpackhi_epi16(a0, a1);
-    const __m128i b2 = _mm_unpacklo_epi16(a2, a3);
-    const __m128i b3 = _mm_unpackhi_epi16(a2, a3);
-
-    // Unpack 32 bit elements resulting in:
-    // c0: 00 10 20 30 40 50 60 70  01 11 21 31 41 51 61 71
-    // c1: 02 12 22 32 42 52 62 72  03 13 23 33 43 53 63 73
-    // c2: 04 14 24 34 44 54 64 74  05 15 25 35 45 55 65 75
-    // c3: 06 16 26 36 46 56 66 76  07 17 27 37 47 57 67 77
-    const __m128i c0 = _mm_unpacklo_epi32(b0, b2);
-    const __m128i c1 = _mm_unpackhi_epi32(b0, b2);
-    const __m128i c2 = _mm_unpacklo_epi32(b1, b3);
-    const __m128i c3 = _mm_unpackhi_epi32(b1, b3);
-
-    // Unpack 64 bit elements resulting in:
-    // out[0]: 00 10 20 30 40 50 60 70
-    // out[1]: 01 11 21 31 41 51 61 71
-    // out[2]: 02 12 22 32 42 52 62 72
-    // out[3]: 03 13 23 33 43 53 63 73
-    // out[4]: 04 14 24 34 44 54 64 74
-    // out[5]: 05 15 25 35 45 55 65 75
-    // out[6]: 06 16 26 36 46 56 66 76
-    // out[7]: 07 17 27 37 47 57 67 77
-    out[0] = c0;
-    out[1] = _mm_srli_si128(c0, 8);
-    out[2] = c1;
-    out[3] = _mm_srli_si128(c1, 8);
-    out[4] = c2;
-    out[5] = _mm_srli_si128(c2, 8);
-    out[6] = c3;
-    out[7] = _mm_srli_si128(c3, 8);
-}
-
 static INLINE void partial_transpose_8bit_8x8(const __m128i *const in, __m128i *const out) {
     // Unpack 8 bit elements. Goes from:
     // in[0]: 00 01 02 03 04 05 06 07
@@ -141,161 +65,6 @@ static INLINE void partial_transpose_8bi
     out[3] = _mm_unpackhi_epi32(b1, b3);
 }
 
-static INLINE void transpose_8bit_16x8(const __m128i *const in, __m128i *const out) {
-    // Unpack 8 bit elements. Goes from:
-    // in[0]: 00 01 02 03 04 05 06 07  08 09 0A 0B 0C 0D 0E 0F
-    // in[1]: 10 11 12 13 14 15 16 17  18 19 1A 1B 1C 1D 1E 1F
-    // in[2]: 20 21 22 23 24 25 26 27  28 29 2A 2B 2C 2D 2E 2F
-    // in[3]: 30 31 32 33 34 35 36 37  38 39 3A 3B 3C 3D 3E 3F
-    // in[4]: 40 41 42 43 44 45 46 47  48 49 4A 4B 4C 4D 4E 4F
-    // in[5]: 50 51 52 53 54 55 56 57  58 59 5A 5B 5C 5D 5E 5F
-    // in[6]: 60 61 62 63 64 65 66 67  68 69 6A 6B 6C 6D 6E 6F
-    // in[7]: 70 71 72 73 74 75 76 77  78 79 7A 7B 7C 7D 7E 7F
-    // to:
-    // a0:    00 10 01 11 02 12 03 13  04 14 05 15 06 16 07 17
-    // a1:    20 30 21 31 22 32 23 33  24 34 25 35 26 36 27 37
-    // a2:    40 50 41 51 42 52 43 53  44 54 45 55 46 56 47 57
-    // a3:    60 70 61 71 62 72 63 73  64 74 65 75 66 76 67 77
-    // a4:    08 18 09 19 0A 1A 0B 1B  08 18 09 19 0A 1A 0B 1B
-    // a5:    28 38 29 39 2A 3A 2B 3B  28 38 29 39 2A 3A 2B 3B
-    // a6:    48 58 49 59 4A 5A 4B 5B  48 58 49 59 4A 5A 4B 5B
-    // a7:    68 78 69 79 6A 7A 6B 7B  68 78 69 79 6A 7A 6B 7B
-    const __m128i a0 = _mm_unpacklo_epi8(in[0], in[1]);
-    const __m128i a1 = _mm_unpacklo_epi8(in[2], in[3]);
-    const __m128i a2 = _mm_unpacklo_epi8(in[4], in[5]);
-    const __m128i a3 = _mm_unpacklo_epi8(in[6], in[7]);
-    const __m128i a4 = _mm_unpackhi_epi8(in[0], in[1]);
-    const __m128i a5 = _mm_unpackhi_epi8(in[2], in[3]);
-    const __m128i a6 = _mm_unpackhi_epi8(in[4], in[5]);
-    const __m128i a7 = _mm_unpackhi_epi8(in[6], in[7]);
-
-    // Unpack 16 bit elements resulting in:
-    // b0: 00 10 20 30 01 11 21 31  02 12 22 32 03 13 23 33
-    // b1: 40 50 60 70 41 51 61 71  42 52 62 72 43 53 63 73
-    // b2: 04 14 24 34 05 15 25 35  06 16 26 36 07 17 27 37
-    // b3: 44 54 64 74 45 55 65 75  46 56 66 76 47 57 67 77
-    // b4: 08 18 28 38 09 19 29 39  0A 1A 2A 3A 0B 1B 2B 3B
-    // b5: 48 58 68 78 49 59 69 79  4A 5A 6A 7A 4B 5B 6B 7B
-    // b6: 0C 1C 2C 3C 0D 1D 2D 3D  0E 1E 2E 3E 0F 1F 2F 3F
-    // b7: 4C 5C 6C 7C 4D 5D 6D 7D  4E 5E 6E 7E 4F 5F 6F 7F
-    const __m128i b0 = _mm_unpacklo_epi16(a0, a1);
-    const __m128i b1 = _mm_unpackhi_epi16(a0, a1);
-    const __m128i b2 = _mm_unpacklo_epi16(a2, a3);
-    const __m128i b3 = _mm_unpackhi_epi16(a2, a3);
-    const __m128i b4 = _mm_unpacklo_epi16(a4, a5);
-    const __m128i b5 = _mm_unpackhi_epi16(a4, a5);
-    const __m128i b6 = _mm_unpacklo_epi16(a6, a7);
-    const __m128i b7 = _mm_unpackhi_epi16(a6, a7);
-
-    // Unpack 32 bit elements resulting in:
-    // c0: 00 10 20 30 40 50 60 70  01 11 21 31 41 51 61 71
-    // c1: 02 12 22 32 42 52 62 72  03 13 23 33 43 53 63 73
-    // c2: 04 14 24 34 44 54 64 74  05 15 25 35 45 55 65 75
-    // c3: 06 16 26 36 46 56 66 76  07 17 27 37 47 57 67 77
-    // c4: 08 18 28 38 48 58 68 78  09 19 29 39 49 59 69 79
-    // c5: 0A 1A 2A 3A 4A 5A 6A 7A  0B 1B 2B 3B 4B 5B 6B 7B
-    // c6: 0C 1C 2C 3C 4C 5C 6C 7C  0D 1D 2D 3D 4D 5D 6D 7D
-    // c7: 0E 1E 2E 3E 4E 5E 6E 7E  0F 1F 2F 3F 4F 5F 6F 7F
-    out[0] = _mm_unpacklo_epi32(b0, b2);
-    out[1] = _mm_unpackhi_epi32(b0, b2);
-    out[2] = _mm_unpacklo_epi32(b1, b3);
-    out[3] = _mm_unpackhi_epi32(b1, b3);
-    out[4] = _mm_unpacklo_epi32(b4, b6);
-    out[5] = _mm_unpackhi_epi32(b4, b6);
-    out[6] = _mm_unpacklo_epi32(b5, b7);
-    out[7] = _mm_unpackhi_epi32(b5, b7);
-}
-
-static INLINE void transpose_8bit_16x16_sse2(const __m128i *const in, __m128i *const out) {
-    __m128i w0, w1, w2, w3, w4, w5, w6, w7, w8, w9;
-    __m128i w10, w11, w12, w13, w14, w15;
-
-    w0 = _mm_unpacklo_epi8(in[0], in[1]);
-    w1 = _mm_unpacklo_epi8(in[2], in[3]);
-    w2 = _mm_unpacklo_epi8(in[4], in[5]);
-    w3 = _mm_unpacklo_epi8(in[6], in[7]);
-
-    w8  = _mm_unpacklo_epi8(in[8], in[9]);
-    w9  = _mm_unpacklo_epi8(in[10], in[11]);
-    w10 = _mm_unpacklo_epi8(in[12], in[13]);
-    w11 = _mm_unpacklo_epi8(in[14], in[15]);
-
-    w4  = _mm_unpacklo_epi16(w0, w1);
-    w5  = _mm_unpacklo_epi16(w2, w3);
-    w12 = _mm_unpacklo_epi16(w8, w9);
-    w13 = _mm_unpacklo_epi16(w10, w11);
-
-    w6  = _mm_unpacklo_epi32(w4, w5);
-    w7  = _mm_unpackhi_epi32(w4, w5);
-    w14 = _mm_unpacklo_epi32(w12, w13);
-    w15 = _mm_unpackhi_epi32(w12, w13);
-
-    // Store first 4-line result
-    out[0] = _mm_unpacklo_epi64(w6, w14);
-    out[1] = _mm_unpackhi_epi64(w6, w14);
-    out[2] = _mm_unpacklo_epi64(w7, w15);
-    out[3] = _mm_unpackhi_epi64(w7, w15);
-
-    w4  = _mm_unpackhi_epi16(w0, w1);
-    w5  = _mm_unpackhi_epi16(w2, w3);
-    w12 = _mm_unpackhi_epi16(w8, w9);
-    w13 = _mm_unpackhi_epi16(w10, w11);
-
-    w6  = _mm_unpacklo_epi32(w4, w5);
-    w7  = _mm_unpackhi_epi32(w4, w5);
-    w14 = _mm_unpacklo_epi32(w12, w13);
-    w15 = _mm_unpackhi_epi32(w12, w13);
-
-    // Store second 4-line result
-    out[4] = _mm_unpacklo_epi64(w6, w14);
-    out[5] = _mm_unpackhi_epi64(w6, w14);
-    out[6] = _mm_unpacklo_epi64(w7, w15);
-    out[7] = _mm_unpackhi_epi64(w7, w15);
-
-    // upper half
-    w0 = _mm_unpackhi_epi8(in[0], in[1]);
-    w1 = _mm_unpackhi_epi8(in[2], in[3]);
-    w2 = _mm_unpackhi_epi8(in[4], in[5]);
-    w3 = _mm_unpackhi_epi8(in[6], in[7]);
-
-    w8  = _mm_unpackhi_epi8(in[8], in[9]);
-    w9  = _mm_unpackhi_epi8(in[10], in[11]);
-    w10 = _mm_unpackhi_epi8(in[12], in[13]);
-    w11 = _mm_unpackhi_epi8(in[14], in[15]);
-
-    w4  = _mm_unpacklo_epi16(w0, w1);
-    w5  = _mm_unpacklo_epi16(w2, w3);
-    w12 = _mm_unpacklo_epi16(w8, w9);
-    w13 = _mm_unpacklo_epi16(w10, w11);
-
-    w6  = _mm_unpacklo_epi32(w4, w5);
-    w7  = _mm_unpackhi_epi32(w4, w5);
-    w14 = _mm_unpacklo_epi32(w12, w13);
-    w15 = _mm_unpackhi_epi32(w12, w13);
-
-    // Store first 4-line result
-    out[8]  = _mm_unpacklo_epi64(w6, w14);
-    out[9]  = _mm_unpackhi_epi64(w6, w14);
-    out[10] = _mm_unpacklo_epi64(w7, w15);
-    out[11] = _mm_unpackhi_epi64(w7, w15);
-
-    w4  = _mm_unpackhi_epi16(w0, w1);
-    w5  = _mm_unpackhi_epi16(w2, w3);
-    w12 = _mm_unpackhi_epi16(w8, w9);
-    w13 = _mm_unpackhi_epi16(w10, w11);
-
-    w6  = _mm_unpacklo_epi32(w4, w5);
-    w7  = _mm_unpackhi_epi32(w4, w5);
-    w14 = _mm_unpacklo_epi32(w12, w13);
-    w15 = _mm_unpackhi_epi32(w12, w13);
-
-    // Store second 4-line result
-    out[12] = _mm_unpacklo_epi64(w6, w14);
-    out[13] = _mm_unpackhi_epi64(w6, w14);
-    out[14] = _mm_unpacklo_epi64(w7, w15);
-    out[15] = _mm_unpackhi_epi64(w7, w15);
-}
-
 static INLINE void transpose_16bit_4x4(const __m128i *const in, __m128i *const out) {
     // Unpack 16 bit elements. Goes from:
     // in[0]: 00 01 02 03  XX XX XX XX
@@ -472,24 +241,6 @@ static INLINE void transpose_16bit_8x8(c
     out[7] = _mm_unpackhi_epi64(b6, b7);
 }
 
-// Transpose in-place
-static INLINE void transpose_16bit_16x16(__m128i *const left, __m128i *const right) {
-    __m128i tbuf[8];
-    transpose_16bit_8x8(left, left);
-    transpose_16bit_8x8(right, tbuf);
-    transpose_16bit_8x8(left + 8, right);
-    transpose_16bit_8x8(right + 8, right + 8);
-
-    left[8]  = tbuf[0];
-    left[9]  = tbuf[1];
-    left[10] = tbuf[2];
-    left[11] = tbuf[3];
-    left[12] = tbuf[4];
-    left[13] = tbuf[5];
-    left[14] = tbuf[6];
-    left[15] = tbuf[7];
-}
-
 static INLINE void transpose_32bit_4x4(const __m128i *const in, __m128i *const out) {
     // Unpack 32 bit elements. Goes from:
     // in[0]: 00 01 02 03
@@ -518,100 +269,6 @@ static INLINE void transpose_32bit_4x4(c
     out[3] = _mm_unpackhi_epi64(a2, a3);
 }
 
-static INLINE void transpose_32bit_4x4x2(const __m128i *const in, __m128i *const out) {
-    // Unpack 32 bit elements. Goes from:
-    // in[0]: 00 01 02 03
-    // in[1]: 10 11 12 13
-    // in[2]: 20 21 22 23
-    // in[3]: 30 31 32 33
-    // in[4]: 04 05 06 07
-    // in[5]: 14 15 16 17
-    // in[6]: 24 25 26 27
-    // in[7]: 34 35 36 37
-    // to:
-    // a0:    00 10 01 11
-    // a1:    20 30 21 31
-    // a2:    02 12 03 13
-    // a3:    22 32 23 33
-    // a4:    04 14 05 15
-    // a5:    24 34 25 35
-    // a6:    06 16 07 17
-    // a7:    26 36 27 37
-    const __m128i a0 = _mm_unpacklo_epi32(in[0], in[1]);
-    const __m128i a1 = _mm_unpacklo_epi32(in[2], in[3]);
-    const __m128i a2 = _mm_unpackhi_epi32(in[0], in[1]);
-    const __m128i a3 = _mm_unpackhi_epi32(in[2], in[3]);
-    const __m128i a4 = _mm_unpacklo_epi32(in[4], in[5]);
-    const __m128i a5 = _mm_unpacklo_epi32(in[6], in[7]);
-    const __m128i a6 = _mm_unpackhi_epi32(in[4], in[5]);
-    const __m128i a7 = _mm_unpackhi_epi32(in[6], in[7]);
-
-    // Unpack 64 bit elements resulting in:
-    // out[0]: 00 10 20 30
-    // out[1]: 01 11 21 31
-    // out[2]: 02 12 22 32
-    // out[3]: 03 13 23 33
-    // out[4]: 04 14 24 34
-    // out[5]: 05 15 25 35
-    // out[6]: 06 16 26 36
-    // out[7]: 07 17 27 37
-    out[0] = _mm_unpacklo_epi64(a0, a1);
-    out[1] = _mm_unpackhi_epi64(a0, a1);
-    out[2] = _mm_unpacklo_epi64(a2, a3);
-    out[3] = _mm_unpackhi_epi64(a2, a3);
-    out[4] = _mm_unpacklo_epi64(a4, a5);
-    out[5] = _mm_unpackhi_epi64(a4, a5);
-    out[6] = _mm_unpacklo_epi64(a6, a7);
-    out[7] = _mm_unpackhi_epi64(a6, a7);
-}
-
-static INLINE void transpose_32bit_8x4(const __m128i *const in, __m128i *const out) {
-    // Unpack 32 bit elements. Goes from:
-    // in[0]: 00 01 02 03
-    // in[1]: 04 05 06 07
-    // in[2]: 10 11 12 13
-    // in[3]: 14 15 16 17
-    // in[4]: 20 21 22 23
-    // in[5]: 24 25 26 27
-    // in[6]: 30 31 32 33
-    // in[7]: 34 35 36 37
-    // to:
-    // a0: 00 10 01 11
-    // a1: 20 30 21 31
-    // a2: 02 12 03 13
-    // a3: 22 32 23 33
-    // a4: 04 14 05 15
-    // a5: 24 34 25 35
-    // a6: 06 16 07 17
-    // a7: 26 36 27 37
-    const __m128i a0 = _mm_unpacklo_epi32(in[0], in[2]);
-    const __m128i a1 = _mm_unpacklo_epi32(in[4], in[6]);
-    const __m128i a2 = _mm_unpackhi_epi32(in[0], in[2]);
-    const __m128i a3 = _mm_unpackhi_epi32(in[4], in[6]);
-    const __m128i a4 = _mm_unpacklo_epi32(in[1], in[3]);
-    const __m128i a5 = _mm_unpacklo_epi32(in[5], in[7]);
-    const __m128i a6 = _mm_unpackhi_epi32(in[1], in[3]);
-    const __m128i a7 = _mm_unpackhi_epi32(in[5], in[7]);
-
-    // Unpack 64 bit elements resulting in:
-    // out[0]: 00 10 20 30
-    // out[1]: 01 11 21 31
-    // out[2]: 02 12 22 32
-    // out[3]: 03 13 23 33
-    // out[4]: 04 14 24 34
-    // out[5]: 05 15 25 35
-    // out[6]: 06 16 26 36
-    // out[7]: 07 17 27 37
-    out[0] = _mm_unpacklo_epi64(a0, a1);
-    out[1] = _mm_unpackhi_epi64(a0, a1);
-    out[2] = _mm_unpacklo_epi64(a2, a3);
-    out[3] = _mm_unpackhi_epi64(a2, a3);
-    out[4] = _mm_unpacklo_epi64(a4, a5);
-    out[5] = _mm_unpackhi_epi64(a4, a5);
-    out[6] = _mm_unpacklo_epi64(a6, a7);
-    out[7] = _mm_unpackhi_epi64(a6, a7);
-}
-
 static INLINE void transpose_64bit_4x4_sse2(const __m128i *const in, __m128i *const out) {
     out[0] = _mm_unpacklo_epi64(in[0], in[2]);
     out[4] = _mm_unpacklo_epi64(in[1], in[3]);
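
Note: the transpose variants removed above (8-bit 4x4/8x8/16x8/16x16, the in-place 16-bit 16x16, and the 32-bit 4x4x2/8x4) appear to have lost their last callers in this release, while transpose_16bit_8x8, transpose_32bit_4x4, and the 64-bit 4x4 survive. A minimal usage sketch for the kept 32-bit 4x4 transpose (my example; assumes transpose_sse2.h is on the include path):

#include <immintrin.h>
#include <stdint.h>
#include "transpose_sse2.h"

/* Transpose a 4x4 block of int32: load rows, transpose, store. */
static void transpose_block_4x4(const int32_t *src, int32_t *dst) {
    __m128i rows[4], cols[4];
    for (int i = 0; i < 4; i++)
        rows[i] = _mm_loadu_si128((const __m128i *)(src + 4 * i));
    transpose_32bit_4x4(rows, cols);
    for (int i = 0; i < 4; i++)
        _mm_storeu_si128((__m128i *)(dst + 4 * i), cols[i]);
}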
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE4_1/cdef_block_sse4_1.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE4_1/cdef_block_sse4_1.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSE4_1/cdef_block_sse4_1.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSE4_1/cdef_block_sse4_1.c	2022-08-01 19:12:00.000000000 +0000
@@ -134,16 +134,16 @@ void svt_av1_cdef_filter_block_8xn_8_sse
                                             int pri_strength, int sec_strength, int dir,
                                             int pri_damping, int sec_damping, int coeff_shift,
                                             uint8_t height, uint8_t subsampling_factor) {
-    int     i;
-    __m128i p0, p1, p2, p3;
-    v256    sum, row, res, tap;
-    v256    max, min, large = v256_dup_16(CDEF_VERY_LARGE);
-    int     po1  = eb_cdef_directions[dir][0];
-    int     po2  = eb_cdef_directions[dir][1];
-    int     s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int     s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int     s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int     s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    int           i;
+    __m128i       p0, p1, p2, p3;
+    v256          sum, row, res, tap;
+    v256          max, min, large = v256_dup_16(CDEF_VERY_LARGE);
+    const int32_t po1  = eb_cdef_directions[dir][0];
+    const int32_t po2  = eb_cdef_directions[dir][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
 
     const int *pri_taps = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int *sec_taps = eb_cdef_sec_taps[0];
@@ -285,15 +285,15 @@ void svt_av1_cdef_filter_block_4xn_8_sse
                                             int pri_strength, int sec_strength, int dir,
                                             int pri_damping, int sec_damping, int coeff_shift,
                                             uint8_t height, uint8_t subsampling_factor) {
-    __m128i p0, p1, p2, p3;
-    v256    sum, row, tap, res;
-    v256    max, min, large = v256_dup_16(CDEF_VERY_LARGE);
-    int     po1  = eb_cdef_directions[dir][0];
-    int     po2  = eb_cdef_directions[dir][1];
-    int     s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int     s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int     s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int     s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    __m128i       p0, p1, p2, p3;
+    v256          sum, row, tap, res;
+    v256          max, min, large = v256_dup_16(CDEF_VERY_LARGE);
+    const int32_t po1  = eb_cdef_directions[dir][0];
+    const int32_t po2  = eb_cdef_directions[dir][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
 
     const int *pri_taps = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int *sec_taps = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
@@ -507,15 +507,15 @@ void svt_av1_cdef_filter_block_8xn_16_ss
                                              int pri_strength, int sec_strength, int dir,
                                              int pri_damping, int sec_damping, int coeff_shift,
                                              uint8_t height, uint8_t subsampling_factor) {
-    int  i;
-    v256 sum, p0, p1, p2, p3, row, res;
-    v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
-    int  po1  = eb_cdef_directions[dir][0];
-    int  po2  = eb_cdef_directions[dir][1];
-    int  s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int  s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int  s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int  s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    int           i;
+    v256          sum, p0, p1, p2, p3, row, res;
+    v256          max, min, large = v256_dup_16(CDEF_VERY_LARGE);
+    const int32_t po1  = eb_cdef_directions[dir][0];
+    const int32_t po2  = eb_cdef_directions[dir][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
 
     const int *pri_taps = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int *sec_taps = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
@@ -636,15 +636,15 @@ void svt_av1_cdef_filter_block_4xn_16_ss
                                              int pri_strength, int sec_strength, int dir,
                                              int pri_damping, int sec_damping, int coeff_shift,
                                              uint8_t height, uint8_t subsampling_factor) {
-    int  i;
-    v256 p0, p1, p2, p3, sum, row, res;
-    v256 max, min, large = v256_dup_16(CDEF_VERY_LARGE);
-    int  po1  = eb_cdef_directions[dir][0];
-    int  po2  = eb_cdef_directions[dir][1];
-    int  s1o1 = eb_cdef_directions[(dir + 2) & 7][0];
-    int  s1o2 = eb_cdef_directions[(dir + 2) & 7][1];
-    int  s2o1 = eb_cdef_directions[(dir + 6) & 7][0];
-    int  s2o2 = eb_cdef_directions[(dir + 6) & 7][1];
+    int           i;
+    v256          p0, p1, p2, p3, sum, row, res;
+    v256          max, min, large = v256_dup_16(CDEF_VERY_LARGE);
+    const int32_t po1  = eb_cdef_directions[dir][0];
+    const int32_t po2  = eb_cdef_directions[dir][1];
+    const int32_t s1o1 = eb_cdef_directions[(dir + 2)][0];
+    const int32_t s1o2 = eb_cdef_directions[(dir + 2)][1];
+    const int32_t s2o1 = eb_cdef_directions[(dir - 2)][0];
+    const int32_t s2o2 = eb_cdef_directions[(dir - 2)][1];
 
     const int *pri_taps = eb_cdef_pri_taps[(pri_strength >> coeff_shift) & 1];
     const int *sec_taps = eb_cdef_sec_taps[(pri_strength >> coeff_shift) & 1];
@@ -1032,8 +1032,9 @@ static INLINE void array_reverse_transpo
     res[1] = _mm_unpacklo_epi64(tr1_6, tr1_7);
     res[0] = _mm_unpackhi_epi64(tr1_6, tr1_7);
 }
-uint8_t svt_cdef_find_dir_sse4_1(const uint16_t *img, int32_t stride, int32_t *var,
-                                 int32_t coeff_shift) {
+
+uint8_t svt_aom_cdef_find_dir_sse4_1(const uint16_t *img, int32_t stride, int32_t *var,
+                                     int32_t coeff_shift) {
     int     i;
     int32_t cost[8];
     int32_t best_cost = 0;
@@ -1070,8 +1071,19 @@ uint8_t svt_cdef_find_dir_sse4_1(const u
     return best_dir;
 }
 
-void svt_copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int32_t dstride, const uint8_t *src,
-                                         int32_t sstride, int32_t v, int32_t h) {
+void svt_aom_cdef_find_dir_dual_sse4_1(const uint16_t *img1, const uint16_t *img2, int stride,
+                                       int32_t *var_out_1st, int32_t *var_out_2nd,
+                                       int32_t coeff_shift, uint8_t *out_dir_1st_8x8,
+                                       uint8_t *out_dir_2nd_8x8) {
+    // Process first 8x8.
+    *out_dir_1st_8x8 = svt_aom_cdef_find_dir_sse4_1(img1, stride, var_out_1st, coeff_shift);
+
+    // Process second 8x8.
+    *out_dir_2nd_8x8 = svt_aom_cdef_find_dir_sse4_1(img2, stride, var_out_2nd, coeff_shift);
+}
+
+void svt_aom_copy_rect8_8bit_to_16bit_sse4_1(uint16_t *dst, int32_t dstride, const uint8_t *src,
+                                             int32_t sstride, int32_t v, int32_t h) {
     int32_t i, j;
     for (i = 0; i < v; i++) {
         for (j = 0; j < (h & ~0x7); j += 8) {
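
Note: svt_aom_cdef_find_dir_dual_sse4_1 is a thin wrapper that runs the single-block direction search twice; presumably the dual signature exists so that wider SIMD variants can batch both 8x8 blocks behind one common function pointer (an inference, not stated in the diff). A hypothetical call site:

#include <stdint.h>

/* Estimate CDEF directions for two adjacent 8x8 blocks of a 16-bit
 * picture; the source layout and stride are assumed for illustration. */
static void find_two_dirs(const uint16_t *src, int stride, int coeff_shift) {
    int32_t var1, var2;
    uint8_t dir1, dir2;
    svt_aom_cdef_find_dir_dual_sse4_1(src, src + 8, stride,
                                      &var1, &var2, coeff_shift,
                                      &dir1, &dir2);
    /* dir1/dir2 are in [0, 7]; var1/var2 measure directional contrast. */
}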
diff -pruN 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSSE3/aom_subpixel_8t_intrin_ssse3.c 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSSE3/aom_subpixel_8t_intrin_ssse3.c
--- 0.9.1+dfsg-1/Source/Lib/Common/ASM_SSSE3/aom_subpixel_8t_intrin_ssse3.c	2022-02-24 02:17:33.000000000 +0000
+++ 1.2.0+dfsg-2/Source/Lib/Common/ASM_SSSE3/aom_subpixel_8t_intrin_ssse3.c	2022-08-01 19:12:00.000000000 +0000
@@ -15,26 +15,7 @@
 #include "convolve.h"
 #include "transpose_sse2.h"
 
-// filters only for the 4_h8 convolution
-DECLARE_ALIGNED(16, static const uint8_t, filt1_4_h8[16]) = {
-    0, 1, 1, 2, 2, 3, 3, 4, 2, 3, 3, 4, 4, 5, 5, 6};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt2_4_h8[16]) = {
-    4, 5, 5, 6, 6, 7, 7, 8, 6, 7, 7, 8, 8, 9, 9, 10};
-
 // filters for 8_h8 and 16_h8
-DECLARE_ALIGNED(16, static const uint8_t, filt1_global[16]) = {
-    0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt2_global[16]) = {
-    2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt3_global[16]) = {
-    4, 5, 5, 6, 6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12};
-
-DECLARE_ALIGNED(16, static const uint8_t, filt4_global[16]) = {
-    6, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 12, 12, 13, 13, 14};
-
 DECLARE_ALIGNED(32, static const uint8_t, filt_h4[]) = {
     0,  1,  1, 2,  2,  3,  3,  4,  4, 5,  5,  6,  6,  7,  7,  8,  0,  1,  1,  2,  2,  3,
     3,  4,  4, 5,  5,  6,  6,  7,  7, 8,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,
@@ -154,37 +135,6 @@ filter8_1dfunction svt_aom_filter_block1
         }                                                                                          \
     }
 
-static INLINE void shuffle_filter_ssse3(const int16_t *const filter, __m128i *const f) {
-    const __m128i f_values = _mm_loadu_si128((const __m128i *)filter);
-    // pack and duplicate the filter values
-    f[0] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0200u));
-    f[1] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0604u));
-    f[2] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0a08u));
-    f[3] = _mm_shuffle_epi8(f_values, _mm_set1_epi16(0x0e0cu));
-}
-
-static INLINE __m128i convolve8_8_ssse3(const __m128i *const s, const __m128i *const f) {
-    // multiply 2 adjacent elements with the filter and add the result
-    const __m128i k_64 = _mm_set1_epi16(1 << 6);
-    const __m128i x0   = _mm_maddubs_epi16(s[0], f[0]);
-    const __m128i x1   = _mm_maddubs_epi16(s[1], f[1]);
-    const __m128i x2   = _mm_maddubs_epi16(s[2], f[2]);
-    const __m128i x3   = _mm_maddubs_epi16(s[3], f[3]);
-    __m128i       sum1, sum2;
-
-    // sum the results together, saturating only on the final step
-    // adding x0 with x2 and x1 with x3 is the only order that prevents
-    // outranges for all filters
-    sum1 = _mm_add_epi16(x0, x2);
-    sum2 = _mm_add_epi16(x1, x3);
-    // add the rounding offset early to avoid another saturated add
-    sum1 = _mm_add_epi16(sum1, k_64);
-    sum1 = _mm_adds_epi16(sum1, sum2);
-    // arithmetic-shift each 16-bit value right by 7 bits
-    sum1 = _mm_srai_epi16(sum1, 7);
-    return sum1;
-}
-
 static void svt_aom_filter_block1d4_h4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line,
                                              uint8_t *output_ptr, ptrdiff_t output_pitch,
                                              uint32_t output_height, const int16_t *filter) {
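The removed convolve8_8_ssse3() above evaluates an 8-tap FIR in Q7 fixed point: the partial products are summed, a bias of 64 (0.5 in Q7) is added before the final saturating add, and the result is shifted right by 7. A scalar model of that rounding and packing behavior (hypothetical helper; the intermediate 16-bit saturation of the SIMD adds is not modeled here):

    #include <stdint.h>

    /* Scalar model of the removed convolve8_8_ssse3(): Q7 taps, +64
     * rounding bias, arithmetic shift by 7, then unsigned-saturating
     * pack to 8 bits. Hypothetical helper for illustration. */
    static uint8_t convolve8_px(const uint8_t s[8], const int16_t k[8]) {
        int32_t sum = 64; /* rounding offset, 1 << 6 */
        for (int t = 0; t < 8; t++)
            sum += s[t] * k[t];
        sum >>= 7;                    /* taps are Q7 fixed point */
        if (sum < 0)   sum = 0;       /* _mm_packus_epi16 saturates */
        if (sum > 255) sum = 255;
        return (uint8_t)sum;
    }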
@@ -311,74 +261,6 @@ static void svt_aom_filter_block1d4_v4_s
     }
 }
 
-void svt_aom_filter_block1d4_h8_intrin_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line,
-                                             uint8_t *output_ptr, ptrdiff_t output_pitch,
-                                             uint32_t output_height, const int16_t *filter) {
-    __m128i      firstFilters, secondFilters, shuffle1, shuffle2;
-    __m128i      srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
-    __m128i      addFilterReg64, filtersReg, srcReg, minReg;
-    unsigned int i;
-
-    // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
-    addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
-    filtersReg     = _mm_loadu_si128((const __m128i *)filter);
-    // convert the 16-bit (short) taps to 8-bit (byte) and duplicate the
-    // same data into both lanes of the 128-bit register.
-    filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
-
-    // duplicate only the first 16 bits in the filter into the first lane
-    firstFilters = _mm_shufflelo_epi16(filtersReg, 0);
-    // duplicate only the third 16 bits in the filter into the first lane
-    secondFilters = _mm_shufflelo_epi16(filtersReg, 0xAAu);
-    // duplicate only the second 16 bits in the filter into the second lane
-    // firstFilters: k0 k1 k0 k1 k0 k1 k0 k1 k2 k3 k2 k3 k2 k3 k2 k3
-    firstFilters = _mm_shufflehi_epi16(firstFilters, 0x55u);
-    // duplicate only the fourth 16 bits in the filter into the second lane
-    // secondFilters: k4 k5 k4 k5 k4 k5 k4 k5 k6 k7 k6 k7 k6 k7 k6 k7
-    secondFilters = _mm_shufflehi_epi16(secondFilters, 0xFFu);
-
-    // loading the local filters
-    shuffle1 = _mm_loadu_si128((__m128i const *)filt1_4_h8);
-    shuffle2 = _mm_loadu_si128((__m128i const *)filt2_4_h8);
-
-    for (i = 0; i < output_height; i++) {
-        srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
-
-        // filter the source buffer
-        srcRegFilt1 = _mm_shuffle_epi8(srcReg, shuffle1);
-        srcRegFilt2 = _mm_shuffle_epi8(srcReg, shuffle2);
-
-        // multiply 2 adjacent elements with the filter and add the result
-        srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
-        srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
-        // extract the upper half of each register
-        srcRegFilt3 = _mm_srli_si128(srcRegFilt1, 8);
-        srcRegFilt4 = _mm_srli_si128(srcRegFilt2, 8);
-
-        minReg = _mm_min_epi16(srcRegFilt3, srcRegFilt2);
-
-        // add and saturate all the results together
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
-        srcRegFilt3 = _mm_max_epi16(srcRegFilt3, srcRegFilt2);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt3);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
-        // shift each 16-bit value right by 7 bits
-        srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
-        // pack each 16-bit value down to 8 bits
-        srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-        src_ptr += src_pixels_per_line;
-
-        // save only 4 bytes
-        *((int *)&output_ptr[0]) = _mm_cvtsi128_si32(srcRegFilt1);
-
-        output_ptr += output_pitch;
-    }
-}
-
 static void svt_aom_filter_block1d8_h4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line,
                                              uint8_t *output_ptr, ptrdiff_t output_pitch,
                                              uint32_t output_height, const int16_t *filter) {
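The removed 8-tap horizontal kernels order their saturating adds carefully: the two middle partial products are split into a min and a max, the outer pair plus the min is accumulated first, and the max is added last, so clipping can only happen as late as possible. A scalar model of that ordering (hypothetical helpers, for illustration):

    #include <stdint.h>

    /* Model of _mm_adds_epi16 on one lane. */
    static int16_t adds16(int32_t a, int32_t b) {
        int32_t s = a + b;
        return (int16_t)(s > 32767 ? 32767 : (s < -32768 ? -32768 : s));
    }

    /* Accumulation order used by the removed h8/v8 kernels: outer pair
     * first, then the smaller of the middle pair, then the larger.
     * Hypothetical helper. */
    static int16_t sum_partials(int16_t x0, int16_t x1, int16_t x2, int16_t x3) {
        const int16_t mn = x1 < x2 ? x1 : x2; /* _mm_min_epi16 */
        const int16_t mx = x1 < x2 ? x2 : x1; /* _mm_max_epi16 */
        int16_t s = adds16(x0, x3);
        s = adds16(s, mn);
        return adds16(s, mx);
    }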
@@ -518,168 +400,6 @@ static void svt_aom_filter_block1d8_v4_s
     }
 }
 
-void svt_aom_filter_block1d8_h8_intrin_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line,
-                                             uint8_t *output_ptr, ptrdiff_t output_pitch,
-                                             uint32_t output_height, const int16_t *filter) {
-    __m128i      firstFilters, secondFilters, thirdFilters, forthFilters, srcReg;
-    __m128i      filt1Reg, filt2Reg, filt3Reg, filt4Reg;
-    __m128i      srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt4;
-    __m128i      addFilterReg64, filtersReg, minReg;
-    unsigned int i;
-
-    // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
-    addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
-    filtersReg     = _mm_loadu_si128((const __m128i *)filter);
-    // convert the 16-bit (short) taps to 8-bit (byte) and duplicate the
-    // same data into both lanes of the 128-bit register.
-    filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
-
-    // duplicate only the first 16 bits (first and second byte)
-    // across the 128-bit register
-    firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
-    // duplicate only the second 16 bits (third and fourth byte)
-    // across the 128-bit register
-    secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
-    // duplicate only the third 16 bits (fifth and sixth byte)
-    // across the 128-bit register
-    thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
-    // duplicate only the fourth 16 bits (seventh and eighth byte)
-    // across the 128-bit register
-    forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
-    filt1Reg = _mm_loadu_si128((__m128i const *)filt1_global);
-    filt2Reg = _mm_loadu_si128((__m128i const *)filt2_global);
-    filt3Reg = _mm_loadu_si128((__m128i const *)filt3_global);
-    filt4Reg = _mm_loadu_si128((__m128i const *)filt4_global);
-
-    for (i = 0; i < output_height; i++) {
-        srcReg = _mm_loadu_si128((const __m128i *)(src_ptr - 3));
-
-        // filter the source buffer
-        srcRegFilt1 = _mm_shuffle_epi8(srcReg, filt1Reg);
-        srcRegFilt2 = _mm_shuffle_epi8(srcReg, filt2Reg);
-
-        // multiply 2 adjacent elements with the filter and add the result
-        srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
-        srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, secondFilters);
-
-        // filter the source buffer
-        srcRegFilt3 = _mm_shuffle_epi8(srcReg, filt3Reg);
-        srcRegFilt4 = _mm_shuffle_epi8(srcReg, filt4Reg);
-
-        // multiply 2 adjacent elements with the filter and add the result
-        srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, thirdFilters);
-        srcRegFilt4 = _mm_maddubs_epi16(srcRegFilt4, forthFilters);
-
-        // add and saturate all the results together
-        minReg      = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt4);
-
-        srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
-        // shift each 16-bit value right by 7 bits
-        srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
-        // pack each 16-bit value down to 8 bits
-        srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
-        src_ptr += src_pixels_per_line;
-
-        // save only 8 bytes
-        _mm_storel_epi64((__m128i *)&output_ptr[0], srcRegFilt1);
-
-        output_ptr += output_pitch;
-    }
-}
-
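Both removed h8 kernels broadcast their tap pairs with _mm_shuffle_epi8: after _mm_packs_epi16 reduces the eight 16-bit taps to bytes, a control vector made of one repeated two-byte index (0x0100, 0x0302, 0x0504, 0x0706) replicates a (k_even, k_odd) pair across all sixteen byte positions. In scalar terms (hypothetical helper):

    #include <stdint.h>

    /* Scalar view of the pshufb tap broadcast above: replicate tap pair
     * `pair` (bytes 2*pair and 2*pair+1 of the packed taps) across a
     * 16-byte vector. Hypothetical helper for illustration. */
    static void broadcast_tap_pair(const int8_t packed_taps[16], int pair,
                                   int8_t out[16]) {
        for (int i = 0; i < 16; i += 2) {
            out[i]     = packed_taps[2 * pair];     /* even byte of the pair */
            out[i + 1] = packed_taps[2 * pair + 1]; /* odd byte of the pair */
        }
    }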
-void svt_aom_filter_block1d8_v8_intrin_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pitch,
-                                             uint8_t *output_ptr, ptrdiff_t out_pitch,
-                                             uint32_t output_height, const int16_t *filter) {
-    __m128i      addFilterReg64, filtersReg, minReg;
-    __m128i      firstFilters, secondFilters, thirdFilters, forthFilters;
-    __m128i      srcRegFilt1, srcRegFilt2, srcRegFilt3, srcRegFilt5;
-    __m128i      srcReg1, srcReg2, srcReg3, srcReg4, srcReg5, srcReg6, srcReg7;
-    __m128i      srcReg8;
-    unsigned int i;
-
-    // create a register with 0,64,0,64,0,64,0,64,0,64,0,64,0,64,0,64
-    addFilterReg64 = _mm_set1_epi32((int)0x0400040u);
-    filtersReg     = _mm_loadu_si128((const __m128i *)filter);
-    // convert the 16-bit (short) taps to 8-bit (byte) and duplicate the
-    // same data into both lanes of the 128-bit register.
-    filtersReg = _mm_packs_epi16(filtersReg, filtersReg);
-
-    // duplicate only the first 16 bits in the filter
-    firstFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x100u));
-    // duplicate only the second 16 bits in the filter
-    secondFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x302u));
-    // duplicate only the third 16 bits in the filter
-    thirdFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x504u));
-    // duplicate only the fourth 16 bits in the filter
-    forthFilters = _mm_shuffle_epi8(filtersReg, _mm_set1_epi16(0x706u));
-
-    // load the first 7 rows of 8 bytes
-    srcReg1 = _mm_loadl_epi64((const __m128i *)src_ptr);
-    srcReg2 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch));
-    srcReg3 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 2));
-    srcReg4 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 3));
-    srcReg5 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 4));
-    srcReg6 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 5));
-    srcReg7 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 6));
-
-    for (i = 0; i < output_height; i++) {
-        // load the newest (eighth) row of 8 bytes
-        srcReg8 = _mm_loadl_epi64((const __m128i *)(src_ptr + src_pitch * 7));
-
-        // interleave adjacent source rows
-        srcRegFilt1 = _mm_unpacklo_epi8(srcReg1, srcReg2);
-        srcRegFilt3 = _mm_unpacklo_epi8(srcReg3, srcReg4);
-
-        // interleave adjacent source rows
-        srcRegFilt2 = _mm_unpacklo_epi8(srcReg5, srcReg6);
-        srcRegFilt5 = _mm_unpacklo_epi8(srcReg7, srcReg8);
-
-        // multiply 2 adjacent elements with the filter and add the result
-        srcRegFilt1 = _mm_maddubs_epi16(srcRegFilt1, firstFilters);
-        srcRegFilt3 = _mm_maddubs_epi16(srcRegFilt3, secondFilters);
-        srcRegFilt2 = _mm_maddubs_epi16(srcRegFilt2, thirdFilters);
-        srcRegFilt5 = _mm_maddubs_epi16(srcRegFilt5, forthFilters);
-
-        // add and saturate the results together
-        minReg      = _mm_min_epi16(srcRegFilt2, srcRegFilt3);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt5);
-        srcRegFilt2 = _mm_max_epi16(srcRegFilt2, srcRegFilt3);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, minReg);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, srcRegFilt2);
-        srcRegFilt1 = _mm_adds_epi16(srcRegFilt1, addFilterReg64);
-
-        // shift each 16-bit value right by 7 bits
-        srcRegFilt1 = _mm_srai_epi16(srcRegFilt1, 7);
-
-        // pack each 16-bit value down to 8 bits
-        srcRegFilt1 = _mm_packus_epi16(srcRegFilt1, srcRegFilt1);
-
-        src_ptr += src_pitch;
-
-        // shift down a row
-        srcReg1 = srcReg2;
-        srcReg2 = srcReg3;
-        srcReg3 = srcReg4;
-        srcReg4 = srcReg5;
-        srcReg5 = srcReg6;
-        srcReg6 = srcReg7;
-        srcReg7 = srcReg8;
-
-        // save the 8-byte convolve result
-        _mm_storel_epi64((__m128i *)&output_ptr[0], srcRegFilt1);
-
-        output_ptr += out_pitch;
-    }
-}
-
 static void svt_aom_filter_block1d16_h4_ssse3(const uint8_t *src_ptr, ptrdiff_t src_pixels_per_line,
                                               uint8_t *output_ptr, ptrdiff_t output_pitch,
                                               uint32_t output_height, const int16_t *filter) {
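The removed svt_aom_filter_block1d8_v8_intrin_ssse3() above keeps the last seven source rows resident in registers, issues exactly one new load per output row, and then rotates the window. A scalar sketch of that pattern (hypothetical function; row[] stands in for srcReg1..srcReg8, width fixed at 8 pixels):

    #include <stddef.h>
    #include <stdint.h>

    /* Sliding-window pattern of the removed 8-tap vertical kernel:
     * seven rows stay live, one new row is read per output line, and
     * the window rotates. Hypothetical scalar sketch. */
    static void vert8_w8_sliding(const uint8_t *src, ptrdiff_t src_pitch,
                                 uint8_t *dst, ptrdiff_t dst_pitch,
                                 uint32_t height, const int16_t taps[8]) {
        const uint8_t *row[8];
        for (int r = 0; r < 7; r++)
            row[r] = src + r * src_pitch;
        for (uint32_t y = 0; y < height; y++) {
            row[7] = src + (y + 7) * src_pitch; /* the only new load */
            for (int x = 0; x < 8; x++) {
                int32_t sum = 64; /* Q7 rounding bias */
                for (int t = 0; t < 8; t++)
                    sum += row[t][x] * taps[t];
                sum >>= 7;
                dst[x] = (uint8_t)(sum < 0 ? 0 : (sum > 255 ? 255 : sum));
            }
            for (int r = 0; r < 7; r++)
                row[r] = row[r + 1]; /* rotate the window down one row */
            dst += dst_pitch;
        }
    }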
@@ -861,398 +581,5 @@ static void svt_aom_filter_block1d16_v4_
     }
 }
 
-static INLINE __m128i shuffle_filter_convolve8_8_ssse3(const __m128i *const s,
-                                                       const int16_t *const filter) {
-    __m128i f[4];
-    shuffle_filter_ssse3(filter, f);
-    return convolve8_8_ssse3(s, f);
-}
-
-static INLINE void load_8bit_8x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) {
-    d[0] = _mm_loadl_epi64((const __m128i *)(s + 0 * stride));
-    d[1] = _mm_loadl_epi64((const __m128i *)(s + 1 * stride));
-    d[2] = _mm_loadl_epi64((const __m128i *)(s + 2 * stride));
-    d[3] = _mm_loadl_epi64((const __m128i *)(s + 3 * stride));
-}
-
-static INLINE void load_8bit_8x8(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) {
-    load_8bit_8x4(s + 0 * stride, stride, &d[0]);
-    load_8bit_8x4(s + 4 * stride, stride, &d[4]);
-}
-
-static INLINE void load_8bit_4x4(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) {
-    d[0] = _mm_cvtsi32_si128(*(const int *)(s + 0 * stride));
-    d[1] = _mm_cvtsi32_si128(*(const int *)(s + 1 * stride));
-    d[2] = _mm_cvtsi32_si128(*(const int *)(s + 2 * stride));
-    d[3] = _mm_cvtsi32_si128(*(const int *)(s + 3 * stride));
-}
-
-static INLINE void load_8bit_4x8(const uint8_t *const s, const ptrdiff_t stride, __m128i *const d) {
-    load_8bit_4x4(s + 0 * stride, stride, &d[0]);
-    load_8bit_4x4(s + 4 * stride, stride, &d[4]);
-}
-
-static INLINE void store_8bit_8x8(const __m128i *const s, uint8_t *const d,
-                                  const ptrdiff_t stride) {
-    _mm_storel_epi64((__m128i *)(d + 0 * stride), s[0]);
-    _mm_storel_epi64((__m128i *)(d + 1 * stride), s[1]);
-    _mm_storel_epi64((__m128i *)(d + 2 * stride), s[2]);
-    _mm_storel_epi64((__m128i *)(d + 3 * stride), s[3]);
-    _mm_storel_epi64((__m128i *)(d + 4 * stride), s[4]);
-    _mm_storel_epi64((__m128i *)(d + 5 * stride), s[5]);
-    _mm_storel_epi64((__m128i *)(d + 6 * stride), s[6]);
-    _mm_storel_epi64((__m128i *)(d + 7 * stride), s[7]);
-}
-
-static INLINE void store_8bit_4x4(const __m128i *const s, uint8_t *const d,
-                                  const ptrdiff_t stride) {
-    *(int *)(d + 0 * stride) = _mm_cvtsi128_si32(s[0]);
-    *(int *)(d + 1 * stride) = _mm_cvtsi128_si32(s[1]);
-    *(int *)(d + 2 * stride) = _mm_cvtsi128_si32(s[2]);
-    *(int *)(d + 3 * stride) = _mm_cvtsi128_si32(s[3]);
-}
-
-static INLINE void loadu_8bit_16x4(const uint8_t *const s, const ptrdiff_t stride,
-                                   __m128i *const d) {
-    d[0] = _mm_loadu_si128((const __m128i *)(s + 0 * stride));
-    d[1] = _mm_loadu_si128((const __m128i *)(s + 1 * stride));
-    d[2] = _mm_loadu_si128((const __m128i *)(s + 2 * stride));
-    d[3] = _mm_loadu_si128((const __m128i *)(s + 3 * stride));
-}
-
-static INLINE void loadu_8bit_16x8(const uint8_t *const s, const ptrdiff_t stride,
-                                   __m128i *const d) {
-    loadu_8bit_16x4(s + 0 * stride, stride, &d[0]);
-    loadu_8bit_16x4(s + 4 * stride, stride, &d[4]);
-}
-
-static void svt_filter_horiz_w8_ssse3(const uint8_t *const src, const ptrdiff_t src_stride,
-                                      uint8_t *const dst, const int16_t *const x_filter) {
-    __m128i s[8], ss[4], temp;
-
-    load_8bit_8x8(src, src_stride, s);
-    // 00 01 10 11 20 21 30 31  40 41 50 51 60 61 70 71
-    // 02 03 12 13 22 23 32 33  42 43 52 53 62 63 72 73
-    // 04 05 14 15 24 25 34 35  44 45 54 55 64 65 74 75
-    // 06 07 16 17 26 27 36 37  46 47 56 57 66 67 76 77
-    transpose_16bit_4x8(s, ss);
-    temp = shuffle_filter_convolve8_8_ssse3(ss, x_filter);
-    // pack each 16-bit value down to 8 bits
-    temp = _mm_packus_epi16(temp, temp);
-    // save the 8-byte convolve result
-    _mm_storel_epi64((__m128i *)dst, temp);
-}
-
-static void svt_transpose8x8_to_dst(const uint8_t *const src, const ptrdiff_t src_stride,
-                                    uint8_t *const dst, const ptrdiff_t dst_stride) {
-    __m128i s[8];
-
-    load_8bit_8x8(src, src_stride, s);
-    transpose_8bit_8x8(s, s);
-    store_8bit_8x8(s, dst, dst_stride);
-}
-
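The removed horizontal scaler never filters along a row directly: svt_filter_horiz_w8_ssse3() transposes an 8x8 tile so the eight horizontal taps line up as column data, runs the same convolve kernel as the vertical path, and svt_transpose8x8_to_dst() transposes the results back. Conceptually (hypothetical scalar helper operating on plain byte tiles):

    #include <stdint.h>

    /* Conceptual model of the transpose-filter-transpose scheme above:
     * filtering rows of the transposed tile is the same as filtering
     * columns of the original. Hypothetical helper. */
    static void transpose8x8(const uint8_t in[8][8], uint8_t out[8][8]) {
        for (int r = 0; r < 8; r++)
            for (int c = 0; c < 8; c++)
                out[c][r] = in[r][c];
    }

This lets one convolve kernel serve both directions at the cost of two transposes per tile.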
-static void svt_scaledconvolve_horiz_w8(const uint8_t *src, const ptrdiff_t src_stride,
-                                        uint8_t *dst, const ptrdiff_t dst_stride,
-                                        const InterpKernel *const x_filters, const int x0_q4,
-                                        const int x_step_q4, const int w, const int h) {
-    DECLARE_ALIGNED(16, uint8_t, temp[8 * 8]);
-    int x, y, z;
-    src -= SUBPEL_TAPS / 2 - 1;
-
-    // This function processes 8x8 areas. The intermediate height is not always
-    // a multiple of 8, so round it up to a multiple of 8 here.
-    y = h + (8 - (h & 0x7));
-
-    do {
-        int x_q4 = x0_q4;
-        for (x = 0; x < w; x += 8) {
-            // process 8 src_x steps
-            for (z = 0; z < 8; ++z) {
-                const uint8_t *const src_x    = &src[x_q4 >> SUBPEL_BITS];
-                const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
-                if (x_q4 & SUBPEL_MASK) {
-                    svt_filter_horiz_w8_ssse3(src_x, src_stride, temp + (z * 8), x_filter);
-                } else {
-                    int i;
-                    for (i = 0; i < 8; ++i) { temp[z * 8 + i] = src_x[i * src_stride + 3]; }
-                }
-                x_q4 += x_step_q4;
-            }
-
-            // transpose the 8x8 filtered values back to dst
-            svt_transpose8x8_to_dst(temp, 8, dst + x, dst_stride);
-        }
-
-        src += src_stride * 8;
-        dst += dst_stride * 8;
-    } while (y -= 8);
-}
-
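Source positions in the removed scaled-convolve paths advance in 4-bit fixed point: the upper bits of x_q4 select the source pixel and the low four bits select one of sixteen interpolation phases, with phase 0 short-circuiting to a plain copy. A sketch of the stepping (hypothetical helper; SUBPEL_BITS is 4 and SUBPEL_MASK is 15 in this code):

    /* Walk the Q4 fixed-point source positions exactly as the removed
     * scalers do, splitting each into (integer pixel, filter phase).
     * Hypothetical helper for illustration. */
    static void split_positions(int x0_q4, int x_step_q4, int count,
                                int *src_px, int *phase) {
        int x_q4 = x0_q4;
        for (int i = 0; i < count; i++) {
            src_px[i] = x_q4 >> 4;  /* SUBPEL_BITS */
            phase[i]  = x_q4 & 15;  /* SUBPEL_MASK; 0 selects the copy path */
            x_q4 += x_step_q4;
        }
    }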
-static void svt_filter_horiz_w4_ssse3(const uint8_t *const src, const ptrdiff_t src_stride,
-                                      uint8_t *const dst, const int16_t *const filter) {
-    __m128i s[4];
-    __m128i temp;
-
-    load_8bit_8x4(src, src_stride, s);
-    transpose_16bit_4x4(s, s);
-
-    temp = shuffle_filter_convolve8_8_ssse3(s, filter);
-    // pack each 16-bit value down to 8 bits
-    temp = _mm_packus_epi16(temp, temp);
-    // save only 4 bytes
-    *(int *)dst = _mm_cvtsi128_si32(temp);
-}
-
-static void svt_transpose4x4_to_dst(const uint8_t *const src, const ptrdiff_t src_stride,
-                                    uint8_t *const dst, const ptrdiff_t dst_stride) {
-    __m128i s[4];
-
-    load_8bit_4x4(src, src_stride, s);
-    s[0] = transpose_8bit_4x4(s);
-    s[1] = _mm_srli_si128(s[0], 4);
-    s[2] = _mm_srli_si128(s[0], 8);
-    s[3] = _mm_srli_si128(s[0], 12);
-    store_8bit_4x4(s, dst, dst_stride);
-}
-
-static void svt_scaledconvolve_horiz_w4(const uint8_t *src, const ptrdiff_t src_stride,
-                                        uint8_t *dst, const ptrdiff_t dst_stride,
-                                        const InterpKernel *const x_filters, const int x0_q4,
-                                        const int x_step_q4, const int w, const int h) {
-    DECLARE_ALIGNED(16, uint8_t, temp[4 * 4]);
-    int x, y, z;
-    src -= SUBPEL_TAPS / 2 - 1;
-
-    for (y = 0; y < h; y += 4) {
-        int x_q4 = x0_q4;
-        for (x = 0; x < w; x += 4) {
-            // process 4 src_x steps
-            for (z = 0; z < 4; ++z) {
-                const uint8_t *const src_x    = &src[x_q4 >> SUBPEL_BITS];
-                const int16_t *const x_filter = x_filters[x_q4 & SUBPEL_MASK];
-                if (x_q4 & SUBPEL_MASK) {
-                    svt_filter_horiz_w4_ssse3(src_x, src_stride, temp + (z * 4), x_filter);
-                } else {
-                    int i;
-                    for (i = 0; i < 4; ++i) { temp[z * 4 + i] = src_x[i * src_stride + 3]; }
-                }
-                x_q4 += x_step_q4;
-            }
-
-            // transpose the 4x4 filtered values back to dst
-            svt_transpose4x4_to_dst(temp, 4, dst + x, dst_stride);
-        }
-
-        src += src_stride * 4;
-        dst += dst_stride * 4;
-    }
-}
-
-static __m128i svt_filter_vert_kernel(const __m128i *const s, const int16_t *const filter) {
-    __m128i ss[4];
-    __m128i temp;
-
-    // 00 10 01 11 02 12 03 13
-    ss[0] = _mm_unpacklo_epi8(s[0], s[1]);
-    // 20 30 21 31 22 32 23 33
-    ss[1] = _mm_unpacklo_epi8(s[2], s[3]);
-    // 40 50 41 51 42 52 43 53
-    ss[2] = _mm_unpacklo_epi8(s[4], s[5]);
-    // 60 70 61 71 62 72 63 73
-    ss[3] = _mm_unpacklo_epi8(s[6], s[7]);
-
-    temp = shuffle_filter_convolve8_8_ssse3(ss, filter);
-    // pack each 16-bit value down to 8 bits
-    return _mm_packus_epi16(temp, temp);
-}
-
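svt_filter_vert_kernel() above interleaves row pairs with _mm_unpacklo_epi8 so that each byte pair fed to _mm_maddubs_epi16 holds the same pixel from two adjacent rows; one multiply-add then applies two taps at once. Per output lane this computes the following (hypothetical scalar helper):

    #include <stdint.h>

    /* One 16-bit lane of _mm_maddubs_epi16 after the row interleave:
     * pixel x of rows 2j and 2j+1, multiplied by taps k(2j) and k(2j+1)
     * and summed. Hypothetical helper for illustration. */
    static int16_t madd_row_pair(uint8_t row_a_px, uint8_t row_b_px,
                                 int8_t tap_a, int8_t tap_b) {
        int32_t s = row_a_px * tap_a + row_b_px * tap_b;
        /* _mm_maddubs_epi16 saturates the pairwise sum to 16 bits */
        if (s > 32767)  s = 32767;
        if (s < -32768) s = -32768;
        return (int16_t)s;
    }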
-static void svt_filter_vert_w4_ssse3(const uint8_t *const src, const ptrdiff_t src_stride,
-                                     uint8_t *const dst, const int16_t *const filter) {
-    __m128i s[8];
-    __m128i temp;
-
-    load_8bit_4x8(src, src_stride, s);
-    temp = svt_filter_vert_kernel(s, filter);
-    // save only 4 bytes
-    *(int *)dst = _mm_cvtsi128_si32(temp);
-}
-
-static void svt_scaledconvolve_vert_w4(const uint8_t *src, const ptrdiff_t src_stride,
-                                       uint8_t *const dst, const ptrdiff_t dst_stride,
-                                       const InterpKernel *const y_filters, const int y0_q4,
-                                       const int y_step_q4, const int w, const int h) {
-    int y;
-    int y_q4 = y0_q4;
-
-    src -= src_stride * (SUBPEL_TAPS / 2 - 1);
-    for (y = 0; y < h; ++y) {
-        const unsigned char *src_y    = &src[(y_q4 >> SUBPEL_BITS) * src_stride];
-        const int16_t *const y_filter = y_filters[y_q4 & SUBPEL_MASK];
-
-        if (y_q4 & SUBPEL_MASK) {
-            svt_filter_vert_w4_ssse3(src_y, src_stride, &dst[y * dst_stride], y_filter);
-        } else {
-            memcpy(&dst[y * dst_stride], &src_y[3 * src_stride], w);
-        }
-
-        y_q4 += y_step_q4;
-    }
-}
-
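When the subpel phase is zero the removed vertical scaler copies instead of filtering; the 3 * src_stride offset undoes the earlier src -= src_stride * (SUBPEL_TAPS / 2 - 1) rebase, since tap 3 of an 8-tap window is the identity position. A minimal sketch (hypothetical helper; SUBPEL_TAPS is 8 here):

    #include <stddef.h>
    #include <string.h>

    /* Phase-0 fast path of the removed vertical scaler: with src
     * rebased up by SUBPEL_TAPS / 2 - 1 = 3 rows, row 3 of the 8-tap
     * window is the unfiltered source row, so the output is a plain
     * copy. Hypothetical helper for illustration. */
    static void vert_phase0_copy(unsigned char *dst, const unsigned char *src_y,
                                 ptrdiff_t src_stride, int w) {
        memcpy(dst, src_y + 3 * src_stride, (size_t)w);
    }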
-static void svt_filter_vert_w8_ssse3(const uint8_t *const src, const ptrdiff_t src_stride,
-                                     uint8_t *const dst, const int16_t *const filter) {
-    __m128i s[8], temp;
-
-    load_8bit_8x8(src, src_stride, s);
-    temp = svt_filter_vert_kernel(s, filter);
-    // save the 8-byte convolve result
-    _mm_storel_epi64((__m128i *)dst, temp);
-}
-
-static void svt_scaledconvolve_vert_w8(const uint8_t *src, const ptrdiff_t src_stride,
-                                       uint8_t *const dst, const ptrdiff_t dst_stride,
-                                       const InterpKernel *const y_filters, const int y0_q4,
