diff -pruN 7.2.0~b2-1/.github/workflows/ci_papi_framework.sh 7.2.0-1/.github/workflows/ci_papi_framework.sh
--- 7.2.0~b2-1/.github/workflows/ci_papi_framework.sh	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/.github/workflows/ci_papi_framework.sh	2025-06-25 22:38:10.000000000 +0000
@@ -50,7 +50,6 @@ esac
 # test linking with or without --with-shlib-tools 
 if [ "$SHLIB" = "without" ]; then
     ./configure --with-debug=$DEBUG --enable-warnings --with-components="$COMPONENTS"
-    ./configure --with-debug=$DEBUG --enable-warnings --with-components="$COMPONENTS" --with-shlib-tools
 else
     ./configure --with-debug=$DEBUG --enable-warnings --with-components="$COMPONENTS" --with-shlib-tools
 fi
@@ -62,7 +61,7 @@ utils/papi_component_avail
 
 # active component check
 CURRENT_ACTIVE_COMPONENTS=$(utils/papi_component_avail | grep -A1000 'Active components' | grep "Name:   " | awk '{printf "%s%s", sep, $2; sep=" "} END{print ""}')
-if [ "$COMPONENTS" = "cuda nvml rocm rocm_smi powercap powercap_ppc rapl sensors_ppc infiniband net appio io lustre stealtime coretemp lmsensors mx sde" ]; then 
+if [ "$COMPONENTS" = "cuda nvml rocm rocm_smi powercap powercap_ppc rapl sensors_ppc net appio io lustre stealtime coretemp lmsensors mx sde" ]; then 
     [ "$CURRENT_ACTIVE_COMPONENTS" = "perf_event perf_event_uncore cuda nvml powercap net appio io stealtime coretemp lmsensors sde sysdetect" ]
 elif [ "$COMPONENTS" = "rocm rocm_smi" ]; then
     [ "$CURRENT_ACTIVE_COMPONENTS" = "perf_event perf_event_uncore rocm rocm_smi sysdetect" ]
diff -pruN 7.2.0~b2-1/.github/workflows/papi_framework_workflow.yml 7.2.0-1/.github/workflows/papi_framework_workflow.yml
--- 7.2.0~b2-1/.github/workflows/papi_framework_workflow.yml	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/.github/workflows/papi_framework_workflow.yml	2025-06-25 22:38:10.000000000 +0000
@@ -4,11 +4,9 @@ on:
   pull_request:
     # run CI if framework receives an update excluding individual 
     # components and counter analysis toolkit
-    paths:
-      - 'src/**'
-      - 'src/components/*'
-      - '!src/components/*/**'
-      - '!src/counter_analysis_toolkit/**'
+    paths-ignore:
+      - 'src/components/*/**'
+      - 'src/counter_analysis_toolkit/**'
   # allows you to run this workflow manually from the Actions tab
   workflow_dispatch:
 
@@ -19,7 +17,7 @@ jobs:
   papi_components_comprehensive:
     strategy:
       matrix:
-        components: [cuda nvml rocm rocm_smi powercap powercap_ppc rapl sensors_ppc infiniband net appio io lustre stealtime coretemp lmsensors mx sde]
+        components: [cuda nvml rocm rocm_smi powercap powercap_ppc rapl sensors_ppc net appio io lustre stealtime coretemp lmsensors mx sde]
         debug: [yes, no]
         shlib: [with, without]
       fail-fast: false
diff -pruN 7.2.0~b2-1/ChangeLogP720.txt 7.2.0-1/ChangeLogP720.txt
--- 7.2.0~b2-1/ChangeLogP720.txt	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/ChangeLogP720.txt	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,881 @@
+2025-06-25  Treece Burgess <tburgess@icl.utk.edu>
+
+	* doc/Doxyfile-common, papi.spec, src/Makefile.in, src/configure.in,
+	  src/papi.h: The version numbers for doc/Doxyfile-common, papi.spec,
+	  src/Makefile.in, src/configure.in, and src/papi.h have been
+	  updated.
+
+2025-06-13  Heike Jagode <jagode@icl.utk.edu>
+
+	* RELEASENOTES.txt: Prepared Release Notes for PAPI 7.2.0 release.
+
+2025-06-17  Treece Burgess <tburgess@gilgamesh.nic.uoregon.edu>
+
+	* src/components/rocm/tests/sample_overflow_monitoring.cpp: rocm:
+	  Skip the test sample_overflow_monitoring.cpp.
+
+2025-06-20  Anthony Danalis <adanalis@odyssey.nic.uoregon.edu>
+
+	* src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: Ensure env
+	  variables are always respected.
+	* src/components/rocp_sdk/rocp_sdk.c: ROCP_SDK: Improve the file/dir
+	  check to skip "." and ".."
+	* src/components/rocp_sdk/rocp_sdk.c: ROCP_SDK: use path instead of
+	  hsa to test for devices.
+
+2025-06-16  Daniel Barry <dbarry@vols.utk.edu>
+
+	* .../rocm/tests/multi_thread_monitoring.cpp: rocm: fix segmentation
+	  fault in component test  On Frontier, the invocation of the exit()
+	  call before pthread_merge() causes a segmentation fault. I remedy
+	  this issue by only calling test_warn(), test_fail(), and
+	  hip_test_fail() after the threads have been merged.  These changes
+	  were tested using ROCm versions 6.1.3, 6.2.0, 6.2.4, 6.3.1, and
+	  6.4.0 with the AMD MI250X architecture on the Frontier
+	  supercomputer.
+
+2025-06-12  Gerald Ragghianti <ragghianti@icl.utk.edu>
+
+	* src/components/rocm/tests/Makefile,
+	  src/components/rocm_smi/tests/Makefile: rocm/rocm_smi: Allow users
+	  to optionally set HIPCC.
+
+2025-06-10  Treece Burgess <tburgess@gilgamesh.nic.uoregon.edu>
+
+	* src/components/cuda/linux-cuda.c, src/components/rocm/rocm.c,
+	  src/components/template/template.c: cuda/rocm components:
+	  Restructure update_native_events to not call realloc on a size of
+	  0.
+
+2025-06-11  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/configure, src/configure.in: configure: Add a warning message
+	  if rocm and rocp_sdk are configured together.
+
+2025-06-04  Treece Burgess <tburgess@odyssey.nic.uoregon.edu>
+
+	* src/components/rapl/linux-rapl.c: RAPL Component: Add support in
+	  RAPL for Intel Emerald Rapids. Note at this time the PAPI team does
+	  not have access to a machine with an Intel Emerald Rapids CPU to
+	  verify this addition.
+
+2025-06-12  Treece Burgess <tburgess@pinwheel>
+
+	* src/components/rocp_sdk/tests/Makefile: rocp_sdk: In the tests
+	  Makefile account for CPU agents on amd64.
+
+2025-06-12  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/components/intel_gpu/README.md: intel_gpu: update environment
+	  variable name  On a system containing the Intel Arc A770 device, I
+	  am met with the following warning:  ZET_ENABLE_API_TRACING_EXP is
+	  deprecated. Use ZE_ENABLE_TRACING_LAYER instead.  The current
+	  README states to set ZET_ENABLE_API_TRACING_EXP; however,
+	  ZE_ENABLE_TRACING_LAYER is the correct variable to set. Setting
+	  ZE_ENABLE_TRACING_LAYER prevents the above warning.
+
+2025-06-10  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/components/cuda/linux-cuda.c, src/papi.c, src/papi_internal.c,
+	  src/papi_preset.c, src/papi_vector.c, src/papi_vector.h: framework:
+	  force init per existing policy  PR #284 introduced code that always
+	  forced the initialization of all components. However, this defeats
+	  the purpose of having PAPI_EDELAY_INIT.  The changes in this pull
+	  request only force initialization of components when necessary.
+	  These changes have been tested on systems containing: - NVIDIA
+	  Hopper architecture - AMD Zen3 CPU and AMD MI250X GPU architectures
+	  (Frontier).
+
+2025-06-07  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/intel_gpu/Rules.intel_gpu: intel_gpu: Remove -DDEBUG
+	  from Rules.intel_gpu.
+
+2025-06-06  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/rocm/README.md: rocm: Update the component README.md
+	  to account for new limitations.
+
+2025-06-09  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/sysdetect/sysdetect.c: sysdetect: Add newline
+	  characters to the SUBDBG messages.
+
+2025-06-06  Anthony <adanalis@icl.utk.edu>
+
+	* src/components/rocm/rocm.c: ROCM: PAPI_strerror() cannot be used at
+	  shutdown.
+
+2025-06-05  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/papi.c: PAPI_list_events: Update function's documentation to
+	  match the function prototype.
+
+2025-06-04  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/rocp_sdk.c: ROCP_SDK: Handle case where all
+	  events are removed.
+
+2025-06-03  Treece Burgess <tburgess@odyssey.nic.uoregon.edu>
+
+	* src/components/rocp_sdk/rocp_sdk.c: rocp_sdk: Remove assignment of
+	  info->event_code and info->component_index in rocp_sdk as it is
+	  already done in papi_internal.c.
+
+2025-06-02  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/papi_events.csv: PAPI Presets: Update AMD Family 17h to account
+	  for PMCx080 and PMCx081 reporting incorrect IC accesses and misses
+	  respectively. PMCx060 unit mask 0x10 replaces PMCx081, but there is
+	  no suitable replacement for PMCx080 therefore those instances are
+	  removed.
+
+2025-05-29  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/coretemp/linux-coretemp.c,
+	  src/components/cuda/cupti_profiler.c,
+	  src/components/cuda/cupti_utils.c, src/components/cuda/htable.h,
+	  src/components/cuda/linux-cuda.c,
+	  src/components/cuda/papi_cupti_common.c,
+	  src/components/infiniband/linux-infiniband.c,
+	  src/components/net/linux-net.c, src/components/rocm/htable.h,
+	  src/components/rocm_smi/htable.h, src/components/rocm_smi/rocs.c:
+	  Various Components: Use only PAPI memory allocation or C memory
+	  allocation to avoid possible segmentation faults.
+
+2025-06-02  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/rocm_smi/linux-rocm-smi.c,
+	  src/components/rocp_sdk/rocp_sdk.c,
+	  src/components/template/template.c: rocm_smi/rocp_sdk: Restructure
+	  init_private functions to avoid setting initialized equal to 1 even
+	  when initialization fails.
+
+2025-05-29  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/infiniband/linux-infiniband.c,
+	  src/components/nvml/linux-nvml.c,
+	  src/components/sysdetect/sysdetect.c,
+	  src/components/topdown/topdown.c, src/components/topdown/topdown.h:
+	  Sysdetect/Topdown/Infiniband/NVML Components: Properly set .size in
+	  a components vector to avoid possible Error! PAPI_library_init.
+
+2025-05-27  Treece Burgess <tburgess@picard.nic.uoregon.edu>
+
+	* src/components/lmsensors/tests/lmsensors_read.c: lmsensors
+	  component: Remove restriction on the events chosen to be added to
+	  an eventset for the test lmsensors_read.c.
+
+Thu Sep 19 23:41:22 2024 -0700  Stephane Eranian <eranian@gmail.com>
+
+	* src/libpfm4/docs/man3/libpfm_intel_knl.3,
+	  src/libpfm4/docs/man3/libpfm_intel_knm.3,
+	  src/libpfm4/lib/events/amd64_events_fam1ah_zen5.h,
+	  src/libpfm4/lib/pfmlib_arm.c, src/libpfm4/lib/pfmlib_arm_armv6.c,
+	  src/libpfm4/lib/pfmlib_arm_armv7_pmuv1.c,
+	  src/libpfm4/lib/pfmlib_arm_armv8.c,
+	  src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c,
+	  src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c,
+	  src/libpfm4/lib/pfmlib_arm_armv9.c,
+	  src/libpfm4/lib/pfmlib_arm_perf_event.c,
+	  src/libpfm4/lib/pfmlib_arm_priv.h, src/libpfm4/lib/pfmlib_common.c,
+	  src/libpfm4/lib/pfmlib_intel_x86_perf_event.c,
+	  src/libpfm4/lib/pfmlib_perf_event.c,
+	  src/libpfm4/lib/pfmlib_perf_event_pmu.c,
+	  src/libpfm4/lib/pfmlib_perf_event_priv.h,
+	  src/libpfm4/lib/pfmlib_priv.h: Update libpfm4 Current with commit
+	  0727e5f5561101d8c635a36e139dd7512616d49e  add another perf_name for
+	  ARM Cortex-A57  PAPI developers with NVIDIA Jetson board and ARM
+	  Cortex-A57 reported that the Linux PMU type is "armv8_pmuv3" Add
+	  that name as a possible name to the list of perf_name for Cortex
+	  A57.   commit 75d2e605f763f3220793c3bb52a6b6effffe4d9c  fix AMD
+	  Zen5 umasks for L2_PREFETCH_MISS_L3 and L2_FILL_RESPONSE_SRC  The
+	  umasks tables were swapped between the two events. Simplify umasks
+	  names for L2_FILL_RESPONSE_SRC   commit
+	  c5587f9931123be6fcb6f8133497d93cab36bdcd  Hotfix ARM CPU detection
+	  due to arch mismatch  This is a hotfix to avoid failure of ARM CPU
+	  detection with the new detection code introduced by commit
+	  15c4cd9f1f4a ("Add ARM hybrid detection").  For some processors,
+	  the architecture revision expected by libpfm4 does not match the
+	  revision exported by the Linux kernel via cpuinfo. For instance,
+	  the Neoverse V2 is a V9 processor, yet cpuinfo reports arch: 8. A
+	  few other ARM processors may exhibit the same error.  The hotfix
+	  simply skips checking the arch revision for now.   commit
+	  b2888ea7995d781d1c59d9c8714487b863774912  Cope with empty
+	  /proc/cpuinfo file  When running inside e.g. lxc containers,
+	  /proc/cpuinfo may be empty, in which case pfmlib_getl() never
+	  allocates a buffer, and the trailing b[i] = '\0' thus becomes
+	  bogus.   commit f09c366b45fba75f1143cb14ec8f22ad96c4c1b1 Merge:
+	  e887d24 8ca3087  Merge /u/mousezhang/perfmon2/ branch master into
+	  master  https://sourceforge.net/p/perfmon2/libpfm4/merge-
+	  requests/32/  commit e887d24a6c4b97b8087e5a284c79f63adaab4fc0  Add
+	  sysfs PMU caching on initialization  In order to accommodate the
+	  growing number of PMUs active and to handle hybrid processors
+	  better, this patch adds sysfs PMU perf_events information caching
+	  to avoid going back to sysfs for each encoded event. The caching
+	  stores the name of PMU, e.g., armv8_pmu3, and the perf_events type
+	  which is then used to build the perf_events encoding.   commit
+	  ff3291fe3f6d2c280ed2e33c42842e5dc08f38df  Remove references to
+	  /sys/devices to remain compatible with upstream  The PMUs will not
+	  appear in /sys/devices for much longer. The proper way to access
+	  PMU directories is via: /sys/bus/event_source/devices/  Where each
+	  PMU has a symlink.  It should be noted that this alternate
+	  directory is not new. It has been there all along. Therefore it is
+	  okay to remove all references to /sys/devices.   commit
+	  a41f8eeedf2c81232e5fa9129928edf9215bf3fc  Add ARM hybrid encoding
+	  support for perf_events  This patch adds the new logic to handle
+	  encoding of the PMU type for the Linux perf_events interface.
+	  Hybrids are a challenge in that it is not possible to simply use
+	  PERF_TYPE_RAW because that does not disambiguate which of the core
+	  PMU models to attach the events to. Instead, the PMU type must be
+	  collected from the Linux sysfs interface. But for that to happen
+	  the library needs to know the PMU instance name assigned by
+	  perf_events for each PMU model detected. On ARM, this is not
+	  straightforward.  The patch extends the meaning of the
+	  pmu->perf_name string to include a comma separated list of names
+	  instead of just one. The library then tries each name until there
+	  is a match in /sys/bus/event_source/devices/. This accommodates
+	  situations where the same PMU model is used in a homogeneous vs.
+	  hybrid config.   commit 15c4cd9f1f4a382ef6753a05a5d4d6c27bd449c5
+	  Add ARM hybrid detection  This patch rewrites the ARM core PMU
+	  detection logic to handle the case of hybrid processors. On ARM,
+	  there can be many different cores in the same SoC. Each potentially
+	  shows up with a different implementer, part, variant. That means
+	  just looking at the first entry in cpuinfo on Linux is not enough
+	  to activate all supported event tables.  The new code parses the
+	  entire cpuinfo once and detects each unique core identifier. Then,
+	  for each core PMU table, the detection code checks against that
+	  pre-built list of detected core models. That way up to N (currently
+	  8) different core models can be detected.  This new detection code
+	  is provided for Linux. For other operating systems, new code must
+	  be added to get the implementer, part, variant codes for all cores
+	  in the system.  Thanks to Vince Weaver for providing the test cases
+	  to exercise this new code.   Testing: AMD Zen5 Update (Tested on a
+	  AMD Ryzen 9 9950X 16-Core Processor): - papi_avail - runs
+	  successfully and matches master branch - papi_component_avail -
+	  runs successfully and matches master branch - papi_native_avail -
+	  runs successfully and matches master branch - papi_command_line -
+	  runs successfully  I verified that with papi_native_avail we see
+	  the swapped umasks for L2_PREFETCH_MISS_L3 and
+	  L2_FILL_RESPONSE_SRC. Using the swapped umasks with
+	  papi_command_line works as expected.  ARM Updates (Tested on ARM
+	  Cortex A57, ARM Cortex A72, and ARM Neoverse V2): - papi_avail -
+	  runs successfully on all three models and matches master branch -
+	  papi_component_avail - runs successfully on all three models and
+	  matches master branch - papi_native_avail - runs successfully on
+	  all three models and matches master branch - papi_command_line -
+	  runs successfully on all three models  Note that for the ARM
+	  updates, this includes Vince's patch to resolve Issue #364.
+
+2025-05-23  Treece Burgess <tburgess@gilgamesh.nic.uoregon.edu>
+
+	* src/components/lmsensors/linux-lmsensors.c: lmsensors component:
+	  Replace fprintf with SUBDBG.
+
+2025-05-19  Treece Burgess <tburgess@gilgamesh.nic.uoregon.edu>
+
+	* src/components/cuda/linux-cuda.c: Cuda component: Initialize count
+	  variable in function cuda_init_private.
+
+2025-05-23  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: More verbose debug
+	  messages.
+	* src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: Do not overwrite
+	  library in PAPI_ROCP_SDK_LIB.
+	* src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: Cleanup dlopen()
+	  error handling.
+
+2025-05-21  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/papi_preset.c: framework: proper memory management functions
+	  This makes the usage of memory allocation and freeing functions
+	  consistent to prevent segmentation faults when using preset events.
+	  These changes were tested on the ARM Neoverse-V2 and NVIDIA Hopper
+	  architectures.
+
+2025-05-20  Anthony <adanalis@icl.utk.edu>
+
+	* src/components/rocm_smi/tests/Makefile: ROCM_SMI: Added -pthread
+	  flag in tests/Makefile.
+
+2025-05-20  Treece Burgess <tburgess@tellico-master0.local>
+
+	* src/utils/print_header.c: utils/print_header.c: Move for loop
+	  counter declaration out of for loop header.
+
+2025-05-18  G-Ragghianti <ragghianti@icl.utk.edu>
+
+	* src/components/rocp_sdk/rocp_sdk.c: Adding multiple search path
+	  functionality for libhsa
+
+2025-05-16  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/README: Remove perfctr and perfctr_ppc documentation
+	  from src/components README.
+
+2025-05-13  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/utils/papi_avail.c: utils: fix compiler warnings for
+	  papi_avail.c  Revert the structure of printf() statements to those
+	  prior to commit c214d8ca879ba5195d7cae1d8808e807ea2f812c, which
+	  inappropriately modified certain fields. This resulted in the
+	  following compiler warnings from GCC 13.3.0 (architecture: AMD
+	  Ryzen 9 9950X 16-Core CPU and NVIDIA GeForce RTX 5080 GPU):
+	  papi_avail.c: In function ‘main’: papi_avail.c:573:17: warning: too
+	  many arguments for format [-Wformat-extra-args] 573 |
+	  printf( "%-*s%-11s%-8s%-16s\n |Long Description|\n", maxSymLen, |
+	  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ papi_avail.c:687:21:
+	  warning: too many arguments for format [-Wformat-extra-args] 687 |
+	  printf( "%-*s%-11s%-8s%-16s\n |Long Description|\n", maxCompSymLen,
+	  |                     ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+	  These changes have been tested on systems containing the NVIDIA
+	  Hopper and Blackwell architectures.
+	* src/utils/papi_avail.c: utils: convert tabs in papi_avail.c to
+	  spaces  These changes have been tested on systems containing the
+	  NVIDIA Hopper and Blackwell architectures.
+
+2025-05-14  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/papi_memory.c, src/papi_memory.h: HEADERS: __FILE__ is "const
+	  char *", not "char *"
+	* src/components/rocp_sdk/sdk_class.hpp: ROCP_SDK: protect the
+	  included papi headers from C++
+
+2025-05-13  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/cuda/tests/HelloWorld.cu,
+	  src/components/cuda/tests/HelloWorld_noCuCtx.cu,
+	  src/components/cuda/tests/concurrent_profiling.cu,
+	  .../cuda/tests/concurrent_profiling_noCuCtx.cu,
+	  src/components/cuda/tests/cudaOpenMP.cu,
+	  src/components/cuda/tests/cudaOpenMP_noCuCtx.cu,
+	  src/components/cuda/tests/pthreads.cu,
+	  src/components/cuda/tests/pthreads_noCuCtx.cu,
+	  src/components/cuda/tests/runtest.sh,
+	  src/components/cuda/tests/simpleMultiGPU.cu,
+	  .../cuda/tests/simpleMultiGPU_noCuCtx.cu,
+	  .../cuda/tests/test_2thr_1gpu_not_allowed.cu,
+	  .../cuda/tests/test_multi_read_and_reset.cu: Cuda component: Update
+	  tests to more gracefully handle multiple pass events.
+
+2025-05-13  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/README.md: ROCP_SDK: Update README with
+	  linking limitations.
+
+2025-05-12  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/appio/appio.c: Appio Component: Add a component
+	  description, as it is missing from papi_component_avail
+	* src/components/rocm/roc_profiler.c: ROCm component: Bug fix for
+	  typo in rocm_verify_no_repeated_qualifiers.
+
+2025-05-10  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/configure, src/configure.in: Update configure.in to have a
+	  default value for --with-debug if not provided by a user.
+
+2025-05-06  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/configure, src/configure.in: Configure: Correctly output the
+	  tests chosen by a user with --with-tests.
+
+2025-05-08  Treece Burgess <tburgess@hopper1.nic.uoregon.edu>
+
+	* src/components/cuda/cupti_dispatch.c: Cuda component: Properly set
+	  return value in cuptid_init.
+
+2024-11-07  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/utils/papi_avail.c: utils: papi_avail extension for component
+	  presets  Enumerate presets for components as well as the CPU.
+	  These changes have been tested on the NVIDIA Grace-Hopper
+	  architecture.
+	* src/utils/papi_avail.c: utils: new modifiers for strictly CPU
+	  presets  Replace modifiers with only those that enumerate the CPU
+	  preset events.  These changes have been tested on the NVIDIA Grace-
+	  Hopper architecture.
+
+2024-10-31  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/utils/papi_avail.c: utils: convert tabs to spaces in
+	  papi_avail.c  This is an aesthetic change to improve the
+	  development process.  These changes have been tested on the NVIDIA
+	  Grace-Hopper architecture.
+	* src/papi.c, src/papi.h, src/papi_common_strings.h,
+	  src/papi_internal.c, src/papi_internal.h, src/papi_preset.c,
+	  src/papi_preset.h: framework: support for component presets
+	  Updates to framework to facilitate preset events defined by native
+	  events of non-perf_event components.  These changes have been
+	  tested on the NVIDIA Hopper architecture.
+	* src/Makefile.inc, src/configure, src/configure.in,
+	  src/papiStdEventDefs.h: config: updates for component presets
+	  Update configure to track both the number of presets per component
+	  and the arrays of presets belonging to each component.  These
+	  changes have been tested on the NVIDIA Hopper architecture.
+
+2024-10-28  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/papi_events.csv: presets: support for NVIDIA Hopper and Ampere
+	* src/components/cuda/cupti_dispatch.c,
+	  src/components/cuda/cupti_dispatch.h,
+	  src/components/cuda/cupti_profiler.c, src/components/cuda/linux-
+	  cuda.c, src/components/cuda/papi_cuda_presets.h,
+	  src/components/cuda/papi_cuda_std_event_defs.h,
+	  src/components/cuda/papi_cupti_common.c,
+	  src/components/cuda/papi_cupti_common.h: cuda: updates for presets
+	  Add functions to facilitate CUDA presets.  These changes have been
+	  tested on the NVIDIA Hopper architecture.
+
+2024-10-31  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/papi_vector.c, src/papi_vector.h: framework: fields for
+	  component presets  Create fields in the vector struct for
+	  components to define presets.  These changes have been tested on
+	  the NVIDIA Hopper architecture.
+
+2024-12-20  Dandan Zhang <zhangdandan@loongson.cn>
+
+	* src/linux-context.h, src/linux-timer.c, src/mb.h: Add loongarch64
+	  support
+
+2025-05-06  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/cuda/cupti_profiler.c,
+	  src/components/rocm/roc_profiler.c: ROCm component: Add stricter
+	  qualifiers checks.
+	* src/components/cuda/cupti_profiler.c: Cuda component: Add stricter
+	  qualifiers checks.
+
+2025-05-05  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/coretemp/linux-coretemp.c: Coretemp: Enable support
+	  for multiplexing.
+
+2025-05-06  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: Improved handling
+	  of pathological paths.
+
+2025-05-03  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/cuda/cupti_profiler.c: Cuda component: Replace int
+	  typing with long long to avoid overflow with measured values.
+
+2025-05-06  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/sdk_class.cpp,
+	  src/components/rocp_sdk/sdk_class.hpp: ROCP_SDK: Force fail if
+	  PAPI_ROCP_SDK_LIB is bogus.
+
+2025-05-05  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/rocp_sdk.c: ROCP_SDK: Suppress ROCprofiler-
+	  SDK warnings.
+	* src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: Enable default
+	  dlopen() paths, and cleaner error handling.
+	* src/components/rocp_sdk/rocp_sdk.c: ROCP_SDK: Move dlclose() to
+	  component finalization.  This avoids a conflict between the two
+	  components rocp_sdk and rocm, if both components are configured in.
+
+2025-04-29  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/rocp_sdk.c,
+	  src/components/rocp_sdk/sdk_class.cpp: ROCP_SDK: call
+	  configure_device_counting_service as early as possible.  When
+	  applications are linked against libpapi.a rocprofiler_configure()
+	  is not called on-load, so we have to explicitly initialize
+	  everything. This PR moves some of the necessary steps earlier, so
+	  that everything is initialized after PAPI_library_init().
+
+2025-04-24  Treece Burgess <tburgess@icl.utk.edu>
+
+	* .../cuda/tests/test_multipass_event_fail.cu: Cuda component: Update
+	  the error checks in the test test_multipass_event_fail to PASS even
+	  when events that do not require multiple passes are provided.
+
+2025-04-30  Treece Burgess <tburgess@voltar.nic.uoregon.edu>
+
+	* src/components/cuda/cupti_config.h,
+	  src/components/cuda/cupti_dispatch.c,
+	  src/components/cuda/cupti_dispatch.h,
+	  src/components/cuda/cupti_events.c,
+	  src/components/cuda/cupti_profiler.c, src/components/cuda/linux-
+	  cuda.c, src/components/cuda/papi_cupti_common.c,
+	  src/components/cuda/papi_cupti_common.h, src/papi.h,
+	  src/papi_internal.c, src/utils/papi_component_avail.c: Cuda
+	  component: Add functionality for a partially disabled Cuda
+	  component for CCs >= 7.0 (Perfworks API).
+
+2025-04-28  Dong Jun Woun <dwoun@histamine0.cluster>
+
+	* src/components/rocm_smi/rocs.c: rocm_smi: Add proper fan_speed
+	  access, control, and return
+
+2025-04-29  Treece Burgess <tburgess@athena.nic.uoregon.edu>
+
+	* src/components/cuda/cupti_profiler.c,
+	  src/components/cuda/papi_cupti_common.c,
+	  src/components/cuda/papi_cupti_common.h,
+	  src/components/nvml/Rules.nvml, src/components/nvml/linux-nvml.c:
+	  Cuda/NVML Components: Check for variation of shared objects e.g.
+	  libcudart.so, libcudart.so.1 or libcudart (catch all).
+
+2025-04-28  Dong Jun Woun <dwoun@histamine0.cluster>
+
+	* .../rocm_smi/tests/rocm_smi_writeTests.cpp: rocm_smi: Update
+	  read/write test
+
+2025-04-29  Treece-Burgess <burgesstreece@gmail.com>
+
+	* src/components/perf_event/perf_event.c: perf_event: Disable
+	  component if perf_event_paranoid is set to 4 in
+	  /proc/sys/kernel/perf_event_paranoid.
+	* src/components/cuda/Rules.cuda,
+	  src/components/cuda/cupti_profiler.c,
+	  src/components/cuda/cupti_utils.h,
+	  src/components/cuda/lcuda_debug.h, src/components/cuda/linux-
+	  cuda.c, src/components/cuda/papi_cupti_common.c,
+	  src/components/cuda/papi_cupti_common.h,
+	  src/components/cuda/tests/concurrent_profiling.cu,
+	  .../cuda/tests/concurrent_profiling_noCuCtx.cu: Cuda component:
+	  Refactor to support the MetricsEvaluator API (Cuda Versions 11.3
+	  and greater).
+
+2025-04-23  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/Rules.rocp_sdk,
+	  src/components/rocp_sdk/rocp_sdk.c,
+	  src/components/rocp_sdk/tests/Makefile,
+	  src/components/rocp_sdk/tests/advanced.c,
+	  src/components/rocp_sdk/tests/kernel.cpp,
+	  src/components/rocp_sdk/tests/simple.c,
+	  src/components/rocp_sdk/tests/simple_sampling.c,
+	  src/components/rocp_sdk/tests/two_eventsets.c: ROCP_SDK: Accommodate
+	  machines with fewer AMD GPUs.
+
+2025-02-26  Yoshihiro Furudera <fj5100bi@fujitsu.com>
+
+	* src/papi_events.csv: Remove some preset events for FUJITSU-MONAKA
+	  The following preset events of FUJITSU-MONAKA are not counted
+	  properly:  PAPI_L3_DCM PAPI_L3_TCM PAPI_PRF_DM PAPI_L3_DCH
+	  PAPI_L3_TCH  Specifically, the native event that is the source of
+	  the above preset event is counted inaccurately. So I remove these
+	  events in papi_events.csv.
+
+2024-09-19  Akio Kakuno <fj3333bs@aa.jp.fujitsu.com>
+
+	* src/components/sysdetect/arm_cpu_utils.c, src/papi_events.csv:
+	  papi_events.csv: Add preset events support for FUJITSU-MONAKA  This
+	  commit adds preset events support for FUJITSU-MONAKA. Also update
+	  arm_cpu_utils.c to show the processor name in papi_hardware_avail
+	  command.
+
+2025-04-20  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/README.md,
+	  src/components/topdown/Rules.topdown,
+	  src/components/topdown/topdown.c, src/components/topdown/topdown.h:
+	  topdown: Use librseq to protect rdpmc on het cpus  On Intel's
+	  heterogeneous multicore processors such as Raptor Lake, the
+	  PERF_METRICS MSR is only available on the performance cores
+	  (p-cores). If the rdpmc instruction is executed attempting to
+	  access the MSR while the process is on an efficient core (e-core),
+	  a segmentation fault occurs.  Previously, the topdown component has
+	  used a simple check before every execution of the rdpmc instruction
+	  to ensure the core the program is bound to is a p-core. However,
+	  this can fail if the program is moved to another core between the
+	  check and the execution of rdpmc. While rare, a worst-case scenario
+	  test that repeatedly moves a program which is using the topdown
+	  component from p-core to e-core at a random time saw 338
+	  segmentation faults out of 1 million affinity switches (a 0.0338%
+	  error rate). This is a non-zero number of segmentation faults, and
+	  we can do better.  Use librseq to protect the rdpmc instruction
+	  with a restartable sequence (rseq). When the process is preempted
+	  by an affinity change, the sequence immediately aborts and can be
+	  restarted. By keeping the check that the process is on a p-core and
+	  the rdpmc instruction itself within the critical section of the
+	  rseq, it is guaranteed that the rdpmc instruction will never be
+	  executed on an invalid core. The same test described in the
+	  previous paragraph sees 0 segmentation faults.
+
+2025-04-22  Dong Jun Woun <dwoun@picard.nic.uoregon.edu>
+
+	* src/components/cuda/cupti_profiler.c: cuda: Adding stat|device case
+	  to code_to_info
+
+2025-04-22  Anthony <adanalis@icl.utk.edu>
+
+	* papi.spec: .SPEC: Logic for setting rocm_smi env. variables.
+
+2025-04-18  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/components/cuda/cupti_profiler.c,
+	  src/components/rocm/roc_profiler.c, src/components/rocm_smi/rocs.c,
+	  src/components/template/vendor_profiler_v1.c: components: improper
+	  usage of PAPI_END macro  PAPI_END is a macro defined in
+	  papiStdEventDefs.h to denote the end of the list of preset macros.
+	  However, it was being used as an error code in various components
+	  in cases unrelated to the number of presets.  This commit changes
+	  this to a more appropriate error code: PAPI_ENOEVNT.  These changes
+	  have been tested with ROCm 6.3.1 on Frontier.
+
+2024-08-21  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/components/rocm/roc_common.c, src/components/rocm/rocm.c: rocm:
+	  add reason for disabled component  Previously, in the absence of a
+	  ROCm device, the rocm component did not set the string containing
+	  the reason that the component was disabled.  These changes have
+	  been tested with ROCm 6.3.1 on Frontier and with ROCm 6.3.2 on a
+	  system with no ROCm devices.
+
+2025-04-18  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/components/rocm_smi/tests/Makefile: rocm_smi: updates to
+	  Makefile  The rocm_smi component tests were not getting compiled
+	  during the build process. These updates point to the proper
+	  location of 'hipcc' and automatically build the component tests.
+	  The 'square' test was removed due to the source file missing.
+	  These changes have been tested with ROCm 6.3.1 on Frontier.
+
+2025-04-17  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/cuda/cupti_profiler.c: For the stats qualifier check
+	  for excess characters
+
+2025-01-08  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/cuda/cupti_profiler.c,
+	  src/components/rocm/roc_profiler.c: Add a check to parse event
+	  qualifiers to make sure no excess characters are appended.
+
+2024-12-13  Treece Burgess <tburgess@icl.utk.edu>
+
+	* src/components/rapl/linux-rapl.c: Add support for Intel Comet Lake
+	  S/H in RAPL component.
+
+2025-04-16  voidbert <humbertogilgomes@protonmail.com>
+
+	* src/components/perf_event_uncore/perf_event_uncore.c:
+	  perf_event_uncore: fix compilation when CAP_PERFMON is missing
+
+2024-11-04  Daniel Barry <dbarry@vols.utk.edu>
+
+	* src/counter_analysis_toolkit/Makefile,
+	  src/counter_analysis_toolkit/cat_arch.h,
+	  src/counter_analysis_toolkit/vec.c,
+	  src/counter_analysis_toolkit/vec_fma_dp.c,
+	  src/counter_analysis_toolkit/vec_fma_hp.c,
+	  src/counter_analysis_toolkit/vec_fma_sp.c,
+	  src/counter_analysis_toolkit/vec_nonfma_dp.c,
+	  src/counter_analysis_toolkit/vec_nonfma_hp.c,
+	  src/counter_analysis_toolkit/vec_nonfma_sp.c,
+	  src/counter_analysis_toolkit/vec_scalar_verify.c,
+	  src/counter_analysis_toolkit/vec_scalar_verify.h: cat: updates in
+	  vector-FLOPs benchmarks  Include kernels that perform scalar
+	  floating-point operations.  These changes have been tested on the
+	  Intel Sapphire Rapids and IBM POWER10 architectures.
+
+2025-01-22  William Cohen <wcohen@redhat.com>
+
+	* src/high-level/papi_hl.c, src/papi_vector.c: Eliminate conflicting
+	  type errors generated by GCC15  Recent PAPI compiles on Fedora
+	  rawhide (F42) fail because of "conflicting types" errors produced
+	  by GCC15.  Proper arguments types have been added to
+	  _internal_hl_read_user_events function declaration in papi_hl.c and
+	  the typecasting in papi_vector.c.
+
+2024-11-09  Dong Jun Woun <dwoun@hopper1.nic.uoregon.edu>
+
+	* src/components/cuda/README_internal.md,
+	  src/components/cuda/cupti_dispatch.c,
+	  src/components/cuda/cupti_dispatch.h,
+	  src/components/cuda/cupti_events.c,
+	  src/components/cuda/cupti_events.h,
+	  src/components/cuda/cupti_profiler.c,
+	  src/components/cuda/cupti_profiler.h,
+	  src/components/cuda/cupti_utils.c,
+	  src/components/cuda/cupti_utils.h, src/components/cuda/linux-
+	  cuda.c, src/components/cuda/tests/runtest.sh: Cuda: Statistic
+	  Qualifier
+
+2024-01-18  Evans, Richard Todd <richard1.evans@intel.com>
+
+	* src/components/rapl/linux-rapl.c: added Sapphire Rapids (Model 143)
+	  support to RAPL component
+
+2024-09-25  Willow Cunningham <willow.e.cunningham@maine.edu>
+
+	* src/papi_events.csv: papi_events.csv: Added preset events for the
+	  Arm Cortex A72 processor.  Because the A72 has the same events as
+	  the A57, this addition is a one-liner. This work is based on a
+	  patch by Stack Exchange user Bambo Wu, published in May 2021:
+	  https://raspberrypi.stackexchange.com/a/112396
+
+2025-02-21  Willow Cunningham <willow.e.cunningham@maine.edu>
+
+	* src/papi_events.csv: papi_events.csv: Second pass at arm cortex-a76
+	  events  The previous commit adding preset events for the arm cortex
+	  a76 lacked important preset events such as L3 cache misses. Add
+	  missing events based on arm documentation and validation tests.
+	  All tests passing or warning on Raspberry Pi 5.
+
+2024-10-11  Willow Cunningham <willow.e.cunningham@maine.edu>
+
+	* src/validation_tests/Makefile.recipies,
+	  src/validation_tests/load_store_testcode.c,
+	  src/validation_tests/papi_ld_ins.c,
+	  src/validation_tests/papi_sr_ins.c,
+	  src/validation_tests/testcode.h: validation_tests: Add load/store
+	  ARM assembly testcode  The previous load/store validation tests
+	  were being optimized by the compiler in a way that caused the tests
+	  to mispredict the amount of memory instructions that are generated.
+	  This made it appear like the counters were incorrect, when it was
+	  really the test being inaccurate.  To fix this, add assembly
+	  testcode for ARM to eliminate the problem of compiler
+	  optimizations. When load/store testcode is unavailable for the
+	  current platform, default back to the original matrix
+	  multiplication test.
+	* src/papi_events.csv: papi_events: Add preset events for the Arm
+	  Cortex-A76
+
+2025-01-17  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/topdown.c: topdown: simplified metrics
+	  calculation  Previously, the topdown component calculated metrics
+	  by taking the difference of the metrics before and the metrics
+	  after the calculated code block using Equation 1:  M% = (Mb*Sb/255
+	  - Ma*Sa/255) / (Sb - Sa) * 100                   (1)  where Mx are
+	  the raw bytes of the metric before and after the calipered code
+	  block and Sx are the slots. However, if Sa = 0 this simplifies to
+	  M% = Mb/255 * 100
+	  (2)  Therefore it is sufficient to simply reset the PERF_METRICS
+	  MSR and SLOTS during PAPI_start() and then use Equation 2 in
+	  PAPI_stop, reducing the number of dangerous rdpmc calls, reducing
+	  overhead, and simplifying the code.
+
+2025-01-07  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/topdown.c: topdown: relocated core type
+	  checks  To prevent programs using the topdown component on
+	  heterogeneous processors that only supply the PERF_METRICS MSR on
+	  some of their cores from segfaulting due to trying to read the MSR
+	  after being moved to an unsupported core type, the topdown
+	  component periodically checks it is on a supported core and exits
+	  if not.  Previously, this check occurred at the start of PAPI_start
+	  and PAPI_stop. After writing a script that starts a program being
+	  calipered with the topdown component and moves it to an unsupported
+	  core after a random amount of time, for N=100,000 tests the
+	  heterogeneous checks failed to prevent a segmentation fault 0.08%
+	  of the time. This patch moves the heterogeneous checks to occur
+	  only directly before the rdpmc calls, resulting in cleaner code and
+	  a reduced segfault prevention failure rate of 0.064%.  While it is
+	  frustrating that the failure rate is non-zero, since there appears
+	  to be no way to tell a process to ignore changes to its affinity, I
+	  believe there to be no perfect solution at this time.
+
+2024-12-17  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/topdown.c: topdown: stop including
+	  x86intrin.h  Previously, the x86intrin.h header file had been
+	  included in order to provide definitions for _rdpmc(). However,
+	  this has caused the github actions testing compilation of the
+	  component on ARM systems to fail. Therefore, remove the include and
+	  add a manual definition for _rdpmc() taken from the perf_event
+	  component.
+
+2024-12-11  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/README.md, src/components/topdown/topdown.c:
+	  topdown: Prevent segfault on heterogeneous CPUs  All of Intel's
+	  heterogeneous CPUs that support the PERF_METRICS MSR only support
+	  it for their performance (p-core) cores. This means that if a
+	  program that is being measured using the topdown component in PAPI
+	  happens to be rescheduled to an e-core during its runtime, PAPI will
+	  segfault.  To fix this, add a check in _topdown_start() and
+	  _topdown_stop() to exit gracefully if the core affinity of the
+	  process has changed to an unsupported core type.
+
+2024-12-04  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/topdown.c: topdown: add arch support based
+	  on perfmon-intel  While the official Software Developer Manual only
+	  lists the availability of the PERF_METRICS MSR for three
+	  architectures, we can use the 'perfmon' repository maintained by
+	  Intel to discover what architectures support the MSR (repo here:
+	  https://github.com/intel/perfmon).  Architectures that the
+	  repository demonstrates support the events
+	  'PERF_METRICS.BACKEND_BOUND', 'PERF_METRICS.FRONTEND_BOUND', etc.
+	  must support the topdown level 1 metrics of the PERF_METRICS MSR.
+	  Similarly, the presence of the events 'PERF_METRICS.FETCH_LATENCY',
+	  'PERF_METRICS.MEMORY_BOUND', etc. demonstrates support for topdown
+	  L2 metrics in the PERF_METRICS MSR. By cross-referencing the
+	  architecture names in the perfmon repository with their
+	  DisplayFamily/DisplayModel values in Table 2-1 of volume 4 of the
+	  IA32 SDM, we can add support for the following architectures:  -
+	  Rocket Lake - Ice Lake (icl & icx) - Tiger Lake - Sapphire Rapids -
+	  Meteor Lake (redwood cove p-core only) - Alder Lake (golden cove
+	  p-core only) - Granite Rapids - Emerald Rapids  None of these
+	  additional architectures have been tested with the topdown
+	  component yet. While Arrow Lake is shown to support L1 & L2 metrics
+	  in the perfmon repository, its FamilyModel is not yet available in
+	  the IA32 SDM so it has not been added.
+
+2024-11-11  Willow Cunningham <willow.e.cunningham@gmail.com>
+
+	* src/components/topdown/README.md,
+	  src/components/topdown/Rules.topdown,
+	  src/components/topdown/tests/Makefile,
+	  src/components/topdown/tests/topdown_L1.c,
+	  src/components/topdown/tests/topdown_L2.c,
+	  src/components/topdown/tests/topdown_basic.c,
+	  src/components/topdown/topdown.c, src/components/topdown/topdown.h:
+	  topdown: Created a component for interfacing with Intel's
+	  PERF_METRICS MSR  Add a component that collects Intel's topdown
+	  metrics from the PERF_METRICS MSR and automatically converts the
+	  raw metric values to user-consumable percentages.  The intent of
+	  this component is to provide an intuitive interface for accessing
+	  topdown metrics on the supported processors.  Tested on a
+	  RaptorLake-S/HX machine (family/model/stepping 0x6/0xb7/0x1). To
+	  add other supported architectures the switch statement in
+	  _topdown_init_component() should be populated for the
+	  architecture's model number, whether it supports level 2 topdown
+	  metrics, and in the case of a heterogeneous processor what core
+	  type it must be run on.
+
+2024-06-28  voidbert <50591320+voidbert@users.noreply.github.com>
+
+	* .../perf_event_uncore/perf_event_uncore.c: perf_event_uncore:
+	  consider capabilities for permissions
+
+2025-02-27  Dong Jun Woun <dwoun@odyssey.nic.uoregon.edu>
+
+	* src/components/rocm_smi/README.md: rocm_smi: Update readme to note
+	  two cases of root path
+
+2025-03-27  G-Ragghianti <ragghianti@icl.utk.edu>
+
+	* src/configure, src/configure.in: Include the comp_tests in the list
+	  of tests that are enabled by the '--with-tests' configure option
+
+2025-03-20  Treece Burgess <tburgess@icl.utk.edu>
+
+	* .github/workflows/papi_framework_workflow.yml: Using paths-ignore
+	  instead of paths for framework workflow
+
+2025-03-18  Treece Burgess <tburgess@icl.utk.edu>
+
+	* .github/workflows/ci_papi_framework.sh,
+	  .github/workflows/papi_framework_workflow.yml: Remove infiniband
+	  from the papi_components_comprehensive CI test
+
+2025-03-20  Anthony Danalis <adanalis@icl.utk.edu>
+
+	* src/components/rocp_sdk/tests/Makefile: ROCP_SDK: Change
+	  tests/Makefile for spack builds.
+
+2025-03-05  G-Ragghianti <ragghianti@icl.utk.edu>
+
+	* src/components/rocm_smi/Rules.rocm_smi: Adding location of rocm_smi
+	  header files for newer versions of rocm
diff -pruN 7.2.0~b2-1/RELEASENOTES.txt 7.2.0-1/RELEASENOTES.txt
--- 7.2.0~b2-1/RELEASENOTES.txt	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/RELEASENOTES.txt	2025-06-25 22:38:10.000000000 +0000
@@ -4,6 +4,73 @@ order.
 For details on installing PAPI on your machine, consult the INSTALL.txt file
 in this directory.
 
+
+===============================================================================
+PAPI 7.2.0 RELEASE NOTES                                               Jun 2025
+===============================================================================
+
+PAPI 7.2.0 is now available as the next major release. This release officially
+introduces two new components:
+  +++ rocp_sdk: Supports AMD GPUs and APUs via the ROCprofiler-SDK interface.
+  +++ topdown: Provides proper support for Intel topdown metrics.
+PAPI 7.2.0 also introduces preset events for non-CPU devices, starting with
+CUDA events. In addition, component code has been extended to include a
+statistics qualifier (e.g., for CUDA events), offering more concise and
+functional output in the papi_native_avail utility.
+
+
+Additional Major Changes are:
+-----------------------------
+
+Component Updates:
+------------------
+
+* RAPL: Support for Intel Emerald Rapids, and Intel Comet Lake S/H CPUs.
+
+* ROCM/ROCP_SDK:
+  +++ Numerous improvements to error handling, shutdown behavior, and
+      initialization in `rocm` and `rocp_sdk` components.
+  +++ Added multiple libhsa search paths.
+  +++ Correct handling if all events are removed.
+  +++ Improved interoperability between `rocm` and `rocp_sdk` components.
+
+* CUDA:
+  +++ Added statistics qualifier to CUDA events, which offers a more concise
+      and functional output for the papi_native_avail utility.
+  +++ Added `partially enabled` support for systems with multiple compute
+      capabilities: <7.0, =7.0, >7.0.
+  +++ Support for MetricsEvaluator API (CUDA ≥ 11.3).
+  +++ Fixed `cuptid_init` return value and potential overflows.
+
+* Sysdetect, Coretemp, Infiniband, NVML, Net, ROCM_SMI:
+  +++ Improved robustness and memory safety across components.
+  +++ coretemp: Enabled support for event multiplexing.
+
+* Topdown:
+  +++ New component to interface with Intel's PERF_METRICS MSR.
+  +++ Converts raw metrics into user-friendly percentages.
+  +++ Provides access to topdown metrics on supported CPUs:
+      heterogeneous Intel CPUs (e.g., Raptor Lake), Sapphire Rapids,
+      Alder Lake, Granite Rapids.
+  +++ Integrated `librseq` to protect `rdpmc` instruction execution.
+
+
+Preset Events & CAT updates:
+----------------------------
+
+* AMD Family 17h: Corrected presets for IC accesses/misses.
+* ARM Cortex A57/A72/A76: Added/updated preset support.
+* CAT: Added scalar operations to vector-FLOPs Benchmarks.
+
+
+Acknowledgements:
+This release is the result of contributions from many people. The PAPI team
+would like to extend a special thank you to Vince Weaver, Willow Cunningham,
+Stephane Eranian (for libpfm4), William Cohen, Steve Kaufmann, Dandan Zhang,
+Yoshihiro Furudera, Akio Kakuno, Richard Evans, Humberto Gomes and Phil Mucci.
+
+
+
 ===============================================================================
 PAPI 7.2.0b2 RELEASE NOTES                                          24 Feb 2025
 ===============================================================================
diff -pruN 7.2.0~b2-1/debian/changelog 7.2.0-1/debian/changelog
--- 7.2.0~b2-1/debian/changelog	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/changelog	2025-08-12 08:00:16.000000000 +0000
@@ -1,3 +1,12 @@
+papi (7.2.0-1) experimental; urgency=medium
+
+  * New upstream release.
+  * Refresh patches.
+  * Fix building rocm tests with hipcc.
+  * Bump libpfm4-dev B-D to >= 4.13.0+git101.
+
+ -- Andreas Beckmann <anbe@debian.org>  Tue, 12 Aug 2025 10:00:16 +0200
+
 papi (7.2.0~b2-1) experimental; urgency=medium
 
   * New upstream beta.
diff -pruN 7.2.0~b2-1/debian/control 7.2.0-1/debian/control
--- 7.2.0~b2-1/debian/control	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/control	2025-08-12 08:00:16.000000000 +0000
@@ -9,7 +9,8 @@ Build-Depends:
  debhelper-compat (= 13),
  dh-sequence-python3,
  gfortran,
- libpfm4-dev (>= 4.13.0+git83) [linux-any],
+ hipcc [amd64 arm64 ppc64el],
+ libpfm4-dev (>= 4.13.0+git101) [linux-any],
  librocm-smi-dev [!x32 !hurd-any],
  mpi-default-dev,
 # prevent accidental backports after the 64-bit time_t transition
diff -pruN 7.2.0~b2-1/debian/patches/0001-factor-out-Makefile.tests.patch 7.2.0-1/debian/patches/0001-factor-out-Makefile.tests.patch
--- 7.2.0~b2-1/debian/patches/0001-factor-out-Makefile.tests.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/0001-factor-out-Makefile.tests.patch	2025-08-12 08:00:16.000000000 +0000
@@ -13,7 +13,7 @@ diff --git a/src/Makefile.inc b/src/Make
 index d2577fa1a..c116239ae 100644
 --- a/src/Makefile.inc
 +++ b/src/Makefile.inc
-@@ -25,7 +25,7 @@ FHEADERS = $(FORT_HEADERS)
+@@ -26,7 +26,7 @@ FHEADERS = $(FORT_HEADERS)
  LIBPC = $(LIBDIR)/pkgconfig
  
  all: $(SHOW_CONF) $(LIBS) libsde utils tests 
@@ -22,7 +22,7 @@ index d2577fa1a..c116239ae 100644
  
  include $(COMPONENT_RULES)
  
-@@ -231,64 +231,19 @@ freebsd/map-i7.o: freebsd/map-i7.c $(HEADERS)
+@@ -232,64 +232,19 @@ freebsd/map-i7.o: freebsd/map-i7.c $(HEA
  freebsd/map-westmere.o: freebsd/map-westmere.c $(HEADERS)
  	$(CC) $(LIBCFLAGS) -c $< -o $@
  
@@ -87,9 +87,9 @@ index d2577fa1a..c116239ae 100644
  	$(MAKE) -C utils distclean
 -	$(MAKE) -C validation_tests distclean
  	$(MAKE) -C components -f Makefile_comp_tests distclean
- 	rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so sde_lib/libsde.so* sde_lib/libsde.a libsde.so libsde.a papi.pc components_config.h $(PAPI_EVENTS_TABLE)
- 	rm -f config.log config.status f77papi.h f90papi.h fpapi.h
-@@ -364,6 +319,11 @@ install-tests:  install-comp_tests
+ 	rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so sde_lib/libsde.so* sde_lib/libsde.a libsde.so libsde.a papi.pc components_config.h papi_components_config_event_defs.h $(PAPI_EVENTS_TABLE)
+ 	$(if ${COMPONENTS}, \
+@@ -375,6 +330,11 @@ install-tests:  install-comp_tests
  	cp run_tests_exclude.txt $(DESTDIR)$(DATADIR)
  	chmod go+rx $(DESTDIR)$(DATADIR)/run_tests.sh
  	chmod go+r $(DESTDIR)$(DATADIR)/run_tests_exclude_cuda.txt $(DESTDIR)$(DATADIR)/run_tests_exclude.txt
diff -pruN 7.2.0~b2-1/debian/patches/0003-fix-Makefiles-for-installed-tests.patch 7.2.0-1/debian/patches/0003-fix-Makefiles-for-installed-tests.patch
--- 7.2.0~b2-1/debian/patches/0003-fix-Makefiles-for-installed-tests.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/0003-fix-Makefiles-for-installed-tests.patch	2025-08-12 08:00:16.000000000 +0000
@@ -24,16 +24,16 @@ diff --git a/src/Makefile.inc b/src/Make
 index c116239ae..4666f408b 100644
 --- a/src/Makefile.inc
 +++ b/src/Makefile.inc
-@@ -244,7 +244,7 @@ clean: native_clean
+@@ -245,7 +245,7 @@ clean: native_clean
  clobber distclean: clean native_clobber 
  	$(MAKE) -C ../doc distclean
  	$(MAKE) -C utils distclean
 -	$(MAKE) -C components -f Makefile_comp_tests distclean
 +	$(MAKE) -C components distclean
- 	rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so sde_lib/libsde.so* sde_lib/libsde.a libsde.so libsde.a papi.pc components_config.h $(PAPI_EVENTS_TABLE)
- 	rm -f config.log config.status f77papi.h f90papi.h fpapi.h
- 
-@@ -328,6 +328,9 @@ install-tests:  install-comp_tests
+ 	rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so sde_lib/libsde.so* sde_lib/libsde.a libsde.so libsde.a papi.pc components_config.h papi_components_config_event_defs.h $(PAPI_EVENTS_TABLE)
+ 	$(if ${COMPONENTS}, \
+ 		set -ex; for comp in ${COMPONENTS}; do \
+@@ -339,6 +339,9 @@ install-tests:  install-comp_tests
  # Component tests installing
  install-comp_tests:
  ifneq (${COMPONENTS},)
diff -pruN 7.2.0~b2-1/debian/patches/add-support-for-loongarch64.patch 7.2.0-1/debian/patches/add-support-for-loongarch64.patch
--- 7.2.0~b2-1/debian/patches/add-support-for-loongarch64.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/add-support-for-loongarch64.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,52 +0,0 @@
-Description: Add support for loongarch64 
-Author: Dandan Zhang <zhangdandan@loongson.cn> 
-Last-Update: 2023-12-06
-
---- papi-7.0.1.orig/src/linux-context.h
-+++ papi-7.0.1/src/linux-context.h
-@@ -35,6 +35,8 @@ typedef ucontext_t hwd_ucontext_t;
- #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.arm_pc
- #elif defined(__aarch64__)
- #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc
-+#elif defined(__loongarch64)
-+#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.__pc
- #elif defined(__mips__)
- #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc
- #elif defined(__hppa__)
---- papi-7.0.1.orig/src/linux-timer.c
-+++ papi-7.0.1/src/linux-timer.c
-@@ -246,6 +246,22 @@ get_cycles( void )
- }
- 
- /************************/
-+/* loongarch64 get_cycles() */
-+/************************/
-+
-+#elif defined(__loongarch64)
-+static inline long long
-+get_cycles( void )
-+{
-+	int rid = 0;
-+	unsigned long ret;
-+
-+	__asm__ __volatile__ ( "rdtime.d %0, %1" : "=r" (ret), "=r" (rid) );
-+
-+	return ret;
-+}
-+
-+/************************/
- /* POWER get_cycles()   */
- /************************/
- 
---- papi-7.0.1.orig/src/mb.h
-+++ papi-7.0.1/src/mb.h
-@@ -39,6 +39,9 @@
- #elif defined(__aarch64__)
- #define rmb()           asm volatile("dmb ld" ::: "memory")
- 
-+#elif defined(__loongarch64)
-+#define rmb()           __asm__ __volatile__("dbar 0" : : : "memory") 
-+
- #elif defined(__mips__)
- #define rmb()           asm volatile(                                   \
-                                 ".set   mips2\n\t"                      \
diff -pruN 7.2.0~b2-1/debian/patches/do-not-ignore-failures.patch 7.2.0-1/debian/patches/do-not-ignore-failures.patch
--- 7.2.0~b2-1/debian/patches/do-not-ignore-failures.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/do-not-ignore-failures.patch	2025-08-12 08:00:16.000000000 +0000
@@ -3,7 +3,7 @@ Description: do not ignore any errors in
 
 --- a/src/Makefile.inc
 +++ b/src/Makefile.inc
-@@ -66,9 +66,7 @@ libpapi.so libpapi.so.$(PAPISOVER): $(SH
+@@ -67,9 +67,7 @@ libpapi.so libpapi.so.$(PAPISOVER): $(SH
  $(SHLIB): $(HEADERS) $(SOURCES) $(SHLIBOBJS)
  	rm -f $(SHLIB) libpapi.so libpapi.so.$(PAPISOVER)
  	$(CC_SHR) $(LIBCFLAGS) $(OPTFLAGS) $(SOURCES) $(SHLIBOBJS) -o $@ $(SHLIBDEPS) $(LDFLAGS)
@@ -14,7 +14,7 @@ Description: do not ignore any errors in
  
  libsde:
  ifeq ($(BUILD_LIBSDE_SHARED),yes)
-@@ -319,14 +317,14 @@ install-hl-scripts:
+@@ -324,8 +322,8 @@ install-hl-scripts:
  
  install-lib: native_install
  	@echo "Headers (INCDIR) being installed in: \"$(DESTDIR)$(INCDIR)\""; 
@@ -22,9 +22,12 @@ Description: do not ignore any errors in
 -	-chmod go+rx $(DESTDIR)$(INCDIR)
 +	mkdir -p $(DESTDIR)$(INCDIR)
 +	chmod go+rx $(DESTDIR)$(INCDIR)
- 	cp $(FHEADERS) papi.h papiStdEventDefs.h $(DESTDIR)$(INCDIR)
+ 	cp $(FHEADERS) papi.h papiStdEventDefs.h papi_components_config_event_defs.h $(DESTDIR)$(INCDIR)
+ 	$(if ${COMPONENTS}, \
+ 		set -ex; for comp in ${COMPONENTS}; do \
+@@ -336,8 +334,8 @@ install-lib: native_install
  	cp sde_lib/sde_lib.h sde_lib/sde_lib.hpp $(DESTDIR)$(INCDIR)
- 	cd $(DESTDIR)$(INCDIR) && chmod go+r $(FHEADERS) papi.h papiStdEventDefs.h sde_lib.h sde_lib.hpp
+ 	cd $(DESTDIR)$(INCDIR) && chmod go+r $(FHEADERS) papi.h papiStdEventDefs.h papi_components_config_event_defs.h sde_lib.h sde_lib.hpp
  	@echo "Libraries (LIBDIR) being installed in: \"$(DESTDIR)$(LIBDIR)\""; 
 -	-mkdir -p $(DESTDIR)$(LIBDIR)
 -	-chmod go+rx $(DESTDIR)$(LIBDIR)
@@ -33,7 +36,7 @@ Description: do not ignore any errors in
  	@set -ex; if test -r $(LIBRARY) ; then \
  		cp $(LIBRARY) $(DESTDIR)$(LIBDIR); \
  		chmod go+r $(DESTDIR)$(LIBDIR)/$(LIBRARY); \
-@@ -361,11 +359,11 @@ install-tests:  install-comp_tests
+@@ -372,11 +370,11 @@ install-tests:  install-comp_tests
  	$(SETPATH) $(MAKE) -C ctests install
  	$(SETPATH) $(MAKE) -C ftests install
  	$(SETPATH) $(MAKE) -C validation_tests install
@@ -50,7 +53,7 @@ Description: do not ignore any errors in
  
  # Component tests installing
  install-comp_tests:
-@@ -377,8 +375,8 @@ endif
+@@ -388,8 +386,8 @@ endif
  
  install-pkgconf:
  	@echo "pkcongfig being installed in: \"$(DESTDIR)$(LIBPC)\"";
diff -pruN 7.2.0~b2-1/debian/patches/fix-typos.patch 7.2.0-1/debian/patches/fix-typos.patch
--- 7.2.0~b2-1/debian/patches/fix-typos.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/fix-typos.patch	2025-08-12 08:00:16.000000000 +0000
@@ -5,13 +5,13 @@ Description: fix some typos found by Lin
 +++ b/src/papi_common_strings.h
 @@ -55,7 +55,7 @@ hwi_presets_t _papi_hwi_presets[PAPI_MAX
  	  0, PAPI_PRESET_BIT_CACH,
- 	  NULL, {0},{NULL}, NULL},
+       NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
  /* 10 */ {"PAPI_CA_SHR", 
 -	  "Ex Acces shared CL", 
 +	  "Ex Access shared CL",
  	  "Requests for exclusive access to shared cache line", 0,
  	  0, PAPI_PRESET_BIT_CACH,
- 	  NULL, {0},{NULL}, NULL},
+       NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 --- a/man/man3/PAPI_enum_cmp_event.3
 +++ b/man/man3/PAPI_enum_cmp_event.3
 @@ -15,7 +15,7 @@ PAPI_enum_cmp_event \- Enumerate PAPI pr
@@ -36,7 +36,7 @@ Description: fix some typos found by Lin
  Given a preset or native event code, \fBPAPI_enum_event\fP replaces the event code with the next available event in either the preset or native table\&. The modifier argument affects which events are returned\&. For all platforms and event types, a value of PAPI_ENUM_ALL (zero) directs the function to return all possible events\&. 
 --- a/src/papi.c
 +++ b/src/papi.c
-@@ -1605,7 +1605,7 @@ PAPI_event_name_to_code( const char *in,
+@@ -1694,7 +1694,7 @@ PAPI_event_name_to_code( const char *in,
   *
   *	@par C Interface:
   *	\#include <papi.h> @n
@@ -45,7 +45,7 @@ Description: fix some typos found by Lin
   *
   *	Given a preset or native event code, PAPI_enum_event replaces the event 
   *	code with the next available event in either the preset or native table. 
-@@ -1791,7 +1791,7 @@ PAPI_enum_event( int *EventCode, int mod
+@@ -1948,7 +1948,7 @@ PAPI_enum_event( int *EventCode, int mod
   *
   *	@par C Interface:
   *	\#include <papi.h> @n
diff -pruN 7.2.0~b2-1/debian/patches/for-debian-no-rpath.patch 7.2.0-1/debian/patches/for-debian-no-rpath.patch
--- 7.2.0~b2-1/debian/patches/for-debian-no-rpath.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/for-debian-no-rpath.patch	2025-08-12 08:00:16.000000000 +0000
@@ -4,7 +4,7 @@ Forwarded: not-needed
 
 --- a/src/configure.in
 +++ b/src/configure.in
-@@ -1033,7 +1033,7 @@ if test "$shlib_tools" = "yes"; then
+@@ -1045,7 +1045,7 @@ if test "$shlib_tools" = "yes"; then
     fi
     LINKLIB='$(SHLIB)'
     # Set rpath and runpath to find libpfm.so and libpapi.so if not specified via LD_LIBRARY_PATH. The search path at runtime can be overriden by LD_LIBRARY_PATH.
@@ -13,7 +13,7 @@ Forwarded: not-needed
  elif test "$shlib_tools" = "no"; then
     if test "$static_lib" != "yes"; then
        AC_MSG_ERROR([Building shared but specified static linking for tests and utilities])
-@@ -1503,7 +1503,7 @@ PAPISOVER='$(PAPIVER).$(PAPIREV)'
+@@ -1515,7 +1515,7 @@ PAPISOVER='$(PAPIVER).$(PAPIREV)'
  VLIB='libpapi.so.$(PAPISOVER)'
  OMPCFLGS=-fopenmp
  CC_R='$(CC) -pthread'
diff -pruN 7.2.0~b2-1/debian/patches/gcc-15.patch 7.2.0-1/debian/patches/gcc-15.patch
--- 7.2.0~b2-1/debian/patches/gcc-15.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/gcc-15.patch	1970-01-01 00:00:00.000000000 +0000
@@ -1,25 +0,0 @@
-Author: Andreas Beckmann <anbe@debian.org>
-Description: fix FTBFS with GCC 15
-
---- a/src/high-level/papi_hl.c
-+++ b/src/high-level/papi_hl.c
-@@ -167,7 +167,7 @@ static int _internal_hl_checkCounter ( c
- static int _internal_hl_determine_rank();
- static char *_internal_hl_remove_spaces( char *str, int mode );
- static int _internal_hl_determine_default_events();
--static int _internal_hl_read_user_events();
-+static int _internal_hl_read_user_events(const char *user_events);
- static int _internal_hl_new_component(int component_id, components_t *component);
- static int _internal_hl_add_event_to_component(char *event_name, int event,
-                                         short event_type, components_t *component);
---- a/src/papi_vector.c
-+++ b/src/papi_vector.c
-@@ -218,7 +218,7 @@ _papi_hwi_innoculate_os_vector( papi_os_
- 	if ( !v->update_shlib_info )
- 		v->update_shlib_info = ( int ( * )( papi_mdi_t * ) ) vec_int_dummy;
- 	if ( !v->get_system_info )
--		v->get_system_info = ( int ( * )(  ) ) vec_int_dummy;
-+		v->get_system_info = ( int ( * )( papi_mdi_t * ) ) vec_int_dummy;
- 
- 	if ( !v->get_memory_info )
- 		v->get_memory_info =
diff -pruN 7.2.0~b2-1/debian/patches/hip.patch 7.2.0-1/debian/patches/hip.patch
--- 7.2.0~b2-1/debian/patches/hip.patch	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/debian/patches/hip.patch	2025-08-12 08:00:16.000000000 +0000
@@ -0,0 +1,76 @@
+Author: Andreas Beckmann <anbe@debian.org>
+Description: misc hipcc fixes
+
+--- a/src/components/rocm_smi/tests/Makefile
++++ b/src/components/rocm_smi/tests/Makefile
+@@ -14,6 +14,7 @@ INCLUDE += -I$(PAPI_ROCM_ROOT)/include/h
+ INCLUDE += -I$(PAPI_ROCM_ROOT)/include/rocprofiler
+ INCLUDE += -I$(PAPI_ROCM_ROOT)/include/rocblas
+ LDFLAGS = -ldl -g -pthread
++HIPCFLAGS	= $(filter-out -fcf-protection,$(CFLAGS))
+ 
+ %.o:%.c
+ 	@echo "INCLUDE=" $(INCLUDE)
+@@ -24,8 +25,10 @@ LDFLAGS = -ldl -g -pthread
+ 	@echo "CFLAGS=" $(CFLAGS)
+ 	g++ $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $<
+ 
++ifneq (,$(wildcard $(HIPCC)))
+ TESTS = rocm_command_line rocm_smi_all power_monitor_rocm rocm_smi_writeTests
+ TESTS_LONG = rocmsmi_example
++endif
+ 
+ rocm_smi_tests: $(TESTS)
+ rocm_smi_tests_long: $(TESTS_LONG)
+@@ -34,34 +37,34 @@ rocm_smi_tests_long: $(TESTS_LONG)
+ #       have trouble managing libraries if we try to do both in a single step.
+ 
+ rocm_command_line.o: rocm_command_line.cpp $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@ 
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -c $< -o $@ 
+ 
+ rocm_command_line: rocm_command_line.o $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
+ 
+ rocm_smi_all.o: rocm_smi_all.cpp $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -c $< -o $@
+ 
+ rocm_smi_all: rocm_smi_all.o $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
+ 
+ power_monitor_rocm.o: power_monitor_rocm.cpp $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -c $< -o $@
+ 
+ power_monitor_rocm: power_monitor_rocm.o $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
+ 
+ rocmsmi_example.o: rocmsmi_example.cpp $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -c $< -o $@
+ 
+ rocmsmi_example: rocmsmi_example.o $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) -L$(PAPI_ROCM_ROOT)/lib/rocblas -lrocblas
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) -L$(PAPI_ROCM_ROOT)/lib/rocblas -lrocblas
+ 
+ rocm_smi_writeTests.o: rocm_smi_writeTests.cpp $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -c $< -o $@
+ 
+ rocm_smi_writeTests: rocm_smi_writeTests.o $(UTILOBJS) $(PAPILIB)
+-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS)
++	$(HIPCC) $(HIPCFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS)
+ 
+ clean:
+ 	rm -f $(TESTS) $(TESTS_LONG) *.o
+@@ -72,5 +75,5 @@ checkpath:
+ 	echo HIPCC = $(HIPCC)
+ 	echo INCLUDE = $(INCLUDE)
+ 	echo LDFLAGS = $(LDFLAGS)
+-	echo CFLAGS = $(CFLAGS)
++	echo HIPCFLAGS = $(HIPCFLAGS)
+ 
diff -pruN 7.2.0~b2-1/debian/patches/man-section.patch 7.2.0-1/debian/patches/man-section.patch
--- 7.2.0~b2-1/debian/patches/man-section.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/man-section.patch	2025-08-12 08:00:16.000000000 +0000
@@ -4,14 +4,14 @@ Description: fix man page section
 --- a/man/man1/PAPI_derived_event_files.1
 +++ b/man/man1/PAPI_derived_event_files.1
 @@ -1,4 +1,4 @@
--.TH "PAPI_derived_event_files" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
-+.TH "PAPI_derived_event_files" 5 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+-.TH "PAPI_derived_event_files" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
++.TH "PAPI_derived_event_files" 5 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
  .ad l
  .nh
  .SH NAME
 --- a/src/papi_preset.c
 +++ b/src/papi_preset.c
-@@ -908,7 +908,7 @@ infix_to_postfix( char *infix ) {
+@@ -931,7 +931,7 @@ infix_to_postfix( char *infix ) {
   * support the same event definition syntax.
   *
   * Event definition file syntax:
diff -pruN 7.2.0~b2-1/debian/patches/ppc64el.patch 7.2.0-1/debian/patches/ppc64el.patch
--- 7.2.0~b2-1/debian/patches/ppc64el.patch	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/ppc64el.patch	2025-08-12 08:00:16.000000000 +0000
@@ -4,7 +4,7 @@ Forwarded: not-needed
 
 --- a/src/configure.in
 +++ b/src/configure.in
-@@ -519,6 +519,7 @@ AC_ARG_WITH(CPU,
+@@ -531,6 +531,7 @@ AC_ARG_WITH(CPU,
                         POWER8)  CPU="POWER8";;
                         POWER9)  CPU="POWER9";;
                         POWER10) CPU="POWER10";;
diff -pruN 7.2.0~b2-1/debian/patches/series 7.2.0-1/debian/patches/series
--- 7.2.0~b2-1/debian/patches/series	2025-03-26 21:19:47.000000000 +0000
+++ 7.2.0-1/debian/patches/series	2025-08-12 08:00:16.000000000 +0000
@@ -7,9 +7,8 @@ man-section.patch
 libsde-soversion.patch
 hardening.patch
 ppc64el.patch
-add-support-for-loongarch64.patch
 reproducible.patch
 0001-factor-out-Makefile.tests.patch
 0002-sde-tests-create-lib-dir-if-needed.patch
 0003-fix-Makefiles-for-installed-tests.patch
-gcc-15.patch
+hip.patch
diff -pruN 7.2.0~b2-1/debian/salsa-ci.yml 7.2.0-1/debian/salsa-ci.yml
--- 7.2.0~b2-1/debian/salsa-ci.yml	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/debian/salsa-ci.yml	2025-08-12 08:00:16.000000000 +0000
@@ -0,0 +1,3 @@
+---
+include:
+  - https://salsa.debian.org/salsa-ci-team/pipeline/raw/master/recipes/debian.yml
diff -pruN 7.2.0~b2-1/doc/Doxyfile-common 7.2.0-1/doc/Doxyfile-common
--- 7.2.0~b2-1/doc/Doxyfile-common	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/doc/Doxyfile-common	2025-06-25 22:38:10.000000000 +0000
@@ -31,7 +31,7 @@ PROJECT_NAME           = PAPI
 # This could be handy for archiving the generated documentation or
 # if some version control system is used.
 
-PROJECT_NUMBER         = 7.2.0.0b2
+PROJECT_NUMBER         = 7.2.0.0
 
 # Using the PROJECT_BRIEF tag one can provide an optional one line description
 # for a project that appears at the top of each page and should give viewer
diff -pruN 7.2.0~b2-1/man/man1/PAPI_derived_event_files.1 7.2.0-1/man/man1/PAPI_derived_event_files.1
--- 7.2.0~b2-1/man/man1/PAPI_derived_event_files.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/PAPI_derived_event_files.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_derived_event_files" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_derived_event_files" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_avail.1 7.2.0-1/man/man1/papi_avail.1
--- 7.2.0~b2-1/man/man1/papi_avail.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_avail.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_avail" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_avail" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -28,7 +28,7 @@ papi_avail is a PAPI utility program tha
 .IP "\(bu" 2
 -e < event > Display detailed event information for the named event\&. This event can be a preset event, a user defined event, or a native event\&. If the event is a preset or a user defined event the output shows a list of native events the event is based on and the formula that is used to compute the events final value\&.
 .br
-
+ 
 .PP
 .PP
 Event filtering options 
diff -pruN 7.2.0~b2-1/man/man1/papi_clockres.1 7.2.0-1/man/man1/papi_clockres.1
--- 7.2.0~b2-1/man/man1/papi_clockres.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_clockres.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_clockres" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_clockres" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_command_line.1 7.2.0-1/man/man1/papi_command_line.1
--- 7.2.0~b2-1/man/man1/papi_command_line.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_command_line.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_command_line" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_command_line" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_component_avail.1 7.2.0-1/man/man1/papi_component_avail.1
--- 7.2.0~b2-1/man/man1/papi_component_avail.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_component_avail.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_component_avail" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_component_avail" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_cost.1 7.2.0-1/man/man1/papi_cost.1
--- 7.2.0~b2-1/man/man1/papi_cost.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_cost.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_cost" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_cost" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_decode.1 7.2.0-1/man/man1/papi_decode.1
--- 7.2.0~b2-1/man/man1/papi_decode.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_decode.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_decode" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_decode" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_error_codes.1 7.2.0-1/man/man1/papi_error_codes.1
--- 7.2.0~b2-1/man/man1/papi_error_codes.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_error_codes.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_error_codes" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_error_codes" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_event_chooser.1 7.2.0-1/man/man1/papi_event_chooser.1
--- 7.2.0~b2-1/man/man1/papi_event_chooser.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_event_chooser.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_event_chooser" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_event_chooser" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_hardware_avail.1 7.2.0-1/man/man1/papi_hardware_avail.1
--- 7.2.0~b2-1/man/man1/papi_hardware_avail.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_hardware_avail.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_hardware_avail" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_hardware_avail" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_hybrid_native_avail.1 7.2.0-1/man/man1/papi_hybrid_native_avail.1
--- 7.2.0~b2-1/man/man1/papi_hybrid_native_avail.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_hybrid_native_avail.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_hybrid_native_avail" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_hybrid_native_avail" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_mem_info.1 7.2.0-1/man/man1/papi_mem_info.1
--- 7.2.0~b2-1/man/man1/papi_mem_info.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_mem_info.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_mem_info" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_mem_info" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_multiplex_cost.1 7.2.0-1/man/man1/papi_multiplex_cost.1
--- 7.2.0~b2-1/man/man1/papi_multiplex_cost.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_multiplex_cost.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_multiplex_cost" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_multiplex_cost" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_native_avail.1 7.2.0-1/man/man1/papi_native_avail.1
--- 7.2.0~b2-1/man/man1/papi_native_avail.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_native_avail.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_native_avail" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_native_avail" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_version.1 7.2.0-1/man/man1/papi_version.1
--- 7.2.0~b2-1/man/man1/papi_version.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_version.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_version" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_version" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man1/papi_xml_event_info.1 7.2.0-1/man/man1/papi_xml_event_info.1
--- 7.2.0~b2-1/man/man1/papi_xml_event_info.1	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man1/papi_xml_event_info.1	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_xml_event_info" 1 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_xml_event_info" 1 "Wed Jun 25 2025 19:17:03" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_accum.3 7.2.0-1/man/man3/PAPIF_accum.3
--- 7.2.0~b2-1/man/man3/PAPIF_accum.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_accum.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_accum" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_accum" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_add_event.3 7.2.0-1/man/man3/PAPIF_add_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_add_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_add_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_add_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_add_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_add_events.3 7.2.0-1/man/man3/PAPIF_add_events.3
--- 7.2.0~b2-1/man/man3/PAPIF_add_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_add_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_add_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_add_events" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_add_named_event.3 7.2.0-1/man/man3/PAPIF_add_named_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_add_named_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_add_named_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_add_named_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_add_named_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_assign_eventset_component.3 7.2.0-1/man/man3/PAPIF_assign_eventset_component.3
--- 7.2.0~b2-1/man/man3/PAPIF_assign_eventset_component.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_assign_eventset_component.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_assign_eventset_component" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_assign_eventset_component" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_cleanup_eventset.3 7.2.0-1/man/man3/PAPIF_cleanup_eventset.3
--- 7.2.0~b2-1/man/man3/PAPIF_cleanup_eventset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_cleanup_eventset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_cleanup_eventset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_cleanup_eventset" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_create_eventset.3 7.2.0-1/man/man3/PAPIF_create_eventset.3
--- 7.2.0~b2-1/man/man3/PAPIF_create_eventset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_create_eventset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_create_eventset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_create_eventset" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_destroy_eventset.3 7.2.0-1/man/man3/PAPIF_destroy_eventset.3
--- 7.2.0~b2-1/man/man3/PAPIF_destroy_eventset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_destroy_eventset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_destroy_eventset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_destroy_eventset" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_enum_dev_type.3 7.2.0-1/man/man3/PAPIF_enum_dev_type.3
--- 7.2.0~b2-1/man/man3/PAPIF_enum_dev_type.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_enum_dev_type.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_enum_dev_type" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_enum_dev_type" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_enum_event.3 7.2.0-1/man/man3/PAPIF_enum_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_enum_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_enum_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_enum_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_enum_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_epc.3 7.2.0-1/man/man3/PAPIF_epc.3
--- 7.2.0~b2-1/man/man3/PAPIF_epc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_epc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_epc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_epc" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_event_code_to_name.3 7.2.0-1/man/man3/PAPIF_event_code_to_name.3
--- 7.2.0~b2-1/man/man3/PAPIF_event_code_to_name.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_event_code_to_name.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_event_code_to_name" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_event_code_to_name" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_event_name_to_code.3 7.2.0-1/man/man3/PAPIF_event_name_to_code.3
--- 7.2.0~b2-1/man/man3/PAPIF_event_name_to_code.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_event_name_to_code.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_event_name_to_code" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_event_name_to_code" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_flips_rate.3 7.2.0-1/man/man3/PAPIF_flips_rate.3
--- 7.2.0~b2-1/man/man3/PAPIF_flips_rate.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_flips_rate.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_flips_rate" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_flips_rate" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_flops_rate.3 7.2.0-1/man/man3/PAPIF_flops_rate.3
--- 7.2.0~b2-1/man/man3/PAPIF_flops_rate.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_flops_rate.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_flops_rate" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_flops_rate" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_clockrate.3 7.2.0-1/man/man3/PAPIF_get_clockrate.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_clockrate.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_clockrate.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_clockrate" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_clockrate" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_dev_attr.3 7.2.0-1/man/man3/PAPIF_get_dev_attr.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_dev_attr.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_dev_attr.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_dev_attr" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_dev_attr" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_dev_type_attr.3 7.2.0-1/man/man3/PAPIF_get_dev_type_attr.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_dev_type_attr.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_dev_type_attr.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_dev_type_attr" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_dev_type_attr" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_dmem_info.3 7.2.0-1/man/man3/PAPIF_get_dmem_info.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_dmem_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_dmem_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_dmem_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_dmem_info" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_domain.3 7.2.0-1/man/man3/PAPIF_get_domain.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_domain.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_domain.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_domain" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_domain" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_event_info.3 7.2.0-1/man/man3/PAPIF_get_event_info.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_event_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_event_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_event_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_event_info" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_exe_info.3 7.2.0-1/man/man3/PAPIF_get_exe_info.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_exe_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_exe_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_exe_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_exe_info" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_granularity.3 7.2.0-1/man/man3/PAPIF_get_granularity.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_granularity.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_granularity.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_granularity" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_granularity" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_hardware_info.3 7.2.0-1/man/man3/PAPIF_get_hardware_info.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_hardware_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_hardware_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_hardware_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_hardware_info" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_multiplex.3 7.2.0-1/man/man3/PAPIF_get_multiplex.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_multiplex.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_multiplex.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_multiplex" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_multiplex" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_preload.3 7.2.0-1/man/man3/PAPIF_get_preload.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_preload.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_preload.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_preload" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_preload" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_real_cyc.3 7.2.0-1/man/man3/PAPIF_get_real_cyc.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_real_cyc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_real_cyc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_real_cyc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_real_cyc" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_real_nsec.3 7.2.0-1/man/man3/PAPIF_get_real_nsec.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_real_nsec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_real_nsec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_real_nsec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_real_nsec" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_real_usec.3 7.2.0-1/man/man3/PAPIF_get_real_usec.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_real_usec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_real_usec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_real_usec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_real_usec" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_virt_cyc.3 7.2.0-1/man/man3/PAPIF_get_virt_cyc.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_virt_cyc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_virt_cyc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_virt_cyc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_virt_cyc" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_get_virt_usec.3 7.2.0-1/man/man3/PAPIF_get_virt_usec.3
--- 7.2.0~b2-1/man/man3/PAPIF_get_virt_usec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_get_virt_usec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_get_virt_usec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_get_virt_usec" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_ipc.3 7.2.0-1/man/man3/PAPIF_ipc.3
--- 7.2.0~b2-1/man/man3/PAPIF_ipc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_ipc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_ipc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_ipc" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_is_initialized.3 7.2.0-1/man/man3/PAPIF_is_initialized.3
--- 7.2.0~b2-1/man/man3/PAPIF_is_initialized.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_is_initialized.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_is_initialized" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_is_initialized" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_library_init.3 7.2.0-1/man/man3/PAPIF_library_init.3
--- 7.2.0~b2-1/man/man3/PAPIF_library_init.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_library_init.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_library_init" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_library_init" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_lock.3 7.2.0-1/man/man3/PAPIF_lock.3
--- 7.2.0~b2-1/man/man3/PAPIF_lock.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_lock.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_lock" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_lock" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_multiplex_init.3 7.2.0-1/man/man3/PAPIF_multiplex_init.3
--- 7.2.0~b2-1/man/man3/PAPIF_multiplex_init.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_multiplex_init.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_multiplex_init" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_multiplex_init" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_num_cmp_hwctrs.3 7.2.0-1/man/man3/PAPIF_num_cmp_hwctrs.3
--- 7.2.0~b2-1/man/man3/PAPIF_num_cmp_hwctrs.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_num_cmp_hwctrs.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_num_cmp_hwctrs" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_num_cmp_hwctrs" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_num_events.3 7.2.0-1/man/man3/PAPIF_num_events.3
--- 7.2.0~b2-1/man/man3/PAPIF_num_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_num_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_num_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_num_events" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_num_hwctrs.3 7.2.0-1/man/man3/PAPIF_num_hwctrs.3
--- 7.2.0~b2-1/man/man3/PAPIF_num_hwctrs.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_num_hwctrs.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_num_hwctrs" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_num_hwctrs" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_perror.3 7.2.0-1/man/man3/PAPIF_perror.3
--- 7.2.0~b2-1/man/man3/PAPIF_perror.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_perror.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_perror" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_perror" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_query_event.3 7.2.0-1/man/man3/PAPIF_query_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_query_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_query_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_query_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_query_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_query_named_event.3 7.2.0-1/man/man3/PAPIF_query_named_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_query_named_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_query_named_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_query_named_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_query_named_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_rate_stop.3 7.2.0-1/man/man3/PAPIF_rate_stop.3
--- 7.2.0~b2-1/man/man3/PAPIF_rate_stop.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_rate_stop.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_rate_stop" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_rate_stop" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_read.3 7.2.0-1/man/man3/PAPIF_read.3
--- 7.2.0~b2-1/man/man3/PAPIF_read.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_read.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_read" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_read" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_read_ts.3 7.2.0-1/man/man3/PAPIF_read_ts.3
--- 7.2.0~b2-1/man/man3/PAPIF_read_ts.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_read_ts.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_read_ts" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_read_ts" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_register_thread.3 7.2.0-1/man/man3/PAPIF_register_thread.3
--- 7.2.0~b2-1/man/man3/PAPIF_register_thread.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_register_thread.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_register_thread" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_register_thread" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_remove_event.3 7.2.0-1/man/man3/PAPIF_remove_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_remove_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_remove_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_remove_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_remove_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_remove_events.3 7.2.0-1/man/man3/PAPIF_remove_events.3
--- 7.2.0~b2-1/man/man3/PAPIF_remove_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_remove_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_remove_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_remove_events" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_remove_named_event.3 7.2.0-1/man/man3/PAPIF_remove_named_event.3
--- 7.2.0~b2-1/man/man3/PAPIF_remove_named_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_remove_named_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_remove_named_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_remove_named_event" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_reset.3 7.2.0-1/man/man3/PAPIF_reset.3
--- 7.2.0~b2-1/man/man3/PAPIF_reset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_reset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_reset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_reset" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_cmp_domain.3 7.2.0-1/man/man3/PAPIF_set_cmp_domain.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_cmp_domain.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_cmp_domain.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_cmp_domain" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_cmp_domain" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_cmp_granularity.3 7.2.0-1/man/man3/PAPIF_set_cmp_granularity.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_cmp_granularity.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_cmp_granularity.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_cmp_granularity" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_cmp_granularity" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_debug.3 7.2.0-1/man/man3/PAPIF_set_debug.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_debug.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_debug.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_debug" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_debug" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_domain.3 7.2.0-1/man/man3/PAPIF_set_domain.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_domain.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_domain.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_domain" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_domain" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_event_domain.3 7.2.0-1/man/man3/PAPIF_set_event_domain.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_event_domain.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_event_domain.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_event_domain" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_event_domain" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_granularity.3 7.2.0-1/man/man3/PAPIF_set_granularity.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_granularity.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_granularity.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_granularity" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_granularity" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_inherit.3 7.2.0-1/man/man3/PAPIF_set_inherit.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_inherit.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_inherit.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_inherit" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_inherit" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_set_multiplex.3 7.2.0-1/man/man3/PAPIF_set_multiplex.3
--- 7.2.0~b2-1/man/man3/PAPIF_set_multiplex.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_set_multiplex.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_set_multiplex" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_set_multiplex" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_shutdown.3 7.2.0-1/man/man3/PAPIF_shutdown.3
--- 7.2.0~b2-1/man/man3/PAPIF_shutdown.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_shutdown.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_shutdown" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_shutdown" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_start.3 7.2.0-1/man/man3/PAPIF_start.3
--- 7.2.0~b2-1/man/man3/PAPIF_start.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_start.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_start" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_start" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_state.3 7.2.0-1/man/man3/PAPIF_state.3
--- 7.2.0~b2-1/man/man3/PAPIF_state.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_state.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_state" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_state" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_stop.3 7.2.0-1/man/man3/PAPIF_stop.3
--- 7.2.0~b2-1/man/man3/PAPIF_stop.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_stop.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_stop" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_stop" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_thread_id.3 7.2.0-1/man/man3/PAPIF_thread_id.3
--- 7.2.0~b2-1/man/man3/PAPIF_thread_id.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_thread_id.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_thread_id" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_thread_id" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_thread_init.3 7.2.0-1/man/man3/PAPIF_thread_init.3
--- 7.2.0~b2-1/man/man3/PAPIF_thread_init.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_thread_init.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_thread_init" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_thread_init" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_unlock.3 7.2.0-1/man/man3/PAPIF_unlock.3
--- 7.2.0~b2-1/man/man3/PAPIF_unlock.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_unlock.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_unlock" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_unlock" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_unregister_thread.3 7.2.0-1/man/man3/PAPIF_unregister_thread.3
--- 7.2.0~b2-1/man/man3/PAPIF_unregister_thread.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_unregister_thread.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_unregister_thread" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_unregister_thread" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIF_write.3 7.2.0-1/man/man3/PAPIF_write.3
--- 7.2.0~b2-1/man/man3/PAPIF_write.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIF_write.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIF_write" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIF_write" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_accum.3 7.2.0-1/man/man3/PAPI_accum.3
--- 7.2.0~b2-1/man/man3/PAPI_accum.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_accum.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_accum" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_accum" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -18,9 +18,7 @@ PAPI_accum \- Accumulate and reset count
 int \fBPAPI_accum( int  EventSet, long_long * values )\fP;
 .RE
 .PP
-These calls assume an initialized PAPI library and a properly added event set\&. \fBPAPI_accum\fP adds the counters of the indicated event set into the array values\&. The counters are zeroed and continue counting after the operation\&. Note the differences between PAPI_read() and PAPI_accum()\&. Specifically, PAPI_accum() adds the values of the counters to the values stored in the array (the second parameter in PAPI_accum()) and then resets the counters to zero\&.
-.PP
-Note: The provided array (second parameter in \fBPAPI_accum\fP) must be initialized for \fBPAPI_accum\fP because its values are read inside the function\&.
+These calls assume an initialized PAPI library and a properly added event set\&. \fBPAPI_accum\fP adds the counters of the indicated event set into the array values\&. The counters are zeroed and continue counting after the operation\&. Note the differences between \fBPAPI_read\fP and \fBPAPI_accum\fP, specifically that \fBPAPI_accum\fP adds the counter values to the values already stored in the array and then resets the counters to zero\&. Because the existing contents of the array are read, the array must be initialized before the call\&.
 .PP
 \fBParameters\fP
 .RS 4
diff -pruN 7.2.0~b2-1/man/man3/PAPI_add_event.3 7.2.0-1/man/man3/PAPI_add_event.3
--- 7.2.0~b2-1/man/man3/PAPI_add_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_add_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_add_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_add_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_add_events.3 7.2.0-1/man/man3/PAPI_add_events.3
--- 7.2.0~b2-1/man/man3/PAPI_add_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_add_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_add_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_add_events" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_add_named_event.3 7.2.0-1/man/man3/PAPI_add_named_event.3
--- 7.2.0~b2-1/man/man3/PAPI_add_named_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_add_named_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_add_named_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_add_named_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -26,7 +26,7 @@ A hardware event can be either a PAPI pr
 .RS 4
 \fIEventSet\fP An integer handle for a PAPI Event Set as created by \fBPAPI_create_eventset\fP\&. 
 .br
-\fIEventName\fP A string containing the event name as listed in papi_avail or papi_native_avail\&.
+\fIEventName\fP A string containing the name of a defined event, such as PAPI_TOT_INS, as listed in papi_avail or papi_native_avail\&.
 .RE
 .PP
 \fBReturn values\fP
diff -pruN 7.2.0~b2-1/man/man3/PAPI_addr_range_option_t.3 7.2.0-1/man/man3/PAPI_addr_range_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_addr_range_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_addr_range_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_addr_range_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_addr_range_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_address_map_t.3 7.2.0-1/man/man3/PAPI_address_map_t.3
--- 7.2.0~b2-1/man/man3/PAPI_address_map_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_address_map_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_address_map_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_address_map_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_all_thr_spec_t.3 7.2.0-1/man/man3/PAPI_all_thr_spec_t.3
--- 7.2.0~b2-1/man/man3/PAPI_all_thr_spec_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_all_thr_spec_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_all_thr_spec_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_all_thr_spec_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_assign_eventset_component.3 7.2.0-1/man/man3/PAPI_assign_eventset_component.3
--- 7.2.0~b2-1/man/man3/PAPI_assign_eventset_component.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_assign_eventset_component.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_assign_eventset_component" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_assign_eventset_component" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_attach.3 7.2.0-1/man/man3/PAPI_attach.3
--- 7.2.0~b2-1/man/man3/PAPI_attach.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_attach.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_attach" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_attach" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_attach_option_t.3 7.2.0-1/man/man3/PAPI_attach_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_attach_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_attach_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_attach_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_attach_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_cleanup_eventset.3 7.2.0-1/man/man3/PAPI_cleanup_eventset.3
--- 7.2.0~b2-1/man/man3/PAPI_cleanup_eventset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_cleanup_eventset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_cleanup_eventset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_cleanup_eventset" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_component_info_t.3 7.2.0-1/man/man3/PAPI_component_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_component_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_component_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_component_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_component_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -34,6 +34,12 @@ PAPI_component_info_t
 .RI "int \fBdisabled\fP"
 .br
 .ti -1c
+.RI "char \fBpartially_disabled_reason\fP [1024]"
+.br
+.ti -1c
+.RI "int \fBpartially_disabled\fP"
+.br
+.ti -1c
 .RI "int \fBinitialized\fP"
 .br
 .ti -1c
@@ -176,6 +182,10 @@ Number of hardware counters the componen
 Number of native events the component supports 
 .SS "int PAPI_component_info_t::num_preset_events"
 Number of preset events the component supports 
+.SS "int PAPI_component_info_t::partially_disabled"
+1 if component is partially disabled, 0 otherwise 
+.SS "char PAPI_component_info_t::partially_disabled_reason[1024]"
+Reason the component is partially disabled 
 .SS "char* PAPI_component_info_t::pmu_names[80]"
 list of pmu names supported by this component 
 .SS "unsigned int PAPI_component_info_t::posix1b_timers"
diff -pruN 7.2.0~b2-1/man/man3/PAPI_cpu_option_t.3 7.2.0-1/man/man3/PAPI_cpu_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_cpu_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_cpu_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_cpu_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_cpu_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_create_eventset.3 7.2.0-1/man/man3/PAPI_create_eventset.3
--- 7.2.0~b2-1/man/man3/PAPI_create_eventset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_create_eventset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_create_eventset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_create_eventset" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_debug_option_t.3 7.2.0-1/man/man3/PAPI_debug_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_debug_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_debug_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_debug_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_debug_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_destroy_eventset.3 7.2.0-1/man/man3/PAPI_destroy_eventset.3
--- 7.2.0~b2-1/man/man3/PAPI_destroy_eventset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_destroy_eventset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_destroy_eventset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_destroy_eventset" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_detach.3 7.2.0-1/man/man3/PAPI_detach.3
--- 7.2.0~b2-1/man/man3/PAPI_detach.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_detach.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_detach" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_detach" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_disable_component.3 7.2.0-1/man/man3/PAPI_disable_component.3
--- 7.2.0~b2-1/man/man3/PAPI_disable_component.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_disable_component.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_disable_component" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_disable_component" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_disable_component_by_name.3 7.2.0-1/man/man3/PAPI_disable_component_by_name.3
--- 7.2.0~b2-1/man/man3/PAPI_disable_component_by_name.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_disable_component_by_name.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_disable_component_by_name" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_disable_component_by_name" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_dmem_info_t.3 7.2.0-1/man/man3/PAPI_dmem_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_dmem_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_dmem_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_dmem_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_dmem_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_domain_option_t.3 7.2.0-1/man/man3/PAPI_domain_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_domain_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_domain_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_domain_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_domain_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_enum_cmp_event.3 7.2.0-1/man/man3/PAPI_enum_cmp_event.3
--- 7.2.0~b2-1/man/man3/PAPI_enum_cmp_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_enum_cmp_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_enum_cmp_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_enum_cmp_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_enum_dev_type.3 7.2.0-1/man/man3/PAPI_enum_dev_type.3
--- 7.2.0~b2-1/man/man3/PAPI_enum_dev_type.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_enum_dev_type.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_enum_dev_type" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_enum_dev_type" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_enum_event.3 7.2.0-1/man/man3/PAPI_enum_event.3
--- 7.2.0~b2-1/man/man3/PAPI_enum_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_enum_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_enum_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_enum_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -84,6 +84,12 @@ The following values are implemented for
 .PD 0
 .IP "\(bu" 2
 PAPI_PRESET_ENUM_AVAIL -- enumerate only available presets 
+.IP "\(bu" 2
+PAPI_PRESET_ENUM_CPU -- enumerate CPU preset events 
+.IP "\(bu" 2
+PAPI_PRESET_ENUM_CPU_AVAIL -- enumerate available CPU preset events 
+.IP "\(bu" 2
+PAPI_PRESET_ENUM_FIRST_COMP -- enumerate first component preset event 
 .PP
 .RE
 .PP
diff -pruN 7.2.0~b2-1/man/man3/PAPI_epc.3 7.2.0-1/man/man3/PAPI_epc.3
--- 7.2.0~b2-1/man/man3/PAPI_epc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_epc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_epc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_epc" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_event_code_to_name.3 7.2.0-1/man/man3/PAPI_event_code_to_name.3
--- 7.2.0~b2-1/man/man3/PAPI_event_code_to_name.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_event_code_to_name.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_event_code_to_name" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_event_code_to_name" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_event_info_t.3 7.2.0-1/man/man3/PAPI_event_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_event_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_event_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_event_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_event_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -66,6 +66,15 @@ PAPI_event_info_t
 .ti -1c
 .RI "char \fBnote\fP [1024]"
 .br
+.ti -1c
+.RI "int \fBnum_quals\fP"
+.br
+.ti -1c
+.RI "char \fBquals\fP [8][1024]"
+.br
+.ti -1c
+.RI "char \fBquals_descrs\fP [8][1024]"
+.br
 .in -1c
 .SH "Field Documentation"
 .PP 
@@ -117,8 +126,14 @@ native: descriptive strings for each reg
 
 .SS "char PAPI_event_info_t::note[1024]"
 an optional developer note supplied with a preset event to delineate platform specific anomalies or restrictions 
+.SS "int PAPI_event_info_t::num_quals"
+number of qualifiers 
 .SS "char PAPI_event_info_t::postfix[256]"
 string containing postfix operations; only defined for preset events of derived type DERIVED_POSTFIX 
+.SS "char PAPI_event_info_t::quals[8][1024]"
+qualifiers 
+.SS "char PAPI_event_info_t::quals_descrs[8][1024]"
+qualifier descriptions 
 .SS "char PAPI_event_info_t::short_descr[64]"
 a short description suitable for use as a label 
 .SS "char PAPI_event_info_t::symbol[1024]"
diff -pruN 7.2.0~b2-1/man/man3/PAPI_event_name_to_code.3 7.2.0-1/man/man3/PAPI_event_name_to_code.3
--- 7.2.0~b2-1/man/man3/PAPI_event_name_to_code.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_event_name_to_code.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_event_name_to_code" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_event_name_to_code" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_exe_info_t.3 7.2.0-1/man/man3/PAPI_exe_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_exe_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_exe_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_exe_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_exe_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_flips_rate.3 7.2.0-1/man/man3/PAPI_flips_rate.3
--- 7.2.0~b2-1/man/man3/PAPI_flips_rate.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_flips_rate.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_flips_rate" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_flips_rate" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_flops_rate.3 7.2.0-1/man/man3/PAPI_flops_rate.3
--- 7.2.0~b2-1/man/man3/PAPI_flops_rate.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_flops_rate.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_flops_rate" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_flops_rate" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_cmp_opt.3 7.2.0-1/man/man3/PAPI_get_cmp_opt.3
--- 7.2.0~b2-1/man/man3/PAPI_get_cmp_opt.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_cmp_opt.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_cmp_opt" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_cmp_opt" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_component_index.3 7.2.0-1/man/man3/PAPI_get_component_index.3
--- 7.2.0~b2-1/man/man3/PAPI_get_component_index.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_component_index.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_component_index" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_component_index" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_component_info.3 7.2.0-1/man/man3/PAPI_get_component_info.3
--- 7.2.0~b2-1/man/man3/PAPI_get_component_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_component_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_component_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_component_info" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_dev_attr.3 7.2.0-1/man/man3/PAPI_get_dev_attr.3
--- 7.2.0~b2-1/man/man3/PAPI_get_dev_attr.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_dev_attr.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_dev_attr" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_dev_attr" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_dev_type_attr.3 7.2.0-1/man/man3/PAPI_get_dev_type_attr.3
--- 7.2.0~b2-1/man/man3/PAPI_get_dev_type_attr.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_dev_type_attr.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_dev_type_attr" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_dev_type_attr" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_dmem_info.3 7.2.0-1/man/man3/PAPI_get_dmem_info.3
--- 7.2.0~b2-1/man/man3/PAPI_get_dmem_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_dmem_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_dmem_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_dmem_info" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_event_component.3 7.2.0-1/man/man3/PAPI_get_event_component.3
--- 7.2.0~b2-1/man/man3/PAPI_get_event_component.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_event_component.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_event_component" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_event_component" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_event_info.3 7.2.0-1/man/man3/PAPI_get_event_info.3
--- 7.2.0~b2-1/man/man3/PAPI_get_event_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_event_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_event_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_event_info" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_eventset_component.3 7.2.0-1/man/man3/PAPI_get_eventset_component.3
--- 7.2.0~b2-1/man/man3/PAPI_get_eventset_component.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_eventset_component.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_eventset_component" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_eventset_component" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_executable_info.3 7.2.0-1/man/man3/PAPI_get_executable_info.3
--- 7.2.0~b2-1/man/man3/PAPI_get_executable_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_executable_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_executable_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_executable_info" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_hardware_info.3 7.2.0-1/man/man3/PAPI_get_hardware_info.3
--- 7.2.0~b2-1/man/man3/PAPI_get_hardware_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_hardware_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_hardware_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_hardware_info" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_multiplex.3 7.2.0-1/man/man3/PAPI_get_multiplex.3
--- 7.2.0~b2-1/man/man3/PAPI_get_multiplex.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_multiplex.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_multiplex" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_multiplex" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_opt.3 7.2.0-1/man/man3/PAPI_get_opt.3
--- 7.2.0~b2-1/man/man3/PAPI_get_opt.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_opt.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_opt" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_opt" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_overflow_event_index.3 7.2.0-1/man/man3/PAPI_get_overflow_event_index.3
--- 7.2.0~b2-1/man/man3/PAPI_get_overflow_event_index.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_overflow_event_index.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_overflow_event_index" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_overflow_event_index" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_real_cyc.3 7.2.0-1/man/man3/PAPI_get_real_cyc.3
--- 7.2.0~b2-1/man/man3/PAPI_get_real_cyc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_real_cyc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_real_cyc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_real_cyc" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_real_nsec.3 7.2.0-1/man/man3/PAPI_get_real_nsec.3
--- 7.2.0~b2-1/man/man3/PAPI_get_real_nsec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_real_nsec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_real_nsec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_real_nsec" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_real_usec.3 7.2.0-1/man/man3/PAPI_get_real_usec.3
--- 7.2.0~b2-1/man/man3/PAPI_get_real_usec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_real_usec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_real_usec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_real_usec" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_shared_lib_info.3 7.2.0-1/man/man3/PAPI_get_shared_lib_info.3
--- 7.2.0~b2-1/man/man3/PAPI_get_shared_lib_info.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_shared_lib_info.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_shared_lib_info" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_shared_lib_info" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_thr_specific.3 7.2.0-1/man/man3/PAPI_get_thr_specific.3
--- 7.2.0~b2-1/man/man3/PAPI_get_thr_specific.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_thr_specific.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_thr_specific" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_thr_specific" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_virt_cyc.3 7.2.0-1/man/man3/PAPI_get_virt_cyc.3
--- 7.2.0~b2-1/man/man3/PAPI_get_virt_cyc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_virt_cyc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_virt_cyc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_virt_cyc" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_virt_nsec.3 7.2.0-1/man/man3/PAPI_get_virt_nsec.3
--- 7.2.0~b2-1/man/man3/PAPI_get_virt_nsec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_virt_nsec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_virt_nsec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_virt_nsec" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_get_virt_usec.3 7.2.0-1/man/man3/PAPI_get_virt_usec.3
--- 7.2.0~b2-1/man/man3/PAPI_get_virt_usec.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_get_virt_usec.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_get_virt_usec" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_get_virt_usec" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_granularity_option_t.3 7.2.0-1/man/man3/PAPI_granularity_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_granularity_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_granularity_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_granularity_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_granularity_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_hl_read.3 7.2.0-1/man/man3/PAPI_hl_read.3
--- 7.2.0~b2-1/man/man3/PAPI_hl_read.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_hl_read.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_hl_read" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_hl_read" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_hl_region_begin.3 7.2.0-1/man/man3/PAPI_hl_region_begin.3
--- 7.2.0~b2-1/man/man3/PAPI_hl_region_begin.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_hl_region_begin.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_hl_region_begin" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_hl_region_begin" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_hl_region_end.3 7.2.0-1/man/man3/PAPI_hl_region_end.3
--- 7.2.0~b2-1/man/man3/PAPI_hl_region_end.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_hl_region_end.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_hl_region_end" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_hl_region_end" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_hl_stop.3 7.2.0-1/man/man3/PAPI_hl_stop.3
--- 7.2.0~b2-1/man/man3/PAPI_hl_stop.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_hl_stop.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_hl_stop" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_hl_stop" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_hw_info_t.3 7.2.0-1/man/man3/PAPI_hw_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_hw_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_hw_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_hw_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_hw_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_inherit_option_t.3 7.2.0-1/man/man3/PAPI_inherit_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_inherit_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_inherit_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_inherit_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_inherit_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_ipc.3 7.2.0-1/man/man3/PAPI_ipc.3
--- 7.2.0~b2-1/man/man3/PAPI_ipc.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_ipc.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_ipc" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_ipc" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_is_initialized.3 7.2.0-1/man/man3/PAPI_is_initialized.3
--- 7.2.0~b2-1/man/man3/PAPI_is_initialized.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_is_initialized.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_is_initialized" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_is_initialized" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_itimer_option_t.3 7.2.0-1/man/man3/PAPI_itimer_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_itimer_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_itimer_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_itimer_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_itimer_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_library_init.3 7.2.0-1/man/man3/PAPI_library_init.3
--- 7.2.0~b2-1/man/man3/PAPI_library_init.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_library_init.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_library_init" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_library_init" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_list_events.3 7.2.0-1/man/man3/PAPI_list_events.3
--- 7.2.0~b2-1/man/man3/PAPI_list_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_list_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_list_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_list_events" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -17,7 +17,7 @@ PAPI_list_events() returns an array of e
 .RS 4
 #include <\fBpapi\&.h\fP> 
 .br
-int \fBPAPI_list_events(int *EventSet, int *Events, int *number )\fP;
+int PAPI_list_events(int EventSet, int *Events, int *number);
 .RE
 .PP
 \fBParameters\fP
diff -pruN 7.2.0~b2-1/man/man3/PAPI_list_threads.3 7.2.0-1/man/man3/PAPI_list_threads.3
--- 7.2.0~b2-1/man/man3/PAPI_list_threads.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_list_threads.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_list_threads" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_list_threads" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_lock.3 7.2.0-1/man/man3/PAPI_lock.3
--- 7.2.0~b2-1/man/man3/PAPI_lock.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_lock.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_lock" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_lock" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_mh_cache_info_t.3 7.2.0-1/man/man3/PAPI_mh_cache_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_mh_cache_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_mh_cache_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_mh_cache_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_mh_cache_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_mh_info_t.3 7.2.0-1/man/man3/PAPI_mh_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_mh_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_mh_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_mh_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_mh_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_mh_level_t.3 7.2.0-1/man/man3/PAPI_mh_level_t.3
--- 7.2.0~b2-1/man/man3/PAPI_mh_level_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_mh_level_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_mh_level_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_mh_level_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_mh_tlb_info_t.3 7.2.0-1/man/man3/PAPI_mh_tlb_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_mh_tlb_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_mh_tlb_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_mh_tlb_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_mh_tlb_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_mpx_info_t.3 7.2.0-1/man/man3/PAPI_mpx_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_mpx_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_mpx_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_mpx_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_mpx_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_multiplex_init.3 7.2.0-1/man/man3/PAPI_multiplex_init.3
--- 7.2.0~b2-1/man/man3/PAPI_multiplex_init.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_multiplex_init.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_multiplex_init" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_multiplex_init" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_multiplex_option_t.3 7.2.0-1/man/man3/PAPI_multiplex_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_multiplex_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_multiplex_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_multiplex_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_multiplex_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_num_cmp_hwctrs.3 7.2.0-1/man/man3/PAPI_num_cmp_hwctrs.3
--- 7.2.0~b2-1/man/man3/PAPI_num_cmp_hwctrs.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_num_cmp_hwctrs.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_num_cmp_hwctrs" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_num_cmp_hwctrs" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_num_components.3 7.2.0-1/man/man3/PAPI_num_components.3
--- 7.2.0~b2-1/man/man3/PAPI_num_components.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_num_components.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_num_components" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_num_components" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_num_events.3 7.2.0-1/man/man3/PAPI_num_events.3
--- 7.2.0~b2-1/man/man3/PAPI_num_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_num_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_num_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_num_events" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_num_hwctrs.3 7.2.0-1/man/man3/PAPI_num_hwctrs.3
--- 7.2.0~b2-1/man/man3/PAPI_num_hwctrs.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_num_hwctrs.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_num_hwctrs" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_num_hwctrs" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_option_t.3 7.2.0-1/man/man3/PAPI_option_t.3
--- 7.2.0~b2-1/man/man3/PAPI_option_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_option_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_option_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_option_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_overflow.3 7.2.0-1/man/man3/PAPI_overflow.3
--- 7.2.0~b2-1/man/man3/PAPI_overflow.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_overflow.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_overflow" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_overflow" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_perror.3 7.2.0-1/man/man3/PAPI_perror.3
--- 7.2.0~b2-1/man/man3/PAPI_perror.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_perror.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_perror" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_perror" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_preload_info_t.3 7.2.0-1/man/man3/PAPI_preload_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_preload_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_preload_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_preload_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_preload_info_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_profil.3 7.2.0-1/man/man3/PAPI_profil.3
--- 7.2.0~b2-1/man/man3/PAPI_profil.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_profil.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_profil" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_profil" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_query_event.3 7.2.0-1/man/man3/PAPI_query_event.3
--- 7.2.0~b2-1/man/man3/PAPI_query_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_query_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_query_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_query_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_query_named_event.3 7.2.0-1/man/man3/PAPI_query_named_event.3
--- 7.2.0~b2-1/man/man3/PAPI_query_named_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_query_named_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_query_named_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_query_named_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_rate_stop.3 7.2.0-1/man/man3/PAPI_rate_stop.3
--- 7.2.0~b2-1/man/man3/PAPI_rate_stop.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_rate_stop.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_rate_stop" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_rate_stop" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_read.3 7.2.0-1/man/man3/PAPI_read.3
--- 7.2.0~b2-1/man/man3/PAPI_read.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_read.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_read" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_read" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
@@ -22,7 +22,7 @@ PAPI_read() copies the counters of the i
 .PP
 The counters continue counting after the read\&.
 .PP
-Note the differences between PAPI_read() and PAPI_accum()\&. Specifically, PAPI_accum() adds the values of the counters to the values stored in the array (the second parameter in PAPI_accum()) and then resets the counters to zero\&.
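+Note the differences between PAPI_read() and PAPI_accum()\&. Specifically, PAPI_accum() adds the counter values to the values array (its second parameter) and then resets the counters to zero, whereas PAPI_read() only copies the counters into the array\&.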
 .PP
 PAPI_read() assumes an initialized PAPI library and a properly added event set\&.
 .PP
diff -pruN 7.2.0~b2-1/man/man3/PAPI_read_ts.3 7.2.0-1/man/man3/PAPI_read_ts.3
--- 7.2.0~b2-1/man/man3/PAPI_read_ts.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_read_ts.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_read_ts" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_read_ts" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_register_thread.3 7.2.0-1/man/man3/PAPI_register_thread.3
--- 7.2.0~b2-1/man/man3/PAPI_register_thread.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_register_thread.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_register_thread" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_register_thread" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_remove_event.3 7.2.0-1/man/man3/PAPI_remove_event.3
--- 7.2.0~b2-1/man/man3/PAPI_remove_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_remove_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_remove_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_remove_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_remove_events.3 7.2.0-1/man/man3/PAPI_remove_events.3
--- 7.2.0~b2-1/man/man3/PAPI_remove_events.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_remove_events.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_remove_events" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_remove_events" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_remove_named_event.3 7.2.0-1/man/man3/PAPI_remove_named_event.3
--- 7.2.0~b2-1/man/man3/PAPI_remove_named_event.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_remove_named_event.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_remove_named_event" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_remove_named_event" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_reset.3 7.2.0-1/man/man3/PAPI_reset.3
--- 7.2.0~b2-1/man/man3/PAPI_reset.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_reset.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_reset" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_reset" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_cmp_domain.3 7.2.0-1/man/man3/PAPI_set_cmp_domain.3
--- 7.2.0~b2-1/man/man3/PAPI_set_cmp_domain.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_cmp_domain.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_cmp_domain" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_cmp_domain" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_cmp_granularity.3 7.2.0-1/man/man3/PAPI_set_cmp_granularity.3
--- 7.2.0~b2-1/man/man3/PAPI_set_cmp_granularity.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_cmp_granularity.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_cmp_granularity" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_cmp_granularity" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_debug.3 7.2.0-1/man/man3/PAPI_set_debug.3
--- 7.2.0~b2-1/man/man3/PAPI_set_debug.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_debug.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_debug" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_debug" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_domain.3 7.2.0-1/man/man3/PAPI_set_domain.3
--- 7.2.0~b2-1/man/man3/PAPI_set_domain.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_domain.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_domain" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_domain" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_granularity.3 7.2.0-1/man/man3/PAPI_set_granularity.3
--- 7.2.0~b2-1/man/man3/PAPI_set_granularity.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_granularity.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_granularity" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_granularity" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_multiplex.3 7.2.0-1/man/man3/PAPI_set_multiplex.3
--- 7.2.0~b2-1/man/man3/PAPI_set_multiplex.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_multiplex.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_multiplex" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_multiplex" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_opt.3 7.2.0-1/man/man3/PAPI_set_opt.3
--- 7.2.0~b2-1/man/man3/PAPI_set_opt.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_opt.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_opt" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_opt" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_set_thr_specific.3 7.2.0-1/man/man3/PAPI_set_thr_specific.3
--- 7.2.0~b2-1/man/man3/PAPI_set_thr_specific.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_set_thr_specific.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_set_thr_specific" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_set_thr_specific" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_shlib_info_t.3 7.2.0-1/man/man3/PAPI_shlib_info_t.3
--- 7.2.0~b2-1/man/man3/PAPI_shlib_info_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_shlib_info_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_shlib_info_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_shlib_info_t" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_shutdown.3 7.2.0-1/man/man3/PAPI_shutdown.3
--- 7.2.0~b2-1/man/man3/PAPI_shutdown.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_shutdown.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_shutdown" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_shutdown" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_sprofil.3 7.2.0-1/man/man3/PAPI_sprofil.3
--- 7.2.0~b2-1/man/man3/PAPI_sprofil.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_sprofil.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_sprofil" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_sprofil" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_sprofil_t.3 7.2.0-1/man/man3/PAPI_sprofil_t.3
--- 7.2.0~b2-1/man/man3/PAPI_sprofil_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_sprofil_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_sprofil_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_sprofil_t" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_start.3 7.2.0-1/man/man3/PAPI_start.3
--- 7.2.0~b2-1/man/man3/PAPI_start.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_start.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_start" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_start" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_state.3 7.2.0-1/man/man3/PAPI_state.3
--- 7.2.0~b2-1/man/man3/PAPI_state.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_state.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_state" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_state" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_stop.3 7.2.0-1/man/man3/PAPI_stop.3
--- 7.2.0~b2-1/man/man3/PAPI_stop.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_stop.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_stop" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_stop" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_strerror.3 7.2.0-1/man/man3/PAPI_strerror.3
--- 7.2.0~b2-1/man/man3/PAPI_strerror.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_strerror.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_strerror" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_strerror" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_thread_id.3 7.2.0-1/man/man3/PAPI_thread_id.3
--- 7.2.0~b2-1/man/man3/PAPI_thread_id.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_thread_id.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_thread_id" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_thread_id" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_thread_init.3 7.2.0-1/man/man3/PAPI_thread_init.3
--- 7.2.0~b2-1/man/man3/PAPI_thread_init.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_thread_init.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_thread_init" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_thread_init" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_unlock.3 7.2.0-1/man/man3/PAPI_unlock.3
--- 7.2.0~b2-1/man/man3/PAPI_unlock.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_unlock.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_unlock" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_unlock" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_unregister_thread.3 7.2.0-1/man/man3/PAPI_unregister_thread.3
--- 7.2.0~b2-1/man/man3/PAPI_unregister_thread.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_unregister_thread.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_unregister_thread" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_unregister_thread" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPI_write.3 7.2.0-1/man/man3/PAPI_write.3
--- 7.2.0~b2-1/man/man3/PAPI_write.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPI_write.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPI_write" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPI_write" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIf_hl_read.3 7.2.0-1/man/man3/PAPIf_hl_read.3
--- 7.2.0~b2-1/man/man3/PAPIf_hl_read.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIf_hl_read.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIf_hl_read" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIf_hl_read" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIf_hl_region_begin.3 7.2.0-1/man/man3/PAPIf_hl_region_begin.3
--- 7.2.0~b2-1/man/man3/PAPIf_hl_region_begin.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIf_hl_region_begin.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIf_hl_region_begin" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIf_hl_region_begin" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIf_hl_region_end.3 7.2.0-1/man/man3/PAPIf_hl_region_end.3
--- 7.2.0~b2-1/man/man3/PAPIf_hl_region_end.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIf_hl_region_end.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIf_hl_region_end" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIf_hl_region_end" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/PAPIf_hl_stop.3 7.2.0-1/man/man3/PAPIf_hl_stop.3
--- 7.2.0~b2-1/man/man3/PAPIf_hl_stop.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/PAPIf_hl_stop.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "PAPIf_hl_stop" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "PAPIf_hl_stop" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/RateInfo.3 7.2.0-1/man/man3/RateInfo.3
--- 7.2.0~b2-1/man/man3/RateInfo.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/RateInfo.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "RateInfo" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "RateInfo" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/binary_tree_t.3 7.2.0-1/man/man3/binary_tree_t.3
--- 7.2.0~b2-1/man/man3/binary_tree_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/binary_tree_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "binary_tree_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "binary_tree_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/components_t.3 7.2.0-1/man/man3/components_t.3
--- 7.2.0~b2-1/man/man3/components_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/components_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "components_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "components_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/local_components_t.3 7.2.0-1/man/man3/local_components_t.3
--- 7.2.0~b2-1/man/man3/local_components_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/local_components_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "local_components_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "local_components_t" 3 "Wed Jun 25 2025 19:30:48" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/papi_hl_output_writer_Sum_Counter.3 7.2.0-1/man/man3/papi_hl_output_writer_Sum_Counter.3
--- 7.2.0~b2-1/man/man3/papi_hl_output_writer_Sum_Counter.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/papi_hl_output_writer_Sum_Counter.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_hl_output_writer.Sum_Counter" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_hl_output_writer.Sum_Counter" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/papi_hl_output_writer_Sum_Counters.3 7.2.0-1/man/man3/papi_hl_output_writer_Sum_Counters.3
--- 7.2.0~b2-1/man/man3/papi_hl_output_writer_Sum_Counters.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/papi_hl_output_writer_Sum_Counters.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "papi_hl_output_writer.Sum_Counters" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "papi_hl_output_writer.Sum_Counters" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/reads_t.3 7.2.0-1/man/man3/reads_t.3
--- 7.2.0~b2-1/man/man3/reads_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/reads_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "reads_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "reads_t" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/regions_t.3 7.2.0-1/man/man3/regions_t.3
--- 7.2.0~b2-1/man/man3/regions_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/regions_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "regions_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "regions_t" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/threads_t.3 7.2.0-1/man/man3/threads_t.3
--- 7.2.0~b2-1/man/man3/threads_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/threads_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "threads_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "threads_t" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/man/man3/value_t.3 7.2.0-1/man/man3/value_t.3
--- 7.2.0~b2-1/man/man3/value_t.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/man/man3/value_t.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,4 +1,4 @@
-.TH "value_t" 3 "Mon Feb 24 2025 21:11:21" "Version 7.2.0.0b2" "PAPI" \" -*- nroff -*-
+.TH "value_t" 3 "Wed Jun 25 2025 19:30:49" "Version 7.2.0.0" "PAPI" \" -*- nroff -*-
 .ad l
 .nh
 .SH NAME
diff -pruN 7.2.0~b2-1/papi.spec 7.2.0-1/papi.spec
--- 7.2.0~b2-1/papi.spec	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/papi.spec	2025-06-25 22:38:10.000000000 +0000
@@ -1,6 +1,6 @@
 Summary: Performance Application Programming Interface
 Name: papi
-Version: 7.2.0.0b2
+Version: 7.2.0.0
 Release: 1%{?dist}
 License: BSD
 Group: Development/System
@@ -14,6 +14,21 @@ BuildRequires: chrpath
 #Right now libpfm does not know anything about s390 and will fail
 ExcludeArch: s390 s390x
 
+# Conditional for rocm_smi support
+%bcond_with rocm_smi
+
+# rocm_smi path detection
+%if %{with rocm_smi}
+# First try user-defined path, then default locations
+%define rocm_smi_path %{?_rocm_smi_path:%{_rocm_smi_path}}%{!?_rocm_smi_path:/opt/rocm}
+
+# Verify rocm_smi exists at the expected path
+%{!?__rocm_smi_exists:%global __rocm_smi_exists %(test -e %{rocm_smi_path} && echo 1 || echo 0)}
+%if !%{__rocm_smi_exists}
+  %{error: rocm_smi not found at %{rocm_smi_path}, install rocm_smi or specify alternate path with --define="_rocm_smi_path /path/to/rocm_smi"}
+%endif
+%endif
+
 %description
 PAPI provides a programmer interface to monitor the performance of
 running programs.
@@ -35,6 +50,10 @@ cd src
 %configure --with-static-lib=no --with-shared-lib=yes --with-shlib
 #DBG workaround to make sure libpfm just uses the normal CFLAGS
 DBG="" make
+%if %{with rocm_smi}
+    PAPI_ROCMSMI_ROOT=%{rocm_smi_path} \
+    --with-rocm-smi \
+%endif
 
 #%check
 #cd src
@@ -78,7 +97,7 @@ rm -rf $RPM_BUILD_ROOT
 * Wed Dec 8 2010 Dan Terpstra <terpstra@eecs.utk.edu> - 4.1.2-1
 - Rebase to papi-4.1.2
 
-* Mon Jun 8 2010 William Cohen <wcohen@redhat.com> - 4.1.0-1
+* Tue Jun 8 2010 William Cohen <wcohen@redhat.com> - 4.1.0-1
 - Rebase to papi-4.1.0
 
 * Mon May 17 2010 William Cohen <wcohen@redhat.com> - 4.0.0-5
diff -pruN 7.2.0~b2-1/src/Makefile.in 7.2.0-1/src/Makefile.in
--- 7.2.0~b2-1/src/Makefile.in	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/Makefile.in	2025-06-25 22:38:10.000000000 +0000
@@ -1,7 +1,7 @@
 PAPIVER=7
 PAPIREV=2
 PAPIAGE=0
-PAPIINC=0b2
+PAPIINC=0
 PREFIX    = @prefix@
 prefix    = $(PREFIX)
 exec_prefix = $(EPREFIX)
diff -pruN 7.2.0~b2-1/src/Makefile.inc 7.2.0-1/src/Makefile.inc
--- 7.2.0~b2-1/src/Makefile.inc	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/Makefile.inc	2025-06-25 22:38:10.000000000 +0000
@@ -17,7 +17,8 @@ HEADERS  = $(MISCHDRS) $(OSFILESHDR) $(P
 	papi_preset.h threads.h cpus.h papi_vector.h \
 	papi_memory.h config.h \
 	extras.h sw_multiplex.h \
-	papi_common_strings.h components_config.h
+	papi_common_strings.h components_config.h \
+	papi_components_config_event_defs.h
 
 LIBCFLAGS += -I. $(CFLAGS) -DOSLOCK=\"$(OSLOCK)\" -DOSCONTEXT=\"$(OSCONTEXT)\"
 FHEADERS = $(FORT_HEADERS)
@@ -292,7 +293,11 @@ clobber distclean: clean native_clobber
 	$(MAKE) -C utils distclean
 	$(MAKE) -C validation_tests distclean
 	$(MAKE) -C components -f Makefile_comp_tests distclean
-	rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so sde_lib/libsde.so* sde_lib/libsde.a libsde.so libsde.a papi.pc components_config.h $(PAPI_EVENTS_TABLE)
+	rm -f $(LIBRARY) $(SHLIB) $(EXTRALIBS) Makefile config.h libpapi.so sde_lib/libsde.so* sde_lib/libsde.a libsde.so libsde.a papi.pc components_config.h papi_components_config_event_defs.h $(PAPI_EVENTS_TABLE)
+	$(if ${COMPONENTS}, \
+		set -ex; for comp in ${COMPONENTS}; do \
+		    rm -f papi_$${comp}_std_event_defs.h; \
+		done)
 	rm -f config.log config.status f77papi.h f90papi.h fpapi.h
 
 null:
@@ -321,9 +326,15 @@ install-lib: native_install
 	@echo "Headers (INCDIR) being installed in: \"$(DESTDIR)$(INCDIR)\""; 
 	-mkdir -p $(DESTDIR)$(INCDIR)
 	-chmod go+rx $(DESTDIR)$(INCDIR)
-	cp $(FHEADERS) papi.h papiStdEventDefs.h $(DESTDIR)$(INCDIR)
+	cp $(FHEADERS) papi.h papiStdEventDefs.h papi_components_config_event_defs.h $(DESTDIR)$(INCDIR)
+	$(if ${COMPONENTS}, \
+		set -ex; for comp in ${COMPONENTS}; do \
+            if [ -e papi_$${comp}_std_event_defs.h ]; then \
+			    cp papi_$${comp}_std_event_defs.h $(DESTDIR)$(INCDIR); \
+            fi; \
+		done)
 	cp sde_lib/sde_lib.h sde_lib/sde_lib.hpp $(DESTDIR)$(INCDIR)
-	cd $(DESTDIR)$(INCDIR) && chmod go+r $(FHEADERS) papi.h papiStdEventDefs.h sde_lib.h sde_lib.hpp
+	cd $(DESTDIR)$(INCDIR) && chmod go+r $(FHEADERS) papi.h papiStdEventDefs.h papi_components_config_event_defs.h sde_lib.h sde_lib.hpp
 	@echo "Libraries (LIBDIR) being installed in: \"$(DESTDIR)$(LIBDIR)\""; 
 	-mkdir -p $(DESTDIR)$(LIBDIR)
 	-chmod go+rx $(DESTDIR)$(LIBDIR)
diff -pruN 7.2.0~b2-1/src/components/README 7.2.0-1/src/components/README
--- 7.2.0~b2-1/src/components/README	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/README	2025-06-25 22:38:10.000000000 +0000
@@ -64,8 +64,6 @@ mx                            Myricom MX
 net                           Linux network driver statistics
 nvml                          Requires its own configure; monitors NVIDIA hardware (power, temp, fan speed, etc).
 pcp                           Performance Co-Pilot interface.
-perfctr                       OLD, only used for Linux before 2.6.31.
-perfctr_ppc                   OLD, only used for Linux before 2.6.31.
 perf_event                    Linux perf_event CPU counters
 perf_event_uncore             Linux perf-event CPU uncore and Northbridge
 perfmon2                      OLD, only used for Linux before 2.6.31.
diff -pruN 7.2.0~b2-1/src/components/appio/appio.c 7.2.0-1/src/components/appio/appio.c
--- 7.2.0~b2-1/src/components/appio/appio.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/appio/appio.c	2025-06-25 22:38:10.000000000 +0000
@@ -746,6 +746,7 @@ papi_vector_t _appio_vector = {
         .name                  = "appio",
         .short_name            = "appio",
         .version               = "1.1.2.4",
+        .description           = "Linux I/O system calls",
         .CmpIdx                = 0,              /* set by init_component */
         .num_mpx_cntrs         = APPIO_MAX_COUNTERS,
         .num_cntrs             = APPIO_MAX_COUNTERS,
diff -pruN 7.2.0~b2-1/src/components/coretemp/linux-coretemp.c 7.2.0-1/src/components/coretemp/linux-coretemp.c
--- 7.2.0~b2-1/src/components/coretemp/linux-coretemp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/coretemp/linux-coretemp.c	2025-06-25 22:38:10.000000000 +0000
@@ -72,7 +72,7 @@ insert_in_list(char *name, char *units,
 		   /* Because this is a function, it is possible */
 		   /* we are called with root!=NULL but no last  */
 		   /* so add this to keep coverity happy         */
-		   free(temp);
+		   papi_free(temp);
 		   PAPIERROR("This shouldn't be possible\n");
 
 		   return PAPI_ECMP;
@@ -762,6 +762,7 @@ papi_vector_t _coretemp_vector = {
 				 .fast_virtual_timer = 0,
 				 .attach = 0,
 				 .attach_must_ptrace = 0,
+				 .kernel_multiplex = 1,
 				 }
 	,
 
diff -pruN 7.2.0~b2-1/src/components/cuda/README_internal.md 7.2.0-1/src/components/cuda/README_internal.md
--- 7.2.0~b2-1/src/components/cuda/README_internal.md	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/README_internal.md	2025-06-25 22:38:10.000000000 +0000
@@ -5,32 +5,44 @@ At current the Cuda component uses bits
 # Event Identifier Encoding Format
 
 ## Unused bits
-As of 09/16/24, there are a total of 34 unused bits. These bits can be used to create a new qualifier or can be used to extended the number of bits for an existing qualifier.
+
+As of 02/02/25, there are a total of 2 unused bits. These bits can be used to create a new qualifier or can be used to extend the number of bits for an existing qualifier.
+
+## STAT
+
+3 bits are allocated for the statistic qualifier, which accounts for 8 total statistics (e.g. [0 - 7 stats]).
 
 ## Device
+
 7 bits are allocated for the device which accounts for 128 total devices on a node (e.g. [0 - 127 devices]).
 
 ## Qlmask
-2 bits are allocated for the qualifier mask. 
+
+2 bits are allocated for the qualifier mask.
 
 ## Nameid
-21 bits are allocated for the nameid which will roughly account for greater than 2 million Cuda native events per device on a node.
+
+18 bits are allocated for the nameid, which accounts for up to 262,144 (2^18) Cuda native events per device on a node.
 
 ## Calculations for Bit Masks and Shifts
-| #DEFINE    | Bits |
-| -------- | ------- |
-| EVENTS_WIDTH  | `(sizeof(uint64_t) * 8)`    |
-| DEVICE_WIDTH | `( 7)`   |
-| QLMASK_WIDTH    | `( 2)`   |
-| NAMEID_WIDTH  | `(21)`    |
-| UNUSED_WIDTH   | `(EVENTS_WIDTH - DEVICE_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH)`   |
-| DEVICE_SHIFT  | `(EVENTS_WIDTH - UNUSED_WIDTH - DEVICE_WIDTH)`    |
-| QLMASK_SHIFT | `(DEVICE_SHIFT - QLMASK_WIDTH)`   |
-| NAMEID_SHIFT    | `(QLMASK_SHIFT - NAMEID_WIDTH)`   |
-| DEVICE_MASK  | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)`    |
-| QLMASK_MASK | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)`   |
-| NAMEID_MASK   | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)`   |
-| DEVICE_FLAG  | `DEVICE_FLAG  (0x1)`   |
 
+| #DEFINE      | Bits                                                                       |
+| ------------ | -------------------------------------------------------------------------- |
+| EVENTS_WIDTH | `(sizeof(uint32_t) * 8)`                                                   |
+| STAT_WIDTH   | `( 3)`                                                                     |
+| DEVICE_WIDTH | `( 7)`                                                                     |
+| QLMASK_WIDTH | `( 2)`                                                                     |
+| NAMEID_WIDTH | `(18)`                                                                     |
+| UNUSED_WIDTH | `(EVENTS_WIDTH - DEVICE_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH - STAT_WIDTH)` |
+| STAT_SHIFT   | `(EVENTS_WIDTH - UNUSED_WIDTH - STAT_WIDTH)`                               |
+| DEVICE_SHIFT | `(EVENTS_WIDTH - UNUSED_WIDTH - STAT_WIDTH - DEVICE_WIDTH)`                |
+| QLMASK_SHIFT | `(DEVICE_SHIFT - QLMASK_WIDTH)`                                            |
+| NAMEID_SHIFT | `(QLMASK_SHIFT - NAMEID_WIDTH)`                                            |
+| STAT_MASK    | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - STAT_WIDTH))   << STAT_SHIFT)`    |
+| DEVICE_MASK  | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)`  |
+| QLMASK_MASK  | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)`  |
+| NAMEID_MASK  | `((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)`  |
+| STAT_FLAG    | `STAT_FLAG  (0x2)`                                                         |
+| DEVICE_FLAG  | `DEVICE_FLAG  (0x1)`                                                       |
 
 **NOTE**: If adding a new qualifier, you must add it to the table found in the section titled [Calculations for Bit Masks and Shifts](#calculations-for-bit-masks-and-shifts) and account for this addition within `cupti_profiler.c`.
diff -pruN 7.2.0~b2-1/src/components/cuda/Rules.cuda 7.2.0-1/src/components/cuda/Rules.cuda
--- 7.2.0~b2-1/src/components/cuda/Rules.cuda	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/Rules.cuda	2025-06-25 22:38:10.000000000 +0000
@@ -5,11 +5,6 @@ PAPI_CUDA_ROOT ?= $(shell dirname $(shel
 # obtain user Cuda version to check if Cuda component currently supports it
 NVCC = $(PAPI_CUDA_ROOT)/bin/nvcc
 NVCC_VERSION := $(shell $(NVCC) --version | grep -oP '(?<=release )\d+\.\d+')
-ifneq ($(MAKECMDGOALS), clean)
-    ifeq ($(shell echo $(NVCC_VERSION) | awk '{print $$1 >= 12.6}'), 1)
-        $(error In Cuda 12.6, the MetricsContext API was replaced with the MetricsEvaluator API. Due to this, the Cuda component is currrently being refactored to support Cuda versions >= 12.6)
-    endif
-endif
 
 CUDA_MACS = -DPAPI_CUDA_MAIN=$(PAPI_CUDA_MAIN) -DPAPI_CUDA_RUNTIME=$(PAPI_CUDA_RUNTIME)
 CUDA_MACS+= -DPAPI_CUDA_CUPTI=$(PAPI_CUDA_CUPTI) -DPAPI_CUDA_PERFWORKS=$(PAPI_CUDA_PERFWORKS)
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_config.h 7.2.0-1/src/components/cuda/cupti_config.h
--- 7.2.0~b2-1/src/components/cuda/cupti_config.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_config.h	2025-06-25 22:38:10.000000000 +0000
@@ -20,12 +20,11 @@
 #   define API_PERFWORKS 1
 #endif
 
-/*
- * TODO: When NVIDIA removes the event API #define CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION
- * and set it to last version that supports it.
- * Then conditionally define the following macro if the version lies within this range.
- * Note: Introduce a runtime check in `cuptic_is_runtime_events_api()` to satisfy this.
- */
-#define API_EVENTS 1
+// The Events API has been deprecated in Cuda Toolkit 12.8 and will be removed in a future
+// CUDA release (https://docs.nvidia.com/cupti/api/group__CUPTI__EVENT__API.html).
+// TODO: When the Events API has been removed #define CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION
+// and set it to the last version that is supported. Use this macro as a runtime check in
+// `cuptic_determine_runtime_api`.
+#define API_EVENTS 2
 
 #endif  /* __LCUDA_CONFIG_H__ */
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_dispatch.c 7.2.0-1/src/components/cuda/cupti_dispatch.c
--- 7.2.0~b2-1/src/components/cuda/cupti_dispatch.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_dispatch.c	2025-06-25 22:38:10.000000000 +0000
@@ -21,7 +21,8 @@
 int cuptid_shutdown(void)
 {
     int papi_errno;
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         papi_errno = cuptip_shutdown();
@@ -30,7 +31,7 @@ int cuptid_shutdown(void)
         }
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         papi_errno = cuptie_shutdown();
@@ -44,40 +45,59 @@ int cuptid_shutdown(void)
     return cuptic_shutdown();
 }
 
-void cuptid_disabled_reason_get(const char **msg)
+int cuptid_err_get_last(const char **error_str)
 {
-    cuptic_disabled_reason_get(msg);
+    return cuptic_err_get_last(error_str);
+}
+
+int cuptid_get_chip_name(int dev_num, char *name)
+{
+    return get_chip_name(dev_num, name);
+}
+
+int cuptid_device_get_count(int *num_gpus)
+{
+    return cuptic_device_get_count(num_gpus);
 }
 
 int cuptid_init(void)
 {
     int papi_errno;
-    papi_errno = cuptic_init();
-    if (papi_errno != PAPI_OK) {
+    int init_errno = cuptic_init();
+    if (init_errno != PAPI_OK && init_errno != PAPI_PARTIAL) {
+        papi_errno = init_errno;
         goto fn_exit;
     }
 
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         papi_errno = cuptip_init();
+        if (papi_errno == PAPI_OK) {
+            if (init_errno == PAPI_PARTIAL) {
+                papi_errno = init_errno;
+            }
+        }
 #else
-        cuptic_disabled_reason_set("PAPI not built with NVIDIA profiler API support.");
+        cuptic_err_set_last("PAPI not built with NVIDIA profiler API support.");
         papi_errno = PAPI_ECMP;
         goto fn_exit;
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
+        // TODO: When the Events API is added back, propagate a PAPI_PARTIAL result
+        // from cuptic_init() here, as the Perfworks branch above does
         papi_errno = cuptie_init();
 #else
-        cuptic_disabled_reason_set("Unknown events API problem.");
+        cuptic_err_set_last("Unknown events API problem.");
         papi_errno = PAPI_ECMP;
 #endif
 
     } else {
-        cuptic_disabled_reason_set("CUDA configuration not supported.");
+        cuptic_err_set_last("CUDA configuration not supported.");
         papi_errno = PAPI_ECMP;
     }
 fn_exit:
@@ -94,15 +114,16 @@ int cuptid_thread_info_destroy(cuptid_in
     return cuptic_ctxarr_destroy((cuptic_info_t *) info);
 }
 
-int cuptid_ctx_create(cuptid_info_t info,  cuptip_control_t *pcupti_ctl, uint64_t *events_id, int num_events)
+int cuptid_ctx_create(cuptid_info_t info,  cuptip_control_t *pcupti_ctl, uint32_t *events_id, int num_events)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_ctx_create((cuptic_info_t) info, pcupti_ctl, events_id, num_events);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined (API_EVENTS)
         return cuptie_ctx_create((cuptic_info_t) info, (cuptie_control_t *) pcupti_ctl);
@@ -114,13 +135,14 @@ int cuptid_ctx_create(cuptid_info_t info
 
 int cuptid_ctx_start(cuptip_control_t cupti_ctl)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_ctx_start(cupti_ctl);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_ctx_start((cuptie_control_t) cupti_ctl);
@@ -132,13 +154,14 @@ int cuptid_ctx_start(cuptip_control_t cu
 
 int cuptid_ctx_read(cuptip_control_t cupti_ctl, long long **counters)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_ctx_read(cupti_ctl, counters);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_ctx_read((cuptie_control_t) cupti_ctl, counters);
@@ -150,12 +173,13 @@ int cuptid_ctx_read(cuptip_control_t cup
 
 int cuptid_ctx_reset(cuptip_control_t cupti_ctl)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_ctx_reset(cupti_ctl);
 #endif
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_ctx_reset((cuptie_control_t) cupti_ctl);
@@ -166,13 +190,14 @@ int cuptid_ctx_reset(cuptip_control_t cu
 
 int cuptid_ctx_stop(cuptip_control_t cupti_ctl)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_ctx_stop(cupti_ctl);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_ctx_stop((cuptie_control_t) cupti_ctl);
@@ -184,13 +209,14 @@ int cuptid_ctx_stop(cuptip_control_t cup
 
 int cuptid_ctx_destroy(cuptip_control_t *pcupti_ctl)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_ctx_destroy(pcupti_ctl);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_ctx_destroy((cuptie_control_t *) pcupti_ctl);
@@ -200,15 +226,16 @@ int cuptid_ctx_destroy(cuptip_control_t
     return PAPI_ECMP;
 }
 
-int cuptid_evt_enum(uint64_t *event_code, int modifier)
+int cuptid_evt_enum(uint32_t *event_code, int modifier)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_evt_enum(event_code, modifier);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_evt_enum(event_code, modifier);
@@ -218,15 +245,16 @@ int cuptid_evt_enum(uint64_t *event_code
     return PAPI_ECMP;
 }
 
-int cuptid_evt_code_to_descr(uint64_t event_code, char *descr, int len)
+int cuptid_evt_code_to_descr(uint32_t event_code, char *descr, int len)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_evt_code_to_descr(event_code, descr, len);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_evt_code_to_descr(event_code, descr, len);
@@ -236,15 +264,16 @@ int cuptid_evt_code_to_descr(uint64_t ev
     return PAPI_ECMP;
 }
 
-int cuptid_evt_name_to_code(const char *name, uint64_t *event_code)
+int cuptid_evt_name_to_code(const char *name, uint32_t *event_code)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_evt_name_to_code(name, event_code);
 #endif
 
-    } else if (cuptic_is_runtime_events_api()) {
+    } else if (cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_evt_name_to_code(name, event_code);
@@ -254,15 +283,16 @@ int cuptid_evt_name_to_code(const char *
     return PAPI_ECMP;
 }
 
-int cuptid_evt_code_to_name(uint64_t event_code, char *name, int len)
+int cuptid_evt_code_to_name(uint32_t event_code, char *name, int len)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_evt_code_to_name(event_code, name, len);
 #endif
 
-    } else if(cuptic_is_runtime_events_api()) {
+    } else if(cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_evt_code_to_name(event_code, name, len);
@@ -272,15 +302,16 @@ int cuptid_evt_code_to_name(uint64_t eve
     return PAPI_ECMP;
 }
 
-int cuptid_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info)
+int cuptid_evt_code_to_info(uint32_t event_code, PAPI_event_info_t *info)
 {
-    if (cuptic_is_runtime_perfworks_api()) {
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api == API_PERFWORKS) {
 
 #if defined(API_PERFWORKS)
         return cuptip_evt_code_to_info(event_code, info);
 #endif
 
-    } else if(cuptic_is_runtime_events_api()) {
+    } else if(cupti_api == API_EVENTS) {
 
 #if defined(API_EVENTS)
         return cuptie_evt_code_to_info(event_code, info);
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_dispatch.h 7.2.0-1/src/components/cuda/cupti_dispatch.h
--- 7.2.0~b2-1/src/components/cuda/cupti_dispatch.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_dispatch.h	2025-06-25 22:38:10.000000000 +0000
@@ -23,14 +23,14 @@ int cuptid_init(void);
 int cuptid_shutdown(void);
 
 /* native event interfaces */
-int cuptid_evt_enum(uint64_t *event_code, int modifier);
-int cuptid_evt_code_to_descr(uint64_t event_code, char *descr, int len);
-int cuptid_evt_name_to_code(const char *name, uint64_t *event_code);
-int cuptid_evt_code_to_name(uint64_t event_code, char *name, int len);
-int cuptid_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info);
+int cuptid_evt_enum(uint32_t *event_code, int modifier);
+int cuptid_evt_code_to_descr(uint32_t event_code, char *descr, int len);
+int cuptid_evt_name_to_code(const char *name, uint32_t *event_code);
+int cuptid_evt_code_to_name(uint32_t event_code, char *name, int len);
+int cuptid_evt_code_to_info(uint32_t event_code, PAPI_event_info_t *info);
 
 /* profiling context handling interfaces */
-int cuptid_ctx_create(cuptid_info_t thread_info, cuptip_control_t *pcupti_ctl, uint64_t *events_id, int num_events);
+int cuptid_ctx_create(cuptid_info_t thread_info, cuptip_control_t *pcupti_ctl, uint32_t *events_id, int num_events);
 int cuptid_ctx_start(cuptip_control_t ctl);
 int cuptid_ctx_read(cuptip_control_t ctl, long long **counters);
 int cuptid_ctx_reset(cuptip_control_t ctl);
@@ -42,6 +42,8 @@ int cuptid_thread_info_create(cuptid_inf
 int cuptid_thread_info_destroy(cuptid_info_t *info);
 
 /* misc. */
-void cuptid_disabled_reason_get(const char **msg);
+int cuptid_err_get_last(const char **error_str);
+int cuptid_get_chip_name(int dev_num, char *name);
+int cuptid_device_get_count(int *num_gpus);
 
 #endif /* __CUPTI_DISPATCH_H__ */
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_events.c 7.2.0-1/src/components/cuda/cupti_events.c
--- 7.2.0~b2-1/src/components/cuda/cupti_events.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_events.c	2025-06-25 22:38:10.000000000 +0000
@@ -17,7 +17,7 @@
 
 int cuptie_init(void)
 {
-    cuptic_disabled_reason_set("CUDA events API not implemented.");
+    cuptic_err_set_last("CUDA events API not implemented.");
     return PAPI_ENOIMPL;
 }
 
@@ -51,27 +51,27 @@ int cuptie_ctx_destroy(cuptie_control_t
     return PAPI_ENOIMPL;
 }
 
-int cuptie_evt_enum(uint64_t *event_code, int modifier)
+int cuptie_evt_enum(uint32_t *event_code, int modifier)
 {
     return PAPI_ENOIMPL;
 }
 
-int cuptie_evt_code_to_descr(uint64_t event_code, char *descr, int len) 
+int cuptie_evt_code_to_descr(uint32_t event_code, char *descr, int len) 
 {
     return PAPI_ENOIMPL;
 }
 
-int cuptie_evt_name_to_code(const char *name, uint64_t *event_code)
+int cuptie_evt_name_to_code(const char *name, uint32_t *event_code)
 {
     return PAPI_ENOIMPL;
 }
 
-int cuptie_evt_code_to_name(uint64_t event_code, char *name, int len)
+int cuptie_evt_code_to_name(uint32_t event_code, char *name, int len)
 {
     return PAPI_ENOIMPL;
 }
 
-int cuptie_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info) 
+int cuptie_evt_code_to_info(uint32_t event_code, PAPI_event_info_t *info) 
 {
     return PAPI_ENOIMPL;
 }
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_events.h 7.2.0-1/src/components/cuda/cupti_events.h
--- 7.2.0~b2-1/src/components/cuda/cupti_events.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_events.h	2025-06-25 22:38:10.000000000 +0000
@@ -19,11 +19,11 @@ int cuptie_init(void);
 int cuptie_shutdown(void);
 
 /* native event interfaces */
-int cuptie_evt_enum(uint64_t *event_code, int modifier); 
-int cuptie_evt_code_to_descr(uint64_t event_code, char *descr, int len);
-int cuptie_evt_name_to_code(const char *name, uint64_t *event_code);
-int cuptie_evt_code_to_name(uint64_t event_code, char *name, int len);
-int cuptie_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info);
+int cuptie_evt_enum(uint32_t *event_code, int modifier); 
+int cuptie_evt_code_to_descr(uint32_t event_code, char *descr, int len);
+int cuptie_evt_name_to_code(const char *name, uint32_t *event_code);
+int cuptie_evt_code_to_name(uint32_t event_code, char *name, int len);
+int cuptie_evt_code_to_info(uint32_t event_code, PAPI_event_info_t *info);
 
 /* profiling context handling interfaces */
 int cuptie_ctx_create(void *thr_info, cuptie_control_t *pctl);
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_profiler.c 7.2.0-1/src/components/cuda/cupti_profiler.c
--- 7.2.0~b2-1/src/components/cuda/cupti_profiler.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_profiler.c	2025-06-25 22:38:10.000000000 +0000
@@ -3,6 +3,7 @@
  *
  * @author  Treece Burgess tburgess@icl.utk.edu (updated in 2024, redesigned to add device qualifier support.)
  * @author  Anustuv Pal    anustuv@icl.utk.edu
+ * @author  Dong Jun Woun  dwoun@vols.utk.edu
  */
 
 #include <dlfcn.h>
@@ -23,59 +24,59 @@
 
 /**
  * Event identifier encoding format:
- * +---------------------------------+-------+----+------------+
- * |         unused                  |  dev  | ql |   nameid   |
- * +---------------------------------+-------+----+------------+
+ * +--------+------+-------+----+------------+
+ * | unused | stat |  dev  | ql |   nameid   |
+ * +--------+------+-------+----+------------+
  *
- * unused    : 34 bits 
+ * unused    : 2  bits 
+ * stat      : 3  bits ([0 -   7] stats)
  * device    : 7  bits ([0 - 127] devices)
  * qlmask    : 2  bits (qualifier mask)
- * nameid    : 21: bits (roughly > 2 million event names)
+ * nameid    : 18 bits (roughly 262 thousand event names)
  */
-#define EVENTS_WIDTH (sizeof(uint64_t) * 8)
+#define EVENTS_WIDTH (sizeof(uint32_t) * 8)
+#define STAT_WIDTH   ( 3)
 #define DEVICE_WIDTH ( 7)
 #define QLMASK_WIDTH ( 2) 
-#define NAMEID_WIDTH (21)
-#define UNUSED_WIDTH (EVENTS_WIDTH - DEVICE_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH)
-#define DEVICE_SHIFT (EVENTS_WIDTH - UNUSED_WIDTH - DEVICE_WIDTH)
+#define NAMEID_WIDTH (18)
+#define UNUSED_WIDTH (EVENTS_WIDTH - DEVICE_WIDTH - QLMASK_WIDTH - NAMEID_WIDTH - STAT_WIDTH)
+#define STAT_SHIFT   (EVENTS_WIDTH - UNUSED_WIDTH - STAT_WIDTH)
+#define DEVICE_SHIFT (EVENTS_WIDTH - UNUSED_WIDTH - STAT_WIDTH - DEVICE_WIDTH)
 #define QLMASK_SHIFT (DEVICE_SHIFT - QLMASK_WIDTH)
 #define NAMEID_SHIFT (QLMASK_SHIFT - NAMEID_WIDTH)
-#define DEVICE_MASK  ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)
-#define QLMASK_MASK  ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)
-#define NAMEID_MASK  ((0xFFFFFFFFFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)
+#define STAT_MASK    ((0xFFFFFFFF >> (EVENTS_WIDTH - STAT_WIDTH)) << STAT_SHIFT)
+#define DEVICE_MASK  ((0xFFFFFFFF >> (EVENTS_WIDTH - DEVICE_WIDTH)) << DEVICE_SHIFT)
+#define QLMASK_MASK  ((0xFFFFFFFF >> (EVENTS_WIDTH - QLMASK_WIDTH)) << QLMASK_SHIFT)
+#define NAMEID_MASK  ((0xFFFFFFFF >> (EVENTS_WIDTH - NAMEID_WIDTH)) << NAMEID_SHIFT)
+#define STAT_FLAG    (0x2)
 #define DEVICE_FLAG  (0x1)
 
-typedef struct byte_array_s         byte_array_t;
-typedef struct cuptip_gpu_state_s   cuptip_gpu_state_t;
-typedef struct NVPA_MetricsContext  NVPA_MetricsContext;
+#define NUM_STATS_QUALS 7
+char stats[NUM_STATS_QUALS][PAPI_MIN_STR_LEN] = {"avg", "sum", "min", "max", "max_rate", "pct", "ratio"};
 
 typedef struct {
+    int stat;
     int device;
     int flags;
     int nameid;
 } event_info_t;
 
-
-struct byte_array_s {
+typedef struct byte_array_s {
     int      size;
     uint8_t *data;
-};
+} byte_array_t;
 
-struct cuptip_gpu_state_s {
-    int                    gpu_id;
+typedef struct cuptip_gpu_state_s {
+    int                    dev_id;
     cuptiu_event_table_t  *added_events;
-    int                    rmr_count;
-    NVPA_RawMetricRequest *rmr;
-    MCCP_t                *pmetricsContextCreateParams;
-    byte_array_t           counterDataImagePrefix;
-    byte_array_t           configImage;
-    byte_array_t           counterDataImage;
-    byte_array_t           counterDataScratchBuffer;
-    byte_array_t           counterAvailabilityImage;
-    CUpti_Profiler_CounterDataImageOptions counterDataImageOptions;
-    CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams;
-    CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams;
-};
+    int                   numberOfRawMetricRequests;
+    NVPA_RawMetricRequest *rawMetricRequests;
+    byte_array_t          counterDataPrefixImage;
+    byte_array_t          configImage;
+    byte_array_t          counterDataImage;
+    byte_array_t          counterDataScratchBuffer;
+    byte_array_t          counterAvailabilityImage;
+} cuptip_gpu_state_t;
 
 struct cuptip_control_s {
     cuptip_gpu_state_t *gpu_ctl;
@@ -86,106 +87,10 @@ struct cuptip_control_s {
 };
 
 static void *dl_nvpw;
-static int num_gpus;
-static gpu_record_t *avail_gpu_info;
-
-/* main event table to store metrics */
+static int numDevicesOnMachine;
 static cuptiu_event_table_t *cuptiu_table_p;
 
-/* load and unload cuda function pointers */
-static int load_cupti_perf_sym(void);
-static int unload_cupti_perf_sym(void);
-
-/* load and unload nvperf function pointers */
-static int load_nvpw_sym(void);
-static int unload_nvpw_sym(void);
-
-/* utility functions to initialize API's such as cupti and perfworks */
-static int initialize_cupti_profiler_api(void);
-static int deinitialize_cupti_profiler_api(void);
-static int initialize_perfworks_api(void);
-
-/* utility functions to init metrics and cuda native event table */
-static int init_all_metrics(void);
-static int init_main_htable(void);
-static int init_event_table(void);
-static int shutdown_event_table(void);
-static void free_all_enumerated_metrics(void);
-
-/* functions to handle contexts */
-static int nvpw_cuda_metricscontext_create(cuptip_control_t state);
-static int nvpw_cuda_metricscontext_destroy(cuptip_control_t state);
-
-/* funtions for config images */
-static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl);
-static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl);
-static int create_counter_data_image(cuptip_gpu_state_t *gpu_ctl);
-static int reset_cupti_prof_config_images(cuptip_gpu_state_t *gpu_ctl);
-
-/* functions to set up profiling and end profiling */
-static int begin_profiling(cuptip_gpu_state_t *gpu_ctl);
-static int end_profiling(cuptip_gpu_state_t *gpu_ctl);
-
-/* NVIDIA chip functions */
-static int get_chip_name(int dev_num, char* chipName);
-static int find_same_chipname(int gpu_id);
-
-/* functions to check if a cuda native event requires multiple passes */
-static int check_multipass(cuptip_control_t state);
-static int calculate_num_passes(struct NVPA_RawMetricsConfig *pRawMetricsConfig, int rmr_count,
-                                NVPA_RawMetricRequest *rmr, int *num_pass);
-
-/* functions to set and get cuda native event info  or convert cuda native events  */
-static int get_ntv_events(cuptiu_event_table_t *evt_table, const char *evt_name, int gpu_id);
-static int verify_events(uint64_t *events_id, int num_events, cuptip_control_t state);
-static int evt_id_to_info(uint64_t event_id, event_info_t *info);
-static int evt_id_create(event_info_t *info, uint64_t *event_id);
-static int evt_code_to_name(uint64_t event_code, char *name, int len);
-static int evt_name_to_basename(const char *name, char *base, int len);
-static int evt_name_to_device(const char *name, int *device);
-static int retrieve_metric_descr( NVPA_MetricsContext *pMetricsContext, const char *evt_name,
-                                  char *description, const char *chip_name );
-static int retrieve_metric_rmr( NVPA_MetricsContext *pMetricsContext, const char *evt_name,
-                                int *numDep, NVPA_RawMetricRequest **pRMR );
-
-/* misc */
-static int get_event_collection_method(const char *evt_name);
-static int get_added_events_rmr(cuptip_gpu_state_t *gpu_ctl);
-static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl);
-static int get_measured_values(cuptip_gpu_state_t *gpu_ctl, long long *counts);
-
-/* nvperf function pointers */
-NVPA_Status ( *NVPW_GetSupportedChipNamesPtr ) (NVPW_GetSupportedChipNames_Params* params);
-NVPA_Status ( *NVPW_CUDA_MetricsContext_CreatePtr ) (NVPW_CUDA_MetricsContext_Create_Params* params);
-NVPA_Status ( *NVPW_MetricsContext_DestroyPtr ) (NVPW_MetricsContext_Destroy_Params * params);
-NVPA_Status ( *NVPW_MetricsContext_GetMetricNames_BeginPtr ) (NVPW_MetricsContext_GetMetricNames_Begin_Params* params);
-NVPA_Status ( *NVPW_MetricsContext_GetMetricNames_EndPtr ) (NVPW_MetricsContext_GetMetricNames_End_Params* params);
-NVPA_Status ( *NVPW_InitializeHostPtr ) (NVPW_InitializeHost_Params* params);
-NVPA_Status ( *NVPW_MetricsContext_GetMetricProperties_BeginPtr ) (NVPW_MetricsContext_GetMetricProperties_Begin_Params* p);
-NVPA_Status ( *NVPW_MetricsContext_GetMetricProperties_EndPtr ) (NVPW_MetricsContext_GetMetricProperties_End_Params* p);
-NVPA_Status ( *NVPW_CUDA_RawMetricsConfig_CreatePtr ) (NVPW_CUDA_RawMetricsConfig_Create_Params*);
-NVPA_Status ( *NVPW_RawMetricsConfig_DestroyPtr ) (NVPW_RawMetricsConfig_Destroy_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_BeginPassGroupPtr ) (NVPW_RawMetricsConfig_BeginPassGroup_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_EndPassGroupPtr ) (NVPW_RawMetricsConfig_EndPassGroup_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_AddMetricsPtr ) (NVPW_RawMetricsConfig_AddMetrics_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_GenerateConfigImagePtr ) (NVPW_RawMetricsConfig_GenerateConfigImage_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_GetConfigImagePtr ) (NVPW_RawMetricsConfig_GetConfigImage_Params* params);
-NVPA_Status ( *NVPW_CounterDataBuilder_CreatePtr ) (NVPW_CounterDataBuilder_Create_Params* params);
-NVPA_Status ( *NVPW_CounterDataBuilder_DestroyPtr ) (NVPW_CounterDataBuilder_Destroy_Params* params);
-NVPA_Status ( *NVPW_CounterDataBuilder_AddMetricsPtr ) (NVPW_CounterDataBuilder_AddMetrics_Params* params);
-NVPA_Status ( *NVPW_CounterDataBuilder_GetCounterDataPrefixPtr ) (NVPW_CounterDataBuilder_GetCounterDataPrefix_Params* params);
-NVPA_Status ( *NVPW_CounterData_GetNumRangesPtr ) (NVPW_CounterData_GetNumRanges_Params* params);
-NVPA_Status ( *NVPW_Profiler_CounterData_GetRangeDescriptionsPtr ) (NVPW_Profiler_CounterData_GetRangeDescriptions_Params* params);
-NVPA_Status ( *NVPW_MetricsContext_SetCounterDataPtr ) (NVPW_MetricsContext_SetCounterData_Params* params);
-NVPA_Status ( *NVPW_MetricsContext_EvaluateToGpuValuesPtr ) (NVPW_MetricsContext_EvaluateToGpuValues_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_GetNumPassesPtr ) (NVPW_RawMetricsConfig_GetNumPasses_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_SetCounterAvailabilityPtr ) (NVPW_RawMetricsConfig_SetCounterAvailability_Params* params);
-NVPA_Status ( *NVPW_RawMetricsConfig_IsAddMetricsPossiblePtr ) (NVPW_RawMetricsConfig_IsAddMetricsPossible_Params* params);
-NVPA_Status ( *NVPW_MetricsContext_GetCounterNames_BeginPtr ) (NVPW_MetricsContext_GetCounterNames_Begin_Params* pParams);
-NVPA_Status ( *NVPW_MetricsContext_GetCounterNames_EndPtr ) (NVPW_MetricsContext_GetCounterNames_End_Params* pParams);
-
-/* cupti function pointers */
-CUptiResult ( *cuptiDeviceGetChipNamePtr ) (CUpti_Device_GetChipName_Params* params);
+// Cupti Profiler API function pointers //
 CUptiResult ( *cuptiProfilerInitializePtr ) (CUpti_Profiler_Initialize_Params* params);
 CUptiResult ( *cuptiProfilerDeInitializePtr ) (CUpti_Profiler_DeInitialize_Params* params);
 CUptiResult ( *cuptiProfilerCounterDataImageCalculateSizePtr ) (CUpti_Profiler_CounterDataImage_CalculateSize_Params* params);
@@ -206,6 +111,120 @@ CUptiResult ( *cuptiProfilerEndSessionPt
 CUptiResult ( *cuptiProfilerGetCounterAvailabilityPtr ) (CUpti_Profiler_GetCounterAvailability_Params* params);
 CUptiResult ( *cuptiFinalizePtr ) (void);
 
+// Function wrappers for the Cupti Profiler API //
+static int initialize_cupti_profiler_api(void);
+static int deinitialize_cupti_profiler_api(void);
+static int enable_profiling(void);
+static int begin_pass(void);
+static int end_pass(void);
+static int push_range(const char *pRangeName);
+static int pop_range(void);
+static int flush_data(void);
+static int disable_profiling(void);
+static int unset_config(void);
+static int end_session(void);
+
+// Perfworks API function pointers //
+// Initialize
+NVPA_Status ( *NVPW_InitializeHostPtr ) (NVPW_InitializeHost_Params* params);
+// Enumeration
+NVPA_Status ( *NVPW_MetricsEvaluator_GetMetricNamesPtr ) (NVPW_MetricsEvaluator_GetMetricNames_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_GetSupportedSubmetricsPtr ) (NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_GetCounterPropertiesPtr ) (NVPW_MetricsEvaluator_GetCounterProperties_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_GetRatioMetricPropertiesPtr ) (NVPW_MetricsEvaluator_GetRatioMetricProperties_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_GetThroughputMetricPropertiesPtr ) (NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_GetMetricDimUnitsPtr ) (NVPW_MetricsEvaluator_GetMetricDimUnits_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_DimUnitToStringPtr ) (NVPW_MetricsEvaluator_DimUnitToString_Params* pParams);
+// Configuration
+NVPA_Status ( *NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequestPtr ) (NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_GetMetricRawDependenciesPtr ) (NVPW_MetricsEvaluator_GetMetricRawDependencies_Params* pParams);
+NVPA_Status ( *NVPW_CUDA_RawMetricsConfig_Create_V2Ptr ) (NVPW_CUDA_RawMetricsConfig_Create_V2_Params* pParams);
+NVPA_Status ( *NVPW_RawMetricsConfig_GenerateConfigImagePtr ) (NVPW_RawMetricsConfig_GenerateConfigImage_Params* params);
+NVPA_Status ( *NVPW_RawMetricsConfig_GetConfigImagePtr ) (NVPW_RawMetricsConfig_GetConfigImage_Params* params);
+NVPA_Status ( *NVPW_CounterDataBuilder_CreatePtr ) (NVPW_CounterDataBuilder_Create_Params* params);
+NVPA_Status ( *NVPW_CounterDataBuilder_AddMetricsPtr ) (NVPW_CounterDataBuilder_AddMetrics_Params* params);
+NVPA_Status ( *NVPW_CounterDataBuilder_GetCounterDataPrefixPtr ) (NVPW_CounterDataBuilder_GetCounterDataPrefix_Params* params);
+NVPA_Status ( *NVPW_CUDA_CounterDataBuilder_CreatePtr ) (NVPW_CUDA_CounterDataBuilder_Create_Params* pParams);
+NVPA_Status ( *NVPW_RawMetricsConfig_SetCounterAvailabilityPtr ) (NVPW_RawMetricsConfig_SetCounterAvailability_Params* params);
+// Evaluation
+NVPA_Status ( *NVPW_MetricsEvaluator_SetDeviceAttributesPtr ) (NVPW_MetricsEvaluator_SetDeviceAttributes_Params* pParams);
+NVPA_Status ( *NVPW_MetricsEvaluator_EvaluateToGpuValuesPtr ) (NVPW_MetricsEvaluator_EvaluateToGpuValues_Params* pParams);
+// Used in both enumeration and evaluation
+NVPA_Status ( *NVPW_CUDA_MetricsEvaluator_InitializePtr ) (NVPW_CUDA_MetricsEvaluator_Initialize_Params* pParams);
+NVPA_Status ( *NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr ) (NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params* pParams);
+NVPA_Status ( *NVPW_RawMetricsConfig_GetNumPassesPtr ) (NVPW_RawMetricsConfig_GetNumPasses_Params* params);
+NVPA_Status ( *NVPW_RawMetricsConfig_BeginPassGroupPtr ) (NVPW_RawMetricsConfig_BeginPassGroup_Params* params);
+NVPA_Status ( *NVPW_RawMetricsConfig_EndPassGroupPtr ) (NVPW_RawMetricsConfig_EndPassGroup_Params* params);
+NVPA_Status ( *NVPW_RawMetricsConfig_AddMetricsPtr ) (NVPW_RawMetricsConfig_AddMetrics_Params* params);
+// Destroy
+NVPA_Status ( *NVPW_RawMetricsConfig_DestroyPtr ) (NVPW_RawMetricsConfig_Destroy_Params* params);
+NVPA_Status ( *NVPW_CounterDataBuilder_DestroyPtr ) (NVPW_CounterDataBuilder_Destroy_Params* params);
+NVPA_Status ( *NVPW_MetricsEvaluator_DestroyPtr ) (NVPW_MetricsEvaluator_Destroy_Params* pParams);
+// Misc.
+NVPA_Status ( *NVPW_GetSupportedChipNamesPtr ) (NVPW_GetSupportedChipNames_Params* params);
+
+// Helper functions for the MetricsEvaluator API //
+// Initialize
+static int initialize_perfworks_api(void);
+// Enumeration
+static int enumerate_metrics_for_unique_devices(const char *pChipName, int *totalNumMetrics, char ***arrayOfMetricNames);
+static int get_rollup_metrics(NVPW_RollupOp rollupMetric, char **strRollupMetric);
+static int get_supported_submetrics(NVPW_Submetric subMetric, char **strSubMetric);
+static int get_metric_properties(const char *pChipName, const char *metricName, char *fullMetricDescription);
+static int get_number_of_passes_for_info(const char *pChipName, NVPW_MetricsEvaluator *pMetricsEvaluator, NVPW_MetricEvalRequest *metricEvalRequest, int *numOfPasses);
+// Configuration
+static int get_metric_eval_request(NVPW_MetricsEvaluator *metricEvaluator, const char *metricName, NVPW_MetricEvalRequest *pMetricEvalRequest);
+static int create_raw_metric_requests(NVPW_MetricsEvaluator *pMetricsEvaluator, NVPW_MetricEvalRequest *metricEvalRequest, NVPA_RawMetricRequest **rawMetricRequests, int *rawMetricRequestsCount);
+// Metric Evaluation
+static int get_number_of_passes_for_eventsets(const char *pChipName, const char *metricName, int *numOfPasses);
+static int get_evaluated_metric_values(NVPW_MetricsEvaluator *pMetricsEvaluator, cuptip_gpu_state_t *gpu_ctl, long long *evaluatedMetricValues);
+// Destroy MetricsEvaluator
+static int destroy_metrics_evaluator(NVPW_MetricsEvaluator *pMetricsEvaluator);
+
+// Helper functions for profiling //
+static int start_profiling_session(byte_array_t counterDataImage, byte_array_t counterDataScratchBufferSize, byte_array_t configImage);
+static int end_profiling_session(void);
+static int get_config_image(const char *chipName, const uint8_t *pCounterAvailabilityImageData, NVPA_RawMetricRequest *rawMetricRequests, int rmr_count, byte_array_t *configImage);
+static int get_counter_data_prefix_image(const char *chipName, NVPA_RawMetricRequest *rawMetricRequests, int rmr_count, byte_array_t *counterDataPrefixImage);
+static int get_counter_data_image(byte_array_t counterDataPrefixImage, byte_array_t *counterDataScratchBuffer, byte_array_t *counterDataImage);
+static int get_event_collection_method(const char *evt_name);
+static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl);
+static void free_and_reset_configuration_images(cuptip_gpu_state_t *gpu_ctl);
+
+// Functions related to Cuda component hash tables
+static int init_main_htable(void);
+static int init_event_table(void);
+static void shutdown_event_table(void);
+static void shutdown_event_stats_table(void);
+
+// Functions related to NVIDIA device chips
+static int assign_chipnames_for_a_device_index(void);
+static int find_same_chipname(int dev_id);
+
+// Functions related to the native event interface
+static int get_ntv_events(cuptiu_event_table_t *evt_table, const char *evt_name, int dev_id);
+static int verify_user_added_events(uint32_t *events_id, int num_events, cuptip_control_t state);
+static int evt_id_to_info(uint32_t event_id, event_info_t *info);
+static int evt_id_create(event_info_t *info, uint32_t *event_id);
+static int evt_code_to_name(uint32_t event_code, char *name, int len);
+static int evt_name_to_basename(const char *name, char *base, int len);
+static int evt_name_to_device(const char *name, int *device, const char *base);
+static int evt_name_to_stat(const char *name, int *stat, const char *base);
+static int cuda_verify_no_repeated_qualifiers(const char *eventName);
+static int cuda_verify_qualifiers(int flag, char *qualifierName, int equalitySignPosition, int *qualifierValue);
+
+// Functions related to the stats qualifier
+static int restructure_event_name(const char *input, char *output, char *base, char *stat);
+static int is_stat(const char *token);
+
+// Functions related to a partially disabled Cuda component
+static int determine_dev_cc_major(int dev_id);
+
+// Load and unload function pointers
+static int load_cupti_perf_sym(void);
+static int unload_cupti_perf_sym(void);
+static int load_nvpw_sym(void);
+static int unload_nvpw_sym(void);
 
 /** @class load_cupti_perf_sym
   * @brief Load cupti functions and assign to function pointers.
@@ -213,13 +232,11 @@ CUptiResult ( *cuptiFinalizePtr ) (void)
 static int load_cupti_perf_sym(void)
 {
     COMPDBG("Entering.\n");
-    int papi_errno = PAPI_OK;
     if (dl_cupti == NULL) {
         ERRDBG("libcupti.so should already be loaded.\n");
-        goto fn_fail;
+        return PAPI_EMISC;
     }
 
-    cuptiDeviceGetChipNamePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiDeviceGetChipName");
     cuptiProfilerInitializePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerInitialize");
     cuptiProfilerDeInitializePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerDeInitialize");
     cuptiProfilerCounterDataImageCalculateSizePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerCounterDataImageCalculateSize");
@@ -240,11 +257,7 @@ static int load_cupti_perf_sym(void)
     cuptiProfilerGetCounterAvailabilityPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiProfilerGetCounterAvailability");
     cuptiFinalizePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiFinalize");
 
-fn_exit:
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
+    return PAPI_OK;
 }
 
 /** @class unload_cupti_perf_sym
@@ -256,7 +269,6 @@ static int unload_cupti_perf_sym(void)
         dlclose(dl_cupti);
         dl_cupti = NULL;
     }
-    cuptiDeviceGetChipNamePtr                                  = NULL;
     cuptiProfilerInitializePtr                                 = NULL;
     cuptiProfilerDeInitializePtr                               = NULL;
     cuptiProfilerCounterDataImageCalculateSizePtr              = NULL;
@@ -279,87 +291,85 @@ static int unload_cupti_perf_sym(void)
     return PAPI_OK;
 }
 
-
 /**@class load_nvpw_sym
- * @brief Search for libnvperf_host.so. Order of search is outlined below.
+ * @brief Search for a variation of the shared object libnvperf_host.
+ *        Order of search is outlined below.
  *
  * 1. If a user sets PAPI_CUDA_PERFWORKS, this will take precedent over
  *    the options listed below to be searched.
- * 2. If we fail to collect libnvperf_host.so from PAPI_CUDA_PERFWORKS or it is not set,
- *    we will search the path defined with PAPI_CUDA_ROOT; as this is supposed to always be set.
- * 3. If we fail to collect libnvperf_host.so from steps 1 and 2, then we will search the linux
- *    default directories listed by /etc/ld.so.conf. As a note, updating the LD_LIBRARY_PATH is
- *    advised for this option.
- * 4. We use dlopen to search for libnvperf_host.so.
- *    If this fails, then we failed to find libnvperf_host.so.
+ * 2. If we fail to collect a variation of the shared object libnvperf_host from
+ *    PAPI_CUDA_PERFWORKS or it is not set, we will search the path defined with PAPI_CUDA_ROOT,
+ *    as this is supposed to always be set.
+ * 3. If we fail to collect a variation of the shared object libnvperf_host from steps 1 and 2,
+ *    then we will search the linux default directories listed by /etc/ld.so.conf. As a note,
+ *    updating the LD_LIBRARY_PATH is advised for this option.
+ * 4. We use dlopen to search for a variation of the shared object libnvperf_host.
+ *    If this fails, then we failed to find a variation of the shared object libnvperf_host.
  */
 static int load_nvpw_sym(void)
 {
-    COMPDBG("Entering.\n");
-    char dlname[] = "libnvperf_host.so";
-    char lookup_path[PATH_MAX];
+    int soNamesToSearchCount = 3;
+    const char *soNamesToSearchFor[] = {"libnvperf_host.so", "libnvperf_host.so.1", "libnvperf_host"};
 
-    /* search PAPI_CUDA_PERFWORKS for libnvperf_host.so (takes precedent over PAPI_CUDA_ROOT) */
+    // If a user set PAPI_CUDA_PERFWORKS with a path, then search it for the shared object (takes precedence over PAPI_CUDA_ROOT)
     char *papi_cuda_perfworks = getenv("PAPI_CUDA_PERFWORKS");
     if (papi_cuda_perfworks) {
-        sprintf(lookup_path, "%s/%s", papi_cuda_perfworks, dlname);
-        dl_nvpw = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
+        dl_nvpw = search_and_load_shared_objects(papi_cuda_perfworks, NULL, soNamesToSearchFor, soNamesToSearchCount);
     }
 
-    const char *standard_paths[] = {
-        "%s/extras/CUPTI/lib64/%s",
-        "%s/lib64/%s",
-        NULL,
-    };
-
-    /* search PAPI_CUDA_ROOT for libnvperf_host.so */
+    char *soMainName = "libnvperf_host";
+    // If a user set PAPI_CUDA_ROOT with a path and we did not already find the shared object, then search it for the shared object
     char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
     if (papi_cuda_root && !dl_nvpw) {
-        dl_nvpw = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
+          dl_nvpw = search_and_load_shared_objects(papi_cuda_root, soMainName, soNamesToSearchFor, soNamesToSearchCount);
     }
 
-    /* search linux default directories for libnvperf_host.so */
-    if (linked_cudart_path && !dl_nvpw) {
-        dl_nvpw = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
-    }
-
-    /* last ditch effort to find libcupti.so */
+    // Last-ditch effort to find a variation of libnvperf_host; see the dlopen man page for how the search occurs
     if (!dl_nvpw) {
-        dl_nvpw = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
+        dl_nvpw = search_and_load_from_system_paths(soNamesToSearchFor, soNamesToSearchCount);
         if (!dl_nvpw) {
             ERRDBG("Loading libnvperf_host.so failed.\n");
             goto fn_fail;
         }
     }
 
-    NVPW_GetSupportedChipNamesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_GetSupportedChipNames");
-    NVPW_CUDA_MetricsContext_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_MetricsContext_Create");
-    NVPW_MetricsContext_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_Destroy");
-    NVPW_MetricsContext_GetMetricNames_BeginPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricNames_Begin");
-    NVPW_MetricsContext_GetMetricNames_EndPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricNames_End");
+    // Initialize
     NVPW_InitializeHostPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_InitializeHost");
-    NVPW_MetricsContext_GetMetricProperties_BeginPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricProperties_Begin");
-    NVPW_MetricsContext_GetMetricProperties_EndPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetMetricProperties_End");
-    NVPW_CUDA_RawMetricsConfig_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_RawMetricsConfig_Create");
-    NVPW_RawMetricsConfig_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_Destroy");
-    NVPW_RawMetricsConfig_BeginPassGroupPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_BeginPassGroup");
-    NVPW_RawMetricsConfig_EndPassGroupPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_EndPassGroup");
-    NVPW_RawMetricsConfig_AddMetricsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_AddMetrics");
+    // Enumeration
+    NVPW_MetricsEvaluator_GetMetricNamesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetMetricNames");
+    NVPW_MetricsEvaluator_GetSupportedSubmetricsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetSupportedSubmetrics");
+    NVPW_MetricsEvaluator_GetCounterPropertiesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetCounterProperties");
+    NVPW_MetricsEvaluator_GetRatioMetricPropertiesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetRatioMetricProperties");
+    NVPW_MetricsEvaluator_GetThroughputMetricPropertiesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetThroughputMetricProperties");
+    NVPW_MetricsEvaluator_GetMetricDimUnitsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetMetricDimUnits");
+    NVPW_MetricsEvaluator_DimUnitToStringPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_DimUnitToString"); 
+    // Configuration
+    NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequestPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest");
+    NVPW_MetricsEvaluator_GetMetricRawDependenciesPtr =  DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_GetMetricRawDependencies");
+    NVPW_CUDA_RawMetricsConfig_Create_V2Ptr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_RawMetricsConfig_Create_V2");
     NVPW_RawMetricsConfig_GenerateConfigImagePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_GenerateConfigImage");
     NVPW_RawMetricsConfig_GetConfigImagePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_GetConfigImage");
     NVPW_CounterDataBuilder_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_Create");
-    NVPW_CounterDataBuilder_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_Destroy");
     NVPW_CounterDataBuilder_AddMetricsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_AddMetrics");
     NVPW_CounterDataBuilder_GetCounterDataPrefixPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_GetCounterDataPrefix");
-    NVPW_CounterData_GetNumRangesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterData_GetNumRanges");
-    NVPW_Profiler_CounterData_GetRangeDescriptionsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_Profiler_CounterData_GetRangeDescriptions");
-    NVPW_MetricsContext_SetCounterDataPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_SetCounterData");
-    NVPW_MetricsContext_EvaluateToGpuValuesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_EvaluateToGpuValues");
+    NVPW_CUDA_CounterDataBuilder_CreatePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_CounterDataBuilder_Create");
+    NVPW_RawMetricsConfig_SetCounterAvailabilityPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_SetCounterAvailability"); 
+    // Evaluation
+    NVPW_MetricsEvaluator_SetDeviceAttributesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_SetDeviceAttributes");
+    NVPW_MetricsEvaluator_EvaluateToGpuValuesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_EvaluateToGpuValues");
+    // Used in both enumeration and evaluation
+    NVPW_CUDA_MetricsEvaluator_InitializePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_MetricsEvaluator_Initialize");
+    NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr  = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize");
     NVPW_RawMetricsConfig_GetNumPassesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_GetNumPasses");
-    NVPW_RawMetricsConfig_SetCounterAvailabilityPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_SetCounterAvailability");
-    NVPW_RawMetricsConfig_IsAddMetricsPossiblePtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_IsAddMetricsPossible");
-    NVPW_MetricsContext_GetCounterNames_BeginPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetCounterNames_Begin");
-    NVPW_MetricsContext_GetCounterNames_EndPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsContext_GetCounterNames_End");
+    NVPW_RawMetricsConfig_BeginPassGroupPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_BeginPassGroup");
+    NVPW_RawMetricsConfig_EndPassGroupPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_EndPassGroup");
+    NVPW_RawMetricsConfig_AddMetricsPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_AddMetrics");
+    // Destroy
+    NVPW_RawMetricsConfig_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_RawMetricsConfig_Destroy");
+    NVPW_CounterDataBuilder_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_CounterDataBuilder_Destroy");
+    NVPW_MetricsEvaluator_DestroyPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_MetricsEvaluator_Destroy");
+    // Misc.
+    NVPW_GetSupportedChipNamesPtr = DLSYM_AND_CHECK(dl_nvpw, "NVPW_GetSupportedChipNames");
 
     Dl_info info;
     dladdr(NVPW_GetSupportedChipNamesPtr, &info);
@@ -378,1022 +388,183 @@ static int unload_nvpw_sym(void)
         dlclose(dl_nvpw);
         dl_nvpw = NULL;
     }
-    NVPW_GetSupportedChipNamesPtr                     = NULL;
-    NVPW_CUDA_MetricsContext_CreatePtr                = NULL;
-    NVPW_MetricsContext_DestroyPtr                    = NULL;
-    NVPW_MetricsContext_GetMetricNames_BeginPtr       = NULL;
-    NVPW_MetricsContext_GetMetricNames_EndPtr         = NULL;
-    NVPW_InitializeHostPtr                            = NULL;
-    NVPW_MetricsContext_GetMetricProperties_BeginPtr  = NULL;
-    NVPW_MetricsContext_GetMetricProperties_EndPtr    = NULL;
-    NVPW_CUDA_RawMetricsConfig_CreatePtr              = NULL;
-    NVPW_RawMetricsConfig_DestroyPtr                  = NULL;
-    NVPW_RawMetricsConfig_BeginPassGroupPtr           = NULL;
-    NVPW_RawMetricsConfig_EndPassGroupPtr             = NULL;
-    NVPW_RawMetricsConfig_AddMetricsPtr               = NULL;
-    NVPW_RawMetricsConfig_GenerateConfigImagePtr      = NULL;
-    NVPW_RawMetricsConfig_GetConfigImagePtr           = NULL;
-    NVPW_CounterDataBuilder_CreatePtr                 = NULL;
-    NVPW_CounterDataBuilder_DestroyPtr                = NULL;
-    NVPW_CounterDataBuilder_AddMetricsPtr             = NULL;
-    NVPW_CounterDataBuilder_GetCounterDataPrefixPtr   = NULL;
-    NVPW_CounterData_GetNumRangesPtr                  = NULL;
-    NVPW_Profiler_CounterData_GetRangeDescriptionsPtr = NULL;
-    NVPW_MetricsContext_SetCounterDataPtr             = NULL;
-    NVPW_MetricsContext_EvaluateToGpuValuesPtr        = NULL;
-    NVPW_RawMetricsConfig_GetNumPassesPtr             = NULL;
-    NVPW_RawMetricsConfig_SetCounterAvailabilityPtr   = NULL;
-    NVPW_RawMetricsConfig_IsAddMetricsPossiblePtr     = NULL;
-    NVPW_MetricsContext_GetCounterNames_BeginPtr      = NULL;
-    NVPW_MetricsContext_GetCounterNames_EndPtr        = NULL;
-    return PAPI_OK;
-}
 
-/** @class initialize_cupti_profiler_api
-  * @brief Initialize the cupti profiler interface..
-*/
-static int initialize_cupti_profiler_api(void)
-{
-    COMPDBG("Entering.\n");
-    int papi_errno;
-    CUpti_Profiler_Initialize_Params profilerInitializeParams = { CUpti_Profiler_Initialize_Params_STRUCT_SIZE, NULL };
-    papi_errno = cuptiProfilerInitializePtr(&profilerInitializeParams);
-    if (papi_errno != CUPTI_SUCCESS) {
-        ERRDBG("CUPTI error %d: cuptiProfilerInitialize failed.\n", papi_errno);
-        return PAPI_EMISC;
-    }
-    return PAPI_OK;
-}
+    // Initialize
+    NVPW_InitializeHostPtr                                        = NULL;
+    // Enumeration
+    NVPW_MetricsEvaluator_GetMetricNamesPtr                       = NULL;
+    NVPW_MetricsEvaluator_GetSupportedSubmetricsPtr               = NULL;
+    NVPW_MetricsEvaluator_GetCounterPropertiesPtr                 = NULL;
+    NVPW_MetricsEvaluator_GetRatioMetricPropertiesPtr             = NULL;
+    NVPW_MetricsEvaluator_GetThroughputMetricPropertiesPtr        = NULL;
+    NVPW_MetricsEvaluator_GetMetricDimUnitsPtr                    = NULL;
+    NVPW_MetricsEvaluator_DimUnitToStringPtr                      = NULL;
+    // Configuration
+    NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequestPtr = NULL;
+    NVPW_MetricsEvaluator_GetMetricRawDependenciesPtr             = NULL;
+    NVPW_CUDA_RawMetricsConfig_Create_V2Ptr                       = NULL;
+    NVPW_RawMetricsConfig_GenerateConfigImagePtr                  = NULL;
+    NVPW_RawMetricsConfig_GetConfigImagePtr                       = NULL;
+    NVPW_CounterDataBuilder_CreatePtr                             = NULL;
+    NVPW_CounterDataBuilder_AddMetricsPtr                         = NULL;
+    NVPW_CounterDataBuilder_GetCounterDataPrefixPtr               = NULL;
+    NVPW_CUDA_CounterDataBuilder_CreatePtr                        = NULL;
+    NVPW_RawMetricsConfig_SetCounterAvailabilityPtr               = NULL;
+    // Evaluation
+    NVPW_MetricsEvaluator_SetDeviceAttributesPtr                  = NULL;
+    NVPW_MetricsEvaluator_EvaluateToGpuValuesPtr                  = NULL;
+    // Used in both enumeration and evaluation
+    NVPW_CUDA_MetricsEvaluator_InitializePtr                      = NULL;
+    NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr      = NULL;
+    NVPW_RawMetricsConfig_GetNumPassesPtr                         = NULL;
+    NVPW_RawMetricsConfig_BeginPassGroupPtr                       = NULL;
+    NVPW_RawMetricsConfig_EndPassGroupPtr                         = NULL;
+    NVPW_RawMetricsConfig_AddMetricsPtr                           = NULL;
+    // Destroy
+    NVPW_RawMetricsConfig_DestroyPtr                              = NULL;
+    NVPW_CounterDataBuilder_DestroyPtr                            = NULL;
+    NVPW_MetricsEvaluator_DestroyPtr                              = NULL;
+    // Misc.
+    NVPW_GetSupportedChipNamesPtr                                 = NULL;
 
-/** @class deinitialize_cupti_profiler_api
-  * @brief Deinitialize the cupti profiler interface.
-*/
-static int deinitialize_cupti_profiler_api(void)
-{
-    COMPDBG("Entering.\n");
-    int papi_errno;
-    CUpti_Profiler_DeInitialize_Params profilerDeInitializeParams = { CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE, NULL };
-    papi_errno = cuptiProfilerDeInitializePtr(&profilerDeInitializeParams);
-    if (papi_errno != CUPTI_SUCCESS) {
-        ERRDBG("CUPTI Error %d: cuptiProfilerDeInitialize failed.\n", papi_errno);
-        return PAPI_EMISC;
-    }
     return PAPI_OK;
 }
 
-
 /** @class initialize_perfworks_api
-  * @brief NVPW required initialization.
+  * @brief Initialize the Perfworks API.
 */
 static int initialize_perfworks_api(void)
 {
     COMPDBG("Entering.\n");
-    int papi_errno;
-    NVPW_InitializeHost_Params perfInitHostParams = { NVPW_InitializeHost_Params_STRUCT_SIZE, NULL };
-    papi_errno = NVPW_InitializeHostPtr(&perfInitHostParams);
-    if (papi_errno != NVPA_STATUS_SUCCESS) {
-        ERRDBG("NVPW Error %d: NVPW_InitializeHostPtr failed.\n", papi_errno);
-        return PAPI_EMISC;
-    }
-    return PAPI_OK;
-}
-
-static int get_chip_name(int dev_num, char* chipName)
-{
-    int papi_errno;
-    CUpti_Device_GetChipName_Params getChipName = {
-        .structSize = CUpti_Device_GetChipName_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .deviceIndex = 0
-    };
-    getChipName.deviceIndex = dev_num;
-    papi_errno = cuptiDeviceGetChipNamePtr(&getChipName);
-    if (papi_errno != CUPTI_SUCCESS) {
-        ERRDBG("CUPTI error %d: Failed to get chip name for device %d\n", papi_errno, dev_num);
-        return PAPI_EMISC;
-    }
-    strcpy(chipName, getChipName.pChipName);
-    return PAPI_OK;
-}
-
-/** @class get_added_events_rmr
-  * @brief For a Cuda native event name collect raw metrics and count
-  *        of raw metrics for collection. Raw Metrics are one layer of the Metric API
-  *        and contains the list of raw counters and generates configuration file
-  *        images. Must be done before creating a ConfigImage or 
-  *        CounterDataPrefix.
-  * @param *gpu_ctl
-  *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
-*/
-static int get_added_events_rmr(cuptip_gpu_state_t *gpu_ctl)
-{
-    COMPDBG("Entering.\n");
-    int gpu_id, num_dep, count_raw_metrics = 0, papi_errno = PAPI_OK;
-    int i, j, k;
-    NVPA_RawMetricRequest *all_rmr=NULL, *collect_rmr;
-    cuptiu_event_t *evt_rec;
 
-    /* for each event in the event table collect the raw metric requests */
-    for (i = 0; i < gpu_ctl->added_events->count; i++) {
-        papi_errno = retrieve_metric_rmr(
-                         gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
-                         gpu_ctl->added_events->cuda_evts[i], &num_dep, 
-                         &collect_rmr
-                     );
-        if (papi_errno != PAPI_OK) {
-            papi_errno = PAPI_ENOEVNT;
-            goto fn_exit;
-        }
-        all_rmr = (NVPA_RawMetricRequest *) papi_realloc(all_rmr, (count_raw_metrics + num_dep) * sizeof(NVPA_RawMetricRequest));
-        if (all_rmr == NULL) {
-            papi_errno = PAPI_ENOMEM;
-            goto fn_exit;
-        }
-        for (j = 0; j < num_dep; j++) {
-            k = j + count_raw_metrics;
-            all_rmr[k].structSize = collect_rmr[j].structSize;
-            all_rmr[k].pPriv = NULL;
-            all_rmr[k].pMetricName = strdup(collect_rmr[j].pMetricName);
-            all_rmr[k].keepInstances = 1;
-            all_rmr[k].isolated = 1;
-            papi_free((void *) collect_rmr[j].pMetricName);
-        }
-        count_raw_metrics += num_dep;
-        papi_free(collect_rmr);
-    } 
-    gpu_ctl->rmr = all_rmr;
-    gpu_ctl->rmr_count = count_raw_metrics;
-fn_exit:
-    return papi_errno;
-}
-
-/** @class calculate_num_passes
-  * @brief Calculate the numbers of passes for a Cuda native event.
-  * @param state
-*/
-static int calculate_num_passes(struct NVPA_RawMetricsConfig *pRawMetricsConfig, int rmr_count, NVPA_RawMetricRequest *rmr, int *num_pass)
-{
-    COMPDBG("Entering.\n");
-    int numNestingLevels = 1, numIsolatedPasses, numPipelinedPasses;
-    NVPA_Status nvpa_err;
-
-    /* NOTE: maxPassCount is not set here as we want to properly show the number of passes for
-             metrics that require multiple passes in papi_native_avail. */
-    /* instantiate a new struct to be passed to NVPW_RawMetricsConfig_BeginPassGroup_Params */
-    NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {
-        // [in]
-        .structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE,
-        .pPriv = NULL, // assign to NULL
-        .pRawMetricsConfig = pRawMetricsConfig,
-    };
-    nvpa_err = NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-    
-    /* instantiate struct to be passed to NVPW_RawMetricsConfig_AddMetrics */
-    NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {
-        // [in]
-        .structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE,
-        .pPriv = NULL, // assign to NULL
-        .pRawMetricsConfig = pRawMetricsConfig,
-        .pRawMetricRequests = rmr,
-        .numMetricRequests = rmr_count,
-    };
-    nvpa_err = NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-
-    /* instantiate a new struct to be passed to NVPW_RawMetricsConfig_EndPassGroup */
-    NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = {
-        // [in]
-        .structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE,
-        .pPriv = NULL, // assign to NULL
-        .pRawMetricsConfig = pRawMetricsConfig,
-    };
-    nvpa_err = NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-
-    /* instantiate a new struct to be passed to  NVPW_RawMetricsConfig_GetNumPasses_Params*/
-    NVPW_RawMetricsConfig_GetNumPasses_Params rawMetricsConfigGetNumPassesParams = {
-        // [in]
-       .structSize = NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE,
-       .pPriv = NULL, // assign to NULL
-       .pRawMetricsConfig = pRawMetricsConfig,
-    };
-    nvpa_err = NVPW_RawMetricsConfig_GetNumPassesPtr(&rawMetricsConfigGetNumPassesParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-
-    /* calculate numpass */
-    numIsolatedPasses  = rawMetricsConfigGetNumPassesParams.numIsolatedPasses;
-    numPipelinedPasses = rawMetricsConfigGetNumPassesParams.numPipelinedPasses;
-    *num_pass = numPipelinedPasses + numIsolatedPasses * numNestingLevels;
-    if (*num_pass > 1) {
-        ERRDBG("Metrics requested requires multiple passes to profile.\n");
-        return PAPI_EMULPASS;
-    }
+    NVPW_InitializeHost_Params perfInitHostParams = {NVPW_InitializeHost_Params_STRUCT_SIZE};
+    perfInitHostParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_InitializeHostPtr(&perfInitHostParams), return PAPI_EMISC );
 
     return PAPI_OK;
 }
 
-
-/** @class nvpw_cuda_metricscontext_create
-  * @brief Create a pMetricsContext.
-  *
-  * @param state
-  *     Struct that holds read count, running, cuptip_info_t, and cuptip_gpu_state_t.
-*/
-static int nvpw_cuda_metricscontext_create(cuptip_control_t state)
-{
-    int gpu_id, found, papi_errno = PAPI_OK;
-    MCCP_t *pMCCP;
-    NVPA_Status nvpa_err;
-    /* struct that holds gpu_id, rmr_count, configImage etc.
-       seee cuptip_gpu_state_s */
-    cuptip_gpu_state_t *gpu_ctl;
-
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        gpu_ctl = &(state->gpu_ctl[gpu_id]);
-        found = find_same_chipname(gpu_id);
-        if (found > -1) {
-            gpu_ctl->pmetricsContextCreateParams = state->gpu_ctl[found].pmetricsContextCreateParams;
-            continue;
-        }
-        /* struct that holds metadata for call to NVPW_CUDA_MetricsContext_CreatePtr 
-           this includes struct size and gpu chip name */
-        pMCCP = (MCCP_t *) papi_calloc( 1, sizeof(MCCP_t) );
-        /* see if struct allocated memory properly */
-        if (pMCCP == NULL) {
-            papi_errno = PAPI_ENOMEM;
-            goto fn_exit;
-        }
-        
-        /* setting metadata values */
-        pMCCP->structSize = NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE;
-        pMCCP->pChipName = cuptiu_table_p->avail_gpu_info[gpu_id].chip_name;
-
-        /* create context */
-        nvpa_err = NVPW_CUDA_MetricsContext_CreatePtr(pMCCP);
-        if (nvpa_err != NVPA_STATUS_SUCCESS)
-            goto fn_fail ;
-
-        /* store created context in cuptip_control_t state */
-        gpu_ctl->pmetricsContextCreateParams = pMCCP;
-    }
-fn_exit:
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
-}
-
-/** @class nvpw_cuda_metricscontext_destroy
-  * @brief Destroy created context from nvpw_cuda_metricscontext_create.
-  *
-  * @param state
-  *     Struct that holds read count, running, cuptip_info_t, and cuptip_gpu_state_t.
-*/
-static int nvpw_cuda_metricscontext_destroy(cuptip_control_t state)
-{
-    int gpu_id, found, papi_errno = PAPI_OK;
-    cuptip_gpu_state_t *gpu_ctl;
-
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        gpu_ctl = &(state->gpu_ctl[gpu_id]);
-        found = find_same_chipname(gpu_id);
-        if (found > -1) {
-            gpu_ctl->pmetricsContextCreateParams = NULL;
-            continue;
-        }
-        if (gpu_ctl->pmetricsContextCreateParams->pMetricsContext) {
-            NVPW_MetricsContext_Destroy_Params mCDP = {
-                .structSize = NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE,
-                .pPriv = NULL,
-                .pMetricsContext = gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
-            };
-            nvpwCheckErrors( NVPW_MetricsContext_DestroyPtr(&mCDP), goto fn_fail );
-            papi_free(gpu_ctl->pmetricsContextCreateParams);
-            gpu_ctl->pmetricsContextCreateParams = NULL;
-        }
-    }
-fn_exit:
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
-}
-
-/** @class check_multipass
-  * @brief Check to see if the Cuda native event is multi-pass. Multi-pass Cuda
-  *        native events (Numpass > 1), is not supported.
-  * @param state
-*/
-static int check_multipass(cuptip_control_t state)
-{
-    COMPDBG("Entering.\n");
-    int gpu_id, papi_errno, passes;
-    NVPA_Status nvpa_err;
-    cuptip_gpu_state_t *gpu_ctl;
-
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        gpu_ctl = &(state->gpu_ctl[gpu_id]);
-        if (gpu_ctl->added_events->count == 0) {
-            continue;
-        }
-
-        papi_errno = get_added_events_rmr(gpu_ctl);
-        if (papi_errno != PAPI_OK) {
-            goto fn_exit;
-        }
-
-        /* perfworks api: instantiate a new stuct to be passed to NVPW_CUDA_RawMetricsConfig_CreatePtr */ 
-        NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
-            .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
-            .pChipName = cuptiu_table_p->avail_gpu_info[gpu_id].chip_name,
-        };
-        nvpa_err = NVPW_CUDA_RawMetricsConfig_CreatePtr(
-                       &nvpw_metricsConfigCreateParams
-                   );
-        if (nvpa_err != NVPA_STATUS_SUCCESS) {
-            goto fn_exit;
-        }
-
-        /* for an event, collect the number of passes to see if supported */
-        papi_errno = calculate_num_passes( nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-                                           gpu_ctl->rmr_count, gpu_ctl->rmr, &passes);
-        if ( papi_errno == PAPI_EMULPASS ) {
-        /* at this point we just want the number of passes (stored in passes) */
-        }
-
-        /* perfworks api: instantiate a new stuct to be passed to NVPW_CUDA_RawMetricsConfig_DestroyPtr */
-        NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
-            .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-        };
-        nvpa_err = NVPW_RawMetricsConfig_DestroyPtr(
-                       (NVPW_RawMetricsConfig_Destroy_Params *) 
-                       &rawMetricsConfigDestroyParams
-                   );
-        if (nvpa_err != NVPA_STATUS_SUCCESS) {
-            goto fn_fail;
-        }
-    }
-fn_exit:
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
-}
-
 /** @class get_counter_availability
   * @brief Query counter availability. Helps to filter unavailable raw metrics on host.
   * @param *gpu_ctl
   *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
+  *   dev_id, rawMetricRequests, numberOfRawMetricRequests, and more.
 */
 static int get_counter_availability(cuptip_gpu_state_t *gpu_ctl)
 {
-    int papi_errno;
-    /* Get size of counterAvailabilityImage - in first pass, GetCounterAvailability return size needed for data */
-    CUpti_Profiler_GetCounterAvailability_Params getCounterAvailabilityParams = {
-        .structSize = CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-        .pCounterAvailabilityImage = NULL,
-    };
-    papi_errno = cuptiProfilerGetCounterAvailabilityPtr(&getCounterAvailabilityParams);
-    if (papi_errno != CUPTI_SUCCESS) {
-        ERRDBG("CUPTI error %d: Failed to get size.\n", papi_errno);
-        return PAPI_EMISC;
-    }
-    /* Allocate sized counterAvailabilityImage */
+    CUpti_Profiler_GetCounterAvailability_Params getCounterAvailabilityParams = {CUpti_Profiler_GetCounterAvailability_Params_STRUCT_SIZE};
+    getCounterAvailabilityParams.pPriv = NULL;
+    getCounterAvailabilityParams.ctx = NULL; // If NULL, the current CUcontext is used
+    getCounterAvailabilityParams.pCounterAvailabilityImage = NULL;
+    cuptiCheckErrors( cuptiProfilerGetCounterAvailabilityPtr(&getCounterAvailabilityParams), return PAPI_EMISC );
+
+    // Allocate the necessary memory for data
     gpu_ctl->counterAvailabilityImage.size = getCounterAvailabilityParams.counterAvailabilityImageSize;
-    gpu_ctl->counterAvailabilityImage.data = (uint8_t *) papi_malloc(gpu_ctl->counterAvailabilityImage.size);
+    gpu_ctl->counterAvailabilityImage.data = (uint8_t *) malloc(gpu_ctl->counterAvailabilityImage.size);
     if (gpu_ctl->counterAvailabilityImage.data == NULL) {
-        return PAPI_ENOMEM;
-    }
-    /* Initialize counterAvailabilityImage */
-    getCounterAvailabilityParams.pCounterAvailabilityImage = gpu_ctl->counterAvailabilityImage.data;
-    papi_errno = cuptiProfilerGetCounterAvailabilityPtr(&getCounterAvailabilityParams);
-    if (papi_errno != CUPTI_SUCCESS) {
-        ERRDBG("CUPTI error %d: Failed to get bytes.\n", papi_errno);
-        return PAPI_EMISC;
-    }
-    return PAPI_OK;
-}
-
-
-/** @class metric_get_config_image
-  * @brief Retrieves binary ConfigImage for the Cuda native event metrics listed 
-  *        for collection. The function get_added_events_rmr( ... ) must be 
-  *        called before this step is possible. 
-  * @param *gpu_ctl
-  *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
-*/
-static int metric_get_config_image(cuptip_gpu_state_t *gpu_ctl)
-{
-    COMPDBG("Entering.\n");
-    int gpu_id = gpu_ctl->gpu_id;
-
-    NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
-        .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
-        .pChipName = cuptiu_table_p->avail_gpu_info[gpu_id].chip_name,
-    };
-    nvpwCheckErrors( NVPW_CUDA_RawMetricsConfig_CreatePtr(&nvpw_metricsConfigCreateParams), goto fn_fail );
-
-    if( gpu_ctl->counterAvailabilityImage.data != NULL) {
-        NVPW_RawMetricsConfig_SetCounterAvailability_Params setCounterAvailabilityParams = {
-            .structSize = NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-            .pCounterAvailabilityImage = gpu_ctl->counterAvailabilityImage.data,
-        };
-        nvpwCheckErrors( NVPW_RawMetricsConfig_SetCounterAvailabilityPtr(&setCounterAvailabilityParams), goto fn_fail );
-    }
-
-    /* NOTE: maxPassCount is being set to 1 as a final safety net to limit metric collection to a single pass.
-             Metrics that require multiple passes would fail further down at AddMetrics due to this.
-             This failure should never occur as we filter for metrics with multiple passes at check_multipass,
-             which occurs before the metric_get_config_image call. */
-    NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {
-        .structSize = NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-        .maxPassCount = 1,
-    };
-    nvpwCheckErrors( NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams), goto fn_fail );
-
-    NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {
-        .structSize = NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-        .pRawMetricRequests = gpu_ctl->rmr,
-        .numMetricRequests = gpu_ctl->rmr_count,
-    };
-    nvpwCheckErrors( NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams), goto fn_fail );
-
-    NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = {
-        .structSize = NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-    };
-    nvpwCheckErrors( NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams), goto fn_fail );
-
-    NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParams = {
-        .structSize = NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-    };
-    nvpwCheckErrors( NVPW_RawMetricsConfig_GenerateConfigImagePtr(&generateConfigImageParams), goto fn_fail );
-
-    NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParams = {
-        .structSize = NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-        .bytesAllocated = 0,
-        .pBuffer = NULL,
-    };
-    nvpwCheckErrors( NVPW_RawMetricsConfig_GetConfigImagePtr(&getConfigImageParams), goto fn_fail );
-
-    gpu_ctl->configImage.size = getConfigImageParams.bytesCopied;
-    gpu_ctl->configImage.data = (uint8_t *) papi_calloc(gpu_ctl->configImage.size, sizeof(uint8_t));
-    if (gpu_ctl->configImage.data == NULL) {
-        ERRDBG("calloc gpu_ctl->configImage.data failed!");
-        return PAPI_ENOMEM;
-    }
-
-    getConfigImageParams.bytesAllocated = gpu_ctl->configImage.size;
-    getConfigImageParams.pBuffer = gpu_ctl->configImage.data;
-    nvpwCheckErrors( NVPW_RawMetricsConfig_GetConfigImagePtr(&getConfigImageParams), goto fn_fail );
-
-    NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
-        .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-    };
-    nvpwCheckErrors( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *) &rawMetricsConfigDestroyParams), goto fn_fail );
-
-    return PAPI_OK;
-fn_fail:
-    return PAPI_EMISC;
-}
-
-/** @class metric_get_counter_data_prefix_image
-  * @brief Retrieves binary CounterDataPrefix for the Cuda native event metrics 
-  *        listed for collection. The function get_added_events_rmr( ... ) 
-  *        must be called before this step is possible. 
-  * @param *gpu_ctl
-  *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
-*/
-static int metric_get_counter_data_prefix_image(cuptip_gpu_state_t *gpu_ctl)
-{
-    COMPDBG("Entering.\n");
-    int gpu_id = gpu_ctl->gpu_id;
-
-    NVPW_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = {
-        .structSize = NVPW_CounterDataBuilder_Create_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pChipName = cuptiu_table_p->avail_gpu_info[gpu_id].chip_name,
-    };
-    nvpwCheckErrors( NVPW_CounterDataBuilder_CreatePtr(&counterDataBuilderCreateParams), goto fn_fail );
-
-    NVPW_CounterDataBuilder_AddMetrics_Params addMetricsParams = {
-        .structSize = NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder,
-        .pRawMetricRequests = gpu_ctl->rmr,
-        .numMetricRequests = gpu_ctl->rmr_count,
-    };
-    nvpwCheckErrors( NVPW_CounterDataBuilder_AddMetricsPtr(&addMetricsParams), goto fn_fail );
-
-    NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = {
-        .structSize = NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder,
-        .bytesAllocated = 0,
-        .pBuffer = NULL,
-    };
-    nvpwCheckErrors( NVPW_CounterDataBuilder_GetCounterDataPrefixPtr(&getCounterDataPrefixParams), goto fn_fail );
-
-    gpu_ctl->counterDataImagePrefix.size = getCounterDataPrefixParams.bytesCopied;
-    gpu_ctl->counterDataImagePrefix.data = (uint8_t *) papi_calloc(gpu_ctl->counterDataImagePrefix.size, sizeof(uint8_t));
-    if (gpu_ctl->counterDataImagePrefix.data == NULL) {
-        ERRDBG("calloc gpu_ctl->counterDataImagePrefix.data failed!");
+        ERRDBG("Failed to allocate memory for counterAvailabilityImage.data.\n");
         return PAPI_ENOMEM;
     }
 
-    getCounterDataPrefixParams.bytesAllocated = gpu_ctl->counterDataImagePrefix.size;
-    getCounterDataPrefixParams.pBuffer = gpu_ctl->counterDataImagePrefix.data;
-    nvpwCheckErrors( NVPW_CounterDataBuilder_GetCounterDataPrefixPtr(&getCounterDataPrefixParams), goto fn_fail );
-
-    NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderDestroyParams = {
-        .structSize = NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder,
-    };
-    nvpwCheckErrors( NVPW_CounterDataBuilder_DestroyPtr(&counterDataBuilderDestroyParams), goto fn_fail );
+    getCounterAvailabilityParams.pCounterAvailabilityImage = gpu_ctl->counterAvailabilityImage.data;
+    cuptiCheckErrors( cuptiProfilerGetCounterAvailabilityPtr(&getCounterAvailabilityParams), return PAPI_EMISC );
 
     return PAPI_OK;
-fn_fail:
-    return PAPI_EMISC;
 }
 
-/** @class create_counter_data_image
-  * @brief Allocate space for values for each counter for each range and
-  *        calculate a scratch buffer size needed for internal operations. 
+/** @class free_and_reset_configuration_images
+  * @brief Free and reset the configuration images created in
+  *        cuptip_ctx_start.
   * @param *gpu_ctl
   *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
+  *   dev_id, rawMetricRequests, numberOfRawMetricRequests, and more.
 */
-static int create_counter_data_image(cuptip_gpu_state_t *gpu_ctl)
+void free_and_reset_configuration_images(cuptip_gpu_state_t *gpu_ctl)
 {
     COMPDBG("Entering.\n");
-    gpu_ctl->counterDataImageOptions = (CUpti_Profiler_CounterDataImageOptions) {
-        .structSize = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pCounterDataPrefix = gpu_ctl->counterDataImagePrefix.data,
-        .counterDataPrefixSize = gpu_ctl->counterDataImagePrefix.size,
-        .maxNumRanges = 1,
-        .maxNumRangeTreeNodes = 1,
-        .maxRangeNameLength = 64,
-    };
-
-    CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = {
-        .structSize = CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
-        .pOptions = &gpu_ctl->counterDataImageOptions,
-    };
-    cuptiCheckErrors( cuptiProfilerCounterDataImageCalculateSizePtr(&calculateSizeParams), goto fn_fail );
-
-    gpu_ctl->initializeParams = (CUpti_Profiler_CounterDataImage_Initialize_Params) {
-        .structSize = CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE,
-        .pOptions = &gpu_ctl->counterDataImageOptions,
-        .counterDataImageSize = calculateSizeParams.counterDataImageSize,
-    };
-
-    gpu_ctl->counterDataImage.size = calculateSizeParams.counterDataImageSize;
-    gpu_ctl->counterDataImage.data = (uint8_t *) papi_calloc(gpu_ctl->counterDataImage.size, sizeof(uint8_t));
-    if (gpu_ctl->counterDataImage.data == NULL) {
-        ERRDBG("calloc gpu_ctl->counterDataImage.data failed!\n");
-        return PAPI_ENOMEM;
-    }
-
-    gpu_ctl->initializeParams.pCounterDataImage = gpu_ctl->counterDataImage.data;
-    cuptiCheckErrors( cuptiProfilerCounterDataImageInitializePtr(&gpu_ctl->initializeParams), goto fn_fail );
-
-    CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = {
-        .structSize = CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .counterDataImageSize = calculateSizeParams.counterDataImageSize,
-        .pCounterDataImage = gpu_ctl->initializeParams.pCounterDataImage,
-    };
-    cuptiCheckErrors( cuptiProfilerCounterDataImageCalculateScratchBufferSizePtr(&scratchBufferSizeParams), goto fn_fail );
-
-    gpu_ctl->counterDataScratchBuffer.size = scratchBufferSizeParams.counterDataScratchBufferSize;
-    gpu_ctl->counterDataScratchBuffer.data = (uint8_t *) papi_calloc(gpu_ctl->counterDataScratchBuffer.size, sizeof(uint8_t));
-    if (gpu_ctl->counterDataScratchBuffer.data == NULL) {
-        ERRDBG("calloc gpu_ctl->counterDataScratchBuffer.data failed!\n");
-        return PAPI_ENOMEM;
-    }
+    // The buffers freed below are allocated in cuptip_ctx_start
+    // (as of April 21st, 2025).
+    free(gpu_ctl->configImage.data);
+    gpu_ctl->configImage.data = NULL;
+    gpu_ctl->configImage.size = 0;
 
-    gpu_ctl->initScratchBufferParams = (CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params) {
-        .structSize = CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .counterDataImageSize = calculateSizeParams.counterDataImageSize,
-        .pCounterDataImage = gpu_ctl->initializeParams.pCounterDataImage,
-        .counterDataScratchBufferSize = gpu_ctl->counterDataScratchBuffer.size,
-        .pCounterDataScratchBuffer = gpu_ctl->counterDataScratchBuffer.data,
-    };
-    cuptiCheckErrors( cuptiProfilerCounterDataImageInitializeScratchBufferPtr(&gpu_ctl->initScratchBufferParams), goto fn_fail );
+    free(gpu_ctl->counterDataPrefixImage.data);
+    gpu_ctl->counterDataPrefixImage.data = NULL;
+    gpu_ctl->counterDataPrefixImage.size = 0;
 
-    return PAPI_OK;
-fn_fail:
-    return PAPI_EMISC;
-}
+    free(gpu_ctl->counterDataScratchBuffer.data);
+    gpu_ctl->counterDataScratchBuffer.data = NULL;
+    gpu_ctl->counterDataScratchBuffer.size = 0;
 
-/** @class reset_cupti_prof_config_image
-  * @brief Frees and resets variables for config image.. 
-  * @param *gpu_ctl
-  *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
-*/
-static int reset_cupti_prof_config_images(cuptip_gpu_state_t *gpu_ctl)
-{
-    COMPDBG("Entering.\n");
-    papi_free(gpu_ctl->counterDataImagePrefix.data);
-    papi_free(gpu_ctl->configImage.data);
-    papi_free(gpu_ctl->counterDataImage.data);
-    papi_free(gpu_ctl->counterDataScratchBuffer.data);
-    papi_free(gpu_ctl->counterAvailabilityImage.data);
-    gpu_ctl->counterDataImagePrefix.data = NULL;
-    gpu_ctl->configImage.data = NULL;
+    free(gpu_ctl->counterDataImage.data);
     gpu_ctl->counterDataImage.data = NULL;
-    gpu_ctl->counterDataScratchBuffer.data = NULL;
+    gpu_ctl->counterDataImage.size = 0; 
+    
+    free(gpu_ctl->counterAvailabilityImage.data);
     gpu_ctl->counterAvailabilityImage.data = NULL;
-    gpu_ctl->counterDataImagePrefix.size = 0;
-    gpu_ctl->configImage.size = 0;
-    gpu_ctl->counterDataImage.size = 0;
-    gpu_ctl->counterDataScratchBuffer.size = 0;
     gpu_ctl->counterAvailabilityImage.size = 0;
-    return PAPI_OK;
-}
-
-/** @class begin_profiling
-  * @brief Steps to setup profiling.
-  * @param *gpu_ctl
-  *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
-*/
-static int begin_profiling(cuptip_gpu_state_t *gpu_ctl)
-{
-    COMPDBG("Entering.\n");
-    byte_array_t *configImage = &(gpu_ctl->configImage);
-    byte_array_t *counterDataScratchBuffer = &(gpu_ctl->counterDataScratchBuffer);
-    byte_array_t *counterDataImage = &(gpu_ctl->counterDataImage);
-
-    CUpti_Profiler_BeginSession_Params beginSessionParams = {
-        .structSize = CUpti_Profiler_BeginSession_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-        .counterDataImageSize = counterDataImage->size,
-        .pCounterDataImage = counterDataImage->data,
-        .counterDataScratchBufferSize = counterDataScratchBuffer->size,
-        .pCounterDataScratchBuffer = counterDataScratchBuffer->data,
-        .range = CUPTI_UserRange,
-        .replayMode = CUPTI_UserReplay,
-        .maxRangesPerPass = 1,
-        .maxLaunchesPerPass = 1,
-    };
-    cuptiCheckErrors( cuptiProfilerBeginSessionPtr(&beginSessionParams), goto fn_fail );
-
-    CUpti_Profiler_SetConfig_Params setConfigParams = {
-        .structSize = CUpti_Profiler_SetConfig_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-        .pConfig = configImage->data,
-        .configSize = configImage->size,
-        .minNestingLevel = 1,
-        .numNestingLevels = 1,
-        .passIndex = 0,
-        .targetNestingLevel = 1,
-    };
-    cuptiCheckErrors( cuptiProfilerSetConfigPtr(&setConfigParams), goto fn_fail );
-
-    CUpti_Profiler_BeginPass_Params beginPassParams = {
-        .structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerBeginPassPtr(&beginPassParams), goto fn_fail );
-
-    CUpti_Profiler_EnableProfiling_Params enableProfilingParams = {
-        .structSize = CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerEnableProfilingPtr(&enableProfilingParams), goto fn_fail );
-
-    char rangeName[PAPI_MIN_STR_LEN];
-    int gpu_id = gpu_ctl->gpu_id;
-    sprintf(rangeName, "PAPI_Range_%d", gpu_id);
-    CUpti_Profiler_PushRange_Params pushRangeParams = {
-        .structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-        .pRangeName = (const char*) &rangeName,
-        .rangeNameLength = 100,
-    };
-    cuptiCheckErrors( cuptiProfilerPushRangePtr(&pushRangeParams), goto fn_fail );
-
-    return PAPI_OK;
-fn_fail:
-    return PAPI_EMISC;
-}
-
-/** @class end_profiling
-  * @brief Free up the GPI resources acquired for profiling.
-  * @param *gpu_ctl
-  *   Structure of type cuptip_gpu_state_t which has member variables such as 
-  *   gpu_id, rmr, rmr_count, and more.
-*/
-static int end_profiling(cuptip_gpu_state_t *gpu_ctl)
-{
-
-    COMPDBG("EndProfiling. dev = %d\n", gpu_ctl->gpu_id);
-    (void) gpu_ctl;
-
-    CUpti_Profiler_DisableProfiling_Params disableProfilingParams = {
-        .structSize = CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerDisableProfilingPtr(&disableProfilingParams), goto fn_fail );
-
-    CUpti_Profiler_PopRange_Params popRangeParams = {
-        .structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerPopRangePtr(&popRangeParams), goto fn_fail );
-
-    CUpti_Profiler_EndPass_Params endPassParams = {
-        .structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerEndPassPtr(&endPassParams), goto fn_fail );
-
-    CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
-        .structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerFlushCounterDataPtr(&flushCounterDataParams), goto fn_fail );
-
-    CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {
-        .structSize = CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerUnsetConfigPtr(&unsetConfigParams), goto fn_fail );
-
-    CUpti_Profiler_EndSession_Params endSessionParams = {
-        .structSize = CUpti_Profiler_EndSession_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .ctx = NULL,
-    };
-    cuptiCheckErrors( cuptiProfilerEndSessionPtr(&endSessionParams), goto fn_fail );
-
-    return PAPI_OK;
-fn_fail:
-    return PAPI_EMISC;
-}
-
-
-/** @class get_measured_values
-  * @brief Get the counter values for the Cuda native events
-  *        added by the user.
-  * @param *gpu_ctl
-  *   Struct that holds member variables such as gpu id, rmr, etc.
-  * @param *counts
-  *   Array to hold the counter values for the associated Cuda native
-  *   events. 
-*/
-static int get_measured_values(cuptip_gpu_state_t *gpu_ctl, long long *counts)
-{
-    COMPDBG("eval_metric_values. dev = %d\n", gpu_ctl->gpu_id);
-    int i, papi_errno = PAPI_OK;
-    int numMetrics = gpu_ctl->added_events->count;
-    double *gpuValues;
-    char **metricNames;
-
-    if (!gpu_ctl->counterDataImage.size) {
-        ERRDBG("Counter Data Image is empty!\n");
-        return PAPI_EINVAL;
-    }
-
-    /* allocate memory */
-    gpuValues = (double*) papi_malloc(numMetrics * sizeof(double));
-    if (gpuValues == NULL) {
-        ERRDBG("malloc gpuValues failed.\n");
-        return PAPI_ENOMEM;
-    }   
-
-    /* allocate memory */
-    metricNames = (char**) papi_calloc(numMetrics, sizeof(char *)); 
-    if (metricNames == NULL) {
-        ERRDBG("Failed to allocate memory for metricNames.\n");
-        return PAPI_ENOMEM;
-    }    
-
-    for (i = 0; i < numMetrics; i++) {
-        metricNames[i] = gpu_ctl->added_events->cuda_evts[i];
-        LOGDBG("Setting metric name %s\n", metricNames[i]);
-    }
-
-    NVPW_MetricsContext_SetCounterData_Params setCounterDataParams = {
-        .structSize = NVPW_MetricsContext_SetCounterData_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pMetricsContext = gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
-        .pCounterDataImage = gpu_ctl->counterDataImage.data,
-        .rangeIndex = 0,
-        .isolated = 1,
-    };
-
-    nvpwCheckErrors( NVPW_MetricsContext_SetCounterDataPtr(&setCounterDataParams), goto fn_fail );
-
-    NVPW_MetricsContext_EvaluateToGpuValues_Params evalToGpuParams = {
-        .structSize = NVPW_MetricsContext_EvaluateToGpuValues_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pMetricsContext = gpu_ctl->pmetricsContextCreateParams->pMetricsContext,
-        .numMetrics = numMetrics,
-        .ppMetricNames = (const char* const*) metricNames,
-        .pMetricValues = gpuValues,
-    };
-
-    nvpwCheckErrors( NVPW_MetricsContext_EvaluateToGpuValuesPtr(&evalToGpuParams), goto fn_fail );
-
-    /* store the gpu values */
-    for (i = 0; i < (int) gpu_ctl->added_events->count; i++) {
-        counts[i] = gpuValues[i];
-    }
-
-    /* free memory allocations */
-    papi_free(metricNames);
-    papi_free(gpuValues);
-
-fn_exit:
-    return papi_errno;
-fn_fail:
-    return PAPI_EMISC;
 }
 
 /** @class find_same_chipname
   * @brief Check to see if chipnames are identical.
   * 
-  * @param gpu_id
+  * @param dev_id
   *   A gpu id number, e.g 0, 1, 2, etc.
 */
-static int find_same_chipname(int gpu_id)
+static int find_same_chipname(int dev_id)
 {
     int i;
-    for (i = 0; i < gpu_id; i++) {
-        if (!strcmp(cuptiu_table_p->avail_gpu_info[gpu_id].chip_name, cuptiu_table_p->avail_gpu_info[i].chip_name)) {
+    for (i = 0; i < dev_id; i++) {
+        if (!strcmp(cuptiu_table_p->avail_gpu_info[dev_id].chipName, cuptiu_table_p->avail_gpu_info[i].chipName)) {
             return i;
         }
     }
     return -1;
 }
 
-/** @class init_all_metrics
-  * @brief Initialize metrics for a specific GPU.
-  *        
-*/
-static int init_all_metrics(void)
-{
-    int gpu_id, papi_errno = PAPI_OK;
-
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        papi_errno = get_chip_name(gpu_id, cuptiu_table_p->avail_gpu_info[gpu_id].chip_name);
-        if (papi_errno != PAPI_OK) {
-            goto fn_exit;
-        }
-    }
-    int found;
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        found = find_same_chipname(gpu_id);
-        if (found > -1) {
-            cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams = cuptiu_table_p->avail_gpu_info[found].pmetricsContextCreateParams;
-            continue;
-        }
-        MCCP_t *pMCCP = (MCCP_t *) papi_calloc(1, sizeof(MCCP_t));
-        if (pMCCP == NULL) {
-            papi_errno = PAPI_ENOMEM;
-            goto fn_exit;
-        }
-        pMCCP->structSize = NVPW_CUDA_MetricsContext_Create_Params_STRUCT_SIZE;
-        pMCCP->pChipName = cuptiu_table_p->avail_gpu_info[gpu_id].chip_name;
-        nvpwCheckErrors( NVPW_CUDA_MetricsContext_CreatePtr(pMCCP), goto fn_fail );
-
-        cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams = pMCCP;
-    }
-
-fn_exit:
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
-}
-
-/** @class free_all_enumerated_metrics
-  * @brief Free's all enumerated metrics for each gpu on the system.  
-*/
-static void free_all_enumerated_metrics(void)
-{
-    COMPDBG("Entering.\n");
-    int gpu_id, found;
-    NVPW_MetricsContext_Destroy_Params metricsContextDestroyParams;
-    if (cuptiu_table_p->avail_gpu_info == NULL) {
-        return;
-    }
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        found = find_same_chipname(gpu_id);
-        if (found > -1) {
-            cuptiu_table_p->avail_gpu_info[gpu_id].num_metrics = 0;
-            cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams = NULL;
-            continue;
-        }
-        if (cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams->pMetricsContext) {
-            metricsContextDestroyParams = (NVPW_MetricsContext_Destroy_Params) {
-                .structSize = NVPW_MetricsContext_Destroy_Params_STRUCT_SIZE,
-                .pPriv = NULL,
-                .pMetricsContext = cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams->pMetricsContext,
-            };
-            nvpwCheckErrors(NVPW_MetricsContext_DestroyPtr(&metricsContextDestroyParams), );
-        }
-        papi_free(cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams);
-        cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams = NULL;
-
-    }
-    papi_free(cuptiu_table_p->avail_gpu_info);
-    cuptiu_table_p->avail_gpu_info = NULL;
-}
-
 /** @class init_main_htable
  *  @brief Initialize the main htable used to collect metrics.
 */
 static int init_main_htable(void)
 {
-    int i, val = 1, base = 2, papi_errno = PAPI_OK;
-
-    /* allocate (2 ^ NAMEID_WIDTH) metric names, this matches the 
-       number of bits for the event encoding format */
+    // Allocate (2 ^ NAMEID_WIDTH) metric names, this matches the
+    // number of bits for the event encoding format
+    int i, val = 1, base = 2;
     for (i = 0; i < NAMEID_WIDTH; i++) {
         val *= base;
     }    
    
-    /* initialize struct */ 
-    cuptiu_table_p = (cuptiu_event_table_t *) papi_malloc(sizeof(cuptiu_event_table_t));
+    cuptiu_table_p = (cuptiu_event_table_t *) malloc(sizeof(cuptiu_event_table_t));
     if (cuptiu_table_p == NULL) {
-        goto fn_fail;
+        ERRDBG("Failed to allocate memory for cuptiu_table_p.\n");
+        return PAPI_ENOMEM;
     }
     cuptiu_table_p->capacity = val; 
     cuptiu_table_p->count = 0;
+    cuptiu_table_p->event_stats_count = 0;
 
-    cuptiu_table_p->events = (cuptiu_event_t *) papi_calloc(val, sizeof(cuptiu_event_t));
+    cuptiu_table_p->events = (cuptiu_event_t *) calloc(val, sizeof(cuptiu_event_t));
     if (cuptiu_table_p->events == NULL) {
-        goto fn_fail;
+        ERRDBG("Failed to allocate memory for cuptiu_table_p->events.\n");
+        return PAPI_ENOMEM;
+    }
+
+    cuptiu_table_p->event_stats = (StringVector *) calloc(val, sizeof(StringVector));
+    if (cuptiu_table_p->event_stats == NULL) {
+        ERRDBG("Failed to allocate memory for cuptiu_table_p->event_stats.\n");
+        return PAPI_ENOMEM;
     }
 
-    cuptiu_table_p->avail_gpu_info = (gpu_record_t *) papi_calloc(num_gpus, sizeof(gpu_record_t));
+    cuptiu_table_p->avail_gpu_info = (gpu_record_t *) calloc(numDevicesOnMachine, sizeof(gpu_record_t));
     if (cuptiu_table_p->avail_gpu_info == NULL) {
-        goto fn_fail;
+        ERRDBG("Failed to allocate memory for cuptiu_table_p->avail_gpu_info.\n");
+        return PAPI_ENOMEM;
     }
 
-    /* initialize the main hash table for metric collection */ 
+    // Initialize the main hash table for metric collection
     htable_init(&cuptiu_table_p->htable);
 
-  fn_exit:
-    return papi_errno;
-  fn_fail:
-    papi_errno = PAPI_ENOMEM;
-    goto fn_exit;
+    return PAPI_OK;
 }
 
 /** @class cuptip_init
@@ -1402,64 +573,61 @@ static int init_main_htable(void)
 int cuptip_init(void)
 {
     COMPDBG("Entering.\n");
-    int papi_errno = PAPI_OK;
 
-    papi_errno = load_cupti_perf_sym();
+    int papi_errno = load_cupti_perf_sym();
     papi_errno += load_nvpw_sym();
     if (papi_errno != PAPI_OK) {
-        cuptic_disabled_reason_set("Unable to load CUDA library functions.");
-        goto fn_fail;
+        cuptic_err_set_last("Unable to load CUDA library functions.");
+        return papi_errno;
     }
 
-    /* collect number of gpu's on the system */
-    papi_errno = cuptic_device_get_count(&num_gpus);
+    // Collect the number of devices on the machine
+    papi_errno = cuptic_device_get_count(&numDevicesOnMachine);
     if (papi_errno != PAPI_OK) {
-        goto fn_fail;
+        return papi_errno;
     }
 
-    if (num_gpus <= 0) {
-        cuptic_disabled_reason_set("No GPUs found on system.");
-        goto fn_fail;
+    if (numDevicesOnMachine <= 0) {
+        cuptic_err_set_last("No GPUs found on system.");
+        return PAPI_ECMP;
     }
    
-    /* initialize cupti profiler and perfworks api */
+    // Initialize the Cupti Profiler and Perfworks APIs
     papi_errno = initialize_cupti_profiler_api();
     papi_errno += initialize_perfworks_api();
     if (papi_errno != PAPI_OK) {
-        cuptic_disabled_reason_set("Unable to initialize CUPTI profiler libraries.");
-        goto fn_fail;
+        cuptic_err_set_last("Unable to initialize CUPTI profiler libraries.");
+        return PAPI_EMISC;
     }
 
     papi_errno = init_main_htable();
     if (papi_errno != PAPI_OK) {
-        goto fn_fail;
+        return papi_errno;
     }
 
-    papi_errno = init_all_metrics();
+    papi_errno = assign_chipnames_for_a_device_index();
     if (papi_errno != PAPI_OK) {
-        goto fn_fail;
+        return papi_errno;
     }
 
-    /* collect metrics */
+    // Collect the available metrics on the machine
     papi_errno = init_event_table();
     if (papi_errno != PAPI_OK) {
-        goto fn_fail;
+        return papi_errno;
     }
 
     papi_errno = cuInitPtr(0);
     if (papi_errno != CUDA_SUCCESS) {
-        cuptic_disabled_reason_set("Failed to initialize CUDA driver API.");
-        goto fn_fail;
+        cuptic_err_set_last("Failed to initialize CUDA driver API.");
+        return PAPI_EMISC;
     }
 
     return PAPI_OK;
-fn_fail:
-    return PAPI_EMISC;
 }
 
-/** @class verify_events
-  * @brief Verify user added events and store metadata i.e. metric names 
-  *        and device id's .
+/** @class verify_user_added_events
+  * @brief For user added events, verify they exist and do not require
+  *        multiple passes. If both are true, store metadata.
   * @param *events_id
   *   Cuda native event id's.
   * @param num_events
@@ -1468,14 +636,10 @@ fn_fail:
   *   Struct that holds read count, running, cuptip_info_t, and 
   *   cuptip_gpu_state_t. 
 */
-int verify_events(uint64_t *events_id, int num_events, 
-                  cuptip_control_t state) 
+int verify_user_added_events(uint32_t *events_id, int num_events, cuptip_control_t state)
 {
-    int papi_errno, i;
-    char *metricName;
-    int idx;
-
-    for (i = 0; i < num_gpus; i++) {
+    int i, papi_errno;
+    for (i = 0; i < numDevicesOnMachine; i++) {
         papi_errno = cuptiu_event_table_create_init_capacity(
                          num_events,
                          sizeof(cuptiu_event_t), &(state->gpu_ctl[i].added_events)
@@ -1483,31 +647,74 @@ int verify_events(uint64_t *events_id, i
         if (papi_errno != PAPI_OK) {
             return papi_errno;
         }
-     }  
+    }  
 
-    for (i = 0; i < num_events; i++) {
+     for (i = 0; i < num_events; i++) {
         event_info_t info;
         papi_errno = evt_id_to_info(events_id[i], &info);
         if (papi_errno != PAPI_OK) {
             return papi_errno;
         }
  
-        /* for a specific device table, get the current event index */
-        idx = state->gpu_ctl[info.device].added_events->count; 
-
-        metricName = state->gpu_ctl[info.device].added_events->cuda_evts[idx];
-        snprintf(metricName, PAPI_MAX_STR_LEN, "%s", cuptiu_table_p->events[info.nameid].name);
-
+        // Verify the user added event exists
         void *p;
-        if (htable_find(cuptiu_table_p->htable, metricName, (void **) &p) != HTABLE_SUCCESS) {
+        if (htable_find(cuptiu_table_p->htable, cuptiu_table_p->events[info.nameid].name, (void **) &p) != HTABLE_SUCCESS) {
             return PAPI_ENOEVNT;
         }
+
+        char stat[PAPI_HUGE_STR_LEN]="";
+        int strLen;
+        if (info.stat < NUM_STATS_QUALS){
+            strLen = snprintf(stat, sizeof(stat), "%s", stats[info.stat]);
+            if (strLen < 0 || strLen >= sizeof(stat)) {
+                SUBDBG("Failed to fully write statistic qualifier.\n");
+                return PAPI_ENOMEM;
+            }
+        }
+        const char *stat_position = strstr(cuptiu_table_p->events[info.nameid].basenameWithStatReplaced, "stat");
+        if (stat_position == NULL) { 
+            ERRDBG("Event does not have a 'stat' placeholder.\n"); 
+            return PAPI_EBUG; 
+        }
+        
+        // Reconstruct the event name: the text before the "stat" placeholder, the statistic qualifier, then the text after the placeholder.
+        size_t basename_len = stat_position - cuptiu_table_p->events[info.nameid].basenameWithStatReplaced; 
+        char reconstructedEventName[PAPI_HUGE_STR_LEN]="";
+        strLen = snprintf(reconstructedEventName, PAPI_MAX_STR_LEN, "%.*s%s%s",
+                   (int)basename_len,
+                   cuptiu_table_p->events[info.nameid].basenameWithStatReplaced,
+                   stat,
+                   stat_position + 4);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            SUBDBG("Failed to fully write reconstructed event name.\n");
+            return PAPI_EBUF;
+        }
+
+        // Verify the user added event does not require multiple passes
+        int numOfPasses;
+        papi_errno = get_number_of_passes_for_eventsets(cuptiu_table_p->avail_gpu_info[info.device].chipName, reconstructedEventName, &numOfPasses);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }    
+        if (numOfPasses > 1) { 
+            return PAPI_EMULPASS;
+        }
+
+        // For a specific device table, get the current event index
+        int idx = state->gpu_ctl[info.device].added_events->count;
+        // Store metadata
+        strLen = snprintf(state->gpu_ctl[info.device].added_events->cuda_evts[idx],
+                         PAPI_MAX_STR_LEN, "%s", reconstructedEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            SUBDBG("Failed to fully write reconstructed Cuda event name to array of added events.\n");
+            return PAPI_EBUF;
+        }
         state->gpu_ctl[info.device].added_events->cuda_devs[idx] = info.device;
         state->gpu_ctl[info.device].added_events->evt_pos[idx] = i; 
         state->gpu_ctl[info.device].added_events->count++; /* total number of events added for a specific device  */
-    }
+     }
 
-    return PAPI_OK;    
+     return PAPI_OK;
 }
 
 /** @class cuptip_ctx_create
@@ -1521,67 +728,56 @@ int verify_events(uint64_t *events_id, i
   * @param num_events
   *   Number of Cuda native events a user is wanting to count.
 */
-int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate, uint64_t *events_id, int num_events)
+int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate, uint32_t *events_id, int num_events)
 {
     COMPDBG("Entering.\n");
-    int papi_errno = PAPI_OK, gpu_id, i;
-    long long *counters = NULL;
-    char name[PAPI_2MAX_STR_LEN] = { 0 };
 
-    cuptip_control_t state = (cuptip_control_t) papi_calloc (1, sizeof(struct cuptip_control_s));
+    cuptip_control_t state = (cuptip_control_t) calloc (1, sizeof(struct cuptip_control_s));
     if (state == NULL) {
+        SUBDBG("Failed to allocate memory for state.\n");
         return PAPI_ENOMEM;
     }
 
-    state->gpu_ctl = (cuptip_gpu_state_t *) papi_calloc(num_gpus, sizeof(cuptip_gpu_state_t));
+    state->gpu_ctl = (cuptip_gpu_state_t *) calloc(numDevicesOnMachine, sizeof(cuptip_gpu_state_t)); // one zeroed slot per device
     if (state->gpu_ctl == NULL) {
+        SUBDBG("Failed to allocate memory for state->gpu_ctl.\n"); // NOTE(review): state is not freed before the return below
         return PAPI_ENOMEM;
     }
 
-    counters = papi_malloc(num_events * sizeof(*counters));
+    long long *counters = (long long *) malloc(num_events * sizeof(*counters)); // one slot per added event
     if (counters == NULL) {
+        SUBDBG("Failed to allocate memory for counters.\n"); // NOTE(review): state and state->gpu_ctl are not freed before the return below
         return PAPI_ENOMEM;
     }
 
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        state->gpu_ctl[gpu_id].gpu_id = gpu_id;
+    int dev_id;
+    for (dev_id = 0; dev_id < numDevicesOnMachine; dev_id++) {
+        state->gpu_ctl[dev_id].dev_id = dev_id; // each per-device slot records its own device index
     }
 
     event_info_t info;
-    papi_errno = evt_id_to_info(events_id[num_events - 1], &info);
+    int papi_errno = evt_id_to_info(events_id[num_events - 1], &info); // decodes the LAST event; assumes num_events >= 1 -- TODO confirm callers guarantee this
     if (papi_errno != PAPI_OK) {
         return papi_errno;
     } 
 
-    /* register the user created cuda context for the current gpu if not already known */
+    // Store a user created cuda context, or create one, for the device of the last added event
     papi_errno = cuptic_ctxarr_update_current(thr_info, info.device);
     if (papi_errno != PAPI_OK) {
-        goto fn_exit;
-    }
-
-    /* create a MetricsContext */
-    papi_errno = nvpw_cuda_metricscontext_create(state);
-    if (papi_errno != PAPI_OK) {
-        goto fn_exit;
+        return papi_errno; // NOTE(review): state, gpu_ctl and counters are not freed and *pstate is not set on this path
     }
 
-    /* verify user added events are available on the machine */
-    papi_errno = verify_events(events_id, num_events, state);
+    // Verify user added events are available on the machine
+    papi_errno = verify_user_added_events(events_id, num_events, state);
     if (papi_errno != PAPI_OK) {
-        goto fn_exit;
+        return papi_errno; // NOTE(review): same as above -- nothing allocated here is released or handed to the caller
     }
 
-    /* check to make sure added events do not require multiple passes */
-    papi_errno = check_multipass(state);
-    if (papi_errno != PAPI_OK) {
-        goto fn_exit;
-    }
     state->info = thr_info;
     state->counters = counters;
-
-fn_exit:
     *pstate = state;
-    return papi_errno;
+
+    return PAPI_OK; // only the success path publishes state through *pstate
 }
 
 /** @class cuptip_ctx_start
@@ -1592,204 +788,297 @@ fn_exit:
 */
 int cuptip_ctx_start(cuptip_control_t state)
 {
-
     COMPDBG("Entering.\n");
-    int gpu_id, papi_errno = PAPI_OK;
-    /* create instance of cuptip_gpu_state_t */
+    int papi_errno = PAPI_OK;
     cuptip_gpu_state_t *gpu_ctl;
-    /* create a context handle */
     CUcontext userCtx, ctx;
 
-    // return the Cuda context bound to the calling CPU thread
-    cudaCheckErrors( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
+    // Remember the Cuda context bound to the calling CPU thread so it can be restored on success
+    cudaCheckErrors( cuCtxGetCurrentPtr(&userCtx), return PAPI_EMISC );
+    // NOTE(review): early returns inside the loop leave the device context current; only the success path restores userCtx
+    // Enumerate through the devices a user has added an event for
+    int dev_id;
+    for (dev_id = 0; dev_id < numDevicesOnMachine; dev_id++) {
+        // Skip devices that will require the Events API to be profiled
+        int cupti_api = determine_dev_cc_major(dev_id);
+        if (cupti_api != API_PERFWORKS) {
+            if (cupti_api == API_EVENTS) {
+                continue;
+            }
+            else {
+                return PAPI_EMISC;
+            }
 
-    /* enumerate through all of the unique gpus */
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        gpu_ctl = &(state->gpu_ctl[gpu_id]);
+        }
+        gpu_ctl = &(state->gpu_ctl[dev_id]);
         if (gpu_ctl->added_events->count == 0) {
             continue;
         }
-        LOGDBG("Device num %d: event_count %d, rmr count %d\n", gpu_id, gpu_ctl->added_events->count, gpu_ctl->rmr_count);
-        papi_errno = cuptic_device_acquire(state->gpu_ctl[gpu_id].added_events);
+
+        LOGDBG("Device num %d: event_count %d, rmr count %d\n", dev_id, gpu_ctl->added_events->count, gpu_ctl->numberOfRawMetricRequests);
+        papi_errno = cuptic_device_acquire(state->gpu_ctl[dev_id].added_events);
         if (papi_errno != PAPI_OK) {
             ERRDBG("Profiling same gpu from multiple event sets not allowed.\n");
             return papi_errno;
         }
-        /* get the cuda context */
-        papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
-        /* bind the specified CUDA context to the calling CPU thread */
-        cudaCheckErrors( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
+        // Get the cuda context for this device and bind it to the calling CPU thread
+        papi_errno = cuptic_ctxarr_get_ctx(state->info, dev_id, &ctx);
+        if (papi_errno != PAPI_OK) { return papi_errno; } // ctx is uninitialized if the lookup failed
+        cudaCheckErrors( cuCtxSetCurrentPtr(ctx), return PAPI_EMISC );
 
-        /*  query/filter cuda native events available on host */
+        // Query/filter cuda native events available on host
         papi_errno = get_counter_availability(gpu_ctl);
         if (papi_errno != PAPI_OK) {
             ERRDBG("Error getting counter availability image.\n");
             return papi_errno;
         }
 
-        /* CUPTI profiler host configuration */
-        papi_errno = metric_get_config_image(gpu_ctl);
-        papi_errno += metric_get_counter_data_prefix_image(gpu_ctl);
-        papi_errno += create_counter_data_image(gpu_ctl);
+        NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParam = {NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
+        calculateScratchBufferSizeParam.pChipName = cuptiu_table_p->avail_gpu_info[dev_id].chipName;
+        calculateScratchBufferSizeParam.pCounterAvailabilityImage = NULL;
+        calculateScratchBufferSizeParam.pPriv = NULL;
+        nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr(&calculateScratchBufferSizeParam), return PAPI_EMISC );
+        // The scratch buffer is a stack VLA scoped to this iteration; the evaluator built on it is destroyed before the iteration ends
+        uint8_t myScratchBuffer[calculateScratchBufferSizeParam.scratchBufferSize];
+        NVPW_CUDA_MetricsEvaluator_Initialize_Params metricEvaluatorInitializeParams = {NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
+        metricEvaluatorInitializeParams.scratchBufferSize = calculateScratchBufferSizeParam.scratchBufferSize;
+        metricEvaluatorInitializeParams.pScratchBuffer = myScratchBuffer;
+        metricEvaluatorInitializeParams.pChipName = cuptiu_table_p->avail_gpu_info[dev_id].chipName;
+        metricEvaluatorInitializeParams.pCounterAvailabilityImage = NULL;
+        metricEvaluatorInitializeParams.pCounterDataImage = NULL;
+        metricEvaluatorInitializeParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_InitializePtr(&metricEvaluatorInitializeParams), return PAPI_EMISC );
+        NVPW_MetricsEvaluator *pMetricsEvaluator = metricEvaluatorInitializeParams.pMetricsEvaluator;
+        // Translate every added event into an eval request and accumulate the raw metric requests it needs
+        NVPA_RawMetricRequest *rawMetricRequests = NULL;
+        int i, numOfRawMetricRequests = 0;
+        for (i = 0; i < gpu_ctl->added_events->count; i++) {
+            NVPW_MetricEvalRequest metricEvalRequest;
+            papi_errno = get_metric_eval_request(pMetricsEvaluator, gpu_ctl->added_events->cuda_evts[i], &metricEvalRequest);
+            if (papi_errno != PAPI_OK) {
+                return papi_errno;
+            }
+
+            papi_errno = create_raw_metric_requests(pMetricsEvaluator, &metricEvalRequest, &rawMetricRequests, &numOfRawMetricRequests);
+            if (papi_errno != PAPI_OK) {
+                return papi_errno;
+            }
+        }
+        // Keep the requests in gpu_ctl; cuptip_ctx_destroy frees them from there
+        gpu_ctl->rawMetricRequests = rawMetricRequests;
+        gpu_ctl->numberOfRawMetricRequests = numOfRawMetricRequests;
+
+        papi_errno = get_config_image(cuptiu_table_p->avail_gpu_info[dev_id].chipName, gpu_ctl->counterAvailabilityImage.data, gpu_ctl->rawMetricRequests, gpu_ctl->numberOfRawMetricRequests, &gpu_ctl->configImage);
         if (papi_errno != PAPI_OK) {
-            ERRDBG("Failed to create CUPTI profiler state for gpu %d\n", gpu_id);
-            goto fn_fail;
+            return papi_errno;
+        }
+
+        papi_errno = get_counter_data_prefix_image(cuptiu_table_p->avail_gpu_info[dev_id].chipName, gpu_ctl->rawMetricRequests, gpu_ctl->numberOfRawMetricRequests, &gpu_ctl->counterDataPrefixImage);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
         }
 
-        papi_errno = begin_profiling(gpu_ctl);
+        papi_errno = get_counter_data_image(gpu_ctl->counterDataPrefixImage, &gpu_ctl->counterDataScratchBuffer, &gpu_ctl->counterDataImage);
         if (papi_errno != PAPI_OK) {
-            ERRDBG("Failed to start profiling for gpu %d\n", gpu_id);
-            goto fn_fail;
+            return papi_errno;
         }
+
+        papi_errno = start_profiling_session(gpu_ctl->counterDataImage, gpu_ctl->counterDataScratchBuffer, gpu_ctl->configImage);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+
+        papi_errno = begin_pass();
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+
+        papi_errno = enable_profiling();
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+
+        char rangeName[PAPI_MIN_STR_LEN];
+        int strLen = snprintf(rangeName, PAPI_MIN_STR_LEN, "PAPI_Range_%d", gpu_ctl->dev_id);
+        if (strLen < 0 || strLen >= PAPI_MIN_STR_LEN) {
+            ERRDBG("Failed to fully write range name.\n");
+            return PAPI_EBUF;
+        }
+
+        papi_errno = push_range(rangeName);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+
+        papi_errno = destroy_metrics_evaluator(pMetricsEvaluator);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
     }
+    cudaCheckErrors( cuCtxSetCurrentPtr(userCtx), return PAPI_EMISC );
 
-fn_exit:
-    cudaCheckErrors( cuCtxSetCurrentPtr(userCtx), goto fn_fail_misc );
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_ECMP;
-    goto fn_exit;
-fn_fail_misc:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
+    return PAPI_OK;
 }
 
+
 /** @class cuptip_ctx_read
-  * @brief Code to read Cuda hardware counters from an event set.
+  * @brief Query an array of numeric values corresponding
+  *        to each user added event.
   * @param state
   *   Struct that holds read count, running, cuptip_info_t, and 
   *   cuptip_gpu_state_t.
   * @param **counters
-  *   Array that holds the counter values for the specificed Cuda native events 
-  *   added by a user.  
+  *   An array which holds numeric values for the corresponding
+  *   user added event. 
 */
 int cuptip_ctx_read(cuptip_control_t state, long long **counters)
 {
     COMPDBG("Entering.\n");
-    int papi_errno, gpu_id, i, j = 0, method, evt_pos;
-    long long counts[30], *counter_vals = state->counters;
-    cuptip_gpu_state_t *gpu_ctl = NULL;
+    long long *counter_vals = state->counters;
+    // Remember the caller's context; it is restored after every device has been read
     CUcontext userCtx = NULL, ctx = NULL;
+    cudaCheckErrors( cuCtxGetCurrentPtr(&userCtx), return PAPI_EMISC );
 
-    cudaCheckErrors( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
+    int dev_id;
+    for (dev_id = 0; dev_id < numDevicesOnMachine; dev_id++) {
+        // Skip devices that will require the Events API to be profiled
+        int cupti_api = determine_dev_cc_major(dev_id);
+        if (cupti_api != API_PERFWORKS) {
+            if (cupti_api == API_EVENTS) {
+                continue;
+            }
+            else {
+                return PAPI_EMISC;
+            }
 
-    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
-        gpu_ctl = &(state->gpu_ctl[gpu_id]);
+        }
+        cuptip_gpu_state_t *gpu_ctl = &(state->gpu_ctl[dev_id]);
         if (gpu_ctl->added_events->count == 0) {
             continue;
         }
 
-        papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
-        if (papi_errno != PAPI_OK) {
-            goto fn_fail_misc;
+        if (cuptic_ctxarr_get_ctx(state->info, dev_id, &ctx) != PAPI_OK) { return PAPI_EMISC; } // returns a PAPI code, not a CUDA status
 
+        cudaCheckErrors( cuCtxSetCurrentPtr(ctx), return PAPI_EMISC );
+        int papi_errno = pop_range(); // close the range and pass so the collected data can be flushed
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
         }
 
-        cudaCheckErrors( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
-
-        CUpti_Profiler_PopRange_Params popRangeParams = {
-            .structSize = CUpti_Profiler_PopRange_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .ctx = NULL,
-        };
-        cuptiCheckErrors( cuptiProfilerPopRangePtr(&popRangeParams), goto fn_fail_misc );
-
-        CUpti_Profiler_EndPass_Params endPassParams = {
-            .structSize = CUpti_Profiler_EndPass_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .ctx = NULL,
-        };
-        cuptiCheckErrors( cuptiProfilerEndPassPtr(&endPassParams), goto fn_fail_misc );
+        papi_errno = end_pass();
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
 
-        CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {
-            .structSize = CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .ctx = NULL,
-        };
-       
-        cuptiCheckErrors( cuptiProfilerFlushCounterDataPtr(&flushCounterDataParams), goto fn_fail_misc );
+        papi_errno = flush_data();
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
 
-        papi_errno = get_measured_values(gpu_ctl, counts);
+        NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParam = {NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
+        calculateScratchBufferSizeParam.pChipName = cuptiu_table_p->avail_gpu_info[dev_id].chipName;
+        calculateScratchBufferSizeParam.pCounterAvailabilityImage = NULL;
+        calculateScratchBufferSizeParam.pPriv = NULL;
+        nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr(&calculateScratchBufferSizeParam), return PAPI_EMISC );
+
+        uint8_t myScratchBuffer[calculateScratchBufferSizeParam.scratchBufferSize];
+        NVPW_CUDA_MetricsEvaluator_Initialize_Params metricEvaluatorInitializeParams = {NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
+        metricEvaluatorInitializeParams.scratchBufferSize = calculateScratchBufferSizeParam.scratchBufferSize;
+        metricEvaluatorInitializeParams.pScratchBuffer = myScratchBuffer;
+        metricEvaluatorInitializeParams.pChipName = cuptiu_table_p->avail_gpu_info[dev_id].chipName;
+        metricEvaluatorInitializeParams.pCounterAvailabilityImage = NULL;
+        metricEvaluatorInitializeParams.pCounterDataImage = NULL;
+        metricEvaluatorInitializeParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_InitializePtr(&metricEvaluatorInitializeParams), return PAPI_EMISC );
+        NVPW_MetricsEvaluator *pMetricsEvaluator = metricEvaluatorInitializeParams.pMetricsEvaluator;
+
+        long long *metricValues = (long long *) calloc(gpu_ctl->added_events->count, sizeof(long long));
+        if (metricValues == NULL) {
+            SUBDBG("Failed to allocate memory for metricValues.\n");
+            return PAPI_ENOMEM;
+        }
+        papi_errno = get_evaluated_metric_values(pMetricsEvaluator, gpu_ctl, metricValues);
         if (papi_errno != PAPI_OK) {
-            goto fn_exit;
+            free(metricValues);
+            return papi_errno;
         }
 
+        int i;
         for (i = 0; i < gpu_ctl->added_events->count; i++) {
-            evt_pos = gpu_ctl->added_events->evt_pos[i];
+            int evt_pos = gpu_ctl->added_events->evt_pos[i]; // slot of this event in the caller-visible counter array
             if (state->read_count == 0) {
-                counter_vals[evt_pos] = counts[i];
+                counter_vals[evt_pos] = metricValues[i];
             }
             else {
-                /* determine collection method such as max, min, sum, and avg for an added Cuda native event */
-                method = get_event_collection_method(gpu_ctl->added_events->cuda_evts[i]);
+                int method = get_event_collection_method(gpu_ctl->added_events->cuda_evts[i]);
                 switch (method) {
                     case CUDA_SUM:
-                        counter_vals[evt_pos] += counts[i];
+                        counter_vals[evt_pos] += metricValues[i];
                         break;
                     case CUDA_MIN:
-                        counter_vals[evt_pos] = counter_vals[evt_pos] < counts[i] ? counter_vals[evt_pos] : counts[i];
+                        counter_vals[evt_pos] = counter_vals[evt_pos] < metricValues[i] ? counter_vals[evt_pos] : metricValues[i];
                         break;
                     case CUDA_MAX:
-                        counter_vals[evt_pos] = counter_vals[evt_pos] > counts[i] ? counter_vals[evt_pos] : counts[i];
+                        counter_vals[evt_pos] = counter_vals[evt_pos] > metricValues[i] ? counter_vals[evt_pos] : metricValues[i];
                         break;
                     case CUDA_AVG:
-                         /* (size * average + value) / (size + 1) 
-                            size - current number of values in the average
-                            average - current average
-                            value - number to add to the average
-                         */
-                         counter_vals[evt_pos] = (state->read_count * counter_vals[j++] + counts[i]) / (state->read_count + 1);
+                          // (size * average + value) / (size + 1)
+                          //  size - current number of values in the average
+                          //  average - current average, stored at this event's own slot (evt_pos)
+                          //  value - number to add to the average
+                         counter_vals[evt_pos] = (state->read_count * counter_vals[evt_pos] + metricValues[i]) / (state->read_count + 1);
                          break;
                     default:
-                        counter_vals[evt_pos] = counts[i];
+                        counter_vals[evt_pos] = metricValues[i];
                         break;
                 }
             }
         }
+        free(metricValues);
         *counters = counter_vals;
 
-        cuptiCheckErrors( cuptiProfilerCounterDataImageInitializePtr(&gpu_ctl->initializeParams), goto fn_fail_misc );
-        cuptiCheckErrors( cuptiProfilerCounterDataImageInitializeScratchBufferPtr(&gpu_ctl->initScratchBufferParams), goto fn_fail_misc );
+        papi_errno = begin_pass(); // reopen a pass and range so counting continues after this read
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
 
-        CUpti_Profiler_BeginPass_Params beginPassParams = {
-            .structSize = CUpti_Profiler_BeginPass_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .ctx = NULL,
-        };
-        cuptiCheckErrors( cuptiProfilerBeginPassPtr(&beginPassParams), goto fn_fail_misc );
+        }
 
         char rangeName[PAPI_MIN_STR_LEN];
-        sprintf(rangeName, "PAPI_Range_%d", gpu_ctl->gpu_id);
-        CUpti_Profiler_PushRange_Params pushRangeParams = {
-            .structSize = CUpti_Profiler_PushRange_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .ctx = NULL,
-            .pRangeName = (const char*) &rangeName,
-            .rangeNameLength = 100,
-        };
-        cuptiCheckErrors( cuptiProfilerPushRangePtr(&pushRangeParams), goto fn_fail_misc );
+        int strLen = snprintf(rangeName, PAPI_MIN_STR_LEN, "PAPI_Range_%d", gpu_ctl->dev_id);
+        if (strLen < 0 || strLen >= PAPI_MIN_STR_LEN) {
+            ERRDBG("Failed to fully write range name.\n");
+            return PAPI_EBUF;
+        }
+
+        papi_errno = push_range(rangeName);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+
+        papi_errno = destroy_metrics_evaluator(pMetricsEvaluator);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
 
     }
     state->read_count++;
-fn_exit:
-    cudaCheckErrors( cuCtxSetCurrentPtr(userCtx), );
-    return papi_errno;
-fn_fail_misc:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
+
+    cudaCheckErrors( cuCtxSetCurrentPtr(userCtx), return PAPI_EMISC);
+
+    return PAPI_OK;
 }
 
 /** @class cuptip_ctx_reset
   * @brief Code to reset Cuda hardware counter values.
-  * @param *counters
-  *   Array that holds the counter values for the specificed Cuda native events
-  *   added by a user. 
+  * @param state
+  *   Struct that holds read count, running, cuptip_info_t, and
+  *   cuptip_gpu_state_t.
 */
 int cuptip_ctx_reset(cuptip_control_t state)
 {
     COMPDBG("Entering.\n");
-    int i;
 
+    int i;
     for (i = 0; i < state->read_count; i++) {
         state->counters[i] = 0;
     }
@@ -1808,42 +1097,58 @@ int cuptip_ctx_reset(cuptip_control_t st
 int cuptip_ctx_stop(cuptip_control_t state)
 {
     COMPDBG("Entering.\n");
-    int gpu_id;
-    int papi_errno = PAPI_OK;
-    cuptip_gpu_state_t *gpu_ctl;
-    CUcontext userCtx = NULL, ctx = NULL;
 
-    cudaCheckErrors( cuCtxGetCurrentPtr(&userCtx), goto fn_fail_misc );
+    CUcontext userCtx = NULL; // caller's context, restored after all devices are stopped
+    cudaCheckErrors( cuCtxGetCurrentPtr(&userCtx), return PAPI_EMISC );
+
+    int dev_id;
+    for (dev_id=0; dev_id < numDevicesOnMachine; dev_id++) {
+        // Skip devices that will require the Events API to be profiled
+        int cupti_api = determine_dev_cc_major(dev_id);
+        if (cupti_api != API_PERFWORKS) {
+            if (cupti_api == API_EVENTS) {
+                continue;
+            }
+            else {
+                return PAPI_EMISC;
+            }
 
-    for (gpu_id=0; gpu_id < num_gpus; gpu_id++) {
-        gpu_ctl = &(state->gpu_ctl[gpu_id]);
+        }        
+        cuptip_gpu_state_t *gpu_ctl = &(state->gpu_ctl[dev_id]);
         if (gpu_ctl->added_events->count == 0) {
             continue;
         }
-        papi_errno = cuptic_ctxarr_get_ctx(state->info, gpu_id, &ctx);
-        cudaCheckErrors( cuCtxSetCurrentPtr(ctx), goto fn_fail_misc );
-        papi_errno = end_profiling(gpu_ctl);
+        // Look up this device's context and make it current before ending its profiling session
+        CUcontext ctx = NULL;
+        int papi_errno = cuptic_ctxarr_get_ctx(state->info, dev_id, &ctx);
         if (papi_errno != PAPI_OK) {
-            goto fn_fail;
+            return papi_errno;
         }
-        papi_errno = cuptic_device_release(state->gpu_ctl[gpu_id].added_events);
+
+        cudaCheckErrors( cuCtxSetCurrentPtr(ctx), return PAPI_EMISC );
+
+        papi_errno = end_profiling_session();
         if (papi_errno != PAPI_OK) {
-            goto fn_fail;
+            SUBDBG("Failed to end profiling session.\n");
+            return papi_errno; // NOTE(review): returns with the device context still current and the device not released
+        }
+
+        papi_errno = cuptic_device_release(state->gpu_ctl[dev_id].added_events);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno; // NOTE(review): userCtx is not restored on this path either
         }
+
+        COMPDBG("Stopped and ended profiling session for device %d\n", gpu_ctl->dev_id);
     }
 
-fn_exit:
-    cudaCheckErrors( cuCtxSetCurrentPtr(userCtx), goto fn_fail_misc );
-    return papi_errno;
-fn_fail:
-    goto fn_exit;
-fn_fail_misc:
-    papi_errno = PAPI_EMISC;
-    goto fn_exit;
+    cudaCheckErrors( cuCtxSetCurrentPtr(userCtx), return PAPI_EMISC );
+
+    return PAPI_OK;
 }
 
 /** @class cuptip_ctx_destroy
-  * @brief Destroy created profiling context.
+  * @brief Free allocated memory in start - stop workflow and
+  *        reset config images.
   * @param *pstate
   *   Struct that holds read count, running, cuptip_info_t, and 
   *   cuptip_gpu_state_t.
@@ -1852,20 +1157,26 @@ int cuptip_ctx_destroy(cuptip_control_t
 {
     COMPDBG("Entering.\n");
     cuptip_control_t state = *pstate;
-    int i, j;
-    int papi_errno = nvpw_cuda_metricscontext_destroy(state);
-    for (i = 0; i < num_gpus; i++) {
-        reset_cupti_prof_config_images( &(state->gpu_ctl[i]) );
+    int i;
+    for (i = 0; i < numDevicesOnMachine; i++) {
+        free_and_reset_configuration_images( &(state->gpu_ctl[i]) );
         cuptiu_event_table_destroy( &(state->gpu_ctl[i].added_events) );
-        for (j = 0; j < state->gpu_ctl[i].rmr_count; j++) {
-            papi_free((void *) state->gpu_ctl[i].rmr[j].pMetricName);
-        }
-        papi_free(state->gpu_ctl[i].rmr);
+
+        // Free the created rawMetricRequests from cuptip_ctx_start
+        int j;
+        for (j = 0; j < state->gpu_ctl[i].numberOfRawMetricRequests; j++) {
+            free((void *) state->gpu_ctl[i].rawMetricRequests[j].pMetricName);
+        }
+        free(state->gpu_ctl[i].rawMetricRequests);
     }
-    papi_free(state->gpu_ctl);
-    papi_free(state);
+
+    // Free the allocated memory from cuptip_ctx_create
+    free(state->counters);
+    free(state->gpu_ctl);
+    free(state);
     *pstate = NULL;
-    return papi_errno;
+
+    return PAPI_OK;
 }
 
 
@@ -1876,7 +1187,6 @@ int cuptip_ctx_destroy(cuptip_control_t
 */
 int get_event_collection_method(const char *evt_name)
 {
-
     if (strstr(evt_name, ".avg") != NULL) {
         return CUDA_AVG;
     }
@@ -1900,11 +1210,25 @@ int get_event_collection_method(const ch
 int cuptip_shutdown(void)
 {
     COMPDBG("Entering.\n");
+
+    shutdown_event_stats_table();
     shutdown_event_table();
-    free_all_enumerated_metrics();
-    deinitialize_cupti_profiler_api();
-    unload_nvpw_sym();
-    unload_cupti_perf_sym();
+
+    // Run every teardown step even when an earlier one fails; report the first failure.
+    int papi_errno = deinitialize_cupti_profiler_api();
+
+    int unload_errno = unload_nvpw_sym();
+    if (papi_errno == PAPI_OK) {
+        papi_errno = unload_errno;
+    }
+
+    unload_errno = unload_cupti_perf_sym();
+    if (papi_errno == PAPI_OK) {
+        papi_errno = unload_errno;
+    }
+    if (papi_errno != PAPI_OK) {
+        return papi_errno;
+    }
     return PAPI_OK;
 }
 
@@ -1916,11 +1240,12 @@ int cuptip_shutdown(void)
   * @param *event_id
   *   Created event id.
 */
-int evt_id_create(event_info_t *info, uint64_t *event_id)
+int evt_id_create(event_info_t *info, uint32_t *event_id)
 {
-    *event_id  = (uint64_t)(info->device   << DEVICE_SHIFT);
-    *event_id |= (uint64_t)(info->flags    << QLMASK_SHIFT);
-    *event_id |= (uint64_t)(info->nameid   << NAMEID_SHIFT);
+    *event_id  = (uint32_t)(info->stat     << STAT_SHIFT);   // plain assignment: starts the id from the stat field
+    *event_id |= (uint32_t)(info->device   << DEVICE_SHIFT); // remaining fields are OR-ed in at their own shift
+    *event_id |= (uint32_t)(info->flags    << QLMASK_SHIFT); // NOTE(review): fields are shifted without masking -- presumably callers keep each within its width; verify
+    *event_id |= (uint32_t)(info->nameid   << NAMEID_SHIFT);
     return PAPI_OK;
 }
 
@@ -1932,13 +1257,18 @@ int evt_id_create(event_info_t *info, ui
   * @param *info
   *   Structure which contains member variables of device, flags, and nameid.
 */
-int evt_id_to_info(uint64_t event_id, event_info_t *info)
+int evt_id_to_info(uint32_t event_id, event_info_t *info)
 {
-    info->device   = (int)((event_id & DEVICE_MASK) >> DEVICE_SHIFT);
-    info->flags    = (int)((event_id & QLMASK_MASK) >> QLMASK_SHIFT);
-    info->nameid   = (int)((event_id & NAMEID_MASK) >> NAMEID_SHIFT);
+    info->stat     = (uint32_t)((event_id & STAT_MASK) >> STAT_SHIFT);
+    info->device   = (uint32_t)((event_id & DEVICE_MASK) >> DEVICE_SHIFT);
+    info->flags    = (uint32_t)((event_id & QLMASK_MASK) >> QLMASK_SHIFT);
+    info->nameid   = (uint32_t)((event_id & NAMEID_MASK) >> NAMEID_SHIFT);
 
-    if (info->device >= num_gpus) {
+    if (info->stat >= (1 << STAT_WIDTH)) {
+        return PAPI_ENOEVNT;
+    }
+
+    if (info->device >= numDevicesOnMachine) {
         return PAPI_ENOEVNT;
     }
 
@@ -1954,70 +1284,153 @@ int evt_id_to_info(uint64_t event_id, ev
 }
 
 /** @class init_event_table
-  * @brief Initialize hash table and cuptiu_event_table_t structure.
+  * @brief For a device get and store the metric names.
 */
 int init_event_table(void) 
 {
-    int i, dev_id, found, table_idx = 0, papi_errno = PAPI_OK;
-    int listsubmetrics = 1;
+    int dev_id, deviceRecord = 0; 
+    // Loop through all available devices on the current system
+    for (dev_id = 0; dev_id < numDevicesOnMachine; dev_id++) {
+        // Skip devices that will require the Events API to be profiled
+        int cupti_api = determine_dev_cc_major(dev_id);
+        if (cupti_api != API_PERFWORKS) {
+            if (cupti_api == API_EVENTS) {
+                continue;
+            }
+            else {
+                return PAPI_EMISC;
+            }
 
-    /* instatiate struct to collect the total metric count and metric names;
-       instantiated here to avoid scoping issues */
-    NVPW_MetricsContext_GetMetricNames_Begin_Params getMetricNameBeginParams = { NVPW_MetricsContext_GetMetricNames_Begin_Params_STRUCT_SIZE };
-    
-    /* loop through all available devices on the current system */
-    for (dev_id = 0; dev_id < num_gpus; dev_id++) {
-        found = find_same_chipname(dev_id);
-        /* unique device found, collect metadata  */
+        }
+        
+        int papi_errno;
+        int found = find_same_chipname(dev_id);
+        // Unique device found, collect the constructed metric names
         if (found == -1) {
-            /* increment table index */
+            // Increment device record
             if (dev_id > 0)
-                table_idx++;
+                deviceRecord++;
 
-            /* assigning values to member variables */
-            getMetricNameBeginParams.pPriv = NULL;
-            getMetricNameBeginParams.pMetricsContext = cuptiu_table_p->avail_gpu_info[table_idx].pmetricsContextCreateParams->pMetricsContext;
-            getMetricNameBeginParams.hidePeakSubMetrics = !listsubmetrics;
-            getMetricNameBeginParams.hidePerCycleSubMetrics = !listsubmetrics;
-            getMetricNameBeginParams.hidePctOfPeakSubMetrics = !listsubmetrics;
-
-            nvpwCheckErrors( NVPW_MetricsContext_GetMetricNames_BeginPtr(&getMetricNameBeginParams), goto fn_fail ); 
-
-            /* for each unique device found, store both the total number of metrics and metric names */
-            cuptiu_table_p->avail_gpu_info[table_idx].num_metrics = getMetricNameBeginParams.numMetrics;
-            cuptiu_table_p->avail_gpu_info[table_idx].metric_names = getMetricNameBeginParams.ppMetricNames;
+            papi_errno = enumerate_metrics_for_unique_devices( cuptiu_table_p->avail_gpu_info[deviceRecord].chipName,
+                                                               &cuptiu_table_p->avail_gpu_info[deviceRecord].totalMetricCount,
+                                                               &cuptiu_table_p->avail_gpu_info[deviceRecord].metricNames );
+            if (papi_errno != PAPI_OK) {
+                return papi_errno;
+            }
         }
-        /* device metadata already collected, set table index */
+        // Device metadata already collected, set device record
         else {
-            /* set table_idx to */
-            table_idx = found;
+            deviceRecord = found;
         }
 
-        /* loop through metrics to add to overall event table */
-        for (i = 0; i < cuptiu_table_p->avail_gpu_info[table_idx].num_metrics; i++) {
-            papi_errno = get_ntv_events( cuptiu_table_p, cuptiu_table_p->avail_gpu_info[table_idx].metric_names[i], dev_id);
-            if (papi_errno != PAPI_OK)
-                goto fn_exit;
+        int i;
+        for (i = 0; i < cuptiu_table_p->avail_gpu_info[deviceRecord].totalMetricCount; i++) {
+            papi_errno = get_ntv_events(cuptiu_table_p, cuptiu_table_p->avail_gpu_info[deviceRecord].metricNames[i], dev_id);
+            if (papi_errno != PAPI_OK) {
+                return papi_errno;
+            }
         }
 
     }
 
-    /* free memory */
-    for (i = 0; i < table_idx; i++) {
-        NVPW_MetricsContext_GetMetricNames_End_Params getMetricNameEndParams = {
-            .structSize = NVPW_MetricsContext_GetMetricNames_End_Params_STRUCT_SIZE,
-            .pPriv = NULL,
-            .pMetricsContext = cuptiu_table_p->avail_gpu_info[table_idx].pmetricsContextCreateParams->pMetricsContext,
-        };
-        nvpwCheckErrors( NVPW_MetricsContext_GetMetricNames_EndPtr((NVPW_MetricsContext_GetMetricNames_End_Params *) &getMetricNameEndParams), goto fn_fail );
+    // Free memory allocated in enumerate_metrics_for_unique_devices and reset totalMetricCount to 0
+    int recordIdx;
+    for (recordIdx = 0; recordIdx < (deviceRecord + 1); recordIdx++) {
+        int metricIdx;
+        for (metricIdx = 0; metricIdx < cuptiu_table_p->avail_gpu_info[recordIdx].totalMetricCount; metricIdx++) {
+            free(cuptiu_table_p->avail_gpu_info[recordIdx].metricNames[metricIdx]);
+        }
+        free(cuptiu_table_p->avail_gpu_info[recordIdx].metricNames);
+        cuptiu_table_p->avail_gpu_info[recordIdx].totalMetricCount = 0;
     }
 
-  fn_exit:
-    return papi_errno;
-  fn_fail:
-    papi_errno = PAPI_EMISC; 
-    goto fn_exit;
+    return PAPI_OK;
+}
 
+/** @class is_stat
+  * @brief Report whether a token of an event name is one of the stat qualifiers.
+  *
+  * @param token
+  *   One period separated piece of an event name. Ex. "dram__bytes" "avg"
+  * @return 1 if token equals an entry of stats[], 0 otherwise.
+*/
+int is_stat(const char *token) {
+    int idx = 0, matched = 0;
+    while (idx < NUM_STATS_QUALS && !matched) {
+        matched = (strcmp(token, stats[idx++]) == 0);
+    }
+    return matched;
+}
+
+/** @class restructure_event_name
+  * @brief Split an event name on '.' and separate its stat token from the rest.
+  *
+  * @param input
+  *   Event name string
+  * @param output
+  *   Event name string (stat token replaced w/ "stat")
+  * @param base
+  *   Event name string base (w/o the stat token)
+  * @param stat
+  *   Event stat string (the last token for which is_stat() returns 1)
+  * output, base and stat are written as buffers of PAPI_HUGE_STR_LEN bytes.
+  * @return PAPI_OK on success; PAPI_EBUF if a string or the segment table would
+  *         overflow; PAPI_EINVAL if input contains no stat token.
+*/
+int restructure_event_name(const char *input, char *output, char *base, char *stat) {
+    enum { MAX_SEGMENTS = 10 };
+    // strtok() modifies the string it scans, so tokenize a private copy
+    char input_copy[PAPI_HUGE_STR_LEN];
+    int strLen = snprintf(input_copy, PAPI_HUGE_STR_LEN, "%s", input);
+    if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+        ERRDBG("String larger than PAPI_HUGE_STR_LEN");
+        return PAPI_EBUF;
+    }
+
+    // Initialize output strings
+    output[0] = '\0';
+    base[0] = '\0';
+    stat[0] = '\0';
+
+    // Split the string by periods, refusing more segments than parts[] can hold
+    char *parts[MAX_SEGMENTS] = {0};
+    int i, segment_count = 0, stat_index = -1;
+    char *token = strtok(input_copy, ".");
+    while (token != NULL) {
+        if (segment_count >= MAX_SEGMENTS) {
+            ERRDBG("Event name has too many period separated segments.\n");
+            return PAPI_EBUF;
+        }
+        if (is_stat(token) == 1) {
+            stat_index = segment_count;
+        }
+        parts[segment_count++] = token;
+        token = strtok(NULL, ".");
+    }
+
+    // Without a stat token parts[stat_index] would be read out of bounds
+    if (stat_index < 0) {
+        ERRDBG("Event name does not contain a stat.\n");
+        return PAPI_EINVAL;
+    }
+
+    // Copy the stat; a token of input_copy is always shorter than PAPI_HUGE_STR_LEN
+    snprintf(stat, PAPI_HUGE_STR_LEN, "%s", parts[stat_index]);
+    // base never outgrows input, but output can ("stat" may be longer than the token it replaces), so output is checked
+    size_t outLen = 0, baseLen = 0;
+    for (i = 0; i < segment_count; i++) {
+        const char *piece = (i == stat_index) ? "stat" : parts[i];
+        strLen = snprintf(output + outLen, PAPI_HUGE_STR_LEN - outLen, "%s%s", outLen ? "." : "", piece);
+        if (strLen < 0 || (size_t) strLen >= PAPI_HUGE_STR_LEN - outLen) {
+            ERRDBG("String larger than PAPI_HUGE_STR_LEN");
+            return PAPI_EBUF;
+        }
+        outLen += (size_t) strLen;
+        if (i != stat_index) {
+            baseLen += (size_t) snprintf(base + baseLen, PAPI_HUGE_STR_LEN - baseLen, "%s%s", baseLen ? "." : "", parts[i]);
+        }
+    }
+    return PAPI_OK;
 }
 
 /** @class get_ntv_events
@@ -2029,38 +1442,74 @@ int init_event_table(void)
   * @param *evt_name
   *   Cuda native event name.
 */
-static int get_ntv_events(cuptiu_event_table_t *evt_table, const char *evt_name, int gpu_id) 
+static int get_ntv_events(cuptiu_event_table_t *evt_table, const char *evt_name, int dev_id) 
 {
+    int papi_errno, strLen;
+    char name_restruct[PAPI_HUGE_STR_LEN]="", name_no_stat[PAPI_HUGE_STR_LEN]="", stat[PAPI_HUGE_STR_LEN]="";
     int *count = &evt_table->count;
+    int *event_stats_count = &evt_table->event_stats_count;
     cuptiu_event_t *events = evt_table->events;
-
-    /* check to see if evt_name argument has been provided */
+    StringVector *event_stats = evt_table->event_stats;   
+    
+    // Check to see if evt_name argument has been provided
     if (evt_name == NULL) {
         return PAPI_EINVAL;
     }
 
-    /* check to see if capacity has been correctly allocated */
+    // Check to see if capacity has been correctly allocated
     if (*count >= evt_table->capacity) {
         return PAPI_EBUG;
     }
 
+    papi_errno = restructure_event_name(evt_name, name_restruct, name_no_stat, stat);
+    if (papi_errno != PAPI_OK){
+            return papi_errno;
+    }
+
     cuptiu_event_t *event;
-    /* check to make sure event entry has not already been added */
-    if ( htable_find(evt_table->htable, evt_name, (void **) &event) != HTABLE_SUCCESS ) {
+    StringVector *stat_vec;
+    
+    if ( htable_find(evt_table->htable, name_no_stat, (void **) &event) != HTABLE_SUCCESS ) {
         event = &events[*count];
-        /* increment count */
+        // Increment event count
         (*count)++;
 
-        /* store event info */
-        strcpy(event->name, evt_name);
+        strLen = snprintf(event->name, PAPI_2MAX_STR_LEN, "%s", name_no_stat);
+        if (strLen < 0 || strLen >= PAPI_2MAX_STR_LEN) {
+            ERRDBG("Failed to fully write name with no stat.\n");
+            return PAPI_EBUF;
+        }
 
-        /* insert event info into htable */
-        if ( htable_insert(evt_table->htable, evt_name, event) != HTABLE_SUCCESS ) {
+        strLen = snprintf(event->basenameWithStatReplaced, sizeof(event->basenameWithStatReplaced), "%s", name_restruct);
+        if (strLen < 0 || (size_t) strLen >= sizeof(event->basenameWithStatReplaced)) { // test against the size actually passed to snprintf
+            ERRDBG("Failed to fully write name with stat replaced.\n");
+            return PAPI_EBUF;
+        }
+
+        stat_vec = &event_stats[*event_stats_count];
+        (*event_stats_count)++;
+         
+        event->stat = stat_vec;
+        init_vector(event->stat);
+        
+        
+        papi_errno = push_back(event->stat, stat);
+        if (papi_errno != PAPI_OK){
+            return papi_errno;
+        }
+
+        if ( htable_insert(evt_table->htable, name_no_stat, event) != HTABLE_SUCCESS ) {
             return PAPI_ESYS;
         }
     }
+     else {
+       papi_errno = push_back(event->stat, stat);
+       if (papi_errno != PAPI_OK){
+            return papi_errno;
+       }
+     }
 
-    cuptiu_dev_set(&event->device_map, gpu_id);
+    cuptiu_dev_set(&event->device_map, dev_id);
 
     return PAPI_OK;
 }
@@ -2069,212 +1518,34 @@ static int get_ntv_events(cuptiu_event_t
   * @brief Shutdown cuptiu_event_table_t structure that holds the cuda native 
   *        event name and the corresponding description.
 */
-static int shutdown_event_table(void)
+static void shutdown_event_table(void)
 {
     cuptiu_table_p->count = 0;
 
-    papi_free(cuptiu_table_p->events);
-
-    return PAPI_OK;
-}
-
-/** @class retrieve_metric_descr
-  * @brief Collect the description for the provided evt_name.
-  *
-  * @param *pMetricsContext
-  *   Structure providing context for evt_name. 
-  * @param *evt_name
-  *   Cuda native event name.
-  * @param *description
-  *   Corresponding description for provided Cuda native event name.
-  * @param gpu_id
-  *   Device number, e.g. 0, 1, 2, ... ,etc.
-*/
-static int retrieve_metric_descr( NVPA_MetricsContext *pMetricsContext, const char *evt_name, char *description, const char *chip_name) 
-{
-    COMPDBG("Entering.\n");
-    int num_dep, i, len, passes, papi_errno;
-    const char *token_sw_evt = "sass";
-    char desc[PAPI_2MAX_STR_LEN];
-    NVPA_RawMetricRequest *rmr;
-    NVPA_Status nvpa_err;
-
-    /* check to make sure an argument has been passed for evt_name and description */
-    if (evt_name == NULL || description == NULL) {
-        return PAPI_EINVAL;
-    }
-
-    /* perfworks api: instantiate a new struct with provided event name to be passed to
-       NVPW_MetricsContext_GetMetricsProperties_BeginPtr */
-    NVPW_MetricsContext_GetMetricProperties_Begin_Params getMetricPropertiesBeginParams = {
-        // [in]
-        .structSize = NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE,
-        .pPriv = NULL, // assign to NULL
-        .pMetricsContext = pMetricsContext,
-        .pMetricName = evt_name,
-    };
-    nvpa_err = NVPW_MetricsContext_GetMetricProperties_BeginPtr(&getMetricPropertiesBeginParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS || getMetricPropertiesBeginParams.ppRawMetricDependencies == NULL) {
-        strcpy(description, "Could not get description.");
-        return PAPI_EINVAL;
-    }
-
-    for (num_dep = 0; getMetricPropertiesBeginParams.ppRawMetricDependencies[num_dep] != NULL; num_dep++) {;}
-
-    rmr = (NVPA_RawMetricRequest *) papi_calloc(num_dep, sizeof(NVPA_RawMetricRequest));
-    if (rmr == NULL) {
-        return PAPI_ENOMEM;
-    }
-    for (i = 0; i < num_dep; i++) {
-        /* list of */
-        rmr[i].pMetricName = strdup(getMetricPropertiesBeginParams.ppRawMetricDependencies[i]);
-        rmr[i].isolated = 1;
-        rmr[i].keepInstances = 1;
-        rmr[i].structSize = NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE;
-    }
-    
-    /* collect the corresponding description for the provided evt_name */
-    len = snprintf( desc, PAPI_2MAX_STR_LEN, "%s. Units=(%s)",
-                    getMetricPropertiesBeginParams.pDescription,
-                    getMetricPropertiesBeginParams.pDimUnits);
-    /* check to make sure that description length is not greater than 
-       PAPI_2MAX_STR_LEN, which holds */
-    if (len > PAPI_2MAX_STR_LEN) {
-        ERRDBG("String formatting exceeded max string length.\n");
-        return PAPI_ENOMEM;
-    }
-
-    /* perfworks api: instantiate a new struct to be passsed to NVPW_MetricsContext_GetMetricProperties_EndPtr */
-    NVPW_MetricsContext_GetMetricProperties_End_Params getMetricPropertiesEndParams = {
-        // [in]
-        .structSize = NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE,
-        .pPriv = NULL, //assign to NULL
-        .pMetricsContext = pMetricsContext,
-    };
-    nvpa_err = NVPW_MetricsContext_GetMetricProperties_EndPtr(&getMetricPropertiesEndParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-    /* perfworks api: instantiate a new stuct to be passed to NVPW_CUDA_RawMetricsConfig_CreatePtr */
-    NVPW_CUDA_RawMetricsConfig_Create_Params nvpw_metricsConfigCreateParams = {
-        // [in]
-        .structSize = NVPW_CUDA_RawMetricsConfig_Create_Params_STRUCT_SIZE,
-        .pPriv = NULL, // assign to NULL
-        .activityKind = NVPA_ACTIVITY_KIND_PROFILER,
-        .pChipName = chip_name,
-    };
-    nvpa_err = NVPW_CUDA_RawMetricsConfig_CreatePtr(&nvpw_metricsConfigCreateParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-
-    /* collects the total number of passes
-       num_passes = numPipelinedPasses + numIsolatedPasses * numNestingLevels */
-    papi_errno = calculate_num_passes( nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-                                       num_dep, rmr, &passes );
-    if ( papi_errno == PAPI_EMULPASS ) {
-        /* at this point we just want the number of passes (stored in passes) */
-    }
-
-    /* perfworks api: instantiate a new struct to be passed to NVPW_RawMetricsConfig_DestroyPtr */
-    NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {
-        // [in]
-        .structSize = NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE,
-        .pPriv = NULL, // assign to NULL
-        .pRawMetricsConfig = nvpw_metricsConfigCreateParams.pRawMetricsConfig,
-    };
-    nvpa_err = NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *) &rawMetricsConfigDestroyParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS) {
-        return PAPI_EMISC;
-    }
-
-    /* add extra metadata to description */
-    snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " Numpass=%d", passes);
-    if (passes > 1) {
-        snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " (multi-pass not supported)");
-    }
-
-    if (strstr(evt_name, token_sw_evt) != NULL) {
-        snprintf(desc + strlen(desc), PAPI_2MAX_STR_LEN - strlen(desc), " (SW event)");
-    }
-
-    /* free memory, copy description, and return successful error code */
-    papi_free(rmr);
+    free(cuptiu_table_p->avail_gpu_info);
+    cuptiu_table_p->avail_gpu_info = NULL;
 
-    strcpy(description, desc);
+    free(cuptiu_table_p->events);
+    cuptiu_table_p->events = NULL;
 
-    return PAPI_OK;
+    free(cuptiu_table_p);
+    cuptiu_table_p = NULL;
 }
 
-/** @class retrieve_metric_rmr
-  * @brief Collect the raw metric request for the provided evt_name.
-  *
-  * @param *pMetricsContext
-  *   Structure providing context for evt_name. 
-  * @param *evt_name
-  *   Cuda native event name.
-  * @param *numDep
-  *   Number of dependencies for a cuda native event.
-  * @param **pRMR
-  *  Raw metric requests for a cuda native event.
+/** @class shutdown_event_stats_table
+  * @brief Shutdown StringVector structure that holds the statistic qualifiers  
+  *        for event names.
 */
-static int retrieve_metric_rmr( NVPA_MetricsContext *pMetricsContext, const char *evt_name,
-                                int *numDep, NVPA_RawMetricRequest **pRMR )
+static void shutdown_event_stats_table(void)
 {
-    COMPDBG("Entering.\n");
-    int num_dep, i;
-    NVPA_Status nvpa_err;
-    NVPA_RawMetricRequest *rmr;
-
-    /* check to make sure an argument has been passed for evt_name */
-    if ( evt_name == NULL ) {
-        return PAPI_EINVAL;
-    }
-
-    /* instantiate a new metric properties structure with the provided evt_name */
-    NVPW_MetricsContext_GetMetricProperties_Begin_Params getMetricPropertiesBeginParams = {
-        .structSize = NVPW_MetricsContext_GetMetricProperties_Begin_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pMetricsContext = pMetricsContext,
-        .pMetricName = evt_name,
-    };
-
-    /* collect metric properties such as dependencies and description for the 
-       structure created by the passed evt_name */
-    nvpa_err = NVPW_MetricsContext_GetMetricProperties_BeginPtr(&getMetricPropertiesBeginParams);
-    if (nvpa_err != NVPA_STATUS_SUCCESS || getMetricPropertiesBeginParams.ppRawMetricDependencies == NULL) {
-        return PAPI_EINVAL;
-    }
-
-    for (num_dep = 0; getMetricPropertiesBeginParams.ppRawMetricDependencies[num_dep] != NULL; num_dep++) {;}
-
-    rmr = (NVPA_RawMetricRequest *) papi_calloc(num_dep, sizeof(NVPA_RawMetricRequest));
-    if (rmr == NULL) {
-        return PAPI_ENOMEM;
-    }
-
-    for (i = 0; i < num_dep; i++) {
-        rmr[i].pMetricName = strdup(getMetricPropertiesBeginParams.ppRawMetricDependencies[i]);
-        rmr[i].isolated = 1;
-        rmr[i].keepInstances = 1;
-        rmr[i].structSize = NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE;
+    int i;
+    for (i = 0; i < cuptiu_table_p->event_stats_count; i++) {
+        free_vector(&cuptiu_table_p->event_stats[i]);
     }
-
-    /* store number of dependencies and raw metric requests */
-    *numDep = num_dep;
-    *pRMR = rmr;
     
-    /* ending/deleting instantiated struct created by passed evt_name */
-    NVPW_MetricsContext_GetMetricProperties_End_Params getMetricPropertiesEndParams = {
-        .structSize = NVPW_MetricsContext_GetMetricProperties_End_Params_STRUCT_SIZE,
-        .pPriv = NULL,
-        .pMetricsContext = pMetricsContext,
-    };
-
-    /* ending pointer created by passed evt_name */
-    nvpwCheckErrors( NVPW_MetricsContext_GetMetricProperties_EndPtr(&getMetricPropertiesEndParams), return PAPI_EMISC );
+    cuptiu_table_p->event_stats_count = 0;
 
-    return PAPI_OK;
+    free(cuptiu_table_p->event_stats);
 }
 
 /** @class cuptip_evt_enum
@@ -2286,11 +1557,11 @@ static int retrieve_metric_rmr( NVPA_Met
   *   Modifies the search logic. Three modifiers are used PAPI_ENUM_FIRST,
   *   PAPI_ENUM_EVENTS, and PAPI_NTV_ENUM_UMASKS.
 */
-int cuptip_evt_enum(uint64_t *event_code, int modifier)
+int cuptip_evt_enum(uint32_t *event_code, int modifier)
 {
     int papi_errno = PAPI_OK;
     event_info_t info;
-    SUBDBG("ENTER: event_code: %lu, modifier: %d\n", *event_code, modifier);
+    SUBDBG("ENTER: event_code: %u, modifier: %d\n", *event_code, modifier);
 
     switch(modifier) {
         case PAPI_ENUM_FIRST:
@@ -2298,6 +1569,7 @@ int cuptip_evt_enum(uint64_t *event_code
                 papi_errno = PAPI_ENOEVNT;
                 break;
             }
+            info.stat = 0;
             info.device = 0;
             info.flags = 0;
             info.nameid = 0;
@@ -2309,13 +1581,14 @@ int cuptip_evt_enum(uint64_t *event_code
                 break;
             }
             if (cuptiu_table_p->count > info.nameid + 1) {
+                info.stat = 0;
                 info.device = 0;
                 info.flags = 0;
                 info.nameid++;
                 papi_errno = evt_id_create(&info, event_code);
                 break;
             }
-            papi_errno = PAPI_END;
+            papi_errno = PAPI_ENOEVNT;
             break;
         case PAPI_NTV_ENUM_UMASKS:
             papi_errno = evt_id_to_info(*event_code, &info);
@@ -2323,12 +1596,21 @@ int cuptip_evt_enum(uint64_t *event_code
                 break;
             }
             if (info.flags == 0){
+                info.stat = 0;
+                info.device = 0;
+                info.flags = STAT_FLAG;
+                papi_errno = evt_id_create(&info, event_code);
+                break;
+            }
+            
+            if (info.flags == STAT_FLAG){
+                info.stat = 0;
                 info.device = 0;
                 info.flags = DEVICE_FLAG;
                 papi_errno = evt_id_create(&info, event_code);
                 break;
             }
-            papi_errno = PAPI_END;
+            papi_errno = PAPI_ENOEVNT;
             break;
         default:
             papi_errno = PAPI_EINVAL;
@@ -2347,19 +1629,18 @@ int cuptip_evt_enum(uint64_t *event_code
   * @param len
   *   Maximum alloted characters for Cuda native event description. 
 */
-int cuptip_evt_code_to_descr(uint64_t event_code, char *descr, int len) 
+int cuptip_evt_code_to_descr(uint32_t event_code, char *descr, int len) 
 {
-    int papi_errno, str_len;
     event_info_t info;
-    papi_errno = evt_id_to_info(event_code, &info);
+    int papi_errno = evt_id_to_info(event_code, &info);
     if (papi_errno != PAPI_OK) {
         return papi_errno;
     }    
 
-    str_len = snprintf(descr, (size_t) len, "%s", cuptiu_table_p->events[event_code].desc);
-    if (str_len > len) {
+    int str_len = snprintf(descr, (size_t) len, "%s", cuptiu_table_p->events[info.nameid].desc); // events[] is indexed by the decoded nameid, not by the packed event code
+    if (str_len < 0 || str_len >= len) {
         ERRDBG("String formatting exceeded max string length.\n");
-        return PAPI_ENOMEM;  
+        return PAPI_EBUF;  
     }    
 
     return papi_errno;
@@ -2373,14 +1654,14 @@ int cuptip_evt_code_to_descr(uint64_t ev
   * @param *event_code
   *   Corresponding Cuda native event code for provided Cuda native event name.
 */
-int cuptip_evt_name_to_code(const char *name, uint64_t *event_code)
+int cuptip_evt_name_to_code(const char *name, uint32_t *event_code)
 {
-    int htable_errno, device, flags, nameid, papi_errno = PAPI_OK;
+    int htable_errno, device, stat, flags, nameid, papi_errno = PAPI_OK;
     cuptiu_event_t *event;
     char base[PAPI_MAX_STR_LEN] = { 0 };
     SUBDBG("ENTER: name: %s, event_code: %p\n", name, event_code);
 
-    papi_errno = evt_name_to_device(name, &device);
+    papi_errno = cuda_verify_no_repeated_qualifiers(name);
     if (papi_errno != PAPI_OK) {
         goto fn_exit;
     }
@@ -2390,28 +1671,56 @@ int cuptip_evt_name_to_code(const char *
         goto fn_exit;
     }
 
+    papi_errno = evt_name_to_device(name, &device, base);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+    
+    papi_errno = evt_name_to_stat(name, &stat, base);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
     htable_errno = htable_find(cuptiu_table_p->htable, base, (void **) &event);
     if (htable_errno != HTABLE_SUCCESS) {
         papi_errno = (htable_errno == HTABLE_ENOVAL) ? PAPI_ENOEVNT : PAPI_ECMP;
         goto fn_exit;
     }
-
-    /* flags = DEVICE_FLAG will need to be updated if more qualifiers are added,
-       see implemtation in rocm (roc_profiler.c) */
-    flags = (device >= 0) ? DEVICE_FLAG:0;
+ 
+    flags = (event->stat->size >= 0) ? (STAT_FLAG | DEVICE_FLAG) : DEVICE_FLAG;
     if (flags == 0){
         papi_errno = PAPI_EINVAL;
         goto fn_exit;
     }
 
     nameid = (int) (event - cuptiu_table_p->events);
-    event_info_t info = { device, flags, nameid };
+
+    event_info_t info = { stat, device, flags, nameid };
+
     papi_errno = evt_id_create(&info, event_code);
     if (papi_errno != PAPI_OK) {
         goto fn_exit;
     }
-
     papi_errno = evt_id_to_info(*event_code, &info);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
+    // Section handles if the Cuda component is partially disabled
+    int *enabledCudaDeviceIds, cudaCmpPartial;
+    size_t cudaEnabledDevicesCnt;
+    cuptic_partial(&cudaCmpPartial, &enabledCudaDeviceIds, &cudaEnabledDevicesCnt);
+    if (cudaCmpPartial) {
+        papi_errno = PAPI_PARTIAL;
+
+        int i; 
+        for (i = 0; i < cudaEnabledDevicesCnt; i++) {
+            if (device == enabledCudaDeviceIds[i]) {
+                papi_errno = PAPI_OK;
+                break;
+            }
+        }
+    }
 
     fn_exit:
         SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
@@ -2428,8 +1737,8 @@ int cuptip_evt_name_to_code(const char *
   * @param len
   *   Maximum alloted characters for base Cuda native event name. 
 */
-int cuptip_evt_code_to_name(uint64_t event_code, char *name, int len)
-{ 
+int cuptip_evt_code_to_name(uint32_t event_code, char *name, int len)
+{
     return evt_code_to_name(event_code, name, len);
 }
 
@@ -2443,29 +1752,51 @@ int cuptip_evt_code_to_name(uint64_t eve
   * @param len
   *   Maximum alloted characters for base Cuda native event name. 
 */
-static int evt_code_to_name(uint64_t event_code, char *name, int len)
+static int evt_code_to_name(uint32_t event_code, char *name, int len)
 {
-    int papi_errno, str_len;
-
     event_info_t info;
-    papi_errno = evt_id_to_info(event_code, &info);
+    int papi_errno = evt_id_to_info(event_code, &info);
     if (papi_errno != PAPI_OK) {
         return papi_errno;
     }
 
+    int str_len;
+    char stat[PAPI_HUGE_STR_LEN] = ""; 
+    if (info.stat < NUM_STATS_QUALS){
+        str_len = snprintf(stat, sizeof(stat), "%s", stats[info.stat]);
+        if (str_len < 0 || str_len >= PAPI_HUGE_STR_LEN) {
+            ERRDBG("String larger than PAPI_HUGE_STR_LEN");
+            return PAPI_EBUF;
+        }
+    }
+
     switch (info.flags) {
         case (DEVICE_FLAG):
             str_len = snprintf(name, len, "%s:device=%i", cuptiu_table_p->events[info.nameid].name, info.device);
-            if (str_len > len) {
+            if (str_len < 0 || str_len >= len) {
                 ERRDBG("String formatting exceeded max string length.\n");
-                return PAPI_ENOMEM;
+                return PAPI_EBUF;
+            }
+            break;
+        case (STAT_FLAG): // only the stat qualifier is present
+            str_len = snprintf(name, len, "%s:stat=%s", cuptiu_table_p->events[info.nameid].name, stat);
+            if (str_len < 0 || str_len >= len) {
+                ERRDBG("String formatting exceeded max string length.\n");
+                return PAPI_EBUF;
+            }
+            break;
+        case (DEVICE_FLAG | STAT_FLAG):
+            str_len = snprintf(name, len, "%s:stat=%s:device=%i", cuptiu_table_p->events[info.nameid].name, stat, info.device);
+            if (str_len < 0 || str_len >= len) {
+                ERRDBG("String formatting exceeded max string length.\n");
+                return PAPI_EBUF;
             }
             break;
         default:
             str_len = snprintf(name, len, "%s", cuptiu_table_p->events[info.nameid].name);
-            if (str_len > len) {
+            if (str_len < 0 || str_len >= len) {
                 ERRDBG("String formatting exceeded max string length.\n");
-                return PAPI_ENOMEM;
+                return PAPI_EBUF;
             }
             break;
     }
@@ -2482,47 +1813,145 @@ static int evt_code_to_name(uint64_t eve
   *   Structure for member variables such as symbol, short description, and 
   *   long desctiption. 
 */
-int cuptip_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info)
+int cuptip_evt_code_to_info(uint32_t event_code, PAPI_event_info_t *info)
 {
-    int papi_errno, i, gpu_id;
-    char description[PAPI_HUGE_STR_LEN];
-
-    /* get the events nameid and flags */
     event_info_t inf;
-    papi_errno = evt_id_to_info(event_code, &inf);
+    int papi_errno = evt_id_to_info(event_code, &inf);
     if (papi_errno != PAPI_OK) {
         return papi_errno;
     }
 
-    /* collect the description and calculated numpass for the Cuda event  */
+    const char *stat_position = strstr(cuptiu_table_p->events[inf.nameid].basenameWithStatReplaced, "stat");
+    if (stat_position == NULL) {
+        return PAPI_ENOMEM;
+    }
+    size_t basename_len = stat_position - cuptiu_table_p->events[inf.nameid].basenameWithStatReplaced;
+    char reconstructedEventName[PAPI_HUGE_STR_LEN]="";
+    int strLen = snprintf(reconstructedEventName, PAPI_MAX_STR_LEN, "%.*s%s%s",
+               (int)basename_len,
+               cuptiu_table_p->events[inf.nameid].basenameWithStatReplaced,
+               cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[0],
+               stat_position + 4);
+
+    int i;
+    // For a Cuda event collect the description, units, and number of passes
     if (cuptiu_table_p->events[inf.nameid].desc[0] == '\0') {
-        /* find a matching device id to get correct MetricsContext and chip name */
-        for (i = 0; i < num_gpus; ++i) {
+        int dev_id = -1;
+        for (i = 0; i < numDevicesOnMachine; ++i) {
             if (cuptiu_dev_check(cuptiu_table_p->events[inf.nameid].device_map, i)) {
-                gpu_id = i;
+                dev_id = i;
                 break;
             }
         }
-        papi_errno = retrieve_metric_descr( cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams->pMetricsContext,
-                                            cuptiu_table_p->events[inf.nameid].name, cuptiu_table_p->events[inf.nameid].desc,
-                                            cuptiu_table_p->avail_gpu_info[gpu_id].pmetricsContextCreateParams->pChipName );
+
+        if (dev_id == -1) {
+            SUBDBG("Failed to find a matching device in the device map.\n");
+            return PAPI_EINVAL;
+        }
+
+        papi_errno = get_metric_properties( cuptiu_table_p->avail_gpu_info[dev_id].chipName, 
+                                            reconstructedEventName,
+                                            cuptiu_table_p->events[inf.nameid].desc );
         if (papi_errno != PAPI_OK) {
             return papi_errno;
         }
     }
 
+    char all_stat[PAPI_HUGE_STR_LEN]="";
     switch (inf.flags) {
         case (0):
-            /* store details for the Cuda event */ 
-            snprintf( info->symbol, PAPI_HUGE_STR_LEN, "%s", cuptiu_table_p->events[inf.nameid].name );
-            snprintf( info->short_descr, PAPI_MIN_STR_LEN, "%s", cuptiu_table_p->events[inf.nameid].desc );
-            snprintf( info->long_descr, PAPI_HUGE_STR_LEN, "%s", cuptiu_table_p->events[inf.nameid].desc );
+        {
+            // Store details for the Cuda event
+            strLen = snprintf( info->symbol, PAPI_HUGE_STR_LEN, "%s", cuptiu_table_p->events[inf.nameid].name );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("Failed to fully write metric name in case 0.\n");
+                return PAPI_EBUF;
+            }
+            strLen = snprintf( info->long_descr, PAPI_HUGE_STR_LEN, "%s", cuptiu_table_p->events[inf.nameid].desc );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("Failed to fully write long description in case 0.\n")
+                return PAPI_EBUF;
+            }
             break;
+        }
         case DEVICE_FLAG:
         {
+            char devices[PAPI_MAX_STR_LEN] = { 0 };
+            int init_metric_dev_id;
+            for (i = 0; i < numDevicesOnMachine; ++i) {
+                if (cuptiu_dev_check(cuptiu_table_p->events[inf.nameid].device_map, i)) {
+                    // For an event, store the first device found to use with :device=#, 
+                    // as on a heterogenous system events may not appear on each device
+                    if (devices[0] == '\0') {
+                        init_metric_dev_id = i;
+
+                    }
+                    int strLen = snprintf(devices + strlen(devices), PAPI_MAX_STR_LEN, "%i,", i);
+                    if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+                        ERRDBG("Failed to fully write device qualifiers.\n");
+                    }
+                    
+                }
+            }
+            *(devices + strlen(devices) - 1) = 0;
+
+            // Store details for the Cuda event
+            strLen = snprintf( info->symbol, PAPI_HUGE_STR_LEN, "%s:device=%i", cuptiu_table_p->events[inf.nameid].name, init_metric_dev_id );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("Failed to fully write metric name in case DEVICE_FLAG.\n");
+                return PAPI_EBUF;
+            }
+            strLen = snprintf( info->long_descr, PAPI_HUGE_STR_LEN, "%s masks:Mandatory device qualifier [%s]",
+                      cuptiu_table_p->events[inf.nameid].desc, devices );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("Failed to fully write long description in case DEVICE_FLAG.\n");
+                return PAPI_EBUF;
+            }
+            break;
+        }
+        case STAT_FLAG:
+        {
+            all_stat[0]= '\0'; 
+            size_t current_len = strlen(all_stat);
+            for (size_t i = 0; i < cuptiu_table_p->events[inf.nameid].stat->size; i++) {
+                  size_t remaining_space = PAPI_HUGE_STR_LEN - current_len - 1;  // Calculate remaining space
+                
+                // Ensure there's enough space for the string before concatenating
+                if (remaining_space > 0) {
+                    strncat(all_stat, cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[i], remaining_space);
+                    current_len += strlen(cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[i]);
+                } else {
+                    ERRDBG("Not enough space for the all_stat string")
+                    return papi_errno;
+                }
+
+                // Add a comma only if there is space and it is not the last element
+                if (i < cuptiu_table_p->events[inf.nameid].stat->size - 1 && remaining_space > 2) {
+                    strncat(all_stat, ", ", remaining_space - 2);
+                    current_len += 2;  // Account for the added comma and space
+                }
+            }
+        
+            /* cuda native event name */
+            strLen = snprintf( info->symbol, PAPI_HUGE_STR_LEN, "%s:stat=%s", cuptiu_table_p->events[inf.nameid].name, cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[0] );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("Failed to fully write metric name in case STAT_FLAG.\n");
+                return PAPI_EBUF;
+            }
+            /* cuda native event long description */
+            strLen = snprintf( info->long_descr, PAPI_HUGE_STR_LEN, "%s masks:Mandatory stat qualifier [%s]",
+                      cuptiu_table_p->events[inf.nameid].desc, all_stat );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("Failed to fully write long description in case STAT_FLAG.\n");
+                return PAPI_EBUF;
+            }
+            break;
+        }
+        case (STAT_FLAG | DEVICE_FLAG):
+        {
             int init_metric_dev_id;
             char devices[PAPI_MAX_STR_LEN] = { 0 };
-            for (i = 0; i < num_gpus; ++i) {
+            for (i = 0; i < numDevicesOnMachine; ++i) {
                 if (cuptiu_dev_check(cuptiu_table_p->events[inf.nameid].device_map, i)) {
                     /* for an event, store the first device found to use with :device=#, 
                        as on a heterogenous system events may not appear on each device */
@@ -2534,13 +1963,42 @@ int cuptip_evt_code_to_info(uint64_t eve
                 }
             }
             *(devices + strlen(devices) - 1) = 0;
+            
+            all_stat[0]= '\0'; 
+            size_t current_len = strlen(all_stat);
+            for (size_t i = 0; i < cuptiu_table_p->events[inf.nameid].stat->size; i++) {
+                  size_t remaining_space = PAPI_HUGE_STR_LEN - current_len - 1;  // Calculate remaining space
+                
+                // Ensure there's enough space for the string before concatenating
+                if (remaining_space > 0) {
+                    strncat(all_stat, cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[i], remaining_space);
+                    current_len += strlen(cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[i]);
+                } else {
+                    ERRDBG("Not enough space for the all_stat string")
+                    return papi_errno;
+                }
 
-            /* store details for the Cuda event */
-            snprintf( info->symbol, PAPI_HUGE_STR_LEN, "%s:device=%i", cuptiu_table_p->events[inf.nameid].name, init_metric_dev_id );
-            snprintf( info->short_descr, PAPI_MIN_STR_LEN, "%s masks:Mandatory device qualifier [%s]",
-                     cuptiu_table_p->events[inf.nameid].desc, devices );
-            snprintf( info->long_descr, PAPI_HUGE_STR_LEN, "%s masks:Mandatory device qualifier [%s]",
-                      cuptiu_table_p->events[inf.nameid].desc, devices );
+                // Add a comma only if there is space and it is not the last element
+                if (i < cuptiu_table_p->events[inf.nameid].stat->size - 1 && remaining_space > 2) {
+                    strncat(all_stat, ", ", remaining_space - 2);
+                    current_len += 2;  // Account for the added comma and space
+                }
+            }
+        
+            /* cuda native event name */
+            strLen = snprintf( info->symbol, PAPI_HUGE_STR_LEN, "%s:stat=%s:device=%i", cuptiu_table_p->events[inf.nameid].name, cuptiu_table_p->events[inf.nameid].stat->arrayMetricStatistics[0], init_metric_dev_id);
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("String larger than PAPI_HUGE_STR_LEN");
+                return PAPI_EBUF;
+            }
+            
+            /* cuda native event long description */
+            strLen = snprintf( info->long_descr, PAPI_HUGE_STR_LEN, "%s masks:Mandatory stat qualifier [%s]:Mandatory device qualifier [%s]",
+                      cuptiu_table_p->events[inf.nameid].desc, all_stat, devices  );
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                ERRDBG("String larger than PAPI_HUGE_STR_LEN");
+                return PAPI_EBUF;
+            }
             break;
         }
         default:
@@ -2563,6 +2021,7 @@ int cuptip_evt_code_to_info(uint64_t eve
 static int evt_name_to_basename(const char *name, char *base, int len)
 {
     char *p = strstr(name, ":");
+    
     if (p) {
         if (len < (int)(p - name)) {
             return PAPI_EBUF;
@@ -2577,6 +2036,112 @@ static int evt_name_to_basename(const ch
     return PAPI_OK;
 }
 
+/** @class cuda_verify_no_repeated_qualifiers
+  * @brief Verify that a user has not added multiple device or stats qualifiers
+  *        to an event name. The name is scanned in place, so strtok() and its
+  *        hidden static state are not involved.
+  * @param *eventName
+  *   User provided event name we need to verify.
+*/
+static int cuda_verify_no_repeated_qualifiers(const char *eventName)
+{
+    // Names too long for the former PAPI_2MAX_STR_LEN scratch copy stay rejected
+    if (strlen(eventName) >= PAPI_2MAX_STR_LEN) {
+        ERRDBG("Failed to fully write eventName into tmpEventName.\n");
+        return PAPI_EBUF;
+    }
+    int numDeviceQualifiers = 0, numStatsQualifiers = 0;
+    const char *field = eventName;
+    while (*field != '\0') {
+        // Neither literal contains ':', so a match lies within the current field
+        if (strncmp(field, "device", 6) == 0) {
+            numDeviceQualifiers++;
+        }
+        else if (strncmp(field, "stat", 4) == 0) {
+            numStatsQualifiers++;
+        }
+        // Continue after the next ':'; with none left, stop at the terminator
+        const char *colon = strchr(field, ':');
+        field = (colon != NULL) ? colon + 1 : field + strlen(field);
+    }
+
+    if (numDeviceQualifiers > 1 || numStatsQualifiers > 1) {
+        ERRDBG("Provided Cuda event has multiple device or stats qualifiers appended.\n");
+        return PAPI_ENOEVNT;
+    }
+    return PAPI_OK;
+}
+
+/** @class cuda_verify_qualifiers
+  * @brief Verify that the device and/or stats qualifier provided by the user
+  *        is valid. E.g. :device=# or :stat=avg.
+  *
+  * @param flag
+  *   Device or stats flag define. Allows us to determine the case to enter for
+  *   the switch statement.
+  * @param *qualifierName
+  *   Qualifier text to verify, starting at its ':'. E.g. :device=0 or :stat=avg.
+  * @param equalitySignPosition
+  *   Position of where the equal sign is located in the qualifier string name.
+  * @param *qualifierValue
+  *   Written only when the qualifier is valid: receives either a device index
+  *   or a statistic index.
+*/
+static int cuda_verify_qualifiers(int flag, char *qualifierName, int equalitySignPosition, int *qualifierValue)
+{
+    // Verify that an equal sign was provided where it was suppose to be
+    if (qualifierName[equalitySignPosition] != '=') {
+        SUBDBG("Improper qualifier name. No equal sign found.\n");
+        return PAPI_ENOEVNT;
+    }
+    char *value = qualifierName + equalitySignPosition + 1; // Text right after the '='
+
+    switch(flag)
+    {
+        case DEVICE_FLAG:
+        {
+            // Verify that the next character after the equal sign is indeed a digit
+            int isDigit = (unsigned) value[0] - '0' < 10;
+            if (!isDigit) {
+                SUBDBG("Improper device qualifier name. Digit does not follow equal sign.\n");
+                return PAPI_ENOEVNT;
+            }
+            char *endPtr;
+            long deviceIdx = strtol(value, &endPtr, 10);
+            // Reject an index that narrowing to int would silently turn into another device
+            if (deviceIdx != (long) (int) deviceIdx) {
+                SUBDBG("Improper device qualifier name. Device index does not fit in an int.\n");
+                return PAPI_ENOEVNT;
+            }
+            // Check to make sure only qualifiers have been appended
+            if (*endPtr != '\0' && strncmp(endPtr, ":stat", 5) != 0) {
+                return PAPI_ENOEVNT;
+            }
+            *qualifierValue = (int) deviceIdx;
+            return PAPI_OK;
+        }
+        case STAT_FLAG:
+        {
+            int i;
+            for (i = 0; i < NUM_STATS_QUALS; i++) {
+                size_t token_len = strlen(stats[i]);
+                if (strncmp(value, stats[i], token_len) == 0) {
+                    // Check to make sure only qualifiers have been appended
+                    char *no_excess_chars = value + token_len;
+                    if (strlen(no_excess_chars) == 0 || strncmp(no_excess_chars, ":device", 7) == 0) {
+                        *qualifierValue = i;
+                        return PAPI_OK;
+                    }
+                }
+            }
+            return PAPI_ENOEVNT;
+        }
+        default:
+            SUBDBG("Flag provided is not accounted for in switch statement.\n");
+            return PAPI_EINVAL;
+    }
+}
+
 /** @class evt_name_to_device
   * @brief Return the device number for a user provided Cuda native event.
   *        This can be done with a device qualifier present (:device=#) or
@@ -2586,30 +2151,1295 @@ static int evt_name_to_basename(const ch
   * @param *device
   *   Device number.
 */
-static int evt_name_to_device(const char *name, int *device)
+static int evt_name_to_device(const char *name, int *device, const char *base)
 {
-    char *p = strstr(name, ":device=");
+    char *p = strstr(name, ":device");
     // User did provide :device=# qualifier
-    if (p) {
-        *device = (int) strtol(p + strlen(":device="), NULL, 10);
+    if (p != NULL) {
+        int equalitySignPos = 7;
+        // The verifier stores the parsed index in *device; its status, success
+        // or failure, is the final result for a name carrying the qualifier.
+        int papi_errno = cuda_verify_qualifiers(DEVICE_FLAG, p, equalitySignPos, device);
+        return papi_errno;
     }
     // User did not provide :device=# qualifier
     else {
         int i, htable_errno;
         cuptiu_event_t *event;
 
-        htable_errno = htable_find(cuptiu_table_p->htable, name, (void **) &event);
+        htable_errno = htable_find(cuptiu_table_p->htable, base, (void **) &event);
         if (htable_errno != HTABLE_SUCCESS) {
             return PAPI_EINVAL;
         }
-
         // Search for the first device the event exists for.
-        for (i = 0; i < num_gpus; ++i) {
+        for (i = 0; i < numDevicesOnMachine; ++i) {
             if (cuptiu_dev_check(event->device_map, i)) {
                 *device = i;
-                break;
+                return PAPI_OK;
             }
         }
     }
+    // Reached only when the device map has no device set: *device is unwritten, so fail
+    return PAPI_EINVAL;
+}
+
+/** @class evt_name_to_stat
+  * @brief Take a Cuda native event name and collect the index of its stat
+  *        qualifier; without one, fall back to the event's first statistic.
+  * @param *name
+  *   Cuda native event name, optionally with a stat qualifier appended.
+  * @param *stat
+  *   Stat collected.
+  * @param *base
+  *   Hash table key used to look up the event when no qualifier is present.
+*/
+static int evt_name_to_stat(const char *name, int *stat, const char *base)
+{
+    char *p = strstr(name, ":stat");
+    if (p != NULL) {
+        int equalitySignPos = 5;
+        return cuda_verify_qualifiers(STAT_FLAG, p, equalitySignPos, stat);
+    }
+    cuptiu_event_t *event;
+    int htable_errno = htable_find(cuptiu_table_p->htable, base, (void **) &event);
+    if (htable_errno != HTABLE_SUCCESS) {
+        return PAPI_ENOEVNT;
+    }
+    int i;
+    for (i = 0; i < NUM_STATS_QUALS; i++) {
+        size_t token_len = strlen(stats[i]);
+        if (strncmp(event->stat->arrayMetricStatistics[0], stats[i], token_len) == 0) {
+            *stat = i;
+            return PAPI_OK;
+        }
+    }
+    // Every path must return a value; an unmatched statistic is an unknown event
+    return PAPI_ENOEVNT;
+}
+/** @class assign_chipnames_for_a_device_index
+  * @brief Query the chip name of every device index below numDevicesOnMachine
+  *        and copy it into that device's avail_gpu_info entry.
+*/
+static int assign_chipnames_for_a_device_index(void)
+{
+    char queriedName[PAPI_MIN_STR_LEN];
+    int deviceIdx = 0;
+    while (deviceIdx < numDevicesOnMachine) {
+        // Any failure to obtain the name is reported as PAPI_EMISC
+        if (get_chip_name(deviceIdx, queriedName) != PAPI_OK) {
+            return PAPI_EMISC;
+        }
+
+        int written = snprintf(cuptiu_table_p->avail_gpu_info[deviceIdx].chipName, PAPI_MIN_STR_LEN, "%s", queriedName);
+        if (written < 0 || written >= PAPI_MIN_STR_LEN) {
+            SUBDBG("Failed to fully write chip name.\n");
+            return PAPI_EBUF;
+        }
+        deviceIdx++;
+    }
     return PAPI_OK;
 }
+
+/** @class determine_dev_cc_major
+  * @brief Choose between API_PERFWORKS and API_EVENTS for a device based on
+  *        the value reported by get_gpu_compute_capability.
+*/
+static int determine_dev_cc_major(int dev_id)
+{
+    int computeCapability;
+    int status = get_gpu_compute_capability(dev_id, &computeCapability);
+    if (status != PAPI_OK) {
+        return status;
+    }
+
+    // TODO: Once the Events API is added back, move this to either cupti_utils or papi_cupti_common
+    //       with updated logic.
+    // A reported value of 70 or more selects Perfworks, anything lower Events
+    return (computeCapability >= 70) ? API_PERFWORKS : API_EVENTS;
+}
+
+/**
+ *  @}
+ ******************************************************************************/
+ 
+/***************************************************************************//**
+ *  @name   Metrics Evaluator
+ *  @{
+ */
+
+/** @class enumerate_metrics_for_unique_devices
+ *  @brief Get the total number of metrics on a device and the subsequent metric names
+ *         using the Metrics Evaluator API.
+ *
+ *  @param *pChipName
+ *    A Cuda device chip name.
+ *  @param *totalNumMetrics
+ *    Count of the total number of metrics found on a device.
+ *  @param ***arrayOfMetricNames
+ *    Constructed metric names, written only on success as a heap-allocated array of
+ *    heap-allocated strings. A name is built as metricName.rollup.submetric.
+*/
+static int enumerate_metrics_for_unique_devices(const char *pChipName, int *totalNumMetrics, char ***arrayOfMetricNames)
+{
+    NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParam = {NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
+    calculateScratchBufferSizeParam.pChipName = pChipName;
+    calculateScratchBufferSizeParam.pCounterAvailabilityImage = NULL;
+    nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr(&calculateScratchBufferSizeParam), return PAPI_EMISC );
+
+    uint8_t myScratchBuffer[calculateScratchBufferSizeParam.scratchBufferSize];
+    NVPW_CUDA_MetricsEvaluator_Initialize_Params metricEvaluatorInitializeParams = {NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
+    metricEvaluatorInitializeParams.scratchBufferSize = calculateScratchBufferSizeParam.scratchBufferSize;
+    metricEvaluatorInitializeParams.pScratchBuffer = myScratchBuffer;
+    metricEvaluatorInitializeParams.pChipName = pChipName;
+    metricEvaluatorInitializeParams.pCounterAvailabilityImage = NULL;
+    nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_InitializePtr(&metricEvaluatorInitializeParams), return PAPI_EMISC );
+    NVPW_MetricsEvaluator *pMetricsEvaluator = metricEvaluatorInitializeParams.pMetricsEvaluator;
+
+    char **metricNames = NULL;
+    int i, metricCount = 0, papi_errno;
+    for (i = 0; i < NVPW_METRIC_TYPE__COUNT; ++i) {
+        NVPW_MetricType metricType = (NVPW_MetricType)i;
+
+        NVPW_MetricsEvaluator_GetMetricNames_Params getMetricNamesParams = {NVPW_MetricsEvaluator_GetMetricNames_Params_STRUCT_SIZE};
+        getMetricNamesParams.metricType = metricType;
+        getMetricNamesParams.pMetricsEvaluator = pMetricsEvaluator;
+        getMetricNamesParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_GetMetricNamesPtr(&getMetricNamesParams), return PAPI_EMISC );
+
+        size_t metricIdx;
+        for (metricIdx = 0; metricIdx < getMetricNamesParams.numMetrics; ++metricIdx) {
+            size_t metricNameBeginIndex = getMetricNamesParams.pMetricNameBeginIndices[metricIdx];
+            const char *baseMetricName = &getMetricNamesParams.pMetricNames[metricNameBeginIndex];
+
+            char fullMetricName[PAPI_2MAX_STR_LEN];
+            int strLen = snprintf(fullMetricName, PAPI_2MAX_STR_LEN, "%s", baseMetricName);
+            if (strLen < 0 || strLen >= PAPI_2MAX_STR_LEN) {
+                SUBDBG("Failed to fully append the base metric name.\n");
+                return PAPI_EBUF;
+            }
+
+            int rollupMetricIdx;
+            for (rollupMetricIdx = 0; rollupMetricIdx < NVPW_ROLLUP_OP__COUNT; ++rollupMetricIdx) {
+                // Set the starting offset to be used for a metric
+                int offsetForMetricName = strlen(baseMetricName);
+                // Get the rollup metric if applicable
+                // Rollup's are required for Counter and Throughput, but does not apply to Ratio
+                char *rollupMetricName = NULL;
+                if (metricType != NVPW_METRIC_TYPE_RATIO) {
+                    papi_errno = get_rollup_metrics(rollupMetricIdx, &rollupMetricName);
+                    if (papi_errno != 0) {
+                        return papi_errno;
+                    }
+
+                    strLen = snprintf(fullMetricName + offsetForMetricName, PAPI_2MAX_STR_LEN - offsetForMetricName, "%s", rollupMetricName);
+                    if (strLen < 0 || strLen >= PAPI_2MAX_STR_LEN - offsetForMetricName) {
+                        SUBDBG("Failed to fully append rollup metric name.\n");
+                        return PAPI_EBUF;
+                    }
+
+                    // Update the offset as a rollup metric was found; the check above keeps it inside the buffer
+                    offsetForMetricName += strlen(rollupMetricName);
+                }
+
+                // Get the list of submetrics
+                // Submetrics are required for Ratio and Throughput, optional for Counter (here we do collect for Counter as well)
+                NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params supportedSubMetrics = {NVPW_MetricsEvaluator_GetSupportedSubmetrics_Params_STRUCT_SIZE};
+                supportedSubMetrics.pMetricsEvaluator = pMetricsEvaluator;
+                supportedSubMetrics.metricType = metricType;
+                supportedSubMetrics.pPriv = NULL;
+                nvpwCheckErrors( NVPW_MetricsEvaluator_GetSupportedSubmetricsPtr(&supportedSubMetrics), return PAPI_EMISC );
+
+                size_t subMetricIdx;
+                for (subMetricIdx = 0; subMetricIdx < supportedSubMetrics.numSupportedSubmetrics; ++subMetricIdx) {
+                    char *subMetricName;
+                    papi_errno = get_supported_submetrics(supportedSubMetrics.pSupportedSubmetrics[subMetricIdx], &subMetricName);
+                    if (papi_errno != 0) {
+                        return papi_errno;
+                    }
+
+                    // Write the suffix unconditionally: it is "" for NVPW_SUBMETRIC_NONE, which
+                    // also clears whatever suffix the previous submetric left behind
+                    strLen = snprintf(fullMetricName + offsetForMetricName, PAPI_2MAX_STR_LEN - offsetForMetricName, "%s", subMetricName);
+                    if (strLen < 0 || strLen >= PAPI_2MAX_STR_LEN - offsetForMetricName) {
+                        SUBDBG("Failed to fully append submetric names.\n");
+                        return PAPI_EBUF;
+                    }
+                    // NOTE(review): error returns in this function release neither metricNames nor the evaluator
+                    metricNames = (char **) realloc(metricNames, (metricCount + 1) * sizeof(char *));
+                    if (metricNames == NULL) {
+                        SUBDBG("Failed to allocate memory for metricNames.\n");
+                        return PAPI_ENOMEM;
+                    }
+                    metricNames[metricCount] = (char *) malloc(PAPI_2MAX_STR_LEN * sizeof(char));
+                    if (metricNames[metricCount] == NULL) {
+                        SUBDBG("Failed to allocate memory for the index %d in the array metricNames.\n", metricCount);
+                        return PAPI_ENOMEM;
+                    }
+
+                    // Store the constructed metric name
+                    strLen = snprintf(metricNames[metricCount], PAPI_2MAX_STR_LEN, "%s", fullMetricName);
+                    if (strLen < 0 || strLen >= PAPI_2MAX_STR_LEN) {
+                        SUBDBG("Failed to fully write constructued metric name: %s\n", fullMetricName);
+                        return PAPI_EBUF;
+                    }
+                    metricCount++;
+                }
+                // Avoid counting ratio metrics 4X more then should occur
+                if (metricType == NVPW_METRIC_TYPE_RATIO) {
+                    break;
+                }
+            }
+        }
+    }
+
+    papi_errno = destroy_metrics_evaluator(pMetricsEvaluator);
+    if (papi_errno != PAPI_OK) {
+        return papi_errno;
+    }
+
+    *totalNumMetrics = metricCount;
+    *arrayOfMetricNames = metricNames;
+
+    return PAPI_OK;
+}
+
+/** @class get_rollup_metrics
+  * @brief Translate a member of the NVPW_RollupOp enum (see nvperf_host.h) into its
+  *        metric name suffix. Rollups apply to Counter and Throughput, not Ratio.
+  * @param rollupMetric     A member of the enum NVPW_RollupOp.
+  * @param **strRollupMetric
+  *   Receives ".avg", ".max", ".min", ".sum", or "" for any other value.
+*/
+static int get_rollup_metrics(NVPW_RollupOp rollupMetric, char **strRollupMetric)
+{
+    char *suffix = "";
+    switch(rollupMetric)
+    {
+        case NVPW_ROLLUP_OP_AVG:
+            suffix = ".avg";
+            break;
+        case NVPW_ROLLUP_OP_MAX:
+            suffix = ".max";
+            break;
+        case NVPW_ROLLUP_OP_MIN:
+            suffix = ".min";
+            break;
+        case NVPW_ROLLUP_OP_SUM:
+            suffix = ".sum";
+            break;
+        default:
+            SUBDBG("Rollup metric was not one of avg, max, min, or sum.\n");
+            break;
+    }
+    *strRollupMetric = suffix;
+    return PAPI_OK;
+}
+
+/** @class get_supported_submetrics
+  * @brief Translate a member of the NVPW_Submetric enum into the suffix string
+  *        it contributes to a metric name. Note that, submetrics are required
+  *        for Ratio and Throughput, optional for Counter.
+  * @param subMetric
+  *   A member of the enum NVPW_Submetric. See nvperf_host.h for a full list.
+  * @param **strSubMetric
+  *   Receives the suffix; "" for NVPW_SUBMETRIC_NONE and any unlisted value.
+*/
+static int get_supported_submetrics(NVPW_Submetric subMetric, char **strSubMetric)
+{
+    // NOTE: The following submetrics are not supported in CUPTI 11.3 and onwards:
+    //       - Burst submetrics: .peak_burst, .pct_of_peak_burst_active, .pct_of_peak_burst_elapsed,
+    //                           .pct_of_peak_burst_region, .pct_of_peak_burst_frame.
+    //       - Throughput submetrics: the same four .pct_of_peak_burst_* submetrics.
+    char *suffix = "";
+    switch (subMetric)
+    {
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED:
+            suffix = ".peak_sustained";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE:
+            suffix = ".peak_sustained_active";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ACTIVE_PER_SECOND:
+            suffix = ".peak_sustained_active.per_second";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED:
+            suffix = ".peak_sustained_elapsed";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_ELAPSED_PER_SECOND:
+            suffix = ".peak_sustained_elapsed.per_second";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME:
+            suffix = ".peak_sustained_frame";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_FRAME_PER_SECOND:
+            suffix = ".peak_sustained_frame.per_second";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION:
+            suffix = ".peak_sustained_region";
+            break;
+        case NVPW_SUBMETRIC_PEAK_SUSTAINED_REGION_PER_SECOND:
+            suffix = ".peak_sustained_region.per_second";
+            break;
+        case NVPW_SUBMETRIC_PER_CYCLE_ACTIVE:
+            suffix = ".per_cycle_active";
+            break;
+        case NVPW_SUBMETRIC_PER_CYCLE_ELAPSED:
+            suffix = ".per_cycle_elapsed";
+            break;
+        case NVPW_SUBMETRIC_PER_CYCLE_IN_FRAME:
+            suffix = ".per_cycle_in_frame";
+            break;
+        case NVPW_SUBMETRIC_PER_CYCLE_IN_REGION:
+            suffix = ".per_cycle_in_region";
+            break;
+        case NVPW_SUBMETRIC_PER_SECOND:
+            suffix = ".per_second";
+            break;
+        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ACTIVE:
+            suffix = ".pct_of_peak_sustained_active";
+            break;
+        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_ELAPSED:
+            suffix = ".pct_of_peak_sustained_elapsed";
+            break;
+        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_FRAME:
+            suffix = ".pct_of_peak_sustained_frame";
+            break;
+        case NVPW_SUBMETRIC_PCT_OF_PEAK_SUSTAINED_REGION:
+            suffix = ".pct_of_peak_sustained_region";
+            break;
+        case NVPW_SUBMETRIC_MAX_RATE:
+            suffix = ".max_rate";
+            break;
+        case NVPW_SUBMETRIC_PCT:
+            suffix = ".pct";
+            break;
+        case NVPW_SUBMETRIC_RATIO:
+            suffix = ".ratio";
+            break;
+        case NVPW_SUBMETRIC_NONE:
+        default:
+            break;
+    }
+    *strSubMetric = suffix;
+    return PAPI_OK;
+}
+
+/** @class get_metric_properties
+ *  @brief For a metric, build one string with its description, units and number of passes.
+ *
+ *  @param *pChipName
+ *    The device chipname.
+ *  @param *metricName
+ *    A metric name from the Perfworks api.
+ *  @param *fullMetricDescription
+ *    Output buffer; at most PAPI_HUGE_STR_LEN bytes are written to it.
+ *  @return PAPI_OK on success, otherwise a PAPI error code.
+*/
+static int get_metric_properties(const char *pChipName, const char *metricName, char *fullMetricDescription)
+{
+    NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParam = {NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
+    calculateScratchBufferSizeParam.pChipName = pChipName;
+    calculateScratchBufferSizeParam.pCounterAvailabilityImage = NULL;
+    calculateScratchBufferSizeParam.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr(&calculateScratchBufferSizeParam), return PAPI_EMISC );
+
+    uint8_t myScratchBuffer[calculateScratchBufferSizeParam.scratchBufferSize];
+    NVPW_CUDA_MetricsEvaluator_Initialize_Params metricEvaluatorInitializeParams = {NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
+    metricEvaluatorInitializeParams.scratchBufferSize = calculateScratchBufferSizeParam.scratchBufferSize;
+    metricEvaluatorInitializeParams.pScratchBuffer = myScratchBuffer;
+    metricEvaluatorInitializeParams.pChipName = pChipName;
+    metricEvaluatorInitializeParams.pCounterAvailabilityImage = NULL;
+    metricEvaluatorInitializeParams.pCounterDataImage = NULL;
+    metricEvaluatorInitializeParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_InitializePtr(&metricEvaluatorInitializeParams), return PAPI_EMISC );
+    NVPW_MetricsEvaluator *pMetricsEvaluator = metricEvaluatorInitializeParams.pMetricsEvaluator;
+
+    NVPW_DimUnitFactor *dimUnitsFactor = NULL; // released at fn_exit
+    // Function scope on purpose: metricUnits may point at this buffer until the final snprintf.
+    char tmpMetricUnits[PAPI_MAX_STR_LEN] = { 0 };
+    const char *metricUnits = "unitless"; // It appears that some metrics have a bug which do not return a value of 1 when they should for unitless.
+    const char *metricDescription = NULL;
+    NVPW_MetricEvalRequest metricEvalRequest;
+    int papi_errno = get_metric_eval_request(pMetricsEvaluator, metricName, &metricEvalRequest);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+    NVPW_MetricType metricType = (NVPW_MetricType) metricEvalRequest.metricType;
+    size_t metricIndex = metricEvalRequest.metricIndex;
+
+    // For a metric, get the description
+    if (metricType == NVPW_METRIC_TYPE_COUNTER) {
+        NVPW_MetricsEvaluator_GetCounterProperties_Params counterPropParams = {NVPW_MetricsEvaluator_GetCounterProperties_Params_STRUCT_SIZE};
+        counterPropParams.pMetricsEvaluator = pMetricsEvaluator;
+        counterPropParams.counterIndex = metricIndex;
+        counterPropParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_GetCounterPropertiesPtr(&counterPropParams), goto fn_fail );
+        metricDescription = counterPropParams.pDescription;
+    }
+    else if (metricType == NVPW_METRIC_TYPE_RATIO) {
+        NVPW_MetricsEvaluator_GetRatioMetricProperties_Params ratioPropParams = {NVPW_MetricsEvaluator_GetRatioMetricProperties_Params_STRUCT_SIZE};
+        ratioPropParams.pMetricsEvaluator = pMetricsEvaluator;
+        ratioPropParams.ratioMetricIndex = metricIndex;
+        ratioPropParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_GetRatioMetricPropertiesPtr(&ratioPropParams), goto fn_fail );
+        metricDescription = ratioPropParams.pDescription;
+    }
+    else if (metricType == NVPW_METRIC_TYPE_THROUGHPUT) {
+        NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params throughputPropParams = {NVPW_MetricsEvaluator_GetThroughputMetricProperties_Params_STRUCT_SIZE};
+        throughputPropParams.pMetricsEvaluator = pMetricsEvaluator;
+        throughputPropParams.throughputMetricIndex = metricIndex;
+        throughputPropParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_GetThroughputMetricPropertiesPtr(&throughputPropParams), goto fn_fail );
+        metricDescription = throughputPropParams.pDescription;
+    }
+    else {
+        SUBDBG("Unsupported metric type, no description available.\n");
+        goto fn_fail;
+    }
+
+    // For a metric, get the dimensional units
+    NVPW_MetricsEvaluator_GetMetricDimUnits_Params dimUnitsParams = {NVPW_MetricsEvaluator_GetMetricDimUnits_Params_STRUCT_SIZE};
+    dimUnitsParams.pMetricsEvaluator = pMetricsEvaluator;
+    dimUnitsParams.pMetricEvalRequest = &metricEvalRequest;
+    dimUnitsParams.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
+    dimUnitsParams.dimUnitFactorStructSize = NVPW_DimUnitFactor_STRUCT_SIZE;
+    dimUnitsParams.pDimUnits = NULL;
+    dimUnitsParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_MetricsEvaluator_GetMetricDimUnitsPtr(&dimUnitsParams), goto fn_fail );
+
+    if (dimUnitsParams.numDimUnits > 0) {
+        dimUnitsFactor = (NVPW_DimUnitFactor *) malloc(dimUnitsParams.numDimUnits * sizeof(NVPW_DimUnitFactor));
+        if (dimUnitsFactor == NULL) {
+            SUBDBG("Failed to allocate memory for dimUnitsFactor.\n");
+            papi_errno = PAPI_ENOMEM;
+            goto fn_exit;
+        }
+        dimUnitsParams.pDimUnits = dimUnitsFactor;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_GetMetricDimUnitsPtr(&dimUnitsParams), goto fn_fail );
+        size_t i;
+        for (i = 0; i < dimUnitsParams.numDimUnits; i++) {
+            NVPW_MetricsEvaluator_DimUnitToString_Params dimUnitToStringParams = {NVPW_MetricsEvaluator_DimUnitToString_Params_STRUCT_SIZE};
+            dimUnitToStringParams.pMetricsEvaluator = pMetricsEvaluator;
+            dimUnitToStringParams.dimUnit = dimUnitsFactor[i].dimUnit;
+            dimUnitToStringParams.pPriv = NULL;
+            nvpwCheckErrors( NVPW_MetricsEvaluator_DimUnitToStringPtr(&dimUnitToStringParams), goto fn_fail );
+            // Append after what is already in the buffer; truncation is judged against the space left, not the full size.
+            size_t unitsLen = strlen(tmpMetricUnits), unitsLeft = sizeof(tmpMetricUnits) - unitsLen;
+            int written = snprintf(tmpMetricUnits + unitsLen, unitsLeft, (i == 0) ? "%s" : "/%s", dimUnitToStringParams.pPluralName);
+            if (written < 0 || (size_t) written >= unitsLeft) {
+                SUBDBG("Failed to fully write dimensional units for a metric.\n");
+                papi_errno = PAPI_EBUF;
+                goto fn_exit;
+            }
+        }
+        metricUnits = tmpMetricUnits;
+    }
+
+    int numOfPasses = 0;
+    papi_errno = get_number_of_passes_for_info(pChipName, pMetricsEvaluator, &metricEvalRequest, &numOfPasses);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+    const char *multipassSupport = (numOfPasses > 1) ? "(multiple passes not supported)" : "";
+    int strLen = snprintf(fullMetricDescription, PAPI_HUGE_STR_LEN, "%s. Units=(%s). Numpass=%d%s.", metricDescription, metricUnits, numOfPasses, multipassSupport);
+    if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+        SUBDBG("Failed to fully write metric description.\n");
+        papi_errno = PAPI_EBUF;
+    }
+
+fn_exit:
+    free(dimUnitsFactor);
+    int destroyErrno = destroy_metrics_evaluator(pMetricsEvaluator); // always release the evaluator
+    return (papi_errno != PAPI_OK) ? papi_errno : destroyErrno;
+fn_fail:
+    papi_errno = PAPI_EMISC;
+    goto fn_exit;
+}
+
+/** @class get_number_of_passes_for_eventsets
+ *  @brief For a metric, get the number of passes. Function is specifically
+ *         designed to work with the start - stop workflow.
+ *
+ *  @param *pChipName
+ *    The device chipname.
+ *  @param *metricName
+ *    A metric name from the Perfworks api.
+ *  @param *numOfPasses
+ *    The total number of passes required by the metric.
+ *  @return PAPI_OK on success, otherwise a PAPI error code.
+*/
+static int get_number_of_passes_for_eventsets(const char *pChipName, const char *metricName, int *numOfPasses)
+{
+    NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params calculateScratchBufferSizeParam = {NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSize_Params_STRUCT_SIZE};
+    calculateScratchBufferSizeParam.pChipName = pChipName;
+    calculateScratchBufferSizeParam.pCounterAvailabilityImage = NULL;
+    calculateScratchBufferSizeParam.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_CalculateScratchBufferSizePtr(&calculateScratchBufferSizeParam), return PAPI_EMISC );
+
+    uint8_t myScratchBuffer[calculateScratchBufferSizeParam.scratchBufferSize];
+    NVPW_CUDA_MetricsEvaluator_Initialize_Params metricEvaluatorInitializeParams = {NVPW_CUDA_MetricsEvaluator_Initialize_Params_STRUCT_SIZE};
+    metricEvaluatorInitializeParams.scratchBufferSize = calculateScratchBufferSizeParam.scratchBufferSize;
+    metricEvaluatorInitializeParams.pScratchBuffer = myScratchBuffer;
+    metricEvaluatorInitializeParams.pChipName = pChipName;
+    metricEvaluatorInitializeParams.pCounterAvailabilityImage = NULL;
+    metricEvaluatorInitializeParams.pCounterDataImage = NULL;
+    metricEvaluatorInitializeParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_MetricsEvaluator_InitializePtr(&metricEvaluatorInitializeParams), return PAPI_EMISC );
+    NVPW_MetricsEvaluator *pMetricsEvaluator = metricEvaluatorInitializeParams.pMetricsEvaluator;
+
+    // Everything acquired from here on is released at fn_exit, on success and on failure alike.
+    int i, papi_errno;
+    int rawMetricRequestsCount = 0;
+    NVPA_RawMetricRequest *rawMetricRequests = NULL;
+    NVPA_RawMetricsConfig *pRawMetricsConfig = NULL;
+
+    NVPW_MetricEvalRequest metricEvalRequest;
+    papi_errno = get_metric_eval_request(pMetricsEvaluator, metricName, &metricEvalRequest);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
+    papi_errno = create_raw_metric_requests(pMetricsEvaluator, &metricEvalRequest, &rawMetricRequests, &rawMetricRequestsCount);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
+    NVPW_CUDA_RawMetricsConfig_Create_V2_Params rawMetricsConfigCreateParams = {NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE};
+    rawMetricsConfigCreateParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
+    rawMetricsConfigCreateParams.pChipName = pChipName;
+    rawMetricsConfigCreateParams.pCounterAvailabilityImage = NULL;
+    rawMetricsConfigCreateParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_RawMetricsConfig_Create_V2Ptr(&rawMetricsConfigCreateParams), goto fn_fail );
+    pRawMetricsConfig = rawMetricsConfigCreateParams.pRawMetricsConfig;
+
+    NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE};
+    beginPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
+    beginPassGroupParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE};
+    addMetricsParams.pRawMetricsConfig = pRawMetricsConfig;
+    addMetricsParams.pRawMetricRequests = rawMetricRequests;
+    addMetricsParams.numMetricRequests = rawMetricRequestsCount;
+    addMetricsParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = { NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE};
+    endPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
+    endPassGroupParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_GetNumPasses_Params rawMetricsConfigGetNumPassesParams = {NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE};
+    rawMetricsConfigGetNumPassesParams.pRawMetricsConfig = pRawMetricsConfig;
+    rawMetricsConfigGetNumPassesParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_GetNumPassesPtr(&rawMetricsConfigGetNumPassesParams), goto fn_fail );
+
+    size_t numNestingLevels = 1;
+    size_t numIsolatedPasses = rawMetricsConfigGetNumPassesParams.numIsolatedPasses;
+    size_t numPipelinedPasses = rawMetricsConfigGetNumPassesParams.numPipelinedPasses;
+    *numOfPasses = numPipelinedPasses + numIsolatedPasses * numNestingLevels;
+
+fn_exit:
+    if (pRawMetricsConfig != NULL) {
+        NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE};
+        rawMetricsConfigDestroyParams.pRawMetricsConfig = pRawMetricsConfig;
+        rawMetricsConfigDestroyParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *)&rawMetricsConfigDestroyParams), papi_errno = PAPI_EMISC );
+    }
+    for (i = 0; i < rawMetricRequestsCount; i++) {
+        free((void *) rawMetricRequests[i].pMetricName);
+    }
+    free(rawMetricRequests);
+    int destroyErrno = destroy_metrics_evaluator(pMetricsEvaluator); // always release the evaluator
+    return (papi_errno != PAPI_OK) ? papi_errno : destroyErrno;
+fn_fail:
+    papi_errno = PAPI_EMISC;
+    goto fn_exit;
+}
+
+
+/** @class get_number_of_passes_for_info
+ *  @brief Get the number of passes a metric needs (evt_code_to_info workflow).
+ *
+ *  @param *pChipName
+ *    The device chipname.
+ *  @param *pMetricsEvaluator
+ *    A NVPW_MetricsEvaluator struct; it is used but not destroyed here.
+ *  @param *metricEvalRequest
+ *    A created metric eval request for the current metric.
+ *  @param *numOfPasses
+ *    The total number of passes required by the metric.
+*/
+static int get_number_of_passes_for_info(const char *pChipName, NVPW_MetricsEvaluator *pMetricsEvaluator, NVPW_MetricEvalRequest *metricEvalRequest, int *numOfPasses)
+{
+    int i, rawMetricRequestsCount = 0;
+    NVPA_RawMetricRequest *rawMetricRequests = NULL;
+    NVPA_RawMetricsConfig *pRawMetricsConfig = NULL; // both are released at fn_exit, on success and on failure
+    int papi_errno = create_raw_metric_requests(pMetricsEvaluator, metricEvalRequest, &rawMetricRequests, &rawMetricRequestsCount);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
+    NVPW_CUDA_RawMetricsConfig_Create_V2_Params rawMetricsConfigCreateParams = {NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE};
+    rawMetricsConfigCreateParams.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
+    rawMetricsConfigCreateParams.pChipName = pChipName;
+    rawMetricsConfigCreateParams.pCounterAvailabilityImage = NULL;
+    rawMetricsConfigCreateParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_RawMetricsConfig_Create_V2Ptr(&rawMetricsConfigCreateParams), goto fn_fail );
+    pRawMetricsConfig = rawMetricsConfigCreateParams.pRawMetricsConfig;
+
+    NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE};
+    beginPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
+    beginPassGroupParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE};
+    addMetricsParams.pRawMetricsConfig = pRawMetricsConfig;
+    addMetricsParams.pRawMetricRequests = rawMetricRequests;
+    addMetricsParams.numMetricRequests = rawMetricRequestsCount;
+    addMetricsParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = { NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE};
+    endPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
+    endPassGroupParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_GetNumPasses_Params rawMetricsConfigGetNumPassesParams = {NVPW_RawMetricsConfig_GetNumPasses_Params_STRUCT_SIZE};
+    rawMetricsConfigGetNumPassesParams.pRawMetricsConfig = pRawMetricsConfig;
+    rawMetricsConfigGetNumPassesParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_GetNumPassesPtr(&rawMetricsConfigGetNumPassesParams), goto fn_fail );
+
+    size_t numNestingLevels = 1;
+    *numOfPasses = rawMetricsConfigGetNumPassesParams.numPipelinedPasses + rawMetricsConfigGetNumPassesParams.numIsolatedPasses * numNestingLevels;
+
+fn_exit:
+    if (pRawMetricsConfig != NULL) {
+        NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE};
+        rawMetricsConfigDestroyParams.pRawMetricsConfig = pRawMetricsConfig;
+        rawMetricsConfigDestroyParams.pPriv = NULL;
+        nvpwCheckErrors( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *)&rawMetricsConfigDestroyParams), papi_errno = PAPI_EMISC );
+    }
+    for (i = 0; i < rawMetricRequestsCount; i++) {
+        free((void *) rawMetricRequests[i].pMetricName);
+    }
+    free(rawMetricRequests);
+    return papi_errno;
+fn_fail:
+    papi_errno = PAPI_EMISC;
+    goto fn_exit;
+}
+
+/** @class get_metric_eval_request
+ *  @brief Turn a metric name into a metric eval request via the perfworks api
+ *         call NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest.
+ *
+ *  @param *pMetricsEvaluator
+ *    The NVPW_MetricsEvaluator struct passed to the perfworks call.
+ *  @param *metricName
+ *    Name of the metric to convert.
+ *  @param *pMetricEvalRequest
+ *    Receives the created metric eval request.
+*/
+static int get_metric_eval_request(NVPW_MetricsEvaluator *pMetricsEvaluator, const char *metricName, NVPW_MetricEvalRequest *pMetricEvalRequest)
+{
+    NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params convertMetricToEvalRequest = {NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequest_Params_STRUCT_SIZE};
+    convertMetricToEvalRequest.pPriv = NULL;
+    convertMetricToEvalRequest.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
+    convertMetricToEvalRequest.pMetricEvalRequest = pMetricEvalRequest;
+    convertMetricToEvalRequest.pMetricName = metricName;
+    convertMetricToEvalRequest.pMetricsEvaluator = pMetricsEvaluator;
+    nvpwCheckErrors( NVPW_MetricsEvaluator_ConvertMetricNameToMetricEvalRequestPtr(&convertMetricToEvalRequest), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class create_raw_metric_requests
+ *  @brief Append the raw metric requests a metric depends on to an array.
+ *
+ *  @param *pMetricsEvaluator
+ *    A NVPW_MetricsEvaluator struct.
+ *  @param *metricEvalRequest
+ *    A metric eval request for the metric.
+ *  @param **rawMetricRequests
+ *    Array that is grown with realloc; if realloc fails it is left as it was.
+ *  @param *rawMetricRequestsCount
+ *    In: entries already stored. Out: entries stored after appending.
+*/
+static int create_raw_metric_requests(NVPW_MetricsEvaluator *pMetricsEvaluator, NVPW_MetricEvalRequest *metricEvalRequest, NVPA_RawMetricRequest **rawMetricRequests, int *rawMetricRequestsCount)
+{
+    NVPW_MetricsEvaluator_GetMetricRawDependencies_Params getMetricRawDependenciesParams = {NVPW_MetricsEvaluator_GetMetricRawDependencies_Params_STRUCT_SIZE};
+    getMetricRawDependenciesParams.pMetricsEvaluator = pMetricsEvaluator;
+    getMetricRawDependenciesParams.pMetricEvalRequests = metricEvalRequest;
+    getMetricRawDependenciesParams.numMetricEvalRequests = 1; // Set to 1 as that is the number of eval requests we will have each time
+    getMetricRawDependenciesParams.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
+    getMetricRawDependenciesParams.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);
+    getMetricRawDependenciesParams.ppRawDependencies = NULL;
+    getMetricRawDependenciesParams.ppOptionalRawDependencies = NULL;
+    getMetricRawDependenciesParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_MetricsEvaluator_GetMetricRawDependenciesPtr(&getMetricRawDependenciesParams), return PAPI_EMISC );
+
+    size_t i, numRawDependencies = getMetricRawDependenciesParams.numRawDependencies;
+    NVPA_RawMetricRequest *grown = (NVPA_RawMetricRequest *) realloc(*rawMetricRequests, (numRawDependencies + (*rawMetricRequestsCount)) * sizeof(NVPA_RawMetricRequest));
+    if (grown == NULL) {
+        SUBDBG("Failed to allocate memory for variable tmpRawMetricRequests.\n");
+        return PAPI_ENOMEM;
+    }
+    *rawMetricRequests = grown; // the caller's pointer is only replaced once realloc has succeeded
+    const char **rawDependencies = (const char **) malloc(numRawDependencies * sizeof(char *));
+    if (rawDependencies == NULL) {
+        SUBDBG("Failed to allocate memory for variable rawDependencies.\n");
+        return PAPI_ENOMEM;
+    }
+    getMetricRawDependenciesParams.ppRawDependencies = rawDependencies;
+    nvpwCheckErrors( NVPW_MetricsEvaluator_GetMetricRawDependenciesPtr(&getMetricRawDependenciesParams), goto fn_fail );
+    for (i = 0; i < numRawDependencies; i++) {
+       NVPA_RawMetricRequest rawMetricRequestParams = {NVPA_RAW_METRIC_REQUEST_STRUCT_SIZE};
+       rawMetricRequestParams.pPriv = NULL;
+       rawMetricRequestParams.pMetricName = strdup(rawDependencies[i]); // NOTE(review): strdup result is still unchecked
+       rawMetricRequestParams.isolated = 1;
+       rawMetricRequestParams.keepInstances = 1;
+       grown[(*rawMetricRequestsCount)] = rawMetricRequestParams;
+       (*rawMetricRequestsCount)++;
+    }
+    free(rawDependencies);
+    return PAPI_OK;
+fn_fail:
+    free(rawDependencies);
+    return PAPI_EMISC;
+}
+
+/** @class get_evaluated_metric_values
+ *  @brief For a user added metric, get the evaluated gpu value.
+ *
+ *  @param *pMetricsEvaluator
+ *    A NVPW_MetricsEvaluator struct.
+ *  @param *gpu_ctl
+ *    Structure of type cuptip_gpu_state_t; added_events and counterDataImage are read.
+ *  @param *evaluatedMetricValues
+ *    Output array; entry i receives the value of added event i converted to long long.
+*/
+static int get_evaluated_metric_values(NVPW_MetricsEvaluator *pMetricsEvaluator, cuptip_gpu_state_t *gpu_ctl, long long *evaluatedMetricValues)
+{
+    int i;
+    for (i = 0; i < gpu_ctl->added_events->count; i++) {
+        NVPW_MetricEvalRequest metricEvalRequest;
+        int papi_errno = get_metric_eval_request(pMetricsEvaluator, gpu_ctl->added_events->cuda_evts[i], &metricEvalRequest);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno; // do not evaluate a request that was never filled in
+        }
+        NVPW_MetricsEvaluator_SetDeviceAttributes_Params setDeviceAttributeParams = {NVPW_MetricsEvaluator_SetDeviceAttributes_Params_STRUCT_SIZE};
+        setDeviceAttributeParams.pMetricsEvaluator = pMetricsEvaluator;
+        setDeviceAttributeParams.pCounterDataImage = (const uint8_t *) gpu_ctl->counterDataImage.data;
+        setDeviceAttributeParams.counterDataImageSize = gpu_ctl->counterDataImage.size;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_SetDeviceAttributesPtr(&setDeviceAttributeParams), return PAPI_EMISC );
+
+        double metricValue;
+        NVPW_MetricsEvaluator_EvaluateToGpuValues_Params evaluateToGpuValuesParams = {NVPW_MetricsEvaluator_EvaluateToGpuValues_Params_STRUCT_SIZE};
+        evaluateToGpuValuesParams.pMetricsEvaluator = pMetricsEvaluator;
+        evaluateToGpuValuesParams.pMetricEvalRequests =  &metricEvalRequest;
+        evaluateToGpuValuesParams.numMetricEvalRequests = 1;
+        evaluateToGpuValuesParams.metricEvalRequestStructSize = NVPW_MetricEvalRequest_STRUCT_SIZE;
+        evaluateToGpuValuesParams.metricEvalRequestStrideSize = sizeof(NVPW_MetricEvalRequest);
+        evaluateToGpuValuesParams.pCounterDataImage = gpu_ctl->counterDataImage.data;
+        evaluateToGpuValuesParams.counterDataImageSize = gpu_ctl->counterDataImage.size;
+        evaluateToGpuValuesParams.rangeIndex = 0;
+        evaluateToGpuValuesParams.isolated = 1;
+        evaluateToGpuValuesParams.pMetricValues = &metricValue;
+        nvpwCheckErrors( NVPW_MetricsEvaluator_EvaluateToGpuValuesPtr(&evaluateToGpuValuesParams), return PAPI_EMISC );
+        evaluatedMetricValues[i] = metricValue;
+    }
+
+    return PAPI_OK;
+}
+
+/** @class destroy_metrics_evaluator
+  * @brief Release a metrics evaluator through the perfworks api call
+  *        NVPW_MetricsEvaluator_Destroy.
+*/
+static int destroy_metrics_evaluator(NVPW_MetricsEvaluator *pMetricsEvaluator)
+{
+    NVPW_MetricsEvaluator_Destroy_Params metricEvaluatorDestroyParams = {NVPW_MetricsEvaluator_Destroy_Params_STRUCT_SIZE};
+    metricEvaluatorDestroyParams.pPriv = NULL;
+    metricEvaluatorDestroyParams.pMetricsEvaluator = pMetricsEvaluator;
+    nvpwCheckErrors( NVPW_MetricsEvaluator_DestroyPtr(&metricEvaluatorDestroyParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/**
+ *  @}
+ ******************************************************************************/
+
+/***************************************************************************//**
+ *  @name Functions necessary for the configuration/profiling stage
+ *  @{
+ */
+
+/** @class start_profiling_session
+ *  @brief Begin a CUPTI profiler session and hand it the config image.
+ *
+ *  @param counterDataImage
+ *    Size and data of the counter data image.
+ *  @param counterDataScratchBufferSize
+ *    Size and data of the counter data scratch buffer.
+ *  @param configImage
+ *    Size and data of the config image.
+*/
+static int start_profiling_session(byte_array_t counterDataImage, byte_array_t counterDataScratchBufferSize, byte_array_t configImage)
+{
+    CUpti_Profiler_BeginSession_Params beginSessionParams = {CUpti_Profiler_BeginSession_Params_STRUCT_SIZE};
+    beginSessionParams.pPriv = NULL;
+    beginSessionParams.ctx = NULL;
+    beginSessionParams.range = CUPTI_UserRange;
+    beginSessionParams.replayMode = CUPTI_UserReplay;
+    beginSessionParams.maxRangesPerPass = 1;
+    beginSessionParams.maxLaunchesPerPass = 1;
+    beginSessionParams.pCounterDataImage = counterDataImage.data;
+    beginSessionParams.counterDataImageSize = counterDataImage.size;
+    beginSessionParams.pCounterDataScratchBuffer = counterDataScratchBufferSize.data;
+    beginSessionParams.counterDataScratchBufferSize = counterDataScratchBufferSize.size;
+    cuptiCheckErrors( cuptiProfilerBeginSessionPtr(&beginSessionParams), return PAPI_EMISC );
+
+    CUpti_Profiler_SetConfig_Params setConfigParams = {CUpti_Profiler_SetConfig_Params_STRUCT_SIZE};
+    setConfigParams.pPriv = NULL;
+    setConfigParams.configSize = configImage.size;
+    setConfigParams.pConfig = configImage.data;
+    // Only set for Application Replay mode.
+    setConfigParams.passIndex = 0;
+    setConfigParams.targetNestingLevel = 1;
+    setConfigParams.numNestingLevels = 1;
+    setConfigParams.minNestingLevel = 1;
+    cuptiCheckErrors( cuptiProfilerSetConfigPtr(&setConfigParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class get_config_image
+ *  @brief Generate the ConfigImage binary configuration image
+ *         (file format in memory).
+ *
+ *  @param chipName
+ *    Name of the device being used.
+ *  @param *pCounterAvailabilityImageData
+ *    Data from cuptiProfilerGetCounterAvailability; may be NULL, in which case it is skipped.
+ *  @param *rawMetricRequests
+ *    A filled in NVPA_RawMetricRequest.
+ *  @param rmr_count
+ *    Number of rawMetricRequests.
+ *  @param configImage
+ *    Variable to store the generated configImage.
+*/
+static int get_config_image(const char *chipName, const uint8_t *pCounterAvailabilityImageData, NVPA_RawMetricRequest *rawMetricRequests, int rmr_count, byte_array_t *configImage)
+{
+    NVPW_CUDA_RawMetricsConfig_Create_V2_Params rawMetricsConfigCreateParamsV2 = {NVPW_CUDA_RawMetricsConfig_Create_V2_Params_STRUCT_SIZE};
+    rawMetricsConfigCreateParamsV2.activityKind = NVPA_ACTIVITY_KIND_PROFILER;
+    rawMetricsConfigCreateParamsV2.pChipName = chipName;
+    rawMetricsConfigCreateParamsV2.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_RawMetricsConfig_Create_V2Ptr(&rawMetricsConfigCreateParamsV2), return PAPI_EMISC );
+    NVPA_RawMetricsConfig *pRawMetricsConfig = rawMetricsConfigCreateParamsV2.pRawMetricsConfig; // destroyed at fn_exit on every path
+    NVPW_RawMetricsConfig_Destroy_Params rawMetricsConfigDestroyParams = {NVPW_RawMetricsConfig_Destroy_Params_STRUCT_SIZE}; // declared here so fn_exit can begin with a statement
+    int papi_errno = PAPI_OK;
+
+    // Query counter availability before starting the profiling session
+    if (pCounterAvailabilityImageData) {
+        NVPW_RawMetricsConfig_SetCounterAvailability_Params setCounterAvailabilityParams = {NVPW_RawMetricsConfig_SetCounterAvailability_Params_STRUCT_SIZE};
+        setCounterAvailabilityParams.pPriv = NULL;
+        setCounterAvailabilityParams.pRawMetricsConfig = pRawMetricsConfig;
+        setCounterAvailabilityParams.pCounterAvailabilityImage = pCounterAvailabilityImageData;
+        nvpwCheckErrors( NVPW_RawMetricsConfig_SetCounterAvailabilityPtr(&setCounterAvailabilityParams), goto fn_fail );
+    }
+
+    // NOTE: maxPassCount is being set to 1 as a final safety net to limit metric collection to a single pass.
+    //       Metrics that require multiple passes would fail further down at AddMetrics due to this.
+    //       This failure should never occur as we filter for metrics with multiple passes at get_number_of_passes,
+    //       which occurs before the get_config_image call.
+    NVPW_RawMetricsConfig_BeginPassGroup_Params beginPassGroupParams = {NVPW_RawMetricsConfig_BeginPassGroup_Params_STRUCT_SIZE};
+    beginPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
+    beginPassGroupParams.maxPassCount = 1;
+    beginPassGroupParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_BeginPassGroupPtr(&beginPassGroupParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_AddMetrics_Params addMetricsParams = {NVPW_RawMetricsConfig_AddMetrics_Params_STRUCT_SIZE};
+    addMetricsParams.pRawMetricsConfig = pRawMetricsConfig;
+    addMetricsParams.pRawMetricRequests = rawMetricRequests;
+    addMetricsParams.numMetricRequests = rmr_count;
+    addMetricsParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_AddMetricsPtr(&addMetricsParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_EndPassGroup_Params endPassGroupParams = {NVPW_RawMetricsConfig_EndPassGroup_Params_STRUCT_SIZE};
+    endPassGroupParams.pRawMetricsConfig = pRawMetricsConfig;
+    endPassGroupParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_EndPassGroupPtr(&endPassGroupParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_GenerateConfigImage_Params generateConfigImageParams = {NVPW_RawMetricsConfig_GenerateConfigImage_Params_STRUCT_SIZE};
+    generateConfigImageParams.pRawMetricsConfig = pRawMetricsConfig;
+    generateConfigImageParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_GenerateConfigImagePtr(&generateConfigImageParams), goto fn_fail );
+
+    NVPW_RawMetricsConfig_GetConfigImage_Params getConfigImageParams = {NVPW_RawMetricsConfig_GetConfigImage_Params_STRUCT_SIZE};
+    getConfigImageParams.pRawMetricsConfig = pRawMetricsConfig;
+    getConfigImageParams.bytesAllocated = 0;
+    getConfigImageParams.pBuffer = NULL;
+    getConfigImageParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_GetConfigImagePtr(&getConfigImageParams), goto fn_fail );
+
+    configImage->size = getConfigImageParams.bytesCopied;
+    configImage->data = (uint8_t *) calloc(configImage->size, sizeof(uint8_t));
+    if (configImage->data == NULL) {
+        SUBDBG("Failed to allocate memory for configImage->data.\n");
+        papi_errno = PAPI_ENOMEM;
+        goto fn_exit;
+    }
+    getConfigImageParams.bytesAllocated = configImage->size;
+    getConfigImageParams.pBuffer = configImage->data;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_GetConfigImagePtr(&getConfigImageParams), goto fn_fail );
+
+fn_exit:
+    rawMetricsConfigDestroyParams.pRawMetricsConfig = pRawMetricsConfig;
+    rawMetricsConfigDestroyParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_RawMetricsConfig_DestroyPtr((NVPW_RawMetricsConfig_Destroy_Params *)&rawMetricsConfigDestroyParams), papi_errno = PAPI_EMISC );
+    return papi_errno;
+fn_fail:
+    papi_errno = PAPI_EMISC;
+    goto fn_exit;
+}
+
+
+/** @class get_counter_data_prefix_image
+ *  @brief Generate the counterDataPrefix binary configuration image
+ *         (file format in memory).
+ *
+ *  @param chipName
+ *    Name of the device being used.
+ *  @param *rawMetricRequests
+ *    A filled in NVPA_RawMetricRequest.
+ *  @param rmr_count
+ *    Number of rawMetricRequests.
+ *  @param counterDataPrefixImage
+ *    Output: receives a calloc'd buffer (data) and its size in bytes.
+*/
+static int get_counter_data_prefix_image(const char *chipName, NVPA_RawMetricRequest *rawMetricRequests, int rmr_count, byte_array_t *counterDataPrefixImage)
+{
+    NVPW_CUDA_CounterDataBuilder_Create_Params counterDataBuilderCreateParams = {NVPW_CUDA_CounterDataBuilder_Create_Params_STRUCT_SIZE};
+    counterDataBuilderCreateParams.pChipName = chipName;
+    counterDataBuilderCreateParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CUDA_CounterDataBuilder_CreatePtr(&counterDataBuilderCreateParams), return PAPI_EMISC );
+
+    NVPW_CounterDataBuilder_AddMetrics_Params builderAddMetricsParams = {NVPW_CounterDataBuilder_AddMetrics_Params_STRUCT_SIZE};
+    builderAddMetricsParams.pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder;
+    builderAddMetricsParams.pRawMetricRequests = rawMetricRequests;
+    builderAddMetricsParams.numMetricRequests = rmr_count;
+    builderAddMetricsParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CounterDataBuilder_AddMetricsPtr(&builderAddMetricsParams), return PAPI_EMISC );
+
+    NVPW_CounterDataBuilder_GetCounterDataPrefix_Params getCounterDataPrefixParams = {NVPW_CounterDataBuilder_GetCounterDataPrefix_Params_STRUCT_SIZE};
+    getCounterDataPrefixParams.pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder;
+    getCounterDataPrefixParams.bytesAllocated = 0; // first call passes no buffer; bytesCopied is then used as the size
+    getCounterDataPrefixParams.pBuffer = NULL;
+    getCounterDataPrefixParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CounterDataBuilder_GetCounterDataPrefixPtr(&getCounterDataPrefixParams), return PAPI_EMISC );
+
+    byte_array_t *tmpCounterDataPrefixImage;
+    tmpCounterDataPrefixImage = counterDataPrefixImage;
+    tmpCounterDataPrefixImage->size = getCounterDataPrefixParams.bytesCopied;
+    tmpCounterDataPrefixImage->data = (uint8_t *) calloc(tmpCounterDataPrefixImage->size, sizeof(uint8_t));
+    if (tmpCounterDataPrefixImage->data == NULL) {
+        SUBDBG("Failed to allocate memory for tmpCounterDataPrefixImage->data.\n");
+        return PAPI_ENOMEM; // NOTE(review): the builder created above is not destroyed on this path
+    }
+
+    getCounterDataPrefixParams.bytesAllocated = tmpCounterDataPrefixImage->size; // second call fills the buffer
+    getCounterDataPrefixParams.pBuffer = tmpCounterDataPrefixImage->data;
+    nvpwCheckErrors( NVPW_CounterDataBuilder_GetCounterDataPrefixPtr(&getCounterDataPrefixParams), return PAPI_EMISC );
+
+    NVPW_CounterDataBuilder_Destroy_Params counterDataBuilderDestroyParams = {NVPW_CounterDataBuilder_Destroy_Params_STRUCT_SIZE};
+    counterDataBuilderDestroyParams.pCounterDataBuilder = counterDataBuilderCreateParams.pCounterDataBuilder;
+    counterDataBuilderDestroyParams.pPriv = NULL;
+    nvpwCheckErrors( NVPW_CounterDataBuilder_DestroyPtr((NVPW_CounterDataBuilder_Destroy_Params *)&counterDataBuilderDestroyParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class get_counter_data_image
+ *  @brief Allocate and initialize a counterDataImage and its scratch buffer.
+ *         Returns PAPI_OK, or PAPI_ENOMEM if either calloc fails.
+ *  @param counterDataPrefixImage
+ *    Struct containing the size and data of the counterDataPrefix
+ *    binary configuration image.
+ *  @param counterDataScratchBuffer
+ *    Struct to store the size and data of the scratch buffer.
+ *  @param counterDataImage
+ *    Struct to store the size and data of the counterDataImage.
+*/
+static int get_counter_data_image(byte_array_t counterDataPrefixImage, byte_array_t *counterDataScratchBuffer, byte_array_t *counterDataImage)
+{
+    CUpti_Profiler_CounterDataImageOptions counterDataImageOptions;
+    counterDataImageOptions.pCounterDataPrefix = counterDataPrefixImage.data;
+    counterDataImageOptions.counterDataPrefixSize = counterDataPrefixImage.size;
+    counterDataImageOptions.maxNumRanges = 1;
+    counterDataImageOptions.maxNumRangeTreeNodes = 1; // NOTE(review): rationale for this value is not documented here
+    counterDataImageOptions.maxRangeNameLength = 64; 
+
+    // Calculate size of counterDataImage based on counterDataPrefixImage and options.
+    CUpti_Profiler_CounterDataImage_CalculateSize_Params calculateSizeParams = {CUpti_Profiler_CounterDataImage_CalculateSize_Params_STRUCT_SIZE};
+    calculateSizeParams.pOptions = &counterDataImageOptions;
+    calculateSizeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE;
+    calculateSizeParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerCounterDataImageCalculateSizePtr(&calculateSizeParams), return PAPI_EMISC );
+
+    // Prepare the initialize call; the image buffer itself is attached after it is allocated below.
+    CUpti_Profiler_CounterDataImage_Initialize_Params initializeParams = {CUpti_Profiler_CounterDataImage_Initialize_Params_STRUCT_SIZE};
+    initializeParams.pOptions = &counterDataImageOptions;
+    initializeParams.sizeofCounterDataImageOptions = CUpti_Profiler_CounterDataImageOptions_STRUCT_SIZE;
+    initializeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
+    initializeParams.pPriv = NULL;
+
+    byte_array_t *tmpCounterDataImage;
+    tmpCounterDataImage = counterDataImage;
+
+    tmpCounterDataImage->size = calculateSizeParams.counterDataImageSize;
+    tmpCounterDataImage->data = (uint8_t *) calloc(tmpCounterDataImage->size, sizeof(uint8_t));
+    if (counterDataImage->data  == NULL) {
+        SUBDBG("Failed to allocate memory for counterDataImage->data.\n");
+        return PAPI_ENOMEM;
+    }
+
+    initializeParams.pCounterDataImage = counterDataImage->data;
+    cuptiCheckErrors( cuptiProfilerCounterDataImageInitializePtr(&initializeParams), return PAPI_EMISC );
+
+    // Calculate scratchBuffer size from the initialized counterDataImage and its size.
+    CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params scratchBufferSizeParams = {CUpti_Profiler_CounterDataImage_CalculateScratchBufferSize_Params_STRUCT_SIZE};
+    scratchBufferSizeParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
+    scratchBufferSizeParams.pCounterDataImage = counterDataImage->data;
+    scratchBufferSizeParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerCounterDataImageCalculateScratchBufferSizePtr(&scratchBufferSizeParams), return PAPI_EMISC );
+
+    // Allocate counterDataScratchBuffer with the size reported above.
+    byte_array_t *tmpCounterDataScratchBuffer;
+    tmpCounterDataScratchBuffer = counterDataScratchBuffer;
+    tmpCounterDataScratchBuffer->size = scratchBufferSizeParams.counterDataScratchBufferSize;
+    tmpCounterDataScratchBuffer->data = (uint8_t *) calloc(tmpCounterDataScratchBuffer->size, sizeof(uint8_t));
+    if (counterDataScratchBuffer->data == NULL) {
+        SUBDBG("Failed to allocate memory for counterDataScratchBuffer->data.\n");
+        return PAPI_ENOMEM;
+    }   
+
+    // Initialize counterDataScratchBuffer.
+    CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params initScratchBufferParams = { CUpti_Profiler_CounterDataImage_InitializeScratchBuffer_Params_STRUCT_SIZE };
+    initScratchBufferParams.counterDataImageSize = calculateSizeParams.counterDataImageSize;
+    initScratchBufferParams.pCounterDataImage = counterDataImage->data; // same buffer that was initialized above
+    initScratchBufferParams.counterDataScratchBufferSize = counterDataScratchBuffer->size;
+    initScratchBufferParams.pCounterDataScratchBuffer = counterDataScratchBuffer->data;
+    initScratchBufferParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerCounterDataImageInitializeScratchBufferPtr(&initScratchBufferParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class end_profiling_session
+ *  @brief End the started profiling session, stopping at the first
+ *         teardown step that does not return PAPI_OK.
+*/
+static int end_profiling_session(void)
+{
+    int status;
+
+    // Disable profiling, then pop the range.
+    status = disable_profiling();
+    if (status == PAPI_OK) {
+        status = pop_range();
+    }
+
+    // Flush the counter data.
+    if (status == PAPI_OK) {
+        status = flush_data();
+    }
+
+    // Unset the config before the session itself is ended.
+    if (status == PAPI_OK) {
+        status = unset_config();
+    }
+
+    // Finally end the session.
+    if (status == PAPI_OK) {
+        status = end_session();
+    }
+
+    // Either PAPI_OK or the code of the first step that failed.
+    return status;
+}
+
+/**
+ *  @}
+ ******************************************************************************/
+
+/***************************************************************************//**
+ *  @name   Wrappers for cupti profiler api calls
+ *  @{
+ */
+
+/** @class initialize_cupti_profiler_api
+  * @brief Wrapper for the cupti profiler api call cuptiProfilerInitialize.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int initialize_cupti_profiler_api(void)
+{
+    COMPDBG("Entering.\n");
+
+    CUpti_Profiler_Initialize_Params profilerInitializeParams = {CUpti_Profiler_Initialize_Params_STRUCT_SIZE};
+    profilerInitializeParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerInitializePtr(&profilerInitializeParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class deinitialize_cupti_profiler_api
+  * @brief Wrapper for the cupti profiler api call cuptiProfilerDeInitialize.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int deinitialize_cupti_profiler_api(void)
+{
+    COMPDBG("Entering.\n");
+
+    CUpti_Profiler_DeInitialize_Params profilerDeInitializeParams = {CUpti_Profiler_DeInitialize_Params_STRUCT_SIZE};
+    profilerDeInitializeParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerDeInitializePtr(&profilerDeInitializeParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class enable_profiling
+  * @brief Wrapper for cuptiProfilerEnableProfiling on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int enable_profiling(void)
+{
+   CUpti_Profiler_EnableProfiling_Params enableProfilingParams = {CUpti_Profiler_EnableProfiling_Params_STRUCT_SIZE};
+   enableProfilingParams.ctx = NULL; // If NULL, the current CUcontext is used
+   enableProfilingParams.pPriv = NULL;
+   cuptiCheckErrors( cuptiProfilerEnableProfilingPtr(&enableProfilingParams), return PAPI_EMISC );
+
+   return PAPI_OK;
+}
+
+/** @class begin_pass
+  * @brief Wrapper for cuptiProfilerBeginPass on the current CUcontext; returns PAPI_OK.
+  *        NOTE(review): not declared static here, unlike the sibling wrappers - confirm intent.
+*/
+int begin_pass(void)
+{
+    CUpti_Profiler_BeginPass_Params beginPassParams = {CUpti_Profiler_BeginPass_Params_STRUCT_SIZE};
+    beginPassParams.ctx = NULL; // If NULL, the current CUcontext is used
+    beginPassParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerBeginPassPtr(&beginPassParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class end_pass
+  * @brief Wrapper for cuptiProfilerEndPass on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int end_pass(void)
+{
+    CUpti_Profiler_EndPass_Params endPassParams = {CUpti_Profiler_EndPass_Params_STRUCT_SIZE};
+    endPassParams.ctx = NULL; // If NULL, the current CUcontext is used
+    endPassParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerEndPassPtr(&endPassParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class push_range
+  * @brief Wrapper for cuptiProfilerPushRange on the current CUcontext; returns PAPI_OK.
+  *        pRangeName must be a non-NULL string because it is passed to strlen.
+*/
+static int push_range(const char *pRangeName)
+{
+    CUpti_Profiler_PushRange_Params pushRangeParams = {CUpti_Profiler_PushRange_Params_STRUCT_SIZE};
+    pushRangeParams.pRangeName = pRangeName;
+    pushRangeParams.rangeNameLength = strlen(pRangeName);
+    pushRangeParams.ctx = NULL; // If NULL, the current CUcontext is used
+    pushRangeParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerPushRangePtr(&pushRangeParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class pop_range
+  * @brief Wrapper for cuptiProfilerPopRange on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int pop_range(void)
+{
+    CUpti_Profiler_PopRange_Params popRangeParams = {CUpti_Profiler_PopRange_Params_STRUCT_SIZE};
+    popRangeParams.ctx = NULL; // If NULL, the current CUcontext is used
+    popRangeParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerPopRangePtr(&popRangeParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class flush_data
+  * @brief Wrapper for cuptiProfilerFlushCounterData on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+  *
+  *        Note that Flush is required to ensure data is returned from the
+  *        device when running User Replay mode.
+*/
+static int flush_data(void)
+{
+    CUpti_Profiler_FlushCounterData_Params flushCounterDataParams = {CUpti_Profiler_FlushCounterData_Params_STRUCT_SIZE};
+    flushCounterDataParams.ctx = NULL; // If NULL, the current CUcontext is used
+    flushCounterDataParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerFlushCounterDataPtr(&flushCounterDataParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class disable_profiling
+  * @brief Wrapper for cuptiProfilerDisableProfiling on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int disable_profiling(void)
+{
+    CUpti_Profiler_DisableProfiling_Params disableProfilingParams = {CUpti_Profiler_DisableProfiling_Params_STRUCT_SIZE};
+    disableProfilingParams.ctx = NULL; // If NULL, the current CUcontext is used
+    disableProfilingParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerDisableProfilingPtr(&disableProfilingParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class unset_config
+  * @brief Wrapper for cuptiProfilerUnsetConfig on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int unset_config(void)
+{
+    CUpti_Profiler_UnsetConfig_Params unsetConfigParams = {CUpti_Profiler_UnsetConfig_Params_STRUCT_SIZE};
+    unsetConfigParams.ctx = NULL; // If NULL, the current CUcontext is used
+    unsetConfigParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerUnsetConfigPtr(&unsetConfigParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/** @class end_session
+  * @brief Wrapper for cuptiProfilerEndSession on the current CUcontext.
+  *        Returns PAPI_OK; `return PAPI_EMISC` is the action given to cuptiCheckErrors.
+*/
+static int end_session(void)
+{
+    CUpti_Profiler_EndSession_Params endSessionParams = {CUpti_Profiler_EndSession_Params_STRUCT_SIZE};
+    endSessionParams.ctx = NULL; // If NULL, the current CUcontext is used
+    endSessionParams.pPriv = NULL;
+    cuptiCheckErrors( cuptiProfilerEndSessionPtr(&endSessionParams), return PAPI_EMISC );
+
+    return PAPI_OK;
+}
+
+/**
+ *  @}
+ ******************************************************************************/
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_profiler.h 7.2.0-1/src/components/cuda/cupti_profiler.h
--- 7.2.0~b2-1/src/components/cuda/cupti_profiler.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_profiler.h	2025-06-25 22:38:10.000000000 +0000
@@ -24,14 +24,14 @@ int cuptip_init(void);
 int cuptip_shutdown(void);
 
 /* native event interfaces */
-int cuptip_evt_enum(uint64_t *event_code, int modifier);
-int cuptip_evt_code_to_descr(uint64_t event_code, char *descr, int len);
-int cuptip_evt_name_to_code(const char *name, uint64_t *event_code);
-int cuptip_evt_code_to_name(uint64_t event_code, char *name, int len);
-int cuptip_evt_code_to_info(uint64_t event_code, PAPI_event_info_t *info);
+int cuptip_evt_enum(uint32_t *event_code, int modifier);
+int cuptip_evt_code_to_descr(uint32_t event_code, char *descr, int len);
+int cuptip_evt_name_to_code(const char *name, uint32_t *event_code);
+int cuptip_evt_code_to_name(uint32_t event_code, char *name, int len);
+int cuptip_evt_code_to_info(uint32_t event_code, PAPI_event_info_t *info);
 
 /* profiling context handling interfaces */
-int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate,  uint64_t *events_id, int num_events);
+int cuptip_ctx_create(cuptic_info_t thr_info, cuptip_control_t *pstate,  uint32_t *events_id, int num_events);
 int cuptip_ctx_destroy(cuptip_control_t *pstate);
 int cuptip_ctx_start(cuptip_control_t state);
 int cuptip_ctx_stop(cuptip_control_t state);
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_utils.c 7.2.0-1/src/components/cuda/cupti_utils.c
--- 7.2.0~b2-1/src/components/cuda/cupti_utils.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_utils.c	2025-06-25 22:38:10.000000000 +0000
@@ -16,17 +16,20 @@
 
 int cuptiu_event_table_create_init_capacity(int capacity, int sizeof_rec, cuptiu_event_table_t **pevt_table)
 {
-    cuptiu_event_table_t *evt_table = (cuptiu_event_table_t *) papi_malloc(sizeof(cuptiu_event_table_t));
+    cuptiu_event_table_t *evt_table = (cuptiu_event_table_t *) malloc(sizeof(cuptiu_event_table_t));
     if (evt_table == NULL) {
         goto fn_fail;
     }
 
     evt_table->capacity = capacity;
     evt_table->count = 0;
+    evt_table->event_stats_count = 0;
+    
     if (htable_init(&(evt_table->htable)) != HTABLE_SUCCESS) {
         cuptiu_event_table_destroy(&evt_table);
         goto fn_fail;
     }
+    
     *pevt_table = evt_table;
     return 0;
 fn_fail:
@@ -44,7 +47,8 @@ void cuptiu_event_table_destroy(cuptiu_e
         htable_shutdown(evt_table->htable);
         evt_table->htable = NULL;
     }
-    papi_free(evt_table);
+
+    free(evt_table);
     *pevt_table = NULL;
 }
 
@@ -77,3 +81,56 @@ int cuptiu_files_search_in_path(const ch
     }
     return count;
 }
+
+// Put a StringVector into the empty state: no storage, zero size and capacity
+void init_vector(StringVector *vec) {
+    *vec = (StringVector) {
+        .arrayMetricStatistics = NULL, .size = 0, .capacity = 0
+    };
+}
+
+// Append a copy of str (duplicates are skipped); PAPI_OK, or PAPI_ENOMEM on failure
+int push_back(StringVector *vec, const char *str) {
+    for (size_t i = 0; i < vec->size; i++) {
+      if (strcmp(vec->arrayMetricStatistics[i], str) == 0) {
+          return PAPI_OK;
+      }
+    }
+
+    // Grow the pointer array when full: 4 slots first, doubling afterwards
+    if (vec->size == vec->capacity) {
+        size_t new_capacity = (vec->capacity == 0) ? 4 : vec->capacity * 2;
+        char **new_data = realloc(vec->arrayMetricStatistics, new_capacity * sizeof(char*));
+        if (new_data == NULL) {
+            ERRDBG ("Memory allocation failed\n");
+            return PAPI_ENOMEM;
+        }
+        vec->arrayMetricStatistics = new_data;
+        vec->capacity = new_capacity;
+    }
+
+    // Build the copy in a local first so a failure cannot leave it orphaned
+    size_t bufLen = strlen(str) + 1;
+    char *copy = malloc(bufLen);
+    if (copy == NULL) {
+        ERRDBG ("Memory allocation failed\n");
+        return PAPI_ENOMEM;
+    }
+    int strLen = snprintf(copy, bufLen, "%s", str);
+    if (strLen < 0 || (size_t) strLen >= bufLen) { // compare with the real buffer size
+        SUBDBG("Failed to fully write added Cuda native event name.\n");
+        free(copy); // never stored in vec, so it must be released here
+        return PAPI_ENOMEM;
+    }
+    vec->arrayMetricStatistics[vec->size++] = copy; // store the copy and count it
+    return PAPI_OK;
+}
+
+// Free every stored string and the array itself, leaving the vector empty
+void free_vector(StringVector *vec) {
+    for (size_t i = 0; i < vec->size; i++) {
+        free(vec->arrayMetricStatistics[i]);
+    }
+    free(vec->arrayMetricStatistics);
+    init_vector(vec); // also zeroes size/capacity so reuse or a second free is safe
+}
diff -pruN 7.2.0~b2-1/src/components/cuda/cupti_utils.h 7.2.0-1/src/components/cuda/cupti_utils.h
--- 7.2.0~b2-1/src/components/cuda/cupti_utils.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/cupti_utils.h	2025-06-25 22:38:10.000000000 +0000
@@ -16,29 +16,37 @@
 
 typedef int64_t cuptiu_bitmap_t;
 typedef int (*cuptiu_dev_get_map_cb)(uint64_t event_id, int *dev_id);
-typedef NVPW_CUDA_MetricsContext_Create_Params MCCP_t;
+
+typedef struct {
+    char **arrayMetricStatistics ;   // array of individually allocated strings (see push_back)
+    size_t size;   // number of strings currently stored
+    size_t capacity; // number of slots allocated in arrayMetricStatistics
+} StringVector;
 
 typedef struct event_record_s {
     char name[PAPI_2MAX_STR_LEN];
+    char basenameWithStatReplaced[PAPI_2MAX_STR_LEN];
     char desc[PAPI_HUGE_STR_LEN];
+    StringVector * stat;
     cuptiu_bitmap_t device_map;
 } cuptiu_event_t;
 
 typedef struct gpu_record_s {
-    char chip_name[PAPI_MIN_STR_LEN];
-    MCCP_t *pmetricsContextCreateParams;
-    int num_metrics;
-    const char* const* metric_names;
+    char chipName[PAPI_MIN_STR_LEN];
+    int totalMetricCount;
+    char **metricNames;
 } gpu_record_t;
 
 typedef struct event_table_s {
-    int count;
+    unsigned int count;
+    unsigned int event_stats_count;
     unsigned int capacity;
     char cuda_evts[30][PAPI_2MAX_STR_LEN];
     int cuda_devs[30];
     int evt_pos[30];
     gpu_record_t *avail_gpu_info;
     cuptiu_event_t *events;
+    StringVector   *event_stats;
     void *htable;
 } cuptiu_event_table_t;
 
@@ -46,6 +54,11 @@ typedef struct event_table_s {
 int cuptiu_event_table_create_init_capacity(int capacity, int sizeof_rec, cuptiu_event_table_t **pevt_table);
 void cuptiu_event_table_destroy(cuptiu_event_table_t **pevt_table);
 
+/* These functions handle list of strings for statistics qualifiers */
+void init_vector(StringVector *vec);
+int push_back(StringVector *vec, const char *str);
+void free_vector(StringVector *vec);
+
 /* Utility to locate a file in a given path */
 #define CUPTIU_MAX_FILES 100
 int cuptiu_files_search_in_path(const char *file_name, const char *search_path, char **file_paths);
diff -pruN 7.2.0~b2-1/src/components/cuda/htable.h 7.2.0-1/src/components/cuda/htable.h
--- 7.2.0~b2-1/src/components/cuda/htable.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/htable.h	2025-06-25 22:38:10.000000000 +0000
@@ -121,7 +121,7 @@ htable_insert(void *handle, const char *
     return htable_errno;
   fn_fail:
     if (entry) {
-        papi_free(entry);
+        free(entry);
     }
     goto fn_exit;
 }
@@ -196,13 +196,13 @@ create_table(uint64_t size, struct hash_
 {
     int htable_errno = HTABLE_SUCCESS;
 
-    *table = papi_calloc(1, sizeof(**table));
+    *table = calloc(1, sizeof(**table));
     if (table == NULL) {
         htable_errno = HTABLE_ENOMEM;
         goto fn_exit;
     }
 
-    (*table)->buckets = papi_calloc(size, sizeof(*(*table)->buckets));
+    (*table)->buckets = calloc(size, sizeof(*(*table)->buckets));
     if ((*table)->buckets == NULL) {
         htable_errno = HTABLE_ENOMEM;
         goto fn_exit;
@@ -220,11 +220,11 @@ destroy_table(struct hash_table *table)
     int htable_errno = HTABLE_SUCCESS;
 
     if (table && table->buckets) {
-        papi_free(table->buckets);
+        free(table->buckets);
     }
 
     if (table) {
-        papi_free(table);
+        free(table);
     }
 
     return htable_errno;
@@ -258,7 +258,7 @@ move_table(struct hash_table *new_table,
     old_table->size = new_table->size;
     old_table->buckets = new_table->buckets;
     new_table->buckets = NULL;
-    papi_free(old_buckets);
+    free(old_buckets);
 
     return htable_errno;
 }
@@ -322,7 +322,7 @@ create_table_entry(const char *key, void
 {
     int htable_errno = HTABLE_SUCCESS;
 
-    *entry = papi_calloc(1, sizeof(**entry));
+    *entry = calloc(1, sizeof(**entry));
     if (*entry == NULL) {
         return HTABLE_ENOMEM;
     }
@@ -337,8 +337,8 @@ int
 destroy_table_entry(struct hash_table_entry *entry)
 {
     int htable_errno = HTABLE_SUCCESS;
-    papi_free(entry->key);
-    papi_free(entry);
+    free(entry->key);
+    free(entry);
     return htable_errno;
 }
 
diff -pruN 7.2.0~b2-1/src/components/cuda/lcuda_debug.h 7.2.0-1/src/components/cuda/lcuda_debug.h
--- 7.2.0~b2-1/src/components/cuda/lcuda_debug.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/lcuda_debug.h	2025-06-25 22:38:10.000000000 +0000
@@ -32,7 +32,10 @@
 /* Log cuda driver and runtime calls */
 #define LOGCUDACALL(format, args...) SUBDBG("CUDACALL: " format, ## args);
 
-/* Log cupti and perfworks calls */
+/* Log cupti calls */
 #define LOGCUPTICALL(format, args...) SUBDBG("CUPTICALL: " format, ## args);
 
+/* Log perfworks calls */
+#define LOGPERFWORKSCALL(format, args...) SUBDBG("PERFWORKSCALL: " format, ## args);
+
 #endif  /* __LCUDA_DEBUG_H__ */
diff -pruN 7.2.0~b2-1/src/components/cuda/linux-cuda.c 7.2.0-1/src/components/cuda/linux-cuda.c
--- 7.2.0~b2-1/src/components/cuda/linux-cuda.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/linux-cuda.c	2025-06-25 22:38:10.000000000 +0000
@@ -42,6 +42,7 @@ static int cuda_init_thread(hwd_context_
 static int cuda_init_control_state(hwd_control_state_t *ctl);
 static int cuda_shutdown_thread(hwd_context_t *ctx);
 static int cuda_shutdown_component(void);
+static int cuda_init_comp_presets(void);
 
 /* set and update component state */
 static int cuda_update_control_state(hwd_control_state_t *ctl,
@@ -80,7 +81,7 @@ typedef struct {
     unsigned int overflow_signal;
     unsigned int attached;
     int component_id;
-    uint64_t *events_id;
+    uint32_t *events_id;
     cuptid_info_t info;
     /* struct holding read count, gpu_ctl, etc. */
     cuptip_control_t cuptid_ctx;
@@ -165,8 +166,7 @@ static int cuda_shutdown_component(void)
 
 static int cuda_init_private(void)
 {
-    int papi_errno = PAPI_OK, len, count = 0;
-    const char *disabled_reason;
+    int papi_errno = PAPI_OK;
 
     _papi_hwi_lock(COMPONENT_LOCK);
     SUBDBG("ENTER\n");
@@ -174,39 +174,77 @@ static int cuda_init_private(void)
     if (_cuda_vector.cmp_info.initialized) {
         SUBDBG("Skipping cuda_init_private, as the Cuda event table has already been initialized.\n");
         goto fn_exit;
+    } 
+
+    int strLen = snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MIN_STR_LEN, "%s", "");
+    if (strLen < 0 || strLen >= PAPI_MIN_STR_LEN) {
+        SUBDBG("Failed to fully write initial disabled_reason.\n");
+    }
+
+    strLen = snprintf(_cuda_vector.cmp_info.partially_disabled_reason, PAPI_MIN_STR_LEN, "%s", "");
+    if (strLen < 0 || strLen >= PAPI_MIN_STR_LEN) {
+         SUBDBG("Failed to fully write initial partially_disabled_reason.\n");
     }
 
     papi_errno = cuptid_init();
     if (papi_errno != PAPI_OK) {
-        /* get and assign the string literal for the disabled reason */
-        cuptid_disabled_reason_get(&disabled_reason);
-        len = snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "%s", disabled_reason);
-        if (len < 0 || len > PAPI_MAX_STR_LEN) {
-            SUBDBG("The disabled reason has been truncated.\n");
+        // Get last error message
+        const char *err_string;
+        cuptid_err_get_last(&err_string);
+        // Cuda component is partially disabled
+        if (papi_errno == PAPI_PARTIAL) {
+            _cuda_vector.cmp_info.partially_disabled = 1;
+            strLen = snprintf(_cuda_vector.cmp_info.partially_disabled_reason, PAPI_HUGE_STR_LEN, "%s", err_string);
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                SUBDBG("Failed to fully write the partially disabled reason.\n");
+            }
+            // Reset variable that holds error code
+            papi_errno = PAPI_OK; 
+        }
+        // Cuda component is disabled
+        else {
+            strLen = snprintf(_cuda_vector.cmp_info.disabled_reason, PAPI_HUGE_STR_LEN, "%s", err_string);
+            if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) { // same bound as the snprintf above
+                SUBDBG("Failed to fully write the disabled reason.\n");
+            }
+            goto fn_fail;
         }
-        goto fn_fail;
     }
 
-    strcpy(_cuda_vector.cmp_info.disabled_reason, "");
-
-    /* get the number of native events count */
+    // Get the metric count found on a machine
+    int count = 0;
     papi_errno = cuda_get_evt_count(&count);
+    if (papi_errno != PAPI_OK) {
+        goto fn_fail;
+    }
     _cuda_vector.cmp_info.num_native_events = count;
 
-  fn_exit:
     _cuda_vector.cmp_info.initialized = 1;
-    _cuda_vector.cmp_info.disabled = papi_errno;
-    SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
-    _papi_hwi_unlock(COMPONENT_LOCK);
-    return papi_errno;
-  fn_fail:
-    goto fn_exit;
+
+    fn_exit:
+      _cuda_vector.cmp_info.disabled = papi_errno;
+      SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
+      _papi_hwi_unlock(COMPONENT_LOCK);
+      return papi_errno;
+    fn_fail:
+      goto fn_exit;
 }
 
 static int check_n_initialize(void)
 {
     if (!_cuda_vector.cmp_info.initialized) {
-        return cuda_init_private();
+        int papi_errno = cuda_init_private();
+        if( PAPI_OK != papi_errno ) {
+            return papi_errno;
+        }
+
+        // Set up the presets; reached only after cuda_init_private() returned PAPI_OK.
+        papi_errno = cuda_init_comp_presets();
+        if( PAPI_OK != papi_errno ) {
+            return papi_errno;
+        }
+
+        return papi_errno;
     }
     return _cuda_vector.cmp_info.disabled;
 }
@@ -218,8 +256,8 @@ static int cuda_ntv_enum_events(unsigned
     if (papi_errno != PAPI_OK) {
         goto fn_exit;
     }
-   
-    uint64_t code = *(uint64_t *) event_code;
+
+    uint32_t code = *(uint32_t *) event_code;
     papi_errno = cuptid_evt_enum(&code, modifier);
     *event_code = (unsigned int) code;
     
@@ -236,8 +274,8 @@ static int cuda_ntv_name_to_code(const c
     if (papi_errno != PAPI_OK) {
         goto fn_exit;
     }
-   
-    uint64_t code;
+
+    uint32_t code;
     papi_errno = cuptid_evt_name_to_code(name, &code);
     *event_code = (unsigned int) code;
 
@@ -255,7 +293,7 @@ static int cuda_ntv_code_to_name(unsigne
         return papi_errno;
     }
 
-    papi_errno = cuptid_evt_code_to_name((uint64_t) event_code, name, len);
+    papi_errno = cuptid_evt_code_to_name((uint32_t) event_code, name, len);
 
     fn_exit:
         SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
@@ -272,7 +310,7 @@ static int cuda_ntv_code_to_descr(unsign
         goto fn_fail;
     }
 
-    papi_errno = cuptid_evt_code_to_descr((uint64_t) event_code, descr, len);
+    papi_errno = cuptid_evt_code_to_descr((uint32_t) event_code, descr, len);
 
 fn_exit:
     SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
@@ -289,7 +327,7 @@ static int cuda_ntv_code_to_info(unsigne
         goto fn_fail;
     }
 
-    papi_errno = cuptid_evt_code_to_info((uint64_t) event_code, info);
+    papi_errno = cuptid_evt_code_to_info((uint32_t) event_code, info);
 
 fn_exit:
     SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
@@ -317,6 +355,43 @@ static int cuda_shutdown_thread(hwd_cont
     return PAPI_OK;
 }
 
+static int cuda_init_comp_presets(void)
+{
+    SUBDBG("ENTER: Init CUDA component presets.\n");
+
+    char arch_name[PAPI_2MAX_STR_LEN];
+    int numDevices = 0;
+    int haveChipName = 0;
+    int devIdx;
+
+    if ( cuptid_device_get_count(&numDevices) != PAPI_OK ) {
+        return PAPI_EMISC;
+    }
+
+    /* Take the chip name of the first device that reports one; the
+     * preset table is loaded once, for that architecture only. */
+    for ( devIdx = 0; devIdx < numDevices && !haveChipName; ++devIdx ) {
+        if ( cuptid_get_chip_name(devIdx, arch_name) == PAPI_OK ) {
+            haveChipName = 1;
+        }
+    }
+
+    /* No device yielded a chip name: nothing to load, not an error. */
+    if ( !haveChipName ) {
+        return PAPI_OK;
+    }
+
+    int retval = _papi_load_preset_table_component( _cuda_vector.cmp_info.name,
+                                                    arch_name,
+                                                    _cuda_vector.cmp_info.CmpIdx );
+    if ( retval != PAPI_OK ) {
+        SUBDBG("EXIT: Failed to init CUDA component presets.\n");
+        return retval;
+    }
+
+    return PAPI_OK;
+}
+
 static int cuda_init_control_state(hwd_control_state_t __attribute__((unused)) *ctl)
 {
     COMPDBG("Entering.\n");
@@ -377,13 +452,6 @@ struct event_map_item {
     int frontend_idx;
 };
 
-static int compare(const void *a, const void *b)
-{
-    struct event_map_item *A = (struct event_map_item *) a;
-    struct event_map_item *B = (struct event_map_item *) b;
-    return  A->event_id - B->event_id;
-}
-
 int update_native_events(cuda_control_t *ctl, NativeInfo_t *ntv_info,
                          int ntv_count)
 {
@@ -391,14 +459,19 @@ int update_native_events(cuda_control_t
     struct event_map_item sorted_events[PAPI_CUDA_MAX_COUNTERS];
 
     if (ntv_count != ctl->num_events) {
-        ctl->events_id = papi_realloc(ctl->events_id,
-                                      ntv_count * sizeof(*ctl->events_id));
-        if (ctl->events_id == NULL) {
-            papi_errno = PAPI_ENOMEM;
-            goto fn_fail;
-        }
-
         ctl->num_events = ntv_count;
+        if (ntv_count == 0) {
+            free(ctl->events_id);
+            ctl->events_id = NULL;
+            goto fn_exit;
+        }
+        else {
+            ctl->events_id = realloc(ctl->events_id, ntv_count * sizeof(*ctl->events_id));
+            if (ctl->events_id == NULL) {
+                papi_errno = PAPI_ENOMEM;
+                goto fn_fail;
+            }
+        }
     }
 
     int i;
@@ -437,14 +510,12 @@ static int cuda_start(hwd_context_t *ctx
     int papi_errno, i;
     cuda_context_t *cuda_ctx = (cuda_context_t *) ctx;
     cuda_control_t *cuda_ctl = (cuda_control_t *) ctl;
-   
-    /* will need to flesh this out more and decide if I want to keep this, may not need it 
-    if (cuda_ctx->state & CUDA_EVENTS_OPENED) {
+
+    if (cuda_ctx->state == CUDA_EVENTS_RUNNING) {
         SUBDBG("Error! Cannot PAPI_start more than one eventset at a time for every component.");
-        papi_errno = PAPI_ECNFLCT;
+        papi_errno = PAPI_EISRUN;
         goto fn_fail;
     }
-    */
 
     papi_errno = cuptid_ctx_create(cuda_ctl->info, &(cuda_ctl->cuptid_ctx), cuda_ctl->events_id, cuda_ctl->num_events);
     if (papi_errno != PAPI_OK)
@@ -462,7 +533,6 @@ static int cuda_start(hwd_context_t *ctx
        SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
        return papi_errno;
    fn_fail:
-       /* same as above may need to flesh this out more. */
        cuda_ctx->state = CUDA_EVENTS_STOPPED;
        goto fn_exit;
 }
@@ -587,7 +657,7 @@ static int cuda_cleanup_eventset(hwd_con
     }
 
     /* free int array of event id's and reset number of events */
-    papi_free(cuda_ctl->events_id);
+    free(cuda_ctl->events_id);
     cuda_ctl->events_id = NULL;
     cuda_ctl->num_events = 0;
 
@@ -603,7 +673,7 @@ static int cuda_cleanup_eventset(hwd_con
 */
 static int cuda_get_evt_count(int *count)
 {
-    uint64_t event_code = 0;
+    uint32_t event_code = 0;
 
     if (cuptid_evt_enum(&event_code, PAPI_ENUM_FIRST) == PAPI_OK) {
         ++(*count);
diff -pruN 7.2.0~b2-1/src/components/cuda/papi_cuda_presets.h 7.2.0-1/src/components/cuda/papi_cuda_presets.h
--- 7.2.0~b2-1/src/components/cuda/papi_cuda_presets.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/cuda/papi_cuda_presets.h	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,140 @@
+#ifndef __PAPI_CUDA_PRESETS_H__
+#define __PAPI_CUDA_PRESETS_H__
+
+hwi_presets_t _cuda_presets[PAPI_MAX_cuda_PRESETS] = {
+/*  0 */ {"PAPI_CUDA_FP16_FMA",
+	  "CUDA FP16 FMA instr",
+	  "CUDA Half precision (FP16) FMA instructions", 0,
+	  0, PAPI_PRESET_BIT_MSC,
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  1 */ {"PAPI_CUDA_BF16_FMA",
+	  "CUDA BF16 FMA instr",
+	  "CUDA Half precision (BF16) FMA instructions", 0,
+	  0, PAPI_PRESET_BIT_MSC,
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  2 */ {"PAPI_CUDA_FP32_FMA",
+	  "CUDA FP32 FMA instr",
+	  "CUDA Single precision (FP32) FMA instructions", 0,
+	  0, PAPI_PRESET_BIT_MSC,
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  3 */ {"PAPI_CUDA_FP64_FMA",
+	  "CUDA FP64 FMA instr",
+	  "CUDA Double precision (FP64) FMA instructions", 0,
+	  0, PAPI_PRESET_BIT_MSC,
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  4 */ {"PAPI_CUDA_FP_FMA",
+	  "CUDA FP FMA instr",
+	  "CUDA floating-point FMA instructions", 0,
+	  0, PAPI_PRESET_BIT_MSC,
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  5 */ {"PAPI_CUDA_FP8_OPS",
+	  "CUDA FP8 ops",
+	  "CUDA 8-bit precision floating-point operations", 0,
+	  0, PAPI_PRESET_BIT_MSC,
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  6 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  7 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  8 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*  9 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 10 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 11 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 12 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 13 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 14 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 15 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 16 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 17 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 18 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 19 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 20 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 21 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 22 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 23 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 24 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 25 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 26 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 27 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 28 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 29 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 30 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 31 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 32 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 33 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 34 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 35 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 36 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 37 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 38 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 39 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 40 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 41 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 42 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 43 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 44 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 45 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 46 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 47 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 48 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 49 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 50 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 51 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 52 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 53 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 54 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 55 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 56 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 57 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 58 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 59 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 60 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 61 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 62 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 63 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 64 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 65 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 66 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 67 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 68 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 69 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 70 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 71 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 72 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 73 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 74 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 75 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 76 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 77 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 78 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 79 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 80 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 81 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 82 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 83 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 84 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 85 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 86 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 87 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 88 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 89 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 90 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 91 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 92 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 93 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 94 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 95 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 96 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 97 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 98 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/* 99 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*100 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*101 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*102 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*103 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*104 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*105 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*106 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*107 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*108 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+}; /* 109 initializers listed; any remaining slots up to PAPI_MAX_cuda_PRESETS are implicitly zero-initialized */
+
+#endif /* __PAPI_CUDA_PRESETS_H__ */
diff -pruN 7.2.0~b2-1/src/components/cuda/papi_cuda_std_event_defs.h 7.2.0-1/src/components/cuda/papi_cuda_std_event_defs.h
--- 7.2.0~b2-1/src/components/cuda/papi_cuda_std_event_defs.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/cuda/papi_cuda_std_event_defs.h	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,23 @@
+#ifndef __PAPI_CUDA_STD_EVENT_DEFS_H__
+#define __PAPI_CUDA_STD_EVENT_DEFS_H__
+
+#define PAPI_MAX_cuda_PRESETS 128
+
+enum /* index of each named CUDA preset */
+{
+    PAPI_CUDA_FP16_FMA_idx = PAPI_cuda_PRESET_OFFSET, /* first CUDA preset; the following ones count up by one */
+    PAPI_CUDA_BF16_FMA_idx,
+    PAPI_CUDA_FP32_FMA_idx,
+    PAPI_CUDA_FP64_FMA_idx,
+    PAPI_CUDA_FP_FMA_idx,
+    PAPI_CUDA_FP8_OPS_idx
+};
+
+#define PAPI_CUDA_FP16_FMA  (PAPI_CUDA_FP16_FMA_idx | PAPI_PRESET_MASK)
+#define PAPI_CUDA_BF16_FMA  (PAPI_CUDA_BF16_FMA_idx | PAPI_PRESET_MASK)
+#define PAPI_CUDA_FP32_FMA  (PAPI_CUDA_FP32_FMA_idx | PAPI_PRESET_MASK)
+#define PAPI_CUDA_FP64_FMA  (PAPI_CUDA_FP64_FMA_idx | PAPI_PRESET_MASK)
+#define PAPI_CUDA_FP_FMA    (PAPI_CUDA_FP_FMA_idx   | PAPI_PRESET_MASK)
+#define PAPI_CUDA_FP8_OPS   (PAPI_CUDA_FP8_OPS_idx  | PAPI_PRESET_MASK)
+
+#endif /* __PAPI_CUDA_STD_EVENT_DEFS_H__ */
diff -pruN 7.2.0~b2-1/src/components/cuda/papi_cupti_common.c 7.2.0-1/src/components/cuda/papi_cupti_common.c
--- 7.2.0~b2-1/src/components/cuda/papi_cupti_common.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/papi_cupti_common.c	2025-06-25 22:38:10.000000000 +0000
@@ -8,7 +8,9 @@
 #include <dlfcn.h>
 #include <link.h>
 #include <libgen.h>
+#include <dirent.h>
 #include <papi.h>
+#include <cupti_target.h>
 #include "papi_memory.h"
 
 #include "cupti_config.h"
@@ -16,11 +18,63 @@
 
 static void *dl_drv, *dl_rt;
 
-const char *linked_cudart_path;
+static char cuda_error_string[PAPI_HUGE_STR_LEN];
+
 void *dl_cupti;
 
 unsigned int _cuda_lock;
 
+typedef int64_t gpu_occupancy_t;
+static gpu_occupancy_t global_gpu_bitmask;
+
+// Variables to handle partially disabled Cuda component
+static int isCudaPartial = 0;
+static int enabledDeviceIds[PAPI_CUDA_MAX_DEVICES];
+static size_t enabledDevicesCnt = 0;
+
+typedef enum
+{
+    sys_gpu_ccs_unknown = 0,
+    sys_gpu_ccs_mixed,
+    sys_gpu_ccs_all_lt_70,
+    sys_gpu_ccs_all_eq_70,
+    sys_gpu_ccs_all_gt_70,
+    sys_gpu_ccs_all_lte_70,
+    sys_gpu_ccs_all_gte_70
+} sys_compute_capabilities_e;
+
+struct cuptic_info {
+    CUcontext ctx;
+};
+
+// Load necessary functions from Cuda toolkit e.g. cupti or runtime 
+static int util_load_cuda_sym(void);
+static int load_cuda_sym(void);
+static int load_cudart_sym(void);
+static int load_cupti_common_sym(void);
+
+// Unload the loaded functions from Cuda toolkit e.g. cupti or runtime
+static int unload_cudart_sym(void);
+static int unload_cupti_common_sym(void);
+static void unload_linked_cudart_path(void);
+
+// Functions to get library versions 
+static int util_dylib_cu_runtime_version(void);
+static int util_dylib_cupti_version(void);
+
+// Functions to get cuda runtime library path
+static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data);
+static int get_user_cudart_path(void);
+
+// Function to determine compute capabilities
+static int compute_capabilities_on_system(sys_compute_capabilities_e *system_ccs);
+
+// Functions to handle a partially disabled Cuda component
+static int get_enabled_devices(void); 
+
+// misc.
+static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask);
+
 /* cuda driver function pointers */
 CUresult ( *cuCtxGetCurrentPtr ) (CUcontext *);
 CUresult ( *cuCtxSetCurrentPtr ) (CUcontext);
@@ -52,13 +106,17 @@ cudaError_t ( *cudaRuntimeGetVersionPtr
 
 /* cupti function pointer */
 CUptiResult ( *cuptiGetVersionPtr ) (uint32_t* );
+CUptiResult ( *cuptiDeviceGetChipNamePtr ) (CUpti_Device_GetChipName_Params* params);
 
 /**@class load_cuda_sym
- * @brief Search for libcuda.so.
+ * @brief Search for a variation of the shared object libcuda.
  */
-static int load_cuda_sym(void)
+int load_cuda_sym(void)
 {
-    dl_drv = dlopen("libcuda.so", RTLD_NOW | RTLD_GLOBAL);
+    int soNamesToSearchCount = 3;
+    const char *soNamesToSearchFor[] = {"libcuda.so", "libcuda.so.1", "libcuda"};
+
+    dl_drv = search_and_load_from_system_paths(soNamesToSearchFor, soNamesToSearchCount);
     if (!dl_drv) {
         ERRDBG("Loading installed libcuda.so failed. Check that cuda drivers are installed.\n");
         goto fn_fail;
@@ -114,78 +172,164 @@ static int unload_cuda_sym(void)
     return PAPI_OK;
 }
 
-void *cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[])
+/**@class search_and_load_shared_objects
+ * @brief Scan known subdirectories of parentPath for a Cuda shared object
+ *        and dlopen the first match; returns the handle or NULL on failure.
+ * @param *parentPath
+ *   The main path we will use to search for the shared objects.
+ * @param *soMainName
+ *   The name of the shared object e.g. libcudart. This is used to select
+ *   the standardSubPaths to use; NULL means search parentPath itself.
+ * @param *soNamesToSearchFor[]
+ *   Varying names of the shared object we want to search for.
+ * @param soNamesToSearchCount
+ *   Total number of names in soNamesToSearchFor.
+ */
+void *search_and_load_shared_objects(const char *parentPath, const char *soMainName, const char *soNamesToSearchFor[], int soNamesToSearchCount)
 {
-    void *dl = NULL;
-    char lookup_path[PATH_MAX];
-    char *found_files[CUPTIU_MAX_FILES];
-    int i, count;
-    for (i = 0; search_subpaths[i] != NULL; i++) {
-        sprintf(lookup_path, search_subpaths[i], parent_path, dlname);
-        dl = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
-        if (dl) {
-            return dl;
-        }
-    }
-    count = cuptiu_files_search_in_path(dlname, parent_path, found_files);
-    for (i = 0; i < count; i++) {
-        dl = dlopen(found_files[i], RTLD_NOW | RTLD_GLOBAL);
-        if (dl) {
-            break;
+    const char *standardSubPaths[3] = { NULL, NULL, NULL }; // unrecognized soMainName => nothing to search
+    // Case for when we want to search explicit subpaths for a shared object
+    if (soMainName != NULL) {
+        if (strcmp(soMainName, "libcudart") == 0) {
+            standardSubPaths[0] = "%s/lib64/";
+            standardSubPaths[1] = NULL;
+        }
+        else if (strcmp(soMainName, "libcupti") == 0) {
+            standardSubPaths[0] = "%s/extras/CUPTI/lib64/";
+            standardSubPaths[1] = "%s/lib64/";
+            standardSubPaths[2] = NULL;
+        }
+        else if (strcmp(soMainName, "libnvperf_host") == 0) {
+            standardSubPaths[0] = "%s/extras/CUPTI/lib64/";
+            standardSubPaths[1] = "%s/lib64/";
+            standardSubPaths[2] = NULL;
+        }
+    }
+    // Case for when a user provides an exact path e.g. PAPI_CUDA_RUNTIME
+    // and we do not want to search subpaths
+    else{
+        standardSubPaths[0] = "%s/";
+        standardSubPaths[1] = NULL;
+    }
+    DIR *dir = NULL; // function scope so the found: path below can close it
+    char pathToSharedLibrary[PAPI_HUGE_STR_LEN], directoryPathToSearch[PAPI_HUGE_STR_LEN];
+    void *so = NULL;
+    char *soNameFound;
+    int i, strLen;
+    for (i = 0; standardSubPaths[i] != NULL; i++) {
+        // Create path to search for dl names
+        strLen = snprintf(directoryPathToSearch, PAPI_HUGE_STR_LEN, standardSubPaths[i], parentPath);
+        if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+            ERRDBG("Failed to fully write path to search for dlnames.\n");
+            return NULL;
+        }
+
+        dir = opendir(directoryPathToSearch);
+        if (dir == NULL) {
+            ERRDBG("Directory path could not be opened.\n");
+            continue;
+        }
+
+        int j;
+        for (j = 0; j < soNamesToSearchCount; j++) {
+            struct dirent *dirEntry;
+            while( ( dirEntry = readdir(dir) ) != NULL ) {
+                int result;
+                char *p = strstr(soNamesToSearchFor[j], "so");
+                // Check for an exact match of a shared object name (.so and .so.1 case)
+                if (p) {
+                    result = strcmp(dirEntry->d_name, soNamesToSearchFor[j]);
+                }
+                // Check for any match of a shared object name (we could not find .so and .so.1)
+                else {
+                    result = strncmp(dirEntry->d_name, soNamesToSearchFor[j], strlen(soNamesToSearchFor[j]));
+                }
+
+                if (result == 0) {
+                    soNameFound = dirEntry->d_name;
+                    goto found;
+                }
+            }
+            // Reset the position of the directory stream
+            rewinddir(dir);
         }
+        closedir(dir); // no match in this directory: release the stream before trying the next subpath
     }
-    for (i = 0; i < count; i++) {
-        papi_free(found_files[i]);
+  exit:
+    return so;
+  found:
+    // Construct path to shared library
+    strLen = snprintf(pathToSharedLibrary, PAPI_HUGE_STR_LEN, "%s%s", directoryPathToSearch, soNameFound);
+    closedir(dir); // soNameFound points into the stream's entry; close only after the name is copied
+    if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+        ERRDBG("Failed to fully write constructed path to shared library.\n");
+        return NULL;
     }
-    return dl;
+    so = dlopen(pathToSharedLibrary, RTLD_NOW | RTLD_GLOBAL);
+    goto exit; 
+}
+
+/**@class search_and_load_from_system_paths
+ * @brief Try each candidate name with dlopen (RTLD_NOW | RTLD_GLOBAL), in
+ *        order, and return the first handle that loads; NULL if none does.
+ *
+ * @param *soNamesToSearchFor[]
+ *   Varying names of the shared object we want to search for.
+ * @param soNamesToSearchCount
+ *   Total number of names in soNamesToSearchFor.
+ */
+void *search_and_load_from_system_paths(const char *soNamesToSearchFor[], int soNamesToSearchCount)
+{
+    void *so = NULL;
+    int i;
+    for (i = 0; i < soNamesToSearchCount; i++) {
+        so = dlopen(soNamesToSearchFor[i], RTLD_NOW | RTLD_GLOBAL);
+        if (so) {
+            return so;
+        }   
+    }
+
+    return so; 
 }
 
 /**@class load_cudart_sym
- * @brief Search for libcudart.so. Order of search is outlined below.
+ * @brief Search for a variation of the shared object libcudart.
+ *        Order of search is outlined below.
  *
  * 1. If a user sets PAPI_CUDA_RUNTIME, this will take precedent over
  *    the options listed below to be searched.
- * 2. If we fail to collect libcudart.so from PAPI_CUDA_RUNTIME or it is not set,
+ * 2. If we fail to collect a variation of the shared object libcudart from PAPI_CUDA_RUNTIME or it is not set,
  *    we will search the path defined with PAPI_CUDA_ROOT; as this is supposed to always be set.
- * 3. If we fail to collect libcudart.so from steps 1 and 2, then we will search the linux
+ * 3. If we fail to collect a variation of the shared object libcudart from steps 1 and 2, then we will search the linux
  *    default directories listed by /etc/ld.so.conf. As a note, updating the LD_LIBRARY_PATH is
  *    advised for this option.
- * 4. We use dlopen to search for libcudart.so.
- *    If this fails, then we failed to find libcudart.so
+ * 4. We use dlopen to search for a variation of the shared object libcudart.
+ *    If this fails, then we failed to find a variation of the shared object
+ *    libcudart.
  */
-static int load_cudart_sym(void)
+int load_cudart_sym(void)
 {
-    char dlname[] = "libcudart.so";
-    char lookup_path[PATH_MAX];
+    int soNamesToSearchCount = 3;
+    const char *soNamesToSearchFor[] = {"libcudart.so", "libcudart.so.1", "libcudart"};
 
-    /* search PAPI_CUDA_RUNTIME for libcudart.so (takes precedent over PAPI_CUDA_ROOT) */
+    // If a user set PAPI_CUDA_RUNTIME with a path, then search it for the shared object (takes precedent over PAPI_CUDA_ROOT)
     char *papi_cuda_runtime = getenv("PAPI_CUDA_RUNTIME");
     if (papi_cuda_runtime) {
-        sprintf(lookup_path, "%s/%s", papi_cuda_runtime, dlname);
-        dl_rt = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
+        dl_rt = search_and_load_shared_objects(papi_cuda_runtime, NULL, soNamesToSearchFor, soNamesToSearchCount);
     }
 
-    const char *standard_paths[] = {
-        "%s/lib64/%s",
-        NULL,
-    };
-
-    /* search PAPI_CUDA_ROOT for libcudart.so */
+    char *soMainName = "libcudart";
+    // If a user set PAPI_CUDA_ROOT with a path and we did not already find the shared object, then search it for the shared object
     char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
     if (papi_cuda_root && !dl_rt) {
-        dl_rt = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
-    }
-
-    /* search linux default directories for libcudart.so */
-    if (linked_cudart_path && !dl_rt) {
-        dl_rt = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
+        dl_rt = search_and_load_shared_objects(papi_cuda_root, soMainName, soNamesToSearchFor, soNamesToSearchCount);
     }
 
-    /* last ditch effort to find libcudart.so */
+    // Last ditch effort to find a variation of libcudart, see dlopen manpages for how search occurs
     if (!dl_rt) {
-        dl_rt = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
+        dl_rt = search_and_load_from_system_paths(soNamesToSearchFor, soNamesToSearchCount);
         if (!dl_rt) {
-            ERRDBG("Loading libcudart.so failed. Try setting PAPI_CUDA_ROOT\n");
+            ERRDBG("Loading libcudart shared library failed. Try setting PAPI_CUDA_ROOT\n");
             goto fn_fail;
         }
     }
@@ -208,7 +352,7 @@ fn_fail:
     return PAPI_EMISC;
 }
 
-static int unload_cudart_sym(void)
+int unload_cudart_sym(void)
 {
     if (dl_rt) {
         dlclose(dl_rt);
@@ -227,50 +371,41 @@ static int unload_cudart_sym(void)
 }
 
 /**@class load_cupti_common_sym
- * @brief Search for libcupti.so. Order of search is outlined below.
+ * @brief Search for a variation of the shared object libcupti.
+ *        Order of search is outlined below.
  *
  * 1. If a user sets PAPI_CUDA_CUPTI, this will take precedent over
  *    the options listed below to be searched.
- * 2. If we fail to collect libcupti.so from PAPI_CUDA_CUPTI or it is not set,
+ * 2. If we fail to collect a variation of the shared object libcupti from PAPI_CUDA_CUPTI or it is not set,
  *    we will search the path defined with PAPI_CUDA_ROOT; as this is supposed to always be set.
- * 3. If we fail to collect libcupti.so from steps 1 and 2, then we will search the linux
+ * 3. If we fail to collect a variation of the shared object libcupti from steps 1 and 2, then we will search the linux
  *    default directories listed by /etc/ld.so.conf. As a note, updating the LD_LIBRARY_PATH is
  *    advised for this option.
- * 4. We use dlopen to search for libcupti.so.
- *    If this fails, then we failed to find libcupti.so
+ * 4. We use dlopen to search for a variation of the shared object libcupti.
+ *    If this fails, then we failed to find a variation of the shared object
+ *    libcupti.
  */
-static int load_cupti_common_sym(void)
+int load_cupti_common_sym(void)
 {
-    char dlname[] = "libcupti.so";
-    char lookup_path[PATH_MAX];
+    int soNamesToSearchCount = 3;
+    const char  *soNamesToSearchFor[] = {"libcupti.so", "libcupti.so.1", "libcupti"};
 
-    /* search PAPI_CUDA_CUPTI for libcupti.so (takes precedent over PAPI_CUDA_ROOT) */
+    // If a user set PAPI_CUDA_CUPTI with a path, then search it for the shared object (takes precedent over PAPI_CUDA_ROOT)
     char *papi_cuda_cupti = getenv("PAPI_CUDA_CUPTI");
     if (papi_cuda_cupti) {
-        sprintf(lookup_path, "%s/%s", papi_cuda_cupti, dlname);
-        dl_cupti = dlopen(lookup_path, RTLD_NOW | RTLD_GLOBAL);
+        dl_cupti = search_and_load_shared_objects(papi_cuda_cupti, NULL, soNamesToSearchFor, soNamesToSearchCount);
     }
 
-    const char *standard_paths[] = {
-        "%s/extras/CUPTI/lib64/%s",
-        "%s/lib64/%s",
-        NULL,
-    };
-
-    /* search PAPI_CUDA_ROOT for libcupti.so */
+    char *soMainName = "libcupti";
+    // If a user set PAPI_CUDA_ROOT with a path and we did not already find the shared object, then search it for the shared object
     char *papi_cuda_root = getenv("PAPI_CUDA_ROOT");
     if (papi_cuda_root && !dl_cupti) {
-        dl_cupti = cuptic_load_dynamic_syms(papi_cuda_root, dlname, standard_paths);
+        dl_cupti = search_and_load_shared_objects(papi_cuda_root, soMainName, soNamesToSearchFor, soNamesToSearchCount);
     }
 
-    /* search linux default directories for libcupti.so */
-    if (linked_cudart_path && !dl_cupti) {
-        dl_cupti = cuptic_load_dynamic_syms(linked_cudart_path, dlname, standard_paths);
-    }
-
-    /* last ditch effort to find libcupti.so */
+    // Last ditch effort to find a variation of libcupti, see dlopen manpages for how search occurs
     if (!dl_cupti) {
-        dl_cupti = dlopen(dlname, RTLD_NOW | RTLD_GLOBAL);
+        dl_cupti = search_and_load_from_system_paths(soNamesToSearchFor, soNamesToSearchCount);
         if (!dl_cupti) {
             ERRDBG("Loading libcupti.so failed. Try setting PAPI_CUDA_ROOT\n");
             goto fn_fail;
@@ -278,6 +413,7 @@ static int load_cupti_common_sym(void)
     }
 
     cuptiGetVersionPtr = DLSYM_AND_CHECK(dl_cupti, "cuptiGetVersion");
+    cuptiDeviceGetChipNamePtr = DLSYM_AND_CHECK(dl_cupti, "cuptiDeviceGetChipName");
 
     Dl_info info;
     dladdr(cuptiGetVersionPtr, &info);
@@ -287,17 +423,18 @@ fn_fail:
     return PAPI_EMISC;
 }
 
-static int unload_cupti_common_sym(void)
+int unload_cupti_common_sym(void)
 {
     if (dl_cupti) {
         dlclose(dl_cupti);
         dl_cupti = NULL;
     }
     cuptiGetVersionPtr = NULL;
+    cuptiDeviceGetChipNamePtr = NULL;
     return PAPI_OK;
 }
 
-static int util_load_cuda_sym(void)
+int util_load_cuda_sym(void)
 {
     int papi_errno;
     papi_errno = load_cuda_sym();
@@ -310,34 +447,25 @@ static int util_load_cuda_sym(void)
         return PAPI_OK;
 }
 
-static void unload_linked_cudart_path(void)
-{
-    if (linked_cudart_path) {
-        papi_free((void*) linked_cudart_path);
-        linked_cudart_path = NULL;
-    }
-}
-
 int cuptic_shutdown(void)
 {
     unload_cuda_sym();
     unload_cudart_sym();
     unload_cupti_common_sym();
-    unload_linked_cudart_path();
     return PAPI_OK;
 }
 
-static int util_dylib_cu_runtime_version(void)
+int util_dylib_cu_runtime_version(void)
 {
     int runtimeVersion;
-    cudaArtCheckErrors(cudaRuntimeGetVersionPtr(&runtimeVersion), return PAPI_EMISC );
+    cudaArtCheckErrors(cudaRuntimeGetVersionPtr(&runtimeVersion), return PAPI_EMISC);
     return runtimeVersion;
 }
 
-static int util_dylib_cupti_version(void)
+int util_dylib_cupti_version(void)
 {
     unsigned int cuptiVersion;
-    cuptiCheckErrors(cuptiGetVersionPtr(&cuptiVersion), return PAPI_EMISC );
+    cuptiCheckErrors(cuptiGetVersionPtr(&cuptiVersion), return PAPI_EMISC);
     return cuptiVersion;
 }
 
@@ -355,224 +483,265 @@ int cuptic_device_get_count(int *num_gpu
     /* find the total number of compute-capable devices */
     cuda_err = cudaGetDeviceCountPtr(num_gpus);
     if (cuda_err != cudaSuccess) {
-        cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_err));
+        cuptic_err_set_last(cudaGetErrorStringPtr(cuda_err));
         return PAPI_EMISC;
     }
     return PAPI_OK;
 }
 
-static int get_gpu_compute_capability(int dev_num, int *cc)
+int get_gpu_compute_capability(int dev_num, int *cc)
 {
     int cc_major, cc_minor;
     cudaError_t cuda_errno;
     cuda_errno = cudaDeviceGetAttributePtr(&cc_major, cudaDevAttrComputeCapabilityMajor, dev_num);
     if (cuda_errno != cudaSuccess) {
-        cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_errno));
+        cuptic_err_set_last(cudaGetErrorStringPtr(cuda_errno));
         return PAPI_EMISC;
     }
     cuda_errno = cudaDeviceGetAttributePtr(&cc_minor, cudaDevAttrComputeCapabilityMinor, dev_num);
     if (cuda_errno != cudaSuccess) {
-        cuptic_disabled_reason_set(cudaGetErrorStringPtr(cuda_errno));
+        cuptic_err_set_last(cudaGetErrorStringPtr(cuda_errno));
         return PAPI_EMISC;
     }
     *cc = cc_major * 10 + cc_minor;
     return PAPI_OK;
 }
 
-typedef enum {GPU_COLLECTION_UNKNOWN, GPU_COLLECTION_ALL_PERF, GPU_COLLECTION_MIXED, GPU_COLLECTION_ALL_EVENTS, GPU_COLLECTION_ALL_CC70} gpu_collection_e;
-
-static int util_gpu_collection_kind(gpu_collection_e *coll_kind)
+int compute_capabilities_on_system(sys_compute_capabilities_e *system_ccs)
 {
-    int papi_errno = PAPI_OK;
-    static gpu_collection_e kind = GPU_COLLECTION_UNKNOWN;
-    if (kind != GPU_COLLECTION_UNKNOWN) {
-        goto fn_exit;
-    }
-
     int total_gpus;
-    papi_errno = cuptic_device_get_count(&total_gpus);
+    int papi_errno = cuptic_device_get_count(&total_gpus);
     if (papi_errno != PAPI_OK) {
-        goto fn_exit;
+        return papi_errno;
     }
 
     int i, cc;
-    int count_perf = 0, count_evt = 0, count_cc70 = 0;
-    for (i=0; i<total_gpus; i++) {
+    int num_gpus_with_ccs_gt_cc70 = 0, num_gpus_with_ccs_eq_cc70 = 0, num_gpus_with_ccs_lt_cc70 = 0;
+    for (i = 0; i < total_gpus; i++) {
         papi_errno = get_gpu_compute_capability(i, &cc);
         if (papi_errno != PAPI_OK) {
             return papi_errno;
         }
-        if (cc == 70) {
-            ++count_cc70;
+
+        if (cc > 70) {
+            ++num_gpus_with_ccs_gt_cc70;
         }
-        if (cc >= 70) {
-            ++count_perf;
+        if (cc == 70) {
+            ++num_gpus_with_ccs_eq_cc70;
         }
-        if (cc <= 70) {
-            ++count_evt;
+        if (cc < 70) {
+            ++num_gpus_with_ccs_lt_cc70;
         }
     }
-    if (count_cc70 == total_gpus) {
-        kind = GPU_COLLECTION_ALL_CC70;
-        goto fn_exit;
+
+    sys_compute_capabilities_e sys_ccs = sys_gpu_ccs_unknown;
+    // All devices have CCs > 7.0.
+    if (num_gpus_with_ccs_gt_cc70 == total_gpus) {
+        sys_ccs = sys_gpu_ccs_all_gt_70;
     }
-    if (count_perf == total_gpus) {
-        kind = GPU_COLLECTION_ALL_PERF;
-        goto fn_exit;
+    // All devices have CCs = 7.0
+    else if (num_gpus_with_ccs_eq_cc70 == total_gpus) {
+        sys_ccs = sys_gpu_ccs_all_eq_70;
     }
-    if (count_evt == total_gpus) {
-        kind = GPU_COLLECTION_ALL_EVENTS;
-        goto fn_exit;
+    // All devices have CCs < 7.0
+    else if (num_gpus_with_ccs_lt_cc70 == total_gpus) {
+        sys_ccs = sys_gpu_ccs_all_lt_70;
     }
-    kind = GPU_COLLECTION_MIXED;
-
-fn_exit:
-    *coll_kind = kind;
-    return papi_errno;
-}
-
-const char *cuptic_disabled_reason_g;
+    // Devices can result in a partially disabled Cuda component
+    else {
+        sys_ccs = sys_gpu_ccs_mixed;
 
-/** @class cuptic_disabled_reason_set
-  * @brief Updating the current Cuda context.
-  * @param *msg
-  *    Cuda error message.
-*/
-void cuptic_disabled_reason_set(const char *msg)
-{
-    cuptic_disabled_reason_g = msg;
-}
+        int all_ccs_gte_cc70 = num_gpus_with_ccs_eq_cc70 + num_gpus_with_ccs_gt_cc70;
+        if (all_ccs_gte_cc70 == total_gpus) {
+            sys_ccs = sys_gpu_ccs_all_gte_70;
+        }
+ 
+        int all_ccs_lte_cc70 = num_gpus_with_ccs_eq_cc70 + num_gpus_with_ccs_lt_cc70;
+        if (all_ccs_lte_cc70 == total_gpus) {
+            sys_ccs = sys_gpu_ccs_all_lte_70;
+        }
+    }
+    *system_ccs = sys_ccs;
 
-void cuptic_disabled_reason_get(const char **pmsg)
-{
-    *pmsg = cuptic_disabled_reason_g;
+    return PAPI_OK;
 }
 
-static int dl_iterate_phdr_cb(struct dl_phdr_info *info, __attribute__((unused)) size_t size, __attribute__((unused)) void *data)
+/** @class cuptic_err_set_last
+  * @brief For the last error, set an error message.
+  * @param *error_str
+  *    Error message to be set.
+*/
+int cuptic_err_set_last(const char *error_str)
 {
-    const char *library_name = "libcudart.so";
-    char *library_path = strdup(info->dlpi_name);
-
-    if (library_path != NULL && strstr(library_path, library_name) != NULL) {
-        linked_cudart_path = strdup(dirname(dirname((char *) library_path)));
+    int strLen = snprintf(cuda_error_string, PAPI_HUGE_STR_LEN, "%s", error_str);
+    if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+        SUBDBG("Last set error message not fully written.\n");
     }
-
-    free(library_path);
+    
     return PAPI_OK;
 }
 
-static int get_user_cudart_path(void)
+/** @class cuptic_err_get_last
+  * @brief Get the last error message set.
+  * @param **error_str
+  *    Error message to be returned.
+*/
+int cuptic_err_get_last(const char **error_str)
 {
-    dl_iterate_phdr(dl_iterate_phdr_cb, NULL);
-    if (NULL == linked_cudart_path) {
-        return PAPI_EMISC;
-    }
+    *error_str = cuda_error_string;
     return PAPI_OK;
 }
 
 int cuptic_init(void)
 {
-    int papi_errno = get_user_cudart_path();
-    if (papi_errno == PAPI_OK) {
-        LOGDBG("Linked cudart root: %s\n", linked_cudart_path);
-    }
-    else {
-        LOGDBG("Target application not linked with cuda runtime libraries.\n");
-    }
-    papi_errno = util_load_cuda_sym();
+    int papi_errno = util_load_cuda_sym();
     if (papi_errno != PAPI_OK) {
-        cuptic_disabled_reason_set("Unable to load CUDA library functions.");
-        goto fn_exit;
+        cuptic_err_set_last("Unable to load CUDA library functions.");
+        return papi_errno;
     }
 
-    gpu_collection_e kind;
-    papi_errno = util_gpu_collection_kind(&kind);
+    sys_compute_capabilities_e system_ccs;
+    papi_errno = compute_capabilities_on_system(&system_ccs);
     if (papi_errno != PAPI_OK) {
-        goto fn_exit;
-    }
- 
-    if (kind == GPU_COLLECTION_MIXED) {
-        cuptic_disabled_reason_set("No support for systems with mixed compute capabilities, such as CC < 7.0 and CC > 7.0 GPUS.");
-        papi_errno = PAPI_ECMP;
-        goto fn_exit;
-    }
-fn_exit:
-    return papi_errno;
-}
-
-int cuptic_is_runtime_perfworks_api(void)
-{
-    static int is_perfworks_api = -1;
-    if (is_perfworks_api != -1) {
-        goto fn_exit;
+        return papi_errno;
     }
-    char *papi_cuda_110_cc70_perfworks_api = getenv("PAPI_CUDA_110_CC_70_PERFWORKS_API");
 
-    gpu_collection_e gpus_kind;
-    int papi_errno = util_gpu_collection_kind(&gpus_kind);
+    // Get an array of the available devices on the system
+    papi_errno = get_enabled_devices();
     if (papi_errno != PAPI_OK) {
-        goto fn_exit;
+        return papi_errno;
     }
 
-    unsigned int cuptiVersion = util_dylib_cupti_version();
-
-    if (gpus_kind == GPU_COLLECTION_ALL_CC70 && 
-        (cuptiVersion == CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION || util_dylib_cu_runtime_version() == 11000))
-    {
-        if (papi_cuda_110_cc70_perfworks_api != NULL) {
-            is_perfworks_api = 1;
-            goto fn_exit;
+    // Handle a partially disabled Cuda component
+    // TODO: Once the Events API is added back, this conditional will need to be updated for Issue #297 section 2
+    if (system_ccs == sys_gpu_ccs_mixed || system_ccs == sys_gpu_ccs_all_lte_70) {
+        char *PAPI_CUDA_API = getenv("PAPI_CUDA_API");
+        char *cc_support = ">=7.0";
+        if (PAPI_CUDA_API != NULL) {
+            int result = strcasecmp(PAPI_CUDA_API, "EVENTS");
+            if (result == 0) {
+                cc_support = "<=7.0";
+            }
         }
-        else {
-            is_perfworks_api = 0;
-            goto fn_exit;
+
+        char errMsg[PAPI_HUGE_STR_LEN];
+        int strLen = snprintf(errMsg, PAPI_HUGE_STR_LEN,
+                              "System includes multiple compute capabilities: <7.0, =7.0, >7.0."
+                              " Only support for CC %s enabled.", cc_support);
+        if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+            SUBDBG("Failed to fully write the partially disabled error message.\n");
+            return PAPI_ENOMEM;
         }
-    }
+        cuptic_err_set_last(errMsg);
+
+        isCudaPartial = 1;
 
-    if ((gpus_kind == GPU_COLLECTION_ALL_PERF || gpus_kind == GPU_COLLECTION_ALL_CC70) && cuptiVersion >= CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION) {
-        is_perfworks_api = 1;
-        goto fn_exit;
-    } else {
-        is_perfworks_api = 0;
-        goto fn_exit;
+        return PAPI_PARTIAL;
     }
 
-fn_exit:
-    return is_perfworks_api;
+    return PAPI_OK;
+}
+
+/* Copy the file-level isCudaPartial, enabledDeviceIds and enabledDevicesCnt into the out-parameters. */
+void cuptic_partial(int *isCmpPartial, int **cudaEnabledDeviceIds, size_t *totalNumEnabledDevices)
+{
+    *totalNumEnabledDevices = enabledDevicesCnt;
+    *cudaEnabledDeviceIds = enabledDeviceIds;
+    *isCmpPartial = isCudaPartial;
+}
 
-int cuptic_is_runtime_events_api(void)
+int cuptic_determine_runtime_api(void) 
 {
-    static int is_events_api = -1;
-    if (is_events_api != -1) {
-        goto fn_exit;
+    int cupti_api = -1;
+    char *PAPI_CUDA_API = getenv("PAPI_CUDA_API");
+
+    // For the Perfworks API to be operational in the Cuda component,
+    // users must link with a Cuda toolkit version that has a CUPTI version >= 13.
+    // TODO: Once the Events API is added back into the Cuda component, add a check
+    // similar to the one shown below.
+    unsigned int cuptiVersion = util_dylib_cupti_version();
+    if (!(cuptiVersion >= CUPTI_PROFILER_API_MIN_SUPPORTED_VERSION) && PAPI_CUDA_API == NULL) {
+        return cupti_api; 
     }
 
-    gpu_collection_e gpus_kind;
-    int papi_errno = util_gpu_collection_kind(&gpus_kind);
+    // Determine the compute capabilities on the system
+    sys_compute_capabilities_e system_ccs;
+    int papi_errno = compute_capabilities_on_system(&system_ccs);
     if (papi_errno != PAPI_OK) {
-        goto fn_exit;
+        return papi_errno;
     }
 
-    /*
-     * See cupti_config.h: When NVIDIA removes the events API add a check in the following condition
-     * to check the `util_dylib_cupti_version()` is also <= CUPTI_EVENTS_API_MAX_SUPPORTED_VERSION.
-     */
-    if ((gpus_kind == GPU_COLLECTION_ALL_EVENTS || gpus_kind == GPU_COLLECTION_ALL_CC70)) {
-        is_events_api = 1;
-        goto fn_exit;
-    } else {
-        is_events_api = 0;
-        goto fn_exit;
+    // Determine which CUPTI API will be in use
+    switch (system_ccs) {
+        // All devices have CCs < 7.0
+        case sys_gpu_ccs_all_lt_70:
+            cupti_api = API_EVENTS;
+            break;
+        // All devices have CCs > 7.0
+        case sys_gpu_ccs_all_gt_70:
+            cupti_api = API_PERFWORKS;
+            break;
+        // All devices have CCs <= 7.0
+        // TODO: Once the Events API is added back, this case will default to use the Events API
+        case sys_gpu_ccs_all_lte_70:
+        // All devices have CCs >= 7.0
+        case sys_gpu_ccs_all_gte_70:
+        // All devices have CCs = 7.0
+        case sys_gpu_ccs_all_eq_70:
+        // Devices are mixed, with CCs > 7.0 and CCs < 7.0
+        case sys_gpu_ccs_mixed:
+            // Default will be to use Perfworks API, user can change this by setting PAPI_CUDA_API.
+            cupti_api = API_PERFWORKS;
+            if (PAPI_CUDA_API != NULL) {
+                int result = strcasecmp(PAPI_CUDA_API, "EVENTS");
+                if (result == 0)
+                    cupti_api = API_EVENTS;
+            }
+            break;
+        default:
+            SUBDBG("Implemented CUPTI APIs do not support the current GPU configuration.\n");
+            break;
     }
-fn_exit:
-    return is_events_api;
+
+    return cupti_api;
 }
 
-struct cuptic_info {
-    CUcontext ctx;
-};
+/* Rebuild enabledDeviceIds/enabledDevicesCnt with the devices usable by the selected CUPTI API. */
+int get_enabled_devices(void)
+{
+    int total_gpus;
+    int papi_errno = cuptic_device_get_count(&total_gpus);
+    if (papi_errno != PAPI_OK) {
+        return papi_errno;
+    }
+
+    /* A negative value means no CUPTI API could be selected (or the selection itself failed). */
+    int cupti_api = cuptic_determine_runtime_api();
+    if (cupti_api < 0) {
+        return PAPI_ECMP;
+    }
+
+    /* Start from an empty list so a repeated initialization does not append
+     * duplicate ids and walk past the end of enabledDeviceIds. */
+    enabledDevicesCnt = 0;
+
+    /* NOTE(review): no check against the capacity of enabledDeviceIds - confirm it holds total_gpus ids. */
+    int i, cc;
+    for (i = 0; i < total_gpus; i++) {
+        papi_errno = get_gpu_compute_capability(i, &cc);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
+
+        /* Perfworks takes devices with CC >= 7.0, Events takes CC <= 7.0; CC 7.0 qualifies for both. */
+        int usable = (cupti_api == API_PERFWORKS && cc >= 70) || (cupti_api == API_EVENTS && cc <= 70);
+        if (usable) {
+            enabledDeviceIds[enabledDevicesCnt++] = i;
+        }
+    }
+
+    return PAPI_OK;
+}
 
 /** @class cuptic_ctxarr_create
   * @brief Allocate memory for pinfo.
@@ -592,7 +761,7 @@ int cuptic_ctxarr_create(cuptic_info_t *
     }
   
     /* allocate memory */ 
-    *pinfo = (cuptic_info_t) papi_calloc (total_gpus, sizeof(*pinfo));
+    *pinfo = (cuptic_info_t) calloc (total_gpus, sizeof(*pinfo));
     if (*pinfo == NULL) {
         return PAPI_ENOMEM;
     }
@@ -668,51 +837,21 @@ int cuptic_ctxarr_get_ctx(cuptic_info_t
 
 int cuptic_ctxarr_destroy(cuptic_info_t *pinfo)
 {
-    papi_free(*pinfo);
+    free(*pinfo);
     *pinfo = NULL;
     return PAPI_OK;
 }
 
-/* Functions based on bitmasking to detect gpu exclusivity */
-typedef int64_t gpu_occupancy_t;
-static gpu_occupancy_t global_gpu_bitmask;
-
-static int event_name_get_gpuid(const char *name, int *gpuid)
+int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
 {
-    int papi_errno = PAPI_OK;
-    char *token;
-    char *copy = strdup(name);
-
-    token = strtok(copy, "=");
-    if (token == NULL) {
-        goto fn_fail;
-    }
-    token = strtok(NULL, "\0");
-    if (token == NULL) {
-        goto fn_fail;
-    }
-    *gpuid = strtol(token, NULL, 10);
-
-fn_exit:
-    papi_free(copy);
-    return papi_errno;
-fn_fail:
-    papi_errno = PAPI_EINVAL;
-    goto fn_exit;
-}
-
-static int _devmask_events_get(cuptiu_event_table_t *evt_table, gpu_occupancy_t *bitmask)
-{
-    int papi_errno = PAPI_OK, gpu_id;
-    long i;
     gpu_occupancy_t acq_mask = 0;
-    cuptiu_event_t *evt_rec;
+    long i;
     for (i = 0; i < evt_table->count; i++) {
         acq_mask |= (1 << evt_table->cuda_devs[i]);
     }
     *bitmask = acq_mask;
-fn_exit:
-    return papi_errno;
+
+    return PAPI_OK;
 }
 
 int cuptic_device_acquire(cuptiu_event_table_t *evt_table)
@@ -773,3 +912,21 @@ int cuptiu_dev_check(cuptiu_bitmap_t bit
 {
     return (bitmap & (1ULL << i));
 }
+
+/* Ask CUPTI for the chip name of device dev_num and copy it into chipName. */
+int get_chip_name(int dev_num, char* chipName)
+{
+    CUpti_Device_GetChipName_Params chipNameParams = {
+        .structSize = CUpti_Device_GetChipName_Params_STRUCT_SIZE,
+        .pPriv = NULL,
+        .deviceIndex = dev_num
+    };
+    /* NOTE(review): chipName's capacity is not known here - confirm callers size it for pChipName. */
+    int cupti_status = cuptiDeviceGetChipNamePtr(&chipNameParams);
+    if (cupti_status == CUPTI_SUCCESS) {
+        strcpy(chipName, chipNameParams.pChipName);
+        return PAPI_OK;
+    }
+    ERRDBG("CUPTI error %d: Failed to get chip name for device %d\n", cupti_status, dev_num);
+    return PAPI_EMISC;
+}
diff -pruN 7.2.0~b2-1/src/components/cuda/papi_cupti_common.h 7.2.0-1/src/components/cuda/papi_cupti_common.h
--- 7.2.0~b2-1/src/components/cuda/papi_cupti_common.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/papi_cupti_common.h	2025-06-25 22:38:10.000000000 +0000
@@ -15,9 +15,12 @@
 #include "cupti_utils.h"
 #include "lcuda_debug.h"
 
+// Set to match the maximum number of devices allowed for the event identifier
+// encoding format. See README_internal.md for more details.
+#define PAPI_CUDA_MAX_DEVICES 128
+
 typedef struct cuptic_info *cuptic_info_t;
 
-extern const char *linked_cudart_path;
 extern void *dl_cupti;
 
 extern unsigned int _cuda_lock;
@@ -55,18 +58,18 @@ extern CUptiResult ( *cuptiGetVersionPtr
 
 /* utility functions to check runtime api, disabled reason, etc. */
 int cuptic_init(void);
-int cuptic_is_runtime_perfworks_api(void);
-int cuptic_is_runtime_events_api(void);
+int cuptic_determine_runtime_api(void);
 int cuptic_device_get_count(int *num_gpus);
-void cuptic_disabled_reason_set(const char *msg);
-void cuptic_disabled_reason_get(const char **pmsg);
-void *cuptic_load_dynamic_syms(const char *parent_path, const char *dlname, const char *search_subpaths[]);
+void *search_and_load_shared_objects(const char *parentPath, const char *soMainName, const char *soNamesToSearchFor[], int soNamesToSearchCount);
+void *search_and_load_from_system_paths(const char *soNamesToSearchFor[], int soNamesToSearchCount);
+int cuptic_err_get_last(const char **error_str);
+int cuptic_err_set_last(const char *error_str);
 int cuptic_shutdown(void);
 
 /* context management interfaces */
 int cuptic_ctxarr_create(cuptic_info_t *pinfo);
 int cuptic_ctxarr_update_current(cuptic_info_t info, int evt_dev_id);
-int cuptic_ctxarr_get_ctx(cuptic_info_t info, int gpu_idx, CUcontext *ctx);
+int cuptic_ctxarr_get_ctx(cuptic_info_t info, int dev_id, CUcontext *ctx);
 int cuptic_ctxarr_destroy(cuptic_info_t *pinfo);
 
 /* functions to track the occupancy of gpu counters in event sets */
@@ -77,6 +80,15 @@ int cuptic_device_release(cuptiu_event_t
 int cuptiu_dev_set(cuptiu_bitmap_t *bitmap, int i);
 int cuptiu_dev_check(cuptiu_bitmap_t bitmap, int i);
 
+/* functions to handle a partially disabled Cuda component */
+void cuptic_partial(int *isCmpPartial, int **cudaEnabledDeviceIds, size_t *totalNumEnabledDevices);
+
+/* function to get a device's compute capability */
+int get_gpu_compute_capability(int dev_num, int *cc);
+
+/* misc. */
+int get_chip_name(int dev_num, char* chipName);
+
 #define DLSYM_AND_CHECK( dllib, name ) dlsym( dllib, name );  \
     if (dlerror() != NULL) {  \
         ERRDBG("A CUDA required function '%s' was not found in lib '%s'.\n", name, #dllib);  \
@@ -120,7 +132,7 @@ int cuptiu_dev_check(cuptiu_bitmap_t bit
 #define nvpwCheckErrors( call, handleerror ) \
     do {  \
         NVPA_Status _status = (call);  \
-        LOGCUPTICALL("\t" #call "\n");  \
+        LOGPERFWORKSCALL("\t" #call "\n");  \
         if (_status != NVPA_STATUS_SUCCESS) {  \
             ERRDBG("NVPA Error %d: Error in call to " #call "\n", _status);  \
             EXIT_OR_NOT; \
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/HelloWorld.cu 7.2.0-1/src/components/cuda/tests/HelloWorld.cu
--- 7.2.0~b2-1/src/components/cuda/tests/HelloWorld.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/HelloWorld.cu	2025-06-25 22:38:10.000000000 +0000
@@ -49,75 +49,122 @@
 #define STEP_BY_STEP_DEBUG 0 /* helps debug CUcontext issues. */
 #define PRINT(quiet, format, args...) {if (!quiet) {fprintf(stderr, format, ## args);}}
 
-// Prototypes
-__global__ void helloWorld(char*);
+// Device kernel: every thread adds its global index to one character of str.
+__global__ void helloWorld(char* str)
+{
+    // flat index of this thread across the 1-D grid
+    const int tid = threadIdx.x + blockDim.x * blockIdx.x;
+    // undo the per-character offset applied on the host
+    str[tid] += tid;
+}
 
+/** @class add_events_from_command_line
+  * @brief Attempt to add every event named on the command line to an EventSet.
+  *
+  * @param EventSet
+  *   PAPI EventSet the events are added to.
+  * @param totalEventCount
+  *   Number of entries in eventNamesFromCommandLine.
+  * @param **eventNamesFromCommandLine
+  *   Event names taken from the command line.
+  * @param *numEventsSuccessfullyAdded
+  *   Incremented once for each event that was added.
+  * @param **eventsSuccessfullyAdded
+  *   Buffers of PAPI_MAX_STR_LEN bytes that receive the names of added events.
+  * @param *numMultipassEvents
+  *   Incremented for each event that was not added (PAPI_EMULPASS expected).
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    for (int idx = 0; idx < totalEventCount; idx++) {
+        char *requested = eventNamesFromCommandLine[idx];
+        int status = PAPI_add_named_event(EventSet, requested);
+        if (status != PAPI_OK) {
+            if (status != PAPI_EMULPASS) {
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", requested, status);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+            // Not added: tally it as a multiple pass event and move on to the next name
+            *numMultipassEvents += 1;
+            continue;
+        }
+
+        // Store the name of the added event in the next free output slot
+        char *slot = eventsSuccessfullyAdded[*numEventsSuccessfullyAdded];
+        int written = snprintf(slot, PAPI_MAX_STR_LEN, "%s", requested);
+        if (written < 0 || written >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        *numEventsSuccessfullyAdded += 1;
+    }
+    // Results are reported only through the counter and name out-parameters.
+    return;
+}
 
 // Host function
 int main(int argc, char** argv)
 {
-	int quiet = 0;
+    int quiet = 0;
     CUcontext getCtx=NULL, sessionCtx=NULL;
     cudaError_t cudaError;
     CUresult cuError; (void) cuError;
 
+    cuError = cuInit(0);
+    if (cuError != CUDA_SUCCESS) {
+        fprintf(stderr, "Failed to initialize the CUDA driver API.\n");
+        exit(1);
+    }
+
 #ifdef PAPI
-	char *test_quiet = getenv("PAPI_CUDA_TEST_QUIET");
+    char *test_quiet = getenv("PAPI_CUDA_TEST_QUIET");
     if (test_quiet)
         quiet = (int) strtol(test_quiet, (char**) NULL, 10);
 
-	/* PAPI Initialization */
-	int papi_errno = PAPI_library_init( PAPI_VER_CURRENT );
-	if( papi_errno != PAPI_VER_CURRENT ) {
-		test_fail(__FILE__,__LINE__, "PAPI_library_init failed", 0 );
-	}
-
-	printf( "PAPI_VERSION     : %4d %6d %7d\n",
-		PAPI_VERSION_MAJOR( PAPI_VERSION ),
-		PAPI_VERSION_MINOR( PAPI_VERSION ),
-		PAPI_VERSION_REVISION( PAPI_VERSION ) );
-
-	int i;
-	int EventSet = PAPI_NULL;
-	int eventCount = argc - 1;
-
-	/* if no events passed at command line, just report test skipped. */
-	if (eventCount == 0) {
-		fprintf(stderr, "No eventnames specified at command line.");
-		test_skip(__FILE__, __LINE__, "", 0);
-	}
+    /* PAPI Initialization */
+    int papi_errno = PAPI_library_init( PAPI_VER_CURRENT );
+    if( papi_errno != PAPI_VER_CURRENT ) {
+        test_fail(__FILE__,__LINE__, "PAPI_library_init failed", 0 );
+    }
+
+    printf( "PAPI_VERSION     : %4d %6d %7d\n",
+        PAPI_VERSION_MAJOR( PAPI_VERSION ),
+        PAPI_VERSION_MINOR( PAPI_VERSION ),
+        PAPI_VERSION_REVISION( PAPI_VERSION ) );
+
+    int i;
+    int EventSet = PAPI_NULL;
+    int eventCount = argc - 1;
+
+    /* if no events passed at command line, just report test skipped. */
+    if (eventCount == 0) {
+        fprintf(stderr, "No eventnames specified at command line.");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
 
-	long long *values = (long long *) calloc(eventCount, sizeof (long long));
+    long long *values = (long long *) calloc(eventCount, sizeof (long long));
     if (values == NULL) {
         test_fail(__FILE__, __LINE__, "Failed to allocate memory for values.\n", 0);
     }
-	int *events = (int *) calloc(eventCount, sizeof (int));
+
+    int *events = (int *) calloc(eventCount, sizeof (int));
     if (events == NULL) {
         test_fail(__FILE__, __LINE__, "Failed to allocate memory for events.\n", 0);
     }
-	/* convert PAPI native events to PAPI code */
-	for( i = 0; i < eventCount; i++ ){
-        papi_errno = PAPI_event_name_to_code( argv[i+1], &events[i] );
-		if( papi_errno != PAPI_OK ) {
-			fprintf(stderr, "Check event name: %s", argv[i+1] );
-			test_skip(__FILE__, __LINE__, "", 0);
-		}
-        PRINT( quiet, "Name %s --- Code: %#x\n", argv[i+1], events[i] );
-	}
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i before PAPI_create_eventset() getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	papi_errno = PAPI_create_eventset( &EventSet );
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__,__LINE__,"Cannot create eventset",papi_errno);
-	}
+    papi_errno = PAPI_create_eventset( &EventSet );
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__,__LINE__,"Cannot create eventset",papi_errno);
+    }
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
-        fprintf(stderr, "%s:%s:%i before PAPI_add_events(), getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
+        fprintf(stderr, "%s:%s:%i after PAPI_create_eventset() getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
     // If multiple GPUs/contexts were being used, you'd need to
@@ -127,7 +174,7 @@ int main(int argc, char** argv)
     // Context Create. We will use this one to run our kernel.
     cuError = cuCtxCreate(&sessionCtx, 0, 0); // Create a context, NULL flags, Device 0.
     if (cuError != CUDA_SUCCESS) {
-        fprintf(stderr, "Failed to create cuContext.\n");
+        fprintf(stderr, "Failed to create cuContext: %d\n", cuError);
         exit(-1);
     }
 
@@ -136,24 +183,39 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i after cuCtxCreate(&sessionCtx), about to PAPI_start(), sessionCtx=%p, getCtx=%p.\n", __FILE__, __func__, __LINE__, sessionCtx, getCtx);
     }
 
-    papi_errno = PAPI_add_events( EventSet, events, eventCount );
-    if (papi_errno == PAPI_ENOEVNT) {
-        fprintf(stderr, "Event name does not exist for component.");
+    // Handle the events from the command line
+    int numEventsSuccessfullyAdded = 0, numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded, **metricNames = argv + 1;
+    eventsSuccessfullyAdded = (char **) malloc(eventCount * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+    for (i = 0; i < eventCount; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+    }
+
+    add_events_from_command_line(EventSet, eventCount, metricNames, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
         test_skip(__FILE__, __LINE__, "", 0);
     }
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_add_events failed", papi_errno);
-	}
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i before PAPI_start(), getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	papi_errno = PAPI_start( EventSet );
-	if( papi_errno != PAPI_OK ) {
+    papi_errno = PAPI_start( EventSet );
+    if( papi_errno != PAPI_OK ) {
         test_fail(__FILE__, __LINE__, "PAPI_start failed.", papi_errno);
-	}
+    }
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
@@ -162,43 +224,43 @@ int main(int argc, char** argv)
 
 #endif
 
-	int j;
+    int j;
 
-	// desired output
-	char str[] = "Hello World!";
+    // desired output
+    char str[] = "Hello World!";
 
-	// mangle contents of output
-	// the null character is left intact for simplicity
-	for(j = 0; j < 12; j++) {
-		str[j] -= j;
-	}
+    // mangle contents of output
+    // the null character is left intact for simplicity
+    for(j = 0; j < 12; j++) {
+        str[j] -= j;
+    }
 
     PRINT(quiet, "mangled str=%s\n", str);
 
-	// allocate memory on the device
-	char *d_str;
-	size_t size = sizeof(str);
-	cudaMalloc((void**)&d_str, size);
+    // allocate memory on the device
+    char *d_str;
+    size_t size = sizeof(str);
+    cudaMalloc((void**)&d_str, size);
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i after cudaMalloc() getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	// copy the string to the device
-	cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
+    // copy the string to the device
+    cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i after cudaMemcpy(ToDevice) getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	// set the grid and block sizes
-	dim3 dimGrid(2); // one block per word
-	dim3 dimBlock(6); // one thread per character
+    // set the grid and block sizes
+    dim3 dimGrid(2); // one block per word
+    dim3 dimBlock(6); // one thread per character
 
-	// invoke the kernel
-	helloWorld<<< dimGrid, dimBlock >>>(d_str);
+    // invoke the kernel
+    helloWorld<<< dimGrid, dimBlock >>>(d_str);
 
     cudaError = cudaGetLastError();
     if (STEP_BY_STEP_DEBUG) {
@@ -210,16 +272,16 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i After Kernel Execution: getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	// retrieve the results from the device
-	cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
+    // retrieve the results from the device
+    cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i after cudaMemcpy(ToHost) getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	// free up the allocated memory on the device
-	cudaFree(d_str);
+    // free up the allocated memory on the device
+    cudaFree(d_str);
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
@@ -228,22 +290,23 @@ int main(int argc, char** argv)
 
 
 #ifdef PAPI
-	papi_errno = PAPI_read( EventSet, values );
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_read failed", papi_errno);
-	}
+    papi_errno = PAPI_read( EventSet, values );
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_read failed", papi_errno);
+    }
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i after PAPI_read getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	for( i = 0; i < eventCount; i++ )
-		PRINT( quiet, "read: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], argv[i+1] );
+    for( i = 0; i < numEventsSuccessfullyAdded; i++ ) {
+        PRINT( quiet, "read: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], eventsSuccessfullyAdded[i] );
+    }
 
     papi_errno = cuCtxPopCurrent(&getCtx);
-	if( papi_errno != CUDA_SUCCESS) {
-		fprintf( stderr, "cuCtxPopCurrent failed, papi_errno=%d (%s)\n", papi_errno, PAPI_strerror(papi_errno) );
+    if( papi_errno != CUDA_SUCCESS) {
+        fprintf( stderr, "cuCtxPopCurrent failed, papi_errno=%d (%s)\n", papi_errno, PAPI_strerror(papi_errno) );
         exit(1);
     }
 
@@ -252,9 +315,9 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i after cuCtxPopCurrent() getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	papi_errno = PAPI_stop( EventSet, values );
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_stop failed", papi_errno);
+    papi_errno = PAPI_stop( EventSet, values );
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_stop failed", papi_errno);
     }
 
     if (STEP_BY_STEP_DEBUG) {
@@ -262,9 +325,9 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i after PAPI_stop getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	papi_errno = PAPI_cleanup_eventset(EventSet);
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset failed", papi_errno);
+    papi_errno = PAPI_cleanup_eventset(EventSet);
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset failed", papi_errno);
     }
 
     if (STEP_BY_STEP_DEBUG) {
@@ -272,9 +335,9 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i after PAPI_cleanup_eventset getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	papi_errno = PAPI_destroy_eventset(&EventSet);
-	if (papi_errno != PAPI_OK) {
-		test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset failed", papi_errno);
+    papi_errno = PAPI_destroy_eventset(&EventSet);
+    if (papi_errno != PAPI_OK) {
+        test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset failed", papi_errno);
     }
 
     if (STEP_BY_STEP_DEBUG) {
@@ -282,9 +345,9 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i after PAPI_destroy_eventset getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-
-	for( i = 0; i < eventCount; i++ )
-		PRINT( quiet, "stop: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], argv[i+1] );
+    for( i = 0; i < numEventsSuccessfullyAdded; i++ ) {
+        PRINT( quiet, "stop: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], eventsSuccessfullyAdded[i] );
+    }
 #endif
 
     if (STEP_BY_STEP_DEBUG) {
@@ -301,27 +364,28 @@ int main(int argc, char** argv)
         fprintf(stderr, "%s:%s:%i after cuCtxDestroy(%p) getCtx=%p.\n", __FILE__, __func__, __LINE__, sessionCtx, getCtx);
     }
 
+    // Free allocated memory
+    free(values);
+    free(events);
+    for (i = 0; i < eventCount; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
+
 #ifdef PAPI
-	PAPI_shutdown();
+    PAPI_shutdown();
 
     if (STEP_BY_STEP_DEBUG) {
         cuCtxGetCurrent(&getCtx);
         fprintf(stderr, "%s:%s:%i after PAPI_shutdown getCtx=%p.\n", __FILE__, __func__, __LINE__, getCtx);
     }
 
-	test_pass(__FILE__);
-#endif
-	return 0;
-}
-
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
 
-// Device kernel
-__global__ void
-helloWorld(char* str)
-{
-	// determine where in the thread grid we are
-	int idx = blockIdx.x * blockDim.x + threadIdx.x;
-	// unmangle output
-	str[idx] += idx;
+    test_pass(__FILE__);
+#endif
+    return 0;
 }
-
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/HelloWorld_noCuCtx.cu 7.2.0-1/src/components/cuda/tests/HelloWorld_noCuCtx.cu
--- 7.2.0~b2-1/src/components/cuda/tests/HelloWorld_noCuCtx.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/HelloWorld_noCuCtx.cu	2025-06-25 22:38:10.000000000 +0000
@@ -49,163 +49,226 @@
 #define STEP_BY_STEP_DEBUG 0 /* helps debug CUcontext issues. */
 #define PRINT(quiet, format, args...) {if (!quiet) {fprintf(stderr, format, ## args);}}
 
-// Prototypes
-__global__ void helloWorld(char*);
+// Device kernel
+__global__ void
+helloWorld(char* str)
+{
+        // determine where in the thread grid we are
+        int idx = blockIdx.x * blockDim.x + threadIdx.x;
+        // unmangle output
+        str[idx] += idx;
+}
 
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param EventSet
+  *   A PAPI eventset.
+  * @param totalEventCount
+  *   Number of events from the command line.
+  * @param eventNamesFromCommandLine
+  *   Events provided on the command line.
+  * @param *numEventsSuccessfullyAdded
+  *   Total number of successfully added events.
+  * @param **eventsSuccessfullyAdded
+  *   Events that we are able to add to the EventSet.
+  * @param *numMultipassEvents
+  *   Incremented for each event skipped because it requires multiple passes (PAPI_EMULPASS).
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        int papi_errno = PAPI_add_named_event(EventSet, eventNamesFromCommandLine[i]);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", eventNamesFromCommandLine[i], papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events
+        int strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", eventNamesFromCommandLine[i]);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
 
 // Host function
 int main(int argc, char** argv)
 {
-	int quiet = 0;
+    int quiet = 0;
     cudaError_t cudaError;
     CUresult cuError; (void) cuError;
 
+    cuInit(0);
+
 #ifdef PAPI
-	char *test_quiet = getenv("PAPI_CUDA_TEST_QUIET");
+    char *test_quiet = getenv("PAPI_CUDA_TEST_QUIET");
     if (test_quiet)
         quiet = (int) strtol(test_quiet, (char**) NULL, 10);
 
-	/* PAPI Initialization */
-	int papi_errno = PAPI_library_init( PAPI_VER_CURRENT );
-	if( papi_errno != PAPI_VER_CURRENT ) {
-		test_fail(__FILE__,__LINE__, "PAPI_library_init failed", 0);
-	}
-
-	printf( "PAPI_VERSION     : %4d %6d %7d\n",
-		PAPI_VERSION_MAJOR( PAPI_VERSION ),
-		PAPI_VERSION_MINOR( PAPI_VERSION ),
-		PAPI_VERSION_REVISION( PAPI_VERSION ) );
-
-	int i;
-	int EventSet = PAPI_NULL;
-	int eventCount = argc - 1;
-
-	/* if no events passed at command line, just report test skipped. */
-	if (eventCount == 0) {
-		fprintf(stderr, "No events specified at command line.");
-		test_skip(__FILE__,__LINE__, "", 0);
-	}
+    /* PAPI Initialization */
+    int papi_errno = PAPI_library_init( PAPI_VER_CURRENT );
+    if( papi_errno != PAPI_VER_CURRENT ) {
+        test_fail(__FILE__,__LINE__, "PAPI_library_init failed", 0);
+    }
+
+    printf( "PAPI_VERSION     : %4d %6d %7d\n",
+        PAPI_VERSION_MAJOR( PAPI_VERSION ),
+        PAPI_VERSION_MINOR( PAPI_VERSION ),
+        PAPI_VERSION_REVISION( PAPI_VERSION ) );
+
+    int i;
+    int EventSet = PAPI_NULL;
+    int eventCount = argc - 1;
+
+    /* if no events passed at command line, just report test skipped. */
+    if (eventCount == 0) {
+        fprintf(stderr, "No events specified at command line.");
+        test_skip(__FILE__,__LINE__, "", 0);
+    }
 
-	long long *values = (long long *) calloc(eventCount, sizeof (long long));
+    long long *values = (long long *) calloc(eventCount, sizeof (long long));
     if (values == NULL) {
-        test_fail(__FILE__, __LINE__, "Failed to allocate memory for values.\n", 0);
+       test_fail(__FILE__, __LINE__, "Failed to allocate memory for values.\n", 0);
     }
-	int *events = (int *) calloc(eventCount, sizeof (int));
+
+    int *events = (int *) calloc(eventCount, sizeof (int));
     if (events == NULL) {
         test_fail(__FILE__, __LINE__, "Failed to allocate memory for events.\n", 0);
     }
-	/* convert PAPI native events to PAPI code */
-	for( i = 0; i < eventCount; i++ ){
-		papi_errno = PAPI_event_name_to_code( argv[i+1], &events[i] );
-		if( papi_errno != PAPI_OK ) {
-			fprintf(stderr, "Check event name: %s", argv[i+1] );
-			test_skip(__FILE__, __LINE__, "", 0);
-		}
-		PRINT( quiet, "Name %s --- Code: %#x\n", argv[i+1], events[i] );
-	}
-
-	papi_errno = PAPI_create_eventset( &EventSet );
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__,__LINE__,"Cannot create eventset",papi_errno);
-	}
-
-    papi_errno = PAPI_add_events( EventSet, events, eventCount );
-    if (papi_errno == PAPI_ENOEVNT) {
-        fprintf(stderr, "Event name does not exist for component.");
+
+    papi_errno = PAPI_create_eventset( &EventSet );
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__,__LINE__,"Cannot create eventset",papi_errno);
+    }
+
+    // Handle the events from the command line
+    int numEventsSuccessfullyAdded = 0, numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded, **metricNames = argv + 1;
+    eventsSuccessfullyAdded = (char **) malloc(eventCount * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
         test_skip(__FILE__, __LINE__, "", 0);
     }
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_add_events failed", papi_errno);
-	}
+    for (i = 0; i < eventCount; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+    }
 
-	papi_errno = PAPI_start( EventSet );
-	if( papi_errno != PAPI_OK ) {
+    add_events_from_command_line(EventSet, eventCount, metricNames, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+
+    papi_errno = PAPI_start( EventSet );
+    if( papi_errno != PAPI_OK ) {
         test_fail(__FILE__, __LINE__, "PAPI_start failed.", papi_errno);
-	}
+    }
 
 #endif
 
-	int j;
+    int j;
 
-	// desired output
-	char str[] = "Hello World!";
+    // desired output
+    char str[] = "Hello World!";
 
-	// mangle contents of output
-	// the null character is left intact for simplicity
-	for(j = 0; j < 12; j++) {
-		str[j] -= j;
-	}
+    // mangle contents of output
+    // the null character is left intact for simplicity
+    for(j = 0; j < 12; j++) {
+        str[j] -= j;
+    }
 
     PRINT( quiet, "mangled str=%s\n", str );
 
-	// allocate memory on the device
-	char *d_str;
-	size_t size = sizeof(str);
-	cudaMalloc((void**)&d_str, size);
-
-	// copy the string to the device
-	cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
-
-	// set the grid and block sizes
-	dim3 dimGrid(2); // one block per word
-	dim3 dimBlock(6); // one thread per character
+    // allocate memory on the device
+    char *d_str;
+    size_t size = sizeof(str);
+    cudaMalloc((void**)&d_str, size);
+
+    // copy the string to the device
+    cudaMemcpy(d_str, str, size, cudaMemcpyHostToDevice);
+
+    // set the grid and block sizes
+    dim3 dimGrid(2); // one block per word
+    dim3 dimBlock(6); // one thread per character
 
-	// invoke the kernel
-	helloWorld<<< dimGrid, dimBlock >>>(d_str);
+    // invoke the kernel
+    helloWorld<<< dimGrid, dimBlock >>>(d_str);
 
     cudaError = cudaGetLastError();
     if (STEP_BY_STEP_DEBUG) {
         fprintf(stderr, "%s:%s:%i Kernel Return Code: %s.\n", __FILE__, __func__, __LINE__, cudaGetErrorString(cudaError));
     }
 
-	// retrieve the results from the device
-	cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
+    // retrieve the results from the device
+    cudaMemcpy(str, d_str, size, cudaMemcpyDeviceToHost);
 
-	// free up the allocated memory on the device
-	cudaFree(d_str);
+    // free up the allocated memory on the device
+    cudaFree(d_str);
 
 #ifdef PAPI
-	papi_errno = PAPI_read( EventSet, values );
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_read failed", papi_errno);
-	}
+    papi_errno = PAPI_read( EventSet, values );
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_read failed", papi_errno);
+    }
 
-	for( i = 0; i < eventCount; i++ )
-		PRINT( quiet, "read: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], argv[i+1] );
+    for( i = 0; i < numEventsSuccessfullyAdded; i++ ) {
+        PRINT( quiet, "read: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], eventsSuccessfullyAdded[i] );
+    }
 
-	papi_errno = PAPI_stop( EventSet, values );
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_stop failed", papi_errno);
+    papi_errno = PAPI_stop( EventSet, values );
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_stop failed", papi_errno);
     }
 
-	papi_errno = PAPI_cleanup_eventset(EventSet);
-	if( papi_errno != PAPI_OK ) {
-		test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset failed", papi_errno);
+    papi_errno = PAPI_cleanup_eventset(EventSet);
+    if( papi_errno != PAPI_OK ) {
+        test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset failed", papi_errno);
     }
 
-	papi_errno = PAPI_destroy_eventset(&EventSet);
-	if (papi_errno != PAPI_OK) {
-		test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset failed", papi_errno);
+    papi_errno = PAPI_destroy_eventset(&EventSet);
+    if (papi_errno != PAPI_OK) {
+        test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset failed", papi_errno);
     }
 
-	for( i = 0; i < eventCount; i++ )
-		PRINT( quiet, "stop: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], argv[i+1] );
+    for( i = 0; i < numEventsSuccessfullyAdded; i++ ) {
+        PRINT( quiet, "stop: %12lld \t=0X%016llX \t\t --> %s \n", values[i], values[i], eventsSuccessfullyAdded[i] );
+    }
+
+    // Free allocated memory
+    free(values);
+    free(events); 
+    for (i = 0; i < eventCount; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
 
-	PAPI_shutdown();
-	free(values);
-	free(events);
-	test_pass(__FILE__);
+    PAPI_shutdown();
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
+    test_pass(__FILE__);
 #endif
 
 	return 0;
 }
-
-// Device kernel
-__global__ void
-helloWorld(char* str)
-{
-	// determine where in the thread grid we are
-	int idx = blockIdx.x * blockDim.x + threadIdx.x;
-	// unmangle output
-	str[idx] += idx;
-}
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/concurrent_profiling.cu 7.2.0-1/src/components/cuda/tests/concurrent_profiling.cu
--- 7.2.0~b2-1/src/components/cuda/tests/concurrent_profiling.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/concurrent_profiling.cu	2025-06-25 22:38:10.000000000 +0000
@@ -51,6 +51,9 @@ using ::std::thread;
 #include <vector>
 using ::std::vector;
 
+#include <algorithm>
+using ::std::find;
+
 #define PRINT(quiet, format, args...) {if (!quiet) {fprintf(stderr, format, ## args);}}
 int quiet;
 
@@ -140,6 +143,50 @@ vector<size_t> elements(numKernels);
 // For 4 calls, this is 4k elements * 2 arrays * (1 + 2 + 3 + 4 stream mul) * 8B/elem =~ 640KB
 int const blockSize = 4 * 1024;
 
+// Globals for successfully added and multiple pass events
+int numMultipassEvents = 0;
+vector<string> eventsSuccessfullyAdded;
+
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param d
+  *   Per device data.
+  * @param EventSet
+  *   A PAPI eventset.
+  * @param metricNames
+  *   Events provided on the command line.
+  * @param successfullyAddedEvents
+  *   Not referenced in the body (passed by value); names are recorded in the global eventsSuccessfullyAdded.
+  * @param *numMultipassEvents
+  *   Incremented for each event skipped because it requires multiple passes (PAPI_EMULPASS).
+*/
+static void add_events_from_command_line(perDeviceData &d, int EventSet, vector<string> const &metricNames, vector<string> successfullyAddedEvents, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < metricNames.size(); i++) {
+        string evt_name = metricNames[i] + std::to_string(d.config.device);
+        int papi_errno = PAPI_add_named_event(EventSet, evt_name.c_str());
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", evt_name.c_str(), papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events
+        if (find(eventsSuccessfullyAdded.begin(), eventsSuccessfullyAdded.end(), metricNames[i]) == eventsSuccessfullyAdded.end()) {
+            eventsSuccessfullyAdded.push_back(metricNames[i]);
+        }
+    }
+
+    return;
+}
+
 // Wrapper which will launch numKernel kernel calls on a single device
 // The device streams vector is used to control which stream each call is made on
 // If 'serial' is non-zero, the device streams are ignored and instead the default stream is used
@@ -151,18 +198,17 @@ void profileKernels(perDeviceData &d,
     RUNTIME_API_CALL(cudaSetDevice(d.config.device));  // Orig code has mistake here
     DRIVER_API_CALL(cuCtxSetCurrent(d.config.context));
 #ifdef PAPI
-    int eventset = PAPI_NULL, i, papi_errno;
+    int eventset = PAPI_NULL;
     PAPI_CALL(PAPI_create_eventset(&eventset));
-    string evt_name;
-    for (i = 0; i < metricNames.size(); i++) {
-        evt_name = metricNames[i] + std::to_string(d.config.device);
-        PRINT(quiet, "Adding event name: %s\n", evt_name.c_str());
-        papi_errno = PAPI_add_named_event(eventset, evt_name.c_str());
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", evt_name.c_str());
-            test_skip(__FILE__, __LINE__, "", 0);
-        }
+
+    add_events_from_command_line(d, eventset, metricNames, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (eventsSuccessfullyAdded.size() == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
     }
+
     PAPI_CALL(PAPI_start(eventset));
 #endif
 
@@ -198,7 +244,7 @@ void print_measured_values(perDeviceData
     PRINT(quiet, "%s\n", std::string(80, '-').c_str());
     for (int i=0; i < metricNames.size(); i++) {
         evt_name = metricNames[i] + std::to_string(d.config.device);
-        PRINT(quiet, "%s\t\t\t%ld\n", evt_name.c_str(), d.values[i]);
+        PRINT(quiet, "%s\t\t\t%lld\n", evt_name.c_str(), d.values[i]);
     }
 }
 
@@ -426,7 +472,7 @@ int main(int argc, char **argv)
     PRINT(quiet, "\nMetrics for device #0:\n");
     PRINT(quiet, "Look at the sm__cycles_elapsed.max values for each test.\n");
     PRINT(quiet, "This value represents the time spent on device to run the kernels in each case, and should be longest for the serial range, and roughly equal for the single and multi device concurrent ranges.\n");
-    print_measured_values(deviceData[0], metricNames);
+    print_measured_values(deviceData[0], eventsSuccessfullyAdded);
 
     // Only display next device info if needed
     if (numDevices > 1)
@@ -437,9 +483,16 @@ int main(int argc, char **argv)
     for (int i = 1; i < numDevices; i++)
     {
         PRINT(quiet, "\nMetrics for device #%d:\n", i);
-        print_measured_values(deviceData[i], metricNames);
+        print_measured_values(deviceData[i], eventsSuccessfullyAdded);
     }
+
     PAPI_shutdown();
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #endif
     return 0;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/concurrent_profiling_noCuCtx.cu 7.2.0-1/src/components/cuda/tests/concurrent_profiling_noCuCtx.cu
--- 7.2.0~b2-1/src/components/cuda/tests/concurrent_profiling_noCuCtx.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/concurrent_profiling_noCuCtx.cu	2025-06-25 22:38:10.000000000 +0000
@@ -51,6 +51,9 @@ using ::std::thread;
 #include <vector>
 using ::std::vector;
 
+#include <algorithm>
+using ::std::find;
+
 #define PRINT(quiet, format, args...) {if (!quiet) {fprintf(stderr, format, ## args);}}
 int quiet;
 
@@ -139,6 +142,50 @@ vector<size_t> elements(numKernels);
 // For 4 calls, this is 4k elements * 2 arrays * (1 + 2 + 3 + 4 stream mul) * 8B/elem =~ 640KB
 int const blockSize = 4 * 1024;
 
+// Globals for successfully added and multiple pass events
+int numMultipassEvents = 0;
+vector<string> eventsSuccessfullyAdded;
+
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param d
+  *   Per device data.
+  * @param EventSet
+  *   A PAPI eventset.
+  * @param metricNames
+  *   Events provided on the command line.
+  * @param successfullyAddedEvents
+  *   Not referenced in the body (passed by value); names are recorded in the global eventsSuccessfullyAdded.
+  * @param *numMultipassEvents
+  *   Incremented for each event skipped because it requires multiple passes (PAPI_EMULPASS).
+*/
+static void add_events_from_command_line(perDeviceData &d, int EventSet, vector<string> const &metricNames, vector<string> successfullyAddedEvents, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < metricNames.size(); i++) {
+        string evt_name = metricNames[i] + std::to_string(d.config.device);
+        int papi_errno = PAPI_add_named_event(EventSet, evt_name.c_str());
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", evt_name.c_str(), papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events
+        if (find(eventsSuccessfullyAdded.begin(), eventsSuccessfullyAdded.end(), metricNames[i]) == eventsSuccessfullyAdded.end()) {
+            eventsSuccessfullyAdded.push_back(metricNames[i]);
+        }
+    }
+
+    return;
+}
+
 // Wrapper which will launch numKernel kernel calls on a single device
 // The device streams vector is used to control which stream each call is made on
 // If 'serial' is non-zero, the device streams are ignored and instead the default stream is used
@@ -148,19 +195,18 @@ void profileKernels(perDeviceData &d,
 {
     RUNTIME_API_CALL(cudaSetDevice(d.config.device));  // Orig code has mistake here
 #ifdef PAPI
-    int eventset = PAPI_NULL, i, papi_errno;
+    int eventset = PAPI_NULL;
     PAPI_CALL(PAPI_create_eventset(&eventset));
-    // Switch to desired device
-    string evt_name;
-    for (i = 0; i < metricNames.size(); i++) {
-        evt_name = metricNames[i] + std::to_string(d.config.device);
-        PRINT(quiet, "Adding event name: %s\n", evt_name.c_str());
-        papi_errno = PAPI_add_named_event(eventset, evt_name.c_str());
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", evt_name.c_str());
-            test_skip(__FILE__, __LINE__, "", 0);
-        }
+
+    add_events_from_command_line(d, eventset, metricNames, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (eventsSuccessfullyAdded.size() == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
     }
+
+
     PAPI_CALL(PAPI_start(eventset));
 #endif
     for (unsigned int stream = 0; stream < d.streams.size(); stream++)
@@ -195,7 +241,7 @@ void print_measured_values(perDeviceData
     PRINT(quiet, "%s\n", std::string(80, '-').c_str());
     for (int i=0; i < metricNames.size(); i++) {
         evt_name = metricNames[i] + std::to_string(d.config.device);
-        PRINT(quiet, "%s\t\t\t%ld\n", evt_name.c_str(), d.values[i]);
+        PRINT(quiet, "%s\t\t\t%lld\n", evt_name.c_str(), d.values[i]);
     }
 }
 
@@ -420,7 +466,7 @@ int main(int argc, char **argv)
     PRINT(quiet, "\nMetrics for device #0:\n");
     PRINT(quiet, "Look at the sm__cycles_elapsed.max values for each test.\n");
     PRINT(quiet, "This value represents the time spent on device to run the kernels in each case, and should be longest for the serial range, and roughly equal for the single and multi device concurrent ranges.\n");
-    print_measured_values(deviceData[0], metricNames);
+    print_measured_values(deviceData[0], eventsSuccessfullyAdded);
 
     // Only display next device info if needed
     if (numDevices > 1)
@@ -431,9 +477,16 @@ int main(int argc, char **argv)
     for (int i = 1; i < numDevices; i++)
     {
         PRINT(quiet, "\nMetrics for device #%d:\n", i);
-        print_measured_values(deviceData[i], metricNames);
+        print_measured_values(deviceData[i], eventsSuccessfullyAdded);
     }
+
     PAPI_shutdown();
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #endif
     return 0;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/cudaOpenMP.cu 7.2.0-1/src/components/cuda/tests/cudaOpenMP.cu
--- 7.2.0~b2-1/src/components/cuda/tests/cudaOpenMP.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/cudaOpenMP.cu	2025-06-25 22:38:10.000000000 +0000
@@ -75,6 +75,59 @@ do {
 
 #define MAX_THREADS (32)
 
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param EventSet
+  *   A PAPI eventset.
+  * @param totalEventCount
+  *   Number of events from the command line.
+  * @param gpu_id
+  *   NVIDIA device index.
+  * @param **eventNamesFromCommandLine
+  *   Events provided on the command line.
+  * @param *numEventsSuccessfullyAdded
+  *   Total number of successfully added events.
+  * @param **eventsSuccessfullyAdded
+  *   Events that we are able to add to the EventSet.
+  * @param *numMultipassEvents
+  *   Counter to see if a multiple pass event was provided on the command line.
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, int gpu_id, char **eventNamesFromCommandLine, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        char tmpEventName[PAPI_MAX_STR_LEN];
+        int strLen = snprintf(tmpEventName, PAPI_MAX_STR_LEN, "%s:device=%d", eventNamesFromCommandLine[i], gpu_id);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write event name with appended device qualifier.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
+        int papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", tmpEventName, papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", tmpEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
+
 int main(int argc, char *argv[])
 {
     quiet = 0;
@@ -135,6 +188,7 @@ int main(int argc, char *argv[])
 
     PRINT(quiet, "Launching %d threads.\n", num_threads);
     omp_set_num_threads(num_threads);  // create as many CPU threads as there are CUDA devices
+    int numMultipassEvents = 0;
 #pragma omp parallel
     {
         unsigned int cpu_thread_id = omp_get_thread_num();
@@ -149,16 +203,30 @@ int main(int argc, char *argv[])
         int j, errno;
         PAPI_CALL(PAPI_create_eventset(&EventSet));
         PRINT(quiet, "CPU thread %d (of %d) uses CUDA device %d with context %p @ eventset %d\n", cpu_thread_id, num_cpu_threads, gpu_id, ctx_arr[cpu_thread_id], EventSet);
-        char tmpEventName[64];
-        for (j = 0; j < event_count; j++) {
-            snprintf(tmpEventName, 64, "%s:device=%d", argv[j+1], gpu_id);
-            PRINT(quiet, "Adding event name %s\n", tmpEventName);
-            errno = PAPI_add_named_event( EventSet, tmpEventName );
-            if (errno != PAPI_OK) {
-                fprintf(stderr, "Error adding event %s\n", tmpEventName);
+
+        int numEventsSuccessfullyAdded = 0;
+        char **eventsSuccessfullyAdded, **metricNames = argv + 1;
+        eventsSuccessfullyAdded = (char **) malloc(event_count * sizeof(char *));
+        if (eventsSuccessfullyAdded == NULL) {
+            fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        for (int evt = 0; evt < event_count; evt++) {  // loop-local index is private to each OpenMP thread (i is presumably main's shared counter -- confirm)
+            eventsSuccessfullyAdded[evt] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));  // one name buffer per command line event
+            if (eventsSuccessfullyAdded[evt] == NULL) {
+                fprintf(stderr, "Failed to allocate memory for command line argument.\n");
                 test_skip(__FILE__, __LINE__, "", 0);
             }
         }
+
+        add_events_from_command_line(EventSet, event_count, gpu_id, metricNames, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+        // Only multiple pass events were provided on the command line
+        if (numEventsSuccessfullyAdded == 0) {
+            fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
         PAPI_CALL(PAPI_start(EventSet));
 #endif
         VectorAddSubtract(50000*(cpu_thread_id+1), quiet);  // gpu work
@@ -166,10 +234,16 @@ int main(int argc, char *argv[])
         PAPI_CALL(PAPI_stop(EventSet, values));
 
         PRINT(quiet, "User measured values.\n");
-        for (j = 0; j < event_count; j++) {
-            snprintf(tmpEventName, 64, "%s:device=%d", argv[j+1], gpu_id);
-            PRINT(quiet, "%s\t\t%lld\n", tmpEventName, values[j]);
+        for (j = 0; j < numEventsSuccessfullyAdded; j++) {
+            PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[j], values[j]);
         }
+
+        // Free this thread's name buffers; the index is loop-local so OpenMP threads do not share it
+        for (int evt = 0; evt < event_count; evt++) {
+            free(eventsSuccessfullyAdded[evt]);
+        }
+        free(eventsSuccessfullyAdded);
+
         DRIVER_API_CALL(cuCtxPopCurrent(&(ctx_arr[gpu_id])));
 
         errno = PAPI_cleanup_eventset(EventSet);
@@ -191,6 +265,12 @@ int main(int argc, char *argv[])
     omp_destroy_lock(&lock);
 #ifdef PAPI
     PAPI_shutdown();
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #endif
     return 0;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/cudaOpenMP_noCuCtx.cu 7.2.0-1/src/components/cuda/tests/cudaOpenMP_noCuCtx.cu
--- 7.2.0~b2-1/src/components/cuda/tests/cudaOpenMP_noCuCtx.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/cudaOpenMP_noCuCtx.cu	2025-06-25 22:38:10.000000000 +0000
@@ -74,6 +74,60 @@ do {
 
 #define MAX_THREADS (32)
 
+
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param EventSet
+  *   A PAPI eventset the events are added to.
+  * @param totalEventCount
+  *   Number of entries in eventNamesFromCommandLine.
+  * @param gpu_id
+  *   NVIDIA device index, appended to each name as ":device=<gpu_id>".
+  * @param eventNamesFromCommandLine
+  *   Event names provided on the command line, without a device qualifier.
+  * @param *numEventsSuccessfullyAdded
+  *   Incremented for each added event; also the next free slot in eventsSuccessfullyAdded.
+  * @param **eventsSuccessfullyAdded
+  *   Caller-allocated buffers of PAPI_MAX_STR_LEN bytes that receive the names of added events.
+  * @param *numMultipassEvents
+  *   Incremented atomically for each event rejected with PAPI_EMULPASS; may be shared by OpenMP threads.
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, int gpu_id, char **eventNamesFromCommandLine, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        char tmpEventName[PAPI_MAX_STR_LEN];  // command line name with ":device=<gpu_id>" appended
+        int strLen = snprintf(tmpEventName, PAPI_MAX_STR_LEN, "%s:device=%d", eventNamesFromCommandLine[i], gpu_id);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {  // output error or truncated name
+            fprintf(stderr, "Failed to fully write event name with appended device qualifier.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
+        int papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {  // only the multiple pass error is tolerated
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", tmpEventName, papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+            // Handle multiple pass events; the counter is shared by all OpenMP threads, so update it atomically
+#pragma omp atomic
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events: store the qualified name in the next free caller-provided buffer
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", tmpEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
+
 int main(int argc, char *argv[])
 {
     quiet = 0;
@@ -131,6 +185,7 @@ int main(int argc, char *argv[])
     omp_init_lock(&lock);
 
     omp_set_num_threads(num_threads);  // create as many CPU threads as there are CUDA devices
+    int numMultipassEvents = 0;
 #pragma omp parallel
     {
         unsigned int cpu_thread_id = omp_get_thread_num();
@@ -145,16 +200,30 @@ int main(int argc, char *argv[])
         PAPI_CALL(PAPI_create_eventset(&EventSet));
 
         PRINT(quiet, "CPU thread %d (of %d) uses CUDA device %d @ eventset %d\n", cpu_thread_id, num_cpu_threads, gpu_id, EventSet);
-        char tmpEventName[64];
-        for (j = 0; j < event_count; j++) {
-            snprintf(tmpEventName, 64, "%s:device=%d", argv[j+1], gpu_id);
-            PRINT(quiet, "Adding event name %s\n", tmpEventName);
-            errno = PAPI_add_named_event( EventSet, tmpEventName );
-            if (errno != PAPI_OK) {
-                fprintf(stderr, "Error adding event %s\n", tmpEventName);
+
+        int numEventsSuccessfullyAdded = 0;
+        char **eventsSuccessfullyAdded, **metricNames = argv + 1;
+        eventsSuccessfullyAdded = (char **) malloc(event_count * sizeof(char *));
+        if (eventsSuccessfullyAdded == NULL) {
+            fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        for (int evt = 0; evt < event_count; evt++) {  // loop-local index is private to each OpenMP thread (i is presumably main's shared counter -- confirm)
+            eventsSuccessfullyAdded[evt] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));  // one name buffer per command line event
+            if (eventsSuccessfullyAdded[evt] == NULL) {
+                fprintf(stderr, "Failed to allocate memory for command line argument.\n");
                 test_skip(__FILE__, __LINE__, "", 0);
             }
         }
+
+        add_events_from_command_line(EventSet, event_count, gpu_id, metricNames, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+        // Only multiple pass events were provided on the command line
+        if (numEventsSuccessfullyAdded == 0) {
+            fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
         PAPI_CALL(PAPI_start(EventSet));
 #endif
         VectorAddSubtract(50000*(cpu_thread_id+1), quiet);  // gpu work
@@ -162,10 +231,15 @@ int main(int argc, char *argv[])
         PAPI_CALL(PAPI_stop(EventSet, values));
 
         PRINT(quiet, "User measured values.\n");
-        for (j = 0; j < event_count; j++) {
-            snprintf(tmpEventName, 64, "%s:device=%d", argv[j+1], gpu_id);
-            PRINT(quiet, "%s\t\t%lld\n", tmpEventName, values[j]);
+        for (j = 0; j < numEventsSuccessfullyAdded; j++) {
+            PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[j], values[j]);
+        }
+
+        // Free this thread's name buffers; the index is loop-local so OpenMP threads do not share it
+        for (int evt = 0; evt < event_count; evt++) {
+            free(eventsSuccessfullyAdded[evt]);
         }
+        free(eventsSuccessfullyAdded);
 
         errno = PAPI_cleanup_eventset(EventSet);
         if (errno != PAPI_OK) {
@@ -183,6 +257,12 @@ int main(int argc, char *argv[])
     omp_destroy_lock(&lock);
 #ifdef PAPI
     PAPI_shutdown();
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #endif
     return 0;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/pthreads.cu 7.2.0-1/src/components/cuda/tests/pthreads.cu
--- 7.2.0~b2-1/src/components/cuda/tests/pthreads.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/pthreads.cu	2025-06-25 22:38:10.000000000 +0000
@@ -53,10 +53,68 @@ int numGPUs;
 int g_event_count;
 char **g_evt_names;
 
+static volatile int global_thread_count = 0;
+pthread_mutex_t global_mutex;
 pthread_t tidarr[MAX_THREADS];
 CUcontext cuCtx[MAX_THREADS];
 pthread_mutex_t lock;
 
+// Globals for multiple pass events
+int numMultipassEvents = 0;
+
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param EventSet
+  *   A PAPI eventset the events are added to.
+  * @param totalEventCount
+  *   Number of entries in eventNamesFromCommandLine.
+  * @param **eventNamesFromCommandLine
+  *   Event names provided on the command line, without a device qualifier.
+  * @param gpu_id
+  *   NVIDIA device index, appended to each name as ":device=<gpu_id>".
+  * @param *numEventsSuccessfullyAdded
+  *   Incremented for each added event; also the next free slot in eventsSuccessfullyAdded.
+  * @param **eventsSuccessfullyAdded
+  *   Caller-allocated buffers of PAPI_MAX_STR_LEN bytes that receive the names of added events.
+  * @param *numMultipassEvents
+  *   Incremented for each event rejected with PAPI_EMULPASS; thread_gpu calls this with global_mutex held.
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int gpu_id, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        char tmpEventName[PAPI_MAX_STR_LEN];  // command line name with ":device=<gpu_id>" appended
+        int strLen = snprintf(tmpEventName, PAPI_MAX_STR_LEN, "%s:device=%d", eventNamesFromCommandLine[i], gpu_id);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {  // output error or truncated name
+            fprintf(stderr, "Failed to fully write event name with appended device qualifier.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
+        int papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {  // only the multiple pass error is tolerated
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", tmpEventName, papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events: count the event and move on without recording its name
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events: store the qualified name in the next free caller-provided buffer
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", tmpEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
+
 void *thread_gpu(void * idx)
 {
     int tid = *((int*) idx);
@@ -64,7 +122,7 @@ void *thread_gpu(void * idx)
 
 #ifdef PAPI
     int gpuid = tid % numGPUs;
-    int papi_errno, i;
+    int i;
 
     int EventSet = PAPI_NULL;
     long long values[MAX_THREADS];
@@ -74,16 +132,36 @@ void *thread_gpu(void * idx)
     PRINT(quiet, "This is idx %d thread %lu - using GPU %d context %p!\n",
             tid, gettid, gpuid, cuCtx[tid]);
 
-    char tmpEventName[64];
+    int numEventsSuccessfullyAdded = 0;
+    char **eventsSuccessfullyAdded;
+    eventsSuccessfullyAdded = (char **) malloc(g_event_count * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
     for (i = 0; i < g_event_count; i++) {
-        snprintf(tmpEventName, 64, "%s:device=%d", g_evt_names[i], gpuid);
-        papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", tmpEventName);
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
             test_skip(__FILE__, __LINE__, "", 0);
         }
     }
 
+    pthread_mutex_lock(&global_mutex);
+
+    add_events_from_command_line(EventSet, g_event_count, g_evt_names, gpuid, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+
+    ++global_thread_count;
+    pthread_mutex_unlock(&global_mutex);
+
+    while(global_thread_count < numGPUs);
+
     PAPI_CALL(PAPI_start(EventSet));
 #endif
 
@@ -93,10 +171,16 @@ void *thread_gpu(void * idx)
     PAPI_CALL(PAPI_stop(EventSet, values));
 
     PRINT(quiet, "User measured values in thread id %d.\n", tid);
+    for (i = 0; i < numEventsSuccessfullyAdded; i++) {
+        PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[i], values[i]);
+    }
+
+    // Free allocated memory
     for (i = 0; i < g_event_count; i++) {
-        snprintf(tmpEventName, 64, "%s:device=%d", g_evt_names[i], gpuid);
-        PRINT(quiet, "%s\t\t%lld\n", tmpEventName, values[i]);
+        free(eventsSuccessfullyAdded[i]);
     }
+    free(eventsSuccessfullyAdded);
+
     PAPI_CALL(PAPI_cleanup_eventset(EventSet));
     PAPI_CALL(PAPI_destroy_eventset(&EventSet));
 #endif
@@ -136,6 +220,7 @@ int main(int argc, char **argv)
     PRINT(quiet, "No. of threads to launch = %d\n", numGPUs);
 
 #ifdef PAPI
+    pthread_mutex_init(&global_mutex, NULL);
     int papi_errno = PAPI_library_init( PAPI_VER_CURRENT );
     if( papi_errno != PAPI_VER_CURRENT ) {
         test_fail(__FILE__, __LINE__, "PAPI_library_init failed.", 0);
@@ -156,6 +241,8 @@ int main(int argc, char **argv)
             fprintf(stderr, "\n ERROR: return code from pthread_create is %d \n", rc);
             exit(1);
         }
+
+
         PRINT(quiet, "\n Main thread %lu. Created new thread (%lu) in iteration %d ...\n",
                 (unsigned long)pthread_self(), (unsigned long)tidarr[i], i);
     }
@@ -172,7 +259,14 @@ int main(int argc, char **argv)
     }
 #ifdef PAPI
     PAPI_shutdown();
+
     PRINT(quiet, "Main thread exit!\n");
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #endif
     return 0;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/pthreads_noCuCtx.cu 7.2.0-1/src/components/cuda/tests/pthreads_noCuCtx.cu
--- 7.2.0~b2-1/src/components/cuda/tests/pthreads_noCuCtx.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/pthreads_noCuCtx.cu	2025-06-25 22:38:10.000000000 +0000
@@ -53,10 +53,68 @@ int numGPUs;
 int g_event_count;
 char **g_evt_names;
 
+static volatile int global_thread_count = 0;
+pthread_mutex_t global_mutex;
 pthread_t tidarr[MAX_THREADS];
 CUcontext cuCtx[MAX_THREADS];
 pthread_mutex_t lock;
 
+// Globals for multiple pass events
+int numMultipassEvents = 0;
+
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param EventSet
+  *   A PAPI eventset the events are added to.
+  * @param totalEventCount
+  *   Number of entries in eventNamesFromCommandLine.
+  * @param **eventNamesFromCommandLine
+  *   Event names provided on the command line, without a device qualifier.
+  * @param gpu_id
+  *   NVIDIA device index, appended to each name as ":device=<gpu_id>".
+  * @param *numEventsSuccessfullyAdded
+  *   Incremented for each added event; also the next free slot in eventsSuccessfullyAdded.
+  * @param **eventsSuccessfullyAdded
+  *   Caller-allocated buffers of PAPI_MAX_STR_LEN bytes that receive the names of added events.
+  * @param *numMultipassEvents
+  *   Incremented for each event rejected with PAPI_EMULPASS; thread_gpu calls this with global_mutex held.
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int gpu_id, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        char tmpEventName[PAPI_MAX_STR_LEN];  // command line name with ":device=<gpu_id>" appended
+        int strLen = snprintf(tmpEventName, PAPI_MAX_STR_LEN, "%s:device=%d", eventNamesFromCommandLine[i], gpu_id);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {  // output error or truncated name
+            fprintf(stderr, "Failed to fully write event name with appended device qualifier.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
+        int papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {  // only the multiple pass error is tolerated
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", tmpEventName, papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events: count the event and move on without recording its name
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events: store the qualified name in the next free caller-provided buffer
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", tmpEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
+
 void *thread_gpu(void * idx)
 {
     int tid = *((int*) idx);
@@ -64,7 +122,7 @@ void *thread_gpu(void * idx)
 
 #ifdef PAPI
     int gpuid = tid % numGPUs;
-    int papi_errno, i;
+    int i;
     int EventSet = PAPI_NULL;
     long long values[MAX_THREADS];
     PAPI_CALL(PAPI_create_eventset(&EventSet));
@@ -73,16 +131,36 @@ void *thread_gpu(void * idx)
     PRINT(quiet, "This is idx %d thread %lu - using GPU %d\n",
             tid, gettid, gpuid);
 
-    char tmpEventName[64];
+    int numEventsSuccessfullyAdded = 0;
+    char **eventsSuccessfullyAdded;
+    eventsSuccessfullyAdded = (char **) malloc(g_event_count * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
     for (i = 0; i < g_event_count; i++) {
-        snprintf(tmpEventName, 64, "%s:device=%d", g_evt_names[i], gpuid);
-        papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", tmpEventName);
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
             test_skip(__FILE__, __LINE__, "", 0);
         }
     }
 
+    pthread_mutex_lock(&global_mutex);
+
+    add_events_from_command_line(EventSet, g_event_count, g_evt_names, gpuid, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+
+    ++global_thread_count;
+    pthread_mutex_unlock(&global_mutex);
+
+    while(global_thread_count < numGPUs);
+
     PAPI_CALL(PAPI_start(EventSet));
 #endif
 
@@ -92,10 +170,15 @@ void *thread_gpu(void * idx)
     PAPI_CALL(PAPI_stop(EventSet, values));
 
     PRINT(quiet, "User measured values in thread id %d.\n", tid);
+    for (i = 0; i < numEventsSuccessfullyAdded; i++) {
+        PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[i], values[i]);
+    }
+
+    // Free allocated memory
     for (i = 0; i < g_event_count; i++) {
-        snprintf(tmpEventName, 64, "%s:device=%d", g_evt_names[i], gpuid);
-        PRINT(quiet, "%s\t\t%lld\n", tmpEventName, values[i]);
+        free(eventsSuccessfullyAdded[i]);
     }
+    free(eventsSuccessfullyAdded);
 
     PAPI_CALL(PAPI_cleanup_eventset(EventSet));
     PAPI_CALL(PAPI_destroy_eventset(&EventSet));
@@ -136,6 +219,7 @@ int main(int argc, char **argv)
     PRINT(quiet, "No. of threads to launch = %d\n", numGPUs);
 
 #ifdef PAPI
+    pthread_mutex_init(&global_mutex, NULL);
     int papi_errno = PAPI_library_init( PAPI_VER_CURRENT );
     if( papi_errno != PAPI_VER_CURRENT ) {
         test_fail(__FILE__, __LINE__, "PAPI_library_init failed.", 0);
@@ -169,7 +253,14 @@ int main(int argc, char **argv)
 
 #ifdef PAPI
     PAPI_shutdown();
+
     PRINT(quiet, "Main thread exit!\n");
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #endif
     return 0;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/runtest.sh 7.2.0-1/src/components/cuda/tests/runtest.sh
--- 7.2.0~b2-1/src/components/cuda/tests/runtest.sh	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/runtest.sh	2025-06-25 22:38:10.000000000 +0000
@@ -2,18 +2,18 @@
 
 export PAPI_CUDA_TEST_QUIET=1    # Comment this line to see standard output from tests
 
-evt_names=("cuda:::dram__bytes_read.sum:device=0" \
-           "cuda:::sm__cycles_active.sum:device=0" \
-           "cuda:::smsp__warps_launched.sum:device=0")
-
-multi_gpu_evt_names=("cuda:::dram__bytes_read.sum" \
-                     "cuda:::sm__cycles_active.sum" \
-                     "cuda:::smsp__warps_launched.sum")
+evt_names=("cuda:::dram__bytes_read:stat=sum:device=0" \
+           "cuda:::sm__cycles_active:stat=sum:device=0" \
+           "cuda:::smsp__warps_launched:stat=sum:device=0")
+
+multi_gpu_evt_names=("cuda:::dram__bytes_read:stat=sum" \
+                     "cuda:::sm__cycles_active:stat=sum" \
+                     "cuda:::smsp__warps_launched:stat=sum")
 
-multi_pass_evt_name="cuda:::gpu__compute_memory_access_throughput_internal_activity.max.pct_of_peak_sustained_elapsed:device=0"
+multi_pass_evt_name="cuda:::gpu__compute_memory_access_throughput_internal_activity.pct_of_peak_sustained_elapsed:stat=max:device=0"
 
-concurrent_evt_names=("cuda:::sm__cycles_active.sum:device=" \
-                      "cuda:::sm__cycles_elapsed.max:device=")
+concurrent_evt_names=("cuda:::sm__cycles_active:stat=sum:device=" \
+                      "cuda:::sm__cycles_elapsed:stat=max:device=")
 
 make test_multipass_event_fail
 echo -e "Running: \e[36m./test_multipass_event_fail\e[0m" "${evt_names[@]}" $multi_pass_evt_name
@@ -26,8 +26,8 @@ echo -e "Running: \e[36m./test_multi_rea
 echo -e "-------------------------------------\n"
 
 make test_2thr_1gpu_not_allowed
-echo -e "Running: \e[36m./test_2thr_1gpu_not_allowed\e[0m" "${evt_names[@]}"
-./test_2thr_1gpu_not_allowed "${evt_names[@]}"
+echo -e "Running: \e[36m./test_2thr_1gpu_not_allowed\e[0m" "${evt_names[@]:0:2}"
+./test_2thr_1gpu_not_allowed "${evt_names[@]:0:2}"
 echo -e "-------------------------------------\n"
 
 make HelloWorld
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/simpleMultiGPU.cu 7.2.0-1/src/components/cuda/tests/simpleMultiGPU.cu
--- 7.2.0~b2-1/src/components/cuda/tests/simpleMultiGPU.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/simpleMultiGPU.cu	2025-06-25 22:38:10.000000000 +0000
@@ -105,6 +105,59 @@ __global__ static void reduceKernel( flo
     d_Result[tid] = sum;
 }
 
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *
+  * @param EventSet
+  *   A PAPI eventset the events are added to.
+  * @param totalEventCount
+  *   Number of entries in eventNamesFromCommandLine.
+  * @param **eventNamesFromCommandLine
+  *   Event names provided on the command line, without a device qualifier.
+  * @param gpu_id
+  *   NVIDIA device index, appended to each name as ":device=<gpu_id>".
+  * @param *numEventsSuccessfullyAdded
+  *   Incremented for each added event; also the next free slot in eventsSuccessfullyAdded.
+  * @param **eventsSuccessfullyAdded
+  *   Caller-allocated buffers of PAPI_MAX_STR_LEN bytes that receive the names of added events.
+  * @param *numMultipassEvents
+  *   Incremented for each event rejected with PAPI_EMULPASS (requires multiple passes).
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int gpu_id, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        char tmpEventName[PAPI_MAX_STR_LEN];  // command line name with ":device=<gpu_id>" appended
+        int strLen = snprintf(tmpEventName, PAPI_MAX_STR_LEN, "%s:device=%d", eventNamesFromCommandLine[i], gpu_id);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {  // output error or truncated name
+            fprintf(stderr, "Failed to fully write event name with appended device qualifier.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
+        int papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) {  // only the multiple pass error is tolerated
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", tmpEventName, papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0);
+            }
+
+            // Handle multiple pass events: count the event and move on without recording its name
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events: the count carries over between calls, so names from each GPU are appended
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", tmpEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
+
 // //////////////////////////////////////////////////////////////////////////////
 // Program main
 // //////////////////////////////////////////////////////////////////////////////
@@ -125,7 +178,7 @@ int main( int argc, char **argv )
     CUcontext ctx[MAX_GPU_COUNT];
     CUcontext poppedCtx;
 
-	char *test_quiet = getenv("PAPI_CUDA_TEST_QUIET");
+    char *test_quiet = getenv("PAPI_CUDA_TEST_QUIET");
     int quiet = 0;
     if (test_quiet)
         quiet = (int) strtol(test_quiet, (char**) NULL, 10);
@@ -155,14 +208,14 @@ int main( int argc, char **argv )
     // Report on the available CUDA devices
     int computeCapabilityMajor = 0, computeCapabilityMinor = 0;
     int runtimeVersion = 0, driverVersion = 0;
-    char deviceName[64];
+    char deviceName[PAPI_MIN_STR_LEN];
     CUdevice device[MAX_GPU_COUNT];
     CHECK_CUDA_ERROR( cudaGetDeviceCount( &num_gpus ) );
     if( num_gpus > MAX_GPU_COUNT ) num_gpus = MAX_GPU_COUNT;
     PRINT( quiet, "CUDA-capable device count: %i\n", num_gpus );
     for ( i=0; i<num_gpus; i++ ) {
         CHECK_CU_ERROR( cuDeviceGet( &device[i], i ), "cuDeviceGet" );
-        CHECK_CU_ERROR( cuDeviceGetName( deviceName, 64, device[i] ), "cuDeviceGetName" );
+        CHECK_CU_ERROR( cuDeviceGetName( deviceName, PAPI_MIN_STR_LEN, device[i] ), "cuDeviceGetName" );
         CHECK_CU_ERROR( cuDeviceGetAttribute( &computeCapabilityMajor, 
             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device[i]), "cuDeviceGetAttribute");
         CHECK_CU_ERROR( cuDeviceGetAttribute( &computeCapabilityMinor, 
@@ -222,8 +275,6 @@ int main( int argc, char **argv )
     int EventSet = PAPI_NULL;
     int NUM_EVENTS = MAX_GPU_COUNT*MAX_NUM_EVENTS;
     long long values[NUM_EVENTS];
-    int total_events;
-    int ee;
 
     int cid = PAPI_get_component_index("cuda");
     if (cid < 0) {
@@ -252,30 +303,34 @@ int main( int argc, char **argv )
     CUcontext userContext;
     CHECK_CU_ERROR(cuCtxGetCurrent(&userContext), "cuCtxGetCurrent");
 
-    char *EventName[NUM_EVENTS];
-    char tmpEventName[64];
-    total_events = 0;
-    for( i = 0; i < num_gpus; i++ ) {
-        CHECK_CU_ERROR(cuCtxSetCurrent(ctx[i]), "cuCtxSetCurrent");
-        for ( ee=0; ee < event_count; ee++ ) {
-            // Create a device specific event.
-            snprintf( tmpEventName, 64, "%s:device=%d", argv[ee+1], i );
-            papi_errno = PAPI_add_named_event( EventSet, tmpEventName );
-            if (papi_errno==PAPI_OK) {
-                PRINT( quiet, "Add event success: '%s' GPU %i\n", tmpEventName, i );
-                EventName[total_events] = (char *)calloc( 64, sizeof(char) );
-                if (EventName[total_events] == NULL) {
-                    test_fail(__FILE__, __LINE__, "Failed to allocate string.\n", 0);
-                }
-                snprintf( EventName[total_events], 64, "%s", tmpEventName );
-                total_events++;
-            } else {
-                fprintf( stderr, "Add event failure: '%s' GPU %i error=%s\n", tmpEventName, i, PAPI_strerror(papi_errno));
-                test_skip(__FILE__, __LINE__, "", 0);
-            }
+    // Handle the events from the command line
+    int numEventsSuccessfullyAdded = 0, numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded, **metricNames = argv + 1;
+    eventsSuccessfullyAdded = (char **) malloc(NUM_EVENTS * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+    for (i = 0; i < NUM_EVENTS; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
         }
     }
 
+    int gpu_id;
+    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
+        CHECK_CU_ERROR(cuCtxSetCurrent(ctx[gpu_id]), "cuCtxSetCurrent");
+        add_events_from_command_line(EventSet, event_count, metricNames, gpu_id, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents); 
+    }
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+
     // Restore user context.
 
     CHECK_CU_ERROR(cuCtxSetCurrent(userContext), "cuCtxSetCurrent");
@@ -337,8 +392,8 @@ int main( int argc, char **argv )
 
     papi_errno = PAPI_stop( EventSet, values );                                         // Stop (will read values).
     if( papi_errno != PAPI_OK )  fprintf( stderr, "PAPI_stop failed\n" );
-    for( i = 0; i < total_events; i++ )
-        PRINT( quiet, "PAPI counterValue %12lld \t\t --> %s \n", values[i], EventName[i] );
+    for( i = 0; i < numEventsSuccessfullyAdded; i++ )
+        PRINT( quiet, "PAPI counterValue %12lld \t\t --> %s \n", values[i], eventsSuccessfullyAdded[i] );
 
     papi_errno = PAPI_cleanup_eventset( EventSet );
     if( papi_errno != PAPI_OK )  fprintf( stderr, "PAPI_cleanup_eventset failed\n" );
@@ -386,7 +441,18 @@ int main( int argc, char **argv )
         CHECK_CU_ERROR( cuCtxDestroy(ctx[i]), "cuCtxDestroy");
     }
 
+    // Free every name buffer: NUM_EVENTS of them were allocated above, not just event_count
+    for (i = 0; i < NUM_EVENTS; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
+
 #ifdef PAPI
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     if ( diff < 1e-5 )
         test_pass(__FILE__);
     else
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/simpleMultiGPU_noCuCtx.cu 7.2.0-1/src/components/cuda/tests/simpleMultiGPU_noCuCtx.cu
--- 7.2.0~b2-1/src/components/cuda/tests/simpleMultiGPU_noCuCtx.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/simpleMultiGPU_noCuCtx.cu	2025-06-25 22:38:10.000000000 +0000
@@ -105,6 +105,59 @@ __global__ static void reduceKernel( flo
     d_Result[tid] = sum;
 }
 
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *   Each name has ":device=<gpu_id>" appended before it is added to the EventSet.
+  * @param EventSet
+  *   A PAPI eventset.
+  * @param totalEventCount
+  *   Number of events from the command line.
+  * @param **eventNamesFromCommandLine
+  *   Events provided on the command line (without the device qualifier).
+  * @param gpu_id
+  *   Current gpu id; used as the value of the appended device qualifier.
+  * @param *numEventsSuccessfullyAdded
+  *   Running count of added events; also the next write index into eventsSuccessfullyAdded.
+  * @param **eventsSuccessfullyAdded
+  *   Buffers (written with a PAPI_MAX_STR_LEN limit) receiving the names of events added to the EventSet.
+  * @param *numMultipassEvents
+  *   Counter incremented for every event rejected with PAPI_EMULPASS.
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int gpu_id, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        char tmpEventName[PAPI_MAX_STR_LEN];
+        int strLen = snprintf(tmpEventName, PAPI_MAX_STR_LEN, "%s:device=%d", eventNamesFromCommandLine[i], gpu_id);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) { // encoding error or truncated name
+            fprintf(stderr, "Failed to fully write event name with appended device qualifier.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+
+        int papi_errno = PAPI_add_named_event(EventSet, tmpEventName);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) { // any failure other than "needs multiple passes" skips the test
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", tmpEventName, papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0); // NOTE(review): presumably does not return; otherwise falls through to the multipass count - confirm
+            }
+
+            // Handle multiple pass events: count them and move on without recording a name
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events: store the device-qualified name in the next free slot
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", tmpEventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) { // encoding error or truncated name
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++; // no capacity check here; assumes the caller sized the array for all adds - TODO confirm
+    }
+
+    return;
+}
+
 // //////////////////////////////////////////////////////////////////////////////
 // Program main
 // //////////////////////////////////////////////////////////////////////////////
@@ -152,14 +205,14 @@ int main( int argc, char **argv )
     // Report on the available CUDA devices
     int computeCapabilityMajor = 0, computeCapabilityMinor = 0;
     int runtimeVersion = 0, driverVersion = 0;
-    char deviceName[64];
+    char deviceName[PAPI_MIN_STR_LEN];
     CUdevice device[MAX_GPU_COUNT];
     CHECK_CUDA_ERROR( cudaGetDeviceCount( &num_gpus ) );
     if( num_gpus > MAX_GPU_COUNT ) num_gpus = MAX_GPU_COUNT;
     PRINT( quiet, "CUDA-capable device count: %i\n", num_gpus );
     for ( i=0; i<num_gpus; i++ ) {
         CHECK_CU_ERROR( cuDeviceGet( &device[i], i ), "cuDeviceGet" );
-        CHECK_CU_ERROR( cuDeviceGetName( deviceName, 64, device[i] ), "cuDeviceGetName" );
+        CHECK_CU_ERROR( cuDeviceGetName( deviceName, PAPI_MIN_STR_LEN, device[i] ), "cuDeviceGetName" );
         CHECK_CU_ERROR( cuDeviceGetAttribute( &computeCapabilityMajor, 
             CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, device[i]), "cuDeviceGetAttribute");
         CHECK_CU_ERROR( cuDeviceGetAttribute( &computeCapabilityMinor, 
@@ -212,8 +265,6 @@ int main( int argc, char **argv )
     int EventSet = PAPI_NULL;
     int NUM_EVENTS = MAX_GPU_COUNT*MAX_NUM_EVENTS;
     long long values[NUM_EVENTS];
-    int total_events;
-    int ee;
 
     int cid = PAPI_get_component_index("cuda");
     if (cid < 0) {
@@ -238,30 +289,34 @@ int main( int argc, char **argv )
     // Similar to legacy CUpti API, we must change the contexts to the appropriate device to
     // add events to inform PAPI of the context that will run the kernels.
 
-    char *EventName[NUM_EVENTS];
-    char tmpEventName[64];
-    total_events = 0;
-    for( i = 0; i < num_gpus; i++ ) {
-        for ( ee=0; ee < event_count; ee++ ) {
-            CHECK_CUDA_ERROR(cudaSetDevice(device[i]));
-            // Create a device specific event.
-            snprintf( tmpEventName, 64, "%s:device=%d", argv[ee+1], i );
-            papi_errno = PAPI_add_named_event( EventSet, tmpEventName );
-            if (papi_errno==PAPI_OK) {
-                PRINT( quiet, "Add event success: '%s' GPU %i\n", tmpEventName, i );
-                EventName[total_events] = (char *)calloc( 64, sizeof(char) );
-                if (EventName[total_events] == NULL) {
-                    test_fail(__FILE__, __LINE__, "Failed to allocate string.\n", 0);
-                }
-                snprintf( EventName[total_events], 64, "%s", tmpEventName );
-                total_events++;
-            } else {
-                fprintf( stderr, "Add event failure: '%s' GPU %i error=%s\n", tmpEventName, i, PAPI_strerror(papi_errno));
-                test_skip(__FILE__, __LINE__, "", 0);
-            }
+    // Handle the events from the command line
+    int numEventsSuccessfullyAdded = 0, numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded, **metricNames = argv + 1;
+    eventsSuccessfullyAdded = (char **) malloc(NUM_EVENTS * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+    for (i = 0; i < NUM_EVENTS; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
         }
     }
 
+    int gpu_id;
+    for (gpu_id = 0; gpu_id < num_gpus; gpu_id++) {
+        CHECK_CUDA_ERROR(cudaSetDevice(device[gpu_id]));
+        add_events_from_command_line(EventSet, event_count, metricNames, gpu_id, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+    }
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+ 
     // Invoke PAPI_start().
     papi_errno = PAPI_start( EventSet );
     if( papi_errno != PAPI_OK ) {
@@ -313,8 +368,8 @@ int main( int argc, char **argv )
 
     papi_errno = PAPI_stop( EventSet, values );                                         // Stop (will read values).
     if( papi_errno != PAPI_OK )  fprintf( stderr, "PAPI_stop failed\n" );
-    for( i = 0; i < total_events; i++ )
-        PRINT( quiet, "PAPI counterValue %12lld \t\t --> %s \n", values[i], EventName[i] );
+    for( i = 0; i < numEventsSuccessfullyAdded; i++ )
+        PRINT( quiet, "PAPI counterValue %12lld \t\t --> %s \n", values[i], eventsSuccessfullyAdded[i] );
 
     papi_errno = PAPI_cleanup_eventset( EventSet );
     if( papi_errno != PAPI_OK )  fprintf( stderr, "PAPI_cleanup_eventset failed\n" );
@@ -360,7 +415,19 @@ int main( int argc, char **argv )
         // Shut down this GPU
         CHECK_CUDA_ERROR( cudaStreamDestroy( plan[i].stream ) );
     }
+
+    // Free every name buffer: NUM_EVENTS of them were allocated above, not just event_count
+    for (i = 0; i < NUM_EVENTS; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
+
 #ifdef PAPI
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     if ( diff < 1e-5 )
         test_pass(__FILE__);
     else
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/test_2thr_1gpu_not_allowed.cu 7.2.0-1/src/components/cuda/tests/test_2thr_1gpu_not_allowed.cu
--- 7.2.0~b2-1/src/components/cuda/tests/test_2thr_1gpu_not_allowed.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/test_2thr_1gpu_not_allowed.cu	2025-06-25 22:38:10.000000000 +0000
@@ -79,8 +79,14 @@ void *thread_gpu(void * ptinfo)
 
     papi_errno = PAPI_add_named_event(EventSet, g_evt_names[idx]);
     if (papi_errno != PAPI_OK) {
-        fprintf(stderr, "Failed to add event %s\n", g_evt_names[idx]);
-        test_skip(__FILE__, __LINE__, "", 0);
+        if (papi_errno == PAPI_EMULPASS) {
+            fprintf(stderr, "Event %s requires multiple passes and cannot be added to an EventSet. Two single pass events are needed for this test see utils/papi_native_avail for more Cuda native events.\n", g_evt_names[idx]);
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        else {
+            fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", g_evt_names[idx], papi_errno);
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
     }
 
     papi_errno = PAPI_start(EventSet);
@@ -119,6 +125,11 @@ int main(int argc, char **argv)
         fprintf(stderr, "No eventnames specified at command line.\n");
         test_skip(__FILE__, __LINE__, "", 0);
     }
+    else if (g_event_count != 2) {
+        fprintf(stderr, "Two single pass events are needed for this test to run properly.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+
     g_evt_names = argv + 1;
 #endif
     int rc, i;
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/test_multi_read_and_reset.cu 7.2.0-1/src/components/cuda/tests/test_multi_read_and_reset.cu
--- 7.2.0~b2-1/src/components/cuda/tests/test_multi_read_and_reset.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/test_multi_read_and_reset.cu	2025-06-25 22:38:10.000000000 +0000
@@ -25,6 +25,54 @@ int approx_equal(long long v1, long long
     return 0;
 }
 
+// Globals for successfully added and multiple pass events
+int numEventsSuccessfullyAdded = 0, numMultipassEvents = 0;
+
+/** @class add_events_from_command_line
+  * @brief Try and add each event provided on the command line by the user.
+  *   Names are passed to PAPI_add_named_event() exactly as given.
+  * @param EventSet
+  *   A PAPI eventset.
+  * @param totalEventCount
+  *   Number of events from the command line.
+  * @param **eventNamesFromCommandLine
+  *   Events provided on the command line.
+  * @param *numEventsSuccessfullyAdded
+  *   Running count of added events; also the next write index into eventsSuccessfullyAdded.
+  * @param **eventsSuccessfullyAdded
+  *   Buffers (written with a PAPI_MAX_STR_LEN limit) receiving the names of events added to the EventSet.
+  * @param *numMultipassEvents
+  *   Counter incremented for every event rejected with PAPI_EMULPASS.
+*/
+static void add_events_from_command_line(int EventSet, int totalEventCount, char **eventNamesFromCommandLine, int *numEventsSuccessfullyAdded, char **eventsSuccessfullyAdded, int *numMultipassEvents)
+{
+    int i;
+    for (i = 0; i < totalEventCount; i++) {
+        int strLen;
+        int papi_errno = PAPI_add_named_event(EventSet, eventNamesFromCommandLine[i]);
+        if (papi_errno != PAPI_OK) {
+            if (papi_errno != PAPI_EMULPASS) { // any failure other than "needs multiple passes" skips the test
+                fprintf(stderr, "Unable to add event %s to the EventSet with error code %d.\n", eventNamesFromCommandLine[i], papi_errno);
+                test_skip(__FILE__, __LINE__, "", 0); // NOTE(review): presumably does not return; otherwise falls through to the multipass count - confirm
+            }
+
+            // Handle multiple pass events: count them and move on without recording a name
+            (*numMultipassEvents)++;
+            continue;
+        }
+
+        // Handle successfully added events: copy the name into the next free slot
+        strLen = snprintf(eventsSuccessfullyAdded[(*numEventsSuccessfullyAdded)], PAPI_MAX_STR_LEN, "%s", eventNamesFromCommandLine[i]);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) { // encoding error or truncated name
+            fprintf(stderr, "Failed to fully write successfully added event.\n");
+            test_skip(__FILE__, __LINE__, "", 0);
+        }
+        (*numEventsSuccessfullyAdded)++;
+    }
+
+    return;
+}
+
 void multi_reset(int event_count, char **evt_names, long long *values)
 {
     CUcontext ctx;
@@ -43,13 +91,29 @@ void multi_reset(int event_count, char *
         test_fail(__FILE__, __LINE__, "Failed to create eventset.", papi_errno);
     }
 
-    for (i=0; i < event_count; i++) {
-        papi_errno = PAPI_add_named_event(EventSet, evt_names[i]);
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", evt_names[i]);
+    // Handle the events from the command line
+    numEventsSuccessfullyAdded = 0;
+    numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded;
+    eventsSuccessfullyAdded = (char **) malloc(event_count * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+    for (i = 0; i < event_count; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
             test_skip(__FILE__, __LINE__, "", 0);
         }
     }
+    add_events_from_command_line(EventSet, event_count, evt_names, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
 
     papi_errno = PAPI_start(EventSet);
     if (papi_errno != PAPI_OK) {
@@ -65,8 +129,8 @@ void multi_reset(int event_count, char *
             test_fail(__FILE__, __LINE__, "PAPI_read error.", papi_errno);
         }
         PRINT(quiet, "Measured values iter %d\n", i);
-        for (j=0; j < event_count; j++) {
-            PRINT(quiet, "%s\t\t%lld\n", evt_names[j], values[j]);
+        for (j=0; j < numEventsSuccessfullyAdded; j++) {
+            PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[j], values[j]);
         }
         papi_errno = PAPI_reset(EventSet);
         if (papi_errno != PAPI_OK) {
@@ -95,6 +159,12 @@ void multi_reset(int event_count, char *
         fprintf(stderr, "cude error: failed to destroy context.\n");
         exit(1);
     }
+
+    // Free allocated memory
+    for (i = 0; i < event_count; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
 }
 
 void multi_read(int event_count, char **evt_names, long long *values)
@@ -114,13 +184,29 @@ void multi_read(int event_count, char **
         test_fail(__FILE__, __LINE__, "Failed to create eventset.", papi_errno);
     }
 
-    for (i=0; i < event_count; i++) {
-        papi_errno = PAPI_add_named_event(EventSet, evt_names[i]);
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", evt_names[i]);
+    // Handle the events from the command line
+    numEventsSuccessfullyAdded = 0;
+    numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded;
+    eventsSuccessfullyAdded = (char **) malloc(event_count * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+    for (i = 0; i < event_count; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
             test_skip(__FILE__, __LINE__, "", 0);
         }
     }
+    add_events_from_command_line(EventSet, event_count, evt_names, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
 
     papi_errno = PAPI_start(EventSet);
     if (papi_errno != PAPI_OK) {
@@ -135,8 +221,8 @@ void multi_read(int event_count, char **
             test_fail(__FILE__, __LINE__, "PAPI_start error.", papi_errno);
         }
         PRINT(quiet, "Measured values iter %d\n", i);
-        for (j=0; j < event_count; j++) {
-            PRINT(quiet, "%s\t\t%lld\n", evt_names[j], values[j]);
+        for (j=0; j < numEventsSuccessfullyAdded; j++) {
+            PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[j], values[j]);
         }
     }
     papi_errno = PAPI_stop(EventSet, values);
@@ -158,9 +244,15 @@ void multi_read(int event_count, char **
         fprintf(stderr, "cude error: failed to destroy context.\n");
         exit(1);
     }
+
+    // Free allocated memory
+    for (i = 0; i < event_count; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
 }
 
-void single_read(int event_count, char **evt_names, long long *values)
+void single_read(int event_count, char **evt_names, long long *values, char ***addedEvents)
 {
     int papi_errno, i;
     CUcontext ctx;
@@ -175,12 +267,30 @@ void single_read(int event_count, char *
     if (papi_errno != PAPI_OK) {
         test_fail(__FILE__, __LINE__, "Failed to create eventset.", papi_errno);
     }
-    for (i=0; i < event_count; i++) {
-        papi_errno = PAPI_add_named_event(EventSet, evt_names[i]);
-        if (papi_errno != PAPI_OK) {
-            fprintf(stderr, "Failed to add event %s\n", evt_names[i]);
+
+    // Handle the events from the command line
+    numEventsSuccessfullyAdded = 0;
+    numMultipassEvents = 0;
+    char **eventsSuccessfullyAdded;
+    eventsSuccessfullyAdded = (char **) malloc(event_count * sizeof(char *));
+    if (eventsSuccessfullyAdded == NULL) {
+        fprintf(stderr, "Failed to allocate memory for successfully added events.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
+    }
+    for (i = 0; i < event_count; i++) {
+        eventsSuccessfullyAdded[i] = (char *) malloc(PAPI_MAX_STR_LEN * sizeof(char));
+        if (eventsSuccessfullyAdded[i] == NULL) {
+            fprintf(stderr, "Failed to allocate memory for command line argument.\n");
             test_skip(__FILE__, __LINE__, "", 0);
         }
+
+    }
+    add_events_from_command_line(EventSet, event_count, evt_names, &numEventsSuccessfullyAdded, eventsSuccessfullyAdded, &numMultipassEvents);
+
+    // Only multiple pass events were provided on the command line
+    if (numEventsSuccessfullyAdded == 0) {
+        fprintf(stderr, "Events provided on the command line could not be added to an EventSet as they require multiple passes.\n");
+        test_skip(__FILE__, __LINE__, "", 0);
     }
 
     papi_errno = PAPI_start(EventSet);
@@ -197,8 +307,8 @@ void single_read(int event_count, char *
         test_fail(__FILE__, __LINE__, "PAPI_stop error.", papi_errno);
     }
     PRINT(quiet, "Measured values from single read\n");
-    for (j=0; j < event_count; j++) {
-        PRINT(quiet, "%s\t\t%lld\n", evt_names[j], values[j]);
+    for (j=0; j < numEventsSuccessfullyAdded; j++) {
+        PRINT(quiet, "%s\t\t%lld\n", eventsSuccessfullyAdded[j], values[j]);
     }
     papi_errno = PAPI_cleanup_eventset(EventSet);
     if (papi_errno != PAPI_OK) {
@@ -214,6 +324,8 @@ void single_read(int event_count, char *
         fprintf(stderr, "cuda error: failed to destroy cuda context.\n");
         exit(1);
     }
+
+    *addedEvents = eventsSuccessfullyAdded;
 }
 
 int main(int argc, char **argv)
@@ -251,16 +363,30 @@ int main(int argc, char **argv)
     PRINT(quiet, "\nRunning multi_read.\n");
     multi_read(event_count, argv + 1, values_multi_read);
     PRINT(quiet, "\nRunning single_read.\n");
-    single_read(event_count, argv + 1, values_single_read);
+    char **eventsSuccessfullyAdded = { 0 };
+    single_read(event_count, argv + 1, values_single_read, &eventsSuccessfullyAdded);
 
     int i;
     PRINT(quiet, "Final measured values\nEvent_name\t\t\t\t\t\tMulti_read\tsingle_read\n");
-    for (i=0; i < event_count; i++) {
-        PRINT(quiet, "%s\t\t\t%lld\t\t%lld\n", argv[i+1], values_multi_read[i], values_single_read[i]);
+    for (i=0; i < numEventsSuccessfullyAdded; i++) {
+        PRINT(quiet, "%s\t\t\t%lld\t\t%lld\n", eventsSuccessfullyAdded[i], values_multi_read[i], values_single_read[i]);
         if ( !approx_equal(values_multi_read[i], values_single_read[i]) )
             test_warn(__FILE__, __LINE__, "Measured values from multi read and single read don't match.", PAPI_OK);
     }
+
+    // Free allocated memory
+    for (i = 0; i < event_count; i++) {
+        free(eventsSuccessfullyAdded[i]);
+    }
+    free(eventsSuccessfullyAdded);
+
     PAPI_shutdown();
+
+    // Output a note that a multiple pass event was provided on the command line
+    if (numMultipassEvents > 0) {
+        PRINT(quiet, "\033[0;33mNOTE: From the events provided on the command line, an event or events requiring multiple passes was detected and not added to the EventSet. Check your events with utils/papi_native_avail.\n\033[0m");
+    }
+
     test_pass(__FILE__);
 #else
     fprintf(stderr, "Please compile with -DPAPI to test this feature.\n");
diff -pruN 7.2.0~b2-1/src/components/cuda/tests/test_multipass_event_fail.cu 7.2.0-1/src/components/cuda/tests/test_multipass_event_fail.cu
--- 7.2.0~b2-1/src/components/cuda/tests/test_multipass_event_fail.cu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/cuda/tests/test_multipass_event_fail.cu	2025-06-25 22:38:10.000000000 +0000
@@ -21,45 +21,48 @@ int test_PAPI_add_named_event(int *Event
     PRINT(quiet, "LOG: %s: Entering.\n", __func__);
     for (i=0; i<numEvents; i++) {
         papi_errno = PAPI_add_named_event(*EventSet, EventName[i]);
-        if (papi_errno == PAPI_ENOEVNT) {
-            fprintf(stderr, "Event name %s does not exist.\n", EventName[i]);
+        if (papi_errno != PAPI_EMULPASS && papi_errno != PAPI_OK) {
+            fprintf(stderr, "Failed to add named event %s with error code %d.\n", EventName[i], papi_errno);
             return FAIL;
         }
-        if (papi_errno != PAPI_OK) {
-            PRINT(quiet, "Error %d: Failed to add event %s\n", papi_errno, EventName[i]);
-        }
     }
-    if (papi_errno == PAPI_EMULPASS)
-        return PASS;           // Test pass condition
+    if (papi_errno == PAPI_EMULPASS || papi_errno == PAPI_OK) {
+        PRINT(quiet, "PASSED test_PAPI_add_named_event\n");
+        return PASS; // Test pass condition
+    }
     return FAIL;
 }
 
-int test_PAPI_add_event(int *EventSet, int numEvents, char **EventName) {
+int test_PAPI_add_event(int *EventSet, int numEvents, char **EventName, int *numEventsSuccessfullyAdded) {
     int event, i, papi_errno;
     PRINT(quiet, "LOG: %s: Entering.\n", __func__);
 
     for (i=0; i<numEvents; i++) {
         papi_errno = PAPI_event_name_to_code(EventName[i], &event);
-        if (papi_errno == PAPI_ENOEVNT) {
-            fprintf(stderr, "Event name %s does not exist.\n", EventName[i]);
-            return FAIL;
-        }
         if (papi_errno != PAPI_OK) {
-            PRINT(quiet, "Error %d: Error in name to code.\n", papi_errno);
+            fprintf(stderr, "Failed to convert event name %s to event code with error code %d.\n", EventName[i], papi_errno);
             goto fail;
         }
         papi_errno = PAPI_add_event(*EventSet, event);
         if (papi_errno != PAPI_OK) {
-            PRINT(quiet, "Error %d: Failed to add event %s\n", papi_errno, EventName[i]);
+            if (papi_errno != PAPI_EMULPASS) {
+                fprintf(stderr, "Failed to add event %s with error code %d.\n", EventName[i], papi_errno);
+                goto fail;
+            }
+        }
+        else {
+            (*numEventsSuccessfullyAdded)++;
         }
     }
-    if (papi_errno == PAPI_EMULPASS)
+    if (papi_errno == PAPI_EMULPASS || papi_errno == PAPI_OK) {
+        PRINT(quiet, "PASSED test_PAPI_add_event\n");
         return PASS;
+    }
 fail:
     return FAIL;
 }
 
-int test_PAPI_add_events(int *EventSet, int numEvents, char **EventName) {
+int test_PAPI_add_events(int *EventSet, int numEvents, char **EventName, int numEventsSuccessfullyAdded) {
     int papi_errno, i;
     PRINT(quiet, "LOG: %s: Entering.\n", __func__);
 
@@ -68,16 +71,16 @@ int test_PAPI_add_events(int *EventSet,
     for (i=0; i<numEvents; i++) {
         papi_errno = PAPI_event_name_to_code(EventName[i], &events[i]);
         if (papi_errno != PAPI_OK) {
-            PRINT(quiet, "Error %d: Error in name to code.\n", papi_errno);
+            fprintf(stderr, "Failed to convert event name %s to event code with error code %d.\n", EventName[i], papi_errno);
             goto fail;
         }
     }
     papi_errno = PAPI_add_events(*EventSet, events, numEvents);
-    if (papi_errno != PAPI_OK) {
-        PRINT(quiet, "Error %d: Failed to add %d events\n", papi_errno, numEvents);
-    }
-    if (papi_errno < numEvents)        // Returns index at which error occurred.
+    if (papi_errno == PAPI_EMULPASS || papi_errno == PAPI_OK || papi_errno == numEventsSuccessfullyAdded) {
+        PRINT(quiet, "PASSED test_PAPI_add_events with %d of %d events succesfully added.\n", numEventsSuccessfullyAdded, numEvents);
         return PASS;
+    }
+
 fail:
     return FAIL;
 }
@@ -118,7 +121,10 @@ int main(int argc, char **argv)
         test_fail(__FILE__, __LINE__, "PAPI_create_eventset() failed!", 0);
     }
 
-    pass = test_PAPI_add_event(&event_set, argc-1, argv+1);
+    // Keep track of the number of events from the command line we can actually add
+    // This is done to properly check the test in the function test_PAPI_add_events
+    int numEventsSuccessfullyAdded = 0;
+    pass = test_PAPI_add_event(&event_set, argc-1, argv+1, &numEventsSuccessfullyAdded);
     papi_errno = PAPI_cleanup_eventset(event_set);
     if (papi_errno != PAPI_OK) {
         test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset() failed!", 0);
@@ -152,7 +158,7 @@ int main(int argc, char **argv)
         test_fail(__FILE__, __LINE__, "PAPI_create_eventset() failed!", 0);
     }
 
-    pass += test_PAPI_add_events(&event_set, argc-1, argv+1);
+    pass += test_PAPI_add_events(&event_set, argc-1, argv+1, numEventsSuccessfullyAdded);
     papi_errno = PAPI_cleanup_eventset(event_set);
     if (papi_errno != PAPI_OK) {
         test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset() failed!", 0);
@@ -173,4 +179,4 @@ int main(int argc, char **argv)
     fprintf(stderr, "Please compile with -DPAPI to test this feature.\n");
 #endif
     return 0;
-}
\ No newline at end of file
+}
diff -pruN 7.2.0~b2-1/src/components/infiniband/linux-infiniband.c 7.2.0-1/src/components/infiniband/linux-infiniband.c
--- 7.2.0~b2-1/src/components/infiniband/linux-infiniband.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/infiniband/linux-infiniband.c	2025-06-25 22:38:10.000000000 +0000
@@ -303,11 +303,12 @@ add_ib_device(const char* name, int port
         return (0);
     }
 
-    new_dev->dev_name = strdup(name);
+    new_dev->dev_name = papi_strdup(name);
     new_dev->dev_port = port;
     if (new_dev->dev_name==0)
     {
         PAPIERROR("cannot allocate memory for device internal fields");
+        papi_free(new_dev->dev_name);
         papi_free(new_dev);
         return (0);
     }
@@ -328,8 +329,8 @@ add_ib_counter(const char* name, const c
         return (0);
     }
 
-    new_cnt->ev_name = strdup(name);
-    new_cnt->ev_file_name = strdup(file_name);
+    new_cnt->ev_name = papi_strdup(name);
+    new_cnt->ev_file_name = papi_strdup(file_name);
     new_cnt->extended = extended;
     new_cnt->ev_device = device;
     if (new_cnt->ev_name==0 || new_cnt->ev_file_name==0)
@@ -599,9 +600,9 @@ deallocate_infiniband_resources()
     {
         for (i=0 ; i<num_events ; ++i) {
             if (infiniband_native_events[i].name)
-                free(infiniband_native_events[i].name);
+                papi_free(infiniband_native_events[i].name);
             if (infiniband_native_events[i].file_name)
-                free(infiniband_native_events[i].file_name);
+                papi_free(infiniband_native_events[i].file_name);
             if (infiniband_native_events[i].description)
                 papi_free(infiniband_native_events[i].description);
         }
@@ -612,7 +613,7 @@ deallocate_infiniband_resources()
     while (iter != 0) 
     {
         if (iter->dev_name)
-            free(iter->dev_name);
+            papi_free(iter->dev_name);
 
         ib_device_t *tmp = iter;
         iter = iter->next;
@@ -983,7 +984,7 @@ papi_vector_t _infiniband_vector = {
         .context = sizeof (infiniband_context_t),
         .control_state = sizeof (infiniband_control_state_t),
         .reg_value = sizeof (infiniband_register_t),
-        /* .reg_alloc = sizeof (infiniband_reg_alloc_t), */
+        .reg_alloc = 1 /* unused */
     },
     /* function pointers in this component */
     .init_thread =          _infiniband_init_thread,
diff -pruN 7.2.0~b2-1/src/components/intel_gpu/README.md 7.2.0-1/src/components/intel_gpu/README.md
--- 7.2.0~b2-1/src/components/intel_gpu/README.md	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/intel_gpu/README.md	2025-06-25 22:38:10.000000000 +0000
@@ -34,7 +34,7 @@ It is requred to build with Intel oneAPI
 
 * To enable metrics query on an kernel
 	```sh 
-    ZET_ENABLE_API_TRACING_EXP=1
+    ZE_ENABLE_TRACING_LAYER=1
 	```
 
 ## Metric collection mode:
@@ -42,7 +42,7 @@ It is requred to build with Intel oneAPI
 Two metrics collection modes are supported.
 
 * Time based sampling. In this mode, data collection and app can run in separate processes. 
-* Metrics query on a kernel. In this mode,  the PAPI_start() and PAPI_stop must be called before kernel launch and after kernel execution completes. When setting ZET_ENABLE_API_TRACING_EXP=1,  the collection will switch to metrics query mode.
+* Metrics query on a kernel. In this mode,  the PAPI_start() and PAPI_stop must be called before kernel launch and after kernel execution completes. When setting ZE_ENABLE_TRACING_LAYER=1,  the collection will switch to metrics query mode.
 
 ## Metrics:
 
diff -pruN 7.2.0~b2-1/src/components/intel_gpu/Rules.intel_gpu 7.2.0-1/src/components/intel_gpu/Rules.intel_gpu
--- 7.2.0~b2-1/src/components/intel_gpu/Rules.intel_gpu	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/intel_gpu/Rules.intel_gpu	2025-06-25 22:38:10.000000000 +0000
@@ -14,7 +14,7 @@ COMPSRCS += $(GPUSRCS) $(GPULIBSRCS)
 GPUOBJS = GPUMetricInterface.o GPUMetricHandler.o linux_intel_gpu_metrics.o
 COMPOBJS += $(GPUOBJS)
 
-CFLAGS += $(LDL) -g -DDEBUG  -I$(GPU_INTERNAL) -I$(GPU_INTERNAL)/inc -D_GLIBCXX_USE_CXX11_ABI=1
+CFLAGS += $(LDL) -g -I$(GPU_INTERNAL) -I$(GPU_INTERNAL)/inc -D_GLIBCXX_USE_CXX11_ABI=1
 LDFLAGS += -ldl
 
 GPUMetricInterface.o:  $(GPU_INTERNAL)/src/GPUMetricInterface.cpp $(GPUHEADER)
diff -pruN 7.2.0~b2-1/src/components/lmsensors/linux-lmsensors.c 7.2.0-1/src/components/lmsensors/linux-lmsensors.c
--- 7.2.0~b2-1/src/components/lmsensors/linux-lmsensors.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/lmsensors/linux-lmsensors.c	2025-06-25 22:38:10.000000000 +0000
@@ -201,8 +201,7 @@ createNativeEvents( void )
 	      char *featurelabel;
 
 	      if ( !( featurelabel = sensors_get_labelPtr( chip_name, feature ))) {
-		 fprintf( stderr, "ERROR: Can't get label of feature %s!\n",
-						 feature->name );
+		 SUBDBG( "ERROR: Can't get label of feature %s!\n", feature->name );
 		 continue;
 	      }
 
@@ -260,8 +259,7 @@ getEventValue( unsigned event_id )
 							 subfeat_nr, &value );
 
 	if ( res < 0 ) {
-		fprintf( stderr, "libsensors(): Could not read event #%d!\n",
-				 event_id );
+		SUBDBG( "libsensors(): Could not read event #%d!\n", event_id );
 		return -1;
 	}
 
diff -pruN 7.2.0~b2-1/src/components/lmsensors/tests/lmsensors_read.c 7.2.0-1/src/components/lmsensors/tests/lmsensors_read.c
--- 7.2.0~b2-1/src/components/lmsensors/tests/lmsensors_read.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/lmsensors/tests/lmsensors_read.c	2025-06-25 22:38:10.000000000 +0000
@@ -22,7 +22,7 @@
 #include "papi.h"
 #include "papi_test.h"
 
-/* number of events we want to add to the PAPI EventSet */
+/* maximum number of events we want to add to the PAPI EventSet */
 #define NUM_EVENTS 3
 
 int main(int argc, char **argv)
@@ -64,7 +64,7 @@ int main(int argc, char **argv)
         test_fail(__FILE__, __LINE__, "PAPI_enum_cmp_event", retval);
     }   
 
-    /* enumerate UNITL we find 3 Core and max temp events  */ 
+    /* enumerate through the available lmsensors events and add a maximum of three */
     modifier = PAPI_ENUM_EVENTS;
     do {
         retval = PAPI_event_code_to_name(EventCode, EventName);
@@ -72,21 +72,22 @@ int main(int argc, char **argv)
             test_fail(__FILE__, __LINE__, "PAPI_event_code_to_name", retval);
         }
 
-        /* filter for only core and max temp events, max of three events to be added  */
-        if (strstr(EventName, "Core") && strstr(EventName, "max")) {
-            retval = PAPI_add_named_event(EventSet, EventName);
-            if (retval != PAPI_OK) {
-                test_fail(__FILE__, __LINE__, "PAPI_add_named_event", retval); 
-            }
-              
-            if (!TESTS_QUIET) { 
-                printf("Successfully added %s to the EventSet.\n", EventName);
-            }
-            
-            /* store current event name and increment count */
-            strncpy(lm_events[event_cnt], EventName, PAPI_MAX_STR_LEN);
-            event_cnt++;
+        retval = PAPI_add_named_event(EventSet, EventName);
+        if (retval != PAPI_OK) {
+            test_fail(__FILE__, __LINE__, "PAPI_add_named_event", retval);
+        }
+
+        if (!TESTS_QUIET) {
+            printf("Successfully added %s to the EventSet.\n", EventName);
+        }
+
+        /* store current event name and increment count */
+        int strLen = snprintf(lm_events[event_cnt], PAPI_MAX_STR_LEN, "%s", EventName);
+        if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+            fprintf(stderr, "Failed to fully write event name: %s into array.\n", EventName);
+            return PAPI_EBUF;
         }
+        event_cnt++;
     } while( ( PAPI_enum_cmp_event(&EventCode, modifier, cidx) == PAPI_OK ) && ( event_cnt < NUM_EVENTS ) );
 
     /* start counting */
@@ -107,11 +108,11 @@ int main(int argc, char **argv)
         test_fail(__FILE__, __LINE__, "PAPI_stop", retval);
     }
 
-    /* print out temp values for each event  */
+    /* for each event successfully added print their counter values */
     if (!TESTS_QUIET) {
-        printf("Max temp output for events:\n");
-        for (i = 0; i < NUM_EVENTS; i++) {
-            printf("%s: %d\n", lm_events[i], values[i]);       
+        printf("Read counters from the EventSet:\n");
+        for (i = 0; i < event_cnt; i++) {
+            printf("Event: %s, Counter Value: %lld\n", lm_events[i], values[i]);
         }
     } 
    
diff -pruN 7.2.0~b2-1/src/components/net/linux-net.c 7.2.0-1/src/components/net/linux-net.c
--- 7.2.0~b2-1/src/components/net/linux-net.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/net/linux-net.c	2025-06-25 22:38:10.000000000 +0000
@@ -173,7 +173,7 @@ generateNetEventList( void )
             } else if (last) {
                 last->next = temp;
             } else {
-                free(temp);
+                papi_free(temp);
                 fclose(fin);
                 PAPIERROR("This shouldn't be possible\n");
                 snprintf(_net_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN-2,
diff -pruN 7.2.0~b2-1/src/components/nvml/Rules.nvml 7.2.0-1/src/components/nvml/Rules.nvml
--- 7.2.0~b2-1/src/components/nvml/Rules.nvml	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/nvml/Rules.nvml	2025-06-25 22:38:10.000000000 +0000
@@ -17,14 +17,14 @@ PAPI_CUDA_ROOT ?= /opt/cuda
 
 # Both at compile time and run time, the software depends on PAPI_CUDA_ROOT.
 # There are three libraries used by the NVML component, they are 
-# libcuda.so
-# libcudart.so 
-# libnvidia-ml.so
+# A variation of the shared object libcuda (e.g. libcuda.so or libcuda.so.1)
+# A variation of the shared object libcudart (e.g. libcudart.so or libcudart.so.12)
+# A variation of the shared object libnvidia-ml (e.g. libnvidia-ml.so or libnvidia-ml.so.1)
 
 # The standard installed locations for these libraries, with overrides:
-# $(PAPI_CUDA_ROOT)/lib64/libcuda.so               #O.R. PAPI_CUDA_MAIN
-# $(PAPI_CUDA_ROOT)/lib64/libcudart.so             #O.R. PAPI_CUDA_RUNTIME
-# $(PAPI_CUDA_ROOT)/lib64/libnvidia-ml.so          #O.R. PAPI_NVML_MAIN
+# $(PAPI_CUDA_ROOT)/lib64/libcuda.so (or libcuda.so.1)            #O.R. PAPI_CUDA_MAIN
+# $(PAPI_CUDA_ROOT)/lib64/libcudart.so (or libcudart.so.12)       #O.R. PAPI_CUDA_RUNTIME
+# $(PAPI_CUDA_ROOT)/lib64/libnvidia-ml.so (or libnvidia-ml.so.1)  #O.R. PAPI_NVML_MAIN
 # 
 # There are many ways to cause these paths to be known. 
 # Spack is a package manager used on supercomputers, Linux and MacOS. If Spack
@@ -59,6 +59,8 @@ PAPI_NVML_MAIN = \"\"
 
 # An example of an override:
 # PAPI_NVML_MAIN = \"$(PAPI_CUDA_ROOT)/lib64/libnvidia-ml.so\"
+# NOTE: With drivers 560+, libnvidia-ml.so was replaced by libnvidia-ml.so.1;
+# see https://github.com/NVIDIA/yum-packaging-nvidia-driver/issues/9.
 
 # Note:  PAPI_CUDA_MAIN and PAPI_CUDA_RUNTIME, if set, will also apply to the
 #        CUDA component, which uses the same libraries.
diff -pruN 7.2.0~b2-1/src/components/nvml/linux-nvml.c 7.2.0-1/src/components/nvml/linux-nvml.c
--- 7.2.0~b2-1/src/components/nvml/linux-nvml.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/nvml/linux-nvml.c	2025-06-25 22:38:10.000000000 +0000
@@ -33,6 +33,7 @@ template.
 #include <stdlib.h>
 #include <inttypes.h>
 #include <string.h>
+#include <dirent.h>
 /* Headers required by PAPI */
 #include "papi.h"
 #include "papi_internal.h"
@@ -1240,6 +1241,117 @@ nvml_init_private_exit:
     return err;
 }
 
+/**@class nvml_search_and_load_shared_objects
+ * @brief Search and load Cuda shared objects.
+ *
+ * @param *parentPath
+ *   The main path we will use to search for the shared objects. When soMainName
+ *   is NULL it may also be the full path of the shared object itself.
+ * @param *soMainName
+ *   The name of the shared object e.g. libnvidia-ml. This is used
+ *   to select the standardSubPaths to use; NULL searches parentPath only.
+ * @param *soNamesToSearchFor[]
+ *   Varying names of the shared object we want to search for.
+ * @param soNamesToSearchCount
+ *   Total number of names in soNamesToSearchFor.
+ * @return Handle returned by dlopen, or NULL if nothing was found and loaded.
+ */
+static void *nvml_search_and_load_shared_objects(const char *parentPath, const char *soMainName, const char *soNamesToSearchFor[], int soNamesToSearchCount)
+{
+    // NULL terminated list; stays empty for an unrecognized soMainName
+    const char *standardSubPaths[3] = { NULL, NULL, NULL };
+    if (soMainName == NULL) {
+        // Case for when a user provides an exact path e.g. PAPI_NVML_MAIN. It may name
+        // the shared object itself (as in the Rules.nvml override example), so try it as is
+        void *direct = dlopen(parentPath, RTLD_NOW | RTLD_GLOBAL);
+        if (direct != NULL) {
+            return direct;
+        }
+        // Otherwise treat it as a directory and do not search subpaths
+        standardSubPaths[0] = "%s/";
+    }
+    else if (strcmp(soMainName, "libnvidia-ml") == 0) {
+        // Case for when we want to search explicit subpaths for a shared object
+        standardSubPaths[0] = "%s/lib64/";
+        standardSubPaths[1] = "%s/";
+    }
+
+    char pathToSharedLibrary[PAPI_HUGE_STR_LEN], directoryPathToSearch[PAPI_HUGE_STR_LEN];
+    int i, j, strLen;
+    for (i = 0; standardSubPaths[i] != NULL; i++) {
+        // Create path to search for dl names
+        strLen = snprintf(directoryPathToSearch, PAPI_HUGE_STR_LEN, standardSubPaths[i], parentPath);
+        if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+            SUBDBG("Failed to fully write path to search for dlnames.\n");
+            return NULL;
+        }
+
+        DIR *dir = opendir(directoryPathToSearch);
+        if (dir == NULL) {
+            // A directory that cannot be opened is not fatal, try the next subpath
+            SUBDBG("Directory path could not be opened.\n");
+            continue;
+        }
+
+        for (j = 0; j < soNamesToSearchCount; j++) {
+            // Names containing "so" must match exactly (.so and .so.1 case), any
+            // other name is matched as a prefix (we could not find .so and .so.1)
+            int exactMatch = (strstr(soNamesToSearchFor[j], "so") != NULL);
+            size_t prefixLen = strlen(soNamesToSearchFor[j]);
+            struct dirent *dirEntry;
+            while( ( dirEntry = readdir(dir) ) != NULL ) {
+                int result;
+                if (exactMatch) {
+                    result = strcmp(dirEntry->d_name, soNamesToSearchFor[j]);
+                }
+                else {
+                    result = strncmp(dirEntry->d_name, soNamesToSearchFor[j], prefixLen);
+                }
+                if (result != 0) {
+                    continue;
+                }
+                // Construct path to shared library while d_name is still valid
+                strLen = snprintf(pathToSharedLibrary, PAPI_HUGE_STR_LEN, "%s%s", directoryPathToSearch, dirEntry->d_name);
+                closedir(dir);
+                if (strLen < 0 || strLen >= PAPI_HUGE_STR_LEN) {
+                    SUBDBG("Failed to fully write constructed path to shared library.\n");
+                    return NULL;
+                }
+                return dlopen(pathToSharedLibrary, RTLD_NOW | RTLD_GLOBAL);
+            }
+            // Reset the position of the directory stream
+            rewinddir(dir);
+        }
+        // Nothing matched in this directory, release the stream before moving on
+        closedir(dir);
+    }
+
+    return NULL;
+}
+
+/**@class nvml_search_and_load_from_system_paths
+ * @brief Thin wrapper that hands each candidate name to dlopen so that
+ *        Cuda shared objects can be picked up from system paths.
+ *
+ * @param *soNamesToSearchFor[]
+ *   Candidate shared object names, tried in array order.
+ * @param soNamesToSearchCount
+ *   Number of entries in soNamesToSearchFor.
+ */
+static void *nvml_search_and_load_from_system_paths(const char *soNamesToSearchFor[], int soNamesToSearchCount)
+{
+    void *handle = NULL;
+    int idx = 0;
+    // Stop at the first name that dlopen manages to load
+    while (handle == NULL && idx < soNamesToSearchCount) {
+        handle = dlopen(soNamesToSearchFor[idx], RTLD_NOW | RTLD_GLOBAL);
+        idx++;
+    }
+
+    // Still NULL when none of the candidates could be loaded
+    return handle;
+}
+
 /*
  * Link the necessary CUDA libraries to use the NVML component.  If any of them can not be found, then
  * the NVML component will just be disabled.  This is done at runtime so that a version of PAPI built
@@ -1260,27 +1372,29 @@ linkCudaLibraries()
     // getenv returns NULL if environment variable is not found.
     char *cuda_root = getenv("PAPI_CUDA_ROOT");
 
-    // We need the NVML main library, normally libnvidia-ml.so. 
+    // We need the NVML main library, normally libnvidia-ml.so or libnvidia-ml.so.1.
     dl3 = NULL;                                                 // Ensure reset to NULL.
 
+    int soNamesToSearchCount = 3; 
+    const char *soNamesToSearchFor[] = {"libnvidia-ml.so", "libnvidia-ml.so.1", "libnvidia"};
     // Step 1: Process override if given.   
     if (strlen(nvml_main) > 0) {                                        // If override given, it MUST work.
-        dl3 = dlopen(nvml_main, RTLD_NOW | RTLD_GLOBAL);                // Try to open that path.
+        dl3 = nvml_search_and_load_shared_objects(nvml_main, NULL, soNamesToSearchFor, soNamesToSearchCount); // Try to open that path
         if (dl3 == NULL) {
             snprintf(_nvml_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "PAPI_NVML_MAIN override '%s' given in Rules.nvml not found.", nvml_main);
             return(PAPI_ENOSUPP);   // Override given but not found.
         }
     }
 
-    // Step 2: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths.
-    if (dl3 == NULL) {                                              // If no override,
-        dl3 = dlopen("libnvidia-ml.so", RTLD_NOW | RTLD_GLOBAL);    // Try system paths.
-    }
-
-    // Step 3: Try the explicit install default. 
+    char *soMainName = "libnvidia-ml";
+    // Step 2: Try the explicit install default. 
     if (dl3 == NULL && cuda_root != NULL) {                                         // If ROOT given, it doesn't HAVE to work.
-        snprintf(path_lib, 1024, "%s/lib64/libnvidia-ml.so", cuda_root);            // PAPI Root check.
-        dl3 = dlopen(path_lib, RTLD_NOW | RTLD_GLOBAL);                             // Try to open that path.
+        dl3 = nvml_search_and_load_shared_objects(cuda_root, soMainName, soNamesToSearchFor, soNamesToSearchCount); // Try to open that path.
+    } 
+
+    // Step 3: Try system paths, will work with Spack, LD_LIBRARY_PATH, default paths.
+    if (dl3 == NULL) {                                              // If no override,
+        dl3 = nvml_search_and_load_from_system_paths(soNamesToSearchFor, soNamesToSearchCount); // Try system paths.
     }
 
     // Check for failure.
@@ -1779,7 +1893,7 @@ papi_vector_t _nvml_vector = {
         .context = sizeof(nvml_context_t),
         .control_state = sizeof(nvml_control_state_t),
         .reg_value = sizeof(nvml_register_t),
-        // .reg_alloc = sizeof ( nvml_reg_alloc_t ),
+        .reg_alloc = 1, /* unused */
     },
 
     /* function pointers */
diff -pruN 7.2.0~b2-1/src/components/perf_event/perf_event.c 7.2.0-1/src/components/perf_event/perf_event.c
--- 7.2.0~b2-1/src/components/perf_event/perf_event.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/perf_event/perf_event.c	2025-06-25 22:38:10.000000000 +0000
@@ -2475,10 +2475,12 @@ _pe_handle_paranoid(papi_vector_t *compo
 	if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
 	fclose(fff);
 
-	if (paranoid_level==3) {
-		strCpy=strncpy(component->cmp_info.disabled_reason,
-			"perf_event support disabled by Linux with paranoid=3",PAPI_MAX_STR_LEN);
-      if (strCpy == NULL) HANDLE_STRING_ERROR;
+	if (paranoid_level >= 3) {
+		int strLen = snprintf(component->cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "perf_event support disabled by Linux with paranoid=%d", paranoid_level);
+		if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+			SUBDBG("Failed to fully write disabled reason due to paranoid level.\n");
+			return PAPI_EBUF;
+		}
 		return PAPI_ECMP;
 	}
 
diff -pruN 7.2.0~b2-1/src/components/perf_event_uncore/perf_event_uncore.c 7.2.0-1/src/components/perf_event_uncore/perf_event_uncore.c
--- 7.2.0~b2-1/src/components/perf_event_uncore/perf_event_uncore.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/perf_event_uncore/perf_event_uncore.c	2025-06-25 22:38:10.000000000 +0000
@@ -22,6 +22,7 @@
 #include <sys/utsname.h>
 #include <sys/mman.h>
 #include <sys/ioctl.h>
+#include <linux/capability.h>
 
 /* PAPI-specific includes */
 #include "papi.h"
@@ -114,14 +115,18 @@ get_read_format( unsigned int multiplex,
 /* In case headers aren't new enough to have __NR_perf_event_open */
 #ifndef __NR_perf_event_open
 
-#ifdef __powerpc__
+#if defined(__powerpc__)
 #define __NR_perf_event_open	319
+#define __NR_capget				183
 #elif defined(__x86_64__)
 #define __NR_perf_event_open	298
+#define __NR_capget				125
 #elif defined(__i386__)
 #define __NR_perf_event_open	336
-#elif defined(__arm__)          366+0x900000
-#define __NR_perf_event_open
+#define __NR_capget				184
+#elif defined(__arm__)
+#define __NR_perf_event_open	364+0x900000
+#define __NR_capget				184+0x900000
 #endif
 
 #endif
@@ -599,6 +604,10 @@ _peu_init_component( int cidx )
    FILE *fff;
    char *strCpy;
 
+   struct __user_cap_header_struct cap_header;
+   struct __user_cap_data_struct cap_data[2];
+   int perfmon_capabilities;
+
    our_cidx=cidx;
 
    /* The is the official way to detect if perf_event support exists */
@@ -617,6 +626,29 @@ _peu_init_component( int cidx )
    if (retval!=1) fprintf(stderr,"Error reading paranoid level\n");
    fclose(fff);
 
+   /* Check for availability of perf_event through capabilities */
+
+   memset( &cap_header, 0, sizeof(cap_header) );
+   memset( cap_data, 0, sizeof(cap_data) );
+   cap_header.version = _LINUX_CAPABILITY_VERSION_3;
+   cap_header.pid = 0;
+   retval = syscall(__NR_capget, &cap_header, &cap_data);
+
+   if (retval < 0) {
+     strCpy=strncpy( _papi_hwd[cidx]->cmp_info.disabled_reason,
+	     "Error querying Linux capabilities",PAPI_MAX_STR_LEN );
+     _peu_shutdown_component( );
+     if (strCpy == NULL) HANDLE_STRING_ERROR;
+     retval = PAPI_ECMP;
+     goto fn_fail;
+   }
+
+   #ifdef CAP_PERFMON
+      perfmon_capabilities = (cap_data[0].permitted & (1 << CAP_SYS_ADMIN)) ||
+                             (cap_data[1].permitted & (1 << (CAP_PERFMON - 32)));
+   #else
+      perfmon_capabilities = cap_data[0].permitted & (1 << CAP_SYS_ADMIN);
+   #endif
 
    /* Run the libpfm4-specific setup */
 
@@ -663,9 +695,9 @@ _peu_init_component( int cidx )
    /* 0 means you can access CPU-specific data */
    /* -1 means no restrictions                 */
 
-   if ((paranoid_level>0) && (getuid()!=0)) {
+   if ((paranoid_level>0) && (!perfmon_capabilities)) {
       strCpy=strncpy(_papi_hwd[cidx]->cmp_info.disabled_reason,
-	    "Insufficient permissions for uncore access.  Set /proc/sys/kernel/perf_event_paranoid to 0 or run as root.",
+	    "Insufficient permissions for uncore access.  Set /proc/sys/kernel/perf_event_paranoid to 0, run as root or get CAP_PERFMON.",
 	    PAPI_MAX_STR_LEN);
       _peu_shutdown_component( );
      if (strCpy == NULL) HANDLE_STRING_ERROR;
diff -pruN 7.2.0~b2-1/src/components/rapl/linux-rapl.c 7.2.0-1/src/components/rapl/linux-rapl.c
--- 7.2.0~b2-1/src/components/rapl/linux-rapl.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rapl/linux-rapl.c	2025-06-25 22:38:10.000000000 +0000
@@ -445,6 +445,7 @@ _rapl_init_component( int cidx )
 		case 94:	/* Skylake Desktop (H/S) */
 		case 142:	/* Kabylake Mobile */
 		case 158:	/* Kabylake Desktop */
+		case 165:	/* Comet Lake S/H */
 			package_avail=1;
 			pp0_avail=1;
 			pp1_avail=0;
@@ -476,7 +477,16 @@ _rapl_init_component( int cidx )
 			psys_avail=0;
 			different_units=1;
 			break;
-
+			
+		case 143:      /* Sapphire Rapids-SP */
+		case 207:      /* Emerald Rapids */
+		        package_avail=1;
+			pp0_avail=0;
+			pp1_avail=0;
+			dram_avail=1;
+			psys_avail=0;
+			different_units=0;
+			break;
 
 		case 87:	/* Knights Landing (KNL) */
 		case 133:	/* Knights Mill (KNM) */
diff -pruN 7.2.0~b2-1/src/components/rocm/README.md 7.2.0-1/src/components/rocm/README.md
--- 7.2.0~b2-1/src/components/rocm/README.md	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/README.md	2025-06-25 22:38:10.000000000 +0000
@@ -75,6 +75,11 @@ setting the ROCP\_TOOL\_LIB to the PAPI
 
 ## Known Limitations
 
+* The `rocm` component is deprecated starting with the AMD Instinct MI300A and remains deprecated for all later AMD devices.
+  Please use the [`rocp_sdk`](https://github.com/icl-utk-edu/papi/blob/master/src/components/rocp_sdk/README.md) component instead.
+
+* For AMD devices older than the AMD Instinct MI300A, PAPI should not be configured with both `rocm` and `rocp_sdk`.
+
 * PAPI may read zeros for many events if rocprofiler environment variables are
   not exported and HIP functions are executed by the user before the user
   executes PAPI\_library\_init().
diff -pruN 7.2.0~b2-1/src/components/rocm/htable.h 7.2.0-1/src/components/rocm/htable.h
--- 7.2.0~b2-1/src/components/rocm/htable.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/htable.h	2025-06-25 22:38:10.000000000 +0000
@@ -325,7 +325,7 @@ create_table_entry(const char *key, void
     if (*entry == NULL) {
         return HTABLE_ENOMEM;
     }
-    (*entry)->key = strdup(key);
+    (*entry)->key = papi_strdup(key);
     (*entry)->val = val;
     (*entry)->next = NULL;
 
diff -pruN 7.2.0~b2-1/src/components/rocm/roc_common.c 7.2.0-1/src/components/rocm/roc_common.c
--- 7.2.0~b2-1/src/components/rocm/roc_common.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/roc_common.c	2025-06-25 22:38:10.000000000 +0000
@@ -35,6 +35,11 @@ rocc_init(void)
 
     hsa_status_t status = hsa_init_p();
     if (status != HSA_STATUS_SUCCESS) {
+        int errMsgLen = snprintf(error_string, PAPI_MAX_STR_LEN, "%s",
+            "Call to hsa_init() failed. This could be due to ROCm devices failing to be detected.");
+        if( errMsgLen < 0 || errMsgLen >= PAPI_MAX_STR_LEN ) {
+            SUBDBG("error_string was truncated.\n");
+        }
         papi_errno = PAPI_EMISC;
         goto fn_fail;
     }
diff -pruN 7.2.0~b2-1/src/components/rocm/roc_profiler.c 7.2.0-1/src/components/rocm/roc_profiler.c
--- 7.2.0~b2-1/src/components/rocm/roc_profiler.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/roc_profiler.c	2025-06-25 22:38:10.000000000 +0000
@@ -140,6 +140,8 @@ static int evt_id_to_info(uint64_t event
 static int evt_name_to_device(const char *name, int *device);
 static int evt_name_to_instance(const char *name, int *instance);
 static int evt_name_to_basename(const char *name, char *base, int len);
+static int rocm_verify_no_repeated_qualifiers(const char *eventName);
+static int rocm_verify_qualifiers(int flag, char *qualifierName, int equalitySignPosition, int *qualifierValue);
 
 static void *rocp_dlp = NULL;
 static ntv_event_table_t ntv_table;
@@ -222,7 +224,7 @@ rocp_evt_enum(uint64_t *event_code, int
                 papi_errno = evt_id_create(&info, event_code);
                 break;
             }
-            papi_errno = PAPI_END;
+            papi_errno = PAPI_ENOEVNT;
             break;
         case PAPI_NTV_ENUM_UMASKS:
             papi_errno = evt_id_to_info(*event_code, &info);
@@ -245,7 +247,7 @@ rocp_evt_enum(uint64_t *event_code, int
                     break;
                 }
             }
-            papi_errno = PAPI_END;
+            papi_errno = PAPI_ENOEVNT;
             break;
         default:
             papi_errno = PAPI_EINVAL;
@@ -279,6 +281,11 @@ rocp_evt_name_to_code(const char *name,
     int htable_errno;
     SUBDBG("ENTER: name: %s, event_code: %p\n", name, event_code);
 
+    papi_errno = rocm_verify_no_repeated_qualifiers(name);
+    if (papi_errno != PAPI_OK) {
+        goto fn_exit;
+    }
+
     int device;
     papi_errno = evt_name_to_device(name, &device);
     if (papi_errno != PAPI_OK) {
@@ -805,14 +812,123 @@ evt_id_to_info(uint64_t event_id, event_
     return PAPI_OK;
 }
 
+/** @class rocm_verify_no_repeated_qualifiers
+  * @brief Verify that a user has not added multiple device or instance qualifiers
+  *        to an event name.
+  *
+  * @param *eventName
+  *   User provided event name we need to verify.
+  * @return PAPI_OK, PAPI_EBUF (name too long) or PAPI_ENOEVNT (repeated qualifier).
+*/
+int
+rocm_verify_no_repeated_qualifiers(const char *eventName)
+{
+    int numDeviceQualifiers = 0, numInstanceQualifiers = 0;
+    char tmpEventName[PAPI_2MAX_STR_LEN];
+    int strLen = snprintf(tmpEventName, PAPI_2MAX_STR_LEN, "%s", eventName);
+    if (strLen < 0 || strLen >= PAPI_2MAX_STR_LEN) {
+        SUBDBG("Failed to fully write eventName into tmpEventName.\n");
+        return PAPI_EBUF;
+    }
+    char *savePtr = NULL; // strtok_r state lives here, so concurrent callers do not race
+    char *token = strtok_r(tmpEventName, ":", &savePtr);
+    while(token != NULL) {
+        if (strncmp(token, "device", 6) == 0) {
+            numDeviceQualifiers++;
+        }
+        else if (strncmp(token, "instance", 8) == 0){
+            numInstanceQualifiers++;
+        }
+        token = strtok_r(NULL, ":", &savePtr);
+    }
+
+    if (numDeviceQualifiers > 1 || numInstanceQualifiers > 1) {
+        SUBDBG("Provided ROCm event has multiple device or instance qualifiers appended.\n");
+        return PAPI_ENOEVNT;
+    }
+    return PAPI_OK;
+}
+
+
+/** @class rocm_verify_qualifiers
+  * @brief Verify that the device and/or instance qualifier provided by the user
+  *        is valid. E.g. :device=# or :instance=#.
+  *
+  * @param flag
+  *   DEVICE_FLAG or INSTAN_FLAG. Selects which case of the switch statement
+  *   parses the value; any other flag yields PAPI_EINVAL.
+  * @param *qualifierName
+  *   String starting at the qualifier we need to verify. E.g. :device=0 or :instance=1.
+  * @param equalitySignPosition
+  *   Position of where the equal sign is expected in the qualifier string.
+  * @param *qualifierValue
+  *   Receives the parsed device index or instance value. It is written before the
+  *   trailing text is checked, so it can be set even when PAPI_ENOEVNT is returned.
+*/
+int
+rocm_verify_qualifiers(int flag, char *qualifierName, int equalitySignPosition, int *qualifierValue)
+{
+    int pos = equalitySignPosition;
+    // Verify that an equal sign was provided where it was supposed to be
+    if (qualifierName[pos] != '=') {
+        SUBDBG("Improper qualifier name. No equal sign found.\n");
+        return PAPI_ENOEVNT;
+    }
+
+    // Verify that the next character is a digit; the unsigned subtraction wraps below '0'
+    pos++;
+    int isDigit = (unsigned) qualifierName[pos] - '0' < 10;
+    if (!isDigit) {
+        SUBDBG("Improper qualifier name: %s. Digit does not follow equal sign.\n", qualifierName);
+        return PAPI_ENOEVNT;
+    }
+
+    // Parse the qualifier value; anything left after the digits must be the other qualifier
+    char *endPtr;
+    switch(flag)
+    {
+        case INSTAN_FLAG:
+        {
+            *qualifierValue = (int) strtol(qualifierName + strlen(":instance="), &endPtr, 10);
+            if (*endPtr != '\0') {
+                if (strncmp(endPtr, ":device", 7) != 0) {
+                    return PAPI_ENOEVNT;
+                }
+            }
+
+            return PAPI_OK;
+        }
+        case DEVICE_FLAG:
+        {
+            *qualifierValue = (int) strtol(qualifierName + strlen(":device="), &endPtr, 10);
+            if (*endPtr != '\0') {
+                if (strncmp(endPtr, ":instance", 9) != 0) {
+                    return PAPI_ENOEVNT;
+                }
+            }
+
+            return PAPI_OK;
+        }
+        default:
+            SUBDBG("Flag provided is not accounted for in switch statement.\n");
+            return PAPI_EINVAL;
+    }
+}
+
 int
 evt_name_to_device(const char *name, int *device)
 {
-    char *p = strstr(name, ":device=");
+    char *p = strstr(name, ":device");
     if (!p) {
         return PAPI_ENOEVNT;
     }
-    *device = (int) strtol(p + strlen(":device="), NULL, 10);
+
+    int equalitySignPos = 7;
+    int papi_errno = rocm_verify_qualifiers(DEVICE_FLAG, p, equalitySignPos, device);
+    if (papi_errno != PAPI_OK) {
+        return papi_errno;
+    }
+
     return PAPI_OK;
 }
 
@@ -832,12 +948,17 @@ evt_name_to_instance(const char *name, i
         return PAPI_ENOEVNT;
     }
 
-    char *p = strstr(name, ":instance=");
+    char *p = strstr(name, ":instance");
     if (event->instances > 1) {
         if (!p) {
             return PAPI_ENOEVNT;
         }
-        *instance = (int) strtol(p + strlen(":instance="), NULL, 10);
+
+        int equalitySignPos = 9;
+        papi_errno = rocm_verify_qualifiers(INSTAN_FLAG, p, equalitySignPos, instance);
+        if (papi_errno != PAPI_OK) {
+            return papi_errno;
+        }
     } else {
         if (p) {
             return PAPI_ENOEVNT;
diff -pruN 7.2.0~b2-1/src/components/rocm/rocm.c 7.2.0-1/src/components/rocm/rocm.c
--- 7.2.0~b2-1/src/components/rocm/rocm.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/rocm.c	2025-06-25 22:38:10.000000000 +0000
@@ -221,16 +221,22 @@ rocm_init_private(void)
     papi_errno = evt_get_count(&count);
     _rocm_vector.cmp_info.num_native_events = count;
     _rocm_vector.cmp_info.num_cntrs = count;
+    _rocm_vector.cmp_info.initialized = 1;
+    int strLen = snprintf(_rocm_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN, "%s", "");
+    if (strLen < 0 || strLen >= PAPI_MAX_STR_LEN) {
+        SUBDBG("Failed to fully write disabled_reason.\n");
+    }
+
+    goto fn_exit;
+
+  fn_fail:
+    _rocm_vector.cmp_info.initialized = 0;
 
   fn_exit:
-    _rocm_vector.cmp_info.initialized = 1;
     _rocm_vector.cmp_info.disabled = papi_errno;
-    strcpy(_rocm_vector.cmp_info.disabled_reason, "");
     SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
     _papi_hwi_unlock(COMPONENT_LOCK);
     return papi_errno;
-  fn_fail:
-    goto fn_exit;
 }
 
 int
@@ -255,7 +261,7 @@ rocm_shutdown_component(void)
     }
 
   fn_exit:
-    SUBDBG("EXIT: %s\n", PAPI_strerror(papi_errno));
+    SUBDBG("EXIT\n");
     return papi_errno;
   fn_fail:
     _rocm_vector.cmp_info.initialized = orig_state;
@@ -397,14 +403,19 @@ update_native_events(rocm_control_t *ctl
     struct event_map_item sorted_events[PAPI_ROCM_MAX_COUNTERS];
 
     if (ntv_count != ctl->num_events) {
-        ctl->events_id = papi_realloc(ctl->events_id,
-                                      ntv_count * sizeof(*ctl->events_id));
-        if (ctl->events_id == NULL) {
-            papi_errno = PAPI_ENOMEM;
-            goto fn_fail;
-        }
-
         ctl->num_events = ntv_count;
+        if (ntv_count == 0) {
+            papi_free(ctl->events_id);
+            ctl->events_id = NULL;
+            goto fn_exit;
+        }
+        else {
+            ctl->events_id = papi_realloc(ctl->events_id, ntv_count * sizeof(*ctl->events_id));
+            if (ctl->events_id == NULL) {
+                papi_errno = PAPI_ENOMEM;
+                goto fn_fail;
+            }
+        }
     }
 
     int i;
diff -pruN 7.2.0~b2-1/src/components/rocm/tests/Makefile 7.2.0-1/src/components/rocm/tests/Makefile
--- 7.2.0~b2-1/src/components/rocm/tests/Makefile	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/tests/Makefile	2025-06-25 22:38:10.000000000 +0000
@@ -2,7 +2,7 @@ NAME = rocm
 include ../../Makefile_comp_tests.target
 PAPI_ROCM_ROOT ?= /opt/rocm
 
-HIPCC    = $(shell find $(PAPI_ROCM_ROOT) -iname hipcc | grep bin | head -n 1)
+HIPCC    ?= $(shell find $(PAPI_ROCM_ROOT) -iname hipcc | grep bin | head -n 1)
 CC       = $(HIPCC)
 CXX      = $(HIPCC)
 CPPFLAGS+= -I$(PAPI_ROCM_ROOT)/include          \
diff -pruN 7.2.0~b2-1/src/components/rocm/tests/multi_thread_monitoring.cpp 7.2.0-1/src/components/rocm/tests/multi_thread_monitoring.cpp
--- 7.2.0~b2-1/src/components/rocm/tests/multi_thread_monitoring.cpp	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/tests/multi_thread_monitoring.cpp	2025-06-25 22:38:10.000000000 +0000
@@ -15,13 +15,34 @@
 #include <pthread.h>
 #include "common.h"
 
+#define PASS     0x0
+#define PASSWARN 0x1
+#define FAIL     0x2
+#define HIPFAIL  0x4
+
+int *testLINE = NULL;
+int *status = NULL;
+char **errMSG = NULL;
+int *papi_errno = NULL;
+hipError_t *hip_errno = NULL;
+
 int quiet;
 
+static void log_error(int testLINE_arg, int status_arg, const char *errMSG_arg, int tid) {
+    /* Store the reporting line, status code and message in slot `tid` of the global result arrays. */
+    testLINE[tid] = testLINE_arg;
+    status[tid] = status_arg;
+    /* Bounded copy; a negative or >= PAPI_MAX_STR_LEN return means error or truncation, which only warns. */
+    int ret = snprintf(errMSG[tid], PAPI_MAX_STR_LEN, "%s", errMSG_arg);
+    if ( ret < 0 || ret >= PAPI_MAX_STR_LEN ) {
+        fprintf(stdout, "[%s, %d] WARNING: Could not copy string %s into buffer.\n", __FILE__, __LINE__, errMSG_arg);
+    }
+
+    return;
+}
+
 static void *run(void *thread_num_arg)
 {
-    int papi_errno;
-    int pass_with_warning = 0;
-    hipError_t hip_errno;
     int j;
 
 #define NUM_EVENTS 4
@@ -35,36 +56,47 @@ static void *run(void *thread_num_arg)
     int eventset = PAPI_NULL;
     int thread_num = *(int *) thread_num_arg;
 
-    papi_errno = PAPI_create_eventset(&eventset);
-    if (papi_errno != PAPI_OK) {
-        test_fail(__FILE__, __LINE__, "PAPI_create_eventset", papi_errno);
+    /* Initialize global variables. */
+    testLINE[thread_num] = 0;
+    status[thread_num] = PASS;
+    papi_errno[thread_num] = PAPI_OK;
+    hip_errno[thread_num] = hipSuccess;
+
+    papi_errno[thread_num] = PAPI_create_eventset(&eventset);
+    if (papi_errno[thread_num] != PAPI_OK) {
+        log_error(__LINE__, FAIL, "PAPI_create_eventset", thread_num);
+        pthread_exit(NULL);
     }
 
     for (int j = 0; j < NUM_EVENTS; ++j) {
         char named_event[PAPI_MAX_STR_LEN];
         sprintf(named_event, "%s:device=%d", events[j], thread_num);
-        papi_errno = PAPI_add_named_event(eventset, (const char*) named_event);
-        if (papi_errno != PAPI_OK && papi_errno != PAPI_ENOEVNT) {
-            test_fail(__FILE__, __LINE__, "PAPI_add_named_event", papi_errno);
-        } else if (papi_errno == PAPI_ENOEVNT) {
-            pass_with_warning = 1;
+        papi_errno[thread_num] = PAPI_add_named_event(eventset, (const char*) named_event);
+        if (papi_errno[thread_num] != PAPI_OK && papi_errno[thread_num] != PAPI_ENOEVNT) {
+            log_error(__LINE__, FAIL, "PAPI_add_named_event", thread_num);
+            pthread_exit(NULL);
+        } else if (papi_errno[thread_num] == PAPI_ENOEVNT) {
+            status[thread_num] = PASSWARN;
         }
     }
 
-    papi_errno = PAPI_start(eventset);
-    if (papi_errno != PAPI_OK) {
-        test_fail(__FILE__, __LINE__, "PAPI_start", papi_errno);
+    papi_errno[thread_num] = PAPI_start(eventset);
+    if (papi_errno[thread_num] != PAPI_OK) {
+        log_error(__LINE__, FAIL, "PAPI_start", thread_num);
+        pthread_exit(NULL);
     }
 
     hipStream_t stream;
-    hip_errno = hipSetDevice(thread_num);
-    if (hip_errno != hipSuccess) {
-        hip_test_fail(__FILE__, __LINE__, "hipSetDevice", hip_errno);
+    hip_errno[thread_num] = hipSetDevice(thread_num);
+    if (hip_errno[thread_num] != hipSuccess) {
+        log_error(__LINE__, HIPFAIL, "hipSetDevice", thread_num);
+        pthread_exit(NULL);
     }
 
-    hip_errno = hipStreamCreate(&stream);
-    if (hip_errno != hipSuccess) {
-        hip_test_fail(__FILE__, __LINE__, "hipStreamCreate", hip_errno);
+    hip_errno[thread_num] = hipStreamCreate(&stream);
+    if (hip_errno[thread_num] != hipSuccess) {
+        log_error(__LINE__, HIPFAIL, "hipStreamCreate", thread_num);
+        pthread_exit(NULL);
     }
 
     void *handle;
@@ -72,22 +104,25 @@ static void *run(void *thread_num_arg)
 
     hip_do_matmul_work(handle, stream);
 
-    hip_errno = hipStreamSynchronize(stream);
-    if (hip_errno != hipSuccess) {
-        hip_test_fail(__FILE__, __LINE__, "hipStreamSynchronize", hip_errno);
+    hip_errno[thread_num] = hipStreamSynchronize(stream);
+    if (hip_errno[thread_num] != hipSuccess) {
+        log_error(__LINE__, HIPFAIL, "hipStreamSynchronize", thread_num);
+        pthread_exit(NULL);
     }
 
-    hip_errno = hipStreamDestroy(stream);
-    if (hip_errno != hipSuccess) {
-        hip_test_fail(__FILE__, __LINE__, "hipStreamDestroy", hip_errno);
+    hip_errno[thread_num] = hipStreamDestroy(stream);
+    if (hip_errno[thread_num] != hipSuccess) {
+        log_error(__LINE__, HIPFAIL, "hipStreamDestroy", thread_num);
+        pthread_exit(NULL);
     }
 
     hip_do_matmul_cleanup(&handle);
 
     long long counters[NUM_EVENTS] = { 0 };
-    papi_errno = PAPI_stop(eventset, counters);
-    if (papi_errno != PAPI_OK) {
-        test_fail(__FILE__, __LINE__, "PAPI_stop", papi_errno);
+    papi_errno[thread_num] = PAPI_stop(eventset, counters);
+    if (papi_errno[thread_num] != PAPI_OK) {
+        log_error(__LINE__, FAIL, "PAPI_stop", thread_num);
+        pthread_exit(NULL);
     }
 
     for (int i = 0; i < NUM_EVENTS; ++i) {
@@ -98,31 +133,38 @@ static void *run(void *thread_num_arg)
         }
     }
 
-    papi_errno = PAPI_cleanup_eventset(eventset);
-    if (papi_errno != PAPI_OK) {
-        test_fail(__FILE__, __LINE__, "PAPI_cleanup_eventset" , papi_errno);
+    papi_errno[thread_num] = PAPI_cleanup_eventset(eventset);
+    if (papi_errno[thread_num] != PAPI_OK) {
+        log_error(__LINE__, FAIL, "PAPI_cleanup_eventset", thread_num);
+        pthread_exit(NULL);
     }
 
-    papi_errno = PAPI_destroy_eventset(&eventset);
-    if (papi_errno != PAPI_OK) {
-        test_fail(__FILE__, __LINE__, "PAPI_destroy_eventset", papi_errno);
+    papi_errno[thread_num] = PAPI_destroy_eventset(&eventset);
+    if (papi_errno[thread_num] != PAPI_OK) {
+        log_error(__LINE__, FAIL, "PAPI_destroy_eventset", thread_num);
+        pthread_exit(NULL);
     }
 
     /* Query only device 0 and assume all devices are identical */
     int warp_size;
-    hip_errno = hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0);
-    if (hip_errno != hipSuccess) {
-        test_fail(__FILE__, __LINE__, "hipDeviceGetAttribute", hip_errno);
+    hip_errno[thread_num] = hipDeviceGetAttribute(&warp_size, hipDeviceAttributeWarpSize, 0);
+    if (hip_errno[thread_num] != hipSuccess) {
+        log_error(__LINE__, HIPFAIL, "hipDeviceGetAttribute", thread_num);
+        pthread_exit(NULL);
     }
 
     /* compute expected number of waves need to multiply two square matrices of ROWS x COLS elements */
     long long expected_waves = (long long) ((ROWS * COLS) / warp_size);
 
     if (match_expected_counter(expected_waves, counters[2] - counters[3]) != 1) {
-        if (pass_with_warning) {
-            test_warn(__FILE__, __LINE__, "match_expected_counter", 1);
+        if (PASSWARN == status[thread_num]) {
+            papi_errno[thread_num] = 1;
+            log_error(__LINE__, PASSWARN, "match_expected_counter", thread_num);
+            pthread_exit(NULL);
         } else {
-            test_fail(__FILE__, __LINE__, "match_expected_counter", -1);
+            papi_errno[thread_num] = -1;
+            log_error(__LINE__, FAIL, "match_expected_counter", thread_num);
+            pthread_exit(NULL);
         }
     }
 
@@ -131,9 +173,9 @@ static void *run(void *thread_num_arg)
 
 int multi_thread(int argc, char *argv[])
 {
-    int papi_errno;
+    int papi_errno_main;
     int retcode;
-    hipError_t hip_errno;
+    hipError_t hip_errno_main;
     quiet = tests_quiet(argc, argv);
 
     if (!quiet) {
@@ -141,14 +183,14 @@ int multi_thread(int argc, char *argv[])
                 argv[0]);
     }
 
-    papi_errno = PAPI_library_init(PAPI_VER_CURRENT);
-    if (papi_errno != PAPI_VER_CURRENT) {
-        test_fail(__FILE__, __LINE__, "PAPI_library_init", papi_errno);
+    papi_errno_main = PAPI_library_init(PAPI_VER_CURRENT);
+    if (papi_errno_main != PAPI_VER_CURRENT) {
+        test_fail(__FILE__, __LINE__, "PAPI_library_init", papi_errno_main);
     }
 
-    papi_errno = PAPI_thread_init((unsigned long (*)(void)) pthread_self);
-    if (papi_errno != PAPI_OK) {
-        test_fail(__FILE__, __LINE__, "PAPI_thread_init", papi_errno);
+    papi_errno_main = PAPI_thread_init((unsigned long (*)(void)) pthread_self);
+    if (papi_errno_main != PAPI_OK) {
+        test_fail(__FILE__, __LINE__, "PAPI_thread_init", papi_errno_main);
     }
 
     int dev_count;
@@ -156,9 +198,9 @@ int multi_thread(int argc, char *argv[])
      * too (by calling hsa_init()). If hsa is already initialized
      * this will result in the increment of an internal reference
      * counter and won't alter the current configuration. */
-    hip_errno = hipGetDeviceCount(&dev_count);
-    if (hip_errno != hipSuccess) {
-        hip_test_fail(__FILE__, __LINE__, "hipGetDeviceCount", hip_errno);
+    hip_errno_main = hipGetDeviceCount(&dev_count);
+    if (hip_errno_main != hipSuccess) {
+        hip_test_fail(__FILE__, __LINE__, "hipGetDeviceCount", hip_errno_main);
     }
 
     pthread_t *thread = (pthread_t *) malloc(dev_count * sizeof(*thread));
@@ -171,6 +213,38 @@ int multi_thread(int argc, char *argv[])
         test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
     }
 
+    /* Allocate memory for global variables. */
+    testLINE   = (int*)malloc(dev_count*sizeof(int));
+    if (NULL == testLINE) {
+        test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
+    }
+
+    status     = (int*)malloc(dev_count*sizeof(int));
+    if (NULL == status) {
+        test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
+    }
+
+    errMSG   = (char**)malloc(dev_count*sizeof(char*));
+    if (NULL == errMSG) {
+        test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
+    }
+    for (int i = 0; i < dev_count; ++i) {
+        errMSG[i] = (char*)calloc(PAPI_MAX_STR_LEN, sizeof(char)); /* zero-filled: a thread can end as PASSWARN without log_error() ever writing a message, and the buffer is still printed */
+        if (NULL == errMSG[i]) {
+            test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
+        }
+    }
+
+    papi_errno = (int*)malloc(dev_count*sizeof(int));
+    if (NULL == papi_errno) {
+        test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
+    }
+
+    hip_errno  = (hipError_t*)malloc(dev_count*sizeof(hipError_t));
+    if (NULL == hip_errno) {
+        test_fail(__FILE__, __LINE__, "malloc", PAPI_ENOMEM);
+    }
+
     pthread_attr_t attr;
     pthread_attr_init(&attr);
     pthread_attr_setdetachstate(&attr, PTHREAD_CREATE_JOINABLE);
@@ -184,6 +258,39 @@ int multi_thread(int argc, char *argv[])
         pthread_join(thread[i], NULL);
     }
 
+    int status_main = PASS; /* most severe status seen across all threads */
+    int tid = 0;            /* index of the thread whose result is reported below */
+    for (int t = 0; t < dev_count; ++t) { /* status codes grow with severity (PASS < PASSWARN < FAIL < HIPFAIL) */
+        if (status[t] > status_main) {    /* a warning on an earlier device must not hide a later failure */
+            status_main = status[t];
+            tid = t;
+        }
+    }
+
+    switch ( status_main ) {
+        case PASSWARN:
+            test_warn(__FILE__, testLINE[tid], errMSG[tid], papi_errno[tid]);
+            break;
+        case FAIL:
+            test_fail(__FILE__, testLINE[tid], errMSG[tid], papi_errno[tid]);
+            break;
+        case HIPFAIL:
+            hip_test_fail(__FILE__, testLINE[tid], errMSG[tid], hip_errno[tid]);
+            break;
+        default: // PASS
+            break;
+    }
+
+    /* Free dynamically allocated memory. */
+    free(testLINE);
+    free(status);
+    free(papi_errno);
+    free(hip_errno);
+    for (int i = 0; i < dev_count; ++i) {
+        free(errMSG[i]);
+    }
+    free(errMSG);
+
     free(thread);
     free(thread_num);
 
diff -pruN 7.2.0~b2-1/src/components/rocm/tests/sample_overflow_monitoring.cpp 7.2.0-1/src/components/rocm/tests/sample_overflow_monitoring.cpp
--- 7.2.0~b2-1/src/components/rocm/tests/sample_overflow_monitoring.cpp	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm/tests/sample_overflow_monitoring.cpp	2025-06-25 22:38:10.000000000 +0000
@@ -30,6 +30,9 @@ int main(int argc, char **argv)
     long long counter_values[1] = { 0 };
     quiet = tests_quiet(argc, argv);
 
+    fprintf(stdout, "The rocm component does not support overflow monitoring as of now. This will be added in a future release.\n");
+    test_skip(__FILE__, __LINE__,"", papi_errno);
+
     setenv("ROCP_HSA_INTERCEPT", "0", 1);
 
     setup_PAPI(&event_set, EV_THRESHOLD);
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/README.md 7.2.0-1/src/components/rocm_smi/README.md
--- 7.2.0~b2-1/src/components/rocm_smi/README.md	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/README.md	2025-06-25 22:38:10.000000000 +0000
@@ -8,7 +8,9 @@ temperature readings; it also allows cap
 * [Environment Variables](#environment-variables)
 * [Known Limitations](#known-limitations)
 * [FAQ](#faq)
+
 ***
+
 ## Enabling the ROCM_SMI Component
 
 To enable reading or writing of ROCM_SMI counters the user needs to link
@@ -16,22 +18,28 @@ against a PAPI library that was configur
 As an example the following command: `./configure --with-components="rocm_smi"`
 is sufficient to enable the component.
 
-Typically, the utility `papi_components_avail` (available in `papi/src/utils/papi_components_avail`) will display the components available to the user, and whether they are disabled, and when they are disabled why.
+Typically, the utility `papi_component_avail` (available in `papi/src/utils/papi_component_avail`) will display the components available to the user, and whether they are disabled, and when they are disabled why.
 
 ## Environment Variables
 
-For ROCM_SMI, PAPI requires one environment variable: `PAPI_ROCMSMI_ROOT`. Note
-in most installations, this is a subdirectory under the ROCM directory. This is 
-required at both compile and run time.
+For ROCM_SMI, PAPI requires the environment variable `PAPI_ROCMSMI_ROOT` to be set such that the shared object `librocm_smi64.so` and the directory `rocm_smi` are found. This variable is required at both compile and run time.
+
+There are two common cases for setting this variable:
+
+1. **Case 1: For ROCm versions 5.2 and newer:**
+    Set `PAPI_ROCMSMI_ROOT` to the top-level ROCM directory, e.g.:
 
-Example:
+        export PAPI_ROCMSMI_ROOT=/opt/rocm
 
-    export PAPI_ROCMSMI_ROOT=/opt/rocm/rocm_smi
+2. **Case 2: For ROCm versions prior to 5.2:**
+    Set `PAPI_ROCMSMI_ROOT` directly to the ROCM_SMI directory, e.g.:
 
-Within PAPI_ROCMSMI_ROOT, we expect the following standard directories:
+        export PAPI_ROCMSMI_ROOT=/opt/rocm/rocm_smi
 
-    PAPI_ROCMSMI_ROOT/lib
-    PAPI_ROCMSMI_ROOT/include/rocm_smi
+In both cases, the directory specified by `PAPI_ROCMSMI_ROOT` **must contain** the following subdirectories:
+
+* `PAPI_ROCMSMI_ROOT/lib` (which should include the dynamic library `librocm_smi64.so`)
+* `PAPI_ROCMSMI_ROOT/include/rocm_smi`
 
 ## Known Limitations
 
@@ -39,22 +47,23 @@ Within PAPI_ROCMSMI_ROOT, we expect the
 
 * Although AMD metrics may be floating point, all values are recast and returned as long long integers.
 
-    The binary image of a `double` is intact; but users must recast to `double` for display purposes.
+  The binary image of a `double` is intact; but users must recast to `double` for display purposes.
 
 ***
+
 ## FAQ
 
 1. [Unusual installations](#unusual-installations)
 
 ## Unusual installations
+
 For the ROCM_SMI component to be operational, it must find the dynamic
-library `librocm_smi64.so`. This is normally
-found in the above standard lib directory, or one of the Linux default
+library `librocm_smi64.so`. This is normally found in the above standard lib directory, or one of the Linux default
 directories listed by `/etc/ld.so.conf`, usually `/usr/lib64`, `/lib64`,
 `/usr/lib` and `/lib`. If the library is not found (or is not functional)
 then the component will be listed as "disabled" with a reason explaining the
-problem. If library was not found, it is not in the expected places. 
+problem. If the library was not found, it is not in the expected places.
 
-The system will search the directories listed in **LD\_LIBRARY\_PATH**. You can add an additional path with a colon e.g. 
+The system will search the directories listed in `LD_LIBRARY_PATH`. You can add an additional path with a colon, e.g.:
 
-    export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/WhereALibraryCanBeFound
+    export LD_LIBRARY_PATH=/WhereALibraryCanBeFound:$LD_LIBRARY_PATH
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/Rules.rocm_smi 7.2.0-1/src/components/rocm_smi/Rules.rocm_smi
--- 7.2.0~b2-1/src/components/rocm_smi/Rules.rocm_smi	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/Rules.rocm_smi	2025-06-25 22:38:10.000000000 +0000
@@ -87,6 +87,7 @@ COMPOBJS += linux-rocm-smi.o \
 CFLAGS += -I$(PAPI_ROCMSMI_ROOT)/../include/rocm_smi
 CFLAGS += -I$(PAPI_ROCMSMI_ROOT)/../include
 CFLAGS += -I$(PAPI_ROCMSMI_ROOT)/include/rocm_smi
+CFLAGS += -I$(PAPI_ROCMSMI_ROOT)/include
 CFLAGS += -g
 LDFLAGS += $(LDL) -g
 
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/htable.h 7.2.0-1/src/components/rocm_smi/htable.h
--- 7.2.0~b2-1/src/components/rocm_smi/htable.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/htable.h	2025-06-25 22:38:10.000000000 +0000
@@ -325,7 +325,7 @@ create_table_entry(const char *key, void
     if (*entry == NULL) {
         return HTABLE_ENOMEM;
     }
-    (*entry)->key = strdup(key);
+    (*entry)->key = papi_strdup(key);
     (*entry)->val = val;
     (*entry)->next = NULL;
 
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/linux-rocm-smi.c 7.2.0-1/src/components/rocm_smi/linux-rocm-smi.c
--- 7.2.0~b2-1/src/components/rocm_smi/linux-rocm-smi.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/linux-rocm-smi.c	2025-06-25 22:38:10.000000000 +0000
@@ -114,12 +114,16 @@ _rocm_smi_init_private(void)
 
     int count = 0;
     papi_errno = evt_get_count(&count);
+    if (papi_errno != PAPI_OK) {
+        goto fn_fail;
+    }
     _rocm_smi_vector.cmp_info.num_native_events = count;
     _rocm_smi_vector.cmp_info.num_cntrs = count;
     _rocm_smi_vector.cmp_info.num_mpx_cntrs = count;
 
-  fn_exit:
     _rocm_smi_vector.cmp_info.initialized = 1;
+
+  fn_exit:
     _rocm_smi_vector.cmp_info.disabled = papi_errno;
     PAPI_unlock(COMPONENT_LOCK);
     return papi_errno;
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/rocs.c 7.2.0-1/src/components/rocm_smi/rocs.c
--- 7.2.0~b2-1/src/components/rocm_smi/rocs.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/rocs.c	2025-06-25 22:38:10.000000000 +0000
@@ -396,7 +396,7 @@ rocs_evt_enum(unsigned int *event_code,
             if (*event_code + 1 < (unsigned int) ntv_table_p->count) {
                 ++(*event_code);
             } else {
-                papi_errno = PAPI_END;
+                papi_errno = PAPI_ENOEVNT;
             }
             break;
         default:
@@ -997,13 +997,13 @@ init_device_table(void)
     int i, j;
     rsmi_status_t status;
 
-    freq_table = calloc(device_count * ROCS_GPU_CLK_FREQ_VARIANT__NUM, sizeof(rsmi_frequencies_t));
+    freq_table = papi_calloc(device_count * ROCS_GPU_CLK_FREQ_VARIANT__NUM, sizeof(rsmi_frequencies_t));
     if (freq_table == NULL) {
         papi_errno = PAPI_ENOMEM;
         goto fn_fail;
     }
 
-    pcie_table = calloc(device_count, sizeof(rsmi_pcie_bandwidth_t));
+    pcie_table = papi_calloc(device_count, sizeof(rsmi_pcie_bandwidth_t));
     if (pcie_table == NULL) {
         papi_errno = PAPI_ENOMEM;
         goto fn_fail;
@@ -1862,11 +1862,11 @@ get_event_name(const char *name, int32_t
     char event_name_str[PAPI_MAX_STR_LEN] = { 0 };
 
     if (strcmp(name, "rsmi_dev_count") == 0) {
-        return strdup("NUMDevices");
+        return papi_strdup("NUMDevices");
     } else if (strcmp(name, "rsmi_lib_version") == 0) {
-        return strdup("rsmi_version");
+        return papi_strdup("rsmi_version");
     } else if (strcmp(name, "rsmi_dev_driver_version_str_get") == 0) {
-        return strdup("driver_version_str");
+        return papi_strdup("driver_version_str");
     } else if (strcmp(name, "rsmi_dev_id_get") == 0) {
         sprintf(event_name_str, "device_id:device=%i", dev);
     } else if (strcmp(name, "rsmi_dev_subsystem_vendor_id_get") == 0) {
@@ -2362,7 +2362,7 @@ get_event_name(const char *name, int32_t
         return NULL;
     }
 
-    return strdup(event_name_str);
+    return papi_strdup(event_name_str);
 }
 
 char *
@@ -2371,25 +2371,25 @@ get_event_descr(const char *name, int64_
     char event_descr_str[PAPI_MAX_STR_LEN] = { 0 };
 
     if (strcmp(name, "rsmi_dev_count") == 0) {
-        return strdup("Number of Devices which have monitors, accessible by rocm_smi.");
+        return papi_strdup("Number of Devices which have monitors, accessible by rocm_smi.");
     } else if (strcmp(name, "rsmi_lib_version") == 0) {
-        return strdup("Version of RSMI lib; 0x0000MMMMmmmmpppp Major, Minor, Patch.");
+        return papi_strdup("Version of RSMI lib; 0x0000MMMMmmmmpppp Major, Minor, Patch.");
     } else if (strcmp(name, "rsmi_dev_driver_version_str_get") == 0) {
-        return strdup("Returns char* to z-terminated driver version string; do not free().");
+        return papi_strdup("Returns char* to z-terminated driver version string; do not free().");
     } else if (strcmp(name, "rsmi_dev_id_get") == 0) {
-        return strdup("Vendor supplied device id number. May be shared by same model devices; see pci_id for a unique identifier.");
+        return papi_strdup("Vendor supplied device id number. May be shared by same model devices; see pci_id for a unique identifier.");
     } else if (strcmp(name, "rsmi_dev_subsystem_vendor_id_get") == 0) {
-        return strdup("System vendor id number.");
+        return papi_strdup("System vendor id number.");
     } else if (strcmp(name, "rsmi_dev_vendor_id_get") == 0) {
-        return strdup("Vendor id number.");
+        return papi_strdup("Vendor id number.");
     } else if (strcmp(name, "rsmi_dev_unique_id_get") == 0) {
-        return strdup("Unique id for device.");
+        return papi_strdup("Unique id for device.");
     } else if (strcmp(name, "rsmi_dev_subsystem_id_get") == 0) {
-        return strdup("Subsystem id number.");
+        return papi_strdup("Subsystem id number.");
     } else if (strcmp(name, "rsmi_dev_drm_render_minor_get") == 0) {
-        return strdup("DRM Minor Number associated with this device.");
+        return papi_strdup("DRM Minor Number associated with this device.");
     } else if (strcmp(name, "rsmi_dev_overdrive_level_get") == 0) {
-        return strdup("Overdriver Level \% for device, 0 to 20, max overclocked permitted. Read Only.");
+        return papi_strdup("Overdriver Level % for device, 0 to 20, max overclocked permitted. Read Only."); /* plain '%': the string is copied, not used as a format, and "\%" is not a valid C escape */
     } else if (strcmp(name, "rsmi_dev_perf_level_get") == 0) {
         sprintf(event_descr_str, "PowerPlay Performance Level; Read Only, enum rsmi_dev_perf_level_t [0-%i], see ROCm_SMI_Manual for details.",
                 RSMI_DEV_PERF_LEVEL_LAST);
@@ -2425,139 +2425,139 @@ get_event_descr(const char *name, int64_
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_busy_percent_get") == 0) {
-        return strdup("Percentage of time the device was busy doing any processing.");
+        return papi_strdup("Percentage of time the device was busy doing any processing.");
     } else if (strcmp(name, "rsmi_dev_memory_busy_percent_get") == 0) {
-        return strdup("Percentage_of time any device memory is being used.");
+        return papi_strdup("Percentage_of time any device memory is being used.");
     } else if (strcmp(name, "rsmi_dev_pci_id_get") == 0) {
-        return strdup("BDF (Bus/Device/Function) ID, unique per device.");
+        return papi_strdup("BDF (Bus/Device/Function) ID, unique per device.");
     } else if (strcmp(name, "rsmi_dev_pci_replay_counter_get") == 0) {
-        return strdup("Sum of the number of NAK's received by the GPU and the NAK's generated by the GPU.");
+        return papi_strdup("Sum of the number of NAK's received by the GPU and the NAK's generated by the GPU.");
     } else if (strcmp(name, "rsmi_dev_pci_throughput_get") == 0) {
         switch (variant) {
             case ROCS_PCI_THROUGHPUT_VARIANT__SENT:
-                return strdup("Throughput on PCIe traffic, bytes/second sent.");
+                return papi_strdup("Throughput on PCIe traffic, bytes/second sent.");
             case ROCS_PCI_THROUGHPUT_VARIANT__RECEIVED:
-                return strdup("Throughput on PCIe traffic, bytes/second received.");
+                return papi_strdup("Throughput on PCIe traffic, bytes/second received.");
             case ROCS_PCI_THROUGHPUT_VARIANT__MAX_PACKET_SIZE:
-                return strdup("Maximum PCIe packet size.");
+                return papi_strdup("Maximum PCIe packet size.");
             default:
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_power_profile_presets_get") == 0) {
         switch (variant) {
             case ROCS_POWER_PRESETS_VARIANT__COUNT:
-                return strdup("Number of power profile presets available. See ROCM_SMI Manual for details.");
+                return papi_strdup("Number of power profile presets available. See ROCM_SMI Manual for details.");
             case ROCS_POWER_PRESETS_VARIANT__AVAIL_PROFILES:
-                return strdup("Bit mask for available power profile presets. See ROCM_SMI Manual for details.");
+                return papi_strdup("Bit mask for available power profile presets. See ROCM_SMI Manual for details.");
             case ROCS_POWER_PRESETS_VARIANT__CURRENT:
-                return strdup("Bit mask for current power profile preset. Read/Write. See ROCM_SMI Manual for details.");
+                return papi_strdup("Bit mask for current power profile preset. Read/Write. See ROCM_SMI Manual for details.");
             default:
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_power_profile_set") == 0) {
-        return strdup("Write Only, set the power profile to one of the available masks. See ROCM_SMI Manual for details.");
+        return papi_strdup("Write Only, set the power profile to one of the available masks. See ROCM_SMI Manual for details.");
     } else if (strcmp(name, "rsmi_dev_fan_reset") == 0) {
-        return strdup("Fan Reset. Write Only, data value is ignored.");
+        return papi_strdup("Fan Reset. Write Only, data value is ignored.");
     } else if (strcmp(name, "rsmi_dev_fan_rpms_get") == 0) {
-        return strdup("Current fan speed in RPMs (Rotations Per Minute).");
+        return papi_strdup("Current fan speed in RPMs (Rotations Per Minute).");
     } else if (strcmp(name, "rsmi_dev_fan_speed_max_get") == 0) {
-        return strdup("Maximum possible fan speed in RPMs (Rotations Per Minute).");
+        return papi_strdup("Maximum possible fan speed in RPMs (Rotations Per Minute).");
     } else if (strcmp(name, "rsmi_dev_fan_speed_get") == 0) {
-        return strdup("Current fan speed in RPMs (Rotations Per Minute), Read Only, result [0-255].");
+        return papi_strdup("Current fan speed in RPMs (Rotations Per Minute), Read Only, result [0-255].");
     } else if (strcmp(name, "rsmi_dev_fan_speed_set") == 0) {
-        return strdup("Current fan speed in RPMs (Rotations Per Minute), Read/Write, Write must be <= MAX (see fan_speed_max event), arg in [0-255].");
+        return papi_strdup("Current fan speed in RPMs (Rotations Per Minute), Read/Write, Write must be <= MAX (see fan_speed_max event), arg in [0-255].");
     } else if (strcmp(name, "rsmi_dev_power_ave_get") == 0) {
-        return strdup("Current Average Power consumption in microwatts. Requires root privileges.");
+        return papi_strdup("Current Average Power consumption in microwatts. Requires root privileges.");
     } else if (strcmp(name, "rsmi_dev_power_cap_get") == 0) {
-        return strdup("Power cap in microwatts. Read Only. Between min/max (see power_cap_range_min/max). May require root privileges.");
+        return papi_strdup("Power cap in microwatts. Read Only. Between min/max (see power_cap_range_min/max). May require root privileges.");
     } else if (strcmp(name, "rsmi_dev_power_cap_set") == 0) {
-        return strdup("Power cap in microwatts. Read/Write. Between min/max (see power_cap_range_min/max). May require root privileges.");
+        return papi_strdup("Power cap in microwatts. Read/Write. Between min/max (see power_cap_range_min/max). May require root privileges.");
     } else if (strcmp(name, "rsmi_dev_power_cap_range_get") == 0) {
         switch (variant) {
             case ROCS_POWER_CAP_RANGE_VARIANT__MIN:
-                return strdup("Power cap Minimum settable value, in microwatts.");
+                return papi_strdup("Power cap Minimum settable value, in microwatts.");
             case ROCS_POWER_CAP_RANGE_VARIANT__MAX:
-                return strdup("Power cap Maximim settable value, in microwatts.");
+                return papi_strdup("Power cap Maximim settable value, in microwatts.");
             default:
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_temp_metric_get") == 0) {
         switch (variant) {
             case RSMI_TEMP_CURRENT:
-                return strdup("Temperature current value, millidegrees Celsius.");
+                return papi_strdup("Temperature current value, millidegrees Celsius.");
             case RSMI_TEMP_MAX:
-                return strdup("Temperature maximum value, millidegrees Celsius.");
+                return papi_strdup("Temperature maximum value, millidegrees Celsius.");
             case RSMI_TEMP_MIN:
-                return strdup("Temperature minimum value, millidegrees Celsius.");
+                return papi_strdup("Temperature minimum value, millidegrees Celsius.");
             case RSMI_TEMP_MAX_HYST:
-                return strdup("Temperature hysteresis value for max limit, millidegrees Celsius.");
+                return papi_strdup("Temperature hysteresis value for max limit, millidegrees Celsius.");
             case RSMI_TEMP_MIN_HYST:
-                return strdup("Temperature hysteresis value for min limit, millidegrees Celsius.");
+                return papi_strdup("Temperature hysteresis value for min limit, millidegrees Celsius.");
             case RSMI_TEMP_CRITICAL:
-                return strdup("Temperature critical max value, typical > temp_max, millidegrees Celsius.");
+                return papi_strdup("Temperature critical max value, typical > temp_max, millidegrees Celsius.");
             case RSMI_TEMP_CRITICAL_HYST:
-                return strdup("Temperature hysteresis value for critical limit, millidegrees Celsius.");
+                return papi_strdup("Temperature hysteresis value for critical limit, millidegrees Celsius.");
             case RSMI_TEMP_EMERGENCY:
-                return strdup("Temperature emergency max for chips supporting more than two upper temp limits, millidegrees Celsius.");
+                return papi_strdup("Temperature emergency max for chips supporting more than two upper temp limits, millidegrees Celsius.");
             case RSMI_TEMP_EMERGENCY_HYST:
-                return strdup("Temperature hysteresis value for emergency limit, millidegrees Celsius.");
+                return papi_strdup("Temperature hysteresis value for emergency limit, millidegrees Celsius.");
             case RSMI_TEMP_CRIT_MIN:
-                return strdup("Temperature critical min value, typical < temp_min, millidegrees Celsius.");
+                return papi_strdup("Temperature critical min value, typical < temp_min, millidegrees Celsius.");
             case RSMI_TEMP_CRIT_MIN_HYST:
-                return strdup("Temperature hysteresis value for critical min limit, millidegrees Celsius.");
+                return papi_strdup("Temperature hysteresis value for critical min limit, millidegrees Celsius.");
             case RSMI_TEMP_OFFSET:
-                return strdup("Temperature offset added to temp reading by the chip, millidegrees Celsius.");
+                return papi_strdup("Temperature offset added to temp reading by the chip, millidegrees Celsius.");
             case RSMI_TEMP_LOWEST:
-                return strdup("Temperature historical minimum, millidegrees Celsius.");
+                return papi_strdup("Temperature historical minimum, millidegrees Celsius.");
             case RSMI_TEMP_HIGHEST:
-                return strdup("Temperature historical maximum, millidegrees Celsius.");
+                return papi_strdup("Temperature historical maximum, millidegrees Celsius.");
             default:
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_firmware_version_get") == 0) {
         switch (variant) {
             case RSMI_FW_BLOCK_ASD:
-                return strdup("Firmware Version Block ASD.");
+                return papi_strdup("Firmware Version Block ASD.");
             case RSMI_FW_BLOCK_CE:
-                return strdup("Firmware Version Block CE.");
+                return papi_strdup("Firmware Version Block CE.");
             case RSMI_FW_BLOCK_DMCU:
-                return strdup("Firmware Version Block DMCU.");
+                return papi_strdup("Firmware Version Block DMCU.");
             case RSMI_FW_BLOCK_MC:
-                return strdup("Firmware Version Block MC.");
+                return papi_strdup("Firmware Version Block MC.");
             case RSMI_FW_BLOCK_ME:
-                return strdup("Firmware Version Block ME.");
+                return papi_strdup("Firmware Version Block ME.");
             case RSMI_FW_BLOCK_MEC:
-                return strdup("Firmware Version Block MEC.");
+                return papi_strdup("Firmware Version Block MEC.");
             case RSMI_FW_BLOCK_MEC2:
-                return strdup("Firmware Version Block MEC2.");
+                return papi_strdup("Firmware Version Block MEC2.");
             case RSMI_FW_BLOCK_PFP:
-                return strdup("Firmware Version Block PFP.");
+                return papi_strdup("Firmware Version Block PFP.");
             case RSMI_FW_BLOCK_RLC:
-                return strdup("Firmware Version Block RLC.");
+                return papi_strdup("Firmware Version Block RLC.");
             case RSMI_FW_BLOCK_RLC_SRLC:
-                return strdup("Firmware Version Block SRLC.");
+                return papi_strdup("Firmware Version Block SRLC.");
             case RSMI_FW_BLOCK_RLC_SRLG:
-                return strdup("Firmware Version Block SRLG.");
+                return papi_strdup("Firmware Version Block SRLG.");
             case RSMI_FW_BLOCK_RLC_SRLS:
-                return strdup("Firmware Version Block SRLS.");
+                return papi_strdup("Firmware Version Block SRLS.");
             case RSMI_FW_BLOCK_SDMA:
-                return strdup("Firmware Version Block SDMA.");
+                return papi_strdup("Firmware Version Block SDMA.");
             case RSMI_FW_BLOCK_SDMA2:
-                return strdup("Firmware Version Block SDMA2.");
+                return papi_strdup("Firmware Version Block SDMA2.");
             case RSMI_FW_BLOCK_SMC:
-                return strdup("Firmware Version Block SMC.");
+                return papi_strdup("Firmware Version Block SMC.");
             case RSMI_FW_BLOCK_SOS:
-                return strdup("Firmware Version Block SOS.");
+                return papi_strdup("Firmware Version Block SOS.");
             case RSMI_FW_BLOCK_TA_RAS:
-                return strdup("Firmware Version Block RAS.");
+                return papi_strdup("Firmware Version Block RAS.");
             case RSMI_FW_BLOCK_TA_XGMI:
-                return strdup("Firmware Version Block XGMI.");
+                return papi_strdup("Firmware Version Block XGMI.");
             case RSMI_FW_BLOCK_UVD:
-                return strdup("Firmware Version Block UVD.");
+                return papi_strdup("Firmware Version Block UVD.");
             case RSMI_FW_BLOCK_VCE:
-                return strdup("Firmware Version Block VCE.");
+                return papi_strdup("Firmware Version Block VCE.");
             case RSMI_FW_BLOCK_VCN:
-                return strdup("Firmware Version Block VCN.");
+                return papi_strdup("Firmware Version Block VCN.");
             default:
                 return NULL;
         }
@@ -2621,37 +2621,37 @@ get_event_descr(const char *name, int64_
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_ecc_enabled_get") == 0) {
-        return strdup("Bit mask of GPU blocks with ecc error counting enabled.");
+        return papi_strdup("Bit mask of GPU blocks with ecc error counting enabled.");
     } else if (strcmp(name, "rsmi_dev_ecc_status_get") == 0) {
         switch (variant) {
             case RSMI_GPU_BLOCK_UMC:
-                return strdup("ECC Error Status for the GPU Block UMC.");
+                return papi_strdup("ECC Error Status for the GPU Block UMC.");
             case RSMI_GPU_BLOCK_SDMA:
-                return strdup("ECC Error Status for the GPU Block SDMA.");
+                return papi_strdup("ECC Error Status for the GPU Block SDMA.");
             case RSMI_GPU_BLOCK_GFX:
-                return strdup("ECC Error Status for the GPU Block GFX.");
+                return papi_strdup("ECC Error Status for the GPU Block GFX.");
             case RSMI_GPU_BLOCK_MMHUB:
-                return strdup("ECC Error Status for the GPU Block MMHUB.");
+                return papi_strdup("ECC Error Status for the GPU Block MMHUB.");
             case RSMI_GPU_BLOCK_ATHUB:
-                return strdup("ECC Error Status for the GPU Block ATHUB.");
+                return papi_strdup("ECC Error Status for the GPU Block ATHUB.");
             case RSMI_GPU_BLOCK_PCIE_BIF:
-                return strdup("ECC Error Status for the GPU Block BIF.");
+                return papi_strdup("ECC Error Status for the GPU Block BIF.");
             case RSMI_GPU_BLOCK_HDP:
-                return strdup("ECC Error Status for the GPU Block HDP.");
+                return papi_strdup("ECC Error Status for the GPU Block HDP.");
             case RSMI_GPU_BLOCK_XGMI_WAFL:
-                return strdup("ECC Error Status for the GPU Block WAFL.");
+                return papi_strdup("ECC Error Status for the GPU Block WAFL.");
             case RSMI_GPU_BLOCK_DF:
-                return strdup("ECC Error Status for the GPU Block DF.");
+                return papi_strdup("ECC Error Status for the GPU Block DF.");
             case RSMI_GPU_BLOCK_SMN:
-                return strdup("ECC Error Status for the GPU Block SMN.");
+                return papi_strdup("ECC Error Status for the GPU Block SMN.");
             case RSMI_GPU_BLOCK_SEM:
-                return strdup("ECC Error Status for the GPU Block SEM.");
+                return papi_strdup("ECC Error Status for the GPU Block SEM.");
             case RSMI_GPU_BLOCK_MP0:
-                return strdup("ECC Error Status for the GPU Block MP0.");
+                return papi_strdup("ECC Error Status for the GPU Block MP0.");
             case RSMI_GPU_BLOCK_MP1:
-                return strdup("ECC Error Status for the GPU Block MP1.");
+                return papi_strdup("ECC Error Status for the GPU Block MP1.");
             case RSMI_GPU_BLOCK_FUSE:
-                return strdup("ECC Error Status for the GPU Block FUSE.");
+                return papi_strdup("ECC Error Status for the GPU Block FUSE.");
             default:
                 return NULL;
         }
@@ -2680,9 +2680,9 @@ get_event_descr(const char *name, int64_
         int idx;
         switch (subvariant) {
             case ROCS_GPU_CLK_FREQ_SUBVARIANT__COUNT:
-                return strdup("Number of frequencies available.");
+                return papi_strdup("Number of frequencies available.");
             case ROCS_GPU_CLK_FREQ_SUBVARIANT__CURRENT:
-                return strdup("Current operating frequency.");
+                return papi_strdup("Current operating frequency.");
             default:
                 idx = subvariant - ROCS_GPU_CLK_FREQ_SUBVARIANT__NUM;
         }
@@ -2714,9 +2714,9 @@ get_event_descr(const char *name, int64_
     } else if (strcmp(name, "rsmi_dev_pci_bandwidth_get") == 0) {
         switch (variant) {
             case ROCS_PCI_BW_VARIANT__COUNT:
-                return strdup("Number of PCI transfers rates available.");
+                return papi_strdup("Number of PCI transfers rates available.");
             case ROCS_PCI_BW_VARIANT__CURRENT:
-                return strdup("Current PCI transfer rate.");
+                return papi_strdup("Current PCI transfer rate.");
             case ROCS_PCI_BW_VARIANT__RATE_IDX:
                 sprintf(event_descr_str, "Returns PCI bandwidth rate value from supported_table[%i].", (int) subvariant);
                 break;
@@ -2727,19 +2727,19 @@ get_event_descr(const char *name, int64_
                 return NULL;
         }
     } else if (strcmp(name, "rsmi_dev_pci_bandwidth_set") == 0) {
-        return strdup("Write Only. Sets bit mask, 1's for PCI transfer rates in supported_table permitted. All 0 mask prohibited");
+        return papi_strdup("Write Only. Sets bit mask, 1's for PCI transfer rates in supported_table permitted. All 0 mask prohibited");
     } else if (strcmp(name, "rsmi_dev_brand_get") == 0) {
-        return strdup("Returns char* to z-terminated brand string; do not free().");
+        return papi_strdup("Returns char* to z-terminated brand string; do not free().");
     } else if (strcmp(name, "rsmi_dev_name_get") == 0) {
-        return strdup("Returns char* to z-terminated name string; do not free().");
+        return papi_strdup("Returns char* to z-terminated name string; do not free().");
     } else if (strcmp(name, "rsmi_dev_serial_number_get") == 0) {
-        return strdup("Returns char* to z-terminated serial number string; do not free().");
+        return papi_strdup("Returns char* to z-terminated serial number string; do not free().");
     } else if (strcmp(name, "rsmi_dev_subsystem_name_get") == 0) {
-        return strdup("Returns char* to z-terminated subsystem name string; do not free().");
+        return papi_strdup("Returns char* to z-terminated subsystem name string; do not free().");
     } else if (strcmp(name, "rsmi_dev_vbios_version_get") == 0) {
-        return strdup("Returns char* to z-terminated vbios version string; do not free().");
+        return papi_strdup("Returns char* to z-terminated vbios version string; do not free().");
     } else if (strcmp(name, "rsmi_dev_vendor_name_get") == 0) {
-        return strdup("Returns char* to z-terminated vendor name string; do not free().");
+        return papi_strdup("Returns char* to z-terminated vendor name string; do not free().");
     } else if (strcmp(name, "rsmi_dev_xgmi_evt_get") == 0) {
         const char *variant_str = NULL;
         switch (variant) {
@@ -2804,7 +2804,7 @@ get_event_descr(const char *name, int64_
         return NULL;
     }
 
-    return strdup(event_descr_str);
+    return papi_strdup(event_descr_str);
 }
 
 rocs_access_mode_e
@@ -3286,6 +3286,9 @@ access_rsmi_dev_perf_level(rocs_access_m
         data = (rsmi_dev_perf_level_t) event->value;
         status = rsmi_dev_perf_level_set_p(event->device, data);
         if (status != RSMI_STATUS_SUCCESS) {
+            if (status == RSMI_STATUS_PERMISSION ) {
+                return PAPI_EPERM;
+            }
             return PAPI_EMISC;
         }
     }
@@ -3516,7 +3519,6 @@ int
 access_rsmi_dev_fan_rpms(rocs_access_mode_e mode, void *arg)
 {
     ntv_event_t *event = (ntv_event_t *) arg;
-
     if (mode != ROCS_ACCESS_MODE__READ || mode != event->mode) {
         return PAPI_ENOSUPP;
     }
@@ -3552,18 +3554,29 @@ int
 access_rsmi_dev_fan_speed(rocs_access_mode_e mode, void *arg)
 {
     ntv_event_t *event = (ntv_event_t *) arg;
-
-    if (mode != ROCS_ACCESS_MODE__READ || mode != event->mode) {
+    rsmi_status_t status;
+
+    if (!(mode & event->mode)) {
         /* Return error code as counter value to distinguish
          * this case from a successful read */
         event->value = PAPI_ENOSUPP;
         return PAPI_OK;
     }
-
-    rsmi_status_t status;
-    status = rsmi_dev_fan_speed_get_p(event->device, event->subvariant, &event->value);
-    if (status != RSMI_STATUS_SUCCESS) {
-        return PAPI_EMISC;
+
+    if (mode == ROCS_ACCESS_MODE__READ) {
+        status = rsmi_dev_fan_speed_get_p(event->device, event->subvariant, &event->value);
+        if (status != RSMI_STATUS_SUCCESS) {
+            return PAPI_EMISC;
+        }
+    } else { /* any other permitted mode: pass event->value to the fan speed setter */
+        uint64_t data = (uint64_t) event->value;
+        status = rsmi_dev_fan_speed_set_p(event->device, event->subvariant, data);
+        if (status != RSMI_STATUS_SUCCESS) {
+            if (status == RSMI_STATUS_PERMISSION ) {
+                return PAPI_EPERM; /* report permission failures distinctly from other errors */
+            }
+            return PAPI_EMISC;
+        }
     }
     return PAPI_OK;
 }
@@ -3611,6 +3624,9 @@ access_rsmi_dev_power_cap(rocs_access_mo
         data = (uint64_t) event->value;
         status = rsmi_dev_power_cap_set_p(event->device, event->subvariant, data);
         if (status != RSMI_STATUS_SUCCESS) {
+            if (status == RSMI_STATUS_PERMISSION ) {
+                return PAPI_EPERM;
+            }
             return PAPI_EMISC;
         }
      }
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/tests/Makefile 7.2.0-1/src/components/rocm_smi/tests/Makefile
--- 7.2.0~b2-1/src/components/rocm_smi/tests/Makefile	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/tests/Makefile	2025-06-25 22:38:10.000000000 +0000
@@ -4,16 +4,16 @@
 NAME=rocm_smi
 include ../../Makefile_comp_tests.target
 PAPI_ROCM_ROOT ?= /opt/rocm
-HIP_PATH= ${PAPI_ROCM_ROOT}/hip
-HIPCC=$(HIP_PATH)/bin/hipcc
+HIPCC ?= $(PAPI_ROCM_ROOT)/bin/hipcc
 
+INCLUDE += -I$(PAPI_ROCMSMI_ROOT)/include
 INCLUDE += -I$(PAPI_ROCM_ROOT)/include
 INCLUDE += -I$(PAPI_ROCM_ROOT)/include/rocm_smi
 INCLUDE += -I$(PAPI_ROCM_ROOT)/include/hip
 INCLUDE += -I$(PAPI_ROCM_ROOT)/include/hsa
 INCLUDE += -I$(PAPI_ROCM_ROOT)/include/rocprofiler
 INCLUDE += -I$(PAPI_ROCM_ROOT)/include/rocblas
-LDFLAGS = -ldl -g -L$(PAPI_ROCM_ROOT)/lib/rocblas -lrocblas
+LDFLAGS = -ldl -g -pthread
 
 %.o:%.c
 	@echo "INCLUDE=" $(INCLUDE)
@@ -24,9 +24,11 @@ LDFLAGS = -ldl -g -L$(PAPI_ROCM_ROOT)/li
 	@echo "CFLAGS=" $(CFLAGS)
 	g++ $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $<
 
-TESTS = 
+TESTS = rocm_command_line rocm_smi_all power_monitor_rocm rocm_smi_writeTests
+TESTS_LONG = rocmsmi_example
 
 rocm_smi_tests: $(TESTS)
+rocm_smi_tests_long: $(TESTS_LONG)
 
 # Note: We compile .o separately from the executable link; some versions of hipcc
 #       have trouble managing libraries if we try to do both in a single step.
@@ -53,7 +55,7 @@ rocmsmi_example.o: rocmsmi_example.cpp $
 	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
 
 rocmsmi_example: rocmsmi_example.o $(UTILOBJS) $(PAPILIB)
-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) -lpthread
+	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS) -L$(PAPI_ROCM_ROOT)/lib/rocblas -lrocblas
 
 rocm_smi_writeTests.o: rocm_smi_writeTests.cpp $(UTILOBJS) $(PAPILIB)
 	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
@@ -61,15 +63,8 @@ rocm_smi_writeTests.o: rocm_smi_writeTes
 rocm_smi_writeTests: rocm_smi_writeTests.o $(UTILOBJS) $(PAPILIB)
 	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS)
 
-square.o: square.cpp $(UTILOBJS) $(PAPILIB)
-	$(HIPCC) $(CFLAGS) $(INCLUDE) -c $< -o $@
-
-square: square.o $(UTILOBJS) $(PAPILIB)
-	$(HIPCC) $(CFLAGS) $(INCLUDE) -o $@ $< $(UTILOBJS) $(PAPILIB) $(LDFLAGS)
-
 clean:
-	rm -f $(TESTS) *.o
-	rm -f rocm_command_line rocmsmi_example power_monitor_rocm rocm_smi_writeTests square
+	rm -f $(TESTS) $(TESTS_LONG) *.o
 
 checkpath: 
 	echo PAPI_ROCM_ROOT = $(PAPI_ROCM_ROOT)
diff -pruN 7.2.0~b2-1/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp 7.2.0-1/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp
--- 7.2.0~b2-1/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocm_smi/tests/rocm_smi_writeTests.cpp	2025-06-25 22:38:10.000000000 +0000
@@ -1,339 +1,546 @@
 //-----------------------------------------------------------------------------
 // This program must be compiled using a special makefile:
-// make -f ROCM_SMI_Makefile rocm_smi_writeTests.out 
+// make -f ROCM_SMI_Makefile rocm_smi_writeTests.out
 //-----------------------------------------------------------------------------
 #define __HIP_PLATFORM_HCC__
 
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <stdbool.h>
 #include "papi.h"
 #include <hip/hip_runtime.h>
-#include <unistd.h>
-#include "rocm_smi.h"   // Need some enumerations.
-
+#include "rocm_smi.h"
 #include "force_init.h"
 
+// Forward declarations of helper functions.
+void write_papi_event(int cid, const char* event_name, long long value_to_write);
+void read_and_print_current_values(int cid,
+                                   const char* perf_name, 
+                                   const char* pcap_name, 
+                                   const char* fan_name,
+                                   const char* pcap_max_name, 
+                                   const char* fan_max_name,
+                                   const char* stage_label);
+
 #define CHECK(cmd) \
-{\
-    hipError_t error  = cmd;\
+{ \
+    hipError_t error = cmd; \
     if (error != hipSuccess) { \
         fprintf(stderr, "error: '%s'(%d) at %s:%d\n", hipGetErrorString(error), error,__FILE__, __LINE__); \
-        exit(EXIT_FAILURE);\
-	  }\
+        exit(EXIT_FAILURE); \
+    } \
 }
 
-// THIS MACRO EXITS if the papi call does not return PAPI_OK. Do not use for routines that
-// return anything else; e.g. PAPI_num_components, PAPI_get_component_info, PAPI_library_init.
-#define CALL_PAPI_OK(papi_routine)                                                        \
-    do {                                                                                  \
-        int _papiret = papi_routine;                                                      \
-        if (_papiret != PAPI_OK) {                                                        \
+// THIS MACRO EXITS if the papi call does not return PAPI_OK.
+#define CALL_PAPI_OK(papi_routine) \
+    do { \
+        int _papiret = papi_routine; \
+        if (_papiret != PAPI_OK) { \
             fprintf(stderr, "%s:%d macro: PAPI Error: function " #papi_routine " failed with ret=%d [%s].\n", \
-                    __FILE__, __LINE__, _papiret, PAPI_strerror(_papiret));               \
-            exit(-1);                                                                     \
-        }                                                                                 \
+                    __FILE__, __LINE__, _papiret, PAPI_strerror(_papiret)); \
+            exit(-1); \
+        } \
     } while (0);
 
 
-#define MEMORY_ALLOCATION_CALL(var)                                     \
-    do {                                                                \
-        if (var == NULL) {                                              \
-            fprintf(stderr, "%s:%d: Error: Memory Allocation Failed \n",\
-                    __FILE__, __LINE__);                                \
-            exit(-1);                                                   \
-        }                                                               \
-    } while (0);  
-
-
-#define MAX_DEVICES    (32)
-#define BLOCK_SIZE     (1024)
-#define GRID_SIZE      (512)
-#define BUF_SIZE       (32 * 1024)
-#define ALIGN_SIZE     (8)
-#define SUCCESS        (0)
-#define NUM_METRIC     (18)
-#define NUM_EVENTS     (2)
-#define MAX_SIZE       (64*1024*1024)   // 64 MB
-
-typedef union
-{
-    long long ll;
-    unsigned long long ull;
-    double    d;
-    void *vp;
-    unsigned char ch[8];
-} convert_64_t;
-
-typedef struct {
-    char name[128];
-    long long value;
-} eventStore_t;
-
-int eventsFoundCount = 0;               // occupants of the array.
-int eventsFoundMax;                     // Size of the array.
-int eventsFoundAdd = 32;                // Blocksize for increasing the array.
-int deviceCount=0;                      // Total devices seen.
-int deviceEvents[32] = {0};             // Number of events for each device=??.
-eventStore_t *eventsFound = NULL;       // The array.
-
-//-----------------------------------------------------------------------------
-// HIP routine: Square each element in the array A and write to array C.
-//-----------------------------------------------------------------------------
-template <typename T>
-__global__ void
-vector_square(T *C_d, T *A_d, size_t N)
-{
-    size_t offset = (blockIdx.x * blockDim.x + threadIdx.x);
-    size_t stride = blockDim.x * gridDim.x ;
-
-    for (size_t i=offset; i<N; i+=stride) {
-        C_d[i] = A_d[i] * A_d[i];
-    }
-}
-
 // Show help.
 //-----------------------------------------------------------------------------
 static void printUsage()
 {
     printf("Demonstrate use of ROCM API write routines.\n");
-    printf("This program has no options, it will use PAPI to read/write/read\n");
-    printf("rocm_smi writable settings and report the results.              \n");
-} // end routine.
-
+    printf("This program will use PAPI to read ROCm SMI values, attempt to write\n");
+    printf("modified values for perf_level, power_cap, and fan_speed (for device 0),\n");
+    printf("read them back, revert them to original values, and read again.\n");
+    printf("Requires necessary permissions to write ROCm SMI values.\n");
+    printf("Compile with: make -f ROCM_SMI_Makefile rocm_smi_writeTests.out\n");
+}
 
 //-----------------------------------------------------------------------------
 // Interpret command line flags.
 //-----------------------------------------------------------------------------
 void parseCommandLineArgs(int argc, char *argv[])
 {
-    if(argc < 2) return;
-
-    if((strcmp(argv[1], "--help") == 0) || 
-       (strcmp(argv[1], "-help") == 0)  || 
-       (strcmp(argv[1], "-h") == 0)) {
-        printUsage();
-        exit(0);
-    }
-} // end routine.
-
-//-----------------------------------------------------------------------------
-// conduct a test using HIP. Derived from AMD sample code 'square.cpp'.
-// coming in, EventSet is already populated, we just run the test and read.
-// Note values must point at an array large enough to store the events in
-// Eventset.
-//-----------------------------------------------------------------------------
-void conductTest(int EventSet, int device, long long *values) {
-    float *A_d, *C_d;
-    float *A_h, *C_h;
-    size_t N = 1000000;
-    size_t Nbytes = N * sizeof(float);
-    int ret, thisDev, verbose=0;
-
-	ret = PAPI_start( EventSet );
-	if (ret != PAPI_OK ) {
-	    fprintf(stderr,"Error! PAPI_start\n");
-	    exit( ret );
-	}
-
-    hipDeviceProp_t props;                        
-    if (verbose) fprintf(stderr, "args: EventSet=%i, device=%i, values=%p.\n", EventSet, device, values);
- 
-    CHECK(hipSetDevice(device));                      // Set device requested.
-    CHECK(hipGetDevice(&thisDev));                    // Double check.
-    CHECK(hipGetDeviceProperties(&props, thisDev));   // Get properties (for name).
-    if (verbose) fprintf (stderr, "info: Requested Device=%i, running on device %i=%s\n", device, thisDev, props.name);
-
-    if (verbose) fprintf (stderr, "info: allocate host mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-    A_h = (float*)malloc(Nbytes);                     // standard malloc for host.
-    CHECK(A_h == NULL ? hipErrorMemoryAllocation : hipSuccess );
-    C_h = (float*)malloc(Nbytes);                     // standard malloc for host.
-    CHECK(C_h == NULL ? hipErrorMemoryAllocation : hipSuccess );
-
-    // Fill with Phi + i
-    for (size_t i=0; i<N; i++) 
-    {
-        A_h[i] = 1.618f + i; 
-    }
-
-    if (verbose) fprintf (stderr, "info: allocate device mem (%6.2f MB)\n", 2*Nbytes/1024.0/1024.0);
-    CHECK(hipMalloc(&A_d, Nbytes));                   // HIP malloc for device.
-    CHECK(hipMalloc(&C_d, Nbytes));                   // ...
-
-
-    if (verbose) fprintf (stderr, "info: copy Host2Device\n");
-    CHECK ( hipMemcpy(A_d, A_h, Nbytes, hipMemcpyHostToDevice));  // Copy (*dest, *source, Type).
-
-    const unsigned blocks = 512;
-    const unsigned threadsPerBlock = 256;
-    (void) blocks;
-    (void) threadsPerBlock; 
-
-    if (verbose) fprintf (stderr, "info: launch 'vector_square' kernel\n");
-//  hipLaunchKernelGGL((vector_square), dim3(blocks), dim3(threadsPerBlock), 0, 0, C_d, A_d, N);
-
-    if (verbose) fprintf (stderr, "info: copy Device2Host\n");
-    CHECK ( hipMemcpy(C_h, C_d, Nbytes, hipMemcpyDeviceToHost));  // copy (*dest, *source, Type).
-
-//  if (verbose) fprintf (stderr, "info: check result\n");
-//  for (size_t i=0; i<N; i++)  {
-//      if (C_h[i] != A_h[i] * A_h[i]) {              // If value received is not square of value sent,
-//          CHECK(hipErrorUnknown);                   // ... We have a problem!
-//      }
-//  }
-
-    // We passed. Now we need to read the event.
-    if (verbose) fprintf(stderr, "Passed. info: About to read event with PAPI_stop.\n");
-    ret = PAPI_stop( EventSet, values );
-    if (ret != PAPI_OK ) {
-        fprintf(stderr,"Error! PAPI_stop failed.\n");
-        if (verbose) fprintf(stderr, "PAPI_stop failed.\n");
-        exit(ret);
+    int i;
+    for (i = 1; i < argc; ++i) {
+        if ((strcmp(argv[i], "--help") == 0) ||
+            (strcmp(argv[i], "-help") == 0)  ||
+            (strcmp(argv[i], "-h") == 0)) {
+            printUsage();
+            exit(0);
+        }
     }
-    
-    if (verbose) fprintf (stderr, "PAPI_stop succeeded.\n");
-
-} // end conductTest.
+}
 
 //-----------------------------------------------------------------------------
 // Main program.
 //-----------------------------------------------------------------------------
 int main(int argc, char *argv[])
 {
-    int devices, device, i = 0;
-    char str[64];
-    (void) device;
-    (void) str;
+    int devices;
+    int i = 0;
+    int r;
 
-    // Parse command line arguments
     parseCommandLineArgs(argc, argv);
 
-    // fprintf(stderr, "Setup PAPI counters internally (PAPI)\n");
-    int EventSet = PAPI_NULL;
-    int eventCount;
     int ret;
-    int k, m, cid=-1;
-    (void) m;
+    int k, cid = -1;
 
-    /* PAPI Initialization */
+    // PAPI Initialization
     ret = PAPI_library_init(PAPI_VER_CURRENT);
-    if(ret != PAPI_VER_CURRENT) {
-        fprintf(stderr, "PAPI_library_init failed, ret=%i [%s]\n", 
-            ret, PAPI_strerror(ret));
+    if (ret != PAPI_VER_CURRENT) {
+        fprintf(stderr, "PAPI_library_init failed, ret=%i [%s]\n",
+                ret, PAPI_strerror(ret));
         exit(-1);
     }
-
     printf("PAPI version: %d.%d.%d\n", 
-        PAPI_VERSION_MAJOR(PAPI_VERSION), 
-        PAPI_VERSION_MINOR(PAPI_VERSION), 
-        PAPI_VERSION_REVISION(PAPI_VERSION));
+           PAPI_VERSION_MAJOR(PAPI_VERSION), 
+           PAPI_VERSION_MINOR(PAPI_VERSION), 
+           PAPI_VERSION_REVISION(PAPI_VERSION));
     fflush(stdout);
 
-    // Find rocm_smi component index.
-    k = PAPI_num_components();                                          // get number of components.
-    for (i=0; i<k && cid<0; i++) {                                      // while not found,
-        PAPI_component_info_t *aComponent = 
-            (PAPI_component_info_t*) PAPI_get_component_info(i);        // get the component info.     
-        if (aComponent == NULL) {                                       // if we failed,
-            fprintf(stderr,  "PAPI_get_component_info(%i) failed, "
-                "returned NULL. %i components reported.\n", i,k);
-            exit(-1);    
-        }
-
-       if (strcmp("rocm_smi", aComponent->name) == 0) cid=i;            // If we found our match, record it.
-    } // end search components.
-
-    if (cid < 0) {                                                      // if no PCP component found,
-        fprintf(stderr, "Failed to find rocm_smi component among %i "
-            "reported components.\n", k);
+    // Find rocm_smi component
+    k = PAPI_num_components();
+    for (i = 0; i < k && cid < 0; i++) {
+        const PAPI_component_info_t *aComponent = PAPI_get_component_info(i);
+        if (aComponent && strcmp("rocm_smi", aComponent->name) == 0) cid = i;
+    }
+    if (cid < 0) {
+        fprintf(stderr, "Failed to find rocm_smi component.\n");
         PAPI_shutdown();
-        exit(-1); 
+        exit(-1);
     }
-
     printf("Found ROCM_SMI Component at id %d\n", cid);
 
-    // Add events at a GPU specific level ... eg rocm:::device=0:Whatever
-    eventCount = 0;
-    int eventsRead=0;
-    (void) eventsRead;
-
-   // Begin enumeration of all events.
-
-    long long value=0;                                              // The only value we read.
-    std::string eventName;
-    eventName = "rocm_smi:::NUMDevices";
-
+    // Force Init
     force_rocm_smi_init(cid);
 
-    CALL_PAPI_OK(PAPI_create_eventset(&EventSet)); 
-    CALL_PAPI_OK(PAPI_assign_eventset_component(EventSet, cid)); 
-    ret = PAPI_add_named_event(EventSet, eventName.c_str());  
-    if (ret == PAPI_OK) {
-        CALL_PAPI_OK(PAPI_start(EventSet));
-        CALL_PAPI_OK(PAPI_stop(EventSet, &value));
-        devices = value;
-        printf("Found %i devices.\n", devices);
-    } else {
-        fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret));
-        CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));          // Delete all events in set.
-        CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet));         // destroy the event set.
-        exit(-1);
+    // Get Device Count
+    {
+        int tempEventSet = PAPI_NULL;
+        long long numDevValue = 0;
+        CALL_PAPI_OK(PAPI_create_eventset(&tempEventSet));
+        CALL_PAPI_OK(PAPI_assign_eventset_component(tempEventSet, cid));
+        ret = PAPI_add_named_event(tempEventSet, "rocm_smi:::NUMDevices");
+        if (ret == PAPI_OK) {
+            CALL_PAPI_OK(PAPI_start(tempEventSet));
+            CALL_PAPI_OK(PAPI_stop(tempEventSet, &numDevValue));
+            devices = (int)numDevValue;
+            printf("Found %d devices.\n", devices);
+        } else {
+            fprintf(stderr, "FAILED to add NUMDevices event.\n");
+            CALL_PAPI_OK(PAPI_cleanup_eventset(tempEventSet));
+            CALL_PAPI_OK(PAPI_destroy_eventset(&tempEventSet));
+            exit(-1);
+        }
+        CALL_PAPI_OK(PAPI_cleanup_eventset(tempEventSet));
+        CALL_PAPI_OK(PAPI_destroy_eventset(&tempEventSet));
+    }
+    // Handle no devices
+    if (devices < 1) {
+        fprintf(stderr, "No ROCm devices found.\n"); 
+        PAPI_shutdown(); 
+        exit(0);
     }
 
-    // Do something.
-    CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));              // Delete all events in set.
 
-    eventName = "rocm_smi:::device=0:sensor=0:fan_speed";
-    ret = PAPI_add_named_event(EventSet, eventName.c_str());
-    if (ret != PAPI_OK) {
-        fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret));
-        CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));          // Delete all events in set.
-        exit(-1);
+    long long initial_perf_level = -1;
+    long long initial_power_cap = -1;
+    long long initial_fan_speed = -1;
+    long long power_cap_range_max_val = -1;
+    long long fan_speed_max_val = -1;
+    char perf_level_event_name[PAPI_MAX_STR_LEN] = "";
+    char power_cap_event_name[PAPI_MAX_STR_LEN] = "";
+    char fan_speed_event_name[PAPI_MAX_STR_LEN] = "";
+    char power_cap_range_max_event_name[PAPI_MAX_STR_LEN] = "";
+    char fan_speed_max_event_name[PAPI_MAX_STR_LEN] = "";
+    bool can_write_perf = false;
+    bool can_write_pcap = false;
+    bool can_write_fan = false;
+    long long new_perf_level = -1;
+    long long new_power_cap = -1;
+    long long new_fan_speed = -1;
+
+    // ---- Initial Read ----
+    printf("\n--- Initial Read: Finding events and getting base values ---\n");
+    const char* target_substrings[] = {
+        "perf_level", "power_cap:", "power_cap_range_max", "fan_speed:", "fan_speed_max"
+    };
+    const int num_target_substrings = sizeof(target_substrings) / sizeof(target_substrings[0]);
+    const int MAX_ROCM_EVENTS = 512;
+    char event_names[MAX_ROCM_EVENTS][PAPI_MAX_STR_LEN];
+    long long *rocm_values = NULL;
+    int num_rocm_events = 0;
+    int event_code = PAPI_NATIVE_MASK;
+    char current_event_name[PAPI_MAX_STR_LEN];
+    int readEventSet = PAPI_NULL;
+
+    CALL_PAPI_OK(PAPI_create_eventset(&readEventSet));
+    CALL_PAPI_OK(PAPI_assign_eventset_component(readEventSet, cid));
+
+    printf("Enumerating events to find targets (device=0, sensor=0 where applicable) for initial read...\n");
+    r = PAPI_enum_cmp_event(&event_code, PAPI_ENUM_FIRST, cid);
+    while (r == PAPI_OK) {
+        ret = PAPI_event_code_to_name(event_code, current_event_name);
+        if (ret != PAPI_OK) { 
+            fprintf(stderr, "Warning: PAPI_event_code_to_name failed for code %#x: %s\n", 
+                   event_code, PAPI_strerror(ret)); 
+            r = PAPI_enum_cmp_event(&event_code, PAPI_ENUM_EVENTS, cid); 
+            continue; 
+        }
+
+        bool is_target = false;
+        const char* matched_substring = NULL;
+        for (i = 0; i < num_target_substrings; ++i) {
+            if (strstr(current_event_name, target_substrings[i]) != NULL) {
+                bool device_match = (strstr(current_event_name, ":device=0") != NULL);
+                if (strcmp(target_substrings[i],"perf_level") == 0) {
+                    if (device_match) { 
+                        is_target = true; 
+                        matched_substring = target_substrings[i]; 
+                        break; 
+                    }
+                } else {
+                    bool sensor_match = (strstr(current_event_name, ":sensor=0") != NULL);
+                    if (device_match && sensor_match) { 
+                        is_target = true; 
+                        matched_substring = target_substrings[i]; 
+                        break; 
+                    }
+                    else if (device_match && strstr(current_event_name, ":sensor=") == NULL){
+                         if (strcmp(target_substrings[i],"power_cap:")==0 || 
+                             strcmp(target_substrings[i],"fan_speed:")==0) {
+                              printf("  Warning: Matched '%s' for device 0 but no sensor specified: %s\n", 
+                                     target_substrings[i], current_event_name);
+                              is_target = true; 
+                              matched_substring = target_substrings[i]; 
+                              break;
+                         }
+                    }
+                }
+            }
+        }
+
+        if (is_target) {
+            if (num_rocm_events < MAX_ROCM_EVENTS) {
+                ret = PAPI_add_event(readEventSet, event_code);
+                if (ret == PAPI_OK) {
+                    printf("  Adding event (matched '%s'): %s\n", matched_substring, current_event_name);
+                    strncpy(event_names[num_rocm_events], current_event_name, PAPI_MAX_STR_LEN - 1);
+                    event_names[num_rocm_events][PAPI_MAX_STR_LEN - 1] = '\0';
+                    num_rocm_events++;
+                } else { 
+                    fprintf(stderr, "  Warning: Failed to add event %s: %s\n", 
+                            current_event_name, PAPI_strerror(ret)); 
+                    if(ret==PAPI_ENOMEM) break;
+                }
+            } else { 
+                fprintf(stderr, "Error: Exceeded MAX_ROCM_EVENTS.\n"); 
+                break; 
+            }
+        }
+        r = PAPI_enum_cmp_event(&event_code, PAPI_ENUM_EVENTS, cid);
     }
+    printf("Added %d events for initial read.\n", num_rocm_events);
 
-    eventName = "rocm_smi:::device=0:sensor=0:fan_speed_max";
-    ret = PAPI_add_named_event(EventSet, eventName.c_str());
-    if (ret != PAPI_OK) {
-        fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret));
-        CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));          // Delete all events in set.
-        exit(-1);
+    if (num_rocm_events > 0) {
+        rocm_values = (long long *)calloc(num_rocm_events, sizeof(long long));
+        if (!rocm_values) { /* Handle error */ exit(-1); }
+
+        CALL_PAPI_OK(PAPI_start(readEventSet));
+        CALL_PAPI_OK(PAPI_stop(readEventSet, rocm_values));
+
+        printf("\n--- Extracting Initial Values and Event Names ---\n");
+        for (i = 0; i < num_rocm_events; ++i) {
+            printf("  Read Event %d: %-60s = %lld\n", i, event_names[i], rocm_values[i]);
+            if (strstr(event_names[i], "power_cap_range_max") != NULL && 
+                strstr(event_names[i], ":device=0") != NULL) {
+                power_cap_range_max_val = rocm_values[i];
+                strncpy(power_cap_range_max_event_name, event_names[i], PAPI_MAX_STR_LEN - 1);
+                power_cap_range_max_event_name[PAPI_MAX_STR_LEN - 1] = '\0';
+            } else if (strstr(event_names[i], "fan_speed_max") != NULL && 
+                       strstr(event_names[i], ":device=0") != NULL) {
+                fan_speed_max_val = rocm_values[i];
+                strncpy(fan_speed_max_event_name, event_names[i], PAPI_MAX_STR_LEN - 1);
+                fan_speed_max_event_name[PAPI_MAX_STR_LEN - 1] = '\0';
+            } else if (strstr(event_names[i], "perf_level") != NULL && 
+                       strstr(event_names[i], ":device=0") != NULL) {
+                initial_perf_level = rocm_values[i];
+                strncpy(perf_level_event_name, event_names[i], PAPI_MAX_STR_LEN - 1);
+                perf_level_event_name[PAPI_MAX_STR_LEN - 1] = '\0';
+            } else if (strstr(event_names[i], "power_cap:") != NULL && 
+                       strstr(event_names[i], "power_cap_range_max") == NULL && 
+                       strstr(event_names[i], ":device=0") != NULL) {
+                initial_power_cap = rocm_values[i];
+                strncpy(power_cap_event_name, event_names[i], PAPI_MAX_STR_LEN - 1);
+                power_cap_event_name[PAPI_MAX_STR_LEN - 1] = '\0';
+            } else if (strstr(event_names[i], "fan_speed:") != NULL && 
+                       strstr(event_names[i], "fan_speed_max") == NULL && 
+                       strstr(event_names[i], ":device=0") != NULL) {
+                initial_fan_speed = rocm_values[i];
+                strncpy(fan_speed_event_name, event_names[i], PAPI_MAX_STR_LEN - 1);
+                fan_speed_event_name[PAPI_MAX_STR_LEN - 1] = '\0';
+            }
+        }
+        free(rocm_values);
+        rocm_values = NULL;
+    } else {
+        printf("No target events found for initial read. Skipping write tests.\n");
+        goto cleanup_and_exit;
+    }
+
+    // Cleanup the initial read EventSet - Pass address to destroy
+    CALL_PAPI_OK(PAPI_cleanup_eventset(readEventSet));
+    CALL_PAPI_OK(PAPI_destroy_eventset(&readEventSet)); // Pass address
+    readEventSet = PAPI_NULL;
+
+
+    // ---- Stage 1: Calculate and Write NEW Values ----
+    printf("\n=== Stage 1: Calculating and Writing NEW Values ===\n");
+    can_write_perf = (initial_perf_level != -1 && strcmp(perf_level_event_name, "") != 0);
+    can_write_pcap = (initial_power_cap != -1 && power_cap_range_max_val != -1 && strcmp(power_cap_event_name, "") != 0);
+    can_write_fan = (initial_fan_speed != -1 && strcmp(fan_speed_event_name, "") != 0);
+
+    if (can_write_perf) {
+        new_perf_level = initial_perf_level + 1; // Example: Increment perf level
+        printf("    Calculating new perf_level: %lld + 1 = %lld\n", initial_perf_level, new_perf_level);
+        write_papi_event(cid, perf_level_event_name, new_perf_level);
+    } else { 
+        printf("Skipping perf_level write (initial value/name not found or invalid).\n"); 
+    }
+
+    if (can_write_pcap) {
+        new_power_cap = power_cap_range_max_val - 1000000; // Example: 1W below max
+        if (new_power_cap < 0) { new_power_cap = initial_power_cap; } // Basic sanity check
+        printf("    Calculating new power_cap: %lld uW - 1000000 uW = %lld uW\n", 
+               power_cap_range_max_val, new_power_cap);
+        write_papi_event(cid, power_cap_event_name, new_power_cap);
+    } else { 
+        printf("Skipping power_cap write (initial value/name/max not found or invalid).\n"); 
+    }
+
+    if (can_write_fan) {
+        new_fan_speed = fan_speed_max_val - 1; // Example: Decrease fan speed slightly
+        if (new_fan_speed < 0) { new_fan_speed = 0; } // Basic sanity check (min speed 0?)
+        printf("    Calculating new fan_speed: %lld - 1 = %lld\n", fan_speed_max_val, new_fan_speed);
+        write_papi_event(cid, fan_speed_event_name, new_fan_speed);
+    } else { 
+        printf("Skipping fan_speed write (initial value/name not found or invalid).\n"); 
+    }
+
+    // ---- Stage 2: Read values AFTER writing NEW ones ----
+    printf("\n=== Stage 2: Verifying NEW Values ===\n");
+    read_and_print_current_values(cid, 
+                                  perf_level_event_name, 
+                                  power_cap_event_name, 
+                                  fan_speed_event_name,
+                                  power_cap_range_max_event_name, 
+                                  fan_speed_max_event_name,
+                                  "After Writing New Values");
+
+    // ---- Stage 3: Write INITIAL values back (Revert) ----
+    printf("\n=== Stage 3: Reverting to INITIAL Values ===\n");
+    if (can_write_perf) { 
+        write_papi_event(cid, perf_level_event_name, initial_perf_level); 
+    } else { 
+        printf("Skipping perf_level revert.\n"); 
+    }
+    
+    if (can_write_pcap) { 
+        write_papi_event(cid, power_cap_event_name, initial_power_cap); 
+    } else { 
+        printf("Skipping power_cap revert.\n"); 
     }
+    
+    if (can_write_fan) { 
+        write_papi_event(cid, fan_speed_event_name, initial_fan_speed); 
+    } else { 
+        printf("Skipping fan_speed revert.\n"); 
+    }
+
+    // ---- Stage 4: Read values AFTER reverting ----
+    printf("\n=== Stage 4: Verifying REVERTED Values ===\n");
+    read_and_print_current_values(cid, 
+                                  perf_level_event_name, 
+                                  power_cap_event_name, 
+                                  fan_speed_event_name,
+                                  power_cap_range_max_event_name, 
+                                  fan_speed_max_event_name,
+                                  "After Reverting to Initial Values");
+
+    // ---- Cleanup and Exit ----
+cleanup_and_exit:
+    printf("\n--- Write/Revert Test Sequence Finished ---\n");
+    if (readEventSet != PAPI_NULL) { // Check if cleanup needed after jump
+         printf("Performing cleanup for initial read EventSet after jump...\n");
+         CALL_PAPI_OK(PAPI_cleanup_eventset(readEventSet));
+         CALL_PAPI_OK(PAPI_destroy_eventset(&readEventSet)); // Pass address
+    }
+    printf("Finished All Tests.\n");
+    PAPI_shutdown();
+    return(0);
+} // end MAIN.
+
+// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+// +++ Helper Function Definitions ++++++++++++++++++++++++++++++++++++++++++++
+// ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
+
+//-----------------------------------------------------------------------------
+// Helper: write value_to_write to event event_name of component cid via a temporary EventSet. Returns nothing; failures are printed. C version.
+//-----------------------------------------------------------------------------
+void write_papi_event(int cid, const char* event_name, long long value_to_write) {
+    if (event_name == NULL || strcmp(event_name, "") == 0) { // checked before first use: NULL must not reach "%s"
+        fprintf(stderr, "    Error: No event name provided for writing. Skipping write.\n");
+        return;
+    }
+    printf("    Attempting Write: Set '%s' = %lld\n", event_name, value_to_write);
+    int writeEventSet = PAPI_NULL;
+    long long read_back_value = 0;                   // filled in by PAPI_stop() on both outcomes below
+    long long write_buffer[1] = { value_to_write };  // PAPI_write() is handed an array; this set holds one event
 
-    long long curmax[2];
-    CALL_PAPI_OK(PAPI_start(EventSet));
-    CALL_PAPI_OK(PAPI_stop(EventSet, curmax));
-    printf("Fan speed: current=%lli maximum=%lli.\n", curmax[0], curmax[1]);
-    CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));              // Delete all events in set.
-
-    curmax[0]=128;
-    eventName = "rocm_smi:::device=0:sensor=0:fan_speed";
-    ret = PAPI_add_named_event(EventSet, eventName.c_str());
+    CALL_PAPI_OK(PAPI_create_eventset(&writeEventSet)); // Pass address
+    CALL_PAPI_OK(PAPI_assign_eventset_component(writeEventSet, cid));
+    // A failed add is not fatal here: report it, release the EventSet, and skip this write.
+    int ret = PAPI_add_named_event(writeEventSet, event_name);
     if (ret != PAPI_OK) {
-        fprintf(stderr, "FAILED to add event '%s', ret=%i='%s'.\n", eventName.c_str(), ret, PAPI_strerror(ret));
-        CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));          // Delete all events in set.
-        exit(-1);
+        fprintf(stderr, "    Error: FAILED to add event '%s' for writing, ret=%d [%s]. Skipping write.\n",
+                event_name, ret, PAPI_strerror(ret));
+        CALL_PAPI_OK(PAPI_cleanup_eventset(writeEventSet));
+        CALL_PAPI_OK(PAPI_destroy_eventset(&writeEventSet)); // Pass address
+        return;
     }
 
-    CALL_PAPI_OK(PAPI_start(EventSet));
-    ret = PAPI_write(EventSet, curmax);
-    if ( ret != PAPI_OK ) {
-        PAPI_stop(EventSet, curmax);                                // Must be stopped.
-        PAPI_cleanup_eventset(EventSet);                            // Empty it.
-        PAPI_destroy_eventset(&EventSet);                           // Release memory.
-        fprintf(stderr, "PAPI_write failure returned %i, = %s.\n", ret, PAPI_strerror(ret));
+    CALL_PAPI_OK(PAPI_start(writeEventSet));
+    // The write is issued while the EventSet is running; the set is stopped on both outcomes below.
+    ret = PAPI_write(writeEventSet, write_buffer);
+    if (ret != PAPI_OK) {
+        fprintf(stderr, "    Error: PAPI_write FAILED for event '%s' with value %lld, ret=%d [%s].\n",
+                event_name, value_to_write, ret, PAPI_strerror(ret));
+        int stop_ret = PAPI_stop(writeEventSet, &read_back_value); // still stop so cleanup below can proceed
+        if (stop_ret != PAPI_OK) {
+            fprintf(stderr, "    Warning: PAPI_stop after failed PAPI_write also failed: %s\n",
+                    PAPI_strerror(stop_ret));
+        }
     } else {
-        printf("Call succeeded to set fan_speed to %llu RPM.\n", curmax[0]);
+        printf("    PAPI_write call succeeded for '%s' = %lld.\n", event_name, value_to_write);
+        CALL_PAPI_OK(PAPI_stop(writeEventSet, &read_back_value));
+        printf("    Read back value immediately after write: %lld\n", read_back_value);
+        if (read_back_value != value_to_write) { // a mismatch is only reported, not treated as an error
+            printf("    Warning: Read-back value (%lld) does not match written value (%lld).\n",
+                   read_back_value, value_to_write);
+        }
     }
 
-    // Now try to read it. 
-    CALL_PAPI_OK(PAPI_stop(EventSet, &value));
-    printf("After set, read-back of fan value is %lli.\n", value);
+    CALL_PAPI_OK(PAPI_cleanup_eventset(writeEventSet));
+    CALL_PAPI_OK(PAPI_destroy_eventset(&writeEventSet)); // Pass address
+    printf("    Write attempt finished for '%s'.\n", event_name);
+}
 
-    CALL_PAPI_OK(PAPI_cleanup_eventset(EventSet));              // Delete all events in set.
-    CALL_PAPI_OK(PAPI_destroy_eventset(&EventSet));             // destroy the event set.
+//-----------------------------------------------------------------------------
+// Helper to read the set of relevant metrics (passed by name) and print them. C version.
+//-----------------------------------------------------------------------------
+#define MAX_EVENTS_TO_READ 10 // Max number of events this function will read at once
 
-    printf("Finished All Events.\n");
+void read_and_print_current_values(int cid,                   // component index the temporary EventSet is assigned to
+                                   const char* perf_name,     // event names to read; a NULL or "" name is left out
+                                   const char* pcap_name, 
+                                   const char* fan_name,
+                                   const char* pcap_max_name, 
+                                   const char* fan_max_name,
+                                   const char* stage_label)   // text echoed in the begin/end banner lines
+{   // Reads the named events once through a temporary EventSet and prints one line per event; returns nothing.
+    printf("    Reading Values [%s] for Verification <--\n", stage_label);
 
-    PAPI_shutdown();                                            // Returns no value.
-    return(0);                                                  // exit OK.
-} // end MAIN.
+    int readSet = PAPI_NULL;
+    int ret;
+    char events_to_read[MAX_EVENTS_TO_READ][PAPI_MAX_STR_LEN];  // full event names, in request order
+    char event_short_names[MAX_EVENTS_TO_READ][50];             // label printed next to each event
+    bool added_flags[MAX_EVENTS_TO_READ];                       // true once entry i was added to readSet
+    int read_count = 0;
+    int i;
+    // Start from zeroed tables so every slot is a valid empty string.
+    memset(events_to_read, 0, sizeof(events_to_read));
+    memset(event_short_names, 0, sizeof(event_short_names));
+    for(i=0; i<MAX_EVENTS_TO_READ; ++i) added_flags[i] = false;
+    // Queue each non-empty name together with its label; the order here is the print order.
+    if (perf_name && strcmp(perf_name, "") != 0 && read_count < MAX_EVENTS_TO_READ) { 
+        strncpy(events_to_read[read_count], perf_name, PAPI_MAX_STR_LEN - 1); 
+        strncpy(event_short_names[read_count], "Perf Level", 49); 
+        read_count++; 
+    }
+    if (pcap_name && strcmp(pcap_name, "") != 0 && read_count < MAX_EVENTS_TO_READ) { 
+        strncpy(events_to_read[read_count], pcap_name, PAPI_MAX_STR_LEN - 1); 
+        strncpy(event_short_names[read_count], "Power Cap (uW)", 49); 
+        read_count++; 
+    }
+    if (fan_name && strcmp(fan_name, "") != 0 && read_count < MAX_EVENTS_TO_READ) { 
+        strncpy(events_to_read[read_count], fan_name, PAPI_MAX_STR_LEN - 1); 
+        strncpy(event_short_names[read_count], "Fan Speed", 49); 
+        read_count++; 
+    }
+    if (pcap_max_name && strcmp(pcap_max_name, "") != 0 && read_count < MAX_EVENTS_TO_READ) { 
+        strncpy(events_to_read[read_count], pcap_max_name, PAPI_MAX_STR_LEN - 1); 
+        strncpy(event_short_names[read_count], "Power Cap Max (uW)", 49); 
+        read_count++; 
+    }
+    if (fan_max_name && strcmp(fan_max_name, "") != 0 && read_count < MAX_EVENTS_TO_READ) { 
+        strncpy(events_to_read[read_count], fan_max_name, PAPI_MAX_STR_LEN - 1); 
+        strncpy(event_short_names[read_count], "Fan Speed Max", 49); 
+        read_count++; 
+    }
+    for(i=0; i<read_count; ++i) { // strncpy() does not terminate a full buffer, so force the last byte to NUL
+        events_to_read[i][PAPI_MAX_STR_LEN - 1] = '\0'; 
+        event_short_names[i][49] = '\0'; 
+    }
+
+    // Nothing was queued: report it and return before any PAPI call is made.
+    if (read_count == 0) { 
+        fprintf(stderr, "    Error: No valid event names provided for reading.\n"); 
+        return; 
+    }
+    // One result slot per queued name; the buffer is released before returning.
+    long long* values = (long long*)calloc(read_count, sizeof(long long));
+    if (!values) { 
+        fprintf(stderr, "    Error: Failed to allocate memory for reading values.\n"); 
+        return; 
+    }
+
+    CALL_PAPI_OK(PAPI_create_eventset(&readSet));
+    CALL_PAPI_OK(PAPI_assign_eventset_component(readSet, cid));
+    // Add the events one by one; a failed add is recorded in added_flags and reported, not fatal.
+    int added_count = 0;
+    for (i = 0; i < read_count; ++i) {
+        ret = PAPI_add_named_event(readSet, events_to_read[i]);
+        if (ret == PAPI_OK) { 
+            added_count++; 
+            added_flags[i] = true; 
+        }
+        else { 
+            fprintf(stderr, "    Warning: Failed to add event '%s' for reading: %s\n", 
+                    events_to_read[i], PAPI_strerror(ret)); 
+            if(ret == PAPI_ENOMEM){ // stop trying further names once PAPI reports PAPI_ENOMEM
+                break; 
+            } 
+        }
+    }
+    // Start and immediately stop the set to obtain one value per added event.
+    if (added_count > 0) {
+        CALL_PAPI_OK(PAPI_start(readSet));
+        ret = PAPI_stop(readSet, values);
+        if (ret != PAPI_OK){ fprintf(stderr, "    Error: PAPI_stop failed during read: %s\n", PAPI_strerror(ret)); printf("    Current System Values (PAPI_stop failed, results may be inaccurate):\n"); }
+        else { printf("    Current System Values:\n"); }
+        // value_idx advances only for entries that were added, so skipped names do not consume a values[] slot.
+        int value_idx = 0;
+        for (i = 0; i < read_count; ++i) {
+            if(added_flags[i]) {
+                printf("      %-20s (%s): %lld\n", event_short_names[i], events_to_read[i], (ret == PAPI_OK) ? values[value_idx] : -999); // -999 is shown when PAPI_stop failed
+                value_idx++;
+            } else { printf("      %-20s (%s): [Read Skipped - Add Failed]\n", event_short_names[i], events_to_read[i]); }
+        }
+    } else { printf("    No events were successfully added to the EventSet for reading.\n"); }
+    // Release the result buffer and the temporary EventSet whether or not anything was read.
+    free(values);
+    CALL_PAPI_OK(PAPI_cleanup_eventset(readSet));
+    CALL_PAPI_OK(PAPI_destroy_eventset(&readSet));
+    printf("    Finished Reading [%s] --\n", stage_label);
+}
\ No newline at end of file
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/README.md 7.2.0-1/src/components/rocp_sdk/README.md
--- 7.2.0~b2-1/src/components/rocp_sdk/README.md	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/README.md	2025-06-25 22:38:10.000000000 +0000
@@ -56,3 +56,4 @@ Note that this variable takes precedence
 ## Known Limitations
 
 * In dispatch mode, PAPI may read zeros if reading takes place immediately after the return of a GPU kernel. This is not a PAPI bug. It may occur because calls such as hipDeviceSynchronize() do not guarantee that ROCprofiler has been called and all counter buffers have been flushed.  Therefore, it is recommended that the user code adds a delay between the return of a kernel and calls to PAPI_read(), PAPI_stop(), etc.
+* If an application is linked against the static PAPI library libpapi.a, then the application must call PAPI_library_init() before calling any hip routines (e.g. hipInit(), hipGetDeviceCount(), hipLaunchKernelGGL(), etc). If the application is linked against the dynamic library libpapi.so, then the order of operations does not matter.
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/Rules.rocp_sdk 7.2.0-1/src/components/rocp_sdk/Rules.rocp_sdk
--- 7.2.0~b2-1/src/components/rocp_sdk/Rules.rocp_sdk	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/Rules.rocp_sdk	2025-06-25 22:38:10.000000000 +0000
@@ -2,9 +2,11 @@ COMPSRCS += components/rocp_sdk/rocp_sdk
 
 COMPOBJS += rocp_sdk.o sdk_class.o
 
-ROCP_SDK_INCL=$(PAPI_ROCP_SDK_ROOT)/include
+ROCP_SDK_INCL=-I$(PAPI_ROCP_SDK_ROOT)/include     \
+              -I$(PAPI_ROCP_SDK_ROOT)/include/hsa \
+              -I$(PAPI_ROCP_SDK_ROOT)/hsa/include
 
-CFLAGS  += -g -I$(ROCP_SDK_INCL) -D__HIP_PLATFORM_AMD__
+CFLAGS  += -g $(ROCP_SDK_INCL) -D__HIP_PLATFORM_AMD__
 LDFLAGS += $(LDL)
 
 rocp_sdk.o: components/rocp_sdk/rocp_sdk.c $(HEADERS)
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/rocp_sdk.c 7.2.0-1/src/components/rocp_sdk/rocp_sdk.c
--- 7.2.0~b2-1/src/components/rocp_sdk/rocp_sdk.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/rocp_sdk.c	2025-06-25 22:38:10.000000000 +0000
@@ -15,16 +15,32 @@
 
 #include <stdio.h>
 #include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <dirent.h>
 #include "papi.h"
 #include "papi_internal.h"
 #include "papi_vector.h"
 #include "papi_memory.h"
 #include "extras.h"
 #include "sdk_class.h"
+#include "rocprofiler-sdk/hsa.h"
 
 #define ROCPROF_SDK_MAX_COUNTERS (64)
 #define RPSDK_CTX_RUNNING (1)
 
+#define ROCM_CALL(call, err_handle) do {   /* evaluates `call` once, into _status      */ \
+    hsa_status_t _status = (call);         /* declared before err_handle in same scope */ \
+    if (_status == HSA_STATUS_SUCCESS ||   /* either of these two statuses ...         */ \
+        _status == HSA_STATUS_INFO_BREAK)  /* ... leaves the do/while(0) early,        */ \
+        break;                             /* so err_handle is not executed            */ \
+    err_handle;                            /* reached for every other status           */ \
+} while(0)
+
+
+/* Utility functions */
+static int check_for_available_devices(char *err_msg);
+
 unsigned int _rocp_sdk_lock;
 
 /* Init and finalize */
@@ -74,7 +90,7 @@ papi_vector_t _rocp_sdk_vector = {
         .version = "1.0",
         .description = "GPU events and metrics via AMD ROCprofiler-SDK",
         .initialized = 0,
-        .num_mpx_cntrs = ROCPROF_SDK_MAX_COUNTERS,
+        .num_mpx_cntrs = 0
     },
 
     .size = {
@@ -117,6 +133,10 @@ rocp_sdk_init_component(int cid)
     _rocp_sdk_vector.cmp_info.num_cntrs = -1;
     _rocp_sdk_lock = PAPI_NUM_LOCK + NUM_INNER_LOCK + cid;
 
+    // We set this env variable to silence some unnecessary ROCprofiler-SDK debug messages.
+    // It is not critical, so if it fails to be set, we can safely ignore the error.
+    (void)setenv("ROCPROFILER_LOG_LEVEL","fatal",0);
+
     int papi_errno = rocprofiler_sdk_init_pre();
     if (papi_errno != PAPI_OK) {
         _rocp_sdk_vector.cmp_info.initialized = 1;
@@ -127,9 +147,9 @@ rocp_sdk_init_component(int cid)
         return papi_errno;
     }
 
-    sprintf(_rocp_sdk_vector.cmp_info.disabled_reason, "Not initialized. Access component events to initialize it.");
-    _rocp_sdk_vector.cmp_info.disabled = PAPI_EDELAY_INIT;
-    return PAPI_EDELAY_INIT;
+    // This component needs to be fully initialized from the beginning,
+    // because interleaving hip calls and PAPI calls leads to errors.
+    return check_n_initialize();
 }
 
 int
@@ -175,6 +195,11 @@ rocp_sdk_init_private(void)
         goto fn_exit;
     }
 
+    papi_errno = check_for_available_devices(_rocp_sdk_vector.cmp_info.disabled_reason);
+    if (papi_errno != PAPI_OK) {
+        goto fn_fail;
+    }
+
     papi_errno = rocprofiler_sdk_init();
     if (papi_errno != PAPI_OK) {
         _rocp_sdk_vector.cmp_info.disabled = papi_errno;
@@ -186,11 +211,16 @@ rocp_sdk_init_private(void)
 
     int count = 0;
     papi_errno = evt_get_count(&count);
+    if (papi_errno != PAPI_OK) {
+        goto fn_fail;
+    }
     _rocp_sdk_vector.cmp_info.num_native_events = count;
     _rocp_sdk_vector.cmp_info.num_cntrs = count;
+    _rocp_sdk_vector.cmp_info.num_mpx_cntrs = count;
 
-  fn_exit:
     _rocp_sdk_vector.cmp_info.initialized = 1;
+
+  fn_exit:
     _rocp_sdk_vector.cmp_info.disabled = papi_errno;
     _papi_hwi_unlock(COMPONENT_LOCK);
     return papi_errno;
@@ -231,6 +261,15 @@ update_native_events(rocp_sdk_control_t
 {
     int papi_errno = PAPI_OK;
 
+    if (0 == ntv_count) {
+        if ( NULL != ctl->events_id ){
+            papi_free(ctl->events_id);
+            ctl->events_id = NULL;
+        }
+        ctl->num_events = ntv_count;
+        goto fn_exit;
+    }
+
     if (ntv_count != ctl->num_events) {
         ctl->events_id = papi_realloc(ctl->events_id, ntv_count * sizeof(*ctl->events_id));
         if (NULL == ctl->events_id) {
@@ -282,6 +321,11 @@ rocp_sdk_start(hwd_context_t *ctx, hwd_c
     rocp_sdk_context_t *rocp_sdk_ctx = (rocp_sdk_context_t *) ctx;
     rocp_sdk_control_t *rocp_sdk_ctl = (rocp_sdk_control_t *) ctl;
 
+    if (0 == rocp_sdk_ctl->num_events){
+        SUBDBG("Error! Cannot PAPI_start an empty eventset.");
+        return PAPI_ENOSUPP;
+    }
+
     if (rocp_sdk_ctx->state & RPSDK_CTX_RUNNING) {
         SUBDBG("Error! Cannot PAPI_start more than one eventset at a time for every component.");
         return PAPI_EINVAL;
@@ -392,9 +436,6 @@ rocp_sdk_ntv_code_to_info(unsigned int e
         return papi_errno;
     }
 
-    info->event_code = event_code;
-    info->component_index = _rocp_sdk_vector.cmp_info.CmpIdx;
-
     return rocprofiler_sdk_evt_code_to_info(event_code, info);
 }
 
@@ -418,3 +459,45 @@ check_n_initialize(void)
     }
     return _rocp_sdk_vector.cmp_info.disabled;
 }
+
+int
+check_for_available_devices(char *err_msg)
+{
+    /* Reports whether the KFD topology directory contains at least one entry
+     * whose name does not start with '.'.  Returns PAPI_OK when it does;
+     * otherwise copies a fixed message into err_msg and returns PAPI_EMISC. */
+    const char *nodes_path = "/sys/class/kfd/kfd/topology/nodes";
+    struct stat path_info;
+    struct dirent *entry;
+    DIR *nodes = NULL;
+    int have_entry = 0;
+
+    /* Open the path only when it exists and is a directory. */
+    if (0 == stat(nodes_path, &path_info) && S_ISDIR(path_info.st_mode)) {
+        nodes = opendir(nodes_path);
+    }
+
+    /* Missing, not a directory, or not openable: all are treated as
+     * "no AMD devices on this system". */
+    if (NULL == nodes) {
+        goto fn_fail;
+    }
+
+    /* Stop at the first entry with a non-empty name that does not begin
+     * with '.'; this skips "." and "..". */
+    while (!have_entry && NULL != (entry = readdir(nodes))) {
+        have_entry = ('\0' != entry->d_name[0] && '.' != entry->d_name[0]);
+    }
+    closedir(nodes);
+
+    if (!have_entry) {
+        goto fn_fail;
+    }
+
+    return PAPI_OK;
+
+  fn_fail:
+    /* Single failure exit so every "no devices" case reports the same text. */
+    sprintf(err_msg, "No compatible devices found.");
+    return PAPI_EMISC;
+}
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/sdk_class.cpp 7.2.0-1/src/components/rocp_sdk/sdk_class.cpp
--- 7.2.0~b2-1/src/components/rocp_sdk/sdk_class.cpp	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/sdk_class.cpp	2025-06-25 22:38:10.000000000 +0000
@@ -57,7 +57,7 @@ static std::condition_variable agent_con
 static bool data_is_ready = false;
 static std::string _rocp_sdk_error_string;
 static long long int *_counter_values = NULL;
-static int rpsdk_profiling_mode = RPSDK_MODE_DISPATCH;
+static int rpsdk_profiling_mode = RPSDK_MODE_DEVICE_SAMPLING;
 
 static agent_map_t gpu_agents = agent_map_t{};
 
@@ -186,34 +186,58 @@ obtain_function_pointers()
 {
     static bool first_time = true;
     void *dllHandle = nullptr;
+    const char* pathname;
+    const char *rocm_root;
+    const char *ret_val = NULL;
 
-    if( !first_time )
-        return NULL;
+    if( !first_time ){
+        ret_val = NULL;
+        goto fn_exit;
+    }
 
-    const char* pathname = std::getenv("PAPI_ROCP_SDK_LIB");
+    pathname = std::getenv("PAPI_ROCP_SDK_LIB");
 
     // If the user gave us an explicit path to librocprofiler-sdk.so, use it.
     if ( nullptr != pathname && strlen(pathname) <= PATH_MAX ) {
         dllHandle = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
-    }
-
-    // If we were not given an explicit path, or the path didn't work, try elsewhere.
-    if ( NULL == pathname || nullptr == dllHandle ) {
-        std::string path2;
-        const char *rocm_root = std::getenv("PAPI_ROCP_SDK_ROOT");
-        if( nullptr == rocm_root || strlen(rocm_root) > PATH_MAX ){
-            set_error_string("Did not find path for librocprofiler-sdk.so. Set either PAPI_ROCP_SDK_ROOT, or ROCP_SDK_LIB.");
-            return get_error_string().c_str();
+        if ( nullptr == dllHandle ) {
+            std::string err_str = std::string("Invalid path in PAPI_ROCP_SDK_LIB: ")+pathname;
+            set_error_string(err_str);
+            ret_val = strdup(err_str.c_str());
+            SUBDBG("%s\n",ret_val);
+            goto fn_fail;
         }
-        path2 = std::string(rocm_root) + "/lib/librocprofiler-sdk.so";
+    }else{
+        // If we were not given an explicit path to the library, try elsewhere.
+        rocm_root = std::getenv("PAPI_ROCP_SDK_ROOT");
+        if( nullptr == rocm_root || strlen(rocm_root) > PATH_MAX ){
+            // If we are here, the user has not given us any hint about the
+            // location of the library, so we let dlopen() try the default paths.
+            pathname = "librocprofiler-sdk.so";
+        }else{
+            int err;
+            struct stat stat_info;
 
-        // Clear previous errors.
-        (void)dlerror();
+            std::string tmp_str = std::string(rocm_root) + "/lib/librocprofiler-sdk.so";
+            pathname = strdup(tmp_str.c_str());
+            err = stat(pathname, &stat_info);
+            if (err != 0 || !S_ISREG(stat_info.st_mode)) {
+                std::string err_str = std::string("Invalid path in PAPI_ROCP_SDK_ROOT: ")+tmp_str;
+                set_error_string(err_str);
+                ret_val = strdup(err_str.c_str());
+                SUBDBG("%s\n",ret_val);
+                goto fn_fail;
+            }
+        }
 
-        dllHandle = dlopen(path2.c_str(), RTLD_NOW | RTLD_GLOBAL);
+        dllHandle = dlopen(pathname, RTLD_NOW | RTLD_GLOBAL);
         if (dllHandle == NULL) {
-            set_error_string(std::string("Could not dlopen() librocprofiler-sdk.so. Set either PAPI_ROCP_SDK_ROOT, or ROCP_SDK_LIB. Error: ")+dlerror());
-            return dlerror();
+            // Nothing worked. Giving up.
+            std::string err_str = std::string("Could not dlopen() librocprofiler-sdk.so. Set either PAPI_ROCP_SDK_ROOT, or PAPI_ROCP_SDK_LIB.");
+            set_error_string(err_str);
+            ret_val = strdup(err_str.c_str());
+            SUBDBG("%s\n",ret_val);
+            goto fn_fail;
         }
     }
 
@@ -241,10 +265,12 @@ obtain_function_pointers()
     DLL_SYM_CHECK(rocprofiler_query_record_counter_id, rocprofiler_query_record_counter_id_t);
     DLL_SYM_CHECK(rocprofiler_query_record_dimension_position, rocprofiler_query_record_dimension_position_t);
 
-    // Make sure we don't run this code multiple times.
-    first_time = false;
-
-    return NULL;
+    fn_exit:
+      // Make sure we don't run this code multiple times.
+      first_time = false;
+      return ret_val;
+    fn_fail:
+      goto fn_exit;
 }
 
 /**
@@ -394,13 +420,6 @@ record_callback(rocprofiler_dispatch_cou
 
     _papi_hwi_unlock(_rocp_sdk_lock);
 
-#if defined(DEBUG_OUTPUT_OF_RECORDED_VALUES)
-    for(size_t i = 0; i < record_count; ++i){
-        rocprofiler_counter_id_t counter_id;
-        ROCPROFILER_CALL(rocprofiler_query_record_counter_id_FPTR(record_data[i].id, &counter_id), "Could not retrieve counter_id");
-        std::cerr << " ## record_data[" << i << "].id: " << record_data[i].id << " -> counter_id: " << counter_id.handle << " Value= " << record_data[i].counter_value << std::endl;
-    }
-#endif
     return;
 }
 
@@ -490,9 +509,14 @@ buffered_callback(rocprofiler_context_id
 int
 tool_init(rocprofiler_client_finalize_t fini_func, void* tool_data)
 {
+    assert(tool_data != nullptr);
 
+    if( NULL != getenv("PAPI_ROCP_SDK_DISPATCH_MODE") ){
+        rpsdk_profiling_mode = RPSDK_MODE_DISPATCH;
+    }
 
-    assert(tool_data != nullptr);
+    // Obtain the list of available (GPU) agents.
+    gpu_agents = get_GPU_agent_info();
 
     ROCPROFILER_CALL(rocprofiler_create_context_FPTR(&get_client_ctx()), "context creation");
 
@@ -505,6 +529,13 @@ tool_init(rocprofiler_client_finalize_t
                                                tool_data,
                                                &get_buffer()),
                          "buffer creation failed");
+
+        // Configure device_counting_service for all devices.
+        for(auto g_it=gpu_agents.begin(); g_it!=gpu_agents.end(); ++g_it){
+            ROCPROFILER_CALL(rocprofiler_configure_device_counting_service_FPTR(
+                                 get_client_ctx(), get_buffer(), g_it->second->id, set_profile, nullptr),
+                             "Could not setup sampling");
+        }
     }else{
         ROCPROFILER_CALL(rocprofiler_configure_callback_dispatch_counting_service_FPTR(
                              get_client_ctx(), dispatch_callback, tool_data, record_callback, tool_data),
@@ -512,7 +543,6 @@ tool_init(rocprofiler_client_finalize_t
     }
 
     return 0;
-
 }
 
 /* ** */
@@ -613,23 +643,12 @@ void stop_counting(void){
 /* ** */
 void
 start_counting(vendorp_ctx_t ctx){
-    static bool is_device_counting_configured = false;
 
     // Store a pointer to the counter value array in a global variable so that
     // our functions that are called from the ROCprofiler-SDK (instead of our
     // API) can still find the array.
     _counter_values = ctx->counters;
 
-    if( (RPSDK_MODE_DEVICE_SAMPLING == get_profiling_mode()) && !is_device_counting_configured ){
-        is_device_counting_configured = true;
-        // Configure device_counting_service for all devices.
-        for(auto g_it=gpu_agents.begin(); g_it!=gpu_agents.end(); ++g_it){
-            ROCPROFILER_CALL(rocprofiler_configure_device_counting_service_FPTR(
-                                 get_client_ctx(), get_buffer(), g_it->second->id, set_profile, nullptr),
-                             "Could not setup sampling");
-        }
-    }
-
     ROCPROFILER_CALL(rocprofiler_start_context_FPTR(get_client_ctx()), "start context");
 }
 
@@ -678,11 +697,6 @@ read_sample(){
             ROCPROFILER_CALL(rocprofiler_query_record_counter_id_FPTR(output_records[i].id, &counter_id), "Could not retrieve counter_id");
             rec_info.counter_id = counter_id;
 
-#if defined(DEBUG_OUTPUT)
-            printf(" ## output_records[%d].id: %lu -> counter_id: %lu Value= %lf\n", i, output_records[i].id, counter_id.handle, output_records[i].counter_value);
-	    fflush(stdout);
-#endif // DEBUG_OUTPUT
-
             std::vector<rocprofiler_record_dimension_info_t> dimensions = counter_dimensions(counter_id);
             for(auto& dim : dimensions ){
                 unsigned long pos=0;
@@ -1040,14 +1054,13 @@ int setup() {
 
     const char *error_msg = obtain_function_pointers();
     if( NULL != error_msg ){
-        set_error_string("Could not obtain all functions from librocprofiler-sdk.so. Possible library version mismatch.");
-        SUBDBG("dlsym(): %s\n", error_msg);
+        if( get_error_string().empty() ){
+            set_error_string("Could not obtain all functions from librocprofiler-sdk.so. Possible library version mismatch.");
+            SUBDBG("dlsym(): %s\n", error_msg);
+        }
         goto fn_fail;
     }
 
-    // Obtain the list of available (GPU) agents.
-    gpu_agents = get_GPU_agent_info();
-
     if( (ROCPROFILER_STATUS_SUCCESS == rocprofiler_is_initialized_FPTR(&status)) && (0 == status) ){
         ROCPROFILER_CALL(rocprofiler_force_configure_FPTR(&rocprofiler_configure), "force configuration");
     }
@@ -1314,8 +1327,10 @@ rocprofiler_configure(uint32_t
     const char *error_msg = papi_rocpsdk::obtain_function_pointers();
 
     if( NULL != error_msg ){
-        papi_rocpsdk::set_error_string("Could not obtain all functions from librocprofiler-sdk.so. Possible library version mismatch.");
-        SUBDBG("dlsym(): %s\n", error_msg);
+        if( papi_rocpsdk::get_error_string().empty() ){
+            papi_rocpsdk::set_error_string("Could not obtain all functions from librocprofiler-sdk.so. Possible library version mismatch.");
+            SUBDBG("dlsym(): %s\n", error_msg);
+        }
         return NULL;
     }
 
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/sdk_class.hpp 7.2.0-1/src/components/rocp_sdk/sdk_class.hpp
--- 7.2.0~b2-1/src/components/rocp_sdk/sdk_class.hpp	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/sdk_class.hpp	2025-06-25 22:38:10.000000000 +0000
@@ -3,15 +3,26 @@
 
 #include <stdint.h>
 #include <string.h>
+#include <sys/stat.h>
 #include <rocprofiler-sdk/buffer.h>
 #include <rocprofiler-sdk/registration.h>
 #include <rocprofiler-sdk/device_counting_service.h>
 #include <rocprofiler-sdk/rocprofiler.h>
 
 #include "papi.h"
+
+#ifdef __cplusplus
+extern "C"
+{
+#endif
+
 #include "papi_internal.h"
 #include "papi_memory.h"
 
+#ifdef __cplusplus
+}
+#endif
+
 #include <dlfcn.h>
 #include <cxxabi.h>
 #include <atomic>
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/tests/Makefile 7.2.0-1/src/components/rocp_sdk/tests/Makefile
--- 7.2.0~b2-1/src/components/rocp_sdk/tests/Makefile	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/tests/Makefile	2025-06-25 22:38:10.000000000 +0000
@@ -1,36 +1,41 @@
 NAME=template
 include ../../Makefile_comp_tests.target
 
+ROCP_SDK_INCL=-I$(PAPI_ROCP_SDK_ROOT)/include     \
+              -I$(PAPI_ROCP_SDK_ROOT)/include/hsa \
+              -I$(PAPI_ROCP_SDK_ROOT)/hsa/include
+
+AMDCXX   ?= amdclang++
 CFLAGS    = $(OPTFLAGS)
-CPPFLAGS += $(INCLUDE)
+CPPFLAGS += $(INCLUDE) $(ROCP_SDK_INCL)
 LDFLAGS  += $(PAPILIB) $(TESTLIB) $(UTILOBJS)
 
-GPUARCH = $(shell rocm_agent_enumerator 2>/dev/null | head -1)
-ifeq ($(GPUARCH),)
-    GPUARCH = native
+GPUARCH = $(shell rocm_agent_enumerator 2>/dev/null | grep -v "gfx000" | head -1)
+ifneq ($(GPUARCH),)
+    ARCHFLAG=--offload-arch=$(GPUARCH)
 endif
-GPUFLAGS=--offload-arch=$(GPUARCH) --hip-link --rtlib=compiler-rt -unwindlib=libgcc
+GPUFLAGS=$(ARCHFLAG) --hip-link --rtlib=compiler-rt -unwindlib=libgcc
 
 TESTS = simple advanced two_eventsets simple_sampling
 template_tests: $(TESTS)
 
 %.o: %.c
-	$(CC) $(CPPFLAGS) $(CFLAGS) $(OPTFLAGS) -c -o $@ $<
+	$(CC) $(CPPFLAGS) $(CFLAGS) $(OPTFLAGS) -D__HIP_PLATFORM_AMD__ -c -o $@ $<
 
 kernel.o: kernel.cpp
-	$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -D__HIP_ROCclr__=1 -O2 -g -DNDEBUG --offload-arch=$(GPUARCH) -W -Wall -Wextra -Wshadow -o kernel.o -x hip -c kernel.cpp
+	$(AMDCXX) -D__HIP_ROCclr__=1 -O2 -g -DNDEBUG $(ARCHFLAG) -W -Wall -Wextra -Wshadow -o kernel.o -x hip -c kernel.cpp
 
 simple: simple.o kernel.o
-	$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -O2 -g -DNDEBUG $(GPUFLAGS) simple.o kernel.o -o simple $(LDFLAGS)
+	$(AMDCXX) -O2 -g -DNDEBUG $(GPUFLAGS) simple.o kernel.o -o simple $(LDFLAGS)
 
 advanced: advanced.o kernel.o
-	$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -O2 -g -DNDEBUG $(GPUFLAGS) advanced.o kernel.o -o advanced $(LDFLAGS)
+	$(AMDCXX) -O2 -g -DNDEBUG $(GPUFLAGS) advanced.o kernel.o -o advanced $(LDFLAGS)
 
 two_eventsets: two_eventsets.o kernel.o
-	$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -O2 -g -DNDEBUG $(GPUFLAGS) two_eventsets.o kernel.o -o two_eventsets $(LDFLAGS)
+	$(AMDCXX) -O2 -g -DNDEBUG $(GPUFLAGS) two_eventsets.o kernel.o -o two_eventsets $(LDFLAGS)
 
 simple_sampling: simple_sampling.o kernel.o
-	$(PAPI_ROCP_SDK_ROOT)/bin/amdclang++ -O2 -g -DNDEBUG $(GPUFLAGS) simple_sampling.o kernel.o -o simple_sampling $(LDFLAGS)
+	$(AMDCXX) -O2 -g -DNDEBUG $(GPUFLAGS) simple_sampling.o kernel.o -o simple_sampling $(LDFLAGS) -pthread
 
 clean:
 	rm -f $(TESTS) *.o
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/tests/advanced.c 7.2.0-1/src/components/rocp_sdk/tests/advanced.c
--- 7.2.0~b2-1/src/components/rocp_sdk/tests/advanced.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/tests/advanced.c	2025-06-25 22:38:10.000000000 +0000
@@ -2,11 +2,13 @@
 #include <unistd.h>
 #include <papi.h>
 #include <papi_test.h>
+#include <hip/hip_runtime.h>
 
-extern void launch_kernel(int device_id);
+extern int launch_kernel(int device_id);
 
 int main(int argc, char *argv[])
 {
+    int dev_count=0;
     int papi_errno;
 #define NUM_EVENTS (14)
     long long counters[NUM_EVENTS] = { 0 };
@@ -27,9 +29,6 @@ int main(int argc, char *argv[])
                   "rocp_sdk:::SQ_WAVE_CYCLES:DIMENSION_SHADER_ENGINE=4:device=0",
                   "rocp_sdk:::SQ_WAVE_CYCLES:device=0"
     };
-//                  "rocp_sdk:::SQ_WAVES:device=0",
-//                  "rocp_sdk:::TCC_READ:device=0",
-//                  "rocp_sdk:::TCC_CYCLE:device=0"
 
     papi_errno = PAPI_library_init(PAPI_VER_CURRENT);
     if (papi_errno != PAPI_VER_CURRENT) {
@@ -56,7 +55,10 @@ int main(int argc, char *argv[])
     for(int rep=0; rep<=4; ++rep){
 
         printf("---------------------  launch_kernel(0)\n");
-        launch_kernel(0);
+        papi_errno = launch_kernel(0);
+        if (papi_errno != 0) {
+            test_fail(__FILE__, __LINE__, "launch_kernel(0)", papi_errno);
+        }
 
         usleep(1000);
 
@@ -82,40 +84,49 @@ int main(int argc, char *argv[])
             fprintf(stdout, "%s: %.2lfM\n", events[i], (double)counters[i]/1e6);
     }
 
-    printf("======================================================\n");
-    printf("==================== SECOND ROUND ====================\n");
-    printf("======================================================\n");
-     
-    for(int rep=0; rep<=3; ++rep){
-        papi_errno = PAPI_start(eventset);
-        if (papi_errno != PAPI_OK) {
-            test_fail(__FILE__, __LINE__, "PAPI_start", papi_errno);
-        }
-
-        printf("---------------------  launch_kernel(1)\n");
-        launch_kernel(1);
-
-        usleep(1000);
-
-        papi_errno = PAPI_read(eventset, counters);
-        if (papi_errno != PAPI_OK) {
-            test_fail(__FILE__, __LINE__, "PAPI_read", papi_errno);
-        }
-        printf("---------------------  PAPI_read()\n");
-
-        for (int i = 0; i < NUM_EVENTS; ++i) {
-            fprintf(stdout, "%s: %.2lfM\n", events[i], (double)counters[i]/1e6);
-        }
-
-        papi_errno = PAPI_stop(eventset, counters);
-        if (papi_errno != PAPI_OK) {
-            test_fail(__FILE__, __LINE__, "PAPI_stop", papi_errno);
-        }
-
-        printf("---------------------  PAPI_stop()\n");
+    if (hipGetDeviceCount(&dev_count) != hipSuccess){
+        test_fail(__FILE__, __LINE__, "Error while counting AMD devices:", papi_errno);
+    }
 
-        for (int i = 0; i < NUM_EVENTS; ++i) {
-            fprintf(stdout, "%s: %.2lfM\n", events[i], (double)counters[i]/1e6);
+    if( dev_count > 1 ){
+        printf("======================================================\n");
+        printf("==================== SECOND ROUND ====================\n");
+        printf("======================================================\n");
+
+        for(int rep=0; rep<=3; ++rep){
+            papi_errno = PAPI_start(eventset);
+            if (papi_errno != PAPI_OK) {
+                test_fail(__FILE__, __LINE__, "PAPI_start", papi_errno);
+            }
+
+            printf("---------------------  launch_kernel(1)\n");
+            papi_errno = launch_kernel(1);
+            if (papi_errno != 0) {
+                test_fail(__FILE__, __LINE__, "launch_kernel(1)", papi_errno);
+            }
+
+            usleep(1000);
+
+            papi_errno = PAPI_read(eventset, counters);
+            if (papi_errno != PAPI_OK) {
+                test_fail(__FILE__, __LINE__, "PAPI_read", papi_errno);
+            }
+            printf("---------------------  PAPI_read()\n");
+
+            for (int i = 0; i < NUM_EVENTS; ++i) {
+                fprintf(stdout, "%s: %.2lfM\n", events[i], (double)counters[i]/1e6);
+            }
+
+            papi_errno = PAPI_stop(eventset, counters);
+            if (papi_errno != PAPI_OK) {
+                test_fail(__FILE__, __LINE__, "PAPI_stop", papi_errno);
+            }
+
+            printf("---------------------  PAPI_stop()\n");
+
+            for (int i = 0; i < NUM_EVENTS; ++i) {
+                fprintf(stdout, "%s: %.2lfM\n", events[i], (double)counters[i]/1e6);
+            }
         }
     }
 
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/tests/kernel.cpp 7.2.0-1/src/components/rocp_sdk/tests/kernel.cpp
--- 7.2.0~b2-1/src/components/rocp_sdk/tests/kernel.cpp	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/tests/kernel.cpp	2025-06-25 22:38:10.000000000 +0000
@@ -1,7 +1,7 @@
 #include <iostream>
 #include <hip/hip_runtime.h>
 
-extern "C" void launch_kernel(int device_id);
+extern "C" int launch_kernel(int device_id);
 
 #define HIP_CALL(call)                                                                             \
     do                                                                                             \
@@ -10,7 +10,7 @@ extern "C" void launch_kernel(int device
         if(err != hipSuccess)                                                                      \
         {                                                                                          \
             std::cerr << hipGetErrorString(err) << std::endl;                                      \
-            abort();                                                                               \
+            return(-1);                                                                            \
         }                                                                                          \
     } while(0)
 
@@ -36,7 +36,7 @@ kernelC(T* C_d, const T* A_d, size_t N)
     }
 }
 
-void launch_kernel(int device_id) {
+int launch_kernel(int device_id) {
     const int NUM_LAUNCH = 1;
 
     HIP_CALL(hipSetDevice(device_id));
@@ -47,4 +47,6 @@ void launch_kernel(int device_id) {
     }
 
     HIP_CALL(hipDeviceSynchronize());
+
+    return 0;
 }
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/tests/simple.c 7.2.0-1/src/components/rocp_sdk/tests/simple.c
--- 7.2.0~b2-1/src/components/rocp_sdk/tests/simple.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/tests/simple.c	2025-06-25 22:38:10.000000000 +0000
@@ -3,7 +3,7 @@
 #include <papi.h>
 #include <papi_test.h>
 
-extern void launch_kernel(int device_id);
+extern int launch_kernel(int device_id);
 
 int main(int argc, char *argv[])
 {
@@ -44,9 +44,11 @@ int main(int argc, char *argv[])
         test_fail(__FILE__, __LINE__, "PAPI_start", papi_errno);
     }
 
-
     printf("---------------------  launch_kernel(0)\n");
-    launch_kernel(0);
+    papi_errno = launch_kernel(0);
+    if (papi_errno != 0) {
+        test_fail(__FILE__, __LINE__, "launch_kernel(0)", papi_errno);
+    }
 
     usleep(10000);
 
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/tests/simple_sampling.c 7.2.0-1/src/components/rocp_sdk/tests/simple_sampling.c
--- 7.2.0~b2-1/src/components/rocp_sdk/tests/simple_sampling.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/tests/simple_sampling.c	2025-06-25 22:38:10.000000000 +0000
@@ -6,7 +6,7 @@
 
 #define NUM_EVENTS (12)
 
-extern void launch_kernel(int device_id);
+extern int launch_kernel(int device_id);
 int eventset = PAPI_NULL;
 volatile int gv=0;
 
@@ -79,7 +79,10 @@ int main(int argc, char *argv[])
 
     printf("---------------------  launch_kernel(0)\n");
     gv = 1;
-    launch_kernel(0);
+    papi_errno = launch_kernel(0);
+    if (papi_errno != 0) {
+        test_fail(__FILE__, __LINE__, "launch_kernel(0)", papi_errno);
+    }
 
     usleep(20000);
 
diff -pruN 7.2.0~b2-1/src/components/rocp_sdk/tests/two_eventsets.c 7.2.0-1/src/components/rocp_sdk/tests/two_eventsets.c
--- 7.2.0~b2-1/src/components/rocp_sdk/tests/two_eventsets.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/rocp_sdk/tests/two_eventsets.c	2025-06-25 22:38:10.000000000 +0000
@@ -3,7 +3,7 @@
 #include <papi.h>
 #include <papi_test.h>
 
-extern void launch_kernel(int device_id);
+extern int launch_kernel(int device_id);
 
 int main(int argc, char *argv[])
 {
@@ -71,7 +71,10 @@ int main(int argc, char *argv[])
     }
     for(int rep=0; rep<=3; ++rep){
 
-        launch_kernel(1);
+        papi_errno = launch_kernel(1);
+        if (papi_errno != 0) {
+            test_fail(__FILE__, __LINE__, "launch_kernel(1)", papi_errno);
+        }
 
         usleep(1000);
 
@@ -106,7 +109,10 @@ int main(int argc, char *argv[])
     }
     for(int rep=0; rep<=3; ++rep){
 
-        launch_kernel(1);
+        papi_errno = launch_kernel(1);
+        if (papi_errno != 0) {
+            test_fail(__FILE__, __LINE__, "launch_kernel(1)", papi_errno);
+        }
 
         usleep(1000);
 
@@ -140,7 +146,10 @@ int main(int argc, char *argv[])
     }
     for(int rep=0; rep<=2; ++rep){
 
-        launch_kernel(0);
+        papi_errno = launch_kernel(0);
+        if (papi_errno != 0) {
+            test_fail(__FILE__, __LINE__, "launch_kernel(0)", papi_errno);
+        }
 
         usleep(1000);
 
diff -pruN 7.2.0~b2-1/src/components/sysdetect/arm_cpu_utils.c 7.2.0-1/src/components/sysdetect/arm_cpu_utils.c
--- 7.2.0~b2-1/src/components/sysdetect/arm_cpu_utils.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/sysdetect/arm_cpu_utils.c	2025-06-25 22:38:10.000000000 +0000
@@ -26,6 +26,7 @@
 #define NAMEID_BROADCOM_THUNDERX2 0x516
 #define NAMEID_CAVIUM_THUNDERX2   0x0af
 #define NAMEID_FUJITSU_A64FX      0x001
+#define NAMEID_FUJITSU_MONAKA     0x003
 #define NAMEID_HISILICON_KUNPENG  0xd01
 #define NAMEID_APM_XGENE          0x000
 #define NAMEID_QUALCOMM_KRAIT     0x040
@@ -334,6 +335,9 @@ name_id_fujitsu_cpu_get_name( int name_i
         case NAMEID_FUJITSU_A64FX:
             strcpy(name, "Fujitsu A64FX");
             break;
+        case NAMEID_FUJITSU_MONAKA:
+            strcpy(name, "Fujitsu FUJITSU-MONAKA");
+            break;
         default:
             papi_errno = PAPI_ENOSUPP;
     }
diff -pruN 7.2.0~b2-1/src/components/sysdetect/sysdetect.c 7.2.0-1/src/components/sysdetect/sysdetect.c
--- 7.2.0~b2-1/src/components/sysdetect/sysdetect.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/sysdetect/sysdetect.c	2025-06-25 22:38:10.000000000 +0000
@@ -80,7 +80,7 @@ static int
 _sysdetect_init_component( int cidx )
 {
 
-    SUBDBG( "_sysdetect_init_component..." );
+    SUBDBG( "_sysdetect_init_component...\n" );
 
     /* Export the component id */
     _sysdetect_vector.cmp_info.CmpIdx = cidx;
@@ -99,7 +99,7 @@ static int
 _sysdetect_shutdown_component( void )
 {
 
-    SUBDBG( "_sysdetect_shutdown_component..." );
+    SUBDBG( "_sysdetect_shutdown_component...\n" );
 
     cleanup_dev_info( );
 
@@ -475,6 +475,14 @@ papi_vector_t _sysdetect_vector = {
                  .kernel_version = "n/a",
                 },
 
+    /* Sizes of framework-opaque component-private structures */
+    .size = {
+        .context = 1, /* unused */
+        .control_state = 1, /* unused */
+        .reg_value = 1, /* unused */
+        .reg_alloc = 1, /* unused */
+    },
+
     /* Used for general PAPI interactions */
     .init_component = _sysdetect_init_component,
     .init_thread = _sysdetect_init_thread,
diff -pruN 7.2.0~b2-1/src/components/template/template.c 7.2.0-1/src/components/template/template.c
--- 7.2.0~b2-1/src/components/template/template.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/template/template.c	2025-06-25 22:38:10.000000000 +0000
@@ -167,11 +167,15 @@ templ_init_private(void)
 
     int count = 0;
     papi_errno = evt_get_count(&count);
+    if (papi_errno != PAPI_OK) {
+        goto fn_fail;
+    }
     _template_vector.cmp_info.num_native_events = count;
     _template_vector.cmp_info.num_cntrs = count;
 
-  fn_exit:
     _template_vector.cmp_info.initialized = 1;
+
+  fn_exit:
     _template_vector.cmp_info.disabled = papi_errno;
     _papi_hwi_unlock(COMPONENT_LOCK);
     return papi_errno;
@@ -235,12 +239,19 @@ update_native_events(templ_control_t *ct
     int papi_errno = PAPI_OK;
 
     if (ntv_count != ctl->num_events) {
-        ctl->events_id = papi_realloc(ctl->events_id, ntv_count * sizeof(*ctl->events_id));
-        if (NULL == ctl->events_id) {
-            papi_errno = PAPI_ENOMEM;
-            goto fn_fail;
-        }
         ctl->num_events = ntv_count;
+        if (ntv_count == 0) {
+            papi_free(ctl->events_id);
+            ctl->events_id = NULL;
+            goto fn_exit;
+        }
+        else {
+            ctl->events_id = papi_realloc(ctl->events_id, ntv_count * sizeof(*ctl->events_id));
+            if (ctl->events_id == NULL) {
+                papi_errno = PAPI_ENOMEM;
+                goto fn_fail;
+            }
+        }
     }
 
     int i;
diff -pruN 7.2.0~b2-1/src/components/template/vendor_profiler_v1.c 7.2.0-1/src/components/template/vendor_profiler_v1.c
--- 7.2.0~b2-1/src/components/template/vendor_profiler_v1.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/components/template/vendor_profiler_v1.c	2025-06-25 22:38:10.000000000 +0000
@@ -281,7 +281,7 @@ vendorp1_evt_enum(unsigned int *event_co
                 papi_errno = evt_id_create(&info, event_code);
                 break;
             }
-            papi_errno = PAPI_END;
+            papi_errno = PAPI_ENOEVNT;
             break;
         default:
             papi_errno = PAPI_EINVAL;
diff -pruN 7.2.0~b2-1/src/components/topdown/README.md 7.2.0-1/src/components/topdown/README.md
--- 7.2.0~b2-1/src/components/topdown/README.md	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/README.md	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,44 @@
+# TOPDOWN Component
+
+The `topdown` component enables accessing the `PERF_METRICS` Model Specific
+Register (MSR) of modern Intel PMUs, and makes it simple to properly 
+interpret the results.
+
+* [Enabling the TOPDOWN Component](#enabling-the-topdown-component)
+* [Adding More Architectures](#adding-more-architectures)
+
+## Enabling the TOPDOWN Component
+
+To enable reading of topdown metrics the user needs to link against a
+PAPI library that was configured with the topdown component enabled. As an
+example the following command: `./configure --with-components="topdown"` is
+sufficient to enable the component.
+
+## Interpreting Results
+
+The events added by this component ending in "_PERC" store a double in each
+`long long` slot; reinterpret the bits as a double (do not convert the integer
+value) to read them as percentages. An example of how to do so follows:
+
+	PAPI_start(EventSet);
+	
+	/* some block of code... */
+	
+	PAPI_stop(EventSet, values);
+	
+	printf("First metric was %.1f\n", *((double *)(&values[0])));
+
+## Adding More Architectures
+
+To contribute more supported architectures to the component, add the cpuid model
+of the architecture to the switch statement in `_topdown_init_component` of 
+[topdown.c](./topdown.c) and set the relevant options (`supports_l2`, 
+`required_core_type`, etc.)
+
+## Warning on Heterogeneous CPU Affinity
+
+As of 2024-12-11, all of Intel's hybrid CPU architectures support the
+PERF_METRICS MSR only on their 'performance' cores (p-cores). This means that to
+measure topdown events on a heterogeneous processor, one must limit the process
+affinity to p-cores only, using a program like `taskset` or `numactl`. Otherwise,
+PAPI will exit to avoid encountering a segmentation fault.
\ No newline at end of file
diff -pruN 7.2.0~b2-1/src/components/topdown/Rules.topdown 7.2.0-1/src/components/topdown/Rules.topdown
--- 7.2.0~b2-1/src/components/topdown/Rules.topdown	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/Rules.topdown	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,7 @@
+COMPSRCS += components/topdown/topdown.c
+COMPOBJS += topdown.o
+
+LDFLAGS+=-ldl
+
+topdown.o: components/topdown/topdown.c components/topdown/topdown.h $(HEADERS) 
+	$(CC) $(LIBCFLAGS) $(OPTFLAGS) -c components/topdown/topdown.c -o topdown.o $(LDFLAGS)
diff -pruN 7.2.0~b2-1/src/components/topdown/tests/Makefile 7.2.0-1/src/components/topdown/tests/Makefile
--- 7.2.0~b2-1/src/components/topdown/tests/Makefile	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/tests/Makefile	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,22 @@
+NAME=topdown
+include ../../Makefile_comp_tests.target
+
+%.o:%.c
+	$(CC) $(CFLAGS) $(OPTFLAGS) $(INCLUDE) -c -o $@ $<
+
+TESTS = topdown_basic topdown_L1 topdown_L2
+
+topdown_tests: $(TESTS)
+
+topdown_basic: topdown_basic.o $(UTILOBJS) $(PAPILIB)
+	$(CC) $(CFLAGS) $(INCLUDE) -o topdown_basic topdown_basic.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
+
+topdown_L1: topdown_L1.o $(UTILOBJS) $(PAPILIB)
+	$(CC) $(CFLAGS) $(INCLUDE) -o topdown_L1 topdown_L1.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
+
+topdown_L2: topdown_L2.o $(UTILOBJS) $(PAPILIB)
+	$(CC) $(CFLAGS) $(INCLUDE) -o topdown_L2 topdown_L2.o $(UTILOBJS) $(PAPILIB) $(LDFLAGS) 
+
+
+clean:
+	rm -f $(TESTS) *.o
diff -pruN 7.2.0~b2-1/src/components/topdown/tests/topdown_L1.c 7.2.0-1/src/components/topdown/tests/topdown_L1.c
--- 7.2.0~b2-1/src/components/topdown/tests/topdown_L1.c	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/tests/topdown_L1.c	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,198 @@
+/*
+ * Specifically tests that the Level 1 topdown events make sense.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#define NUM_EVENTS  4
+#define PERC_TOLERANCE  1.5
+
+/* Workload for the measured region: an iterative Fibonacci-style loop whose
+ * results are discarded. optimize("O0") requests an unoptimized build of it. */
+void __attribute__((optimize("O0"))) fib(int n)
+{
+	long i, a = 0;
+	int b = 1;
+	for (i = 0; i < n; i++)
+	{
+		b = b + a;
+		a = b - a;
+	}
+}
+
+/* The "_PERC" events store a double in each long long slot. Copy the bytes
+ * out with memcpy rather than dereferencing a casted pointer, which would
+ * violate C's strict-aliasing rule. */
+static double counter_as_double(long long raw)
+{
+	double val;
+	memcpy(&val, &raw, sizeof(val));
+	return val;
+}
+
+/* Measures the four level 1 topdown percentages around fib() and checks them:
+ * a failure if they do not sum to ~100%, warnings for implausible ratios. */
+int main(int argc, char **argv)
+{
+	int i, quiet, retval;
+	int EventSet = PAPI_NULL;
+	const PAPI_component_info_t *cmpinfo = NULL;
+	int numcmp, cid, topdown_cid = -1;
+	long long values[NUM_EVENTS];
+	double metric[NUM_EVENTS];
+	double tmp;
+
+	/* Set TESTS_QUIET variable */
+	quiet = tests_quiet(argc, argv);
+
+	/* PAPI Initialization */
+	retval = PAPI_library_init(PAPI_VER_CURRENT);
+	if (retval != PAPI_VER_CURRENT)
+	{
+		test_fail(__FILE__, __LINE__, "PAPI_library_init failed\n", retval);
+	}
+
+	if (!quiet)
+	{
+		printf("Testing topdown component with PAPI %d.%d.%d\n",
+			   PAPI_VERSION_MAJOR(PAPI_VERSION),
+			   PAPI_VERSION_MINOR(PAPI_VERSION),
+			   PAPI_VERSION_REVISION(PAPI_VERSION));
+	}
+
+	/*******************************/
+	/* Find the topdown component  */
+	/*******************************/
+	numcmp = PAPI_num_components();
+	for (cid = 0; cid < numcmp; cid++)
+	{
+		if ((cmpinfo = PAPI_get_component_info(cid)) == NULL)
+		{
+			test_fail(__FILE__, __LINE__, "PAPI_get_component_info failed\n", 0);
+		}
+		if (!quiet)
+		{
+			printf("\tComponent %d - %d events - %s\n", cid,
+				   cmpinfo->num_native_events,
+				   cmpinfo->name);
+		}
+		if (strstr(cmpinfo->name, "topdown"))
+		{
+			topdown_cid = cid;
+
+			/* check that the component is enabled */
+			if (cmpinfo->disabled)
+			{
+				printf("Topdown component is disabled: %s\n", cmpinfo->disabled_reason);
+				test_fail(__FILE__, __LINE__, "Component is not enabled\n", 0);
+			}
+		}
+	}
+
+	if (topdown_cid < 0)
+	{
+		test_skip(__FILE__, __LINE__, "Topdown component not found\n", 0);
+	}
+
+	if (!quiet)
+	{
+		printf("\nFound Topdown Component at id %d\n", topdown_cid);
+		printf("\nAdding the level 1 topdown metrics..\n");
+	}
+
+	/* Create EventSet */
+	retval = PAPI_create_eventset(&EventSet);
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+				  "PAPI_create_eventset()", retval);
+	}
+
+	/* Add the level 1 topdown metrics */
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_RETIRING_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_RETIRING_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_BAD_SPEC_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_BAD_SPEC_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_FE_BOUND_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_FE_BOUND_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_BE_BOUND_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_BE_BOUND_PERC", retval);
+	}
+
+	/* Measure a loop-based Fibonacci calculation. The workload needs to be */
+	/* fairly large in order to acquire an accurate set of measurements. */
+	retval = PAPI_start(EventSet);
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__, "PAPI_start", retval);
+	}
+	fib(6000000);
+	retval = PAPI_stop(EventSet, values);
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__, "PAPI_stop", retval);
+	}
+
+	/* run some sanity checks: */
+
+	/* first, decode each slot; the sum of all level 1 percentages should be 100% */
+	tmp = 0;
+	for (i=0; i<NUM_EVENTS; i++) {
+		metric[i] = counter_as_double(values[i]);
+		tmp += metric[i];
+	}
+	if (!quiet)
+		printf("L1 metric percentages sum to %.2f%%\n", tmp);
+	if (tmp < 100 - PERC_TOLERANCE || tmp > 100 + PERC_TOLERANCE) {
+		test_fail(__FILE__, __LINE__,
+			"Level 1 topdown metric percentages did not sum to 100%%\n", 1);
+	}
+
+	if (!quiet)
+		printf("\tRetiring:\t%.1f%%\n", metric[0]);
+
+	/* next, verify that the percentage of bad spec slots is reasonable. */
+	/* for this benchmark, we can expect very low rate of bad speculation */
+	/* due to the fact that it consists of a simple for loop */
+	if (!quiet)
+		printf("\tBad spec:\t%.1f%%\n", metric[1]);
+	if (metric[1] > 5.0) {
+		test_warn(__FILE__, __LINE__,
+			"The percentage of slots affected by bad speculation was unexpectedly high", 1);
+	}
+
+	/* finally, make sure the frontend/backend bound percentages make sense */
+	/* we should expect this benchmark to be significantly more limited */
+	/* by the back end, so check that be bound is larger than the fe bound */
+	if (!quiet) {
+		printf("\tFrontend bound:\t%.1f%%\n", metric[2]);
+		printf("\tBackend bound:\t%.1f%%\n", metric[3]);
+	}
+	if (metric[2] > metric[3]) {
+		test_warn(__FILE__, __LINE__,
+			"Frontend bound should be significantly smaller than backend bound", 1);
+	}
+
+	return 0;
+}
\ No newline at end of file
diff -pruN 7.2.0~b2-1/src/components/topdown/tests/topdown_L2.c 7.2.0-1/src/components/topdown/tests/topdown_L2.c
--- 7.2.0~b2-1/src/components/topdown/tests/topdown_L2.c	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/tests/topdown_L2.c	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,223 @@
+/*
+ * Specifically tests that the Level 2 topdown events make sense.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+#define NUM_EVENTS  8
+#define PERC_TOLERANCE  1.5
+
+// fibonacci-style add/subtract loop of n iterations: the workload measured between PAPI_start/PAPI_stop; built at O0 (presumably so the loop is kept)
+void __attribute__((optimize("O0"))) fib(int n)
+{
+	long i, a = 0;
+	int b = 1;
+	for (i = 0; i < n; i++)
+	{
+		b = b + a;
+		a = b - a;
+	}
+}
+/* Adds the eight level 2 topdown metrics, measures fib() and sanity-checks the percentages. */
+int main(int argc, char **argv)
+{
+	int i, quiet, retval;
+	int EventSet = PAPI_NULL;
+	const PAPI_component_info_t *cmpinfo = NULL;
+	int numcmp, cid, topdown_cid = -1;
+	long long values[NUM_EVENTS];
+	double tmp;
+
+	/* Set TESTS_QUIET variable */
+	quiet = tests_quiet(argc, argv);
+
+	/* PAPI Initialization */
+	retval = PAPI_library_init(PAPI_VER_CURRENT);
+	if (retval != PAPI_VER_CURRENT)
+	{
+		test_fail(__FILE__, __LINE__, "PAPI_library_init failed\n", retval);
+	}
+
+	if (!quiet)
+	{
+		printf("Testing topdown component with PAPI %d.%d.%d\n",
+			   PAPI_VERSION_MAJOR(PAPI_VERSION),
+			   PAPI_VERSION_MINOR(PAPI_VERSION),
+			   PAPI_VERSION_REVISION(PAPI_VERSION));
+	}
+
+	/*******************************/
+	/* Find the topdown component  */
+	/*******************************/
+	numcmp = PAPI_num_components();
+	for (cid = 0; cid < numcmp; cid++)
+	{
+		if ((cmpinfo = PAPI_get_component_info(cid)) == NULL)
+		{
+			test_fail(__FILE__, __LINE__, "PAPI_get_component_info failed\n", 0);
+		}
+		if (!quiet)
+		{
+			printf("\tComponent %d - %d events - %s\n", cid,
+				   cmpinfo->num_native_events,
+				   cmpinfo->name);
+		}
+		if (strstr(cmpinfo->name, "topdown"))
+		{
+			topdown_cid = cid;
+
+			/* check that the component is enabled */
+			if (cmpinfo->disabled)
+			{
+				printf("Topdown component is disabled: %s\n", cmpinfo->disabled_reason);
+				test_fail(__FILE__, __LINE__, "The TOPDOWN component is not enabled\n", 0);
+			}
+		}
+	}
+
+	if (topdown_cid < 0)
+	{
+		test_skip(__FILE__, __LINE__, "Topdown component not found\n", 0);
+	}
+
+	if (!quiet)
+	{
+		printf("\nFound Topdown Component at id %d\n", topdown_cid);
+		printf("\nAdding the level 2 topdown metrics..\n");
+	}
+
+	/* Create EventSet */
+	retval = PAPI_create_eventset(&EventSet);
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+				  "PAPI_create_eventset()", retval);
+	}
+
+	/* Add the level 2 topdown metrics */
+	/* if we can't, just skip because not all processors support level 2 */
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_HEAVY_OPS_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_skip(__FILE__, __LINE__,
+			"Error adding TOPDOWN_HEAVY_OPS_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_LIGHT_OPS_PERC");
+	if (retval != PAPI_OK)
+	{
+		/* if the first L2 event was successfully added though, */
+		/* subsequent failures indicate a deeper problem */
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_LIGHT_OPS_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_BR_MISPREDICT_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_BR_MISPREDICT_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_MACHINE_CLEARS_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_MACHINE_CLEARS_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_FETCH_LAT_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_FETCH_LAT_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_FETCH_BAND_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_FETCH_BAND_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_MEM_BOUND_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_MEM_BOUND_PERC", retval);
+	}
+	retval = PAPI_add_named_event(EventSet, "TOPDOWN_CORE_BOUND_PERC");
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+			"Error adding TOPDOWN_CORE_BOUND_PERC", retval);
+	}
+
+	/* measure a loop-based calculation of the sum of the fibonacci sequence */
+	/* the workload needs to be fairly large in order to acquire an accurate */
+	/* set of measurements; fail early so 'values' is never read uninitialized */
+	if ((retval = PAPI_start(EventSet)) != PAPI_OK)
+		test_fail(__FILE__, __LINE__, "PAPI_start()", retval);
+	fib(6000000);
+	if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK)
+		test_fail(__FILE__, __LINE__, "PAPI_stop()", retval);
+
+	/* run some sanity checks: */
+	/* first, the sum of all level 2 metric percentages should be 100% */
+	tmp = 0;
+	for (i=0; i<NUM_EVENTS; i++) {
+		tmp += *((double *)(&values[i]));
+	}
+	if (!quiet)
+		printf("L2 metric percentages sum to %.2f%%:\n", tmp);
+	if (tmp < 100 - PERC_TOLERANCE || tmp > 100 + PERC_TOLERANCE) {
+		test_fail(__FILE__, __LINE__,
+			"Level 2 topdown metric percentages did not sum to 100%%\n", 1);
+	}
+
+	/* next, check that we are retiring more light ops than heavy ops */
+	/* this is a very reasonable expectation for a simple loop performing */
+	/* scalar add and multiply operations */
+	if (!quiet) {
+		printf("\tHeavy ops:\t%.1f%%\n", *((double *)(&values[0])));
+		printf("\tLight ops:\t%.1f%%\n", *((double *)(&values[1])));
+	}
+	if (*((double *)(&values[0])) > *((double *)(&values[1]))) {
+		test_warn(__FILE__, __LINE__,
+			"Heavy ops should be much smaller than light ops", 1);
+	}
+
+	/* next, check that the branch mispredictions and machine clears */
+	/* are insignificant as this benchmark should have good speculation */
+	if (!quiet) {
+		printf("\tBranch mispredictions:\t%.1f%%\n", *((double *)(&values[2])));
+		printf("\tMachine clears:\t%.1f%%\n", *((double *)(&values[3])));
+	}
+	if ((*((double *)(&values[2])) + *((double *)(&values[3]))) > 5.0) {
+		test_warn(__FILE__, __LINE__,
+			"Bad speculation should be insignificant for this workload", 1);
+	}
+
+	/* next, check that the fetch latency and bandwidth are insignificant */
+	if (!quiet) {
+		printf("\tFetch latency:\t%.1f%%\n", *((double *)(&values[4])));
+		printf("\tFetch bandwidth:\t%.1f%%\n", *((double *)(&values[5])));
+	}
+	if ((*((double *)(&values[4])) + *((double *)(&values[5]))) > 10.0) {
+		test_warn(__FILE__, __LINE__,
+			"Frontend bound should be insignificant for this workload", 1);
+	}
+
+	/* finally, check that core bound is greater than memory bound. */
+	/* we can expect this because there are no memory loads/stores here */
+	if (!quiet) {
+		printf("\tMemory bound:\t%.1f%%\n", *((double *)(&values[6])));
+		printf("\tCore bound:\t%.1f%%\n", *((double *)(&values[7])));
+	}
+	if (*((double *)(&values[6])) > *((double *)(&values[7]))) {
+		test_warn(__FILE__, __LINE__,
+			"The workload should be significantly more core bound than memory bound", 1);
+	}
+
+	return 0;
+}
\ No newline at end of file
diff -pruN 7.2.0~b2-1/src/components/topdown/tests/topdown_basic.c 7.2.0-1/src/components/topdown/tests/topdown_basic.c
--- 7.2.0~b2-1/src/components/topdown/tests/topdown_basic.c	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/tests/topdown_basic.c	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,175 @@
+/*
+ * Basic test that just adds all of the topdown events and make sure they dont
+ * produce any errors.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+
+#include "papi.h"
+#include "papi_test.h"
+
+// fibonacci-style add/subtract loop of n iterations: the workload measured between PAPI_start/PAPI_stop; built at O0 (presumably so the loop is kept)
+void __attribute__((optimize("O0"))) fib(int n)
+{
+	long i, a = 0;
+	int b = 1;
+	for (i = 0; i < n; i++)
+	{
+		b = b + a;
+		a = b - a;
+	}
+}
+/* Adds every event of the topdown component to one EventSet, measures fib() and range-checks each value. */
+int main(int argc, char **argv)
+{
+	int i, quiet, retval;
+	int EventSet = PAPI_NULL;
+	const PAPI_component_info_t *cmpinfo = NULL;
+	int numcmp, cid, topdown_cid = -1;
+	int code;
+	char event_name[PAPI_MAX_STR_LEN];
+	PAPI_event_info_t event_info;
+	int num_events = 0;
+	long long *values;
+
+	/* Set TESTS_QUIET variable */
+	quiet = tests_quiet(argc, argv);
+
+	/* PAPI Initialization */
+	retval = PAPI_library_init(PAPI_VER_CURRENT);
+	if (retval != PAPI_VER_CURRENT)
+	{
+		test_fail(__FILE__, __LINE__, "PAPI_library_init failed\n", retval);
+	}
+
+	if (!quiet)
+	{
+		printf("Testing topdown component with PAPI %d.%d.%d\n",
+			   PAPI_VERSION_MAJOR(PAPI_VERSION),
+			   PAPI_VERSION_MINOR(PAPI_VERSION),
+			   PAPI_VERSION_REVISION(PAPI_VERSION));
+	}
+
+	/*******************************/
+	/* Find the topdown component  */
+	/*******************************/
+	numcmp = PAPI_num_components();
+	for (cid = 0; cid < numcmp; cid++)
+	{
+		if ((cmpinfo = PAPI_get_component_info(cid)) == NULL)
+		{
+			test_fail(__FILE__, __LINE__, "PAPI_get_component_info failed\n", 0);
+		}
+		if (!quiet)
+		{
+			printf("\tComponent %d - %d events - %s\n", cid,
+				   cmpinfo->num_native_events,
+				   cmpinfo->name);
+		}
+		if (strstr(cmpinfo->name, "topdown"))
+		{
+			topdown_cid = cid;
+
+			/* check that the component is enabled */
+			if (cmpinfo->disabled)
+			{
+				printf("Topdown component is disabled: %s\n", cmpinfo->disabled_reason);
+				test_fail(__FILE__, __LINE__, "Component is not enabled\n", 0);
+			}
+		}
+	}
+
+	if (topdown_cid < 0)
+	{
+		test_skip(__FILE__, __LINE__, "Topdown component not found\n", 0);
+	}
+
+	if (!quiet)
+	{
+		printf("\nFound Topdown Component at id %d\n", topdown_cid);
+		printf("\nListing all events in this component:\n");
+	}
+
+	/* Create EventSet */
+	retval = PAPI_create_eventset(&EventSet);
+	if (retval != PAPI_OK)
+	{
+		test_fail(__FILE__, __LINE__,
+				  "PAPI_create_eventset()", retval);
+	}
+
+	/*****************************************************/
+	/* Add all the events to an eventset as a basic test */
+	/*****************************************************/
+	code = PAPI_NATIVE_MASK;
+	retval = PAPI_enum_cmp_event(&code, PAPI_ENUM_FIRST, topdown_cid);
+
+	while (retval == PAPI_OK)
+	{
+		/* keep each call's own status so a failure reports the real error code */
+		if ((retval = PAPI_event_code_to_name(code, event_name)) != PAPI_OK)
+		{
+			printf("Error translating %#x\n", code);
+			test_fail(__FILE__, __LINE__,
+					  "PAPI_event_code_to_name", retval);
+		}
+		if ((retval = PAPI_get_event_info(code, &event_info)) != PAPI_OK)
+		{
+			printf("Error getting info for event %#x\n", code);
+			test_fail(__FILE__, __LINE__,
+					  "PAPI_get_event_info()", retval);
+		}
+
+		retval = PAPI_add_event(EventSet, code);
+		if (retval != PAPI_OK)
+		{
+			test_fail(__FILE__, __LINE__,
+					  "PAPI_add_event()", retval);
+		}
+
+		if (!quiet)
+		{
+			printf("\tEvent %#x: %s -- %s\n",
+				   code, event_name, event_info.long_descr);
+		}
+
+		num_events += 1;
+		retval = PAPI_enum_cmp_event(&code, PAPI_ENUM_EVENTS, topdown_cid);
+	}
+	if (!quiet)
+		printf("\n");
+
+	/* ensure there is space for the output values */
+	values = calloc(num_events, sizeof(long long));
+	if (values == NULL)
+	{
+		test_fail(__FILE__, __LINE__,
+				  "Insufficient memory", PAPI_ENOMEM);
+	}
+
+	/* now measure some code to make sure the events work; a failed start or */
+	/* stop must not be mistaken for a run that produced all-zero metrics */
+	if ((retval = PAPI_start(EventSet)) != PAPI_OK)
+		test_fail(__FILE__, __LINE__, "PAPI_start()", retval);
+	fib(6000000);
+	if ((retval = PAPI_stop(EventSet, values)) != PAPI_OK)
+		test_fail(__FILE__, __LINE__, "PAPI_stop()", retval);
+	if (!quiet)
+		printf("Values:\n");
+	for (i = 0; i < num_events; i++)
+	{
+		/* ensure the metric percentages are between 0 and 100 */
+		if (*((double *)(&values[i])) < 0 || *((double *)(&values[i])) > 100.0)
+		{
+			test_fail(__FILE__, __LINE__,
+					  "Topdown metric was not a valid percentage", retval);
+		}
+
+		if (!quiet)
+			printf("\t%d:\t%.1lf%%\n", i, *((double *)(&values[i])));
+	}
+
+	return 0;
+}
\ No newline at end of file
diff -pruN 7.2.0~b2-1/src/components/topdown/topdown.c 7.2.0-1/src/components/topdown/topdown.c
--- 7.2.0~b2-1/src/components/topdown/topdown.c	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/topdown.c	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,935 @@
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <linux/perf_event.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stddef.h>
+#include <dlfcn.h>
+
+#ifndef _GNU_SOURCE
+	#define _GNU_SOURCE
+#endif
+#include <sched.h>
+
+/* Headers required by PAPI */
+#include "papi.h"
+#include "papi_internal.h"
+#include "papi_vector.h"
+#include "papi_memory.h" /* defines papi_malloc(), etc. */
+
+#include "topdown.h"
+
+// Invoked when a string function (snprintf, strncpy) reports an error. That
+// should never happen, but checking the result prevents compiler warnings, so
+// print the location and exit. do/while(0) makes 'HANDLE_STRING_ERROR;' a single statement.
+#define HANDLE_STRING_ERROR                                                               \
+	do {                                                                                  \
+		fprintf(stderr, "%s:%i unexpected string function error.\n", __FILE__, __LINE__); \
+		exit(-1);                                                                         \
+	} while (0)
+
+papi_vector_t _topdown_vector;
+
+static _topdown_native_event_entry_t *topdown_native_events = NULL;
+static int num_events = 0;
+
+static int librseq_loaded = 0;
+
+#define INTEL_CORE_TYPE_EFFICIENT	0x20	/* also known as 'ATOM' */
+#define INTEL_CORE_TYPE_PERFORMANCE	0x40	/* also known as 'CORE' */
+#define INTEL_CORE_TYPE_HOMOGENEOUS	-1		/* core type is non-issue */
+static int required_core_type = INTEL_CORE_TYPE_HOMOGENEOUS;
+
+/**************************/
+/* x86 specific functions */
+/**************************/
+
+/* forward declarations */
+void assert_affinity(int core_type);
+static inline __attribute__((always_inline))
+unsigned long long rdpmc_rseq_protected(unsigned int counter, int allowed_core_type);
+
+/* Read performance counter 'counter' with rdpmc, guarded when a specific core type is required */
+static inline unsigned long long _rdpmc(unsigned int counter) {
+	unsigned int lo, hi;
+	unsigned long long result;
+	const int guarded = (required_core_type != INTEL_CORE_TYPE_HOMOGENEOUS);
+
+	/* preferred guard: core-type check and rdpmc inside a restartable sequence */
+	if (guarded && librseq_loaded)
+		return rdpmc_rseq_protected(counter, required_core_type);
+
+	/* fallback guard: check the core type first; being moved to another core */
+	/* between this check and the rdpmc below is still possible */
+	if (guarded)
+		assert_affinity(required_core_type);
+
+	__asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (counter));
+	result = ((unsigned long long)hi << 32) | (unsigned long long)lo;
+	return result;
+}
+
+/* the four register values produced by one cpuid invocation; filled in by cpuid2() */
+typedef struct {
+	unsigned int eax;
+	unsigned int ebx;
+	unsigned int ecx;
+	unsigned int edx;
+} cpuid_reg_t;
+/* cpuid leaf 'func', sub-leaf 'subfunc' -> *reg; volatile because the answer depends on the executing core */
+void cpuid2( cpuid_reg_t *reg, unsigned int func, unsigned int subfunc )
+{
+	__asm__ volatile ("cpuid;"
+			 : "=a" (reg->eax), "=b" (reg->ebx), "=c" (reg->ecx), "=d" (reg->edx)
+			 : "a"  (func), "c" (subfunc));
+}
+
+/**************************************/
+/* Hybrid processor support functions */
+/**************************************/
+
+/* 1/0: does the executing core report 'core_type'? PAPI_ENOSUPP when leaf 0x1A gives no answer */
+int active_core_type_is(int core_type)
+{
+	cpuid_reg_t leaf;
+
+	cpuid2(&leaf, 0, 0);		/* leaf 0: EAX is compared against 0x1A below */
+	if (leaf.eax < 0x1a)
+		return PAPI_ENOSUPP;
+	cpuid2(&leaf, 0x1a, 0);		/* leaf 0x1A: bits 31..24 of EAX are the core type */
+	if (leaf.eax == 0)
+		return PAPI_ENOSUPP;
+	return ((leaf.eax >> 24) & 0xff) == core_type;
+}
+
+/* helper to allow printing core type in errors: writes a short label for */
+/* 'core_type' into 'out', which must hold at least PAPI_MIN_STR_LEN bytes */
+void core_type_to_name(int core_type, char *out)
+{
+	const char *label;
+	int err;
+
+	switch (core_type) {
+		case INTEL_CORE_TYPE_EFFICIENT:
+			label = "e-core (Atom)";
+			break;
+		case INTEL_CORE_TYPE_PERFORMANCE:
+			label = "p-core (Core)";
+			break;
+		default:
+			label = "not applicable (N/A)";
+			break;
+	}
+
+	/* the bound handed to snprintf is PAPI_MIN_STR_LEN, so that (and not */
+	/* PAPI_MAX_STR_LEN) is the limit a truncated result must be checked against */
+	err = snprintf(out, PAPI_MIN_STR_LEN, "%s", label);
+	if (err < 0 || err >= PAPI_MIN_STR_LEN)
+		HANDLE_STRING_ERROR;
+}
+
+/* Report that the process is running on a disallowed core type and terminate */
+/* with status 127 instead of continuing (continuing risks a segfault). */
+void handle_affinity_error(int allowed_type)
+{
+	char type_label[PAPI_MIN_STR_LEN];
+
+	core_type_to_name(allowed_type, type_label);
+	fprintf(stderr,
+		"Error: Process was moved to an unsupported core type. To use the PAPI topdown component, process affinity must be limited to cores of type '%s' on this architecture.\n",
+		type_label);
+	exit(127);
+}
+
+/* terminate via handle_affinity_error() unless the executing core is of type 'core_type' */
+void assert_affinity(int core_type) {
+	/* any non-zero answer (including an error code) counts as an allowed core */
+	if (active_core_type_is(core_type))
+		return;
+	handle_affinity_error(core_type);
+}
+
+/**********************************************/
+/* Restartable sequence heterogeneous support */
+/**********************************************/
+
+/* dlsym access to librseq symbols */
+static ptrdiff_t *rseq_offset_ptr;
+static int (*rseq_available_ptr)(unsigned int query);
+
+/* local wrapper: calls rseq_available() through the pointer that link_librseq() obtains from dlsym */
+static int librseq_rseq_available(unsigned int query) { return rseq_available_ptr(query); }
+
+/* Load librseq at run time and resolve the symbols the guarded rdpmc path uses. */
+/* Returns PAPI_OK on success; otherwise PAPI_ENOSUPP with the library handle released. */
+int link_librseq()
+{
+	void *lib = dlopen("librseq.so", RTLD_NOW);
+	if (!lib)
+		return PAPI_ENOSUPP;
+
+	rseq_available_ptr = dlsym(lib, "rseq_available");
+	rseq_offset_ptr = dlsym(lib, "rseq_offset");
+
+	/* usable only if both symbols resolved and rseq_available(0) reports success */
+	if (rseq_available_ptr != NULL && rseq_offset_ptr != NULL
+		&& librseq_rseq_available(0))
+		return PAPI_OK;	/* library stays loaded: the pointers remain in use */
+
+	/* unusable: clear the pointers before unloading so none is left dangling */
+	rseq_available_ptr = NULL;
+	rseq_offset_ptr = NULL;
+	dlclose(lib);
+	return PAPI_ENOSUPP;
+}
+
+/* Reads counter 'counter': cpuid core-type check + rdpmc in one rseq critical section. Assumes: */
+/* 1. Must be an Intel x86 processor */
+/* 2. Processor must be hybrid/heterogeneous (e-core/p-core) */
+/* 3. perf_event_open() + mmap() have been used to enable userspace rdpmc */
+static inline __attribute__((always_inline))
+unsigned long long rdpmc_rseq_protected(unsigned int counter, int allowed_core_type)
+{
+	unsigned int low = -1;
+	unsigned int high = -1;
+	int core_check;	/* CPUID.1A EAX as stored by the asm; 0 = the check was not reached */
+
+restart_sequence:
+	core_check = 0;
+	__asm__ __volatile__ goto (
+		/* set up critical section of restartable sequence */
+		".pushsection __rseq_cs, \"aw\"\n\t" ".balign 32\n\t" "3:\n\t" ".long 0x0\n\t" ".long 0x0\n\t" ".quad 1f\n\t" ".quad (2f) - (1f)\n\t" ".quad 4f\n\t" ".long 0x0\n\t" ".long 0x0\n\t" ".quad 1f\n\t" ".quad (2f) - (1f)\n\t" ".quad 4f\n\t" ".popsection\n\t" ".pushsection __rseq_cs_ptr_array, \"aw\"\n\t" ".quad 3b\n\t" ".popsection\n\t"
+		
+		/* start rseq by storing table entry pointer into rseq_cs. */
+		"leaq 3b(%%rip), %%rax\n\t" 
+		"movq %%rax, %%fs:8(%[rseq_offset])\n\t" 
+		"1:\n\t"
+
+		/* check if core type is valid */
+		"mov $0x1A, %%eax\n\t"
+		"mov $0x00, %%ecx\n\t"
+		"cpuid\n\t"
+		"mov %%eax, %[core_check]\n\t"
+		"test %[core_type], %%eax\n\t"
+		"jz 4f\n\t" /* abort if core type is invalid */
+
+		/* make the rdpmc call */
+		"movl %[counter], %%ecx\n\t"
+		"rdpmc\n\t"
+		/* retrieve results of rdpmc */
+		"mov %%edx, %[high]\n\t"
+		"mov %%eax, %[low]\n\t"
+		"2:\n\t"
+		/* define abort section */
+		".pushsection __rseq_failure, \"ax\"\n\t" ".byte 0x0f, 0xb9, 0x3d\n\t" ".long " "0x53053053" "\n\t" "4" ":\n\t" "" "jmp %l[" "abort" "]\n\t" ".popsection\n\t"
+		:
+		: [core_check]	"m"  (core_check),
+		  [low]			"m"	 (low),
+		  [high]		"m"  (high),
+		  [rseq_offset]	"r" (*rseq_offset_ptr),
+		  [counter]		"r" (counter),
+		  [core_type]	"r" (allowed_core_type << 24) /* shift mask into place */
+		: "memory", "cc", "rax", "eax", "rbx", "ecx", "edx" /* cpuid overwrites ebx too; keep inputs out of it */
+		: abort
+	);
+	return (unsigned long long)low | ((unsigned long long)high) << 32;
+
+abort:
+	/* we may abort because the core type was found to be invalid, or */
+	/* we might abort because the restartable sequence was preempted */
+	/* therefore we have to check why the abort happened here */
+	if ((((core_check >> 24) & 0xff) != allowed_core_type) && core_check != 0) {
+		/* sequence reached the core check, and the core type was disallowed !*/
+		handle_affinity_error(allowed_core_type);
+		return PAPI_EBUG; /* should never return, handle_affinity_error exits */
+	}
+	
+	/* if the critical section aborted, but not because the core type is */ 
+	/* invalid, then give it another shot */
+	/* while theoretically possible, this has never been observed to restart */
+	/* more than once before either succeeding or failing the check */
+	goto restart_sequence;
+}
+
+/********************************/
+/* Internal component functions */
+/********************************/
+
+/* In case headers aren't new enough to have __NR_perf_event_open */
+#ifndef __NR_perf_event_open
+#define __NR_perf_event_open 298 /* __x86_64__ is the only arch we support */
+#endif
+/* direct syscall wrapper; declared weak so another definition of the symbol can replace it */
+__attribute__((weak)) int
+perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu, int group_fd, unsigned long flags)
+{
+	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+/* read PERF_METRICS: one raw counter value obtained through _rdpmc() */
+static inline unsigned long long read_metrics(void)
+{
+	const unsigned int selector = TOPDOWN_PERF_METRICS | TOPDOWN_METRIC_COUNTER_TOPDOWN_L1_L2;
+	return _rdpmc(selector);
+}
+/* take byte i (8-bit field at bit offset i * 8) of val and scale it to a fraction of 0xff */
+float extract_metric(int i, unsigned long long val)
+{
+	const unsigned long long field = (val >> (i * 8)) & 0xff;
+	return (double)field / 0xff;
+}
+
+/***********************************************/
+/* Required PAPI component interface functions */
+/***********************************************/
+/* Component init: check CPU support, build the native event table, store the status in cmp_info.disabled. */
+static int
+_topdown_init_component(int cidx)
+{
+	int err, i;
+	int retval = PAPI_OK;
+	int supports_l2 = 0;
+
+	char *strCpy;
+	char typeStr[PAPI_MIN_STR_LEN];
+
+	const PAPI_hw_info_t *hw_info;
+
+	/* Check for processor support */
+	hw_info = &(_papi_hwi_system_info.hw_info);
+	switch (hw_info->vendor)
+	{
+	case PAPI_VENDOR_INTEL:
+		break;
+	default:
+		err = snprintf(_topdown_vector.cmp_info.disabled_reason,
+					   PAPI_MAX_STR_LEN, "Not a supported CPU vendor");
+		_topdown_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = 0;
+		if (err > PAPI_MAX_STR_LEN)
+			HANDLE_STRING_ERROR;
+		retval = PAPI_ENOSUPP;
+		goto fn_fail;
+	}
+
+	/* Ideally, we should check the IA32_PERF_CAPABILITIES MSR for */
+	/* PERF_METRICS support. However, since doing this requires a */
+	/* sysadmin to go through a lot of hassle, it may be better to */
+	/* just hardcode supported platforms instead */
+
+	if (hw_info->vendor == PAPI_VENDOR_INTEL)
+	{
+		if (hw_info->cpuid_family != 6)
+		{
+			/* Not a family 6 machine */
+			strCpy = strncpy(_topdown_vector.cmp_info.disabled_reason,
+							 "CPU family not supported", PAPI_MAX_STR_LEN);
+			_topdown_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = 0;
+			if (strCpy == NULL)
+				HANDLE_STRING_ERROR;
+			retval = PAPI_ENOIMPL;
+			goto fn_fail;
+		}
+
+		/* Detect topdown support */
+		switch (hw_info->cpuid_model)
+		{
+		/* The model id can be found in Table 2-1 of the */
+		/* IA-32 Architectures Software Developer’s Manual */
+
+		/* homogeneous machines that do not support l2 TMA */
+		case 0x6a:	/* IceLake 3rd gen Xeon */
+		case 0x6c:	/* IceLake 3rd gen Xeon */
+		case 0x7d:	/* IceLake 10th gen Core */
+		case 0x7e:	/* IceLake 10th gen Core */
+		case 0x8c:	/* TigerLake 11th gen Core */
+		case 0x8d:	/* TigerLake 11th gen Core */
+		case 0xa7:	/* RocketLake 11th gen Core */
+			required_core_type = INTEL_CORE_TYPE_HOMOGENEOUS;
+			supports_l2 = 0;
+			break;
+
+		/* homogeneous machines that support l2 TMA */
+		case 0x8f:	/* SapphireRapids 4th gen Xeon */
+		case 0xcf:	/* EmeraldRapids 5th gen Xeon */
+			required_core_type = INTEL_CORE_TYPE_HOMOGENEOUS;
+			supports_l2 = 1;
+			break;
+
+		/* hybrid machines that support l2 TMA and are locked to the P-core */
+		case 0xaa:	/* MeteorLake Core Ultra 7 hybrid */
+		case 0xad:	/* GraniteRapids 6th gen Xeon P-core */
+		case 0xae:	/* GraniteRapids 6th gen Xeon P-core */
+		case 0x97:	/* AlderLake 12th gen Core hybrid */
+		case 0x9a:	/* AlderLake 12th gen Core hybrid */
+		case 0xb7:	/* RaptorLake-S/HX 13th gen Core hybrid */
+		case 0xba:	/* RaptorLake 13th gen Core hybrid */
+		case 0xbd:	/* LunarLake Series 2 Core Ultra hybrid */
+		case 0xbf:	/* RaptorLake 13th gen Core hybrid */
+			required_core_type = INTEL_CORE_TYPE_PERFORMANCE;
+			supports_l2 = 1;
+			/* if we are on a heterogeneous processor, try and load librseq */
+			if (link_librseq() == PAPI_OK) {
+				librseq_loaded = 1;
+
+				/* indicate in desc that librseq was found and is being used */
+				err = snprintf(_topdown_vector.cmp_info.description, PAPI_MAX_STR_LEN,
+				TOPDOWN_COMPONENT_DESCRIPTION " (librseq in use)");
+				_topdown_vector.cmp_info.description[PAPI_MAX_STR_LEN - 1] = 0;
+			}
+
+			break;
+
+		default: /* not a supported model */
+			strCpy = strncpy(_topdown_vector.cmp_info.disabled_reason,
+							 "CPU model not supported", PAPI_MAX_STR_LEN);
+			_topdown_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = 0;
+			if (strCpy == NULL)
+				HANDLE_STRING_ERROR;
+			retval = PAPI_ENOIMPL;
+			goto fn_fail;
+		}
+	}
+
+	/* if there is a core type requirement for this platform, check it */
+	if (required_core_type != INTEL_CORE_TYPE_HOMOGENEOUS && !active_core_type_is(required_core_type)) {
+		core_type_to_name(required_core_type, typeStr);
+		err = snprintf(_topdown_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN,
+			"The PERF_EVENT MSR does not exist on this core. Limit process affinity to cores of type '%s' only.", typeStr);
+		_topdown_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = 0;
+		if (err > PAPI_MAX_STR_LEN)
+			HANDLE_STRING_ERROR;
+		retval = PAPI_ECMP;
+		goto fn_fail;
+	}
+
+	/* allocate the events table */
+	topdown_native_events = (_topdown_native_event_entry_t *)
+		papi_calloc(TOPDOWN_MAX_COUNTERS, sizeof(_topdown_native_event_entry_t));
+	if (topdown_native_events == NULL)
+	{
+		err = snprintf(_topdown_vector.cmp_info.disabled_reason, PAPI_MAX_STR_LEN,
+				"%s:%i topdown_native_events papi_calloc for %lu bytes failed.",
+				__FILE__, __LINE__, TOPDOWN_MAX_COUNTERS * sizeof(_topdown_native_event_entry_t));
+		_topdown_vector.cmp_info.disabled_reason[PAPI_MAX_STR_LEN - 1] = 0;
+		if (err > PAPI_MAX_STR_LEN)
+			HANDLE_STRING_ERROR;
+		retval = PAPI_ENOMEM;
+		goto fn_fail;
+	}
+
+	/* fill out the events table */
+	i = 0;
+
+	/* level 1 events */
+	strcpy(topdown_native_events[i].name, "TOPDOWN_RETIRING_PERC");
+	strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were retiring instructions");
+	strcpy(topdown_native_events[i].units, "%");
+	topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+	topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_RETIRING;
+	topdown_native_events[i].selector = i + 1;
+
+	i++;
+	strcpy(topdown_native_events[i].name, "TOPDOWN_BAD_SPEC_PERC");
+	strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were stalled due to bad speculation");
+	strcpy(topdown_native_events[i].units, "%");
+	topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+	topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_BAD_SPEC;
+	topdown_native_events[i].selector = i + 1;
+
+	i++;
+	strcpy(topdown_native_events[i].name, "TOPDOWN_FE_BOUND_PERC");
+	strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were waiting on the frontend");
+	strcpy(topdown_native_events[i].units, "%");
+	topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+	topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_FE_BOUND;
+	topdown_native_events[i].selector = i + 1;
+
+	i++;
+	strcpy(topdown_native_events[i].name, "TOPDOWN_BE_BOUND_PERC");
+	strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were waiting on the backend");
+	strcpy(topdown_native_events[i].units, "%");
+	topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+	topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_BE_BOUND;
+	topdown_native_events[i].selector = i + 1;
+
+	if (supports_l2) {
+		/* level 2 events */
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_HEAVY_OPS_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were retiring heavy operations");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_HEAVY_OPS;
+		topdown_native_events[i].selector = i + 1;
+
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_BR_MISPREDICT_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were wasted due to branch misses");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_BR_MISPREDICT;
+		topdown_native_events[i].selector = i + 1;
+
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_FETCH_LAT_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were stalled due to no uops being issued");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_FETCH_LAT;
+		topdown_native_events[i].selector = i + 1;
+
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_MEM_BOUND_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were stalled due to demand load/store instructions");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = TOPDOWN_METRIC_IDX_MEM_BOUND;
+		topdown_native_events[i].selector = i + 1;
+
+		/* derived level 2 events */
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_LIGHT_OPS_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were retiring light operations");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = -1;
+		topdown_native_events[i].derived_parent_idx = TOPDOWN_METRIC_IDX_RETIRING;
+		topdown_native_events[i].derived_sibling_idx = TOPDOWN_METRIC_IDX_HEAVY_OPS;
+		topdown_native_events[i].selector = i + 1;
+
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_MACHINE_CLEARS_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were wasted due to pipeline resets");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = -1;
+		topdown_native_events[i].derived_parent_idx = TOPDOWN_METRIC_IDX_BAD_SPEC;
+		topdown_native_events[i].derived_sibling_idx = TOPDOWN_METRIC_IDX_BR_MISPREDICT;
+		topdown_native_events[i].selector = i + 1;
+
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_FETCH_BAND_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were wasted due to less uops being issued than there are slots");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = -1;
+		topdown_native_events[i].derived_parent_idx = TOPDOWN_METRIC_IDX_FE_BOUND;
+		topdown_native_events[i].derived_sibling_idx = TOPDOWN_METRIC_IDX_FETCH_LAT;
+		topdown_native_events[i].selector = i + 1;
+
+		i++;
+		strcpy(topdown_native_events[i].name, "TOPDOWN_CORE_BOUND_PERC");
+		strcpy(topdown_native_events[i].description, "The percentage of pipeline slots that were stalled due to insufficient non-memory core resources");
+		strcpy(topdown_native_events[i].units, "%");
+		topdown_native_events[i].return_type = PAPI_DATATYPE_FP64;
+		topdown_native_events[i].metric_idx = -1;
+		topdown_native_events[i].derived_parent_idx = TOPDOWN_METRIC_IDX_BE_BOUND;
+		topdown_native_events[i].derived_sibling_idx = TOPDOWN_METRIC_IDX_MEM_BOUND;
+		topdown_native_events[i].selector = i + 1;
+	}
+
+	num_events = i + 1;
+
+	/* Export the total number of events available */
+	_topdown_vector.cmp_info.num_native_events = num_events;
+	_topdown_vector.cmp_info.num_cntrs = num_events;
+	_topdown_vector.cmp_info.num_mpx_cntrs = num_events;
+
+	/* Export the component id */
+	_topdown_vector.cmp_info.CmpIdx = cidx;
+
+fn_exit:
+	_papi_hwd[cidx]->cmp_info.disabled = retval;
+	return retval;
+fn_fail:
+	goto fn_exit;
+}
+
+static int
+_topdown_init_thread(hwd_context_t *ctx)
+{
+	(void)ctx; /* ctx is not used: no per-thread state is set up here */
+	return PAPI_OK;
+}
+
+/* Open the slots event and the metrics event (grouped under slots), map both
+ * for userspace rdpmc reads and record the fds and mappings in ctl. Returns
+ * PAPI_OK, or PAPI_ENOMEM after releasing whatever was already set up. */
+static int
+_topdown_init_control_state(hwd_control_state_t *ctl)
+{
+	_topdown_control_state_t *control = (_topdown_control_state_t *)ctl;
+
+	int retval = PAPI_OK;
+	struct perf_event_attr slots, metrics;
+	int slots_fd = -1, metrics_fd = -1;
+	/* MAP_FAILED means "nothing mapped", so fn_fail is safe from any point */
+	void *slots_p = MAP_FAILED, *metrics_p = MAP_FAILED;
+
+	/* set up slots */
+	memset(&slots, 0, sizeof(slots));
+	slots.type = PERF_TYPE_RAW;
+	slots.size = sizeof(struct perf_event_attr);
+	slots.config = 0x0400ull;
+	slots.exclude_kernel = 1;
+
+	/* open slots */
+	slots_fd = perf_event_open(&slots, 0, -1, -1, 0);
+	if (slots_fd < 0) {
+		retval = PAPI_ENOMEM;
+		goto fn_fail;
+	}
+
+	/* memory mapping the fd to permit _rdpmc calls from userspace */
+	slots_p = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, slots_fd, 0);
+	if (slots_p == MAP_FAILED) {
+		retval = PAPI_ENOMEM;
+		goto fn_fail;
+	}
+
+	/* set up metrics */
+	memset(&metrics, 0, sizeof(metrics));
+	metrics.type = PERF_TYPE_RAW;
+	metrics.size = sizeof(struct perf_event_attr);
+	metrics.config = 0x8000;
+	metrics.exclude_kernel = 1;
+
+	/* open metrics with slots as the group leader */
+	metrics_fd = perf_event_open(&metrics, 0, -1, slots_fd, 0);
+	if (metrics_fd < 0) {
+		retval = PAPI_ENOMEM;
+		goto fn_fail;
+	}
+
+	/* memory mapping the fd to permit _rdpmc calls from userspace */
+	metrics_p = mmap(0, getpagesize(), PROT_READ, MAP_SHARED, metrics_fd, 0);
+	if (metrics_p == MAP_FAILED) {
+		retval = PAPI_ENOMEM;
+		goto fn_fail;
+	}
+
+	/* we set up with no errors, so fill out the control state */
+	control->slots_fd = slots_fd;
+	control->slots_p = slots_p;
+	control->metrics_fd = metrics_fd;
+	control->metrics_p = metrics_p;
+
+fn_exit:
+	return retval;
+
+fn_fail:
+	/* we need to close & free whatever we opened and allocated */
+	if (slots_p != MAP_FAILED)
+		munmap(slots_p, getpagesize());
+	if (metrics_p != MAP_FAILED)
+		munmap(metrics_p, getpagesize());
+	if (slots_fd >= 0)
+		close(slots_fd);
+	if (metrics_fd >= 0)
+		close(metrics_fd);
+	goto fn_exit;
+}
+
+/* Rebuild the event set's measurement flags from the `count` native events */
+/* in `native`, and give each event its counter position (selector - 1). */
+static int
+_topdown_update_control_state(hwd_control_state_t *ctl,
+							  NativeInfo_t *native,
+							  int count,
+							  hwd_context_t *ctx)
+{
+	_topdown_control_state_t *state = (_topdown_control_state_t *)ctl;
+	int n;
+
+	(void)ctx;
+
+	/* forget whatever was selected by a previous update */
+	memset(state->being_measured, 0, sizeof(state->being_measured));
+
+	for (n = 0; n < count; n++) {
+		int ev = native[n].ni_event & PAPI_NATIVE_AND_MASK;
+
+		native[n].ni_position = topdown_native_events[ev].selector - 1;
+		state->being_measured[ev] = 1;
+	}
+
+	return PAPI_OK;
+}
+
+/* Zero the slots and metrics counters so readings are relative to now. */
+static int
+_topdown_start(hwd_context_t *ctx, hwd_control_state_t *ctl)
+{
+	_topdown_control_state_t *control = (_topdown_control_state_t *)ctl;
+
+	(void) ctx;
+	/* stop releases the counters, so a restart has to open them again */
+	if (control->metrics_fd < 0) {
+		int retval = _topdown_init_control_state(ctl);
+		if (retval != PAPI_OK)
+			return retval;
+	}
+
+	/* reset SLOTS together with PERF_METRICS, as section 21.3.9.3 of the */
+	/* IA-32 Architectures Software Developer's Manual recommends; after */
+	/* the reset the 'before' values are effectively 0 and need no record. */
+	/* these ioctl calls do not need to be protected by assert_affinity() */
+	ioctl(control->slots_fd, PERF_EVENT_IOC_RESET, 0);
+	ioctl(control->metrics_fd, PERF_EVENT_IOC_RESET, 0);
+	return PAPI_OK;
+}
+
+/* Turn the current PERF_METRICS reading into a percentage for each event */
+/* being measured and store the raw bits of that double in control->count. */
+static void
+_topdown_collect(_topdown_control_state_t *control)
+{
+	unsigned long long metrics_after;
+	double perc;
+	int i;
+
+	/* once stop has released the counters, keep the last values instead */
+	if (control->metrics_p == NULL)
+		return;
+	metrics_after = read_metrics();
+
+	for (i = 0; i < TOPDOWN_MAX_COUNTERS; i++) {
+		if (!control->being_measured[i])
+			continue;
+		if (topdown_native_events[i].metric_idx >= 0) {
+			/* the metric is taken directly from the PERF_METRICS value */
+			perc = extract_metric(topdown_native_events[i].metric_idx,
+				metrics_after) * 100.0;
+		} else {
+			/* derived metric: perc = parent perc - sibling perc */
+			perc = extract_metric(
+				topdown_native_events[i].derived_parent_idx,
+				metrics_after) * 100.0
+				- extract_metric(
+				topdown_native_events[i].derived_sibling_idx,
+				metrics_after) * 100.0;
+		}
+		/* floating point error can leave a tiny negative value; clamp it */
+		if (perc < 0.0)
+			perc = 0.0;
+		/* store the double's raw bits; memcpy avoids an aliasing cast */
+		memcpy(&control->count[i], &perc, sizeof(perc));
+	}
+}
+
+/* Take a final reading, then unmap and close both perf events. */
+static int
+_topdown_stop(hwd_context_t *ctx, hwd_control_state_t *ctl)
+{
+	_topdown_control_state_t *control = (_topdown_control_state_t *)ctl;
+
+	(void)ctx;
+	_topdown_collect(control);
+
+	/* free & close everything in the control state */
+	munmap(control->slots_p, getpagesize());
+	control->slots_p = NULL;
+	munmap(control->metrics_p, getpagesize());
+	control->metrics_p = NULL;
+	close(control->slots_fd);
+	control->slots_fd = -1;
+	close(control->metrics_fd);
+	control->metrics_fd = -1;
+	return PAPI_OK;
+}
+
+/* Refresh the counts while leaving the counters open for a later stop. */
+static int
+_topdown_read(hwd_context_t *ctx, hwd_control_state_t *ctl,
+			  long long **events, int flags)
+{
+	_topdown_control_state_t *control = (_topdown_control_state_t *)ctl;
+
+	(void)ctx;
+	(void)flags;
+	_topdown_collect(control);
+	/* Pass back a pointer to our results */
+	*events = control->count;
+	return PAPI_OK;
+}
+
+static int
+_topdown_reset(hwd_context_t *ctx, hwd_control_state_t *ctl)
+{
+	( void ) ctx;
+	( void ) ctl;
+	/* nothing is reset here: both arguments are ignored */
+	return PAPI_OK;
+}
+
+static int
+_topdown_shutdown_component(void)
+{
+	/* Free anything we allocated */
+	papi_free(topdown_native_events);
+	/* NOTE(review): the pointer itself is not cleared after the free */
+	return PAPI_OK;
+}
+
+static int
+_topdown_shutdown_thread(hwd_context_t *ctx)
+{
+	( void ) ctx;
+	/* ctx is ignored and nothing is released here */
+	return PAPI_OK;
+}
+
+static int
+_topdown_ctl(hwd_context_t *ctx, int code, _papi_int_option_t *option)
+{
+	( void ) ctx;
+	( void ) code;
+	( void ) option;
+	/* every control request is accepted and ignored */
+	return PAPI_OK;
+}
+
+static int
+_topdown_set_domain(hwd_control_state_t *cntrl, int domain)
+{
+	(void) cntrl;
+	(void) domain;
+	/* the requested domain is ignored; success is reported regardless */
+	return PAPI_OK;
+}
+
+/*
+ * Enumerate the native events.
+ * PAPI_ENUM_FIRST stores code 0 in *EventCode. PAPI_ENUM_EVENTS advances
+ * *EventCode by one while another event follows it, and returns
+ * PAPI_ENOEVNT once the last event has been reached. Every other modifier
+ * returns PAPI_EINVAL.
+ */
+static int
+_topdown_ntv_enum_events(unsigned int *EventCode, int modifier)
+{
+	int current;
+
+	if (modifier == PAPI_ENUM_FIRST)
+	{
+		/* enumeration always begins with the first event */
+		*EventCode = 0;
+		return PAPI_OK;
+	}
+
+	if (modifier != PAPI_ENUM_EVENTS)
+	{
+		return PAPI_EINVAL;
+	}
+
+	/* codes are assumed to be dense, so the successor is the next integer */
+	current = *EventCode;
+	if (current >= num_events - 1)
+	{
+		return PAPI_ENOEVNT;
+	}
+
+	*EventCode = *EventCode + 1;
+	return PAPI_OK;
+}
+
+/* Copy event EventCode's name into name[len], always NUL-terminating it. */
+static int
+_topdown_ntv_code_to_name(unsigned int EventCode, char *name, int len)
+{
+	int index = EventCode & PAPI_NATIVE_AND_MASK;
+	if (index < 0 || index >= num_events)
+		return PAPI_ENOEVNT; /* unknown code: name is left untouched */
+	if (len > 0) {
+		strncpy(name, topdown_native_events[index].name, len - 1);
+		name[len - 1] = '\0'; /* strncpy does not terminate on truncation */
+	}
+	return PAPI_OK;
+}
+
+static int
+_topdown_ntv_code_to_descr(unsigned int EventCode, char *descr, int len)
+{
+	int index = EventCode; /* the code is used directly as the table index */
+	if (index < 0 || index >= num_events)
+		return PAPI_ENOEVNT; /* unknown code: descr is left untouched */
+	if (len > 0) { /* copy at most len bytes, terminator included */
+		strncpy(descr, topdown_native_events[index].description, len - 1);
+		descr[len - 1] = '\0'; /* strncpy does not terminate on truncation */
+	}
+	return PAPI_OK;
+}
+
+/* Fill info with the symbol, long description, units and data type of */
+/* native event EventCode. Returns PAPI_ENOEVNT when the code is unknown. */
+static int
+_topdown_ntv_code_to_info(unsigned int EventCode, PAPI_event_info_t *info)
+{
+	const size_t symbol_max = sizeof(info->symbol) - 1;
+	const size_t descr_max = sizeof(info->long_descr) - 1;
+	const size_t units_max = sizeof(info->units) - 1;
+	int idx = EventCode;
+
+	if (!((idx >= 0) && (idx < num_events)))
+		return PAPI_ENOEVNT;
+
+	/* every copy leaves room for, and then writes, the terminating NUL */
+	strncpy(info->symbol, topdown_native_events[idx].name, symbol_max);
+	info->symbol[symbol_max] = '\0';
+
+	strncpy(info->long_descr, topdown_native_events[idx].description, descr_max);
+	info->long_descr[descr_max] = '\0';
+
+	strncpy(info->units, topdown_native_events[idx].units, units_max);
+	info->units[units_max] = '\0';
+	info->data_type = topdown_native_events[idx].return_type;
+	return PAPI_OK;
+}
+
+/** Component descriptor: metadata, private structure sizes and entry points */
+papi_vector_t _topdown_vector = {
+	.cmp_info = {
+		.name = "topdown",
+		.short_name = "topdown",
+		.description = TOPDOWN_COMPONENT_DESCRIPTION,
+		.version = "1.0",
+		.support_version = "n/a",
+		.kernel_version = "n/a",
+		.default_domain = PAPI_DOM_USER,
+		.available_domains = PAPI_DOM_USER,
+		.default_granularity = PAPI_GRN_THR,
+		.available_granularities = PAPI_GRN_THR,
+		.hardware_intr_sig = PAPI_INT_SIGNAL,
+	},
+
+	/* Sizes of framework-opaque component-private structures */
+	.size = {
+		.context = sizeof(_topdown_context_t),
+		.control_state = sizeof(_topdown_control_state_t),
+		.reg_value = 1, /* unused */
+		.reg_alloc = 1, /* unused */
+	},
+
+	/* Entry points for starting, stopping, reading and lifecycle handling */
+	.start = _topdown_start,
+	.stop = _topdown_stop,
+	.read = _topdown_read,
+	.reset = _topdown_reset,
+	.init_component = _topdown_init_component,
+	.init_thread = _topdown_init_thread,
+	.init_control_state = _topdown_init_control_state,
+	.update_control_state = _topdown_update_control_state,
+	.ctl = _topdown_ctl,
+	.shutdown_thread = _topdown_shutdown_thread,
+	.shutdown_component = _topdown_shutdown_component,
+	.set_domain = _topdown_set_domain,
+
+	/* Native event enumeration and code-to-name/description/info lookups */
+	.ntv_enum_events = _topdown_ntv_enum_events,
+	.ntv_code_to_name = _topdown_ntv_code_to_name,
+	.ntv_code_to_descr = _topdown_ntv_code_to_descr,
+	.ntv_code_to_info = _topdown_ntv_code_to_info,
+};
diff -pruN 7.2.0~b2-1/src/components/topdown/topdown.h 7.2.0-1/src/components/topdown/topdown.h
--- 7.2.0~b2-1/src/components/topdown/topdown.h	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/components/topdown/topdown.h	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,82 @@
+/* Shared definitions for the topdown component: the PERF_METRICS field */
+/* indices and the per-event, per-event-set and per-thread structures. */
+/* PAPI_MAX_STR_LEN and PAPI_MIN_STR_LEN have to be defined by the includer */
+/* before this header is read. */
+#ifndef TOPDOWN_H
+#define TOPDOWN_H
+
+/* description string for the component's info record */
+#define TOPDOWN_COMPONENT_DESCRIPTION	"A component for accessing topdown " \
+									"metrics on 10th gen+ Intel processors"
+
+/* these MSR access defines are constant based on the assumption that */
+/* new architectures will not change them */
+#define TOPDOWN_PERF_FIXED	(1 << 30)	/* return fixed counters */
+#define TOPDOWN_PERF_METRICS	(1 << 29)	/* return metric counters */
+
+#define TOPDOWN_FIXED_COUNTER_SLOTS		        3
+#define TOPDOWN_METRIC_COUNTER_TOPDOWN_L1_L2	0
+
+/* L1 Topdown indices in the PERF_METRICS counter */
+#define TOPDOWN_METRIC_IDX_RETIRING     0
+#define TOPDOWN_METRIC_IDX_BAD_SPEC     1
+#define TOPDOWN_METRIC_IDX_FE_BOUND     2
+#define TOPDOWN_METRIC_IDX_BE_BOUND     3
+
+/* L2 Topdown indices in the PERF_METRICS counter */
+/* The L2 events not here are derived from the others */
+#define TOPDOWN_METRIC_IDX_HEAVY_OPS        4
+#define TOPDOWN_METRIC_IDX_BR_MISPREDICT    5
+#define TOPDOWN_METRIC_IDX_FETCH_LAT        6
+#define TOPDOWN_METRIC_IDX_MEM_BOUND        7
+
+/* number of entries in the per event-set arrays of the control state */
+#define TOPDOWN_MAX_COUNTERS    16
+
+/** Holds per event information */
+typedef struct topdown_native_event_entry
+{
+	int selector; /* signifies which counter slot is being used. indexed from 1 */
+
+	/* strings and type handed out by the name/description/info lookups */
+	char name[PAPI_MAX_STR_LEN];
+	char description[PAPI_MAX_STR_LEN];
+	char units[PAPI_MIN_STR_LEN]; /* the unit to use for this event */
+	int return_type; /* the PAPI return type to use for this event */
+
+	/* a derived event has metric_idx == -1 and is reported as the value */
+	/* at derived_parent_idx minus the value at derived_sibling_idx */
+	int metric_idx; /* index in PERF_METRICS. if -1, it's derived */
+	int derived_parent_idx; /* if derived, which parent do we subtract from */
+	int derived_sibling_idx; /* if derived, which metric do we subtract */
+
+} _topdown_native_event_entry_t;
+
+/** Holds per event-set information */
+typedef struct topdown_control_state
+{
+	/* nonzero for each event index that was added to the event set */
+	int being_measured[TOPDOWN_MAX_COUNTERS];
+	/* per event result: the raw bits of a double holding a percentage */
+	long long count[TOPDOWN_MAX_COUNTERS];
+
+	/* the slots event: descriptor, mapped page and a 'before' value */
+	int slots_fd; /* file descriptor for the slots fixed counter */
+	void *slots_p; /* we need this in ctl so it can be freed */
+	/* NOTE(review): no use of slots_before was seen in the code reviewed */
+	unsigned long long slots_before;
+
+	int metrics_fd; /* file descriptor for the PERF_METRICS counter */
+	void *metrics_p; /* we need this in ctl so it can be freed */
+	/* NOTE(review): no use of metrics_before was seen in the code reviewed */
+	unsigned long long metrics_before;
+} _topdown_control_state_t;
+
+/* Holds per thread information; however, we do not use this structure,
+   but the framework still needs its size */
+typedef struct topdown_context
+{
+    int junk;
+} _topdown_context_t;
+
+#endif /* TOPDOWN_H */
diff -pruN 7.2.0~b2-1/src/configure 7.2.0-1/src/configure
--- 7.2.0~b2-1/src/configure	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/configure	2025-06-25 22:38:10.000000000 +0000
@@ -1,6 +1,6 @@
 #! /bin/sh
 # Guess values for system-dependent variables and create Makefiles.
-# Generated by GNU Autoconf 2.69 for PAPI 7.2.0.0b2.
+# Generated by GNU Autoconf 2.69 for PAPI 7.2.0.0.
 #
 # Report bugs to <ptools-perfapi@icl.utk.edu>.
 #
@@ -580,8 +580,8 @@ MAKEFLAGS=
 # Identity of this package.
 PACKAGE_NAME='PAPI'
 PACKAGE_TARNAME='papi'
-PACKAGE_VERSION='7.2.0.0b2'
-PACKAGE_STRING='PAPI 7.2.0.0b2'
+PACKAGE_VERSION='7.2.0.0'
+PACKAGE_STRING='PAPI 7.2.0.0'
 PACKAGE_BUGREPORT='ptools-perfapi@icl.utk.edu'
 PACKAGE_URL=''
 
@@ -1367,7 +1367,7 @@ if test "$ac_init_help" = "long"; then
   # Omit some internal or obsolete options to make the list less imposing.
   # This message is too long to be a string in the A/UX 3.1 sh.
   cat <<_ACEOF
-\`configure' configures PAPI 7.2.0.0b2 to adapt to many kinds of systems.
+\`configure' configures PAPI 7.2.0.0 to adapt to many kinds of systems.
 
 Usage: $0 [OPTION]... [VAR=VALUE]...
 
@@ -1429,7 +1429,7 @@ fi
 
 if test -n "$ac_init_help"; then
   case $ac_init_help in
-     short | recursive ) echo "Configuration of PAPI 7.2.0.0b2:";;
+     short | recursive ) echo "Configuration of PAPI 7.2.0.0:";;
    esac
   cat <<\_ACEOF
 
@@ -1462,7 +1462,7 @@ Optional Packages:
   --with-bgpm_installdir=<path>
                           Specify the installation path of BGPM
   --with-nativecc=<path>  Specify native C compiler for header generation
-  --with-tests=<"ctests ftests mpitests", no>
+  --with-tests=<"ctests ftests mpitests comp_tests", no>
                           Specify which tests to run on install, or "no" tests
                           (default: all available tests)
   --with-debug=<yes,memory,no>
@@ -1590,7 +1590,7 @@ fi
 test -n "$ac_init_help" && exit $ac_status
 if $ac_init_version; then
   cat <<\_ACEOF
-PAPI configure 7.2.0.0b2
+PAPI configure 7.2.0.0
 generated by GNU Autoconf 2.69
 
 Copyright (C) 2012 Free Software Foundation, Inc.
@@ -2108,7 +2108,7 @@ cat >config.log <<_ACEOF
 This file contains any messages produced by compilers while
 running configure, to aid debugging if configure makes a mistake.
 
-It was created by PAPI $as_me 7.2.0.0b2, which was
+It was created by PAPI $as_me 7.2.0.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   $ $0 $@
@@ -4643,7 +4643,7 @@ $as_echo_n "checking for tests... " >&6;
 if test "${with_tests+set}" = set; then :
   withval=$with_tests; tests=$withval
 else
-  tests="ctests ftests mpitests"
+  tests="ctests ftests mpitests comp_tests"
 fi
 
 
@@ -4667,36 +4667,43 @@ else
           ;;
   esac
   case "$tests" in
+      *comp_tests*)
+          tmp_tests="${tmp_tests}comp_tests " # no "+=": configure runs under /bin/sh
+          ;;
+  esac
+  case "$tests" in
       *mpitests*)
           # we already checked if mpicc is working
           if test "x$MPICC" != "x"; then
             if test "x$NO_MPI_TESTS" = "x"; then
               mpi_tests=yes
               # mpitests only works together with ctests
-              if test "x$tmp_tests" = "x"; then
-                { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-              else
-                { $as_echo "$as_me:${as_lineno-$LINENO}: result: $tmp_tests mpitests" >&5
-$as_echo "$tmp_tests mpitests" >&6; }
+              if test "x$tmp_tests" != "x"; then
+                tmp_tests="${tmp_tests}mpitests " # no "+=": configure runs under /bin/sh
               fi
             fi
-          else
-            NO_MPI_TESTS=yes
-            if test "x$tmp_tests" = "x"; then
-              { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
-$as_echo "no" >&6; }
-            else
-              { $as_echo "$as_me:${as_lineno-$LINENO}: result: $tmp_tests" >&5
-$as_echo "$tmp_tests" >&6; }
-            fi
           fi
           ;;
   esac
-  # do not list mpi_tests for makefile target
+
+  if test "x$tmp_tests" = "x"; then
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5
+$as_echo "no" >&6; }
+  else
+    { $as_echo "$as_me:${as_lineno-$LINENO}: result: $tmp_tests" >&5
+$as_echo "$tmp_tests" >&6; }
+  fi
+
+  # do not list mpitests for makefile target
+  case "$tmp_tests" in
+    *mpitests* )
+        tmp_tests=$(echo "$tmp_tests" | sed 's/ mpitests//')
+        ;;
+  esac
+
   tests=$tmp_tests
 
-  # mpi_tests is not listed by the user
+  # mpitests is not listed by the user
   if test "$mpi_tests" = "no"; then
     NO_MPI_TESTS=yes
   fi
@@ -4705,6 +4712,9 @@ fi
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for debug build" >&5
 $as_echo_n "checking for debug build... " >&6; }
 
+# default value for --with-debug if not set by user
+debug="no"
+
 # Check whether --with-debug was given.
 if test "${with_debug+set}" = set; then :
   withval=$with_debug; debug=$withval
@@ -6457,7 +6467,7 @@ SHOW_CONF=showconf
 CTEST_TARGETS="all"
 FTEST_TARGETS="all"
 LIBRARY=libpapi.a
-SHLIB='libpapi.so.7.2.0.0b2'
+SHLIB='libpapi.so.7.2.0.0'
 PAPISOVER='$(PAPIVER).$(PAPIREV)'
 VLIB='libpapi.so.$(PAPISOVER)'
 OMPCFLGS=-fopenmp
@@ -6869,6 +6879,32 @@ if test "${with_components+set}" = set;
 fi
 
 
+# Enable sysdetect unless the user has explicitly told us not to.
+if test "$with_sysdetect" = "yes"; then
+  if test "$perf_events" != "no"; then
+    components="$components sysdetect"
+    fi
+fi
+
+{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $components" >&5
+$as_echo "$components" >&6; }
+
+# Check whether rocm and rocp_sdk were configured together
+rocm_found=0
+rocp_sdk_found=0
+for comp in $components
+do
+    if test "$comp" = "rocm"; then
+        rocm_found=1
+    fi
+
+    if test "$comp" = "rocp_sdk"; then
+        rocp_sdk_found=1
+    fi
+done
+if test $rocm_found -eq 1 && test $rocp_sdk_found -eq 1; then
+    echo "WARNING: Components rocm and rocp_sdk should not be configured together. See components/rocm/README.md for more details."
+fi
 
 # This is an ugly hack to keep building on configurations covered by any-null in the past.
 if test "$VECTOR" = "_papi_dummy_vector"; then
@@ -6923,13 +6959,132 @@ elif test "x$VECTOR" != "x"; then
    echo "extern papi_vector_t ${VECTOR};" >> components_config.h
 fi
 
-# Enable sysdetect unless the user has explicitly told us not to.
-if test "$with_sysdetect" = "yes"; then
-  if test "$perf_events" != "no"; then
-    components="$components sysdetect"
+
+# construct papi_components_config_event_defs.h
+echo "#ifndef _PAPICOMPCFGEVENTDEFS"  > papi_components_config_event_defs.h
+echo "#define _PAPICOMPCFGEVENTDEFS" >> papi_components_config_event_defs.h
+echo "" >> papi_components_config_event_defs.h
+numLine=`grep "#define PAPI_MAX_PRESET_EVENTS" papiStdEventDefs.h`
+sumNum=`echo ${numLine} | awk '{print $3}'`
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_defs_inc=components/${subcomp}/papi_${subcomp}_std_event_defs.h
+    if test -f ${subcomp_defs_inc}; then
+        `cp ${subcomp_defs_inc} ./`
+        `echo "#define PAPI_${subcomp}_PRESET_OFFSET ${sumNum}" >> papi_components_config_event_defs.h`
+        numLine=`grep "#define PAPI_MAX_${subcomp}_PRESETS" ${subcomp_defs_inc}`
+        singleNum=`echo ${numLine} | awk '{print $3}'`
+        sumNum=$(( ${sumNum} + ${singleNum} ))
     fi
+  fi
+done
+echo "" >> papi_components_config_event_defs.h
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_defs_inc=components/${subcomp}/papi_${subcomp}_std_event_defs.h
+    if test -f ${subcomp_defs_inc}; then
+        `echo "#include \"papi_${subcomp}_std_event_defs.h\"" >> papi_components_config_event_defs.h`
+    fi
+  fi
+done
+echo "" >> papi_components_config_event_defs.h
+echo "#endif" >> papi_components_config_event_defs.h
+
+# includes for preset headers
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_preset_inc=components/${subcomp}/papi_${subcomp}_presets.h
+    as_ac_File=`$as_echo "ac_cv_file_${subcomp_preset_inc}" | $as_tr_sh`
+{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for ${subcomp_preset_inc}" >&5
+$as_echo_n "checking for ${subcomp_preset_inc}... " >&6; }
+if eval \${$as_ac_File+:} false; then :
+  $as_echo_n "(cached) " >&6
+else
+  test "$cross_compiling" = yes &&
+  as_fn_error $? "cannot check for file existence when cross compiling" "$LINENO" 5
+if test -r "${subcomp_preset_inc}"; then
+  eval "$as_ac_File=yes"
+else
+  eval "$as_ac_File=no"
+fi
+fi
+eval ac_res=\$$as_ac_File
+	       { $as_echo "$as_me:${as_lineno-$LINENO}: result: $ac_res" >&5
+$as_echo "$ac_res" >&6; }
+if eval test \"x\$"$as_ac_File"\" = x"yes"; then :
+  `echo "#include \"${subcomp_preset_inc}\"" >> components_config.h`
 fi
 
+  fi
+done
+echo "" >> components_config.h
+
+# array tracking max number of presets per component
+echo "int _papi_hwi_max_presets[] = {" >> components_config.h
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_preset_inc=components/${subcomp}/papi_${subcomp}_presets.h
+    if test -f ${subcomp_preset_inc}; then
+        `echo "   PAPI_MAX_${subcomp}_PRESETS," >> components_config.h`
+    else
+        `echo "   0," >> components_config.h`
+    fi
+  else
+      `echo "   PAPI_MAX_PRESET_EVENTS," >> components_config.h`
+  fi
+done
+echo "   0" >> components_config.h
+echo "};" >> components_config.h
+echo "" >> components_config.h
+
+# preset arrays
+echo "hwi_presets_t *_papi_hwi_comp_presets[] = {" >> components_config.h
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_preset_inc=components/${subcomp}/papi_${subcomp}_presets.h
+    if test -f ${subcomp_preset_inc}; then
+        `echo "   _${subcomp}_presets," >> components_config.h`
+    else
+        `echo "   NULL," >> components_config.h`
+    fi
+  else
+      `echo "   _papi_hwi_presets," >> components_config.h`
+  fi
+done
+echo "   NULL" >> components_config.h
+echo "};" >> components_config.h
+echo "" >> components_config.h
+
 PAPI_NUM_COMP=0
 for comp in $components; do
   idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
@@ -6969,7 +7124,6 @@ for comp in $components; do
 	COMPONENTS="$COMPONENTS $comp"
   fi
 done
-tests="$tests comp_tests"
 
 for comp in $components; do
   # check for SDE component to determine linking flags.
@@ -7011,9 +7165,6 @@ done
 
 CFLAGS="$CFLAGS -DPAPI_NUM_COMP=$PAPI_NUM_COMP"
 
-{ $as_echo "$as_me:${as_lineno-$LINENO}: result: $components" >&5
-$as_echo "$components" >&6; }
-
 { $as_echo "$as_me:${as_lineno-$LINENO}: checking for PAPI event CSV filename to use" >&5
 $as_echo_n "checking for PAPI event CSV filename to use... " >&6; }
 if test "x$PAPI_EVENTS_CSV" == "x"; then
@@ -7645,7 +7796,7 @@ cat >>$CONFIG_STATUS <<\_ACEOF || ac_wri
 # report actual input values of CONFIG_FILES etc. instead of their
 # values after options handling.
 ac_log="
-This file was extended by PAPI $as_me 7.2.0.0b2, which was
+This file was extended by PAPI $as_me 7.2.0.0, which was
 generated by GNU Autoconf 2.69.  Invocation command line was
 
   CONFIG_FILES    = $CONFIG_FILES
@@ -7711,7 +7862,7 @@ _ACEOF
 cat >>$CONFIG_STATUS <<_ACEOF || ac_write_fail=1
 ac_cs_config="`$as_echo "$ac_configure_args" | sed 's/^ //; s/[\\""\`\$]/\\\\&/g'`"
 ac_cs_version="\\
-PAPI config.status 7.2.0.0b2
+PAPI config.status 7.2.0.0
 configured by $0, generated by GNU Autoconf 2.69,
   with options \\"\$ac_cs_config\\"
 
diff -pruN 7.2.0~b2-1/src/configure.in 7.2.0-1/src/configure.in
--- 7.2.0~b2-1/src/configure.in	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/configure.in	2025-06-25 22:38:10.000000000 +0000
@@ -6,7 +6,7 @@
 # cross compiling should work differently...
 
 AC_PREREQ(2.59)
-AC_INIT(PAPI, 7.2.0.0b2, ptools-perfapi@icl.utk.edu)
+AC_INIT(PAPI, 7.2.0.0, ptools-perfapi@icl.utk.edu)
 AC_CONFIG_SRCDIR([papi.c])
 AC_CONFIG_HEADER([config.h])
 
@@ -336,10 +336,10 @@ fi
 
 AC_MSG_CHECKING(for tests)
 AC_ARG_WITH(tests,
-           [AS_HELP_STRING([--with-tests=<"ctests ftests mpitests", no>],
+           [AS_HELP_STRING([--with-tests=<"ctests ftests mpitests comp_tests", no>],
            [Specify which tests to run on install, or "no" tests (default: all available tests)])],
     [tests=$withval],
-    [tests="ctests ftests mpitests"])
+    [tests="ctests ftests mpitests comp_tests"])
 
 if test "$tests" = "no"; then
   AC_MSG_RESULT($tests)
@@ -360,38 +360,50 @@ else
           ;;
   esac
   case "$tests" in
+      *comp_tests*)
+          tmp_tests="${tmp_tests}comp_tests " # no "+=": configure runs under /bin/sh
+          ;;
+  esac
+  case "$tests" in
       *mpitests*)
           # we already checked if mpicc is working
           if test "x$MPICC" != "x"; then
             if test "x$NO_MPI_TESTS" = "x"; then
               mpi_tests=yes
               # mpitests only works together with ctests
-              if test "x$tmp_tests" = "x"; then
-                AC_MSG_RESULT(no)
-              else
-                AC_MSG_RESULT($tmp_tests mpitests)
+              if test "x$tmp_tests" != "x"; then
+                tmp_tests="${tmp_tests}mpitests " # no "+=": configure runs under /bin/sh
               fi
             fi
-          else
-            NO_MPI_TESTS=yes
-            if test "x$tmp_tests" = "x"; then
-              AC_MSG_RESULT(no)
-            else
-              AC_MSG_RESULT($tmp_tests)
-            fi
           fi
           ;;
   esac
-  # do not list mpi_tests for makefile target
+
+  if test "x$tmp_tests" = "x"; then
+    AC_MSG_RESULT(no)
+  else
+    AC_MSG_RESULT($tmp_tests)
+  fi
+
+  # do not list mpitests for makefile target
+  case "$tmp_tests" in
+    *mpitests* )
+        tmp_tests=$(echo "$tmp_tests" | sed 's/ mpitests//')
+        ;;
+  esac
+
   tests=$tmp_tests
   
-  # mpi_tests is not listed by the user
+  # mpitests is not listed by the user
   if test "$mpi_tests" = "no"; then
     NO_MPI_TESTS=yes
   fi
 fi
 
 AC_MSG_CHECKING(for debug build)
+
+# default value for --with-debug if not set by user
+debug="no"
 AC_ARG_WITH(debug,
             [AS_HELP_STRING([--with-debug=<yes,memory,no>],
             [Build a debug version, debug version plus memory tracker or none])],
@@ -1860,6 +1872,31 @@ AC_ARG_WITH([components],
     ]
 )
 
+# Enable sysdetect unless the user has explicitly told us not to.
+if test "$with_sysdetect" = "yes"; then
+  if test "$perf_events" != "no"; then
+    components="$components sysdetect"
+    fi
+fi
+
+AC_MSG_RESULT($components)
+
+# Check whether rocm and rocp_sdk were configured together
+rocm_found=0
+rocp_sdk_found=0
+for comp in $components
+do
+    if test "$comp" = "rocm"; then
+        rocm_found=1
+    fi
+
+    if test "$comp" = "rocp_sdk"; then
+        rocp_sdk_found=1
+    fi
+done
+if test $rocm_found -eq 1 && test $rocp_sdk_found -eq 1; then
+    echo "WARNING: Components rocm and rocp_sdk should not be configured together. See components/rocm/README.md for more details."
+fi
 
 # This is an ugly hack to keep building on configurations covered by any-null in the past.
 if test "$VECTOR" = "_papi_dummy_vector"; then
@@ -1914,12 +1951,111 @@ elif test "x$VECTOR" != "x"; then
    echo "extern papi_vector_t ${VECTOR};" >> components_config.h
 fi
 
-# Enable sysdetect unless the user has explicitly told us not to.
-if test "$with_sysdetect" = "yes"; then
-  if test "$perf_events" != "no"; then
-    components="$components sysdetect"
+
+# construct papi_components_config_event_defs.h
+echo "#ifndef _PAPICOMPCFGEVENTDEFS"  > papi_components_config_event_defs.h
+echo "#define _PAPICOMPCFGEVENTDEFS" >> papi_components_config_event_defs.h
+echo "" >> papi_components_config_event_defs.h
+numLine=`grep "#define PAPI_MAX_PRESET_EVENTS" papiStdEventDefs.h`
+sumNum=`echo ${numLine} | awk '{print $3}'`
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_defs_inc=components/${subcomp}/papi_${subcomp}_std_event_defs.h
+    if test -f ${subcomp_defs_inc}; then
+        `cp ${subcomp_defs_inc} ./`
+        `echo "#define PAPI_${subcomp}_PRESET_OFFSET ${sumNum}" >> papi_components_config_event_defs.h`
+        numLine=`grep "#define PAPI_MAX_${subcomp}_PRESETS" ${subcomp_defs_inc}`
+        singleNum=`echo ${numLine} | awk '{print $3}'`
+        sumNum=$(( ${sumNum} + ${singleNum} ))
     fi
-fi
+  fi
+done
+echo "" >> papi_components_config_event_defs.h
+for comp in $components; do   # emit one #include per component that ships std event defs
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then   # sed printed nothing: the name contains no slash
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_defs_inc=components/${subcomp}/papi_${subcomp}_std_event_defs.h
+    if test -f "${subcomp_defs_inc}"; then
+        echo "#include \"papi_${subcomp}_std_event_defs.h\"" >> papi_components_config_event_defs.h
+    fi
+  fi
+done
+echo "" >> papi_components_config_event_defs.h
+echo "#endif" >> papi_components_config_event_defs.h
+
+# includes for preset headers
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_preset_inc=components/${subcomp}/papi_${subcomp}_presets.h
+    AC_CHECK_FILE(${subcomp_preset_inc}, [`echo "#include \"${subcomp_preset_inc}\"" >> components_config.h`])
+  fi
+done
+echo "" >> components_config.h
+
+# array tracking max number of presets per component: one entry per component, then a final 0
+echo "int _papi_hwi_max_presets[[]] = {" >> components_config.h
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then   # sed printed nothing: the name contains no slash
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_preset_inc=components/${subcomp}/papi_${subcomp}_presets.h
+    if test -f "${subcomp_preset_inc}"; then
+        echo "   PAPI_MAX_${subcomp}_PRESETS," >> components_config.h
+    else
+        echo "   0," >> components_config.h
+    fi
+  else
+      echo "   PAPI_MAX_PRESET_EVENTS," >> components_config.h
+  fi
+done
+echo "   0" >> components_config.h
+echo "};" >> components_config.h
+echo "" >> components_config.h
+
+# preset arrays: one pointer per component (NULL when it has no presets header), then a final NULL
+echo "hwi_presets_t *_papi_hwi_comp_presets[[]] = {" >> components_config.h
+for comp in $components; do
+  idx=`echo "$comp" | sed -n "s/\/.*//p" | wc -c`
+  if test "$idx" = 0; then   # sed printed nothing: the name contains no slash
+    subcomp=$comp
+  else
+    subcomp=`echo $comp | sed -E "s/^.{${idx}}//"`
+  fi
+  if test "${subcomp}" != "perf_event"; then
+    subcomp_preset_inc=components/${subcomp}/papi_${subcomp}_presets.h
+    if test -f "${subcomp_preset_inc}"; then
+        echo "   _${subcomp}_presets," >> components_config.h
+    else
+        echo "   NULL," >> components_config.h
+    fi
+  else
+      echo "   _papi_hwi_presets," >> components_config.h
+  fi
+done
+echo "   NULL" >> components_config.h
+echo "};" >> components_config.h
+echo "" >> components_config.h
 
 PAPI_NUM_COMP=0
 for comp in $components; do
@@ -1960,7 +2096,6 @@ for comp in $components; do
 	COMPONENTS="$COMPONENTS $comp"
   fi
 done
-tests="$tests comp_tests"
 
 for comp in $components; do
   # check for SDE component to determine linking flags.
@@ -2002,8 +2137,6 @@ done
 
 CFLAGS="$CFLAGS -DPAPI_NUM_COMP=$PAPI_NUM_COMP"
 
-AC_MSG_RESULT($components)
-
 AC_MSG_CHECKING(for PAPI event CSV filename to use)
 if test "x$PAPI_EVENTS_CSV" == "x"; then
    PAPI_EVENTS_CSV="papi_events.csv"
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/Makefile 7.2.0-1/src/counter_analysis_toolkit/Makefile
--- 7.2.0~b2-1/src/counter_analysis_toolkit/Makefile	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/Makefile	2025-06-25 22:38:10.000000000 +0000
@@ -49,9 +49,9 @@ endif
 ifeq ($(ARCH),POWER)
     FLOP+=-maltivec -DPOWER
     VECSRC=vec_fma_hp.o vec_fma_sp.o vec_fma_dp.o vec_nonfma_hp.o vec_nonfma_sp.o vec_nonfma_dp.o
-    VEC=-maltivec -O0 -DPOWER
-    VEC_FMA=-maltivec -O0 -DPOWER
-    VEC_ALL=$(VEC) -O0 -DPOWER
+    VEC=-maltivec -DPOWER
+    VEC_FMA=-maltivec -DPOWER
+    VEC_ALL=$(VEC) -DPOWER
 endif
 ifeq ($(ARCH),ARM)
     FLOP+=-march=armv8.2-a+fp16 -DARM
@@ -109,58 +109,58 @@ weak_symbols.o: weak_symbols.c vec.h
 	-$(CC) -c $(CFLAGS) weak_symbols.c
 
 vec.o: vec.c vec.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) -D$(ARCH) $(VEC_META) vec.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) -D$(ARCH) $(VEC_META) vec.c
 
 vec_scalar_verify.o: vec_scalar_verify.c vec_scalar_verify.h cat_arch.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_ALL) vec_scalar_verify.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_ALL) vec_scalar_verify.c
 
 vec_fma_hp.o: vec_fma_hp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_hp.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_hp.c
 
 vec_fma_hp: vec_fma_hp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_hp.c -o vec_fma_hp-128B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_hp.c -o vec_fma_hp-256B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_hp.c -o vec_fma_hp-512B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_hp.c -o vec_fma_hp-128B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_hp.c -o vec_fma_hp-256B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_hp.c -o vec_fma_hp-512B.o
 
 vec_fma_sp.o: vec_fma_sp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_sp.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_sp.c
 
 vec_fma_sp: vec_fma_sp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_sp.c -o vec_fma_sp-128B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_sp.c -o vec_fma_sp-256B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_sp.c -o vec_fma_sp-512B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_sp.c -o vec_fma_sp-128B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_sp.c -o vec_fma_sp-256B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_sp.c -o vec_fma_sp-512B.o
 
 vec_fma_dp.o: vec_fma_dp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC_FMA) vec_fma_dp.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC_FMA) vec_fma_dp.c
 
 vec_fma_dp: vec_fma_dp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128_FMA) vec_fma_dp.c -o vec_fma_dp-128B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256_FMA) vec_fma_dp.c -o vec_fma_dp-256B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512_FMA) vec_fma_dp.c -o vec_fma_dp-512B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128_FMA) vec_fma_dp.c -o vec_fma_dp-128B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256_FMA) vec_fma_dp.c -o vec_fma_dp-256B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512_FMA) vec_fma_dp.c -o vec_fma_dp-512B.o
 
 vec_nonfma_hp.o: vec_nonfma_hp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_hp.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_hp.c
 
 vec_nonfma_hp: vec_nonfma_hp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_hp.c -o vec_nonfma_hp-128B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_hp.c -o vec_nonfma_hp-256B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_hp.c -o vec_nonfma_hp-512B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_hp.c -o vec_nonfma_hp-128B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_hp.c -o vec_nonfma_hp-256B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_hp.c -o vec_nonfma_hp-512B.o
 
 vec_nonfma_sp.o: vec_nonfma_sp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_sp.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_sp.c
 
 vec_nonfma_sp: vec_nonfma_sp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_sp.c -o vec_nonfma_sp-128B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_sp.c -o vec_nonfma_sp-256B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_sp.c -o vec_nonfma_sp-512B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_sp.c -o vec_nonfma_sp-128B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_sp.c -o vec_nonfma_sp-256B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_sp.c -o vec_nonfma_sp-512B.o
 
 vec_nonfma_dp.o: vec_nonfma_dp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC) vec_nonfma_dp.c
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC) vec_nonfma_dp.c
 
 vec_nonfma_dp: vec_nonfma_dp.c vec_scalar_verify.h
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC128) vec_nonfma_dp.c -o vec_nonfma_dp-128B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o
-	-$(CC) -c $(CFLAGS) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC128) vec_nonfma_dp.c -o vec_nonfma_dp-128B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC256) vec_nonfma_dp.c -o vec_nonfma_dp-256B.o
+	-$(CC) -c $(CFLAGS) $(OPT1) $(INCFLAGS) $(VEC512) vec_nonfma_dp.c -o vec_nonfma_dp-512B.o
 
 cat_collect:
 	$(CC) $(CFLAGS) -fopenmp $(INCFLAGS) main.c $(wildcard *.o) -o cat_collect $(LDFLAGS)
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/cat_arch.h 7.2.0-1/src/counter_analysis_toolkit/cat_arch.h
--- 7.2.0~b2-1/src/counter_analysis_toolkit/cat_arch.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/cat_arch.h	2025-06-25 22:38:10.000000000 +0000
@@ -123,38 +123,17 @@ typedef float64x2_t DP_VEC_TYPE;
 #define ADD_VEC_SH(_I_,_J_)     vaddh_f16( _I_ , _J_ );
 #define MUL_VEC_SH(_I_,_J_)     vmulh_f16( _I_ , _J_ );
 #define SQRT_VEC_SH(_I_)        vsqrth_f16( _I_ );
-#define FMA_VEC_SH(_out_,_I_,_J_,_K_) {\
-    HP_VEC_TYPE arg1 = SET_VEC_PH(_I_);\
-    HP_VEC_TYPE arg2 = SET_VEC_PH(_J_);\
-    HP_VEC_TYPE arg3 = SET_VEC_PH(_K_);\
-    HP_VEC_TYPE argTmp;\
-    argTmp = FMA_VEC_PH( arg1 , arg2 , arg3 );\
-    _out_ = ((half*)&(argTmp))[0];\
-}
+#define FMA_VEC_SH(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;
 
 #define SET_VEC_SS(_I_)         _I_ ;
 #define ADD_VEC_SS(_I_,_J_)     _I_ + _J_ ;
 #define MUL_VEC_SS(_I_,_J_)     _I_ * _J_ ;
-#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
-    SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
-    SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
-    SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
-    SP_VEC_TYPE argTmp;\
-    argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
-    _out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
-}
+#define FMA_VEC_SS(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;
 
 #define SET_VEC_SD(_I_)         _I_ ;
 #define ADD_VEC_SD(_I_,_J_)     _I_ + _J_ ;
 #define MUL_VEC_SD(_I_,_J_)     _I_ * _J_ ;
-#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
-    DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
-    DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
-    DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
-    DP_VEC_TYPE argTmp;\
-    argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
-    _out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
-}
+#define FMA_VEC_SD(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;
 
 #elif defined(POWER)
 void  test_hp_power_VEC( int instr_per_loop, uint64 iterations, int EventSet, FILE *fp );
@@ -187,25 +166,11 @@ typedef __vector double DP_VEC_TYPE;
 #define SET_VEC_SS(_I_)         _I_ ;
 #define ADD_VEC_SS(_I_,_J_)     _I_ + _J_ ;
 #define MUL_VEC_SS(_I_,_J_)     _I_ * _J_ ;
-#define FMA_VEC_SS(_out_,_I_,_J_,_K_) {\
-    SP_VEC_TYPE arg1 = SET_VEC_PS(_I_);\
-    SP_VEC_TYPE arg2 = SET_VEC_PS(_J_);\
-    SP_VEC_TYPE arg3 = SET_VEC_PS(_K_);\
-    SP_VEC_TYPE argTmp;\
-    argTmp = FMA_VEC_PS( arg1 , arg2 , arg3 );\
-    _out_ = ((SP_SCALAR_TYPE*)&(argTmp))[0];\
-}
+#define FMA_VEC_SS(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;
 
 #define SET_VEC_SD(_I_)         _I_ ;
 #define ADD_VEC_SD(_I_,_J_)     _I_ + _J_ ;
 #define MUL_VEC_SD(_I_,_J_)     _I_ * _J_ ;
-#define FMA_VEC_SD(_out_,_I_,_J_,_K_) {\
-    DP_VEC_TYPE arg1 = SET_VEC_PD(_I_);\
-    DP_VEC_TYPE arg2 = SET_VEC_PD(_J_);\
-    DP_VEC_TYPE arg3 = SET_VEC_PD(_K_);\
-    DP_VEC_TYPE argTmp;\
-    argTmp = FMA_VEC_PD( arg1 , arg2 , arg3 );\
-    _out_ = ((DP_SCALAR_TYPE*)&(argTmp))[0];\
-}
+#define FMA_VEC_SD(_out_,_I_,_J_,_K_) _out_ = _I_ * _J_ + _K_;
 
 #endif
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec.c 7.2.0-1/src/counter_analysis_toolkit/vec.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec.c	2025-06-25 22:38:10.000000000 +0000
@@ -5,6 +5,7 @@
 #include <papi.h>
 #include "vec.h"
 #include "cat_arch.h"
+#include "vec_scalar_verify.h"
 
 void vec_driver(char* papi_event_name, hw_desc_t *hw_desc, char* outdir)
 {
@@ -38,107 +39,162 @@ void vec_driver(char* papi_event_name, h
         goto error1;
     }
 
+    // Header to label the columns in the output file.
+    fprintf(ofp_papi, "# ExpectedInstrs EventCount\n");
+
 #if defined(X86)
 
 #if defined(AVX128_AVAIL)
 
-    // Non-FMA instruction trials.
-    test_hp_x86_128B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_hp_x86_128B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_hp_x86_128B_VEC( 96, 1000, EventSet, ofp_papi );
-
-#if defined(AVX256_AVAIL)
-    test_hp_x86_256B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_hp_x86_256B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_hp_x86_256B_VEC( 96, 1000, EventSet, ofp_papi );
-
-#if defined(AVX512_AVAIL)
-    test_hp_x86_512B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_hp_x86_512B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_hp_x86_512B_VEC( 96, 1000, EventSet, ofp_papi );
-#endif
-#endif
-
-    test_sp_x86_128B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_sp_x86_128B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_sp_x86_128B_VEC( 96, 1000, EventSet, ofp_papi );
-
-#if defined(AVX256_AVAIL)
-    test_sp_x86_256B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_sp_x86_256B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_sp_x86_256B_VEC( 96, 1000, EventSet, ofp_papi );
-
-#if defined(AVX512_AVAIL)
-    test_sp_x86_512B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_sp_x86_512B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_sp_x86_512B_VEC( 96, 1000, EventSet, ofp_papi );
-#endif
-#endif
-
-    test_dp_x86_128B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_dp_x86_128B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_dp_x86_128B_VEC( 96, 1000, EventSet, ofp_papi );
-
-#if defined(AVX256_AVAIL)
-    test_dp_x86_256B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_dp_x86_256B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_dp_x86_256B_VEC( 96, 1000, EventSet, ofp_papi );
-
-#if defined(AVX512_AVAIL)
-    test_dp_x86_512B_VEC( 24, 1000, EventSet, ofp_papi );
-    test_dp_x86_512B_VEC( 48, 1000, EventSet, ofp_papi );
-    test_dp_x86_512B_VEC( 96, 1000, EventSet, ofp_papi );
-#endif
-#endif
-
-    // FMA instruction trials.
-    test_hp_x86_128B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_hp_x86_128B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_hp_x86_128B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-#if defined(AVX256_AVAIL)
-    test_hp_x86_256B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_hp_x86_256B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_hp_x86_256B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-#if defined(AVX512_AVAIL)
-    test_hp_x86_512B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_hp_x86_512B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_hp_x86_512B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-#endif
-#endif
-
-    test_sp_x86_128B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_sp_x86_128B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_sp_x86_128B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-#if defined(AVX256_AVAIL)
-    test_sp_x86_256B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_sp_x86_256B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_sp_x86_256B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-#if defined(AVX512_AVAIL)
-    test_sp_x86_512B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_sp_x86_512B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_sp_x86_512B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-#endif
-#endif
-
-    test_dp_x86_128B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_dp_x86_128B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_dp_x86_128B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-#if defined(AVX256_AVAIL)
-    test_dp_x86_256B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_dp_x86_256B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_dp_x86_256B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-#if defined(AVX512_AVAIL)
-    test_dp_x86_512B_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_dp_x86_512B_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_dp_x86_512B_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-#endif
-#endif
+    // HP Non-FMA instruction trials.
+    fprintf(ofp_papi, "# HP Non-FMA Scalar\n");
+    test_hp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# HP Non-FMA Vector AVX128\n");
+    test_hp_x86_128B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_hp_x86_128B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_hp_x86_128B_VEC( 96, ITER, EventSet, ofp_papi );
+
+  #if defined(AVX256_AVAIL)
+    fprintf(ofp_papi, "# HP Non-FMA Vector AVX256\n");
+    test_hp_x86_256B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_hp_x86_256B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_hp_x86_256B_VEC( 96, ITER, EventSet, ofp_papi );
+
+    #if defined(AVX512_AVAIL)
+    fprintf(ofp_papi, "# HP Non-FMA Vector AVX512\n");
+    test_hp_x86_512B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_hp_x86_512B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_hp_x86_512B_VEC( 96, ITER, EventSet, ofp_papi );
+    #endif
+  #endif
+
+    // SP Non-FMA instruction trials.
+    fprintf(ofp_papi, "# SP Non-FMA Scalar\n");
+    test_sp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP Non-FMA Vector AVX128\n");
+    test_sp_x86_128B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_sp_x86_128B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_sp_x86_128B_VEC( 96, ITER, EventSet, ofp_papi );
+
+  #if defined(AVX256_AVAIL)
+    fprintf(ofp_papi, "# SP Non-FMA Vector AVX256\n");
+    test_sp_x86_256B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_sp_x86_256B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_sp_x86_256B_VEC( 96, ITER, EventSet, ofp_papi );
+
+    #if defined(AVX512_AVAIL)
+    fprintf(ofp_papi, "# SP Non-FMA Vector AVX512\n");
+    test_sp_x86_512B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_sp_x86_512B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_sp_x86_512B_VEC( 96, ITER, EventSet, ofp_papi );
+    #endif
+  #endif
+
+    // DP Non-FMA instruction trials.
+    fprintf(ofp_papi, "# DP Non-FMA Scalar\n");
+    test_dp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP Non-FMA Vector AVX128\n");
+    test_dp_x86_128B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_dp_x86_128B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_dp_x86_128B_VEC( 96, ITER, EventSet, ofp_papi );
+
+  #if defined(AVX256_AVAIL)
+    fprintf(ofp_papi, "# DP Non-FMA Vector AVX256\n");
+    test_dp_x86_256B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_dp_x86_256B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_dp_x86_256B_VEC( 96, ITER, EventSet, ofp_papi );
+
+    #if defined(AVX512_AVAIL)
+    fprintf(ofp_papi, "# DP Non-FMA Vector AVX512\n");
+    test_dp_x86_512B_VEC( 24, ITER, EventSet, ofp_papi );
+    test_dp_x86_512B_VEC( 48, ITER, EventSet, ofp_papi );
+    test_dp_x86_512B_VEC( 96, ITER, EventSet, ofp_papi );
+    #endif
+  #endif
+
+    // HP FMA instruction trials.
+    fprintf(ofp_papi, "# HP FMA Scalar\n");
+    test_hp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# HP FMA Vector AVX128\n");
+    test_hp_x86_128B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_hp_x86_128B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_hp_x86_128B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+  #if defined(AVX256_AVAIL)
+    fprintf(ofp_papi, "# HP FMA Vector AVX256\n");
+    test_hp_x86_256B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_hp_x86_256B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_hp_x86_256B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    #if defined(AVX512_AVAIL)
+    fprintf(ofp_papi, "# HP FMA Vector AVX512\n");
+    test_hp_x86_512B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_hp_x86_512B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_hp_x86_512B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+    #endif
+  #endif
+
+    // SP FMA instruction trials.
+    fprintf(ofp_papi, "# SP FMA Scalar\n");
+    test_sp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP FMA Vector AVX128\n");
+    test_sp_x86_128B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_sp_x86_128B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_sp_x86_128B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+  #if defined(AVX256_AVAIL)
+    fprintf(ofp_papi, "# SP FMA Vector AVX256\n");
+    test_sp_x86_256B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_sp_x86_256B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_sp_x86_256B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    #if defined(AVX512_AVAIL)
+    fprintf(ofp_papi, "# SP FMA Vector AVX512\n");
+    test_sp_x86_512B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_sp_x86_512B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_sp_x86_512B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+    #endif
+  #endif
+
+    // DP FMA instruction trials.
+    fprintf(ofp_papi, "# DP FMA Scalar\n");
+    test_dp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP FMA Vector AVX128\n");
+    test_dp_x86_128B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_dp_x86_128B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_dp_x86_128B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+  #if defined(AVX256_AVAIL)
+    fprintf(ofp_papi, "# DP FMA Vector AVX256\n");
+    test_dp_x86_256B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_dp_x86_256B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_dp_x86_256B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    #if defined(AVX512_AVAIL)
+    fprintf(ofp_papi, "# DP FMA Vector AVX512\n");
+    test_dp_x86_512B_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_dp_x86_512B_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_dp_x86_512B_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+    #endif
+  #endif
 
 #else
     fprintf(stderr, "Vector FLOP benchmark is not supported on this architecture: AVX unavailable!\n");
@@ -147,58 +203,130 @@ void vec_driver(char* papi_event_name, h
 #elif defined(ARM)
 
     // Non-FMA instruction trials.
-    test_hp_arm_VEC( 24, 1000, EventSet, ofp_papi );
-    test_hp_arm_VEC( 48, 1000, EventSet, ofp_papi );
-    test_hp_arm_VEC( 96, 1000, EventSet, ofp_papi );
-
-    test_sp_arm_VEC( 24, 1000, EventSet, ofp_papi );
-    test_sp_arm_VEC( 48, 1000, EventSet, ofp_papi );
-    test_sp_arm_VEC( 96, 1000, EventSet, ofp_papi );
-
-    test_dp_arm_VEC( 24, 1000, EventSet, ofp_papi );
-    test_dp_arm_VEC( 48, 1000, EventSet, ofp_papi );
-    test_dp_arm_VEC( 96, 1000, EventSet, ofp_papi );
+    fprintf(ofp_papi, "# HP Non-FMA Scalar\n");
+    test_hp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# HP Non-FMA Vector\n");
+    test_hp_arm_VEC( 24, ITER, EventSet, ofp_papi );
+    test_hp_arm_VEC( 48, ITER, EventSet, ofp_papi );
+    test_hp_arm_VEC( 96, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP Non-FMA Scalar\n");
+    test_sp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP Non-FMA Vector\n");
+    test_sp_arm_VEC( 24, ITER, EventSet, ofp_papi );
+    test_sp_arm_VEC( 48, ITER, EventSet, ofp_papi );
+    test_sp_arm_VEC( 96, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP Non-FMA Scalar\n");
+    test_dp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP Non-FMA Vector\n");
+    test_dp_arm_VEC( 24, ITER, EventSet, ofp_papi );
+    test_dp_arm_VEC( 48, ITER, EventSet, ofp_papi );
+    test_dp_arm_VEC( 96, ITER, EventSet, ofp_papi );
 
     // FMA instruction trials.
-    test_hp_arm_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_hp_arm_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_hp_arm_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-    test_sp_arm_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_sp_arm_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_sp_arm_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-    test_dp_arm_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_dp_arm_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_dp_arm_VEC_FMA( 48, 1000, EventSet, ofp_papi );
+    fprintf(ofp_papi, "# HP FMA Scalar\n");
+    test_hp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# HP FMA Vector\n");
+    test_hp_arm_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_hp_arm_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_hp_arm_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP FMA Scalar\n");
+    test_sp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP FMA Vector\n");
+    test_sp_arm_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_sp_arm_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_sp_arm_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP FMA Scalar\n");
+    test_dp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP FMA Vector\n");
+    test_dp_arm_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_dp_arm_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_dp_arm_VEC_FMA( 48, ITER, EventSet, ofp_papi );
 
 #elif defined(POWER)
 
     // Non-FMA instruction trials.
-    test_hp_power_VEC( 24, 1000, EventSet, ofp_papi );
-    test_hp_power_VEC( 48, 1000, EventSet, ofp_papi );
-    test_hp_power_VEC( 96, 1000, EventSet, ofp_papi );
-
-    test_sp_power_VEC( 24, 1000, EventSet, ofp_papi );
-    test_sp_power_VEC( 48, 1000, EventSet, ofp_papi );
-    test_sp_power_VEC( 96, 1000, EventSet, ofp_papi );
-
-    test_dp_power_VEC( 24, 1000, EventSet, ofp_papi );
-    test_dp_power_VEC( 48, 1000, EventSet, ofp_papi );
-    test_dp_power_VEC( 96, 1000, EventSet, ofp_papi );
+    fprintf(ofp_papi, "# HP Non-FMA Scalar\n");
+    test_hp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# HP Non-FMA Vector\n");
+    test_hp_power_VEC( 24, ITER, EventSet, ofp_papi );
+    test_hp_power_VEC( 48, ITER, EventSet, ofp_papi );
+    test_hp_power_VEC( 96, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP Non-FMA Scalar\n");
+    test_sp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP Non-FMA Vector\n");
+    test_sp_power_VEC( 24, ITER, EventSet, ofp_papi );
+    test_sp_power_VEC( 48, ITER, EventSet, ofp_papi );
+    test_sp_power_VEC( 96, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP Non-FMA Scalar\n");
+    test_dp_scalar_VEC_24( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_48( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_96( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP Non-FMA Vector\n");
+    test_dp_power_VEC( 24, ITER, EventSet, ofp_papi );
+    test_dp_power_VEC( 48, ITER, EventSet, ofp_papi );
+    test_dp_power_VEC( 96, ITER, EventSet, ofp_papi );
 
     // FMA instruction trials.
-    test_hp_power_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_hp_power_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_hp_power_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-    test_sp_power_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_sp_power_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_sp_power_VEC_FMA( 48, 1000, EventSet, ofp_papi );
-
-    test_dp_power_VEC_FMA( 12, 1000, EventSet, ofp_papi );
-    test_dp_power_VEC_FMA( 24, 1000, EventSet, ofp_papi );
-    test_dp_power_VEC_FMA( 48, 1000, EventSet, ofp_papi );
+    fprintf(ofp_papi, "# HP FMA Scalar\n");
+    test_hp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_hp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# HP FMA Vector\n");
+    test_hp_power_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_hp_power_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_hp_power_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP FMA Scalar\n");
+    test_sp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_sp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# SP FMA Vector\n");
+    test_sp_power_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_sp_power_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_sp_power_VEC_FMA( 48, ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP FMA Scalar\n");
+    test_dp_scalar_VEC_FMA_12( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_FMA_24( ITER, EventSet, ofp_papi );
+    test_dp_scalar_VEC_FMA_48( ITER, EventSet, ofp_papi );
+
+    fprintf(ofp_papi, "# DP FMA Vector\n");
+    test_dp_power_VEC_FMA( 12, ITER, EventSet, ofp_papi );
+    test_dp_power_VEC_FMA( 24, ITER, EventSet, ofp_papi );
+    test_dp_power_VEC_FMA( 48, ITER, EventSet, ofp_papi );
 
 #endif
 
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_fma_dp.c 7.2.0-1/src/counter_analysis_toolkit/vec_fma_dp.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_fma_dp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_fma_dp.c	2025-06-25 22:38:10.000000000 +0000
@@ -61,7 +61,7 @@ double test_dp_mac_VEC_FMA_12( uint64 it
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = FMA_VEC_PD(r0,r7,r9);
@@ -137,7 +137,7 @@ double test_dp_mac_VEC_FMA_24( uint64 it
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = FMA_VEC_PD(r0,r7,r9);
@@ -227,7 +227,7 @@ double test_dp_mac_VEC_FMA_48( uint64 it
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = FMA_VEC_PD(r0,r7,r9);
@@ -320,15 +320,15 @@ void test_dp_VEC_FMA( int instr_per_loop
 
     if ( instr_per_loop == 12 ) {
         sum += test_dp_mac_VEC_FMA_12( iterations, EventSet, fp );
-        scalar_sum += test_dp_scalar_VEC_FMA_12( iterations );
+        scalar_sum += test_dp_scalar_VEC_FMA_12( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 24 ) {
         sum += test_dp_mac_VEC_FMA_24( iterations, EventSet, fp );
-        scalar_sum += test_dp_scalar_VEC_FMA_24( iterations );
+        scalar_sum += test_dp_scalar_VEC_FMA_24( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 48 ) {
         sum += test_dp_mac_VEC_FMA_48( iterations, EventSet, fp );
-        scalar_sum += test_dp_scalar_VEC_FMA_48( iterations );
+        scalar_sum += test_dp_scalar_VEC_FMA_48( iterations, EventSet, NULL );
     }
 
     if( sum/2.0 != scalar_sum ) {
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_fma_hp.c 7.2.0-1/src/counter_analysis_toolkit/vec_fma_hp.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_fma_hp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_fma_hp.c	2025-06-25 22:38:10.000000000 +0000
@@ -68,7 +68,7 @@ half test_hp_mac_VEC_FMA_12( uint64 iter
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
         /* The performance critical part */
 
             r0 = FMA_VEC_PH(r0,r7,r9);
@@ -146,7 +146,7 @@ half test_hp_mac_VEC_FMA_24( uint64 iter
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = FMA_VEC_PH(r0,r7,r9);
@@ -238,7 +238,7 @@ half test_hp_mac_VEC_FMA_48( uint64 iter
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = FMA_VEC_PH(r0,r7,r9);
@@ -333,15 +333,15 @@ void test_hp_VEC_FMA( int instr_per_loop
 
     if ( instr_per_loop == 12 ) {
         sum = vaddh_f16(sum,test_hp_mac_VEC_FMA_12( iterations, EventSet, fp ));
-        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_FMA_12( iterations ));
+        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_FMA_12( iterations, EventSet, NULL ));
     }
     else if ( instr_per_loop == 24 ) {
         sum = vaddh_f16(sum,test_hp_mac_VEC_FMA_24( iterations, EventSet, fp ));
-        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_FMA_24( iterations ));
+        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_FMA_24( iterations, EventSet, NULL ));
     }
     else if ( instr_per_loop == 48 ) {
         sum = vaddh_f16(sum,test_hp_mac_VEC_FMA_48( iterations, EventSet, fp ));
-        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_FMA_48( iterations ));
+        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_FMA_48( iterations, EventSet, NULL ));
     }
 
     if( vdivh_f16(sum,4.0) != scalar_sum ) {
@@ -355,7 +355,10 @@ float test_hp_mac_VEC_FMA_12( uint64 ite
 
     (void)iterations;
     (void)EventSet;
-    papi_stop_and_print_placeholder(12, fp);
+
+    if ( NULL != fp ) {
+      papi_stop_and_print_placeholder(12, fp);
+    }
 
     return 0.0;
 }
@@ -365,7 +368,10 @@ float test_hp_mac_VEC_FMA_24( uint64 ite
 
     (void)iterations;
     (void)EventSet;
-    papi_stop_and_print_placeholder(24, fp);
+
+    if ( NULL != fp ) {
+      papi_stop_and_print_placeholder(24, fp);
+    }
 
     return 0.0;
 }
@@ -375,7 +381,10 @@ float test_hp_mac_VEC_FMA_48( uint64 ite
 
     (void)iterations;
     (void)EventSet;
-    papi_stop_and_print_placeholder(48, fp);
+
+    if ( NULL != fp ) {
+      papi_stop_and_print_placeholder(48, fp);
+    }
 
     return 0.0;
 }
@@ -388,15 +397,15 @@ void test_hp_VEC_FMA( int instr_per_loop
 
     if ( instr_per_loop == 12 ) {
         sum += test_hp_mac_VEC_FMA_12( iterations, EventSet, fp );
-        scalar_sum += test_hp_scalar_VEC_FMA_12( iterations );
+        scalar_sum += test_hp_scalar_VEC_FMA_12( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 24 ) {
         sum += test_hp_mac_VEC_FMA_24( iterations, EventSet, fp );
-        scalar_sum += test_hp_scalar_VEC_FMA_24( iterations );
+        scalar_sum += test_hp_scalar_VEC_FMA_24( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 48 ) {
         sum += test_hp_mac_VEC_FMA_48( iterations, EventSet, fp );
-        scalar_sum += test_hp_scalar_VEC_FMA_48( iterations );
+        scalar_sum += test_hp_scalar_VEC_FMA_48( iterations, EventSet, NULL );
     }
 
     if( sum/4.0 != scalar_sum ) {
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_fma_sp.c 7.2.0-1/src/counter_analysis_toolkit/vec_fma_sp.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_fma_sp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_fma_sp.c	2025-06-25 22:38:10.000000000 +0000
@@ -61,7 +61,7 @@ float test_sp_mac_VEC_FMA_12( uint64 ite
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = FMA_VEC_PS(r0,r7,r9);
@@ -139,7 +139,7 @@ float test_sp_mac_VEC_FMA_24( uint64 ite
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = FMA_VEC_PS(r0,r7,r9);
@@ -231,7 +231,7 @@ float test_sp_mac_VEC_FMA_48( uint64 ite
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = FMA_VEC_PS(r0,r7,r9);
@@ -326,18 +326,18 @@ void test_sp_VEC_FMA( int instr_per_loop
 
     if ( instr_per_loop == 12 ) {
         sum += test_sp_mac_VEC_FMA_12( iterations, EventSet, fp );
-        scalar_sum += test_sp_scalar_VEC_FMA_12( iterations );
+        scalar_sum += test_sp_scalar_VEC_FMA_12( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 24 ) {
         sum += test_sp_mac_VEC_FMA_24( iterations, EventSet, fp );
-        scalar_sum += test_sp_scalar_VEC_FMA_24( iterations );
+        scalar_sum += test_sp_scalar_VEC_FMA_24( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 48 ) {
         sum += test_sp_mac_VEC_FMA_48( iterations, EventSet, fp );
-        scalar_sum += test_sp_scalar_VEC_FMA_48( iterations );
+        scalar_sum += test_sp_scalar_VEC_FMA_48( iterations, EventSet, NULL );
     }
 
     if( sum/4.0 != scalar_sum ) {
-        fprintf(stderr, "FMA: Inconsistent FLOP results detected!\n");
+        fprintf(stderr, "FMA: Inconsistent FLOP results detected! %f vs %f\n", sum/4.0, scalar_sum);
     }
 }
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_nonfma_dp.c 7.2.0-1/src/counter_analysis_toolkit/vec_nonfma_dp.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_nonfma_dp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_nonfma_dp.c	2025-06-25 22:38:10.000000000 +0000
@@ -61,7 +61,7 @@ double test_dp_mac_VEC_24( uint64 iterat
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PD(r0,rC);
@@ -154,7 +154,7 @@ double test_dp_mac_VEC_48( uint64 iterat
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PD(r0,rC);
@@ -273,7 +273,7 @@ double test_dp_mac_VEC_96( uint64 iterat
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PD(r0,rC);
@@ -419,15 +419,15 @@ void test_dp_VEC( int instr_per_loop, ui
 
     if ( instr_per_loop == 24 ) {
         sum += test_dp_mac_VEC_24( iterations, EventSet, fp );
-        scalar_sum += test_dp_scalar_VEC_24( iterations );
+        scalar_sum += test_dp_scalar_VEC_24( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 48 ) {
         sum += test_dp_mac_VEC_48( iterations, EventSet, fp );
-        scalar_sum += test_dp_scalar_VEC_48( iterations );
+        scalar_sum += test_dp_scalar_VEC_48( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 96 ) {
         sum += test_dp_mac_VEC_96( iterations, EventSet, fp );
-        scalar_sum += test_dp_scalar_VEC_96( iterations );
+        scalar_sum += test_dp_scalar_VEC_96( iterations, EventSet, NULL );
     }
 
     if( sum/2.0 != scalar_sum ) {
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_nonfma_hp.c 7.2.0-1/src/counter_analysis_toolkit/vec_nonfma_hp.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_nonfma_hp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_nonfma_hp.c	2025-06-25 22:38:10.000000000 +0000
@@ -68,7 +68,7 @@ half test_hp_mac_VEC_24( uint64 iteratio
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
         /* The performance critical part */
 
             r0 = MUL_VEC_PH(r0,rC);
@@ -163,7 +163,7 @@ half test_hp_mac_VEC_48( uint64 iteratio
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PH(r0,rC);
@@ -284,7 +284,7 @@ half test_hp_mac_VEC_96( uint64 iteratio
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PH(r0,rC);
@@ -432,15 +432,15 @@ void test_hp_VEC( int instr_per_loop, ui
 
     if ( instr_per_loop == 24 ) {
         sum = vaddh_f16(sum,test_hp_mac_VEC_24( iterations, EventSet, fp ));
-        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_24( iterations ));
+        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_24( iterations, EventSet, NULL ));
     }
     else if ( instr_per_loop == 48 ) {
         sum = vaddh_f16(sum,test_hp_mac_VEC_48( iterations, EventSet, fp ));
-        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_48( iterations ));
+        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_48( iterations, EventSet, NULL ));
     }
     else if ( instr_per_loop == 96 ) {
         sum = vaddh_f16(sum,test_hp_mac_VEC_96( iterations, EventSet, fp ));
-        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_96( iterations ));
+        scalar_sum = vaddh_f16(scalar_sum,test_hp_scalar_VEC_96( iterations, EventSet, NULL ));
     }
 
     if( vdivh_f16(sum,4.0) != scalar_sum ) {
@@ -454,7 +454,10 @@ float test_hp_mac_VEC_24( uint64 iterati
 
     (void)iterations;
     (void)EventSet;
-    papi_stop_and_print_placeholder(24, fp);
+
+    if ( NULL != fp ) {
+      papi_stop_and_print_placeholder(24, fp);
+    }
 
     return 0.0;
 }
@@ -464,7 +467,10 @@ float test_hp_mac_VEC_48( uint64 iterati
 
     (void)iterations;
     (void)EventSet;
-    papi_stop_and_print_placeholder(48, fp);
+
+    if ( NULL != fp ) {
+      papi_stop_and_print_placeholder(48, fp);
+    }
 
     return 0.0;
 }
@@ -474,7 +480,10 @@ float test_hp_mac_VEC_96( uint64 iterati
 
     (void)iterations;
     (void)EventSet;
-    papi_stop_and_print_placeholder(96, fp);
+
+    if ( NULL != fp ) {
+      papi_stop_and_print_placeholder(96, fp);
+    }
 
     return 0.0;
 }
@@ -487,15 +496,15 @@ void test_hp_VEC( int instr_per_loop, ui
 
     if ( instr_per_loop == 24 ) {
         sum += test_hp_mac_VEC_24( iterations, EventSet, fp );
-        scalar_sum += test_hp_scalar_VEC_24( iterations );
+        scalar_sum += test_hp_scalar_VEC_24( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 48 ) {
         sum += test_hp_mac_VEC_48( iterations, EventSet, fp );
-        scalar_sum += test_hp_scalar_VEC_48( iterations );
+        scalar_sum += test_hp_scalar_VEC_48( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 96 ) {
         sum += test_hp_mac_VEC_96( iterations, EventSet, fp );
-        scalar_sum += test_hp_scalar_VEC_96( iterations );
+        scalar_sum += test_hp_scalar_VEC_96( iterations, EventSet, NULL );
     }
 
     if( sum/4.0 != scalar_sum ) {
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_nonfma_sp.c 7.2.0-1/src/counter_analysis_toolkit/vec_nonfma_sp.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_nonfma_sp.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_nonfma_sp.c	2025-06-25 22:38:10.000000000 +0000
@@ -61,7 +61,7 @@ float test_sp_mac_VEC_24( uint64 iterati
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
         /* The performance critical part */
 
             r0 = MUL_VEC_PS(r0,rC);
@@ -156,7 +156,7 @@ float test_sp_mac_VEC_48( uint64 iterati
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PS(r0,rC);
@@ -277,7 +277,7 @@ float test_sp_mac_VEC_96( uint64 iterati
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
             /* The performance critical part */
 
             r0 = MUL_VEC_PS(r0,rC);
@@ -425,15 +425,15 @@ void test_sp_VEC( int instr_per_loop, ui
 
     if ( instr_per_loop == 24 ) {
         sum += test_sp_mac_VEC_24( iterations, EventSet, fp );
-        scalar_sum += test_sp_scalar_VEC_24( iterations );
+        scalar_sum += test_sp_scalar_VEC_24( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 48 ) {
         sum += test_sp_mac_VEC_48( iterations, EventSet, fp );
-        scalar_sum += test_sp_scalar_VEC_48( iterations );
+        scalar_sum += test_sp_scalar_VEC_48( iterations, EventSet, NULL );
     }
     else if ( instr_per_loop == 96 ) {
         sum += test_sp_mac_VEC_96( iterations, EventSet, fp );
-        scalar_sum += test_sp_scalar_VEC_96( iterations );
+        scalar_sum += test_sp_scalar_VEC_96( iterations, EventSet, NULL );
     }
 
     if( sum/4.0 != scalar_sum ) {
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_scalar_verify.c 7.2.0-1/src/counter_analysis_toolkit/vec_scalar_verify.c
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_scalar_verify.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_scalar_verify.c	2025-06-25 22:38:10.000000000 +0000
@@ -19,7 +19,7 @@ void papi_stop_and_print(long long theor
 }
 
 #if defined(ARM)
-half test_hp_scalar_VEC_24( uint64 iterations ){
+half test_hp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp ){
     register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -40,10 +40,17 @@ half test_hp_scalar_VEC_24( uint64 itera
     rE = SET_VEC_SH(0.15);
     rF = SET_VEC_SH(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SH(r0,rC);
@@ -77,6 +84,11 @@ half test_hp_scalar_VEC_24( uint64 itera
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(24, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SH(r0,r1);
     r2 = ADD_VEC_SH(r2,r3);
@@ -99,7 +111,7 @@ half test_hp_scalar_VEC_24( uint64 itera
     return out;
 }
 
-half test_hp_scalar_VEC_48( uint64 iterations ){
+half test_hp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp ){
     register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -120,10 +132,17 @@ half test_hp_scalar_VEC_48( uint64 itera
     rE = SET_VEC_SH(0.15);
     rF = SET_VEC_SH(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SH(r0,rC);
@@ -183,6 +202,11 @@ half test_hp_scalar_VEC_48( uint64 itera
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(48, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SH(r0,r1);
     r2 = ADD_VEC_SH(r2,r3);
@@ -205,7 +229,7 @@ half test_hp_scalar_VEC_48( uint64 itera
     return out;
 }
 
-half test_hp_scalar_VEC_96( uint64 iterations ){
+half test_hp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp ){
     register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -226,10 +250,17 @@ half test_hp_scalar_VEC_96( uint64 itera
     rE = SET_VEC_SH(0.15);
     rF = SET_VEC_SH(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SH(r0,rC);
@@ -341,6 +372,11 @@ half test_hp_scalar_VEC_96( uint64 itera
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(96, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SH(r0,r1);
     r2 = ADD_VEC_SH(r2,r3);
@@ -364,21 +400,39 @@ half test_hp_scalar_VEC_96( uint64 itera
 }
 
 #else
-float test_hp_scalar_VEC_24( uint64 iterations ){
+float test_hp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp ){ /* Stand-in for builds without ARM defined: runs no kernel and always returns 0.0. */
 
     (void)iterations;
+    (void)EventSet; /* EventSet is not used by this stand-in. */
+
+    if ( NULL != fp ) { /* A NULL fp skips the placeholder output entirely. */
+      papi_stop_and_print_placeholder(24, fp); /* 24 matches the _24 variant in the function name; helper is defined elsewhere. */
+    }
+
     return 0.0;
 }
 
-float test_hp_scalar_VEC_48( uint64 iterations ){
+float test_hp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp ){ /* Stand-in for builds without ARM defined: runs no kernel and always returns 0.0. */
 
     (void)iterations;
+    (void)EventSet; /* EventSet is not used by this stand-in. */
+
+    if ( NULL != fp ) { /* A NULL fp skips the placeholder output entirely. */
+      papi_stop_and_print_placeholder(48, fp); /* 48 matches the _48 variant in the function name; helper is defined elsewhere. */
+    }
+
     return 0.0;
 }
 
-float test_hp_scalar_VEC_96( uint64 iterations ){
+float test_hp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp ){ /* Stand-in for builds without ARM defined: runs no kernel and always returns 0.0. */
 
     (void)iterations;
+    (void)EventSet; /* EventSet is not used by this stand-in. */
+
+    if ( NULL != fp ) { /* A NULL fp skips the placeholder output entirely. */
+      papi_stop_and_print_placeholder(96, fp); /* 96 matches the _96 variant in the function name; helper is defined elsewhere. */
+    }
+
     return 0.0;
 }
 #endif
@@ -386,7 +440,7 @@ float test_hp_scalar_VEC_96( uint64 iter
 /************************************/
 /* Loop unrolling:  24 instructions */
 /************************************/
-float test_sp_scalar_VEC_24( uint64 iterations ){
+float test_sp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp ){
     register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -407,10 +461,17 @@ float test_sp_scalar_VEC_24( uint64 iter
     rE = SET_VEC_SS(0.15);
     rF = SET_VEC_SS(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SS(r0,rC);
@@ -444,6 +505,11 @@ float test_sp_scalar_VEC_24( uint64 iter
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(24, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SS(r0,r1);
     r2 = ADD_VEC_SS(r2,r3);
@@ -469,7 +535,7 @@ float test_sp_scalar_VEC_24( uint64 iter
 /************************************/
 /* Loop unrolling:  48 instructions */
 /************************************/
-float test_sp_scalar_VEC_48( uint64 iterations ){
+float test_sp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp ){
     register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -490,10 +556,17 @@ float test_sp_scalar_VEC_48( uint64 iter
     rE = SET_VEC_SS(0.15);
     rF = SET_VEC_SS(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SS(r0,rC);
@@ -553,6 +626,11 @@ float test_sp_scalar_VEC_48( uint64 iter
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(48, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SS(r0,r1);
     r2 = ADD_VEC_SS(r2,r3);
@@ -578,7 +656,7 @@ float test_sp_scalar_VEC_48( uint64 iter
 /************************************/
 /* Loop unrolling:  96 instructions */
 /************************************/
-float test_sp_scalar_VEC_96( uint64 iterations ){
+float test_sp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp ){
     register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -599,10 +677,17 @@ float test_sp_scalar_VEC_96( uint64 iter
     rE = SET_VEC_SS(0.15);
     rF = SET_VEC_SS(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SS(r0,rC);
@@ -714,6 +799,11 @@ float test_sp_scalar_VEC_96( uint64 iter
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(96, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SS(r0,r1);
     r2 = ADD_VEC_SS(r2,r3);
@@ -739,7 +829,7 @@ float test_sp_scalar_VEC_96( uint64 iter
 /************************************/
 /* Loop unrolling:  24 instructions */
 /************************************/
-double test_dp_scalar_VEC_24( uint64 iterations ){
+double test_dp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp ){
     register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -760,10 +850,17 @@ double test_dp_scalar_VEC_24( uint64 ite
     rE = SET_VEC_SD(0.15);
     rF = SET_VEC_SD(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SD(r0,rC);
@@ -797,6 +894,11 @@ double test_dp_scalar_VEC_24( uint64 ite
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(24, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SD(r0,r1);
     r2 = ADD_VEC_SD(r2,r3);
@@ -822,7 +924,7 @@ double test_dp_scalar_VEC_24( uint64 ite
 /************************************/
 /* Loop unrolling:  48 instructions */
 /************************************/
-double test_dp_scalar_VEC_48( uint64 iterations ){
+double test_dp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp ){
     register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -843,10 +945,17 @@ double test_dp_scalar_VEC_48( uint64 ite
     rE = SET_VEC_SD(0.15);
     rF = SET_VEC_SD(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SD(r0,rC);
@@ -906,6 +1015,11 @@ double test_dp_scalar_VEC_48( uint64 ite
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(48, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SD(r0,r1);
     r2 = ADD_VEC_SD(r2,r3);
@@ -931,7 +1045,7 @@ double test_dp_scalar_VEC_48( uint64 ite
 /************************************/
 /* Loop unrolling:  96 instructions */
 /************************************/
-double test_dp_scalar_VEC_96( uint64 iterations ){
+double test_dp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp ){
     register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -952,10 +1066,17 @@ double test_dp_scalar_VEC_96( uint64 ite
     rE = SET_VEC_SD(0.15);
     rF = SET_VEC_SD(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             r0 = MUL_VEC_SD(r0,rC);
@@ -1067,6 +1188,11 @@ double test_dp_scalar_VEC_96( uint64 ite
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(96, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SD(r0,r1);
     r2 = ADD_VEC_SD(r2,r3);
@@ -1090,7 +1216,7 @@ double test_dp_scalar_VEC_96( uint64 ite
 }
 
 #if defined(ARM)
-half test_hp_scalar_VEC_FMA_12( uint64 iterations ){
+half test_hp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp ){
     register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1111,10 +1237,17 @@ half test_hp_scalar_VEC_FMA_12( uint64 i
     rE = SET_VEC_SH(0.15);
     rF = SET_VEC_SH(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SH(r0,r0,r7,r9);
@@ -1136,6 +1269,11 @@ half test_hp_scalar_VEC_FMA_12( uint64 i
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(12, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SH(r0,r1);
     r2 = ADD_VEC_SH(r2,r3);
@@ -1153,7 +1291,7 @@ half test_hp_scalar_VEC_FMA_12( uint64 i
     return out;
 }
 
-half test_hp_scalar_VEC_FMA_24( uint64 iterations ){
+half test_hp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp ){
     register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1174,10 +1312,17 @@ half test_hp_scalar_VEC_FMA_24( uint64 i
     rE = SET_VEC_SH(0.15);
     rF = SET_VEC_SH(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SH(r0,r0,r7,r9);
@@ -1213,6 +1358,11 @@ half test_hp_scalar_VEC_FMA_24( uint64 i
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(24, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SH(r0,r1);
     r2 = ADD_VEC_SH(r2,r3);
@@ -1230,7 +1380,7 @@ half test_hp_scalar_VEC_FMA_24( uint64 i
     return out;
 }
 
-half test_hp_scalar_VEC_FMA_48( uint64 iterations ){
+half test_hp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp ){
     register half r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1251,10 +1401,17 @@ half test_hp_scalar_VEC_FMA_48( uint64 i
     rE = SET_VEC_SH(0.15);
     rF = SET_VEC_SH(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SH(r0,r0,r7,r9);
@@ -1318,6 +1475,11 @@ half test_hp_scalar_VEC_FMA_48( uint64 i
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(48, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SH(r0,r1);
     r2 = ADD_VEC_SH(r2,r3);
@@ -1336,21 +1498,39 @@ half test_hp_scalar_VEC_FMA_48( uint64 i
 }
 
 #else
-float test_hp_scalar_VEC_FMA_12( uint64 iterations ){
+float test_hp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp ){ /* Stand-in for builds without ARM defined: runs no FMA kernel and always returns 0.0. */
 
     (void)iterations;
+    (void)EventSet; /* EventSet is not used by this stand-in. */
+
+    if ( NULL != fp ) { /* A NULL fp skips the placeholder output entirely. */
+      papi_stop_and_print_placeholder(12, fp); /* 12 matches the _12 variant in the function name; helper is defined elsewhere. */
+    }
+
     return 0.0;
 }
 
-float test_hp_scalar_VEC_FMA_24( uint64 iterations ){
+float test_hp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp ){ /* Stand-in for builds without ARM defined: runs no FMA kernel and always returns 0.0. */
 
     (void)iterations;
+    (void)EventSet; /* EventSet is not used by this stand-in. */
+
+    if ( NULL != fp ) { /* A NULL fp skips the placeholder output entirely. */
+      papi_stop_and_print_placeholder(24, fp); /* 24 matches the _24 variant in the function name; helper is defined elsewhere. */
+    }
+
     return 0.0;
 }
 
-float test_hp_scalar_VEC_FMA_48( uint64 iterations ){
+float test_hp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp ){ /* Stand-in for builds without ARM defined: runs no FMA kernel and always returns 0.0. */
 
     (void)iterations;
+    (void)EventSet; /* EventSet is not used by this stand-in. */
+
+    if ( NULL != fp ) { /* A NULL fp skips the placeholder output entirely. */
+      papi_stop_and_print_placeholder(48, fp); /* 48 matches the _48 variant in the function name; helper is defined elsewhere. */
+    }
+
     return 0.0;
 }
 #endif
@@ -1358,7 +1538,8 @@ float test_hp_scalar_VEC_FMA_48( uint64
 /************************************/
 /* Loop unrolling:  12 instructions */
 /************************************/
-float test_sp_scalar_VEC_FMA_12( uint64 iterations ){
+#pragma GCC optimize ("O2")
+float test_sp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp ){
     register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1379,10 +1560,17 @@ float test_sp_scalar_VEC_FMA_12( uint64
     rE = SET_VEC_SS(0.15);
     rF = SET_VEC_SS(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SS(r0,r0,r7,r9);
@@ -1404,6 +1592,11 @@ float test_sp_scalar_VEC_FMA_12( uint64
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(12, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SS(r0,r1);
     r2 = ADD_VEC_SS(r2,r3);
@@ -1424,7 +1617,7 @@ float test_sp_scalar_VEC_FMA_12( uint64
 /************************************/
 /* Loop unrolling:  24 instructions */
 /************************************/
-float test_sp_scalar_VEC_FMA_24( uint64 iterations ){
+float test_sp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp ){
     register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1445,10 +1638,17 @@ float test_sp_scalar_VEC_FMA_24( uint64
     rE = SET_VEC_SS(0.15);
     rF = SET_VEC_SS(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SS(r0,r0,r7,r9);
@@ -1484,6 +1684,11 @@ float test_sp_scalar_VEC_FMA_24( uint64
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(24, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SS(r0,r1);
     r2 = ADD_VEC_SS(r2,r3);
@@ -1504,7 +1709,7 @@ float test_sp_scalar_VEC_FMA_24( uint64
 /************************************/
 /* Loop unrolling:  48 instructions */
 /************************************/
-float test_sp_scalar_VEC_FMA_48( uint64 iterations ){
+float test_sp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp ){
     register SP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1525,10 +1730,17 @@ float test_sp_scalar_VEC_FMA_48( uint64
     rE = SET_VEC_SS(0.15);
     rF = SET_VEC_SS(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SS(r0,r0,r7,r9);
@@ -1592,6 +1804,11 @@ float test_sp_scalar_VEC_FMA_48( uint64
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(48, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SS(r0,r1);
     r2 = ADD_VEC_SS(r2,r3);
@@ -1612,7 +1829,7 @@ float test_sp_scalar_VEC_FMA_48( uint64
 /************************************/
 /* Loop unrolling:  12 instructions */
 /************************************/
-double test_dp_scalar_VEC_FMA_12( uint64 iterations ){
+double test_dp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp ){
     register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1633,10 +1850,17 @@ double test_dp_scalar_VEC_FMA_12( uint64
     rE = SET_VEC_SD(0.15);
     rF = SET_VEC_SD(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SD(r0,r0,r7,r9);
@@ -1658,6 +1882,11 @@ double test_dp_scalar_VEC_FMA_12( uint64
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(12, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SD(r0,r1);
     r2 = ADD_VEC_SD(r2,r3);
@@ -1678,7 +1907,7 @@ double test_dp_scalar_VEC_FMA_12( uint64
 /************************************/
 /* Loop unrolling:  24 instructions */
 /************************************/
-double test_dp_scalar_VEC_FMA_24( uint64 iterations ){
+double test_dp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp ){
     register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1699,10 +1928,17 @@ double test_dp_scalar_VEC_FMA_24( uint64
     rE = SET_VEC_SD(0.15);
     rF = SET_VEC_SD(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SD(r0,r0,r7,r9);
@@ -1738,6 +1974,11 @@ double test_dp_scalar_VEC_FMA_24( uint64
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(24, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SD(r0,r1);
     r2 = ADD_VEC_SD(r2,r3);
@@ -1758,7 +1999,7 @@ double test_dp_scalar_VEC_FMA_24( uint64
 /************************************/
 /* Loop unrolling:  48 instructions */
 /************************************/
-double test_dp_scalar_VEC_FMA_48( uint64 iterations ){
+double test_dp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp ){
     register DP_SCALAR_TYPE r0,r1,r2,r3,r4,r5,r6,r7,r8,r9,rA,rB,rC,rD,rE,rF;
 
     /* Generate starting data */
@@ -1779,10 +2020,17 @@ double test_dp_scalar_VEC_FMA_48( uint64
     rE = SET_VEC_SD(0.15);
     rF = SET_VEC_SD(0.16);
 
+    /* Start PAPI counters */
+    if ( NULL != fp ) {
+      if ( PAPI_start( EventSet ) != PAPI_OK ) {
+        return -1;
+      }
+    }
+
     uint64 c = 0;
     while (c < iterations){
         size_t i = 0;
-        while (i < 1000){
+        while (i < ITER){
 
             /* The performance critical part */
             FMA_VEC_SD(r0,r0,r7,r9);
@@ -1846,6 +2094,11 @@ double test_dp_scalar_VEC_FMA_48( uint64
         c++;
     }
 
+    /* Stop PAPI counters */
+    if ( NULL != fp ) {
+      papi_stop_and_print(48, EventSet, fp);
+    }
+
     /* Use data so that compiler does not eliminate it when using -O2 */
     r0 = ADD_VEC_SD(r0,r1);
     r2 = ADD_VEC_SD(r2,r3);
@@ -1862,3 +2115,4 @@ double test_dp_scalar_VEC_FMA_48( uint64
 
     return out;
 }
+// End of pragma.
diff -pruN 7.2.0~b2-1/src/counter_analysis_toolkit/vec_scalar_verify.h 7.2.0-1/src/counter_analysis_toolkit/vec_scalar_verify.h
--- 7.2.0~b2-1/src/counter_analysis_toolkit/vec_scalar_verify.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/counter_analysis_toolkit/vec_scalar_verify.h	2025-06-25 22:38:10.000000000 +0000
@@ -3,44 +3,46 @@
 #include <stdlib.h>
 #include "cat_arch.h"
 
+#define ITER 1
+
 void papi_stop_and_print_placeholder(long long theory, FILE *fp);
 void papi_stop_and_print(long long theory, int EventSet, FILE *fp);
 
 // Non-FMA-like computations.
 #if defined(ARM)
-half test_hp_scalar_VEC_24( uint64 iterations );
-half test_hp_scalar_VEC_48( uint64 iterations );
-half test_hp_scalar_VEC_96( uint64 iterations );
+half test_hp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp );
+half test_hp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp );
+half test_hp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp );
 #else
-float test_hp_scalar_VEC_24( uint64 iterations );
-float test_hp_scalar_VEC_48( uint64 iterations );
-float test_hp_scalar_VEC_96( uint64 iterations );
+float test_hp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp );
+float test_hp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp );
+float test_hp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp );
 #endif
 
-float test_sp_scalar_VEC_24( uint64 iterations );
-float test_sp_scalar_VEC_48( uint64 iterations );
-float test_sp_scalar_VEC_96( uint64 iterations );
-
-double test_dp_scalar_VEC_24( uint64 iterations );
-double test_dp_scalar_VEC_48( uint64 iterations );
-double test_dp_scalar_VEC_96( uint64 iterations );
+float test_sp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp );
+float test_sp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp );
+float test_sp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp );
+
+double test_dp_scalar_VEC_24( uint64 iterations, int EventSet, FILE *fp );
+double test_dp_scalar_VEC_48( uint64 iterations, int EventSet, FILE *fp );
+double test_dp_scalar_VEC_96( uint64 iterations, int EventSet, FILE *fp );
 
 // Functions to emulate FMA.
 #if defined(ARM)
-half test_hp_scalar_VEC_FMA_12( uint64 iterations );
-half test_hp_scalar_VEC_FMA_24( uint64 iterations );
-half test_hp_scalar_VEC_FMA_48( uint64 iterations );
+half test_hp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp );
+half test_hp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp );
+half test_hp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp );
 #else
-float test_hp_scalar_VEC_FMA_12( uint64 iterations );
-float test_hp_scalar_VEC_FMA_24( uint64 iterations );
-float test_hp_scalar_VEC_FMA_48( uint64 iterations );
+float test_hp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp );
+float test_hp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp );
+float test_hp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp );
 #endif
 
-float test_sp_scalar_VEC_FMA_12( uint64 iterations );
-float test_sp_scalar_VEC_FMA_24( uint64 iterations );
-float test_sp_scalar_VEC_FMA_48( uint64 iterations );
-
-double test_dp_scalar_VEC_FMA_12( uint64 iterations );
-double test_dp_scalar_VEC_FMA_24( uint64 iterations );
-double test_dp_scalar_VEC_FMA_48( uint64 iterations );
+float test_sp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp );
+float test_sp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp );
+float test_sp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp );
+
+double test_dp_scalar_VEC_FMA_12( uint64 iterations, int EventSet, FILE *fp );
+double test_dp_scalar_VEC_FMA_24( uint64 iterations, int EventSet, FILE *fp );
+double test_dp_scalar_VEC_FMA_48( uint64 iterations, int EventSet, FILE *fp );
 
diff -pruN 7.2.0~b2-1/src/high-level/papi_hl.c 7.2.0-1/src/high-level/papi_hl.c
--- 7.2.0~b2-1/src/high-level/papi_hl.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/high-level/papi_hl.c	2025-06-25 22:38:10.000000000 +0000
@@ -167,7 +167,7 @@ static int _internal_hl_checkCounter ( c
 static int _internal_hl_determine_rank();
 static char *_internal_hl_remove_spaces( char *str, int mode );
 static int _internal_hl_determine_default_events();
-static int _internal_hl_read_user_events();
+static int _internal_hl_read_user_events(const char *user_events);
 static int _internal_hl_new_component(int component_id, components_t *component);
 static int _internal_hl_add_event_to_component(char *event_name, int event,
                                         short event_type, components_t *component);
diff -pruN 7.2.0~b2-1/src/libpfm4/docs/man3/libpfm_intel_knl.3 7.2.0-1/src/libpfm4/docs/man3/libpfm_intel_knl.3
--- 7.2.0~b2-1/src/libpfm4/docs/man3/libpfm_intel_knl.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/docs/man3/libpfm_intel_knl.3	2025-06-25 22:38:10.000000000 +0000
@@ -1,15 +1,15 @@
 .TH LIBPFM 3  "July, 2016" "" "Linux Programmer's Manual"
 .SH NAME
-libpfm_intel_knl - support for Intel Kinghts Landing core PMU
+libpfm_intel_knl - support for Intel Knights Landing core PMU
 .SH SYNOPSIS
 .nf
 .B #include <perfmon/pfmlib.h>
 .sp
 .B PMU name: knl
-.B PMU desc: Intel Kinghts Landing
+.B PMU desc: Intel Knights Landing
 .sp
 .SH DESCRIPTION
-The library supports the Intel Kinghts Landing core PMU. It should be noted that
+The library supports the Intel Knights Landing core PMU. It should be noted that
 this PMU model only covers each core's PMU and not the socket level PMU.
 
 On Knights Landing, the number of generic counters is 4. There is 4-way HyperThreading support.
@@ -17,7 +17,7 @@ The \fBpfm_get_pmu_info()\fR function re
 in \fBnum_cntrs\fr.
 
 .SH MODIFIERS
-The following modifiers are supported on Intel Kinghts Landing processors:
+The following modifiers are supported on Intel Knights Landing processors:
 .TP
 .B u
 Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR.
diff -pruN 7.2.0~b2-1/src/libpfm4/docs/man3/libpfm_intel_knm.3 7.2.0-1/src/libpfm4/docs/man3/libpfm_intel_knm.3
--- 7.2.0~b2-1/src/libpfm4/docs/man3/libpfm_intel_knm.3	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/docs/man3/libpfm_intel_knm.3	2025-06-25 22:38:10.000000000 +0000
@@ -6,10 +6,10 @@ libpfm_intel_knm - support for Intel Kni
 .B #include <perfmon/pfmlib.h>
 .sp
 .B PMU name: knm
-.B PMU desc: Intel Kinghts Mill
+.B PMU desc: Intel Knights Mill
 .sp
 .SH DESCRIPTION
-The library supports the Intel Kinghts Mill core PMU. It should be noted that
+The library supports the Intel Knights Mill core PMU. It should be noted that
 this PMU model only covers each core's PMU and not the socket level PMU.
 
 On Knights Mill, the number of generic counters is 4. There is 4-way HyperThreading support.
@@ -17,7 +17,7 @@ The \fBpfm_get_pmu_info()\fR function re
 in \fBnum_cntrs\fr.
 
 .SH MODIFIERS
-The following modifiers are supported on Intel Kinghts Mill processors:
+The following modifiers are supported on Intel Knights Mill processors:
 .TP
 .B u
 Measure at user level which includes privilege levels 1, 2, 3. This corresponds to \fBPFM_PLM3\fR.
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/events/amd64_events_fam1ah_zen5.h 7.2.0-1/src/libpfm4/lib/events/amd64_events_fam1ah_zen5.h
--- 7.2.0~b2-1/src/libpfm4/lib/events/amd64_events_fam1ah_zen5.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/events/amd64_events_fam1ah_zen5.h	2025-06-25 22:38:10.000000000 +0000
@@ -1502,7 +1502,7 @@ static const amd64_umask_t amd64_fam1ah_
     .udesc  = "Fill from L3 or different L2 in same CCX",
     .ucode  = 0x2,
   },
-  { .uname  = "NEAR_FAR_CACHE_NEAR",
+  { .uname  = "NEAR_CACHE",
     .udesc  = "Fill from cache of different CCX in same node",
     .ucode  = 0x4,
   },
@@ -1510,7 +1510,7 @@ static const amd64_umask_t amd64_fam1ah_
     .udesc  = "Fill from DRAM or IO connected to same node",
     .ucode  = 0x8,
   },
-  { .uname  = "NEAR_FAR_CACHE_FAR",
+  { .uname  = "FAR_CACHE",
     .udesc  = "Fill from CCX cache in different node",
     .ucode  = 0x10,
   },
@@ -1518,7 +1518,7 @@ static const amd64_umask_t amd64_fam1ah_
     .udesc  = "Fill from DRAM or IO connected from a different node (same socket or remote)",
     .ucode  = 0x40,
   },
-  { .uname  = "ALT_MEM_NEAR_FAR",
+  { .uname  = "ALT_MEM",
     .udesc  = "Fill from Extension Memory",
     .ucode  = 0x80,
   },
@@ -2170,8 +2170,8 @@ static const amd64_entry_t amd64_fam1ah_
     .code    = 0x165,
     .flags   = 0,
     .ngrp    = 1,
-    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam1ah_zen5_l2_prefetch_hit_l2),
-    .umasks = amd64_fam1ah_zen5_l2_prefetch_hit_l2,  /* shared */
+    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam1ah_zen5_l2_fill_resp_src),
+    .umasks = amd64_fam1ah_zen5_l2_fill_resp_src,
   },
   { .name    = "L2_PREFETCH_MISS_L3",
     .desc    = "Number of L2 prefetches accepted by the L2 pipeline which miss the L2 and the L3 caches",
@@ -2179,7 +2179,7 @@ static const amd64_entry_t amd64_fam1ah_
     .code    = 0x72,
     .flags   = 0,
     .ngrp    = 1,
-    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam1ah_zen5_l2_fill_resp_src),
-    .umasks = amd64_fam1ah_zen5_l2_fill_resp_src,
+    .numasks = LIBPFM_ARRAY_SIZE(amd64_fam1ah_zen5_l2_prefetch_hit_l2),
+    .umasks = amd64_fam1ah_zen5_l2_prefetch_hit_l2,  /* shared */
   },
 };
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm.c	2025-06-25 22:38:10.000000000 +0000
@@ -41,53 +41,89 @@ const pfmlib_attr_desc_t arm_mods[]={
 };
 
 pfm_arm_config_t pfm_arm_cfg = {
-	.implementer  = -1,
-	.architecture = -1,
-	.part	      = -1,
+	.init_cpuinfo_done = 0,
 };
 
-#ifdef CONFIG_PFMLIB_OS_LINUX
+#define MAX_ARM_CPUIDS	8
+
+static arm_cpuid_t arm_cpuids[MAX_ARM_CPUIDS];
+static int num_arm_cpuids;
+
+static int pfmlib_find_arm_cpuid(arm_cpuid_t *attr, arm_cpuid_t *match_attr)
+{
+	int i;
+
+	if (attr == NULL)
+		return PFM_ERR_NOTFOUND;
+
+	for (i=0; i < num_arm_cpuids; i++) {
+#if 0
 /*
- * helper function to retrieve one value from /proc/cpuinfo
- * for internal libpfm use only
- * attr: the attribute (line) to look for
- * ret_buf: a buffer to store the value of the attribute (as a string)
- * maxlen : number of bytes of capacity in ret_buf
- *
- * ret_buf is null terminated.
- *
- * Return:
- * 	0 : attribute found, ret_buf populated
- * 	-1: attribute not found
+ * disabled due to issues with expected arch vs. reported
+ * arch by the Linux kernel cpuinfo
  */
+		if (arm_cpuids[i].arch != attr->arch)
+			continue;
+#endif
+		if (arm_cpuids[i].impl != attr->impl)
+			continue;
+		if (arm_cpuids[i].part != attr->part)
+			continue;
+		if (match_attr)
+			*match_attr = arm_cpuids[i];
+		return PFM_SUCCESS;
+	}
+	return PFM_ERR_NOTSUPP;
+}
 
+#ifdef CONFIG_PFMLIB_OS_LINUX
+/*
+ * Function populates the arm_cpuids[] table with each unique
+ * core identification found on the host. In the case of hybrids
+ * that number is greater than 1
+ */
 static int
-pfmlib_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen)
+pfmlib_init_cpuids(void)
 {
+	arm_cpuid_t attr = {0, };
 	FILE *fp = NULL;
 	int ret = -1;
-	size_t attr_len, buf_len = 0;
+	size_t buf_len = 0;
 	char *p, *value = NULL;
 	char *buffer = NULL;
+	int nattrs = 0;
 
-	if (attr == NULL || ret_buf == NULL || maxlen < 1)
-		return -1;
+	if (pfm_arm_cfg.init_cpuinfo_done == 1)
+		return PFM_SUCCESS;
 
-	attr_len = strlen(attr);
-
-	fp = fopen("/proc/cpuinfo", "r");
-	if (fp == NULL)
-		return -1;
+	fp = fopen(pfm_cfg.proc_cpuinfo, "r");
+	if (fp == NULL) {
+		DPRINT("pfmlib_init_cpuids: cannot open %s\n", pfm_cfg.proc_cpuinfo);
+		return PFM_ERR_NOTFOUND;
+	}
 
 	while(pfmlib_getl(&buffer, &buf_len, fp) != -1){
+		if (nattrs == ARM_NUM_ATTR_FIELDS) {
+			if (pfmlib_find_arm_cpuid(&attr, NULL) != PFM_SUCCESS) {
+				/* must add */
+				if (num_arm_cpuids == MAX_ARM_CPUIDS) {
+					DPRINT("pfmlib_init_cpuids: too many cpuids num_arm_cpuids=%d\n", num_arm_cpuids);
+					ret = PFM_ERR_TOOMANY;
+					goto error;
+				}
+				arm_cpuids[num_arm_cpuids++] = attr;
+				__pfm_vbprintf("Detected ARM CPU impl=0x%x arch=%d part=0x%x\n", attr.impl, attr.arch, attr.part);
+			}
+			nattrs = 0;
+		}
 
 		/* skip  blank lines */
-		if (*buffer == '\n')
+		if (*buffer == '\n' || *buffer == '\r')
 			continue;
 
 		p = strchr(buffer, ':');
 		if (p == NULL)
-			goto error;
+			continue;
 
 		/*
 		 * p+2: +1 = space, +2= firt character
@@ -98,20 +134,38 @@ pfmlib_getcpuinfo_attr(const char *attr,
 
 		value[strlen(value)-1] = '\0';
 
-		if (!strncmp(attr, buffer, attr_len))
-			break;
+		if (!strncmp("CPU implementer", buffer, 15)) {
+			attr.impl = strtoul(value, NULL, 0);
+			nattrs++;
+			continue;
+		}
+		if (!strncmp("CPU architecture", buffer, 16)) {
+			attr.arch = strtoul(value, NULL, 0);
+			nattrs++;
+			continue;
+		}
+		if (!strncmp("CPU part", buffer, 8)) {
+			attr.part = strtoul(value, NULL, 0);
+			nattrs++;
+			continue;
+		}
 	}
-	strncpy(ret_buf, value, maxlen-1);
-	ret_buf[maxlen-1] = '\0';
-	ret = 0;
+	ret = PFM_SUCCESS;
+	DPRINT("num_arm_cpuids=%d\n", num_arm_cpuids);
 error:
+	for (nattrs = 0; nattrs < num_arm_cpuids; nattrs++) {
+		DPRINT("cpuids[%d] = impl=0x%x arch=%d part=0x%x\n", nattrs, arm_cpuids[nattrs].impl, arm_cpuids[nattrs].arch, arm_cpuids[nattrs].part);
+	}
+	pfm_arm_cfg.init_cpuinfo_done = 1;
+
 	free(buffer);
 	fclose(fp);
+
 	return ret;
 }
 #else
 static int
-pfmlib_getcpuinfo_attr(const char *attr, char *ret_buf, size_t maxlen)
+pfmlib_init_cpuids(void)
 {
 	return -1;
 }
@@ -151,34 +205,15 @@ pfm_arm_display_reg(void *this, pfmlib_e
 }
 
 int
-pfm_arm_detect(void *this)
+pfm_arm_detect(arm_cpuid_t *attr, arm_cpuid_t *match_attr)
 {
-
 	int ret;
-	char buffer[128];
 
-	if (pfm_arm_cfg.implementer == -1) {
-		ret = pfmlib_getcpuinfo_attr("CPU implementer", buffer, sizeof(buffer));
-		if (ret == -1)
-			return PFM_ERR_NOTSUPP;
-		pfm_arm_cfg.implementer = strtol(buffer, NULL, 16);
-	}
-   
-	if (pfm_arm_cfg.part == -1) {
-		ret = pfmlib_getcpuinfo_attr("CPU part", buffer, sizeof(buffer));
-		if (ret == -1)
-			return PFM_ERR_NOTSUPP;
-		pfm_arm_cfg.part = strtol(buffer, NULL, 16);
-	}
+	ret = pfmlib_init_cpuids();
+	if (ret != PFM_SUCCESS)
+		return PFM_ERR_NOTSUPP;
 
-	if (pfm_arm_cfg.architecture == -1) {
-		ret = pfmlib_getcpuinfo_attr("CPU architecture", buffer, sizeof(buffer));
-		if (ret == -1)
-			return PFM_ERR_NOTSUPP;
-		pfm_arm_cfg.architecture = strtol(buffer, NULL, 16);
-	}
-   
-	return PFM_SUCCESS;
+	return pfmlib_find_arm_cpuid(attr, match_attr);
 }
 
 int
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv6.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv6.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv6.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv6.c	2025-06-25 22:38:10.000000000 +0000
@@ -37,24 +37,17 @@
 static int
 pfm_arm_detect_1176(void *this)
 {
+	/* ARM 1176 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 7, .part = 0xb76 };
 
-	int ret;
-
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-			(pfm_arm_cfg.part==0xb76)) { /* 1176 */
-		return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 /* ARM1176 support */
 pfmlib_pmu_t arm_1176_support={
 	.desc			= "ARM1176",
 	.name			= "arm_1176",
+	.perf_name		= "armv6_1176",
 	.pmu			= PFM_PMU_ARM_1176,
 	.pme_count		= LIBPFM_ARRAY_SIZE(arm_1176_pe),
 	.type			= PFM_PMU_TYPE_CORE,
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv7_pmuv1.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv7_pmuv1.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv7_pmuv1.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv7_pmuv1.c	2025-06-25 22:38:10.000000000 +0000
@@ -42,93 +42,61 @@
 static int
 pfm_arm_detect_cortex_a7(void *this)
 {
+	/* ARM Cortex A7 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 7, .part = 0xc07 };
 
-	int ret;
-
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-			(pfm_arm_cfg.part == 0xc07)) { /* Cortex-A7 */
-		return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a8(void *this)
 {
+	/* ARM Cortex A8 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 7, .part = 0xc08 };
 
-	int ret;
-
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-			(pfm_arm_cfg.part == 0xc08)) { /* Cortex-A8 */
-		return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a9(void *this)
 {
+	/* ARM Cortex A9 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 7, .part = 0xc09 };
 
-	int ret;
-
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-			(pfm_arm_cfg.part==0xc09)) { /* Cortex-A8 */
-		return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a15(void *this)
 {
+	/* ARM Cortex A15 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 7, .part = 0xc0f };
 
-	int ret;
-
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-			(pfm_arm_cfg.part==0xc0f)) { /* Cortex-A15 */
-		return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_krait(void *this)
 {
-
+	/* Qualcomm Krait */
+	/* Check that [15:10] of midr is 0x01 which	*/
+	/* indicates Krait rather than Scorpion	CPU	*/
+	/* match_attr.part is (midr>>4)&0xfff		*/
+	/* so Krait means (match_attr.part >> 6) == 0x1	*/
+	/* which is checked on the matched entry below	*/
+	arm_cpuid_t attr = { .impl = 0x51, .arch = 7, .part = 1 << 6 };
+	arm_cpuid_t match_attr;
 	int ret;
 
-	ret = pfm_arm_detect(this);
+	ret = pfm_arm_detect(&attr, &match_attr);
 	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
+		return ret;
 
-	/* Check for Qualcomm */
-	if (pfm_arm_cfg.implementer == 0x51) {
-		/* Check that [15:10] of midr is 0x01 which	*/
-		/* indicates Krait rather than Scorpion	CPU	*/
-		/* pfm_arm_cfg.part is (midr>>4)&0xfff		*/
-		if (pfm_arm_cfg.part >> 6 == 0x1) {
-			return PFM_SUCCESS;
-		}
-	}
-	return PFM_ERR_NOTSUPP;
-}
+	if ((match_attr.part >> 6) == 0x1)
+		return PFM_SUCCESS;
 
+	return PFM_ERR_NOTFOUND;
+}
 
 /* Cortex A7 support */
 pfmlib_pmu_t arm_cortex_a7_support={
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv8.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv8.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv8.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv8.c	2025-06-25 22:38:10.000000000 +0000
@@ -47,187 +47,115 @@
 static int
 pfm_arm_detect_n1(void *this)
 {
-	int ret;
+	/* Neoverse N1 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd0c };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd0c)) { /* Neoverse N1 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_v1(void *this)
 {
-	int ret;
+	/* Neoverse V1 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd40 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd40)) { /* Neoverse V1 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a57(void *this)
 {
-	int ret;
+	/* Cortex A57 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd07 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd07)) { /* Cortex A57 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a72(void *this)
 {
-	int ret;
+	/* Cortex A72 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd08 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd08)) { /* Cortex A57 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a53(void *this)
 {
-	int ret;
+	/* Cortex A53 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd03 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd03)) { /* Cortex A53 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a55(void *this)
 {
-	int ret;
+	/* Cortex A55 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd05 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd05)) { /* Cortex A55 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_cortex_a76(void *this)
 {
-	int ret;
+	/* Cortex A76 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 8, .part = 0xd0b };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd0b)) { /* Cortex A76 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_xgene(void *this)
 {
-	int ret;
+	/* Applied Micro X-Gene */
+	arm_cpuid_t attr = { .impl = 0x50, .arch = 8, .part = 0x0 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x50) && /* Applied Micro */
-		(pfm_arm_cfg.part == 0x000)) { /* Applied Micro X-Gene */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_thunderx2(void *this)
 {
+	/* Broadcom Thunder X2 */
+	arm_cpuid_t attr = { .impl = 0x42, .arch = 8, .part = 0x516 };
 	int ret;
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x42) && /* Broadcom */
-		(pfm_arm_cfg.part == 0x516)) { /* Thunder2x */
-			return PFM_SUCCESS;
-	}
-	if ((pfm_arm_cfg.implementer == 0x43) && /* Cavium */
-		(pfm_arm_cfg.part == 0xaf)) { /* Thunder2x */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	ret = pfm_arm_detect(&attr, NULL);
+	if (ret == PFM_SUCCESS)
+		return ret;
+
+	/* Cavium Thunder X2 */
+	attr.impl = 0x43;
+	attr.part = 0xaf;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_a64fx(void *this)
 {
-	int ret;
+	/* Fujitsu a64fx */
+	arm_cpuid_t attr = { .impl = 0x46, .arch = 8, .part = 0x001 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x46) && /* Fujitsu */
-		(pfm_arm_cfg.part == 0x001)) { /* a64fx */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_hisilicon_kunpeng(void *this)
 {
-	int ret;
+	/* Hisilicon Kunpeng */
+	arm_cpuid_t attr = { .impl = 0x48, .arch = 8, .part = 0xd01 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x48) && /* Hisilicon */
-	    (pfm_arm_cfg.part == 0xd01)) { /* Kunpeng */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 /* ARM Cortex A57 support */
 pfmlib_pmu_t arm_cortex_a57_support={
 	.desc			= "ARM Cortex A57",
 	.name			= "arm_ac57",
+	.perf_name              = "armv8_cortex_a57,armv8_pmuv3_0,armv8_pmuv3",
 	.pmu			= PFM_PMU_ARM_CORTEX_A57,
 	.pme_count		= LIBPFM_ARRAY_SIZE(arm_cortex_a57_pe),
 	.type			= PFM_PMU_TYPE_CORE,
@@ -254,6 +182,7 @@ pfmlib_pmu_t arm_cortex_a57_support={
 pfmlib_pmu_t arm_cortex_a72_support={
 	.desc			= "ARM Cortex A72",
 	.name			= "arm_ac72",
+	.perf_name		= "armv8_cortex_a72,armv8_pmuv3_0",
 	.pmu			= PFM_PMU_ARM_CORTEX_A72,
 	.pme_count		= LIBPFM_ARRAY_SIZE(arm_cortex_a57_pe), /* shared with a57 */
 	.type			= PFM_PMU_TYPE_CORE,
@@ -280,6 +209,7 @@ pfmlib_pmu_t arm_cortex_a72_support={
 pfmlib_pmu_t arm_cortex_a53_support={
 	.desc			= "ARM Cortex A53",
 	.name			= "arm_ac53",
+	.perf_name		= "armv8_cortex_a53",
 	.pmu			= PFM_PMU_ARM_CORTEX_A53,
 	.pme_count		= LIBPFM_ARRAY_SIZE(arm_cortex_a53_pe),
 	.type			= PFM_PMU_TYPE_CORE,
@@ -386,6 +316,7 @@ pfmlib_pmu_t arm_xgene_support={
 pfmlib_pmu_t arm_thunderx2_support={
 	.desc			= "Cavium ThunderX2",
 	.name			= "arm_thunderx2",
+	.perf_name		= "armv8_cavium_thunder",
 	.pmu			= PFM_PMU_ARM_THUNDERX2,
 	.pme_count		= LIBPFM_ARRAY_SIZE(arm_thunderx2_pe),
 	.type			= PFM_PMU_TYPE_CORE,
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv8_kunpeng_unc.c	2025-06-25 22:38:10.000000000 +0000
@@ -36,17 +36,9 @@
 static int
 pfm_arm_detect_hisilicon_kunpeng(void *this)
 {
-	int ret;
-
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x48) && /* Hisilicon */
-	    (pfm_arm_cfg.part == 0xd01)) { /* Kunpeng */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	/* Hisilicon Kunpeng */
+	arm_cpuid_t attr = { .impl = 0x48, .arch = 8, .part = 0xd01 };
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static void
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv8_thunderx2_unc.c	2025-06-25 22:38:10.000000000 +0000
@@ -36,21 +36,19 @@
 static int
 pfm_arm_detect_thunderx2(void *this)
 {
+	/* Broadcom Thunder X2*/
+	arm_cpuid_t attr = { .impl = 0x42, .arch = 8, .part = 0x516 };
 	int ret;
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
+	ret = pfm_arm_detect(&attr, NULL);
+	if (ret == PFM_SUCCESS)
+		return ret;
 
-	if ((pfm_arm_cfg.implementer == 0x42) && /* Broadcom */
-		(pfm_arm_cfg.part == 0x516)) { /* Thunder2x */
-			return PFM_SUCCESS;
-	}
-	if ((pfm_arm_cfg.implementer == 0x43) && /* Cavium */
-		(pfm_arm_cfg.part == 0xaf)) { /* Thunder2x */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	/* Cavium Thunder X2 */
+	attr.impl = 0x43;
+	attr.part = 0xaf;
+
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv9.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv9.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_armv9.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_armv9.c	2025-06-25 22:38:10.000000000 +0000
@@ -42,81 +42,46 @@
 static int
 pfm_arm_detect_n2(void *this)
 {
-	int ret;
+	/* ARM Neoverse N2 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 9, .part = 0xd49 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd49)) { /* Neoverse N2 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_n3(void *this)
 {
-	int ret;
+	/* ARM Neoverse N3 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 9, .part = 0xd8e };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd8e)) { /* Neoverse N3 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_v2(void *this)
 {
-	int ret;
+	/* ARM Neoverse V2 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 9, .part = 0xd4f };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd4f)) { /* Neoverse V2 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_v3(void *this)
 {
-	int ret;
+	/* ARM Neoverse V3 */
+	arm_cpuid_t attr = { .impl = 0x41, .arch = 9, .part = 0xd84 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x41) && /* ARM */
-		(pfm_arm_cfg.part == 0xd84)) { /* Neoverse V3 */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 static int
 pfm_arm_detect_monaka(void *this)
 {
-	int ret;
+	/* Fujitsu Monaka */
+	arm_cpuid_t attr = { .impl = 0x46, .arch = 9, .part = 0x3 };
 
-	ret = pfm_arm_detect(this);
-	if (ret != PFM_SUCCESS)
-		return PFM_ERR_NOTSUPP;
-
-	if ((pfm_arm_cfg.implementer == 0x46) && /* Fujitsu */
-		(pfm_arm_cfg.part == 0x003)) { /* monaka */
-			return PFM_SUCCESS;
-	}
-	return PFM_ERR_NOTSUPP;
+	return pfm_arm_detect(&attr, NULL);
 }
 
 pfmlib_pmu_t arm_n2_support={
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_perf_event.c 7.2.0-1/src/libpfm4/lib/pfmlib_arm_perf_event.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_perf_event.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_perf_event.c	2025-06-25 22:38:10.000000000 +0000
@@ -36,6 +36,7 @@ pfm_arm_get_perf_encoding(void *this, pf
 	pfmlib_pmu_t *pmu = this;
 	pfm_arm_reg_t reg;
 	struct perf_event_attr *attr = e->os_data;
+	int type;
 	int ret;
 
 	if (!pmu->get_event_encoding[PFM_OS_NONE])
@@ -49,11 +50,20 @@ pfm_arm_get_perf_encoding(void *this, pf
 		return ret;
 
 	if (e->count > 1) {
-		DPRINT("%s: unsupported count=%d\n", e->count);
+		DPRINT("unsupported count=%d\n", e->count);
 		return PFM_ERR_NOTSUPP;
 	}
-
-	attr->type = PERF_TYPE_RAW;
+	/*
+	 * To eliminate the issue of PERF_TYPE_RAW not working
+	 * for hybrid because the attr needs to encode the actual
+	 * PMU type, then we simply extract the actual PMU type
+	 * from sysfs.
+	 */
+	if (pfm_perf_find_pmu_type(pmu, &type) != PFM_SUCCESS) {
+		DPRINT("cannot determine PMU type for %s\n", pmu->name);
+		return PFM_ERR_NOTSUPP;
+	}
+	attr->type = type;
 	reg.val = e->codes[0];
 	/*
 	 * suppress the bits which are under the control of perf_events.
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_priv.h 7.2.0-1/src/libpfm4/lib/pfmlib_arm_priv.h
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_arm_priv.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_arm_priv.h	2025-06-25 22:38:10.000000000 +0000
@@ -54,14 +54,20 @@ typedef union pfm_arm_reg {
 } pfm_arm_reg_t;
 
 typedef struct {
-	int implementer;
-	int architecture;
-	int part;
+	int init_cpuinfo_done;
 } pfm_arm_config_t;
 
 extern pfm_arm_config_t pfm_arm_cfg;
 
-extern int pfm_arm_detect(void *this);
+typedef struct {
+	int impl;
+	int arch;
+	int part;
+	/* if number of fields altered, update ARM_NUM_ATTR_FIELDS */
+} arm_cpuid_t;
+#define ARM_NUM_ATTR_FIELDS 3 /* number of fields on arm_cpuid_t */
+
+extern int pfm_arm_detect(arm_cpuid_t *attr, arm_cpuid_t *match_attr);
 extern int pfm_arm_get_encoding(void *this, pfmlib_event_desc_t *e);
 extern int pfm_arm_get_event_first(void *this);
 extern int pfm_arm_get_event_next(void *this, int idx);
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_common.c 7.2.0-1/src/libpfm4/lib/pfmlib_common.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_common.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_common.c	2025-06-25 22:38:10.000000000 +0000
@@ -988,7 +988,8 @@ pfmlib_getl(char **buffer, size_t *len,
 		if (c == '\n')
 			break;
 	}
-	b[i] = '\0';
+	if (c != EOF)
+		b[i] = '\0';
 	return c != EOF ? 0 : -1;
 }
 
@@ -1214,9 +1215,13 @@ pfmlib_init_env(void)
 	if (str)
 		pfm_cfg.blacklist_pmus = str;
 
+#ifdef CONFIG_PFMLIB_OS_LINUX
 	str = getenv("LIBPFM_PROC_CPUINFO");
 	if (str)
 		pfm_cfg.proc_cpuinfo = str;
+	else
+		pfm_cfg.proc_cpuinfo = "/proc/cpuinfo";
+#endif
 }
 
 static int
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_intel_x86_perf_event.c 7.2.0-1/src/libpfm4/lib/pfmlib_intel_x86_perf_event.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_intel_x86_perf_event.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_intel_x86_perf_event.c	2025-06-25 22:38:10.000000000 +0000
@@ -33,31 +33,6 @@
 #include "pfmlib_perf_event_priv.h"
 
 static int
-find_pmu_type_by_name(const char *name)
-{
-	char filename[PATH_MAX];
-	FILE *fp;
-	int ret, type;
-
-	if (!name)
-		return PFM_ERR_NOTSUPP;
-
-	sprintf(filename, "/sys/bus/event_source/devices/%s/type", name);
-
-	fp = fopen(filename, "r");
-	if (!fp)
-		return PFM_ERR_NOTSUPP;
-
-	ret = fscanf(fp, "%d", &type);
-	if (ret != 1)
-		type = PFM_ERR_NOTSUPP;
-
-	fclose(fp);
-
-	return type;
-}
-
-static int
 has_ldlat(void *this, pfmlib_event_desc_t *e)
 {
 	pfmlib_event_attr_info_t *a;
@@ -109,8 +84,9 @@ pfm_intel_x86_get_perf_encoding(void *th
 	 * This allows this function to use used by some uncore PMUs
 	 */
 	if (pmu->perf_name) {
-		int type = find_pmu_type_by_name(pmu->perf_name);
-		if (type == PFM_ERR_NOTSUPP) {
+		int type;
+		ret = pfm_perf_find_pmu_type(pmu, &type);
+		if (ret != PFM_SUCCESS) {
 			DPRINT("perf PMU %s, not supported by OS\n", pmu->perf_name);
 		} else {
 			DPRINT("PMU %s perf type=%d\n", pmu->name, type);
@@ -191,7 +167,7 @@ pfm_intel_nhm_unc_get_perf_encoding(void
 	pfmlib_pmu_t *pmu = this;
 	struct perf_event_attr *attr = e->os_data;
 	pfm_intel_x86_reg_t reg;
-	int ret;
+	int ret, type;
 
 	if (!pmu->get_event_encoding[PFM_OS_NONE])
 		return PFM_ERR_NOTSUPP;
@@ -200,11 +176,11 @@ pfm_intel_nhm_unc_get_perf_encoding(void
 	if (ret != PFM_SUCCESS)
 		return ret;
 
-	ret = find_pmu_type_by_name(pmu->perf_name);
-	if (ret < 0)
+	ret = pfm_perf_find_pmu_type(pmu, &type);
+	if (ret != PFM_SUCCESS)
 		return ret;
 
-	attr->type = ret;
+	attr->type = type;
 
 	reg.val = e->codes[0];
 
@@ -362,6 +338,6 @@ pfm_intel_x86_perf_detect(void *this)
 	pfmlib_pmu_t *pmu = this;
 	char file[64];
 
-	snprintf(file,sizeof(file), "/sys/devices/%s", pmu->perf_name);
+	snprintf(file,sizeof(file), "%s/%s", SYSFS_PMU_DEVICES_DIR, pmu->perf_name);
 	return access(file, R_OK|X_OK) ? PFM_ERR_NOTSUPP : PFM_SUCCESS;
 }
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_perf_event.c 7.2.0-1/src/libpfm4/lib/pfmlib_perf_event.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_perf_event.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_perf_event.c	2025-06-25 22:38:10.000000000 +0000
@@ -26,12 +26,13 @@
 #include <string.h>
 #include <stdlib.h>
 #include <limits.h>
+#include <dirent.h>
 #include <perfmon/pfmlib_perf_event.h>
 
 #include "pfmlib_priv.h"
 #include "pfmlib_perf_event_priv.h"
 
-#define PERF_PROC_FILE "/proc/sys/kernel/perf_event_paranoid"
+#define PERF_PROC_FILE	"/proc/sys/kernel/perf_event_paranoid"
 
 #ifdef min
 #undef min
@@ -75,6 +76,15 @@ static const pfmlib_attr_desc_t perf_eve
 	PFM_ATTR_NULL /* end-marker to avoid exporting number of entries */
 };
 
+typedef struct sysfs_pmu_entry {
+	char *name;
+	int type;
+	int flags;
+} sysfs_pmu_entry_t;
+
+static sysfs_pmu_entry_t *sysfs_pmus;	/* cache os pmus available in sysfs */
+static int sysfs_npmus;			/* number of entries in sysfs_pmus */
+
 static int
 pfmlib_check_no_mods(pfmlib_event_desc_t *e)
 {
@@ -472,11 +482,196 @@ pfm_get_perf_event_encoding(const char *
 	return PFM_SUCCESS;
 }
 
+/*
+ * generic perf encoding helper
+ */
+static int
+pfmlib_perf_find_pmu_type_by_name(const char *perf_name, int *type)
+{
+	char filename[PATH_MAX];
+	FILE *fp;
+	int ret, tmp;
+	int retval = PFM_ERR_NOTFOUND;
+
+	if (!(perf_name && type))
+		return PFM_ERR_NOTSUPP;
+
+	snprintf(filename, PATH_MAX, "%s/%s/type", SYSFS_PMU_DEVICES_DIR, perf_name);
+
+	fp = fopen(filename, "r");
+	if (!fp)
+		return PFM_ERR_NOTSUPP;
+
+	ret = fscanf(fp, "%d", &tmp);
+
+	fclose(fp);
+
+	if (ret == 1) {
+		*type = tmp;
+		retval = PFM_SUCCESS;
+	}
+
+	return retval;
+}
+
+/*
+ * identify perf_events subdirectory
+ * via the presence of the mux interval config file
+ * Return:
+ * 1 : directory is a perf_events directory (match)
+ * 0 : directory is not a perf_events directory (no match)
+ */
+static int
+filter_pmu_dir(const struct dirent *d)
+{
+	char fn[PATH_MAX];
+
+	if (d->d_name[0] == '.')
+		return 0;
+
+	if (d->d_type != DT_DIR && d->d_type != DT_LNK)
+		return 0;
+
+	snprintf(fn, PATH_MAX, "%s/%s/perf_event_mux_interval_ms", SYSFS_PMU_DEVICES_DIR, d->d_name);
+
+	return !access(fn, F_OK);
+}
+
+/*
+ * build a cache of PMUs available via sysfs
+ * to speedup lookup later on
+ */
+int
+pfm_init_sysfs_pmu_cache(void)
+{
+	struct dirent **dir_list = NULL;
+	int n, i, j, ret, type;
+
+	/* only initialize once (perf vs. perf_ext) */
+	if (sysfs_pmus)
+		return PFM_SUCCESS;
+
+	/* scandir() returns -1 on error: handle it like an empty directory */
+	n = scandir(SYSFS_PMU_DEVICES_DIR, &dir_list, filter_pmu_dir, NULL);
+	if (n <= 0) {
+		free(dir_list);
+		return PFM_ERR_NOTSUPP;
+	}
+
+	sysfs_pmus = (sysfs_pmu_entry_t *)malloc(n * sizeof(sysfs_pmu_entry_t));
+	if (!sysfs_pmus) {
+		while (n--)
+			free(dir_list[n]);
+		free(dir_list);
+		return PFM_ERR_NOMEM;
+	}
+	/*
+	 * cache perf_event PMU name and type (attr.type)
+	 * i walks the scandir() results, j is the next free cache slot
+	 */
+	for (i = j = 0; i < n; i++) {
+		sysfs_pmus[j].name = dir_list[i]->d_name;
+		ret = pfmlib_perf_find_pmu_type_by_name(sysfs_pmus[j].name, &type);
+		/* skip PMU if cannot get the type, releasing its scandir() entry */
+		if (ret != PFM_SUCCESS) {
+			DPRINT("sysfs_pmus[%d]=%s failed to get PMU type from sysfs\n", j, sysfs_pmus[j].name);
+			free(dir_list[i]);
+			continue;
+		}
+		sysfs_pmus[j].type = type;
+		DPRINT("sysf_pmus[%d]=%s type=%d\n", j, sysfs_pmus[j].name, sysfs_pmus[j].type);
+		j++;
+	}
+	sysfs_npmus = j;
+
+	/* kept entries stay allocated: the cached names point into them */
+	free(dir_list);
+	return PFM_SUCCESS;
+}
+
 static int
 pfm_perf_event_os_detect(void *this)
 {
-	int ret = access(PERF_PROC_FILE, F_OK);
-	return ret ? PFM_ERR_NOTSUPP : PFM_SUCCESS;
+	if (access(PERF_PROC_FILE, F_OK))
+		return PFM_ERR_NOTSUPP;
+
+	return pfm_init_sysfs_pmu_cache();
+}
+
+static int
+pfmlib_perf_find_pmu_type(char *pmu_name, int *type)
+{
+	int i;
+
+	if (!sysfs_pmus)
+		return PFM_ERR_NOTFOUND;
+
+	for (i = 0; i < sysfs_npmus; i++) {
+		/* for now use exact match, add regexp later */
+		if (!strcmp(pmu_name, sysfs_pmus[i].name)) {
+			*type = sysfs_pmus[i].type;
+			return PFM_SUCCESS;
+		}
+	}
+	DPRINT("perf_find_pmu_type: cannot find PMU %s\n", pmu_name);
+	return PFM_ERR_NOTFOUND;
+}
+
+/*
+ * generic perf encoding helper
+ */
+int
+pfm_perf_find_pmu_type(void *this, int *type)
+{
+	pfmlib_pmu_t *pmu = this;
+	char *p, *s, *q;
+	int ret;
+
+
+	/*
+	 * if no perf_name specified, then the best
+	 * option is to use TYPE_RAW, i.e., the core PMU
+	 * which the caller is running on when invoking
+	 * perf_event_open()
+	 */
+	if (!pmu->perf_name) {
+		*type = PERF_TYPE_RAW;
+		DPRINT("No perf_name for %s, defaulting to TYPE_RAW\n", pmu->name);
+		return PFM_SUCCESS;
+	}
+
+	/*
+	 * perf_name may be a comma separated list of PMU names
+	 * so duplicate to split the string into PMU keywords
+	 */
+	s = q = strdup(pmu->perf_name);
+	if (!s) {
+		DPRINT("cannot dup perf_name for %s\n", pmu->perf_name);
+		return PFM_ERR_NOTSUPP;
+	}
+
+	ret = PFM_ERR_NOTFOUND;
+
+	while ((p = strchr(s, ','))) {
+
+		*p = '\0';
+
+		/* stop at first match */
+		ret = pfmlib_perf_find_pmu_type(s, type);
+		if (ret  == PFM_SUCCESS)
+			break;
+		s = p + 1;
+	}
+	/* only or last element of perf_name */
+	if (ret == PFM_ERR_NOTFOUND)
+		ret = pfmlib_perf_find_pmu_type(s, type);
+
+	free(q);
+
+	if (ret != PFM_SUCCESS) {
+		DPRINT("cannot find perf_events PMU type for %s perf_name=%s using PERF_TYPE_RAW\n", pmu->name, pmu->perf_name);
+	}
+	return ret;
 }
 
 pfmlib_os_t pfmlib_os_perf={
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_perf_event_pmu.c 7.2.0-1/src/libpfm4/lib/pfmlib_perf_event_pmu.c
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_perf_event_pmu.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_perf_event_pmu.c	2025-06-25 22:38:10.000000000 +0000
@@ -568,7 +568,7 @@ event_exist(perf_event_t *e)
 {
 	char buf[PATH_MAX];
 
-	snprintf(buf, PATH_MAX, "/sys/devices/%s/events/%s", e->pmu ? e->pmu : "cpu", e->name);
+	snprintf(buf, PATH_MAX, "%s/%s/events/%s", SYSFS_PMU_DEVICES_DIR, e->pmu ? e->pmu : "cpu", e->name);
 
 	return access(buf, F_OK) == 0;
 }
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_perf_event_priv.h 7.2.0-1/src/libpfm4/lib/pfmlib_perf_event_priv.h
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_perf_event_priv.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_perf_event_priv.h	2025-06-25 22:38:10.000000000 +0000
@@ -54,4 +54,8 @@
 
 #define PERF_PLM_ALL (PFM_PLM0|PFM_PLM3|PFM_PLMH)
 
+extern int pfm_perf_find_pmu_type(void *this, int *type);
+
+#define SYSFS_PMU_DEVICES_DIR	"/sys/bus/event_source/devices"
+
 #endif
diff -pruN 7.2.0~b2-1/src/libpfm4/lib/pfmlib_priv.h 7.2.0-1/src/libpfm4/lib/pfmlib_priv.h
--- 7.2.0~b2-1/src/libpfm4/lib/pfmlib_priv.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/libpfm4/lib/pfmlib_priv.h	2025-06-25 22:38:10.000000000 +0000
@@ -136,7 +136,7 @@ typedef struct pfmlib_node {
 typedef struct pfmlib_pmu {
 	const char 	*desc;			/* PMU description */
 	const char 	*name;			/* pmu short name */
-	const char	*perf_name;		/* perf_event pmu name (optional) */
+	const char	*perf_name;		/* (Linux optional): comma separated list of possible perf_events PMU names */
 	pfmlib_node_t   node;			/* active list node */
 	struct pfmlib_pmu *next_active;		/* active PMU link list */
 	struct pfmlib_pmu *prev_active;		/* active PMU link list */
diff -pruN 7.2.0~b2-1/src/linux-context.h 7.2.0-1/src/linux-context.h
--- 7.2.0~b2-1/src/linux-context.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/linux-context.h	2025-06-25 22:38:10.000000000 +0000
@@ -35,6 +35,8 @@ typedef ucontext_t hwd_ucontext_t;
 #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.arm_pc
 #elif defined(__aarch64__)
 #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc
+#elif defined(__loongarch64)
+#define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.__pc
 #elif defined(__mips__)
 #define OVERFLOW_ADDRESS(ctx) ctx.ucontext->uc_mcontext.pc
 #elif defined(__hppa__)
diff -pruN 7.2.0~b2-1/src/linux-timer.c 7.2.0-1/src/linux-timer.c
--- 7.2.0~b2-1/src/linux-timer.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/linux-timer.c	2025-06-25 22:38:10.000000000 +0000
@@ -246,6 +246,22 @@ get_cycles( void )
 }
 
 /************************/
+/* loongarch64 get_cycles() */
+/************************/
+
+#elif defined(__loongarch64)
+static inline long long
+get_cycles( void )
+{
+	int rid = 0;
+	unsigned long ret;
+
+	__asm__ __volatile__ ( "rdtime.d %0, %1" : "=r" (ret), "=r" (rid) );
+
+	return ret;
+}
+
+/************************/
 /* POWER get_cycles()   */
 /************************/
 
diff -pruN 7.2.0~b2-1/src/mb.h 7.2.0-1/src/mb.h
--- 7.2.0~b2-1/src/mb.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/mb.h	2025-06-25 22:38:10.000000000 +0000
@@ -39,6 +39,9 @@
 #elif defined(__aarch64__)
 #define rmb()           asm volatile("dmb ld" ::: "memory")
 
+#elif defined(__loongarch64)
+#define rmb()           __asm__ __volatile__("dbar 0" : : : "memory")
+
 #elif defined(__mips__)
 #define rmb()           asm volatile(                                   \
                                 ".set   mips2\n\t"                      \
diff -pruN 7.2.0~b2-1/src/papi.c 7.2.0-1/src/papi.c
--- 7.2.0~b2-1/src/papi.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi.c	2025-06-25 22:38:10.000000000 +0000
@@ -586,6 +586,10 @@ _rate_calls( float *real_time, float *pr
 
 extern hwi_presets_t user_defined_events[PAPI_MAX_USER_EVENTS];
 extern int user_defined_events_count;
+extern int num_all_presets;
+extern int _papi_hwi_start_idx[PAPI_NUM_COMP];
+extern int first_comp_with_presets;
+extern int first_comp_preset_idx;
 
 
 #ifdef DEBUG
@@ -1186,6 +1190,17 @@ PAPI_library_init( int version )
 		_in_papi_library_init_cnt--;
 		papi_return( init_retval );
 	}
+
+
+    /* Initialize component preset globals. */
+
+    tmp = _papi_hwi_init_global_presets();
+    if ( tmp ) {
+        init_retval = tmp;
+        _papi_hwi_shutdown_global_internal(  );
+        _in_papi_library_init_cnt--;
+        papi_return( init_retval );
+    }
 	
 	init_level = PAPI_LOW_LEVEL_INITED;
 	_in_papi_library_init_cnt--;
@@ -1240,10 +1255,16 @@ PAPI_query_event( int EventCode )
     APIDBG( "Entry: EventCode: %#x\n", EventCode);
 	if ( IS_PRESET(EventCode) ) {
 		EventCode &= PAPI_PRESET_AND_MASK;
-		if ( EventCode < 0 || EventCode >= PAPI_MAX_PRESET_EVENTS )
+		if ( EventCode < 0 || EventCode >= num_all_presets )
 			papi_return( PAPI_ENOTPRESET );
 
-		if ( _papi_hwi_presets[EventCode].count )
+        int preset_index = EventCode;
+        int compIdx = get_preset_cmp(&preset_index);
+        if( compIdx < 0 ) {
+            return PAPI_ENOEVNT;
+        }
+
+		if ( _papi_hwi_comp_presets[compIdx][preset_index].count )
 		        papi_return (PAPI_OK);
 		else
 			return PAPI_ENOEVNT;
@@ -1396,8 +1417,9 @@ PAPI_get_event_info( int EventCode, PAPI
 
 	if ( IS_PRESET(EventCode) ) {
            i = EventCode & PAPI_PRESET_AND_MASK;
-	   if ( i >= PAPI_MAX_PRESET_EVENTS )
+	   if ( i >= num_all_presets ) {
 	      papi_return( PAPI_ENOTPRESET );
+       }
 	   papi_return( _papi_hwi_get_preset_event_info( EventCode, info ) );
 	}
 
@@ -1474,13 +1496,24 @@ PAPI_event_code_to_name( int EventCode,
 
 	if ( IS_PRESET(EventCode) ) {
 		EventCode &= PAPI_PRESET_AND_MASK;
-		if ( EventCode < 0 || EventCode >= PAPI_MAX_PRESET_EVENTS )
+		if ( EventCode < 0 || EventCode >= num_all_presets )
 			papi_return( PAPI_ENOTPRESET );
 
-		if (_papi_hwi_presets[EventCode].symbol == NULL )
+        int preset_index = EventCode;
+        int compIdx = get_preset_cmp(&preset_index);
+        if( compIdx < 0 ) {
+            return PAPI_ENOEVNT;
+        }
+        if ( _papi_hwd[compIdx]->cmp_info.disabled == PAPI_EDELAY_INIT ) {
+            int junk;
+            _papi_hwd[compIdx]->ntv_enum_events(&junk, PAPI_ENUM_FIRST);
+        }
+
+
+		if (_papi_hwi_comp_presets[compIdx][preset_index].symbol == NULL )
 			papi_return( PAPI_ENOTPRESET );
 
-		strncpy( out, _papi_hwi_presets[EventCode].symbol, PAPI_MAX_STR_LEN-1 );
+		strncpy( out, _papi_hwi_comp_presets[compIdx][preset_index].symbol, PAPI_MAX_STR_LEN-1 );
 		out[PAPI_MAX_STR_LEN-1] = '\0';
 		papi_return( PAPI_OK );
 	}
@@ -1569,14 +1602,70 @@ PAPI_event_name_to_code( const char *in,
 	/* All presets start with "PAPI_" so no need to */
 	/* do an exhaustive search if that's not there  */
 	if (strncmp(in, "PAPI_", 5) == 0) {
-	   for(i = 0; i < PAPI_MAX_PRESET_EVENTS; i++ ) {
-	      if ( ( _papi_hwi_presets[i].symbol )
-		   && ( strcasecmp( _papi_hwi_presets[i].symbol, in ) == 0) ) {
-		 *out = ( int ) ( i | PAPI_PRESET_MASK );
-		 papi_return( PAPI_OK );
-	      }
-	   }
-	}
+
+       /* Split event name into base name and qualifier. */
+       int preset_idx = -1;
+       char *evt_name_copy = strdup(in);
+       if( NULL == evt_name_copy ) {
+           PAPIERROR("Failed to allocate space for preset buffer.\n");
+		   papi_return( PAPI_EINVAL );
+       }
+
+       char *evt_base_name = strtok(evt_name_copy, ":");
+       if( NULL == evt_base_name ) {
+           PAPIERROR("Failed to allocate space for base name of native event used in preset.\n");
+		   papi_return( PAPI_EINVAL );
+       }
+
+       /* Since the preset could live inside of either the CPU or component preset list,
+        * set the list pointer appropriately. */
+       hwi_presets_t *_papi_hwi_list = NULL;
+
+       /* Now check the component presets. */
+       int cmpnt, breakFlag = 0;
+       for(cmpnt = 0; cmpnt < PAPI_NUM_COMP; cmpnt++ ) {
+           _papi_hwi_list = _papi_hwi_comp_presets[cmpnt];
+           for(i = 0; i < _papi_hwi_max_presets[cmpnt]; i++ ) {
+               if ( ( _papi_hwi_list[i].symbol )
+                 && ( strcasecmp( _papi_hwi_list[i].symbol, evt_base_name ) == 0) ) {
+                     *out = ( int ) ( (i + _papi_hwi_start_idx[cmpnt]) | PAPI_PRESET_MASK );
+
+                     if ( _papi_hwd[cmpnt]->cmp_info.disabled == PAPI_EDELAY_INIT ) {
+                         int junk;
+                         _papi_hwd[cmpnt]->ntv_enum_events(&junk, PAPI_ENUM_FIRST);
+                     }
+
+                     preset_idx = i;
+                     breakFlag = 1;
+                     break;
+               }
+           }
+           /* Checks whether preset was found. */
+           if( breakFlag ) {
+               break;
+           }
+       }
+
+       free(evt_name_copy);
+
+       /* User may have provided an invalid event name: only use the list when a preset actually matched. */
+       if( breakFlag && NULL != _papi_hwi_list ) {
+
+           /* Keep track of all qualifiers provided by the user. */
+           hwi_presets_t *prstPtr = &_papi_hwi_list[preset_idx];
+           int status = overwrite_qualifiers(prstPtr, in, 1);
+           if( status < 0 ) {
+               papi_return( PAPI_ENOMEM );
+           }
+
+           status = construct_qualified_event(prstPtr);
+           if( status < 0 ) {
+               papi_return( status );
+           }
+
+           papi_return( PAPI_OK );
+       }
+    }
 
 	// check to see if it is a user defined event
 	for ( i=0; i < user_defined_events_count ; i++ ) {
@@ -1664,6 +1753,9 @@ PAPI_event_name_to_code( const char *in,
  *	The following values are implemented for preset events
  *	<ul>
  *         <li> PAPI_PRESET_ENUM_AVAIL -- enumerate only available presets
+ *         <li> PAPI_PRESET_ENUM_CPU   -- enumerate CPU preset events
+ *         <li> PAPI_PRESET_ENUM_CPU_AVAIL -- enumerate available CPU preset events
+ *         <li> PAPI_PRESET_ENUM_FIRST_COMP -- enumerate first component preset event
  *	</ul>
  *
  *	@see PAPI @n
@@ -1687,42 +1779,107 @@ PAPI_enum_event( int *EventCode, int mod
 	cidx = _papi_hwi_component_index( *EventCode );
 	if (cidx < 0) return PAPI_ENOCMP;
 
-	/* Do we handle presets in componets other than CPU? */
-	/* if (( IS_PRESET(i) ) && cidx > 0 )) return PAPI_ENOCMP; */
-
     /* check to see if a valid modifier is provided */
     if (modifier != PAPI_ENUM_EVENTS &&
         modifier != PAPI_ENUM_FIRST &&
         modifier != PAPI_ENUM_ALL &&
         modifier != PAPI_PRESET_ENUM_AVAIL && 
+        modifier != PAPI_PRESET_ENUM_CPU && 
+        modifier != PAPI_PRESET_ENUM_CPU_AVAIL && 
+        modifier != PAPI_PRESET_ENUM_FIRST_COMP && 
         modifier != PAPI_NTV_ENUM_UMASKS && 
         modifier != PAPI_NTV_ENUM_UMASK_COMBOS)
         {
             return PAPI_EINVAL;
         }
 		
-	if ( IS_PRESET(i) ) {
-		if ( modifier == PAPI_ENUM_FIRST ) {
-			*EventCode = ( int ) PAPI_PRESET_MASK;
-			APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
-			return ( PAPI_OK );
-		}
-		i &= PAPI_PRESET_AND_MASK;
-		while ( ++i < PAPI_MAX_PRESET_EVENTS ) {
-			if ( _papi_hwi_presets[i].symbol == NULL ) {
-				APIDBG("EXIT: PAPI_ENOEVNT\n");
-				return ( PAPI_ENOEVNT );	/* NULL pointer terminates list */
-			}
-			if ( modifier & PAPI_PRESET_ENUM_AVAIL ) {
-				if ( _papi_hwi_presets[i].count == 0 )
-					continue;
-			}
-			*EventCode = ( int ) ( i | PAPI_PRESET_MASK );
-			APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
-			return ( PAPI_OK );
-		}
-		papi_return( PAPI_EINVAL );
-	}
+    /* If it is a component preset, it will be in a separate array. */
+    int preset_index;
+    hwi_presets_t *_papi_hwi_list;
+
+    if ( IS_PRESET(i) ) {
+
+        /* Set to the first preset. */
+        if ( modifier == PAPI_ENUM_FIRST ) {
+            *EventCode = ( int ) PAPI_PRESET_MASK;
+            APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
+            return ( PAPI_OK );
+        }
+
+        i &= PAPI_PRESET_AND_MASK;
+
+        /* Iterate over all or all available presets. */
+        if ( modifier == PAPI_ENUM_EVENTS || modifier == PAPI_PRESET_ENUM_AVAIL ) {
+
+            if ( _papi_hwd[cidx]->cmp_info.disabled == PAPI_EDELAY_INIT ) {
+                int junk;
+                _papi_hwd[cidx]->ntv_enum_events(&junk, PAPI_ENUM_FIRST);
+            }
+
+            /* NULL pointer used to terminate the list. However, now we have
+             * more presets that exist beyond the bounds of the original
+             * array, so skip over the NULL entries. */
+            do {
+                if ( ++i >= num_all_presets ) {
+                    return ( PAPI_EINVAL );
+                }
+
+                /* Find the component to which the preset belongs and set the
+                 * preset index relative to the component's presets' index range. */
+                preset_index = i;
+                int compIdx = get_preset_cmp(&preset_index);
+                if( compIdx < 0 ) {
+                    return ( PAPI_ENOEVNT );
+                }
+
+                _papi_hwi_list = _papi_hwi_comp_presets[compIdx];
+
+            } while ( _papi_hwi_list[preset_index].symbol == NULL ||
+                      (modifier == PAPI_PRESET_ENUM_AVAIL && _papi_hwi_list[preset_index].count == 0) );
+
+            *EventCode = ( int ) ( i | PAPI_PRESET_MASK );
+            APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
+            return ( PAPI_OK );
+        }
+
+        /* Set to the first component preset. */
+        if ( modifier == PAPI_PRESET_ENUM_FIRST_COMP ) {
+
+            preset_index = get_first_cmp_preset_idx();
+            if( preset_index < 0 ) {
+                return ( PAPI_ENOEVNT );
+            }
+
+            if ( _papi_hwd[first_comp_with_presets]->cmp_info.disabled == PAPI_EDELAY_INIT ) {
+                int junk;
+                _papi_hwd[first_comp_with_presets]->ntv_enum_events(&junk, PAPI_ENUM_FIRST);
+            }
+
+            *EventCode = ( int ) ( preset_index | PAPI_PRESET_MASK );
+            APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
+            return ( PAPI_OK );
+        }
+
+        /* Iterate over CPU presets. */
+        if ( modifier == PAPI_PRESET_ENUM_CPU || modifier == PAPI_PRESET_ENUM_CPU_AVAIL ) {
+
+            while ( ++i < PAPI_MAX_PRESET_EVENTS ) {
+                if ( _papi_hwi_presets[i].symbol == NULL ) {
+                    APIDBG("EXIT: PAPI_ENOEVNT\n");
+                    return ( PAPI_ENOEVNT );    /* NULL pointer terminates list */
+                }
+                if ( modifier == PAPI_PRESET_ENUM_CPU_AVAIL
+                     && _papi_hwi_presets[i].count == 0 ) {
+                    continue;
+                }
+                *EventCode = ( int ) ( i | PAPI_PRESET_MASK );
+                APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
+                return ( PAPI_OK );
+            }
+        }
+
+        papi_return( PAPI_EINVAL );
+    }
 
 	if ( IS_NATIVE(i) ) {
 	    // save event code so components can get it with call to: _papi_hwi_get_papi_event_code()
@@ -1899,7 +2056,7 @@ PAPI_enum_cmp_event( int *EventCode, int
 	int event_code;
 	char *evt_name;
 
-	if ( _papi_hwi_invalid_cmp(cidx) || ( (IS_PRESET(i)) && cidx > 0 ) ) {
+	if ( _papi_hwi_invalid_cmp(cidx) ) {
 		return PAPI_ENOCMP;
 	}
 
@@ -1908,28 +2065,50 @@ PAPI_enum_cmp_event( int *EventCode, int
 	  return PAPI_ENOCMP;
 	}
 
-	if ( IS_PRESET(i) ) {
-		if ( modifier == PAPI_ENUM_FIRST ) {
-			*EventCode = ( int ) PAPI_PRESET_MASK;
-			APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
-			return PAPI_OK;
-		}
-		i &= PAPI_PRESET_AND_MASK;
-		while ( ++i < PAPI_MAX_PRESET_EVENTS ) {
-			if ( _papi_hwi_presets[i].symbol == NULL ) {
-				APIDBG("EXIT: PAPI_ENOEVNT\n");
-				return ( PAPI_ENOEVNT );	/* NULL pointer terminates list */
-			}
-			if ( modifier & PAPI_PRESET_ENUM_AVAIL ) {
-				if ( _papi_hwi_presets[i].count == 0 )
-					continue;
-			}
-			*EventCode = ( int ) ( i | PAPI_PRESET_MASK );
-			APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
-			return PAPI_OK;
-		}
-		papi_return( PAPI_EINVAL );
-	}
+    if ( IS_PRESET(i) ) {
+
+        if ( _papi_hwd[cidx]->cmp_info.disabled == PAPI_EDELAY_INIT ) {
+            int junk;
+            _papi_hwd[cidx]->ntv_enum_events(&junk, PAPI_ENUM_FIRST);
+        }
+
+        int preset_index;
+        hwi_presets_t *_papi_hwi_list;
+
+        /* Set to the first preset. */
+        if ( modifier == PAPI_ENUM_FIRST ) {
+            *EventCode = ( int ) ( _papi_hwi_start_idx[cidx] | PAPI_PRESET_MASK );
+            APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
+            return ( PAPI_OK );
+        }
+
+        i &= PAPI_PRESET_AND_MASK;
+
+        /* Iterate over all or all available presets. */
+        if ( modifier == PAPI_ENUM_EVENTS || modifier == PAPI_PRESET_ENUM_AVAIL ) {
+
+            /* NULL pointer used to terminate the list. However, now we have
+             * more presets that exist beyond the bounds of the original
+             * array, so skip over the NULL entries. */
+            do {
+                if ( ++i >= _papi_hwi_start_idx[cidx] + _papi_hwi_max_presets[cidx] ) {
+                    return ( PAPI_EINVAL );
+                }
+
+                /* Find the component to which the preset belongs. */
+                _papi_hwi_list = _papi_hwi_comp_presets[cidx];
+                preset_index = i - _papi_hwi_start_idx[cidx];
+
+            } while ( _papi_hwi_list[preset_index].symbol == NULL ||
+                      (modifier == PAPI_PRESET_ENUM_AVAIL && _papi_hwi_list[preset_index].count == 0) );
+
+            *EventCode = ( int ) ( i | PAPI_PRESET_MASK );
+            APIDBG("EXIT: *EventCode: %#x\n", *EventCode);
+            return ( PAPI_OK );
+        }
+
+        papi_return( PAPI_EINVAL );
+    }
 
 	if ( IS_NATIVE(i) ) {
 	    // save event code so components can get it with call to: _papi_hwi_get_papi_event_code()
@@ -2366,8 +2545,8 @@ PAPI_remove_event( int EventSet, int Eve
  *
  *	@param EventSet
  *		An integer handle for a PAPI Event Set as created by PAPI_create_eventset.
- *	@param EventName
- *		A string containing the event name as listed in papi_avail or papi_native_avail.
+ *	@param EventCode
+ *		A defined event such as PAPI_TOT_INS.
  *
  *	@retval Positive-Integer
  *		The number of consecutive elements that succeeded before the error. 
@@ -3055,10 +3234,8 @@ PAPI_reset( int EventSet )
  *
  *  The counters continue counting after the read. 
  *
- *  Note the differences between PAPI_read() and PAPI_accum(). Specifically,
- *  PAPI_accum() adds the values of the counters to the values stored in the 
- *  array (the second parameter in PAPI_accum()) and then resets the counters
- *  to zero.
+ *  Note the differences between PAPI_read() and PAPI_accum(), specifically
+ *  that PAPI_accum() adds to the values array and resets the counters to zero.
  *
  *  PAPI_read() assumes an initialized PAPI library and a properly added 
  *  event set. 
@@ -3247,13 +3424,8 @@ PAPI_read_ts( int EventSet, long long *v
  *	These calls assume an initialized PAPI library and a properly added event set. 
  *	PAPI_accum adds the counters of the indicated event set into the array values. 
  *	The counters are zeroed and continue counting after the operation.
- *	Note the differences between PAPI_read() and PAPI_accum(). Specifically,
- *	PAPI_accum() adds the values of the counters to the values stored in the
- *	array (the second parameter in PAPI_accum()) and then resets the counters
- *	to zero.
- *
- *	Note: The provided array (second parameter in PAPI_accum) must be initialized for PAPI_accum
- *	because its values are read inside the function.
+ *	Note the differences between PAPI_read and PAPI_accum, specifically
+ *	that PAPI_accum adds to the values array and resets the counters to zero.
  *
  *	@param EventSet
  *		an integer handle for a PAPI Event Set 
@@ -6507,7 +6679,7 @@ PAPI_remove_events( int EventSet, int *E
  *
  * @par C Interface
  * \#include <papi.h> @n
- * int PAPI_list_events(int *EventSet, int *Events, int *number );
+ * int PAPI_list_events(int EventSet, int *Events, int *number);
 *
  *	@param[in] EventSet
  *		An integer handle for a PAPI event set as created by PAPI_create_eventset 
diff -pruN 7.2.0~b2-1/src/papi.h 7.2.0-1/src/papi.h
--- 7.2.0~b2-1/src/papi.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi.h	2025-06-25 22:38:10.000000000 +0000
@@ -280,7 +280,8 @@ failure.
 #define PAPI_ECMP_DISABLED	-25    /**< Component containing event is disabled */
 #define PAPI_EDELAY_INIT -26   /**< Delayed initialization component */
 #define PAPI_EMULPASS   -27    /**< Event exists, but cannot be counted due to multiple passes required by hardware */
-#define PAPI_NUM_ERRORS	 28    /**< Number of error messages specified in this API */
+#define PAPI_PARTIAL    -28    /**< Component is partially disabled */
+#define PAPI_NUM_ERRORS	 29    /**< Number of error messages specified in this API */
 
 #define PAPI_NOT_INITED		0
 #define PAPI_LOW_LEVEL_INITED 	1       /* Low level has called library init */
@@ -488,6 +489,7 @@ All of the functions in the PerfAPI shou
 enum {
    PAPI_ENUM_EVENTS = 0,		/**< Always enumerate all events */
    PAPI_ENUM_FIRST,				/**< Enumerate first event (preset or native) */
+   PAPI_PRESET_ENUM_FIRST_COMP,	/**< Enumerate first component preset event */
    PAPI_PRESET_ENUM_AVAIL, 		/**< Enumerate events that exist here */
 
    /* PAPI PRESET section */
@@ -503,6 +505,8 @@ enum {
    PAPI_PRESET_ENUM_L3,			/**< L3 cache related preset events */
    PAPI_PRESET_ENUM_TLB,		/**< Translation Lookaside Buffer events */
    PAPI_PRESET_ENUM_FP,			/**< Floating Point related preset events */
+   PAPI_PRESET_ENUM_CPU,		/**< CPU preset events */
+   PAPI_PRESET_ENUM_CPU_AVAIL,	/**< Available CPU preset events */
 
    /* PAPI native event related section */
    PAPI_NTV_ENUM_UMASKS,		/**< all individual bits for given group */
@@ -634,6 +638,8 @@ typedef void *vptr_t;
      char kernel_version[PAPI_MIN_STR_LEN];  /**< Version of the kernel PMC support driver */
      char disabled_reason[PAPI_HUGE_STR_LEN]; /**< Reason for failure of initialization */
      int disabled;   /**< 0 if enabled, otherwise error code from initialization */
+     char partially_disabled_reason[PAPI_HUGE_STR_LEN]; /**< Reason for partial initialization */
+     int partially_disabled; /**< 1 if component is partially disabled, 0 otherwise */
      int initialized;                        /**< Component is ready to use */
      int CmpIdx;				/**< Index into the vector array for this component; set at init time */
      int num_cntrs;               /**< Number of hardware counters the component supports */
@@ -895,6 +901,7 @@ typedef char* PAPI_user_defined_events_f
 #define PAPIF_DMEM_MAXVAL     12
 
 #define PAPI_MAX_INFO_TERMS  12		   /* should match PAPI_EVENTS_IN_DERIVED_EVENT defined in papi_internal.h */
+#define PAPI_MAX_COMP_QUALS  8
 
 
 /** @ingroup papi_data_structures 
@@ -1010,6 +1017,9 @@ enum {
                                                 to delineate platform specific 
 						anomalies or restrictions */
 
+     int  num_quals;                                       /**< number of qualifiers */
+     char quals[PAPI_MAX_COMP_QUALS][PAPI_HUGE_STR_LEN];   /**< qualifiers */
+     char quals_descrs[PAPI_MAX_COMP_QUALS][PAPI_HUGE_STR_LEN];  /**< qualifier descriptions */
    } PAPI_event_info_t;
 
 
diff -pruN 7.2.0~b2-1/src/papiStdEventDefs.h 7.2.0-1/src/papiStdEventDefs.h
--- 7.2.0~b2-1/src/papiStdEventDefs.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papiStdEventDefs.h	2025-06-25 22:38:10.000000000 +0000
@@ -12,6 +12,8 @@ platform's documentation carefully.
 #ifndef _PAPISTDEVENTDEFS
 #define _PAPISTDEVENTDEFS
 
+#include "papi_components_config_event_defs.h"
+
 /*
    Masks to indicate the event is a preset- the presets will have 
    the high bit set to one, as the vendors probably won't use the 
diff -pruN 7.2.0~b2-1/src/papi_common_strings.h 7.2.0-1/src/papi_common_strings.h
--- 7.2.0~b2-1/src/papi_common_strings.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_common_strings.h	2025-06-25 22:38:10.000000000 +0000
@@ -8,562 +8,562 @@ hwi_presets_t _papi_hwi_presets[PAPI_MAX
           "L1D cache misses", 
           "Level 1 data cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  1 */ {"PAPI_L1_ICM", 
 	  "L1I cache misses", 
 	  "Level 1 instruction cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  2 */ {"PAPI_L2_DCM", 
 	  "L2D cache misses", 
 	  "Level 2 data cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  3 */ {"PAPI_L2_ICM", 
           "L2I cache misses", 
           "Level 2 instruction cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  4 */ {"PAPI_L3_DCM", 
 	  "L3D cache misses", 
 	  "Level 3 data cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  5 */ {"PAPI_L3_ICM", 
 	  "L3I cache misses", 
 	  "Level 3 instruction cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  6 */ {"PAPI_L1_TCM", 
 	  "L1 cache misses", 
 	  "Level 1 cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  7 */ {"PAPI_L2_TCM", 
 	  "L2 cache misses", 
 	  "Level 2 cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  8 */ {"PAPI_L3_TCM", 
 	  "L3 cache misses", 
 	  "Level 3 cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*  9 */ {"PAPI_CA_SNP", 
 	  "Snoop Requests", 
 	  "Requests for a snoop", 0,
 	  0, PAPI_PRESET_BIT_CACH,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 10 */ {"PAPI_CA_SHR", 
 	  "Ex Acces shared CL", 
 	  "Requests for exclusive access to shared cache line", 0,
 	  0, PAPI_PRESET_BIT_CACH,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 11 */ {"PAPI_CA_CLN", 
 	  "Ex Access clean CL", 
 	  "Requests for exclusive access to clean cache line", 0,
 	  0, PAPI_PRESET_BIT_CACH,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 12 */ {"PAPI_CA_INV", 
           "Cache ln invalid",
 	  "Requests for cache line invalidation", 0,
 	  0, PAPI_PRESET_BIT_CACH,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 13 */ {"PAPI_CA_ITV", 
           "Cache ln intervene",
 	  "Requests for cache line intervention", 0,
 	  0, PAPI_PRESET_BIT_CACH,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 14 */ {"PAPI_L3_LDM", 
 	  "L3 load misses", 
 	  "Level 3 load misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 15 */ {"PAPI_L3_STM", 
 	  "L3 store misses", 
 	  "Level 3 store misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 16 */ {"PAPI_BRU_IDL", 
 	  "Branch idle cycles",
 	  "Cycles branch units are idle", 0,
 	  0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_BR,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 17 */ {"PAPI_FXU_IDL", 
 	  "IU idle cycles",
 	  "Cycles integer units are idle", 0,
 	  0, PAPI_PRESET_BIT_IDL,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 18 */ {"PAPI_FPU_IDL", 
 	  "FPU idle cycles",
 	  "Cycles floating point units are idle", 0,
 	  0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 19 */ {"PAPI_LSU_IDL", 
 	  "L/SU idle cycles",
 	  "Cycles load/store units are idle", 0,
 	  0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 20 */ {"PAPI_TLB_DM", 
 	  "Data TLB misses",
 	  "Data translation lookaside buffer misses", 0,
 	  0, PAPI_PRESET_BIT_TLB,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 21 */ {"PAPI_TLB_IM", 
 	  "Instr TLB misses",
 	  "Instruction translation lookaside buffer misses", 0,
 	  0, PAPI_PRESET_BIT_TLB + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 22 */ {"PAPI_TLB_TL",
 	  "Total TLB misses",
 	  "Total translation lookaside buffer misses", 0,
 	  0, PAPI_PRESET_BIT_TLB,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 23 */ {"PAPI_L1_LDM",
 	  "L1 load misses", 
 	  "Level 1 load misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 24 */ {"PAPI_L1_STM", 
 	  "L1 store misses", 
 	  "Level 1 store misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 25 */ {"PAPI_L2_LDM", 
 	  "L2 load misses", 
 	  "Level 2 load misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 26 */ {"PAPI_L2_STM", 
 	  "L2 store misses", 
 	  "Level 2 store misses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 27 */ {"PAPI_BTAC_M", 
 	  "Br targt addr miss",
 	  "Branch target address cache misses", 0,
 	  0, PAPI_PRESET_BIT_BR,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 28 */ {"PAPI_PRF_DM", 
 	  "Data prefetch miss",
 	  "Data prefetch cache misses", 0,
 	  0, PAPI_PRESET_BIT_CACH,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 29 */ {"PAPI_L3_DCH", 
 	  "L3D cache hits", 
 	  "Level 3 data cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 30 */ {"PAPI_TLB_SD",
 	  "TLB shootdowns",
 	  "Translation lookaside buffer shootdowns", 0,
 	  0, PAPI_PRESET_BIT_TLB,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 31 */ {"PAPI_CSR_FAL", 
 	  "Failed store cond",
 	  "Failed store conditional instructions", 0,
 	  0, PAPI_PRESET_BIT_CND + PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 32 */ {"PAPI_CSR_SUC", 
 	  "Good store cond",
 	  "Successful store conditional instructions", 0,
 	  0, PAPI_PRESET_BIT_CND + PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 33 */ {"PAPI_CSR_TOT", 
 	  "Total store cond",
 	  "Total store conditional instructions", 0,
 	  0, PAPI_PRESET_BIT_CND + PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 34 */ {"PAPI_MEM_SCY", 
 	  "Stalled mem cycles",
 	  "Cycles Stalled Waiting for memory accesses", 0,
 	  0, PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 35 */ {"PAPI_MEM_RCY", 
 	  "Stalled rd cycles",
 	  "Cycles Stalled Waiting for memory Reads", 0,
 	  0, PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 36 */ {"PAPI_MEM_WCY", 
 	  "Stalled wr cycles",
 	  "Cycles Stalled Waiting for memory writes", 0,
 	  0, PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 37 */ {"PAPI_STL_ICY", 
 	  "No instr issue",
 	  "Cycles with no instruction issue", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 38 */ {"PAPI_FUL_ICY", 
 	  "Max instr issue",
 	  "Cycles with maximum instruction issue", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 39 */ {"PAPI_STL_CCY", 
 	  "No instr done",
 	  "Cycles with no instructions completed", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 40 */ {"PAPI_FUL_CCY", 
 	  "Max instr done",
 	  "Cycles with maximum instructions completed", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 41 */ {"PAPI_HW_INT", 
 	  "Hdw interrupts", 
 	  "Hardware interrupts", 0,
 	  0, PAPI_PRESET_BIT_MSC,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 42 */ {"PAPI_BR_UCN", 
 	  "Uncond branch",
 	  "Unconditional branch instructions", 0,
 	  0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 43 */ {"PAPI_BR_CN", 
 	  "Cond branch", 
 	  "Conditional branch instructions", 0,
 	  0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 44 */ {"PAPI_BR_TKN", 
 	  "Cond branch taken",
 	  "Conditional branch instructions taken", 0,
 	  0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 45 */ {"PAPI_BR_NTK", 
 	  "Cond br not taken",
 	  "Conditional branch instructions not taken", 0,
 	  0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 46 */ {"PAPI_BR_MSP", 
 	  "Cond br mspredictd",
 	  "Conditional branch instructions mispredicted", 0,
 	  0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 47 */ {"PAPI_BR_PRC", 
 	  "Cond br predicted",
 	  "Conditional branch instructions correctly predicted", 0,
 	  0, PAPI_PRESET_BIT_BR + PAPI_PRESET_BIT_CND,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 48 */ {"PAPI_FMA_INS", 
 	  "FMAs completed", 
 	  "FMA instructions completed", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 49 */ {"PAPI_TOT_IIS", 
 	  "Instr issued", 
 	  "Instructions issued", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 50 */ {"PAPI_TOT_INS", 
 	  "Instr completed", 
 	  "Instructions completed", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 51 */ {"PAPI_INT_INS", 
 	  "Int instructions", 
 	  "Integer instructions", 0,
 	  0, PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 52 */ {"PAPI_FP_INS", 
 	  "FP instructions", 
 	  "Floating point instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 53 */ {"PAPI_LD_INS", 
 	  "Loads", 
 	  "Load instructions", 0,
 	  0, PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 54 */ {"PAPI_SR_INS", 
 	  "Stores", 
 	  "Store instructions", 0,
 	  0, PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 55 */ {"PAPI_BR_INS", 
 	  "Branches", 
 	  "Branch instructions", 0,
 	  0, PAPI_PRESET_BIT_BR,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 56 */ {"PAPI_VEC_INS", 
 	  "Vector/SIMD instr",
 	  "Vector/SIMD instructions (could include integer)", 0,
 	  0, PAPI_PRESET_BIT_MSC,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 57 */ {"PAPI_RES_STL", 
 	  "Stalled res cycles",
 	  "Cycles stalled on any resource", 0,
 	  0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_MSC,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 58 */ {"PAPI_FP_STAL", 
 	  "Stalled FPU cycles",
 	  "Cycles the FP unit(s) are stalled", 0,
 	  0, PAPI_PRESET_BIT_IDL + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 59 */ {"PAPI_TOT_CYC", 
 	  "Total cycles", 
 	  "Total cycles", 0,
 	  0, PAPI_PRESET_BIT_MSC,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 60 */ {"PAPI_LST_INS", 
 	  "L/S completed",
 	  "Load/store instructions completed", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_MEM,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 61 */ {"PAPI_SYC_INS", 
 	  "Syncs completed",
 	  "Synchronization instructions completed", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_MSC,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 62 */ {"PAPI_L1_DCH", 
 	  "L1D cache hits", 
 	  "Level 1 data cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 63 */ {"PAPI_L2_DCH", 
 	  "L2D cache hits", 
 	  "Level 2 data cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 64 */ {"PAPI_L1_DCA", 
 	  "L1D cache accesses",
 	  "Level 1 data cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 65 */ {"PAPI_L2_DCA", 
 	  "L2D cache accesses",
 	  "Level 2 data cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 66 */ {"PAPI_L3_DCA", 
 	  "L3D cache accesses",
 	  "Level 3 data cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 67 */ {"PAPI_L1_DCR", 
 	  "L1D cache reads", 
 	  "Level 1 data cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 68 */ {"PAPI_L2_DCR", 
 	  "L2D cache reads", 
 	  "Level 2 data cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 69 */ {"PAPI_L3_DCR", 
 	  "L3D cache reads", 
 	  "Level 3 data cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 70 */ {"PAPI_L1_DCW", 
 	  "L1D cache writes", 
 	  "Level 1 data cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 71 */ {"PAPI_L2_DCW", 
 	  "L2D cache writes", 
 	  "Level 2 data cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 72 */ {"PAPI_L3_DCW", 
 	  "L3D cache writes", 
 	  "Level 3 data cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 73 */ {"PAPI_L1_ICH", 
 	  "L1I cache hits",
 	  "Level 1 instruction cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 74 */ {"PAPI_L2_ICH", 
 	  "L2I cache hits",
 	  "Level 2 instruction cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 75 */ {"PAPI_L3_ICH", 
 	  "L3I cache hits",
 	  "Level 3 instruction cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 76 */ {"PAPI_L1_ICA", 
 	  "L1I cache accesses",
 	  "Level 1 instruction cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 77 */ {"PAPI_L2_ICA", 
 	  "L2I cache accesses",
 	  "Level 2 instruction cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 78 */ {"PAPI_L3_ICA", 
 	  "L3I cache accesses",
 	  "Level 3 instruction cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 79 */ {"PAPI_L1_ICR", 
 	  "L1I cache reads",
 	  "Level 1 instruction cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 80 */ {"PAPI_L2_ICR", 
 	  "L2I cache reads",
 	  "Level 2 instruction cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 81 */ {"PAPI_L3_ICR", 
 	  "L3I cache reads",
 	  "Level 3 instruction cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 82 */ {"PAPI_L1_ICW", 
 	  "L1I cache writes",
 	  "Level 1 instruction cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 83 */ {"PAPI_L2_ICW", 
 	  "L2I cache writes",
 	  "Level 2 instruction cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 84 */ {"PAPI_L3_ICW", 
 	  "L3I cache writes",
 	  "Level 3 instruction cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3 + PAPI_PRESET_BIT_INS,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 85 */ {"PAPI_L1_TCH", 
 	  "L1 cache hits", 
 	  "Level 1 total cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 86 */ {"PAPI_L2_TCH", 
 	  "L2 cache hits", 
 	  "Level 2 total cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 87 */ {"PAPI_L3_TCH", 
 	  "L3 cache hits", 
 	  "Level 3 total cache hits", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 88 */ {"PAPI_L1_TCA", 
 	  "L1 cache accesses",
 	  "Level 1 total cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 89 */ {"PAPI_L2_TCA", 
 	  "L2 cache accesses",
 	  "Level 2 total cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+	  NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 90 */ {"PAPI_L3_TCA", 
 	  "L3 cache accesses",
 	  "Level 3 total cache accesses", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 91 */ {"PAPI_L1_TCR", 
 	  "L1 cache reads", 
 	  "Level 1 total cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 92 */ {"PAPI_L2_TCR", 
 	  "L2 cache reads", 
 	  "Level 2 total cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 93 */ {"PAPI_L3_TCR", 
 	  "L3 cache reads", 
 	  "Level 3 total cache reads", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 94 */ {"PAPI_L1_TCW", 
 	  "L1 cache writes", 
 	  "Level 1 total cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L1,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 95 */ {"PAPI_L2_TCW", 
 	  "L2 cache writes",
 	  "Level 2 total cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L2,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 96 */ {"PAPI_L3_TCW", 
 	  "L3 cache writes", 
 	  "Level 3 total cache writes", 0,
 	  0, PAPI_PRESET_BIT_CACH + PAPI_PRESET_BIT_L3,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 97 */ {"PAPI_FML_INS", 
 	  "FPU multiply",
 	  "Floating point multiply instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 98 */ {"PAPI_FAD_INS", 
 	  "FPU add", 
 	  "Floating point add instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 99 */ {"PAPI_FDV_INS", 
 	  "FPU divide",
 	  "Floating point divide instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*100 */ {"PAPI_FSQ_INS", 
 	  "FPU square root",
 	  "Floating point square root instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*101 */ {"PAPI_FNV_INS", 
 	  "FPU inverse",
 	  "Floating point inverse instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*102 */ {"PAPI_FP_OPS", 
 	  "FP operations", 
 	  "Floating point operations", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*103 */ {"PAPI_SP_OPS", 
 	  "SP operations",
 	  "Floating point operations; optimized to count scaled single precision vector operations", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*104 */ {"PAPI_DP_OPS", 
 	  "DP operations",
 	  "Floating point operations; optimized to count scaled double precision vector operations", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*105 */ {"PAPI_VEC_SP", 
 	  "SP Vector/SIMD instr",
 	  "Single precision vector/SIMD instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP, 
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /*106 */ {"PAPI_VEC_DP", 
 	  "DP Vector/SIMD instr",
 	  "Double precision vector/SIMD instructions", 0,
 	  0, PAPI_PRESET_BIT_INS + PAPI_PRESET_BIT_FP,
-	  NULL, {0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 /* 107 */ {"PAPI_REF_CYC", 
 	  "Reference cycles", 
 	  "Reference clock cycles", 0,
 	  0, PAPI_PRESET_BIT_MSC,
-	  NULL, {0},{NULL}, NULL},
-/*108 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*109 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*110 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*111 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*112 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*113 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*114 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*115 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*116 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*117 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*118 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*119 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*120 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*121 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*122 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*123 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*124 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*125 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*126 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
-/*127 */ {NULL, NULL, NULL,0,0,0,NULL,{0},{NULL}, NULL},
+      NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*108 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*109 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*110 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*111 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*112 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*113 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*114 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*115 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*116 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*117 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*118 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*119 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*120 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*121 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*122 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*123 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*124 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*125 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*126 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
+/*127 */ {NULL, NULL, NULL, 0, 0, 0, NULL, {0}, {NULL}, {NULL}, {0}, {NULL}, NULL, 0, 0, {NULL}, {NULL}},
 };
 
 #if 0
diff -pruN 7.2.0~b2-1/src/papi_events.csv 7.2.0-1/src/papi_events.csv
--- 7.2.0~b2-1/src/papi_events.csv	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_events.csv	2025-06-25 22:38:10.000000000 +0000
@@ -400,15 +400,11 @@ CPU,amd64_fam17h_zen1
 #
 PRESET,PAPI_TOT_INS,NOT_DERIVED,RETIRED_INSTRUCTIONS
 PRESET,PAPI_TOT_CYC,NOT_DERIVED,CYCLES_NOT_IN_HALT
-PRESET,PAPI_L1_ICH,DERIVED_SUB,32_BYTE_INSTRUCTION_CACHE_FETCH,32_BYTE_INSTRUCTION_CACHE_MISSES
-PRESET,PAPI_L1_ICM,NOT_DERIVED,32_BYTE_INSTRUCTION_CACHE_MISSES
-PRESET,PAPI_L1_ICA,NOT_DERIVED,32_BYTE_INSTRUCTION_CACHE_FETCH
-PRESET,PAPI_L1_ICR,NOT_DERIVED,32_BYTE_INSTRUCTION_CACHE_FETCH
+PRESET,PAPI_L1_ICM,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
 # Same event code, confusing name?
 #PRESET,PAPI_L1_DCM,NOT_DERIVED,MAB_ALLOCATION_BY_PIPE
 PRESET,PAPI_L1_DCA,NOT_DERIVED,DATA_CACHE_ACCESSES
-PRESET,PAPI_L1_TCA,DERIVED_ADD,DATA_CACHE_ACCESSES,32_BYTE_INSTRUCTION_CACHE_FETCH
-PRESET,PAPI_L2_ICA,NOT_DERIVED,32_BYTE_INSTRUCTION_CACHE_MISSES
+PRESET,PAPI_L2_ICA,NOT_DERIVED,REQUESTS_TO_L2_GROUP1:CACHEABLE_IC_READ
 #
 # Note, need access to special L2 uncore events
 #	to get L2 related events
@@ -2061,6 +2057,7 @@ PRESET,PAPI_L1_ICM,NOT_DERIVED,IFETCH_MI
 #
 CPU,arm_ac15
 CPU,arm_ac57
+CPU,arm_ac72
 #
 PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
 PRESET,PAPI_TOT_IIS,NOT_DERIVED,INST_SPEC_EXEC
@@ -2105,6 +2102,63 @@ PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE
 PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL
 PRESET,PAPI_TLB_DM,NOT_DERIVED,L1D_TLB_REFILL
 PRESET,PAPI_HW_INT,NOT_DERIVED,EXCEPTION_TAKEN
+
+#
+CPU,arm_ac76
+#
+PRESET,PAPI_L1_DCM,DERIVED_ADD,L1D_CACHE_REFILL
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
+PRESET,PAPI_L3_DCM,NOT_DERIVED,L3D_CACHE_REFILL
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1I_CACHE_REFILL,L1D_CACHE_REFILL
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
+PRESET,PAPI_L3_TCM,NOT_DERIVED,L3D_CACHE_REFILL
+PRESET,PAPI_L3_LDM,NOT_DERIVED,LL_CACHE_MISS_RD
+PRESET,PAPI_L3_STM,DERIVED_SUB,L3D_CACHE_REFILL,LL_CACHE_MISS_RD
+PRESET,PAPI_TLB_DM,DERIVED_ADD,L1D_TLB_REFILL,L2D_TLB_REFILL
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L1I_TLB_REFILL 
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L1I_TLB_REFILL,L1D_TLB_REFILL,L2D_TLB_REFILL
+PRESET,PAPI_L1_LDM,NOT_DERIVED,L1D_CACHE_REFILL_RD
+PRESET,PAPI_L1_STM,NOT_DERIVED,L1D_CACHE_REFILL_WR
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_RD
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_WR
+PRESET,PAPI_L3_DCH,DERIVED_SUB,L3D_CACHE,L3D_CACHE_REFILL
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
+PRESET,PAPI_INT_INS,NOT_DERIVED,DP_SPEC
+PRESET,PAPI_FP_INS,NOT_DERIVED,VFP_SPEC
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_RETIRED
+PRESET,PAPI_VEC_INS,NOT_DERIVED,ASE_SPEC
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
+PRESET,PAPI_L2_DCA,DERIVED_SUB,L2D_CACHE,L2D_CACHE_RD
+#PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L3D_CACHE
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
+PRESET,PAPI_L3_DCR,NOT_DERIVED,L3D_CACHE_RD
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,|N0|N1|+|N2|-|N3|-|,L1D_CACHE,L1I_CACHE,L1I_CACHE_REFILL,L1D_CACHE_REFILL
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
+PRESET,PAPI_L3_TCH,DERIVED_SUB,LL_CACHE_RD,LL_CACHE_MISS_RD
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1I_CACHE,L1D_CACHE
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L2D_CACHE_REFILL
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
+PRESET,PAPI_L3_TCR,NOT_DERIVED,L3_CACHE_RD
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
+PRESET,PAPI_L3_TCW,DERIVED_SUB,L3D_CACHE,L3_CACHE_RD
+
+
 #
 CPU,qcom_krait
 #
@@ -2321,6 +2375,17 @@ PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,E
 PRESET,PAPI_SYC_INS,DERIVED_ADD,ISB_SPEC,DSB_SPEC,DMB_SPEC
 PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
 
+cuda,GH100
+cuda,GA100
+#
+PRESET,PAPI_CUDA_FP16_FMA,NOT_DERIVED,cuda:::sm__sass_thread_inst_executed_op_hfma_pred_on:stat=sum
+PRESET,PAPI_CUDA_BF16_FMA,NOT_DERIVED,cuda:::sm__sass_thread_inst_executed_op_hfma_pred_on:stat=sum
+PRESET,PAPI_CUDA_FP32_FMA,NOT_DERIVED,cuda:::sm__sass_thread_inst_executed_op_ffma_pred_on:stat=sum
+PRESET,PAPI_CUDA_FP64_FMA,NOT_DERIVED,cuda:::sm__sass_thread_inst_executed_op_dfma_pred_on:stat=sum
+PRESET,PAPI_CUDA_FP_FMA,DERIVED_POSTFIX,N0|N1|+|N2|+|,cuda:::sm__sass_thread_inst_executed_op_hfma_pred_on:stat=sum,cuda:::sm__sass_thread_inst_executed_op_ffma_pred_on:stat=sum,cuda:::sm__sass_thread_inst_executed_op_dfma_pred_on:stat=sum
+cuda,GH100
+PRESET,PAPI_CUDA_FP8_OPS,NOT_DERIVED,cuda:::sm__ops_path_tensor_src_fp8:stat=sum
+
 #########################
 # ARM Neoverse V2       #
 #########################
@@ -2435,6 +2500,85 @@ PRESET,PAPI_TLB_TL,DERIVED_ADD,L1D_TLB_R
 #NOT_IMPLEMENTED,PAPI_CSR_TOT,Total store conditional instructions
 #NOT_IMPLEMENTED,PAPI_PRF_DM,Data prefetch cache misses
 
+##############################
+# ARM Fujitsu FUJITSU-MONAKA #
+##############################
+CPU,arm_monaka
+#
+PRESET,PAPI_L1_DCM,NOT_DERIVED,L1D_CACHE_REFILL
+PRESET,PAPI_L1_ICM,NOT_DERIVED,L1I_CACHE_REFILL
+PRESET,PAPI_L2_DCM,NOT_DERIVED,L2D_CACHE_REFILL
+#PRESET,PAPI_L3_DCM,NOT_DERIVED,L2D_CACHE_REFILL_L3D_MISS
+PRESET,PAPI_L1_TCM,DERIVED_ADD,L1D_CACHE_REFILL,L1I_CACHE_REFILL
+PRESET,PAPI_L2_TCM,NOT_DERIVED,L2D_CACHE_REFILL
+#PRESET,PAPI_L3_TCM,NOT_DERIVED,L2D_CACHE_REFILL_L3D_MISS
+PRESET,PAPI_L3_LDM,NOT_DERIVED,L2D_CACHE_REFILL_L3D_MISS_DM_RD
+PRESET,PAPI_L3_STM,NOT_DERIVED,L2D_CACHE_REFILL_L3D_MISS_DM_WR
+PRESET,PAPI_BRU_IDL,NOT_DERIVED,BR_COMP_WAIT
+PRESET,PAPI_FXU_IDL,DERIVED_SUB,EU_COMP_WAIT,FL_COMP_WAIT
+PRESET,PAPI_FPU_IDL,NOT_DERIVED,FL_COMP_WAIT
+PRESET,PAPI_LSU_IDL,NOT_DERIVED,LD_COMP_WAIT
+PRESET,PAPI_TLB_DM,NOT_DERIVED,L2D_TLB_REFILL
+PRESET,PAPI_TLB_IM,NOT_DERIVED,L2I_TLB_REFILL
+PRESET,PAPI_TLB_TL,DERIVED_ADD,L2D_TLB_REFILL,L2I_TLB_REFILL
+PRESET,PAPI_L1_LDM,DERIVED_ADD,L1D_CACHE_REFILL_DM_RD,L1I_CACHE_REFILL_DM_RD
+PRESET,PAPI_L1_STM,NOT_DERIVED,L1D_CACHE_REFILL_DM_WR
+PRESET,PAPI_L2_LDM,NOT_DERIVED,L2D_CACHE_REFILL_DM_RD
+PRESET,PAPI_L2_STM,NOT_DERIVED,L2D_CACHE_REFILL_DM_WR
+#PRESET,PAPI_PRF_DM,NOT_DERIVED,L2D_CACHE_REFILL_L3D_MISS_PRF
+#PRESET,PAPI_L3_DCH,NOT_DERIVED,L2D_CACHE_REFILL_L3D_HIT
+PRESET,PAPI_MEM_SCY,DERIVED_ADD,STALL_FRONTEND_MEMBOUND,STALL_BACKEND_MEMBOUND
+PRESET,PAPI_STL_ICY,DERIVED_ADD,STALL_FRONTEND,STALL_BACKEND
+PRESET,PAPI_STL_CCY,NOT_DERIVED,_0INST_COMMIT
+PRESET,PAPI_FUL_CCY,DERIVED_POSTFIX,N0|N1|-|N2|-|N3|-|N4|-|N5|-|,CPU_CYCLES,_0INST_COMMIT,_1INST_COMMIT,_2INST_COMMIT,_3INST_COMMIT,_4INST_COMMIT
+PRESET,PAPI_HW_INT,DERIVED_ADD,EXC_IRQ,EXC_FIQ
+PRESET,PAPI_BR_MSP,NOT_DERIVED,BR_MIS_PRED
+PRESET,PAPI_BR_PRC,DERIVED_SUB,BR_PRED,BR_MIS_PRED
+PRESET,PAPI_FMA_INS,NOT_DERIVED,FP_FMA_SPEC
+PRESET,PAPI_TOT_INS,NOT_DERIVED,INST_RETIRED
+PRESET,PAPI_INT_INS,NOT_DERIVED,INT_SPEC
+PRESET,PAPI_FP_INS,NOT_DERIVED,FP_SPEC
+PRESET,PAPI_LD_INS,NOT_DERIVED,LD_SPEC
+PRESET,PAPI_SR_INS,NOT_DERIVED,ST_SPEC
+PRESET,PAPI_BR_INS,NOT_DERIVED,BR_PRED
+PRESET,PAPI_VEC_INS,NOT_DERIVED,SIMD_INST_RETIRED
+PRESET,PAPI_RES_STL,NOT_DERIVED,STALL_BACKEND
+PRESET,PAPI_TOT_CYC,NOT_DERIVED,CPU_CYCLES
+PRESET,PAPI_LST_INS,NOT_DERIVED,LDST_SPEC
+PRESET,PAPI_SYC_INS,DERIVED_POSTFIX,N0|N1|+|N2|+|N3|+|,ISB_SPEC,DSB_SPEC,DMB_SPEC,CSDB_SPEC
+PRESET,PAPI_L1_DCH,DERIVED_SUB,L1D_CACHE,L1D_CACHE_REFILL
+PRESET,PAPI_L2_DCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
+PRESET,PAPI_L1_DCA,NOT_DERIVED,L1D_CACHE
+PRESET,PAPI_L2_DCA,NOT_DERIVED,L2D_CACHE
+PRESET,PAPI_L3_DCA,NOT_DERIVED,L2D_CACHE_REFILL_L3D_CACHE
+PRESET,PAPI_L1_DCR,NOT_DERIVED,L1D_CACHE_RD
+PRESET,PAPI_L2_DCR,NOT_DERIVED,L2D_CACHE_RD
+PRESET,PAPI_L3_DCR,NOT_DERIVED,L3D_CACHE_RD
+PRESET,PAPI_L1_DCW,NOT_DERIVED,L1D_CACHE_WR
+PRESET,PAPI_L2_DCW,NOT_DERIVED,L2D_CACHE_WR
+PRESET,PAPI_L3_DCW,DERIVED_SUB,L2D_CACHE_REFILL_L3D_CACHE,L3D_CACHE_RD
+PRESET,PAPI_L1_ICH,DERIVED_SUB,L1I_CACHE,L1I_CACHE_REFILL
+PRESET,PAPI_L1_ICA,NOT_DERIVED,L1I_CACHE
+PRESET,PAPI_L1_TCH,DERIVED_POSTFIX,N0|N1|-|N2|+|N3|-|,L1D_CACHE,L1D_CACHE_REFILL,L1I_CACHE,L1I_CACHE_REFILL
+PRESET,PAPI_L2_TCH,DERIVED_SUB,L2D_CACHE,L2D_CACHE_REFILL
+#PRESET,PAPI_L3_TCH,NOT_DERIVED,L2D_CACHE_REFILL_L3D_HIT
+PRESET,PAPI_L1_TCA,DERIVED_ADD,L1D_CACHE,L1I_CACHE
+PRESET,PAPI_L2_TCA,NOT_DERIVED,L2D_CACHE
+PRESET,PAPI_L3_TCA,NOT_DERIVED,L2D_CACHE_REFILL_L3D_CACHE
+PRESET,PAPI_L2_TCR,NOT_DERIVED,L2D_CACHE_RD
+PRESET,PAPI_L3_TCR,NOT_DERIVED,L3D_CACHE_RD
+PRESET,PAPI_L2_TCW,NOT_DERIVED,L2D_CACHE_WR
+PRESET,PAPI_L3_TCW,DERIVED_SUB,L2D_CACHE_REFILL_L3D_CACHE,L3D_CACHE_RD
+PRESET,PAPI_FML_INS,NOT_DERIVED,FP_MUL_SPEC
+PRESET,PAPI_FDV_INS,NOT_DERIVED,FP_DIV_SPEC
+PRESET,PAPI_FSQ_INS,NOT_DERIVED,FP_SQRT_SPEC
+PRESET,PAPI_FP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SCALE_OPS_SPEC,FP_FIXED_OPS_SPEC
+PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_SP_SCALE_OPS_SPEC,FP_SP_FIXED_OPS_SPEC
+PRESET,PAPI_DP_OPS,DERIVED_POSTFIX,N0|512|128|/|*|N1|+|,FP_DP_SCALE_OPS_SPEC,FP_DP_FIXED_OPS_SPEC
+PRESET,PAPI_VEC_SP,NOT_DERIVED,ASE_SVE_FP_SP_SPEC
+PRESET,PAPI_VEC_DP,NOT_DERIVED,ASE_SVE_FP_DP_SPEC
+PRESET,PAPI_REF_CYC,NOT_DERIVED,CNT_CYCLES
+
 #
 CPU,mips_74k
 #
diff -pruN 7.2.0~b2-1/src/papi_internal.c 7.2.0-1/src/papi_internal.c
--- 7.2.0~b2-1/src/papi_internal.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_internal.c	2025-06-25 22:38:10.000000000 +0000
@@ -50,6 +50,12 @@ static int default_debug_handler( int er
 static long long handle_derived( EventInfo_t * evi, long long *from );
 
 /* Global definitions used by other files */
+int num_all_presets = 0;                            // total number of presets
+int _papi_hwi_start_idx[PAPI_NUM_COMP];             // first index for given component
+int first_comp_with_presets = -1;                   // track the first component that has presets
+int first_comp_preset_idx = PAPI_MAX_PRESET_EVENTS; // track the first non-perf_event component preset index
+int pe_disabled = 1;                                // track whether perf_event component is available
+
 int init_level = PAPI_NOT_INITED;
 int _papi_hwi_error_level = PAPI_QUIET;
 PAPI_debug_handler_t _papi_hwi_debug_handler = default_debug_handler;
@@ -84,7 +90,6 @@ static int num_native_chunks=0;
 char **_papi_errlist= NULL;
 static int num_error_chunks = 0;
 
-
 // pointer to event:mask string associated with last enum call to a components
 // will be NULL for non libpfm4 components
 // this is needed because libpfm4 event codes and papi event codes do not contain mask information
@@ -509,6 +514,7 @@ _papi_hwi_init_errors(void) {
 	/* 25 PAPI_ECMP_DISABLED */_papi_hwi_add_error("Component containing event is disabled");
     /* 26 PAPI_EDELAY_INIT */ _papi_hwi_add_error("Delayed initialization component");
     /* 27 PAPI_EMULPASS */ _papi_hwi_add_error("Event exists, but cannot be counted due to multiple passes required by hardware");
+    /* 28 PAPI_PARTIAL */ _papi_hwi_add_error("Component in use is partially disabled, see utils/papi_component_avail for more information.");
 }
 
 int
@@ -525,10 +531,10 @@ _papi_hwi_component_index( int event_cod
   int cidx;
   int event_index;
 
-  /* currently assume presets are for component 0 only */
   if (IS_PRESET(event_code)) {
      INTDBG("EXIT: Event %#x is a PRESET, assigning component %d\n", event_code,0);
-     return 0;
+     event_index = event_code & PAPI_PRESET_AND_MASK;
+     return get_preset_cmp(&event_index);
   }
 
   /* user defined events are treated like preset events (component 0 only) */
@@ -632,6 +638,199 @@ PAPIWARN( char *format, ... )
 	}
 }
 
+/* Construct fully qualified event names for the native events in a preset:
+ * base_name[j] plus every string in quals[] replaces name[j], and the native
+ * code of that name is stored in code[j]. Returns PAPI_OK, PAPI_ENOMEM (failed
+ * allocation or formatting) or PAPI_EINVAL (name has no native event code). */
+int
+construct_qualified_event(hwi_presets_t *prstPtr) {
+
+    int j, retval = PAPI_OK;
+    for(j = 0; j < prstPtr->count && PAPI_OK == retval; j++ ) {
+        /* Length of the fully qualified name, including the trailing NUL. */
+        int k, status, baseLen = 1+strlen(prstPtr->base_name[j]);
+        int strLenSum = baseLen;
+        for (k = 0; k < prstPtr->num_quals; k++){
+            strLenSum += strlen(prstPtr->quals[k]);
+        }
+        /* Scratch space: tmpEvent holds the name built so far, tmpQuals
+         * receives it with the next qualifier appended. */
+        char *tmpEvent = (char*)malloc(strLenSum*sizeof(char));
+        char *tmpQuals = (char*)malloc(strLenSum*sizeof(char));
+        if( NULL == tmpQuals || NULL == tmpEvent ) {
+            SUBDBG("EXIT: Could not allocate memory.\n");
+            retval = PAPI_ENOMEM;
+            goto cleanup;
+        }
+
+        /* Print the basename to a string. */
+        status = snprintf(tmpEvent, baseLen, "%s", prstPtr->base_name[j]);
+        if( status < 0 || status >= baseLen ) {
+            PAPIERROR("Event basename %s was truncated to %s in derived event %s",
+                       prstPtr->base_name[j], tmpEvent, prstPtr->symbol);
+            retval = PAPI_ENOMEM;
+            goto cleanup;
+        }
+        /* Concatenate the qualifiers onto the string. */
+        for (k = 0, status = 0; k < prstPtr->num_quals; k++) {
+            status = snprintf(tmpQuals, strLenSum, "%s%s", tmpEvent, prstPtr->quals[k]);
+            strcpy(tmpEvent, tmpQuals);
+        }
+        if( status < 0 || status >= strLenSum ) {
+            PAPIERROR("Event %s with qualifiers was truncated to %s in derived event %s",
+                      prstPtr->base_name[j], tmpEvent, prstPtr->symbol);
+            retval = PAPI_ENOMEM;
+            goto cleanup;
+        }
+        /* Set the new name (with qualifiers) and the corresponding new code. */
+        free(prstPtr->name[j]);
+        prstPtr->name[j] = strdup(tmpEvent);
+        if( PAPI_OK != _papi_hwi_native_name_to_code( tmpEvent, &(prstPtr->code[j]) ) ) {
+            PAPIERROR("Failed to get code for native event %s used in derived event %s\n",
+                      tmpEvent, prstPtr->symbol);
+            retval = PAPI_EINVAL;
+        }
+
+cleanup:
+        /* Release the scratch buffers on every path; free(NULL) is a no-op. */
+        free(tmpQuals);
+        free(tmpEvent);
+    }
+
+    return retval;
+}
+
+/* Overwrite qualifiers in the preset struct based on those provided in the input string.
+ *
+ * prstPtr   - preset whose quals[] entries (num_quals of them) may be replaced
+ * in        - event name, optionally followed by ':'-separated qualifiers
+ *             of the form name=value
+ * is_preset - non-zero if the name has no component prefix token to skip
+ *
+ * Each provided qualifier is compared, by the text preceding '=', with the
+ * qualifiers stored in the preset. The first stored qualifier with the same
+ * name is replaced; provided qualifiers without a match are ignored.
+ *
+ * Qualifiers are handled one at a time in exactly-sized buffers, so neither
+ * their number nor their length is limited by num_quals or PAPI_MAX_STR_LEN.
+ *
+ * Returns PAPI_OK on success or PAPI_ENOMEM if memory could not be allocated. */
+int
+overwrite_qualifiers(hwi_presets_t *prstPtr, const char *in, int is_preset) {
+
+    const char *qualDelim = ":";
+    int l, retval = PAPI_OK;
+
+    /* strtok modifies its argument, so tokenize a private copy of the input. */
+    char *givenName = strdup(in);
+    if( NULL == givenName ) {
+        PAPIERROR("Failed to make copy of event name %s", in);
+        return PAPI_ENOMEM;
+    }
+
+    /* The first token is not a qualifier, so step past it. */
+    char *qualName = strtok(givenName, qualDelim);
+    qualName = strtok(NULL, qualDelim);
+
+    /* Skip past component prefix. */
+    if( !is_preset ) {
+        qualName = strtok(NULL, qualDelim);
+    }
+
+    /* If a specific qualifier was provided, use that as the default value
+     * for the qualifier for the preset. To accomplish this, find the same
+     * qualifier in the preset struct's list, and overwrite it. */
+    while( qualName != NULL ) {
+        /* Rebuild the qualifier with its leading delimiter, the form in
+         * which this function stores qualifiers in the preset struct. */
+        size_t qualLen = 1+strlen(qualDelim)+strlen(qualName);
+        char *newQual = (char*)malloc(qualLen*sizeof(char));
+        if( NULL == newQual || snprintf(newQual, qualLen, "%s%s", qualDelim, qualName) < 0 ) {
+            PAPIERROR("Failed to make copy of qualifier %s", qualName);
+            free(newQual);
+            retval = PAPI_ENOMEM;
+            break;
+        }
+
+        /* Two qualifiers match when the text before '=' is identical. */
+        size_t nameLen = strcspn(newQual, "=");
+        for (l = 0; l < prstPtr->num_quals; l++) {
+            if( strcspn(prstPtr->quals[l], "=") == nameLen &&
+                strncmp(prstPtr->quals[l], newQual, nameLen) == 0 ) {
+                /* The preset takes ownership of the new string. */
+                free(prstPtr->quals[l]);
+                prstPtr->quals[l] = newQual;
+                newQual = NULL;
+                break;
+            }
+        }
+
+        /* No match: the provided qualifier is dropped; free(NULL) is a no-op. */
+        free(newQual);
+        qualName = strtok(NULL, qualDelim);
+    }
+
+    free(givenName);
+    return retval;
+}
+
+/* Report the preset index stored in first_comp_preset_idx, i.e. the index
+ * tracked for the first non-perf_event component preset.
+ *
+ * Returns PAPI_EINVAL while first_comp_with_presets is negative, which is
+ * its initial value (-1) until a component with presets is recorded. */
+int
+get_first_cmp_preset_idx( void ) {
+
+    return ( first_comp_with_presets < 0 ) ? PAPI_EINVAL
+                                           : first_comp_preset_idx;
+}
+
+/* Return index of component containing preset with given index; on success
+ * *index becomes the preset's offset within that component's range. Yields
+ * PAPI_EMISC for the first PAPI_MAX_PRESET_EVENTS indices when perf_event is
+ * disabled, and PAPI_EINVAL when no component range contains the index. */
+int
+get_preset_cmp( unsigned int *index ) {
+    /* First index of the range owned by the component being examined. */
+    unsigned int rangeStart = pe_disabled ? PAPI_MAX_PRESET_EVENTS : 0;
+    if( pe_disabled && *index < rangeStart ) {
+        return PAPI_EMISC;
+    }
+
+    int cmp;
+    for( cmp = 0; cmp < PAPI_NUM_COMP; cmp++ ) {
+        unsigned int rangeEnd = rangeStart + _papi_hwi_max_presets[cmp];
+        if( *index < rangeEnd ) {
+            *index -= rangeStart;
+            return cmp;
+        }
+        rangeStart = rangeEnd;
+    }
+
+    return PAPI_EINVAL;
+}
+
+/* Return a pointer to preset which has given event code, or NULL when the
+ * code maps to no preset list. PAPI_EMISC from get_preset_cmp() selects
+ * _papi_hwi_presets; a component index i selects _papi_hwi_comp_presets[i]. */
+hwi_presets_t*
+get_preset( int event_code ) {
+    unsigned int preset_index = ( event_code & PAPI_PRESET_AND_MASK );
+    hwi_presets_t *_papi_hwi_list = NULL;
+
+    /* For a component hit, preset_index is rewritten to the offset within it. */
+    int i = get_preset_cmp(&preset_index);
+    if( i >= 0 ) {
+        _papi_hwi_list = _papi_hwi_comp_presets[i];
+    } else if( i == PAPI_EMISC ) {
+        _papi_hwi_list = _papi_hwi_presets;
+    }
+
+    /* Any other error code leaves the list unset: never index through it. */
+    return ( NULL == _papi_hwi_list ) ? NULL : &_papi_hwi_list[preset_index];
+}
+
 static int
 default_debug_handler( int errorCode )
 {
@@ -1102,11 +1301,17 @@ _papi_hwi_map_events_to_native( EventSet
 
 		/* If it's a preset */
 		if ( IS_PRESET(ESI->EventInfoArray[event].event_code) ) {
-			preset_index = ( int ) ESI->EventInfoArray[event].event_code & PAPI_PRESET_AND_MASK;
+
+            /* If it is a component preset, it will be in a separate array. */
+            hwi_presets_t *_preset_ptr = get_preset((int)ESI->EventInfoArray[event].event_code);
+            if( NULL == _preset_ptr ) {
+                INTDBG("EXIT: preset not found\n");
+                return;
+            }
 
 			/* walk all sub-events in the preset */
 			for( k = 0; k < PAPI_EVENTS_IN_DERIVED_EVENT; k++ ) {
-				nevt = _papi_hwi_presets[preset_index].code[k];
+				nevt = _preset_ptr->code[k];
 				if ( nevt == PAPI_NULL ) {
 					break;
 				}
@@ -1380,17 +1585,23 @@ _papi_hwi_add_event( EventSetInfo_t * ES
     if ( !_papi_hwi_is_sw_multiplex( ESI ) ) {
 
        /* Handle preset case */
-       if ( IS_PRESET(EventCode) ) {
+       if ( IS_PRESET(EventCode) ) { /* begin preset case */
 	  int count;
 	  int preset_index = EventCode & ( int ) PAPI_PRESET_AND_MASK;
 
 	  /* Check if it's within the valid range */
-	  if ( ( preset_index < 0 ) || ( preset_index >= PAPI_MAX_PRESET_EVENTS ) ) {
+	  if ( ( preset_index < 0 ) || ( preset_index >= num_all_presets ) ) {
 	     return PAPI_EINVAL;
 	  }
 
+      hwi_presets_t *_preset_ptr = get_preset(EventCode);
+      if( NULL == _preset_ptr ) {
+	      INTDBG("EXIT: preset not found\n");
+	      return PAPI_ENOEVNT;
+      }
+
 	  /* count the number of native events in this preset */
-	  count = ( int ) _papi_hwi_presets[preset_index].count;
+	  count = ( int ) _preset_ptr->count;
 
 	  /* Check if event exists */
 	  if ( !count ) {
@@ -1403,7 +1614,7 @@ _papi_hwi_add_event( EventSetInfo_t * ES
 	     for( i = 0; i < count; i++ ) {
 		for( j = 0; j < ESI->overflow.event_counter; j++ ) {
 		  if ( ESI->overflow.EventCode[j] ==(int)
-			( _papi_hwi_presets[preset_index].code[i] ) ) {
+			( _preset_ptr->code[i] ) ) {
 		      return PAPI_ECNFLCT;
 		   }
 		}
@@ -1413,7 +1624,7 @@ _papi_hwi_add_event( EventSetInfo_t * ES
 	  /* Try to add the preset. */
 
 	  remap = add_native_events( ESI,
-				     _papi_hwi_presets[preset_index].code,
+				     _preset_ptr->code,
 				     count, &ESI->EventInfoArray[thisindex] );
 	  if ( remap < 0 ) {
 	     return remap;
@@ -1423,14 +1634,15 @@ _papi_hwi_add_event( EventSetInfo_t * ES
 	     ESI->EventInfoArray[thisindex].event_code =
                                   ( unsigned int ) EventCode;
 	     ESI->EventInfoArray[thisindex].derived =
-				  _papi_hwi_presets[preset_index].derived_int;
+				  _preset_ptr->derived_int;
 	     ESI->EventInfoArray[thisindex].ops =
-				  _papi_hwi_presets[preset_index].postfix;
+				  _preset_ptr->postfix;
              ESI->NumberOfEvents++;
 	     _papi_hwi_map_events_to_native( ESI );
 
 	  }
        }
+
        /* Handle adding Native events */
        else if ( IS_NATIVE(EventCode) ) {
 
@@ -1975,6 +2187,49 @@ _papi_hwi_init_global( int PE_OR_PEU )
 	return PAPI_OK;
 }
 
+
+/*
+ * Routine that lays out the global preset index space. It detects whether
+ * the perf_event component is present (clearing pe_disabled if so), records
+ * in first_comp_with_presets the first non-perf_event component that has
+ * presets, stores each component's first preset index in
+ * _papi_hwi_start_idx[] and accumulates the total in num_all_presets.
+ * When perf_event is absent, the first PAPI_MAX_PRESET_EVENTS indices
+ * are reserved before any component range. Always returns PAPI_OK.
+ */
+int
+_papi_hwi_init_global_presets( void )
+{
+    int i;
+
+    /* Determine whether or not perf_event is available. */
+    for ( i = 0; _papi_hwd[i]; i++ ) {
+        if (strcmp(_papi_hwd[i]->cmp_info.name, "perf_event") == 0) {
+            pe_disabled = 0;
+            break;
+        }
+    }
+
+    /* Start the running total from a known value so that calling this
+     * routine more than once does not accumulate stale counts. */
+    num_all_presets = pe_disabled ? PAPI_MAX_PRESET_EVENTS : 0;
+
+    for ( i = 0; _papi_hwd[i]; i++ ) {
+        /* Only set the first non-perf_event component with presets once. */
+        if ( -1 == first_comp_with_presets && _papi_hwi_max_presets[i] > 0 &&
+             strcmp(_papi_hwd[i]->cmp_info.name, "perf_event") != 0 ) {
+            first_comp_with_presets = i;
+        }
+
+        /* Component i owns indices [start, start + max_presets[i]). */
+        _papi_hwi_start_idx[i] = num_all_presets;
+        num_all_presets += _papi_hwi_max_presets[i];
+    }
+
+    return PAPI_OK;
+}
+
+
 /* Machine info struct initialization using defaults */
 /* See _papi_mdi definition in papi_internal.h       */
 
@@ -2292,45 +2547,49 @@ _papi_hwi_get_preset_event_info( int Eve
 {
 	INTDBG("ENTER: EventCode: %#x, info: %p\n", EventCode, info);
 
-	int i = EventCode & PAPI_PRESET_AND_MASK;
 	unsigned int j;
+    hwi_presets_t *_preset_ptr = get_preset(EventCode);
+    if( NULL == _preset_ptr ) {
+        INTDBG("EXIT: preset not found\n");
+        return PAPI_ENOEVNT;
+    }
 
-	if ( _papi_hwi_presets[i].symbol ) {	/* if the event is in the preset table */
+	if ( _preset_ptr->symbol ) {	/* if the event is in the preset table */
       // since we are setting the whole structure to zero the strncpy calls below will 
       // be leaving NULL terminates strings as long as they copy 1 less byte than the 
       // buffer size of the field.
 
-	   INTDBG("ENTER: Configuring: %s\n", _papi_hwi_presets[i].symbol);
+	   INTDBG("ENTER: Configuring: %s\n", _preset_ptr->symbol);
 
 	   memset( info, 0, sizeof ( PAPI_event_info_t ) );
 
 		/* set up eventcode and name */
 	   info->event_code = ( unsigned int ) EventCode;
-	   strncpy( info->symbol, _papi_hwi_presets[i].symbol,
+	   strncpy( info->symbol, _preset_ptr->symbol,
 	            sizeof(info->symbol)-1);
 
 		/* set up short description, if available */
-	   if ( _papi_hwi_presets[i].short_descr != NULL ) {
-	      strncpy( info->short_descr, _papi_hwi_presets[i].short_descr,
+	   if ( _preset_ptr->short_descr != NULL ) {
+	      strncpy( info->short_descr, _preset_ptr->short_descr,
 		          sizeof ( info->short_descr )-1 );
 	   }
 
 		/* set up long description, if available */
-	   if ( _papi_hwi_presets[i].long_descr != NULL ) {
-	      strncpy( info->long_descr,  _papi_hwi_presets[i].long_descr,
+	   if ( _preset_ptr->long_descr != NULL ) {
+	      strncpy( info->long_descr,  _preset_ptr->long_descr,
 		          sizeof ( info->long_descr )-1 );
 	   }
 
-	   info->event_type = _papi_hwi_presets[i].event_type;
-	   info->count = _papi_hwi_presets[i].count;
+	   info->event_type = _preset_ptr->event_type;
+	   info->count = _preset_ptr->count;
 
 
 		/* set up if derived event */
-	   _papi_hwi_derived_string( _papi_hwi_presets[i].derived_int,
+	   _papi_hwi_derived_string( _preset_ptr->derived_int,
 				     info->derived,  sizeof ( info->derived )-1 );
 
-	   if ( _papi_hwi_presets[i].postfix != NULL ) {
-	      strncpy( info->postfix, _papi_hwi_presets[i].postfix,
+	   if ( _preset_ptr->postfix != NULL ) {
+	      strncpy( info->postfix, _preset_ptr->postfix,
 		          sizeof ( info->postfix )-1 );
 	   }
 
@@ -2342,22 +2601,34 @@ _papi_hwi_get_preset_event_info( int Eve
 		/* ideally that should never happen, but also ideally */
 		/* we wouldn't segfault if it does */
 
-	      if (_papi_hwi_presets[i].name[j]==NULL) {
-		INTDBG("ERROR in event definition of %s\n", _papi_hwi_presets[i].symbol);
+	      if (_preset_ptr->name[j]==NULL) {
+		INTDBG("ERROR in event definition of %s\n", _preset_ptr->symbol);
 			   return PAPI_ENOEVNT;
 		}
 		else {
-			info->code[j]=_papi_hwi_presets[i].code[j];
-			strncpy(info->name[j], _papi_hwi_presets[i].name[j],
+			info->code[j]=_preset_ptr->code[j];
+			strncpy(info->name[j], _preset_ptr->name[j],
 				sizeof(info->name[j])-1);
 		}
 	   }
 
-	   if ( _papi_hwi_presets[i].note != NULL ) {
-	      strncpy( info->note, _papi_hwi_presets[i].note,
+	   if ( _preset_ptr->note != NULL ) {
+	      strncpy( info->note, _preset_ptr->note,
 		          sizeof ( info->note )-1 );
 	   }
 
+       /* Copy the qualifiers and their associated descriptions into
+        * the info struct. */
+       int k;
+	   for( k = 0; k < _preset_ptr->num_quals; ++k ) {
+	      strncpy( info->quals[k], _preset_ptr->quals[k],
+		          sizeof ( info->quals[k] )-1 );
+	      strncpy( info->quals_descrs[k], _preset_ptr->quals_descrs[k],
+		          sizeof ( info->quals_descrs[k] )-1 );
+	   }
+       info->num_quals = _preset_ptr->num_quals;
+       info->component_index = _preset_ptr->component_index;
+
 	   return PAPI_OK;
 	} else {
 	   return PAPI_ENOEVNT;
diff -pruN 7.2.0~b2-1/src/papi_internal.h 7.2.0-1/src/papi_internal.h
--- 7.2.0~b2-1/src/papi_internal.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_internal.h	2025-06-25 22:38:10.000000000 +0000
@@ -98,6 +98,7 @@ extern char **_papi_errlist;
 #define DONT_NEED_CONTEXT 	0
 
 #define PAPI_EVENTS_IN_DERIVED_EVENT	8
+#define PAPI_MAX_COMP_QUALS	8
 
 
 /* these vestigial pointers are to structures defined in the components
@@ -447,6 +448,7 @@ void _papi_hwi_free_papi_event_string();
 void _papi_hwi_set_papi_event_code (unsigned int event_code, int update_flag);
 unsigned int _papi_hwi_get_papi_event_code (void);
 int _papi_hwi_get_ntv_idx (unsigned int papi_evt_code);
+const char *_papi_hwi_strip_component_prefix(const char *event_name);
 int _papi_hwi_is_sw_multiplex( EventSetInfo_t * ESI );
 hwd_context_t *_papi_hwi_get_context( EventSetInfo_t * ESI, int *is_dirty );
 
@@ -468,6 +470,7 @@ int _papi_hwi_read( hwd_context_t * cont
 int _papi_hwi_cleanup_eventset( EventSetInfo_t * ESI );
 int _papi_hwi_convert_eventset_to_multiplex( _papi_int_multiplex_t * mpx );
 int _papi_hwi_init_global( int PE_OR_PEU );
+int _papi_hwi_init_global_presets( void );
 int _papi_hwi_init_global_internal( void );
 int _papi_hwi_init_os(void);
 void _papi_hwi_init_errors(void);
@@ -522,4 +525,10 @@ int _papi_hwi_enum_dev_type(int enum_mod
 int _papi_hwi_get_dev_type_attr(void *handle, PAPI_dev_type_attr_e attr, void *val);
 int _papi_hwi_get_dev_attr(void *handle, int id, PAPI_dev_attr_e attr, void *val);
 
+int construct_qualified_event(hwi_presets_t *prstPtr);
+int overwrite_qualifiers(hwi_presets_t *prstPtr, const char *in, int is_preset);
+int get_first_cmp_preset_idx( void );
+int get_preset_cmp( unsigned int *index );
+hwi_presets_t* get_preset( int event_code );
+
 #endif /* PAPI_INTERNAL_H */
diff -pruN 7.2.0~b2-1/src/papi_memory.c 7.2.0-1/src/papi_memory.c
--- 7.2.0~b2-1/src/papi_memory.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_memory.c	2025-06-25 22:38:10.000000000 +0000
@@ -47,7 +47,7 @@ static pmem_t *mem_head = NULL;
 
 /* Local Prototypes */
 static pmem_t *get_mem_ptr( void *ptr );
-static pmem_t *init_mem_ptr( void *, int, char *, int );
+static pmem_t *init_mem_ptr( void *, int, const char *, int );
 static void insert_mem_ptr( pmem_t * );
 static void remove_mem_ptr( pmem_t * );
 static int set_epilog( pmem_t * mem_ptr );
@@ -74,7 +74,7 @@ static int set_epilog( pmem_t * mem_ptr
  * Checks for NULL pointers and returns NULL if error.
  */
 void *
-_papi_realloc( char *file, int line, void *ptr, size_t size )
+_papi_realloc( const char *file, int line, void *ptr, size_t size )
 {
 	size_t nsize = size + MEM_PROLOG;
 	pmem_t *mem_ptr;
@@ -110,7 +110,7 @@ _papi_realloc( char *file, int line, voi
 }
 
 void *
-_papi_calloc( char *file, int line, size_t nmemb, size_t size )
+_papi_calloc( const char *file, int line, size_t nmemb, size_t size )
 {
 	void *ptr = _papi_malloc( file, line, size * nmemb );
 
@@ -121,7 +121,7 @@ _papi_calloc( char *file, int line, size
 }
 
 void *
-_papi_malloc( char *file, int line, size_t size )
+_papi_malloc( const char *file, int line, size_t size )
 {
 	void *ptr;
 	void **tmp;
@@ -166,7 +166,7 @@ _papi_malloc( char *file, int line, size
 }
 
 char *
-_papi_strdup( char *file, int line, const char *s )
+_papi_strdup( const char *file, int line, const char *s )
 {
 	size_t size;
 	char *ptr;
@@ -188,7 +188,7 @@ _papi_strdup( char *file, int line, cons
 /** Only frees the memory if PAPI malloced it 
   * returns 1 if pointer was valid; 0 if not */
 int
-_papi_valid_free( char *file, int line, void *ptr )
+_papi_valid_free( const char *file, int line, void *ptr )
 {
 	pmem_t *tmp;
 	int valid = 0;
@@ -223,7 +223,7 @@ _papi_valid_free( char *file, int line,
 
 /** Frees up the ptr */
 void
-_papi_free( char *file, int line, void *ptr )
+_papi_free( const char *file, int line, void *ptr )
 {
 	pmem_t *mem_ptr = get_mem_ptr( ptr );
 
@@ -355,7 +355,7 @@ get_mem_ptr( void *ptr )
 
 /* Allocate and initialize a memory pointer */
 pmem_t *
-init_mem_ptr( void *ptr, int size, char *file, int line )
+init_mem_ptr( void *ptr, int size, const char *file, int line )
 {
 	pmem_t *mem_ptr = NULL;
 	if ( ( mem_ptr = ( pmem_t * ) malloc( sizeof ( pmem_t ) ) ) == NULL )
diff -pruN 7.2.0~b2-1/src/papi_memory.h 7.2.0-1/src/papi_memory.h
--- 7.2.0~b2-1/src/papi_memory.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_memory.h	2025-06-25 22:38:10.000000000 +0000
@@ -45,12 +45,12 @@ typedef struct pmem
 #endif
 #endif
 
-void *_papi_malloc( char *, int, size_t );
-void _papi_free( char *, int, void * );
-void *_papi_realloc( char *, int, void *, size_t );
-void *_papi_calloc( char *, int, size_t, size_t );
-int _papi_valid_free( char *, int, void * );
-char *_papi_strdup( char *, int, const char *s );
+void *_papi_malloc( const char *, int, size_t );
+void _papi_free( const char *, int, void * );
+void *_papi_realloc( const char *, int, void *, size_t );
+void *_papi_calloc( const char *, int, size_t, size_t );
+int _papi_valid_free( const char *, int, void * );
+char *_papi_strdup( const char *, int, const char *s );
 void _papi_mem_cleanup_all(  );
 void _papi_mem_print_info( void *ptr );
 void _papi_mem_print_stats(  );
diff -pruN 7.2.0~b2-1/src/papi_preset.c 7.2.0-1/src/papi_preset.c
--- 7.2.0~b2-1/src/papi_preset.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_preset.c	2025-06-25 22:38:10.000000000 +0000
@@ -9,7 +9,6 @@
 *          Merge of the libpfm3/libpfm4/pmapi-ppc64_events preset code
 */
 
-
 #include <string.h>
 #include <ctype.h>
 #include <errno.h>
@@ -25,8 +24,11 @@
 // A place to put user defined events
 extern hwi_presets_t user_defined_events[];
 extern int user_defined_events_count;
+extern int num_all_presets;
+extern int _papi_hwi_start_idx[PAPI_NUM_COMP];
 
 static int papi_load_derived_events (char *pmu_str, int pmu_type, int cidx, int preset_flag);
+static int papi_load_derived_events_component (char *comp_str, char *arch_str, int cidx);
 
 
 /* This routine copies values from a dense 'findem' array of events
@@ -87,6 +89,10 @@ _papi_hwi_setup_all_presets( hwi_search_
 
 	   INTDBG( "This preset has %d terms.\n", j );
 	   _papi_hwi_presets[preset_index].count = j;
+
+       // Set the component index to that of the first native event used to define it.
+       // Make sure we later check that all native events in a preset come from same comp.
+	   _papi_hwi_presets[preset_index].component_index = _papi_hwi_component_index(findem[pnum].native[0]);
  
            _papi_hwi_presets[preset_index].derived_int = findem[pnum].derived;
 	   for(k=0;k<j;k++) {
@@ -113,27 +119,36 @@ _papi_hwi_setup_all_presets( hwi_search_
 int
 _papi_hwi_cleanup_all_presets( void )
 {
-        int preset_index,cidx;
-	unsigned int j;
-
-	for ( preset_index = 0; preset_index < PAPI_MAX_PRESET_EVENTS;
-		  preset_index++ ) {
-	    if ( _papi_hwi_presets[preset_index].postfix != NULL ) {
-	       papi_free( _papi_hwi_presets[preset_index].postfix );
-	       _papi_hwi_presets[preset_index].postfix = NULL;
-	    }
-	    if ( _papi_hwi_presets[preset_index].note != NULL ) {
-	       papi_free( _papi_hwi_presets[preset_index].note );
-	       _papi_hwi_presets[preset_index].note = NULL;
-	    }
-	    for(j=0; j<_papi_hwi_presets[preset_index].count;j++) {
-           papi_free(_papi_hwi_presets[preset_index].name[j]);
-	    }
-	}
-
-	for(cidx=0;cidx<papi_num_components;cidx++) {
-	   _papi_hwd[cidx]->cmp_info.num_preset_events = 0;
-	}
+    int preset_index,cidx;
+    unsigned int j;
+    hwi_presets_t *_papi_hwi_list;
+
+    for(cidx=0;cidx<papi_num_components;cidx++) {
+      _papi_hwi_list = _papi_hwi_comp_presets[cidx];
+      for ( preset_index = 0; preset_index < _papi_hwi_max_presets[cidx]; preset_index++ ) {
+        /* Free the postfix. */
+        if ( _papi_hwi_list[preset_index].postfix != NULL ) {
+           papi_free( _papi_hwi_list[preset_index].postfix );
+           _papi_hwi_list[preset_index].postfix = NULL;
+        }
+        /* Free the note. */
+        if ( _papi_hwi_list[preset_index].note != NULL ) {
+           papi_free( _papi_hwi_list[preset_index].note );
+           _papi_hwi_list[preset_index].note = NULL;
+        }
+        /* Free the event names used to define the preset. */
+        for(j=0; j<_papi_hwi_list[preset_index].count;j++) {
+           free(_papi_hwi_list[preset_index].name[j]);
+           free(_papi_hwi_list[preset_index].base_name[j]);
+           free(_papi_hwi_list[preset_index].default_name[j]);
+        }
+        /* Free the qualifier names and descriptions. */
+        for(j=0; j<_papi_hwi_list[preset_index].num_quals;j++) {
+           free(_papi_hwi_list[preset_index].quals[j]);
+           free(_papi_hwi_list[preset_index].quals_descrs[j]);
+        }
+      }
+    }
 
 #if defined(ITANIUM2) || defined(ITANIUM3)
 	/* NOTE: This memory may need to be freed for BG/P builds as well */
@@ -718,16 +733,8 @@ check_native_events(char *target, hwi_pr
 		return 0;
 	}
 
-	// if this native event is not for component 0, return to show it can not be used in derived events
-	// it should be possible to create derived events for other components as long as all events in the derived event are associated with the same component
-	if ( _papi_hwi_component_index(results->code[results->count]) != 0 ) {
-		INTDBG( "EXIT: returned: 0, new event not associated with component 0 (current limitation with derived events)\n");
-		return 0;
-	}
-
 	//	  found = 1;
 	INTDBG("\tFound a native event %s\n", target);
-	results->name[results->count++] = papi_strdup(target);
 
 	INTDBG( "EXIT: returned: 1\n");
 	return 1;
@@ -787,6 +794,22 @@ int _papi_load_preset_table(char *pmu_st
 	return retval;
 }
 
+/* Load the preset definitions of component 'cidx' for architecture 'arch_str'.
+ * 'comp_str' is the token that introduces the component's section of the
+ * event table. Returns whatever papi_load_derived_events_component()
+ * reports (PAPI_OK on success). */
+int _papi_load_preset_table_component(char *comp_str, char *arch_str, int cidx) {
+	int status;
+
+	SUBDBG("ENTER: arch_str: %s, cidx: %d\n", arch_str, cidx);
+
+	/* Failure and success emit the same EXIT trace and hand the loader's
+	 * status back unchanged, so a single exit path is sufficient. */
+	status = papi_load_derived_events_component(comp_str, arch_str, cidx);
+	SUBDBG("EXIT: retval: %d\n", status);
+	return status;
+}
+
 // global variables
 static char stack[2*PAPI_HUGE_STR_LEN]; // stack
 static int stacktop = -1; // stack length
@@ -966,6 +989,7 @@ papi_load_derived_events (char *pmu_str,
 	int preset = 0;
 	int get_events = 0; /* only process derived events after CPU type they apply to is identified      */
 	int found_events = 0; /* flag to track if event definitions (PRESETS) are found since last CPU declaration */
+    int breakAfter = 0; /* flag to break parsing events file if component 'arch' has already been parsed */
 #ifdef PAPI_DATADIR
 		char path[PATH_MAX];
 #endif
@@ -1064,6 +1088,8 @@ papi_load_derived_events (char *pmu_str,
 			if (strcasecmp(t, pmu_name) == 0) {
 				int type;
 
+                breakAfter = 1;
+
 				SUBDBG( "Process events for PMU %s found at line %d of %s.\n", t, line_no, name);
 
 				t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
@@ -1078,9 +1104,7 @@ papi_load_derived_events (char *pmu_str,
 				}
 			}
 			continue;
-		}
-
-		if ((strcasecmp(t, "PRESET") == 0)  || (strcasecmp(t, "EVENT") == 0)) {
+		} else if ((strcasecmp(t, "PRESET") == 0)  || (strcasecmp(t, "EVENT") == 0)) {
 
 			if (get_events == 0)
 				continue;
@@ -1106,6 +1130,8 @@ papi_load_derived_events (char *pmu_str,
 			(void) preset;
 
 			SUBDBG( "Use event code: %#x for %s\n", preset, t);
+            unsigned int preset_index = ( preset & PAPI_PRESET_AND_MASK );
+	        _papi_hwi_presets[preset_index].component_index = cidx;
 
 			t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
 			if ((t == NULL) || (strlen(t) == 0)) {
@@ -1192,6 +1218,14 @@ papi_load_derived_events (char *pmu_str,
 					break;
 				}
 
+                /* If it is a valid event, then update the preset fields here. */
+                /* Initially, the event names are those with the default, mandatory qualifiers. */
+                results[res_idx].name[results[res_idx].count]         = strdup(t);
+                results[res_idx].base_name[results[res_idx].count]    = strdup(t);
+                results[res_idx].default_name[results[res_idx].count] = strdup(t);
+                results[res_idx].default_code[results[res_idx].count] = results[res_idx].code[results[res_idx].count];
+                results[res_idx].count++;
+
 				i++;
 			} while (results[res_idx].count < PAPI_EVENTS_IN_DERIVED_EVENT);
 
@@ -1275,10 +1309,13 @@ papi_load_derived_events (char *pmu_str,
 				} while (t != NULL);
 			}
 			(*event_count)++;
+
 			continue;
-		}
+		} else {
+            if( breakAfter ) break; // Break this while-loop once all presets for the given component's arch have been parsed.
+        }
 
-		PAPIERROR("Unrecognized token %s at line %d of %s -- ignoring", t, line_no, name);
+		//PAPIERROR("Unrecognized token %s at line %d of %s -- ignoring", t, line_no, name);
 	}
 
 	if (event_file) {
@@ -1290,6 +1327,511 @@ papi_load_derived_events (char *pmu_str,
 }
 
 
+/* Parse the preset definitions that belong to a non-CPU component.
+ *
+ * Scans the event table (the file named by PAPI_CSV_EVENT_FILE, else the
+ * built-in papi_events_table, else the default event file) for the section
+ * whose first two fields are 'comp_str' and 'arch_str', and stores every
+ * PRESET/EVENT entry of that section in _papi_hwi_comp_presets[cidx].
+ *
+ * The qualifiers of a preset and their descriptions are taken from the first
+ * native event of its definition; each term is then rebuilt from its basename
+ * and the preset's qualifiers before it is validated with is_event().
+ *
+ * Returns PAPI_OK when parsing completes, PAPI_ESYS when no event table can
+ * be opened, and PAPI_ENOMEM when an event name cannot be assembled.
+ */
+static int
+papi_load_derived_events_component (char *comp_str, char *arch_str, int cidx) {
+	SUBDBG( "ENTER: arch_str: %s, cidx: %d\n", arch_str, cidx);
+
+	char arch_name[PAPI_MIN_STR_LEN];
+	char line[LINE_MAX];
+	char name[PATH_MAX] = "builtin papi_events_table";
+	char *event_file_path=NULL;
+	char *event_table_ptr=NULL;
+	int event_type_bits = 0;
+	char *tmpn;
+	char *tok_save_ptr=NULL;
+	FILE *event_file = NULL;
+	hwi_presets_t *results=NULL;
+	int result_size = 0;
+	int *event_count = NULL;
+	int invalid_event;
+	int line_no = 0;  /* count of lines read from event definition input */
+	int derived = 0;
+	int res_idx = 0;  /* index into results array for where to store next event */
+	int preset = 0;
+	int get_events = 0; /* only process derived events after CPU type they apply to is identified      */
+	int found_events = 0; /* flag to track if event definitions (PRESETS) are found since last CPU declaration */
+    int breakAfter = 0; /* flag to break parsing events file if component 'arch' has already been parsed */
+    int status = 0;
+    int retval = PAPI_OK; /* value handed back through the common exit path at 'done' */
+#ifdef PAPI_DATADIR
+		char path[PATH_MAX];
+#endif
+
+
+	/* try the environment variable first */
+	if ((tmpn = getenv("PAPI_CSV_EVENT_FILE")) && (strlen(tmpn) > 0)) {
+		event_file_path = tmpn;
+	}
+	/* if no valid environment variable, look for built-in table */
+	else if (papi_events_table) {
+		event_table_ptr = papi_events_table;
+	}
+	/* if no env var and no built-in, search for default file */
+	else {
+#ifdef PAPI_DATADIR
+		sprintf( path, "%s/%s", PAPI_DATADIR, PAPI_EVENT_FILE );
+		event_file_path = path;
+#else
+		event_file_path = PAPI_EVENT_FILE;
+#endif
+	}
+	event_type_bits = PAPI_PRESET_MASK;
+	results = &_papi_hwi_comp_presets[cidx][0];
+	result_size = _papi_hwi_max_presets[cidx];
+	event_count = &_papi_hwd[cidx]->cmp_info.num_preset_events;
+
+	// if we have an event file pathname, open it and read event definitions from the file
+	if (event_file_path != NULL) {
+		if ((event_file = open_event_table(event_file_path)) == NULL) {
+			// if file open fails, return an error
+			SUBDBG("EXIT: Event file open failed.\n");
+			return PAPI_ESYS;
+		}
+		strncpy(name, event_file_path, sizeof(name)-1);
+		name[sizeof(name)-1] = '\0';
+	} else if (event_table_ptr == NULL) {
+		// if we do not have a path name or table pointer, return an error
+		SUBDBG("EXIT: Both event_file_path and event_table_ptr are NULL.\n");
+		return PAPI_ESYS;
+	}
+
+	/* copy the arch identifier, stripping commas if found; never write past arch_name */
+	tmpn = arch_name;
+	while (*arch_str && tmpn < arch_name + sizeof(arch_name) - 1) {
+		if (*arch_str != ',')
+			*tmpn++ = *arch_str;
+		arch_str++;
+	}
+	*tmpn = '\0';
+
+	/* at this point we have either a valid file pointer or built-in table pointer */
+	while (get_event_line(line, event_file, &event_table_ptr)) {
+		char *t;
+		int i;
+
+		// increment number of lines we have read
+		line_no++;
+
+		t = trim_string(strtok_r(line, ",", &tok_save_ptr));
+
+		/* Skip blank lines */
+		if ((t == NULL) || (strlen(t) == 0))
+			continue;
+
+		/* Skip comments */
+		if (t[0] == '#') {
+			continue;
+		}
+
+		if (strcasecmp(t, comp_str) == 0) {
+			if (get_events != 0 && found_events != 0) {
+				SUBDBG( "Ending event scanning at line %d of %s.\n", line_no, name);
+				get_events = 0;
+				found_events = 0;
+			}
+
+			t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+			if ((t == NULL) || (strlen(t) == 0)) {
+				PAPIERROR("Expected name after component-name token at line %d of %s -- ignoring", line_no, name);
+				continue;
+			}
+
+			if (strcasecmp(t, arch_name) == 0) {
+                breakAfter = 1;
+
+				SUBDBG( "Process events for ARCH %s found at line %d of %s.\n", t, line_no, name);
+
+				t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+				if ((t == NULL) || (strlen(t) == 0)) {
+					SUBDBG("No additional qualifier found, matching on string.\n");
+					get_events = 1;
+				}
+			}
+			continue;
+		} else if ((strcasecmp(t, "PRESET") == 0)  || (strcasecmp(t, "EVENT") == 0)) {
+
+			if (get_events == 0)
+				continue;
+
+			found_events = 1;
+			t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+
+			if ((t == NULL) || (strlen(t) == 0)) {
+				PAPIERROR("Expected name after PRESET token at line %d of %s -- ignoring", line_no, name);
+				continue;
+			}
+
+			SUBDBG( "Examining event %s\n", t);
+
+			// see if this event already exists in the results array, if not already known it sets up event in unused entry
+			if ((res_idx = find_event_index (results, result_size, t)) < 0) {
+				PAPIERROR("No room left for event %s -- ignoring", t);
+				continue;
+			}
+
+
+			// add the proper event bits (preset or user defined bits)
+			preset = res_idx | event_type_bits;
+			(void) preset;
+
+			SUBDBG( "Use event code: %#x for %s\n", preset, t);
+            unsigned int preset_index = ( preset & PAPI_PRESET_AND_MASK );
+	        _papi_hwi_comp_presets[cidx][preset_index].component_index = cidx;
+
+			t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+			if ((t == NULL) || (strlen(t) == 0)) {
+				// got an error, make this entry unused
+                if (results[res_idx].symbol != NULL){
+                    papi_free (results[res_idx].symbol);
+                    results[res_idx].symbol = NULL;
+                }
+				PAPIERROR("Expected derived type after PRESET token at line %d of %s -- ignoring", line_no, name);
+				continue;
+			}
+
+			if (_papi_hwi_derived_type(t, &derived) != PAPI_OK) {
+				// got an error, make this entry unused
+                if (results[res_idx].symbol != NULL){
+                    papi_free (results[res_idx].symbol);
+                    results[res_idx].symbol = NULL;
+                }
+				PAPIERROR("Invalid derived name %s after PRESET token at line %d of %s -- ignoring", t, line_no, name);
+				continue;
+			}
+
+			/****************************************/
+			/* Have an event, let's start assigning */
+			/****************************************/
+
+			SUBDBG( "Adding event: %s, code: %#x, derived: %d results[%d]: %p.\n", t, preset, derived, res_idx, &results[res_idx]);
+
+			results[res_idx].derived_int = derived;
+
+			/* Derived support starts here */
+			/* Special handling for postfix and infix */
+			if ((derived == DERIVED_POSTFIX)  || (derived == DERIVED_INFIX)) {
+				t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+				if ((t == NULL) || (strlen(t) == 0)) {
+					// got an error, make this entry unused
+                    if (results[res_idx].symbol != NULL){
+                        papi_free (results[res_idx].symbol);
+                        results[res_idx].symbol = NULL;
+                    }
+					PAPIERROR("Expected Operation string after derived type DERIVED_POSTFIX or DERIVED_INFIX at line %d of %s -- ignoring", line_no, name);
+					continue;
+				}
+
+				// if it is an algebraic formula, we need to convert it to postfix
+				if (derived == DERIVED_INFIX) {
+					SUBDBG( "Converting InFix operations %s\n", t);
+					t = infix_to_postfix( t );
+					results[res_idx].derived_int = DERIVED_POSTFIX;
+				}
+
+				SUBDBG( "Saving PostFix operations %s\n", t);
+				results[res_idx].postfix = papi_strdup(t);
+			}
+
+			/* All derived terms collected here */
+			i = 0;
+			invalid_event = 0;
+			results[res_idx].count = 0;
+            int firstTerm = 1;
+			do {
+				t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+				if ((t == NULL) || (strlen(t) == 0))
+					break;
+				if (strcasecmp(t, "NOTE") == 0)
+					break;
+				if (strcasecmp(t, "LDESC") == 0)
+					break;
+				if (strcasecmp(t, "SDESC") == 0)
+					break;
+
+				SUBDBG( "Adding term (%d) %s to derived event %#x, current native event count: %d.\n", i, t, preset, results[res_idx].count);
+
+				// show that we do not have an event code yet (the component may create one and update this info)
+				// this also clears any values left over from a previous call
+				_papi_hwi_set_papi_event_code(-1, -1);
+
+                unsigned int eventCode;
+                char *tmpEvent, *tmpQuals;
+                char *qualDelim = ":";
+                PAPI_event_info_t eventInfo;
+                hwi_presets_t *prstPtr = &(_papi_hwi_comp_presets[cidx][preset_index]);
+
+                if( firstTerm ) {
+
+                    // Convert native event to code and check that it's valid.
+                    status = _papi_hwi_native_name_to_code(t, &eventCode);
+                    if( status != PAPI_OK ) {
+					    invalid_event = 1;
+					    PAPIERROR("Failed to get code for native event %s, used in derived event %s", t, results[res_idx].symbol);
+					    break;
+                    }
+
+                    // Call get_event_info, and use the qualifier string that comes after the
+                    // single instance of ":" and the description that comes after "masks:"
+                    status = _papi_hwi_get_native_event_info( (unsigned int)eventCode, &eventInfo );
+                    if ( status != PAPI_OK ) {
+					    invalid_event = 1;
+					    PAPIERROR("Failed to get info for native event %s, used in derived event %s", t, results[res_idx].symbol);
+					    break;
+                    }
+
+                    /* Get the qualifiers. */
+                    char *wholeName = strdup(eventInfo.symbol);
+                    char *qualPtr = strtok( wholeName, qualDelim );
+
+                    /* Skip over PMU name or component prefix. */
+                    qualPtr = strtok( NULL, qualDelim );
+
+                    /* Skip over basename. */
+                    qualPtr = strtok( NULL, qualDelim );
+
+                    /* quals[] holds PAPI_MAX_COMP_QUALS entries; stop before overrunning it. */
+                    while( qualPtr != NULL && prstPtr->num_quals < PAPI_MAX_COMP_QUALS ) {
+
+                        /* Store the qualifier in the preset struct. */
+                        size_t qualLen = 1+strlen(qualDelim)+strlen(qualPtr);
+                        prstPtr->quals[prstPtr->num_quals] = (char*)malloc(qualLen*sizeof(char));
+                        if( NULL != prstPtr->quals[prstPtr->num_quals] ) {
+                            status = snprintf(prstPtr->quals[prstPtr->num_quals], qualLen, "%s%s", qualDelim, qualPtr);
+                            if( status < 0 || status >= qualLen ) {
+                                invalid_event = 1;
+                                PAPIERROR("Failed to store qualifier for native event %s,"
+                                          " used in derived event %s",
+                                           t, results[res_idx].symbol);
+                                break;
+                            }
+                            prstPtr->num_quals++;
+                        }
+                        qualPtr = strtok( NULL, qualDelim );
+                    }
+                    free(wholeName);
+
+                    /* Get the qualifier descriptions. */
+                    int count = 0;
+                    char *desc = strdup(eventInfo.long_descr);
+                    char *descStart = ( desc != NULL ) ? strstr( desc, "masks:" ) : NULL;
+                    /* No "masks:" section means no descriptions; strtok(NULL, ...) must not
+                     * be reached then, as it would resume in the freed wholeName buffer. */
+                    char *descPtr = ( descStart != NULL ) ? strtok( descStart, qualDelim ) : NULL;
+                    if( descPtr != NULL ) descPtr = strtok( NULL, qualDelim ); /* Skip over 'masks'. */
+
+                    /* quals_descrs[] holds PAPI_MAX_COMP_QUALS entries; stop before overrunning it. */
+                    while( descPtr != NULL && count < PAPI_MAX_COMP_QUALS ) {
+
+                        /* Store the qualifier's description in the preset struct. */
+                        size_t descLen = 1+strlen(descPtr);
+                        prstPtr->quals_descrs[count] = (char*)malloc(descLen*sizeof(char));
+                        if( NULL != prstPtr->quals_descrs[count] ) {
+                            status = snprintf(prstPtr->quals_descrs[count], descLen, "%s", descPtr);
+                            if( status < 0 || status >= descLen ) {
+                                invalid_event = 1;
+                                PAPIERROR("Failed to store qualifier description for native event %s,"
+                                          " used in derived event %s",
+                                          t, results[res_idx].symbol);
+                                break;
+                            }
+                            count++;
+                        }
+                        descPtr = strtok( NULL, qualDelim );
+                    }
+                    free(desc);
+
+                    firstTerm = 0;
+                }
+
+                char *localname = strdup(t);
+                char *basename  = strtok(localname, ":");
+                basename = strtok(NULL, ":");
+                if( NULL == basename ) {
+                    basename = t;
+                }
+
+                /* Keep track of all qualifiers provided in the papi_events.csv file. */
+                status = overwrite_qualifiers(prstPtr, t, 0);
+                if( status < 0 ) {
+                    invalid_event = 1;
+                }
+
+                /* Construct event with all qualifiers. */
+                int k, strLenSum = 0, baseLen = 1+strlen(basename);
+                for (k = 0; k < prstPtr->num_quals; k++){
+                    strLenSum += strlen(prstPtr->quals[k]);
+                }
+                strLenSum += baseLen;
+
+                /* Allocate space for constructing fully qualified event. */
+                tmpEvent = (char*)malloc(strLenSum*sizeof(char));
+                tmpQuals = (char*)malloc(strLenSum*sizeof(char));
+
+                if( NULL == tmpQuals || NULL == tmpEvent ) {
+                    SUBDBG("EXIT: Could not allocate memory.\n");
+                    free(tmpQuals);
+                    free(tmpEvent);
+                    free(localname);
+                    retval = PAPI_ENOMEM;
+                    goto done;
+                }
+
+                /* Print the basename to a string. */
+                status = snprintf(tmpEvent, baseLen, "%s", basename);
+                if( status < 0 || status >= baseLen ) {
+                    invalid_event = 1;
+                    PAPIERROR("Event basename %s was truncated to %s in derived event %s",
+                               basename, tmpEvent, results[res_idx].symbol);
+                    free(tmpQuals);
+                    free(tmpEvent);
+                    free(localname);
+                    retval = PAPI_ENOMEM;
+                    goto done;
+                }
+
+                /* Concatenate the qualifiers onto the string. */
+                status = 0;
+                for (k = 0; k < prstPtr->num_quals; k++) {
+                    status = snprintf(tmpQuals, strLenSum, "%s%s", tmpEvent, prstPtr->quals[k]);
+                    strcpy(tmpEvent, tmpQuals);
+                }
+                if( status < 0 || status >= strLenSum ) {
+                    invalid_event = 1;
+                    PAPIERROR("Event %s with qualifiers was truncated to %s in derived event %s",
+                              basename, tmpEvent, results[res_idx].symbol);
+                    free(tmpQuals);
+                    free(tmpEvent);
+                    free(localname);
+                    retval = PAPI_ENOMEM;
+                    goto done;
+                }
+
+				// make sure that this term in the derived event is a valid event name
+				// this call replaces preset and user event names with the equivalent native events in our results table
+				// it also updates formulas for derived events so that they refer to the correct native event index
+				if (is_event(tmpEvent, results[res_idx].derived_int, &results[res_idx], i) == 0) {
+					invalid_event = 1;
+					PAPIERROR("Missing event %s, used in derived event %s", basename, results[res_idx].symbol);
+					free(tmpQuals);
+					free(tmpEvent);
+					free(localname);
+					break;
+				}
+
+                /* If it is a valid event, then update the preset fields here. */
+                /* Initially, the event names are those with the default, mandatory qualifiers. */
+                results[res_idx].name[results[res_idx].count]         = strdup(tmpEvent);
+                results[res_idx].base_name[results[res_idx].count]    = strdup(basename);
+                results[res_idx].default_name[results[res_idx].count] = strdup(tmpEvent);
+                results[res_idx].default_code[results[res_idx].count] = results[res_idx].code[results[res_idx].count];
+                results[res_idx].count++;
+
+                /* Free dynamically allocated strings. */
+                free(tmpQuals);
+                free(tmpEvent);
+                free(localname);
+
+				i++;
+
+			} while (results[res_idx].count < PAPI_EVENTS_IN_DERIVED_EVENT);
+
+			/* preset code list must be PAPI_NULL terminated */
+			if (i < PAPI_EVENTS_IN_DERIVED_EVENT) {
+				results[res_idx].code[results[res_idx].count] = PAPI_NULL;
+			}
+
+			if (invalid_event) {
+				// got an error, make this entry unused
+			        // preset table is statically allocated, user defined is dynamic
+			        // the names above come from strdup(), so they are released with free()
+                unsigned int j;
+                for (j = 0; j < results[res_idx].count; j++){
+                    if (results[res_idx].name[j] != NULL){
+                        free( results[res_idx].name[j] );
+                        results[res_idx].name[j] = NULL;
+                    }
+                }
+
+				continue;
+			}
+
+			/* End of derived support */
+
+			// if we did not find any terms to base this derived event on, report error
+			if (i == 0) {
+				// got an error, make this entry unused
+                PAPIERROR("Expected PFM event after DERIVED token at line %d of %s -- ignoring", line_no, name);
+				continue;
+			}
+
+			if (i == PAPI_EVENTS_IN_DERIVED_EVENT) {
+				t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+			}
+
+			// if something was provided following the list of events to be used by the operation, process it
+			if ( t!= NULL  && strlen(t) > 0 ) {
+				do {
+					// save the field name
+					char *fptr = papi_strdup(t);
+
+					// get the value to be used with this field
+					t = trim_note(strtok_r(NULL, ",", &tok_save_ptr));
+					if ( t== NULL  || strlen(t) == 0 ) {
+						papi_free(fptr);
+						break;
+					}
+
+					// Handle optional short descriptions, long descriptions and notes
+					if (strcasecmp(fptr, "SDESC") == 0) {
+						results[res_idx].short_descr = papi_strdup(t);
+					}
+					if (strcasecmp(fptr, "LDESC") == 0) {
+						results[res_idx].long_descr = papi_strdup(t);
+					}
+					if (strcasecmp(fptr, "NOTE") == 0) {
+						results[res_idx].note = papi_strdup(t);
+					}
+
+					SUBDBG( "Found %s (%s) on line %d\n", fptr, t, line_no);
+					papi_free (fptr);
+
+					// look for another field name
+					t = trim_string(strtok_r(NULL, ",", &tok_save_ptr));
+					if ( t== NULL  || strlen(t) == 0 ) {
+						break;
+					}
+				} while (t != NULL);
+			}
+			(*event_count)++;
+
+			continue;
+		} else {
+            if( breakAfter ) break; // Break this while-loop once all presets for the given component's 'arch' have been parsed.
+        }
+
+		//PAPIERROR("Unrecognized token %s at line %d of %s -- ignoring", t, line_no, name);
+	}
+
+done:
+	if (event_file) {
+		fclose(event_file);
+	}
+
+	SUBDBG("EXIT: Done processing derived event file.\n");
+	return retval;
+}
 
 
 /* The following code is proof of principle for reading preset events from an
diff -pruN 7.2.0~b2-1/src/papi_preset.h 7.2.0-1/src/papi_preset.h
--- 7.2.0~b2-1/src/papi_preset.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_preset.h	2025-06-25 22:38:10.000000000 +0000
@@ -29,9 +29,16 @@ typedef struct hwi_presets {
    unsigned int count;
    unsigned int event_type;
    char *postfix;
-   unsigned int code[PAPI_MAX_INFO_TERMS];
-   char *name[PAPI_MAX_INFO_TERMS];
+   unsigned int code[PAPI_MAX_INFO_TERMS];          // Active code for each native event.
+   char *name[PAPI_MAX_INFO_TERMS];                 // Active name for each native event.
+   char *base_name[PAPI_MAX_INFO_TERMS];            // Unqualified native event name.
+   unsigned int default_code[PAPI_MAX_INFO_TERMS];  // Codes for names with mandatory quals included.
+   char *default_name[PAPI_MAX_INFO_TERMS];         // Name of native events with mandatory quals included.
    char *note;
+   int component_index;
+   int num_quals;
+   char *quals[PAPI_MAX_COMP_QUALS];
+   char *quals_descrs[PAPI_MAX_COMP_QUALS];
 } hwi_presets_t;
 
 
@@ -49,7 +56,10 @@ int _papi_hwi_setup_all_presets( hwi_sea
 int _papi_hwi_cleanup_all_presets( void );
 int _xml_papi_hwi_setup_all_presets( char *arch);
 int _papi_load_preset_table( char *name, int type, int cidx );
+int _papi_load_preset_table_component( char *comp_str, char *name, int cidx );
 
 extern hwi_presets_t _papi_hwi_presets[PAPI_MAX_PRESET_EVENTS];
+extern hwi_presets_t *_papi_hwi_comp_presets[];
+extern int _papi_hwi_max_presets[];
 
 #endif /* _PAPI_PRESET */
diff -pruN 7.2.0~b2-1/src/papi_vector.c 7.2.0-1/src/papi_vector.c
--- 7.2.0~b2-1/src/papi_vector.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/papi_vector.c	2025-06-25 22:38:10.000000000 +0000
@@ -218,7 +218,7 @@ _papi_hwi_innoculate_os_vector( papi_os_
 	if ( !v->update_shlib_info )
 		v->update_shlib_info = ( int ( * )( papi_mdi_t * ) ) vec_int_dummy;
 	if ( !v->get_system_info )
-		v->get_system_info = ( int ( * )(  ) ) vec_int_dummy;
+		v->get_system_info = ( int ( * )( papi_mdi_t * ) ) vec_int_dummy;
 
 	if ( !v->get_memory_info )
 		v->get_memory_info =
diff -pruN 7.2.0~b2-1/src/utils/papi_avail.c 7.2.0-1/src/utils/papi_avail.c
--- 7.2.0~b2-1/src/utils/papi_avail.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/utils/papi_avail.c	2025-06-25 22:38:10.000000000 +0000
@@ -1,29 +1,29 @@
 // Define the papi_avail man page contents.
 /**
   * file papi_avail.c
-  *	@brief papi_avail utility.
+  *    @brief papi_avail utility.
   * @page papi_avail
-  *	@section Name
-  *	papi_avail - provides availability and detailed information for PAPI preset and user defined events.
+  *    @section Name
+  *    papi_avail - provides availability and detailed information for PAPI preset and user defined events.
   *
-  *	@section Synopsis
-  *	papi_avail [-adht] [-e event]
+  *    @section Synopsis
+  *    papi_avail [-acdht] [-e event]
   *
-  *	@section Description
-  *	papi_avail is a PAPI utility program that reports information about the 
-  *	current PAPI installation and supported preset and user defined events.
+  *    @section Description
+  *    papi_avail is a PAPI utility program that reports information about the 
+  *    current PAPI installation and supported preset and user defined events.
   *
-  *	@section Options
+  *    @section Options
   * <ul>
-  *		<li>-h	Display help information about this utility.
-  *		<li>-a	Display only the available PAPI events.
+  *        <li>-h    Display help information about this utility.
+  *        <li>-a    Display only the available PAPI events.
   *     <li>-c  Display only the available PAPI events after a check.
-  *		<li>-d	Display PAPI event information in a more detailed format.
-  *		<li>-e < event >	Display detailed event information for the named event. 
-  *			This event can be a preset event, a user defined event, or a native event.
-  *			If the event is a preset or a user defined event the output shows a list of native
-  *			events the event is based on and the formula that is used to compute the events final value.\n
-  *	</ul>
+  *        <li>-d    Display PAPI event information in a more detailed format.
+  *        <li>-e < event >    Display detailed event information for the named event. 
+  *            This event can be a preset event, a user defined event, or a native event.
+  *            If the event is a preset or a user defined event the output shows a list of native
+  *            events the event is based on and the formula that is used to compute the event's final value.\n
+  *    </ul>
   *
   * Event filtering options 
   * <ul>
@@ -40,139 +40,139 @@
   *     <li>--msc       Display miscellaneous PAPI preset events
   *     <li>--tlb       Display Translation Lookaside Buffer PAPI preset events
   * </ul>
-  *	@section Bugs
-  *	There are no known bugs in this utility.
-  *	If you find a bug, it should be reported to the PAPI Mailing List at <ptools-perfapi@icl.utk.edu>.
+  *    @section Bugs
+  *    There are no known bugs in this utility.
+  *    If you find a bug, it should be reported to the PAPI Mailing List at <ptools-perfapi@icl.utk.edu>.
   * <br>
-  *	@see PAPI_derived_event_files
+  *    @see PAPI_derived_event_files
   *
   */
 
 // Define the PAPI_derived_event_files man page contents.
 /**
- *	@page PAPI_derived_event_files
- *	@brief Describes derived event definition file syntax.
+ *    @page PAPI_derived_event_files
+ *    @brief Describes derived event definition file syntax.
  *
- *	@section main Derived Events
- *		PAPI provides the ability to define events whose value will be derived from multiple native events.  The list of native
- *		events to be used in a derived event and a formula which describes how to use them is provided in an event definition file.
- *		The PAPI team provides an event definition file which describes all of the supported PAPI preset events.  PAPI also allows
- *		a user to provide an event definition file that describes a set of user defined events which can extend the events PAPI
- *		normally supports.
+ *    @section main Derived Events
+ *        PAPI provides the ability to define events whose value will be derived from multiple native events.  The list of native
+ *        events to be used in a derived event and a formula which describes how to use them is provided in an event definition file.
+ *        The PAPI team provides an event definition file which describes all of the supported PAPI preset events.  PAPI also allows
+ *        a user to provide an event definition file that describes a set of user defined events which can extend the events PAPI
+ *        normally supports.
  *
- *		This page documents the syntax of the commands which can appear in an event definition file.
+ *        This page documents the syntax of the commands which can appear in an event definition file.
  *
  * <br>
- *	@subsection rules General Rules:
- *	<ul>
- *		<li>Blank lines are ignored.</li>
- *		<li>Lines that begin with '#' are comments (they are also ignored).</li>
- *		<li>Names shown inside < > below represent values that must be provided by the user.</li>
- *		<li>If a user provided value contains white space, it must be protected with quotes.</li>
- *	</ul>
+ *    @subsection rules General Rules:
+ *    <ul>
+ *        <li>Blank lines are ignored.</li>
+ *        <li>Lines that begin with '#' are comments (they are also ignored).</li>
+ *        <li>Names shown inside < > below represent values that must be provided by the user.</li>
+ *        <li>If a user provided value contains white space, it must be protected with quotes.</li>
+ *    </ul>
  *
  * <br>
- *	@subsection commands Commands:
- *		@par CPU,\<pmuName\>
- *		Specifies a PMU name which controls if the PRESET and EVENT commands that follow this line should
- *		be processed.  Multiple CPU commands can be entered without PRESET or EVENT commands between them to provide
- *		a list of PMU names to which the derived events that follow will apply.  When a PMU name provided in the list
- *		matches a PMU name known to the running system, the events which follow will be created.  If none of the PMU
- *		names provided in the list match a PMU name on the running system, the events which follow will be ignored.
- *		When a new CPU command follows either a PRESET or EVENT command, the PMU list is rebuilt.<br><br>
- *
- *		@par PRESET,\<eventName\>,\<derivedType\>,\<eventAttr\>,LDESC,\"\<longDesc\>\",SDESC,\"\<shortDesc\>\",NOTE,\"\<note\>\"
- *		Declare a PAPI preset derived event.<br><br>
- *
- *		@par EVENT,\<eventName\>,\<derivedType\>,\<eventAttr\>,LDESC,\"\<longDesc\>\",SDESC,\"\<shortDesc\>\",NOTE,\"\<note\>\"
- *		Declare a user defined derived event.<br><br>
- *
- *		@par Where:
- *		@par pmuName:
- *			The PMU which the following events should apply to.  A list of PMU names supported by your
- *			system can be obtained by running papi_component_avail on your system.<br>
- *		@par eventName:
- *			Specifies the name used to identify this derived event.  This name should be unique within the events on your system.<br>
- *		@par derivedType:
- *			Specifies the kind of derived event being defined (see 'Derived Types' below).<br>
- *		@par eventAttr:
- *			Specifies a formula and a list of base events that are used to compute the derived events value.  The syntax
- *			of this field depends on the 'derivedType' specified above (see 'Derived Types' below).<br>
- *		@par longDesc:
- *			Provides the long description of the event.<br>
- *		@par shortDesc:
- *			Provides the short description of the event.<br>
- *		@par note:
- *			Provides an event note.<br>
- *		@par baseEvent (used below):
- *			Identifies an event on which this derived event is based.  This may be a native event (possibly with event masks),
- *			an already known preset event, or an already known user event.<br>
+ *    @subsection commands Commands:
+ *        @par CPU,\<pmuName\>
+ *        Specifies a PMU name which controls if the PRESET and EVENT commands that follow this line should
+ *        be processed.  Multiple CPU commands can be entered without PRESET or EVENT commands between them to provide
+ *        a list of PMU names to which the derived events that follow will apply.  When a PMU name provided in the list
+ *        matches a PMU name known to the running system, the events which follow will be created.  If none of the PMU
+ *        names provided in the list match a PMU name on the running system, the events which follow will be ignored.
+ *        When a new CPU command follows either a PRESET or EVENT command, the PMU list is rebuilt.<br><br>
+ *
+ *        @par PRESET,\<eventName\>,\<derivedType\>,\<eventAttr\>,LDESC,\"\<longDesc\>\",SDESC,\"\<shortDesc\>\",NOTE,\"\<note\>\"
+ *        Declare a PAPI preset derived event.<br><br>
+ *
+ *        @par EVENT,\<eventName\>,\<derivedType\>,\<eventAttr\>,LDESC,\"\<longDesc\>\",SDESC,\"\<shortDesc\>\",NOTE,\"\<note\>\"
+ *        Declare a user defined derived event.<br><br>
+ *
+ *        @par Where:
+ *        @par pmuName:
+ *            The PMU which the following events should apply to.  A list of PMU names supported by your
+ *            system can be obtained by running papi_component_avail on your system.<br>
+ *        @par eventName:
+ *            Specifies the name used to identify this derived event.  This name should be unique within the events on your system.<br>
+ *        @par derivedType:
+ *            Specifies the kind of derived event being defined (see 'Derived Types' below).<br>
+ *        @par eventAttr:
+ *            Specifies a formula and a list of base events that are used to compute the derived event's value.  The syntax
+ *            of this field depends on the 'derivedType' specified above (see 'Derived Types' below).<br>
+ *        @par longDesc:
+ *            Provides the long description of the event.<br>
+ *        @par shortDesc:
+ *            Provides the short description of the event.<br>
+ *        @par note:
+ *            Provides an event note.<br>
+ *        @par baseEvent (used below):
+ *            Identifies an event on which this derived event is based.  This may be a native event (possibly with event masks),
+ *            an already known preset event, or an already known user event.<br>
  *
  * <br>
- *	@subsection notes Notes:
- *		The PRESET command has traditionally been used in the PAPI provided preset definition file.
- *		The EVENT command is intended to be used in user defined event definition files.  The code treats them
- *		the same so they are interchangeable and they can both be used in either event definition file.<br>
+ *    @subsection notes Notes:
+ *        The PRESET command has traditionally been used in the PAPI provided preset definition file.
+ *        The EVENT command is intended to be used in user defined event definition files.  The code treats them
+ *        the same so they are interchangeable and they can both be used in either event definition file.<br>
  *
  * <br>
- *	@subsection types Derived Types:
- *		This describes values allowed in the 'derivedType' field of the PRESET and EVENT commands.  It also
- *		shows the syntax of the 'eventAttr' field for each derived type supported by these commands.
- *		All of the derived events provide a list of one or more events which the derived event is based
- *		on (baseEvent).  Some derived events provide a formula that specifies how to compute the derived
- *		events value using the baseEvents in the list.  The following derived types are supported, the syntax
- *		of the 'eventAttr' parameter for each derived event type is shown in parentheses.<br><br>
- *
- *		@par NOT_DERIVED (\<baseEvent\>):
- *			This derived type defines an alias for the existing event 'baseEvent'.<br>
- *		@par DERIVED_ADD (\<baseEvent1\>,\<baseEvent2\>):
- *			This derived type defines a new event that will be the sum of two other
- *			events.  It has a value of 'baseEvent1' plus 'baseEvent2'.<br>
- *		@par DERIVED_PS (PAPI_TOT_CYC,\<baseEvent1\>):
- *			This derived type defines a new event that will report the number of 'baseEvent1' events which occurred
- *			per second.  It has a value of ((('baseEvent1' * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC).  The user must
- *			provide PAPI_TOT_CYC as the first event of two events in the event list for this to work correctly.<br>
- *		@par DERIVED_ADD_PS (PAPI_TOT_CYC,\<baseEvent1\>,\<baseEvent2\>):
- *			This derived type defines a new event that will add together two event counters and then report the number
- *			which occurred per second.  It has a value of (((('baseEvent1' + baseEvent2) * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC).
- *			The user must provide PAPI_TOT_CYC as the first event of three events in the event list for this to work correctly.<br>
- *		@par DERIVED_CMPD (\<baseEvent1\>,\<baseEvent2\):
- *			This derived type works much like the NOT_DERIVED type.  It is rarely used and it looks like the code just returns
- *			a single value returned from the kernel.  There is no calculation done to compute this events value.  Not sure why
- *			multiple input events seem to be needed to use this event type.<br>
- *		@par DERIVED_SUB (\<baseEvent1\>,\<baseEvent2\>):
- *			This derived type defines a new event that will be the difference between two other
- *			events.  It has a value of 'baseEvent1' minus 'baseEvent2'.<br>
- *		@par DERIVED_POSTFIX (\<pfFormula\>,\<baseEvent1\>,\<baseEvent2\>, ... ,\<baseEventn\>):
- *			This derived type defines a new event whose value is computed from several native events using
- *			a postfix (reverse polish notation) formula.  Its value is the result of processing the postfix
- *			formula.  The 'pfFormula' is of the form 'N0|N1|N2|5|*|+|-|' where the '|' acts as a token
- *			separator and the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1,
- *			and baseEvent2 respectively.<br>
- *		@par DERIVED_INFIX (\<ifFormula\>,\<baseEvent1\>,\<baseEvent2\>, ... ,\<baseEventn\>):
- *			This derived type defines a new event whose value is computed from several native events using
- *			an infix (algebraic notation) formula.  Its value is the result of processing the infix
- *			formula.  The 'ifFormula' is of the form 'N0-(N1+(N2*5))' where the tokens N0, N1, and N2
- *			are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively.<br>
+ *    @subsection types Derived Types:
+ *        This describes values allowed in the 'derivedType' field of the PRESET and EVENT commands.  It also
+ *        shows the syntax of the 'eventAttr' field for each derived type supported by these commands.
+ *        All of the derived events provide a list of one or more events which the derived event is based
+ *        on (baseEvent).  Some derived events provide a formula that specifies how to compute the derived
+ *        event's value using the baseEvents in the list.  The following derived types are supported; the syntax
+ *        of the 'eventAttr' parameter for each derived event type is shown in parentheses.<br><br>
+ *
+ *        @par NOT_DERIVED (\<baseEvent\>):
+ *            This derived type defines an alias for the existing event 'baseEvent'.<br>
+ *        @par DERIVED_ADD (\<baseEvent1\>,\<baseEvent2\>):
+ *            This derived type defines a new event that will be the sum of two other
+ *            events.  It has a value of 'baseEvent1' plus 'baseEvent2'.<br>
+ *        @par DERIVED_PS (PAPI_TOT_CYC,\<baseEvent1\>):
+ *            This derived type defines a new event that will report the number of 'baseEvent1' events which occurred
+ *            per second.  It has a value of ((('baseEvent1' * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC).  The user must
+ *            provide PAPI_TOT_CYC as the first event of two events in the event list for this to work correctly.<br>
+ *        @par DERIVED_ADD_PS (PAPI_TOT_CYC,\<baseEvent1\>,\<baseEvent2\>):
+ *            This derived type defines a new event that will add together two event counters and then report the number
+ *            which occurred per second.  It has a value of (((('baseEvent1' + 'baseEvent2') * cpu_max_mhz) * 1000000 ) / PAPI_TOT_CYC).
+ *            The user must provide PAPI_TOT_CYC as the first event of three events in the event list for this to work correctly.<br>
+ *        @par DERIVED_CMPD (\<baseEvent1\>,\<baseEvent2\>):
+ *            This derived type works much like the NOT_DERIVED type.  It is rarely used and it looks like the code just returns
+ *            a single value returned from the kernel.  There is no calculation done to compute this event's value.  Not sure why
+ *            multiple input events seem to be needed to use this event type.<br>
+ *        @par DERIVED_SUB (\<baseEvent1\>,\<baseEvent2\>):
+ *            This derived type defines a new event that will be the difference between two other
+ *            events.  It has a value of 'baseEvent1' minus 'baseEvent2'.<br>
+ *        @par DERIVED_POSTFIX (\<pfFormula\>,\<baseEvent1\>,\<baseEvent2\>, ... ,\<baseEventn\>):
+ *            This derived type defines a new event whose value is computed from several native events using
+ *            a postfix (reverse polish notation) formula.  Its value is the result of processing the postfix
+ *            formula.  The 'pfFormula' is of the form 'N0|N1|N2|5|*|+|-|' where the '|' acts as a token
+ *            separator and the tokens N0, N1, and N2 are place holders that represent baseEvent0, baseEvent1,
+ *            and baseEvent2 respectively.<br>
+ *        @par DERIVED_INFIX (\<ifFormula\>,\<baseEvent1\>,\<baseEvent2\>, ... ,\<baseEventn\>):
+ *            This derived type defines a new event whose value is computed from several native events using
+ *            an infix (algebraic notation) formula.  Its value is the result of processing the infix
+ *            formula.  The 'ifFormula' is of the form 'N0-(N1+(N2*5))' where the tokens N0, N1, and N2
+ *            are place holders that represent baseEvent0, baseEvent1, and baseEvent2 respectively.<br>
  *
  * <br>
- *	@subsection example Example:
- *		In the following example, the events PAPI_SP_OPS, USER_SP_OPS, and ALIAS_SP_OPS will all measure the same events and return
- *		the same value.  They just demonstrate different ways to use the PRESET and EVENT event definition commands.<br><br>
- *
- *		<ul>
- *			<li># The following lines define pmu names that all share the following events</li>
- *			<li>CPU nhm</li>
- *			<li>CPU nhm-ex</li>
- *			<li>\# Events which should be defined for either of the above pmu types</li>
- *			<li>PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES</li>
- *			<li>PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES</li>
- *			<li>PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|3|*|+|,FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,"Using a postfix formula"</li>
- *			<li>EVENT,USER_SP_OPS,DERIVED_INFIX,N0+(N1*3),FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,"Using the same formula in infix format"</li>
- *			<li>EVENT,ALIAS_SP_OPS,NOT_DERIVED,PAPI_SP_OPS,LDESC,"Alias for preset event PAPI_SP_OPS"</li>
- *			<li># End of event definitions for above pmu names and start of a section for a new pmu name.</li>
- *			<li>CPU snb</li>
- *		</ul>
+ *    @subsection example Example:
+ *        In the following example, the events PAPI_SP_OPS, USER_SP_OPS, and ALIAS_SP_OPS will all measure the same events and return
+ *        the same value.  They just demonstrate different ways to use the PRESET and EVENT event definition commands.<br><br>
+ *
+ *        <ul>
+ *            <li># The following lines define pmu names that all share the following events</li>
+ *            <li>CPU nhm</li>
+ *            <li>CPU nhm-ex</li>
+ *            <li>\# Events which should be defined for either of the above pmu types</li>
+ *            <li>PRESET,PAPI_TOT_CYC,NOT_DERIVED,UNHALTED_CORE_CYCLES</li>
+ *            <li>PRESET,PAPI_REF_CYC,NOT_DERIVED,UNHALTED_REFERENCE_CYCLES</li>
+ *            <li>PRESET,PAPI_SP_OPS,DERIVED_POSTFIX,N0|N1|3|*|+|,FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,"Using a postfix formula"</li>
+ *            <li>EVENT,USER_SP_OPS,DERIVED_INFIX,N0+(N1*3),FP_COMP_OPS_EXE:SSE_SINGLE_PRECISION,FP_COMP_OPS_EXE:SSE_FP_PACKED,NOTE,"Using the same formula in infix format"</li>
+ *            <li>EVENT,ALIAS_SP_OPS,NOT_DERIVED,PAPI_SP_OPS,LDESC,"Alias for preset event PAPI_SP_OPS"</li>
+ *            <li># End of event definitions for above pmu names and start of a section for a new pmu name.</li>
+ *            <li>CPU snb</li>
+ *        </ul>
  *
  */
 
@@ -187,14 +187,14 @@
 static char *
 is_derived( PAPI_event_info_t * info )
 {
-	if ( strlen( info->derived ) == 0 )
-		return ( "No" );
-	else if ( strcmp( info->derived, "NOT_DERIVED" ) == 0 )
-		return ( "No" );
-	else if ( strcmp( info->derived, "DERIVED_CMPD" ) == 0 )
-		return ( "No" );
-	else
-		return ( "Yes" );
+    if ( strlen( info->derived ) == 0 )
+        return ( "No" );
+    else if ( strcmp( info->derived, "NOT_DERIVED" ) == 0 )
+        return ( "No" );
+    else if ( strcmp( info->derived, "DERIVED_CMPD" ) == 0 )
+        return ( "No" );
+    else
+        return ( "Yes" );
 }
 
 static void
@@ -202,64 +202,106 @@ print_help( char **argv )
 {
         printf( "This is the PAPI avail program.\n" );
         printf( "It provides availability and details about PAPI Presets and User-defined Events.\n" );
-	printf( "PAPI Preset Event filters can be combined in a logical OR.\n" );
-	printf( "Usage: %s [options]\n", argv[0] );
-	printf( "Options:\n\n" );
-	printf( "General command options:\n" );
-	printf( "\t-h, --help       Print this help message\n" );
-	printf( "\t-a, --avail      Display only available PAPI preset and user defined events\n" );
-	printf( "\t-c, --check      Display only available PAPI preset and user defined events after an availability check\n" );
-	printf( "\t-d, --detail     Display detailed information about events\n" );
-	printf( "\t-e EVENTNAME     Display detail information about specified event\n" );
-	printf( "\nEvent filtering options:\n" );
-	printf( "\t--br             Display branch related PAPI preset events\n" );
-	printf( "\t--cache          Display cache related PAPI preset events\n" );
-	printf( "\t--cnd            Display conditional PAPI preset events\n" );
-	printf( "\t--fp             Display Floating Point related PAPI preset events\n" );
-	printf( "\t--ins            Display instruction related PAPI preset events\n" );
-	printf( "\t--idl            Display Stalled or Idle PAPI preset events\n" );
-	printf( "\t--l1             Display level 1 cache related PAPI preset events\n" );
-	printf( "\t--l2             Display level 2 cache related PAPI preset events\n" );
-	printf( "\t--l3             Display level 3 cache related PAPI preset events\n" );
-	printf( "\t--mem            Display memory related PAPI preset events\n" );
-	printf( "\t--msc            Display miscellaneous PAPI preset events\n" );
-	printf( "\t--tlb            Display Translation Lookaside Buffer PAPI preset events\n" );
-	printf( "\n" );
+    printf( "PAPI Preset Event filters can be combined in a logical OR.\n" );
+    printf( "Usage: %s [options]\n", argv[0] );
+    printf( "Options:\n\n" );
+    printf( "General command options:\n" );
+    printf( "\t-h, --help       Print this help message\n" );
+    printf( "\t-a, --avail      Display only available PAPI preset and user defined events\n" );
+    printf( "\t-c, --check      Display only available PAPI preset and user defined events after an availability check\n" );
+    printf( "\t-d, --detail     Display detailed information about events\n" );
+    printf( "\t-e EVENTNAME     Display detail information about specified event\n" );
+    printf( "\nEvent filtering options:\n" );
+    printf( "\t--br             Display branch related PAPI preset events\n" );
+    printf( "\t--cache          Display cache related PAPI preset events\n" );
+    printf( "\t--cnd            Display conditional PAPI preset events\n" );
+    printf( "\t--fp             Display Floating Point related PAPI preset events\n" );
+    printf( "\t--ins            Display instruction related PAPI preset events\n" );
+    printf( "\t--idl            Display Stalled or Idle PAPI preset events\n" );
+    printf( "\t--l1             Display level 1 cache related PAPI preset events\n" );
+    printf( "\t--l2             Display level 2 cache related PAPI preset events\n" );
+    printf( "\t--l3             Display level 3 cache related PAPI preset events\n" );
+    printf( "\t--mem            Display memory related PAPI preset events\n" );
+    printf( "\t--msc            Display miscellaneous PAPI preset events\n" );
+    printf( "\t--tlb            Display Translation Lookaside Buffer PAPI preset events\n" );
+    printf( "\n" );
 }
 
 static int
 parse_unit_masks( PAPI_event_info_t * info )
 {
-	char *pmask;
+    char *pmask;
 
-	if ( ( pmask = strchr( info->symbol, ':' ) ) == NULL ) {
-		return ( 0 );
-	}
-	memmove( info->symbol, pmask, ( strlen( pmask ) + 1 ) * sizeof ( char ) );
-	pmask = strchr( info->long_descr, ':' );
-	if ( pmask == NULL )
-		info->long_descr[0] = 0;
-	else
-		memmove( info->long_descr, pmask + sizeof ( char ),
-				 ( strlen( pmask ) + 1 ) * sizeof ( char ) );
-	return 1;
+    if ( ( pmask = strchr( info->symbol, ':' ) ) == NULL ) {
+        return ( 0 );   /* no ':' in the symbol: nothing to strip */
+    }
+    /* Keep only the ":..." tail of the symbol, colon included. */
+    memmove( info->symbol, pmask, ( strlen( pmask ) + 1 ) * sizeof ( char ) );
+    pmask = strchr( info->long_descr, ':' );
+    if ( pmask == NULL )
+        info->long_descr[0] = 0;
+    else  /* text after ':' plus its NUL is strlen(pmask) bytes; +1 would read past the NUL */
+        memmove( info->long_descr, pmask + 1, strlen( pmask ) );
+    return 1;
 }
 
 static int
 checkCounter (int eventcode)
 {
-	int EventSet = PAPI_NULL;
-	if (PAPI_create_eventset(&EventSet) != PAPI_OK)
-		return 0;
-	if (PAPI_add_event (EventSet, eventcode) != PAPI_OK)
-		return 0;
-	if (PAPI_cleanup_eventset (EventSet) != PAPI_OK)
-		return 0;
-	if (PAPI_destroy_eventset (&EventSet) != PAPI_OK)
-		return 0;
-	return 1;
+    int EventSet = PAPI_NULL, added;   /* added: 1 if eventcode could be added, else 0 */
+    if (PAPI_create_eventset(&EventSet) != PAPI_OK)
+        return 0;
+    /* Tear the event set down even when the add fails, so it is not leaked. */
+    added = ( PAPI_add_event (EventSet, eventcode) == PAPI_OK );
+    if (PAPI_cleanup_eventset (EventSet) != PAPI_OK)
+        return 0;
+    if (PAPI_destroy_eventset (&EventSet) != PAPI_OK)
+        return 0;
+    return added;
+}
+
+static int
+get_max_symbol_length ( int initModifier, int iterModifier ) {
+    /* Return (longest info.symbol length + 1) over the events enumerated from the first preset code, or 13 if the first enumeration call fails. */
+    int ecode = 0 | PAPI_PRESET_MASK;
+    int len, maxLen = 0;
+    PAPI_event_info_t info;
+
+    /* In case of error, return the legacy value. */
+    if ( PAPI_enum_event( &ecode, initModifier ) != PAPI_OK ) {
+        return 13;
+    }
+    /* Walk the events with iterModifier; events whose info cannot be fetched are skipped. */
+    do {
+        if ( PAPI_get_event_info( ecode, &info ) == PAPI_OK ) {
+            len = strlen(info.symbol);
+            if ( len > maxLen ) {
+                maxLen = len;
+            }
+        }
+    } while ( PAPI_enum_event(&ecode, iterModifier) == PAPI_OK );
+    /* NOTE(review): the extra 1 presumably leaves room for a column separator -- confirm against the callers. */
+    return maxLen+1;
 }
 
+static int
+print_comp_header_flag ( void ) {
+    /* Count the components whose name is none of "perf_event", "sysdetect" or "No Components Configured. ". */
+    int numComps = PAPI_num_components();
+    const PAPI_component_info_t *cmpinfo;
+    int cid, non_cpu_comps = 0;
+    for ( cid = 0; cid < numComps; cid++ ) {
+      cmpinfo = PAPI_get_component_info( cid ); /* NOTE(review): used below without a NULL check */
+      if ( strcmp(cmpinfo->name, "perf_event") == 0
+           || strcmp(cmpinfo->name, "sysdetect") == 0
+           || strcmp(cmpinfo->name, "No Components Configured. ") == 0 ) {
+         continue;
+      }
+      non_cpu_comps++;
+    }
+    /* The result is a count, not a 0/1 flag; callers presumably test it for non-zero -- TODO confirm. */
+    return non_cpu_comps;
+}
 
 /*
   Checks whether a preset event is available. If it is available,
@@ -277,19 +319,31 @@ int is_preset_event_available(char *name
     exit(1);
   }
 
+  /* Some component presets require qualifiers (e.g. ":device=0") but base
+   * preset names carry none, so strip the qualifier before matching.  If the
+   * copy cannot be allocated or has no token, compare against the full name. */
+  char *localname = strdup(name);
+  char *basename  = ( NULL != localname ) ? strtok(localname, ":") : NULL;
+  if( NULL == basename ) {
+    basename = name;
+  }
+
   /* Iterate over all the available preset events and compare them by names. */
   do {
     if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) {
       
       if ( info.count ) {
-	if ( (check_counter && checkCounter (event_code)) || !check_counter) {
-	  if (strcmp(info.symbol, name) == 0)
-	    return 1;
-	}
+    if ( (check_counter && checkCounter (event_code)) || !check_counter) {
+      /* Release the strdup'ed copy before the early return; basename may point into it. */
+      if (strcmp(info.symbol, basename) == 0) { free(localname); return 1; }
+    }
       }
     }
   } while (PAPI_enum_event( &event_code, PAPI_PRESET_ENUM_AVAIL ) == PAPI_OK);
 
+  /* Free the temporary, dynamically allocated buffer. */
+  free(localname);
+
   return 0;
 }
 
@@ -303,7 +357,7 @@ main( int argc, char **argv )
    unsigned int filter = 0;
    int print_event_info = 0;
    char *name = NULL;
-   int print_avail_only = PAPI_ENUM_EVENTS;
+   int print_avail_only = PAPI_PRESET_ENUM_CPU;
    int print_tabular = 1;
    PAPI_event_info_t info;
    const PAPI_hw_info_t *hwinfo = NULL;
@@ -319,49 +373,49 @@ main( int argc, char **argv )
 
    for( args = 1; args < argc; args++ ) {
       if ( strstr( argv[args], "-e" ) ) {
-	 print_event_info = 1;
+     print_event_info = 1;
          if( (args+1 >= argc) || ( argv[args+1] == NULL ) || ( strlen( argv[args+1] ) == 0 ) ) {
-	    print_help( argv );
-	    exit( 1 );
-	 }
-	 name = argv[args + 1];
+        print_help( argv );
+        exit( 1 );
+     }
+     name = argv[args + 1];
       }
       else if ( ( !strstr( argv[args], "--") && strstr( argv[args], "-c" ) ) || strstr(argv[args], "--check") )
       {
-	 print_avail_only = PAPI_PRESET_ENUM_AVAIL;
+     print_avail_only = PAPI_PRESET_ENUM_CPU_AVAIL;
          check_counter = 1;
       }
       else if ( strstr( argv[args], "-a" ))
-	 print_avail_only = PAPI_PRESET_ENUM_AVAIL;
+     print_avail_only = PAPI_PRESET_ENUM_CPU_AVAIL;
       else if ( strstr( argv[args], "-d" ) )
-	 print_tabular = 0;
+     print_tabular = 0;
       else if ( strstr( argv[args], "-h" ) ) {
-	 print_help( argv );
-	 exit( 1 );
+     print_help( argv );
+     exit( 1 );
       } else if ( strstr( argv[args], "--br" ) )
-	 filter |= PAPI_PRESET_BIT_BR;
+     filter |= PAPI_PRESET_BIT_BR;
       else if ( strstr( argv[args], "--cache" ) )
-	 filter |= PAPI_PRESET_BIT_CACH;
+     filter |= PAPI_PRESET_BIT_CACH;
       else if ( strstr( argv[args], "--cnd" ) )
-	 filter |= PAPI_PRESET_BIT_CND;
+     filter |= PAPI_PRESET_BIT_CND;
       else if ( strstr( argv[args], "--fp" ) )
-	 filter |= PAPI_PRESET_BIT_FP;
+     filter |= PAPI_PRESET_BIT_FP;
       else if ( strstr( argv[args], "--ins" ) )
-	 filter |= PAPI_PRESET_BIT_INS;
+     filter |= PAPI_PRESET_BIT_INS;
       else if ( strstr( argv[args], "--idl" ) )
-	 filter |= PAPI_PRESET_BIT_IDL;
+     filter |= PAPI_PRESET_BIT_IDL;
       else if ( strstr( argv[args], "--l1" ) )
-	 filter |= PAPI_PRESET_BIT_L1;
+     filter |= PAPI_PRESET_BIT_L1;
       else if ( strstr( argv[args], "--l2" ) )
-	 filter |= PAPI_PRESET_BIT_L2;
+     filter |= PAPI_PRESET_BIT_L2;
       else if ( strstr( argv[args], "--l3" ) )
-	 filter |= PAPI_PRESET_BIT_L3;
+     filter |= PAPI_PRESET_BIT_L3;
       else if ( strstr( argv[args], "--mem" ) )
-	 filter |= PAPI_PRESET_BIT_MEM;
+     filter |= PAPI_PRESET_BIT_MEM;
       else if ( strstr( argv[args], "--msc" ) )
-	 filter |= PAPI_PRESET_BIT_MSC;
+     filter |= PAPI_PRESET_BIT_MSC;
       else if ( strstr( argv[args], "--tlb" ) )
-	 filter |= PAPI_PRESET_BIT_TLB;
+     filter |= PAPI_PRESET_BIT_TLB;
    }
 
    if ( filter == 0 ) {
@@ -372,233 +426,397 @@ main( int argc, char **argv )
 
    retval = PAPI_library_init( PAPI_VER_CURRENT );
    if ( retval != PAPI_VER_CURRENT ) {
-	fprintf(stderr,"Error!  PAPI library mismatch!\n");
-	return 1;
+    fprintf(stderr,"Error!  PAPI library mismatch!\n");
+    return 1;
    }
 
 
-	retval = PAPI_set_debug( PAPI_VERB_ECONT );
-	if ( retval != PAPI_OK ) {
-		fprintf(stderr,"Error with PAPI_set debug!\n");
-		return 1;
-	}
+    retval = PAPI_set_debug( PAPI_VERB_ECONT );
+    if ( retval != PAPI_OK ) {
+        fprintf(stderr,"Error with PAPI_set debug!\n");
+        return 1;
+    }
 
       retval=papi_print_header("Available PAPI preset and user defined events plus hardware information.\n",
-			       &hwinfo );
-	if ( retval != PAPI_OK ) {
-		fprintf(stderr,"Error with PAPI_get_hardware_info!\n");
-		return 1;
-	}
+                   &hwinfo );
+    if ( retval != PAPI_OK ) {
+        fprintf(stderr,"Error with PAPI_get_hardware_info!\n");
+        return 1;
+    }
 
       /* Code for info on just one event */
 
       if ( print_event_info ) {
 
-	 if ( PAPI_event_name_to_code( name, &event_code ) == PAPI_OK ) {
-	    if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) {
+     if ( PAPI_event_name_to_code( name, &event_code ) == PAPI_OK ) {
+        if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) {
 
-	       if ( event_code & PAPI_PRESET_MASK ) {
-		  printf( "%-30s%s\n%-30s%#-10x\n%-30s%d\n",
-			  "Event name:", info.symbol, "Event Code:",
-			  info.event_code, "Number of Native Events:",
-			  info.count );
-		  printf( "%-29s|%s|\n%-29s|%s|\n%-29s|%s|\n",
-			  "Short Description:", info.short_descr,
-			  "Long Description:", info.long_descr,
-			  "Developer's Notes:", info.note );
-		  printf( "%-29s|%s|\n%-29s|%s|\n", "Derived Type:",
-			  info.derived, "Postfix Processing String:",
-			  info.postfix );
-
-		  for( j = 0; j < ( int ) info.count; j++ ) {
-		     printf( " Native Code[%d]: %#x |%s|\n", j,
-			     info.code[j], info.name[j] );
-		     PAPI_get_event_info( (int) info.code[j], &n_info );
-		     printf(" Number of Register Values: %d\n", n_info.count );
-		     for( k = 0; k < ( int ) n_info.count; k++ ) {
-			printf( " Register[%2d]: %#08x |%s|\n", k,
-				n_info.code[k], n_info.name[k] );
-		     }
-		     printf( " Native Event Description: |%s|\n\n",
-			     n_info.long_descr );
-		  }
-
-		  if (!is_preset_event_available(name)) {
-		    printf("\nPRESET event %s is NOT available on this architecture!\n\n", name);
-		  }
-
-	       } else {	 /* must be a native event code */
-		  printf( "%-30s%s\n%-30s%#-10x\n%-30s%d\n",
-			  "Event name:", info.symbol, "Event Code:",
-			  info.event_code, "Number of Register Values:",
-			  info.count );
-		  printf( "%-29s|%s|\n", "Description:", info.long_descr );
-		  for ( k = 0; k < ( int ) info.count; k++ ) {
-		      printf( " Register[%2d]: %#08x |%s|\n", k,
-			      info.code[k], info.name[k] );
-		  }
-
-		  /* if unit masks exist but none are specified, process all */
-		  if ( !strchr( name, ':' ) ) {
-		     if ( 1 ) {
-			if ( PAPI_enum_event( &event_code, PAPI_NTV_ENUM_UMASKS ) == PAPI_OK ) {
-			   printf( "\nUnit Masks:\n" );
-			   do {
-			      retval = PAPI_get_event_info(event_code, &info );
-			      if ( retval == PAPI_OK ) {
-				 if ( parse_unit_masks( &info ) ) {
-				    printf( "%-29s|%s|%s|\n",
-					    " Mask Info:", info.symbol,
-					    info.long_descr );
-				    for ( k = 0; k < ( int ) info.count;k++ ) {
-					printf( "  Register[%2d]:  %#08x  |%s|\n",
-						k, info.code[k], info.name[k] );
-				    }
-				 }
-			      }
-			   } while ( PAPI_enum_event( &event_code,
-					  PAPI_NTV_ENUM_UMASKS ) == PAPI_OK );
-			}
-		     }
-		  }
-	       }
-	    }
-	 } else {
-	    printf( "Sorry, an event by the name '%s' could not be found.\n"
+           if ( event_code & PAPI_PRESET_MASK ) {
+          printf( "%-30s%s\n%-30s%#-10x\n%-30s%d\n",
+              "Event name:", info.symbol, "Event Code:",
+              info.event_code, "Number of Native Events:",
+              info.count );
+          printf( "%-29s|%s|\n%-29s|%s|\n%-29s|%s|\n",
+              "Short Description:", info.short_descr,
+              "Long Description:", info.long_descr,
+              "Developer's Notes:", info.note );
+          printf( "%-29s|%s|\n%-29s|%s|\n", "Derived Type:",
+              info.derived, "Postfix Processing String:",
+              info.postfix );
+
+          for( j = 0; j < ( int ) info.count; j++ ) {
+             printf( " Native Code[%d]: %#x |%s|\n", j,
+                 info.code[j], info.name[j] );
+             PAPI_get_event_info( (int) info.code[j], &n_info );
+             printf(" Number of Register Values: %d\n", n_info.count );
+             for( k = 0; k < ( int ) n_info.count; k++ ) {
+            printf( " Register[%2d]: %#08x |%s|\n", k,
+                n_info.code[k], n_info.name[k] );
+             }
+             printf( " Native Event Description: |%s|\n\n",
+                 n_info.long_descr );
+          }
+
+          if (!is_preset_event_available(name)) {
+            printf("\nPRESET event %s is NOT available on this architecture!\n\n", name);
+          }
+
+           } else {     /* must be a native event code */
+          printf( "%-30s%s\n%-30s%#-10x\n%-30s%d\n",
+              "Event name:", info.symbol, "Event Code:",
+              info.event_code, "Number of Register Values:",
+              info.count );
+          printf( "%-29s|%s|\n", "Description:", info.long_descr );
+          for ( k = 0; k < ( int ) info.count; k++ ) {
+              printf( " Register[%2d]: %#08x |%s|\n", k,
+                  info.code[k], info.name[k] );
+          }
+
+          /* if unit masks exist but none are specified, process all */
+          if ( !strchr( name, ':' ) ) {
+             if ( 1 ) {
+            if ( PAPI_enum_event( &event_code, PAPI_NTV_ENUM_UMASKS ) == PAPI_OK ) {
+               printf( "\nUnit Masks:\n" );
+               do {
+                  retval = PAPI_get_event_info(event_code, &info );
+                  if ( retval == PAPI_OK ) {
+                 if ( parse_unit_masks( &info ) ) {
+                    printf( "%-29s|%s|%s|\n",
+                        " Mask Info:", info.symbol,
+                        info.long_descr );
+                    for ( k = 0; k < ( int ) info.count;k++ ) {
+                    printf( "  Register[%2d]:  %#08x  |%s|\n",
+                        k, info.code[k], info.name[k] );
+                    }
+                 }
+                  }
+               } while ( PAPI_enum_event( &event_code,
+                      PAPI_NTV_ENUM_UMASKS ) == PAPI_OK );
+            }
+             }
+          }
+           }
+        }
+     } else {
+        printf( "Sorry, an event by the name '%s' could not be found.\n"
                     " Is it typed correctly?\n\n", name );
-	 }
+     }
       } else {
 
-	 /* Print *ALL* Events */
+     /* Print *ALL* Events */
 
   for (i=0 ; i<2 ; i++) {
-	// set the event code to fetch preset events the first time through loop and user events the second time through the loop
-	if (i== 0) {
-		event_code = 0 | PAPI_PRESET_MASK;
-	} else {
-		event_code = 0 | PAPI_UE_MASK;
-	}
-
-	/* For consistency, always ASK FOR the first event, if there is not one then nothing to process */
-	if (PAPI_enum_event( &event_code, PAPI_ENUM_FIRST ) != PAPI_OK) {
-		 continue;
-	}
-
-	// print heading to show which kind of events follow
-	if (i== 0) {
-		printf( "================================================================================\n" );
-		printf( "  PAPI Preset Events\n" );
-		printf( "================================================================================\n" );
-	} else {
-		printf( "\n");       // put a blank line after the presets before strarting the user events
-		printf( "================================================================================\n" );
-		printf( "  User Defined Events\n" );
-		printf( "================================================================================\n" );
-	}
-
-	 if ( print_tabular ) {
-	    printf( "    Name        Code    " );
-	    if ( !print_avail_only ) {
-	       printf( "Avail " );
-	    }
-	    printf( "Deriv Description (Note)\n" );
-	 } else {
-	    printf( "%-13s%-11s%-8s%-16s\n |Long Description|\n"
-                    " |Developer's Notes|\n |Derived|\n |PostFix|\n"
-                    " Native Code[n]: <hex> |name|\n",
-		    "Symbol", "Event Code", "Count", "|Short Description|" );
-	 }
-	 do {
-	    if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) {
-	       if ( print_tabular ) {
-	      // if this is a user defined event or its a preset and matches the preset event filters, display its information
-		  if ( (i==1) || (filter & info.event_type)) {
-		     if ( print_avail_only ) {
-		        if ( info.count ) {
+    // set the event code to fetch preset events the first time through loop and user events the second time through the loop
+    if (i== 0) {
+        event_code = 0 | PAPI_PRESET_MASK;
+    } else {
+        event_code = 0 | PAPI_UE_MASK;
+    }
+
+    /* For consistency, always ASK FOR the first event, if there is not one then nothing to process */
+    if (PAPI_enum_event( &event_code, PAPI_ENUM_FIRST ) != PAPI_OK) {
+         continue;
+    }
+
+    /* Get the length of the longest preset symbol. */
+    int maxSymLen = get_max_symbol_length(PAPI_ENUM_FIRST, PAPI_PRESET_ENUM_CPU);
+    int frontPad = (maxSymLen-4)/2; /* 4 == strlen("Name") */
+    int backPad  = maxSymLen-4-frontPad;
+
+    // print heading to show which kind of events follow
+    if (i== 0) {
+        printf( "================================================================================\n" );
+        printf( "  PAPI Preset Events\n" );
+        printf( "================================================================================\n" );
+    } else {
+        printf( "\n");       // put a blank line after the presets before strarting the user events
+        printf( "================================================================================\n" );
+        printf( "  User Defined Events\n" );
+        printf( "================================================================================\n" );
+    }
+
+     if ( print_tabular ) {
+        int spaceCnt = 0;
+        for( spaceCnt = 0; spaceCnt < frontPad; ++spaceCnt ) {
+            printf(" ");
+        }
+        printf( "Name");
+        for( spaceCnt = 0; spaceCnt < backPad; ++spaceCnt ) {
+            printf(" ");
+        }
+        printf( "   Code    " );
+        if ( print_avail_only == PAPI_PRESET_ENUM_CPU ) {
+           printf( "Avail " );
+        }
+        printf( "Deriv Description (Note)\n" );
+     } else {
+        printf( "%-13s%-11s%-8s%-16s\n |Long Description|\n"
+                " |Developer's Notes|\n |Derived|\n |PostFix|\n"
+                " Native Code[n]: <hex> |name|\n",
+                "Symbol", "Event Code", "Count", "|Short Description|" );
+     }
+     do {
+        if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) {
+           if ( print_tabular ) {
+          // if this is a user defined event or its a preset and matches the preset event filters, display its information
+          if ( (i==1) || (filter & info.event_type)) {
+             if ( print_avail_only == PAPI_PRESET_ENUM_CPU_AVAIL ) {
+                if ( info.count ) {
                    if ( (check_counter && checkCounter (event_code)) || !check_counter)
                    {
-                      printf( "%-13s%#x  %-5s%s",
+                      printf( "%-*s%#x  %-5s%s", maxSymLen,
                          info.symbol,
                          info.event_code,
                          is_derived( &info ), info.long_descr );
                    }
-			}
-		        if ( info.note[0] ) {
-			   printf( " (%s)", info.note );
-			}
-			printf( "\n" );
-		     } else {
-			printf( "%-13s%#x  %-6s%-4s %s",
-				info.symbol,
-				info.event_code,
-				( info.count ? "Yes" : "No" ),
-				is_derived( &info ), info.long_descr );
-			if ( info.note[0] ) {
-			   printf( " (%s)", info.note );
-			}
-			printf( "\n" );
-		     }
-		     tot_count++;
-		     if ( info.count ) {
-	            if ((check_counter && checkCounter (event_code)) || !check_counter )
-	              avail_count++;
-		     }
-		     if ( !strcmp( is_derived( &info ), "Yes" ) ) {
-			deriv_count++;
-		     }
-		  }
-	       } else {
-		  if ( ( print_avail_only && info.count ) ||
-		       ( print_avail_only == 0 ) )
-	      {
-	         if ((check_counter && checkCounter (event_code)) || !check_counter)
-	         {
-	           printf( "%s\t%#x\t%d\t|%s|\n |%s|\n"
-			     " |%s|\n |%s|\n |%s|\n",
-			     info.symbol, info.event_code, info.count,
-			     info.short_descr, info.long_descr, info.note,
-			     info.derived, info.postfix );
-	           for ( j = 0; j < ( int ) info.count; j++ ) {
-	              printf( " Native Code[%d]: %#x |%s|\n", j,
-	              info.code[j], info.name[j] );
-	           }
+            }
+                if ( info.note[0] ) {
+               printf( " (%s)", info.note );
+            }
+            printf( "\n" );
+             } else {
+            printf( "%-*s%#x  %-6s%-4s %s", maxSymLen,
+                info.symbol,
+                info.event_code,
+                ( info.count ? "Yes" : "No" ),
+                is_derived( &info ), info.long_descr );
+            if ( info.note[0] ) {
+               printf( " (%s)", info.note );
+            }
+            printf( "\n" );
+             }
+             tot_count++;
+             if ( info.count ) {
+                if ((check_counter && checkCounter (event_code)) || !check_counter )
+                  avail_count++;
+             }
+             if ( !strcmp( is_derived( &info ), "Yes" ) ) {
+            deriv_count++;
              }
-		  }
-		  tot_count++;
-		  if ( info.count ) {
-	         if ((check_counter && checkCounter (event_code)) || !check_counter )
-		        avail_count++;
-		  }
-		  if ( !strcmp( is_derived( &info ), "Yes" ) ) {
-		     deriv_count++;
-		  }
-	       }
-	    }
-	 } while (PAPI_enum_event( &event_code, print_avail_only ) == PAPI_OK);
+          }
+           } else {
+          if ( ( print_avail_only == PAPI_PRESET_ENUM_CPU_AVAIL && info.count ) ||
+               ( print_avail_only == PAPI_PRESET_ENUM_CPU ) )
+          {
+             if ((check_counter && checkCounter (event_code)) || !check_counter)
+             {
+               printf( "%s\t%#x\t%d\t|%s|\n |%s|\n"
+                 " |%s|\n |%s|\n |%s|\n",
+                 info.symbol, info.event_code, info.count,
+                 info.short_descr, info.long_descr, info.note,
+                 info.derived, info.postfix );
+               for ( j = 0; j < ( int ) info.count; j++ ) {
+                  printf( " Native Code[%d]: %#x |%s|\n", j,
+                  info.code[j], info.name[j] );
+               }
+             }
+          }
+          tot_count++;
+          if ( info.count ) {
+             if ((check_counter && checkCounter (event_code)) || !check_counter )
+                avail_count++;
+          }
+          if ( !strcmp( is_derived( &info ), "Yes" ) ) {
+             deriv_count++;
+          }
+           }
+        }
+     } while (PAPI_enum_event( &event_code, print_avail_only ) == PAPI_OK);
+
+    /* Repeat the logic for component presets. For consistency, always ASK FOR the first event,
+     * if there is not one then nothing to process */
+    if (PAPI_enum_event( &event_code, PAPI_PRESET_ENUM_FIRST_COMP ) != PAPI_OK) {
+         continue;
+    }
+
+    /* Print heading for component presets. */
+    if (i== 0) {
+
+        if( print_avail_only == PAPI_PRESET_ENUM_CPU ) {
+            print_avail_only = PAPI_ENUM_EVENTS;
+        } else if( print_avail_only == PAPI_PRESET_ENUM_CPU_AVAIL ) {
+            print_avail_only = PAPI_PRESET_ENUM_AVAIL;
+        }
+
+        /* Get the length of the longest component preset symbol. */
+        int maxCompSymLen = get_max_symbol_length(PAPI_PRESET_ENUM_FIRST_COMP, PAPI_ENUM_EVENTS);
+        int frontPad = (maxCompSymLen-4)/2; /* 4 == strlen("Name") */
+        int backPad  = maxCompSymLen-4-frontPad;
+
+          printf( "================================================================================\n" );
+          printf( "  PAPI Component Preset Events\n" );
+          printf( "================================================================================\n" );
+
+        int printCompPresets = print_comp_header_flag();
+        if ( printCompPresets ) {
+          if ( print_tabular ) {
+            int spaceCnt = 0;
+            for( spaceCnt = 0; spaceCnt < frontPad; ++spaceCnt ) {
+                printf(" ");
+            }
+            printf( "Name");
+            for( spaceCnt = 0; spaceCnt < backPad; ++spaceCnt ) {
+                printf(" ");
+            }
+            printf( "   Code    " );
+            if ( print_avail_only == PAPI_ENUM_EVENTS ) {
+                printf( "Avail " );
+            }
+            printf( "Deriv Description (Note)\n" );
+          } else {
+            printf( "%-13s%-11s%-8s%-16s\n |Long Description|\n"
+                    " |Developer's Notes|\n |Derived|\n |PostFix|\n"
+                    " Native Code[n]: <hex> |name|\n",
+                    "Symbol", "Event Code", "Count", "|Short Description|" );
+          }
+        } else {
+          printf( "No components compiled in that support PAPI Component Preset Events.\n" );
+        }
+
+          int first_flag = 1;
+          do {
+            if ( PAPI_get_event_info( event_code, &info ) == PAPI_OK ) {
+
+              /* Skip disabled components */
+              const PAPI_component_info_t *component=PAPI_get_component_info(info.component_index);
+              if (component->disabled && component->disabled != PAPI_EDELAY_INIT) {
+                  continue;
+              }
+
+              if( !first_flag ) {
+                  printf( "--------------------------------------------------------------------------------\n" );
+              }
+              first_flag = 0;
+
+              if ( print_tabular ) {
+                // if this is a user defined event or its a preset and matches the preset event filters, display its information
+                if ( filter & info.event_type ) {
+                  if ( print_avail_only == PAPI_PRESET_ENUM_AVAIL ) {
+                    if ( info.count ) {
+                      if ( (check_counter && checkCounter (event_code)) || !check_counter) {
+                          printf( "%-*s%#x  %-5s%s\n", maxCompSymLen,
+                                  info.symbol,
+                                  info.event_code,
+                                  is_derived( &info ), info.long_descr );
+
+                          /* Add event to tally. */
+                          avail_count++;
+                          if ( !strcmp( is_derived( &info ), "Yes" ) ) {
+                            deriv_count++;
+                          }
+
+                          /* List the qualifiers. */
+                          int k;
+                          for( k = 0; k < info.num_quals; ++k ) {
+                              printf("    %s\n        %s\n",  info.quals[k], info.quals_descrs[k]);
+                          }
+                      }
+                    }
+                    if ( info.note[0] ) {
+                        printf( " (%s)\n", info.note );
+                    }
+                  } else {
+                    printf( "%-*s%#x  %-6s%-4s %s\n", maxCompSymLen,
+                            info.symbol,
+                            info.event_code,
+                            ( info.count ? "Yes" : "No" ),
+                            is_derived( &info ), info.long_descr );
+                    if ( info.note[0] ) {
+                        printf( " (%s)\n", info.note );
+                    }
+
+                    /* List the qualifiers. */
+                    int k;
+                    for( k = 0; k < info.num_quals; ++k ) {
+                        printf("    %s\n        %s\n",  info.quals[k], info.quals_descrs[k]);
+                    }
+
+                    tot_count++;
+                    if ( info.count ) {
+                        if ((check_counter && checkCounter (event_code)) || !check_counter )
+                            avail_count++;
+                    }
+                    if ( !strcmp( is_derived( &info ), "Yes" ) ) {
+                        deriv_count++;
+                    }
+                  }
+                }
+              } else {
+                if ( ( print_avail_only == PAPI_PRESET_ENUM_AVAIL && info.count ) ||
+                     ( print_avail_only == PAPI_ENUM_EVENTS ) )
+                {
+                  if ((check_counter && checkCounter (event_code)) || !check_counter) {
+                    printf( "%s\t%#x\t%d\t|%s|\n |%s|\n"
+                            " |%s|\n |%s|\n |%s|\n",
+                            info.symbol, info.event_code, info.count,
+                            info.short_descr, info.long_descr, info.note,
+                            info.derived, info.postfix );
+                    for ( j = 0; j < ( int ) info.count; j++ ) {
+                        printf( " Native Code[%d]: %#x |%s|\n", j,
+                                info.code[j], info.name[j] );
+                    }
+                  }
+                }
+                tot_count++;
+                if ( info.count ) {
+                  if ((check_counter && checkCounter (event_code)) || !check_counter )
+                      avail_count++;
+                }
+                if ( !strcmp( is_derived( &info ), "Yes" ) ) {
+                    deriv_count++;
+                }
+              }
+            }
+          } while (PAPI_enum_event( &event_code, print_avail_only ) == PAPI_OK);
+
+        printf( "================================================================================\n" );
+
+    }
+
+
+
   }
       }
 
-	printf( "--------------------------------------------------------------------------------\n" );
-
-	if ( !print_event_info ) {
-		if ( print_avail_only ) {
-			printf( "Of %d available events, %d ", avail_count, deriv_count );
-		} else {
-			printf( "Of %d possible events, %d are available, of which %d ",
-				tot_count, avail_count, deriv_count );
-		}
-		if ( deriv_count == 1 ) {
-			printf( "is derived.\n\n" );
-		} else {
-			printf( "are derived.\n\n" );
-		}
-
-		if (avail_count==0) {
-			printf("No events detected!  Check papi_component_avail to find out why.\n");
-			printf("\n");
-		}
-	}
+    if ( !print_event_info ) {
+        if ( print_avail_only == PAPI_PRESET_ENUM_CPU_AVAIL || print_avail_only == PAPI_PRESET_ENUM_AVAIL ) {
+            printf( "Of %d available events, %d ", avail_count, deriv_count );
+        } else {
+            printf( "Of %d possible events, %d are available, of which %d ",
+                tot_count, avail_count, deriv_count );
+        }
+        if ( deriv_count == 1 ) {
+            printf( "is derived.\n\n" );
+        } else {
+            printf( "are derived.\n\n" );
+        }
+
+        if (avail_count==0) {
+            printf("No events detected!  Check papi_component_avail to find out why.\n");
+            printf("\n");
+        }
+    }
 
-	return 0;
+    return 0;
 
 }
diff -pruN 7.2.0~b2-1/src/utils/papi_component_avail.c 7.2.0-1/src/utils/papi_component_avail.c
--- 7.2.0~b2-1/src/utils/papi_component_avail.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/utils/papi_component_avail.c	2025-06-25 22:38:10.000000000 +0000
@@ -118,13 +118,17 @@ main( int argc, char **argv )
 
 	  printf( "Name:   %-23s %s\n", cmpinfo->name ,cmpinfo->description);
 
-      if (cmpinfo->disabled == PAPI_EDELAY_INIT) {
-          force_cmp_init(cid);
-      }
+	  if (cmpinfo->disabled == PAPI_EDELAY_INIT) {
+	      force_cmp_init(cid);
+	  }
 	  if (cmpinfo->disabled) {
 	    printf("   \\-> Disabled: %s\n",cmpinfo->disabled_reason);
 	  }
 
+	  if (cmpinfo->partially_disabled) {
+	      printf("   \\-> Partially disabled: %s\n", cmpinfo->partially_disabled_reason);
+	  }
+
 	  if ( flags.details ) {
 		printf( "        %-23s Version:\t\t\t%s\n", " ", cmpinfo->version );
 		printf( "        %-23s Number of native events:\t%d\n", " ", cmpinfo->num_native_events);
diff -pruN 7.2.0~b2-1/src/utils/print_header.c 7.2.0-1/src/utils/print_header.c
--- 7.2.0~b2-1/src/utils/print_header.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/utils/print_header.c	2025-06-25 22:38:10.000000000 +0000
@@ -75,8 +75,8 @@ papi_print_header( char *prompt, const P
 	mpx = PAPI_get_opt( PAPI_MAX_MPX_CTRS, NULL );
   
   int numcmp = PAPI_num_components(  );
-  int perf_event = 0;
-	for (int cid = 0; cid < numcmp; cid++ ) {
+  int perf_event = 0, cid;
+	for ( cid = 0; cid < numcmp; cid++ ) {
 	  const PAPI_component_info_t* cmpinfo = PAPI_get_component_info( cid );
 	  if (cmpinfo->disabled) continue;
     if (strcmp(cmpinfo->name, "perf_event")== 0) perf_event = 1;
diff -pruN 7.2.0~b2-1/src/validation_tests/Makefile.recipies 7.2.0-1/src/validation_tests/Makefile.recipies
--- 7.2.0~b2-1/src/validation_tests/Makefile.recipies	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/validation_tests/Makefile.recipies	2025-06-25 22:38:10.000000000 +0000
@@ -38,6 +38,9 @@ instructions_testcode.o:	instructions_te
 matrix_multiply.o:	matrix_multiply.c matrix_multiply.h
 	$(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -O1 -c matrix_multiply.c
 
+load_store_testcode.o:	load_store_testcode.c testcode.h
+	$(CC) $(INCLUDE) $(CFLAGS) $(OPTFLAGS) -c load_store_testcode.c
+
 fp_validation_hl: fp_validation_hl.o $(TESTLIB) $(PAPILIB) flops_testcode.o
 	$(CC) -o fp_validation_hl fp_validation_hl.o $(TESTLIB) flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB) -lpthread
 
@@ -81,8 +84,8 @@ papi_fp_ops: papi_fp_ops.o $(TESTLIB) $(
 papi_hw_int: papi_hw_int.o $(TESTLIB) $(PAPILIB)
 	$(CC) -o papi_hw_int papi_hw_int.o $(TESTLIB) $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
 
-papi_ld_ins: papi_ld_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o
-	$(CC) -o papi_ld_ins papi_ld_ins.o $(TESTLIB) display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
+papi_ld_ins: papi_ld_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o load_store_testcode.o
+	$(CC) -o papi_ld_ins papi_ld_ins.o $(TESTLIB) display_error.o matrix_multiply.o load_store_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
 
 papi_l1_dca: papi_l1_dca.o $(TESTLIB) $(PAPILIB) cache_testcode.o display_error.o matrix_multiply.o
 	$(CC) -o papi_l1_dca papi_l1_dca.o $(TESTLIB) cache_testcode.o display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
@@ -108,8 +111,8 @@ papi_ref_cyc: papi_ref_cyc.o $(TESTLIB)
 papi_sp_ops: papi_sp_ops.o $(TESTLIB) $(PAPILIB) display_error.o branches_testcode.o flops_testcode.o
 	$(CC) -o papi_sp_ops papi_sp_ops.o $(TESTLIB) display_error.o branches_testcode.o flops_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
 
-papi_sr_ins: papi_sr_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o
-	$(CC) -o papi_sr_ins papi_sr_ins.o $(TESTLIB) display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
+papi_sr_ins: papi_sr_ins.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o load_store_testcode.o
+	$(CC) -o papi_sr_ins papi_sr_ins.o $(TESTLIB) display_error.o matrix_multiply.o load_store_testcode.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
 
 papi_tot_cyc: papi_tot_cyc.o $(TESTLIB) $(PAPILIB) display_error.o matrix_multiply.o
 	$(CC) -o papi_tot_cyc papi_tot_cyc.o $(TESTLIB) display_error.o matrix_multiply.o $(PAPILIB) $(LDFLAGS) $(EXTRALIB)
diff -pruN 7.2.0~b2-1/src/validation_tests/load_store_testcode.c 7.2.0-1/src/validation_tests/load_store_testcode.c
--- 7.2.0~b2-1/src/validation_tests/load_store_testcode.c	1970-01-01 00:00:00.000000000 +0000
+++ 7.2.0-1/src/validation_tests/load_store_testcode.c	2025-06-25 22:38:10.000000000 +0000
@@ -0,0 +1,97 @@
+#include "testcode.h"
+
+/*
+ * Load/store kernels used by the papi_ld_ins and papi_sr_ins validation
+ * tests.  Each kernel is a hand-written loop that issues one load or one
+ * store per iteration.  Only AArch64 is implemented; elsewhere the
+ * functions return CODE_UNIMPLEMENTED so the caller can pick another
+ * workload.
+ */
+
+/*
+ * Execute n stores.
+ *
+ * n:       number of loop iterations (one 64-bit store each); n <= 0 runs none.
+ * returns: 0 on AArch64, CODE_UNIMPLEMENTED on any other architecture.
+ */
+int execute_stores(int n) {
+
+#if defined(__aarch64__)
+
+	/* 8-byte object: the 64-bit str below must not spill past its target. */
+	static volatile long long stvar = 1;
+	/* Widen n so the 64-bit compare in the loop sees a fully defined value. */
+	long long total = n;
+
+	/* The loop tests its counter only after the first store. */
+	if (n <= 0) {
+		return 0;
+	}
+
+	/* Numeric local label: stays unique if the function is inlined or cloned. */
+	__asm volatile(
+		"	mov x1, #0\n"
+		"1:\n"
+		"	str x1, [%[addr]] /* store into stvar */\n"
+		"	add x1, x1, #1\n"
+		"	cmp x1, %[total]\n"
+		"	bne 1b\n"
+		:
+		: [addr] "r" (&stvar), [total] "r" (total)
+		: "x1", "cc", "memory" /* clobbered */
+	);
+
+	return 0;
+
+#else
+
+	(void) n;
+	return CODE_UNIMPLEMENTED;
+
+#endif
+
+}
+
+/*
+ * Execute n loads.
+ *
+ * n:       number of loop iterations (one 64-bit load each); n <= 0 runs none.
+ * returns: 0 on AArch64, CODE_UNIMPLEMENTED on any other architecture.
+ */
+int execute_loads(int n) {
+
+#if defined(__aarch64__)
+
+	/* 8-byte object holding 1: the 64-bit ldr must read exactly this value. */
+	static volatile long long ldvar = 1;
+	/* Widen n so the 64-bit compare in the loop sees a fully defined value. */
+	long long total = n;
+
+	/* The loop tests its counter only after the first load. */
+	if (n <= 0) {
+		return 0;
+	}
+
+	/* The counter advances by the loaded value (1), so each pass is one load. */
+	__asm volatile(
+		"	mov x1, #0\n"
+		"1:\n"
+		"	ldr x3, [%[addr]] /* load from ldvar */\n"
+		"	add x1, x1, x3\n"
+		"	cmp x1, %[total]\n"
+		"	bne 1b\n"
+		:
+		: [addr] "r" (&ldvar), [total] "r" (total)
+		: "x1", "x3", "cc", "memory" /* clobbered */
+	);
+
+	return 0;
+
+#else
+
+	(void) n;
+	return CODE_UNIMPLEMENTED;
+
+#endif
+
+}
diff -pruN 7.2.0~b2-1/src/validation_tests/papi_ld_ins.c 7.2.0-1/src/validation_tests/papi_ld_ins.c
--- 7.2.0~b2-1/src/validation_tests/papi_ld_ins.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/validation_tests/papi_ld_ins.c	2025-06-25 22:38:10.000000000 +0000
@@ -23,6 +23,7 @@
 #include "display_error.h"
 
 #include "matrix_multiply.h"
+#include "testcode.h"
 
 #define SLEEP_RUNS 3
 
@@ -101,18 +102,26 @@ int main(int argc, char **argv) {
 		test_fail( __FILE__, __LINE__, "idle average", retval );
 	}
 
-	/*****************************/
-	/* testing Matrix Matrix GHz */
-	/*****************************/
+	/***********************************/
+	/* testing a large number of loads */
+	/***********************************/
 
 	if (!quiet) {
-		printf("\nTesting with matrix matrix multiply\n");
+		printf("\nTesting a large number of loads\n");
 	}
 
+	expected=naive_matrix_multiply_estimated_loads(quiet);
+
 	PAPI_reset(eventset);
 	PAPI_start(eventset);
 
-	naive_matrix_multiply(quiet);
+	retval = execute_loads(expected);
+	if (retval == CODE_UNIMPLEMENTED) {
+		if (!quiet) {
+			printf("\tNo asm test found for the current hardware. Testing matrix multiply\n");
+		}
+		naive_matrix_multiply(quiet);
+	}
 
 	retval=PAPI_stop(eventset,&count);
 
@@ -120,8 +129,6 @@ int main(int argc, char **argv) {
 		test_fail( __FILE__, __LINE__, "Problem stopping!", retval );
 	}
 
-	expected=naive_matrix_multiply_estimated_loads(quiet);
-
 	if (!quiet) {
 		printf("\tActual measured loads = %lld\n",count);
 	}
@@ -154,7 +161,13 @@ int main(int argc, char **argv) {
 	PAPI_start(eventset);
 
 	for(i=0;i<REPITITIONS;i++) {
-		naive_matrix_multiply(quiet);
+		retval = execute_loads(expected);
+		if (retval == CODE_UNIMPLEMENTED) {
+			if (!quiet) {
+				printf("\tNo asm test found for the current hardware. Testing matrix multiply\n");
+			}
+			naive_matrix_multiply(quiet);
+		}
 	}
 
 	retval=PAPI_stop(eventset,&count);
diff -pruN 7.2.0~b2-1/src/validation_tests/papi_sr_ins.c 7.2.0-1/src/validation_tests/papi_sr_ins.c
--- 7.2.0~b2-1/src/validation_tests/papi_sr_ins.c	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/validation_tests/papi_sr_ins.c	2025-06-25 22:38:10.000000000 +0000
@@ -23,6 +23,7 @@
 #include "display_error.h"
 
 #include "matrix_multiply.h"
+#include "testcode.h"
 
 #define SLEEP_RUNS 3
 
@@ -101,18 +102,26 @@ int main(int argc, char **argv) {
 		test_fail( __FILE__, __LINE__, "idle average", retval );
 	}
 
-	/*****************************/
-	/* testing Matrix Matrix GHz */
-	/*****************************/
+	/************************************/
+	/* testing a large number of stores */
+	/************************************/
 
 	if (!quiet) {
-		printf("\nTesting with matrix matrix multiply\n");
+		printf("\nTesting a large number of stores\n");
 	}
 
+	expected=naive_matrix_multiply_estimated_stores(quiet);
+
 	PAPI_reset(eventset);
 	PAPI_start(eventset);
 
-	naive_matrix_multiply(quiet);
+	retval = execute_stores(expected);
+	if (retval == CODE_UNIMPLEMENTED) {
+		if (!quiet) {
+			printf("\tNo asm test found for the current hardware. Testing matrix multiply\n");
+		}
+		naive_matrix_multiply(quiet);
+	}
 
 	retval=PAPI_stop(eventset,&count);
 
@@ -120,7 +129,6 @@ int main(int argc, char **argv) {
 		test_fail( __FILE__, __LINE__, "Problem stopping!", retval );
 	}
 
-	expected=naive_matrix_multiply_estimated_stores(quiet);
 
 	if (!quiet) {
 		printf("\tActual measured stores = %lld\n",count);
@@ -154,7 +162,13 @@ int main(int argc, char **argv) {
 	PAPI_start(eventset);
 
 	for(i=0;i<REPITITIONS;i++) {
-		naive_matrix_multiply(quiet);
+		retval = execute_stores(expected);
+		if (retval == CODE_UNIMPLEMENTED) {
+			if (!quiet) {
+				printf("\tNo asm test found for the current hardware. Testing matrix multiply\n");
+			}
+			naive_matrix_multiply(quiet);
+		}
 	}
 
 	retval=PAPI_stop(eventset,&count);
diff -pruN 7.2.0~b2-1/src/validation_tests/testcode.h 7.2.0-1/src/validation_tests/testcode.h
--- 7.2.0~b2-1/src/validation_tests/testcode.h	2025-03-05 20:35:12.000000000 +0000
+++ 7.2.0-1/src/validation_tests/testcode.h	2025-06-25 22:38:10.000000000 +0000
@@ -27,5 +27,9 @@ double cache_read_test(double *array, in
 int cache_random_write_test(double *array, int size, int count);
 double cache_random_read_test(double *array, int size, int count);
 
+/* load_store_testcode.c */
+int execute_stores(int n);
+int execute_loads(int n);
+
 /* busy_work.c */
 double do_cycles( int minimum_time );
