diff --git a/common/include/byte_order.h b/common/include/byte_order.h index e6fba25..411e849 100644 --- a/common/include/byte_order.h +++ b/common/include/byte_order.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/context_switch_defs.h b/common/include/context_switch_defs.h index a0fd4e6..5e1e85f 100644 --- a/common/include/context_switch_defs.h +++ b/common/include/context_switch_defs.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -45,7 +45,7 @@ extern "C" { #define CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__ENGINE_INDEX_SHIFT (5) #define CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__SET(dst, engine_index, vdma_channel_index) do { \ - (dst) = (vdma_channel_index) | ((engine_index) << CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__ENGINE_INDEX_SHIFT);\ + (dst) = (uint8_t)((vdma_channel_index) | ((engine_index) << CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__ENGINE_INDEX_SHIFT));\ } while (0) #define CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__READ(src, engine_index, vdma_channel_index) do {\ @@ -73,7 +73,6 @@ typedef struct { uint16_t feature_padding_payload; uint32_t buffer_padding_payload; uint16_t buffer_padding; - bool is_periph_calculated_in_hailort; bool is_core_hw_padding_config_in_dfc; } CONTEXT_SWITCH_DEFS__stream_reg_info_t; @@ -382,11 +381,12 @@ typedef struct { } CONTEXT_SWITCH_DEFS__activate_ddr_buffer_output_data_t; typedef struct { + CONTEXT_SWITCH_DEFS__stream_reg_info_t stream_reg_info; + CONTROL_PROTOCOL__host_buffer_info_t host_buffer_info; + uint16_t batch_size; uint8_t packed_vdma_channel_id; uint8_t stream_index; 
uint8_t network_index; - CONTEXT_SWITCH_DEFS__stream_reg_info_t stream_reg_info; - CONTROL_PROTOCOL__host_buffer_info_t host_buffer_info; } CONTEXT_SWITCH_DEFS__activate_cache_output_data_t; typedef struct { diff --git a/common/include/control_protocol.h b/common/include/control_protocol.h index d6ceeb9..e79b510 100644 --- a/common/include/control_protocol.h +++ b/common/include/control_protocol.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -447,7 +447,6 @@ typedef struct { uint16_t feature_padding_payload; uint32_t buffer_padding_payload; uint16_t buffer_padding; - bool is_periph_calculated_in_hailort; bool is_core_hw_padding_config_in_dfc; } CONTROL_PROTOCOL__nn_stream_config_t; @@ -878,15 +877,21 @@ typedef struct { bool can_fast_batch_switch; } CONTROL_PROTOCOL__INFER_FEATURE_LIST_t; +typedef struct { + uint8_t packed_vdma_channel_id; +} CONTROL_PROTOCOL__config_channel_info_t; + typedef struct { uint16_t dynamic_contexts_count; CONTROL_PROTOCOL__INFER_FEATURE_LIST_t infer_features; CONTROL_PROTOCOL__VALIDATION_FEATURE_LIST_t validation_features; uint8_t networks_count; uint16_t csm_buffer_size; - uint16_t batch_size[CONTROL_PROTOCOL__MAX_NETWORKS_PER_NETWORK_GROUP]; + uint16_t batch_size; uint32_t external_action_list_address; uint32_t boundary_channels_bitmap[CONTROL_PROTOCOL__MAX_VDMA_ENGINES_COUNT]; + uint8_t config_channels_count; + CONTROL_PROTOCOL__config_channel_info_t config_channel_info[CONTROL_PROTOCOL__MAX_CFG_CHANNELS]; } CONTROL_PROTOCOL__application_header_t; typedef struct { @@ -1316,13 +1321,23 @@ typedef struct { } CONTROL_PROTOCOL__hw_infer_channels_info_t; typedef enum { - CONTROL_PROTOCOL__HW_INFER_STATE_START, + CONTROL_PROTOCOL__HW_INFER_STATE_START, CONTROL_PROTOCOL__HW_INFER_STATE_STOP, /* must be last*/ 
CONTROL_PROTOCOL__HW_INFER_STATE_COUNT } CONTROL_PROTOCOL__hw_infer_state_t; +typedef enum { + CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL, + CONTROL_PROTOCOL__CCB_BOUNDARY_CHANNEL, + + /* must be last*/ + CONTROL_PROTOCOL__BOUNDARY_CHANNEL_MODE_COUNT +} CONTROL_PROTOCOL__boundary_channel_mode_t; + +#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (6) + typedef struct { uint32_t hw_infer_state_length; uint8_t hw_infer_state; @@ -1334,6 +1349,8 @@ typedef struct { uint16_t batch_count; uint32_t channels_info_length; CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info; + uint32_t boundary_channel_mode_length; + uint8_t boundary_channel_mode; } CONTROL_PROTOCOL__change_hw_infer_status_request_t; typedef union { diff --git a/common/include/d2h_events.h b/common/include/d2h_events.h index 9e7db0b..ac8399c 100644 --- a/common/include/d2h_events.h +++ b/common/include/d2h_events.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -86,13 +86,11 @@ typedef struct { /* D2H_EVENT_health_monitor_closed_streams_event_message_t should be the same as hailo_health_monitor_dataflow_shutdown_notification_message_t */ typedef struct { - uint32_t closed_input_streams; - uint32_t closed_output_streams; float32_t ts0_temperature; float32_t ts1_temperature; } D2H_EVENT_health_monitor_closed_streams_event_message_t; -#define D2H_EVENT_HEALTH_MONITOR_CLOSED_STREAMS_EVENT_PARAMETER_COUNT (4) +#define D2H_EVENT_HEALTH_MONITOR_CLOSED_STREAMS_EVENT_PARAMETER_COUNT (2) /* D2H_EVENT_health_monitor_temperature_alarm_event_message_t should be the same as hailo_health_monitor_temperature_alarm_notification_message_t */ typedef struct { diff --git a/common/include/firmware_header.h b/common/include/firmware_header.h index b487b55..b9be775 100644 --- a/common/include/firmware_header.h +++ b/common/include/firmware_header.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -19,7 +19,8 @@ extern "C" { #define FIRMWARE_HEADER_MAGIC_HAILO8 (0x1DD89DE0) #define FIRMWARE_HEADER_MAGIC_HAILO15 (0xE905DAAB) -#define FIRMWARE_HEADER_MAGIC_HAILO15L (0xF94739AB) +#define FIRMWARE_HEADER_MAGIC_HAILO15L (0xF94739AB) +#define FIRMWARE_HEADER_MAGIC_MARS (0xF94739AB) typedef enum { FIRMWARE_HEADER_VERSION_INITIAL = 0, @@ -31,7 +32,8 @@ typedef enum { typedef enum { FIRMWARE_TYPE_HAILO8 = 0, FIRMWARE_TYPE_HAILO15, - FIRMWARE_TYPE_HAILO15L + FIRMWARE_TYPE_HAILO15L, + FIRMWARE_TYPE_MARS } firmware_type_t; @@ -41,6 +43,8 @@ typedef enum { #define COMPILED_FIRMWARE_TYPE (FIRMWARE_TYPE_HAILO8) #elif defined(PLUTO) #define COMPILED_FIRMWARE_TYPE (FIRMWARE_TYPE_HAILO15L) +#elif defined(MARS) +#define COMPILED_FIRMWARE_TYPE (FIRMWARE_TYPE_MARS) #endif /* MERCURY */ typedef struct { diff --git a/common/include/firmware_header_utils.h b/common/include/firmware_header_utils.h index 36500cc..d3c3149 100644 --- a/common/include/firmware_header_utils.h +++ b/common/include/firmware_header_utils.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/firmware_status.h b/common/include/firmware_status.h index f359770..1dc4764 100644 --- a/common/include/firmware_status.h +++ b/common/include/firmware_status.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -415,6 +415,7 @@ Updating rules: FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_BATCH_COUNT_LENGTH)\ FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_CACHE_INFO_LENGTH)\ FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_READ_OFFSET_DELTA_LENGTH)\ + FIRMWARE_STATUS__X(CONTROL_PROTOCOL_STATUS_INVALID_BOUNDARY_CHANNELS_MODE_LENGTH)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__POWER_MEASUREMENT)\ FIRMWARE_STATUS__X(HAILO_POWER_MEASUREMENT_STATUS_POWER_INIT_ERROR)\ @@ -771,6 +772,9 @@ Updating rules: FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_READ_OFFSET_SIZE)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_INVALID_SLEEP_TIME)\ FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_SRAM_MEMORY_FULL)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TIMEOUT_LCU)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TIMEOUT_SEQUENCER)\ + FIRMWARE_STATUS__X(CONTEXT_SWITCH_STATUS_TIMEOUT_CONFIG_DONE)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__D2H_EVENT_MANAGER)\ FIRMWARE_STATUS__X(HAILO_D2H_EVENT_MANAGER_STATUS_MESSAGE_HIGH_PRIORITY_QUEUE_CREATE_FAILED)\ @@ -1062,6 +1066,7 @@ Updating rules: FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_INVALID_STREAM_INDEX)\ FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_INVALID_CHANNEL_INDEX)\ FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_FAILED_TO_RESET_QM_CREDITS)\ + FIRMWARE_STATUS__X(DRAM_DMA_SERVICE_STATUS_TRIED_USING_BURST_IN_NOT_ENHANCED)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__NN_CORE_SERVICE)\ FIRMWARE_STATUS__X(NN_CORE_SERVICE_STATUS_INVALID_ARG_PASSED)\ @@ -1127,12 +1132,19 @@ Updating rules: FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_NETWORK_GROUP_ALREADY_ACTIVATED)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_STATE_MACHINE_NOT_IN_RESET_STATE_BEFORE_DEACTIVATE)\ FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_INVALID_STATE)\ + FIRMWARE_STATUS__X(HW_INFER_MANAGER_STATUS_INVALID_BOUNDARY_CHANNEL_MODE)\ \ FIRMWARE_MODULE__X(FIRMWARE_MODULE__INFINITE_CONTEXT_LOADER)\ 
FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_EVENT_BITS_NOT_CLEARED_BEFORE_COPY_CALL)\ FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_TIMEOUT_OCCURED_WAITING_FOR_COPY)\ FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_SUPPORTED)\ FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NOT_MODULE_NOT_INITIALIZED)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_SEND_FAIL)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_RECEIVE_FAIL)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_FULL)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_QUEUE_FAILED_INIT)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_NULL_POINTER)\ + FIRMWARE_STATUS__X(INFINITE_CONTEXT_LOADER_STATUS_INVALID_NUM_CONTEXTS)\ typedef enum { #define FIRMWARE_MODULE__X(module) module, diff --git a/common/include/firmware_version.h b/common/include/firmware_version.h index 94ae67f..11fe950 100644 --- a/common/include/firmware_version.h +++ b/common/include/firmware_version.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/logger_level.h b/common/include/logger_level.h index 3b7efdb..90620eb 100644 --- a/common/include/logger_level.h +++ b/common/include/logger_level.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/sensor_config_exports.h b/common/include/sensor_config_exports.h index cd98dd4..62fbb77 100644 --- a/common/include/sensor_config_exports.h +++ b/common/include/sensor_config_exports.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/status.h b/common/include/status.h index c0126d5..8145ef0 100644 --- a/common/include/status.h +++ b/common/include/status.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/stdfloat.h b/common/include/stdfloat.h index e28f578..a2a8975 100644 --- a/common/include/stdfloat.h +++ b/common/include/stdfloat.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/user_config_common.h b/common/include/user_config_common.h index 2cdf367..bcf8bd9 100644 --- a/common/include/user_config_common.h +++ b/common/include/user_config_common.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/include/utils.h b/common/include/utils.h index 45dcab2..44d2c15 100644 --- a/common/include/utils.h +++ b/common/include/utils.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/common/src/firmware_header_utils.c b/common/src/firmware_header_utils.c index f22513a..147ba4a 100644 --- a/common/src/firmware_header_utils.c +++ b/common/src/firmware_header_utils.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -45,7 +45,7 @@ static HAILO_COMMON_STATUS_t firmware_header_utils__validate_fw_header(uintptr_t switch (firmware_type) { case FIRMWARE_TYPE_HAILO8: - firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO8; + firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO8; break; case FIRMWARE_TYPE_HAILO15: firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO15; @@ -53,6 +53,9 @@ static HAILO_COMMON_STATUS_t firmware_header_utils__validate_fw_header(uintptr_t case FIRMWARE_TYPE_HAILO15L: firmware_magic = FIRMWARE_HEADER_MAGIC_HAILO15L; break; + case FIRMWARE_TYPE_MARS: + firmware_magic = FIRMWARE_HEADER_MAGIC_MARS; + break; default: status = HAILO_STATUS__FIRMWARE_HEADER_UTILS__INVALID_FIRMWARE_TYPE; goto exit; diff --git a/common/src/firmware_status.c b/common/src/firmware_status.c index e21f35a..2fe48dd 100644 --- a/common/src/firmware_status.c +++ b/common/src/firmware_status.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/CMakeLists.txt b/hailort/CMakeLists.txt index 2bce837..716a5c5 100644 --- a/hailort/CMakeLists.txt +++ b/hailort/CMakeLists.txt @@ -29,8 +29,8 @@ endif() # Set firmware version add_definitions( -DFIRMWARE_VERSION_MAJOR=4 ) -add_definitions( -DFIRMWARE_VERSION_MINOR=20 ) -add_definitions( -DFIRMWARE_VERSION_REVISION=1 ) +add_definitions( -DFIRMWARE_VERSION_MINOR=21 ) +add_definitions( -DFIRMWARE_VERSION_REVISION=0 ) if(HAILO_BUILD_SERVICE) add_definitions( -DHAILO_SUPPORT_MULTI_PROCESS ) endif() @@ -58,8 +58,6 @@ set(RPC_DIR ${PROJECT_SOURCE_DIR}/hailort/rpc) set(HRPC_DIR ${PROJECT_SOURCE_DIR}/hailort/hrpc) set(HRPC_PROTOCOL_DIR ${PROJECT_SOURCE_DIR}/hailort/hrpc_protocol) set(HAILORT_SERVICE_DIR ${PROJECT_SOURCE_DIR}/hailort/hailort_service) -set(HAILORT_SERVER_DIR ${PROJECT_SOURCE_DIR}/hailort/hailort_server) -set(HAILORT_LIBUSB_DIR ${PROJECT_SOURCE_DIR}/hailort/internals/libusb-wrapper/) if(HAILO_BUILD_SERVICE) add_subdirectory(rpc) @@ -91,5 +89,3 @@ endif() if(CMAKE_SYSTEM_NAME STREQUAL QNX) add_subdirectory(drivers/qnx) endif() - -add_subdirectory(hailort_server) diff --git a/hailort/LICENSE b/hailort/LICENSE index aeed83c..3f84d44 100644 --- a/hailort/LICENSE +++ b/hailort/LICENSE @@ -1,6 +1,6 @@ The MIT License (MIT) -Copyright (c) 2020-2022 Hailo Technologies Ltd. +Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of diff --git a/hailort/LICENSE-3RD-PARTY.md b/hailort/LICENSE-3RD-PARTY.md index c80001b..4ebb203 100644 --- a/hailort/LICENSE-3RD-PARTY.md +++ b/hailort/LICENSE-3RD-PARTY.md @@ -6,14 +6,16 @@ | pybind11 | Wenzel Jakob | BSD | 2.10.1 | Cloned entire package | https://github.com/pybind/pybind11 | | spdlog | Gabi Melman | MIT | 1.14.1 | Cloned entire package | https://github.com/gabime/spdlog | | folly | Facebook, Inc. 
and its affiliates | Apache License 2.0 | v2020.08.17.00 | Copied only the file `folly/TokenBucket.h` | https://github.com/facebook/folly | -| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent | -| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue | -| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter | -| benchmark | Google Inc. | Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git | -| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 | -| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git | -| grpc | Google Inc. | Apache License 2.0 | 1.54.0 | Cloned entire package | https://github.com/grpc/grpc | -| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb | -| eigen | | Mozilla Public License 2.0 | 3.4.0 | Cloned entire package | https://gitlab.com/libeigen/eigen | -| libusb | | GNU LESSER GENERAL PUBLIC LICENSE | 1.0.27 | Cloned entire package | https://github.com/libusb/libusb.git | -| xxHash | Yann Collet | 2-Clause BSD | 0.8.2 | Cloned entire package, used as a header-only lib | https://github.com/Cyan4973/xxHash | +| nlohmann_json_cmake_fetchcontent | ArthurSonzogni | MIT License | v3.9.1 | Cloned entire package | https://github.com/ArthurSonzogni/nlohmann_json_cmake_fetchcontent | +| readerwriterqueue | Cameron Desrochers | Simplified BSD | 1.0.3 | Cloned entire package | https://github.com/cameron314/readerwriterqueue | +| DotWriter | John Vilk | MIT License | master | Fork | https://github.com/hailo-ai/DotWriter | +| benchmark | Google Inc. 
| Apache License 2.0 | 1.6.0 | Cloned entire package | https://github.com/google/benchmark.git | +| md5 | Alexander Peslyak | cut-down BSD | - | Copied code from website | http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 | +| pevents | Mahmoud Al-Qudsi | MIT License | master | Cloned entire package | https://github.com/neosmart/pevents.git | +| grpc | Google Inc. | Apache License 2.0 | 1.54.0 | Cloned entire package | https://github.com/grpc/grpc | +| stb | Sean Barrett | MIT License | 0.97 | Copied only the file `stb/stb_image_resize.h` | https://github.com/nothings/stb | +| eigen | | Mozilla Public License 2.0 | 3.4.0 | Cloned entire package | https://gitlab.com/libeigen/eigen | +| cpp-httplib | | MIT License | v0.18.2 | Cloned entire package | https://github.com/yhirose/cpp-httplib.git | +| xxHash | Yann Collet | 2-Clause BSD | 0.8.2 | Cloned entire package, used as a header-only lib | https://github.com/Cyan4973/xxHash | +| tokenizers_cpp | mlc-ai | Apache License 2.0 | disable-sentencepiece | Cloned entire package | https://github.com/mlc-ai/tokenizers-cpp.git | +| libnpy | Leon Merten Lohse | MIT License | 1.0.1 | Cloned entire package, used as a header-only lib | https://github.com/llohse/libnpy.git | diff --git a/hailort/cmake/external/cpp-httplib.cmake b/hailort/cmake/external/cpp-httplib.cmake new file mode 100644 index 0000000..e086b82 --- /dev/null +++ b/hailort/cmake/external/cpp-httplib.cmake @@ -0,0 +1,21 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(FetchContent) + +FetchContent_Declare( + cpp-httplib + GIT_REPOSITORY https://github.com/yhirose/cpp-httplib.git + GIT_TAG 51dee793fec2fa70239f5cf190e165b54803880f # v0.18.2 + GIT_SHALLOW TRUE + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/cpp-httplib-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/cpp-httplib-subbuild +) + +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(cpp-httplib) +if(NOT cpp-httplib_POPULATED) + 
FetchContent_Populate(cpp-httplib) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + add_subdirectory(${cpp-httplib_SOURCE_DIR} ${cpp-httplib_BINARY_DIR} EXCLUDE_FROM_ALL) + endif() +endif() \ No newline at end of file diff --git a/hailort/cmake/external/libnpy.cmake b/hailort/cmake/external/libnpy.cmake new file mode 100644 index 0000000..3963446 --- /dev/null +++ b/hailort/cmake/external/libnpy.cmake @@ -0,0 +1,23 @@ +cmake_minimum_required(VERSION 3.11.0) + +include(FetchContent) + +FetchContent_Declare( + libnpy + GIT_REPOSITORY https://github.com/llohse/libnpy.git + GIT_TAG 890ea4fcda302a580e633c624c6a63e2a5d422f6 # v1.0.1 + GIT_SHALLOW TRUE + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/libnpy-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/libnpy-subbuild +) + +# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent +FetchContent_GetProperties(libnpy) +if(NOT libnpy_POPULATED) + FetchContent_Populate(libnpy) + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + # Add libnpy as a header-only library + add_library(libnpy INTERFACE) + target_include_directories(libnpy INTERFACE ${libnpy_SOURCE_DIR}/include) + endif() +endif() \ No newline at end of file diff --git a/hailort/cmake/external/libusb.cmake b/hailort/cmake/external/libusb.cmake deleted file mode 100644 index 2a94cf9..0000000 --- a/hailort/cmake/external/libusb.cmake +++ /dev/null @@ -1,256 +0,0 @@ -cmake_minimum_required(VERSION 3.11.0) - -include(FetchContent) - -FetchContent_Declare( - libusb - GIT_REPOSITORY https://github.com/libusb/libusb.git - GIT_TAG d52e355daa09f17ce64819122cb067b8a2ee0d4b # Version 1.0.27 - GIT_SHALLOW TRUE - SOURCE_DIR ${HAILO_EXTERNAL_DIR}/libusb-src - SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/libusb-subbuild -) - -# https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent -# Note this cmakeFile is taken from https://github.com/libusb/libusb-cmake and modified to work with our build system -FetchContent_GetProperties(libusb) -if(NOT libusb_POPULATED) - 
FetchContent_Populate(libusb) - if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) - set(LIBUSB_ROOT ${HAILO_EXTERNAL_DIR}/libusb-src/libusb/) - - # Get the version information from version.h ignoring the nano version as it appears in version_nano.h and so we need it? - file(READ "${LIBUSB_ROOT}/version.h" VERSIONHEADERDATA) - string(REGEX MATCH "#define LIBUSB_MAJOR ([0-9]*)" _ ${VERSIONHEADERDATA}) - set(LIBUSB_VERSION_MAJOR ${CMAKE_MATCH_1}) - string(REGEX MATCH "#define LIBUSB_MINOR ([0-9]*)" _ ${VERSIONHEADERDATA}) - set(LIBUSB_VERSION_MINOR ${CMAKE_MATCH_1}) - string(REGEX MATCH "#define LIBUSB_MICRO ([0-9]*)" _ ${VERSIONHEADERDATA}) - set(LIBUSB_VERSION_MICRO ${CMAKE_MATCH_1}) - set(LIBUSB_VERSION "${LIBUSB_VERSION_MAJOR}.${LIBUSB_VERSION_MINOR}.${LIBUSB_VERSION_MICRO}") - - project(usb-1.0 - DESCRIPTION "A cross-platform library to access USB devices" - VERSION ${LIBUSB_VERSION} - LANGUAGES C - ) - if(EMSCRIPTEN) - set(CMAKE_CXX_STANDARD 20) - enable_language(CXX) - endif() - - # This function generates all the local variables what end up getting written to config. - # We use a function as any vars set in this context don't mess with the rest of the file. - # e.g. 
Logging LIBUSB_ENABLE_LOGGING mapps to ENABLE_LOGGING in the config, keeps it clean - function(generate_config_file) - include(CheckIncludeFiles) - include(CheckFunctionExists) - include(CheckSymbolExists) - include(CheckStructHasMember) - include(CheckCCompilerFlag) - - check_function_exists(clock_gettime HAVE_CLOCK_GETTIME) - check_function_exists(pthread_condattr_setclock HAVE_PTHREAD_CONDATTR_SETCLOCK) - check_function_exists(pthread_setname_np HAVE_PTHREAD_SETNAME_NP) - check_function_exists(pthread_threadid_np HAVE_PTHREAD_THREADID_NP) - check_function_exists(eventfd HAVE_EVENTFD) - check_function_exists(pipe2 HAVE_PIPE2) - check_function_exists(syslog HAVE_SYSLOG) - - check_include_files(asm/types.h HAVE_ASM_TYPES_H) - check_include_files(sys/eventfd.h HAVE_EVENTFD) - check_include_files(string.h HAVE_STRING_H) - check_include_files(sys/time.h HAVE_SYS_TIME_H) - - check_symbol_exists(timerfd_create "sys/timerfd.h" HAVE_TIMERFD) - check_symbol_exists(nfds_t "poll.h" HAVE_NFDS_T) - - check_struct_has_member("struct timespec" tv_sec time.h HAVE_STRUCT_TIMESPEC) - - if(HAVE_VISIBILITY) - set(DEFAULT_VISIBILITY "__attribute__((visibility(\"default\")))") - else() - set(DEFAULT_VISIBILITY "" ) - endif() - - # Set vars that will be written into the config file. 
- if(WIN32) - set(PLATFORM_WINDOWS 1) - else() - set(PLATFORM_POSIX 1) - endif() - - if(LIBUSB_ENABLE_LOGGING) - set(ENABLE_LOGGING ${LIBUSB_ENABLE_LOGGING}) - endif() - if(LIBUSB_ENABLE_DEBUG_LOGGING) - set(ENABLE_DEBUG_LOGGING ${LIBUSB_ENABLE_DEBUG_LOGGING}) - endif() - - if(CMAKE_C_COMPILER_ID MATCHES "Clang" OR CMAKE_C_COMPILER_ID STREQUAL "GNU") - check_c_compiler_flag("-fvisibility=hidden" HAVE_VISIBILITY) - endif() - - file(MAKE_DIRECTORY "${LIBUSB_GEN_INCLUDES}") - if(NOT MSVC) - set(_GNU_SOURCE TRUE) - endif() - configure_file("${HAILORT_LIBUSB_DIR}/config.h.in" "${LIBUSB_GEN_INCLUDES}/config.h" @ONLY) - endfunction() - - if(BUILD_SHARED_LIBS) - set(LIBUSB_BUILD_SHARED_LIBS_DEFAULT ON) - else() - set(LIBUSB_BUILD_SHARED_LIBS_DEFAULT OFF) - endif() - - option(LIBUSB_BUILD_SHARED_LIBS "Build Shared Libraries for libusb" ${LIBUSB_BUILD_SHARED_LIBS_DEFAULT}) - option(LIBUSB_BUILD_TESTING "Build Tests" OFF) - if(LIBUSB_BUILD_TESTING) - enable_testing() - endif() - - option(LIBUSB_BUILD_EXAMPLES "Build Example Applications" OFF) - - option(LIBUSB_INSTALL_TARGETS "Install libusb targets" ON) - option(LIBUSB_TARGETS_INCLUDE_USING_SYSTEM "Make targets include paths System" ON) - - option(LIBUSB_ENABLE_LOGGING "Enable Logging" ON) - option(LIBUSB_ENABLE_DEBUG_LOGGING "Enable Debug Logging" OFF) - - # Dont use libudev on linux currently - if(CMAKE_SYSTEM_NAME MATCHES "Linux") - option(LIBUSB_ENABLE_UDEV "Enable udev backend for device enumeration" OFF) - endif() - - set(LIBUSB_GEN_INCLUDES "${CMAKE_CURRENT_BINARY_DIR}/gen_include") - generate_config_file() - - if(LIBUSB_BUILD_SHARED_LIBS) - add_library(usb-1.0 SHARED) - else() - add_library(usb-1.0 STATIC) - endif() - - set_target_properties(usb-1.0 PROPERTIES - PREFIX lib # to be consistent with mainline libusb build system(s) - ) - - # common sources - target_sources(usb-1.0 PRIVATE - "${LIBUSB_GEN_INCLUDES}/config.h" - "${LIBUSB_ROOT}/core.c" - "${LIBUSB_ROOT}/descriptor.c" - "${LIBUSB_ROOT}/hotplug.c" - 
"${LIBUSB_ROOT}/io.c" - "${LIBUSB_ROOT}/libusb.h" - "${LIBUSB_ROOT}/libusbi.h" - "${LIBUSB_ROOT}/strerror.c" - "${LIBUSB_ROOT}/sync.c" - "${LIBUSB_ROOT}/version.h" - "${LIBUSB_ROOT}/version_nano.h" - ) - target_include_directories(usb-1.0 - PRIVATE - "${LIBUSB_GEN_INCLUDES}" - "${LIBUSB_ROOT}/os" - ) - - if (LIBUSB_TARGETS_INCLUDE_USING_SYSTEM) - target_include_directories(usb-1.0 SYSTEM PUBLIC "${LIBUSB_ROOT}") - else() - target_include_directories(usb-1.0 PUBLIC "${LIBUSB_ROOT}") - endif() - - if(WIN32) - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/libusb-1.0.def" - "${LIBUSB_ROOT}/os/events_windows.c" - "${LIBUSB_ROOT}/os/events_windows.h" - "${LIBUSB_ROOT}/os/threads_windows.c" - "${LIBUSB_ROOT}/os/threads_windows.h" - "${LIBUSB_ROOT}/os/windows_common.c" - "${LIBUSB_ROOT}/os/windows_common.h" - "${LIBUSB_ROOT}/os/windows_usbdk.c" - "${LIBUSB_ROOT}/os/windows_usbdk.h" - "${LIBUSB_ROOT}/os/windows_winusb.c" - "${LIBUSB_ROOT}/os/windows_winusb.h" - $<$:${LIBUSB_ROOT}/libusb-1.0.rc> - ) - target_compile_definitions(usb-1.0 PRIVATE $<$:_CRT_SECURE_NO_WARNINGS=1>) - target_link_libraries(usb-1.0 PRIVATE windowsapp) - else() - # common POSIX/non-Windows sources - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/events_posix.c" - "${LIBUSB_ROOT}/os/events_posix.h" - "${LIBUSB_ROOT}/os/threads_posix.c" - "${LIBUSB_ROOT}/os/threads_posix.h" - ) - if(CMAKE_SYSTEM_NAME MATCHES "Linux") - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/linux_usbfs.c" - "${LIBUSB_ROOT}/os/linux_usbfs.h" - ) - if(LIBUSB_ENABLE_UDEV) - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/linux_udev.c" - ) - target_link_libraries(usb-1.0 PRIVATE udev) - target_compile_definitions(usb-1.0 PRIVATE HAVE_LIBUDEV=1) - else() - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/linux_netlink.c" - ) - endif() - find_package(Threads REQUIRED) - target_link_libraries(usb-1.0 PRIVATE Threads::Threads) - elseif(ANDROID) - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/linux_netlink.c" - 
"${LIBUSB_ROOT}/os/linux_usbfs.c" - "${LIBUSB_ROOT}/os/linux_usbfs.h" - ) - target_link_libraries(usb-1.0 PRIVATE android log) - elseif(APPLE) - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/darwin_usb.c" - "${LIBUSB_ROOT}/os/darwin_usb.h" - ) - target_link_libraries(usb-1.0 PRIVATE - "-framework Foundation" - "-framework IOKit" - "-framework Security" - ) - elseif(CMAKE_SYSTEM_NAME STREQUAL "NetBSD") - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/netbsd_usb.c" - ) - elseif(CMAKE_SYSTEM_NAME STREQUAL "OpenBSD") - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/openbsd_usb.c" - ) - elseif(EMSCRIPTEN) - target_sources(usb-1.0 PRIVATE - "${LIBUSB_ROOT}/os/emscripten_webusb.cpp" - ) - target_compile_options(usb-1.0 PRIVATE -pthread) - else() - message(FATAL_ERROR "Unsupported target platform: ${CMAKE_SYSTEM_NAME}") - endif() - endif() - - if(LIBUSB_BUILD_TESTING) - add_subdirectory(tests) - endif() - - if(LIBUSB_BUILD_EXAMPLES) - add_subdirectory(examples) - endif() - - if(LIBUSB_INSTALL_TARGETS) - install(TARGETS usb-1.0) - install(FILES "${LIBUSB_ROOT}/libusb.h" DESTINATION "include/libusb-1.0") - endif() - endif() -endif() \ No newline at end of file diff --git a/hailort/cmake/external/tokenizers.cmake b/hailort/cmake/external/tokenizers.cmake new file mode 100644 index 0000000..91e60bb --- /dev/null +++ b/hailort/cmake/external/tokenizers.cmake @@ -0,0 +1,72 @@ +cmake_minimum_required(VERSION 3.14) + +include(FetchContent) + +FetchContent_Declare( + tokenizers + GIT_REPOSITORY https://github.com/mlc-ai/tokenizers-cpp.git + GIT_TAG 125d072f52290fa6d2944b3d72ccc937786ec631 # disable-sentencepiece + # GIT_SHALLOW TRUE + SOURCE_DIR ${HAILO_EXTERNAL_DIR}/tokenizers-src + SUBBUILD_DIR ${HAILO_EXTERNAL_DIR}/tokenizers-subbuild +) + +# https://stackoverflow.com/questions/61499646/cmake-set-variable-readonly-protect-from-override +macro(set_readonly VAR) + # Set the variable itself + set("${VAR}" "${ARGN}") + # Store the variable's value for restore it upon 
modifications. + set("_${VAR}_readonly_val" "${ARGN}") + # Register a watcher for a variable + variable_watch("${VAR}" readonly_guard) +endmacro() + +# Watcher for a variable which emulates readonly property. +macro(readonly_guard VAR access value current_list_file stack) + if ("${access}" STREQUAL "MODIFIED_ACCESS") + message(WARNING "Attempt to change readonly variable '${VAR}'!") + # Restore a value of the variable to the initial one. + set(${VAR} "${_${VAR}_readonly_val}") + endif() +endmacro() + +# On kirkstone-builds we have an issue with compiling tokenizers_cpp, so we support getting .a path +option(TOKENIZERS_LIB_PATH "Path to tokenizers_cpp library" "") +option(TOKENIZERS_RUST_LIB_PATH "Path to tokenizers_cpp rust library" "") +option(TOKENIZERS_INCLUDE_DIR "Path to include dir of tokenizers_cpp" "") +if (TOKENIZERS_LIB_PATH AND TOKENIZERS_RUST_LIB_PATH AND TOKENIZERS_INCLUDE_DIR) + message(STATUS "Will link against given tokenizers: ${TOKENIZERS_LIB_PATH}") + message(STATUS "Will link against given tokenizers rust: ${TOKENIZERS_RUST_LIB_PATH}") + message(STATUS "Will include given include dir: ${TOKENIZERS_INCLUDE_DIR}") + + # Create an imported target for the static library + add_library(tokenizers_cpp STATIC IMPORTED) + + # Set the properties of the imported library + set_target_properties(tokenizers_cpp PROPERTIES + IMPORTED_LOCATION ${TOKENIZERS_LIB_PATH} + INTERFACE_INCLUDE_DIRECTORIES ${TOKENIZERS_INCLUDE_DIR} + ) + + target_link_libraries(tokenizers_cpp INTERFACE ${TOKENIZERS_RUST_LIB_PATH} dl) +else() + # https://stackoverflow.com/questions/65527126/disable-install-for-fetchcontent + FetchContent_GetProperties(tokenizers) + if(NOT tokenizers_POPULATED) + FetchContent_Populate(tokenizers) + if(CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64") + set_readonly(TOKENIZERS_CPP_CARGO_TARGET x86_64-unknown-linux-gnu) + elseif(CMAKE_SYSTEM_PROCESSOR STREQUAL "aarch64") + set_readonly(TOKENIZERS_CPP_CARGO_TARGET aarch64-unknown-linux-gnu) + endif() + 
set(MLC_ENABLE_SENTENCEPIECE_TOKENIZER OFF) # Disable sentencepiece for reducing binary size + if (NOT HAILO_EXTERNALS_EXCLUDE_TARGETS) + # This step requires cargo to be installed + find_program(CARGO_EXECUTABLE cargo) + if (NOT CARGO_EXECUTABLE) + message(FATAL_ERROR "Cargo is not installed or not found in PATH.") + endif() + add_subdirectory(${tokenizers_SOURCE_DIR} ${tokenizers_BINARY_DIR} EXCLUDE_FROM_ALL) + endif() + endif() +endif() diff --git a/hailort/common/async_thread.hpp b/hailort/common/async_thread.hpp index 049fd8a..080668d 100644 --- a/hailort/common/async_thread.hpp +++ b/hailort/common/async_thread.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/barrier.cpp b/hailort/common/barrier.cpp index db3c120..e265d14 100644 --- a/hailort/common/barrier.cpp +++ b/hailort/common/barrier.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/barrier.hpp b/hailort/common/barrier.hpp index 1fae129..9f61439 100644 --- a/hailort/common/barrier.hpp +++ b/hailort/common/barrier.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/buffer_pool.cpp b/hailort/common/buffer_pool.cpp index 4b37502..91d2ed3 100644 --- a/hailort/common/buffer_pool.cpp +++ b/hailort/common/buffer_pool.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file buffer_pool.cpp * @brief Buffer pool implementation @@ -21,6 +21,33 @@ BasicBufferPool::BasicBufferPool(size_t buffer_size, std::vector &&bu m_free_buffers_queue(std::move(free_buffers_queue)) {} +Expected BasicBufferPool::create_shared(size_t buffer_size, size_t buffer_count, + std::function(size_t)> allocate_func) +{ + TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled)); + TRY(auto free_buffers_queue, SpscQueue::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT)); + + std::vector buffers; + buffers.reserve(buffer_count); + + for (size_t i = 0; i < buffer_count; i++) { + TRY(auto buffer, allocate_func(buffer_size)); + + auto buffer_ptr = make_shared_nothrow(std::move(buffer)); + CHECK_NOT_NULL(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + auto status = free_buffers_queue.enqueue(buffer_ptr); + CHECK_SUCCESS(status); + + buffers.emplace_back(buffer_ptr); + } + + auto buffer_pool = make_shared_nothrow(buffer_size, std::move(buffers), std::move(free_buffers_queue), buffer_count); + CHECK_NOT_NULL(buffer_pool, HAILO_OUT_OF_HOST_MEMORY); + + return buffer_pool; +} + Expected BasicBufferPool::acquire_buffer() { TRY_WITH_ACCEPTABLE_STATUS(HAILO_SHUTDOWN_EVENT_SIGNALED, auto buffer, diff --git a/hailort/common/buffer_pool.hpp b/hailort/common/buffer_pool.hpp index 11d6129..d72547c 100644 --- a/hailort/common/buffer_pool.hpp +++ b/hailort/common/buffer_pool.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file buffer_pool.hpp * @brief Buffer pool @@ -21,6 +21,8 @@ namespace hailort { +class BasicBufferPool; +using BasicBufferPoolPtr = std::shared_ptr; // TODO: HRT-12690 - Make other buffer pools to use this as base class class BasicBufferPool @@ -28,6 +30,8 @@ class BasicBufferPool public: BasicBufferPool(size_t buffer_size, std::vector &&buffers, SpscQueue &&m_free_buffers_queue, size_t buffers_count); + static Expected create_shared(size_t buffer_size, size_t buffer_count, + std::function(size_t)> allocate_func); BasicBufferPool(BasicBufferPool &&) = delete; BasicBufferPool(const BasicBufferPool &) = delete; @@ -48,7 +52,82 @@ private: SpscQueue m_free_buffers_queue; std::mutex m_mutex; }; -using BasicBufferPoolPtr = std::shared_ptr; + +template +class ObjectPool +{ +public: + static Expected>> create_shared(size_t count, std::function()> create_object_func) + { + TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled)); + TRY(auto free_objects_queue, SpscQueue>::create(count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT)); + + std::vector> objects; + objects.reserve(count); + + for (size_t i = 0; i < count; i++) { + TRY(auto object, create_object_func()); + + auto object_ptr = make_shared_nothrow(std::move(object)); + CHECK_NOT_NULL(object_ptr, HAILO_OUT_OF_HOST_MEMORY); + + auto status = free_objects_queue.enqueue(object_ptr); + CHECK_SUCCESS(status); + + objects.emplace_back(object_ptr); + } + + auto object_pool = make_shared_nothrow>(std::move(objects), std::move(free_objects_queue), count); + CHECK_NOT_NULL(object_pool, HAILO_OUT_OF_HOST_MEMORY); + + return object_pool; + } + + ObjectPool(std::vector> &&objects, SpscQueue> &&free_objects_queue, + size_t objects_count) : + m_objects_count(objects_count), + m_objects(std::move(objects)), + m_free_objects_queue(std::move(free_objects_queue)) + {} + + ObjectPool(ObjectPool &&) = delete; + ObjectPool(const 
ObjectPool &) = delete; + ObjectPool &operator=(ObjectPool &&) = delete; + ObjectPool &operator=(const ObjectPool &) = delete; + virtual ~ObjectPool() = default; + + Expected> acquire() + { + TRY_WITH_ACCEPTABLE_STATUS(HAILO_SHUTDOWN_EVENT_SIGNALED, auto object, + m_free_objects_queue.dequeue(DEFAULT_TRANSFER_TIMEOUT)); + return object; + } + + hailo_status return_to_pool(std::shared_ptr object) + { + std::unique_lock lock(m_mutex); + auto status = m_free_objects_queue.enqueue(object); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; + } + + size_t count() const + { + return m_objects_count; + } + + size_t current_count() const + { + return m_free_objects_queue.size_approx(); + } + +private: + const size_t m_objects_count; + std::vector> m_objects; + SpscQueue> m_free_objects_queue; + std::mutex m_mutex; +}; // TODO: HRT-12690 - DMA buffer pool is also used in the service - code duplication class DmaAbleBufferPool : public BasicBufferPool diff --git a/hailort/common/circular_buffer.hpp b/hailort/common/circular_buffer.hpp index b3f381f..e6c785d 100644 --- a/hailort/common/circular_buffer.hpp +++ b/hailort/common/circular_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/compiler_extensions_compat.hpp b/hailort/common/compiler_extensions_compat.hpp index 0bd1652..729e76e 100644 --- a/hailort/common/compiler_extensions_compat.hpp +++ b/hailort/common/compiler_extensions_compat.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/device_measurements.cpp b/hailort/common/device_measurements.cpp index ae15885..2415944 100644 --- a/hailort/common/device_measurements.cpp +++ b/hailort/common/device_measurements.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,6 +11,8 @@ #include "common/device_measurements.hpp" #include "common/utils.hpp" +#include + using namespace hailort; constexpr std::chrono::milliseconds DEFAULT_MEASUREMENTS_INTERVAL(100); @@ -82,10 +84,10 @@ hailo_status TemperatureMeasurement::start_measurement() break; } - float32_t ts_avg = ((temp_info->ts0_temperature + temp_info->ts1_temperature) / 2); + float32_t ts_max = std::max(temp_info->ts0_temperature, temp_info->ts1_temperature); { std::unique_lock lock(m_mutex); - m_acc->add_data_point(ts_avg, temp_info->sample_count); + m_acc->add_data_point(ts_max, temp_info->sample_count); } std::this_thread::sleep_for(DEFAULT_MEASUREMENTS_INTERVAL); diff --git a/hailort/common/device_measurements.hpp b/hailort/common/device_measurements.hpp index fbf2812..a41b52d 100644 --- a/hailort/common/device_measurements.hpp +++ b/hailort/common/device_measurements.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/env_vars.hpp b/hailort/common/env_vars.hpp index 0961ace..6cd4dee 100644 --- a/hailort/common/env_vars.hpp +++ b/hailort/common/env_vars.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -20,6 +20,7 @@ namespace hailort #define SCHEDULER_MON_ENV_VAR ("HAILO_MONITOR") #define SCHEDULER_MON_ENV_VAR_VALUE ("1") +#define SCHEDULER_MON_TIME_INTERVAL_IN_MILLISECONDS_ENV_VAR ("HAILO_MONITOR_TIME_INTERVAL") #define TRACE_ENV_VAR ("HAILO_TRACE") #define TRACE_ENV_VAR_VALUE ("scheduler") diff --git a/hailort/common/ethernet_utils.hpp b/hailort/common/ethernet_utils.hpp index eadfaed..8e94246 100644 --- a/hailort/common/ethernet_utils.hpp +++ b/hailort/common/ethernet_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/event_internal.cpp b/hailort/common/event_internal.cpp index f7b8ce5..4989684 100644 --- a/hailort/common/event_internal.cpp +++ b/hailort/common/event_internal.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file event_internal.cpp * @brief Internal implementation for events, shared between all os. diff --git a/hailort/common/event_internal.hpp b/hailort/common/event_internal.hpp index ff0b47a..9abefcd 100644 --- a/hailort/common/event_internal.hpp +++ b/hailort/common/event_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/file_descriptor.hpp b/hailort/common/file_descriptor.hpp index a3f805a..1017021 100644 --- a/hailort/common/file_descriptor.hpp +++ b/hailort/common/file_descriptor.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/file_utils.cpp b/hailort/common/file_utils.cpp index 04e6cc7..334d760 100644 --- a/hailort/common/file_utils.cpp +++ b/hailort/common/file_utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/file_utils.hpp b/hailort/common/file_utils.hpp index 12e5352..0223ca0 100644 --- a/hailort/common/file_utils.hpp +++ b/hailort/common/file_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/filesystem.hpp b/hailort/common/filesystem.hpp index 4cde399..51d2f8a 100644 --- a/hailort/common/filesystem.hpp +++ b/hailort/common/filesystem.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/fork_support.cpp b/hailort/common/fork_support.cpp index 7c3e44d..5aaf30d 100644 --- a/hailort/common/fork_support.cpp +++ b/hailort/common/fork_support.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file fork_support.cpp **/ diff --git a/hailort/common/fork_support.hpp b/hailort/common/fork_support.hpp index 4e90be9..bb99d74 100644 --- a/hailort/common/fork_support.hpp +++ b/hailort/common/fork_support.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file fork_support.hpp * @brief Utilities/classes uses to support fork in the process. diff --git a/hailort/common/genai/serializer/genai_rpc.hpp b/hailort/common/genai/serializer/genai_rpc.hpp new file mode 100644 index 0000000..7fd9410 --- /dev/null +++ b/hailort/common/genai/serializer/genai_rpc.hpp @@ -0,0 +1,132 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file genai_rpc.hpp + * @brief HailoRT-GenAI protocol decleration + **/ + +#ifndef _HAILO_COMMON_GENAI_RPC_HPP_ +#define _HAILO_COMMON_GENAI_RPC_HPP_ + +namespace hailort +{ +namespace genai +{ + +static const uint32_t MAX_STRING_SIZE = 128; + +#pragma pack(push, 1) +struct LLM_Create_Request { + char lora_name[MAX_STRING_SIZE]; + size_t lora_name_length; + bool is_builtin; // If builtin, the next message is the HEF raw buffers + + char group_id[MAX_STRING_SIZE]; // We need 'hailo_vdevice_params_t', but only group-id is relevant + size_t group_id_length; +}; + +struct LLM_Create_Reply { + hailo_status status; +}; + +struct LLM_Get_Generator_Default_Params_Request { + uint8_t placeholder; +}; + +struct LLM_Get_Generator_Default_Params_Reply { + float32_t temperature; + float32_t top_p; + uint32_t top_k; + float32_t frequency_penalty; + uint32_t max_generated_tokens; + bool do_sample; + uint32_t seed; + hailo_status status; +}; + +struct LLM_Generator_Create_Request { + float temperature; + float top_p; + uint32_t top_k; + float32_t frequency_penalty; + uint32_t max_generated_tokens; + bool do_sample; + uint32_t seed; +}; + +struct LLM_Generator_Create_Reply { + hailo_status status; +}; + +struct LLM_Generator_Write_Request { + // Indicates that the next message to the server is the input prompt + uint8_t placeholder; +}; + +struct LLM_Generator_Write_Reply { + hailo_status status; +}; + +struct LLM_Generator_Generate_Request { + // Indicates that the server should start generating text + uint8_t placeholder; +}; + +struct LLM_Generator_Generate_Reply { + hailo_status status; +}; + +struct LLM_Generator_Read_Request { + // Indicates that the server should write back the next generated token + uint8_t placeholder; +}; + +struct LLM_Generator_Read_Reply { + hailo_status status; + char output_token[MAX_STRING_SIZE]; + size_t output_token_length; + uint32_t generation_status; +}; + 
+enum class HailoGenAIActionID { + LLM__CREATE = 0, + LLM__GET_DEFAULT_GENERATOR_PARAMS, + LLM__GENERATOR_CREATE, + LLM__GENERATOR_WRITE, + LLM__GENERATOR_GENERATE, + LLM__GENERATOR_READ, + + MAX_VALUE = HAILO_MAX_ENUM, +}; + +struct GenAIRequest { + HailoGenAIActionID type; + union { + LLM_Create_Request llm_create; + LLM_Get_Generator_Default_Params_Request llm_get_default_generator_params; + LLM_Generator_Create_Request llm_generator_create; + LLM_Generator_Write_Request llm_generator_write; + LLM_Generator_Generate_Request llm_generator_generate; + LLM_Generator_Read_Request llm_generator_read; + } data; +}; + +struct GenAIReply { + HailoGenAIActionID type; + union { + LLM_Create_Reply llm_create; + LLM_Get_Generator_Default_Params_Reply llm_get_default_generator_params; + LLM_Generator_Create_Reply llm_generator_create; + LLM_Generator_Write_Reply llm_generator_write; + LLM_Generator_Generate_Reply llm_generator_generate; + LLM_Generator_Read_Reply llm_generator_read; + } data; +}; +#pragma pack(pop) + +} // namespace genai +} // namespace hailort + +#endif /* _HAILO_COMMON_GENAI_RPC_HPP_ */ \ No newline at end of file diff --git a/hailort/common/genai/serializer/serializer.cpp b/hailort/common/genai/serializer/serializer.cpp new file mode 100644 index 0000000..dd32334 --- /dev/null +++ b/hailort/common/genai/serializer/serializer.cpp @@ -0,0 +1,295 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file serializer.cpp + * @brief HailoRT-GenAI protocol serialization implementation + **/ + +#include "hailo/genai/common.hpp" +#include "hailo/genai/llm/llm.hpp" + +#include "serializer.hpp" +#include "hailo/buffer.hpp" +#include "hailo/hailort.h" +#include "hailo/hailort_common.hpp" +#include "common/utils.hpp" + +namespace hailort +{ +namespace genai +{ + +Expected LLMCreateSerializer::serialize_request(const hailo_vdevice_params_t &vdevice_params, const LLMParams &llm_params) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma())); + GenAIRequest *request = buffer.as_pointer(); + + request->type = HailoGenAIActionID::LLM__CREATE; + auto lora = llm_params.lora(); + std::copy(lora.begin(), lora.end(), request->data.llm_create.lora_name); + request->data.llm_create.lora_name_length = llm_params.lora().size(); + request->data.llm_create.is_builtin = (llm_params.hef() == BUILTIN); + + std::string group_id = (nullptr == vdevice_params.group_id) ? "" : + std::string(vdevice_params.group_id); + std::copy(group_id.begin(), group_id.end(), request->data.llm_create.group_id); + request->data.llm_create.group_id_length = group_id.size(); + + return buffer; +} + +Expected> LLMCreateSerializer::deserialize_request(const MemoryView &serialized_request) +{ + const GenAIRequest *request = serialized_request.as_pointer(); + CHECK(request->type == HailoGenAIActionID::LLM__CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__CREATE), static_cast(request->type)); + + std::string group_id = (0 == request->data.llm_create.group_id_length) ? 
"" : + std::string(request->data.llm_create.group_id, request->data.llm_create.group_id_length); + return std::tuple(std::string(request->data.llm_create.lora_name, request->data.llm_create.lora_name_length), + request->data.llm_create.is_builtin, group_id); +} + +Expected LLMCreateSerializer::serialize_reply(hailo_status status) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma())); + GenAIReply *reply = buffer.as_pointer(); + + reply->type = HailoGenAIActionID::LLM__CREATE; + reply->data.llm_create.status = status; + + return buffer; +} + +hailo_status LLMCreateSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + const GenAIReply *reply = serialized_reply.as_pointer(); + CHECK(reply->type == HailoGenAIActionID::LLM__CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__CREATE), static_cast(reply->type)); + + return reply->data.llm_create.status; +} + +Expected LLMGetDefaultGeneratorParamsSerializer::serialize_request() +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma())); + GenAIRequest *request = buffer.as_pointer(); + + request->type = HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS; + + return buffer; +} + +hailo_status LLMGetDefaultGeneratorParamsSerializer::deserialize_request(const MemoryView &serialized_request) +{ + const GenAIRequest *request = serialized_request.as_pointer(); + CHECK(request->type == HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS), static_cast(request->type)); + return HAILO_SUCCESS; +} + +Expected LLMGetDefaultGeneratorParamsSerializer::serialize_reply(const LLMGeneratorParams &default_generator_params, hailo_status status) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma())); + GenAIReply *reply = 
buffer.as_pointer(); + + reply->type = HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS; + reply->data.llm_get_default_generator_params.status = status; + reply->data.llm_get_default_generator_params.temperature = default_generator_params.temperature(); + reply->data.llm_get_default_generator_params.top_p = default_generator_params.top_p(); + reply->data.llm_get_default_generator_params.top_k = default_generator_params.top_k(); + reply->data.llm_get_default_generator_params.frequency_penalty = default_generator_params.frequency_penalty(); + reply->data.llm_get_default_generator_params.max_generated_tokens = default_generator_params.max_generated_tokens(); + reply->data.llm_get_default_generator_params.do_sample = default_generator_params.do_sample(); + reply->data.llm_get_default_generator_params.seed = default_generator_params.seed(); + + return buffer; +} + +Expected LLMGetDefaultGeneratorParamsSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + const GenAIReply *reply = serialized_reply.as_pointer(); + CHECK(reply->type == HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GET_DEFAULT_GENERATOR_PARAMS), static_cast(reply->type)); + CHECK_SUCCESS(reply->data.llm_get_default_generator_params.status, "Failed to get default generator params"); + + LLMGeneratorParams res(reply->data.llm_get_default_generator_params.temperature, reply->data.llm_get_default_generator_params.top_p, + reply->data.llm_get_default_generator_params.top_k, reply->data.llm_get_default_generator_params.frequency_penalty, + reply->data.llm_get_default_generator_params.max_generated_tokens, reply->data.llm_get_default_generator_params.do_sample, + reply->data.llm_get_default_generator_params.seed); + + return res; +} + +Expected LLMGeneratorCreateSerializer::serialize_request(const LLMGeneratorParams ¶ms) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), 
BufferStorageParams::create_dma())); + GenAIRequest *request = buffer.as_pointer(); + + request->type = HailoGenAIActionID::LLM__GENERATOR_CREATE; + + request->data.llm_generator_create.temperature = params.temperature(); + request->data.llm_generator_create.top_p = params.top_p(); + request->data.llm_generator_create.top_k = params.top_k(); + request->data.llm_generator_create.frequency_penalty = params.frequency_penalty(); + request->data.llm_generator_create.max_generated_tokens = params.max_generated_tokens(); + request->data.llm_generator_create.do_sample = params.do_sample(); + request->data.llm_generator_create.seed = params.seed(); + + return buffer; +} + +Expected LLMGeneratorCreateSerializer::deserialize_request(const MemoryView &serialized_request) +{ + const GenAIRequest *request = serialized_request.as_pointer(); + CHECK(request->type == HailoGenAIActionID::LLM__GENERATOR_CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_CREATE), static_cast(request->type)); + + LLMGeneratorParams res(request->data.llm_generator_create.temperature, request->data.llm_generator_create.top_p, + request->data.llm_generator_create.top_k, request->data.llm_generator_create.frequency_penalty, + request->data.llm_generator_create.max_generated_tokens, request->data.llm_generator_create.do_sample, + request->data.llm_generator_create.seed); + + return res; +} + +Expected LLMGeneratorCreateSerializer::serialize_reply(hailo_status status) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma())); + GenAIReply *reply = buffer.as_pointer(); + + reply->type = HailoGenAIActionID::LLM__GENERATOR_CREATE; + reply->data.llm_generator_create.status = status; + + return buffer; +} + +hailo_status LLMGeneratorCreateSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + const GenAIReply *reply = serialized_reply.as_pointer(); + CHECK(reply->type == 
HailoGenAIActionID::LLM__GENERATOR_CREATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_CREATE), static_cast(reply->type)); + return reply->data.llm_generator_create.status; +} + +Expected LLMGeneratorWriteSerializer::serialize_request() +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma())); + GenAIRequest *request = buffer.as_pointer(); + + request->type = HailoGenAIActionID::LLM__GENERATOR_WRITE; + + return buffer; +} + +hailo_status LLMGeneratorWriteSerializer::deserialize_request(const MemoryView &serialized_request) +{ + const GenAIRequest *request = serialized_request.as_pointer(); + CHECK(request->type == HailoGenAIActionID::LLM__GENERATOR_WRITE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_WRITE), static_cast(request->type)); + return HAILO_SUCCESS; +} + +Expected LLMGeneratorWriteSerializer::serialize_reply(hailo_status status) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma())); + GenAIReply *reply = buffer.as_pointer(); + + reply->type = HailoGenAIActionID::LLM__GENERATOR_WRITE; + reply->data.llm_generator_write.status = status; + + return buffer; +} + +hailo_status LLMGeneratorWriteSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + const GenAIReply *reply = serialized_reply.as_pointer(); + CHECK(reply->type == HailoGenAIActionID::LLM__GENERATOR_WRITE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_WRITE), static_cast(reply->type)); + return reply->data.llm_generator_write.status; +} + +Expected LLMGeneratorGenerateSerializer::serialize_request() +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma())); + GenAIRequest *request = buffer.as_pointer(); + + request->type = HailoGenAIActionID::LLM__GENERATOR_GENERATE; + + return buffer; +} + 
+hailo_status LLMGeneratorGenerateSerializer::deserialize_request(const MemoryView &serialized_request) +{ + const GenAIRequest *request = serialized_request.as_pointer(); + CHECK(request->type == HailoGenAIActionID::LLM__GENERATOR_GENERATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_GENERATE), static_cast(request->type)); + return HAILO_SUCCESS; +} + +Expected LLMGeneratorGenerateSerializer::serialize_reply(hailo_status status) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma())); + GenAIReply *reply = buffer.as_pointer(); + + reply->type = HailoGenAIActionID::LLM__GENERATOR_GENERATE; + reply->data.llm_generator_generate.status = status; + + return buffer; +} + +hailo_status LLMGeneratorGenerateSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + const GenAIReply *reply = serialized_reply.as_pointer(); + CHECK(reply->type == HailoGenAIActionID::LLM__GENERATOR_GENERATE, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_GENERATE), static_cast(reply->type)); + return reply->data.llm_generator_generate.status; +} + +Expected LLMGeneratorReadSerializer::serialize_request() +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIRequest), BufferStorageParams::create_dma())); + GenAIRequest *request = buffer.as_pointer(); + + request->type = HailoGenAIActionID::LLM__GENERATOR_READ; + + return buffer; +} + +hailo_status LLMGeneratorReadSerializer::deserialize_request(const MemoryView &serialized_request) +{ + const GenAIRequest *request = serialized_request.as_pointer(); + CHECK(request->type == HailoGenAIActionID::LLM__GENERATOR_READ, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_READ), static_cast(request->type)); + return HAILO_SUCCESS; +} + +Expected LLMGeneratorReadSerializer::serialize_reply(hailo_status status, const std::string &output, 
LLMGeneratorCompletion::Status generation_status) +{ + TRY(auto buffer, Buffer::create(sizeof(GenAIReply), BufferStorageParams::create_dma())); + GenAIReply *reply = buffer.as_pointer(); + + reply->type = HailoGenAIActionID::LLM__GENERATOR_READ; + reply->data.llm_generator_read.status = status; + reply->data.llm_generator_read.output_token_length = output.size(); + std::copy(output.begin(), output.end(), reply->data.llm_generator_read.output_token); + reply->data.llm_generator_read.generation_status = static_cast(generation_status); + + return buffer; +} + +Expected> LLMGeneratorReadSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + const GenAIReply *reply = serialized_reply.as_pointer(); + CHECK(reply->type == HailoGenAIActionID::LLM__GENERATOR_READ, HAILO_INTERNAL_FAILURE, "Expected id {}, received {}", + static_cast(HailoGenAIActionID::LLM__GENERATOR_READ), static_cast(reply->type)); + CHECK_SUCCESS(reply->data.llm_generator_read.status); + + return std::make_pair(std::string(reply->data.llm_generator_read.output_token, reply->data.llm_generator_read.output_token_length), + static_cast(reply->data.llm_generator_read.generation_status)); +} + +} /* namespace genai */ +} /* namespace hailort */ diff --git a/hailort/common/genai/serializer/serializer.hpp b/hailort/common/genai/serializer/serializer.hpp new file mode 100644 index 0000000..7f05b26 --- /dev/null +++ b/hailort/common/genai/serializer/serializer.hpp @@ -0,0 +1,113 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file serializer.hpp + * @brief HailoRT-GenAI protocol serialization + **/ + +#ifndef _HAILO_SERIALIZER_HPP_ +#define _HAILO_SERIALIZER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/buffer.hpp" +#include "hailo/expected.hpp" +#include "common/utils.hpp" + +#include "hailo/genai/llm/llm.hpp" + +#include "genai_rpc.hpp" + +namespace hailort +{ +namespace genai +{ + +// TODO: HRT-15919 - Text2Image Serialization +#pragma pack(push, 1) +typedef struct { + uint32_t steps_count; + uint32_t samples_count; + float32_t guidance_scale; + uint32_t seed; +} text2image_generator_params_t; + +typedef struct { + bool has_negative_prompt; + bool has_ip_adapter; +} text2image_generation_info_t; +#pragma pack(pop) + +struct LLMCreateSerializer +{ + LLMCreateSerializer() = delete; + + static Expected serialize_request(const hailo_vdevice_params_t &vdevice_params, const LLMParams &llm_params); + static Expected> deserialize_request(const MemoryView &serialized_request); // string - lora_name, bool - model_is_builtin + + static Expected serialize_reply(hailo_status status); + static hailo_status deserialize_reply(const MemoryView &serialized_reply); +}; + +struct LLMGetDefaultGeneratorParamsSerializer +{ + LLMGetDefaultGeneratorParamsSerializer() = delete; + + static Expected serialize_request(); + static hailo_status deserialize_request(const MemoryView &serialized_request); + + static Expected serialize_reply(const LLMGeneratorParams &default_generator_params, hailo_status status); + static Expected deserialize_reply(const MemoryView &serialized_reply); +}; + +struct LLMGeneratorCreateSerializer +{ + LLMGeneratorCreateSerializer() = delete; + + static Expected serialize_request(const LLMGeneratorParams ¶ms); + static Expected deserialize_request(const MemoryView &serialized_request); + + static Expected serialize_reply(hailo_status status); + static hailo_status deserialize_reply(const 
MemoryView &serialized_reply); +}; + +struct LLMGeneratorWriteSerializer +{ + LLMGeneratorWriteSerializer() = delete; + + static Expected serialize_request(); + static hailo_status deserialize_request(const MemoryView &serialized_request); + + static Expected serialize_reply(hailo_status status); + static hailo_status deserialize_reply(const MemoryView &serialized_reply); +}; + +struct LLMGeneratorGenerateSerializer +{ + LLMGeneratorGenerateSerializer() = delete; + + static Expected serialize_request(); + static hailo_status deserialize_request(const MemoryView &serialized_request); + + static Expected serialize_reply(hailo_status status); + static hailo_status deserialize_reply(const MemoryView &serialized_reply); +}; + +struct LLMGeneratorReadSerializer +{ + LLMGeneratorReadSerializer() = delete; + + static Expected serialize_request(); + static hailo_status deserialize_request(const MemoryView &serialized_request); + + static Expected serialize_reply(hailo_status status, const std::string &output = "", + LLMGeneratorCompletion::Status generation_status = LLMGeneratorCompletion::Status::GENERATING); + static Expected> deserialize_reply(const MemoryView &serialized_reply); +}; + +} /* namespace genai */ + +} /* namespace hailort */ + +#endif /* _HAILO_SERIALIZER_HPP_ */ diff --git a/hailort/common/genai/session_wrapper/session_wrapper.hpp b/hailort/common/genai/session_wrapper/session_wrapper.hpp new file mode 100644 index 0000000..30a1453 --- /dev/null +++ b/hailort/common/genai/session_wrapper/session_wrapper.hpp @@ -0,0 +1,78 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file session_wrapper.hpp + * @brief a wrapper for session + **/ + +#ifndef _HAILO_COMMON_GENAI_SESSION_WRAPPER_HPP_ +#define _HAILO_COMMON_GENAI_SESSION_WRAPPER_HPP_ + +#include "hailo/hailort.h" +#include "hailo/buffer.hpp" +#include "hailo/hailo_session.hpp" +#include "common/utils.hpp" + +#include "common/genai/serializer/serializer.hpp" + +namespace hailort +{ +namespace genai +{ + +class SessionWrapper final +{ +public: + SessionWrapper(std::shared_ptr session) : m_session(session) {} + ~SessionWrapper() = default; + + Expected> read(std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT) + { + TimeoutGuard timeout_guard(timeout); + size_t size_to_read = 0; + CHECK_SUCCESS_AS_EXPECTED(m_session->read(reinterpret_cast(&size_to_read), + sizeof(size_to_read), timeout_guard.get_remaining_timeout())); + + TRY(auto buffer, Buffer::create_shared(size_to_read, BufferStorageParams::create_dma())); + CHECK_SUCCESS(m_session->read(buffer->data(), size_to_read, timeout_guard.get_remaining_timeout())); + + return buffer; + } + + Expected read(MemoryView buffer, std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT) + { + TimeoutGuard timeout_guard(timeout); + size_t size_to_read = 0; + CHECK_SUCCESS_AS_EXPECTED(m_session->read(reinterpret_cast(&size_to_read), + sizeof(size_to_read), timeout_guard.get_remaining_timeout())); + + CHECK(size_to_read <= buffer.size(), HAILO_INVALID_OPERATION, + "Read buffer is smaller then necessary. Buffer size = {}, generation size = {}", + buffer.size(), size_to_read); + + CHECK_SUCCESS(m_session->read(buffer.data(), size_to_read, timeout_guard.get_remaining_timeout())); + return size_to_read; + } + + hailo_status write(MemoryView buffer, std::chrono::milliseconds timeout = Session::DEFAULT_WRITE_TIMEOUT) + { + TimeoutGuard timeout_guard(timeout); + // First we send the buffer's size. Then the buffer itself. 
+ // TODO: Use hrpc protocol + size_t size = buffer.size(); + CHECK_SUCCESS(m_session->write(reinterpret_cast(&size), sizeof(size), timeout_guard.get_remaining_timeout())); + CHECK_SUCCESS(m_session->write(buffer.data(), size, timeout_guard.get_remaining_timeout())); + + return HAILO_SUCCESS; + } + +private: + std::shared_ptr m_session; +}; + +} /* namespace genai */ +} /* namespace hailort */ + +#endif /* _HAILO_COMMON_GENAI_SESSION_WRAPPER_HPP_ */ diff --git a/hailort/common/internal_env_vars.hpp b/hailort/common/internal_env_vars.hpp index 7c7990a..11bf927 100644 --- a/hailort/common/internal_env_vars.hpp +++ b/hailort/common/internal_env_vars.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -24,9 +24,6 @@ namespace hailort #define HAILO_SERVICE_SHARED_MEMORY_ENV_VAR ("HAILO_SERVICE_SHARED_MEMORY_OFF") #define HAILO_SERVICE_SHARED_MEMORY_OFF "1" -/* Defines a costum pcie port for raw-connection */ -#define HAILO_CONNECTION_PCIE_PORT_ENV_VAR ("HAILO_CONNECTION_PCIE_PORT") - /* Forces the client to use socket-based communication on a specific address. if not set, socket communicaiton wont be used. */ #define HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR ("HAILO_SOCKET_COM_ADDR_CLIENT") @@ -36,11 +33,11 @@ namespace hailort /* Forces Hailo session based on socket to use a specific device. 
This env var should be set to the iface name (i.e eth0) */ #define HAILO_SOCKET_BIND_TO_INTERFACE_ENV_VAR ("HAILO_SOCKET_BIND_TO_INTERFACE") -/* HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR and HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR can be set to either ip:port ("X.X.X.X:P"), +/* HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR and HAILO_SOCKET_COM_ADDR_SERVER_ENV_VAR can be set to either ("X.X.X.X"), or to HAILO_SOCKET_COM_ADDR_UNIX_SOCKET which forces working with unix-socket*/ #define HAILO_SOCKET_COM_ADDR_UNIX_SOCKET ("localhost") -/* Overrides hRPC requests timeout. value in seconds */ +/* Overrides hRPC/gRPC requests timeout. value in seconds */ #define HAILO_REQUEST_TIMEOUT_SECONDS ("HAILO_REQUEST_TIMEOUT_SECONDS") /* General */ @@ -104,12 +101,28 @@ namespace hailort /* Forces using descriptor-lists instead of CCB for inter-context-channels on h1x devices */ #define HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC_ENV_VAR ("HAILO_FORCE_INFER_CONTEXT_CHANNEL_OVER_DESC") +/* Determines the size of each mapped buffer into which the ccws section will be splitted to. + Relevant only when the aligned_ccws feature is enbabled */ +#define HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE_ENV_VAR ("HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE") +#define HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE (2 * 1024 * 1024) + +/* Forces copying the hef file content to a mapped buffer before configuring it's network groups. + When working with Hef as a file, we need this copy in order to work with the aligned ccws feature */ +#define HAILO_COPY_HEF_CONTENT_TO_A_MAPPED_BUFFER_PRE_CONFIGURE_ENV_VAR ("HAILO_COPY_HEF_CONTENT_TO_A_MAPPED_BUFFER_PRE_CONFIGURE") + +/* Disables the aligned ccws feature - in case this env var is set, the aligned_ccws feature won't be used. 
+ Instead - we will alocate aligned config buffers and will copy the CCWs to them */ +#define HAILO_DISABLE_ALIGNED_CCWS_ENV_VAR ("HAILO_DISABLE_ALIGNED_CCWS") + /* Forces using descriptor-lists instead of CCB for ddr-channels on h1x devices */ #define HAILO_FORCE_DDR_CHANNEL_OVER_CCB_ENV_VAR ("HAILO_FORCE_DDR_CHANNEL_OVER_CCB") /* Sets the default power-mode of the ConfiguredNetworkGroups to `HAILO_POWER_MODE_ULTRA_PERFORMANCE` */ #define FORCE_POWER_MODE_ULTRA_PERFORMANCE_ENV_VAR ("FORCE_POWER_MODE_ULTRA_PERFORMANCE") +/* Set HW infer Tool to use CCB for Boundary Channels*/ +#define HAILO_HW_INFER_BOUNDARY_CHANNELS_OVER_CCB_ENV_VAR ("HAILO_HW_INFER_BOUNDARY_CHANNELS_OVER_CCB") + } /* namespace hailort */ #endif /* HAILO_INTERNAL_ENV_VARS_HPP_ */ \ No newline at end of file diff --git a/hailort/common/latency_meter.hpp b/hailort/common/latency_meter.hpp index 5178e1f..593223b 100644 --- a/hailort/common/latency_meter.hpp +++ b/hailort/common/latency_meter.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/logger_macros.hpp b/hailort/common/logger_macros.hpp index fbfe21a..c57298d 100644 --- a/hailort/common/logger_macros.hpp +++ b/hailort/common/logger_macros.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/mmap_buffer.hpp b/hailort/common/mmap_buffer.hpp index 3d47fa0..55269c9 100644 --- a/hailort/common/mmap_buffer.hpp +++ b/hailort/common/mmap_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/posix/ethernet_utils.cpp b/hailort/common/os/posix/ethernet_utils.cpp index 556ead1..96c4188 100644 --- a/hailort/common/os/posix/ethernet_utils.cpp +++ b/hailort/common/os/posix/ethernet_utils.cpp @@ -1,3 +1,7 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ #include #include #include diff --git a/hailort/common/os/posix/file_descriptor.cpp b/hailort/common/os/posix/file_descriptor.cpp index 0cc6265..30d0ee6 100644 --- a/hailort/common/os/posix/file_descriptor.cpp +++ b/hailort/common/os/posix/file_descriptor.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/posix/filesystem.cpp b/hailort/common/os/posix/filesystem.cpp index 9448f08..f2b4c16 100644 --- a/hailort/common/os/posix/filesystem.cpp +++ b/hailort/common/os/posix/filesystem.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/posix/linux/event_os_specific.cpp b/hailort/common/os/posix/linux/event_os_specific.cpp index 5110754..93fbcb6 100644 --- a/hailort/common/os/posix/linux/event_os_specific.cpp +++ b/hailort/common/os/posix/linux/event_os_specific.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file event_os_specific.cpp * @brief Event/semaphore OS specific implementation for linux using eventfd diff --git a/hailort/common/os/posix/mmap_buffer.cpp b/hailort/common/os/posix/mmap_buffer.cpp index 38f15fb..588dafd 100644 --- a/hailort/common/os/posix/mmap_buffer.cpp +++ b/hailort/common/os/posix/mmap_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/posix/os_utils.cpp b/hailort/common/os/posix/os_utils.cpp index e4cc8e4..3bfca66 100644 --- a/hailort/common/os/posix/os_utils.cpp +++ b/hailort/common/os/posix/os_utils.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file os_utils.cpp * @brief Utilities for Posix methods diff --git a/hailort/common/os/posix/process.cpp b/hailort/common/os/posix/process.cpp index ca10204..888824c 100644 --- a/hailort/common/os/posix/process.cpp +++ b/hailort/common/os/posix/process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/posix/qnx/event_os_specific.cpp b/hailort/common/os/posix/qnx/event_os_specific.cpp index 3e02081..8c3df90 100644 --- a/hailort/common/os/posix/qnx/event_os_specific.cpp +++ b/hailort/common/os/posix/qnx/event_os_specific.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file event_os_specific.cpp * @brief Event/semaphore OS specific implementation for qnx using pevents diff --git a/hailort/common/os/posix/shared_memory_buffer.cpp b/hailort/common/os/posix/shared_memory_buffer.cpp index 2dd16b0..c64a30c 100644 --- a/hailort/common/os/posix/shared_memory_buffer.cpp +++ b/hailort/common/os/posix/shared_memory_buffer.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file shared_memory_buffer.cpp * @brief Posix Shared memory implementation @@ -33,7 +33,7 @@ Expected SharedMemoryBuffer::create(size_t size, const st CHECK_AS_EXPECTED(res != -1, HAILO_INTERNAL_FAILURE, "Failed to set size of shared memory object, errno = {}", errno); TRY(auto mmapped_buffer, MmapBuffer::create_file_map(size, shm_fd, 0)); - auto result = make_shared_nothrow(shm_name, std::move(shm_fd), std::move(mmapped_buffer), true); + auto result = make_shared_nothrow(shm_name, std::move(mmapped_buffer), true); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; @@ -46,7 +46,7 @@ Expected SharedMemoryBuffer::open(size_t size, const std: auto shm_fd = FileDescriptor(shm_segment_fd); TRY(auto mmapped_buffer, MmapBuffer::create_file_map(size, shm_fd, 0)); - auto result = make_shared_nothrow(shm_name, std::move(shm_fd), std::move(mmapped_buffer), false); + auto result = make_shared_nothrow(shm_name, std::move(mmapped_buffer), false); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; diff --git a/hailort/common/os/posix/socket.cpp b/hailort/common/os/posix/socket.cpp index 62b70bf..4b8ea77 100644 --- a/hailort/common/os/posix/socket.cpp +++ 
b/hailort/common/os/posix/socket.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -126,7 +126,15 @@ Expected Socket::accept() hailo_status Socket::connect(const sockaddr *addr, socklen_t len) { int ret = ::connect(m_socket_fd, addr, len); - CHECK(0 == ret, HAILO_ETH_FAILURE, "Failed to connect to socket {}", errno); + if (0 != ret) { + switch (errno) { + case ECONNREFUSED: + return HAILO_CONNECTION_REFUSED; + default: + LOGGER__ERROR("Failed to connect to socket {}", errno); + return HAILO_ETH_FAILURE; + } + } return HAILO_SUCCESS; } diff --git a/hailort/common/os/posix/traffic_control.cpp b/hailort/common/os/posix/traffic_control.cpp index e80381f..ebed6f5 100644 --- a/hailort/common/os/posix/traffic_control.cpp +++ b/hailort/common/os/posix/traffic_control.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/posix/traffic_control.hpp b/hailort/common/os/posix/traffic_control.hpp index 82cbfff..a6d9edb 100644 --- a/hailort/common/os/posix/traffic_control.hpp +++ b/hailort/common/os/posix/traffic_control.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/ethernet_utils.cpp b/hailort/common/os/windows/ethernet_utils.cpp index 9f4fa86..7fa34e5 100644 --- a/hailort/common/os/windows/ethernet_utils.cpp +++ b/hailort/common/os/windows/ethernet_utils.cpp @@ -1,3 +1,7 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ #include "common/ethernet_utils.hpp" diff --git a/hailort/common/os/windows/event_os_specific.cpp b/hailort/common/os/windows/event_os_specific.cpp index cb13c9a..bb42972 100644 --- a/hailort/common/os/windows/event_os_specific.cpp +++ b/hailort/common/os/windows/event_os_specific.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file event_os_specific.cpp * @brief Event/semaphore OS specific implementation for windows using event/semaphore HANDLE diff --git a/hailort/common/os/windows/file_descriptor.cpp b/hailort/common/os/windows/file_descriptor.cpp index e26cc4a..08272ac 100644 --- a/hailort/common/os/windows/file_descriptor.cpp +++ b/hailort/common/os/windows/file_descriptor.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/filesystem.cpp b/hailort/common/os/windows/filesystem.cpp index 44c4be7..e1497b4 100644 --- a/hailort/common/os/windows/filesystem.cpp +++ b/hailort/common/os/windows/filesystem.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/mmap_buffer.cpp b/hailort/common/os/windows/mmap_buffer.cpp index 8fbf3ce..0045a61 100644 --- a/hailort/common/os/windows/mmap_buffer.cpp +++ b/hailort/common/os/windows/mmap_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/named_mutex_guard.cpp b/hailort/common/os/windows/named_mutex_guard.cpp index c7cd773..2c07e59 100644 --- a/hailort/common/os/windows/named_mutex_guard.cpp +++ b/hailort/common/os/windows/named_mutex_guard.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file named_mutex_guard.hpp * @brief Named mutex guard implementation diff --git a/hailort/common/os/windows/named_mutex_guard.hpp b/hailort/common/os/windows/named_mutex_guard.hpp index 9cea4a6..cad69c7 100644 --- a/hailort/common/os/windows/named_mutex_guard.hpp +++ b/hailort/common/os/windows/named_mutex_guard.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file named_mutex_guard.hpp * @brief Named mutex guard diff --git a/hailort/common/os/windows/os_utils.cpp b/hailort/common/os/windows/os_utils.cpp index 178c6fc..94beeed 100644 --- a/hailort/common/os/windows/os_utils.cpp +++ b/hailort/common/os/windows/os_utils.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file os_utils.cpp * @brief Utilities for Windows methods diff --git a/hailort/common/os/windows/process.cpp b/hailort/common/os/windows/process.cpp index c57641d..53249d9 100644 --- a/hailort/common/os/windows/process.cpp +++ b/hailort/common/os/windows/process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/shared_memory_buffer.cpp b/hailort/common/os/windows/shared_memory_buffer.cpp index 66816ad..6af0f21 100644 --- a/hailort/common/os/windows/shared_memory_buffer.cpp +++ b/hailort/common/os/windows/shared_memory_buffer.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file shared_memory_buffer.cpp * @brief Shared memory implementaion in Windows. 
@@ -22,11 +22,11 @@ Expected SharedMemoryBuffer::create(size_t size, const st HANDLE handle_map_file = CreateFileMapping(INVALID_HANDLE_VALUE, nullptr, PAGE_READWRITE, 0, static_cast(size), static_cast(shm_name.c_str())); CHECK_AS_EXPECTED((handle_map_file != nullptr), HAILO_INTERNAL_FAILURE, "Failed to create shared memory object, error = {}", GetLastError()); - + auto shm_fd = FileDescriptor(handle_map_file); TRY(auto mmapped_buffer, MmapBuffer::create_file_map(size, shm_fd, 0)); - auto result = make_shared_nothrow(shm_name, std::move(shm_fd), std::move(mmapped_buffer), true); + auto result = make_shared_nothrow(shm_name, std::move(mmapped_buffer), true); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; @@ -40,7 +40,7 @@ Expected SharedMemoryBuffer::open(size_t size, const std: auto shm_fd = FileDescriptor(handle_map_file); TRY(auto mmapped_buffer, MmapBuffer::create_file_map(size, shm_fd, 0)); - auto result = make_shared_nothrow(shm_name, std::move(shm_fd), std::move(mmapped_buffer), false); + auto result = make_shared_nothrow(shm_name, std::move(mmapped_buffer), false); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; diff --git a/hailort/common/os/windows/socket.cpp b/hailort/common/os/windows/socket.cpp index ecb3d82..38e9cb5 100644 --- a/hailort/common/os/windows/socket.cpp +++ b/hailort/common/os/windows/socket.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/string_conversion.cpp b/hailort/common/os/windows/string_conversion.cpp index 2ca1255..6d21a63 100644 --- a/hailort/common/os/windows/string_conversion.cpp +++ b/hailort/common/os/windows/string_conversion.cpp @@ -1,3 +1,7 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ #include #include "common/os/windows/string_conversion.hpp" diff --git a/hailort/common/os/windows/string_conversion.hpp b/hailort/common/os/windows/string_conversion.hpp index aa7f621..e3ceb70 100644 --- a/hailort/common/os/windows/string_conversion.hpp +++ b/hailort/common/os/windows/string_conversion.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/os/windows/virtual_alloc_guard.cpp b/hailort/common/os/windows/virtual_alloc_guard.cpp index 6d07ba4..c1c5ded 100644 --- a/hailort/common/os/windows/virtual_alloc_guard.cpp +++ b/hailort/common/os/windows/virtual_alloc_guard.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file virtual_alloc_guard.cpp * @brief Guard object for VirtualAlloc and VirtualFree diff --git a/hailort/common/os/windows/virtual_alloc_guard.hpp b/hailort/common/os/windows/virtual_alloc_guard.hpp index d89c4ba..03fd80e 100644 --- a/hailort/common/os/windows/virtual_alloc_guard.hpp +++ b/hailort/common/os/windows/virtual_alloc_guard.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file virtual_alloc_guard.hpp * @brief Guard object for VirtualAlloc and VirtualFree (only for windows os). 
diff --git a/hailort/common/os_utils.hpp b/hailort/common/os_utils.hpp index 3521ad9..86fa741 100644 --- a/hailort/common/os_utils.hpp +++ b/hailort/common/os_utils.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file os_utils.hpp * @brief Utilities for OS methods diff --git a/hailort/common/process.hpp b/hailort/common/process.hpp index 014be18..b90a5bd 100644 --- a/hailort/common/process.hpp +++ b/hailort/common/process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/runtime_statistics_internal.hpp b/hailort/common/runtime_statistics_internal.hpp index c089fda..5e13428 100644 --- a/hailort/common/runtime_statistics_internal.hpp +++ b/hailort/common/runtime_statistics_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/shared_memory_buffer.hpp b/hailort/common/shared_memory_buffer.hpp index dbfb1a4..904ac2b 100644 --- a/hailort/common/shared_memory_buffer.hpp +++ b/hailort/common/shared_memory_buffer.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file shared_memory_buffer.hpp * @brief Shared memory buffer @@ -43,16 +43,14 @@ public: SharedMemoryBuffer &operator=(const SharedMemoryBuffer &) = delete; virtual ~SharedMemoryBuffer(); - SharedMemoryBuffer(const std::string &shm_name, FileDescriptor &&shm_fd, MmapBuffer &&shm_mmap_buffer, bool memory_owner) : + SharedMemoryBuffer(const std::string &shm_name, MmapBuffer &&shm_mmap_buffer, bool memory_owner) : m_shm_name(shm_name), - m_shm_fd(std::move(shm_fd)), m_shm_mmap_buffer(std::move(shm_mmap_buffer)), m_memory_owner(memory_owner) {} SharedMemoryBuffer(SharedMemoryBuffer&& other) noexcept : m_shm_name(std::exchange(other.m_shm_name, "")), - m_shm_fd(std::move(other.m_shm_fd)), m_shm_mmap_buffer(std::move(other.m_shm_mmap_buffer)), m_memory_owner(std::exchange(other.m_memory_owner, false)) {} @@ -71,7 +69,6 @@ public: private: std::string m_shm_name; - FileDescriptor m_shm_fd; MmapBuffer m_shm_mmap_buffer; bool m_memory_owner; }; diff --git a/hailort/common/socket.hpp b/hailort/common/socket.hpp index 26a2257..cf4a1b4 100644 --- a/hailort/common/socket.hpp +++ b/hailort/common/socket.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/thread_pool.hpp b/hailort/common/thread_pool.hpp index afb720d..f40ceaa 100644 --- a/hailort/common/thread_pool.hpp +++ b/hailort/common/thread_pool.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/thread_safe_queue.hpp b/hailort/common/thread_safe_queue.hpp index 2c3c906..a8f05e6 100644 --- a/hailort/common/thread_safe_queue.hpp +++ b/hailort/common/thread_safe_queue.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/utils.cpp b/hailort/common/utils.cpp index 5c8fa84..517c0c8 100644 --- a/hailort/common/utils.cpp +++ b/hailort/common/utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/common/utils.hpp b/hailort/common/utils.hpp index dccfa5e..618b217 100644 --- a/hailort/common/utils.hpp +++ b/hailort/common/utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -298,6 +298,7 @@ inline hailo_status get_status(const Expected &exp) // Macros that check status. If status is 'valid_error', return without printing error to the prompt. #define CHECK_EXPECTED_WITH_ACCEPTABLE_STATUS(valid_error, exp, ...) if (valid_error == (exp).status()) {return make_unexpected(valid_error);} CHECK_SUCCESS(exp, __VA_ARGS__); +#define CHECK_SUCCESS_WITH_ACCEPTABLE_STATUS(valid_error, status, ...) 
if ((valid_error) == (status)) {return make_unexpected(valid_error);} CHECK_SUCCESS(status, __VA_ARGS__); #define __HAILO_CONCAT(x, y) x ## y @@ -397,6 +398,37 @@ static inline bool is_env_variable_on(const char *env_var_name, const std::strin return ((nullptr != env_var) && (strncmp(env_var, required_value.c_str(), required_value.size()) == 0)); } +static inline Expected get_env_variable_as_size(const char *env_var_name) { + const char *env_val = std::getenv(env_var_name); + if (!env_val) { + return make_unexpected(HAILO_NOT_FOUND); + } + + static const int DECIMAL_BASE = 10; + errno = 0; + char *end = nullptr; + size_t result = std::strtoull(env_val, &end, DECIMAL_BASE); + + /* + * Check if the conversion succeeded completely: + * If an error occurs during conversion (for example, due to overflow), std::strtoull will set errno to a non-zero value. + * For a successful conversion, std::strtoull should consume the entire string, meaning that the character pointed + * to by 'end' must be the null terminator ('\0'). + * Thus, a successful conversion requires both errno == 0 and *end == '\0'. + */ + if (errno != 0 || (*end != '\0')) { + LOGGER__ERROR("Failed to parse environment variable HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE"); + return make_unexpected(HAILO_INVALID_ARGUMENT); + } + + return Expected(result); +} + +// When moving to C++17, use std::clamp +constexpr size_t clamp(size_t v, size_t lo, size_t hi) { + return (v < lo) ? lo : (v > hi) ? 
hi : v; +} + static inline Expected get_env_variable(const std::string &env_var_name) { const auto env_var = std::getenv(env_var_name.c_str()); @@ -413,6 +445,23 @@ static inline Expected get_env_variable(const std::string &env_var_ return Expected(result); } +template +Expected get_hailo_format_type() +{ + static const std::unordered_map type_map = { + {typeid(uint8_t).hash_code(), HAILO_FORMAT_TYPE_UINT8}, + {typeid(uint16_t).hash_code(), HAILO_FORMAT_TYPE_UINT16}, + {typeid(float32_t).hash_code(), HAILO_FORMAT_TYPE_FLOAT32} + }; + + auto it = type_map.find(typeid(T).hash_code()); + if (it != type_map.end()) { + auto result = it->second; + return result; + } + return make_unexpected(HAILO_NOT_FOUND); +} + class CRC32 { public: CRC32() { @@ -601,6 +650,25 @@ private: } }; +class TimeoutGuard final +{ +public: + explicit TimeoutGuard(std::chrono::milliseconds total_timeout) + : m_start_time(std::chrono::steady_clock::now()), m_total_timeout(total_timeout) {} + + std::chrono::milliseconds get_remaining_timeout() const { + auto elapsed = std::chrono::duration_cast(std::chrono::steady_clock::now() - m_start_time); + if (elapsed >= m_total_timeout) { + return std::chrono::milliseconds(0); // Timeout exceeded + } + return m_total_timeout - elapsed; + } + +private: + std::chrono::steady_clock::time_point m_start_time; + std::chrono::milliseconds m_total_timeout; +}; + } /* namespace hailort */ #endif /* HAILO_UTILS_H_ */ \ No newline at end of file diff --git a/hailort/drivers/common/hailo_ioctl_common.h b/hailort/drivers/common/hailo_ioctl_common.h index 1c58147..9821dc8 100644 --- a/hailort/drivers/common/hailo_ioctl_common.h +++ b/hailort/drivers/common/hailo_ioctl_common.h @@ -1,14 +1,14 @@ // SPDX-License-Identifier: (GPL-2.0 WITH Linux-syscall-note) AND MIT /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
**/ #ifndef _HAILO_IOCTL_COMMON_H_ #define _HAILO_IOCTL_COMMON_H_ #define HAILO_DRV_VER_MAJOR 4 -#define HAILO_DRV_VER_MINOR 20 -#define HAILO_DRV_VER_REVISION 1 +#define HAILO_DRV_VER_MINOR 21 +#define HAILO_DRV_VER_REVISION 0 #define _STRINGIFY_EXPANDED( x ) #x #define _STRINGIFY_NUMBER( x ) _STRINGIFY_EXPANDED(x) @@ -33,21 +33,20 @@ #define INVALID_DRIVER_HANDLE_VALUE ((uintptr_t)-1) // Used by windows and unix driver to raise the right CPU control handle to the FW. The same as in pcie_service FW -#define FW_ACCESS_CORE_CPU_CONTROL_SHIFT (1) -#define FW_ACCESS_CORE_CPU_CONTROL_MASK (1 << FW_ACCESS_CORE_CPU_CONTROL_SHIFT) -#define FW_ACCESS_CONTROL_INTERRUPT_SHIFT (0) -#define FW_ACCESS_APP_CPU_CONTROL_MASK (1 << FW_ACCESS_CONTROL_INTERRUPT_SHIFT) -#define FW_ACCESS_DRIVER_SHUTDOWN_SHIFT (2) -#define FW_ACCESS_DRIVER_SHUTDOWN_MASK (1 << FW_ACCESS_DRIVER_SHUTDOWN_SHIFT) -// HRT-15790 TODO: separate nnc interrupts and soc interrupts -#define FW_ACCESS_SOFT_RESET_SHIFT (3) -#define FW_ACCESS_SOFT_RESET_MASK (1 << FW_ACCESS_SOFT_RESET_SHIFT) +enum hailo_pcie_nnc_interrupt_masks { + FW_ACCESS_APP_CPU_CONTROL_MASK = (1 << 0), + FW_ACCESS_CORE_CPU_CONTROL_MASK = (1 << 1), + FW_ACCESS_DRIVER_SHUTDOWN_MASK = (1 << 2), + FW_ACCESS_SOFT_RESET_MASK = (1 << 3), +}; -#define FW_ACCESS_SOC_CONTROL_SHIFT (3) -#define FW_ACCESS_SOC_CONTROL_MASK (1 << FW_ACCESS_SOC_CONTROL_SHIFT) +enum hailo_pcie_soc_interrupt_masks { + FW_ACCESS_SOC_CONTROL_MASK = (1 << 3), +}; #define INVALID_VDMA_CHANNEL (0xff) +#define HAILO_DMA_DIRECTION_EQUALS(a, b) (a == HAILO_DMA_BIDIRECTIONAL || b == HAILO_DMA_BIDIRECTIONAL || a == b) #if !defined(__cplusplus) && defined(NTDDI_VERSION) #include @@ -257,16 +256,40 @@ struct hailo_write_action_list_params { }; /* structure used in ioctl HAILO_DESC_LIST_BIND_VDMA_BUFFER */ +/** + * Programs the descriptions list (desc_handle), starting from starting_desc, with the given buffer. 
+ * The buffer is referenced by buffer_handle (the base buffer), size, offset and batch_size. + * The ioctl will start at offset, and will program `size` bytes in chunks of `batch_size` bytes. + * + * For example, if buffer_offset is 0x1000, buffer_size=0x300, batch_size=2, and desc_page_size is 0x200 (desc + * page size is taken from the descriptors list), we program the following pattern: + * desc[starting_desc] = { .address = base_buffer+0x1000, .size= 0x200 } + * desc[starting_desc+1] = { .address = base_buffer+0x1200, .size= 0x100 } + * desc[starting_desc+2] = { .address = base_buffer+0x1400, .size= 0x200 } + * desc[starting_desc+3] = { .address = base_buffer+0x1600, .size= 0x100 } + * + * The stride is the amount of bytes to really program. + * If the stride is 0, the stride is calculated as the desc_page_size. + * Else, the stride is the given stride. + * The stride must be <= desc_page_size. + * + * For example, if stride=108, buffer_size=0x600 and desc_page_size is 0x200 the pattern will be: + * desc[starting_desc] = { .address = base_buffer, .size= 0x108 } + * desc[starting_desc+1] = { .address = base_buffer+0x200, .size= 0x108 } + * desc[starting_desc+2] = { .address = base_buffer+0x400, .size= 0x108 } + */ struct hailo_desc_list_program_params { size_t buffer_handle; // in size_t buffer_size; // in size_t buffer_offset; // in + uint32_t batch_size; // in uintptr_t desc_handle; // in uint8_t channel_index; // in uint32_t starting_desc; // in bool should_bind; // in enum hailo_vdma_interrupts_domain last_interrupts_domain; // in bool is_debug; // in + uint32_t stride; // in }; /* structure used in ioctl HAILO_VDMA_ENABLE_CHANNELS */ @@ -284,11 +307,12 @@ struct hailo_vdma_disable_channels_params { struct hailo_vdma_interrupts_channel_data { uint8_t engine_index; uint8_t channel_index; - bool is_active; // If not activate, num_processed is ignored. - uint8_t transfers_completed; // Number of transfers completed. 
- uint8_t host_error; // Channel errors bits on source side - uint8_t device_error; // Channel errors bits on dest side - bool validation_success; // If the validation of the channel was successful + +#define HAILO_VDMA_TRANSFER_DATA_CHANNEL_NOT_ACTIVE (0xff) +#define HAILO_VDMA_TRANSFER_DATA_CHANNEL_WITH_ERROR (0xfe) + + // Either amount of transfers done or one of the above defines + uint8_t data; }; struct hailo_vdma_interrupts_wait_params { @@ -406,6 +430,7 @@ enum hailo_board_type { HAILO_BOARD_TYPE_HAILO15L, HAILO_BOARD_TYPE_HAILO10H, HAILO_BOARD_TYPE_HAILO10H_LEGACY, + HAILO_BOARD_TYPE_MARS, HAILO_BOARD_TYPE_COUNT, /** Max enum value to maintain ABI Integrity */ @@ -486,14 +511,15 @@ struct hailo_free_continuous_buffer_params { /* structures used in ioctl HAILO_VDMA_LAUNCH_TRANSFER */ struct hailo_vdma_transfer_buffer { - size_t mapped_buffer_handle; // in - uint32_t offset; // in - uint32_t size; // in + enum hailo_dma_buffer_type buffer_type; // in + uintptr_t addr_or_fd; // in + uint32_t size; // in }; -// We allow maximum 2 buffers per transfer since we may have an extra buffer -// to make sure each buffer is aligned to page size. -#define HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER (2) +// The size is a tradeoff between ioctl/stack buffers size and the amount of buffers we +// want to transfer. (If user mode wants to transfer more buffers, it should call the +// ioctl multiple times). +#define HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER (8) struct hailo_vdma_launch_transfer_params { uint8_t engine_index; // in @@ -512,9 +538,6 @@ struct hailo_vdma_launch_transfer_params { bool is_debug; // in, if set program hw to send // more info (e.g desc complete status) - - uint32_t descs_programed; // out, amount of descriptors programed. - int launch_transfer_status; // out, status of the launch transfer call. 
(only used in case of error) }; /* structure used in ioctl HAILO_SOC_CONNECT */ @@ -638,7 +661,7 @@ enum hailo_vdma_ioctl_code { #define HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_ALLOC_CODE, struct hailo_allocate_continuous_buffer_params) #define HAILO_VDMA_CONTINUOUS_BUFFER_FREE _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_CONTINUOUS_BUFFER_FREE_CODE, struct hailo_free_continuous_buffer_params) -#define HAILO_VDMA_LAUNCH_TRANSFER _IOWR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LAUNCH_TRANSFER_CODE, struct hailo_vdma_launch_transfer_params) +#define HAILO_VDMA_LAUNCH_TRANSFER _IOR_(HAILO_VDMA_IOCTL_MAGIC, HAILO_VDMA_LAUNCH_TRANSFER_CODE, struct hailo_vdma_launch_transfer_params) enum hailo_nnc_ioctl_code { HAILO_FW_CONTROL_CODE, @@ -662,14 +685,14 @@ enum hailo_nnc_ioctl_code { enum hailo_soc_ioctl_code { HAILO_SOC_IOCTL_CONNECT_CODE, HAILO_SOC_IOCTL_CLOSE_CODE, - + HAILO_SOC_IOCTL_POWER_OFF_CODE, // Must be last HAILO_SOC_IOCTL_MAX_NR, }; #define HAILO_SOC_CONNECT _IOWR_(HAILO_SOC_IOCTL_MAGIC, HAILO_SOC_IOCTL_CONNECT_CODE, struct hailo_soc_connect_params) #define HAILO_SOC_CLOSE _IOR_(HAILO_SOC_IOCTL_MAGIC, HAILO_SOC_IOCTL_CLOSE_CODE, struct hailo_soc_close_params) - +#define HAILO_SOC_POWER_OFF _IO_(HAILO_SOC_IOCTL_MAGIC, HAILO_SOC_IOCTL_POWER_OFF_CODE) enum hailo_pci_ep_ioctl_code { HAILO_PCI_EP_ACCEPT_CODE, diff --git a/hailort/hailort_server/CMakeLists.txt b/hailort/hailort_server/CMakeLists.txt deleted file mode 100644 index eed3c62..0000000 --- a/hailort/hailort_server/CMakeLists.txt +++ /dev/null @@ -1,38 +0,0 @@ -cmake_minimum_required(VERSION 3.5.0) - -set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(Threads REQUIRED) - -set(HAILORT_SERVER_SOURCES - hailort_server.cpp - ${HRPC_CPP_SOURCES} - ${HRPC_PROTOCOL_CPP_SOURCES} - ${HAILORT_SERVICE_DIR}/cng_buffer_pool.cpp - ${DRIVER_OS_DIR}/driver_os_specific.cpp - ${HAILORT_SRC_DIR}/vdma/pcie_session.cpp - ${HAILORT_SRC_DIR}/vdma/memory/descriptor_list.cpp - 
${HAILORT_SRC_DIR}/vdma/memory/mapped_buffer.cpp - ${HAILORT_SRC_DIR}/vdma/memory/dma_able_buffer.cpp - ${HAILORT_SRC_DIR}/vdma/driver/hailort_driver.cpp - ${HAILORT_SRC_DIR}/vdma/channel/interrupts_dispatcher.cpp - ${HAILORT_SRC_DIR}/vdma/channel/transfer_launcher.cpp - ${HAILORT_SRC_DIR}/vdma/channel/boundary_channel.cpp - ${HAILORT_SRC_DIR}/vdma/channel/channels_group.cpp - ${HAILORT_SRC_DIR}/vdma/channel/transfer_common.cpp -) - -add_executable(hailort_server ${HAILORT_SERVER_SOURCES}) -target_include_directories(hailort_server PRIVATE - ${HAILORT_SRC_DIR} - ${COMMON_INC_DIR} - ${DRIVER_INC_DIR} -) -target_compile_options(hailort_server PRIVATE ${HAILORT_COMPILE_OPTIONS}) -set_property(TARGET hailort_server PROPERTY CXX_STANDARD 14) -set_property(TARGET hailort_server PROPERTY INSTALL_RPATH "$ORIGIN" "../lib/") # Link with a relative libhailort -target_link_libraries(hailort_server PRIVATE - libhailort - Threads::Threads - rpc_proto - hailort_common -) diff --git a/hailort/hailort_server/hailort_server.cpp b/hailort/hailort_server/hailort_server.cpp deleted file mode 100644 index 254055e..0000000 --- a/hailort/hailort_server/hailort_server.cpp +++ /dev/null @@ -1,808 +0,0 @@ -/** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file hailo_server.cpp - * @brief Hailo Server - **/ - -#include "hailort_server.hpp" -#include "hailo/hailort.h" -#include "hrpc/server.hpp" -#include "hailo/vdevice.hpp" -#include "hrpc_protocol/serializer.hpp" -#include "net_flow/ops/nms_post_process.hpp" -#include "hailort_service/service_resource_manager.hpp" -#include "common/thread_safe_queue.hpp" -#include "hrpc/connection_context.hpp" -#include "vdma/pcie_session.hpp" - -#include -#include - -using namespace hailort; - -// TODO: These macros should be merged with the grpc macros, also change them to TRY -#define CHECK_EXPECTED_AS_HRPC_STATUS(_exepcted, T) \ - do { \ - if (!_exepcted) { \ - LOGGER__ERROR("CHECK_EXPECTED_AS_HRPC_STATUS failed, status: {}", _exepcted.status()); \ - auto reply = T::serialize_reply(_exepcted.status()); \ - if (reply) return reply; \ - LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); \ - return make_unexpected(HAILO_INTERNAL_FAILURE); \ - } \ - } while (0) -#define CHECK_SUCCESS_AS_HRPC_STATUS(_status, T) \ - do { \ - if (_status != HAILO_SUCCESS) { \ - LOGGER__ERROR("CHECK_SUCCESS_AS_HRPC_STATUS failed, status: {}", _status); \ - auto reply = T::serialize_reply(_status); \ - if (reply) return reply; \ - LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); \ - return make_unexpected(HAILO_INTERNAL_FAILURE); \ - } \ - } while (0) -#define CHECK_AS_HRPC_STATUS(_cond, _status, T) \ - do { \ - if (!(_cond)) { \ - LOGGER__ERROR("CHECK_AS_HRPC_STATUS failed, status: {}", _status); \ - auto reply = T::serialize_reply(_status); \ - if (reply) return reply; \ - LOGGER__CRITICAL("Failed to create reply with status: {}", reply.status()); \ - return make_unexpected(HAILO_INTERNAL_FAILURE); \ - } \ - } while (0) - -#define __HAILO_CONCAT(x, y) x ## y -#define _HAILO_CONCAT(x, y) __HAILO_CONCAT(x, y) - -#define _TRY_AS_HRPC_STATUS(expected_var_name, 
var_decl, expr, ...) \ - auto expected_var_name = (expr); \ - CHECK_EXPECTED_AS_HRPC_STATUS(expected_var_name, __VA_ARGS__); \ - var_decl = expected_var_name.release() - -#define TRY_AS_HRPC_STATUS(var_decl, expr, ...) _TRY_AS_HRPC_STATUS(_HAILO_CONCAT(__expected, __COUNTER__), var_decl, expr, __VA_ARGS__) - -#ifdef NDEBUG -#define LOGGER_PATTERN ("[%n] [%^%l%$] %v") -#else -#define LOGGER_PATTERN ("[%Y-%m-%d %X.%e] [%P] [%t] [%n] [%^%l%$] [%s:%#] [%!] %v") -#endif - -// TODO: Benchmark this factor (HRT-15727) -#define ASYNC_QUEUE_SIZE_FACTOR (2) // double buffer - -struct InferModelInfo -{ - std::unordered_map input_streams_sizes; - std::unordered_map output_streams_sizes; - std::vector inputs_names; - std::vector outputs_names; -}; - -void init_logger(const std::string &name) -{ - auto console_sink = make_shared_nothrow(); - console_sink->set_level(spdlog::level::info); - console_sink->set_pattern(LOGGER_PATTERN); - spdlog::set_default_logger(make_shared_nothrow(name, console_sink)); -} - -void HailoRTServer::cleanup_infer_model_hef_buffers(const std::vector &infer_model_handles) -{ - for (const auto &infer_model_handle : infer_model_handles) { - auto hef_buffers_iter = m_hef_buffers_per_infer_model.find(infer_model_handle); - if (m_hef_buffers_per_infer_model.end() != hef_buffers_iter) { - m_hef_buffers_per_infer_model.erase(infer_model_handle); - } - } -} - -void HailoRTServer::cleanup_cim_buffer_pools(const std::vector &cim_handles) -{ - std::lock_guard lock(m_buffer_pool_mutex); - for (const auto &cim_handle : cim_handles) { - m_buffer_pool_per_cim.erase(cim_handle); - } -} - -hailo_status HailoRTServer::cleanup_client_resources(RpcConnection client_connection) -{ - std::set pids = {SINGLE_CLIENT_PID}; - auto cim_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); - (void)ServiceResourceManager::get_instance().release_by_pid(SINGLE_CLIENT_PID); - cleanup_cim_buffer_pools(cim_handles); - - auto infer_model_handles = 
ServiceResourceManager::get_instance().resources_handles_by_pids(pids); - (void)ServiceResourceManager::get_instance().release_by_pid(SINGLE_CLIENT_PID); - (void)ServiceResourceManager::get_instance().release_by_pid(SINGLE_CLIENT_PID); - cleanup_infer_model_hef_buffers(infer_model_handles); - m_infer_model_to_info_id.clear(); - - (void)ServiceResourceManager::get_instance().release_by_pid(SINGLE_CLIENT_PID); - CHECK_SUCCESS(client_connection.close()); - return HAILO_SUCCESS; -} - -Expected> HailoRTServer::create_unique() -{ - TRY(auto connection_context, ConnectionContext::create_server_shared()); - TRY(auto callbacks_queue_shutdown_event, Event::create_shared(Event::State::not_signalled)); - auto callbacks_done_queue = SpscQueue::create_shared(PcieSession::MAX_ONGOING_TRANSFERS, callbacks_queue_shutdown_event); - CHECK_NOT_NULL_AS_EXPECTED(callbacks_done_queue, HAILO_OUT_OF_HOST_MEMORY); - - auto res = make_unique_nothrow(connection_context, callbacks_done_queue, callbacks_queue_shutdown_event); - CHECK_NOT_NULL(res, HAILO_OUT_OF_HOST_MEMORY); - return res; -} - -HailoRTServer::HailoRTServer(std::shared_ptr connection_context, - std::shared_ptr> callbacks_done_queue, - EventPtr callbacks_queue_shutdown_event) : Server(connection_context), m_callbacks_done_queue(callbacks_done_queue), - m_callbacks_queue_shutdown_event(callbacks_queue_shutdown_event) -{ - m_callbacks_thread = std::thread([this] { - auto status = callbacks_thread_loop(); - if (HAILO_SUCCESS != status) { - LOGGER__CRITICAL("Callback thread has failed with status {}. 
Server should restart!", status); - } - }); -} - -hailo_status HailoRTServer::callbacks_thread_loop() -{ - while (true) { - auto request = m_callbacks_done_queue->dequeue(std::chrono::milliseconds(HAILO_INFINITE)); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == request.status()) { - break; - } - CHECK_EXPECTED_AS_STATUS(request); - - auto status = trigger_callback(request->callback_id, request->completion_info.status, request->configured_infer_model_handle, - request->connection, [this, &request] (RpcConnection connection) -> hailo_status { - if (HAILO_SUCCESS == request->completion_info.status) { - for (auto output : request->outputs) { - auto status = connection.wait_for_write_buffer_async_ready(output->size(), SERVER_TIMEOUT); - CHECK_SUCCESS(status); - - status = connection.write_buffer_async(MemoryView(*output), [output] (hailo_status status) { - (void)output; // capturing output so it won't be freed before the callback is called - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to write buffer, status = {}", status); - } - }); - CHECK_SUCCESS(status); - } - - std::lock_guard lock(m_buffer_pool_mutex); - for (uint32_t i = 0; i < request->outputs.size(); i++) { - if (m_buffer_pool_per_cim.contains(request->configured_infer_model_handle)) { - auto status = m_buffer_pool_per_cim.at(request->configured_infer_model_handle)->return_to_pool(request->outputs_names[i], request->outputs[i]); - CHECK_SUCCESS(status); - } - } - } - return HAILO_SUCCESS; - }); - // HAILO_COMMUNICATION_CLOSED means the client disconnected. Server doesn't need to restart in this case. 
- if (status != HAILO_COMMUNICATION_CLOSED) { - CHECK_SUCCESS(status); - } - } - return HAILO_SUCCESS; -} - -HailoRTServer::~HailoRTServer() -{ - auto status = m_callbacks_queue_shutdown_event->signal(); - if (HAILO_SUCCESS != status) { - LOGGER__CRITICAL("Failed to signal shutdown event, status = {}", status); - } - - if (m_callbacks_thread.joinable()) { - m_callbacks_thread.join(); - } -} - -int main() -{ - init_logger("HailoRT-Server"); - TRY(auto server, HailoRTServer::create_unique()); - Dispatcher dispatcher; - - // TODO: add a server implementation class, with resources heiracrhy and more - auto &infer_model_to_info_id = server->infer_model_to_info_id(); - auto &buffer_pool_per_cim = server->buffer_pool_per_cim(); - - // Because the infer model is created with a hef buffer, we need to keep the buffer until the configure stage. - // Here I keep it until the infer model is destroyed - auto &hef_buffers = server->hef_buffers(); - - dispatcher.register_action(HailoRpcActionID::VDEVICE__CREATE, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - TRY_AS_HRPC_STATUS(auto vdevice_params, CreateVDeviceSerializer::deserialize_request(request), CreateVDeviceSerializer); - TRY_AS_HRPC_STATUS(auto vdevice, VDevice::create(vdevice_params.get()), CreateVDeviceSerializer); - - auto &manager = ServiceResourceManager::get_instance(); - auto id = manager.register_resource(SINGLE_CLIENT_PID, std::move(vdevice)); - auto reply = CreateVDeviceSerializer::serialize_reply(HAILO_SUCCESS, id); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::VDEVICE__DESTROY, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &manager = ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto vdevice_handle, DestroyVDeviceSerializer::deserialize_request(request), DestroyVDeviceSerializer); - (void)manager.release_resource(vdevice_handle, SINGLE_CLIENT_PID); - TRY_AS_HRPC_STATUS(auto reply, 
DestroyVDeviceSerializer::serialize_reply(HAILO_SUCCESS), DestroyVDeviceSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::VDEVICE__CREATE_INFER_MODEL, - [&hef_buffers] (const MemoryView &request, ServerContextPtr server_context) -> Expected { - TRY_AS_HRPC_STATUS(auto tuple, CreateInferModelSerializer::deserialize_request(request), CreateInferModelSerializer); - auto vdevice_handle = std::get<0>(tuple); - uint64_t hef_size = std::get<1>(tuple); - auto name = std::get<2>(tuple); - - assert(hef_size <= SIZE_MAX); - TRY_AS_HRPC_STATUS(auto hef_buffer, Buffer::create(static_cast(hef_size), BufferStorageParams::create_dma()), - CreateInferModelSerializer); - - auto status = server_context->connection().read_buffer(MemoryView(hef_buffer)); - CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateInferModelSerializer); - - auto &vdevice_manager = ServiceResourceManager::get_instance(); - auto lambda = [view = MemoryView(hef_buffer), &name] (std::shared_ptr vdevice) { - return vdevice->create_infer_model(view, name); - }; - auto infer_model = vdevice_manager.execute>>(vdevice_handle, lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(infer_model, CreateInferModelSerializer); - - auto &infer_model_manager = ServiceResourceManager::get_instance(); - auto infer_model_id = infer_model_manager.register_resource(SINGLE_CLIENT_PID, std::move(infer_model.release())); - hef_buffers.emplace(infer_model_id, std::move(hef_buffer)); - - TRY_AS_HRPC_STATUS(auto reply, CreateInferModelSerializer::serialize_reply(HAILO_SUCCESS, infer_model_id), CreateInferModelSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::INFER_MODEL__DESTROY, - [&hef_buffers] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &manager = ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto infer_model_handle, DestroyInferModelSerializer::deserialize_request(request), DestroyInferModelSerializer); - 
hef_buffers.erase(infer_model_handle); - (void)manager.release_resource(infer_model_handle, SINGLE_CLIENT_PID); - TRY_AS_HRPC_STATUS(auto reply, DestroyInferModelSerializer::serialize_reply(HAILO_SUCCESS), DestroyInferModelSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::INFER_MODEL__CREATE_CONFIGURED_INFER_MODEL, - [&buffer_pool_per_cim, &infer_model_to_info_id] - (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &infer_model_manager = ServiceResourceManager::get_instance(); - - TRY_AS_HRPC_STATUS(auto request_params, CreateConfiguredInferModelSerializer::deserialize_request(request), CreateConfiguredInferModelSerializer); - const auto &infer_model_handle = request_params.infer_model_handle; - const auto &vdevice_handle = request_params.vdevice_handle; - - auto lambda = [&request_params] (std::shared_ptr infer_model) -> Expected { - const auto &input_streams_formats = request_params.input_streams_params; - const auto &output_streams_formats = request_params.output_streams_params; - for (const auto &input_stream_format : input_streams_formats) { - TRY(auto input, infer_model->input(input_stream_format.first)); - - input.set_format_order(static_cast(input_stream_format.second.format_order)); - input.set_format_type(static_cast(input_stream_format.second.format_type)); - if (INVALID_NMS_CONFIG != input_stream_format.second.nms_score_threshold) { - input.set_nms_score_threshold(input_stream_format.second.nms_score_threshold); - } - if (INVALID_NMS_CONFIG != input_stream_format.second.nms_iou_threshold) { - input.set_nms_iou_threshold(input_stream_format.second.nms_iou_threshold); - } - if (static_cast(INVALID_NMS_CONFIG) != input_stream_format.second.nms_max_proposals_per_class) { - input.set_nms_max_proposals_per_class(input_stream_format.second.nms_max_proposals_per_class); - } - if (static_cast(INVALID_NMS_CONFIG) != input_stream_format.second.nms_max_proposals_total) { - 
input.set_nms_max_proposals_total(input_stream_format.second.nms_max_proposals_total); - } - if (static_cast(INVALID_NMS_CONFIG) != input_stream_format.second.nms_max_accumulated_mask_size) { - input.set_nms_max_accumulated_mask_size(input_stream_format.second.nms_max_accumulated_mask_size); - } - } - - for (const auto &output_stream_format : output_streams_formats) { - TRY(auto output, infer_model->output(output_stream_format.first)); - output.set_format_order(static_cast(output_stream_format.second.format_order)); - output.set_format_type(static_cast(output_stream_format.second.format_type)); - if (INVALID_NMS_CONFIG != output_stream_format.second.nms_score_threshold) { - output.set_nms_score_threshold(output_stream_format.second.nms_score_threshold); - } - if (INVALID_NMS_CONFIG != output_stream_format.second.nms_iou_threshold) { - output.set_nms_iou_threshold(output_stream_format.second.nms_iou_threshold); - } - if (static_cast(INVALID_NMS_CONFIG) != output_stream_format.second.nms_max_proposals_per_class) { - output.set_nms_max_proposals_per_class(output_stream_format.second.nms_max_proposals_per_class); - } - if (static_cast(INVALID_NMS_CONFIG) != output_stream_format.second.nms_max_proposals_total) { - output.set_nms_max_proposals_total(output_stream_format.second.nms_max_proposals_total); - } - if (static_cast(INVALID_NMS_CONFIG) != output_stream_format.second.nms_max_accumulated_mask_size) { - output.set_nms_max_accumulated_mask_size(output_stream_format.second.nms_max_accumulated_mask_size); - } - } - - infer_model->set_batch_size(request_params.batch_size); - infer_model->set_power_mode(request_params.power_mode); - infer_model->set_hw_latency_measurement_flags(request_params.latency_flag); - - return infer_model->configure(); - }; - - auto configured_infer_model = infer_model_manager.execute>(infer_model_handle, lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model, CreateConfiguredInferModelSerializer); - - TRY_AS_HRPC_STATUS(auto 
async_queue_size, configured_infer_model->get_async_queue_size(), CreateConfiguredInferModelSerializer); - auto set_model_info_lambda = [] (std::shared_ptr infer_model) -> Expected> { - auto infer_model_info = make_shared_nothrow(); - CHECK_NOT_NULL_AS_EXPECTED(infer_model_info, HAILO_OUT_OF_HOST_MEMORY); - - for (const auto &input : infer_model->inputs()) { - infer_model_info->input_streams_sizes.emplace(input.name(), input.get_frame_size()); - infer_model_info->inputs_names.push_back(input.name()); - } - for (const auto &output : infer_model->outputs()) { - infer_model_info->output_streams_sizes.emplace(output.name(), output.get_frame_size()); - infer_model_info->outputs_names.push_back(output.name()); - } - return infer_model_info; - }; - auto model_info = infer_model_manager.execute>>(infer_model_handle, set_model_info_lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(model_info, CreateConfiguredInferModelSerializer); - - auto &infer_model_infos_manager = ServiceResourceManager::get_instance(); - auto infer_model_info_id = infer_model_infos_manager.register_resource(SINGLE_CLIENT_PID, std::move(model_info.release())); - - auto &cim_manager = ServiceResourceManager::get_instance(); - auto cim_id = cim_manager.register_resource(SINGLE_CLIENT_PID, - std::move(make_shared_nothrow(configured_infer_model.release()))); - - auto buffer_pool = ServiceNetworkGroupBufferPool::create(vdevice_handle); - CHECK_EXPECTED_AS_HRPC_STATUS(buffer_pool, CreateConfiguredInferModelSerializer); - - auto buffer_pool_ptr = buffer_pool.release(); - auto get_infer_model_info_lambda = [] (std::shared_ptr infer_model_info) { - return *infer_model_info; - }; - auto infer_model_info = infer_model_infos_manager.execute>(infer_model_info_id, get_infer_model_info_lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(infer_model_info, CreateConfiguredInferModelSerializer); - - for (const auto &input_name : infer_model_info->inputs_names) { - auto status = buffer_pool_ptr->allocate_pool(input_name, 
HAILO_DMA_BUFFER_DIRECTION_D2H, - infer_model_info->input_streams_sizes[input_name], async_queue_size * ASYNC_QUEUE_SIZE_FACTOR); - CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateConfiguredInferModelSerializer); - } - for (const auto &output_name : infer_model_info->outputs_names) { - auto status = buffer_pool_ptr->allocate_pool(output_name, HAILO_DMA_BUFFER_DIRECTION_H2D, - infer_model_info->output_streams_sizes[output_name], async_queue_size * ASYNC_QUEUE_SIZE_FACTOR); - CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateConfiguredInferModelSerializer); - } - buffer_pool_per_cim.emplace(cim_id, buffer_pool_ptr); - - infer_model_to_info_id[infer_model_handle] = infer_model_info_id; - TRY_AS_HRPC_STATUS(auto reply, - CreateConfiguredInferModelSerializer::serialize_reply(HAILO_SUCCESS, cim_id, static_cast(async_queue_size)), - CreateConfiguredInferModelSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__DESTROY, - [&server] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &manager = ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto configured_infer_model_handle, DestroyConfiguredInferModelSerializer::deserialize_request(request), DestroyInferModelSerializer); - - auto shutdown_lambda = [] (std::shared_ptr configured_infer_model) { - configured_infer_model->shutdown(); - return HAILO_SUCCESS; - }; - manager.execute(configured_infer_model_handle, shutdown_lambda); - server->cleanup_cim_buffer_pools({ configured_infer_model_handle }); - (void)manager.release_resource(configured_infer_model_handle, SINGLE_CLIENT_PID); - TRY_AS_HRPC_STATUS(auto reply, DestroyConfiguredInferModelSerializer::serialize_reply(HAILO_SUCCESS), DestroyInferModelSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_TIMEOUT, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = 
ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto tuple, SetSchedulerTimeoutSerializer::deserialize_request(request), SetSchedulerTimeoutSerializer); - const auto &configured_infer_model_handle = std::get<0>(tuple); - const auto &timeout = std::get<1>(tuple); - auto lambda = [timeout] (std::shared_ptr configured_infer_model) { - return configured_infer_model->set_scheduler_timeout(timeout); - }; - auto status = cim_manager.execute(configured_infer_model_handle, lambda); - TRY_AS_HRPC_STATUS(auto reply, SetSchedulerTimeoutSerializer::serialize_reply(status), SetSchedulerTimeoutSerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_THRESHOLD, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto tuple, SetSchedulerThresholdSerializer::deserialize_request(request), SetSchedulerThresholdSerializer); - const auto &configured_infer_model_handle = std::get<0>(tuple); - const auto &threshold = std::get<1>(tuple); - auto lambda = [threshold] (std::shared_ptr configured_infer_model) { - return configured_infer_model->set_scheduler_threshold(threshold); - }; - auto status = cim_manager.execute(configured_infer_model_handle, lambda); - TRY_AS_HRPC_STATUS(auto reply, SetSchedulerThresholdSerializer::serialize_reply(status), SetSchedulerThresholdSerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_PRIORITY, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto tuple, SetSchedulerPrioritySerializer::deserialize_request(request), SetSchedulerPrioritySerializer); - const auto &configured_infer_model_handle = std::get<0>(tuple); - const auto &priority = std::get<1>(tuple); - auto lambda = [priority] 
(std::shared_ptr configured_infer_model) { - return configured_infer_model->set_scheduler_priority(static_cast(priority)); - }; - auto status = cim_manager.execute(configured_infer_model_handle, lambda); - TRY_AS_HRPC_STATUS(auto reply, SetSchedulerPrioritySerializer::serialize_reply(status), SetSchedulerPrioritySerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__GET_HW_LATENCY_MEASUREMENT, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - - auto configured_infer_model_handle = GetHwLatencyMeasurementSerializer::deserialize_request(request); - CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, GetHwLatencyMeasurementSerializer); - - auto lambda = [] (std::shared_ptr configured_infer_model) { - return configured_infer_model->get_hw_latency_measurement(); - }; - - auto latency_measurement_result = cim_manager.execute>(configured_infer_model_handle.value(), lambda); - if (HAILO_NOT_AVAILABLE == latency_measurement_result.status()) { - return GetHwLatencyMeasurementSerializer::serialize_reply(HAILO_NOT_AVAILABLE); - } - CHECK_EXPECTED_AS_HRPC_STATUS(latency_measurement_result, GetHwLatencyMeasurementSerializer); - - uint32_t avg_hw_latency = static_cast(latency_measurement_result.value().avg_hw_latency.count()); - TRY_AS_HRPC_STATUS(auto reply, GetHwLatencyMeasurementSerializer::serialize_reply(latency_measurement_result.status(), avg_hw_latency), GetHwLatencyMeasurementSerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__ACTIVATE, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - - auto configured_infer_model_handle = ActivateSerializer::deserialize_request(request); - CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, ActivateSerializer); - - 
auto lambda = [] (std::shared_ptr configured_infer_model) { - return configured_infer_model->activate(); - }; - - auto status = cim_manager.execute(configured_infer_model_handle.value(), lambda); - TRY_AS_HRPC_STATUS(auto reply, ActivateSerializer::serialize_reply(status), ActivateSerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__DEACTIVATE, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - - auto configured_infer_model_handle = DeactivateSerializer::deserialize_request(request); - CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, DeactivateSerializer); - - auto lambda = [] (std::shared_ptr configured_infer_model) { - return configured_infer_model->deactivate(); - }; - - auto status = cim_manager.execute(configured_infer_model_handle.value(), lambda); - TRY_AS_HRPC_STATUS(auto reply, DeactivateSerializer::serialize_reply(status), DeactivateSerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__SHUTDOWN, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - - auto configured_infer_model_handle = ShutdownSerializer::deserialize_request(request); - CHECK_EXPECTED_AS_HRPC_STATUS(configured_infer_model_handle, ShutdownSerializer); - - auto lambda = [] (std::shared_ptr configured_infer_model) { - return configured_infer_model->shutdown(); - }; - - auto status = cim_manager.execute(configured_infer_model_handle.value(), lambda); - TRY_AS_HRPC_STATUS(auto reply, ShutdownSerializer::serialize_reply(status), ShutdownSerializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::CONFIGURED_INFER_MODEL__RUN_ASYNC, - [&infer_model_to_info_id, &buffer_pool_per_cim, callbacks_done_queue = server->callbacks_done_queue()] - (const MemoryView &request, 
ServerContextPtr server_context) -> Expected { - auto &cim_manager = ServiceResourceManager::get_instance(); - auto bindings_lambda = [] (std::shared_ptr configured_infer_model) { - return configured_infer_model->create_bindings(); - }; - TRY_AS_HRPC_STATUS(auto request_struct, RunAsyncSerializer::deserialize_request(request), RunAsyncSerializer); - auto configured_infer_model_handle = request_struct.configured_infer_model_handle; - auto infer_model_handle = request_struct.infer_model_handle; - auto callback_id = request_struct.callback_handle; - - auto bindings = cim_manager.execute>(configured_infer_model_handle, bindings_lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(bindings, RunAsyncSerializer); - - auto infer_model_info_lambda = [] (std::shared_ptr infer_model_info) { - return *infer_model_info; - }; - auto &infer_model_infos_manager = ServiceResourceManager::get_instance(); - auto infer_model_info = infer_model_infos_manager.execute>(infer_model_to_info_id[infer_model_handle], - infer_model_info_lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(infer_model_info, RunAsyncSerializer); - - std::vector inputs; // TODO: add infer vector pool - inputs.reserve(infer_model_info->inputs_names.size()); - uint32_t buffer_size_index = 0; - - for (const auto &input_name : infer_model_info->inputs_names) { - TRY_AS_HRPC_STATUS(auto input, bindings->input(input_name), RunAsyncSerializer); - - TRY_AS_HRPC_STATUS(auto buffer_ptr, buffer_pool_per_cim.at(configured_infer_model_handle)->acquire_buffer(input_name), - RunAsyncSerializer); - - uint32_t read_size = 0; - while (read_size < buffer_ptr->size()) { - uint32_t current_size = request_struct.input_buffer_sizes[buffer_size_index++]; - CHECK_AS_HRPC_STATUS(read_size + current_size <= buffer_ptr->size(), HAILO_INTERNAL_FAILURE, - RunAsyncSerializer); - - auto status = server_context->connection().read_buffer(MemoryView(buffer_ptr->data() + read_size, current_size)); - CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer); - - read_size 
+= current_size; - } - - inputs.emplace_back(buffer_ptr); - auto status = input.set_buffer(MemoryView(*buffer_ptr)); - CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer); - } - - std::vector outputs; // TODO: add infer vector pool - outputs.reserve(infer_model_info->outputs_names.size()); - for (const auto &output_name : infer_model_info->outputs_names) { - TRY_AS_HRPC_STATUS(auto buffer_ptr, buffer_pool_per_cim.at(configured_infer_model_handle)->acquire_buffer(output_name), - RunAsyncSerializer); - - auto output = bindings->output(output_name); - CHECK_EXPECTED_AS_HRPC_STATUS(output, RunAsyncSerializer); - - auto status = output->set_buffer(MemoryView(buffer_ptr->data(), buffer_ptr->size())); - CHECK_SUCCESS_AS_HRPC_STATUS(status, RunAsyncSerializer); - - outputs.emplace_back(buffer_ptr); - } - - auto infer_lambda = - [bindings = bindings.release(), callback_id, server_context, inputs, outputs, &buffer_pool_per_cim, configured_infer_model_handle, - infer_model_info, callbacks_done_queue] - (std::shared_ptr configured_infer_model) { - return configured_infer_model->run_async(bindings, - [callback_id, server_context, inputs, outputs, &buffer_pool_per_cim, configured_infer_model_handle, infer_model_info, - callbacks_done_queue] - (const AsyncInferCompletionInfo &completion_info) { - for (uint32_t i = 0; i < inputs.size(); i++) { - auto status = buffer_pool_per_cim.at(configured_infer_model_handle)->return_to_pool(infer_model_info->inputs_names[i], inputs[i]); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to return buffer to pool, status = {}", status); - } - } - - FinishedInferRequest request; - request.connection = server_context->connection(); - request.completion_info = completion_info; - request.callback_id = callback_id; - request.configured_infer_model_handle = configured_infer_model_handle; - request.outputs = std::move(outputs); - request.outputs_names = infer_model_info->outputs_names; - auto status = 
callbacks_done_queue->enqueue(std::move(request)); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to enqueue to infer requests queue, status = {}", status); - } - }); - }; - auto job = cim_manager.execute>(configured_infer_model_handle, infer_lambda); - CHECK_EXPECTED_AS_HRPC_STATUS(job, RunAsyncSerializer); - - job->detach(); - - TRY_AS_HRPC_STATUS(auto reply, RunAsyncSerializer::serialize_reply(HAILO_SUCCESS), RunAsyncSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__CREATE, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto status = CreateDeviceSerializer::deserialize_request(request); - CHECK_SUCCESS_AS_HRPC_STATUS(status, CreateDeviceSerializer); - - TRY_AS_HRPC_STATUS(auto device, Device::create(), CreateDeviceSerializer); - - auto &manager = ServiceResourceManager::get_instance(); - auto id = manager.register_resource(SINGLE_CLIENT_PID, std::move(device)); - auto reply = CreateDeviceSerializer::serialize_reply(HAILO_SUCCESS, id); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__DESTROY, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - auto &manager = ServiceResourceManager::get_instance(); - TRY_AS_HRPC_STATUS(auto device_handle, DestroyDeviceSerializer::deserialize_request(request), DestroyDeviceSerializer); - (void)manager.release_resource(device_handle, SINGLE_CLIENT_PID); - TRY_AS_HRPC_STATUS(auto reply, DestroyDeviceSerializer::serialize_reply(HAILO_SUCCESS), DestroyDeviceSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__IDENTIFY, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - TRY_AS_HRPC_STATUS(auto device_handle, IdentifyDeviceSerializer::deserialize_request(request), IdentifyDeviceSerializer); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [] (std::shared_ptr device) { - return 
device->identify(); - }; - TRY_AS_HRPC_STATUS(auto identity, - manager.execute>(device_handle, device_lambda), IdentifyDeviceSerializer); - TRY_AS_HRPC_STATUS(auto reply, IdentifyDeviceSerializer::serialize_reply(HAILO_SUCCESS, identity), IdentifyDeviceSerializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__EXTENDED_INFO, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - using Serializer = ExtendedDeviceInfoSerializer; - using ActionReturnType = hailo_extended_device_information_t; - - TRY_AS_HRPC_STATUS(auto device_handle, Serializer::deserialize_request(request), Serializer); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [] (std::shared_ptr device) { - return device->get_extended_device_information(); - }; - TRY_AS_HRPC_STATUS(auto extended_info, - manager.execute>(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, extended_info), Serializer); - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__GET_CHIP_TEMPERATURE, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - using Serializer = GetChipTemperatureSerializer; - using ActionReturnType = hailo_chip_temperature_info_t; - - TRY_AS_HRPC_STATUS(auto device_handle, Serializer::deserialize_request(request), Serializer); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [] (std::shared_ptr device) { - return device->get_chip_temperature(); - }; - - TRY_AS_HRPC_STATUS(auto info, manager.execute>(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, info), Serializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__POWER_MEASUREMENT, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - using Serializer = 
PowerMeasurementSerializer; - using ActionReturnType = float32_t; - - TRY_AS_HRPC_STATUS(auto tuple, Serializer::deserialize_request(request), Serializer); - - auto device_handle = std::get<0>(tuple); - auto dvm = std::get<1>(tuple); - auto power_measurement_type = std::get<2>(tuple); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [dvm, power_measurement_type] (std::shared_ptr device) { - return device->power_measurement( - static_cast(dvm), - static_cast(power_measurement_type)); - }; - - TRY_AS_HRPC_STATUS(auto info, manager.execute>(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, info), Serializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__SET_POWER_MEASUREMENT, - [] (const MemoryView &request, ServerContextPtr /*server_context*/) -> Expected { - using Serializer = SetPowerMeasurementSerializer; - using ActionReturnType = hailo_status; - - TRY_AS_HRPC_STATUS(auto tuple, Serializer::deserialize_request(request), Serializer); - - auto device_handle = std::get<0>(tuple); - auto dvm = std::get<1>(tuple); - auto power_measurement_type = std::get<2>(tuple); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [dvm, power_measurement_type] (std::shared_ptr device) { - constexpr hailo_measurement_buffer_index_t not_used_buffer_index = HAILO_MEASUREMENT_BUFFER_INDEX_MAX_ENUM; - return device->set_power_measurement( - not_used_buffer_index, /* Relevant only for H8. 
Not used in H10 */ - static_cast(dvm), - static_cast(power_measurement_type)); - }; - - CHECK_SUCCESS_AS_HRPC_STATUS(manager.execute(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS), Serializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__START_POWER_MEASUREMENT, - [] (const MemoryView &request, ServerContextPtr) -> Expected { - using Serializer = SetPowerMeasurementSerializer; - using ActionReturnType = hailo_status; - - TRY_AS_HRPC_STATUS(auto tuple, Serializer::deserialize_request(request), Serializer); - - auto device_handle = std::get<0>(tuple); - auto averaging_factor = std::get<1>(tuple); - auto sampling_period = std::get<2>(tuple); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [sampling_period, averaging_factor] (std::shared_ptr device) { - return device->start_power_measurement( - static_cast(averaging_factor), - static_cast(sampling_period)); - }; - - CHECK_SUCCESS_AS_HRPC_STATUS(manager.execute(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS), Serializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__GET_POWER_MEASUREMENT, - [] (const MemoryView &request, ServerContextPtr) -> Expected { - using Serializer = GetPowerMeasurementSerializer; - using ActionReturnType = hailo_power_measurement_data_t; - - TRY_AS_HRPC_STATUS(auto tuple, Serializer::deserialize_request(request), Serializer); - - auto device_handle = std::get<0>(tuple); - auto should_clear = std::get<1>(tuple); - - auto &manager = ServiceResourceManager::get_instance(); - auto device_lambda = [should_clear] (std::shared_ptr device) { - constexpr hailo_measurement_buffer_index_t unused_buffer_index = HAILO_MEASUREMENT_BUFFER_INDEX_MAX_ENUM; - return device->get_power_measurement(unused_buffer_index, should_clear); - }; - - TRY_AS_HRPC_STATUS(auto info, 
manager.execute>(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS, info), Serializer); - - return reply; - }); - dispatcher.register_action(HailoRpcActionID::DEVICE__STOP_POWER_MEASUREMENT, - [] (const MemoryView &request, ServerContextPtr) -> Expected { - using Serializer = StopPowerMeasurementSerializer; - using ActionReturnType = hailo_status; - - TRY_AS_HRPC_STATUS(auto device_handle, Serializer::deserialize_request(request), Serializer); - auto &manager = ServiceResourceManager::get_instance(); - - auto device_lambda = [] (std::shared_ptr device) { - return device->stop_power_measurement(); - }; - - CHECK_SUCCESS_AS_HRPC_STATUS(manager.execute(device_handle, device_lambda), Serializer); - TRY_AS_HRPC_STATUS(auto reply, Serializer::serialize_reply(HAILO_SUCCESS), Serializer); - - return reply; - }); - - server->set_dispatcher(dispatcher); - auto status = server->serve(); - if (status != HAILO_SUCCESS) { - LOGGER__ERROR("Error in serve, status = {}", status); - return status; - } - - return 0; -} diff --git a/hailort/hailort_server/hailort_server.hpp b/hailort/hailort_server/hailort_server.hpp deleted file mode 100644 index a9cdf00..0000000 --- a/hailort/hailort_server/hailort_server.hpp +++ /dev/null @@ -1,68 +0,0 @@ -#ifndef HAILORT_SERVER_HPP_ -/** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. 
- * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ -/** - * @file hailort_server.hpp - * @brief RPC Hailort Server Header - **/ - -#define HAILORT_SERVER_HPP_ - -#include "hrpc/server.hpp" -#include "hailort_service/cng_buffer_pool.hpp" -#include "hailo/infer_model.hpp" -#include "utils/thread_safe_map.hpp" - -namespace hailort -{ - -using infer_model_handle_t = uint32_t; - -struct FinishedInferRequest -{ -public: - FinishedInferRequest() : completion_info(HAILO_UNINITIALIZED) {} - RpcConnection connection; - hailort::AsyncInferCompletionInfo completion_info; - uint32_t callback_id; - uint32_t configured_infer_model_handle; - std::vector outputs; - std::vector outputs_names; -}; - -class Server; -class HailoRTServer : public Server { -public: - static Expected> create_unique(); - explicit HailoRTServer(std::shared_ptr connection_context, - std::shared_ptr> callbacks_done_queue, - EventPtr callbacks_queue_shutdown_event); - virtual ~HailoRTServer(); - - std::unordered_map &infer_model_to_info_id() { return m_infer_model_to_info_id; }; - ThreadSafeMap> &buffer_pool_per_cim() { return m_buffer_pool_per_cim; }; - std::unordered_map &hef_buffers() { return m_hef_buffers_per_infer_model; }; - std::shared_ptr> &callbacks_done_queue() { return m_callbacks_done_queue; }; - - - void cleanup_cim_buffer_pools(const std::vector &cim_handles); - -private: - virtual hailo_status cleanup_client_resources(RpcConnection client_connection) override; - void cleanup_infer_model_hef_buffers(const std::vector &infer_model_handles); - hailo_status callbacks_thread_loop(); - - std::unordered_map m_infer_model_to_info_id; - ThreadSafeMap> m_buffer_pool_per_cim; - std::mutex m_buffer_pool_mutex; - std::unordered_map m_hef_buffers_per_infer_model; - std::shared_ptr> m_callbacks_done_queue; - EventPtr m_callbacks_queue_shutdown_event; - std::thread m_callbacks_thread; -}; - -} // namespace hailort - -#endif // HAILORT_SERVER_HPP_ \ No newline at end of file diff 
--git a/hailort/hailort_service/cng_buffer_pool.cpp b/hailort/hailort_service/cng_buffer_pool.cpp index 6d398b8..e149ff4 100644 --- a/hailort/hailort_service/cng_buffer_pool.cpp +++ b/hailort/hailort_service/cng_buffer_pool.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file cng_buffer_pool.cpp * @brief Network group buffer pool implementation @@ -14,13 +14,12 @@ namespace hailort { -Expected ServiceNetworkGroupBufferPool::create_stream_buffer_pool(size_t buffer_size, +Expected BaseNetworkGroupBufferPool::create_stream_buffer_pool(size_t buffer_size, size_t buffer_count, hailo_dma_buffer_direction_t direction, EventPtr shutdown_event) { auto map_buffer_lambda = [direction](std::shared_ptr vdevice, BufferPtr buffer) { return DmaMappedBuffer::create(*vdevice, buffer->data(), buffer->size(), direction); }; - auto &vdevice_manager = ServiceResourceManager::get_instance(); TRY(auto free_buffers_queue, SpscQueue::create(buffer_count, shutdown_event, DEFAULT_TRANSFER_TIMEOUT)); @@ -29,10 +28,7 @@ Expected ServiceNetworkGroupBufferPool::create_stream_buffer buffers.reserve(buffer_count); for (size_t i = 0; i < buffer_count; i++) { TRY(auto buffer, Buffer::create_shared(buffer_size, BufferStorageParams::create_dma())); - - TRY(auto mapped_buffer, - vdevice_manager.execute>(m_vdevice_handle, map_buffer_lambda, buffer)); - + TRY(auto mapped_buffer, m_map_buffer_func(m_vdevice_handle, map_buffer_lambda, buffer)); auto status = free_buffers_queue.enqueue(buffer); CHECK_SUCCESS(status); @@ -47,21 +43,13 @@ Expected ServiceNetworkGroupBufferPool::create_stream_buffer return buffer_pool_ptr; } -Expected> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle) -{ - TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled)); - - auto cng_buffer_pool_ptr 
= make_shared_nothrow(shutdown_event, vdevice_handle); - CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return cng_buffer_pool_ptr; -} - -ServiceNetworkGroupBufferPool::ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle) : - m_stream_name_to_buffer_pool(), m_mapped_buffers(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle), m_is_shutdown(false) +BaseNetworkGroupBufferPool::BaseNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle, + map_buffer_on_handle_func_t map_buffer_func) + : m_stream_name_to_buffer_pool(), m_mapped_buffers(), m_shutdown_event(shutdown_event), m_vdevice_handle(vdevice_handle), + m_map_buffer_func(map_buffer_func), m_is_shutdown(false) {} -hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &name, +hailo_status BaseNetworkGroupBufferPool::allocate_pool(const std::string &name, hailo_dma_buffer_direction_t direction, size_t frame_size, size_t pool_size) { TRY(auto buffer_pool, create_stream_buffer_pool(frame_size, pool_size, direction, m_shutdown_event)); @@ -72,7 +60,7 @@ hailo_status ServiceNetworkGroupBufferPool::allocate_pool(const std::string &nam return HAILO_SUCCESS; } -hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &name, +hailo_status BaseNetworkGroupBufferPool::reallocate_pool(const std::string &name, hailo_dma_buffer_direction_t direction, size_t frame_size) { std::unique_lock lock(m_mutex); @@ -86,7 +74,7 @@ hailo_status ServiceNetworkGroupBufferPool::reallocate_pool(const std::string &n return HAILO_SUCCESS; } -Expected ServiceNetworkGroupBufferPool::acquire_buffer(const std::string &stream_name) +Expected BaseNetworkGroupBufferPool::acquire_buffer(const std::string &stream_name) { CHECK_AS_EXPECTED(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE, "acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name); @@ -104,7 
+92,7 @@ Expected ServiceNetworkGroupBufferPool::acquire_buffer(const std::str return buffer; } -hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &stream_name, BufferPtr buffer) +hailo_status BaseNetworkGroupBufferPool::return_to_pool(const std::string &stream_name, BufferPtr buffer) { CHECK(contains(m_stream_name_to_buffer_pool, stream_name), HAILO_INTERNAL_FAILURE, "acquire_buffer() for stream {} failed, stream name does not exist in buffer pool", stream_name); @@ -119,7 +107,7 @@ hailo_status ServiceNetworkGroupBufferPool::return_to_pool(const std::string &st return HAILO_SUCCESS; } -hailo_status ServiceNetworkGroupBufferPool::shutdown() +hailo_status BaseNetworkGroupBufferPool::shutdown() { { std::unique_lock lock(m_mutex); @@ -129,4 +117,36 @@ hailo_status ServiceNetworkGroupBufferPool::shutdown() return m_shutdown_event->signal(); } +Expected> ServiceNetworkGroupBufferPool::create(uint32_t vdevice_handle) +{ + TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled)); + + auto map_buffer_func = [](uint32_t handle, execute_map_on_vdevice_func_t execute_map_buffer_func, BufferPtr buffer) -> Expected { + auto &vdevice_manager = ServiceResourceManager::get_instance(); + TRY(auto mapped_buffer, + vdevice_manager.execute>(handle, execute_map_buffer_func, buffer)); + return mapped_buffer; + }; + auto cng_buffer_pool_ptr = make_shared_nothrow(shutdown_event, vdevice_handle, map_buffer_func); + CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return cng_buffer_pool_ptr; +} + +Expected> ServerNetworkGroupBufferPool::create(uint32_t vdevice_handle) +{ + TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled)); + + auto map_buffer_func = [](uint32_t handle, execute_map_on_vdevice_func_t execute_map_buffer_func, BufferPtr buffer) -> Expected { + auto &vdevice_manager = ServerResourceManager::get_instance(); + TRY(auto mapped_buffer, + vdevice_manager.execute>(handle, 
execute_map_buffer_func, buffer)); + return mapped_buffer; + }; + auto cng_buffer_pool_ptr = make_shared_nothrow(shutdown_event, vdevice_handle, map_buffer_func); + CHECK_NOT_NULL_AS_EXPECTED(cng_buffer_pool_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return cng_buffer_pool_ptr; +} + } /* namespace hailort */ diff --git a/hailort/hailort_service/cng_buffer_pool.hpp b/hailort/hailort_service/cng_buffer_pool.hpp index 3574494..0d5f634 100644 --- a/hailort/hailort_service/cng_buffer_pool.hpp +++ b/hailort/hailort_service/cng_buffer_pool.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file cng_buffer_pool.hpp * @brief This model represents the buffer pools for the streams of each network group. Used in async API @@ -22,28 +22,28 @@ namespace hailort { using stream_name_t = std::string; +using execute_map_on_vdevice_func_t = std::function(std::shared_ptr, BufferPtr)>; +using map_buffer_on_handle_func_t = std::function(uint32_t, execute_map_on_vdevice_func_t, BufferPtr)>; // This object holds a buffer pool for each stream of the network group. // It is used to pre-allocate all the buffers necessary for the reads from the device. // The buffers are reuseable, which also prevents allocation during inference. // The buffers are mapped to the device during their creation, which prevent lazy mapping each frame inference. // Currently only used in async API. 
-class ServiceNetworkGroupBufferPool +class BaseNetworkGroupBufferPool { public: - static Expected> create(uint32_t vdevice_handle); - hailo_status allocate_pool(const std::string &name, hailo_dma_buffer_direction_t direction, size_t frame_size, size_t pool_size); // Used in order to reallocate the pool buffers with different frame_size hailo_status reallocate_pool(const std::string &name, hailo_dma_buffer_direction_t direction, size_t frame_size); - ServiceNetworkGroupBufferPool(ServiceNetworkGroupBufferPool &&) = delete; - ServiceNetworkGroupBufferPool(const ServiceNetworkGroupBufferPool &) = delete; - ServiceNetworkGroupBufferPool &operator=(ServiceNetworkGroupBufferPool &&) = delete; - ServiceNetworkGroupBufferPool &operator=(const ServiceNetworkGroupBufferPool &) = delete; - virtual ~ServiceNetworkGroupBufferPool() = default; + BaseNetworkGroupBufferPool(BaseNetworkGroupBufferPool &&) = delete; + BaseNetworkGroupBufferPool(const BaseNetworkGroupBufferPool &) = delete; + BaseNetworkGroupBufferPool &operator=(BaseNetworkGroupBufferPool &&) = delete; + BaseNetworkGroupBufferPool &operator=(const BaseNetworkGroupBufferPool &) = delete; + virtual ~BaseNetworkGroupBufferPool() = default; - ServiceNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle); + BaseNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle, map_buffer_on_handle_func_t map_buffer_func); Expected acquire_buffer(const std::string &stream_name); hailo_status return_to_pool(const std::string &stream_name, BufferPtr buffer); hailo_status shutdown(); @@ -57,11 +57,40 @@ private: std::vector m_mapped_buffers; EventPtr m_shutdown_event; uint32_t m_vdevice_handle; + map_buffer_on_handle_func_t m_map_buffer_func; std::mutex m_mutex; std::condition_variable m_cv; bool m_is_shutdown; }; +class ServiceNetworkGroupBufferPool : public BaseNetworkGroupBufferPool +{ +public: + static Expected> create(uint32_t vdevice_handle); + ServiceNetworkGroupBufferPool(EventPtr 
shutdown_event, uint32_t vdevice_handle, map_buffer_on_handle_func_t map_buffer_func) + : BaseNetworkGroupBufferPool(shutdown_event, vdevice_handle, map_buffer_func) {} + + ServiceNetworkGroupBufferPool(ServiceNetworkGroupBufferPool &&) = delete; + ServiceNetworkGroupBufferPool(const ServiceNetworkGroupBufferPool &) = delete; + ServiceNetworkGroupBufferPool &operator=(ServiceNetworkGroupBufferPool &&) = delete; + ServiceNetworkGroupBufferPool &operator=(const ServiceNetworkGroupBufferPool &) = delete; + virtual ~ServiceNetworkGroupBufferPool() = default; +}; + +class ServerNetworkGroupBufferPool : public BaseNetworkGroupBufferPool +{ +public: + static Expected> create(uint32_t vdevice_handle); + ServerNetworkGroupBufferPool(EventPtr shutdown_event, uint32_t vdevice_handle, map_buffer_on_handle_func_t map_buffer_func) + : BaseNetworkGroupBufferPool(shutdown_event, vdevice_handle, map_buffer_func) {} + + ServerNetworkGroupBufferPool(ServerNetworkGroupBufferPool &&) = delete; + ServerNetworkGroupBufferPool(const ServerNetworkGroupBufferPool &) = delete; + ServerNetworkGroupBufferPool &operator=(ServerNetworkGroupBufferPool &&) = delete; + ServerNetworkGroupBufferPool &operator=(const ServerNetworkGroupBufferPool &) = delete; + virtual ~ServerNetworkGroupBufferPool() = default; +}; + } /* namespace hailort */ #endif /* _HAILO_CNG_BUFFER_POOL_HPP_ */ diff --git a/hailort/hailort_service/hailort_rpc_service.cpp b/hailort/hailort_service/hailort_rpc_service.cpp index a2cd57a..09be8ff 100644 --- a/hailort/hailort_service/hailort_rpc_service.cpp +++ b/hailort/hailort_service/hailort_rpc_service.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -82,10 +82,10 @@ hailo_status HailoRtRpcService::abort_output_vstream(uint32_t handle) } // TODO: Add a named templated release functions for InputVStream and OutputVStream to call abort before release. -void HailoRtRpcService::abort_vstreams_by_pids(std::set &pids) +void HailoRtRpcService::abort_vstreams_by_ids(std::set &pids) { - auto inputs_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); - auto outputs_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); + auto inputs_handles = ServiceResourceManager::get_instance().resources_handles_by_ids(pids); + auto outputs_handles = ServiceResourceManager::get_instance().resources_handles_by_ids(pids); for (auto &input_handle : inputs_handles) { abort_input_vstream(input_handle); } @@ -108,9 +108,9 @@ hailo_status HailoRtRpcService::shutdown_configured_network_group(uint32_t vdevi } -void HailoRtRpcService::shutdown_configured_network_groups_by_pids(std::set &pids) +void HailoRtRpcService::shutdown_configured_network_groups_by_ids(std::set &pids) { - auto cng_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); + auto cng_handles = ServiceResourceManager::get_instance().resources_handles_by_ids(pids); for (auto &handle : cng_handles) { auto status = shutdown_configured_network_group(handle); if (status != HAILO_SUCCESS) { @@ -119,9 +119,9 @@ void HailoRtRpcService::shutdown_configured_network_groups_by_pids(std::set &pids) +void HailoRtRpcService::shutdown_buffer_pool_by_ids(std::set &pids) { - auto buffer_pools_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); + auto buffer_pools_handles = ServiceResourceManager::get_instance().resources_handles_by_ids(pids); for (auto &handle : buffer_pools_handles) { auto status = shutdown_cng_buffer_pool(handle); if (status != HAILO_SUCCESS) { @@ -130,9 +130,9 @@ void 
HailoRtRpcService::shutdown_buffer_pool_by_pids(std::set &pids) } } -void HailoRtRpcService::shutdown_vdevice_cb_queue_by_pids(std::set &pids) +void HailoRtRpcService::shutdown_vdevice_cb_queue_by_ids(std::set &pids) { - auto vdevice_cb_queue_handles = ServiceResourceManager::get_instance().resources_handles_by_pids(pids); + auto vdevice_cb_queue_handles = ServiceResourceManager::get_instance().resources_handles_by_ids(pids); for (auto &handle : vdevice_cb_queue_handles) { auto status = shutdown_vdevice_cb_queue(handle); if (status != HAILO_SUCCESS) { @@ -143,7 +143,6 @@ void HailoRtRpcService::shutdown_vdevice_cb_queue_by_pids(std::set &pi void HailoRtRpcService::remove_disconnected_clients() { - std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2); auto now = std::chrono::high_resolution_clock::now(); std::set pids_to_remove; { @@ -164,19 +163,19 @@ void HailoRtRpcService::remove_disconnected_clients() // We abort vstreams before releasing them to avoid cases where the vstream is stuck in execute of a // blocking operation (which will be finished with timeout). // To release the vstream the ServiceResourceManager is waiting for the resource_mutex which is also locked in execute. 
- abort_vstreams_by_pids(pids_to_remove); + abort_vstreams_by_ids(pids_to_remove); // It is important to shutdown the cb Queue before the NG shutdown, as ongoing callbacks might continue to try to enqueue - shutdown_vdevice_cb_queue_by_pids(pids_to_remove); - shutdown_configured_network_groups_by_pids(pids_to_remove); - shutdown_buffer_pool_by_pids(pids_to_remove); + shutdown_vdevice_cb_queue_by_ids(pids_to_remove); + shutdown_configured_network_groups_by_ids(pids_to_remove); + shutdown_buffer_pool_by_ids(pids_to_remove); for (auto &client_pid : pids_to_remove) { - ServiceResourceManager::get_instance().release_by_pid(client_pid); - ServiceResourceManager::get_instance().release_by_pid(client_pid); - ServiceResourceManager::get_instance().release_by_pid(client_pid); - ServiceResourceManager::get_instance().release_by_pid(client_pid); - ServiceResourceManager::get_instance().release_by_pid(client_pid); - ServiceResourceManager::get_instance().release_by_pid(client_pid); + ServiceResourceManager::get_instance().release_by_id(client_pid); + ServiceResourceManager::get_instance().release_by_id(client_pid); + ServiceResourceManager::get_instance().release_by_id(client_pid); + ServiceResourceManager::get_instance().release_by_id(client_pid); + ServiceResourceManager::get_instance().release_by_id(client_pid); + ServiceResourceManager::get_instance().release_by_id(client_pid); LOGGER__INFO("Client disconnected, pid: {}", client_pid); HAILORT_OS_LOG_INFO("Client disconnected, pid: {}", client_pid); @@ -188,6 +187,7 @@ void HailoRtRpcService::remove_disconnected_clients() void HailoRtRpcService::keep_alive() { while (true) { + std::this_thread::sleep_for(hailort::HAILO_KEEPALIVE_INTERVAL / 2); remove_disconnected_clients(); } } @@ -435,7 +435,7 @@ hailo_status HailoRtRpcService::allocate_pool_for_raw_streams(uint32_t ng_handle { auto &cng_buffer_pool_manager = ServiceResourceManager::get_instance(); // For Async API - The buffer size in the pool will be the stream's hw frame 
size as used in the infer_model pipeline - TRY(const auto min_buffer_pool_size, get_min_buffer_pool_size(ng_handle)); + TRY(const auto min_buffer_pool_size, infer_queue_size(ng_handle)); TRY(const auto streams_infos, get_all_stream_infos(ng_handle)); for (const auto &stream_info : streams_infos) { @@ -953,12 +953,8 @@ void serialize_vstream_info(const hailo_vstream_info_t &info, ProtoVStreamInfo * if (HailoRTCommon::is_nms(info.format.order)) { auto nms_shape_proto = info_proto->mutable_nms_shape(); nms_shape_proto->set_number_of_classes(info.nms_shape.number_of_classes); - if (info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE) { - nms_shape_proto->set_max_bboxes_total(info.nms_shape.max_bboxes_total); - } else { - nms_shape_proto->set_max_bboxes_per_class(info.nms_shape.max_bboxes_per_class); - } - + nms_shape_proto->set_max_bboxes_total(info.nms_shape.max_bboxes_total); + nms_shape_proto->set_max_bboxes_per_class(info.nms_shape.max_bboxes_per_class); nms_shape_proto->set_max_accumulated_mask_size(info.nms_shape.max_accumulated_mask_size); } else { auto shape_proto = info_proto->mutable_shape(); @@ -1273,15 +1269,11 @@ void serialize_op_matadata(hailort::net_flow::OpMetadata &op_metadata, ProtoOpMe auto nms_config_proto = op_metadata_proto->mutable_nms_post_process_config(); nms_config_proto->set_nms_score_th(nms_config.nms_score_th); nms_config_proto->set_nms_iou_th(nms_config.nms_iou_th); - if (HAILO_NMS_RESULT_ORDER_BY_SCORE == nms_config.order_type) { - nms_config_proto->set_max_proposals_total(nms_config.max_proposals_total); - } else { - nms_config_proto->set_max_proposals_per_class(nms_config.max_proposals_per_class); - } + nms_config_proto->set_max_proposals_total(nms_config.max_proposals_total); + nms_config_proto->set_max_proposals_per_class(nms_config.max_proposals_per_class); nms_config_proto->set_number_of_classes(nms_config.number_of_classes); nms_config_proto->set_background_removal(nms_config.background_removal); 
nms_config_proto->set_background_removal_index(nms_config.background_removal_index); - nms_config_proto->set_cross_classes(nms_config.cross_classes); nms_config_proto->set_bbox_only(nms_config.bbox_only); } @@ -1775,7 +1767,6 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_all_stream_infos(grpc auto proto_nms_info = proto_stream_info.mutable_nms_info(); proto_nms_info->set_number_of_classes(stream_info.nms_info.number_of_classes); proto_nms_info->set_max_bboxes_per_class(stream_info.nms_info.max_bboxes_per_class); - proto_nms_info->set_order_type(HAILO_NMS_RESULT_ORDER_HW); proto_nms_info->set_bbox_size(stream_info.nms_info.bbox_size); proto_nms_info->set_chunks_per_frame(stream_info.nms_info.chunks_per_frame); proto_nms_info->set_is_defused(stream_info.nms_info.is_defused); @@ -1872,25 +1863,25 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_sorted_output_names(g return grpc::Status::OK; } -Expected HailoRtRpcService::get_min_buffer_pool_size(uint32_t ng_handle) +Expected HailoRtRpcService::infer_queue_size(uint32_t ng_handle) { auto lambda = [](std::shared_ptr cng) { - return cng->get_min_buffer_pool_size(); + return cng->infer_queue_size(); }; auto &manager = ServiceResourceManager::get_instance(); - TRY(auto min_buffer_pool_size, manager.execute>(ng_handle, lambda)); + TRY(auto queue_size, manager.execute>(ng_handle, lambda)); - return min_buffer_pool_size; + return queue_size; } -grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, - const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, - ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) +grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_infer_queue_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_infer_queue_size_Request *request, + ConfiguredNetworkGroup_infer_queue_size_Reply *reply) { - auto min_buffer_pool_size_expected = 
get_min_buffer_pool_size(request->identifier().network_group_handle()); - CHECK_EXPECTED_AS_RPC_STATUS(min_buffer_pool_size_expected, reply); + auto queue_size_expected = infer_queue_size(request->identifier().network_group_handle()); + CHECK_EXPECTED_AS_RPC_STATUS(queue_size_expected, reply); - reply->set_min_buffer_pool_size(static_cast(min_buffer_pool_size_expected.release())); + reply->set_infer_queue_size(static_cast(queue_size_expected.release())); reply->set_status(static_cast(HAILO_SUCCESS)); return grpc::Status::OK; } @@ -1996,21 +1987,6 @@ grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_bboxes_total( return grpc::Status::OK; } -grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_result_order_type(grpc::ServerContext*, - const ConfiguredNetworkGroup_set_nms_result_order_type_Request *request, - ConfiguredNetworkGroup_set_nms_result_order_type_Reply *reply) -{ - auto lambda = [](std::shared_ptr cng, const std::string &edge_name, hailo_nms_result_order_type_t order_type) { - return cng->set_nms_result_order_type(edge_name, order_type); - }; - auto &manager = ServiceResourceManager::get_instance(); - auto status = manager.execute(request->identifier().network_group_handle(), lambda, - request->edge_name(), static_cast(request->nms_result_order_type())); - CHECK_SUCCESS_AS_RPC_STATUS(status, reply); - reply->set_status(static_cast(HAILO_SUCCESS)); - return grpc::Status::OK; -} - grpc::Status HailoRtRpcService::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*, const ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request, ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) diff --git a/hailort/hailort_service/hailort_rpc_service.hpp b/hailort/hailort_service/hailort_rpc_service.hpp index 50f122e..6d75011 100644 --- a/hailort/hailort_service/hailort_rpc_service.hpp +++ b/hailort/hailort_service/hailort_rpc_service.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 
Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -190,9 +190,9 @@ public: virtual grpc::Status ConfiguredNetworkGroup_get_sorted_output_names(grpc::ServerContext*, const ConfiguredNetworkGroup_get_sorted_output_names_Request *request, ConfiguredNetworkGroup_get_sorted_output_names_Reply *reply) override; - virtual grpc::Status ConfiguredNetworkGroup_get_min_buffer_pool_size(grpc::ServerContext*, - const ConfiguredNetworkGroup_get_min_buffer_pool_size_Request *request, - ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply *reply) override; + virtual grpc::Status ConfiguredNetworkGroup_infer_queue_size(grpc::ServerContext*, + const ConfiguredNetworkGroup_infer_queue_size_Request *request, + ConfiguredNetworkGroup_infer_queue_size_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_get_layer_info(grpc::ServerContext*, const ConfiguredNetworkGroup_get_layer_info_Request *request, ConfiguredNetworkGroup_get_layer_info_Reply *reply) override; @@ -211,9 +211,6 @@ public: virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_bboxes_total(grpc::ServerContext*, const ConfiguredNetworkGroup_set_nms_max_bboxes_total_Request *request, ConfiguredNetworkGroup_set_nms_max_bboxes_total_Reply *reply) override; - virtual grpc::Status ConfiguredNetworkGroup_set_nms_result_order_type(grpc::ServerContext*, - const ConfiguredNetworkGroup_set_nms_result_order_type_Request *request, - ConfiguredNetworkGroup_set_nms_result_order_type_Reply *reply) override; virtual grpc::Status ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(grpc::ServerContext*, const ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request *request, ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply *reply) override; @@ -232,11 +229,11 @@ private: hailo_status flush_input_vstream(uint32_t handle); hailo_status 
abort_input_vstream(uint32_t handle); hailo_status abort_output_vstream(uint32_t handle); - void abort_vstreams_by_pids(std::set &pids); - void release_configured_network_groups_by_pid(uint32_t client_pid); + void abort_vstreams_by_ids(std::set &pids); + void release_configured_network_groups_by_id(uint32_t client_pid); void remove_disconnected_clients(); void update_client_id_timestamp(uint32_t pid); - Expected get_min_buffer_pool_size(uint32_t ng_handle); + Expected infer_queue_size(uint32_t ng_handle); Expected> get_all_stream_infos(uint32_t ng_handle); Expected> get_all_vstream_infos(uint32_t ng_handle); Expected output_vstream_name(uint32_t vstream_handle); @@ -255,9 +252,9 @@ private: Expected acquire_buffer_from_cng_pool(uint32_t ng_handle, const std::string &output_name); Expected output_vstream_frame_size(uint32_t vstream_handle); hailo_status update_buffer_size_in_pool(uint32_t vstream_handle, uint32_t network_group_handle); - void shutdown_configured_network_groups_by_pids(std::set &pids); - void shutdown_buffer_pool_by_pids(std::set &pids); - void shutdown_vdevice_cb_queue_by_pids(std::set &pids); + void shutdown_configured_network_groups_by_ids(std::set &pids); + void shutdown_buffer_pool_by_ids(std::set &pids); + void shutdown_vdevice_cb_queue_by_ids(std::set &pids); hailo_status shutdown_cng_buffer_pool(uint32_t network_group_handle); hailo_status shutdown_vdevice_cb_queue(uint32_t vdevice_handle); hailo_status shutdown_configured_network_group(uint32_t vdevice_handle); diff --git a/hailort/hailort_service/service_resource_manager.hpp b/hailort/hailort_service/service_resource_manager.hpp index 8d8d962..f9cbd88 100644 --- a/hailort/hailort_service/service_resource_manager.hpp +++ b/hailort/hailort_service/service_resource_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -19,33 +19,25 @@ #include #include -#define SINGLE_CLIENT_PID (0) - namespace hailort { template struct Resource { - Resource(uint32_t pid, std::shared_ptr resource) + Resource(uint32_t id, std::shared_ptr resource) : resource(std::move(resource)) { - pids.insert(pid); + ids.insert(id); } std::shared_ptr resource; - std::unordered_set pids; + std::unordered_set ids; }; template -class ServiceResourceManager +class BaseResourceManager { public: - static ServiceResourceManager& get_instance() - { - static ServiceResourceManager instance; - return instance; - } - template K execute(uint32_t handle, Func &lambda, Args... args) { @@ -55,7 +47,6 @@ public: std::shared_lock resource_lock(m_resources_mutexes[handle]); lock.unlock(); auto ret = lambda(resource->resource, args...); - return ret; } @@ -68,16 +59,15 @@ public: std::shared_lock resource_lock(m_resources_mutexes[handle]); lock.unlock(); auto ret = lambda(resource->resource, args...); - return ret; } - uint32_t register_resource(uint32_t pid, const std::shared_ptr &resource) + uint32_t register_resource(uint32_t id, const std::shared_ptr &resource) { std::unique_lock lock(m_mutex); auto index = m_current_handle_index.load(); // Create a new resource and register - m_resources.emplace(m_current_handle_index, std::make_shared>(pid, std::move(resource))); + m_resources.emplace(m_current_handle_index, std::make_shared>(id, std::move(resource))); m_resources_mutexes[m_current_handle_index]; // construct std::shared_timed_mutex m_current_handle_index++; return index; @@ -90,25 +80,25 @@ public: m_current_handle_index++; } - Expected dup_handle(uint32_t handle, uint32_t pid) + Expected dup_handle(uint32_t handle, uint32_t id) { std::unique_lock lock(m_mutex); TRY(auto resource, resource_lookup(handle)); assert(contains(m_resources_mutexes, handle)); std::unique_lock resource_lock(m_resources_mutexes[handle]); - 
resource->pids.insert(pid); + resource->ids.insert(id); return Expected(handle); } - std::shared_ptr release_resource(uint32_t handle, uint32_t pid) + std::shared_ptr release_resource(uint32_t handle, uint32_t id) { std::shared_ptr res = nullptr; std::unique_lock lock(m_mutex); auto found = m_resources.find(handle); if (found == m_resources.end()) { - LOGGER__INFO("Failed to release resource with handle {} and PID {}. The resource no longer exists or may have already been released", - handle, pid); + LOGGER__INFO("Failed to release resource with handle {} and ID {}. The resource no longer exists or may have already been released", + handle, id); return res; } @@ -117,8 +107,8 @@ public: bool release_resource = false; { std::unique_lock resource_lock(m_resources_mutexes[handle]); - resource->pids.erase(pid); - if ((SINGLE_CLIENT_PID == pid) || all_pids_dead(resource)) { + resource->ids.erase(id); + if (should_resource_be_released(resource)) { release_resource = true; res = resource->resource; m_resources.erase(handle); @@ -130,19 +120,19 @@ public: return res; } - std::vector> release_by_pid(uint32_t pid) + std::vector> release_by_id(uint32_t id) { std::vector> res; std::unique_lock lock(m_mutex); for (auto iter = m_resources.begin(); iter != m_resources.end(); ) { auto handle = iter->first; bool release_resource = false; - if (contains(iter->second->pids, pid)) { + if (contains(iter->second->ids, id)) { assert(contains(m_resources_mutexes, handle)); { std::unique_lock resource_lock(m_resources_mutexes[handle]); - iter->second->pids.erase(pid); - if (iter->second->pids.empty()) { + iter->second->ids.erase(id); + if (iter->second->ids.empty()) { release_resource = true; res.push_back(iter->second->resource); iter = m_resources.erase(iter); @@ -159,13 +149,13 @@ public: return res; } - std::vector resources_handles_by_pids(std::set &pids) + std::vector resources_handles_by_ids(std::set &ids) { std::unique_lock lock(m_mutex); std::vector resources_handles; for (auto 
&handle_resource_pair : m_resources) { - for (auto &pid : pids) { - if (contains(handle_resource_pair.second->pids, pid)) { + for (auto &id : ids) { + if (contains(handle_resource_pair.second->ids, id)) { resources_handles.emplace_back(handle_resource_pair.first); } } @@ -173,11 +163,14 @@ public: return resources_handles; } -private: - ServiceResourceManager() +protected: + BaseResourceManager() : m_current_handle_index(0) {} + virtual bool should_resource_be_released(std::shared_ptr> resource) = 0; + +private: Expected>> resource_lookup(uint32_t handle) { auto found = m_resources.find(handle); @@ -186,22 +179,57 @@ private: return resource; } - bool all_pids_dead(std::shared_ptr> resource) - { - for (auto &pid : resource->pids) { - if (OsUtils::is_pid_alive(pid)) { - return false; - } - } - return true; - } - std::mutex m_mutex; std::atomic m_current_handle_index; std::unordered_map>> m_resources; std::unordered_map m_resources_mutexes; }; +template +class ServiceResourceManager : public BaseResourceManager +{ +public: + static ServiceResourceManager& get_instance() + { + static ServiceResourceManager instance; + return instance; + } + +protected: + virtual bool should_resource_be_released(std::shared_ptr> resource) override + { + for (auto &id : resource->ids) { + if (OsUtils::is_pid_alive(id)) { + return false; + } + } + return true; + } + +private: + ServiceResourceManager() = default; +}; + +template +class ServerResourceManager : public BaseResourceManager +{ +public: + static ServerResourceManager& get_instance() + { + static ServerResourceManager instance; + return instance; + } + +protected: + virtual bool should_resource_be_released(std::shared_ptr>) override + { + return true; + } + +private: + ServerResourceManager() = default; +}; + } #endif /* HAILO_SERVICE_RESOURCE_MANAGER_HPP_ */ diff --git a/hailort/hailort_service/unix/hailort_service.cpp b/hailort/hailort_service/unix/hailort_service.cpp index d009e78..c26be14 100644 --- 
a/hailort/hailort_service/unix/hailort_service.cpp +++ b/hailort/hailort_service/unix/hailort_service.cpp @@ -1,7 +1,8 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) - * + **/ +/** * @file hailort_service.cpp * @brief main for hailort service * To run without daemonization run the hailort_service executable with `standalone`. @@ -32,9 +33,29 @@ using namespace hailort; +bool is_default_service_address(const std::string server_address) +{ + return HAILORT_SERVICE_DEFAULT_ADDR == server_address; +} + +bool socket_file_exists_and_unremovable() +{ + // Will return false in case we failed to remove the file for a reason other than "file doesn't exist" + return ((unlink(HAILO_DEFAULT_SERVICE_ADDR.c_str()) != 0) && (errno != ENOENT)); +} + void RunService() { const std::string server_address = HAILORT_SERVICE_ADDRESS; + + // If the socket file already exists and cannot be removed due to insufficient permissions, + // we should fail early to prevent grpc::BuildAndStart() from causing a segmentation fault. + if (is_default_service_address(server_address) && socket_file_exists_and_unremovable()) { + LOGGER__CRITICAL("Failed to remove existing socket file {}. This might indicate insufficient permissions for this operation.", + HAILO_DEFAULT_SERVICE_ADDR); + return; + } + HailoRtRpcService service; grpc::ServerBuilder builder; builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); diff --git a/hailort/hailort_service/vdevice_callbacks_queue.hpp b/hailort/hailort_service/vdevice_callbacks_queue.hpp index 41c3f21..0431042 100644 --- a/hailort/hailort_service/vdevice_callbacks_queue.hpp +++ b/hailort/hailort_service/vdevice_callbacks_queue.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file vdevice_callbacks_queue.hpp * @brief Queue used for the callbacks in infer async over service. diff --git a/hailort/hailort_service/windows/hailort_service.cpp b/hailort/hailort_service/windows/hailort_service.cpp index ded5387..abb9e2f 100644 --- a/hailort/hailort_service/windows/hailort_service.cpp +++ b/hailort/hailort_service/windows/hailort_service.cpp @@ -1,7 +1,8 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) - * + **/ +/** * @file hailort_service.cpp * @brief main for hailort service * The service code is based on Microsoft's documenataion: https://learn.microsoft.com/en-us/windows/win32/services/the-complete-service-sample @@ -24,7 +25,7 @@ * * 5) Delete service: * `sc delete hailort_service` -*/ + */ #include "hailort_rpc_service.hpp" #include "rpc/rpc_definitions.hpp" diff --git a/hailort/hailortcli/CMakeLists.txt b/hailort/hailortcli/CMakeLists.txt index c549af7..8b3c0cf 100644 --- a/hailort/hailortcli/CMakeLists.txt +++ b/hailort/hailortcli/CMakeLists.txt @@ -24,6 +24,7 @@ set(HAILORTCLI_CPP_FILES common.cpp benchmark_command.cpp parse_hef_command.cpp + memory_requirements_command.cpp graph_printer.cpp mon_command.cpp diff --git a/hailort/hailortcli/benchmark_command.cpp b/hailort/hailortcli/benchmark_command.cpp index 82a2a66..27bb47f 100644 --- a/hailort/hailortcli/benchmark_command.cpp +++ b/hailort/hailortcli/benchmark_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/benchmark_command.hpp b/hailort/hailortcli/benchmark_command.hpp index 350cf12..ba4e218 100644 --- a/hailort/hailortcli/benchmark_command.hpp +++ b/hailort/hailortcli/benchmark_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/board_config_command.cpp b/hailort/hailortcli/board_config_command.cpp index ee9fc34..2940963 100644 --- a/hailort/hailortcli/board_config_command.cpp +++ b/hailort/hailortcli/board_config_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/board_config_command.hpp b/hailort/hailortcli/board_config_command.hpp index 8f8231b..9c6bdf5 100644 --- a/hailort/hailortcli/board_config_command.hpp +++ b/hailort/hailortcli/board_config_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/buffer_utils.hpp b/hailort/hailortcli/buffer_utils.hpp index 55ffe9f..197b470 100644 --- a/hailort/hailortcli/buffer_utils.hpp +++ b/hailort/hailortcli/buffer_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/command.cpp b/hailort/hailortcli/command.cpp index 96c45eb..6b8dec9 100644 --- a/hailort/hailortcli/command.cpp +++ b/hailort/hailortcli/command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/command.hpp b/hailort/hailortcli/command.hpp index 70cfa54..f5ccde5 100644 --- a/hailort/hailortcli/command.hpp +++ b/hailort/hailortcli/command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/common.cpp b/hailort/hailortcli/common.cpp index d69fff1..2754f54 100644 --- a/hailort/hailortcli/common.cpp +++ b/hailort/hailortcli/common.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -76,7 +76,7 @@ void CliCommon::reset_cursor(size_t lines_count) void CliCommon::clear_terminal() { - std::cout << FORMAT_CLEAR_TERMINAL_CURSOR_FIRST_LINE; + std::cout << FORMAT_CLEAR_TERMINAL_CURSOR_FIRST_LINE << std::flush; } bool CliCommon::is_positive_number(const std::string &s) diff --git a/hailort/hailortcli/common.hpp b/hailort/hailortcli/common.hpp index c549c68..58af0ba 100644 --- a/hailort/hailortcli/common.hpp +++ b/hailort/hailortcli/common.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -23,6 +23,7 @@ using namespace hailort; #define FORMAT_CLEAR_LINE "\033[2K\r" #define FORMAT_CURSOR_UP_LINE "\033[F" #define FORMAT_CLEAR_TERMINAL_CURSOR_FIRST_LINE "\033[2J\033[1;1H" +#define FORMAT_RESET_TERMINAL_CURSOR_FIRST_LINE "\033[H\033[J" #define FORMAT_ENTER_ALTERNATIVE_SCREEN "\033[?1049h" #define FORMAT_EXIT_ALTERNATIVE_SCREEN "\033[?1049l" #define FORMAT_GREEN_PRINT "\x1B[1;32m" diff --git a/hailort/hailortcli/download_action_list_command.cpp b/hailort/hailortcli/download_action_list_command.cpp index 6b04a53..e4714e2 100644 --- a/hailort/hailortcli/download_action_list_command.cpp +++ b/hailort/hailortcli/download_action_list_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/download_action_list_command.hpp b/hailort/hailortcli/download_action_list_command.hpp index 743331c..b339102 100644 --- a/hailort/hailortcli/download_action_list_command.hpp +++ b/hailort/hailortcli/download_action_list_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_config_command.cpp b/hailort/hailortcli/fw_config_command.cpp index 68d0f81..ffa66df 100644 --- a/hailort/hailortcli/fw_config_command.cpp +++ b/hailort/hailortcli/fw_config_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_config_command.hpp b/hailort/hailortcli/fw_config_command.hpp index 23abeb1..b9fb9f5 100644 --- a/hailort/hailortcli/fw_config_command.hpp +++ b/hailort/hailortcli/fw_config_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_config_serializer.cpp b/hailort/hailortcli/fw_config_serializer.cpp index 4d421a8..a8ff87c 100644 --- a/hailort/hailortcli/fw_config_serializer.cpp +++ b/hailort/hailortcli/fw_config_serializer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_config_serializer.hpp b/hailort/hailortcli/fw_config_serializer.hpp index 4e3ecba..53c37a3 100644 --- a/hailort/hailortcli/fw_config_serializer.hpp +++ b/hailort/hailortcli/fw_config_serializer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_control_command.cpp b/hailort/hailortcli/fw_control_command.cpp index cd86f4e..a6ed9de 100644 --- a/hailort/hailortcli/fw_control_command.cpp +++ b/hailort/hailortcli/fw_control_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -13,6 +13,7 @@ static const char *NOT_CONFIGURED_ATTR = ""; +static const uint8_t INVALID_LCS = 0; #define MHz (1000 * 1000) @@ -54,13 +55,13 @@ static std::string extended_device_information_supported_features(hailo_device_s return supported_features_str; } -static void extended_device_information_print_array(uint8_t *array_for_print, size_t array_length, std::string splitter) +static void extended_device_information_print_array(const uint8_t *array_for_print, size_t array_length, std::string splitter) { const bool UPPERCASE = true; std::cout << StringUtils::to_hex_string(array_for_print, array_length, UPPERCASE, splitter) << std::endl; } -static bool extended_device_information_is_array_not_empty(uint8_t *array_for_print, size_t array_length) +static bool extended_device_information_is_array_not_empty(const uint8_t *array_for_print, size_t array_length) { uint32_t i = 0; for(i = 0; i < array_length; i++) { @@ -71,11 +72,16 @@ static bool extended_device_information_is_array_not_empty(uint8_t *array_for_pr return false; } -static hailo_status print_extended_device_information(Device &device) +static std::string lcs_string(uint8_t lcs) { - TRY(auto device_info, device.get_extended_device_information()); + if (INVALID_LCS == lcs) { + return NOT_CONFIGURED_ATTR; + } + return std::to_string(lcs); +} - // Print Board Extended information +static void print_extended_device_information(const hailo_extended_device_information_t &device_info) +{ std::cout << "Boot source: " << extended_device_information_boot_string(device_info.boot_source) << std::endl; std::cout << "Neural Network Core Clock Rate: " << (device_info.neural_network_core_clock_rate/MHz) <<"MHz" < 0) { std::cout << "Device supported features: " << supported_features_str << std::endl; } - std::cout << "LCS: " << static_cast(device_info.lcs) << std::endl; + std::cout << "LCS: " << lcs_string(device_info.lcs) << std::endl; 
if(extended_device_information_is_array_not_empty(device_info.soc_id, sizeof(device_info.soc_id))){ std::cout << "SoC ID: "; @@ -105,7 +111,9 @@ static hailo_status print_extended_device_information(Device &device) extended_device_information_print_array(device_info.soc_pm_values, sizeof(device_info.soc_pm_values), ""); } - return HAILO_SUCCESS; + if (device_info.gpio_mask != 0) { + std::cout << "GPIO Mask: " << std::setfill('0') << std::setw(4) << std::hex << device_info.gpio_mask << std::dec << std::endl; + } } static std::string fw_version_string(const hailo_device_identity_t &identity) @@ -184,7 +192,8 @@ hailo_status FwControlIdentifyCommand::execute_on_device(Device &device) identity_attr_string(identity.product_name, identity.product_name_length) << std::endl; if (m_is_extended) { - print_extended_device_information(device); + TRY(auto device_info, device.get_extended_device_information()); + print_extended_device_information(device_info); } std::cout << std::endl; diff --git a/hailort/hailortcli/fw_control_command.hpp b/hailort/hailortcli/fw_control_command.hpp index e7cdb90..f5fb372 100644 --- a/hailort/hailortcli/fw_control_command.hpp +++ b/hailort/hailortcli/fw_control_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_logger_command.cpp b/hailort/hailortcli/fw_logger_command.cpp index 2d23ff8..102cc20 100644 --- a/hailort/hailortcli/fw_logger_command.cpp +++ b/hailort/hailortcli/fw_logger_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_logger_command.hpp b/hailort/hailortcli/fw_logger_command.hpp index bb5568a..03d3a7e 100644 --- a/hailort/hailortcli/fw_logger_command.hpp +++ b/hailort/hailortcli/fw_logger_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_update_command.cpp b/hailort/hailortcli/fw_update_command.cpp index b37883d..d3a899f 100644 --- a/hailort/hailortcli/fw_update_command.cpp +++ b/hailort/hailortcli/fw_update_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/fw_update_command.hpp b/hailort/hailortcli/fw_update_command.hpp index f099407..8e72b79 100644 --- a/hailort/hailortcli/fw_update_command.hpp +++ b/hailort/hailortcli/fw_update_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/graph_printer.cpp b/hailort/hailortcli/graph_printer.cpp index 2975886..77df7b4 100644 --- a/hailort/hailortcli/graph_printer.cpp +++ b/hailort/hailortcli/graph_printer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/graph_printer.hpp b/hailort/hailortcli/graph_printer.hpp index f355a53..028e217 100644 --- a/hailort/hailortcli/graph_printer.hpp +++ b/hailort/hailortcli/graph_printer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/hailortcli.cpp b/hailort/hailortcli/hailortcli.cpp index a6463db..d731dc1 100644 --- a/hailort/hailortcli/hailortcli.cpp +++ b/hailort/hailortcli/hailortcli.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -25,6 +25,7 @@ #include "udp_rate_limiter_command.hpp" #endif #include "parse_hef_command.hpp" +#include "memory_requirements_command.hpp" #include "fw_control_command.hpp" #include "measure_nnc_performance_command.hpp" @@ -201,6 +202,7 @@ public: add_subcommand(OptionVisibility::HIDDEN); #endif add_subcommand(); + add_subcommand(OptionVisibility::HIDDEN); add_subcommand(); } diff --git a/hailort/hailortcli/hailortcli.hpp b/hailort/hailortcli/hailortcli.hpp index b9325fa..f86fd54 100644 --- a/hailort/hailortcli/hailortcli.hpp +++ b/hailort/hailortcli/hailortcli.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/infer_stats_printer.cpp b/hailort/hailortcli/infer_stats_printer.cpp index 546c8cf..5544db8 100644 --- a/hailort/hailortcli/infer_stats_printer.cpp +++ b/hailort/hailortcli/infer_stats_printer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/infer_stats_printer.hpp b/hailort/hailortcli/infer_stats_printer.hpp index 0d28c4e..e22e02e 100644 --- a/hailort/hailortcli/infer_stats_printer.hpp +++ b/hailort/hailortcli/infer_stats_printer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/inference_progress.cpp b/hailort/hailortcli/inference_progress.cpp index f514ee0..a7ed40d 100644 --- a/hailort/hailortcli/inference_progress.cpp +++ b/hailort/hailortcli/inference_progress.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/inference_progress.hpp b/hailort/hailortcli/inference_progress.hpp index 0aaff6a..0a20597 100644 --- a/hailort/hailortcli/inference_progress.hpp +++ b/hailort/hailortcli/inference_progress.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/inference_result.hpp b/hailort/hailortcli/inference_result.hpp index 97c6c25..b7d6f24 100644 --- a/hailort/hailortcli/inference_result.hpp +++ b/hailort/hailortcli/inference_result.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/measure_nnc_performance_command.cpp b/hailort/hailortcli/measure_nnc_performance_command.cpp index 9db4855..0841c3e 100644 --- a/hailort/hailortcli/measure_nnc_performance_command.cpp +++ b/hailort/hailortcli/measure_nnc_performance_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -80,10 +80,9 @@ hailo_status HwInferEstimatorCommand::execute() TRY(auto configure_params, get_configure_params(m_params, hef, interface)); /* Use Env var to configure all desc list with max depth */ - setenv(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR,"Y",1); + setenv(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR,"1",1); TRY(auto network_group_list, device->configure(hef, configure_params), "Failed configure device from hef"); - unsetenv(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR); CHECK(1 == network_group_list.size(), HAILO_INVALID_OPERATION, "HW Inference is not supported on HEFs with multiple network groups"); @@ -108,5 +107,7 @@ hailo_status HwInferEstimatorCommand::execute() std::cout << "======================" << std::endl; std::cout << " End of report" << std::endl; std::cout << "======================" << std::endl; + + unsetenv(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR); return HAILO_SUCCESS; } diff --git 
a/hailort/hailortcli/measure_nnc_performance_command.hpp b/hailort/hailortcli/measure_nnc_performance_command.hpp index 8bcbadb..7d1fd75 100644 --- a/hailort/hailortcli/measure_nnc_performance_command.hpp +++ b/hailort/hailortcli/measure_nnc_performance_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/memory_requirements_command.cpp b/hailort/hailortcli/memory_requirements_command.cpp new file mode 100644 index 0000000..c3a422a --- /dev/null +++ b/hailort/hailortcli/memory_requirements_command.cpp @@ -0,0 +1,173 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file memory_requirements_command.cpp + **/ +#include "memory_requirements_command.hpp" +#include "common/filesystem.hpp" +#include "hef/memory_requirements_calculator.hpp" + + +using MemoryRequirementsNetworkParams = MemoryRequirementsCalculator::HefParams; + +class MemoryRequirementsNetworkApp : public CLI::App { +public: + MemoryRequirementsNetworkApp(); + MemoryRequirementsCalculator::HefParams get_params() const { return m_params; } + +private: + MemoryRequirementsCalculator::HefParams m_params; +}; + +class MemoryRequirementsApp : public CLI::App { +public: + MemoryRequirementsApp(); + auto get_network_params() const { return m_network_params; } + +private: + void add_net_subcom(); + + std::vector m_network_params; +}; + +MemoryRequirementsNetworkApp::MemoryRequirementsNetworkApp() : + CLI::App("Set network", "set-net") +{ + auto hef_path_option = add_option("hef", m_params.hef_path, "HEF file path")->check(CLI::ExistingFile); + add_option("--name", m_params.network_group_name, "Network group name") + ->default_val("") + 
->needs(hef_path_option); + add_option("--batch-size", m_params.batch_size, "Batch size")->default_val(HAILO_DEFAULT_BATCH_SIZE); +} + +MemoryRequirementsApp::MemoryRequirementsApp() : + CLI::App("Shows memory requirements for running models", "mem-req") +{ + add_net_subcom(); +} + +void MemoryRequirementsApp::add_net_subcom() +{ + auto net_app = std::make_shared(); + net_app->immediate_callback(); + net_app->callback([this, net_app_weak=std::weak_ptr(net_app)]() { + auto net_app = net_app_weak.lock(); + if (!net_app) { return; } + m_network_params.push_back(net_app->get_params()); + + // Throw an error if anything is left over and should not be. + _process_extras(); + + remove_subcommand(net_app.get()); + // Remove from parsed_subcommands_ as well (probably a bug in CLI11) + parsed_subcommands_.erase(std::remove_if( + parsed_subcommands_.begin(), parsed_subcommands_.end(), + [net_app](auto x){return x == net_app.get();}), + parsed_subcommands_.end()); + add_net_subcom(); + }); + add_subcommand(net_app); +} + +MemoryRequirementsCommand::MemoryRequirementsCommand(CLI::App &parent_app) : + Command(parent_app.add_subcommand(std::make_shared())) +{} + +static std::string pretty_byte_size_print(size_t size_bytes) +{ + auto size_bytes_d = static_cast(size_bytes); + auto ordered_sizes = {"B", "KB", "MB"}; + for (const auto &size_str : ordered_sizes) { + if (size_bytes_d < 1024) { + return fmt::format("{:.3} {}", size_bytes_d, size_str); + } + size_bytes_d /= 1024; + } + + return fmt::format("{:.3} GB", size_bytes_d); +} + +static std::string get_name(const MemoryRequirementsNetworkParams &net_params) +{ + auto name = Filesystem::basename(net_params.hef_path); + if (!net_params.network_group_name.empty()) { + name = fmt::format("{}:{}", name, net_params.network_group_name); + } + return name; +} + +static std::string repeat(const std::string &str, size_t count) +{ + std::string result; + for (size_t i = 0; i < count; i++) { + result += str; + } + return result; +} + 
+hailo_status MemoryRequirementsCommand::execute() +{ + auto &app = dynamic_cast(*m_app); + CHECK(0 < app.get_network_params().size(), HAILO_INVALID_OPERATION, "Nothing to run"); + + std::cout << "Parsing Hefs, calculating memory requirements...\n"; + const auto params = app.get_network_params(); + TRY(auto requirements, MemoryRequirementsCalculator::get_memory_requirements(params)); + std::cout << "Parsing Hefs, calculating memory requirements... DONE\n"; + + const size_t model_name_size = 50; + const size_t element_size = 10; + const auto elements_count = 3; // CMA, CMA-Desc, Pinned + + const size_t per_type_size = element_size * elements_count + (elements_count - 1); // Size includes (elements_count - 1) delimiters + const auto memory_types_count = 3; // Config, Intermediate, Total + + const auto header_seperator = "+" + repeat("-", model_name_size) + "+" + + repeat(repeat("-", per_type_size) + "+", memory_types_count); + const auto header_first_format = fmt::format("|{}|", repeat(" ", model_name_size)) + + repeat(fmt::format("{{:^{}}}|", per_type_size), memory_types_count); + const auto header_second_format = fmt::format("|{{:^{}}}|", model_name_size) + + repeat(repeat("-", per_type_size) + "+", memory_types_count); + + const auto body_seperator = "+" + repeat("-", model_name_size) + "+" + + repeat(repeat("-", element_size) + "+", elements_count * memory_types_count); + const auto body_table_format = fmt::format("|{{:^{}}}|", model_name_size) + + repeat(fmt::format("{{:^{}}}|", element_size), elements_count * memory_types_count); + + const auto print_row = [=](const std::string &name, const MemoryRequirements &req) { + auto total = EdgeTypeMemoryRequirements{ + req.config_buffers.cma_memory + req.intermediate_buffers.cma_memory, + req.config_buffers.cma_memory_for_descriptors + req.intermediate_buffers.cma_memory_for_descriptors, + req.config_buffers.pinned_memory + req.intermediate_buffers.pinned_memory + }; + std::cout << fmt::format(body_table_format, name, 
+ pretty_byte_size_print(req.config_buffers.cma_memory), + pretty_byte_size_print(req.config_buffers.cma_memory_for_descriptors), + pretty_byte_size_print(req.config_buffers.pinned_memory), + pretty_byte_size_print(req.intermediate_buffers.cma_memory), + pretty_byte_size_print(req.intermediate_buffers.cma_memory_for_descriptors), + pretty_byte_size_print(req.intermediate_buffers.pinned_memory), + pretty_byte_size_print(total.cma_memory), + pretty_byte_size_print(total.cma_memory_for_descriptors), + pretty_byte_size_print(total.pinned_memory)) << "\n"; + }; + + std::cout << "Memory Requirements:\n"; + std::cout << header_seperator << "\n"; + std::cout << fmt::format(header_first_format, "Weights", "Inter", "Total") << "\n"; + std::cout << fmt::format(header_second_format, "Model") << "\n"; + + std::cout << fmt::format(body_table_format, "", "CMA", "CMA-Desc", "Pinned", + "CMA", "CMA-Desc", "Pinned", "CMA", "CMA-Desc", "Pinned") << "\n"; + std::cout << body_seperator << "\n"; + for (size_t i = 0; i < params.size(); i++) { + print_row(get_name(params[i]), requirements.hefs_memory_requirements[i]); + } + std::cout << body_seperator << "\n"; + print_row("Total", requirements.total_memory_requirements); + std::cout << header_seperator << "\n"; + + return HAILO_SUCCESS; +} diff --git a/hailort/hailortcli/memory_requirements_command.hpp b/hailort/hailortcli/memory_requirements_command.hpp new file mode 100644 index 0000000..351ba64 --- /dev/null +++ b/hailort/hailortcli/memory_requirements_command.hpp @@ -0,0 +1,22 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file memory_requirements_command.hpp + * @brief Command that prints memory requirements for running models + **/ + +#ifndef _HAILO_memory_requirements_command_HPP_ +#define _HAILO_memory_requirements_command_HPP_ + +#include "command.hpp" + +class MemoryRequirementsCommand : public Command { +public: + explicit MemoryRequirementsCommand(CLI::App &parent_app); + + virtual hailo_status execute() override; +}; + +#endif /* _HAILO_memory_requirements_command_HPP_ */ diff --git a/hailort/hailortcli/mon_command.cpp b/hailort/hailortcli/mon_command.cpp index f9581f6..90035e3 100644 --- a/hailort/hailortcli/mon_command.cpp +++ b/hailort/hailortcli/mon_command.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file mon_command.cpp * @brief Monitor of networks - Presents information about the running networks @@ -57,16 +57,16 @@ hailo_status MonCommand::execute() #endif } -void MonCommand::print_devices_info_header() +void MonCommand::add_devices_info_header(std::ostream &buffer) { - std::cout << + buffer << std::setw(DEVICE_ID_WIDTH) << std::left << "Device ID" << std::setw(UTILIZATION_WIDTH) << std::left << "Utilization (%)" << std::setw(STRING_WIDTH) << std::left << "Architecture" << "\n" << std::left << std::string(LINE_LENGTH, '-') << "\n"; } -void MonCommand::print_devices_info_table(const ProtoMon &mon_message) +void MonCommand::add_devices_info_table(const ProtoMon &mon_message, std::ostream &buffer) { auto data_line_len = NUMBER_WIDTH + NETWORK_GROUP_NAME_WIDTH + DEVICE_ID_WIDTH; auto rest_line_len = LINE_LENGTH - data_line_len; @@ -76,7 +76,7 @@ void MonCommand::print_devices_info_table(const ProtoMon &mon_message) auto utilization = device_info.utilization(); auto 
device_arch = device_info.device_arch(); - std::cout << std::setprecision(1) << std::fixed << + buffer << std::setprecision(1) << std::fixed << std::setw(DEVICE_ID_WIDTH) << std::left << device_id << std::setw(UTILIZATION_WIDTH) << std::left << utilization << std::setw(STRING_WIDTH) << std::left << device_arch << @@ -84,9 +84,9 @@ void MonCommand::print_devices_info_table(const ProtoMon &mon_message) } } -void MonCommand::print_networks_info_header() +void MonCommand::add_networks_info_header(std::ostream &buffer) { - std::cout << + buffer << std::setw(NETWORK_GROUP_NAME_WIDTH) << std::left << "Model" << std::setw(UTILIZATION_WIDTH) << std::left << "Utilization (%) " << std::setw(NUMBER_WIDTH) << std::left << "FPS" << @@ -94,7 +94,7 @@ void MonCommand::print_networks_info_header() "\n" << std::left << std::string(LINE_LENGTH, '-') << "\n"; } -void MonCommand::print_networks_info_table(const ProtoMon &mon_message) +void MonCommand::add_networks_info_table(const ProtoMon &mon_message, std::ostream &buffer) { const uint32_t NUMBER_OBJECTS_COUNT = 3; auto data_line_len = (NUMBER_WIDTH * NUMBER_OBJECTS_COUNT) + NETWORK_GROUP_NAME_WIDTH; @@ -107,7 +107,7 @@ void MonCommand::print_networks_info_table(const ProtoMon &mon_message) auto fps = net_info.fps(); auto utilization = net_info.utilization(); - std::cout << std::setprecision(1) << std::fixed << + buffer << std::setprecision(1) << std::fixed << std::setw(STRING_WIDTH) << std::left << net_name << std::setw(UTILIZATION_WIDTH) << std::left << utilization << std::setw(NUMBER_WIDTH) << std::left << fps << @@ -115,9 +115,9 @@ void MonCommand::print_networks_info_table(const ProtoMon &mon_message) } } -void MonCommand::print_frames_header() +void MonCommand::add_frames_header(std::ostream &buffer) { - std::cout << + buffer << std::setw(STRING_WIDTH) << std::left << "Model" << std::setw(STRING_WIDTH) << std::left << "Stream" << std::setw(NUMBER_WIDTH) << std::left << "Direction" << @@ -133,7 +133,7 @@ void 
MonCommand::print_frames_header() "\n" << std::left << std::string(LINE_LENGTH + NUMBER_WIDTH, '-') << "\n"; } -hailo_status MonCommand::print_frames_table(const ProtoMon &mon_message) +hailo_status MonCommand::print_frames_table(const ProtoMon &mon_message, std::ostream &buffer) { for (const auto &net_info : mon_message.net_frames_infos()) { auto &original_net_name = net_info.network_name(); @@ -166,7 +166,7 @@ hailo_status MonCommand::print_frames_table(const ProtoMon &mon_message) avg_frames_str = ss.str(); } - std::cout << + buffer << std::setw(STRING_WIDTH) << std::left << net_name << std::setw(STRING_WIDTH) << std::left << stream_name << std::setw(NUMBER_WIDTH) << std::left << stream_direction << @@ -195,27 +195,35 @@ Expected get_terminal_line_width() hailo_status MonCommand::print_tables(const std::vector &mon_messages, uint32_t terminal_line_width) { - print_devices_info_header(); + std::ostringstream buffer; + buffer.str(""); // Clear previous content + buffer.clear(); // Reset any error state + + buffer << FORMAT_RESET_TERMINAL_CURSOR_FIRST_LINE; + + add_devices_info_header(buffer); for (const auto &mon_message : mon_messages) { - print_devices_info_table(mon_message); + add_devices_info_table(mon_message, buffer); } - std::cout << std::string(terminal_line_width, ' ') << "\n"; - std::cout << std::string(terminal_line_width, ' ') << "\n"; - - print_networks_info_header(); + buffer << std::string(terminal_line_width, ' ') << "\n"; + buffer << std::string(terminal_line_width, ' ') << "\n"; + + add_networks_info_header(buffer); for (const auto &mon_message : mon_messages) { - print_networks_info_table(mon_message); + add_networks_info_table(mon_message, buffer); } - std::cout << std::string(terminal_line_width, ' ') << "\n"; - std::cout << std::string(terminal_line_width, ' ') << "\n"; + buffer << std::string(terminal_line_width, ' ') << "\n"; + buffer << std::string(terminal_line_width, ' ') << "\n"; - print_frames_header(); + add_frames_header(buffer); 
for (const auto &mon_message : mon_messages) { - CHECK_SUCCESS(print_frames_table(mon_message)); + CHECK_SUCCESS(print_frames_table(mon_message, buffer)); } + + std::cout << buffer.str() << std::flush; return HAILO_SUCCESS; } @@ -267,7 +275,6 @@ hailo_status MonCommand::run_monitor() << "If this is not the case, verify that environment variable '" << SCHEDULER_MON_ENV_VAR << "' is set to 1.\n" << FORMAT_NORMAL_PRINT; } - CliCommon::clear_terminal(); std::this_thread::sleep_for(DEFAULT_SCHEDULER_MON_INTERVAL); } diff --git a/hailort/hailortcli/mon_command.hpp b/hailort/hailortcli/mon_command.hpp index b5a0cf9..752c666 100644 --- a/hailort/hailortcli/mon_command.hpp +++ b/hailort/hailortcli/mon_command.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file mon_command.hpp * @brief Monitor of networks - Presents information about the running networks @@ -32,12 +32,12 @@ public: private: hailo_status run_monitor(); hailo_status print_tables(const std::vector &mon_messages, uint32_t terminal_line_width); - void print_devices_info_header(); - void print_networks_info_header(); - void print_frames_header(); - void print_devices_info_table(const ProtoMon &mon_message); - void print_networks_info_table(const ProtoMon &mon_message); - hailo_status print_frames_table(const ProtoMon &mon_message); + void add_devices_info_header(std::ostream &buffer); + void add_networks_info_header(std::ostream &buffer); + void add_frames_header(std::ostream &buffer); + void add_devices_info_table(const ProtoMon &mon_message, std::ostream &buffer); + void add_networks_info_table(const ProtoMon &mon_message, std::ostream &buffer); + hailo_status print_frames_table(const ProtoMon &mon_message, std::ostream &buffer); hailo_status run_in_alternative_terminal(); }; diff --git 
a/hailort/hailortcli/parse_hef_command.cpp b/hailort/hailortcli/parse_hef_command.cpp index 6840b18..d835dcb 100644 --- a/hailort/hailortcli/parse_hef_command.cpp +++ b/hailort/hailortcli/parse_hef_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/parse_hef_command.hpp b/hailort/hailortcli/parse_hef_command.hpp index f85f968..8450348 100644 --- a/hailort/hailortcli/parse_hef_command.hpp +++ b/hailort/hailortcli/parse_hef_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/power_measurement_command.cpp b/hailort/hailortcli/power_measurement_command.cpp index 42b3dc3..0f4699e 100644 --- a/hailort/hailortcli/power_measurement_command.cpp +++ b/hailort/hailortcli/power_measurement_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/power_measurement_command.hpp b/hailort/hailortcli/power_measurement_command.hpp index 5d00fee..158f53f 100644 --- a/hailort/hailortcli/power_measurement_command.hpp +++ b/hailort/hailortcli/power_measurement_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/io_wrappers.cpp b/hailort/hailortcli/run2/io_wrappers.cpp index 3af75f0..85ea1b7 100644 --- a/hailort/hailortcli/run2/io_wrappers.cpp +++ b/hailort/hailortcli/run2/io_wrappers.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file io_wrappers.cpp **/ diff --git a/hailort/hailortcli/run2/io_wrappers.hpp b/hailort/hailortcli/run2/io_wrappers.hpp index 12fd291..89d8f0c 100644 --- a/hailort/hailortcli/run2/io_wrappers.hpp +++ b/hailort/hailortcli/run2/io_wrappers.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2023 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file io_wrappers.hpp * @brief Wrappers for Input/Output Stream/VStream. Manages buffer allocation, framerate throttle, latency meter and @@ -95,14 +95,15 @@ public: return get().wait_for_async_ready(m_dataset[0]->size(), HAILORTCLI_DEFAULT_TIMEOUT); } - hailo_status write_async(typename Writer::TransferDoneCallback callback) + template + hailo_status write_async(CB &&callback) { before_write_start(); auto self = std::enable_shared_from_this>::shared_from_this(); auto status = get().write_async(MemoryView(*next_buffer()), [self, original=callback](const typename Writer::CompletionInfo &completion_info) { (void)self; // Keeping self here so the buffer won't be deleted until the callback is called. 
- original(completion_info); + original(completion_info.status); }); if (HAILO_SUCCESS != status) { return status; @@ -272,18 +273,24 @@ public: return get().wait_for_async_ready(m_buffer[0]->size(), HAILORTCLI_DEFAULT_TIMEOUT); } - hailo_status read_async(typename Reader::TransferDoneCallback callback) + template + hailo_status read_async(CB &&callback) { auto self = std::enable_shared_from_this>::shared_from_this(); return get().read_async(MemoryView(*next_buffer()), [self, original=callback](const typename Reader::CompletionInfo &completion_info) { - original(completion_info); + original(completion_info.status); if (completion_info.status == HAILO_SUCCESS) { self->on_read_done(); } }); } + void set_net_live_track(std::shared_ptr net_live_track) + { + m_net_live_track = net_live_track; + } + private: ReaderWrapper(Reader &reader, std::vector &&buffer, std::vector &&mapped_buffer_ptr, const LatencyMeterPtr &overall_latency_meter, std::shared_ptr net_live_track) : diff --git a/hailort/hailortcli/run2/live_stats.cpp b/hailort/hailortcli/run2/live_stats.cpp index 4d9531b..fb47def 100644 --- a/hailort/hailortcli/run2/live_stats.cpp +++ b/hailort/hailortcli/run2/live_stats.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/live_stats.hpp b/hailort/hailortcli/run2/live_stats.hpp index 7d28905..2f47550 100644 --- a/hailort/hailortcli/run2/live_stats.hpp +++ b/hailort/hailortcli/run2/live_stats.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/measurement_live_track.cpp b/hailort/hailortcli/run2/measurement_live_track.cpp index 28efe2c..f3ae8bf 100644 --- a/hailort/hailortcli/run2/measurement_live_track.cpp +++ b/hailort/hailortcli/run2/measurement_live_track.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/measurement_live_track.hpp b/hailort/hailortcli/run2/measurement_live_track.hpp index 7d64b6d..dc6b3fc 100644 --- a/hailort/hailortcli/run2/measurement_live_track.hpp +++ b/hailort/hailortcli/run2/measurement_live_track.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/network_live_track.cpp b/hailort/hailortcli/run2/network_live_track.cpp index c9d35ce..c6bc871 100644 --- a/hailort/hailortcli/run2/network_live_track.cpp +++ b/hailort/hailortcli/run2/network_live_track.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/network_live_track.hpp b/hailort/hailortcli/run2/network_live_track.hpp index 8b3c9bf..e1e0d26 100644 --- a/hailort/hailortcli/run2/network_live_track.hpp +++ b/hailort/hailortcli/run2/network_live_track.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/network_runner.cpp b/hailort/hailortcli/run2/network_runner.cpp index 6db9c58..c56fe7c 100644 --- a/hailort/hailortcli/run2/network_runner.cpp +++ b/hailort/hailortcli/run2/network_runner.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -314,6 +314,8 @@ Expected> NetworkRunner::create_shared(VDevice &v } } + CHECK_SUCCESS(net_runner_ptr->prepare_buffers()); + return net_runner_ptr; } @@ -437,34 +439,53 @@ FullSyncNetworkRunner::FullSyncNetworkRunner(const NetworkParams ¶ms, const { } +hailo_status FullSyncNetworkRunner::prepare_buffers() +{ + static const bool SYNC_API = false; + + m_reader_wrappers.reserve(m_output_vstreams.size()); + // Build output wrappers + for (auto &output_vstream : m_output_vstreams) { + TRY(auto reader_wrapper, ReaderWrapper::create(output_vstream, m_vdevice, + m_overall_latency_meter, nullptr, SYNC_API)); + m_reader_wrappers.emplace_back(reader_wrapper); + } + + m_writer_wrappers.reserve(m_input_vstreams.size()); + // Build input wrappers + for (auto &input_vstream : m_input_vstreams) { + const auto vstream_params = get_params(input_vstream.name()); + TRY(auto writer_wrapper, WriterWrapper::create(input_vstream, vstream_params, m_vdevice, + m_overall_latency_meter, m_params.framerate, SYNC_API)); + m_writer_wrappers.emplace_back(writer_wrapper); + } + return HAILO_SUCCESS; +} + Expected>> FullSyncNetworkRunner::start_inference_threads(EventPtr shutdown_event, std::shared_ptr net_live_track) { - static const bool SYNC_API = false; std::vector> threads; - for (auto &input_vstream : m_input_vstreams) { - const auto vstream_params = get_params(input_vstream.name()); - TRY(auto writer, 
WriterWrapper::create(input_vstream, vstream_params, m_vdevice, - m_overall_latency_meter, m_params.framerate, SYNC_API)); + threads.reserve(m_writer_wrappers.size() + m_reader_wrappers.size()); + for (auto &writer : m_writer_wrappers) { threads.emplace_back(std::make_unique>("WRITE", [this, writer, shutdown_event]() mutable { return run_write(writer, shutdown_event, m_latency_barrier); })); } - bool first = true; //TODO: check with multiple outputs - for (auto &output_vstream : m_output_vstreams) { - TRY(auto reader, ReaderWrapper::create(output_vstream, m_vdevice, - m_overall_latency_meter, first ? net_live_track : nullptr, SYNC_API)); - + bool is_first_output = true; + for (auto &reader : m_reader_wrappers) { + if (is_first_output) { + reader->set_net_live_track(net_live_track); + is_first_output = false; + } threads.emplace_back(std::make_unique>("READ", [this, reader, shutdown_event]() mutable { return run_read(reader, shutdown_event, m_latency_barrier); })); - first = false; } - return threads; } @@ -571,6 +592,48 @@ Expected FullAsyncNetworkRunner::create_infer_job(const Configure return job; } +hailo_status FullAsyncNetworkRunner::prepare_buffers() +{ + TRY(m_bindings, m_configured_infer_model->create_bindings()); + + for (const auto &name : get_input_names()) { + TRY(auto input_config, m_infer_model->input(name)); + + auto params = get_params(name); + Buffer buffer {}; + if (params.input_file_path.empty()) { + TRY(buffer, create_uniformed_buffer(input_config.get_frame_size(), BufferStorageParams::create_dma())); + } else { + TRY(buffer, read_binary_file(params.input_file_path, BufferStorageParams::create_dma())); + } + CHECK(0 == (buffer.size() % input_config.get_frame_size()), HAILO_INVALID_ARGUMENT, + "Size of data for input '{}' must be a multiple of the frame size {}. 
Received - {}", name, input_config.get_frame_size(), buffer.size()); + m_input_buffers.emplace(name, std::move(buffer)); + + for (uint32_t i = 0; i < (m_input_buffers.at(name).size() % input_config.get_frame_size()); i++) { + TRY(auto mapped_buffer, DmaMappedBuffer::create(m_vdevice, m_input_buffers.at(name).data() + (i * input_config.get_frame_size()), + input_config.get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_H2D)); + m_dma_mapped_buffers.emplace_back(std::move(mapped_buffer)); + } + } + + auto output_names = get_output_names(); + m_output_buffers.reserve(output_names.size()); + for (const auto &name : output_names) { + TRY(auto output_config, m_infer_model->output(name)); + TRY(auto buffer, Buffer::create(output_config.get_frame_size(), 0, BufferStorageParams::create_dma())); + m_output_buffers.emplace_back(std::move(buffer)); + + TRY(auto mapped_buffer, DmaMappedBuffer::create(m_vdevice, m_output_buffers.back().data(), m_output_buffers.back().size(), + HAILO_DMA_BUFFER_DIRECTION_D2H)); + m_dma_mapped_buffers.emplace_back(std::move(mapped_buffer)); + + CHECK_SUCCESS(m_bindings.output(name)->set_buffer(MemoryView(m_output_buffers.back()))); + } + + return HAILO_SUCCESS; +} + hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event, std::shared_ptr net_live_track) { @@ -590,45 +653,6 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut TRY(guard, ConfiguredInferModelActivationGuard::create(m_configured_infer_model)); } - TRY(auto bindings, m_configured_infer_model->create_bindings()); - - std::unordered_map input_buffers; // Keys are inputs names - std::vector output_buffers; - std::vector dma_mapped_buffers; - - for (const auto &name : get_input_names()) { - TRY(auto input_config, m_infer_model->input(name)); - - auto params = get_params(name); - Buffer buffer {}; - if (params.input_file_path.empty()) { - TRY(buffer, create_uniformed_buffer(input_config.get_frame_size(), 
BufferStorageParams::create_dma())); - } else { - TRY(buffer, read_binary_file(params.input_file_path, BufferStorageParams::create_dma())); - } - CHECK(0 == (buffer.size() % input_config.get_frame_size()), HAILO_INVALID_ARGUMENT, - "Size of data for input '{}' must be a multiple of the frame size {}. Received - {}", name, input_config.get_frame_size(), buffer.size()); - input_buffers.emplace(name, std::move(buffer)); - - for (uint32_t i = 0; i < (input_buffers.at(name).size() % input_config.get_frame_size()); i++) { - TRY(auto mapped_buffer, DmaMappedBuffer::create(m_vdevice, input_buffers.at(name).data() + (i * input_config.get_frame_size()), - input_config.get_frame_size(), HAILO_DMA_BUFFER_DIRECTION_H2D)); - dma_mapped_buffers.emplace_back(std::move(mapped_buffer)); - } - } - - for (const auto &name : get_output_names()) { - TRY(auto output_config, m_infer_model->output(name)); - TRY(auto buffer, Buffer::create(output_config.get_frame_size(), 0, BufferStorageParams::create_dma())); - output_buffers.emplace_back(std::move(buffer)); - - TRY(auto mapped_buffer, DmaMappedBuffer::create(m_vdevice, output_buffers.back().data(), output_buffers.back().size(), - HAILO_DMA_BUFFER_DIRECTION_D2H)); - dma_mapped_buffers.emplace_back(std::move(mapped_buffer)); - - CHECK_SUCCESS(bindings.output(name)->set_buffer(MemoryView(output_buffers.back()))); - } - FramerateThrottle frame_rate_throttle(m_params.framerate); AsyncInferJob last_job; @@ -638,13 +662,13 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut for (uint32_t frames_in_cycle = 0; frames_in_cycle < m_params.batch_size; frames_in_cycle++) { for (const auto &name : get_input_names()) { TRY(auto input_config, m_infer_model->input(name)); - auto offset = (frame_id % (input_buffers.at(name).size() / input_config.get_frame_size())) * input_config.get_frame_size(); - CHECK_SUCCESS(bindings.input(name)->set_buffer(MemoryView(input_buffers.at(name).data() + offset, + auto offset = (frame_id % 
(m_input_buffers.at(name).size() / input_config.get_frame_size())) * input_config.get_frame_size(); + CHECK_SUCCESS(m_bindings.input(name)->set_buffer(MemoryView(m_input_buffers.at(name).data() + offset, input_config.get_frame_size()))); } frame_id++; if (HAILO_SUCCESS == m_configured_infer_model->wait_for_async_ready(DEFAULT_TRANSFER_TIMEOUT)) { - TRY(last_job, create_infer_job(bindings, net_live_track, frame_rate_throttle, inference_status)); + TRY(last_job, create_infer_job(m_bindings, net_live_track, frame_rate_throttle, inference_status)); last_job.detach(); } } @@ -655,6 +679,7 @@ hailo_status FullAsyncNetworkRunner::run_single_thread_async_infer(EventPtr shut } m_configured_infer_model->shutdown(); last_job.wait(HAILO_INFINITE_TIMEOUT); + m_dma_mapped_buffers.clear(); return inference_status; } @@ -673,11 +698,8 @@ Expected>> RawNetworkRunner::start_infe { const bool async_streams = (m_params.is_async()); std::vector> threads; - for (auto &input_stream : m_input_streams) { - const auto stream_params = get_params(input_stream.get().name()); - TRY(auto writer, WriterWrapper::create(input_stream.get(), stream_params, m_vdevice, - m_overall_latency_meter, m_params.framerate, async_streams)); - + threads.reserve(m_writer_wrappers.size() + m_reader_wrappers.size()); + for (auto &writer : m_writer_wrappers) { if (async_streams) { threads.emplace_back(std::make_unique>("WRITE_ASYNC", [this, writer, shutdown_event]() mutable { @@ -691,11 +713,12 @@ Expected>> RawNetworkRunner::start_infe } } - bool first = true; //TODO: check with multiple outputs - for (auto &output_stream : m_output_streams) { - TRY(auto reader, ReaderWrapper::create(output_stream.get(), m_vdevice, - m_overall_latency_meter, first ? 
net_live_track : nullptr, async_streams)); - + bool is_first_output = true; + for (auto &reader : m_reader_wrappers) { + if (is_first_output) { + reader->set_net_live_track(net_live_track); + is_first_output = false; + } if (async_streams) { threads.emplace_back(std::make_unique>("READ_ASYNC", [this, reader, shutdown_event]() mutable { @@ -707,88 +730,122 @@ Expected>> RawNetworkRunner::start_infe return run_read(reader, shutdown_event, m_latency_barrier); })); } - first = false; } return threads; } +static hailo_status launch_async(std::vector> readers, + std::vector> writers, size_t batch_size, bool wait_for_finish) +{ + // Only used if wait_for_finish is true + struct CallbackState { + size_t size_left; + hailo_status cb_status = HAILO_SUCCESS; + std::mutex m; + std::condition_variable cv; + + CallbackState(size_t size_left) : + size_left(size_left) + {} + }; + + // Keeping cb_state as a shared_ptr to make sure it is alive until shutdown + std::function cb; + std::shared_ptr cb_state; + + if (!wait_for_finish) { + cb = [](hailo_status) {}; + } else { + cb_state = std::make_shared((readers.size() + writers.size()) * batch_size); + cb = [cb_state](hailo_status status) mutable { + { + std::unique_lock lock(cb_state->m); + cb_state->size_left--; + if (cb_state->cb_status != HAILO_SUCCESS) { + cb_state->cb_status = status; + } + } + + cb_state->cv.notify_all(); + }; + } + + for (size_t i = 0; i < batch_size; i++) { + for (auto &writer : writers) { + auto status = writer->wait_for_async_ready(); + if (status != HAILO_SUCCESS) { + return status; + } + } + for (auto &reader : readers) { + auto status = reader->wait_for_async_ready(); + if (status != HAILO_SUCCESS) { + return status; + } + } + + for (auto &writer : writers) { + auto status = writer->write_async(cb); + if (status != HAILO_SUCCESS) { + return status; + } + } + + for (auto &reader : readers) { + auto status = reader->read_async(cb); + if (status != HAILO_SUCCESS) { + return status; + } + } + } + + if 
(wait_for_finish) { + std::unique_lock lock(cb_state->m); + cb_state->cv.wait_for(lock, DEFAULT_TRANSFER_TIMEOUT, + [cb_state]() { return (0 == cb_state->size_left) || (cb_state->cb_status != HAILO_SUCCESS); }); + return cb_state->cb_status; + } else { + // just return + return HAILO_SUCCESS; + } +} + +hailo_status RawNetworkRunner::prepare_buffers() +{ + const bool async_streams = (m_params.is_async()); + + m_reader_wrappers.reserve(m_output_streams.size()); + // Build output wrappers + for (auto &output_stream : m_output_streams) { + TRY(auto reader_wrapper, ReaderWrapper::create(output_stream.get(), m_vdevice, + m_overall_latency_meter, nullptr, async_streams)); + m_reader_wrappers.emplace_back(reader_wrapper); + } + + m_writer_wrappers.reserve(m_input_streams.size()); + // Build input wrappers + for (auto &input_stream : m_input_streams) { + const auto stream_params = get_params(input_stream.get().name()); + TRY(auto writer_wrapper, WriterWrapper::create(input_stream.get(), + stream_params, m_vdevice, m_overall_latency_meter, m_params.framerate, async_streams)); + m_writer_wrappers.emplace_back(writer_wrapper); + } + return HAILO_SUCCESS; +} + hailo_status RawNetworkRunner::run_single_thread_async_infer(EventPtr shutdown_event, std::shared_ptr net_live_track) { - static const bool ASYNC_API = true; - - // Build output wrappers - std::vector> reader_wrappers; - std::vector output_semaphores; - bool is_first_output = true; - for (auto &output_stream : m_output_streams) { - TRY(auto reader_wrapper, ReaderWrapper::create(output_stream.get(), m_vdevice, - m_overall_latency_meter, is_first_output ? 
net_live_track : nullptr, ASYNC_API)); - is_first_output = false; - - TRY(auto max_queue_size, reader_wrapper->get().get_async_max_queue_size()); - TRY(auto semaphore, Semaphore::create_shared(static_cast(max_queue_size))); - - output_semaphores.emplace_back(semaphore); - reader_wrappers.emplace_back(reader_wrapper); + auto signal_event_scope_guard = SignalEventScopeGuard(*shutdown_event); + if (!m_reader_wrappers.empty()) { + m_reader_wrappers[0]->set_net_live_track(net_live_track); } - - // Build input wrappers - std::vector> writer_wrappers; - std::vector input_semaphores; - for (auto &input_stream : m_input_streams) { - TRY(auto writer_wrapper, WriterWrapper::create(input_stream.get(), - get_params(input_stream.get().name()), m_vdevice, m_overall_latency_meter, m_params.framerate, ASYNC_API)); - - TRY(auto max_queue_size, writer_wrapper->get().get_async_max_queue_size()); - TRY(auto semaphore, Semaphore::create_shared(static_cast(max_queue_size))); - - input_semaphores.emplace_back(semaphore); - writer_wrappers.emplace_back(writer_wrapper); - } - - // Build waitables list with reference to previous input/output semaphores. - // We put output semaphores before inputs because we want to always have place to write - // the data into. It also makes sure that the framerate throttle will work properly. 
- const size_t shutdown_index = 0; - const size_t output_index_start = shutdown_index + 1; - const size_t input_index_start = output_index_start + output_semaphores.size(); - - std::vector> waitables; - waitables.emplace_back(std::ref(*shutdown_event)); - auto add_to_waitables = [&waitables](const SemaphorePtr &sem) { waitables.emplace_back(std::ref(*sem)); }; - std::for_each(output_semaphores.begin(), output_semaphores.end(), add_to_waitables); - std::for_each(input_semaphores.begin(), input_semaphores.end(), add_to_waitables); - WaitableGroup wait_group(std::move(waitables)); - - // Inference + const bool wait_for_finish = (m_latency_barrier != nullptr); while (true) { - TRY(auto wait_index, wait_group.wait_any(HAILORTCLI_DEFAULT_TIMEOUT)); - - if (wait_index == shutdown_index) { - // Stopping the network so we won't get timeout on the flush. The async operations may still be active - // (until network deactivation). - stop(); - break; - } else if ((wait_index >= output_index_start) && (wait_index < input_index_start)) { - // output is ready - const size_t output_index = wait_index - output_index_start; - auto status = reader_wrappers[output_index]->read_async( - [semaphore=output_semaphores[output_index]](const OutputStream::CompletionInfo &) { - (void)semaphore->signal(); - } - ); - CHECK_SUCCESS(status); - } else { - // input is ready - const size_t input_index = wait_index - input_index_start; - auto status = writer_wrappers[input_index]->write_async( - [semaphore=input_semaphores[input_index]](const InputStream::CompletionInfo &) { - (void)semaphore->signal(); - } - ); - CHECK_SUCCESS(status); + auto status = launch_async(m_reader_wrappers, m_writer_wrappers, m_params.batch_size, wait_for_finish); + if (status != HAILO_SUCCESS) { + return status; } } diff --git a/hailort/hailortcli/run2/network_runner.hpp b/hailort/hailortcli/run2/network_runner.hpp index 58d2393..994a81e 100644 --- a/hailort/hailortcli/run2/network_runner.hpp +++ 
b/hailort/hailortcli/run2/network_runner.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -130,6 +130,8 @@ public: hailo_status run(EventPtr shutdown_event, LiveStats &live_stats, Barrier &activation_barrier); virtual void stop() = 0; + virtual hailo_status prepare_buffers() = 0; + // Must be called prior to run void set_overall_latency_meter(LatencyMeterPtr latency_meter); void set_latency_barrier(BarrierPtr latency_barrier); @@ -202,7 +204,7 @@ protected: CHECK_SUCCESS(status); status = writer->write_async( - [sync_event](const typename Writer::CompletionInfo &) { + [sync_event](const auto &) { if (sync_event) { (void)sync_event->signal(); } @@ -277,7 +279,7 @@ protected: CHECK_SUCCESS(status); status = reader->read_async( - [sync_event](const typename Reader::CompletionInfo &) { + [sync_event](const auto &) { if (sync_event) { (void)sync_event->signal(); } @@ -337,10 +339,13 @@ public: virtual std::set get_input_names() override; virtual std::set get_output_names() override; VStreamParams get_params(const std::string &name); + virtual hailo_status prepare_buffers() override; private: std::vector m_input_vstreams; std::vector m_output_vstreams; + std::vector> m_reader_wrappers; + std::vector> m_writer_wrappers; }; class FullAsyncNetworkRunner : public NetworkRunner @@ -404,7 +409,14 @@ public: virtual void stop() override; virtual std::set get_input_names() override; virtual std::set get_output_names() override; + virtual hailo_status prepare_buffers() override; VStreamParams get_params(const std::string &name); + +private: + std::unordered_map m_input_buffers; // Keys are inputs names + std::vector m_output_buffers; + std::vector m_dma_mapped_buffers; + ConfiguredInferModel::Bindings m_bindings; }; class RawNetworkRunner : public NetworkRunner @@ -424,10 
+436,13 @@ public: virtual std::set get_input_names() override; virtual std::set get_output_names() override; StreamParams get_params(const std::string &name); + virtual hailo_status prepare_buffers() override; private: InputStreamRefVector m_input_streams; OutputStreamRefVector m_output_streams; + std::vector> m_reader_wrappers; + std::vector> m_writer_wrappers; }; #endif /* _HAILO_HAILORTCLI_RUN2_NETWORK_RUNNER_HPP_ */ \ No newline at end of file diff --git a/hailort/hailortcli/run2/run2_command.cpp b/hailort/hailortcli/run2/run2_command.cpp index 33e62c8..21b69f8 100644 --- a/hailort/hailortcli/run2/run2_command.cpp +++ b/hailort/hailortcli/run2/run2_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -200,7 +200,6 @@ VStreamApp::VStreamApp(const std::string &description, const std::string &name, { "nc", HAILO_FORMAT_ORDER_NC }, { "bayer_rgb", HAILO_FORMAT_ORDER_BAYER_RGB }, { "12_bit_bayer_rgb", HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB }, - { "hailo_nms", HAILO_FORMAT_ORDER_HAILO_NMS }, { "hailo_nms_by_class", HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS }, { "hailo_nms_by_score", HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE }, { "nchw", HAILO_FORMAT_ORDER_NCHW }, @@ -273,7 +272,10 @@ NetworkApp::NetworkApp(const std::string &description, const std::string &name) ->check(NetworkGroupNameValidator(hef_path_option)); // NOTE: callbacks/params aren't called/updated before auto-completion (even after changing the order in App.hpp - at least for 2 jumps) auto net_params = add_option_group("Network Group Parameters"); - net_params->add_option("--batch-size", m_params.batch_size, "Batch size")->default_val(HAILO_DEFAULT_BATCH_SIZE); + net_params->add_option("--batch-size", m_params.batch_size, + "Batch size\n" + "The default value is HAILO_DEFAULT_BATCH_SIZE - which means the 
batch is determined by HailoRT automatically") + ->default_val(HAILO_DEFAULT_BATCH_SIZE); net_params->add_option("--scheduler-threshold", m_params.scheduler_threshold, "Scheduler threshold")->default_val(0); net_params->add_option("--scheduler-timeout", m_params.scheduler_timeout_ms, "Scheduler timeout in milliseconds")->default_val(0); net_params->add_option("--scheduler-priority", m_params.scheduler_priority, "Scheduler priority")->default_val(HAILO_SCHEDULER_PRIORITY_NORMAL); @@ -773,6 +775,9 @@ Expected>> Run2::init_and_run_net_run net_runners[network_runner_index]->set_last_measured_fps(fps_per_network[network_runner_index]); } live_stats.reset(); // Ensures that the final print will include real values and not with values of when streams are already aborted. + for (auto net_runner : net_runners) { + net_runner->stop(); + } shutdown_event->signal(); wait_for_threads(threads); return net_runners; diff --git a/hailort/hailortcli/run2/run2_command.hpp b/hailort/hailortcli/run2/run2_command.hpp index 741eca4..085b2ea 100644 --- a/hailort/hailortcli/run2/run2_command.hpp +++ b/hailort/hailortcli/run2/run2_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/timer_live_track.cpp b/hailort/hailortcli/run2/timer_live_track.cpp index 65c241e..8c95a04 100644 --- a/hailort/hailortcli/run2/timer_live_track.cpp +++ b/hailort/hailortcli/run2/timer_live_track.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run2/timer_live_track.hpp b/hailort/hailortcli/run2/timer_live_track.hpp index c00f5c3..e5939ea 100644 --- a/hailort/hailortcli/run2/timer_live_track.hpp +++ b/hailort/hailortcli/run2/timer_live_track.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/run_command.cpp b/hailort/hailortcli/run_command.cpp index 6dc3ac1..54ebc62 100644 --- a/hailort/hailortcli/run_command.cpp +++ b/hailort/hailortcli/run_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -131,7 +131,8 @@ static void add_run_command_params(CLI::App *run_subcommand, inference_runner_pa ->excludes(frames_count); auto total_batch_size = run_subcommand->add_option("--batch-size", params.batch_size, "Inference batch (should be a divisor of --frames-count if provided).\n" - "This batch applies to the whole network_group. for differential batch per network, see --net-batch-size") + "This batch applies to the whole network_group. for differential batch per network, see --net-batch-size.\n" + "The default value is HAILO_DEFAULT_BATCH_SIZE - which means the batch is determined by HailoRT automatically") ->check(CLI::NonNegativeNumber) ->default_val(HAILO_DEFAULT_BATCH_SIZE); diff --git a/hailort/hailortcli/run_command.hpp b/hailort/hailortcli/run_command.hpp index 14dd951..866d5fd 100644 --- a/hailort/hailortcli/run_command.hpp +++ b/hailort/hailortcli/run_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/scan_command.cpp b/hailort/hailortcli/scan_command.cpp index 814ce98..7ca6088 100644 --- a/hailort/hailortcli/scan_command.cpp +++ b/hailort/hailortcli/scan_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/scan_command.hpp b/hailort/hailortcli/scan_command.hpp index a50c124..3e45527 100644 --- a/hailort/hailortcli/scan_command.hpp +++ b/hailort/hailortcli/scan_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/sensor_config_command.cpp b/hailort/hailortcli/sensor_config_command.cpp index 0cb7137..3bd128e 100644 --- a/hailort/hailortcli/sensor_config_command.cpp +++ b/hailort/hailortcli/sensor_config_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/sensor_config_command.hpp b/hailort/hailortcli/sensor_config_command.hpp index 480563b..4f27553 100644 --- a/hailort/hailortcli/sensor_config_command.hpp +++ b/hailort/hailortcli/sensor_config_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/ssb_update_command.cpp b/hailort/hailortcli/ssb_update_command.cpp index 974f402..5f6db4c 100644 --- a/hailort/hailortcli/ssb_update_command.cpp +++ b/hailort/hailortcli/ssb_update_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/ssb_update_command.hpp b/hailort/hailortcli/ssb_update_command.hpp index 08ed1cb..bf4bd0f 100644 --- a/hailort/hailortcli/ssb_update_command.hpp +++ b/hailort/hailortcli/ssb_update_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/udp_rate_limiter_command.cpp b/hailort/hailortcli/udp_rate_limiter_command.cpp index 1323509..a0e53fe 100644 --- a/hailort/hailortcli/udp_rate_limiter_command.cpp +++ b/hailort/hailortcli/udp_rate_limiter_command.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hailortcli/udp_rate_limiter_command.hpp b/hailort/hailortcli/udp_rate_limiter_command.hpp index 6f5dd60..94e2a67 100644 --- a/hailort/hailortcli/udp_rate_limiter_command.hpp +++ b/hailort/hailortcli/udp_rate_limiter_command.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/hrpc/client.cpp b/hailort/hrpc/client.cpp index 5b90712..55b4c9b 100644 --- a/hailort/hrpc/client.cpp +++ b/hailort/hrpc/client.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file client.hpp * @brief RPC Client @@ -9,46 +9,20 @@ #include "client.hpp" #include "connection_context.hpp" +#include "vdma/pcie_session.hpp" // TODO: Remove include: (HRT-16534) namespace hailort { +constexpr size_t REQUEST_PROTO_MAX_SIZE (128); // TODO: HRT-16644 - make it dynamic -std::chrono::milliseconds get_request_timeout() +// TODO: HRT-16034: make common function with grpc client. +std::chrono::milliseconds get_request_timeout(const std::chrono::milliseconds default_timeout) { auto timeout_seconds = get_env_variable(HAILO_REQUEST_TIMEOUT_SECONDS); if (timeout_seconds) { return std::chrono::seconds(std::stoi(timeout_seconds.value())); } - return REQUEST_TIMEOUT; -} - -Expected> ResultEvent::create_shared() -{ - TRY(auto event, Event::create_shared(Event::State::not_signalled)); - auto ptr = make_shared_nothrow(event); - CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); - return ptr; -} - -ResultEvent::ResultEvent(EventPtr event) : - m_event(event) -{ -} - -Buffer &&ResultEvent::release() -{ - return std::move(m_value); -} - -hailo_status ResultEvent::signal(Buffer &&value) -{ - m_value = std::move(value); - return m_event->signal(); -} - -hailo_status ResultEvent::wait(std::chrono::milliseconds timeout) -{ - return m_event->wait(timeout); + return default_timeout; } Client::~Client() @@ -60,18 +34,31 @@ Client::~Client() } for (const auto &callback : m_replies_callbacks) { - Buffer buffer; - callback.second(HAILO_COMMUNICATION_CLOSED, std::move(buffer)); + rpc_message_t message 
{}; + callback.second(HAILO_COMMUNICATION_CLOSED, std::move(message)); } } +// TODO: Connect should be a static method that returns a client hailo_status Client::connect() { - TRY(m_conn_context, ConnectionContext::create_client_shared(m_device_id)); - auto port = get_pcie_port(); - TRY(auto conn, Session::connect(m_conn_context, port)); + m_callback_dispatcher_manager = make_shared_nothrow(); + CHECK_NOT_NULL(m_callback_dispatcher_manager, HAILO_OUT_OF_HOST_MEMORY); - TRY(m_connection, RpcConnection::create(conn)); + TRY(m_conn_context, ConnectionContext::create_client_shared(m_device_id)); + TRY(auto conn, Session::connect(m_conn_context, HAILORT_SERVER_PORT)); + + // TODO: Use conn.max_ongoing_transfers() function (HRT-16534) + TRY(m_pool_allocator, PoolAllocator::create_shared(PcieSession::MAX_ONGOING_TRANSFERS, REQUEST_PROTO_MAX_SIZE, + [conn] (size_t size) { return conn->allocate_buffer(size, HAILO_DMA_BUFFER_DIRECTION_H2D); } + )); + + TRY(m_sync_requests_pool, ObjectPool::create_shared(PcieSession::MAX_ONGOING_TRANSFERS, [this] () { + return SyncRequest(*this, m_sync_mutex, m_sync_cv); + })); + + TRY(auto connection_params, RpcConnection::Params::create(conn)); + m_connection = RpcConnection(std::move(connection_params)); m_thread = std::thread([this] { auto status = message_loop(); if ((status != HAILO_SUCCESS) && (status != HAILO_COMMUNICATION_CLOSED)) { // TODO: Use this to prevent future requests @@ -89,12 +76,12 @@ hailo_status Client::message_loop() assert(message.header.action_id < static_cast(HailoRpcActionID::MAX_VALUE)); auto action_id_enum = static_cast(message.header.action_id); if (m_custom_callbacks.find(action_id_enum) != m_custom_callbacks.end()) { - auto status = m_custom_callbacks[action_id_enum](MemoryView(message.buffer), m_connection); + auto status = m_custom_callbacks[action_id_enum](MemoryView(message.buffer->data(), message.header.size), m_connection); CHECK_SUCCESS(status); continue; } - std::function reply_received_callback = 
nullptr; + std::function reply_received_callback = nullptr; { std::unique_lock lock(m_replies_mutex); m_replies_cv.wait(lock, [this, &message] () { @@ -104,47 +91,66 @@ hailo_status Client::message_loop() m_replies_callbacks.erase(message.header.message_id); } - reply_received_callback(HAILO_SUCCESS, std::move(message.buffer)); + reply_received_callback(HAILO_SUCCESS, std::move(message)); } return HAILO_SUCCESS; } -Expected Client::execute_request(HailoRpcActionID action_id, const MemoryView &request, - std::function additional_writes_lambda) +SyncRequest::SyncRequest(Client &client, std::mutex &sync_mutex, std::condition_variable &sync_cv) + : m_client(client), m_sync_mutex(sync_mutex), m_sync_cv(sync_cv), m_transfer_status(HAILO_UNINITIALIZED), m_out_reply({}) {} + +Expected SyncRequest::execute(HailoRpcActionID action_id, const MemoryView &request, + std::vector &&additional_buffers) { - auto status = wait_for_execute_request_ready(request, get_request_timeout()); + auto status = m_client.wait_for_execute_request_ready(request, get_request_timeout(REQUEST_TIMEOUT)); CHECK_SUCCESS(status); - hailo_status transfer_status = HAILO_UNINITIALIZED; - Buffer out_reply; auto request_sent_callback = [] (hailo_status status) { if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to send request, status = {}", status); } }; - auto reply_received_callback = [&] (hailo_status status, Buffer &&reply) { + auto reply_received_callback = [this] (hailo_status status, rpc_message_t reply) { { std::unique_lock lock(m_sync_mutex); assert(status != HAILO_UNINITIALIZED); - transfer_status = status; - out_reply = std::move(reply); + m_transfer_status = status; + + if (HAILO_SUCCESS == status) { + m_out_reply = std::move(reply); + } } m_sync_cv.notify_one(); }; - status = execute_request_async(action_id, request, request_sent_callback, - reply_received_callback, additional_writes_lambda); + status = m_client.execute_request_async(action_id, request, request_sent_callback, + 
reply_received_callback, std::move(additional_buffers)); if (HAILO_COMMUNICATION_CLOSED == status) { return make_unexpected(status); } CHECK_SUCCESS_AS_EXPECTED(status); std::unique_lock lock(m_sync_mutex); - CHECK_AS_EXPECTED(m_sync_cv.wait_for(lock, get_request_timeout(), [&] { return transfer_status != HAILO_UNINITIALIZED; }), + CHECK_AS_EXPECTED(m_sync_cv.wait_for(lock, get_request_timeout(REQUEST_TIMEOUT), [this] { return m_transfer_status != HAILO_UNINITIALIZED; }), HAILO_TIMEOUT, "Timeout waiting for transfer completion"); - CHECK_SUCCESS(transfer_status); + CHECK_SUCCESS(m_transfer_status); - return out_reply; + auto copy = m_out_reply; + m_transfer_status = HAILO_UNINITIALIZED; + m_out_reply = {}; + return copy; +} + +Expected Client::execute_request(HailoRpcActionID action_id, const MemoryView &request, + std::vector &&additional_buffers) +{ + TRY(auto sync_request, m_sync_requests_pool->acquire()); + TRY(auto reply, sync_request->execute(action_id, request, std::move(additional_buffers))); + + auto status = m_sync_requests_pool->return_to_pool(sync_request); + CHECK_SUCCESS(status); + + return reply; } hailo_status Client::wait_for_execute_request_ready(const MemoryView &request, std::chrono::milliseconds timeout) @@ -154,8 +160,8 @@ hailo_status Client::wait_for_execute_request_ready(const MemoryView &request, s hailo_status Client::execute_request_async(HailoRpcActionID action_id, const MemoryView &request, std::function request_sent_callback, - std::function reply_received_callback, - std::function additional_writes_lambda) + std::function reply_received_callback, + std::vector &&additional_buffers) { rpc_message_header_t header; { @@ -164,13 +170,16 @@ hailo_status Client::execute_request_async(HailoRpcActionID action_id, const Mem header.message_id = m_messages_sent++; header.action_id = static_cast(action_id); - auto status = m_connection.write_message_async(header, request, std::move(request_sent_callback)); - CHECK_SUCCESS(status); - - if 
(additional_writes_lambda) { - status = additional_writes_lambda(m_connection); - CHECK_SUCCESS(status); + TransferRequest transfer_request; + transfer_request.callback = std::move(request_sent_callback); + if (request.size() > 0) { + transfer_request.transfer_buffers.emplace_back(request); } + transfer_request.transfer_buffers.insert(transfer_request.transfer_buffers.end(), + additional_buffers.begin(), additional_buffers.end()); + + auto status = m_connection.write_message_async(header, std::move(transfer_request)); + CHECK_SUCCESS(status); } { @@ -188,4 +197,14 @@ void Client::register_custom_reply(HailoRpcActionID action_id, m_custom_callbacks[action_id] = callback; } +Expected Client::allocate_request_buffer() +{ + return m_pool_allocator->allocate(); +} + +std::shared_ptr Client::callback_dispatcher_manager() +{ + return m_callback_dispatcher_manager; +} + } // namespace hailort diff --git a/hailort/hrpc/client.hpp b/hailort/hrpc/client.hpp index c6c141f..adacb07 100644 --- a/hailort/hrpc/client.hpp +++ b/hailort/hrpc/client.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file client.hpp * @brief RPC Client Header @@ -18,6 +18,9 @@ #include "rpc_connection.hpp" #include "hrpc_protocol/serializer.hpp" #include "hrpc/connection_context.hpp" +#include "utils/pool_allocator.hpp" +#include "vdma/channel/transfer_common.hpp" +#include "rpc_callbacks/rpc_callbacks_dispatcher.hpp" namespace hailort { @@ -28,19 +31,20 @@ constexpr std::chrono::milliseconds REQUEST_TIMEOUT(std::chrono::seconds(10)); constexpr std::chrono::milliseconds REQUEST_TIMEOUT(std::chrono::seconds(5000)); #endif /* ifndef HAILO_EMULATOR */ -class ResultEvent +class Client; +class SyncRequest { public: - static Expected> create_shared(); - ResultEvent(EventPtr event); - - Buffer &&release(); - hailo_status signal(Buffer &&value); - hailo_status wait(std::chrono::milliseconds timeout); + SyncRequest(Client &client, std::mutex &sync_mutex, std::condition_variable &sync_cv); + Expected execute(HailoRpcActionID action_id, const MemoryView &request, + std::vector &&additional_buffers = {}); private: - Buffer m_value; - EventPtr m_event; + Client &m_client; + std::mutex &m_sync_mutex; + std::condition_variable &m_sync_cv; + hailo_status m_transfer_status; + rpc_message_t m_out_reply; }; class Client @@ -50,15 +54,18 @@ public: ~Client(); hailo_status connect(); - Expected execute_request(HailoRpcActionID action_id, const MemoryView &request, - std::function additional_writes_lambda = nullptr); + Expected execute_request(HailoRpcActionID action_id, const MemoryView &request, + std::vector &&additional_buffers = {}); hailo_status wait_for_execute_request_ready(const MemoryView &request, std::chrono::milliseconds timeout); hailo_status execute_request_async(HailoRpcActionID action_id, const MemoryView &request, std::function request_sent_callback, - std::function reply_received_callback, - std::function additional_writes_lambda = nullptr); + std::function reply_received_callback, + 
std::vector &&additional_buffers = {}); void register_custom_reply(HailoRpcActionID action_id, std::function callback); std::shared_ptr get_driver() { return m_conn_context->get_driver(); }; + const std::string &device_id() const { return m_device_id; } + Expected allocate_request_buffer(); + std::shared_ptr callback_dispatcher_manager(); protected: hailo_status message_loop(); @@ -68,7 +75,7 @@ protected: std::shared_ptr m_conn_context; RpcConnection m_connection; std::thread m_thread; - std::unordered_map> m_replies_callbacks; + std::unordered_map> m_replies_callbacks; std::unordered_map> m_custom_callbacks; uint32_t m_messages_sent; std::mutex m_write_mutex; @@ -76,6 +83,9 @@ protected: std::mutex m_replies_mutex; std::mutex m_sync_mutex; std::condition_variable m_sync_cv; + std::shared_ptr m_pool_allocator; + std::shared_ptr m_callback_dispatcher_manager; + std::shared_ptr> m_sync_requests_pool; }; } // namespace hailort diff --git a/hailort/hrpc/connection_context.cpp b/hailort/hrpc/connection_context.cpp index 5e475b7..549ea87 100644 --- a/hailort/hrpc/connection_context.cpp +++ b/hailort/hrpc/connection_context.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file connection_context.cpp * @brief Connection Context @@ -17,22 +17,6 @@ namespace hailort { -Expected> ConnectionContext::create_shared(const std::string &device_id) -{ - auto should_force_socket_com = get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR); - - // If forcing hrpc service, its because we work without EP driver -> use sockets - if (should_force_socket_com.has_value()) { - return OsConnectionContext::create_shared(false); - } else { - if (HailoRTDriver::is_pcie_ep_loaded()) { - return PcieConnectionContext::create_server_shared(); - } else { - return PcieConnectionContext::create_client_shared(device_id); - } - } -} - Expected> ConnectionContext::create_client_shared(const std::string &device_id) { auto should_force_socket_com = get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR); diff --git a/hailort/hrpc/connection_context.hpp b/hailort/hrpc/connection_context.hpp index 711b4f2..0559769 100644 --- a/hailort/hrpc/connection_context.hpp +++ b/hailort/hrpc/connection_context.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file connection_context.hpp * @brief Connection Context - holds the driver instance @@ -23,7 +23,6 @@ class ConnectionContext public: static Expected> create_client_shared(const std::string &device_id = ""); static Expected> create_server_shared(); - static Expected> create_shared(const std::string &device_id = ""); bool is_accepting() const { return m_is_accepting; } virtual std::shared_ptr get_driver() { return nullptr; }; diff --git a/hailort/hrpc/hailo_session.cpp b/hailort/hrpc/hailo_session.cpp index 818022a..882cb06 100644 --- a/hailort/hrpc/hailo_session.cpp +++ b/hailort/hrpc/hailo_session.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file hailo_session.cpp * @brief Hailo Session @@ -13,13 +13,15 @@ #include "hrpc/raw_connection_internal/socket/hailo_session_internal.hpp" #include "common/internal_env_vars.hpp" #include "connection_context.hpp" +#include "vdma/channel/transfer_common.hpp" namespace hailort { Expected> SessionListener::create_shared(uint16_t port, const std::string &device_id) { - TRY(auto context, ConnectionContext::create_shared(device_id)); + (void)device_id; // Choosing the first device + TRY(auto context, ConnectionContext::create_server_shared()); return SessionListener::create_shared(context, port); } @@ -36,8 +38,7 @@ Expected> SessionListener::create_shared(std::s Expected> Session::connect(uint16_t port, const std::string &device_id) { - // Create according to ConnectionContext type - TRY(auto context, ConnectionContext::create_shared(device_id)); + TRY(auto context, ConnectionContext::create_client_shared(device_id)); auto os_connection_context = std::dynamic_pointer_cast(context); if (os_connection_context != nullptr) { return 
OsSession::connect(os_connection_context, port); @@ -46,6 +47,16 @@ Expected> Session::connect(uint16_t port, const std::st } } +hailo_status Session::write_async(const uint8_t *buffer, size_t size, std::function &&callback) +{ + return write_async(to_request(const_cast(buffer), size, std::move(callback))); +} + +hailo_status Session::read_async(uint8_t *buffer, size_t size, std::function &&callback) +{ + return read_async(to_request(buffer, size, std::move(callback))); +} + Expected> Session::connect(std::shared_ptr context, uint16_t port) { // Create according to ConnectionContext type diff --git a/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.cpp b/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.cpp index b6dbd72..eef3cfc 100644 --- a/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.cpp +++ b/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file hailo_session_internal.cpp * @brief PCIE Hailo Session @@ -13,6 +13,7 @@ #include "common/internal_env_vars.hpp" #include "hailo/hailort.h" #include "vdma/driver/hailort_driver.hpp" +#include "utils/buffer_storage.hpp" #define TRANSFER_TIMEOUT (std::chrono::seconds(10)) @@ -45,28 +46,6 @@ Expected> PcieConnectionContext::create_serve return std::dynamic_pointer_cast(ptr); } -hailo_status PcieConnectionContext::wait_for_available_connection() -{ - std::unique_lock lock(m_mutex); - bool was_successful = m_cv.wait_for(lock, std::chrono::milliseconds(HAILO_INFINITE), [this] () -> bool { - return (m_conn_count == 0); - }); - CHECK(was_successful, HAILO_TIMEOUT, "Got timeout in accept"); - - m_conn_count++; - return HAILO_SUCCESS; -} - -void PcieConnectionContext::mark_connection_closed() -{ - if (0 == m_conn_count) return; // In case number of connections is 0 - no need to mark as closed - { - std::unique_lock lock(m_mutex); - m_conn_count--; - } - m_cv.notify_one(); -} - Expected> RawPcieListener::create_shared(std::shared_ptr context, uint16_t port) { auto ptr = make_shared_nothrow(context, port); @@ -77,14 +56,11 @@ Expected> RawPcieListener::create_shared(std::s Expected> RawPcieListener::accept() { - auto status = m_context->wait_for_available_connection(); - CHECK_SUCCESS(status); - auto new_conn = make_shared_nothrow(m_context); CHECK_NOT_NULL_AS_EXPECTED(new_conn, HAILO_OUT_OF_HOST_MEMORY); TRY(auto session, PcieSession::accept(m_context->get_driver(), m_port)); - status = new_conn->set_session(std::move(session)); + auto status = new_conn->set_session(std::move(session)); CHECK_SUCCESS(status); return std::dynamic_pointer_cast(new_conn); @@ -166,8 +142,6 @@ hailo_status RawPcieSession::close() CHECK_SUCCESS(status); } - m_context->mark_connection_closed(); - { std::unique_lock lock(m_ongoing_writes_mutex); m_ongoing_writes = 0; @@ -192,39 +166,22 @@ hailo_status 
RawPcieSession::wait_for_write_async_ready(size_t transfer_size, st return HAILO_SUCCESS; } -hailo_status RawPcieSession::write_async(const uint8_t *buffer, size_t size, std::function &&callback) +hailo_status RawPcieSession::write_async(TransferRequest &&request) { - if (0 == size) { - callback(HAILO_SUCCESS); - return HAILO_SUCCESS; - } - - bool is_aligned = ((reinterpret_cast(buffer) % OsUtils::get_dma_able_alignment()) == 0); - if (is_aligned) { - auto status = write_async_aligned(buffer, size, std::move(callback)); - CHECK_SUCCESS(status); - } else { - auto status = write_async_unaligned(buffer, size, std::move(callback)); - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status RawPcieSession::write_async_aligned(const uint8_t *buffer, size_t size, std::function &&callback) -{ - std::unique_lock lock(m_ongoing_writes_mutex); - auto status = m_session->write_async(buffer, size, [this, callback] (hailo_status status) { + request.callback = [this, original_callback=request.callback] (hailo_status status) { if (HAILO_STREAM_ABORT == status) { - callback(HAILO_COMMUNICATION_CLOSED); + original_callback(HAILO_COMMUNICATION_CLOSED); return; } - callback(status); + original_callback(status); std::unique_lock lock(m_ongoing_writes_mutex); m_ongoing_writes--; m_ongoing_writes_cv.notify_all(); - }); + }; + + std::unique_lock lock(m_ongoing_writes_mutex); + auto status = m_session->write_async(std::move(request)); if (HAILO_STREAM_ABORT == status) { return HAILO_COMMUNICATION_CLOSED; } @@ -234,22 +191,6 @@ hailo_status RawPcieSession::write_async_aligned(const uint8_t *buffer, size_t s return HAILO_SUCCESS; } -hailo_status RawPcieSession::write_async_unaligned(const uint8_t *buffer, size_t size, std::function &&callback) -{ - TRY(auto aligned_buffer, Buffer::create_shared(buffer, size, BufferStorageParams::create_dma())); - auto status = write_async_aligned(aligned_buffer->data(), aligned_buffer->size(), - [callback, aligned_buffer] (hailo_status status) 
{ - (void)aligned_buffer; // Avoid compiler optimization - callback(status); - }); - if (HAILO_COMMUNICATION_CLOSED == status) { - return HAILO_COMMUNICATION_CLOSED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - hailo_status RawPcieSession::wait_for_read_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) { std::unique_lock lock(m_ongoing_reads_mutex); @@ -259,46 +200,23 @@ hailo_status RawPcieSession::wait_for_read_async_ready(size_t transfer_size, std return HAILO_SUCCESS; } -hailo_status RawPcieSession::read_async(uint8_t *buffer, size_t size, std::function &&callback) +hailo_status RawPcieSession::read_async(TransferRequest &&request) { - if (0 == size) { - callback(HAILO_SUCCESS); - return HAILO_SUCCESS; - } - - bool is_aligned = ((reinterpret_cast(buffer) % OsUtils::get_dma_able_alignment()) == 0); - if (is_aligned) { - auto status = read_async_aligned(buffer, size, std::move(callback)); - if (HAILO_COMMUNICATION_CLOSED == status) { - return HAILO_COMMUNICATION_CLOSED; - } - CHECK_SUCCESS(status); - } else { - auto status = read_async_unaligned(buffer, size, std::move(callback)); - if (HAILO_COMMUNICATION_CLOSED == status) { - return HAILO_COMMUNICATION_CLOSED; - } - CHECK_SUCCESS(status); - } - - return HAILO_SUCCESS; -} - -hailo_status RawPcieSession::read_async_aligned(uint8_t *buffer, size_t size, std::function &&callback) -{ - std::unique_lock lock(m_ongoing_reads_mutex); - auto status = m_session->read_async(buffer, size, [this, callback] (hailo_status status) { + request.callback = [this, original_callback=request.callback] (hailo_status status) { if (HAILO_STREAM_ABORT == status) { - callback(HAILO_COMMUNICATION_CLOSED); + original_callback(HAILO_COMMUNICATION_CLOSED); return; } - callback(status); + original_callback(status); std::unique_lock lock(m_ongoing_reads_mutex); m_ongoing_reads--; m_ongoing_reads_cv.notify_all(); - }); - if ((HAILO_STREAM_ABORT == status) || (HAILO_STREAM_NOT_ACTIVATED == status)) { + }; + + 
std::unique_lock lock(m_ongoing_reads_mutex); + auto status = m_session->read_async(std::move(request)); + if (HAILO_STREAM_ABORT == status) { return HAILO_COMMUNICATION_CLOSED; } CHECK_SUCCESS(status); @@ -307,24 +225,6 @@ hailo_status RawPcieSession::read_async_aligned(uint8_t *buffer, size_t size, st return HAILO_SUCCESS; } -hailo_status RawPcieSession::read_async_unaligned(uint8_t *buffer, size_t size, std::function &&callback) -{ - TRY(auto aligned_buffer, Buffer::create_shared(size, BufferStorageParams::create_dma())); - auto status = read_async_aligned(aligned_buffer->data(), aligned_buffer->size(), - [buffer, aligned_buffer, callback] (hailo_status status) { - if (HAILO_SUCCESS == status) { - memcpy(buffer, aligned_buffer->data(), aligned_buffer->size()); - } - callback(status); - }); - if (HAILO_COMMUNICATION_CLOSED == status) { - return HAILO_COMMUNICATION_CLOSED; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - hailo_status RawPcieSession::set_session(PcieSession &&session) { m_session = make_shared_nothrow(std::move(session)); @@ -333,4 +233,18 @@ hailo_status RawPcieSession::set_session(PcieSession &&session) return HAILO_SUCCESS; } +Expected RawPcieSession::allocate_buffer(size_t size, hailo_dma_buffer_direction_t direction) +{ + TRY(auto buffer, Buffer::create(size, BufferStorageParams::create_dma())); + + TRY(auto dmaable, vdma::DmaAbleBuffer::create_from_user_address(buffer.data(), buffer.size())); + TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared(dmaable, *m_context->get_driver(), + to_hailo_driver_direction(direction))); + + auto dma_mapped_buffer_storage = make_shared_nothrow(std::move(buffer), mapped_buffer); + CHECK_NOT_NULL(dma_mapped_buffer_storage, HAILO_OUT_OF_HOST_MEMORY); + + return Buffer::create(dma_mapped_buffer_storage, false); +} + } // namespace hailort \ No newline at end of file diff --git a/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.hpp 
b/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.hpp index 7b1d48c..c80f2c8 100644 --- a/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.hpp +++ b/hailort/hrpc/raw_connection_internal/pcie/hailo_session_internal.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file hailo_session_internal.hpp * @brief Hailo Session Header for pcie based comunication @@ -28,20 +28,14 @@ public: static Expected> create_server_shared(); PcieConnectionContext(std::shared_ptr &&driver, bool is_accepting) - : ConnectionContext(is_accepting), m_driver(std::move(driver)), m_conn_count(0) {} + : ConnectionContext(is_accepting), m_driver(std::move(driver)) {} virtual ~PcieConnectionContext() = default; virtual std::shared_ptr get_driver() override { return m_driver; } - hailo_status wait_for_available_connection(); - void mark_connection_closed(); - private: std::shared_ptr m_driver; - uint32_t m_conn_count; - std::mutex m_mutex; - std::condition_variable m_cv; }; class RawPcieSession : public Session @@ -59,12 +53,14 @@ public: virtual hailo_status close() override; virtual hailo_status wait_for_write_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; - virtual hailo_status write_async(const uint8_t *buffer, size_t size, - std::function &&callback) override; + using Session::write_async; + virtual hailo_status write_async(TransferRequest &&request) override; virtual hailo_status wait_for_read_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; - virtual hailo_status read_async(uint8_t *buffer, size_t size, - std::function &&callback) override; + using Session::read_async; + virtual hailo_status read_async(TransferRequest &&request) override; + + virtual Expected allocate_buffer(size_t size, 
hailo_dma_buffer_direction_t direction) override; explicit RawPcieSession(std::shared_ptr context) : m_context(context), m_ongoing_writes(0), m_ongoing_reads(0) {} @@ -73,10 +69,6 @@ public: hailo_status connect(uint16_t port); private: - hailo_status write_async_aligned(const uint8_t *buffer, size_t size, std::function &&callback); - hailo_status write_async_unaligned(const uint8_t *buffer, size_t size, std::function &&callback); - hailo_status read_async_aligned(uint8_t *buffer, size_t size, std::function &&callback); - hailo_status read_async_unaligned(uint8_t *buffer, size_t size, std::function &&callback); std::mutex m_read_mutex; std::condition_variable m_read_cv; diff --git a/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.cpp b/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.cpp index 07daf2e..2b3da51 100644 --- a/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.cpp +++ b/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file hailo_session_internal.cpp * @brief Linux Sockets Hailo Session @@ -12,7 +12,7 @@ #include "common/utils.hpp" #include "common/internal_env_vars.hpp" #include "common/filesystem.hpp" -#include "hailo/hailort.h" +#include "vdma/channel/transfer_common.hpp" #include @@ -149,8 +149,8 @@ Expected> OsListener::create_shared(std::shared if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) { TRY(ptr, create_localhost_server(context, port)); } else { - TRY(auto ip_port_pair, OsSession::parse_ip_port(force_socket_com_value.value())); - TRY(ptr, create_by_addr_server(context, std::get<0>(ip_port_pair), std::get<1>(ip_port_pair))); + auto ip = force_socket_com_value.value(); + TRY(ptr, create_by_addr_server(context, ip, port)); } return ptr; @@ -169,7 +169,7 @@ Expected> OsListener::create_by_addr_server(std::sha server_addr.sin_port = htons(port); auto status = socket.pton(AF_INET, ip.c_str(), &server_addr.sin_addr); CHECK_SUCCESS_AS_EXPECTED(status, - "Failed to run 'inet_pton'. make sure 'HAILO_SOCKET_COM_ADDR_SERVER' is set correctly (ip:port)"); + "Failed to run 'inet_pton'. make sure 'HAILO_SOCKET_COM_ADDR_SERVER' is set correctly )"); status = socket.socket_bind((struct sockaddr*)&server_addr, addr_len); CHECK_SUCCESS_AS_EXPECTED(status); @@ -211,18 +211,6 @@ OsSession::~OsSession() close(); } -Expected> OsSession::parse_ip_port(const std::string &ip_port) -{ - std::istringstream ss(ip_port); - std::string ip; - uint16_t port; - - if (std::getline(ss, ip, ':') && (ss >> port)) { - return std::make_pair(ip, port); - } - CHECK_AS_EXPECTED(false, HAILO_INVALID_ARGUMENT ,"Failed to parse ip and port. Format should be as follows: 'X.X.X.X:PP' (e.g. 
127.0.0.1:2000)"); -} - Expected> OsSession::connect(std::shared_ptr context, uint16_t port) { (void)port; @@ -235,8 +223,8 @@ Expected> OsSession::connect(std::shared_ptr(ip_port_pair), std::get<1>(ip_port_pair))); + auto ip = force_socket_com_value.value(); + TRY(ptr, create_by_addr_client(context, ip, port)); } auto status = ptr->connect(); CHECK_SUCCESS(status); @@ -266,7 +254,7 @@ Expected> OsSession::create_by_addr_client(std::share server_addr.sin_port = htons(port); auto status = socket.pton(AF_INET, ip.c_str(), &server_addr.sin_addr); CHECK_SUCCESS_AS_EXPECTED(status, - "Failed to run 'inet_pton'. make sure 'HAILO_SOCKET_COM_ADDR_CLIENT' is set correctly (ip:port)"); + "Failed to run 'inet_pton'. make sure 'HAILO_SOCKET_COM_ADDR_CLIENT' is set correctly "); TRY(auto write_actions_thread, AsyncActionsThread::create(MAX_ONGOING_TRANSFERS)); TRY(auto read_actions_thread, AsyncActionsThread::create(MAX_ONGOING_TRANSFERS)); @@ -284,8 +272,8 @@ hailo_status OsSession::connect() if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) { return connect_localhost(); } else { - TRY(auto ip_port_pair, parse_ip_port(force_socket_com_value.value())); - return connect_by_addr(std::get<0>(ip_port_pair), std::get<1>(ip_port_pair)); + auto ip = force_socket_com_value.value(); + return connect_by_addr(ip, m_port); } } else { auto force_socket_com_value = get_env_variable(HAILO_SOCKET_COM_ADDR_CLIENT_ENV_VAR); @@ -293,8 +281,8 @@ hailo_status OsSession::connect() if (HAILO_SOCKET_COM_ADDR_UNIX_SOCKET == force_socket_com_value.value()) { return connect_localhost(); } else { - TRY(auto ip_port_pair, parse_ip_port(force_socket_com_value.value())); - return connect_by_addr(std::get<0>(ip_port_pair), std::get<1>(ip_port_pair)); + auto ip = force_socket_com_value.value(); + return connect_by_addr(ip, m_port); } } } @@ -332,7 +320,7 @@ hailo_status OsSession::connect_by_addr(const std::string &ip, uint16_t port) server_addr.sin_port = htons(port); auto status = 
m_socket.pton(AF_INET, ip.c_str(), &server_addr.sin_addr); CHECK_SUCCESS_AS_EXPECTED(status, - "Failed to run 'inet_pton'. make sure 'HAILO_SOCKET_COM_ADDR_XX' is set correctly (ip:port)"); + "Failed to run 'inet_pton'. make sure 'HAILO_SOCKET_COM_ADDR_XX' is set correctly "); status = m_socket.connect((struct sockaddr*)&server_addr, addr_len); CHECK_SUCCESS(status); @@ -413,20 +401,21 @@ hailo_status OsSession::wait_for_write_async_ready(size_t /*transfer_size*/, std return m_write_actions_thread->wait_for_enqueue_ready(timeout); } -hailo_status OsSession::write_async(const uint8_t *buffer, size_t size, std::function &&callback) +hailo_status OsSession::write_async(TransferRequest &&request) { - auto status = m_write_actions_thread->enqueue_nonblocking({[this, buffer, size] (bool is_aborted) -> hailo_status { + return m_write_actions_thread->enqueue_nonblocking({[this, buffers=std::move(request.transfer_buffers)] (bool is_aborted) -> hailo_status { if (is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } - auto status = m_socket.sendall(buffer, size, MSG_NOSIGNAL); - CHECK(status == HAILO_SUCCESS || status == HAILO_COMMUNICATION_CLOSED, status); - return status; - }, callback}); - CHECK_SUCCESS(status); + for (auto transfer_buffer : buffers) { + TRY(auto buffer, transfer_buffer.base_buffer()); + auto status = m_socket.sendall(buffer.data(), buffer.size(), MSG_NOSIGNAL); + CHECK_SUCCESS(status); + } - return HAILO_SUCCESS; + return HAILO_SUCCESS; + }, request.callback}); } hailo_status OsSession::wait_for_read_async_ready(size_t /*transfer_size*/, std::chrono::milliseconds timeout) @@ -434,21 +423,29 @@ hailo_status OsSession::wait_for_read_async_ready(size_t /*transfer_size*/, std: return m_read_actions_thread->wait_for_enqueue_ready(timeout); } -hailo_status OsSession::read_async(uint8_t *buffer, size_t size, std::function &&callback) +hailo_status OsSession::read_async(TransferRequest &&request) { - auto status = 
m_read_actions_thread->enqueue_nonblocking({[this, buffer, size] (bool is_aborted) -> hailo_status { + return m_read_actions_thread->enqueue_nonblocking({[this, buffers=std::move(request.transfer_buffers)] (bool is_aborted) -> hailo_status { if (is_aborted) { - return HAILO_STREAM_ABORTED_BY_USER; + return HAILO_STREAM_ABORT; } - auto status = m_socket.recvall(buffer, size); - CHECK(status == HAILO_SUCCESS || status == HAILO_COMMUNICATION_CLOSED, status); - return status; - }, callback}); + for (auto transfer_buffer : buffers) { + TRY(auto buffer, transfer_buffer.base_buffer()); + auto status = m_socket.recvall(buffer.data(), buffer.size()); + if (HAILO_COMMUNICATION_CLOSED == status) { + return status; + } + CHECK_SUCCESS(status); + } - CHECK_SUCCESS(status); + return HAILO_SUCCESS; + }, request.callback}); +} - return HAILO_SUCCESS; +Expected OsSession::allocate_buffer(size_t size, hailo_dma_buffer_direction_t) +{ + return Buffer::create(size); } } // namespace hailort \ No newline at end of file diff --git a/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.hpp b/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.hpp index a066094..48a8b34 100644 --- a/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.hpp +++ b/hailort/hrpc/raw_connection_internal/socket/hailo_session_internal.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file hailo_session_internal.hpp * @brief Hailo Session Header for sockets based comunication @@ -100,12 +100,14 @@ public: virtual hailo_status close() override; virtual hailo_status wait_for_write_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; - virtual hailo_status write_async(const uint8_t *buffer, size_t size, - std::function &&callback) override; + using Session::write_async; + virtual hailo_status write_async(TransferRequest &&request) override; virtual hailo_status wait_for_read_async_ready(size_t transfer_size, std::chrono::milliseconds timeout) override; - virtual hailo_status read_async(uint8_t *buffer, size_t size, - std::function &&callback) override; + using Session::read_async; + virtual hailo_status read_async(TransferRequest &&request) override; + + virtual Expected allocate_buffer(size_t size, hailo_dma_buffer_direction_t direction) override; OsSession(Socket &&socket, std::shared_ptr context, std::shared_ptr write_actions_thread, @@ -115,7 +117,6 @@ public: m_read_actions_thread(read_actions_thread) {} static Expected get_localhost_server_addr(); - static Expected> parse_ip_port(const std::string &ip_port); hailo_status connect(); private: diff --git a/hailort/hrpc/rpc_connection.cpp b/hailort/hrpc/rpc_connection.cpp index 5a5bb20..c8fe518 100644 --- a/hailort/hrpc/rpc_connection.cpp +++ b/hailort/hrpc/rpc_connection.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file rpc_connection.cpp * @brief RPC connection implementation @@ -10,70 +10,45 @@ #include "rpc_connection.hpp" #include "vdma/pcie_session.hpp" +#include + namespace hailort { -#define TRANSFER_TIMEOUT std::chrono::seconds(10) +constexpr std::chrono::seconds TRANSFER_TIMEOUT(10); +constexpr size_t READ_RPC_BUFFER_MAX_SIZE(2048); +constexpr size_t MAX_READ_TRANSFERS(2); -Expected RpcConnection::create(std::shared_ptr raw) +Expected RpcConnection::Params::create(std::shared_ptr raw) { - TRY(auto shutdown_event, Event::create_shared(Event::State::not_signalled)); - TRY(auto write_rpc_headers, DmaAbleBufferPool::create_shared(sizeof(rpc_message_header_t), - PcieSession::MAX_ONGOING_TRANSFERS, shutdown_event)); - TRY(auto read_rpc_headers, DmaAbleBufferPool::create_shared(sizeof(rpc_message_header_t), - PcieSession::MAX_ONGOING_TRANSFERS, shutdown_event)); + auto create_dma_allocator = [raw](size_t pool_size, size_t buffer_size, hailo_dma_buffer_direction_t direction) { + return PoolAllocator::create_shared(pool_size, buffer_size, [raw, direction](size_t size) { + return raw->allocate_buffer(size, direction); + }); + }; + + TRY(auto write_rpc_headers_allocator, create_dma_allocator(PcieSession::MAX_ONGOING_TRANSFERS, sizeof(rpc_message_header_t), + HAILO_DMA_BUFFER_DIRECTION_H2D)); + TRY(auto read_rpc_headers_allocator, create_dma_allocator(MAX_READ_TRANSFERS, sizeof(rpc_message_header_t), HAILO_DMA_BUFFER_DIRECTION_D2H)); + TRY(auto read_rpc_body_allocator, create_dma_allocator(MAX_READ_TRANSFERS, READ_RPC_BUFFER_MAX_SIZE, HAILO_DMA_BUFFER_DIRECTION_D2H)); auto read_mutex = make_shared_nothrow(); CHECK_NOT_NULL(read_mutex, HAILO_OUT_OF_HOST_MEMORY); - auto write_mutex = make_shared_nothrow(); - CHECK_NOT_NULL(write_mutex, HAILO_OUT_OF_HOST_MEMORY); - auto read_cv = make_shared_nothrow(); CHECK_NOT_NULL(read_cv, HAILO_OUT_OF_HOST_MEMORY); auto write_cv = make_shared_nothrow(); 
CHECK_NOT_NULL(write_cv, HAILO_OUT_OF_HOST_MEMORY); - return RpcConnection(raw, write_rpc_headers, read_rpc_headers, shutdown_event, read_mutex, write_mutex, read_cv, write_cv); -} + RpcConnection::Params params = { raw, write_rpc_headers_allocator, read_rpc_headers_allocator, read_rpc_body_allocator, read_mutex, read_cv }; -hailo_status RpcConnection::write_message(const rpc_message_header_t &header, const MemoryView &buffer) -{ - hailo_status transfer_status = HAILO_UNINITIALIZED; - - auto status = wait_for_write_message_async_ready(buffer.size(), TRANSFER_TIMEOUT); - CHECK_SUCCESS(status); - - status = write_message_async(header, buffer, [&] (hailo_status status) { - { - std::unique_lock lock(*m_write_mutex); - assert(status != HAILO_UNINITIALIZED); - transfer_status = status; - } - m_write_cv->notify_one(); - }); - if (HAILO_COMMUNICATION_CLOSED == status) { - return status; - } - CHECK_SUCCESS(status); - - std::unique_lock lock(*m_write_mutex); - CHECK(m_write_cv->wait_for(lock, TRANSFER_TIMEOUT, [&] { return transfer_status != HAILO_UNINITIALIZED; }), - HAILO_TIMEOUT, "Timeout waiting for transfer completion"); - - return transfer_status; + return params; } Expected RpcConnection::read_message() { - auto expected_dma_header_ptr = m_read_rpc_headers->acquire_buffer(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == expected_dma_header_ptr.status()) { - return make_unexpected(HAILO_COMMUNICATION_CLOSED); - } - CHECK_EXPECTED(expected_dma_header_ptr); - - auto dma_header_ptr = expected_dma_header_ptr.release(); + TRY(auto dma_header_ptr, m_read_rpc_headers_allocator->allocate()); rpc_message_header_t &dma_header = *reinterpret_cast(dma_header_ptr->data()); auto status = m_session->read(reinterpret_cast(&dma_header), sizeof(dma_header)); @@ -83,66 +58,52 @@ Expected RpcConnection::read_message() CHECK_SUCCESS(status); CHECK(RPC_MESSAGE_MAGIC == dma_header.magic, HAILO_INTERNAL_FAILURE, "Invalid magic! 
{} != {}", dma_header.magic, RPC_MESSAGE_MAGIC); + CHECK(dma_header.size <= READ_RPC_BUFFER_MAX_SIZE, HAILO_INTERNAL_FAILURE, "Invalid size! {} > {}", + dma_header.size, READ_RPC_BUFFER_MAX_SIZE); - TRY(auto buffer, Buffer::create(dma_header.size, BufferStorageParams::create_dma())); - status = m_session->read(buffer.data(), buffer.size()); - if (HAILO_COMMUNICATION_CLOSED == status) { - return make_unexpected(status); + TRY(auto buffer, m_read_rpc_body_allocator->allocate()); + if (dma_header.size > 0) { + status = m_session->read(buffer->data(), dma_header.size); + if (HAILO_COMMUNICATION_CLOSED == status) { + return make_unexpected(status); + } + CHECK_SUCCESS(status); } - CHECK_SUCCESS(status); rpc_message_t rpc_message = {}; rpc_message.header = dma_header; rpc_message.buffer = std::move(buffer); - status = m_read_rpc_headers->return_to_pool(dma_header_ptr); - CHECK_SUCCESS(status); - return rpc_message; } -hailo_status RpcConnection::write_buffer(const MemoryView &buffer) -{ - hailo_status transfer_status = HAILO_UNINITIALIZED; - - auto status = wait_for_write_buffer_async_ready(buffer.size(), TRANSFER_TIMEOUT); - CHECK_SUCCESS(status); - - status = write_buffer_async(buffer, [&] (hailo_status status) { - { - std::unique_lock lock(*m_write_mutex); - assert(status != HAILO_UNINITIALIZED); - transfer_status = status; - } - m_write_cv->notify_one(); - }); - if (HAILO_COMMUNICATION_CLOSED == status) { - return status; - } - CHECK_SUCCESS(status); - - std::unique_lock lock(*m_write_mutex); - CHECK(m_write_cv->wait_for(lock, TRANSFER_TIMEOUT, [&] { return transfer_status != HAILO_UNINITIALIZED; }), - HAILO_TIMEOUT, "Timeout waiting for transfer completion"); - - return transfer_status; -} - hailo_status RpcConnection::read_buffer(MemoryView buffer) +{ + return read_buffers({ TransferBuffer(buffer) }); +} + +hailo_status RpcConnection::read_buffers(std::vector &&buffers) { hailo_status transfer_status = HAILO_UNINITIALIZED; - auto status = 
wait_for_read_buffer_async_ready(buffer.size(), TRANSFER_TIMEOUT); + const size_t total_size = std::accumulate(buffers.begin(), buffers.end(), size_t{0}, + [] (size_t acc, const TransferBuffer &buffer) { return acc + buffer.size(); }); + + auto status = wait_for_read_buffer_async_ready(total_size, TRANSFER_TIMEOUT); CHECK_SUCCESS(status); - status = read_buffer_async(buffer, [&] (hailo_status status) { + TransferRequest transfer_request; + transfer_request.transfer_buffers = std::move(buffers); + transfer_request.callback = [&] (hailo_status status) { { std::unique_lock lock(*m_read_mutex); assert(status != HAILO_UNINITIALIZED); transfer_status = status; } m_read_cv->notify_one(); - }); + }; + + status = m_session->read_async(std::move(transfer_request)); if (HAILO_COMMUNICATION_CLOSED == status) { return status; } @@ -163,40 +124,34 @@ hailo_status RpcConnection::wait_for_write_message_async_ready(size_t buffer_siz hailo_status RpcConnection::write_message_async(const rpc_message_header_t &header, const MemoryView &buffer, std::function &&callback) { - auto expected_dma_header_ptr = m_write_rpc_headers->acquire_buffer(); - if (HAILO_SHUTDOWN_EVENT_SIGNALED == expected_dma_header_ptr.status()) { - return HAILO_COMMUNICATION_CLOSED; + TransferRequest transfer_request; + transfer_request.callback = std::move(callback); + if (buffer.size() > 0) { + transfer_request.transfer_buffers.emplace_back(buffer); } - CHECK_EXPECTED(expected_dma_header_ptr); - auto dma_header_ptr = expected_dma_header_ptr.release(); + return write_message_async(header, std::move(transfer_request)); +} + +hailo_status RpcConnection::write_message_async(const rpc_message_header_t &header, TransferRequest &&transfer_request) +{ + TRY(auto dma_header_ptr, m_write_rpc_headers_allocator->allocate()); rpc_message_header_t &dma_header = *reinterpret_cast(dma_header_ptr->data()); memcpy(&dma_header, &header, sizeof(header)); - dma_header.magic = RPC_MESSAGE_MAGIC; - auto status = 
m_session->write_async(reinterpret_cast(&dma_header), sizeof(dma_header), - [write_rpc_headers = m_write_rpc_headers, dma_header_ptr] (hailo_status status) { - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to write header, status = {}", status); - } - status = write_rpc_headers->return_to_pool(dma_header_ptr); - if (HAILO_SUCCESS != status) { - LOGGER__CRITICAL("Could not return buffer to pool! status = {}", status); - } - }); - if (HAILO_COMMUNICATION_CLOSED == status) { - return status; - } - CHECK_SUCCESS(status); + // Insert the dma_header before all other buffers + transfer_request.transfer_buffers.insert(transfer_request.transfer_buffers.begin(), + MemoryView(reinterpret_cast(&dma_header), sizeof(dma_header))); - status = m_session->write_async(buffer.data(), dma_header.size, std::move(callback)); - if (HAILO_COMMUNICATION_CLOSED == status) { - return status; - } - CHECK_SUCCESS(status); + // Callback should capture the dma_header_ptr + transfer_request.callback = [dma_header_ptr, + original_callback=transfer_request.callback](hailo_status status) mutable { + dma_header_ptr.reset(); + original_callback(status); + }; - return HAILO_SUCCESS; + return m_session->write_async(std::move(transfer_request)); } hailo_status RpcConnection::wait_for_write_buffer_async_ready(size_t buffer_size, std::chrono::milliseconds timeout) @@ -220,17 +175,6 @@ hailo_status RpcConnection::wait_for_read_buffer_async_ready(size_t buffer_size, return m_session->wait_for_read_async_ready(buffer_size, timeout); } -hailo_status RpcConnection::read_buffer_async(MemoryView buffer, std::function &&callback) -{ - auto status = m_session->read_async(buffer.data(), buffer.size(), std::move(callback)); - if (HAILO_COMMUNICATION_CLOSED == status) { - return make_unexpected(status); - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - hailo_status RpcConnection::close() { hailo_status status = HAILO_UNINITIALIZED; diff --git a/hailort/hrpc/rpc_connection.hpp 
b/hailort/hrpc/rpc_connection.hpp index 297385c..ce1b6e0 100644 --- a/hailort/hrpc/rpc_connection.hpp +++ b/hailort/hrpc/rpc_connection.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file rpc_connection.hpp * @brief RPC Connection Header @@ -16,27 +16,19 @@ #include "common/utils.hpp" #include "common/internal_env_vars.hpp" #include "common/buffer_pool.hpp" +#include "utils/pool_allocator.hpp" +#include "vdma/channel/transfer_common.hpp" #define RPC_MESSAGE_MAGIC (0x8A554432) -#define DEFAULT_PCIE_PORT (12133) +#define HAILORT_SERVER_PORT (12133) namespace hailort { -// TODO: HRT-15413 - Remove Env var usage. Use the API to choose port, use defaults ports for each server. -inline uint16_t get_pcie_port() -{ - auto port_str = get_env_variable(HAILO_CONNECTION_PCIE_PORT_ENV_VAR); - if (port_str) { - return static_cast(std::stoi(port_str.value())); - } - return DEFAULT_PCIE_PORT; -} - #pragma pack(push, 1) struct rpc_message_header_t { - uint32_t magic; // TODO: consider removing. 
check if hurts performance + uint32_t magic; uint32_t size; uint32_t message_id; uint32_t action_id; @@ -46,47 +38,59 @@ struct rpc_message_header_t struct rpc_message_t { rpc_message_header_t header; - Buffer buffer; + BufferPtr buffer; }; class RpcConnection { public: - static Expected create(std::shared_ptr raw); - RpcConnection() = default; - explicit RpcConnection(std::shared_ptr raw, std::shared_ptr write_rpc_headers, - std::shared_ptr read_rpc_headers, EventPtr shutdown_event, std::shared_ptr read_mutex, - std::shared_ptr write_mutex, std::shared_ptr read_cv, std::shared_ptr write_cv) : - m_session(raw), m_write_rpc_headers(write_rpc_headers), m_read_rpc_headers(read_rpc_headers), m_shutdown_event(shutdown_event), - m_read_mutex(read_mutex), m_write_mutex(write_mutex), m_read_cv(read_cv), m_write_cv(write_cv) {} + struct Params + { + public: + static Expected create(std::shared_ptr session); + + std::shared_ptr session; + std::shared_ptr write_rpc_headers_allocator; + std::shared_ptr read_rpc_headers_allocator; + std::shared_ptr read_rpc_body_allocator; + std::shared_ptr read_mutex; + std::shared_ptr read_cv; + }; + + RpcConnection() = default; + RpcConnection(Params &¶ms) : + m_session(params.session), m_write_rpc_headers_allocator(params.write_rpc_headers_allocator), + m_read_rpc_headers_allocator(params.read_rpc_headers_allocator), m_read_rpc_body_allocator(params.read_rpc_body_allocator), + m_read_mutex(params.read_mutex), + m_read_cv(params.read_cv) {} + ~RpcConnection() = default; - hailo_status write_message(const rpc_message_header_t &header, const MemoryView &buffer); Expected read_message(); - hailo_status write_buffer(const MemoryView &buffer); hailo_status read_buffer(MemoryView buffer); + hailo_status read_buffers(std::vector &&buffers); hailo_status wait_for_write_message_async_ready(size_t buffer_size, std::chrono::milliseconds timeout); hailo_status write_message_async(const rpc_message_header_t &header, const MemoryView &buffer, 
std::function &&callback); + hailo_status write_message_async(const rpc_message_header_t &header, TransferRequest &&transfer_request); + hailo_status wait_for_write_buffer_async_ready(size_t buffer_size, std::chrono::milliseconds timeout); hailo_status write_buffer_async(const MemoryView &buffer, std::function &&callback); hailo_status wait_for_read_buffer_async_ready(size_t buffer_size, std::chrono::milliseconds timeout); - hailo_status read_buffer_async(MemoryView buffer, std::function &&callback); hailo_status close(); private: std::shared_ptr m_session; - DmaAbleBufferPoolPtr m_write_rpc_headers; - DmaAbleBufferPoolPtr m_read_rpc_headers; + std::shared_ptr m_write_rpc_headers_allocator; + std::shared_ptr m_read_rpc_headers_allocator; + std::shared_ptr m_read_rpc_body_allocator; EventPtr m_shutdown_event; std::shared_ptr m_read_mutex; - std::shared_ptr m_write_mutex; std::shared_ptr m_read_cv; - std::shared_ptr m_write_cv; }; } // namespace hailort diff --git a/hailort/hrpc/server.cpp b/hailort/hrpc/server.cpp index 4ea41b0..a4aaacb 100644 --- a/hailort/hrpc/server.cpp +++ b/hailort/hrpc/server.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file server.cpp * @brief RPC Server @@ -12,30 +12,27 @@ namespace hailort { -ServerContext::ServerContext(Server &server, RpcConnection connection) : - m_server(server), m_connection(connection) {} - -hailo_status ServerContext::trigger_callback(uint32_t callback_id, hailo_status callback_status, - rpc_object_handle_t callback_owner_handle, std::function additional_writes_lambda) +Expected ClientConnection::create(std::shared_ptr session, uint32_t client_id) { - return m_server.trigger_callback(callback_id, callback_status, callback_owner_handle, m_connection, additional_writes_lambda); + TRY(auto conn_params, RpcConnection::Params::create(session)); + return ClientConnection(std::move(conn_params), client_id); } -RpcConnection &ServerContext::connection() +uint32_t ClientConnection::client_id() const { - return m_connection; + return m_client_id; } void Dispatcher::register_action(HailoRpcActionID action_id, - std::function(const MemoryView&, ServerContextPtr)> action) + std::function(const MemoryView&, ClientConnection)> action) { m_actions[action_id] = action; } -Expected Dispatcher::call_action(HailoRpcActionID action_id, const MemoryView &request, ServerContextPtr server_context) +Expected Dispatcher::call_action(HailoRpcActionID action_id, const MemoryView &request, ClientConnection client_connection) { if (m_actions.find(action_id) != m_actions.end()) { - return m_actions[action_id](request, server_context); + return m_actions[action_id](request, client_connection); } LOGGER__ERROR("Failed to find RPC action {}", static_cast(action_id)); return make_unexpected(HAILO_RPC_FAILED); @@ -43,7 +40,7 @@ Expected Dispatcher::call_action(HailoRpcActionID action_id, const Memor hailo_status Server::serve() { - TRY(auto server_connection, SessionListener::create_shared(m_connection_context, get_pcie_port())); + TRY(auto server_connection, 
SessionListener::create_shared(m_connection_context, HAILORT_SERVER_PORT)); while (true) { TRY(auto client_connection, create_client_connection(server_connection)); auto th = std::thread([this, client_connection]() { serve_client(client_connection); }); @@ -57,17 +54,15 @@ void Server::set_dispatcher(Dispatcher dispatcher) m_dispatcher = dispatcher; } -Expected Server::create_client_connection(std::shared_ptr server_connection) +Expected Server::create_client_connection(std::shared_ptr server_connection) { TRY(auto conn, server_connection->accept()); - TRY(auto rpc_conn, RpcConnection::create(conn)); + TRY(auto rpc_conn, ClientConnection::create(conn, ++m_client_count)); return rpc_conn; } -hailo_status Server::serve_client(RpcConnection client_connection) +hailo_status Server::serve_client(ClientConnection client_connection) { - auto server_context = make_shared_nothrow(*this, client_connection); - CHECK_NOT_NULL(server_context, HAILO_OUT_OF_HOST_MEMORY); while (true) { auto request = client_connection.read_message(); if (HAILO_COMMUNICATION_CLOSED == request.status()) { @@ -78,7 +73,7 @@ hailo_status Server::serve_client(RpcConnection client_connection) assert(request->header.action_id < static_cast(HailoRpcActionID::MAX_VALUE)); TRY(auto reply, m_dispatcher.call_action(static_cast(request->header.action_id), - MemoryView(request->buffer), server_context)); + MemoryView(request->buffer->data(), request->header.size), client_connection)); request->header.size = static_cast(reply.size()); auto status = client_connection.wait_for_write_message_async_ready(reply.size(), SERVER_TIMEOUT); @@ -92,7 +87,7 @@ hailo_status Server::serve_client(RpcConnection client_connection) status = client_connection.write_message_async(request->header, reply_memview, [reply_ptr] (hailo_status status) { - if (HAILO_SUCCESS != status) { + if ((HAILO_SUCCESS != status) && (HAILO_COMMUNICATION_CLOSED != status)) { LOGGER__ERROR("Failed to send reply, status = {}", status); } }); @@ -108,14 
+103,14 @@ hailo_status Server::serve_client(RpcConnection client_connection) return HAILO_SUCCESS; } -hailo_status Server::trigger_callback(uint32_t callback_id, hailo_status callback_status, rpc_object_handle_t callback_owner_handle, - RpcConnection connection, std::function additional_writes_lambda) +hailo_status Server::trigger_callback(const RpcCallback &callback, ClientConnection connection, + std::function additional_writes_lambda) { // TODO: callback handling should be outside of HRPC (HRT-14638) - TRY(auto reply, CallbackCalledSerializer::serialize_reply(callback_status, callback_id, callback_owner_handle)); + TRY(auto reply, CallbackCalledSerializer::serialize_reply(callback)); rpc_message_header_t header; header.action_id = static_cast(HailoRpcActionID::CALLBACK_CALLED); - header.message_id = callback_id; + header.message_id = callback.callback_id; header.size = static_cast(reply.size()); auto reply_ptr = make_shared_nothrow(std::move(reply)); diff --git a/hailort/hrpc/server.hpp b/hailort/hrpc/server.hpp index 615dda1..5585420 100644 --- a/hailort/hrpc/server.hpp +++ b/hailort/hrpc/server.hpp @@ -1,13 +1,13 @@ -#ifndef _SERVER_HPP_ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file server.hpp * @brief RPC Server Header **/ +#ifndef _SERVER_HPP_ #define _SERVER_HPP_ #include @@ -22,20 +22,19 @@ constexpr auto SERVER_TIMEOUT = std::chrono::seconds(10); namespace hailort { -class Server; -class ServerContext +class ClientConnection : public RpcConnection { public: - ServerContext(Server &server, RpcConnection connection); - hailo_status trigger_callback(uint32_t callback_id, hailo_status callback_status, - rpc_object_handle_t callback_owner_handle, std::function additional_writes_lambda = nullptr); - RpcConnection &connection(); + static Expected create(std::shared_ptr session, uint32_t client_id); + ClientConnection() = default; + ClientConnection(RpcConnection::Params &¶ms, uint32_t client_id) : RpcConnection(std::move(params)), + m_client_id(client_id) {} + + uint32_t client_id() const; private: - Server &m_server; - RpcConnection m_connection; + uint32_t m_client_id; }; -using ServerContextPtr = std::shared_ptr; class Dispatcher { @@ -43,37 +42,36 @@ public: Dispatcher() = default; void register_action(HailoRpcActionID action_id, - std::function(const MemoryView&, ServerContextPtr)> action); - Expected call_action(HailoRpcActionID action_id, const MemoryView &request, ServerContextPtr server_context); + std::function(const MemoryView&, ClientConnection)> action); + Expected call_action(HailoRpcActionID action_id, const MemoryView &request, ClientConnection client_connection); private: - std::unordered_map(const MemoryView&, ServerContextPtr)>> m_actions; + std::unordered_map(const MemoryView&, ClientConnection)>> m_actions; }; class Server { public: - Server(std::shared_ptr connection_context) : m_connection_context(connection_context) {}; + Server(std::shared_ptr connection_context) : m_connection_context(connection_context), m_client_count(0) {}; virtual ~Server() = default; hailo_status serve(); void set_dispatcher(Dispatcher dispatcher); - 
friend class ServerContext; - protected: - hailo_status trigger_callback(uint32_t callback_id, hailo_status callback_status, rpc_object_handle_t callback_owner_handle, - RpcConnection connection, std::function additional_writes_lambda = nullptr); + hailo_status trigger_callback(const RpcCallback &callback, ClientConnection connection, + std::function additional_writes_lambda = nullptr); std::shared_ptr m_connection_context; private: - Expected create_client_connection(std::shared_ptr server_connection); - hailo_status serve_client(RpcConnection client_connection); - virtual hailo_status cleanup_client_resources(RpcConnection client_connection) = 0; + Expected create_client_connection(std::shared_ptr server_connection); + hailo_status serve_client(ClientConnection client_connection); + virtual hailo_status cleanup_client_resources(ClientConnection client_connection) = 0; Dispatcher m_dispatcher; std::mutex m_write_mutex; + uint32_t m_client_count; }; } // namespace hailort diff --git a/hailort/hrpc_protocol/rpc.proto b/hailort/hrpc_protocol/rpc.proto index af2bff2..90a23b8 100644 --- a/hailort/hrpc_protocol/rpc.proto +++ b/hailort/hrpc_protocol/rpc.proto @@ -31,6 +31,11 @@ message RpcRequest { Device_StartPowerMeasurement_Request start_power_measurement_request = 22; Device_GetPowerMeasurement_Request get_power_measurement_request = 23; Device_StopPowerMeasurement_Request stop_power_measurement_request = 24; + Device_QueryHealthStats_Request query_health_stats_request = 25; + Device_QueryPerformanceStats_Request query_performance_stats_request = 26; + Device_GetArchitecture_Request get_architecture_request = 27; + Device_SetNotificationCallback_Request set_notification_callback_request = 28; + Device_RemoveNotificationCallback_Request remove_notification_callback_request = 29; } } @@ -63,6 +68,11 @@ message RpcReply { Device_StartPowerMeasurement_Reply start_power_measurement_reply = 22; Device_GetPowerMeasurement_Reply get_power_measurement_reply = 23; 
Device_StopPowerMeasurement_Reply stop_power_measurement_reply = 24; + Device_QueryHealthStats_Reply query_health_stats_reply = 25; + Device_QueryPerformanceStats_Reply query_performance_stats_reply = 26; + Device_GetArchitecture_Reply get_architecture_reply = 27; + Device_SetNotificationCallback_Reply set_notification_callback_reply = 28; + Device_RemoveNotificationCallback_Reply remove_notification_callback_reply = 29; // Here comes replies that have no matching requests CallbackCalled_Reply callback_called_reply = 100; @@ -75,11 +85,13 @@ message HailoObjectHandle { message HailoCallbackHandle { uint32 id = 1; + uint32 dispatcher_id = 2; } message VDeviceParamsProto { uint32 scheduling_algorithm = 1; string group_id = 2; + bool is_device_id_user_specific = 3; } message VDevice_Create_Request { @@ -219,7 +231,7 @@ message ConfiguredInferModel_AsyncInfer_Request { HailoObjectHandle infer_model_handle = 2; HailoCallbackHandle callback_handle = 3; repeated uint32 input_buffer_sizes = 4; - // Protocol note: After this messgae, server expects to get the input buffers, one after the other, in order + // Protocol note: After this message, server expects to get the input buffers, one after the other, in order } message ConfiguredInferModel_AsyncInfer_Reply { @@ -227,10 +239,38 @@ message ConfiguredInferModel_AsyncInfer_Reply { } message CallbackCalled_Reply { + HailoCallbackHandle callback_handle = 1; + oneof callback_type { + RunAsyncCallback run_async = 2; + DeviceNotificationCallback device_notification = 3; + } +} + +message RunAsyncCallback { uint32 status = 1; - HailoCallbackHandle callback_handle = 2; - HailoObjectHandle configured_infer_model_handle = 3; - // Protocol note: After this messgae, and only if status is HAILO_SUCCESS, server expects to get the output buffers, one after the other, in order + // Protocol note: After this message, and only if status is HAILO_SUCCESS, client expects to get the output buffers, one after the other, in order +} + +message 
TemperatureAlarmNotification { + uint32 temperature_zone = 1; + uint32 alarm_ts_id = 2; + float ts0_temperature = 3; + float ts1_temperature = 4; +} + +message OverCurrentAlertNotification { + uint32 overcurrent_zone = 1; + float exceeded_alert_threshold = 2; + bool is_last_overcurrent_violation_reached = 3; +} + +message DeviceNotificationCallback { + uint32 notification_id = 1; + uint32 sequence = 2; + oneof message { + TemperatureAlarmNotification temperature_alarm = 3; + OverCurrentAlertNotification overcurrent_alert = 4; + } } message Device_Create_Request { @@ -317,6 +357,7 @@ message Device_ExtendedInfo_Reply { repeated uint32 eth_mac_address = 7; repeated uint32 unit_level_tracking_id = 8; repeated uint32 soc_pm_values = 9; + uint32 gpio_mask = 10; } message Device_GetChipTemperature_Request { @@ -330,6 +371,31 @@ message Device_GetChipTemperature_Reply { uint32 sample_count = 4; } +message Device_QueryHealthStats_Request { + HailoObjectHandle device_handle = 1; +} + +message Device_QueryHealthStats_Reply { + uint32 status = 1; + float on_die_temperature = 2; + float on_die_voltage = 3; + int32 startup_bist_mask = 4; +} + +message Device_QueryPerformanceStats_Request { + HailoObjectHandle device_handle = 1; +} + +message Device_QueryPerformanceStats_Reply { + uint32 status = 1; + float cpu_utilization = 2; + int64 ram_size_total = 3; + int64 ram_size_used = 4; + float nnc_utilization = 5; + int32 ddr_noc_total_transactions = 6; + int32 dsp_utilization = 7; +} + message Device_PowerMeasurement_Request { HailoObjectHandle device_handle = 1; uint32 hailo_dvm_options = 2; /* corresponds to hailo_dvm_options_t */ @@ -386,3 +452,31 @@ message Device_StopPowerMeasurement_Request { message Device_StopPowerMeasurement_Reply { uint32 status = 1; } + +message Device_GetArchitecture_Request { + HailoObjectHandle device_handle = 1; +} + +message Device_GetArchitecture_Reply { + uint32 status = 1; + uint32 device_architecture = 2; +} + +message 
Device_SetNotificationCallback_Request { + HailoObjectHandle device_handle = 1; + uint32 notification_id = 2; + HailoCallbackHandle callback = 3; +} + +message Device_SetNotificationCallback_Reply { + uint32 status = 1; +} + +message Device_RemoveNotificationCallback_Request { + HailoObjectHandle device_handle = 1; + uint32 notification_id = 2; +} + +message Device_RemoveNotificationCallback_Reply { + uint32 status = 1; +} diff --git a/hailort/hrpc_protocol/serializer.cpp b/hailort/hrpc_protocol/serializer.cpp index ee0c702..baa4504 100644 --- a/hailort/hrpc_protocol/serializer.cpp +++ b/hailort/hrpc_protocol/serializer.cpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file serializer.cpp * @brief HRPC Serialization implementation @@ -34,15 +34,16 @@ namespace hailort { -Expected CreateVDeviceSerializer::serialize_request(const hailo_vdevice_params_t ¶ms) +Expected CreateVDeviceSerializer::serialize_request(const hailo_vdevice_params_t ¶ms, MemoryView buffer) { VDevice_Create_Request request; auto proto_params = request.mutable_params(); proto_params->set_scheduling_algorithm(params.scheduling_algorithm); proto_params->set_group_id(params.group_id == nullptr ? 
"" : std::string(params.group_id)); + proto_params->set_is_device_id_user_specific(params.device_ids != nullptr); - return get_serialized_request(request, "CreateVDevice"); + return get_serialized_request(request, "CreateVDevice", buffer); } Expected CreateVDeviceSerializer::deserialize_request(const MemoryView &serialized_request) @@ -52,13 +53,10 @@ Expected CreateVDeviceSerializer::deserialize_re CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), static_cast(serialized_request.size())), HAILO_RPC_FAILED, "Failed to de-serialize 'CreateVDevice'"); - bool multi_process_service_flag = false; - SerializerVDeviceParamsWrapper params( - 1, - nullptr, + SerializerVDeviceParamsWrapper params( static_cast(request.params().scheduling_algorithm()), request.params().group_id(), - multi_process_service_flag); + request.params().is_device_id_user_specific()); return params; } @@ -72,7 +70,6 @@ Expected CreateVDeviceSerializer::serialize_reply(hailo_status status, r proto_vdevice_handle->set_id(vdevice_handle); return get_serialized_reply(reply, "CreateVDevice"); - } Expected> CreateVDeviceSerializer::deserialize_reply(const MemoryView &serialized_reply) @@ -85,19 +82,14 @@ Expected> CreateVDeviceSerializer: return std::make_tuple(static_cast(reply.status()), reply.vdevice_handle().id()); } -Expected DestroyVDeviceSerializer::serialize_request(rpc_object_handle_t vdevice_handle) +Expected DestroyVDeviceSerializer::serialize_request(rpc_object_handle_t vdevice_handle, MemoryView buffer) { VDevice_Destroy_Request request; auto proto_vdevice_handle= request.mutable_vdevice_handle(); proto_vdevice_handle->set_id(vdevice_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'DestroyVDevice'"); - 
- return serialized_request; + return get_serialized_request(request, "DestroyVDevice", buffer); } Expected DestroyVDeviceSerializer::deserialize_request(const MemoryView &serialized_request) @@ -129,7 +121,7 @@ hailo_status DestroyVDeviceSerializer::deserialize_reply(const MemoryView &seria serialized_reply, "DestroyVDevice"); } -Expected CreateInferModelSerializer::serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size, const std::string &name) +Expected CreateInferModelSerializer::serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size, const std::string &name, MemoryView buffer) { VDevice_CreateInferModel_Request request; @@ -138,12 +130,7 @@ Expected CreateInferModelSerializer::serialize_request(rpc_object_handle request.set_hef_size(hef_size); request.set_name(name); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'CreateVInferModel'"); - - return serialized_request; + return get_serialized_request(request, "CreateVInferModel", buffer); } Expected> CreateInferModelSerializer::deserialize_request(const MemoryView &serialized_request) @@ -182,19 +169,14 @@ Expected> CreateInferModelSerializ return std::make_tuple(static_cast(reply.status()), reply.infer_model_handle().id()); } -Expected DestroyInferModelSerializer::serialize_request(rpc_object_handle_t infer_model_handle) +Expected DestroyInferModelSerializer::serialize_request(rpc_object_handle_t infer_model_handle, MemoryView buffer) { InferModel_Destroy_Request request; auto proto_infer_model_handle = request.mutable_infer_model_handle(); proto_infer_model_handle->set_id(infer_model_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, 
Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'DestroyInferModel'"); - - return serialized_request; + return get_serialized_request(request, "DestroyInferModel", buffer); } Expected DestroyInferModelSerializer::deserialize_request(const MemoryView &serialized_request) @@ -226,7 +208,7 @@ hailo_status DestroyInferModelSerializer::deserialize_reply(const MemoryView &se serialized_reply, "DestroyInferModel"); } -Expected CreateConfiguredInferModelSerializer::serialize_request(rpc_create_configured_infer_model_request_params_t params) +Expected CreateConfiguredInferModelSerializer::serialize_request(rpc_create_configured_infer_model_request_params_t params, MemoryView buffer) { InferModel_CreateConfiguredInferModel_Request request; @@ -264,12 +246,7 @@ Expected CreateConfiguredInferModelSerializer::serialize_request(rpc_cre request.set_power_mode(static_cast(params.power_mode)); request.set_latency_flag(static_cast(params.latency_flag)); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'CreateConfiguredInferModel'"); - - return serialized_request; + return get_serialized_request(request, "CreateConfiguredInferModel", buffer); } Expected CreateConfiguredInferModelSerializer::deserialize_request(const MemoryView &serialized_request) @@ -343,19 +320,14 @@ Expected> CreateConfigur return std::make_tuple(static_cast(reply.status()), reply.configured_infer_model_handle().id(), reply.async_queue_size()); } -Expected DestroyConfiguredInferModelSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle) 
+Expected DestroyConfiguredInferModelSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer) { ConfiguredInferModel_Destroy_Request request; auto proto_infer_model_handle = request.mutable_configured_infer_model_handle(); proto_infer_model_handle->set_id(configured_infer_model_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'DestroyConfiguredInferModel'"); - - return serialized_request; + return get_serialized_request(request, "DestroyConfiguredInferModel", buffer); } Expected DestroyConfiguredInferModelSerializer::deserialize_request(const MemoryView &serialized_request) @@ -387,7 +359,7 @@ hailo_status DestroyConfiguredInferModelSerializer::deserialize_reply(const Memo serialized_reply, "DestroyConfiguredInferModel"); } -Expected SetSchedulerTimeoutSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, const std::chrono::milliseconds &timeout) +Expected SetSchedulerTimeoutSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, const std::chrono::milliseconds &timeout, MemoryView buffer) { ConfiguredInferModel_SetSchedulerTimeout_Request request; @@ -395,12 +367,7 @@ Expected SetSchedulerTimeoutSerializer::serialize_request(rpc_object_han proto_configured_infer_model_handle->set_id(configured_infer_model_handle); request.set_timeout(static_cast(timeout.count())); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 
'SetSchedulerTimeout'"); - - return serialized_request; + return get_serialized_request(request, "SetSchedulerTimeout", buffer); } Expected> SetSchedulerTimeoutSerializer::deserialize_request( @@ -433,7 +400,7 @@ hailo_status SetSchedulerTimeoutSerializer::deserialize_reply(const MemoryView & serialized_reply, "SetSchedulerTimeout"); } -Expected SetSchedulerThresholdSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t threshold) +Expected SetSchedulerThresholdSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t threshold, MemoryView buffer) { ConfiguredInferModel_SetSchedulerThreshold_Request request; @@ -441,12 +408,7 @@ Expected SetSchedulerThresholdSerializer::serialize_request(rpc_object_h proto_configured_infer_model_handle->set_id(configured_infer_model_handle); request.set_threshold(threshold); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'SetSchedulerThreshold'"); - - return serialized_request; + return get_serialized_request(request, "SetSchedulerThreshold", buffer); } Expected> SetSchedulerThresholdSerializer::deserialize_request( @@ -479,7 +441,7 @@ hailo_status SetSchedulerThresholdSerializer::deserialize_reply(const MemoryView serialized_reply, "SetSchedulerThreshold"); } -Expected SetSchedulerPrioritySerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t priority) +Expected SetSchedulerPrioritySerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t priority, MemoryView buffer) { ConfiguredInferModel_SetSchedulerPriority_Request request; @@ -487,12 +449,7 @@ Expected SetSchedulerPrioritySerializer::serialize_request(rpc_object_ha 
proto_configured_infer_model_handle->set_id(configured_infer_model_handle); request.set_priority(priority); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'SetSchedulerPriority'"); - - return serialized_request; + return get_serialized_request(request, "SetSchedulerPriority", buffer); } Expected> SetSchedulerPrioritySerializer::deserialize_request( @@ -525,19 +482,14 @@ hailo_status SetSchedulerPrioritySerializer::deserialize_reply(const MemoryView serialized_reply, "SetSchedulerPriority"); } -Expected GetHwLatencyMeasurementSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle) +Expected GetHwLatencyMeasurementSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer) { ConfiguredInferModel_GetHwLatencyMeasurement_Request request; auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle(); proto_configured_infer_model_handle->set_id(configured_infer_model_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'GetHwLatencyMeasurement'"); - - return serialized_request; + return get_serialized_request(request, "GetHwLatencyMeasurement", buffer); } Expected GetHwLatencyMeasurementSerializer::deserialize_request(const MemoryView &serialized_request) @@ -574,19 +526,14 @@ Expected> GetHwLatencyMeasure return std::make_tuple(static_cast(reply.status()), std::chrono::nanoseconds(reply.avg_hw_latency())); } -Expected 
ActivateSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle) +Expected ActivateSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer) { ConfiguredInferModel_Activate_Request request; auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle(); proto_configured_infer_model_handle->set_id(configured_infer_model_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'Activate'"); - - return serialized_request; + return get_serialized_request(request, "Activate", buffer); } Expected ActivateSerializer::deserialize_request(const MemoryView &serialized_request) @@ -618,19 +565,14 @@ hailo_status ActivateSerializer::deserialize_reply(const MemoryView &serialized_ serialized_reply, "Activate"); } -Expected DeactivateSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle) +Expected DeactivateSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer) { ConfiguredInferModel_Deactivate_Request request; auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle(); proto_configured_infer_model_handle->set_id(configured_infer_model_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'Deactivate'"); - - return serialized_request; + return get_serialized_request(request, "Deactivate", buffer); } Expected 
DeactivateSerializer::deserialize_request(const MemoryView &serialized_request) @@ -662,19 +604,14 @@ hailo_status DeactivateSerializer::deserialize_reply(const MemoryView &serialize serialized_reply, "Deactivate"); } -Expected ShutdownSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle) +Expected ShutdownSerializer::serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer) { ConfiguredInferModel_Shutdown_Request request; auto proto_configured_infer_model_handle = request.mutable_configured_infer_model_handle(); proto_configured_infer_model_handle->set_id(configured_infer_model_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'Shutdown'"); - - return serialized_request; + return get_serialized_request(request, "Shutdown", buffer); } Expected ShutdownSerializer::deserialize_request(const MemoryView &serialized_request) @@ -706,7 +643,7 @@ hailo_status ShutdownSerializer::deserialize_reply(const MemoryView &serialized_ serialized_reply, "Shutdown"); } -Expected RunAsyncSerializer::serialize_request(const RunAsyncSerializer::Request &request_struct) +Expected RunAsyncSerializer::serialize_request(const RunAsyncSerializer::Request &request_struct, MemoryView buffer) { ConfiguredInferModel_AsyncInfer_Request request; @@ -718,15 +655,11 @@ Expected RunAsyncSerializer::serialize_request(const RunAsyncSerializer: auto proto_cb_handle = request.mutable_callback_handle(); proto_cb_handle->set_id(request_struct.callback_handle); + proto_cb_handle->set_dispatcher_id(request_struct.dispatcher_id); *request.mutable_input_buffer_sizes() = {request_struct.input_buffer_sizes.begin(), request_struct.input_buffer_sizes.end()}; - // TODO (HRT-14732) - 
check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'RunAsync'"); - - return serialized_request; + return get_serialized_request(request, "RunAsync", buffer); } Expected RunAsyncSerializer::deserialize_request( @@ -743,6 +676,7 @@ Expected RunAsyncSerializer::deserialize_request( request_struct.configured_infer_model_handle = request.configured_infer_model_handle().id(); request_struct.infer_model_handle = request.infer_model_handle().id(); request_struct.callback_handle = request.callback_handle().id(); + request_struct.dispatcher_id = request.callback_handle().dispatcher_id(); request_struct.input_buffer_sizes = input_buffer_sizes; return request_struct; } @@ -766,49 +700,125 @@ hailo_status RunAsyncSerializer::deserialize_reply(const MemoryView &serialized_ serialized_reply, "RunAsync"); } -Expected CallbackCalledSerializer::serialize_reply(hailo_status status, rpc_object_handle_t callback_handle, - rpc_object_handle_t configured_infer_model_handle) +Expected CallbackCalledSerializer::serialize_reply(const RpcCallback &callback) { CallbackCalled_Reply reply; - reply.set_status(status); - auto proto_callback_handle = reply.mutable_callback_handle(); - proto_callback_handle->set_id(callback_handle); + switch (callback.type) { + case RpcCallbackType::RUN_ASYNC: + { + auto run_async = reply.mutable_run_async(); + run_async->set_status(callback.data.run_async.status); + break; + } + case RpcCallbackType::DEVICE_NOTIFICATION: + { + auto device_notif = reply.mutable_device_notification(); + device_notif->set_notification_id(callback.data.device_notification.notification.id); + device_notif->set_sequence(callback.data.device_notification.notification.sequence); + switch (callback.data.device_notification.notification.id) { + case 
HAILO_NOTIFICATION_ID_HEALTH_MONITOR_TEMPERATURE_ALARM: + { + auto temp_alarm = device_notif->mutable_temperature_alarm(); + auto &msg = callback.data.device_notification.notification.body.health_monitor_temperature_alarm_notification; + temp_alarm->set_temperature_zone(msg.temperature_zone); + temp_alarm->set_alarm_ts_id(msg.alarm_ts_id); + temp_alarm->set_ts0_temperature(msg.ts0_temperature); + temp_alarm->set_ts1_temperature(msg.ts1_temperature); + break; + } + case HAILO_NOTIFICATION_ID_HEALTH_MONITOR_OVERCURRENT_ALARM: + { + auto overcurrent_alert = device_notif->mutable_overcurrent_alert(); + auto &msg = callback.data.device_notification.notification.body.health_monitor_overcurrent_alert_notification; + overcurrent_alert->set_overcurrent_zone(msg.overcurrent_zone); + overcurrent_alert->set_exceeded_alert_threshold(msg.exceeded_alert_threshold); + overcurrent_alert->set_is_last_overcurrent_violation_reached(msg.is_last_overcurrent_violation_reached); + break; + } + default: + LOGGER__ERROR("Got unexpected notification id = {}", static_cast(callback.data.device_notification.notification.id)); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + break; + } + default: + LOGGER__ERROR("Got unexpected callback type = {}", static_cast(callback.type)); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } - auto proto_cim_handle = reply.mutable_configured_infer_model_handle(); - proto_cim_handle->set_id(configured_infer_model_handle); + auto proto_callback_handle = reply.mutable_callback_handle(); + proto_callback_handle->set_id(callback.callback_id); + proto_callback_handle->set_dispatcher_id(callback.dispatcher_id); TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast(serialized_reply.size())), HAILO_RPC_FAILED, "Failed to serialize 'CallbackCalled'"); return serialized_reply; } -Expected> 
-CallbackCalledSerializer::deserialize_reply(const MemoryView &serialized_reply) +Expected CallbackCalledSerializer::deserialize_reply(const MemoryView &serialized_reply) { CallbackCalled_Reply reply; CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast(serialized_reply.size())), HAILO_RPC_FAILED, "Failed to de-serialize 'CallbackCalled'"); - return std::make_tuple(static_cast(reply.status()), reply.callback_handle().id(), - reply.configured_infer_model_handle().id()); + RpcCallback rpc_callback = {}; + rpc_callback.callback_id = reply.callback_handle().id(); + rpc_callback.dispatcher_id = reply.callback_handle().dispatcher_id(); + + switch (reply.callback_type_case()) { + case CallbackCalled_Reply::kRunAsync: + { + rpc_callback.type = RpcCallbackType::RUN_ASYNC; + rpc_callback.data.run_async.status = static_cast(reply.run_async().status()); + break; + } + case CallbackCalled_Reply::kDeviceNotification: + { + rpc_callback.type = RpcCallbackType::DEVICE_NOTIFICATION; + rpc_callback.data.device_notification.notification.id = static_cast(reply.device_notification().notification_id()); + rpc_callback.data.device_notification.notification.sequence = static_cast(reply.device_notification().sequence()); + switch (rpc_callback.data.device_notification.notification.id) { + case HAILO_NOTIFICATION_ID_HEALTH_MONITOR_TEMPERATURE_ALARM: + { + auto &temp_alarm = reply.device_notification().temperature_alarm(); + auto &msg = rpc_callback.data.device_notification.notification.body.health_monitor_temperature_alarm_notification; + msg.temperature_zone = static_cast(temp_alarm.temperature_zone()); + msg.alarm_ts_id = temp_alarm.alarm_ts_id(); + msg.ts0_temperature = temp_alarm.ts0_temperature(); + msg.ts1_temperature = temp_alarm.ts1_temperature(); + break; + } + case HAILO_NOTIFICATION_ID_HEALTH_MONITOR_OVERCURRENT_ALARM: + { + auto &overcurrent_alert = reply.device_notification().overcurrent_alert(); + auto &msg = 
rpc_callback.data.device_notification.notification.body.health_monitor_overcurrent_alert_notification; + msg.overcurrent_zone = static_cast(overcurrent_alert.overcurrent_zone()); + msg.exceeded_alert_threshold = overcurrent_alert.exceeded_alert_threshold(); + msg.is_last_overcurrent_violation_reached = overcurrent_alert.is_last_overcurrent_violation_reached(); + break; + } + default: + LOGGER__ERROR("Got unexpected notification id = {}", static_cast(rpc_callback.data.device_notification.notification.id)); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + break; + } + default: + LOGGER__ERROR("Got unexpected callback type = {}", static_cast(reply.callback_type_case())); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + return rpc_callback; } -Expected CreateDeviceSerializer::serialize_request() +Expected CreateDeviceSerializer::serialize_request(MemoryView buffer) { Device_Create_Request request; - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'CreateDevice'"); - - return serialized_request; + return get_serialized_request(request, "CreateDevice", buffer); } hailo_status CreateDeviceSerializer::deserialize_request(const MemoryView &serialized_request) @@ -847,19 +857,14 @@ Expected> CreateDeviceSerializer:: return std::make_tuple(static_cast(reply.status()), reply.device_handle().id()); } -Expected DestroyDeviceSerializer::serialize_request(rpc_object_handle_t device_handle) +Expected DestroyDeviceSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) { Device_Destroy_Request request; auto proto_device_handle= request.mutable_device_handle(); proto_device_handle->set_id(device_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto 
serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'DestroyDevice'"); - - return serialized_request; + return get_serialized_request(request, "DestroyDevice", buffer); } Expected DestroyDeviceSerializer::deserialize_request(const MemoryView &serialized_request) @@ -891,19 +896,14 @@ hailo_status DestroyDeviceSerializer::deserialize_reply(const MemoryView &serial serialized_reply, "DestroyDevice"); } -Expected IdentifyDeviceSerializer::serialize_request(rpc_object_handle_t device_handle) +Expected IdentifyDeviceSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) { Device_Identify_Request request; auto proto_device_handle = request.mutable_device_handle(); proto_device_handle->set_id(device_handle); - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), - HAILO_RPC_FAILED, "Failed to serialize 'IdentifyDevice'"); - - return serialized_request; + return get_serialized_request(request, "IdentifyDevice", buffer); } Expected IdentifyDeviceSerializer::deserialize_request(const MemoryView &serialized_request) @@ -989,14 +989,14 @@ Expected> IdentifyDeviceSerial return std::make_tuple(static_cast(reply.status()), identity); } -Expected ExtendedDeviceInfoSerializer::serialize_request(rpc_object_handle_t device_handle) +Expected ExtendedDeviceInfoSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) { Device_ExtendedInfo_Request request; auto proto_device_handle = request.mutable_device_handle(); proto_device_handle->set_id(device_handle); - return get_serialized_request(request, 
"ExtendedDeviceInfo"); + return get_serialized_request(request, "ExtendedDeviceInfo", buffer); } Expected ExtendedDeviceInfoSerializer::deserialize_request(const MemoryView &serialized_request) @@ -1042,6 +1042,8 @@ Expected ExtendedDeviceInfoSerializer::serialize_reply(hailo_status stat soc_pm_values->Add(extended_info.soc_pm_values[i]); } + reply.set_gpio_mask(extended_info.gpio_mask); + return get_serialized_reply(reply, "ExtendedDeviceInfo"); } @@ -1066,6 +1068,9 @@ Expected> Extended }); extended_info.lcs = static_cast(reply.lcs()); + assert(reply.gpio_mask() <= std::numeric_limits::max()); + extended_info.gpio_mask = static_cast(reply.gpio_mask()); + // Ensure that the sizes of the input and output arrays match before transformation assert(reply.eth_mac_address().size() == HAILO_ETH_MAC_LENGTH); std::transform(reply.eth_mac_address().begin(), reply.eth_mac_address().begin() + HAILO_ETH_MAC_LENGTH, @@ -1082,15 +1087,14 @@ Expected> Extended return std::make_tuple(static_cast(reply.status()), extended_info); } - -Expected GetChipTemperatureSerializer::serialize_request(rpc_object_handle_t device_handle) +Expected GetChipTemperatureSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) { Device_GetChipTemperature_Request request; auto proto_device_handle = request.mutable_device_handle(); proto_device_handle->set_id(device_handle); - return get_serialized_request(request, "GetChipTemperature"); + return get_serialized_request(request, "GetChipTemperature", buffer); } Expected GetChipTemperatureSerializer::deserialize_request(const MemoryView &serialized_request) @@ -1122,7 +1126,93 @@ Expected> GetChipTempera return std::make_tuple(static_cast(reply.status()), info); } -Expected PowerMeasurementSerializer::serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type) +Expected QueryHealthStatsSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) 
+{ + Device_QueryHealthStats_Request request; + + auto proto_device_handle = request.mutable_device_handle(); + proto_device_handle->set_id(device_handle); + + return get_serialized_request(request, "QueryHealthStats", buffer); +} + +Expected QueryHealthStatsSerializer::deserialize_request(const MemoryView &serialized_request) +{ + return get_deserialized_request(serialized_request, "QueryHealthStats"); +} + +Expected QueryHealthStatsSerializer::serialize_reply(hailo_status status, const hailo_health_stats_t &info) +{ + Device_QueryHealthStats_Reply reply; + + reply.set_status(status); + reply.set_on_die_temperature(info.on_die_temperature); + reply.set_on_die_voltage(info.on_die_voltage); + reply.set_startup_bist_mask(info.startup_bist_mask); + + return get_serialized_reply(reply, "QueryHealthStats"); +} + +Expected> QueryHealthStatsSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + Device_QueryHealthStats_Reply reply; + CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast(serialized_reply.size())), + HAILO_RPC_FAILED, "Failed to de-serialize 'QueryHealthStats'"); + hailo_health_stats_t info = {}; + info.on_die_temperature = reply.on_die_temperature(); + info.on_die_voltage = reply.on_die_voltage(); + info.startup_bist_mask = reply.startup_bist_mask(); + + return std::make_tuple(static_cast(reply.status()), info); +} + +Expected QueryPerformanceStatsSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) +{ + Device_QueryPerformanceStats_Request request; + + auto proto_device_handle = request.mutable_device_handle(); + proto_device_handle->set_id(device_handle); + + return get_serialized_request(request, "QueryPerformanceStats", buffer); +} + +Expected QueryPerformanceStatsSerializer::deserialize_request(const MemoryView &serialized_request) +{ + return get_deserialized_request(serialized_request, "QueryPerformanceStats"); +} + +Expected 
QueryPerformanceStatsSerializer::serialize_reply(hailo_status status, const hailo_performance_stats_t &info) +{ + Device_QueryPerformanceStats_Reply reply; + + reply.set_status(status); + reply.set_cpu_utilization(info.cpu_utilization); + reply.set_ram_size_total(info.ram_size_total); + reply.set_ram_size_used(info.ram_size_used); + reply.set_nnc_utilization(info.nnc_utilization); + reply.set_ddr_noc_total_transactions(info.ddr_noc_total_transactions); + reply.set_dsp_utilization(info.dsp_utilization); + + return get_serialized_reply(reply, "QueryPerformanceStats"); +} + +Expected> QueryPerformanceStatsSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + Device_QueryPerformanceStats_Reply reply; + CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast(serialized_reply.size())), + HAILO_RPC_FAILED, "Failed to de-serialize 'QueryPerformanceStats'"); + hailo_performance_stats_t info = {}; + info.cpu_utilization = reply.cpu_utilization(); + info.ram_size_total = reply.ram_size_total(); + info.ram_size_used = reply.ram_size_used(); + info.nnc_utilization = reply.nnc_utilization(); + info.ddr_noc_total_transactions = reply.ddr_noc_total_transactions(); + info.dsp_utilization = reply.dsp_utilization(); + + return std::make_tuple(static_cast(reply.status()), info); +} + +Expected PowerMeasurementSerializer::serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type, MemoryView buffer) { Device_PowerMeasurement_Request request; @@ -1131,7 +1221,7 @@ Expected PowerMeasurementSerializer::serialize_request(rpc_object_handle request.set_hailo_dvm_options(hailo_dvm_options); request.set_hailo_power_measurement_type(hailo_power_measurement_type); - return get_serialized_request(request, "PowerMeasurement"); + return get_serialized_request(request, "PowerMeasurement", buffer); } Expected> PowerMeasurementSerializer::deserialize_request(const MemoryView &serialized_request) @@ -1165,7 
+1255,7 @@ Expected> PowerMeasurementSerializer::deseri return std::make_tuple(static_cast(reply.status()), power); } -Expected SetPowerMeasurementSerializer::serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type) +Expected SetPowerMeasurementSerializer::serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type, MemoryView buffer) { Device_SetPowerMeasurement_Request request; @@ -1174,7 +1264,7 @@ Expected SetPowerMeasurementSerializer::serialize_request(rpc_object_han request.set_hailo_dvm_options(hailo_dvm_options); request.set_hailo_power_measurement_type(hailo_power_measurement_type); - return get_serialized_request(request, "SetPowerMeasurement"); + return get_serialized_request(request, "SetPowerMeasurement", buffer); } Expected> SetPowerMeasurementSerializer::deserialize_request(const MemoryView &serialized_request) @@ -1201,9 +1291,9 @@ hailo_status SetPowerMeasurementSerializer::deserialize_reply(const MemoryView & return get_deserialized_status_only_reply(serialized_reply, "SetPowerMeasurement"); } -Expected StartPowerMeasurementSerializer::serialize_request( +Expected StartPowerMeasurementSerializer::serialize_request( rpc_object_handle_t device_handle, uint32_t averaging_factor, - uint32_t sampling_period) + uint32_t sampling_period, MemoryView buffer) { Device_StartPowerMeasurement_Request request; @@ -1213,7 +1303,7 @@ Expected StartPowerMeasurementSerializer::serialize_request( request.set_sampling_period(sampling_period); return get_serialized_request( - request, "StartPowerMeasurement"); + request, "StartPowerMeasurement", buffer); } Expected> @@ -1246,8 +1336,8 @@ hailo_status StartPowerMeasurementSerializer::deserialize_reply( serialized_reply, "StartPowerMeasurement"); } -Expected GetPowerMeasurementSerializer::serialize_request( - rpc_object_handle_t device_handle, bool should_clear) +Expected 
GetPowerMeasurementSerializer::serialize_request( + rpc_object_handle_t device_handle, bool should_clear, MemoryView buffer) { Device_GetPowerMeasurement_Request request; @@ -1255,7 +1345,7 @@ Expected GetPowerMeasurementSerializer::serialize_request( proto_device_handle->set_id(device_handle); request.set_should_clear(should_clear); - return get_serialized_request(request, "GetPowerMeasurement"); + return get_serialized_request(request, "GetPowerMeasurement", buffer); } Expected> GetPowerMeasurementSerializer::deserialize_request(const MemoryView &serialized_request) @@ -1310,8 +1400,8 @@ GetPowerMeasurementSerializer::deserialize_reply( static_cast(reply.status()), (data)); } -Expected StopPowerMeasurementSerializer::serialize_request( - rpc_object_handle_t device_handle) +Expected StopPowerMeasurementSerializer::serialize_request( + rpc_object_handle_t device_handle, MemoryView buffer) { Device_StopPowerMeasurement_Request request; @@ -1319,7 +1409,7 @@ Expected StopPowerMeasurementSerializer::serialize_request( proto_device_handle->set_id(device_handle); return get_serialized_request( - request, "StopPowerMeasurement"); + request, "StopPowerMeasurement", buffer); } Expected StopPowerMeasurementSerializer::deserialize_request( @@ -1345,4 +1435,117 @@ hailo_status StopPowerMeasurementSerializer::deserialize_reply( serialized_reply, "StopPowerMeasurement"); } +Expected GetArchitectureSerializer::serialize_request(rpc_object_handle_t device_handle, MemoryView buffer) +{ + Device_GetArchitecture_Request request; + + auto proto_device_handle = request.mutable_device_handle(); + proto_device_handle->set_id(device_handle); + + return get_serialized_request(request, "GetArchitecture", buffer); +} + +Expected GetArchitectureSerializer::deserialize_request(const MemoryView &serialized_request) +{ + return get_deserialized_request(serialized_request, "GetArchitecture"); +} + +Expected GetArchitectureSerializer::serialize_reply(hailo_status status, const 
hailo_device_architecture_t &device_architecture) +{ + Device_GetArchitecture_Reply reply; + + reply.set_status(status); + reply.set_device_architecture(device_architecture); + + return get_serialized_reply(reply, "GetArchitecture"); +} + +Expected> GetArchitectureSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + Device_GetArchitecture_Reply reply; + + CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast(serialized_reply.size())), + HAILO_RPC_FAILED, "Failed to de-serialize 'GetArchitecture'"); + + hailo_device_architecture_t device_architecture = static_cast(reply.device_architecture()); + + return std::make_tuple(static_cast(reply.status()), device_architecture); +} + +Expected SetNotificationCallbackSerializer::serialize_request(const Request &request, MemoryView buffer) +{ + Device_SetNotificationCallback_Request proto_request; + proto_request.set_notification_id(request.notification_id); + + auto proto_device_handle = proto_request.mutable_device_handle(); + proto_device_handle->set_id(request.device_handle); + + auto proto_callback = proto_request.mutable_callback(); + proto_callback->set_id(request.callback); + proto_callback->set_dispatcher_id(request.dispatcher_id); + + return get_serialized_request(proto_request, "SetNotificationCallback", buffer); +} + +Expected SetNotificationCallbackSerializer::deserialize_request(const MemoryView &serialized_request) +{ + Device_SetNotificationCallback_Request proto_request; + CHECK_AS_EXPECTED(proto_request.ParseFromArray(serialized_request.data(), static_cast(serialized_request.size())), + HAILO_RPC_FAILED, "Failed to de-serialize 'SetNotificationCallback'"); + + Request request; + request.device_handle = proto_request.device_handle().id(); + request.notification_id = static_cast(proto_request.notification_id()); + request.callback = proto_request.callback().id(); + request.dispatcher_id = proto_request.callback().dispatcher_id(); + return request; +} + +Expected 
SetNotificationCallbackSerializer::serialize_reply(hailo_status status) +{ + Device_SetNotificationCallback_Reply proto_reply; + proto_reply.set_status(status); + return get_serialized_reply(proto_reply, "SetNotificationCallback"); +} + +hailo_status SetNotificationCallbackSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + return get_deserialized_status_only_reply(serialized_reply, "SetNotificationCallback"); +} + +Expected RemoveNotificationCallbackSerializer::serialize_request(rpc_object_handle_t device_handle, + hailo_notification_id_t notification_id, MemoryView buffer) +{ + Device_RemoveNotificationCallback_Request proto_request; + + auto proto_device_handle = proto_request.mutable_device_handle(); + proto_device_handle->set_id(device_handle); + proto_request.set_notification_id(notification_id); + return get_serialized_request(proto_request, "RemoveNotificationCallback", buffer); +} + +Expected> RemoveNotificationCallbackSerializer::deserialize_request(const MemoryView &serialized_request) +{ + Device_RemoveNotificationCallback_Request proto_request; + + CHECK_AS_EXPECTED(proto_request.ParseFromArray(serialized_request.data(), static_cast(serialized_request.size())), + HAILO_RPC_FAILED, "Failed to de-serialize 'RemoveNotificationCallback'"); + + return std::make_tuple(proto_request.device_handle().id(), static_cast(proto_request.notification_id())); +} + +Expected RemoveNotificationCallbackSerializer::serialize_reply(hailo_status status) +{ + Device_RemoveNotificationCallback_Reply proto_reply; + + proto_reply.set_status(status); + + return get_serialized_reply(proto_reply, "RemoveNotificationCallback"); +} + +hailo_status RemoveNotificationCallbackSerializer::deserialize_reply(const MemoryView &serialized_reply) +{ + return get_deserialized_status_only_reply(serialized_reply, "RemoveNotificationCallback"); +} + } /* namespace hailort */ diff --git a/hailort/hrpc_protocol/serializer.hpp b/hailort/hrpc_protocol/serializer.hpp index 
5a583e1..c78ff9a 100644 --- a/hailort/hrpc_protocol/serializer.hpp +++ b/hailort/hrpc_protocol/serializer.hpp @@ -1,7 +1,7 @@ /** - * Copyright (c) 2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) -**/ + **/ /** * @file serializer.hpp * @brief HRPC protocol serialization @@ -53,6 +53,11 @@ enum class HailoRpcActionID { DEVICE__GET_POWER_MEASUREMENT, DEVICE__START_POWER_MEASUREMENT, DEVICE__STOP_POWER_MEASUREMENT, + DEVICE__QUERY_HEALTH_STATS, + DEVICE__QUERY_PERFORMANCE_STATS, + DEVICE__GET_ARCHITECTURE, + DEVICE__SET_NOTIFICATION_CALLBACK, + DEVICE__REMOVE_NOTIFICATION_CALLBACK, CALLBACK_CALLED, @@ -82,46 +87,91 @@ struct rpc_create_configured_infer_model_request_params_t hailo_latency_measurement_flags_t latency_flag; }; +struct RunAsyncRpcCallback { + hailo_status status; +}; + +struct DeviceNotifcationRpcCallback { + hailo_notification_t notification; +}; + +enum RpcCallbackType { + INVALID = 0, + RUN_ASYNC, + DEVICE_NOTIFICATION +}; + +union RpcCallbackUnion { + RunAsyncRpcCallback run_async; + DeviceNotifcationRpcCallback device_notification; +}; + +struct RpcCallback { + rpc_object_handle_t callback_id; + rpc_object_handle_t dispatcher_id; + RpcCallbackType type; + RpcCallbackUnion data; +}; + class SerializerVDeviceParamsWrapper { public: - SerializerVDeviceParamsWrapper(uint32_t device_count, - hailo_device_id_t *device_ids, - hailo_scheduling_algorithm_t scheduling_algorithm, - const std::string &group_id, - bool multi_process_service) : m_group_id(group_id) + SerializerVDeviceParamsWrapper(hailo_scheduling_algorithm_t scheduling_algorithm, const std::string &group_id, bool is_device_id_user_specific) + : m_group_id(group_id), m_is_device_id_user_specific(is_device_id_user_specific) { + constexpr static bool DISABLE_MULTI_PROCESS_SERVICE = false; m_vdevice_params = { - device_count, - device_ids, + 1, + 
nullptr, scheduling_algorithm, m_group_id.c_str(), - multi_process_service + DISABLE_MULTI_PROCESS_SERVICE }; } + + SerializerVDeviceParamsWrapper(SerializerVDeviceParamsWrapper &&other) noexcept + : m_group_id(std::move(other.m_group_id)), m_vdevice_params(std::move(other.m_vdevice_params)), + m_is_device_id_user_specific(other.m_is_device_id_user_specific) + { + m_vdevice_params.group_id = m_group_id.c_str(); + } + + SerializerVDeviceParamsWrapper& operator=(SerializerVDeviceParamsWrapper &&other) noexcept + { + if (this != &other) { + m_group_id = std::move(other.m_group_id); + m_vdevice_params = std::move(other.m_vdevice_params); + m_vdevice_params.group_id = m_group_id.c_str(); + m_is_device_id_user_specific = other.m_is_device_id_user_specific; + } + return *this; + } + const hailo_vdevice_params_t &get() const { return m_vdevice_params; } + bool is_device_id_user_specific() const { return m_is_device_id_user_specific; } + private: std::string m_group_id; hailo_vdevice_params_t m_vdevice_params; - + bool m_is_device_id_user_specific; }; template -Expected get_serialized_request(T request, const std::string &name) +Expected get_serialized_request(T request, const std::string &name, MemoryView buffer) { - // TODO (HRT-14732) - check if we can use GetCachedSize - TRY(auto serialized_request, Buffer::create(request.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(request.SerializeToArray(serialized_request.data(), static_cast(serialized_request.size())), + CHECK(buffer.size() >= request.ByteSizeLong(), HAILO_INTERNAL_FAILURE); + + CHECK(request.SerializeToArray(buffer.data(), static_cast(request.ByteSizeLong())), HAILO_RPC_FAILED, "Failed to serialize '{}'", name); - return serialized_request; + return request.ByteSizeLong(); } template Expected get_deserialized_request(const MemoryView &serialized_request, const std::string &name) { T request; - CHECK_AS_EXPECTED(request.ParseFromArray(serialized_request.data(), 
static_cast(serialized_request.size())), + CHECK(request.ParseFromArray(serialized_request.data(), static_cast(serialized_request.size())), HAILO_RPC_FAILED, "Failed to de-serialize '{}'", name); return request.device_handle().id(); } @@ -129,8 +179,9 @@ Expected get_deserialized_request(const MemoryView &seriali template Expected get_serialized_reply(T reply, const std::string &name) { + // TODO: serialize_reply should receive a buffer instead of creating one (HRT-16540) TRY(auto serialized_reply, Buffer::create(reply.ByteSizeLong(), BufferStorageParams::create_dma())); - CHECK_AS_EXPECTED(reply.SerializeToArray(serialized_reply.data(), static_cast(serialized_reply.size())), \ + CHECK(reply.SerializeToArray(serialized_reply.data(), static_cast(serialized_reply.size())), \ HAILO_RPC_FAILED, "Failed to serialize '{}'", name); return serialized_reply; } @@ -139,7 +190,7 @@ template hailo_status get_deserialized_status_only_reply(const MemoryView &serialized_reply, const std::string &name) { T reply; - CHECK_AS_EXPECTED(reply.ParseFromArray(serialized_reply.data(), static_cast(serialized_reply.size())), + CHECK(reply.ParseFromArray(serialized_reply.data(), static_cast(serialized_reply.size())), HAILO_RPC_FAILED, "Failed to de-serialize '{}'", name); return static_cast(reply.status()); } @@ -148,7 +199,7 @@ struct CreateVDeviceSerializer { CreateVDeviceSerializer() = delete; - static Expected serialize_request(const hailo_vdevice_params_t ¶ms); + static Expected serialize_request(const hailo_vdevice_params_t ¶ms, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, rpc_object_handle_t vdevice_handle = INVALID_HANDLE_ID); @@ -159,7 +210,7 @@ struct DestroyVDeviceSerializer { DestroyVDeviceSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t vdevice_handle); + static Expected serialize_request(rpc_object_handle_t vdevice_handle, MemoryView buffer); 
static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -170,7 +221,7 @@ struct CreateInferModelSerializer { CreateInferModelSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size, const std::string &name); + static Expected serialize_request(rpc_object_handle_t vdevice_handle, uint64_t hef_size, const std::string &name, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, rpc_object_handle_t infer_model_handle = INVALID_HANDLE_ID); @@ -181,7 +232,7 @@ struct DestroyInferModelSerializer { DestroyInferModelSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t infer_model_handle); + static Expected serialize_request(rpc_object_handle_t infer_model_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -192,7 +243,7 @@ struct CreateConfiguredInferModelSerializer { CreateConfiguredInferModelSerializer() = delete; - static Expected serialize_request(rpc_create_configured_infer_model_request_params_t params); + static Expected serialize_request(rpc_create_configured_infer_model_request_params_t params, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, rpc_object_handle_t configured_infer_handle = INVALID_HANDLE_ID, @@ -204,7 +255,7 @@ struct DestroyConfiguredInferModelSerializer { DestroyConfiguredInferModelSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected 
serialize_reply(hailo_status status); @@ -215,7 +266,7 @@ struct SetSchedulerTimeoutSerializer { SetSchedulerTimeoutSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, const std::chrono::milliseconds &timeout); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, const std::chrono::milliseconds &timeout, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -226,7 +277,7 @@ struct SetSchedulerThresholdSerializer { SetSchedulerThresholdSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t threshold); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t threshold, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -237,7 +288,7 @@ struct SetSchedulerPrioritySerializer { SetSchedulerPrioritySerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t priority); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, uint32_t priority, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -248,7 +299,7 @@ struct GetHwLatencyMeasurementSerializer { GetHwLatencyMeasurementSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, uint32_t avg_hw_latency = INVALID_LATENCY_MEASUREMENT); @@ -259,7 +310,7 @@ struct 
ActivateSerializer { ActivateSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -270,7 +321,7 @@ struct DeactivateSerializer { DeactivateSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -281,7 +332,7 @@ struct ShutdownSerializer { ShutdownSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle); + static Expected serialize_request(rpc_object_handle_t configured_infer_model_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -297,10 +348,11 @@ struct RunAsyncSerializer rpc_object_handle_t configured_infer_model_handle; rpc_object_handle_t infer_model_handle; rpc_object_handle_t callback_handle; + rpc_object_handle_t dispatcher_id; std::vector input_buffer_sizes; }; - static Expected serialize_request(const Request &request_struct); + static Expected serialize_request(const Request &request_struct, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -311,16 +363,15 @@ struct CallbackCalledSerializer { CallbackCalledSerializer() = delete; - static Expected serialize_reply(hailo_status status, rpc_object_handle_t callback_handle = INVALID_HANDLE_ID, - rpc_object_handle_t configured_infer_model_handle = INVALID_HANDLE_ID); - 
static Expected> deserialize_reply(const MemoryView &serialized_reply); + static Expected serialize_reply(const RpcCallback &callback); + static Expected deserialize_reply(const MemoryView &serialized_reply); }; struct CreateDeviceSerializer { CreateDeviceSerializer() = delete; - static Expected serialize_request(); + static Expected serialize_request(MemoryView buffer); static hailo_status deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, rpc_object_handle_t device_handle = INVALID_HANDLE_ID); @@ -331,7 +382,7 @@ struct DestroyDeviceSerializer { DestroyDeviceSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle); + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); @@ -342,7 +393,7 @@ struct IdentifyDeviceSerializer { IdentifyDeviceSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle); + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, const hailo_device_identity_t &identity = {}); @@ -353,7 +404,7 @@ struct ExtendedDeviceInfoSerializer { ExtendedDeviceInfoSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle); + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, const hailo_extended_device_information_t &extended_info = {}); @@ -363,16 +414,34 @@ struct ExtendedDeviceInfoSerializer struct GetChipTemperatureSerializer { GetChipTemperatureSerializer() = delete; - static Expected 
serialize_request(rpc_object_handle_t device_handle); + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, const hailo_chip_temperature_info_t &info = {}); static Expected> deserialize_reply(const MemoryView &serialized_reply); }; +struct QueryHealthStatsSerializer +{ + QueryHealthStatsSerializer() = delete; + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); + static Expected deserialize_request(const MemoryView &serialized_request); + static Expected serialize_reply(hailo_status status, const hailo_health_stats_t &info = {}); + static Expected> deserialize_reply(const MemoryView &serialized_reply); +}; + +struct QueryPerformanceStatsSerializer +{ + QueryPerformanceStatsSerializer() = delete; + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); + static Expected deserialize_request(const MemoryView &serialized_request); + static Expected serialize_reply(hailo_status status, const hailo_performance_stats_t &info = {}); + static Expected> deserialize_reply(const MemoryView &serialized_reply); +}; + struct PowerMeasurementSerializer { PowerMeasurementSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type); + static Expected serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, const float32_t &power = 0.0f); static Expected> deserialize_reply(const MemoryView &serialized_reply); @@ -381,7 +450,7 @@ struct PowerMeasurementSerializer struct SetPowerMeasurementSerializer { SetPowerMeasurementSerializer() = delete; - 
static Expected serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type); + static Expected serialize_request(rpc_object_handle_t device_handle, uint32_t hailo_dvm_options, uint32_t hailo_power_measurement_type, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); static hailo_status deserialize_reply(const MemoryView &serialized_reply); @@ -390,7 +459,7 @@ struct SetPowerMeasurementSerializer struct StartPowerMeasurementSerializer { StartPowerMeasurementSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle, uint32_t averaging_factor, uint32_t sampling_period); + static Expected serialize_request(rpc_object_handle_t device_handle, uint32_t averaging_factor, uint32_t sampling_period, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); static hailo_status deserialize_reply(const MemoryView &serialized_reply); @@ -399,7 +468,7 @@ struct StartPowerMeasurementSerializer struct GetPowerMeasurementSerializer { GetPowerMeasurementSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle, bool should_clear); + static Expected serialize_request(rpc_object_handle_t device_handle, bool should_clear, MemoryView buffer); static Expected> deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status, const hailo_power_measurement_data_t &data = {}); static Expected> deserialize_reply(const MemoryView &serialized_reply); @@ -408,12 +477,47 @@ struct GetPowerMeasurementSerializer struct StopPowerMeasurementSerializer { StopPowerMeasurementSerializer() = delete; - static Expected serialize_request(rpc_object_handle_t device_handle); + static Expected serialize_request(rpc_object_handle_t device_handle, 
MemoryView buffer); static Expected deserialize_request(const MemoryView &serialized_request); static Expected serialize_reply(hailo_status status); static hailo_status deserialize_reply(const MemoryView &serialized_reply); }; +struct GetArchitectureSerializer +{ + GetArchitectureSerializer() = delete; + static Expected serialize_request(rpc_object_handle_t device_handle, MemoryView buffer); + static Expected deserialize_request(const MemoryView &serialized_request); + static Expected serialize_reply(hailo_status status, const hailo_device_architecture_t &device_architecture = HAILO_ARCH_MAX_ENUM); + static Expected> deserialize_reply(const MemoryView &serialized_reply); +}; + +struct SetNotificationCallbackSerializer +{ + struct Request { + rpc_object_handle_t device_handle; + hailo_notification_id_t notification_id; + rpc_object_handle_t callback; + rpc_object_handle_t dispatcher_id; + }; + + SetNotificationCallbackSerializer() = delete; + static Expected serialize_request(const Request &request, MemoryView buffer); + static Expected deserialize_request(const MemoryView &serialized_request); + static Expected serialize_reply(hailo_status status); + static hailo_status deserialize_reply(const MemoryView &serialized_reply); +}; + +struct RemoveNotificationCallbackSerializer +{ + RemoveNotificationCallbackSerializer() = delete; + static Expected serialize_request(rpc_object_handle_t device_handle, hailo_notification_id_t notification_id, + MemoryView buffer); + static Expected> deserialize_request(const MemoryView &serialized_request); + static Expected serialize_reply(hailo_status status); + static hailo_status deserialize_reply(const MemoryView &serialized_reply); +}; + } /* namespace hailort */ #endif /* _HAILO_SERIALIZER_HPP_ */ diff --git a/hailort/libhailort/CMakeLists.txt b/hailort/libhailort/CMakeLists.txt index cad56f9..b63752e 100644 --- a/hailort/libhailort/CMakeLists.txt +++ b/hailort/libhailort/CMakeLists.txt @@ -2,8 +2,8 @@ 
cmake_minimum_required(VERSION 3.5.0) # set(CMAKE_C_CLANG_TIDY "clang-tidy;-checks=*") set(HAILORT_MAJOR_VERSION 4) -set(HAILORT_MINOR_VERSION 20) -set(HAILORT_REVISION_VERSION 1) +set(HAILORT_MINOR_VERSION 21) +set(HAILORT_REVISION_VERSION 0) # Add the cmake folder so the modules there are found set(CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH}) diff --git a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt index 3569432..75468ea 100644 --- a/hailort/libhailort/bindings/gstreamer/CMakeLists.txt +++ b/hailort/libhailort/bindings/gstreamer/CMakeLists.txt @@ -4,7 +4,7 @@ project(gsthailo) include(GNUInstallDirs) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) # GST_PLUGIN_DEFINE needs PACKAGE to be defined set(GST_HAILO_PACKAGE_NAME "hailo") diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp index 2671a23..0c4d600 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp index f6a5ede..b885bce 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/common.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp index 00785c6..e112b8f 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp index 01d7142..21f15ab 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_allocator.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp index e232872..ef6c796 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp index 583f545..6ac6bc8 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailo_dmabuf_allocator.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp index 51abd71..9d47e01 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.hpp index d0f528a..deddbc1 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailodevicestats.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp index 96a535c..7029e41 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or @@ -407,7 +407,7 @@ static hailo_status gst_hailonet_allocate_infer_resources(GstHailoNet *self) self->impl->thread_cv.notify_all(); if (GST_IS_PAD(self->srcpad)) { // Checking because we fail here when exiting the application GstFlowReturn ret = gst_pad_push(self->srcpad, buffer); - if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && ((GST_FLOW_EOS != ret)) && (!self->impl->has_got_eos)) { + if ((GST_FLOW_OK != ret) && (GST_FLOW_FLUSHING != ret) && ((GST_FLOW_EOS != ret)) && (!self->impl->has_sent_eos)) { HAILONET_ERROR("gst_pad_push failed with status = %d\n", ret); break; } @@ -1143,6 +1143,22 @@ static hailo_status gst_hailonet_call_run_async(GstHailoNet *self, const std::un self->impl->flush_cv.notify_all(); gst_hailonet_push_buffer_to_thread(self, buffer); + + if (self->impl->has_pending_eos) { + bool is_last_frame = false; + { + std::unique_lock lock(self->impl->flush_mutex); + if (0 == self->impl->ongoing_frames) { + is_last_frame = true; + } + } + + if (is_last_frame) { + self->impl->has_sent_eos = true; + auto event = gst_event_new_eos(); + (void)gst_pad_push_event(self->srcpad, event); + } + } })); job.detach(); @@ -1456,8 +1472,9 @@ static gboolean gst_hailonet_sink_event(GstPad *pad, GstObject *parent, GstEvent { GstHailoNet *self = GST_HAILONET(parent); if (GST_EVENT_TYPE(event) == GST_EVENT_EOS) { - self->impl->has_got_eos = true; - return gst_pad_push_event(self->srcpad, event); + // We want to forward EOS event only after all the frames have been processed (see callback of run_async) + self->impl->has_pending_eos = true; + return TRUE; } if (GST_EVENT_IS_STICKY(event)) { gst_hailonet_push_event_to_queue(self, event); @@ -1477,7 +1494,7 @@ static void gst_hailonet_flush_callback(GstHailoNet *self, gpointer /*data*/) HailoNetImpl::HailoNetImpl() : events_queue_per_buffer(), 
curr_event_queue(), input_queue(nullptr), thread_queue(nullptr), buffers_in_thread_queue(0), - props(), input_caps(nullptr), is_thread_running(false), has_got_eos(false), + props(), input_caps(nullptr), is_thread_running(false), has_pending_eos(false), has_sent_eos(false), did_critical_failure_happen(false), vdevice(nullptr), is_configured(false), has_called_activate(false), ongoing_frames(0) {} diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp index e6defb2..7c068f4 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailonet.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or @@ -110,7 +110,8 @@ public: HailoNetProperties props; GstCaps *input_caps; std::atomic_bool is_thread_running; - std::atomic_bool has_got_eos; + std::atomic_bool has_pending_eos; + std::atomic_bool has_sent_eos; std::mutex sink_probe_change_state_mutex; bool did_critical_failure_happen; diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp index 30805bd..0732ed4 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/gsthailoplugin.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.cpp index 8818f01..cdb94c6 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp index 0a49b8d..5db9aa1 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_events/hailo_events.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_output_info.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_output_info.hpp index a6f4d3c..aecf4e6 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_output_info.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/hailo_output_info.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/include/hailo_gst.h b/hailort/libhailort/bindings/gstreamer/gst-hailo/include/hailo_gst.h index fe54831..142fb27 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/include/hailo_gst.h +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/include/hailo_gst.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.cpp index 87d3cad..81d77b2 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp index 7136bc8..a1f6781 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/hailo_buffer_flag_meta.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.cpp index d91f7d0..812f3ca 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp index 94f0649..1105e1e 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/metadata/tensor_meta.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. 
All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp index 1de9136..86e5467 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp index bc05edf..e2e4cc2 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/network_group_handle.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp index 8815785..eb39271 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp index 820d13d..af854fd 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/linux/dma_buf_allocator_wrapper.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp index 2204090..a54395f 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp index 13b7a08..bbde91c 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/os/windows/dma_buf_allocator_wrapper.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp index e44692a..93157bb 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp index dd54fca..10bb3a5 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailorecv.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp index 4ff9557..7b98e8f 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp index 0741984..843b27d 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gst_hailosend.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp index 7f839c2..ed41b74 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp index 794cf4e..3e0ab78 100644 --- a/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp +++ b/hailort/libhailort/bindings/gstreamer/gst-hailo/sync_gsthailonet.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the LGPL 2.1 license (https://www.gnu.org/licenses/old-licenses/lgpl-2.1.txt) * * This library is free software; you can redistribute it and/or diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py b/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py index 739162c..3925ff2 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/__init__.py @@ -43,7 +43,7 @@ def _verify_pyhailort_lib_exists(): _verify_pyhailort_lib_exists() -__version__ = "4.20.1" +__version__ = "4.21.0" if _pyhailort.__version__ != __version__: raise ImportError( f"_pyhailort version ({_pyhailort.__version__}) does not match pyhailort version ({__version__})" diff --git a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py index 269aece..3b38921 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/pyhailort/pyhailort.py @@ -39,6 +39,7 @@ PCIE_ANY_DOMAIN = _pyhailort.HailoRTDefaults.PCIE_ANY_DOMAIN() HAILO_UNIQUE_VDEVICE_GROUP_ID = _pyhailort.HailoRTDefaults.HAILO_UNIQUE_VDEVICE_GROUP_ID() DEFAULT_VSTREAM_TIMEOUT_MS = 10000 DEFAULT_VSTREAM_QUEUE_SIZE = 2 +BOARD_INFO_NOT_CONFIGURED_ATTR = "" class HailoSocket(object): MAX_UDP_PAYLOAD_SIZE = HailoSocketDefs.MAX_UDP_PAYLOAD_SIZE() @@ -90,6 +91,9 @@ class HailoRTNotFoundException(HailoRTException): class HailoRTInvalidHEFException(HailoRTException): pass +class HailoRTHEFNotCompatibleWithDevice(HailoRTException): + pass + class HailoRTEthException(HailoRTException): pass @@ -146,6 +150,8 @@ class ExceptionWrapper(object): if string_error_code == "HAILO_INVALID_HEF": return HailoRTInvalidHEFException("Invalid HEF. 
See hailort.log for more information") + if string_error_code == "HAILO_HEF_NOT_COMPATIBLE_WITH_DEVICE": + return HailoRTHEFNotCompatibleWithDevice("HEF file is not compatible with device. See hailort.log for more information") if string_error_code == "HAILO_ETH_FAILURE": return HailoRTEthException("Ethernet failure. See hailort.log for more information") @@ -373,6 +379,11 @@ class HEF(object): with ExceptionWrapper(): return self._hef.get_network_groups_infos() + def _get_external_resources(self): + # Returns a dict of the appended external resources. Key is name, Value is the resource raw-bytes. + with ExceptionWrapper(): + return self._hef.get_external_resources() + def get_input_vstream_infos(self, name=None): """Get input vstreams information. @@ -764,8 +775,8 @@ class ConfiguredNetwork(object): """ return self._configured_network.set_scheduler_priority(priority) - def init_cache(self, read_offset, write_offset_delta): - return self._configured_network.init_cache(read_offset, write_offset_delta) + def init_cache(self, read_offset): + return self._configured_network.init_cache(read_offset) def update_cache_offset(self, offset_delta_entries): return self._configured_network.update_cache_offset(offset_delta_entries) @@ -880,7 +891,6 @@ class InferVStreams(object): self._input_vstreams_params = input_vstreams_params self._output_vstreams_params = output_vstreams_params self._tf_nms_format = tf_nms_format - self._validate_output_vstreams_params() self._total_time = None self._hw_time = None self._network_name_to_outputs = InferVStreams._get_network_to_outputs_mapping(configured_net_group) @@ -920,9 +930,8 @@ class InferVStreams(object): if (output_buffers_info[output_name].output_order == FormatOrder.HAILO_NMS_WITH_BYTE_MASK): # Note: In python bindings the output data gets converted to py::array with dtype=dtype. # In `HAILO_NMS_WITH_BYTE_MASK` we would like to get the data as uint8 and convert it by it's format. 
- # Therefore we need to get it as uint8 instead of float32 and adjust the shape size. + # Therefore we need to get it as uint8 instead of float32 dtype = numpy.uint8 - shape[0] = shape[0] * 4 output_buffers[output_name] = numpy.empty([batch_size] + list(shape), dtype=dtype) return output_buffers, output_buffers_info @@ -1037,11 +1046,6 @@ class InferVStreams(object): raise HailoRTException("{} numpy array item size is {}, not {}".format(input_layer_name, input_item_size, input_expected_item_size)) - def _validate_output_vstreams_params(self): - for output_vstream in self._output_vstreams_params.values(): - if output_vstream.user_buffer_format.order == FormatOrder.HAILO_NMS_BY_SCORE: - raise HailoRTException("HAILO_NMS_BY_SCORE format is not supported") - @staticmethod def _get_number_of_frames(input_data): # Checks that all the batch-sizes of the input_data are equals for all input layers @@ -1114,9 +1118,50 @@ class InferVStreams(object): self._infer_pipeline.release() return False +class Detection(object): + """Represents a detection information""" -class HailoDetection(object): - """Represents Hailo detection information""" + def __init__(self, detection): + self._y_min = detection.y_min + self._x_min = detection.x_min + self._y_max = detection.y_max + self._x_max = detection.x_max + self._score = detection.score + self._class_id = detection.class_id + + @property + def y_min(self): + """Get detection's box y_min coordinate""" + return self._y_min + + @property + def x_min(self): + """Get detection's box x_min coordinate""" + return self._x_min + + @property + def y_max(self): + """Get detection's box y_max coordinate""" + return self._y_max + + @property + def x_max(self): + """Get detection's box x_max coordinate""" + return self._x_max + + @property + def score(self): + """Get detection's score""" + return self._score + + @property + def class_id(self): + """Get detection's class_id""" + return self._class_id + + +class DetectionWithByteMask(object): + 
"""Represents a detection with byte mask information""" def __init__(self, detection): self._y_min = detection.box.y_min @@ -1285,6 +1330,15 @@ class HailoRTTransformUtils(object): offset += BBOX_PARAMS * class_bboxes_amount return converted_output_frame + @staticmethod + def _output_raw_buffer_to_nms_by_score_format_single_frame(raw_output_buffer): + detections = _pyhailort.convert_nms_by_score_buffer_to_detections(raw_output_buffer) + converted_output_frame = [] + for detection in detections: + converted_output_frame.append(Detection(detection)) + + return converted_output_frame + @staticmethod def _output_raw_buffer_to_nms_with_byte_mask_format(raw_output_buffer, number_of_classes, batch_size, image_height, image_width, max_bboxes_per_class, output_dtype, is_tf_format=False): @@ -1307,7 +1361,7 @@ class HailoRTTransformUtils(object): detections = _pyhailort.convert_nms_with_byte_mask_buffer_to_detections(raw_output_buffer) converted_output_frame = [] for detection in detections: - converted_output_frame.append(HailoDetection(detection)) + converted_output_frame.append(DetectionWithByteMask(detection)) return converted_output_frame @@ -1534,8 +1588,8 @@ class HailoFormatFlags(_pyhailort.FormatFlags): SUPPORTED_PROTOCOL_VERSION = 2 SUPPORTED_FW_MAJOR = 4 -SUPPORTED_FW_MINOR = 20 -SUPPORTED_FW_REVISION = 1 +SUPPORTED_FW_MINOR = 21 +SUPPORTED_FW_REVISION = 0 MEGA_MULTIPLIER = 1000.0 * 1000.0 @@ -1569,7 +1623,7 @@ class BoardInformation(object): def _string_field_str(self, string_field): # Return if the string field is empty - return string_field.rstrip('\x00') or "" + return string_field.rstrip('\x00') or BOARD_INFO_NOT_CONFIGURED_ATTR def __str__(self): """Returns: @@ -1709,7 +1763,7 @@ class HealthInformation(object): self.current_temperature_zone, self.current_temperature_throttling_level, temperature_throttling_levels_str) class ExtendedDeviceInformation(object): - def __init__(self, neural_network_core_clock_rate, supported_features, boot_source, lcs, soc_id, 
eth_mac_address, unit_level_tracking_id, soc_pm_values): + def __init__(self, neural_network_core_clock_rate, supported_features, boot_source, lcs, soc_id, eth_mac_address, unit_level_tracking_id, soc_pm_values, gpio_mask): self.neural_network_core_clock_rate = neural_network_core_clock_rate self.supported_features = SupportedFeatures(supported_features) self.boot_source = boot_source @@ -1718,21 +1772,23 @@ class ExtendedDeviceInformation(object): self.eth_mac_address = eth_mac_address self.unit_level_tracking_id = unit_level_tracking_id self.soc_pm_values = soc_pm_values + self.gpio_mask = gpio_mask def __str__(self): """Returns: str: Human readable string. """ + INVALID_LCS = 0 string = 'Neural Network Core Clock Rate: {}MHz\n' \ '{}' \ 'Boot source: {}\n' \ - 'LCS: {}\n'.format( + 'LCS: {}'.format( self.neural_network_core_clock_rate / MEGA_MULTIPLIER, str(self.supported_features), str(self.boot_source.name), - str(self.lcs)) + BOARD_INFO_NOT_CONFIGURED_ATTR if self.lcs == INVALID_LCS else str(self.lcs)) if any(self.soc_id): - string += 'SoC ID: ' + (self.soc_id.hex()) + string += '\nSoC ID: ' + (self.soc_id.hex()) if any(self.eth_mac_address): string += '\nMAC Address: ' + (":".join("{:02X}".format(i) for i in self.eth_mac_address)) @@ -1743,6 +1799,8 @@ class ExtendedDeviceInformation(object): if any(self.soc_pm_values): string += '\nPM Values: ' + (self.soc_pm_values.hex()) + if 0 != self.gpio_mask: + string += '\nGPIO Mask: ' + f'{self.gpio_mask:04x}' return string @@ -2413,7 +2471,8 @@ class Control: with ExceptionWrapper(): response = self._device.get_extended_device_information() device_information = ExtendedDeviceInformation(response.neural_network_core_clock_rate, - response.supported_features, response.boot_source, response.lcs, response.soc_id, response.eth_mac_address , response.unit_level_tracking_id, response.soc_pm_values) + response.supported_features, response.boot_source, response.lcs, response.soc_id, + response.eth_mac_address, 
response.unit_level_tracking_id, response.soc_pm_values, response.gpio_mask) return device_information def _get_health_information(self): @@ -2663,9 +2722,6 @@ class InferModel: Args: order (_pyhailort.hailo_format_order_t): the format order """ - # TODO: HRT-15612 support HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE in pyhailort - if FormatOrder.HAILO_NMS_BY_SCORE == order: - raise HailoRTException("Format order HAILO_NMS_BY_SCORE is not supported") with ExceptionWrapper(): self._infer_stream.set_format_order(order) @@ -2779,7 +2835,7 @@ class InferModel: frames after which the scheduler will attempt to switch to another model. Note: - Default value is `HAILO_DEFAULT_BATCH_SIZE`. It means automatic batch determined by hailort. + The default value is `HAILO_DEFAULT_BATCH_SIZE` - which means the batch is determined by HailoRT automatically. Args: batch_size (int): The new batch size to be set. @@ -2968,6 +3024,8 @@ class ConfiguredInferModel: """ with ExceptionWrapper(): self._validate_c_contiguous(buffer) + if self._nms_info and self._nms_info.format_order in [FormatOrder.HAILO_NMS_WITH_BYTE_MASK, FormatOrder.HAILO_NMS_BY_SCORE] and buffer.dtype != numpy.uint8: + raise HailoRTException(f"Buffer must be of type uint8 for {self._nms_info.format_order} format order. Got {buffer.dtype}.") self._infer_stream.set_buffer(buffer) self._buffer = buffer @@ -2979,24 +3037,27 @@ class ConfiguredInferModel: Args: tf_format (bool, optional): Whether the output format is tf or hailo. Relevant for NMS outputs. The output can be re-formatted into two formats (TF, Hailo) and the user through choosing the True/False function - parameter, can decide which format to receive. + parameter, can decide which format to receive. Does not support format order HAILO_NMS_BY_SCORE. 
For detection outputs: TF format is an :obj:`numpy.array` with shape [number of classes, bounding box params, max bounding boxes per class] where the 2nd dimension (bounding box params) is of a fixed length of 5 (y_min, x_min, y_max, x_max, score). - Hailo format is a list of :obj:`numpy.array` where each array represents the detections for a specific class: + hailo HAILO_NMS_BY_CLASS format is a list of :obj:`numpy.array` where each array represents the detections for a specific class: [cls0_detections, cls1_detections, ...]. The length of the list is the number of classes. Each :obj:`numpy.array` shape is (number of detections, bounding box params) where the 2nd dimension (bounding box params) is of a fixed length of 5 (y_min, x_min, y_max, x_max, score). + hailo HAILO_NMS_BY_SCORE format is a list of detections where each detection is an :obj:`Detection` object. + The detections are sorted decreasingly by score. + For segmentation outputs: TF format is an :obj:`numpy.array` with shape [1, image_size + number_of_params, max bounding boxes per class] where the 2nd dimension (image_size + number_of_params) is calculated as: mask (image_width * image_height) + (y_min, x_min, y_max, x_max, score, class_id). The mask is a binary mask of the segmentation output where the ROI (region of interest) is mapped to 1 and the background is mapped to 0. - Hailo format is a list of detections per class: [detecion0, detection1, ... detection_m] - where each detection is an :obj:`HailoDetection`. The detections are sorted decreasingly by score. + Hailo format is a list of detections: [detecion0, detection1, ... detection_m] + where each detection is an :obj:`DetectionWithByteMask`. The detections are sorted decreasingly by score. Returns: buffer (numpy.array): the buffer of the edge. @@ -3007,7 +3068,6 @@ class ConfiguredInferModel: # A user would prefer the plain buffer, with no transformation. 
# This is especially useful when the output is not ready, which would potentially cause the NMS transformation to fail. return buffer - if self._nms_info: nms_info_class = ConfiguredInferModel.NmsTfTransformationInfo if tf_format else ConfiguredInferModel.NmsHailoTransformationInfo nms_info = nms_info_class(**self._nms_info.__dict__) @@ -3032,7 +3092,7 @@ class ConfiguredInferModel: ) else: buffer = HailoRTTransformUtils._output_raw_buffer_to_nms_with_byte_mask_hailo_format_single_frame(self._buffer) - elif nms_info.format_order in [FormatOrder.HAILO_NMS, FormatOrder.HAILO_NMS_BY_CLASS]: + elif nms_info.format_order in [FormatOrder.HAILO_NMS_BY_CLASS]: if nms_info.use_tf_nms_format: nms_shape = [ nms_info.number_of_classes, @@ -3054,6 +3114,11 @@ class ConfiguredInferModel: self._buffer, nms_info.number_of_classes, ) + elif nms_info.format_order in [FormatOrder.HAILO_NMS_BY_SCORE]: + if nms_info.use_tf_nms_format: + raise HailoRTException(f"Use of tf format is unsupported for format order: {nms_info.format_order}.") + else: + buffer = HailoRTTransformUtils._output_raw_buffer_to_nms_by_score_format_single_frame(self._buffer) else: raise HailoRTException(f"Unsupported NMS format order: {nms_info.format_order}.") @@ -3375,7 +3440,6 @@ class ConfiguredInferModel: if format_order in ( FormatOrder.HAILO_NMS_WITH_BYTE_MASK, - FormatOrder.HAILO_NMS, FormatOrder.HAILO_NMS_BY_CLASS, FormatOrder.HAILO_NMS_BY_SCORE ): @@ -3866,7 +3930,7 @@ class OutputLayerUtils(object): self._user_buffer_format = pipeline.get_user_buffer_format() self._output_shape = pipeline.shape - self._is_nms = (self._user_buffer_format.order in [FormatOrder.HAILO_NMS, FormatOrder.HAILO_NMS_BY_CLASS, FormatOrder.HAILO_NMS_BY_SCORE]) + self._is_nms = (self._user_buffer_format.order in [FormatOrder.HAILO_NMS_BY_CLASS, FormatOrder.HAILO_NMS_BY_SCORE]) if self._is_nms: self._quantized_empty_bbox = numpy.asarray([0] * BBOX_PARAMS, dtype=self.output_dtype) diff --git 
a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py index e48ff6c..a359088 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py +++ b/hailort/libhailort/bindings/python/platform/hailo_platform/tools/hailocli/main.py @@ -61,14 +61,7 @@ class PlatformCommands: def run(self): argv = sys.argv[1:] - ret_val = self._run(argv) - if isinstance(ret_val, (int, str)): - return ret_val - if isinstance(ret_val, bool): - return int(not ret_val) - - # possible returned value can be an object of client runner which represents successful run - return 0 + return self._run(argv) # Dependency injection for testing def _run(self, argv): diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb index 469bcdf..d7efa98 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_1_Async_Inference_Multiple_Models_Tutorial.ipynb @@ -66,7 +66,7 @@ "\n", " # For a single input / output model, the input / output object \n", " # can be accessed with a name parameter ...\n", - " infer_model.input(\"input_layer1\").set_format_type(FormatType.FLOAT32)\n", + " infer_model.input(\"resnet_v1_18/input_layer1\").set_format_type(FormatType.FLOAT32)\n", " # ... 
or without\n", " infer_model.output().set_format_type(FormatType.FLOAT32)\n", "\n", @@ -145,11 +145,18 @@ "\n", "print('Starting async inference on multiple models using processes')\n", "\n", + "job_failed = False\n", "for job in pool:\n", " job.start()\n", "for job in pool:\n", " job.join()\n", - "\n", + " \n", + " # Using Process instead of Thread allows accessing the exitcode of the job\n", + " if job.exitcode:\n", + " job_failed = True\n", + " \n", + "if job_failed:\n", + " raise Exception(\"job failed\")\n", "print('Done inference')" ] } diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb index 95c607d..7aef184 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_2_Infer_Pipeline_Inference_Tutorial.ipynb @@ -130,8 +130,14 @@ " proc = Process(target=recv, args=(configured_network, vstreams_params, num_frames))\n", " proc.start()\n", " recv_procs.append(proc)\n", + " recv_failed = False\n", " for proc in recv_procs:\n", - " proc.join()" + " proc.join()\n", + " if proc.exitcode:\n", + " recv_failed = True\n", + " \n", + " if recv_failed:\n", + " raise Exception(\"recv failed\")" ] }, { @@ -158,6 +164,12 @@ "with network_group.activate(network_group_params):\n", " send_process.join()\n", " recv_process.join()\n", + " \n", + " if send_process.exitcode:\n", + " raise Exception(\"send process failed\")\n", + " if recv_process.exitcode:\n", + " raise Exception(\"recv process failed\")\n", + " \n", "print('Done')\n", "\n", "target.release()" diff --git a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb 
b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb index 7bf440f..3eabc65 100644 --- a/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb +++ b/hailort/libhailort/bindings/python/platform/hailo_tutorials/notebooks/HRT_3_Infer_Pipeline_Inference_Multiple_Models_Tutorial.ipynb @@ -84,12 +84,18 @@ " infer_processes.append(infer_process)\n", "\n", "print(f'Starting inference on multiple models using scheduler')\n", + "\n", + "infer_failed = False\n", "for infer_process in infer_processes:\n", " infer_process.start()\n", "for infer_process in infer_processes:\n", " infer_process.join()\n", - "\n", - " print('Done inference')" + " if infer_process.exitcode:\n", + " infer_failed = True\n", + " \n", + "if infer_failed:\n", + " raise Exception(\"infer process failed\")\n", + "print('Done inference')" ] } ], diff --git a/hailort/libhailort/bindings/python/platform/setup.py b/hailort/libhailort/bindings/python/platform/setup.py index 1cb4061..7ea0efb 100644 --- a/hailort/libhailort/bindings/python/platform/setup.py +++ b/hailort/libhailort/bindings/python/platform/setup.py @@ -146,6 +146,6 @@ if __name__ == "__main__": "linux_aarch64", ], url="https://hailo.ai/", - version="4.20.1", + version="4.21.0", zip_safe=False, ) diff --git a/hailort/libhailort/bindings/python/src/CMakeLists.txt b/hailort/libhailort/bindings/python/src/CMakeLists.txt index add0473..bd00b4c 100644 --- a/hailort/libhailort/bindings/python/src/CMakeLists.txt +++ b/hailort/libhailort/bindings/python/src/CMakeLists.txt @@ -76,8 +76,8 @@ if(LIBHAILORT_PATH AND HAILORT_INCLUDE_DIR) _pyhailort PUBLIC HAILORT_MAJOR_VERSION=4 - HAILORT_MINOR_VERSION=20 - HAILORT_REVISION_VERSION=1 + HAILORT_MINOR_VERSION=21 + HAILORT_REVISION_VERSION=0 ) set_target_properties( _pyhailort @@ -88,7 +88,7 @@ if(LIBHAILORT_PATH AND HAILORT_INCLUDE_DIR) 
elseif(LIBHAILORT_PATH OR HAILORT_INCLUDE_DIR) message(FATAL_ERROR "Both LIBHAILORT_PATH and HAILORT_INCLUDE_DIR must be defined or none of them. LIBHAILORT_PATH: '${LIBHAILORT_PATH}', HAILORT_INCLUDE_DIR: '${HAILORT_INCLUDE_DIR}'") else() - find_package(HailoRT 4.20.1 EXACT REQUIRED) + find_package(HailoRT 4.21.0 EXACT REQUIRED) target_link_libraries(_pyhailort PRIVATE HailoRT::libhailort) endif() diff --git a/hailort/libhailort/bindings/python/src/bindings_common.hpp b/hailort/libhailort/bindings/python/src/bindings_common.hpp index cf4527b..ec9c7ee 100644 --- a/hailort/libhailort/bindings/python/src/bindings_common.hpp +++ b/hailort/libhailort/bindings/python/src/bindings_common.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -45,13 +45,14 @@ public: { switch (user_format.order) { - case HAILO_FORMAT_ORDER_HAILO_NMS: case HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS: - return { HailoRTCommon::get_nms_host_shape_size(nms_shape) }; + return { HailoRTCommon::get_nms_by_class_host_shape_size(nms_shape) }; case HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE: - throw HailoRTStatusException("Format order HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE is not supported in python API."); case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: - return {HailoRTCommon::get_nms_host_frame_size(nms_shape, user_format) / HailoRTCommon::get_format_data_bytes(user_format)}; + // In reality, there is no such thing as shape for HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK and HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE result, + // but since this function is used in order to calculate the size of the output buffer numpy array, we return the host frame size + // with assumption that the buffer type is uint8 (an exception is thrown in such case). 
+ return {HailoRTCommon::get_nms_host_frame_size(nms_shape, user_format)}; case HAILO_FORMAT_ORDER_NC: return {shape.features}; case HAILO_FORMAT_ORDER_NHW: diff --git a/hailort/libhailort/bindings/python/src/device_api.cpp b/hailort/libhailort/bindings/python/src/device_api.cpp index 8c9b8cf..b644b90 100644 --- a/hailort/libhailort/bindings/python/src/device_api.cpp +++ b/hailort/libhailort/bindings/python/src/device_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -23,28 +23,28 @@ std::vector DeviceWrapper::scan() return device_ids.release(); } -DeviceWrapper DeviceWrapper::create(const std::string &device_id) +DeviceWrapperPtr DeviceWrapper::create(const std::string &device_id) { auto device = Device::create(device_id); VALIDATE_EXPECTED(device); - return DeviceWrapper(device.release()); + return std::make_shared(device.release()); } -DeviceWrapper DeviceWrapper::create_pcie(hailo_pcie_device_info_t &device_info) +DeviceWrapperPtr DeviceWrapper::create_pcie(hailo_pcie_device_info_t &device_info) { auto device = Device::create_pcie(device_info); VALIDATE_EXPECTED(device); - return DeviceWrapper(device.release()); + return std::make_shared(device.release()); } -DeviceWrapper DeviceWrapper::create_eth(const std::string &device_address, uint16_t port, +DeviceWrapperPtr DeviceWrapper::create_eth(const std::string &device_address, uint16_t port, uint32_t timeout_milliseconds, uint8_t max_number_of_attempts) { auto device = Device::create_eth(device_address, port, timeout_milliseconds, max_number_of_attempts); VALIDATE_EXPECTED(device); - return DeviceWrapper(device.release()); + return std::make_shared(device.release()); } void DeviceWrapper::release() @@ -444,7 +444,7 @@ void DeviceWrapper::set_sleep_state(hailo_sleep_state_t sleep_state) void 
DeviceWrapper::bind(py::module &m) { - py::class_(m, "Device") + py::class_(m, "Device") .def("is_valid", &DeviceWrapper::is_valid) // Scan diff --git a/hailort/libhailort/bindings/python/src/device_api.hpp b/hailort/libhailort/bindings/python/src/device_api.hpp index 1c06836..976a7b0 100644 --- a/hailort/libhailort/bindings/python/src/device_api.hpp +++ b/hailort/libhailort/bindings/python/src/device_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -42,17 +42,30 @@ public: }; +class DeviceWrapper; +using DeviceWrapperPtr = std::shared_ptr; + class DeviceWrapper final { public: static std::vector scan(); - static DeviceWrapper create(const std::string &device_id); - static DeviceWrapper create_pcie(hailo_pcie_device_info_t &device_info); - static DeviceWrapper create_eth(const std::string &device_address, uint16_t port, + static DeviceWrapperPtr create(const std::string &device_id); + static DeviceWrapperPtr create_pcie(hailo_pcie_device_info_t &device_info); + static DeviceWrapperPtr create_eth(const std::string &device_address, uint16_t port, uint32_t timeout_milliseconds, uint8_t max_number_of_attempts); void release(); + DeviceWrapper(std::unique_ptr &&device) : m_device(std::move(device)) +#ifdef HAILO_IS_FORK_SUPPORTED + , m_atfork_guard(this, { + .before_fork = [this]() { if (m_device) m_device->before_fork(); }, + .after_fork_in_parent = [this]() { if (m_device) m_device->after_fork_in_parent(); }, + .after_fork_in_child = [this]() { if (m_device) m_device->after_fork_in_child(); }, + }) +#endif + {} + Device& device() { VALIDATE_NOT_NULL(m_device, HAILO_INTERNAL_FAILURE); @@ -135,10 +148,10 @@ public: static void bind(py::module &m); private: - DeviceWrapper(std::unique_ptr &&device) - : m_device(std::move(device)) {} - std::unique_ptr m_device; 
+#ifdef HAILO_IS_FORK_SUPPORTED + AtForkRegistry::AtForkGuard m_atfork_guard; +#endif }; } /* namespace hailort */ diff --git a/hailort/libhailort/bindings/python/src/hef_api.cpp b/hailort/libhailort/bindings/python/src/hef_api.cpp index fa23f08..4b018a5 100644 --- a/hailort/libhailort/bindings/python/src/hef_api.cpp +++ b/hailort/libhailort/bindings/python/src/hef_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -173,6 +173,17 @@ py::dict HefWrapper::create_configure_params_mipi_input(hailo_stream_interface_t return py::cast(configure_params.release()); } +py::dict HefWrapper::get_external_resources() +{ + auto external_resource = hef->get_external_resources(); + VALIDATE_EXPECTED(external_resource); + std::map external_resources; + for (const auto &resource : external_resource.value()) { + external_resources[resource.first] = py::bytes(resource.second); + } + return py::cast(external_resources); +} + py::list HefWrapper::get_networks_names(const std::string &net_group_name) { auto network_infos = hef->get_network_infos(net_group_name); @@ -209,6 +220,7 @@ void HefWrapper::bind(py::module &m) .def("get_output_stream_infos", &HefWrapper::get_output_stream_infos) .def("get_all_stream_infos", &HefWrapper::get_all_stream_infos) .def("get_networks_names", &HefWrapper::get_networks_names) + .def("get_external_resources", &HefWrapper::get_external_resources) ; } diff --git a/hailort/libhailort/bindings/python/src/hef_api.hpp b/hailort/libhailort/bindings/python/src/hef_api.hpp index 02c62b4..6b20d8b 100644 --- a/hailort/libhailort/bindings/python/src/hef_api.hpp +++ b/hailort/libhailort/bindings/python/src/hef_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -57,6 +57,8 @@ public: return hef; } + py::dict get_external_resources(); + py::dict create_configure_params_mipi_input(hailo_stream_interface_t output_interface, const hailo_mipi_input_stream_params_t &mipi_params); py::list get_networks_names(const std::string &net_group_name); diff --git a/hailort/libhailort/bindings/python/src/infer_model_api.cpp b/hailort/libhailort/bindings/python/src/infer_model_api.cpp index 85bcbb9..ea409ed 100644 --- a/hailort/libhailort/bindings/python/src/infer_model_api.cpp +++ b/hailort/libhailort/bindings/python/src/infer_model_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -362,7 +362,7 @@ void ConfiguredInferModelWrapper::set_scheduler_priority(uint8_t priority) VALIDATE_STATUS(status); } -size_t ConfiguredInferModelWrapper::get_async_queue_size() +size_t ConfiguredInferModelWrapper::get_async_queue_size() const { auto size = m_configured_infer_model.get_async_queue_size(); VALIDATE_EXPECTED(size); diff --git a/hailort/libhailort/bindings/python/src/infer_model_api.hpp b/hailort/libhailort/bindings/python/src/infer_model_api.hpp index 373f4c5..eb4f003 100644 --- a/hailort/libhailort/bindings/python/src/infer_model_api.hpp +++ b/hailort/libhailort/bindings/python/src/infer_model_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -105,7 +105,7 @@ public: void set_scheduler_timeout(const std::chrono::milliseconds &timeout); void set_scheduler_threshold(uint32_t threshold); void set_scheduler_priority(uint8_t priority); - size_t get_async_queue_size(); + size_t get_async_queue_size() const; void shutdown(); static void bind(py::module &m); diff --git a/hailort/libhailort/bindings/python/src/network_group_api.cpp b/hailort/libhailort/bindings/python/src/network_group_api.cpp index ec9441c..c2d7c6e 100644 --- a/hailort/libhailort/bindings/python/src/network_group_api.cpp +++ b/hailort/libhailort/bindings/python/src/network_group_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/bindings/python/src/network_group_api.hpp b/hailort/libhailort/bindings/python/src/network_group_api.hpp index 4faab20..dd2bf6c 100644 --- a/hailort/libhailort/bindings/python/src/network_group_api.hpp +++ b/hailort/libhailort/bindings/python/src/network_group_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -136,9 +136,9 @@ public: VALIDATE_STATUS(status); } - void init_cache(uint32_t read_offset, int32_t write_offset_delta) + void init_cache(uint32_t read_offset) { - auto status = get().init_cache(read_offset, write_offset_delta); + auto status = get().init_cache(read_offset); VALIDATE_STATUS(status); } diff --git a/hailort/libhailort/bindings/python/src/pyhailort.cpp b/hailort/libhailort/bindings/python/src/pyhailort.cpp index 7fcc255..a16c8c8 100644 --- a/hailort/libhailort/bindings/python/src/pyhailort.cpp +++ b/hailort/libhailort/bindings/python/src/pyhailort.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ @@ -143,6 +143,22 @@ public: } }; +std::vector convert_nms_by_score_buffer_to_detections(py::array src_buffer) +{ + std::vector detections; + uint8_t *src_ptr = static_cast(src_buffer.mutable_data()); + uint16_t detections_count = *(uint16_t*)src_ptr; + detections.reserve(detections_count); + + size_t buffer_offset = sizeof(uint16_t); + for (size_t i = 0; i < detections_count; i++) { + hailo_detection_t detection = *(hailo_detection_t*)(src_ptr + buffer_offset); + buffer_offset += sizeof(hailo_detection_t); + detections.emplace_back(std::move(detection)); + } + return detections; +} + std::vector convert_nms_with_byte_mask_buffer_to_detections(py::array src_buffer) { std::vector detections; @@ -184,6 +200,7 @@ PYBIND11_MODULE(_pyhailort, m) { validate_versions_match(); m.def("get_status_message", &get_status_message); + m.def("convert_nms_by_score_buffer_to_detections", &convert_nms_by_score_buffer_to_detections); m.def("convert_nms_with_byte_mask_buffer_to_detections", &convert_nms_with_byte_mask_buffer_to_detections); m.def("dequantize_output_buffer_in_place", 
&QuantizationBindings::dequantize_output_buffer_in_place); m.def("dequantize_output_buffer", &QuantizationBindings::dequantize_output_buffer); @@ -237,7 +254,7 @@ PYBIND11_MODULE(_pyhailort, m) { .def_readonly("x_max", &hailo_rectangle_t::x_max) ; - py::class_(m, "HailoDetectionWithByteMask") + py::class_(m, "DetectionWithByteMask") .def_readonly("box", &hailo_detection_with_byte_mask_t::box) .def_readonly("mask_size", &hailo_detection_with_byte_mask_t::mask_size) .def_readonly("score", &hailo_detection_with_byte_mask_t::score) @@ -248,6 +265,15 @@ PYBIND11_MODULE(_pyhailort, m) { }) ; + py::class_(m, "Detection") + .def_readonly("y_min", &hailo_detection_t::y_min) + .def_readonly("x_min", &hailo_detection_t::x_min) + .def_readonly("y_max", &hailo_detection_t::y_max) + .def_readonly("x_max", &hailo_detection_t::x_max) + .def_readonly("score", &hailo_detection_t::score) + .def_readonly("class_id", &hailo_detection_t::class_id) + ; + py::enum_(m, "DeviceArchitecture") .value("HAILO8_A0", HAILO_ARCH_HAILO8_A0) .value("HAILO8", HAILO_ARCH_HAILO8) @@ -353,8 +379,6 @@ PYBIND11_MODULE(_pyhailort, m) { ; py::class_(m, "HealthMonitorDataflowShutdownNotificationMessage") - .def_readonly("closed_input_streams", &hailo_health_monitor_dataflow_shutdown_notification_message_t::closed_input_streams) - .def_readonly("closed_output_streams", &hailo_health_monitor_dataflow_shutdown_notification_message_t::closed_output_streams) .def_readonly("ts0_temperature", &hailo_health_monitor_dataflow_shutdown_notification_message_t::ts0_temperature) .def_readonly("ts1_temperature", &hailo_health_monitor_dataflow_shutdown_notification_message_t::ts1_temperature) ; @@ -551,7 +575,6 @@ PYBIND11_MODULE(_pyhailort, m) { .value("NC", HAILO_FORMAT_ORDER_NC) .value("BAYER_RGB", HAILO_FORMAT_ORDER_BAYER_RGB) .value("12_BIT_BAYER_RGB", HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB) - .value("HAILO_NMS", HAILO_FORMAT_ORDER_HAILO_NMS) .value("RGB888", HAILO_FORMAT_ORDER_RGB888) .value("NCHW", 
HAILO_FORMAT_ORDER_NCHW) .value("YUY2", HAILO_FORMAT_ORDER_YUY2) @@ -1008,6 +1031,7 @@ PYBIND11_MODULE(_pyhailort, m) { .def_property_readonly("soc_pm_values", [](const hailo_extended_device_information_t& info) -> py::bytes { return std::string((const char*) info.soc_pm_values, sizeof(info.soc_pm_values)); }) + .def_readonly("gpio_mask", &hailo_extended_device_information_t::gpio_mask) ; py::enum_(m, "BootSource") @@ -1078,11 +1102,11 @@ PYBIND11_MODULE(_pyhailort, m) { return py::make_tuple(self.shape.features); case HAILO_FORMAT_ORDER_NHW: return py::make_tuple(self.shape.height, self.shape.width); - case HAILO_FORMAT_ORDER_HAILO_NMS: case HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS: return py::make_tuple(self.nms_shape.number_of_classes, HailoRTCommon::BBOX_PARAMS, self.nms_shape.max_bboxes_per_class); case HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE: - throw HailoRTCustomException("HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE format order is not supported"); + case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: + throw HailoRTCustomException("Format order has no shape"); default: return py::make_tuple(self.shape.height, self.shape.width, self.shape.features); } diff --git a/hailort/libhailort/bindings/python/src/quantization_api.cpp b/hailort/libhailort/bindings/python/src/quantization_api.cpp index 2ba23f8..1d8a75b 100644 --- a/hailort/libhailort/bindings/python/src/quantization_api.cpp +++ b/hailort/libhailort/bindings/python/src/quantization_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/bindings/python/src/quantization_api.hpp b/hailort/libhailort/bindings/python/src/quantization_api.hpp index 3ec188d..5a8539f 100644 --- a/hailort/libhailort/bindings/python/src/quantization_api.hpp +++ b/hailort/libhailort/bindings/python/src/quantization_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/bindings/python/src/utils.hpp b/hailort/libhailort/bindings/python/src/utils.hpp index 298aa3c..d470016 100644 --- a/hailort/libhailort/bindings/python/src/utils.hpp +++ b/hailort/libhailort/bindings/python/src/utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ diff --git a/hailort/libhailort/bindings/python/src/vdevice_api.cpp b/hailort/libhailort/bindings/python/src/vdevice_api.cpp index e4077fb..1a5243c 100644 --- a/hailort/libhailort/bindings/python/src/vdevice_api.cpp +++ b/hailort/libhailort/bindings/python/src/vdevice_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/bindings/python/src/vdevice_api.hpp b/hailort/libhailort/bindings/python/src/vdevice_api.hpp index ca1bbf0..b3dfed6 100644 --- a/hailort/libhailort/bindings/python/src/vdevice_api.hpp +++ b/hailort/libhailort/bindings/python/src/vdevice_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. 
All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/bindings/python/src/vstream_api.cpp b/hailort/libhailort/bindings/python/src/vstream_api.cpp index 1289bd8..853d114 100644 --- a/hailort/libhailort/bindings/python/src/vstream_api.cpp +++ b/hailort/libhailort/bindings/python/src/vstream_api.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/bindings/python/src/vstream_api.hpp b/hailort/libhailort/bindings/python/src/vstream_api.hpp index cfd2960..6733d3f 100644 --- a/hailort/libhailort/bindings/python/src/vstream_api.hpp +++ b/hailort/libhailort/bindings/python/src/vstream_api.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/common/common.h b/hailort/libhailort/examples/c/common/common.h index 319e1ac..6ed5e68 100644 --- a/hailort/libhailort/examples/c/common/common.h +++ b/hailort/libhailort/examples/c/common/common.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/common/hailo_thread.h b/hailort/libhailort/examples/c/common/hailo_thread.h index 64cae59..f6c34cc 100644 --- a/hailort/libhailort/examples/c/common/hailo_thread.h +++ b/hailort/libhailort/examples/c/common/hailo_thread.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt index 1f2891e..29d12f7 100644 --- a/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/data_quantization_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(data_quantization_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c b/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c index 15b4ea3..6afdd26 100644 --- a/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c +++ b/hailort/libhailort/examples/c/data_quantization_example/data_quantization_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt index e72ac5b..bb4f4d6 100644 --- a/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/infer_pipeline_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(infer_pipeline_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c index e1696df..4ecc3ab 100644 --- a/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c +++ b/hailort/libhailort/examples/c/infer_pipeline_example/infer_pipeline_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt index 2ca14c4..0b6b9d1 100644 --- a/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(multi_device_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c b/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c index 12c187c..f546e5c 100644 --- a/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c +++ b/hailort/libhailort/examples/c/multi_device_example/multi_device_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt index f36a1a2..1b60e45 100644 --- a/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(multi_network_vstream_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c b/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c index b809230..ae16d74 100644 --- a/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c +++ b/hailort/libhailort/examples/c/multi_network_vstream_example/multi_network_vstream_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt index 616c1d6..707bf72 100644 --- a/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/notification_callback_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(notification_callback_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c b/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c index 7ff3032..a40ca17 100644 --- a/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c +++ b/hailort/libhailort/examples/c/notification_callback_example/notification_callback_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt index d023337..0b9c428 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(power_measurement_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c b/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c index 1cd19d5..96a1147 100644 --- a/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c +++ b/hailort/libhailort/examples/c/power_measurement_example/power_measurement_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt index be5ddbc..7e64c8a 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_async_streams_single_thread_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c index 954191f..2314c88 100644 --- a/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c +++ b/hailort/libhailort/examples/c/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt index e5d71cb..94bd43a 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(raw_streams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c b/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c index eb3a483..27f5090 100644 --- a/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c +++ b/hailort/libhailort/examples/c/raw_streams_example/raw_streams_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt index e34cac7..e3c2f47 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c index a323857..85cfc4b 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_example/switch_network_groups_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt index 3de86db..cb4ac9e 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(switch_network_groups_manually_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c b/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c index 2145754..46165ba 100644 --- a/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c +++ b/hailort/libhailort/examples/c/switch_network_groups_manually_example/switch_network_groups_manually_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt index 064adc2..207b3bc 100644 --- a/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/c/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) SET_SOURCE_FILES_PROPERTIES(vstreams_example.c PROPERTIES LANGUAGE C) diff --git a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c index 3a7b4a0..0bbf99e 100644 --- a/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c +++ b/hailort/libhailort/examples/c/vstreams_example/vstreams_example.c @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/CMakeLists.txt b/hailort/libhailort/examples/cpp/CMakeLists.txt index 8ad305b..289ce4c 100644 --- a/hailort/libhailort/examples/cpp/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/CMakeLists.txt @@ -13,7 +13,6 @@ add_subdirectory(power_measurement_example) add_subdirectory(multi_process_example) add_subdirectory(notification_callback_example) - set(CPP_EXAMPLE_TARGETS cpp_vstreams_example cpp_infer_pipeline_example diff --git a/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt index f14f033..3cbb0b7 100644 --- a/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_async_infer_advanced_example async_infer_advanced_example.cpp) target_link_libraries(cpp_async_infer_advanced_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp index d2f2d3b..25e3af9 100644 --- a/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp +++ b/hailort/libhailort/examples/cpp/async_infer_advanced_example/async_infer_advanced_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt index 0a2d7f6..60cdba1 100644 --- a/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_async_infer_basic_example async_infer_basic_example.cpp) target_link_libraries(cpp_async_infer_basic_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp index 2562635..394bf95 100644 --- a/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp +++ b/hailort/libhailort/examples/cpp/async_infer_basic_example/async_infer_basic_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt index 02488e9..068abc2 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_infer_pipeline_example infer_pipeline_example.cpp) target_link_libraries(cpp_infer_pipeline_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp b/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp index e13786f..a167bd0 100644 --- a/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp +++ b/hailort/libhailort/examples/cpp/infer_pipeline_example/infer_pipeline_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt index 648a432..4767ff7 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_device_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_multi_device_example multi_device_example.cpp) target_link_libraries(cpp_multi_device_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp b/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp index 96b4ff4..e23401a 100644 --- a/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_device_example/multi_device_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt index d5d62e1..bd731e5 100644 --- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_multi_network_vstream_example multi_network_vstream_example.cpp) target_link_libraries(cpp_multi_network_vstream_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp b/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp index 257abf5..047e69f 100644 --- a/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_network_vstream_example/multi_network_vstream_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt index d6a3f5e..6c4be1b 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/multi_process_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_multi_process_example multi_process_example.cpp) target_link_libraries(cpp_multi_process_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp index 003de42..b04f5af 100644 --- a/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp +++ b/hailort/libhailort/examples/cpp/multi_process_example/multi_process_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt index 2ceb7b3..a49f038 100644 --- a/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/notification_callback_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_notification_callback_example notification_callback_example.cpp) target_link_libraries(cpp_notification_callback_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/notification_callback_example/notification_callback_example.cpp b/hailort/libhailort/examples/cpp/notification_callback_example/notification_callback_example.cpp index 38e18ab..fca99c0 100644 --- a/hailort/libhailort/examples/cpp/notification_callback_example/notification_callback_example.cpp +++ b/hailort/libhailort/examples/cpp/notification_callback_example/notification_callback_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt index febec92..9514ebe 100644 --- a/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/power_measurement_example/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5.0) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_power_measurement_example power_measurement_example.cpp) target_link_libraries(cpp_power_measurement_example PRIVATE HailoRT::libhailort) diff --git a/hailort/libhailort/examples/cpp/power_measurement_example/power_measurement_example.cpp b/hailort/libhailort/examples/cpp/power_measurement_example/power_measurement_example.cpp index c6bd5a6..9e10820 100644 --- a/hailort/libhailort/examples/cpp/power_measurement_example/power_measurement_example.cpp +++ b/hailort/libhailort/examples/cpp/power_measurement_example/power_measurement_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt index b426979..402a952 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_multi_thread_example raw_async_streams_multi_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_multi_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp index fe2dcd3..2f126c1 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_multi_thread_example/raw_async_streams_multi_thread_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt index 0427c2e..f5582bc 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_raw_async_streams_single_thread_example raw_async_streams_single_thread_example.cpp) target_link_libraries(cpp_raw_async_streams_single_thread_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp index d3998c8..e76eb29 100644 --- a/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_async_streams_single_thread_example/raw_async_streams_single_thread_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt index 889f50d..69d9015 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/raw_streams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_raw_streams_example raw_streams_example.cpp) target_link_libraries(cpp_raw_streams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp b/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp index cfb7f42..a4537b7 100644 --- a/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp +++ b/hailort/libhailort/examples/cpp/raw_streams_example/raw_streams_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt index 5e08e8c..535c2b2 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_example switch_network_groups_example.cpp) target_link_libraries(cpp_switch_network_groups_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp b/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp index 2c04dc8..88c3ab5 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp +++ b/hailort/libhailort/examples/cpp/switch_network_groups_example/switch_network_groups_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt index 2db2d99..2ed3550 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) find_package(Threads REQUIRED) set(THREADS_PREFER_PTHREAD_FLAG ON) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_switch_network_groups_manually_example switch_network_groups_manually_example.cpp) target_link_libraries(cpp_switch_network_groups_manually_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp index 292cb5f..3ab2dc2 100644 --- a/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp +++ b/hailort/libhailort/examples/cpp/switch_network_groups_manually_example/switch_network_groups_manually_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt index 4e26d19..8241d64 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt +++ b/hailort/libhailort/examples/cpp/vstreams_example/CMakeLists.txt @@ -3,7 +3,7 @@ cmake_minimum_required(VERSION 3.5.0) set(THREADS_PREFER_PTHREAD_FLAG ON) find_package(Threads REQUIRED) -find_package(HailoRT 4.20.1 EXACT REQUIRED) +find_package(HailoRT 4.21.0 EXACT REQUIRED) add_executable(cpp_vstreams_example vstreams_example.cpp) target_link_libraries(cpp_vstreams_example PRIVATE HailoRT::libhailort Threads::Threads) diff --git a/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp b/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp index 2abc53d..73fe922 100644 --- a/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp +++ b/hailort/libhailort/examples/cpp/vstreams_example/vstreams_example.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/hef.proto b/hailort/libhailort/hef.proto index 20f35f1..2e3e594 100644 --- a/hailort/libhailort/hef.proto +++ b/hailort/libhailort/hef.proto @@ -9,6 +9,13 @@ message ProtoHEFHef { ProtoHEFIncludedFeatures included_features = 4; repeated ProtoHEFExtension extensions = 5; repeated ProtoHEFOptionalExtension optional_extensions = 6; + repeated ProtoHEFExternalResource external_resources = 7; +} + +message ProtoHEFExternalResource { + uint64 offset = 1; + uint64 size = 2; + string name = 3; } message ProtoHEFIncludedFeatures { @@ -53,6 +60,9 @@ enum ProtoHEFExtensionType { HAILO_NET_FLOW_YOLOV8_NMS = 27; BATCH_REGISTER_CONFIG = 28; HAILO_NET_FLOW_BBOX_DECODING = 29; + CCW_PTR_SQUEEZE = 30; + EXTERNAL_RESOURCES = 31; + SHARED_CONFIG = 32; UNUSED = 0XFFFF; } diff --git a/hailort/libhailort/include/hailo/buffer.hpp b/hailort/libhailort/include/hailo/buffer.hpp index 6c5dcfa..246066f 100644 --- a/hailort/libhailort/include/hailo/buffer.hpp +++ b/hailort/libhailort/include/hailo/buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -199,6 +199,7 @@ public: // Internal functions static Expected create(BufferStoragePtr storage, bool register_storage = true); + static Expected create_shared(BufferStoragePtr storage, bool register_storage = true); private: class StorageImpl; @@ -222,6 +223,7 @@ public: MemoryView() noexcept; explicit MemoryView(Buffer &buffer) noexcept; MemoryView(void *data, size_t size) noexcept; + MemoryView(const std::string &data) noexcept; ~MemoryView() = default; MemoryView& operator=(MemoryView&& other) noexcept = default; @@ -256,6 +258,16 @@ public: // Stream operator overload friend std::ostream& operator<<(std::ostream&, const MemoryView&); + // Returns a pointer to the start of the buffer, cast to T* + // Note: If this->size() is less than sizeof(T), then part of the data pointed to by the returned pointer + // will be outside of the buffer's bounds. + template::value, int> = 0> + T* as_pointer() const + { + assert(m_size >= sizeof(T)); + return reinterpret_cast(m_data); + } + private: void *m_data; size_t m_size; diff --git a/hailort/libhailort/include/hailo/device.hpp b/hailort/libhailort/include/hailo/device.hpp index 24680bf..7399ab0 100644 --- a/hailort/libhailort/include/hailo/device.hpp +++ b/hailort/libhailort/include/hailo/device.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -451,6 +451,24 @@ public: */ virtual Expected get_chip_temperature(); + /** + * Gets health stats of the Hailo device. + * + * @return Upon success, returns @a hailo_health_stats_t, containing health information. + * Otherwise, returns a ::hailo_status error. + * @note Supported only on Hailo-10/Hailo-15 devices running on Linux. 
+ */ + virtual Expected query_health_stats(); + + /** + * Gets performance stats of the Hailo device, and of the system it is connected to. + * + * @return Upon success, returns @a hailo_performance_stats_t, containing performance information. + * Otherwise, returns a ::hailo_status error. + * @note Supported only on Hailo-10/Hailo-15 devices running on Linux. + */ + virtual Expected query_performance_stats(); + /** * Reset device. * @@ -798,6 +816,10 @@ public: hailo_status clear_context_switch_breakpoint(uint8_t breakpoint_id); Expected get_context_switch_breakpoint_status(uint8_t breakpoint_id); + virtual hailo_status before_fork() = 0; + virtual hailo_status after_fork_in_parent() = 0; + virtual hailo_status after_fork_in_child() = 0; + virtual ~Device() = default; Device(const Device &) = delete; Device &operator=(const Device &) = delete; diff --git a/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp index b8d53c1..f9830f1 100644 --- a/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp +++ b/hailort/libhailort/include/hailo/dma_mapped_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/event.hpp b/hailort/libhailort/include/hailo/event.hpp index 0db4f6e..7289500 100644 --- a/hailort/libhailort/include/hailo/event.hpp +++ b/hailort/libhailort/include/hailo/event.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/expected.hpp b/hailort/libhailort/include/hailo/expected.hpp index 2125f50..17dc795 100644 --- a/hailort/libhailort/include/hailo/expected.hpp +++ b/hailort/libhailort/include/hailo/expected.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/genai/common.hpp b/hailort/libhailort/include/hailo/genai/common.hpp index 5a5c8ca..3cf9f0a 100644 --- a/hailort/libhailort/include/hailo/genai/common.hpp +++ b/hailort/libhailort/include/hailo/genai/common.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/genai/llm/llm.hpp b/hailort/libhailort/include/hailo/genai/llm/llm.hpp index 6e795fe..99072bc 100644 --- a/hailort/libhailort/include/hailo/genai/llm/llm.hpp +++ b/hailort/libhailort/include/hailo/genai/llm/llm.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -21,6 +21,8 @@ namespace hailort namespace genai { +class LLM; + /*! Parameters to configure the LLM model */ class HAILORTAPI LLMParams { @@ -40,46 +42,27 @@ public: /** * @return The Hef path of the LLM model. */ - std::string hef() const; + const std::string& hef() const; /** * @return The LoRA name of the LLM model. */ - std::string lora() const; - - /** - * Sets the LLM vocabulary file path. 
- * - * @param[in] path The path of the vocabulary file. - * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - */ - hailo_status set_vocabulary(const std::string &path); - - /** - * @return The vocabulary file path. - */ - std::string vocabulary() const; + const std::string& lora() const; private: - hailo_status set_hef(const std::string &path); - std::string m_hef_path; std::string m_lora; - std::string m_vocabulary_path; }; /*! The LLMGeneratorParams represents the parameters for text generation, which can be changed during runtime for each generator. */ class HAILORTAPI LLMGeneratorParams { public: - LLMGeneratorParams() = default; - /** * Sets the sampling temperature of the LLM model. * * @param[in] temperature The sampling temperature. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note: Currently not implemented. */ hailo_status set_temperature(float32_t temperature); @@ -91,9 +74,8 @@ public: /** * Sets the top_p parameter of the LLM model. * - * @param[in] top_p The top_p sampling value. + * @param[in] top_p The top_p sampling value. * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. - * @note: Currently not implemented. */ hailo_status set_top_p(float32_t top_p); @@ -102,9 +84,91 @@ public: */ float32_t top_p() const; + /** + * Sets the top_k parameter of the LLM model. + * + * @param[in] top_k The top_k sampling value. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ + hailo_status set_top_k(uint32_t top_k); + + /** + * @return The top_k sampling value. + */ + uint32_t top_k() const; + + /** + * Sets the frequency_penalty parameter of the LLM model. + * + * @param[in] frequency_penalty The frequency_penalty for generated tokens. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. 
+ */ + hailo_status set_frequency_penalty(float32_t frequency_penalty); + + /** + * @return The frequency_penalty for generated tokens. + */ + float32_t frequency_penalty() const; + + /** + * Sets the max_generated_tokens parameter of the LLM model. + * + * @param[in] max_generated_tokens The maximum number of tokens that can be generated, not including the input tokens. + * + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note This number includes all special tokens such as start and end tokens. + */ + hailo_status set_max_generated_tokens(uint32_t max_generated_tokens); + + /** + * @return The maximum number of tokens that can be generated. + */ + uint32_t max_generated_tokens() const; + + /** + * Whether the LLM sampling should be statistical or greedy. + * + * @param[in] do_sample true meaning the model will perform statistical sampling, false meaning the model will perform greedy sampling. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + */ + hailo_status set_do_sample(bool do_sample); + + /** + * @return Whether the LLM sampling should be statistical or greedy. + */ + bool do_sample() const; + + /** + * Sets the seed for the LLM model. + * + * @param[in] seed The seed for the random number generator for statistical sampling. + * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns a ::hailo_status error. + * @note If the seed remains unchanged between calls, the generator will continue producing values from its + * current state. If a new seed is provided, the generator is reinitialized. + */ + hailo_status set_seed(uint32_t seed); + + /** + * @return The seed for the random number generator. 
+ */ + uint32_t seed() const; + + LLMGeneratorParams(float32_t temperature, float32_t top_p, uint32_t top_k, float32_t frequency_penalty, + uint32_t max_generated_tokens, bool do_sample, uint32_t seed) : + m_temperature(temperature), m_top_p(top_p), m_top_k(top_k), m_frequency_penalty(frequency_penalty), + m_max_generated_tokens(max_generated_tokens), m_do_sample(do_sample), m_seed(seed) {} + private: + LLMGeneratorParams() = default; + friend class LLM; + float32_t m_temperature; float32_t m_top_p; + uint32_t m_top_k; + float32_t m_frequency_penalty; + uint32_t m_max_generated_tokens; + bool m_do_sample; + uint32_t m_seed; }; /*! The LLMGeneratorCompletion object is used to read token completions */ @@ -117,6 +181,14 @@ public: LLMGeneratorCompletion &operator=(const LLMGeneratorCompletion &) = delete; ~LLMGeneratorCompletion(); + enum class Status { + GENERATING = 0, + MAX_TOKENS_REACHED, + LOGICAL_END_OF_GENERATION, + + MAX_VALUE = HAILO_MAX_ENUM, + }; + /** * Reads the next token completion. * @@ -132,19 +204,23 @@ public: * Reads the next token completion. * * @param[in] timeout The timeout for the read operation. - * @return Upon success, returns Expected of std::string, represnting the next token completion. Otherwise, returns Unexpected of ::hailo_status error. + * @return Upon success, returns Expected of std::string, representing the next token completion. Otherwise, returns Unexpected of ::hailo_status error. * @note The returned output is a UTF-8 encoded string. */ Expected read(std::chrono::milliseconds timeout = DEFAULT_READ_TIMEOUT); /** - * Check if the token completion has finished. + * Returns the current generation status. * - * @return True if the generation has finished, false otherwise. - * @note Once this function returns true, no further reads should be attempted on this LLMGeneratorCompletion object, - * as it indicates the end of available token completions. 
+ * @return Status - The current generation status, which can indicate: + * - GENERATING: The generation process is ongoing. + * - MAX_TOKENS_REACHED: The maximum number of tokens has been generated. + * - LOGICAL_END_OF_GENERATION: The generation reached its logical end. + * @note Once this function returns a status indicating the end of generation (e.g., MAX_TOKENS_REACHED or LOGICAL_END_OF_GENERATION), + * no further reads should be attempted on this LLMGeneratorCompletion object, + * as it indicates that all token completions have been provided. */ - bool end_of_generation() const; + Status generation_status() const; static constexpr std::chrono::milliseconds DEFAULT_READ_TIMEOUT = std::chrono::seconds(10); @@ -185,7 +261,7 @@ public: /** * Marks the end of input and initiates the generation process. - * Returns a LLMGeneratorCompletion, which allows fetching generated tokens incrementally. + * Returns an LLMGeneratorCompletion, which allows fetching generated tokens incrementally. * * @return Upon success, returns Expected of LLMGeneratorCompletion. Otherwise, returns Unexpected of ::hailo_status error. * @note Once this function is called, the LLMGenerator is no longer functional. @@ -216,13 +292,26 @@ public: static Expected create(std::shared_ptr vdevice, const LLMParams &llm_params); /** - * Creates a LLMGenerator object from the provided generation parameters or defaults if none are specified. + * Creates an LLMGeneratorParams object with the model's default values. + * + * @return Upon success, returns Expected of LLMGeneratorParams. Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected create_generator_params(); + + /** + * Creates an LLMGenerator object from the provided generation parameters. * * @param[in] params The LLMGeneratorParams used to set the generator parameters. - * If not provided, the model will use it's default params. * @return Upon success, returns Expected of LLMGenerator. 
Otherwise, returns Unexpected of ::hailo_status error. */ - Expected create_generator(const LLMGeneratorParams ¶ms = LLMGeneratorParams()); + Expected create_generator(const LLMGeneratorParams ¶ms); + + /** + * Creates an LLMGenerator object using the model's default generator parameters. + * + * @return Upon success, returns Expected of LLMGenerator. Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected create_generator(); LLM(LLM &&); LLM &operator=(LLM &&) = delete; diff --git a/hailort/libhailort/include/hailo/genai/text2image/text2image.hpp b/hailort/libhailort/include/hailo/genai/text2image/text2image.hpp index 0df51dc..c22de8c 100644 --- a/hailort/libhailort/include/hailo/genai/text2image/text2image.hpp +++ b/hailort/libhailort/include/hailo/genai/text2image/text2image.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -13,14 +13,19 @@ #include "hailo/hailort.h" #include "hailo/expected.hpp" +#include "hailo/buffer.hpp" #include "hailo/genai/vdevice_genai.hpp" #include "hailo/genai/common.hpp" +#include + namespace hailort { namespace genai { +class Text2Image; + /*! Scheduler type for the diffusion process */ enum class HailoDiffuserSchedulerType { @@ -32,7 +37,7 @@ enum class HailoDiffuserSchedulerType class HAILORTAPI Text2ImageParams { public: - Text2ImageParams() = default; + Text2ImageParams(); /** * Sets the denoise model. @@ -79,15 +84,60 @@ public: */ hailo_status set_scheduler(HailoDiffuserSchedulerType scheduler_type); + /** + * @return The Hef path of the denoising model. + */ + const std::string& denoise_hef() const; + + /** + * @return The LoRA of the denoising model. + */ + const std::string& denoise_lora() const; + + /** + * @return The Hef path of the text encoder model. 
+ */ + const std::string& text_encoder_hef() const; + + /** + * @return The LoRA of the text encoder model. + */ + const std::string& text_encoder_lora() const; + + /** + * @return The Hef path of the image decoder model. + */ + const std::string& image_decoder_hef() const; + + /** + * @return The LoRA of the image decoder model. + */ + const std::string& image_decoder_lora() const; + + /** + * @return The Hef path of the ip adapter model. + */ + const std::string& ip_adapter_hef() const; + + /** + * @return The LoRA of the ip adapter model. + */ + const std::string& ip_adapter_lora() const; + + /** + * @return The scheduler type for the diffusion process. + */ + HailoDiffuserSchedulerType scheduler() const; + private: std::string m_denoise_hef; std::string m_denoise_lora; - std::string m_encoder_hef; - std::string m_encoder_lora; + std::string m_text_encoder_hef; + std::string m_text_encoder_lora; - std::string m_decoder_hef; - std::string m_decoder_lora; + std::string m_image_decoder_hef; + std::string m_image_decoder_lora; std::string m_ip_adapter_hef; std::string m_ip_adapter_lora; @@ -99,8 +149,6 @@ private: class HAILORTAPI Text2ImageGeneratorParams { public: - Text2ImageGeneratorParams() = default; - /** * Sets the numer of images to generate. 
* @@ -158,6 +206,10 @@ public: uint32_t seed() const; private: + Text2ImageGeneratorParams() = default; + friend class Text2Image; + friend class Text2ImageServer; + uint32_t m_samples_count; uint32_t m_steps_count; float32_t m_guidance_scale; @@ -168,12 +220,11 @@ private: class HAILORTAPI Text2ImageGenerator { public: - Text2ImageGenerator(std::shared_ptr vdevice); - Text2ImageGenerator(Text2ImageGenerator &&) = default; - Text2ImageGenerator &operator=(Text2ImageGenerator &&) = default; + Text2ImageGenerator(Text2ImageGenerator &&); + Text2ImageGenerator &operator=(Text2ImageGenerator &&) = delete; Text2ImageGenerator(const Text2ImageGenerator &) = delete; Text2ImageGenerator &operator=(const Text2ImageGenerator &) = delete; - virtual ~Text2ImageGenerator() = default; + virtual ~Text2ImageGenerator(); /** * Generates the output samples images. @@ -186,7 +237,7 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. * @note: If the pipeline is configured with IP Adapter this function will fail and return error. */ - Expected> generate(const std::string &positive_prompt, const std::string &negative_prompt, + Expected> generate(const std::string &positive_prompt, const std::string &negative_prompt, std::chrono::milliseconds timeout = DEFAULT_OPERATION_TIMEOUT); /** @@ -202,7 +253,7 @@ public: * Otherwise, returns Unexpected of ::hailo_status error. * @note: If the pipeline is configured without IP Adapter the function will fail and return error. 
*/ - Expected> generate(const std::string &positive_prompt, const std::string &negative_prompt, + Expected> generate(const std::string &positive_prompt, const std::string &negative_prompt, const MemoryView &ip_adapter, std::chrono::milliseconds timeout = DEFAULT_OPERATION_TIMEOUT); /** @@ -249,21 +300,23 @@ public: static constexpr std::chrono::milliseconds DEFAULT_OPERATION_TIMEOUT = std::chrono::seconds(30); + class Impl; + Text2ImageGenerator(std::unique_ptr pimpl); private: - std::shared_ptr m_vdevice_connection; + std::unique_ptr m_pimpl; }; /*! Represents the Text2Image Model pipeline. * Manages the lifecycle and configuration of a Text2Image model instance. */ -class Text2Image +class HAILORTAPI Text2Image { public: - Text2Image(Text2Image &&) = default; - Text2Image(const Text2Image &) = default; + Text2Image(Text2Image &&); + Text2Image(const Text2Image &) = delete; Text2Image &operator=(Text2Image &&) = delete; Text2Image &operator=(const Text2Image &) = delete; - virtual ~Text2Image() = default; + virtual ~Text2Image(); /** * Creates Text2Image model pipeline instance configured with the specified parameters. @@ -275,13 +328,27 @@ public: static Expected create(std::shared_ptr vdevice, const Text2ImageParams ¶ms); /** - * Creates a Generator object from the provided generation parameters or defaults if none are specified. + * Creates an Text2ImageGeneratorParams object with the model's default values. + * + * @return Upon success, returns Expected of Text2ImageGeneratorParams. Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected create_generator_params(); + + /** + * Creates a Generator object from the provided generator parameters. * * @param[in] params The Text2ImageGeneratorParams used to set the generator parameters. - * If not provided, the model will use it's default params. + * * @return Upon success, returns Expected of Text2ImageGenerator. Otherwise, returns Unexpected of ::hailo_status error. 
*/ - Expected create_generator(const Text2ImageGeneratorParams ¶ms = Text2ImageGeneratorParams()); + Expected create_generator(const Text2ImageGeneratorParams ¶ms); + + /** + * Creates a Generator object using the model's default generator parameters. + * + * @return Upon success, returns Expected of Text2ImageGenerator. Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected create_generator(); /** * @return The frame size of a single output sample. @@ -327,11 +394,10 @@ public: */ Expected ip_adapter_format_order() const; + class Impl; + Text2Image(std::unique_ptr pimpl); private: - Text2Image(std::shared_ptr vdevice, const Text2ImageParams ¶ms); - - std::shared_ptr m_vdevice; - Text2ImageParams m_params; + std::unique_ptr m_pimpl; }; } /* namespace genai */ diff --git a/hailort/libhailort/include/hailo/genai/vdevice_genai.hpp b/hailort/libhailort/include/hailo/genai/vdevice_genai.hpp index 05e1acc..c0d9bc2 100644 --- a/hailort/libhailort/include/hailo/genai/vdevice_genai.hpp +++ b/hailort/libhailort/include/hailo/genai/vdevice_genai.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -12,14 +12,21 @@ #define _HAILO_GENAI_VDEVICE_GENAI_HPP_ #include "hailo/hailo_session.hpp" +#include "hailo/genai/common.hpp" +#include "hailo/buffer.hpp" + namespace hailort { namespace genai { +// TODO (HRT-16126): - adjusting all ack's once server is written in cpp +const size_t SERVER_ACK_SIZE = 128; + // Forward decleration class GenAISession; +class SessionWrapper; class HAILORTAPI VDeviceGenAI { @@ -35,13 +42,19 @@ public: Expected> create_session(uint16_t port); - VDeviceGenAI(hailo_device_id_t device_id); + const hailo_vdevice_params_t get_params() const { + return m_vdevice_params; + } + + VDeviceGenAI(hailo_device_id_t device_id, const hailo_vdevice_params_t ¶ms); private: static hailo_status validate_params(const hailo_vdevice_params_t ¶ms); hailo_device_id_t m_device_id; + hailo_vdevice_params_t m_vdevice_params; }; +// TODO (HRT-16126): Delete this class class HAILORTAPI GenAISession { public: @@ -53,12 +66,16 @@ public: GenAISession &operator=(const GenAISession &) = delete; virtual ~GenAISession() = default; - hailo_status write(const uint8_t *buffer, size_t size, std::chrono::milliseconds timeout = Session::DEFAULT_WRITE_TIMEOUT); - Expected read(uint8_t *buffer, size_t size, std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT); + hailo_status write(MemoryView buffer, std::chrono::milliseconds timeout = Session::DEFAULT_WRITE_TIMEOUT); + Expected read(MemoryView buffer, std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT); + Expected> read(std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT); - GenAISession(std::shared_ptr session); + hailo_status send_file(const std::string &path); + Expected get_ack(std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT); + + GenAISession(std::shared_ptr session_wrapper); private: - std::shared_ptr m_session; + std::shared_ptr m_session_wrapper; }; using VDevice = 
VDeviceGenAI; diff --git a/hailort/libhailort/include/hailo/hailo_session.hpp b/hailort/libhailort/include/hailo/hailo_session.hpp index cf2a60a..b201738 100644 --- a/hailort/libhailort/include/hailo/hailo_session.hpp +++ b/hailort/libhailort/include/hailo/hailo_session.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,6 +11,7 @@ #define _HAILO_RAW_CONNECTION_HPP_ #include "hailo/expected.hpp" +#include "hailo/buffer.hpp" #include #include @@ -22,6 +23,7 @@ namespace hailort class ConnectionContext; class Session; +struct TransferRequest; /** * The Listener class is used to accept new connections. @@ -36,7 +38,7 @@ public: * Creates a new SessionListener. * This function should be used from the server. * The returned SessionListener object should be used to accept new clients. - * + * * @param[in] port The port to listen on. * @param[in] device_id The device id to listen on. * @return Upon success, returns Expected of a shared pointer of listener, representing the listener object. @@ -46,8 +48,8 @@ public: /** * This function should be called by the server side (device) in order to accept a new connection. * This call is blocking and will wait until a new client connection is established. - * - * @return Upon success, returns Expected of a shared pointer of a Session, representing the connection with + * + * @return Upon success, returns Expected of a shared pointer of a Session, representing the connection with * the new client. */ virtual hailort::Expected> accept() = 0; @@ -76,11 +78,11 @@ public: /** * Creates a new Session and connects to the server. * This function should be used from the client side. - * + * * @param[in] port The port to connect to. * @param[in] device_id The device id to connect to. 
* @return Upon success, returns Expected of a shared pointer of session, representing the session object. - */ + */ static Expected> connect(uint16_t port, const std::string &device_id = ""); /** @@ -96,7 +98,7 @@ public: */ virtual hailo_status write(const uint8_t *buffer, size_t size, std::chrono::milliseconds timeout = DEFAULT_WRITE_TIMEOUT) = 0; - + /** * Reads the entire buffer over the connection, synchronously. * @@ -110,12 +112,12 @@ public: */ virtual hailo_status read(uint8_t *buffer, size_t size, std::chrono::milliseconds timeout = DEFAULT_READ_TIMEOUT) = 0; - + /** * Closes the connection. - * + * * @return Upon success, returns ::HAILO_SUCCESS. Otherwise, returns an ::hailo_status error. - */ + */ virtual hailo_status close() = 0; /** @@ -149,7 +151,10 @@ public: * by 8 bytes, they should be read in the same order: 10 bytes first, then 8 bytes. */ virtual hailo_status write_async(const uint8_t *buffer, size_t size, - std::function &&callback) = 0; + std::function &&callback); + + // Internal + virtual hailo_status write_async(TransferRequest &&request) = 0; /** * Waits until the session is ready to launch a new call to `Session::read_async()`. Each session has a @@ -182,7 +187,12 @@ public: * by 8 bytes, they should be read in the same order: 10 bytes first, then 8 bytes. 
*/ virtual hailo_status read_async(uint8_t *buffer, size_t size, - std::function &&callback) = 0; + std::function &&callback); + + // Internal + virtual hailo_status read_async(TransferRequest &&request) = 0; + + virtual Expected allocate_buffer(size_t size, hailo_dma_buffer_direction_t direction) = 0; static constexpr std::chrono::milliseconds DEFAULT_WRITE_TIMEOUT = std::chrono::milliseconds(10000); static constexpr std::chrono::milliseconds DEFAULT_READ_TIMEOUT = std::chrono::milliseconds(HAILO_INFINITE); diff --git a/hailort/libhailort/include/hailo/hailort.h b/hailort/libhailort/include/hailo/hailort.h index 3bf87d2..ee6b2a3 100644 --- a/hailort/libhailort/include/hailo/hailort.h +++ b/hailort/libhailort/include/hailo/hailort.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -66,6 +66,7 @@ extern "C" { #define HAILO_ETH_MAC_LENGTH (6) #define HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH (12) #define HAILO_SOC_PM_VALUES_BYTES_LENGTH (24) +#define HAILO_GPIO_MASK_VALUES_LENGTH (16) #define HAILO_MAX_TEMPERATURE_THROTTLING_LEVELS_NUMBER (4) #define HAILO_UNIQUE_VDEVICE_GROUP_ID ("UNIQUE") @@ -174,9 +175,11 @@ typedef uint16_t nms_bbox_counter_t; HAILO_STATUS__X(86, HAILO_DRIVER_INVALID_IOCTL /*!< Driver cannot handle ioctl. Can happen on libhailort vs driver version mismatch or when ioctl function is not supported */)\ HAILO_STATUS__X(87, HAILO_DRIVER_TIMEOUT /*!< Driver operation returned a timeout. Device reset may be required. */)\ HAILO_STATUS__X(88, HAILO_DRIVER_INTERRUPTED /*!< Driver operation interrupted by system request (i.e can happen on application exit) */)\ - HAILO_STATUS__X(89, HAILO_CONNECTION_REFUSED /*!< Connection was refused by other side. */)\ - HAILO_STATUS__X(90, HAILO_DRIVER_WAIT_CANCELED /*!< Driver operation was canceled. 
*/)\ - + HAILO_STATUS__X(89, HAILO_CONNECTION_REFUSED /*!< Connection was refused by other side */)\ + HAILO_STATUS__X(90, HAILO_DRIVER_WAIT_CANCELED /*!< Driver operation was canceled */)\ + HAILO_STATUS__X(91, HAILO_HEF_FILE_CORRUPTED /*!< HEF file is corrupted */)\ + HAILO_STATUS__X(92, HAILO_HEF_NOT_SUPPORTED /*!< HEF file is not supported. Make sure the DFC version is compatible. */)\ + HAILO_STATUS__X(93, HAILO_HEF_NOT_COMPATIBLE_WITH_DEVICE /*!< HEF file is not compatible with device. */)\ typedef enum { #define HAILO_STATUS__X(value, name) name = value, @@ -430,6 +433,7 @@ typedef enum hailo_device_architecture_e { HAILO_ARCH_HAILO15L, HAILO_ARCH_HAILO15M, HAILO_ARCH_HAILO10H, + HAILO_ARCH_MARS, /** Max enum value to maintain ABI Integrity */ HAILO_ARCH_MAX_ENUM = HAILO_MAX_ENUM @@ -516,6 +520,8 @@ typedef struct { uint8_t unit_level_tracking_id[HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH]; /** Hailo device pm values */ uint8_t soc_pm_values[HAILO_SOC_PM_VALUES_BYTES_LENGTH]; + /** Hailo device GPIO mask values */ + uint16_t gpio_mask; } hailo_extended_device_information_t; /** Endianness (byte order) */ @@ -641,7 +647,7 @@ typedef enum { * Deprecated. Should use HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS, HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE (user formats) * or HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP (device format) instead. */ - HAILO_FORMAT_ORDER_HAILO_NMS = 9, // TODO: HRT-15612 + HAILO_FORMAT_ORDER_HAILO_NMS = 9, /** * - Not used for host side @@ -775,6 +781,7 @@ typedef enum { * * * Maximum amount of bboxes is ::hailo_nms_shape_t.max_bboxes_total. + * It is possible to use ::hailo_detections_t to parse the data. 
* * - Not used for device side */ @@ -1300,28 +1307,14 @@ typedef enum { HAILO_BURST_TYPE_COUNT } hailo_nms_burst_type_t; -/** NMS result order */ -typedef enum { - HAILO_NMS_RESULT_ORDER_HW = 0, - HAILO_NMS_RESULT_ORDER_BY_CLASS, - HAILO_NMS_RESULT_ORDER_BY_SCORE, -} hailo_nms_result_order_type_t; - /** NMS Internal HW Info */ typedef struct { /** Amount of NMS classes */ uint32_t number_of_classes; - union - { - /** Maximum amount of bboxes per nms class - * Valid when order_type is 'HAILO_NMS_RESULT_ORDER_BY_CLASS', 'HAILO_NMS_RESULT_ORDER_HW' - */ - uint32_t max_bboxes_per_class; - /** Maximum amount of total bboxes - * Valid when order_type is 'HAILO_NMS_RESULT_ORDER_BY_SCORE' - */ - uint32_t max_bboxes_total; - }; + /** Maximum amount of bboxes per nms class */ + uint32_t max_bboxes_per_class; + /** Maximum amount of total bboxes */ + uint32_t max_bboxes_total; /** Internal usage */ uint32_t bbox_size; /** Internal usage */ @@ -1332,8 +1325,6 @@ typedef struct { uint32_t burst_size; /** NMS burst type */ hailo_nms_burst_type_t burst_type; - /** Order of NMS results **/ - hailo_nms_result_order_type_t order_type; } hailo_nms_info_t; /** NMS Fuse Input */ @@ -1347,24 +1338,15 @@ typedef struct { typedef struct { /** Amount of NMS classes */ uint32_t number_of_classes; - union - { - /** Maximum amount of bboxes per nms class - * Valid when order_type is 'HAILO_NMS_RESULT_ORDER_BY_CLASS', 'HAILO_NMS_RESULT_ORDER_HW' - */ - uint32_t max_bboxes_per_class; - /** Maximum amount of total bboxes - * Valid when order_type is 'HAILO_NMS_RESULT_ORDER_BY_SCORE' - */ - uint32_t max_bboxes_total; - }; + /** Maximum amount of bboxes per nms class */ + uint32_t max_bboxes_per_class; + /** Maximum amount of total bboxes */ + uint32_t max_bboxes_total; /** Maximum accumulated mask size for all of the detections in a frame. * Used only with 'HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK' format order. 
* The default value is (`input_image_size` * 2) */ uint32_t max_accumulated_mask_size; - /** Order of NMS results **/ - hailo_nms_result_order_type_t order_type; } hailo_nms_shape_t; #pragma pack(push, 1) @@ -1400,6 +1382,22 @@ typedef struct { uint16_t class_id; } hailo_detection_t; +#if defined(_MSC_VER) +// TODO: warning C4200 +#pragma warning(push) +#pragma warning(disable: 4200) +#endif +typedef struct { + /** Number of detections */ + uint16_t count; + + /** Array of detections (it's size is determined by count field) */ + hailo_detection_t detections[0]; +} hailo_detections_t; +#if defined(_MSC_VER) +#pragma warning(pop) +#endif + typedef struct { /** Detection's box coordinates */ hailo_rectangle_t box; @@ -1554,16 +1552,16 @@ typedef enum { typedef struct { /** - * Sets the batch size of the InferModel. - * This parameter determines the number of frames that be sent for inference in a single batch. + * This parameter determines the number of frames that will be sent for inference in a single batch. * If a scheduler is enabled, this parameter determines the 'burst size' - the max number of frames after which the scheduler will attempt * to switch to another model. + * If scheduler is disabled, the number of frames for inference should be a multiplication of batch_size (unless model is in single context). * * User is advised to modify this (single network parameter) or @a hailo_configure_network_group_params_t batch size parameter. Not both. * In case user wishes to work with the same batch size for all networks inside a network group, user is advised to set batch_size in @a hailo_configure_network_group_params_t. * In case user wished to work with batch size per network, user is advised to use this parameter. - * note: Default value is @a HAILO_DEFAULT_BATCH_SIZE - means automatic batch determined by hailort. + * @note The default value is @a HAILO_DEFAULT_BATCH_SIZE - which means the batch is determined by HailoRT automatically. 
*/ uint16_t batch_size; } hailo_network_parameters_t; @@ -1671,10 +1669,6 @@ typedef struct { /** Health monitor - Dataflow shutdown notification message */ typedef struct { - /** Bit mask of closed input streams indices */ - uint32_t closed_input_streams; - /** Bit mask of closed output streams indices */ - uint32_t closed_output_streams; float32_t ts0_temperature; float32_t ts1_temperature; } hailo_health_monitor_dataflow_shutdown_notification_message_t; @@ -1716,6 +1710,27 @@ typedef struct { uint32_t memory_bitmap; } hailo_health_monitor_cpu_ecc_notification_message_t; +typedef struct { + // In percentage + float32_t cpu_utilization; + // In bytes + int64_t ram_size_total; + // In bytes + int64_t ram_size_used; + // In percentage + float32_t nnc_utilization; + // Per second + int32_t ddr_noc_total_transactions; + // In percentage + int32_t dsp_utilization; +} hailo_performance_stats_t; + +typedef struct { + float32_t on_die_temperature; + float32_t on_die_voltage; + int32_t startup_bist_mask; +} hailo_health_stats_t; + /** Context switch - breakpoint reached notification message */ typedef struct { uint8_t network_group_index; diff --git a/hailort/libhailort/include/hailo/hailort.hpp b/hailort/libhailort/include/hailo/hailort.hpp index ed2379f..57b2e22 100644 --- a/hailort/libhailort/include/hailo/hailort.hpp +++ b/hailort/libhailort/include/hailo/hailort.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/hailort_common.hpp b/hailort/libhailort/include/hailo/hailort_common.hpp index dd45262..d4dbda4 100644 --- a/hailort/libhailort/include/hailo/hailort_common.hpp +++ b/hailort/libhailort/include/hailo/hailort_common.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -48,15 +48,25 @@ public: static const size_t DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION = 64; static const size_t DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION = 4096; + /** + * Deprecated: use get_nms_by_class_host_shape_size instead + */ + static uint32_t get_nms_host_shape_size(const hailo_nms_info_t &nms_info); + + /** + * Deprecated: use get_nms_by_class_host_shape_size instead + */ + static uint32_t get_nms_host_shape_size(const hailo_nms_shape_t &nms_shape); + /** * Gets the NMS host shape size (number of elements) from NMS info. * * @param[in] nms_info The NMS info to get shape size from. * @return The host shape size (number of elements). * @note The size in bytes can be calculated using - * get_nms_host_frame_size(const hailo_nms_info_t &nms_info, const hailo_format_t &format). + * get_nms_by_class_host_frame_size(const hailo_nms_info_t &nms_info, const hailo_format_t &format). */ - static constexpr uint32_t get_nms_host_shape_size(const hailo_nms_info_t &nms_info) + static constexpr uint32_t get_nms_by_class_host_shape_size(const hailo_nms_info_t &nms_info) { const uint32_t max_bboxes_per_class = nms_info.chunks_per_frame * nms_info.max_bboxes_per_class; // Counter + bboxes @@ -72,7 +82,7 @@ public: * @note The size in bytes can be calculated using * get_nms_host_frame_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format). 
*/ - static constexpr uint32_t get_nms_host_shape_size(const hailo_nms_shape_t &nms_shape) + static constexpr uint32_t get_nms_by_class_host_shape_size(const hailo_nms_shape_t &nms_shape) { const uint32_t max_bboxes_per_class = nms_shape.max_bboxes_per_class; // Counter + bboxes @@ -256,6 +266,8 @@ public: return "HAILO15M"; case HAILO_ARCH_HAILO10H: return "HAILO10H"; + case HAILO_ARCH_MARS: + return "MARS"; default: return "UNKNOWN ARCHITECTURE"; } @@ -287,8 +299,6 @@ public: return "BAYER RGB"; case HAILO_FORMAT_ORDER_12_BIT_BAYER_RGB: return "12 BIT BAYER RGB"; - case HAILO_FORMAT_ORDER_HAILO_NMS: - return "HAILO NMS"; case HAILO_FORMAT_ORDER_RGB888: return "RGB 888"; case HAILO_FORMAT_ORDER_NCHW: @@ -322,27 +332,6 @@ public: } } - /** - * Gets a string reprenestation of the given NMS result order type. - * - * @param[in] nms_result_order_type A ::hailo_nms_result_order_type_t object. - * @return The string representation of the NMS result order type. - */ - static std::string get_nms_result_order_type_str(const hailo_nms_result_order_type_t &nms_result_order_type) - { - switch (nms_result_order_type) - { - case HAILO_NMS_RESULT_ORDER_HW: - return "HW"; - case HAILO_NMS_RESULT_ORDER_BY_CLASS: - return "BY_CLASS"; - case HAILO_NMS_RESULT_ORDER_BY_SCORE: - return "BY_SCORE"; - default: - return "Nan"; - } - } - /** * Gets the size of each element in bytes from buffer's format. * @@ -361,10 +350,9 @@ public: * @param[in] format A ::hailo_format_t object. * @return The NMS host frame size in bytes. 
*/ - // TODO HRT-15612: Consider changing the name to get_nms_by_class_host_frame_size - static constexpr uint32_t get_nms_host_frame_size(const hailo_nms_info_t &nms_info, const hailo_format_t &format) + static constexpr uint32_t get_nms_by_class_host_frame_size(const hailo_nms_info_t &nms_info, const hailo_format_t &format) { - return get_nms_host_shape_size(nms_info) * get_format_data_bytes(format); + return get_nms_by_class_host_shape_size(nms_info) * get_format_data_bytes(format); } /** @@ -374,7 +362,6 @@ public: * @param[in] format A ::hailo_format_t object. * @return The NMS host frame size in bytes. */ - // TODO HRT-15612: Consider changing the name to get_nms_by_class_host_frame_size static uint32_t get_nms_host_frame_size(const hailo_nms_shape_t &nms_shape, const hailo_format_t &format); /** @@ -385,11 +372,8 @@ public: */ static constexpr uint32_t get_nms_with_byte_mask_host_frame_size(const hailo_nms_shape_t &nms_shape) { - // TODO: HRT-12035 - Change `max_bboxes_per_class` to `max_boxes` - auto max_detections = nms_shape.number_of_classes * nms_shape.max_bboxes_per_class; - auto max_detections_size = max_detections * DETECTION_WITH_BYTE_MASK_SIZE; - auto frame_size = DETECTION_COUNT_SIZE + max_detections_size + nms_shape.max_accumulated_mask_size; - return frame_size; + auto max_detections_size = nms_shape.max_bboxes_total * DETECTION_WITH_BYTE_MASK_SIZE; + return (DETECTION_COUNT_SIZE + max_detections_size + nms_shape.max_accumulated_mask_size); } /** @@ -450,7 +434,7 @@ public: } if (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == stream_info.format.order) { - return get_nms_host_frame_size(stream_info.nms_info, trans_params.user_buffer_format); + return get_nms_by_class_host_frame_size(stream_info.nms_info, trans_params.user_buffer_format); } else { auto shape = (HAILO_STREAM_NO_TRANSFORM == trans_params.transform_mode) ? 
stream_info.hw_shape : stream_info.shape; @@ -512,8 +496,17 @@ public: static constexpr bool is_nms(const hailo_format_order_t &order) { - return ((HAILO_FORMAT_ORDER_HAILO_NMS == order) || (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == order) || - (HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS == order) || (HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE == order)); + return (is_nms_by_class(order) || is_nms_by_score(order)); + } + + static constexpr bool is_nms_by_class(const hailo_format_order_t &order) + { + return (HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS == order); + } + + static constexpr bool is_nms_by_score(const hailo_format_order_t &order) + { + return ((HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == order) || (HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE == order)); } // TODO HRT-10073: change to supported features list diff --git a/hailort/libhailort/include/hailo/hailort_defaults.hpp b/hailort/libhailort/include/hailo/hailort_defaults.hpp index e836570..09d91c5 100644 --- a/hailort/libhailort/include/hailo/hailort_defaults.hpp +++ b/hailort/libhailort/include/hailo/hailort_defaults.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/hailort_dma-heap.h b/hailort/libhailort/include/hailo/hailort_dma-heap.h index c6bd2c1..3b1926d 100644 --- a/hailort/libhailort/include/hailo/hailort_dma-heap.h +++ b/hailort/libhailort/include/hailo/hailort_dma-heap.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/hef.hpp b/hailort/libhailort/include/hailo/hef.hpp index b2106b3..5a7f810 100644 --- a/hailort/libhailort/include/hailo/hef.hpp +++ b/hailort/libhailort/include/hailo/hef.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -70,7 +70,6 @@ public: * * @param[in] hef_path The path of the Hef file. * @return Upon success, returns Expected of Hef. Otherwise, returns Unexpected of ::hailo_status error. - * */ static Expected create(const std::string &hef_path); @@ -79,9 +78,18 @@ public: * * @param[in] hef_buffer A buffer that contains the Hef content. * @return Upon success, returns Expected of Hef. Otherwise, returns Unexpected of ::hailo_status error. + * @note During Hef creation, the buffer's content is copied to an internal buffer. */ static Expected create(const MemoryView &hef_buffer); + /** + * Creates an Hef from a buffer. + * + * @param[in] hef_buffer A buffer that contains the Hef content. + * @return Upon success, returns Expected of Hef. Otherwise, returns Unexpected of ::hailo_status error. + */ + static Expected create(std::shared_ptr hef_buffer); + /** * Gets input streams informations. 
* @@ -453,10 +461,12 @@ public: Expected get_description(bool stream_infos, bool vstream_infos) const; + Expected> get_external_resources() const; + ~Hef(); Hef(Hef &&); Hef &operator=(Hef &&); - Hef(const Hef &) = delete; + Hef(const Hef &) = default; Hef &operator=(const Hef &) = delete; private: @@ -470,14 +480,17 @@ private: friend class CoreOp; friend class VDeviceBase; friend class InferModelBase; + friend class MemoryRequirementsCalculator; + friend class ContextResources; + friend class ResourcesManagerBuilder; #ifdef HAILO_SUPPORT_MULTI_PROCESS friend class HailoRtRpcClient; #endif // HAILO_SUPPORT_MULTI_PROCESS class Impl; - Hef(std::unique_ptr pimpl); - std::unique_ptr pimpl; + Hef(std::shared_ptr pimpl); + std::shared_ptr pimpl; }; } /* namespace hailort */ diff --git a/hailort/libhailort/include/hailo/infer_model.hpp b/hailort/libhailort/include/hailo/infer_model.hpp index c1de080..adaeaa5 100644 --- a/hailort/libhailort/include/hailo/infer_model.hpp +++ b/hailort/libhailort/include/hailo/infer_model.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -237,6 +237,15 @@ public: */ Expected create_bindings(); + /** + * Creates a Bindings object. + * + * @param[in] buffers map of input and output names and buffers. + * + * @return Upon success, returns Expected of Bindings. Otherwise, returns Unexpected of ::hailo_status error. + */ + Expected create_bindings(const std::map &buffers); + /** * The readiness of the model to launch is determined by the ability to push buffers to the asynchronous inference pipeline. * If the model is ready, the method will return immediately. @@ -362,7 +371,7 @@ public: * @return Upon success, returns Expected of a the number of inferences that can be queued simultaneously for execution. 
* Otherwise, returns Unexpected of ::hailo_status error. */ - Expected get_async_queue_size(); + Expected get_async_queue_size() const; /** * Shuts the inference down. After calling this method, the model is no longer usable. @@ -370,6 +379,9 @@ public: */ hailo_status shutdown(); + + hailo_status update_cache_offset(int32_t offset_delta_entries); + private: friend class InferModelBase; friend class ConfiguredInferModelBase; @@ -538,8 +550,9 @@ public: * This parameter determines the number of frames that be sent for inference in a single batch. * If a scheduler is enabled, this parameter determines the 'burst size' - the max number of frames after which the scheduler will attempt * to switch to another model. + * If scheduler is disabled, the number of frames for inference should be a multiplication of batch_size (unless model is in single context). * - * note: Default value is HAILO_DEFAULT_BATCH_SIZE - means automatic batch determined by hailort. + * @note The default value is @a HAILO_DEFAULT_BATCH_SIZE - which means the batch is determined by HailoRT automatically. * * @param[in] batch_size The new batch size to be set. */ @@ -566,7 +579,6 @@ public: * * @return Upon success, returns Expected of ConfiguredInferModel, which can be used to perform an asynchronous inference. * Otherwise, returns Unexpected of ::hailo_status error. - * @note InferModel can be configured once. */ virtual Expected configure() = 0; diff --git a/hailort/libhailort/include/hailo/inference_pipeline.hpp b/hailort/libhailort/include/hailo/inference_pipeline.hpp index d900145..c93f56f 100644 --- a/hailort/libhailort/include/hailo/inference_pipeline.hpp +++ b/hailort/libhailort/include/hailo/inference_pipeline.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/network_group.hpp b/hailort/libhailort/include/hailo/network_group.hpp index 1f56e24..734c6d8 100644 --- a/hailort/libhailort/include/hailo/network_group.hpp +++ b/hailort/libhailort/include/hailo/network_group.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -99,7 +99,7 @@ public: /** * @return The network group name. */ - virtual const std::string &get_network_group_name() const = 0; + virtual const std::string& get_network_group_name() const = 0; virtual Expected get_intermediate_buffer(const IntermediateBufferKey &key) = 0; @@ -125,13 +125,13 @@ public: /** * @return The network group name. */ - virtual const std::string &get_network_group_name() const + virtual const std::string& get_network_group_name() const DEPRECATED("'get_network_group_name' is deprecated. One should use 'name()'.") = 0; /** * @return The network group name. */ - virtual const std::string &name() const = 0; + virtual const std::string& name() const = 0; /** * Gets the stream's default interface. 
@@ -424,7 +424,7 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params) = 0; virtual Expected> create_output_vstreams(const std::map &outputs_params) = 0; - virtual Expected get_min_buffer_pool_size() = 0; + virtual Expected infer_queue_size() const = 0; virtual Expected run_hw_infer_estimator() = 0; @@ -453,10 +453,9 @@ public: virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) = 0; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) = 0; virtual hailo_status set_nms_max_bboxes_total(const std::string &edge_name, uint32_t max_bboxes_total) = 0; - virtual hailo_status set_nms_result_order_type(const std::string &edge_name, hailo_nms_result_order_type_t order_type) = 0; virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) = 0; - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) = 0; + virtual hailo_status init_cache(uint32_t read_offset) = 0; virtual hailo_status update_cache_offset(int32_t offset_delta_entries) = 0; virtual Expected> get_cache_ids() const = 0; diff --git a/hailort/libhailort/include/hailo/network_rate_calculator.hpp b/hailort/libhailort/include/hailo/network_rate_calculator.hpp index 40a96be..a40fdf9 100644 --- a/hailort/libhailort/include/hailo/network_rate_calculator.hpp +++ b/hailort/libhailort/include/hailo/network_rate_calculator.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/platform.h b/hailort/libhailort/include/hailo/platform.h index 835fc40..4a16506 100644 --- a/hailort/libhailort/include/hailo/platform.h +++ b/hailort/libhailort/include/hailo/platform.h @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/quantization.hpp b/hailort/libhailort/include/hailo/quantization.hpp index 60afeca..fcdf643 100644 --- a/hailort/libhailort/include/hailo/quantization.hpp +++ b/hailort/libhailort/include/hailo/quantization.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/runtime_statistics.hpp b/hailort/libhailort/include/hailo/runtime_statistics.hpp index 628c37d..dde6978 100644 --- a/hailort/libhailort/include/hailo/runtime_statistics.hpp +++ b/hailort/libhailort/include/hailo/runtime_statistics.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/stream.hpp b/hailort/libhailort/include/hailo/stream.hpp index ca2e387..f9ca726 100644 --- a/hailort/libhailort/include/hailo/stream.hpp +++ b/hailort/libhailort/include/hailo/stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/transform.hpp b/hailort/libhailort/include/hailo/transform.hpp index 21aa740..98ef5ee 100644 --- a/hailort/libhailort/include/hailo/transform.hpp +++ b/hailort/libhailort/include/hailo/transform.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/include/hailo/vdevice.hpp b/hailort/libhailort/include/hailo/vdevice.hpp index d801420..59166a2 100644 --- a/hailort/libhailort/include/hailo/vdevice.hpp +++ b/hailort/libhailort/include/hailo/vdevice.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -36,6 +36,15 @@ public: */ static Expected> create(const hailo_vdevice_params_t ¶ms); + /** + * Creates a vdevice. + * + * @param[in] params A @a hailo_vdevice_params_t. + * @return Upon success, returns Expected of a shared_ptr to VDevice object. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + static Expected> create_shared(const hailo_vdevice_params_t ¶ms); + /** * Creates a vdevice. * @@ -45,15 +54,35 @@ public: */ static Expected> create(); + /** + * Creates a vdevice. + * + * @return Upon success, returns Expected of a shared_ptr to VDevice object. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note calling this create method will apply default vdevice params. + */ + static Expected> create_shared(); + /** * Creates a vdevice from the given phyiscal device ids. * + * @param[in] device_ids A vector of std::string, represents the device-ids from which to create the VDevice. 
 * @return Upon success, returns Expected of a unique_ptr to VDevice object. * Otherwise, returns Unexpected of ::hailo_status error. * @note calling this create method will apply default vdevice params. */ static Expected> create(const std::vector &device_ids); + /** + * Creates a vdevice from the given physical device ids. + * + * @param[in] device_ids A vector of std::string, represents the device-ids from which to create the VDevice. + * @return Upon success, returns Expected of a shared_ptr to VDevice object. + * Otherwise, returns Unexpected of ::hailo_status error. + * @note calling this create method will apply default vdevice params. + */ + static Expected> create_shared(const std::vector &device_ids); + /** * Configures the vdevice from an hef. * @@ -72,7 +101,6 @@ public: * @param[in] name A string of the model name (optional). * @return Upon success, returns Expected of a shared pointer of infer model. * Otherwise, returns Unexpected of ::hailo_status error. - * @note the Hef file must be maintained until the completion of the configuration phase. */ virtual Expected> create_infer_model(const std::string &hef_path, const std::string &name = ""); @@ -84,11 +112,33 @@ public: * @param[in] name A string of the model name (optional). * @return Upon success, returns Expected of a shared pointer of infer model. * Otherwise, returns Unexpected of ::hailo_status error. - * @note the Hef buffer must be maintained until the completion of the configuration phase. + * @note During Hef creation, the hef_buffer's content is copied to an internal buffer. */ virtual Expected> create_infer_model(const MemoryView hef_buffer, const std::string &name = ""); + /** + * Creates the infer model from an hef buffer + * + * @param[in] hef_buffer A pointer to a buffer containing the hef file. + * @param[in] name A string of the model name (optional). + * @return Upon success, returns Expected of a shared pointer of infer model. 
+ * Otherwise, returns Unexpected of ::hailo_status error. + */ + virtual Expected> create_infer_model(std::shared_ptr hef_buffer, + const std::string &name = ""); + + /** + * Creates the infer model from an hef + * + * @param[in] hef A Hef object + * @param[in] name A string of the model name (optional). + * @return Upon success, returns Expected of a shared pointer of infer model. + * Otherwise, returns Unexpected of ::hailo_status error. + */ + virtual Expected> create_infer_model(Hef hef, + const std::string &name = ""); + /** * Gets the underlying physical devices. * @@ -207,6 +257,10 @@ public: */ virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) = 0; + const hailo_vdevice_params_t get_params() const { + return m_params; + } + virtual hailo_status before_fork(); virtual hailo_status after_fork_in_parent(); virtual hailo_status after_fork_in_child(); @@ -221,7 +275,10 @@ public: static bool should_force_hrpc_client(); protected: - VDevice() = default; + VDevice(const hailo_vdevice_params_t ¶ms) : m_params(params) + {}; + + hailo_vdevice_params_t m_params; }; } /* namespace hailort */ diff --git a/hailort/libhailort/include/hailo/vstream.hpp b/hailort/libhailort/include/hailo/vstream.hpp index 2e74f4d..7ecb4fc 100644 --- a/hailort/libhailort/include/hailo/vstream.hpp +++ b/hailort/libhailort/include/hailo/vstream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/CMakeLists.txt b/hailort/libhailort/src/CMakeLists.txt index 15b8c03..6eee237 100644 --- a/hailort/libhailort/src/CMakeLists.txt +++ b/hailort/libhailort/src/CMakeLists.txt @@ -99,11 +99,6 @@ if(CMAKE_SYSTEM_NAME STREQUAL QNX) target_link_libraries(libhailort PRIVATE pci) endif() -if (HAILO_INTERNAL_BUILD) - include(${HAILO_EXTERNALS_CMAKE_SCRIPTS}/libusb.cmake) - target_link_libraries(libhailort PRIVATE usb-1.0) -endif() - set(HAILORT_PUBLIC_HEADERS ${HAILORT_INC_DIR}/hailo/hailort.h ${HAILORT_INC_DIR}/hailo/platform.h diff --git a/hailort/libhailort/src/core_op/CMakeLists.txt b/hailort/libhailort/src/core_op/CMakeLists.txt index 2251967..a98083e 100644 --- a/hailort/libhailort/src/core_op/CMakeLists.txt +++ b/hailort/libhailort/src/core_op/CMakeLists.txt @@ -6,7 +6,6 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/resource_manager.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/resource_manager_builder.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/config_buffer.cpp - ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/intermediate_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/cache_buffer.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/channel_allocator.cpp ${CMAKE_CURRENT_SOURCE_DIR}/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp diff --git a/hailort/libhailort/src/core_op/active_core_op_holder.hpp b/hailort/libhailort/src/core_op/active_core_op_holder.hpp index 2e45ac4..09a0648 100644 --- a/hailort/libhailort/src/core_op/active_core_op_holder.hpp +++ b/hailort/libhailort/src/core_op/active_core_op_holder.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/core_op/core_op.cpp b/hailort/libhailort/src/core_op/core_op.cpp index e653ac5..6b408b4 100644 --- a/hailort/libhailort/src/core_op/core_op.cpp +++ b/hailort/libhailort/src/core_op/core_op.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -276,7 +276,7 @@ uint16_t CoreOp::get_smallest_configured_batch_size(const ConfigureNetworkParams return (UINT16_MAX == min_batch_size) ? DEFAULT_ACTUAL_BATCH_SIZE : min_batch_size; } -const std::string &CoreOp::name() const +const std::string& CoreOp::name() const { return m_metadata->core_op_name(); } @@ -403,7 +403,7 @@ hailo_status CoreOp::wrap_streams_for_remote_process() return HAILO_SUCCESS; } -Expected CoreOp::get_async_max_queue_size() const +Expected CoreOp::infer_queue_size() const { size_t queue_size = std::numeric_limits::max(); diff --git a/hailort/libhailort/src/core_op/core_op.hpp b/hailort/libhailort/src/core_op/core_op.hpp index f7416a0..6f40fc0 100644 --- a/hailort/libhailort/src/core_op/core_op.hpp +++ b/hailort/libhailort/src/core_op/core_op.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -59,7 +59,7 @@ public: virtual hailo_status wait_for_activation(const std::chrono::milliseconds &timeout); - virtual const std::string &name() const; + virtual const std::string& name() const; virtual bool is_scheduled() const = 0; virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout, const std::string &network_name) = 0; @@ -107,7 +107,8 @@ public: void set_vdevice_core_op_handle(vdevice_core_op_handle_t handle) { m_vdevice_core_op_handle = handle;} vdevice_core_op_handle_t vdevice_core_op_handle() { return m_vdevice_core_op_handle;} - Expected get_async_max_queue_size() const; + // Amount of parallel ongoing infer requests for the core op. + Expected infer_queue_size() const; /** * The function returns `HAILO_SUCCESS` if at least one of the writes or reads happened. @@ -124,7 +125,7 @@ public: virtual Expected get_cache_read_length() const = 0; virtual Expected get_cache_write_length() const = 0; virtual Expected get_cache_entry_size(uint32_t cache_id) const = 0; - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) = 0; + virtual hailo_status init_cache(uint32_t read_offset) = 0; virtual hailo_status update_cache_offset(int32_t offset_delta_entries) = 0; virtual Expected> get_cache_ids() const = 0; virtual Expected read_cache_buffer(uint32_t cache_id) = 0; diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp index 6ed5411..824cc9a 100644 --- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp index 784f1c2..2a30c5a 100644 --- a/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/action_list_buffer_builder/action_list_buffer_builder.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp index 66ccccf..319d933 100644 --- a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -10,12 +10,15 @@ #include "cache_buffer.hpp" #include "hailo/hailort.h" #include "vdma/memory/sg_buffer.hpp" +#include "core_op/resource_manager/resource_manager.hpp" +#include "vdma/memory/sg_edge_layer.hpp" +#include "vdma/memory/buffer_requirements.hpp" namespace hailort { Expected CacheBuffer::create(std::shared_ptr backing_buffer, uint32_t cache_size, - uint32_t input_size, uint32_t output_size, uint32_t entry_size) + uint32_t input_size, uint32_t output_size, uint32_t entry_size, uint32_t padded_entry_size) { CHECK_ARG_NOT_NULL(backing_buffer); CHECK((cache_size > 0) && (cache_size == backing_buffer->size()), HAILO_INVALID_ARGUMENT); @@ -24,19 +27,22 @@ Expected CacheBuffer::create(std::shared_ptr back CHECK((output_size > 0) && (output_size < cache_size), HAILO_INVALID_ARGUMENT, "Invalid cache output size: {} (cache size: {})", output_size, cache_size); - CHECK((entry_size > 0) && (entry_size <= std::numeric_limits::max()) && - ((cache_size % entry_size) == 0) && ((input_size % entry_size) == 0) && ((output_size % entry_size) == 0), - HAILO_INVALID_ARGUMENT, "Invalid cache entry size: {}", entry_size); + CHECK((padded_entry_size > 0) && (padded_entry_size <= std::numeric_limits::max()) && + ((cache_size % padded_entry_size) == 0) && ((input_size % padded_entry_size) == 0) && + ((output_size % padded_entry_size) == 0), + HAILO_INVALID_ARGUMENT, "Invalid cache entry size: {}", padded_entry_size); - return CacheBuffer(cache_size, input_size, output_size, static_cast(entry_size), backing_buffer); + return CacheBuffer(cache_size, input_size, output_size, static_cast(entry_size), + static_cast(padded_entry_size), backing_buffer); } CacheBuffer::CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t output_size, uint16_t entry_size, - std::shared_ptr backing_buffer) : + uint16_t padded_entry_size, std::shared_ptr backing_buffer) : m_entry_size(entry_size), - 
m_cache_length(cache_size / entry_size), - m_input_length(input_size / entry_size), - m_output_length(output_size / entry_size), + m_padded_entry_size(padded_entry_size), + m_cache_length(cache_size / padded_entry_size), + m_input_length(input_size / padded_entry_size), + m_output_length(output_size / padded_entry_size), m_backing_buffer(backing_buffer) { // This is validated in the create function too; it's here just to be safe @@ -45,52 +51,78 @@ CacheBuffer::CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t outp assert(output_size % entry_size == 0); } -ExpectedRef CacheBuffer::set_input_channel(HailoRTDriver &driver, vdma::ChannelId channel_id) +Expected> CacheBuffer::create_sg_edge_layer_shared(HailoRTDriver &driver, + uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId channel_id, + std::shared_ptr buffer, size_t buffer_offset, uint16_t max_desc_size) +{ + LOGGER__TRACE("Creating CacheBuffer: transfer_size = {}, channel_id = {}, " + "buffer_offset = {}, max_desc_size = {}, batch_size = {}", + transfer_size, channel_id, buffer_offset, max_desc_size, batch_size); + + const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; + const auto FORCE_BATCH_SIZE = true; + const auto IS_VDMA_ALIGNED_BUFFER = true; + max_desc_size = std::min(max_desc_size, driver.desc_max_page_size()); + TRY(const auto buffer_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::SCATTER_GATHER, max_desc_size, batch_size, batch_size, transfer_size, + false , DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER, false)); + auto desc_page_size = buffer_requirements.desc_page_size(); + const auto descs_count = buffer_requirements.descs_count(); + const auto buffer_size = buffer_requirements.buffer_size(); + + TRY(auto edge_layer, vdma::SgEdgeLayer::create(std::static_pointer_cast(buffer), buffer_size, + buffer_offset, driver, descs_count, desc_page_size, false, channel_id)); + + auto edge_layer_ptr = 
make_shared_nothrow(std::move(edge_layer)); + CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return edge_layer_ptr; +} + +ExpectedRef CacheBuffer::set_input_channel(HailoRTDriver &driver, vdma::ChannelId channel_id) { if (m_cache_input) { - return std::ref(*m_cache_input); + return std::ref(*this); } - static const auto SINGLE_BATCH = 1; static const auto BUFFER_START = 0; // Passing the entry size as the max desc size, so that we can update the cache by entry granularity, even if the // entry is smaller than the default desc size. E.g. Updating the cache by one 64B entry, won't work if the desc size // is 512B, so the desc list should be programmed with 64B. If it is g.t.e. than 512B, the desc list will be programmed // as usual. - TRY(auto intermediate_buffer, IntermediateBuffer::create_shared(driver, m_input_length * m_entry_size, SINGLE_BATCH, - channel_id, IntermediateBuffer::StreamingType::BURST, m_backing_buffer, BUFFER_START, m_entry_size)); - m_cache_input = intermediate_buffer; - return std::ref(*m_cache_input); + TRY(auto cache_layer, create_sg_edge_layer_shared(driver, m_entry_size, static_cast(m_input_length), + channel_id, m_backing_buffer, BUFFER_START, m_padded_entry_size)); + m_cache_input = std::move(cache_layer); + return std::ref(*this); } -ExpectedRef CacheBuffer::set_output_channel(HailoRTDriver &driver, vdma::ChannelId channel_id) +ExpectedRef CacheBuffer::set_output_channel(HailoRTDriver &driver, vdma::ChannelId channel_id) { if (m_cache_output) { - return std::ref(*m_cache_output); + return std::ref(*this); } - static const auto SINGLE_BATCH = 1; static const auto BUFFER_START = 0; // Passing the entry size as the max desc size, so that we can update the cache by entry granularity, even if the // entry is smaller than the default desc size. E.g. Updating the cache by one 64B entry, won't work if the desc size // is 512B, so the desc list should be programmed with 64B. If it is g.t.e. 
than 512B, the desc list will be programmed // as usual. - TRY(auto intermediate_buffer, IntermediateBuffer::create_shared(driver, m_output_length * m_entry_size, SINGLE_BATCH, - channel_id, IntermediateBuffer::StreamingType::BURST, m_backing_buffer, BUFFER_START, m_entry_size)); - m_cache_output = intermediate_buffer; - return std::ref(*m_cache_output); + TRY(auto cache_layer, create_sg_edge_layer_shared(driver, m_entry_size, static_cast(m_output_length), + channel_id, m_backing_buffer, BUFFER_START, m_padded_entry_size)); + m_cache_output = std::move(cache_layer); + return std::ref(*this); } -ExpectedRef CacheBuffer::get_input() +ExpectedRef CacheBuffer::get_input() { CHECK(m_cache_input, HAILO_INTERNAL_FAILURE, "Input not set"); - return std::ref(*m_cache_input); + return std::ref(*this); } -ExpectedRef CacheBuffer::get_output() +ExpectedRef CacheBuffer::get_output() { CHECK(m_cache_output, HAILO_INTERNAL_FAILURE, "Output not set"); - return std::ref(*m_cache_output); + return std::ref(*this); } Expected CacheBuffer::read_cache() @@ -111,20 +143,65 @@ hailo_status CacheBuffer::write_cache(MemoryView buffer) return m_backing_buffer->write(buffer.data(), buffer.size(), 0); } +hailo_status CacheBuffer::reprogram_descriptors_per_side(bool is_side_input, size_t buffer_offset) +{ + std::shared_ptr sg_edge_layer = is_side_input ? m_cache_input : m_cache_output; + auto batch_size = is_side_input ? 
static_cast(m_input_length) : static_cast(m_output_length); + CHECK(buffer_offset % sg_edge_layer->desc_page_size() == 0, HAILO_INTERNAL_FAILURE, + "Buffer offset must be aligned to descriptor page size"); + const auto total_transfer_size = static_cast(batch_size * m_padded_entry_size); + assert(sg_edge_layer->backing_buffer_size() >= buffer_offset); + const auto size_to_end = sg_edge_layer->backing_buffer_size() - buffer_offset; + const auto first_chunk_size = std::min(size_to_end, static_cast(batch_size * m_padded_entry_size)); + CHECK(first_chunk_size % m_padded_entry_size == 0, HAILO_INTERNAL_FAILURE, + "First chunk size must be aligned to entry size"); + + // Program the first chunk of descriptors - from the buffer offset to the end of the buffer + const bool BIND = true; + const size_t DESC_LIST_START = 0; + const uint32_t SINGLE_BATCH = 1; + auto transfer_size = first_chunk_size; + const auto stride = m_entry_size; + TRY(const uint32_t first_chunk_desc_count, sg_edge_layer->program_descriptors(transfer_size, + InterruptsDomain::NONE, DESC_LIST_START, buffer_offset, SINGLE_BATCH, BIND, stride)); + + uint32_t second_chunk_desc_count = 0; + if (first_chunk_size < total_transfer_size) { + // Program the second chunk of descriptors - from the start of the buffer till the end of the remaining size + const size_t BUFFER_START = 0; + const auto second_chunk_size = total_transfer_size - first_chunk_size; + CHECK(second_chunk_size % m_padded_entry_size == 0, HAILO_INTERNAL_FAILURE, + "Second chunk size must be aligned to entry size"); + transfer_size = second_chunk_size; + TRY(second_chunk_desc_count, sg_edge_layer->program_descriptors(transfer_size, InterruptsDomain::NONE, + first_chunk_desc_count, BUFFER_START, SINGLE_BATCH, BIND, stride)); + } + + const auto expected_desc_count = sg_edge_layer->descs_count() - 1; + CHECK(first_chunk_desc_count + second_chunk_desc_count == expected_desc_count, HAILO_INTERNAL_FAILURE, + "Expected {} descriptors, got {}", 
expected_desc_count, first_chunk_desc_count + second_chunk_desc_count); + + return HAILO_SUCCESS; +} + hailo_status CacheBuffer::reprogram_descriptors(uint32_t new_read_offset_entries) { CHECK(m_cache_input && m_cache_output, HAILO_INTERNAL_FAILURE, "IOs not set"); + bool is_side_input = true; - const auto new_read_offset_bytes = new_read_offset_entries * entry_size(); - auto status = m_cache_input->reprogram_descriptors(new_read_offset_bytes); + const auto new_read_offset_bytes = new_read_offset_entries * padded_entry_size(); + // Input buffer + auto status = reprogram_descriptors_per_side(is_side_input, new_read_offset_bytes); CHECK_SUCCESS(status, "Failed to reprogram read cache descriptors to offset 0x{:x} (0x{:x} B)", new_read_offset_entries, new_read_offset_bytes); // The write offset is right after the end of read buffer (i.e. cache_input_length entries from the read offset) const auto write_offset_entries_delta = input_length(); const auto new_write_offset_entries = (new_read_offset_entries + write_offset_entries_delta) % m_cache_length; - const auto new_write_offset_bytes = new_write_offset_entries * entry_size(); - status = m_cache_output->reprogram_descriptors(new_write_offset_bytes); + const auto new_write_offset_bytes = new_write_offset_entries * padded_entry_size(); + // Output buffer + is_side_input = false; + status = reprogram_descriptors_per_side(is_side_input, new_write_offset_bytes); CHECK_SUCCESS(status, "Failed to reprogram write cache descriptors to offset 0x{:x} (0x{:x} B)", new_write_offset_entries, new_write_offset_bytes); @@ -151,6 +228,11 @@ uint32_t CacheBuffer::output_length() const return m_output_length; } +uint16_t CacheBuffer::padded_entry_size() const +{ + return m_padded_entry_size; +} + bool CacheBuffer::is_configured() const { return m_cache_input && m_cache_output; @@ -177,5 +259,25 @@ Expected CacheBuffer::create_snapshot(uint32_t read_offse return Snapshot(std::move(buffer), read_offset); } 
+CONTROL_PROTOCOL__host_buffer_info_t CacheBuffer::get_host_input_buffer_info() const +{ + return m_cache_input->get_host_buffer_info(m_entry_size); +} + +CONTROL_PROTOCOL__host_buffer_info_t CacheBuffer::get_host_output_buffer_info() const +{ + return m_cache_output->get_host_buffer_info(m_entry_size); +} + +uint16_t CacheBuffer::output_batch_size() const +{ + return static_cast(m_output_length); +} + +uint16_t CacheBuffer::input_batch_size() const +{ + return static_cast(m_input_length); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp index 861fdb7..46d65c2 100644 --- a/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/cache_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,7 +11,13 @@ #define _HAILO_CACHE_BUFFER_HPP_ #include "hailo/hailort.h" -#include "core_op/resource_manager/intermediate_buffer.hpp" +#include "hailo/buffer.hpp" + +#include "vdma/driver/hailort_driver.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "control_protocol.h" +#include "vdma/memory/sg_edge_layer.hpp" namespace hailort { @@ -20,7 +26,7 @@ class CacheBuffer final { public: static Expected create(std::shared_ptr backing_buffer, uint32_t cache_size, - uint32_t input_size, uint32_t output_size, uint32_t entry_size); + uint32_t input_size, uint32_t output_size, uint32_t entry_size, uint32_t padded_entry_size); CacheBuffer(CacheBuffer &&) = default; CacheBuffer(const CacheBuffer &) = delete; @@ -30,10 +36,10 @@ public: // Set input/output channels to/from the cache. Will only be set once for each direction. 
// (subsequent calls will return the same IntermediateBuffer.) - ExpectedRef set_input_channel(HailoRTDriver &driver, vdma::ChannelId channel_id); - ExpectedRef set_output_channel(HailoRTDriver &driver, vdma::ChannelId channel_id); - ExpectedRef get_input(); - ExpectedRef get_output(); + ExpectedRef set_input_channel(HailoRTDriver &driver, vdma::ChannelId channel_id); + ExpectedRef set_output_channel(HailoRTDriver &driver, vdma::ChannelId channel_id); + ExpectedRef get_input(); + ExpectedRef get_output(); Expected read_cache(); hailo_status write_cache(MemoryView buffer); hailo_status reprogram_descriptors(uint32_t new_read_offset_entries); @@ -42,9 +48,14 @@ public: uint32_t cache_length() const; uint32_t input_length() const; uint32_t output_length() const; + uint16_t padded_entry_size() const; // Returns true if both input and output channels are set. bool is_configured() const; + CONTROL_PROTOCOL__host_buffer_info_t get_host_input_buffer_info() const; + CONTROL_PROTOCOL__host_buffer_info_t get_host_output_buffer_info() const; + uint16_t output_batch_size() const; + uint16_t input_batch_size() const; class Snapshot final { public: @@ -70,9 +81,10 @@ public: private: CacheBuffer(uint32_t cache_size, uint32_t input_size, uint32_t output_size, uint16_t entry_size, - std::shared_ptr backing_buffer); + uint16_t padded_entry_size, std::shared_ptr backing_buffer); const uint16_t m_entry_size; + const uint16_t m_padded_entry_size; const uint32_t m_cache_length; const uint32_t m_input_length; const uint32_t m_output_length; @@ -81,8 +93,14 @@ private: // * They both share the same backing buffer. // * They each have separate descriptor lists that will be programmed separately. // * This way we can read/write/reprogram the cache buffer without affecting the other direction. 
- std::shared_ptr m_cache_input; - std::shared_ptr m_cache_output; + std::shared_ptr m_cache_input; + std::shared_ptr m_cache_output; + + Expected> create_sg_edge_layer_shared(HailoRTDriver &driver, + uint32_t transfer_size, uint16_t batch_size, vdma::ChannelId channel_id, + std::shared_ptr buffer, size_t buffer_offset, uint16_t max_desc_size); + + hailo_status reprogram_descriptors_per_side(bool is_side_input, size_t buffer_offset); }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp index 0e97eb8..e7a361b 100644 --- a/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/cache_manager.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -69,11 +69,11 @@ bool CacheManager::validate_cache_ids(std::shared_ptr core_op_me std::unordered_set cache_ids; for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) { for (const auto &layer_info : context_metadata.get_cache_input_layers()) { - cache_ids.insert(layer_info.cache_id); + cache_ids.insert(layer_info.cache_info.cache_id); } for (const auto &layer_info : context_metadata.get_cache_output_layers()) { - cache_ids.insert(layer_info.cache_id); + cache_ids.insert(layer_info.cache_info.cache_id); } } @@ -103,7 +103,7 @@ ExpectedRef> CacheManager::get_cache_b return std::ref(core_op_manager_it->second.get_cache_buffers()); } -ExpectedRef CacheManager::set_cache_input_channel(const std::string &core_op_name, +ExpectedRef CacheManager::set_cache_input_channel(const std::string &core_op_name, uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id) { const auto core_op_manager_it = 
m_core_op_managers.find(core_op_name); @@ -114,7 +114,7 @@ ExpectedRef CacheManager::set_cache_input_channel(const std: return core_op_manager_it->second.set_cache_input_channel(cache_id, batch_size, channel_id); } -ExpectedRef CacheManager::set_cache_output_channel(const std::string &core_op_name, +ExpectedRef CacheManager::set_cache_output_channel(const std::string &core_op_name, uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id) { const auto core_op_manager_it = m_core_op_managers.find(core_op_name); @@ -125,8 +125,7 @@ ExpectedRef CacheManager::set_cache_output_channel(const std return core_op_manager_it->second.set_cache_output_channel(cache_id, batch_size, channel_id); } -// TODO: Support write_offset_delta_entries in CacheManager::init_caches (HRT-14397) -hailo_status CacheManager::init_caches(uint32_t initial_read_offset_entries, int32_t write_offset_delta_entries) +hailo_status CacheManager::init_caches(uint32_t initial_read_offset_entries) { if (!m_caches_created) { // No cache layers found, nothing to do @@ -135,12 +134,9 @@ hailo_status CacheManager::init_caches(uint32_t initial_read_offset_entries, int } CHECK(initial_read_offset_entries < m_cache_length, HAILO_INVALID_ARGUMENT); - CHECK(write_offset_delta_entries != 0, HAILO_INVALID_ARGUMENT); - m_read_offset_entries = initial_read_offset_entries; - LOGGER__INFO("Initializing caches [read_offset={}, write_offset_delta={}]", - initial_read_offset_entries, write_offset_delta_entries); + LOGGER__INFO("Initializing caches @ read_offset={}", initial_read_offset_entries); static const auto INITIAL_CONFIGURATION_OFFSET = 0; return update_cache_offset(INITIAL_CONFIGURATION_OFFSET); @@ -216,11 +212,17 @@ Expected CacheManager::CoreOpManager::get_cache_ios_infos( CoreOpCacheIoInfos cache_inputs_info; for (const auto &context_metadata : core_op_metadata->dynamic_contexts()) { for (const auto &layer_info : (input ? 
context_metadata.get_cache_input_layers() : context_metadata.get_cache_output_layers())) { - const auto cache_id = layer_info.cache_id; + const auto cache_id = layer_info.cache_info.cache_id; CHECK(!contains(cache_inputs_info, cache_id), HAILO_INTERNAL_FAILURE, "Duplicate cache_id found in cache input layers (cache_id {})", cache_id); - cache_inputs_info[cache_id].io_size = LayerInfoUtils::get_layer_transfer_size(layer_info); cache_inputs_info[cache_id].entry_size = layer_info.hw_shape.features; + if (!IS_POWEROF2(cache_inputs_info[cache_id].entry_size)) { + cache_inputs_info[cache_id].padded_entry_size = get_nearest_powerof_2(layer_info.hw_shape.features, vdma::MIN_SG_PAGE_SIZE); + } else { + cache_inputs_info[cache_id].padded_entry_size = cache_inputs_info[cache_id].entry_size; + } + cache_inputs_info[cache_id].io_size = (cache_inputs_info[cache_id].padded_entry_size * + layer_info.hw_shape.height * layer_info.hw_shape.width); } } @@ -272,7 +274,7 @@ Expected> CacheManager::CoreOpManager: TRY(auto backing_buffer, storage_manager.get_backing_buffer(cache_id, cache_info.size)); TRY(auto cache_buffer, CacheBuffer::create(backing_buffer, cache_info.size, cache_info.input_size, - cache_info.output_size, cache_info.entry_size)); + cache_info.output_size, cache_info.entry_size, cache_info.padded_entry_size)); auto emplace_res = cache_buffers.emplace(cache_id, std::move(cache_buffer)); CHECK(emplace_res.second, HAILO_INTERNAL_FAILURE); } @@ -309,7 +311,7 @@ ExpectedRef CacheManager::CoreOpManager::get_cache_buffer(uint32_t return make_unexpected(HAILO_NOT_FOUND); } -ExpectedRef CacheManager::CoreOpManager::set_cache_input_channel(uint32_t cache_id, +ExpectedRef CacheManager::CoreOpManager::set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id) { CHECK(1 == batch_size, HAILO_INVALID_ARGUMENT, "Cache input batch size must be 1"); @@ -330,7 +332,7 @@ ExpectedRef CacheManager::CoreOpManager::set_cache_input_cha return result; } 
-ExpectedRef CacheManager::CoreOpManager::set_cache_output_channel(uint32_t cache_id, +ExpectedRef CacheManager::CoreOpManager::set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id) { CHECK(1 == batch_size, HAILO_INVALID_ARGUMENT, "Cache output batch size must be 1"); @@ -360,9 +362,9 @@ hailo_status CacheManager::CoreOpManager::validate_cache_update(const CacheBuffe const CacheBuffer::Snapshot &curr_snapshot, const CacheBuffer::Snapshot &prev_snapshot, bool require_changes) { const auto curr_write_offset_start = (curr_snapshot.read_offset() + cache_buffer.input_length()) % cache_buffer.cache_length(); - const auto curr_write_offset_start_bytes = curr_write_offset_start * cache_buffer.entry_size(); + const auto curr_write_offset_start_bytes = curr_write_offset_start * cache_buffer.padded_entry_size(); const auto curr_write_offset_end = (curr_write_offset_start + cache_buffer.output_length()) % cache_buffer.cache_length(); - const auto curr_write_offset_end_bytes = curr_write_offset_end * cache_buffer.entry_size(); + const auto curr_write_offset_end_bytes = curr_write_offset_end * cache_buffer.padded_entry_size(); if (curr_write_offset_end > curr_write_offset_start) { return validate_non_wrapping_update(cache_id, curr_snapshot, prev_snapshot, diff --git a/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp index dc43ccc..a245324 100644 --- a/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/cache_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -25,6 +25,7 @@ struct CacheIoInfo { uint32_t io_size; uint32_t entry_size; + uint32_t padded_entry_size; }; struct CacheInfo @@ -35,19 +36,20 @@ struct CacheInfo "Input and output entry sizes must match: input={}, output={}", input_info.entry_size, output_info.entry_size); // Asserting is good enough here, as it'll be validated down the line - assert(input_info.io_size % input_info.entry_size == 0); - assert(output_info.io_size % output_info.entry_size == 0); + assert(input_info.io_size % input_info.padded_entry_size == 0); + assert(output_info.io_size % output_info.padded_entry_size == 0); return CacheInfo{input_info.io_size + output_info.io_size, input_info.entry_size, input_info.io_size, - output_info.io_size}; + output_info.io_size, input_info.padded_entry_size}; } uint32_t size; uint32_t entry_size; uint32_t input_size; uint32_t output_size; + uint32_t padded_entry_size; - uint32_t cache_length() const { return size / entry_size; } + uint32_t cache_length() const { return size / padded_entry_size; } }; // Cache ID -> CacheIoInfo @@ -61,10 +63,6 @@ class CacheManager final public: static constexpr uint32_t CACHE_LENGTH_NOT_SET = 0; - // TODO: Support getting initial_read_offset_entries + write_offset_delta_entries from configured_network_params - // s.t. the CacheManager can be created with the correct offsets, and init_caches won't be needed at the start. - // Currently, the CacheManager is created with the m_read_offset_entries=0 and - // m_write_offset_delta_entries=m_cache_input_size (i.e. 
right after where data was read from) (HRT-14288) static Expected create_shared(HailoRTDriver &driver); CacheManager(HailoRTDriver &driver); @@ -75,15 +73,15 @@ public: ~CacheManager() = default; hailo_status create_caches_from_core_op(std::shared_ptr core_op_metadata); - ExpectedRef set_cache_input_channel(const std::string &core_op_name, uint32_t cache_id, + ExpectedRef set_cache_input_channel(const std::string &core_op_name, uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); - ExpectedRef set_cache_output_channel(const std::string &core_op_name, uint32_t cache_id, + ExpectedRef set_cache_output_channel(const std::string &core_op_name, uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); ExpectedRef> get_cache_buffers(const std::string &core_op_name); // Note: These functions are not thread-safe! - // Programs the CacheManager instance with the given offsets, overriding the current offsets. - hailo_status init_caches(uint32_t initial_read_offset_entries, int32_t write_offset_delta_entries); + // Programs the CacheManager instance with the given offset read overriding the current offset. 
+ hailo_status init_caches(uint32_t initial_read_offset_entries); // Updates the read offset by the given delta // * If check_snapshots is true, the function will validate that the caches have only been updated at the // correct offsets @@ -129,9 +127,9 @@ private: std::unordered_map &get_cache_buffers(); const std::unordered_map &get_cache_buffers() const; ExpectedRef get_cache_buffer(uint32_t cache_id); - ExpectedRef set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, + ExpectedRef set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); - ExpectedRef set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, + ExpectedRef set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); uint32_t cache_length() const; // Note: read_offset is absolute, not relative to the current read offset diff --git a/hailort/libhailort/src/core_op/resource_manager/channel_allocator.cpp b/hailort/libhailort/src/core_op/resource_manager/channel_allocator.cpp index d16212a..9a9a5c3 100644 --- a/hailort/libhailort/src/core_op/resource_manager/channel_allocator.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/channel_allocator.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -8,7 +8,7 @@ **/ #include "core_op/resource_manager/channel_allocator.hpp" - +#include "common/internal_env_vars.hpp" namespace hailort { @@ -18,10 +18,12 @@ ChannelAllocator::ChannelAllocator(size_t max_engines_count) : {} Expected ChannelAllocator::get_available_channel_id(const LayerIdentifier &layer_identifier, - HailoRTDriver::DmaDirection direction, uint8_t engine_index) + HailoRTDriver::DmaDirection direction, uint8_t engine_index, bool use_enhanced_channel) { CHECK_AS_EXPECTED(engine_index < m_max_engines_count, HAILO_INVALID_ARGUMENT, "Invalid engine index {}, max is {}", engine_index, m_max_engines_count); + CHECK_AS_EXPECTED(!use_enhanced_channel || (HailoRTDriver::DmaDirection::D2H == direction), HAILO_INVALID_ARGUMENT, + "Error, cannot use enhanced channel when direction is not D2H"); const auto found_channel = m_allocated_channels.find(layer_identifier); if (found_channel != m_allocated_channels.end()) { @@ -41,6 +43,11 @@ Expected ChannelAllocator::get_available_channel_id(const Layer uint8_t max_channel_index = (direction == HailoRTDriver::DmaDirection::H2D) ? MAX_H2D_CHANNEL_INDEX : MAX_D2H_CHANNEL_INDEX; + // In case that enhance CCB channel is needed for hw infer + if ((LayerType::BOUNDARY == std::get<0>(layer_identifier)) && use_enhanced_channel) { + min_channel_index = MIN_ENHANCED_D2H_CHANNEL_INDEX; + } + for (uint8_t index = min_channel_index; index <= max_channel_index; ++index) { const vdma::ChannelId channel_id = {engine_index, index}; diff --git a/hailort/libhailort/src/core_op/resource_manager/channel_allocator.hpp b/hailort/libhailort/src/core_op/resource_manager/channel_allocator.hpp index e2ab122..4ce58fe 100644 --- a/hailort/libhailort/src/core_op/resource_manager/channel_allocator.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/channel_allocator.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. 
All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -29,7 +29,7 @@ public: ChannelAllocator(ChannelAllocator &&other) = default; Expected get_available_channel_id(const LayerIdentifier &layer_identifier, - HailoRTDriver::DmaDirection direction, uint8_t engine_index); + HailoRTDriver::DmaDirection direction, uint8_t engine_index, bool use_enhanced_channel = false); hailo_status free_channel_index(const LayerIdentifier &layer_identifier); private: diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp index 25c38c1..3808bef 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -19,13 +19,26 @@ namespace hailort { +Expected> ConfigBuffer::build_desc_list(HailoRTDriver &driver, + std::vector> ccw_dma_transfers) +{ + std::vector burst_sizes; + burst_sizes.push_back(NOPS_TRANSFERS_PER_ALIGNED_CCWS_TRANSFER); // For padding - adding the NOPs + for (const auto& ccw_dma_transfer : ccw_dma_transfers) { + burst_sizes.push_back(static_cast(ccw_dma_transfer.second)); + } + TRY(auto requirements, get_sg_buffer_requirements(burst_sizes, driver.desc_max_page_size())); + TRY(auto desc_list, vdma::DescriptorList::create(requirements.descs_count(), requirements.desc_page_size(), false, driver)); + return make_unique_nothrow(std::move(desc_list)); +} + Expected> ConfigBuffer::create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &bursts_sizes, const uint32_t buffer_size) { - auto buffer_ptr = should_use_ccb(driver) ? + auto buffer_ptr = should_use_ccb(driver.dma_type()) ? 
create_ccb_buffer(driver, buffer_size) : create_sg_buffer(driver, channel_id, bursts_sizes); - if (should_use_ccb(driver) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_ptr.status())) { + if (should_use_ccb(driver.dma_type()) && (HAILO_OUT_OF_HOST_CMA_MEMORY == buffer_ptr.status())) { /* Try to use sg buffer instead */ return create_sg_buffer(driver, channel_id, bursts_sizes); } else { @@ -33,16 +46,37 @@ Expected> ConfigBuffer::create_buffer(Hailo } } -Expected ConfigBuffer::create(HailoRTDriver &driver, vdma::ChannelId channel_id, - const std::vector &bursts_sizes) +Expected ConfigBuffer::create_for_aligned_ccws(HailoRTDriver &driver, vdma::ChannelId channel_id, + const ConfigBufferInfo &config_buffer_info, std::vector> mapped_buffers, + std::shared_ptr nops_buffer) { - const auto buffer_size = std::accumulate(bursts_sizes.begin(), bursts_sizes.end(), 0); - CHECK_AS_EXPECTED(IS_FIT_IN_UINT32(buffer_size), HAILO_INTERNAL_FAILURE, "config buffer size exceeded UINT32 range limit"); - TRY(auto buffer_ptr, create_buffer(driver, channel_id, bursts_sizes, static_cast(buffer_size))); + const auto &ccw_dma_transfers = config_buffer_info.ccw_dma_transfers; + TRY(auto desc_list, build_desc_list(driver, ccw_dma_transfers)); + auto config_buffer = ConfigBuffer(std::move(desc_list), channel_id, mapped_buffers, ccw_dma_transfers, nops_buffer); + auto status = config_buffer.program_descriptors_for_aligned_ccws(config_buffer_info.ccw_dma_transfers); + CHECK_SUCCESS_AS_EXPECTED(status); + return config_buffer; +} +Expected ConfigBuffer::create_with_copy_descriptors(HailoRTDriver &driver, vdma::ChannelId channel_id, + const ConfigBufferInfo &config_buffer_info) +{ + const auto &bursts_sizes = config_buffer_info.bursts_sizes; + const auto buffer_size = std::accumulate(bursts_sizes.begin(), bursts_sizes.end(), uint32_t{0}); + TRY(auto buffer_ptr, create_buffer(driver, channel_id, bursts_sizes, buffer_size)); return ConfigBuffer(std::move(buffer_ptr), channel_id, buffer_size); } +Expected 
ConfigBuffer::get_buffer_requirements(const ConfigBufferInfo &config_buffer_info, + HailoRTDriver::DmaType dma_type, uint16_t max_desc_page_size) +{ + const auto &bursts_sizes = config_buffer_info.bursts_sizes; + const auto buffer_size = std::accumulate(bursts_sizes.begin(), bursts_sizes.end(), uint32_t{0}); + return should_use_ccb(dma_type) ? + get_ccb_buffer_requirements(buffer_size, max_desc_page_size) : + get_sg_buffer_requirements(bursts_sizes, max_desc_page_size); +} + ConfigBuffer::ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size) : m_buffer(std::move(buffer)), @@ -51,8 +85,82 @@ ConfigBuffer::ConfigBuffer(std::unique_ptr &&buffer, m_current_buffer_size(0) {} +ConfigBuffer::ConfigBuffer(std::unique_ptr &&desc_list, vdma::ChannelId channel_id, std::vector> mapped_buffers, + const std::vector> &ccw_dma_transfers, std::shared_ptr nops_buffer) + : m_buffer(nullptr), + m_channel_id(channel_id), + m_total_buffer_size(0), m_acc_buffer_offset(0), m_acc_desc_count(0), + m_current_buffer_size(0), m_desc_list(std::move(desc_list)), + m_aligned_ccws(true), m_mapped_buffers(mapped_buffers), + m_ccw_dma_transfers(ccw_dma_transfers), m_nops_buffer(nops_buffer) +{} + +Expected ConfigBuffer::program_descriptors_for_transfer(const std::pair &ccw_dma_transfer, uint32_t total_desc_count) +{ + /* + * This function programs the descriptors for a single ccw_dma_transfer. + * We start by getting the first buffer index + the offset inside that buffer (first since a single ccw_dma_transfer can span over multiple buffers). + * Starting from that index, we loop over the buffers and program the descriptors for each buffer, until we have transferred the required total number of bytes. 
+ */ + const uint64_t total_size_to_transfer = ccw_dma_transfer.second; + const uint64_t start_offset = ccw_dma_transfer.first; + + uint32_t transfer_desc_count = 0; + uint64_t bytes_transferred = 0; + + size_t current_buffer_index = start_offset / m_mapped_buffers.at(0)->size(); // all vectors in m_mapped_buffers have the same size (except from the last one) + uint64_t offset_in_buffer = start_offset % m_mapped_buffers.at(0)->size(); + + while (bytes_transferred < total_size_to_transfer) { + const auto &curr_buffer = m_mapped_buffers.at(current_buffer_index); + const uint64_t available_bytes = curr_buffer->size() - offset_in_buffer; // Calculate how many bytes are available in the current buffer from the current offset. + + // We transfer the minimum of what remains to be transferred and what is available in the current buffer. + const uint64_t curr_bytes_to_transfer = std::min(total_size_to_transfer - bytes_transferred, available_bytes); + CHECK_SUCCESS(m_desc_list->program(*curr_buffer, curr_bytes_to_transfer, offset_in_buffer, m_channel_id, + total_desc_count + transfer_desc_count, DEFAULT_PROGRAM_BATCH_SIZE, + true, InterruptsDomain::DEVICE)); + + transfer_desc_count += m_desc_list->descriptors_in_buffer(curr_bytes_to_transfer); + bytes_transferred += curr_bytes_to_transfer; + offset_in_buffer = 0; + current_buffer_index++; + } + return transfer_desc_count; +} + +Expected ConfigBuffer::program_descriptors_for_aligned_ccws(const std::vector> &ccw_dma_transfers) +{ + uint32_t total_desc_count = 0; + const uint64_t page_size = m_desc_list->desc_page_size(); + + // Transfer nops such that the number of total transferred bytes is a multiple of page_size + const auto total_dma_transfers_size = std::accumulate(ccw_dma_transfers.begin(), ccw_dma_transfers.end(), uint64_t{0}, + [](const auto &acc, const auto &ccw_dma_transfer) { return acc + ccw_dma_transfer.second; }); + + auto padding_count = page_size - (total_dma_transfers_size % page_size); + if (padding_count > 
0) { + CHECK_SUCCESS(m_desc_list->program(*m_nops_buffer, padding_count, 0, m_channel_id, + total_desc_count, 1, true, InterruptsDomain::DEVICE)); + total_desc_count += 1; + } + + for (const auto &dma_tranfer : ccw_dma_transfers) { + // Transfer the actual data (ccws) + TRY(auto current_transfer_desc_count, program_descriptors_for_transfer(dma_tranfer, total_desc_count)); + total_desc_count += current_transfer_desc_count; + } + m_acc_desc_count += total_desc_count; + + return total_desc_count; +} + Expected ConfigBuffer::program_descriptors() { + // TODO HRT-16583: Split ConfigBuffer to 2 classes: one for aligned_ccws case and for the regular case + // After splitting - remove this check + CHECK_AS_EXPECTED(!m_aligned_ccws, HAILO_INTERNAL_FAILURE, "Program descriptors for aligned ccws should be called"); + // TODO HRT-9657: remove DEVICE interrupts TRY(auto descriptors_count, m_buffer->program_descriptors(m_acc_buffer_offset, InterruptsDomain::DEVICE, m_acc_desc_count)); @@ -90,6 +198,10 @@ hailo_status ConfigBuffer::pad_with_nops() hailo_status ConfigBuffer::write(const MemoryView &data) { + // TODO HRT-16583: Split ConfigBuffer to 2 classes: one for aligned_ccws case and for the regular case + // After splitting - remove this check + CHECK(!m_aligned_ccws, HAILO_INTERNAL_FAILURE, "Writing to ConfigBuffer when using alligned ccws is not supported"); + CHECK(data.size() <= size_left(), HAILO_INTERNAL_FAILURE, "Write too many config words"); auto status = write_inner(data); CHECK_SUCCESS(status); @@ -121,7 +233,12 @@ vdma::ChannelId ConfigBuffer::channel_id() const CONTROL_PROTOCOL__host_buffer_info_t ConfigBuffer::get_host_buffer_info() const { - return m_buffer->get_host_buffer_info(m_acc_desc_count * m_buffer->desc_page_size()); + if (m_aligned_ccws) { + return vdma::VdmaEdgeLayer::get_host_buffer_info(vdma::VdmaEdgeLayer::Type::SCATTER_GATHER, m_desc_list->dma_address(), + m_desc_list->desc_page_size(), m_desc_list->count(), m_acc_desc_count * 
m_desc_list->desc_page_size()); + } else { + return m_buffer->get_host_buffer_info(m_acc_desc_count * m_buffer->desc_page_size()); + } } hailo_status ConfigBuffer::write_inner(const MemoryView &data) @@ -138,25 +255,17 @@ Expected> ConfigBuffer::create_sg_buffer(Ha vdma::ChannelId channel_id, const std::vector &bursts_sizes) { static const auto NOT_CIRCULAR = false; - // For config channels (In Hailo15), the page size must be a multiplication of host default page size. - // Therefore we use the flag force_default_page_size for those types of buffers. - static const auto FORCE_DEFAULT_PAGE_SIZE = true; - static const auto FORCE_BATCH_SIZE = true; - TRY(const auto buffer_size_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_multiple_transfers( - vdma::VdmaBuffer::Type::SCATTER_GATHER, driver.desc_max_page_size(), 1, bursts_sizes, NOT_CIRCULAR, - FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE)); - const auto page_size = buffer_size_requirements.desc_page_size(); - const auto descs_count = buffer_size_requirements.descs_count(); - const auto buffer_size = buffer_size_requirements.buffer_size(); - TRY(auto buffer, vdma::SgBuffer::create(driver, buffer_size, HailoRTDriver::DmaDirection::H2D)); + TRY(const auto requirements, get_sg_buffer_requirements(bursts_sizes, driver.desc_max_page_size())); + + TRY(auto buffer, vdma::SgBuffer::create(driver, requirements.buffer_size(), HailoRTDriver::DmaDirection::H2D)); auto buffer_ptr = make_shared_nothrow(std::move(buffer)); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); static const auto DEFAULT_OFFSET = 0; - TRY(auto edge_layer, vdma::SgEdgeLayer::create(std::move(buffer_ptr), buffer_size, DEFAULT_OFFSET, driver, descs_count, - page_size, NOT_CIRCULAR, channel_id)); + TRY(auto edge_layer, vdma::SgEdgeLayer::create(std::move(buffer_ptr), requirements.buffer_size(), DEFAULT_OFFSET, + driver, requirements.descs_count(), requirements.desc_page_size(), NOT_CIRCULAR, channel_id)); auto edge_layer_ptr 
= make_unique_nothrow(std::move(edge_layer)); CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); @@ -167,27 +276,17 @@ Expected> ConfigBuffer::create_sg_buffer(Ha Expected> ConfigBuffer::create_ccb_buffer(HailoRTDriver &driver, uint32_t buffer_size) { - static const auto NOT_CIRCULAR = false; - // For config channels (In Hailo15), the page size must be a multiplication of host default page size. - // Therefore we use the flag force_default_page_size for those types of buffers. - static const auto FORCE_DEFAULT_PAGE_SIZE = true; - static const auto FORCE_BATCH_SIZE = true; - static const auto DEFAULT_BATCH_SIZE = 1; - static const auto IS_VDMA_ALIGNED_BUFFER = true; - TRY(const auto buffer_size_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( - vdma::VdmaBuffer::Type::CONTINUOUS, driver.desc_max_page_size(), DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE, - buffer_size, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER)); + TRY(const auto requirements, get_ccb_buffer_requirements(buffer_size, driver.desc_max_page_size())); - const auto page_size = buffer_size_requirements.desc_page_size(); - const auto descs_count = buffer_size_requirements.descs_count(); TRY_WITH_ACCEPTABLE_STATUS(HAILO_OUT_OF_HOST_CMA_MEMORY, auto buffer, - vdma::ContinuousBuffer::create(buffer_size_requirements.buffer_size(), driver)); + vdma::ContinuousBuffer::create(requirements.buffer_size(), driver)); auto buffer_ptr = make_shared_nothrow(std::move(buffer)); CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); static const auto DEFAULT_OFFSET = 0; - TRY(auto edge_layer, vdma::ContinuousEdgeLayer::create(std::move(buffer_ptr), buffer_size, DEFAULT_OFFSET, page_size, descs_count)); + TRY(auto edge_layer, vdma::ContinuousEdgeLayer::create(std::move(buffer_ptr), requirements.buffer_size(), + DEFAULT_OFFSET, requirements.desc_page_size(), requirements.descs_count())); auto edge_layer_ptr = 
make_unique_nothrow(std::move(edge_layer)); CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); @@ -195,19 +294,46 @@ Expected> ConfigBuffer::create_ccb_buffer(H return std::unique_ptr(std::move(edge_layer_ptr)); } -bool ConfigBuffer::should_use_ccb(HailoRTDriver &driver) + +Expected ConfigBuffer::get_sg_buffer_requirements(const std::vector &cfg_sizes, + uint16_t max_desc_page_size) { - if (driver.dma_type() != HailoRTDriver::DmaType::DRAM) { + const auto NOT_CIRCULAR = false; + // For config channels (In Hailo15), the page size must be a multiplication of host default page size. + // Therefore we use the flag force_default_page_size for those types of buffers. + const auto FORCE_DEFAULT_PAGE_SIZE = true; + const auto FORCE_BATCH_SIZE = true; + const bool NOT_DDR = false; + + return vdma::BufferSizesRequirements::get_buffer_requirements_multiple_transfers( + vdma::VdmaBuffer::Type::SCATTER_GATHER, max_desc_page_size, 1, cfg_sizes, NOT_CIRCULAR, + FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, NOT_DDR); +} + +Expected ConfigBuffer::get_ccb_buffer_requirements(uint32_t buffer_size, + uint16_t max_desc_page_size) +{ + const auto NOT_CIRCULAR = false; + // For config channels (In Hailo15), the page size must be a multiplication of host default page size. + // Therefore we use the flag force_default_page_size for those types of buffers. 
+ static const auto FORCE_DEFAULT_PAGE_SIZE = true; + static const auto FORCE_BATCH_SIZE = true; + static const uint16_t DEFAULT_BATCH_SIZE = 1; + static const auto IS_VDMA_ALIGNED_BUFFER = true; + static const bool IS_NOT_DDR = false; + + return vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( + vdma::VdmaBuffer::Type::CONTINUOUS, max_desc_page_size, DEFAULT_BATCH_SIZE, DEFAULT_BATCH_SIZE, + buffer_size, NOT_CIRCULAR, FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER, IS_NOT_DDR); +} + +bool ConfigBuffer::should_use_ccb(HailoRTDriver::DmaType dma_type) +{ + if (dma_type != HailoRTDriver::DmaType::DRAM) { return false; // not supported } - if (is_env_variable_on(HAILO_FORCE_CONF_CHANNEL_OVER_DESC_ENV_VAR)) { - LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance.\n"); - return false; - } - else { - return true; - } + return !is_env_variable_on(HAILO_FORCE_CONF_CHANNEL_OVER_DESC_ENV_VAR); } } /* hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp index 4c0c888..5f2b7f2 100644 --- a/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/config_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -14,6 +14,8 @@ #include "hailo/buffer.hpp" #include "vdma/memory/vdma_edge_layer.hpp" +#include "vdma/memory/buffer_requirements.hpp" +#include "hef/core_op_metadata.hpp" namespace hailort { @@ -21,12 +23,21 @@ namespace hailort { #define CCW_BYTES_IN_WORD (4) #define CCW_DATA_OFFSET (CCW_BYTES_IN_WORD * 2) #define CCW_HEADER_SIZE (CCW_DATA_OFFSET) +#define DEFAULT_PROGRAM_BATCH_SIZE (1) +#define NOPS_TRANSFERS_PER_ALIGNED_CCWS_TRANSFER (1) -class ConfigBuffer final -{ +class ConfigBuffer final { public: - static Expected create(HailoRTDriver &driver, vdma::ChannelId channel_id, - const std::vector &bursts_sizes); + // TODO HRT-16583: Split ConfigBuffer to 2 classes: one for aligned_ccws case and for the regular case + static Expected create_for_aligned_ccws(HailoRTDriver &driver, vdma::ChannelId channel_id, + const ConfigBufferInfo &config_buffer_info, std::vector> mapped_buffers, + std::shared_ptr nops_buffer); + static Expected create_with_copy_descriptors(HailoRTDriver &driver, vdma::ChannelId channel_id, + const ConfigBufferInfo &config_buffer_info); + + static Expected get_buffer_requirements(const ConfigBufferInfo &config_buffer_info, + HailoRTDriver::DmaType dma_type, uint16_t max_desc_page_size); + static bool should_use_ccb(HailoRTDriver::DmaType driver); // Write data to config channel hailo_status write(const MemoryView &data); @@ -34,6 +45,9 @@ public: // Program the descriptors for the data written so far Expected program_descriptors(); + // Program the descriptors for alligned ccws case + Expected program_descriptors_for_aligned_ccws(const std::vector> &ccw_dma_transfers); + // On prefetch mode, we need to pad the config buffer with nops BEFORE the last write. 
hailo_status pad_with_nops(); @@ -50,6 +64,10 @@ public: private: ConfigBuffer(std::unique_ptr &&buffer, vdma::ChannelId channel_id, size_t total_buffer_size); + // Constructor for the case of alligned ccws + ConfigBuffer(std::unique_ptr &&desc_list, vdma::ChannelId channel_id, std::vector> mapped_buffers, + const std::vector> &ccw_dma_transfers, std::shared_ptr nops_buffer); + hailo_status write_inner(const MemoryView &data); static Expected> create_sg_buffer(HailoRTDriver &driver, @@ -59,7 +77,16 @@ private: static Expected> create_buffer(HailoRTDriver &driver, vdma::ChannelId channel_id, const std::vector &cfg_sizes, const uint32_t buffer_size); - static bool should_use_ccb(HailoRTDriver &driver); + // Build the descriptor list for the alligned ccws case + static Expected> build_desc_list(HailoRTDriver &driver, + std::vector> ccw_dma_transfers); + + static Expected get_sg_buffer_requirements(const std::vector &cfg_sizes, + uint16_t max_desc_page_size); + static Expected get_ccb_buffer_requirements(uint32_t buffer_size, + uint16_t max_desc_page_size); + + Expected program_descriptors_for_transfer(const std::pair &ccw_dma_transfer, uint32_t total_desc_count); std::unique_ptr m_buffer; vdma::ChannelId m_channel_id; @@ -67,6 +94,13 @@ private: size_t m_acc_buffer_offset; uint32_t m_acc_desc_count; size_t m_current_buffer_size; + + // TODO HRT-16583: Split ConfigBuffer to 2 classes: one for aligned_ccws case and for the regular case + std::unique_ptr m_desc_list; + bool m_aligned_ccws = false; + std::vector> m_mapped_buffers; + const std::vector> m_ccw_dma_transfers; + std::shared_ptr m_nops_buffer; }; } /* hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp deleted file mode 100644 index 98fedff..0000000 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.cpp +++ /dev/null @@ -1,199 +0,0 @@ -/** - * Copyright (c) 2019-2024 Hailo 
Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file intermediate_buffer.cpp - * @brief Manages intermediate buffers, including inter-context and ddr buffers. - */ - -#include "intermediate_buffer.hpp" - -#include "core_op/resource_manager/resource_manager.hpp" -#include "vdma/memory/sg_edge_layer.hpp" -#include "vdma/memory/continuous_edge_layer.hpp" -#include "vdma/memory/buffer_requirements.hpp" - - -namespace hailort -{ -Expected> IntermediateBuffer::create_edge_layer( - std::shared_ptr buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, uint16_t max_desc_size) -{ - const bool is_circular = (streaming_type == StreamingType::CIRCULAR_CONTINUOS); - auto buffer_exp = (vdma::VdmaBuffer::Type::CONTINUOUS == buffer->type()) ? - create_ccb_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size, is_circular) : - create_sg_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size, d2h_channel_id, is_circular, - max_desc_size); - - return buffer_exp; -} - -Expected IntermediateBuffer::create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, - std::shared_ptr buffer, size_t buffer_offset, uint16_t max_desc_size) -{ - max_desc_size = std::min(max_desc_size, driver.desc_max_page_size()); - - LOGGER__TRACE("Creating IntermediateBuffer: transfer_size = {}, max_batch_size = {}, d2h_channel_id = {}, " - "streaming_type = {}, buffer = 0x{:X}, buffer_offset = {}, max_desc_size = {}", - transfer_size, max_batch_size, d2h_channel_id, streaming_type, (uintptr_t)buffer.get(), buffer_offset, - max_desc_size); - - TRY(auto edge_layer_ptr, create_edge_layer(buffer, buffer_offset, driver, transfer_size, max_batch_size, - d2h_channel_id, streaming_type, max_desc_size)); - - if 
(streaming_type == StreamingType::BURST) { - // We have max_batch_size transfers, so we program them one by one. The last transfer should report interrupt - // to the device. - size_t desc_acc_offset = 0; - size_t buffer_acc_offset = 0; - for (uint16_t i = 0; i < max_batch_size; i++) { - const auto last_desc_interrupts_domain = ((max_batch_size - 1) == i) ? - InterruptsDomain::DEVICE : InterruptsDomain::NONE; - TRY(const auto desc_count_local, edge_layer_ptr->program_descriptors(transfer_size, - last_desc_interrupts_domain, desc_acc_offset, buffer_acc_offset), - "Failed to program descs for inter context channels. Given max_batch_size is too big."); - desc_acc_offset += desc_count_local; - buffer_acc_offset += (desc_count_local * edge_layer_ptr->desc_page_size()); - } - } else { - // Program all descriptors, no need for interrupt. - const auto interrupts_domain = InterruptsDomain::NONE; - const auto total_size = edge_layer_ptr->descs_count() * edge_layer_ptr->desc_page_size(); - TRY(const auto desc_count_local, edge_layer_ptr->program_descriptors(total_size, interrupts_domain, 0)); - (void)desc_count_local; - } - - return IntermediateBuffer(std::move(edge_layer_ptr), transfer_size, streaming_type, max_batch_size); -} - -Expected> IntermediateBuffer::create_shared(HailoRTDriver &driver, - uint32_t transfer_size, uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, - std::shared_ptr buffer, size_t buffer_offset, uint16_t max_desc_size) -{ - TRY(auto intermediate_buffer, create(driver, transfer_size, max_batch_size, d2h_channel_id, streaming_type, - buffer, buffer_offset, max_desc_size)); - - auto intermediate_buffer_ptr = make_shared_nothrow(std::move(intermediate_buffer)); - CHECK_NOT_NULL_AS_EXPECTED(intermediate_buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - return intermediate_buffer_ptr; -} - -Expected IntermediateBuffer::read(size_t size) -{ - if (size == 0) { - size = m_transfer_size * m_dynamic_batch_size; - } - 
CHECK_AS_EXPECTED(size <= m_edge_layer->backing_buffer_size(), HAILO_INTERNAL_FAILURE, - "Requested size {} is bigger than buffer size {}", size, m_edge_layer->backing_buffer_size()); - - TRY(auto res, Buffer::create(size)); - - auto status = m_edge_layer->read(res.data(), size, 0); - CHECK_SUCCESS_AS_EXPECTED(status); - - return res; -} - -CONTROL_PROTOCOL__host_buffer_info_t IntermediateBuffer::get_host_buffer_info() const -{ - return m_edge_layer->get_host_buffer_info(m_transfer_size); -} - -hailo_status IntermediateBuffer::reprogram_descriptors(size_t buffer_offset) -{ - CHECK(m_streaming_type == StreamingType::BURST, HAILO_INTERNAL_FAILURE, - "Reprogramming descriptors is only supported for burst streaming type"); - - CHECK(buffer_offset % m_edge_layer->desc_page_size() == 0, HAILO_INTERNAL_FAILURE, - "Buffer offset must be aligned to descriptor page size"); - - assert(m_edge_layer->backing_buffer_size() >= buffer_offset); - const auto size_to_end = m_edge_layer->backing_buffer_size() - buffer_offset; - const auto first_chunk_size = std::min(size_to_end, static_cast(m_transfer_size)); - - // Program the first chunk of descriptors - from the buffer offset to the end of the buffer - static const auto BIND = true; - static const auto DESC_LIST_START = 0; - TRY(const uint32_t first_chunk_desc_count, m_edge_layer->program_descriptors(first_chunk_size, - InterruptsDomain::NONE, DESC_LIST_START, buffer_offset, BIND)); - - uint32_t second_chunk_desc_count = 0; - if (first_chunk_size < m_transfer_size) { - // Program the second chunk of descriptors - from the start of the buffer till the end of the remaining size - static const auto BUFFER_START = 0; - const auto second_chunk_size = m_transfer_size - first_chunk_size; - TRY(second_chunk_desc_count, m_edge_layer->program_descriptors(second_chunk_size, InterruptsDomain::NONE, - first_chunk_desc_count, BUFFER_START, BIND)); - } - - const auto expected_desc_count = m_edge_layer->descs_count() - 1; - 
CHECK(first_chunk_desc_count + second_chunk_desc_count == expected_desc_count, HAILO_INTERNAL_FAILURE, - "Expected {} descriptors, got {}", expected_desc_count, first_chunk_desc_count + second_chunk_desc_count); - - return HAILO_SUCCESS; -} - -uint32_t IntermediateBuffer::transfer_size() const -{ - return m_transfer_size; -} - -IntermediateBuffer::IntermediateBuffer(std::unique_ptr &&edge_layer, uint32_t transfer_size, - StreamingType streaming_type, uint16_t batch_size) : - m_edge_layer(std::move(edge_layer)), - m_transfer_size(transfer_size), - m_streaming_type(streaming_type), - m_dynamic_batch_size(batch_size) -{} - -Expected> IntermediateBuffer::create_sg_edge_layer( - std::shared_ptr buffer, size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, - uint16_t batch_size, vdma::ChannelId d2h_channel_id, bool is_circular, uint16_t max_desc_size) -{ - static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; - static const auto FORCE_BATCH_SIZE = true; - static const auto IS_VDMA_ALIGNED_BUFFER = true; - TRY(const auto buffer_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( - vdma::VdmaBuffer::Type::SCATTER_GATHER, max_desc_size, batch_size, batch_size, transfer_size, - is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER)); - const auto desc_page_size = buffer_requirements.desc_page_size(); - const auto descs_count = buffer_requirements.descs_count(); - const auto buffer_size = buffer_requirements.buffer_size(); - - TRY(auto edge_layer, vdma::SgEdgeLayer::create(std::dynamic_pointer_cast(buffer), buffer_size, - buffer_offset, driver, descs_count, desc_page_size, is_circular, d2h_channel_id)); - - auto edge_layer_ptr = make_unique_nothrow(std::move(edge_layer)); - CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return std::unique_ptr(std::move(edge_layer_ptr)); -} - -Expected> IntermediateBuffer::create_ccb_edge_layer(std::shared_ptr buffer, - size_t 
buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular) -{ - static const auto DONT_FORCE_DEFAULT_PAGE_SIZE = false; - static const auto FORCE_BATCH_SIZE = true; - static const auto IS_VDMA_ALIGNED_BUFFER = true; - - TRY(const auto buffer_size_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( - vdma::VdmaBuffer::Type::CONTINUOUS, driver.desc_max_page_size(), batch_size, batch_size, transfer_size, - is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER)); - - const auto page_size = buffer_size_requirements.desc_page_size(); - const auto descs_count = buffer_size_requirements.descs_count(); - const auto buffer_size = buffer_size_requirements.buffer_size(); - - TRY(auto edge_layer, vdma::ContinuousEdgeLayer::create(std::dynamic_pointer_cast(buffer), - buffer_size, buffer_offset, page_size, descs_count)); - - auto edge_layer_ptr = make_unique_nothrow(std::move(edge_layer)); - CHECK_NOT_NULL_AS_EXPECTED(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); - - return std::unique_ptr(std::move(edge_layer_ptr)); -} - -} /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp b/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp deleted file mode 100644 index 909d030..0000000 --- a/hailort/libhailort/src/core_op/resource_manager/intermediate_buffer.hpp +++ /dev/null @@ -1,74 +0,0 @@ -/** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. - * Distributed under the MIT license (https://opensource.org/licenses/MIT) - **/ -/** - * @file intermediate_buffer.hpp - * @brief Manages intermediate buffers, including inter-context and ddr buffers. 
- */ - -#ifndef _HAILO_INTERMEDIATE_BUFFER_HPP_ -#define _HAILO_INTERMEDIATE_BUFFER_HPP_ - -#include "hailo/expected.hpp" -#include "hailo/buffer.hpp" - -#include "vdma/driver/hailort_driver.hpp" -#include "vdma/memory/vdma_edge_layer.hpp" -#include "vdma/memory/descriptor_list.hpp" -#include "control_protocol.h" - - -namespace hailort -{ - -class IntermediateBuffer final { -public: - - enum class StreamingType { - // Used for inter-context buffer. The buffer is not circular and the data is fetched in bursts. - BURST, - - // Used for ddr-channel buffers. The buffer is circular and fetched continuously. - CIRCULAR_CONTINUOS, - }; - - // The default value of max_desc_size (= vdma::MAX_SG_PAGE_SIZE) corresponds to the maximum descriptor size - // supported by the sg dma. - static Expected create(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, - std::shared_ptr buffer, size_t buffer_offset, - uint16_t max_desc_size = vdma::MAX_SG_PAGE_SIZE); - static Expected> create_shared(HailoRTDriver &driver, uint32_t transfer_size, - uint16_t max_batch_size, vdma::ChannelId d2h_channel_id, StreamingType streaming_type, - std::shared_ptr buffer, size_t buffer_offset, - uint16_t max_desc_size = vdma::MAX_SG_PAGE_SIZE); - - // If size is 0, the entire buffer is read (based on the transfer size passed in the create function) - Expected read(size_t size=0); - CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info() const; - hailo_status reprogram_descriptors(size_t buffer_offset); - uint32_t transfer_size() const; - -private: - IntermediateBuffer(std::unique_ptr &&buffer, uint32_t transfer_size, - StreamingType streaming_type, uint16_t batch_size); - - static Expected> create_sg_edge_layer(std::shared_ptr buffer, - size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, - vdma::ChannelId d2h_channel_id, bool is_circular, uint16_t max_desc_size); - static 
Expected> create_ccb_edge_layer(std::shared_ptr buffer, - size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t batch_size, bool is_circular); - static Expected> create_edge_layer(std::shared_ptr buffer, - size_t buffer_offset, HailoRTDriver &driver, uint32_t transfer_size, uint16_t max_batch_size, - vdma::ChannelId d2h_channel_id, StreamingType streaming_type, uint16_t max_desc_size); - - std::unique_ptr m_edge_layer; - const uint32_t m_transfer_size; - const StreamingType m_streaming_type; - uint16_t m_dynamic_batch_size; -}; - -} /* namespace hailort */ - -#endif /* _HAILO_INTERMEDIATE_BUFFER_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp index 0bfa38c..5ea277e 100644 --- a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -24,145 +24,18 @@ namespace hailort { -Expected> InternalBufferManager::create(HailoRTDriver &driver, - const ConfigureNetworkParams &config_params) +Expected> InternalBufferManager::create(HailoRTDriver &driver) { - - auto buffer_manager_ptr = make_shared_nothrow(InternalBufferManager(driver, config_params)); + auto buffer_manager_ptr = make_shared_nothrow(InternalBufferManager(driver)); CHECK_NOT_NULL_AS_EXPECTED(buffer_manager_ptr, HAILO_OUT_OF_HOST_MEMORY); return buffer_manager_ptr; } -InternalBufferManager::InternalBufferManager(HailoRTDriver &driver, const ConfigureNetworkParams &config_params) - : m_driver(driver), - m_config_params(config_params), - m_edge_layer_infos(), - m_edge_layer_to_buffer_map() +InternalBufferManager::InternalBufferManager(HailoRTDriver &driver) + : m_driver(driver) {} - -void InternalBufferManager::add_buffer_info(const EdgeLayerKey &edge_layer_key, const EdgeLayerInfo &buffer_info) -{ - m_edge_layer_infos.emplace(edge_layer_key, buffer_info); -} - -Expected InternalBufferManager::get_network_batch_size(const std::string &network_name) const -{ - for (auto const &network_map : m_config_params.network_params_by_name) { - auto const network_name_from_params = network_map.first; - if (network_name_from_params == network_name) { - auto actual_batch_size = network_map.second.batch_size; - if (HAILO_DEFAULT_BATCH_SIZE == actual_batch_size) { - actual_batch_size = DEFAULT_ACTUAL_BATCH_SIZE; - } - return actual_batch_size; - } - } - - LOGGER__ERROR("Failed to find network with network name {}", network_name); - - return make_unexpected(HAILO_NOT_FOUND); -} - -hailo_status InternalBufferManager::add_inter_context_buffer(const LayerInfo &layer_info) -{ - // This API gets the inter context input Layer, but the key is the output layer. - // The reason is that there is one output edge layer and multiple input edge layers. 
- // We must get the info of all the inputs in order to set the right start and end contexts, - // but the key must the the output (from the connected context info). - - // layer_info.connected_context_info.context_index == start context - // layer_info.context_index == end context - const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); - TRY(auto batch_size, get_network_batch_size(layer_info.network_name)); - static const bool BUFFER_REUSE = true; - - auto edge_layer_key = - std::make_pair(layer_info.connected_context_info.context_index, layer_info.connected_context_info.stream_index); - // First check if there is a key (for the case of one output multiple inputs). - - const auto it = m_edge_layer_infos.find(edge_layer_key); - if (it != m_edge_layer_infos.end()) { - CHECK(it->second.transfer_size == transfer_size, HAILO_INTERNAL_FAILURE, - "Found two edge layers with the same key but different transfer size"); - CHECK(it->second.max_transfers_in_batch == batch_size, HAILO_INTERNAL_FAILURE, - "Found two edge layers with the same key but different batch size"); - // Now if the new end context is bigger than the old one, update it. 
- if (it->second.end_context < layer_info.context_index) { - it->second.end_context = layer_info.context_index; - } - } else { - LOGGER__DEBUG("Adding edge layer with key ({}, {}) to the internal buffer manager", edge_layer_key.first, edge_layer_key.second); - add_buffer_info(edge_layer_key, - EdgeLayerInfo{ - layer_info.type, - transfer_size, - batch_size, - layer_info.connected_context_info.context_index, - layer_info.context_index, - BUFFER_REUSE}); - } - return HAILO_SUCCESS; -} - -hailo_status InternalBufferManager::add_ddr_buffer(const LayerInfo &layer_info) -{ - // In DDR - always use core bytes per buffer as row size - const auto row_size = static_cast(layer_info.nn_stream_config.core_bytes_per_buffer); - const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows; - static auto const BUFFER_REUSE = true; - auto edge_layer_key = std::make_pair(layer_info.context_index, layer_info.stream_index); - - auto it = m_edge_layer_infos.find(edge_layer_key); - CHECK(it == m_edge_layer_infos.end(), HAILO_INTERNAL_FAILURE, - "Found two edge layers with the same key for DDR layer. 
This is not supported."); - - add_buffer_info(edge_layer_key, - EdgeLayerInfo{ - layer_info.type, - row_size, - min_buffered_rows, - layer_info.context_index, - layer_info.connected_context_info.context_index, - BUFFER_REUSE}); - - return HAILO_SUCCESS; -} - -// For edge layers -hailo_status InternalBufferManager::add_layer_buffer_info(const LayerInfo &layer_info) -{ - switch (layer_info.type) { - case LayerType::INTER_CONTEXT: - return add_inter_context_buffer(layer_info); - case LayerType::DDR: - return add_ddr_buffer(layer_info); - default: - LOGGER__ERROR("Unsupported layer type for InternalBufferManager"); - return HAILO_INTERNAL_FAILURE; - } -} - -hailo_status InternalBufferManager::add_config_buffer_info(const uint16_t context_index, const size_t config_stream_index, - const std::vector &cfg_sizes) -{ - static const bool NO_REUSE = false; - static const auto SINGLE_TRANSFER_PER_BATCH = 1; - auto edge_layer_key = std::make_pair(static_cast(context_index), static_cast(MAX_EDGE_LAYERS_PER_CONTEXT + config_stream_index)); - const auto buffer_size = static_cast(std::accumulate(cfg_sizes.begin(), cfg_sizes.end(), 0)); - add_buffer_info(edge_layer_key, - EdgeLayerInfo{ - LayerType::CFG, - buffer_size, - SINGLE_TRANSFER_PER_BATCH, - context_index, - context_index, - NO_REUSE}); - - return HAILO_SUCCESS; -} - Expected> InternalBufferManager::create_intermediate_sg_buffer( const size_t buffer_size) { @@ -201,28 +74,31 @@ void InternalBufferManager::print_execution_results(const BufferPlanReport &defa if (!default_planner_meet_requirements) { LOGGER__INFO("Default Internal buffer planner failed to meet requirements"); } else { - LOGGER__INFO("Planned internal buffer memory: CMA memory {}, user memory {}. memory to edge layer usage factor is {}", - default_planner_report.cma_memory, default_planner_report.user_memory, default_planner_report.memory_utilization_factor); + LOGGER__INFO("Planned internal buffer memory: CMA={} CMA-Desc={} Pinned={}. 
memory to edge layer usage factor is {}", + default_planner_report.cma_memory, default_planner_report.cma_memory_for_descriptors, + default_planner_report.pinned_memory, default_planner_report.memory_utilization_factor); } auto default_plan_executed = (default_planner_report.cma_memory == executed_buffers_report.cma_memory) && - (default_planner_report.user_memory == executed_buffers_report.user_memory); + (default_planner_report.pinned_memory == executed_buffers_report.pinned_memory); if (default_plan_executed) { LOGGER__INFO("Default Internal buffer planner executed successfully"); } else { - LOGGER__INFO("executed internal buffer memory: CMA memory {}, user memory {}. memory to edge layer usage factor is {}", - executed_buffers_report.cma_memory, executed_buffers_report.user_memory, executed_buffers_report.memory_utilization_factor); + LOGGER__INFO("executed internal buffer memory: CMA={} CMA-Desc={} Pinned={}. memory to edge layer usage factor is {}", + executed_buffers_report.cma_memory, default_planner_report.cma_memory_for_descriptors, + executed_buffers_report.pinned_memory, executed_buffers_report.memory_utilization_factor); } } -hailo_status InternalBufferManager::plan_and_execute(InternalBufferPlanner::Type default_planner_type, +hailo_status InternalBufferManager::plan_and_execute(const std::map &edge_layer_infos, + InternalBufferPlanner::Type default_planner_type, const size_t number_of_contexts) { // Create buffer planning auto planner_type = default_planner_type; // copy of initial edge layers - auto edge_layers = m_edge_layer_infos; + auto edge_layers = edge_layer_infos; // Vector of executed buffers from the planning InternalBufferPlanning buffers_executed; // Default planner report @@ -293,14 +169,13 @@ hailo_status InternalBufferManager::execute_plan(InternalBufferPlanning &buffer_ // If one of the buffer failed due to lack to memory, try to move to next buffer. 
continue; } - for (const auto &edge_layer_offset : buffer_plan.edge_layer_offsets) { + for (const auto &edge_layer_plan : buffer_plan.edge_layer_plans) { m_edge_layer_to_buffer_map.emplace( - edge_layer_offset.first, - EdgeLayerBuffer{buffer_ptr.value(), edge_layer_offset.second}); - } - // Add edge layers to executed list - for (const auto &edge_layer_info : buffer_plan.edge_layer_infos) { - edge_layers_executed.emplace_back(edge_layer_info.first); + edge_layer_plan.key, + EdgeLayerBuffer{buffer_ptr.value(), edge_layer_plan}); + + // Add edge layers to executed list + edge_layers_executed.emplace_back(edge_layer_plan.key); } // Add buffer to executed list @@ -310,16 +185,6 @@ hailo_status InternalBufferManager::execute_plan(InternalBufferPlanning &buffer_ return execution_status; } -ExpectedRef InternalBufferManager::get_layer_buffer_info(const EdgeLayerKey &key) -{ - const auto buffer_it = m_edge_layer_infos.find(key); - if (std::end(m_edge_layer_infos) == buffer_it) { - return make_unexpected(HAILO_NOT_FOUND); - } - - return ExpectedRef(buffer_it->second); -} - Expected InternalBufferManager::get_intermediate_buffer(const EdgeLayerKey &key) { const auto buffer_it = m_edge_layer_to_buffer_map.find(key); diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp index b294e9f..1c43365 100644 --- a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -15,8 +15,6 @@ #ifndef _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ #define _HAILO_INTERNAL_BUFFER_MANAGER_HPP_ -#include "hailo/hailort.h" -#include "hailo/hef.hpp" #include "common/utils.hpp" #include "hef/layer_info.hpp" #include "vdma/memory/vdma_buffer.hpp" @@ -28,27 +26,22 @@ namespace hailort #define MAX_EDGE_LAYERS_PER_CONTEXT (20) +struct EdgeLayerBuffer { + std::shared_ptr buffer; + EdgeLayerPlan edge_layer_plan; +}; + class InternalBufferManager final { public: - static Expected> create(HailoRTDriver &driver, - const ConfigureNetworkParams &config_params); + static Expected> create(HailoRTDriver &driver); - hailo_status add_config_buffer_info(const uint16_t context_index, const size_t config_stream_index, - const std::vector &cfg_sizes); - hailo_status add_layer_buffer_info(const LayerInfo &layer_info); - ExpectedRef get_layer_buffer_info(const EdgeLayerKey &key); Expected get_intermediate_buffer(const EdgeLayerKey &key); - hailo_status plan_and_execute(InternalBufferPlanner::Type default_planner_type, const size_t number_of_contexts); + hailo_status plan_and_execute(const std::map &edge_layer_infos, + InternalBufferPlanner::Type default_planner_type, size_t number_of_contexts); private: - InternalBufferManager(HailoRTDriver &driver, const ConfigureNetworkParams &config_params); - - // Add buffer info phase functions - void add_buffer_info(const EdgeLayerKey &edge_layer_key, const EdgeLayerInfo &buffer_info); - hailo_status add_inter_context_buffer(const LayerInfo &layer_info); - hailo_status add_ddr_buffer(const LayerInfo &layer_info); - Expected get_network_batch_size(const std::string &network_name) const; + InternalBufferManager(HailoRTDriver &driver); // Execute phase functions hailo_status execute_plan(InternalBufferPlanning &buffer_planning, @@ -65,9 +58,6 @@ private: bool default_planner_meet_requirements, const BufferPlanReport &executed_buffers_report); HailoRTDriver 
&m_driver; - const ConfigureNetworkParams &m_config_params; - // m_edge_layer_infos is filled by add_buffer_info API - std::map m_edge_layer_infos; std::map m_edge_layer_to_buffer_map; }; diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp index 4f6cf16..9e0f2b1 100644 --- a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,6 +11,7 @@ **/ #include "vdma/memory/buffer_requirements.hpp" +#include "vdma/memory/descriptor_list.hpp" #include "internal_buffer_planner.hpp" #include "common/internal_env_vars.hpp" @@ -24,7 +25,7 @@ constexpr size_t NAIVE_PLANNING_EDGE_LAYER_OFFSET = 0; namespace hailort { -bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_type, HailoRTDriver::DmaType dma_type, +bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_type, HailoRTDriver::DmaType dma_type, bool force_sg_buffer_type) { if (HailoRTDriver::DmaType::PCIE == dma_type) { @@ -56,17 +57,6 @@ bool InternalBufferPlanner::should_edge_layer_use_ccb(const LayerType &layer_typ } else { return false; } - case LayerType::CFG: - if (is_env_variable_on(HAILO_FORCE_CONF_CHANNEL_OVER_DESC_ENV_VAR)) { - LOGGER__WARNING("Using desc instead of CCB for config channel is not optimal for performance."); - return false; - } - else { - return true; - } - case LayerType::CACHE: - // Cache layers are always sg - return false; default: // Shouldn't reach here assert(false); @@ -84,22 +74,20 @@ Expected InternalBufferPlanner::create_naive_buffer_plan auto sorted_edge_layer_vector = 
sort_edge_layers_by_size(edge_layer_infos); for (const auto &edge_layer_info : sorted_edge_layer_vector) { // Naive planning - Buffer holds only one transfer pattern and one edge layer - std::vector> edge_layer_offsets; - std::map plan_edge_layer_infos; - plan_edge_layer_infos.emplace(edge_layer_info.first, edge_layer_info.second); - edge_layer_offsets.emplace_back(edge_layer_info.first, NAIVE_PLANNING_EDGE_LAYER_OFFSET); vdma::VdmaBuffer::Type buffer_type = should_edge_layer_use_ccb(edge_layer_info.second.type, dma_type, force_sg_buffer_type) ? vdma::VdmaBuffer::Type::CONTINUOUS : vdma::VdmaBuffer::Type::SCATTER_GATHER; TRY_WITH_ACCEPTABLE_STATUS(HAILO_CANT_MEET_BUFFER_REQUIREMENTS, const auto buffer_requirements, return_buffer_requirements(edge_layer_info.second, buffer_type, max_page_size)); + const std::vector edge_layer_plan{ + EdgeLayerPlan{edge_layer_info.first, NAIVE_PLANNING_EDGE_LAYER_OFFSET, buffer_requirements} + }; + buffer_planning.emplace_back( BufferPlan{ buffer_type, buffer_requirements.buffer_size(), - buffer_requirements.buffer_size(), - edge_layer_offsets, - plan_edge_layer_infos}); + edge_layer_plan}); } return buffer_planning; } @@ -124,10 +112,11 @@ Expected InternalBufferPlanner::return_buffer_req static const auto FORCE_BATCH_SIZE = true; static const auto IS_VDMA_ALIGNED_BUFFER = true; const auto is_circular = (LayerType::DDR == edge_layer.type); + const auto is_ddr = (LayerType::DDR == edge_layer.type); auto buffer_requirements = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( buffer_type, max_page_size, edge_layer.max_transfers_in_batch, edge_layer.max_transfers_in_batch, edge_layer.transfer_size, is_circular, DONT_FORCE_DEFAULT_PAGE_SIZE, - FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER); + FORCE_BATCH_SIZE, IS_VDMA_ALIGNED_BUFFER, is_ddr); return buffer_requirements; } @@ -228,12 +217,9 @@ hailo_status InternalBufferPlanner::add_edge_layer_to_planning( auto end_of_edge_layer_offset = buffer_offset + edge_layer_size; 
// Update buffer size if needed buffer_plan.buffer_size = std::max(end_of_edge_layer_offset, buffer_plan.buffer_size); - // Update total edge layer size - buffer_plan.total_edge_layer_size += edge_layer_size; // Add the buffer to the buffer plan - buffer_plan.edge_layer_offsets.emplace_back(edge_layer.first, buffer_offset); - buffer_plan.edge_layer_infos.emplace(edge_layer.first, edge_layer.second); + buffer_plan.edge_layer_plans.emplace_back(EdgeLayerPlan{edge_layer.first, buffer_offset, buffer_requirements}); update_buffer_to_context_map(context_buffer_usage_vector, start_context, end_context, buffer_offset, edge_layer_size); @@ -256,7 +242,6 @@ Expected InternalBufferPlanner::create_single_buffer_pla buffer_plan.buffer_type = buffer_type; // Init buffer with size 0 buffer_plan.buffer_size = 0; - buffer_plan.total_edge_layer_size = 0; auto sorted_edge_layer_vector = sort_edge_layers_by_size(sg_edge_layers); std::vector> context_buffer_usage_vector(number_of_contexts); @@ -305,6 +290,101 @@ Expected InternalBufferPlanner::create_optimized_buffer_ return buffer_planning; } +static hailo_status add_ddr_buffer(std::map& edge_layer_infos, const LayerInfo &layer_info) +{ + // In DDR - always use core bytes per buffer as row size + const auto row_size = static_cast(layer_info.nn_stream_config.core_bytes_per_buffer); + const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows; + auto edge_layer_key = std::make_pair(layer_info.context_index, layer_info.stream_index); + + auto it = edge_layer_infos.find(edge_layer_key); + CHECK(it == edge_layer_infos.end(), HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key for DDR layer. 
This is not supported."); + + edge_layer_infos.emplace(edge_layer_key, + EdgeLayerInfo{ + layer_info.type, + row_size, + min_buffered_rows, + layer_info.context_index, + layer_info.connected_context_info.context_index, + }); + + return HAILO_SUCCESS; +} + +static hailo_status add_inter_context_buffer(std::map& edge_layer_infos, + const LayerInfo &layer_info, uint16_t batch_size) +{ + // layer_info.connected_context_info.context_index == start context (output stream) + // layer_info.context_index == end context + const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); + + assert(layer_info.direction == HAILO_H2D_STREAM); + const auto output_context_index = layer_info.connected_context_info.context_index; + const auto output_stream_index = layer_info.connected_context_info.stream_index; + const auto edge_layer_key = std::make_pair(output_context_index, output_stream_index); + + const auto it = edge_layer_infos.find(edge_layer_key); + if (it != edge_layer_infos.end()) { + CHECK(it->second.transfer_size == transfer_size, HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key but different transfer size"); + CHECK(it->second.max_transfers_in_batch == batch_size, HAILO_INTERNAL_FAILURE, + "Found two edge layers with the same key but different batch size"); + // Now if the new end context is bigger than the old one, update it. 
+ if (it->second.end_context < layer_info.context_index) { + it->second.end_context = layer_info.context_index; + } + } else { + LOGGER__DEBUG("Adding edge layer with key ({}, {}) to the internal buffer manager", edge_layer_key.first, edge_layer_key.second); + edge_layer_infos.emplace(edge_layer_key, + EdgeLayerInfo{ + layer_info.type, + transfer_size, + batch_size, + layer_info.connected_context_info.context_index, + layer_info.context_index, + }); + } + return HAILO_SUCCESS; +} + +Expected> InternalBufferPlanner::get_edge_layer_infos(const CoreOpMetadata& core_op, + const ConfigureNetworkParams& config_params) +{ + std::map edge_layer_infos; + + for (const auto &context_metadata : core_op.dynamic_contexts()) { + for (const auto &layer_info : context_metadata.get_ddr_output_layers()) { + CHECK_SUCCESS(add_ddr_buffer(edge_layer_infos, layer_info)); + } + + for (const auto &layer_info : context_metadata.get_inter_context_input_layers()) { + TRY(auto batch_size, get_network_batch_size(config_params, layer_info.network_name)); + // This API gets the inter context input Layer, but the key is the output layer. + // The reason is that there is one output edge layer and multiple input edge layers. + // We must get the info of all the inputs in order to set the right start and end contexts, + // but the key must the output (from the connected context info). 
+ CHECK_SUCCESS(add_inter_context_buffer(edge_layer_infos, layer_info, batch_size)); + } + } + + return edge_layer_infos; +} + +Expected InternalBufferPlanner::create_buffer_planning( + const CoreOpMetadata& core_op, uint16_t batch_size, Type plan_type, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size) +{ + ConfigureNetworkParams config_params{}; + config_params.batch_size = batch_size; + for (const auto &network_name : core_op.get_network_names()) { + config_params.network_params_by_name[network_name].batch_size = batch_size; + } + TRY(auto edge_layer_infos, get_edge_layer_infos(core_op, config_params)); + return create_buffer_planning(edge_layer_infos, plan_type, dma_type, max_page_size, core_op.dynamic_contexts().size()); +} + Expected InternalBufferPlanner::create_buffer_planning( const std::map &edge_layer_infos, Type plan_type, HailoRTDriver::DmaType dma_type, uint16_t max_page_size, size_t number_of_contexts) @@ -330,75 +410,39 @@ Expected InternalBufferPlanner::create_buffer_planning( } } +static size_t total_descriptors_cma_memory(const BufferPlan &buffer_plan) +{ + return std::accumulate(buffer_plan.edge_layer_plans.begin(), buffer_plan.edge_layer_plans.end(), size_t{0}, + [](size_t acc, const EdgeLayerPlan &edge_plan) { + return acc + vdma::DescriptorList::descriptors_buffer_allocation_size(edge_plan.buffer_requirements.descs_count()); + }); +} + BufferPlanReport InternalBufferPlanner::report_planning_info(const InternalBufferPlanning &buffer_planning) { BufferPlanReport report = {}; report.cma_memory = 0; - report.user_memory = 0; + report.pinned_memory = 0; report.edge_layer_size = 0; for (const auto &buffer_plan : buffer_planning) { if (vdma::VdmaBuffer::Type::CONTINUOUS == buffer_plan.buffer_type) { report.cma_memory += buffer_plan.buffer_size; } else { - report.user_memory += buffer_plan.buffer_size; + report.pinned_memory += buffer_plan.buffer_size; + report.cma_memory_for_descriptors += total_descriptors_cma_memory(buffer_plan); + } + + 
for (const auto &edge_plan : buffer_plan.edge_layer_plans) { + report.edge_layer_size += edge_plan.buffer_requirements.buffer_size(); } - report.edge_layer_size += buffer_plan.total_edge_layer_size; } + const auto total_memory = report.cma_memory + report.pinned_memory; report.memory_utilization_factor = (report.edge_layer_size > 0) ? - (static_cast(report.cma_memory + report.user_memory) / static_cast(report.edge_layer_size)) : 1; + (static_cast(total_memory) / static_cast(report.edge_layer_size)) : 1; return report; } -Expected InternalBufferPlanner::get_edge_info_from_buffer_plan(const InternalBufferPlanning &buffer_planning, - const EdgeLayerKey &edge_layer_key) -{ - for (const auto &buffer_plan : buffer_planning) { - auto it = buffer_plan.edge_layer_infos.find(edge_layer_key); - if (it != buffer_plan.edge_layer_infos.end()) { - return Expected(it->second); - } - } - return make_unexpected(HAILO_NOT_FOUND); -} - -hailo_status InternalBufferPlanner::change_edge_layer_buffer_offset(InternalBufferPlanning &buffer_planning, - const EdgeLayerKey &edge_layer_key, size_t new_offset, uint16_t max_page_size) -{ - TRY(auto edge_layer_info, get_edge_info_from_buffer_plan(buffer_planning, edge_layer_key)); - for (auto &buffer_plan : buffer_planning) { - TRY_WITH_ACCEPTABLE_STATUS(HAILO_CANT_MEET_BUFFER_REQUIREMENTS, const auto buffer_requirements, - return_buffer_requirements(edge_layer_info, buffer_plan.buffer_type, max_page_size)); - - for (auto &edge_layer_offset : buffer_plan.edge_layer_offsets) { - if (edge_layer_offset.first == edge_layer_key) { - edge_layer_offset.second = new_offset; - if (edge_layer_offset.second + buffer_requirements.buffer_size() > buffer_plan.buffer_size) { - buffer_plan.buffer_size = edge_layer_offset.second + buffer_requirements.buffer_size(); - } - return HAILO_SUCCESS; - } - } - } - return HAILO_INVALID_ARGUMENT; -} - -Expected InternalBufferPlanner::get_edge_layer_buffer_offset(const InternalBufferPlanning &buffer_planning, - const 
EdgeLayerKey &edge_layer_key) -{ - for (auto &buffer_plan : buffer_planning) { - auto it = buffer_plan.edge_layer_offsets.begin(); - while (it != buffer_plan.edge_layer_offsets.end()) { - if (it->first == edge_layer_key) { - return Expected(it->second); - } - it++; - } - } - return make_unexpected(HAILO_NOT_FOUND); -} - - } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp index 2483157..b0d639e 100644 --- a/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/internal_buffer_planner.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -17,6 +17,7 @@ #include "hailo/hef.hpp" #include "common/utils.hpp" +#include "hef/core_op_metadata.hpp" #include "hef/layer_info.hpp" #include "vdma/memory/vdma_buffer.hpp" #include "vdma/memory/buffer_requirements.hpp" @@ -32,26 +33,34 @@ struct EdgeLayerInfo { uint16_t max_transfers_in_batch; uint16_t start_context; uint16_t end_context; - bool reuse_buffer; }; -struct EdgeLayerBuffer { - std::shared_ptr buffer; +struct EdgeLayerPlan { + EdgeLayerKey key; size_t offset; + vdma::BufferSizesRequirements buffer_requirements; }; struct BufferPlan { vdma::VdmaBuffer::Type buffer_type; size_t buffer_size; - size_t total_edge_layer_size; - std::vector> edge_layer_offsets; - std::map edge_layer_infos; + std::vector edge_layer_plans; }; struct BufferPlanReport { + // Amount of CMA memory (Physically continous) in bytes needed for execution size_t cma_memory; - size_t user_memory; + + // Amount of CMA memory (Physically continous) in bytes needed for creating descriptors list. 
+ size_t cma_memory_for_descriptors; + + // Amount of pinned memory (Memory pinned to physical memory) in bytes needed for execution + size_t pinned_memory; + + // Total size of all edge layers in bytes size_t edge_layer_size; + + // How much memory is used compared to the edge layer size float memory_utilization_factor; }; @@ -81,7 +90,13 @@ public: INVALID, }; + static Expected> get_edge_layer_infos(const CoreOpMetadata& core_op, + const ConfigureNetworkParams& config_params); + // Planning functions + static Expected create_buffer_planning( + const CoreOpMetadata& core_op, uint16_t batch_size, Type plan_type, HailoRTDriver::DmaType dma_type, + uint16_t max_page_size); static Expected create_buffer_planning( const std::map &edge_layer_infos, Type plan_type, HailoRTDriver::DmaType dma_type, uint16_t max_page_size, size_t number_of_contexts); @@ -94,12 +109,6 @@ public: // Reporting functions static BufferPlanReport report_planning_info(const InternalBufferPlanning &buffer_planning); - // Debug API - static hailo_status change_edge_layer_buffer_offset(InternalBufferPlanning &buffer_planning, const EdgeLayerKey &edge_layer_key, - size_t new_offset, uint16_t max_page_size); - static Expected get_edge_layer_buffer_offset(const InternalBufferPlanning &buffer_planning, - const EdgeLayerKey &edge_layer_key); - private: // Helper functions @@ -110,8 +119,6 @@ private: static Expected return_buffer_requirements( const EdgeLayerInfo &edge_layer, const vdma::VdmaBuffer::Type buffer_type, uint16_t max_page_size); - static Expected get_edge_info_from_buffer_plan(const InternalBufferPlanning &buffer_planning, - const EdgeLayerKey &edge_layer_key); // Planning phase functions static ContextBufferUsageSegments merge_context_buffer_events( diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp index 0785655..74911d7 100644 --- 
a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -139,21 +139,10 @@ Expected PeriphCalculator::calculate_periph_registers_impl(const Laye } Expected PeriphCalculator::calculate_periph_registers(const LayerInfo &layer_info, - const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const HEFHwArch &hw_arch, - const bool is_core_hw_padding_config_in_dfc) + const uint32_t desc_page_size, const HEFHwArch &hw_arch, const bool is_core_hw_padding_config_in_dfc) { TRY(const auto max_periph_bytes_from_hef, HefConfigurator::max_periph_bytes_value(DeviceBase::hef_arch_to_device_arch(hw_arch))); const auto max_periph_bytes = std::min(max_periph_bytes_from_hef, layer_info.max_shmifo_size); - // If extension for calculating periph values in hailort is false and core hw padding is not supported - copy values from - // Core registers, otherwise calculate them according to shape and other layer information - const bool hw_padding_supported = HefConfigurator::is_core_hw_padding_supported(layer_info, max_periph_bytes, - is_core_hw_padding_config_in_dfc); - if (!is_periph_calculated_in_hailort && !hw_padding_supported) { - LayerInfo updated_layer_info = layer_info; - updated_layer_info.nn_stream_config.periph_bytes_per_buffer = layer_info.nn_stream_config.core_bytes_per_buffer; - updated_layer_info.nn_stream_config.periph_buffers_per_frame = layer_info.nn_stream_config.core_buffers_per_frame; - return updated_layer_info; - } if (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == layer_info.format.order) { return calculate_nms_periph_registers(layer_info); @@ -163,5 +152,5 @@ Expected PeriphCalculator::calculate_periph_registers(const 
LayerInfo return calculate_periph_registers_impl(layer_info, desc_page_size, max_periph_bytes, is_core_hw_padding_config_in_dfc, hw_arch); } - + } /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp index 4b97adc..6b58399 100644 --- a/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/periph_calculator.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -24,15 +24,14 @@ static const uint64_t PERIPH_FRAME_ALIGNMENT = 8; class PeriphCalculator { public: static Expected calculate_periph_registers(const LayerInfo &layer_info, - const uint32_t desc_page_size, const bool is_periph_calculated_in_hailort, const HEFHwArch &hw_arch, - const bool is_core_hw_padding_config_in_dfc); + const uint32_t desc_page_size, const HEFHwArch &hw_arch, const bool is_core_hw_padding_config_in_dfc); private: static bool is_valid_periph_bytes_value(const uint32_t periph_bytes_per_buffer, const uint32_t hw_frame_size, const bool is_ddr, const uint32_t max_shmifo_size, const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, const uint16_t core_bytes_per_buffer); static Expected calculate_nms_periph_registers(const LayerInfo &layer_info); static Expected calculate_periph_registers_impl(const LayerInfo &layer_info, - const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, + const uint32_t desc_page_size, const uint32_t max_periph_bytes_value, const bool is_core_hw_padding_config_in_dfc, const HEFHwArch &hw_arch); static uint32_t calculate_ddr_periph_buffers_per_frame(const LayerInfo &layer_info, const uint32_t periph_bytes_per_buffer); diff --git 
a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp index c44f4de..da8d8d0 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.cpp @@ -1,16 +1,19 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ #include "hailo/hailort_defaults.hpp" - #include "core_op/resource_manager/resource_manager.hpp" #include "vdma/channel/boundary_channel.hpp" #include "vdma/memory/buffer_requirements.hpp" +#include "vdma/memory/vdma_edge_layer.hpp" #include "device_common/control.hpp" #include "core_op/resource_manager/internal_buffer_manager.hpp" #include "common/internal_env_vars.hpp" +#include "vdma/memory/descriptor_list.hpp" +#include "vdma/memory/dma_able_buffer.hpp" +#include "hef/hef_internal.hpp" #include @@ -20,13 +23,21 @@ After taking into consideration the headers and pointers in it, we limit the max to 75kb (instead of 80kb) */ #define CONTEXT_SWITCH_CONFIG__MAX_BUFFER_SIZE_WITHOUT_HEADERS (1024 * 75) +#define HW_INFER_CCB_DESC_PAGE_SIZE (512) + namespace hailort { +// As a heuristic, we want to make sure NN_CORE_QUEUE_SIZE_IN_BYTES can be buffered in the nn core. 
+// It doesn't mean we will actually use all of it since the actual queue size is bounded by +// [MIN_ACTIVE_TRANSFERS_SCALE*batch_size, MAX_ACTIVE_TRANSFERS_SCALE*batch_size] +static const constexpr size_t NN_CORE_QUEUE_SIZE_IN_BYTES = 32 * 1024 * 1024; // 32MB + Expected ContextResources::create(HailoRTDriver &driver, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, uint16_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos, - std::shared_ptr internal_buffer_manager) + std::shared_ptr internal_buffer_manager, bool aligned_ccws, std::vector> mapped_buffers, + std::shared_ptr nops_buffer) { CHECK_AS_EXPECTED(context_type < CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_COUNT, HAILO_INVALID_ARGUMENT); CHECK_AS_EXPECTED(config_buffer_infos.size() <= config_channels_ids.size(), HAILO_INTERNAL_FAILURE, @@ -35,13 +46,21 @@ Expected ContextResources::create(HailoRTDriver &driver, std::vector config_buffers; config_buffers.reserve(config_buffer_infos.size()); - for (uint8_t config_stream_index = 0; config_stream_index < config_buffer_infos.size(); config_stream_index++) { - TRY(auto buffer_resource, ConfigBuffer::create(driver, config_channels_ids[config_stream_index], - config_buffer_infos.at(config_stream_index).bursts_sizes)); - config_buffers.emplace_back(std::move(buffer_resource)); - - internal_buffer_manager->add_config_buffer_info(context_index, config_stream_index, - config_buffer_infos.at(config_stream_index).bursts_sizes); + if (aligned_ccws) { + // In case of alligned ccws - we will also use the mapped buffer of the ccws_section + the nops buffer + // Also - we will program the descriptors right after creating the descriptor list + for (uint8_t config_stream_index = 0; config_stream_index < config_buffer_infos.size(); config_stream_index++) { + TRY(auto buffer_resource, ConfigBuffer::create_for_aligned_ccws(driver, 
config_channels_ids[config_stream_index], + config_buffer_infos.at(config_stream_index), mapped_buffers, nops_buffer)); + config_buffers.emplace_back(std::move(buffer_resource)); + } + } else { + // In the other case (no alligned ccws) - we will only create the config buffer (programming the descriptors will be done later) + for (uint8_t config_stream_index = 0; config_stream_index < config_buffer_infos.size(); config_stream_index++) { + TRY(auto buffer_resource, ConfigBuffer::create_with_copy_descriptors(driver, config_channels_ids[config_stream_index], + config_buffer_infos.at(config_stream_index))); + config_buffers.emplace_back(std::move(buffer_resource)); + } } return ContextResources(driver, context_type, std::move(config_buffers), internal_buffer_manager); @@ -242,11 +261,11 @@ Expected ResourcesManager::create(VdmaDevice &vdma_device, Hai const auto layer_identifier = std::make_tuple(LayerType::CFG, HAILO_H2D_STREAM, "", cfg_index); const auto engine_index = config_channels_info[cfg_index].engine_index; TRY(const auto channel_id, - allocator.get_available_channel_id(layer_identifier, HailoRTDriver::DmaDirection::H2D, engine_index)); + allocator.get_available_channel_id(layer_identifier, HailoRTDriver::DmaDirection::H2D, engine_index, false)); config_channels_ids.push_back(channel_id); } - TRY(auto internal_buffer_manager, InternalBufferManager::create(driver, config_params)); + TRY(auto internal_buffer_manager, InternalBufferManager::create(driver)); TRY(auto action_list_buffer_builder, ActionListBufferBuilder::create()); TRY(auto latency_meters, create_latency_meters_from_config_params(config_params, core_op_metadata)); auto network_index_map = core_op_metadata->get_network_names(); @@ -282,9 +301,11 @@ ResourcesManager::ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &drive m_is_configured(false), m_is_activated(false), m_config_channels_ids(std::move(config_channels_ids)), - m_hw_only_boundary_buffers(), + m_hw_only_desc_boundary_buffers(), + 
m_hw_only_ccb_boundary_buffers(), m_internal_buffer_manager(std::move(internal_buffer_manager)), - m_action_list_buffer_builder(std::move(action_list_buffer_builder)) + m_action_list_buffer_builder(std::move(action_list_buffer_builder)), + m_hw_infer_channels_info() {} ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : @@ -305,9 +326,11 @@ ResourcesManager::ResourcesManager(ResourcesManager &&other) noexcept : m_is_configured(std::exchange(other.m_is_configured, false)), m_is_activated(std::exchange(other.m_is_activated, false)), m_config_channels_ids(std::move(other.m_config_channels_ids)), - m_hw_only_boundary_buffers(std::move(other.m_hw_only_boundary_buffers)), + m_hw_only_desc_boundary_buffers(std::move(other.m_hw_only_desc_boundary_buffers)), + m_hw_only_ccb_boundary_buffers(std::move(other.m_hw_only_ccb_boundary_buffers)), m_internal_buffer_manager(std::move(other.m_internal_buffer_manager)), - m_action_list_buffer_builder(std::move(other.m_action_list_buffer_builder)) + m_action_list_buffer_builder(std::move(other.m_action_list_buffer_builder)), + m_hw_infer_channels_info(std::move(other.m_hw_infer_channels_info)) {} hailo_status ResourcesManager::fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header) @@ -342,7 +365,7 @@ hailo_status ResourcesManager::fill_network_batch_size(CONTROL_PROTOCOL__applica auto const network_name_from_map = m_network_index_map[network_index]; if (network_name_from_map == network_name_from_params) { TRY(const auto batch_size, get_network_batch_size(network_name_from_params)); - app_header.batch_size[network_index] = batch_size; + app_header.batch_size = batch_size; break; } } @@ -355,11 +378,20 @@ hailo_status ResourcesManager::fill_network_batch_size(CONTROL_PROTOCOL__applica return HAILO_SUCCESS; } -hailo_status ResourcesManager::fill_csm_buffer_size(CONTROL_PROTOCOL__application_header_t &app_header) +uint16_t ResourcesManager::get_csm_buffer_size() { // All config buffers on the same 
platform will have the same desc_page_size - because it is derived from the host - app_header.csm_buffer_size = std::min(m_driver.desc_max_page_size(), vdma::DEFAULT_SG_PAGE_SIZE); - return HAILO_SUCCESS; + return std::min(m_driver.desc_max_page_size(), vdma::DEFAULT_SG_PAGE_SIZE); +} + +void ResourcesManager::fill_config_channel_info(CONTROL_PROTOCOL__application_header_t &app_header) +{ + app_header.config_channels_count = static_cast(m_config_channels_ids.size()); + for (uint8_t config_channel_index = 0; config_channel_index < m_config_channels_ids.size(); config_channel_index++) { + CONTEXT_SWITCH_DEFS__PACKED_VDMA_CHANNEL_ID__SET(app_header.config_channel_info[config_channel_index].packed_vdma_channel_id, + m_config_channels_ids.at(config_channel_index).engine_index, + m_config_channels_ids.at(config_channel_index).channel_index); + } } Expected ResourcesManager::get_batch_size() const @@ -375,51 +407,38 @@ Expected ResourcesManager::get_batch_size() const "The same batch size must be applied to all networks inside the network group"); } } - return batch_size; + return batch_size == HAILO_DEFAULT_BATCH_SIZE ? 1 : batch_size; } -std::pair ResourcesManager::calculate_transfer_queue_sizes(const vdma::DescriptorList &desc_list, - uint32_t transfer_size, size_t max_active_trans, bool use_latency_meter) +Expected ResourcesManager::calc_default_queue_size(const LayerInfo &layer_info, uint16_t batch_size) { - // Calculate m_ongoing_transfers capacity - transfers that are already bound to the descriptor list - // Add desc for boundary channel because might need extra for non aligned async API - // We don't use get_max_aligned_transfers_in_desc_list because we want to include the option of a bounce buffer - static const auto INCLUDE_BOUNCE_BUFFER = true; - const size_t max_transfers_in_desc_list = desc_list.max_transfers(transfer_size, INCLUDE_BOUNCE_BUFFER); + const size_t transfers_per_frame = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP) ? 
+ LayerInfoUtils::get_nms_layer_max_transfers_per_frame(layer_info) : 1; + const size_t transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); + const size_t bytes_per_transfer = transfers_per_frame * transfer_size; - // Max capacity due to driver constraints (see HAILO_VDMA_MAX_ONGOING_TRANSFERS) - const size_t max_ongoing_transfers_capacity = (use_latency_meter ? - (ONGOING_TRANSFERS_SIZE / 2) : ONGOING_TRANSFERS_SIZE) - 1; + // The minimum size of the queue is to have at least MIN_ACTIVE_TRANSFERS_SCALE * batch_size transfers + const size_t min_active_transfers = (transfers_per_frame * MIN_ACTIVE_TRANSFERS_SCALE * batch_size); - const auto ongoing_transfers = std::min(max_transfers_in_desc_list, max_ongoing_transfers_capacity); + // The maximum size of the queue is to have at most MAX_ACTIVE_TRANSFERS_SCALE (notices that the batch is not + // included here since otherwise the queue will be too big). + const size_t max_active_transfers = std::max(min_active_transfers, (transfers_per_frame * MAX_ACTIVE_TRANSFERS_SCALE)); - // We want to allow max_active_trans transfers in m_pending_transfers - // * If the transfers can all fit in m_ongoing_transfers, we don't need to use m_pending_transfers so we set it - // to 0. In this case, all transfers will be handled via m_ongoing_transfers and each time launch_transfer is - // called, the transfer will be launched immediately. - // * Otherwise, we set it to max_active_trans. In this case, we will use m_pending_transfers to queue up - // transfers that can't fit in m_ongoing_transfers. We will then launch them as soon as there is room in - // m_ongoing_transfers, via the transfer launcher. - const auto pending_transfers = (max_active_trans > ongoing_transfers) ? max_active_trans : 0; - - return std::make_pair(ongoing_transfers, pending_transfers); + // Amount of transfers optimal for the nn core queue size. 
We clamp it with min/max to make sure the boundaries are + // good for us (enough frames can be queued but not too many) + const size_t nn_core_queue_size = NN_CORE_QUEUE_SIZE_IN_BYTES / bytes_per_transfer; + return clamp(nn_core_queue_size, min_active_transfers, max_active_transfers); } -hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &layer_info) +hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &layer_info, bool use_enhanced_channel) { - // TODO: put in layer info const auto channel_direction = layer_info.direction == HAILO_H2D_STREAM ? HailoRTDriver::DmaDirection::H2D : HailoRTDriver::DmaDirection::D2H; - TRY(const auto channel_id, get_available_channel_id(to_layer_identifier(layer_info), - channel_direction, layer_info.dma_engine_index)); + TRY(const auto channel_id, get_available_channel_id(to_layer_identifier(layer_info), channel_direction, + layer_info.dma_engine_index, use_enhanced_channel &&(HAILO_D2H_STREAM == layer_info.direction))); TRY(const auto network_batch_size, get_network_batch_size(layer_info.network_name)); - const auto transfers_per_frame = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP) ? 
- LayerInfoUtils::get_nms_layer_max_transfers_per_frame(layer_info) : 1; - - const auto max_active_transfers_scale = (transfers_per_frame * MAX_ACTIVE_TRANSFERS_SCALE); - TRY(const auto device_arch, m_vdma_device.get_architecture()); /* Add error in configure phase for invalid NMS parameters */ if ((layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP) && (HailoRTCommon::is_hailo1x_device_type(device_arch))) { @@ -428,34 +447,25 @@ hailo_status ResourcesManager::create_boundary_vdma_channel(const LayerInfo &lay layer_info.nms_info.number_of_classes, layer_info.nms_info.chunks_per_frame, network_batch_size, HAILO15H_NMS_MAX_CLASSES); } - const auto min_active_trans = MIN_ACTIVE_TRANSFERS_SCALE * network_batch_size; - const auto max_active_trans = (layer_info.format.order == HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP) ? - /* NMS Case - Value be be higher than UINT16_MAX. in this case we only limit to UART16_MAX with no error */ - std::min(static_cast(UINT16_MAX), max_active_transfers_scale * network_batch_size) : - max_active_transfers_scale * network_batch_size; - - CHECK(IS_FIT_IN_UINT16(min_active_trans), HAILO_INVALID_ARGUMENT, - "calculated min_active_trans for vdma descriptor list is out of UINT16 range"); - CHECK(IS_FIT_IN_UINT16(max_active_trans), HAILO_INVALID_ARGUMENT, + TRY(const auto queue_size, calc_default_queue_size(layer_info, network_batch_size)); + CHECK(IS_FIT_IN_UINT16(queue_size), HAILO_INVALID_ARGUMENT, "calculated min_active_trans for vdma descriptor list is out of UINT16 range"); const auto transfer_size = LayerInfoUtils::get_layer_transfer_size(layer_info); TRY(auto buffer_requirements, vdma::BufferSizesRequirements::get_buffer_requirements_for_boundary_channels(m_driver, - layer_info.max_shmifo_size, static_cast(min_active_trans), static_cast(max_active_trans), + layer_info.max_shmifo_size, static_cast(MIN_ACTIVE_TRANSFERS_SCALE), static_cast(queue_size), transfer_size)); + // TODO HRT-16020: Try not to allocate descriptors in ccb 
boundary hw infer const bool CIRCULAR = true; TRY(auto desc_list, vdma::DescriptorList::create(buffer_requirements.descs_count(), buffer_requirements.desc_page_size(), CIRCULAR, m_driver)); auto latency_meter = (contains(m_latency_meters, layer_info.network_name)) ? m_latency_meters.at(layer_info.network_name) : nullptr; - size_t pending_transfers = 0, ongoing_transfers = 0; - std::tie(ongoing_transfers, pending_transfers) = calculate_transfer_queue_sizes(desc_list, transfer_size, - max_active_trans, (latency_meter != nullptr)); TRY(auto vdma_transfer_launcher, m_vdma_device.get_vdma_transfer_launcher()); TRY(auto channel, vdma::BoundaryChannel::create(m_driver, channel_id, channel_direction, std::move(desc_list), - vdma_transfer_launcher.get(), ongoing_transfers, pending_transfers, false, layer_info.name, latency_meter)); + vdma_transfer_launcher.get(), queue_size, false, layer_info.name, latency_meter)); m_boundary_channels.add_channel(std::move(channel)); return HAILO_SUCCESS; @@ -476,38 +486,59 @@ hailo_power_mode_t ResourcesManager::get_power_mode() const return m_config_params.power_mode; } -ExpectedRef ResourcesManager::create_intermediate_buffer( +ExpectedRef ResourcesManager::create_intermediate_edge_layer( uint32_t transfer_size, uint16_t batch_size, uint8_t src_stream_index, uint16_t src_context_index, - vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type) + vdma::ChannelId d2h_channel_id, LayerType layer_type) { auto edge_layer_key = std::make_pair(src_context_index, src_stream_index); TRY(auto buffer_info, m_internal_buffer_manager->get_intermediate_buffer(edge_layer_key)); - TRY(auto intermediate_buffer, IntermediateBuffer::create(m_driver, transfer_size, batch_size, d2h_channel_id, - streaming_type, buffer_info.buffer, buffer_info.offset)); + CHECK((layer_type == LayerType::DDR) || (layer_type == LayerType::INTER_CONTEXT), HAILO_INTERNAL_FAILURE, + "Invalid layer type for intermediate buffer got {}", 
static_cast(layer_type)); + + const bool is_circular = (layer_type == LayerType::DDR); + TRY(auto edge_layer, vdma::VdmaEdgeLayer::create(m_driver, buffer_info.buffer, buffer_info.edge_layer_plan.offset, + buffer_info.edge_layer_plan.buffer_requirements.buffer_size(), + buffer_info.edge_layer_plan.buffer_requirements.desc_page_size(), + buffer_info.edge_layer_plan.buffer_requirements.descs_count(), + is_circular, d2h_channel_id)); + + if (LayerType::INTER_CONTEXT == layer_type) { + // We have max_batch_size transfers, so we program them one by one. The last transfer should report interrupt + // to the device. + TRY(const auto desc_count_local, edge_layer->program_descriptors(transfer_size, InterruptsDomain::DEVICE, 0, 0, + batch_size)); + (void)desc_count_local; + } else { + // Program all descriptors, no need for interrupt. + const auto interrupts_domain = InterruptsDomain::NONE; + const auto total_size = edge_layer->descs_count() * edge_layer->desc_page_size(); + TRY(const auto desc_count_local, edge_layer->program_descriptors(total_size, interrupts_domain, 0)); + (void)desc_count_local; + } const auto key = std::make_pair(src_context_index, src_stream_index); - auto emplace_res = m_intermediate_buffers.emplace(key, std::move(intermediate_buffer)); - return std::ref(emplace_res.first->second); + auto emplace_res = m_intermediate_buffers.emplace(key, std::move(edge_layer)); + return std::ref(*emplace_res.first->second); } -ExpectedRef ResourcesManager::get_intermediate_buffer(const IntermediateBufferKey &key) +ExpectedRef ResourcesManager::get_intermediate_edge_layer(const IntermediateBufferKey &key) { auto buffer_it = m_intermediate_buffers.find(key); if (std::end(m_intermediate_buffers) == buffer_it) { return make_unexpected(HAILO_NOT_FOUND); } - return std::ref(buffer_it->second); + return std::ref(*buffer_it->second); } -ExpectedRef ResourcesManager::set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, +ExpectedRef 
ResourcesManager::set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id) { return m_cache_manager->set_cache_input_channel(m_core_op_metadata->core_op_name(), cache_id, batch_size, channel_id); } -ExpectedRef ResourcesManager::set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, +ExpectedRef ResourcesManager::set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id) { return m_cache_manager->set_cache_output_channel(m_core_op_metadata->core_op_name(), cache_id, batch_size, channel_id); @@ -529,22 +560,22 @@ Expected ResourcesManager::get_control_c CHECK_SUCCESS_AS_EXPECTED(status, "Invalid validation features"); status = fill_network_batch_size(app_header); CHECK_SUCCESS_AS_EXPECTED(status, "Invalid network batch sizes"); - status = fill_csm_buffer_size(app_header); - CHECK_SUCCESS_AS_EXPECTED(status, "Invalid csm buffer size"); - + const uint16_t csm_buffer_size = get_csm_buffer_size(); + app_header.csm_buffer_size = csm_buffer_size; + fill_config_channel_info(app_header); app_header.external_action_list_address = CONTEXT_SWITCH_DEFS__INVALID_DDR_CONTEXTS_BUFFER_ADDRESS; - return app_header; } Expected> ResourcesManager::add_new_context( - CONTROL_PROTOCOL__context_switch_context_type_t context_type, const uint16_t context_index, - const ConfigBufferInfoMap &config_info) + CONTROL_PROTOCOL__context_switch_context_type_t context_type, + bool is_aligned_ccws_on, const ConfigBufferInfoMap &config_info) { CHECK_AS_EXPECTED(m_total_context_count < std::numeric_limits::max(), HAILO_INVALID_CONTEXT_COUNT); - TRY(auto context_resources, ContextResources::create(m_driver, context_type, context_index, - m_config_channels_ids, config_info, m_internal_buffer_manager)); + TRY(auto context_resources, ContextResources::create(m_driver, context_type, + m_config_channels_ids, config_info, m_internal_buffer_manager, is_aligned_ccws_on, m_ccws_section_mapped_buffers, + m_nops_mapped_buffer)); 
m_contexts_resources.emplace_back(std::move(context_resources)); m_total_context_count++; if (CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC == context_type) { @@ -555,14 +586,14 @@ Expected> ResourcesManager::add_new_con } Expected ResourcesManager::get_available_channel_id(const LayerIdentifier &layer_identifier, - HailoRTDriver::DmaDirection direction, uint8_t engine_index) + HailoRTDriver::DmaDirection direction, uint8_t engine_index, bool use_enhanced_channel) { if (m_driver.dma_type() == HailoRTDriver::DmaType::PCIE) { // On PCIe we have only 1 engine. To support the same HEF with both PCIe and DRAM, we use default engine here engine_index = vdma::DEFAULT_ENGINE_INDEX; } - return m_channel_allocator.get_available_channel_id(layer_identifier, direction, engine_index); + return m_channel_allocator.get_available_channel_id(layer_identifier, direction, engine_index, use_enhanced_channel); } hailo_status ResourcesManager::free_channel_index(const LayerIdentifier &layer_identifier) @@ -577,20 +608,7 @@ Expected ResourcesManager::get_default_streams_interfa Expected ResourcesManager::get_network_batch_size(const std::string &network_name) const { - for (auto const &network_map : m_config_params.network_params_by_name) { - auto const network_name_from_params = network_map.first; - if (network_name_from_params == network_name) { - auto actual_batch_size = network_map.second.batch_size; - if (HAILO_DEFAULT_BATCH_SIZE == actual_batch_size) { - actual_batch_size = DEFAULT_ACTUAL_BATCH_SIZE; - } - return actual_batch_size; - } - } - - LOGGER__ERROR("Failed to find network with network name {}", network_name); - - return make_unexpected(HAILO_NOT_FOUND); + return hailort::get_network_batch_size(m_config_params, network_name); } Expected ResourcesManager::read_intermediate_buffer(const IntermediateBufferKey &key) @@ -599,7 +617,10 @@ Expected ResourcesManager::read_intermediate_buffer(const IntermediateBu CHECK_AS_EXPECTED(std::end(m_intermediate_buffers) != 
intermediate_buffer_it, HAILO_NOT_FOUND, "Failed to find intermediate buffer for src_context {}, src_stream_index {}", key.first, key.second); - return intermediate_buffer_it->second.read(); + + TRY(auto buffer, Buffer::create(intermediate_buffer_it->second->size())); + CHECK_SUCCESS(intermediate_buffer_it->second->read(buffer.data(), buffer.size(), 0)); + return buffer; } Expected ResourcesManager::read_cache_buffer(uint32_t cache_id) @@ -715,6 +736,25 @@ hailo_status ResourcesManager::stop_vdma_transfer_launcher() return HAILO_SUCCESS; } +Expected ResourcesManager::get_boundary_buffer_info(vdma::BoundaryChannel &channel, + uint32_t transfer_size) +{ + if (CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL == get_hw_infer_boundary_channel_mode()) { + auto &desc_list = channel.get_desc_list(); + return vdma::VdmaEdgeLayer::get_host_buffer_info(vdma::VdmaEdgeLayer::Type::SCATTER_GATHER, desc_list.dma_address(), + desc_list.desc_page_size(), desc_list.count(), transfer_size); + } else { + CHECK(m_hw_only_ccb_boundary_buffers.end() != m_hw_only_ccb_boundary_buffers.find(channel.get_channel_id()), + HAILO_INTERNAL_FAILURE, "Error could not find channel info for channel {}", channel.get_channel_id()); + const auto &ccb_boundary_channel_buffer = m_hw_only_ccb_boundary_buffers[channel.get_channel_id()]; + const uint32_t desc_count = static_cast(DIV_ROUND_UP(ccb_boundary_channel_buffer->size(), + HW_INFER_CCB_DESC_PAGE_SIZE)); + return vdma::VdmaEdgeLayer::get_host_buffer_info(vdma::VdmaEdgeLayer::Type::CONTINUOUS, + ccb_boundary_channel_buffer->dma_address(), HW_INFER_CCB_DESC_PAGE_SIZE, desc_count, + static_cast(ccb_boundary_channel_buffer->size())); + } +} + Expected ResourcesManager::program_desc_for_hw_only_flow(vdma::DescriptorList &desc_list, vdma::MappedBuffer &mapped_buffer, vdma::ChannelId channel_id, const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count) @@ -726,7 +766,7 @@ Expected 
ResourcesManager::program_desc_for_hw_only_flow(vdma::Descrip InterruptsDomain::DEVICE : InterruptsDomain::NONE; const bool should_bind = false; CHECK_SUCCESS(desc_list.program(mapped_buffer, single_transfer_size, - (acc_desc_offset * desc_list.desc_page_size()), channel_id, static_cast(acc_desc_offset), + (acc_desc_offset * desc_list.desc_page_size()), channel_id, static_cast(acc_desc_offset), 1, should_bind, last_desc_interrupts_domain)); acc_desc_offset += desc_list.descriptors_in_buffer(single_transfer_size); } @@ -736,46 +776,113 @@ Expected ResourcesManager::program_desc_for_hw_only_flow(vdma::Descrip return static_cast(acc_desc_offset); } -Expected> ResourcesManager::create_mapped_buffer_for_hw_only_infer( +hailo_status ResourcesManager::allocate_mapped_buffer_for_hw_only_infer( vdma::BoundaryChannelPtr boundary_channel_ptr, const HailoRTDriver::DmaDirection direction, - const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count) + const uint32_t single_transfer_size, uint16_t batch_size, uint16_t batch_count) { - const auto total_frames_per_run = dynamic_batch_size * batch_count; + const auto total_frames_per_run = batch_size * batch_count; - auto &desc_list = boundary_channel_ptr->get_desc_list(); - const auto descs_per_transfer = desc_list.descriptors_in_buffer(single_transfer_size); - const auto total_desc_count = total_frames_per_run * descs_per_transfer; + switch (get_hw_infer_boundary_channel_mode()) { + case CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL: + { + auto &desc_list = boundary_channel_ptr->get_desc_list(); + const auto descs_per_transfer = desc_list.descriptors_in_buffer(single_transfer_size); + const auto total_desc_count = total_frames_per_run * descs_per_transfer; - CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(total_desc_count), HAILO_INVALID_ARGUMENT, - "calculated total_desc_count for vdma descriptor list is out of UINT16 range"); + CHECK(IS_FIT_IN_UINT16(total_desc_count), HAILO_INVALID_ARGUMENT, + "calculated 
total_desc_count for ccb buffer is out of UINT16 range"); - TRY(auto mapped_buffer, vdma::MappedBuffer::create_shared_by_allocation( - total_desc_count * desc_list.desc_page_size(), m_driver, direction)); - m_hw_only_boundary_buffers.emplace_back(std::move(mapped_buffer)); + // Use ccb buffer for boundary layer hw only infer instead of normal buffer + TRY(auto ccb_buffer, vdma::ContinuousBuffer::create(total_desc_count * HW_INFER_CCB_DESC_PAGE_SIZE, m_driver)); + auto buffer_ptr = make_shared_nothrow(std::move(ccb_buffer)); + CHECK(nullptr != buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - static const auto DEFAULT_BUFFER_OFFSET = 0; - auto status = desc_list.program(*m_hw_only_boundary_buffers.back(), - m_hw_only_boundary_buffers.back()->size(), DEFAULT_BUFFER_OFFSET, boundary_channel_ptr->get_channel_id()); - CHECK_SUCCESS_AS_EXPECTED(status); + m_hw_only_ccb_boundary_buffers[boundary_channel_ptr->get_channel_id()] = std::move(buffer_ptr); + break; + } + default: + { + LOGGER__ERROR("Invalid boundary channel mode {}", static_cast(get_hw_infer_boundary_channel_mode())); + return HAILO_INVALID_ARGUMENT; + } + } - TRY(auto desc_programed, - program_desc_for_hw_only_flow(desc_list, *m_hw_only_boundary_buffers.back(), boundary_channel_ptr->get_channel_id(), single_transfer_size, dynamic_batch_size, batch_count)); - 
assert(static_cast(total_desc_count) == desc_programed); + return HAILO_SUCCESS; +} - auto channel_info_pair = std::make_pair(boundary_channel_ptr->get_channel_id(), desc_programed); +hailo_status ResourcesManager::configure_mapped_buffer_for_hw_only_infer(vdma::BoundaryChannelPtr boundary_channel_ptr, + const uint32_t single_transfer_size, uint16_t batch_size, uint16_t batch_count, + CONTROL_PROTOCOL__hw_infer_channels_info_t &channels_info) +{ + std::pair channel_info_pair; - return channel_info_pair; + switch (get_hw_infer_boundary_channel_mode()) { + case CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL: + { + auto &desc_list = boundary_channel_ptr->get_desc_list(); + CHECK(m_hw_only_desc_boundary_buffers.end() != m_hw_only_desc_boundary_buffers.find( + boundary_channel_ptr->get_channel_id()), HAILO_INTERNAL_FAILURE); + auto hw_infer_mapped_buffer = m_hw_only_desc_boundary_buffers[boundary_channel_ptr->get_channel_id()]; + + static const auto DEFAULT_BUFFER_OFFSET = 0; + auto status = desc_list.program(*hw_infer_mapped_buffer, hw_infer_mapped_buffer->size(), + DEFAULT_BUFFER_OFFSET, boundary_channel_ptr->get_channel_id()); + CHECK_SUCCESS(status); + + TRY(auto desc_programed, program_desc_for_hw_only_flow(desc_list, *hw_infer_mapped_buffer, + boundary_channel_ptr->get_channel_id(), single_transfer_size, batch_size, batch_count)); + assert(static_cast(desc_programed == DIV_ROUND_UP(hw_infer_mapped_buffer->size(), + desc_list.desc_page_size()))); + + channel_info_pair = std::make_pair(boundary_channel_ptr->get_channel_id(), desc_programed); + break; + } + case CONTROL_PROTOCOL__CCB_BOUNDARY_CHANNEL: + { + // Divide with round up + const auto descs_per_transfer = DIV_ROUND_UP(single_transfer_size, HW_INFER_CCB_DESC_PAGE_SIZE); + const auto total_desc_count = batch_size * batch_count * descs_per_transfer; + + CHECK(IS_FIT_IN_UINT16(total_desc_count), HAILO_INVALID_ARGUMENT, + "calculated total_desc_count for ccb buffer is out of UINT16 range"); + + channel_info_pair = 
std::make_pair(boundary_channel_ptr->get_channel_id(), + static_cast(total_desc_count)); + break; + } + default: + { + LOGGER__ERROR("Invalid boundary channel mode {}", static_cast(get_hw_infer_boundary_channel_mode())); + return HAILO_INVALID_ARGUMENT; + } + } + + add_channel_to_hw_infer_channel_info(std::move(channel_info_pair), channels_info); + return HAILO_SUCCESS; } void ResourcesManager::add_channel_to_hw_infer_channel_info(std::pair channel_info, CONTROL_PROTOCOL__hw_infer_channels_info_t &channels_info) { - auto next_chnanel_info = &channels_info.channel_info[channels_info.channel_count]; + auto next_channel_info = &channels_info.channel_info[channels_info.channel_count]; assert(channels_info.channel_count < CONTROL_PROTOCOL__MAX_TOTAL_CHANNEL_COUNT); - next_chnanel_info->engine_index = channel_info.first.engine_index; - next_chnanel_info->channel_index = channel_info.first.channel_index; - next_chnanel_info->desc_programed = channel_info.second; + next_channel_info->engine_index = channel_info.first.engine_index; + next_channel_info->channel_index = channel_info.first.channel_index; + next_channel_info->desc_programed = channel_info.second; channels_info.channel_count++; } @@ -800,17 +907,82 @@ hailo_status ResourcesManager::set_hw_infer_done_notification(std::condition_var return HAILO_SUCCESS; } +hailo_status ResourcesManager::configure_boundary_channels_for_hw_infer(uint16_t batch_size, uint16_t batch_count) +{ + hailo_status status = HAILO_UNINITIALIZED; + // Function should only be called in hw infer mode + CHECK(is_env_variable_on(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR), HAILO_INTERNAL_FAILURE, + "Error called hw infer without env variable for hw infer set - please set environment variable and try again"); + + CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info = {}; + channels_info.channel_count = 0; + for (const auto &metadata_layer_info : m_core_op_metadata->get_all_layer_infos()) { + const std::vector &layers = 
metadata_layer_info.is_multi_planar ? metadata_layer_info.planes : + std::vector{metadata_layer_info}; + for (const auto &layer_info : layers) { + const auto &stream_infos = LayerInfoUtils::get_stream_infos_from_layer_info(layer_info); + for (auto &stream_info : stream_infos) { + auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == stream_info.format.order) ? + stream_info.nms_info.bbox_size : stream_info.hw_frame_size; + TRY(auto boundary_channel_ptr, get_boundary_vdma_channel_by_stream_name(layer_info.name)); + status = configure_mapped_buffer_for_hw_only_infer(boundary_channel_ptr, single_transfer_size, batch_size, + batch_count, channels_info); + CHECK_SUCCESS(status); + } + } + } + + m_hw_infer_channels_info = channels_info; + return HAILO_SUCCESS; +} + +hailo_status ResourcesManager::allocate_boundary_channels_buffers_hw_infer() +{ + hailo_status status = HAILO_UNINITIALIZED; + // Function should only be called in hw infer mode + CHECK(is_env_variable_on(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR), HAILO_INTERNAL_FAILURE, + "Error called hw infer without env variable for hw infer set - please set environment variable and try again"); + + TRY(const auto batch_size, get_batch_size()); + TRY(const auto batch_count, calc_hw_infer_batch_count(batch_size)); + + for (const auto &metadata_layer_info : m_core_op_metadata->get_all_layer_infos()) { + const std::vector &layers = metadata_layer_info.is_multi_planar ? metadata_layer_info.planes : + std::vector{metadata_layer_info}; + for (const auto &layer_info : layers) { + const auto &stream_infos = LayerInfoUtils::get_stream_infos_from_layer_info(layer_info); + for (auto &stream_info : stream_infos) { + auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == stream_info.format.order) ? 
+ stream_info.nms_info.bbox_size : stream_info.hw_frame_size; + TRY(auto boundary_channel_ptr, get_boundary_vdma_channel_by_stream_name(layer_info.name)); + const auto direction = (layer_info.direction == HAILO_H2D_STREAM) ? + HailoRTDriver::DmaDirection::H2D : HailoRTDriver::DmaDirection::D2H; + status = allocate_mapped_buffer_for_hw_only_infer(boundary_channel_ptr, direction, + single_transfer_size, batch_size, batch_count); + CHECK_SUCCESS(status); + } + } + } + + return HAILO_SUCCESS; +} + Expected ResourcesManager::calc_hw_infer_batch_count(uint16_t dynamic_batch_size) { uint16_t batch_count = UINT16_MAX; - for (const auto &layer_info : m_core_op_metadata->get_all_layer_infos()) { - const auto &stream_infos = LayerInfoUtils::get_stream_infos_from_layer_info(layer_info); - for (auto &stream_info : stream_infos) { - uint32_t single_transfer_size = LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info); - TRY(auto boundary_channel_ptr, get_boundary_vdma_channel_by_stream_name(layer_info.name)); - const auto max_batch_transfers = boundary_channel_ptr->get_desc_list().max_transfers(single_transfer_size * dynamic_batch_size); - // infer batch count is the lowest number of "Max transfers" per descriptor list that for all given boundary channels. - batch_count = MIN(batch_count, max_batch_transfers); + + for (const auto &metadata_layer_info : m_core_op_metadata->get_all_layer_infos()) { + const std::vector &layers = metadata_layer_info.is_multi_planar ? 
metadata_layer_info.planes : + std::vector{metadata_layer_info}; + for (const auto &layer_info : layers) { + const auto &stream_infos = LayerInfoUtils::get_stream_infos_from_layer_info(layer_info); + for (auto &stream_info : stream_infos) { + uint32_t single_transfer_size = LayerInfoUtils::get_stream_transfer_size(stream_info, layer_info); + TRY(auto boundary_channel_ptr, get_boundary_vdma_channel_by_stream_name(layer_info.name)); + const auto max_batch_transfers = boundary_channel_ptr->get_desc_list().max_transfers(single_transfer_size * dynamic_batch_size); + // infer batch count is the lowest number of "Max transfers" per descriptor list that for all given boundary channels. + batch_count = MIN(batch_count, max_batch_transfers); + } } } return batch_count; @@ -845,27 +1017,13 @@ HwInferResults ResourcesManager::hw_infer_calc_stats(uint16_t batch_count, uint1 Expected ResourcesManager::run_hw_only_infer() { CONTROL_PROTOCOL__hw_only_infer_results_t fw_infer_results{}; - CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info{}; - channels_info.channel_count = 0; static constexpr auto INFER_TIMEOUT = std::chrono::milliseconds(120000); + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode = get_hw_infer_boundary_channel_mode(); TRY(const auto batch_size, get_batch_size()); TRY(const auto batch_count, calc_hw_infer_batch_count(batch_size)); - for (const auto &layer_info : m_core_op_metadata->get_all_layer_infos()) { - TRY(auto boundary_channel_ptr, get_boundary_vdma_channel_by_stream_name(layer_info.name)); - const auto &stream_infos = LayerInfoUtils::get_stream_infos_from_layer_info(layer_info); - for (auto &stream_info : stream_infos) { - auto single_transfer_size = (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == stream_info.format.order) ? - stream_info.nms_info.bbox_size : stream_info.hw_frame_size; - const auto direction = (layer_info.direction == HAILO_H2D_STREAM) ? 
- HailoRTDriver::DmaDirection::H2D : HailoRTDriver::DmaDirection::D2H; - - TRY(const auto channel_info_pair, create_mapped_buffer_for_hw_only_infer(std::move(boundary_channel_ptr), direction, - single_transfer_size, batch_size, batch_count)); - add_channel_to_hw_infer_channel_info(std::move(channel_info_pair), channels_info); - } - } + auto config_status = configure_boundary_channels_for_hw_infer(batch_size, batch_count); + CHECK_SUCCESS_AS_EXPECTED(config_status); std::condition_variable infer_done_cond; auto status = set_hw_infer_done_notification(infer_done_cond); @@ -875,7 +1033,7 @@ Expected ResourcesManager::run_hw_only_infer() std::unique_lock lock(mutex); status = Control::start_hw_only_infer(m_vdma_device, m_core_op_index, batch_size, - batch_count, &channels_info); + batch_count, &m_hw_infer_channels_info, boundary_channel_mode); CHECK_SUCCESS_AS_EXPECTED(status); infer_done_cond.wait_for(lock, INFER_TIMEOUT); @@ -892,22 +1050,53 @@ Expected ResourcesManager::run_hw_only_infer() hailo_status ResourcesManager::fill_internal_buffers_info() { - for (const auto &context_metadata : m_core_op_metadata->dynamic_contexts()) { - for (const auto &layer_info : context_metadata.get_ddr_output_layers()) { - auto status = m_internal_buffer_manager->add_layer_buffer_info(layer_info); - CHECK_SUCCESS(status); - } - for (const auto &layer_info : context_metadata.get_inter_context_input_layers()) { - auto status = m_internal_buffer_manager->add_layer_buffer_info(layer_info); - CHECK_SUCCESS(status); - } - } + TRY(const auto edge_layer_info, InternalBufferPlanner::get_edge_layer_infos(*m_core_op_metadata, m_config_params)); - auto status = m_internal_buffer_manager->plan_and_execute(InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, + auto status = m_internal_buffer_manager->plan_and_execute(edge_layer_info, + InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, m_core_op_metadata->dynamic_contexts().size()); CHECK_SUCCESS(status); return HAILO_SUCCESS; } +static Expected parse_buffer_size_from_env_var() { + auto 
expected_mapped_buffer_size = get_env_variable_as_size(HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE_ENV_VAR); + if (HAILO_NOT_FOUND == expected_mapped_buffer_size.status()) { + // Environment variable not set, return the default. + return HAILO_ALIGNED_CCWS_MAPPED_BUFFER_SIZE; + } else { + return expected_mapped_buffer_size; + } +} + +hailo_status ResourcesManager::map_and_set_ccws_section_buffer(BufferPtr hef_as_buffer, size_t offset_to_ccws_section, uint64_t ccws_section_size, HailoRTDriver &driver) +{ + assert(m_ccws_section_mapped_buffers.empty()); + + m_hef_as_buffer = hef_as_buffer; // keep the hef buffer alive + + /* + * Optimization for the aligned_ccws feature. + * + * Previously, we used to have a single huge mapped buffer for the entire CCWS section, that is split to sg entries (each sg entry is offset + size). + * That way - if we configure from offset 1Gb in the ccws section - we will have to iterate over all of the entries until that offset to find the right sg entry - it was very inefficient. + * + * With this optimization, we split the huge mapped buffer to smaller buffers, such that the search for each sg entry will be much faster. + */ + + TRY(const auto buffer_size, parse_buffer_size_from_env_var()); + const auto buffers_count = DIV_ROUND_UP(ccws_section_size, buffer_size); + for (size_t i = 0; i < buffers_count; i++) { + // Last chunk gets the remaining bytes; equals buffer_size when ccws_section_size is an exact multiple. + auto current_buffer_size = (i == (buffers_count - 1)) ? 
(ccws_section_size - ((buffers_count - 1) * buffer_size)) : buffer_size; + TRY(auto dmable_buffer_ptr, vdma::DmaAbleBuffer::create_from_user_address( + hef_as_buffer->data() + offset_to_ccws_section + i * buffer_size, current_buffer_size)); + TRY(auto mapped_buffer_ptr, vdma::MappedBuffer::create_shared(dmable_buffer_ptr, driver, + HailoRTDriver::DmaDirection::H2D)); + m_ccws_section_mapped_buffers.push_back(mapped_buffer_ptr); + } + + return HAILO_SUCCESS; +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp index aa6452c..4564c1c 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -28,7 +28,6 @@ #include "hailo/hailort.h" -#include "core_op/resource_manager/intermediate_buffer.hpp" #include "core_op/resource_manager/cache_buffer.hpp" #include "core_op/resource_manager/cache_manager.hpp" #include "core_op/resource_manager/config_buffer.hpp" @@ -38,6 +37,7 @@ #include "vdma/channel/boundary_channel.hpp" #include "vdma/pcie/pcie_device.hpp" #include "internal_buffer_manager.hpp" +#include "vdma/memory/continuous_buffer.hpp" namespace hailort { @@ -85,9 +85,12 @@ class ContextResources final { public: static Expected create(HailoRTDriver &driver, - CONTROL_PROTOCOL__context_switch_context_type_t context_type, const uint16_t context_index, + CONTROL_PROTOCOL__context_switch_context_type_t context_type, const std::vector &config_channels_ids, const ConfigBufferInfoMap &config_buffer_infos, - std::shared_ptr internal_buffer_manager); + std::shared_ptr internal_buffer_manager, + bool 
aligned_ccws, + std::vector> mapped_buffers = {}, + std::shared_ptr nops_buffer = nullptr); hailo_status add_edge_layer(const LayerInfo &layer_info, vdma::ChannelId channel_id, const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const SupportedFeatures &supported_features); @@ -145,14 +148,16 @@ public: ResourcesManager &operator=(ResourcesManager &&other) = delete; ResourcesManager(ResourcesManager &&other) noexcept; - ExpectedRef create_intermediate_buffer( + ExpectedRef create_intermediate_edge_layer( uint32_t transfer_size, uint16_t batch_size, uint8_t src_stream_index, uint16_t src_context_index, - vdma::ChannelId d2h_channel_id, IntermediateBuffer::StreamingType streaming_type); - ExpectedRef get_intermediate_buffer(const IntermediateBufferKey &key); - ExpectedRef set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); - ExpectedRef set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); + vdma::ChannelId d2h_channel_id, LayerType layer_type); + ExpectedRef get_intermediate_edge_layer(const IntermediateBufferKey &key); + ExpectedRef set_cache_input_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); + ExpectedRef set_cache_output_channel(uint32_t cache_id, uint16_t batch_size, vdma::ChannelId channel_id); ExpectedRef> get_cache_buffers(); - hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info); + + Expected calc_default_queue_size(const LayerInfo &layer_info, uint16_t batch_size); + hailo_status create_boundary_vdma_channel(const LayerInfo &layer_info, bool use_enhanced_channel = false); Expected get_control_core_op_header(); @@ -160,7 +165,7 @@ public: Expected> add_new_context( CONTROL_PROTOCOL__context_switch_context_type_t context_type, - const uint16_t context_index, const ConfigBufferInfoMap &config_info={}); + bool is_aligned_ccws_on, const ConfigBufferInfoMap &config_info={}); const SupportedFeatures &get_supported_features() const 
{ @@ -173,7 +178,7 @@ public: } Expected get_available_channel_id(const LayerIdentifier &layer_identifier, - HailoRTDriver::DmaDirection direction, uint8_t engine_index); + HailoRTDriver::DmaDirection direction, uint8_t engine_index, bool use_enhanced_channel = false); hailo_status free_channel_index(const LayerIdentifier &layer_identifier); const char* get_dev_id() const @@ -207,23 +212,33 @@ public: hailo_status stop_vdma_transfer_launcher(); Expected get_network_batch_size(const std::string &network_name) const; Expected get_boundary_vdma_channel_by_stream_name(const std::string &stream_name); - Expected> get_boundary_vdma_channel_by_stream_name(const std::string &stream_name) const; + Expected> get_boundary_vdma_channel_by_stream_name( + const std::string &stream_name) const; hailo_power_mode_t get_power_mode() const; + Expected get_boundary_buffer_info(vdma::BoundaryChannel &channel, + uint32_t transfer_size); Expected program_desc_for_hw_only_flow(vdma::DescriptorList &desc_list, vdma::MappedBuffer &mapped_buffer, vdma::ChannelId channel_id, const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count); - Expected> create_mapped_buffer_for_hw_only_infer( - vdma::BoundaryChannelPtr boundary_channel_ptr, const HailoRTDriver::DmaDirection direction, - const uint32_t single_transfer_size, const uint16_t dynamic_batch_size, const uint16_t batch_count); + hailo_status allocate_mapped_buffer_for_hw_only_infer(vdma::BoundaryChannelPtr boundary_channel_ptr, + const HailoRTDriver::DmaDirection direction, const uint32_t single_transfer_size, uint16_t batch_size, uint16_t batch_count); + hailo_status configure_mapped_buffer_for_hw_only_infer(vdma::BoundaryChannelPtr boundary_channel_ptr, + const uint32_t single_transfer_size, uint16_t batch_size, uint16_t batch_count, + CONTROL_PROTOCOL__hw_infer_channels_info_t &channels_info); void add_channel_to_hw_infer_channel_info(std::pair channel_info, 
CONTROL_PROTOCOL__hw_infer_channels_info_t &channels_info); Expected calc_hw_infer_batch_count(uint16_t dynamic_batch_size); HwInferResults hw_infer_calc_stats(uint16_t batch_count, uint16_t dynamic_batch_size, size_t single_frame_transfer_size, uint32_t infer_cycles); hailo_status set_hw_infer_done_notification(std::condition_variable &infer_done_cond); + hailo_status configure_boundary_channels_for_hw_infer(uint16_t batch_size, uint16_t batch_count); + hailo_status allocate_boundary_channels_buffers_hw_infer(); Expected run_hw_only_infer(); hailo_status fill_internal_buffers_info(); static bool should_use_ddr_action_list(size_t num_contexts, HailoRTDriver::DmaType dma_type); + Expected get_batch_size() const; + hailo_status map_and_set_ccws_section_buffer(BufferPtr hef_as_buffer, size_t offset_to_ccws_section, uint64_t ccws_section_size, HailoRTDriver &driver); + bool get_can_fast_batch_switch() { return m_core_op_metadata->get_can_fast_batch_switch(); @@ -239,16 +254,24 @@ public: return m_is_activated; } + CONTROL_PROTOCOL__boundary_channel_mode_t get_hw_infer_boundary_channel_mode() const + { + return (is_env_variable_on(HAILO_HW_INFER_BOUNDARY_CHANNELS_OVER_CCB_ENV_VAR) ? 
+ CONTROL_PROTOCOL__CCB_BOUNDARY_CHANNEL : CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL); + } + + void set_nops_mapped_buffer(vdma::MappedBufferPtr nops_buffer) + { + m_nops_mapped_buffer = nops_buffer; + } + + uint16_t get_csm_buffer_size(); + private: hailo_status fill_infer_features(CONTROL_PROTOCOL__application_header_t &app_header); hailo_status fill_validation_features(CONTROL_PROTOCOL__application_header_t &app_header); hailo_status fill_network_batch_size(CONTROL_PROTOCOL__application_header_t &app_header); - hailo_status fill_csm_buffer_size(CONTROL_PROTOCOL__application_header_t &app_header); - Expected get_batch_size() const; - - // - static std::pair calculate_transfer_queue_sizes(const vdma::DescriptorList &desc_list, - uint32_t transfer_size, size_t max_active_trans, bool use_latency_meter); + void fill_config_channel_info(CONTROL_PROTOCOL__application_header_t &app_header); std::vector m_contexts_resources; ChannelAllocator m_channel_allocator; @@ -256,7 +279,7 @@ private: HailoRTDriver &m_driver; const ConfigureNetworkParams m_config_params; CacheManagerPtr m_cache_manager; - std::map m_intermediate_buffers; + std::map> m_intermediate_buffers; std::shared_ptr m_core_op_metadata; uint8_t m_core_op_index; uint16_t m_dynamic_context_count; @@ -266,13 +289,19 @@ private: vdma::ChannelsGroup m_boundary_channels; bool m_is_configured; bool m_is_activated; + std::vector m_ccws_section_mapped_buffers; + vdma::MappedBufferPtr m_nops_mapped_buffer; + std::shared_ptr m_hef_as_buffer; // Config channels ids are shared between all context. The following vector contains the channel id for each // config_stream_index. 
std::vector m_config_channels_ids; // Mapped buffers would be used only in hw only flow - std::vector> m_hw_only_boundary_buffers; + std::map> m_hw_only_desc_boundary_buffers; + // Use ccb buffer for hw only flow + std::map> m_hw_only_ccb_boundary_buffers; std::shared_ptr m_internal_buffer_manager; std::shared_ptr m_action_list_buffer_builder; + CONTROL_PROTOCOL__hw_infer_channels_info_t m_hw_infer_channels_info; ResourcesManager(VdmaDevice &vdma_device, HailoRTDriver &driver, ChannelAllocator &&channel_allocator, const ConfigureNetworkParams config_params, diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp index 2f20326..10694f7 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -79,9 +79,8 @@ static Expected calculate_credit_params(const CONTROL_PROTOCOL__hw_co } static Expected update_layer_info(const LayerInfo &original_layer_info, - const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, - const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, const bool should_optimize_credits, - const bool is_periph_calculated_in_hailort, const bool is_core_hw_padding_config_in_dfc) + const CONTROL_PROTOCOL__host_buffer_info_t &buffer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, + const HEFHwArch &hw_arch, const bool should_optimize_credits, const bool is_core_hw_padding_config_in_dfc) { LayerInfo local_layer_info = original_layer_info; @@ -90,11 +89,10 @@ static Expected update_layer_info(const LayerInfo &original_layer_inf local_layer_info.max_shmifo_size = hw_consts.default_initial_credit_size; } - local_layer_info.nn_stream_config.is_periph_calculated_in_hailort = is_periph_calculated_in_hailort; local_layer_info.nn_stream_config.is_core_hw_padding_config_in_dfc = is_core_hw_padding_config_in_dfc; TRY(const auto updated_periph_layer_info, PeriphCalculator::calculate_periph_registers(local_layer_info, - buffer_info.desc_page_size, is_periph_calculated_in_hailort, hw_arch, is_core_hw_padding_config_in_dfc)); + buffer_info.desc_page_size, hw_arch, is_core_hw_padding_config_in_dfc)); TRY(auto updated_local_layer_info, calculate_credit_params(hw_consts, buffer_info.desc_page_size, should_optimize_credits, updated_periph_layer_info)); @@ -102,13 +100,6 @@ static Expected update_layer_info(const LayerInfo &original_layer_inf return updated_local_layer_info; } -static CONTROL_PROTOCOL__host_buffer_info_t get_boundary_buffer_info(vdma::BoundaryChannel &channel, uint32_t transfer_size) -{ - auto &desc_list = channel.get_desc_list(); - return vdma::VdmaEdgeLayer::get_host_buffer_info(vdma::VdmaEdgeLayer::Type::SCATTER_GATHER, 
desc_list.dma_address(), - desc_list.desc_page_size(), desc_list.count(), transfer_size); -} - static hailo_status fill_boundary_input_layer_impl(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, bool should_optimize_credits) @@ -117,11 +108,10 @@ static hailo_status fill_boundary_input_layer_impl(ContextResources &context_res TRY(const auto vdma_channel, resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name)); - const auto buffer_info = get_boundary_buffer_info(*vdma_channel, transfer_size); - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; + TRY(const auto buffer_info, resources_manager.get_boundary_buffer_info(*vdma_channel, transfer_size)); const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; TRY(auto local_layer_info, update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits, - is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + is_core_hw_padding_config_in_dfc)); const auto channel_id = vdma_channel->get_channel_id(); auto status = context_resources.add_edge_layer(local_layer_info, channel_id, buffer_info, @@ -152,22 +142,23 @@ static hailo_status fill_inter_context_input_layer(ContextResources &context_res const HEFHwArch &hw_arch, bool should_optimize_credits) { TRY(const auto channel_id, resources_manager.get_available_channel_id(to_layer_identifier(layer_info), - HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index)); + HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index, false)); - /* Get inter context buffer previously created */ + const auto frame_credits_in_bytes = LayerInfoUtils::get_layer_transfer_size(layer_info); + + // Get inter context edge layer previously created const auto &connected_context = 
layer_info.connected_context_info; auto intermediate_buffer_key = std::make_pair(connected_context.context_index, connected_context.stream_index); - TRY(auto inter_context_buffer, resources_manager.get_intermediate_buffer(intermediate_buffer_key), + TRY(auto inter_context_buffer, resources_manager.get_intermediate_edge_layer(intermediate_buffer_key), "Failed to find inter context buffer for src context {}, src_stream_index {}", connected_context.context_index, connected_context.stream_index); - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; - TRY(auto local_layer_info, update_layer_info(layer_info, inter_context_buffer.get().get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + TRY(auto local_layer_info, update_layer_info(layer_info, inter_context_buffer.get().get_host_buffer_info(frame_credits_in_bytes), hw_consts, + hw_arch, should_optimize_credits, is_core_hw_padding_config_in_dfc)); auto status = context_resources.add_edge_layer(local_layer_info, channel_id, - inter_context_buffer.get().get_host_buffer_info(), resources_manager.get_supported_features()); + inter_context_buffer.get().get_host_buffer_info(frame_credits_in_bytes), resources_manager.get_supported_features()); CHECK_SUCCESS(status); LOGGER__DEBUG("Intermediate edge key: {}:{} src_context:{}, dst_context: {}, h2d_channel {}.", @@ -185,11 +176,10 @@ static hailo_status fill_boundary_output_layer(ContextResources &context_resourc TRY(const auto vdma_channel, resources_manager.get_boundary_vdma_channel_by_stream_name(layer_info.name)); - const auto buffer_info = get_boundary_buffer_info(*vdma_channel, transfer_size); - const bool is_periph_calculated_in_hailort = 
resources_manager.get_supported_features().periph_calculation_in_hailort; + TRY(const auto buffer_info, resources_manager.get_boundary_buffer_info(*vdma_channel, transfer_size)); const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; TRY(auto local_layer_info, update_layer_info(layer_info, buffer_info, hw_consts, hw_arch, should_optimize_credits, - is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + is_core_hw_padding_config_in_dfc)); const auto channel_id = vdma_channel->get_channel_id(); auto status = context_resources.add_edge_layer(local_layer_info, channel_id, buffer_info, @@ -205,23 +195,23 @@ static hailo_status fill_inter_context_output_layer(ContextResources &context_re const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, bool should_optimize_credits) { TRY(const auto channel_id, resources_manager.get_available_channel_id(to_layer_identifier(layer_info), - HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index)); + HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index, false)); const auto frame_credits_in_bytes = LayerInfoUtils::get_layer_transfer_size(layer_info); TRY(const auto network_batch_size, resources_manager.get_network_batch_size(layer_info.network_name)); - TRY(auto inter_context_buffer, resources_manager.create_intermediate_buffer(frame_credits_in_bytes, + TRY(auto inter_context_buffer, resources_manager.create_intermediate_edge_layer(frame_credits_in_bytes, network_batch_size, layer_info.stream_index, layer_info.context_index, - channel_id, IntermediateBuffer::StreamingType::BURST)); + channel_id, LayerType::INTER_CONTEXT)); - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; - TRY(auto local_layer_info, update_layer_info(layer_info, 
inter_context_buffer.get().get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + TRY(auto local_layer_info, update_layer_info(layer_info, + inter_context_buffer.get().get_host_buffer_info(frame_credits_in_bytes), hw_consts, + hw_arch, should_optimize_credits, is_core_hw_padding_config_in_dfc)); auto status = context_resources.add_edge_layer(local_layer_info, channel_id, - inter_context_buffer.get().get_host_buffer_info(), resources_manager.get_supported_features()); + inter_context_buffer.get().get_host_buffer_info(frame_credits_in_bytes), resources_manager.get_supported_features()); CHECK_SUCCESS(status); LOGGER__DEBUG("Inter-context output stream {}, src_context:{}, d2h_channel {}.", @@ -233,23 +223,23 @@ static hailo_status fill_ddr_output_layer(ContextResources &context_resources, ResourcesManager &resources_manager, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch) { - CHECK(resources_manager.get_supported_features().padded_ddr_buffers, HAILO_INVALID_HEF, - "Failed opening non-compatible HEF that uses the following deprecated features: host-managed DDR buffers." + CHECK(resources_manager.get_supported_features().padded_ddr_buffers, HAILO_HEF_NOT_SUPPORTED, + "Failed opening non-compatible HEF that uses the following deprecated features: host-managed DDR buffers." "Please re-compile the HEF using a newer Dataflow Compiler version (v3.11.0 or newer)"); // It is assumed that output channels are parsed before input channels. 
// Allocate vdma channel index for both edges const auto h2d_stream_index = layer_info.connected_context_info.stream_index; - const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_H2D_STREAM, + const auto h2d_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_H2D_STREAM, layer_info.name, h2d_stream_index); TRY(const auto h2d_channel_id, resources_manager.get_available_channel_id(h2d_layer_identifier, - HailoRTDriver::DmaDirection::H2D, layer_info.connected_context_info.dma_engine_index)); + HailoRTDriver::DmaDirection::H2D, layer_info.connected_context_info.dma_engine_index, false)); const auto d2h_stream_index = layer_info.stream_index; - const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_D2H_STREAM, + const auto d2h_layer_identifier = std::make_tuple(LayerType::DDR, HAILO_D2H_STREAM, layer_info.name, d2h_stream_index); TRY(const auto d2h_channel_id, resources_manager.get_available_channel_id(d2h_layer_identifier, - HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index)); + HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index, false)); // In DDR - always use core bytes per buffer as row size const auto row_size = static_cast(layer_info.nn_stream_config.core_bytes_per_buffer); @@ -257,10 +247,9 @@ static hailo_status fill_ddr_output_layer(ContextResources &context_resources, "DDR Row size ({}) must be aligned to {}", row_size, PERIPH_BYTES_PER_BUFFER_DDR_ALIGNMENT_SIZE); const auto min_buffered_rows = layer_info.ddr_info.min_buffered_rows; - // Allocate the ddr buffer - TRY(auto ddr_buffer, resources_manager.create_intermediate_buffer(row_size, min_buffered_rows, - d2h_stream_index, layer_info.context_index, d2h_channel_id, - IntermediateBuffer::StreamingType::CIRCULAR_CONTINUOS)); + // Create the ddr edge layer + TRY(auto ddr_buffer, resources_manager.create_intermediate_edge_layer(row_size, min_buffered_rows, + d2h_stream_index, layer_info.context_index, d2h_channel_id, LayerType::DDR)); 
DdrChannelsInfo ddr_pair_info{}; ddr_pair_info.h2d_stream_index = h2d_stream_index; @@ -271,19 +260,18 @@ static hailo_status fill_ddr_output_layer(ContextResources &context_resources, ddr_pair_info.row_size = row_size; ddr_pair_info.min_buffered_rows = min_buffered_rows; ddr_pair_info.total_buffers_per_frame = layer_info.ddr_info.total_buffers_per_frame; - ddr_pair_info.host_buffer_info = ddr_buffer.get().get_host_buffer_info(); + ddr_pair_info.host_buffer_info = ddr_buffer.get().get_host_buffer_info(row_size); context_resources.add_ddr_channels_info(ddr_pair_info); // On ddr layers, we assume the periph credit size is aligned to the size of descriptor, so we don't want to // optimize the credits. const bool should_optimize_credits = false; - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; - TRY(auto local_layer_info, update_layer_info(layer_info, ddr_buffer.get().get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + TRY(auto local_layer_info, update_layer_info(layer_info, ddr_buffer.get().get_host_buffer_info(row_size), hw_consts, + hw_arch, should_optimize_credits, is_core_hw_padding_config_in_dfc)); auto status = context_resources.add_edge_layer(local_layer_info, ddr_pair_info.d2h_channel_id, - ddr_buffer.get().get_host_buffer_info(), resources_manager.get_supported_features()); + ddr_buffer.get().get_host_buffer_info(row_size), resources_manager.get_supported_features()); CHECK_SUCCESS(status); return HAILO_SUCCESS; @@ -305,10 +293,9 @@ static hailo_status fill_ddr_input_layer(ContextResources &context_resources, Re // On ddr layers, we assume the periph credit size is aligned to the size of descriptor, so we don't want to // optimize the credits. 
const bool should_optimize_credits = false; - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; TRY(auto local_layer_info, update_layer_info(layer_info, ddr_info.host_buffer_info, hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + hw_arch, should_optimize_credits, is_core_hw_padding_config_in_dfc)); auto status = context_resources.add_edge_layer(local_layer_info, ddr_info.h2d_channel_id, ddr_info.host_buffer_info, resources_manager.get_supported_features()); @@ -322,23 +309,23 @@ static hailo_status fill_cache_output_layer(ContextResources &context_resources, bool should_optimize_credits) { TRY(const auto channel_id, resources_manager.get_available_channel_id(to_layer_identifier(layer_info), - HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index)); + HailoRTDriver::DmaDirection::D2H, layer_info.dma_engine_index, false)); TRY(const auto network_batch_size, resources_manager.get_network_batch_size(layer_info.network_name)); - TRY(auto cache_buffer, resources_manager.set_cache_output_channel(layer_info.cache_id, + TRY(auto cache_buffer, resources_manager.set_cache_output_channel(layer_info.cache_info.cache_id, network_batch_size, channel_id)); - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; - TRY(auto local_layer_info, update_layer_info(layer_info, cache_buffer.get().get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, is_core_hw_padding_config_in_dfc)); + TRY(auto local_layer_info, update_layer_info(layer_info, cache_buffer.get().get_host_output_buffer_info(), 
hw_consts, + hw_arch, should_optimize_credits, is_core_hw_padding_config_in_dfc)); + local_layer_info.cache_info.batch_size = cache_buffer.get().output_batch_size(); auto status = context_resources.add_edge_layer(local_layer_info, channel_id, - cache_buffer.get().get_host_buffer_info(), resources_manager.get_supported_features()); + cache_buffer.get().get_host_output_buffer_info(), resources_manager.get_supported_features()); CHECK_SUCCESS(status); LOGGER__DEBUG("Cache id {}: output stream {}, d2h_channel {}, context {}", - layer_info.cache_id, layer_info.stream_index, channel_id, layer_info.context_index); + layer_info.cache_info.cache_id, layer_info.stream_index, channel_id, layer_info.context_index); return HAILO_SUCCESS; } @@ -346,22 +333,21 @@ static hailo_status fill_cache_input_layer(ContextResources &context_resources, const LayerInfo &layer_info, const CONTROL_PROTOCOL__hw_consts_t &hw_consts, const HEFHwArch &hw_arch, bool should_optimize_credits) { TRY(const auto channel_id, resources_manager.get_available_channel_id(to_layer_identifier(layer_info), - HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index)); + HailoRTDriver::DmaDirection::H2D, layer_info.dma_engine_index, false)); TRY(const auto network_batch_size, resources_manager.get_network_batch_size(layer_info.network_name)); - TRY(auto cache_buffer, resources_manager.set_cache_input_channel(layer_info.cache_id, network_batch_size, channel_id)); + TRY(auto cache_buffer, resources_manager.set_cache_input_channel(layer_info.cache_info.cache_id, network_batch_size, channel_id)); - const bool is_periph_calculated_in_hailort = resources_manager.get_supported_features().periph_calculation_in_hailort; const bool is_core_hw_padding_config_in_dfc = resources_manager.get_supported_features().core_hw_padding_config_in_dfc; - TRY(auto local_layer_info, update_layer_info(layer_info, cache_buffer.get().get_host_buffer_info(), hw_consts, - hw_arch, should_optimize_credits, is_periph_calculated_in_hailort, 
is_core_hw_padding_config_in_dfc)); + TRY(auto local_layer_info, update_layer_info(layer_info, cache_buffer.get().get_host_input_buffer_info(), hw_consts, + hw_arch, should_optimize_credits, is_core_hw_padding_config_in_dfc)); auto status = context_resources.add_edge_layer(local_layer_info, channel_id, - cache_buffer.get().get_host_buffer_info(), resources_manager.get_supported_features()); + cache_buffer.get().get_host_input_buffer_info(), resources_manager.get_supported_features()); CHECK_SUCCESS(status); LOGGER__DEBUG("Cache id {}: input stream {}, h2d_channel {}, context {}", - layer_info.cache_id, layer_info.stream_index, channel_id, layer_info.context_index); + layer_info.cache_info.cache_id, layer_info.stream_index, channel_id, layer_info.context_index); return HAILO_SUCCESS; } @@ -604,18 +590,21 @@ static hailo_status push_fetch_config_actions( static hailo_status proccess_write_ccw_action(ContextSwitchConfigActionPtr &configuration_action, std::vector &config_resources, - const bool support_pre_fetch, - std::vector &processed_configuration_actions) + const bool support_pre_fetch, std::vector &processed_configuration_actions, + bool aligned_ccws) { assert(ContextSwitchConfigAction::Type::WriteDataCcw == configuration_action->get_type()); auto &write_ccw_action = *static_cast(configuration_action.get()); const auto config_stream_index = write_ccw_action.config_stream_index(); assert(config_stream_index < config_resources.size()); - auto status = write_ccw_action.write_to_config_buffer(config_resources[config_stream_index], support_pre_fetch); - CHECK_SUCCESS(status); - status = push_fetch_config_actions(config_resources[config_stream_index], config_stream_index, + if (!aligned_ccws) { + auto status = write_ccw_action.write_to_config_buffer(config_resources[config_stream_index], support_pre_fetch); + CHECK_SUCCESS(status); + } + + auto status = push_fetch_config_actions(config_resources[config_stream_index], config_stream_index, 
write_ccw_action.total_ccw_burst(), support_pre_fetch, processed_configuration_actions); CHECK_SUCCESS(status); @@ -719,7 +708,7 @@ static hailo_status push_edge_layer_activation_actions( // (HRT-13775) TRY(const auto activate_action, ActivateCacheOutputChannelAction::create(edge_layer.channel_id, edge_layer.layer_info.stream_index, edge_layer.layer_info.network_index, - edge_layer.layer_info.nn_stream_config, edge_layer.buffer_info)); + edge_layer.layer_info.nn_stream_config, edge_layer.buffer_info, edge_layer.layer_info.cache_info.batch_size)); actions.emplace_back(std::move(activate_action)); } @@ -816,14 +805,14 @@ static hailo_status proccess_trigger_new_data_input_action(const HEFHwArch &hw_a // At the end of each consecutive group of WriteDataCcwAction, a FetchCfgChannelDescriptorsAction is added. static hailo_status add_fetch_config_actions(std::vector &configuration_actions, - std::vector &config_resources, bool support_pre_fetch) + std::vector &config_resources, bool support_pre_fetch, bool aligned_ccws) { std::vector processed_configuration_actions; for (uint32_t action_index = 0; action_index < configuration_actions.size(); action_index++) { auto &configuration_action = configuration_actions[action_index]; if (ContextSwitchConfigAction::Type::WriteDataCcw == configuration_action->get_type()) { auto status = proccess_write_ccw_action(configuration_action, config_resources, - support_pre_fetch, processed_configuration_actions); + support_pre_fetch, processed_configuration_actions, aligned_ccws); CHECK_SUCCESS(status); } else { // Add the current action @@ -991,7 +980,7 @@ static hailo_status add_edge_layer_end_of_context_actions(const ContextResources static hailo_status fill_context_recipes_for_multi_context(const HEFHwArch &hw_arch, ContextResources &context_resources, ResourcesManager &resources_manager, uint16_t context_index, const CoreOpMetadata &core_op_metadata, const ContextMetadata &context_metadata, - bool is_single_context, bool 
is_last_context, bool caches_in_use) + bool is_single_context, bool is_last_context, bool caches_in_use, bool aligned_ccws) { hailo_status status = HAILO_UNINITIALIZED; @@ -1003,7 +992,7 @@ static hailo_status fill_context_recipes_for_multi_context(const HEFHwArch &hw_a std::vector actions = context_metadata.get_actions(); const auto support_pre_fetch = HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); - status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); + status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch, aligned_ccws); CHECK_SUCCESS(status); status = handle_edge_layer_activation_actions(hw_arch, actions, core_op_metadata, resources_manager, @@ -1047,11 +1036,13 @@ static hailo_status create_boundary_channels(ResourcesManager &resources_manager for (const auto &layer_info : core_op_metadata.get_all_layer_infos()) { if (layer_info.is_multi_planar) { for (const auto &plane : layer_info.planes) { - auto status = resources_manager.create_boundary_vdma_channel(plane); + auto status = resources_manager.create_boundary_vdma_channel(plane, + resources_manager.get_hw_infer_boundary_channel_mode()); CHECK_SUCCESS(status); } } else { - auto status = resources_manager.create_boundary_vdma_channel(layer_info); + auto status = resources_manager.create_boundary_vdma_channel(layer_info, + resources_manager.get_hw_infer_boundary_channel_mode()); CHECK_SUCCESS(status); } } @@ -1264,7 +1255,7 @@ static hailo_status fill_batch_switching_context_config_recepies_for_multi_conte static hailo_status fill_preliminary_config_recepies_for_multi_context(const HEFHwArch &hw_arch, ContextResources &context_resources, ResourcesManager &resources_manager, std::shared_ptr core_op_metadata, const ContextMetadata &preliminary_context, - bool is_single_context) + bool is_single_context, bool aligned_ccws) { static const auto PRELIMINARY_CONTEXT_INDEX = 0; // First 
context in the hef @@ -1280,7 +1271,7 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const HEF std::vector actions = preliminary_context.get_actions(); const auto support_pre_fetch = HailoRTCommon::is_hailo1x_device_type(DeviceBase::hef_arch_to_device_arch(hw_arch)); - auto status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch); + auto status = add_fetch_config_actions(actions, context_resources.get_config_buffers(), support_pre_fetch, aligned_ccws); CHECK_SUCCESS(status); if (resources_manager.get_supported_features().preliminary_run_asap) { @@ -1298,9 +1289,27 @@ static hailo_status fill_preliminary_config_recepies_for_multi_context(const HEF return write_action_list(context_resources, resources_manager.get_action_list_buffer_builder(), actions); } +hailo_status ResourcesManagerBuilder::prepare_aligned_ccws_resources(const Hef &hef, ResourcesManager &resources_manager, HailoRTDriver &driver) +{ + const size_t page_size = resources_manager.get_csm_buffer_size(); + TRY(auto hef_as_bufferptr, hef.pimpl->get_hef_as_buffer()); + auto status = resources_manager.map_and_set_ccws_section_buffer(hef_as_bufferptr, hef.pimpl->get_offset_zero_point(), hef.pimpl->get_ccws_section_size(), driver); + CHECK_SUCCESS(status); + + // create a mapped buffer and fill it with nops - we will use it for padding each ccws dma transfer + TRY(auto dmable_nops_buffer, vdma::DmaAbleBuffer::create_by_allocation(page_size, driver)); + TRY(auto nops_buffer, vdma::MappedBuffer::create_shared(dmable_nops_buffer, driver, HailoRTDriver::DmaDirection::H2D)); + static constexpr uint64_t CCW_NOP = 0x0; + std::vector nops_data(page_size / sizeof(uint64_t), CCW_NOP); + status = nops_buffer->write(nops_data.data(), page_size, 0); + CHECK_SUCCESS(status); + resources_manager.set_nops_mapped_buffer(nops_buffer); + return HAILO_SUCCESS; +} + Expected> ResourcesManagerBuilder::build(uint8_t current_core_op_index, VdmaDevice &device, 
HailoRTDriver &driver, CacheManagerPtr cache_manager, const ConfigureNetworkParams &config_params, - std::shared_ptr core_op_metadata, const HEFHwArch &hw_arch) + std::shared_ptr core_op_metadata, const HEFHwArch &hw_arch, const Hef &hef) { const auto num_contexts = core_op_metadata->dynamic_contexts().size() + CONTROL_PROTOCOL__CONTEXT_SWITCH_NUMBER_OF_NON_DYNAMIC_CONTEXTS; @@ -1326,12 +1335,19 @@ Expected> ResourcesManagerBuilder::build(uint8 status = resources_manager.fill_internal_buffers_info(); CHECK_SUCCESS_AS_EXPECTED(status); - // No allocation of edge layers in the activation context. No need for context index here - auto INVLID_CONTEXT_INDEX = static_cast(UINT16_MAX); - auto ACTIVATION_CONTEXT_INDEX = INVLID_CONTEXT_INDEX; + // NOTE: need to allocate the hw infer buffers after creating boundary channels so we can give the + // correct ccb dma address in case of hw infer over ccb boundary + if (is_env_variable_on(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR)) { + status = resources_manager.allocate_boundary_channels_buffers_hw_infer(); + CHECK_SUCCESS_AS_EXPECTED(status); + } - TRY(auto activation_context, resources_manager.add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION, - ACTIVATION_CONTEXT_INDEX)); + if (hef.pimpl->is_aligned_ccws_on()) { + status = ResourcesManagerBuilder::prepare_aligned_ccws_resources(hef, resources_manager, driver); + CHECK_SUCCESS_AS_EXPECTED(status); + } + + TRY(auto activation_context, resources_manager.add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_ACTIVATION, hef.pimpl->is_aligned_ccws_on())); status = fill_activation_config_recepies_for_multi_context(activation_context.get(), resources_manager, core_op_metadata, hw_arch); CHECK_SUCCESS_AS_EXPECTED(status); @@ -1340,21 +1356,17 @@ Expected> ResourcesManagerBuilder::build(uint8 const auto activation_context_boundary_input_layers = activation_context.get().get_edge_layers(LayerType::BOUNDARY, HAILO_H2D_STREAM); - // No allocation of edge layers in 
the batch switching context. No need for context index here - auto BATCH_SWITCH_CONTEXT_INDEX = INVLID_CONTEXT_INDEX; - TRY(auto batch_switching_context, resources_manager.add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING, - BATCH_SWITCH_CONTEXT_INDEX)); + TRY(auto batch_switching_context, resources_manager.add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_BATCH_SWITCHING, hef.pimpl->is_aligned_ccws_on())); status = fill_batch_switching_context_config_recepies_for_multi_context(batch_switching_context.get(), *core_op_metadata, resources_manager, hw_arch, activation_context_boundary_input_layers); CHECK_SUCCESS_AS_EXPECTED(status); - static const uint16_t PRELIMINARY_CONTEXT_INDEX = 0; - static const uint16_t FIRST_DYNAMIC_CONTEXT_INDEX = 1; const auto is_single_context = (core_op_metadata->dynamic_contexts().size() == 1); TRY(auto preliminary_context, resources_manager.add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_PRELIMINARY, - PRELIMINARY_CONTEXT_INDEX, core_op_metadata->preliminary_context().config_buffers_info())); + hef.pimpl->is_aligned_ccws_on(), core_op_metadata->preliminary_context().config_buffers_info())); status = fill_preliminary_config_recepies_for_multi_context(hw_arch, preliminary_context.get(), - resources_manager, core_op_metadata, core_op_metadata->preliminary_context(), is_single_context); + resources_manager, core_op_metadata, core_op_metadata->preliminary_context(), is_single_context, + hef.pimpl->is_aligned_ccws_on()); CHECK_SUCCESS_AS_EXPECTED(status); const auto caches_in_use = core_op_metadata->get_cache_layers_count() > 0; @@ -1365,12 +1377,12 @@ Expected> ResourcesManagerBuilder::build(uint8 for (size_t context_index = 0; context_index < num_dynamic_contexts; context_index++) { const auto &context_metadata = core_op_metadata->dynamic_contexts()[context_index]; TRY(auto new_context, resources_manager.add_new_context(CONTROL_PROTOCOL__CONTEXT_SWITCH_CONTEXT_TYPE_DYNAMIC, - 
static_cast(FIRST_DYNAMIC_CONTEXT_INDEX + context_index), context_metadata.config_buffers_info())); + hef.pimpl->is_aligned_ccws_on(), context_metadata.config_buffers_info())); const auto is_last_context = (context_index == (num_dynamic_contexts - 1)); status = fill_context_recipes_for_multi_context(hw_arch, new_context.get(), resources_manager, static_cast(context_index), *core_op_metadata, context_metadata, is_single_context, - is_last_context, caches_in_use); + is_last_context, caches_in_use, hef.pimpl->is_aligned_ccws_on()); CHECK_SUCCESS_AS_EXPECTED(status); } diff --git a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp index f38e932..964b882 100644 --- a/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp +++ b/hailort/libhailort/src/core_op/resource_manager/resource_manager_builder.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -24,7 +24,9 @@ public: static Expected> build(uint8_t net_group_index, VdmaDevice &device, HailoRTDriver &driver, CacheManagerPtr cache_manager, const ConfigureNetworkParams &config_params, - std::shared_ptr core_op, const HEFHwArch &hw_arch); + std::shared_ptr core_op, const HEFHwArch &hw_arch, const Hef &hef); + + static hailo_status prepare_aligned_ccws_resources(const Hef &hef, ResourcesManager &resources_manager, HailoRTDriver &driver); }; diff --git a/hailort/libhailort/src/device_common/control.cpp b/hailort/libhailort/src/device_common/control.cpp index 4559a63..7a99c79 100644 --- a/hailort/libhailort/src/device_common/control.cpp +++ b/hailort/libhailort/src/device_common/control.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -44,7 +44,8 @@ namespace hailort typedef std::array, CONTROL_PROTOCOL__DVM_OPTIONS_COUNT> power_conversion_multiplier_t; -Expected control__parse_identify_results(CONTROL_PROTOCOL_identify_response_t *identify_response) +Expected control__parse_identify_results(CONTROL_PROTOCOL_identify_response_t *identify_response, + Device &device) { hailo_device_identity_t board_info; @@ -97,11 +98,15 @@ Expected control__parse_identify_results(CONTROL_PROTOC // Check if we're on H10 - relevant only for linux #ifdef __linux__ - TRY(auto host_name_pair, Process::create_and_wait_for_output("hostname", 20)); - CHECK_AS_EXPECTED(0 == host_name_pair.first, HAILO_INTERNAL_FAILURE, "Failed to run 'hostname'"); - if (host_name_pair.second.find("hailo10") != std::string::npos) { - board_info.device_architecture = HAILO_ARCH_HAILO10H; + if (Device::Type::INTEGRATED == device.get_type()) { + char hostname[HOST_NAME_MAX+1]; + CHECK_AS_EXPECTED(0 == gethostname(hostname, HOST_NAME_MAX+1), HAILO_INTERNAL_FAILURE, "Failed to get hostname"); + if (std::string(hostname).find("hailo10") != std::string::npos) { + board_info.device_architecture = HAILO_ARCH_HAILO10H; + } } +#else + (void)device; #endif /* Write identify results to log */ @@ -339,7 +344,7 @@ Expected Control::identify(Device &device) CHECK_SUCCESS_AS_EXPECTED(status); identify_response = (CONTROL_PROTOCOL_identify_response_t *)(payload->parameters); - return control__parse_identify_results(identify_response); + return control__parse_identify_results(identify_response, device); } hailo_status Control::core_identify(Device &device, hailo_core_information_t *core_info) @@ -3408,7 +3413,8 @@ hailo_status Control::set_sleep_state(Device &device, hailo_sleep_state_t sleep_ hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__hw_infer_state_t state, uint8_t 
network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count, - CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results) + CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results, + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode) { CONTROL_PROTOCOL__request_t request = {}; size_t request_size = 0; @@ -3421,8 +3427,8 @@ hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__h RETURN_IF_ARG_NULL(results); auto common_status = CONTROL_PROTOCOL__pack_change_hw_infer_status_request( - &request, &request_size, device.get_control_sequence(), static_cast(state), - network_group_index, dynamic_batch_size, batch_count, channels_info); + &request, &request_size, device.get_control_sequence(), static_cast(state), + network_group_index, dynamic_batch_size, batch_count, channels_info, boundary_channel_mode); auto status = (HAILO_COMMON_STATUS__SUCCESS == common_status) ? 
HAILO_SUCCESS : HAILO_INTERNAL_FAILURE; CHECK_SUCCESS(status); @@ -3442,11 +3448,12 @@ hailo_status Control::change_hw_infer_status(Device &device, CONTROL_PROTOCOL__h } hailo_status Control::start_hw_only_infer(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size, - uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info) + uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode) { CONTROL_PROTOCOL__hw_only_infer_results_t results = {}; return Control::change_hw_infer_status(device, CONTROL_PROTOCOL__HW_INFER_STATE_START, - network_group_index, dynamic_batch_size, batch_count, channels_info ,&results); + network_group_index, dynamic_batch_size, batch_count, channels_info ,&results, boundary_channel_mode); } hailo_status Control::stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_only_infer_results_t *results) @@ -3454,9 +3461,11 @@ hailo_status Control::stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_on const uint8_t DEFAULT_NETWORK_GROUP = 0; const uint16_t DEFAULT_DYNAMIC_BATCH_SIZE = 1; const uint16_t DEFAULT_BATCH_COUNT = 1; + const CONTROL_PROTOCOL__boundary_channel_mode_t DEFAULT_BOUNDARY_TYPE = CONTROL_PROTOCOL__DESC_BOUNDARY_CHANNEL; CONTROL_PROTOCOL__hw_infer_channels_info_t channels_info_default = {}; return Control::change_hw_infer_status(device, CONTROL_PROTOCOL__HW_INFER_STATE_STOP, - DEFAULT_NETWORK_GROUP, DEFAULT_DYNAMIC_BATCH_SIZE, DEFAULT_BATCH_COUNT, &channels_info_default, results); + DEFAULT_NETWORK_GROUP, DEFAULT_DYNAMIC_BATCH_SIZE, DEFAULT_BATCH_COUNT, &channels_info_default, results, + DEFAULT_BOUNDARY_TYPE); } } /* namespace hailort */ diff --git a/hailort/libhailort/src/device_common/control.hpp b/hailort/libhailort/src/device_common/control.hpp index da7b23e..997bd44 100644 --- a/hailort/libhailort/src/device_common/control.hpp +++ b/hailort/libhailort/src/device_common/control.hpp @@ -1,5 
+1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -376,9 +376,11 @@ public: static hailo_status set_sleep_state(Device &device, hailo_sleep_state_t sleep_state); static hailo_status change_hw_infer_status(Device &device, CONTROL_PROTOCOL__hw_infer_state_t state, uint8_t network_group_index, uint16_t dynamic_batch_size, uint16_t batch_count, - CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results); + CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, CONTROL_PROTOCOL__hw_only_infer_results_t *results, + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode); static hailo_status start_hw_only_infer(Device &device, uint8_t network_group_index, uint16_t dynamic_batch_size, - uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info); + uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode); static hailo_status stop_hw_only_infer(Device &device, CONTROL_PROTOCOL__hw_only_infer_results_t *results); // TODO: needed? static hailo_status power_measurement(Device &device, CONTROL_PROTOCOL__dvm_options_t dvm, diff --git a/hailort/libhailort/src/device_common/control_protocol.cpp b/hailort/libhailort/src/device_common/control_protocol.cpp index 4a96ec5..a11e635 100644 --- a/hailort/libhailort/src/device_common/control_protocol.cpp +++ b/hailort/libhailort/src/device_common/control_protocol.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ @@ -43,8 +43,6 @@ const char *CONTROL_PROTOCOL__get_textual_opcode(CONTROL_PROTOCOL__OPCODE_t opco return CONTROL_PROTOCOL__textual_format[opcode]; } -#define CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT (5) - #define CHECK_NOT_NULL_COMMON_STATUS(arg, status) _CHECK(nullptr != (arg), (status), "CHECK_NOT_NULL for {} failed", #arg) #define CHECK_COMMON_STATUS(cond, ret_val, ...) \ _CHECK((cond), (ret_val), CONSTRUCT_MSG("CHECK failed", ##__VA_ARGS__)) @@ -2388,55 +2386,55 @@ exit: } HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request( - CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, + CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t hw_infer_state, uint8_t network_group_index, uint16_t dynamic_batch_size, - uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info) + uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode) { - HAILO_COMMON_STATUS_t status = HAILO_COMMON_STATUS__UNINITIALIZED; size_t local_request_size = 0; - if ((NULL == request) || (NULL == request_size)) { - status = HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED; - goto exit; - } + CHECK_COMMON_STATUS((NULL != request) && (NULL != request_size), HAILO_STATUS__CONTROL_PROTOCOL__NULL_ARGUMENT_PASSED); /* Header */ - local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + + local_request_size = CONTROL_PROTOCOL__REQUEST_BASE_SIZE + sizeof(CONTROL_PROTOCOL__change_hw_infer_status_request_t); - control_protocol__pack_request_header(request, sequence, HAILO_CONTROL_OPCODE_CHANGE_HW_INFER_STATUS, + control_protocol__pack_request_header(request, sequence, HAILO_CONTROL_OPCODE_CHANGE_HW_INFER_STATUS, CHANGE_HW_INFER_REQUEST_PARAMETER_COUNT); /* hw_infer_state */ - 
request->parameters.change_hw_infer_status_request.hw_infer_state_length = + request->parameters.change_hw_infer_status_request.hw_infer_state_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.hw_infer_state)); request->parameters.change_hw_infer_status_request.hw_infer_state = hw_infer_state; /* network_group_index */ - request->parameters.change_hw_infer_status_request.application_index_length = + request->parameters.change_hw_infer_status_request.application_index_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.application_index)); request->parameters.change_hw_infer_status_request.application_index = network_group_index; /* dynamic_batch_size */ - request->parameters.change_hw_infer_status_request.dynamic_batch_size_length = + request->parameters.change_hw_infer_status_request.dynamic_batch_size_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.dynamic_batch_size)); request->parameters.change_hw_infer_status_request.dynamic_batch_size = dynamic_batch_size; /* batch_count */ - request->parameters.change_hw_infer_status_request.batch_count_length = + request->parameters.change_hw_infer_status_request.batch_count_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.batch_count)); request->parameters.change_hw_infer_status_request.batch_count = batch_count; /* channels_info */ - request->parameters.change_hw_infer_status_request.channels_info_length = + request->parameters.change_hw_infer_status_request.channels_info_length = BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.channels_info)); - memcpy(&(request->parameters.change_hw_infer_status_request.channels_info), - channels_info, - sizeof(request->parameters.change_hw_infer_status_request.channels_info)); + memcpy(&(request->parameters.change_hw_infer_status_request.channels_info), + channels_info, 
sizeof(request->parameters.change_hw_infer_status_request.channels_info)); + + /* boundary channels mode */ + request->parameters.change_hw_infer_status_request.boundary_channel_mode_length = + BYTE_ORDER__htonl(sizeof(request->parameters.change_hw_infer_status_request.boundary_channel_mode)); + request->parameters.change_hw_infer_status_request.boundary_channel_mode = + static_cast(boundary_channel_mode); *request_size = local_request_size; - status = HAILO_COMMON_STATUS__SUCCESS; -exit: - return status; + return HAILO_COMMON_STATUS__SUCCESS; } #endif /* FIRMWARE_ARCH */ diff --git a/hailort/libhailort/src/device_common/control_protocol.hpp b/hailort/libhailort/src/device_common/control_protocol.hpp index 1c5bc8d..55f21c1 100644 --- a/hailort/libhailort/src/device_common/control_protocol.hpp +++ b/hailort/libhailort/src/device_common/control_protocol.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -172,8 +172,9 @@ HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_overcurrent_state_request(CONTR HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_get_overcurrent_state_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_get_hw_consts_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence); HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_set_sleep_state_request(CONTROL_PROTOCOL__request_t *request, size_t *request_size, uint32_t sequence, uint8_t sleep_state); -HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request(CONTROL_PROTOCOL__request_t *request, - size_t *request_size, uint32_t sequence, uint8_t hw_infer_state, uint8_t network_group_index, - uint16_t dynamic_batch_size, uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info); +HAILO_COMMON_STATUS_t CONTROL_PROTOCOL__pack_change_hw_infer_status_request(CONTROL_PROTOCOL__request_t *request, + size_t *request_size, uint32_t sequence, uint8_t hw_infer_state, uint8_t network_group_index, + uint16_t dynamic_batch_size, uint16_t batch_count, CONTROL_PROTOCOL__hw_infer_channels_info_t *channels_info, + CONTROL_PROTOCOL__boundary_channel_mode_t boundary_channel_mode); #endif /* _CONTROL_PROTOCOL_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/device_common/control_soc.cpp b/hailort/libhailort/src/device_common/control_soc.cpp index 2557e6b..293555c 100644 --- a/hailort/libhailort/src/device_common/control_soc.cpp +++ b/hailort/libhailort/src/device_common/control_soc.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/device_common/control_soc.hpp b/hailort/libhailort/src/device_common/control_soc.hpp index 24cf657..dfa9d28 100644 --- a/hailort/libhailort/src/device_common/control_soc.hpp +++ b/hailort/libhailort/src/device_common/control_soc.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/device_common/d2h_event_queue.cpp b/hailort/libhailort/src/device_common/d2h_event_queue.cpp index b7b57b4..46502d5 100644 --- a/hailort/libhailort/src/device_common/d2h_event_queue.cpp +++ b/hailort/libhailort/src/device_common/d2h_event_queue.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/device_common/d2h_event_queue.hpp b/hailort/libhailort/src/device_common/d2h_event_queue.hpp index df34b08..c9b82c8 100644 --- a/hailort/libhailort/src/device_common/d2h_event_queue.hpp +++ b/hailort/libhailort/src/device_common/d2h_event_queue.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/device_common/d2h_events_parser.cpp b/hailort/libhailort/src/device_common/d2h_events_parser.cpp index 9023d03..add8e31 100644 --- a/hailort/libhailort/src/device_common/d2h_events_parser.cpp +++ b/hailort/libhailort/src/device_common/d2h_events_parser.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ @@ -220,16 +220,15 @@ static HAILO_COMMON_STATUS_t D2H_EVENTS__parse_health_monitor_closed_streams_not } if(d2h_notification_message->header.payload_length != sizeof(d2h_notification_message->message_parameters.health_monitor_closed_streams_event)) { - LOGGER__ERROR("d2h notification invalid payload_length: {}", d2h_notification_message->header.payload_length); + LOGGER__ERROR("d2h notification invalid payload_length: {} vs {}", d2h_notification_message->header.payload_length, + sizeof(d2h_notification_message->message_parameters.health_monitor_closed_streams_event)); status = HAILO_STATUS__D2H_EVENTS__INCORRECT_PARAMETER_LENGTH; goto l_exit; } - LOGGER__CRITICAL("Got health monitor closed streams notification. temperature: TS00={} c, TS01={} c, inputs bitfield:{:x}, outputs bitfield:{:x}", + LOGGER__CRITICAL("Got health monitor closed streams notification. 
temperature: TS00={} c, TS01={} c", d2h_notification_message->message_parameters.health_monitor_closed_streams_event.ts0_temperature, - d2h_notification_message->message_parameters.health_monitor_closed_streams_event.ts1_temperature, - d2h_notification_message->message_parameters.health_monitor_closed_streams_event.closed_input_streams, - d2h_notification_message->message_parameters.health_monitor_closed_streams_event.closed_output_streams); + d2h_notification_message->message_parameters.health_monitor_closed_streams_event.ts1_temperature); status = HAILO_COMMON_STATUS__SUCCESS; diff --git a/hailort/libhailort/src/device_common/device.cpp b/hailort/libhailort/src/device_common/device.cpp index c0e3022..ac0944f 100644 --- a/hailort/libhailort/src/device_common/device.cpp +++ b/hailort/libhailort/src/device_common/device.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -21,11 +21,13 @@ #include "vdma/pcie/pcie_device.hpp" #include "vdma/integrated/integrated_device.hpp" #include "eth/eth_device.hpp" +#include "utils/query_stats_utils.hpp" #include "byte_order.h" #include "firmware_header_utils.h" #include "control_protocol.h" #include +#include #ifndef _MSC_VER #include #endif @@ -370,6 +372,75 @@ Expected Device::get_chip_temperature() return res; } +Expected Device::query_health_stats() +{ +#ifndef __linux__ + LOGGER__ERROR("Query health stats is supported only on Linux systems"); + return make_unexpected(HAILO_NOT_SUPPORTED); +#endif + + TRY(auto device_arch, get_architecture()); + if ((device_arch != HAILO_ARCH_HAILO15H) && (device_arch != HAILO_ARCH_HAILO15L) && (device_arch != HAILO_ARCH_HAILO15M) && (device_arch != HAILO_ARCH_HAILO10H)) { + LOGGER__ERROR("Query health stats is not supported for device arch {}", 
HailoRTCommon::get_device_arch_str(device_arch)); + return make_unexpected(HAILO_NOT_SUPPORTED); + } + + hailo_health_stats_t health_stats = {-1, -1, -1}; + TRY(auto temp, get_chip_temperature()); + + health_stats.on_die_temperature = std::max(temp.ts0_temperature, temp.ts1_temperature); + + // TODO (HRT-16224): add on_die_voltage and startup_bist_mask (currently APIs does not exist) + + return health_stats; +} + +Expected Device::query_performance_stats() +{ +#ifndef __linux__ + LOGGER__ERROR("Query performance stats is supported only on Linux systems"); + return make_unexpected(HAILO_NOT_SUPPORTED); +#endif + + TRY(auto device_arch, get_architecture()); + if ((device_arch != HAILO_ARCH_HAILO15H) && (device_arch != HAILO_ARCH_HAILO15L) && (device_arch != HAILO_ARCH_HAILO15M) && (device_arch != HAILO_ARCH_HAILO10H)) { + LOGGER__ERROR("Query performance stats is not supported for device arch {}", HailoRTCommon::get_device_arch_str(device_arch)); + return make_unexpected(HAILO_NOT_SUPPORTED); + } + + hailo_performance_stats_t performance_stats = {-1, -1, -1, -1, -1, -1}; + + auto cpu_utilization = QueryStatsUtils::calculate_cpu_utilization(); + if (HAILO_SUCCESS == cpu_utilization.status()) { + performance_stats.cpu_utilization = cpu_utilization.release(); + } + + auto ram_sizes = QueryStatsUtils::calculate_ram_sizes(); + if (HAILO_SUCCESS == ram_sizes.status()) { + performance_stats.ram_size_total = std::get<0>(ram_sizes.value()); + performance_stats.ram_size_used = std::get<1>(ram_sizes.value()); + } + + auto dsp_utilization = QueryStatsUtils::get_dsp_utilization(); + if (HAILO_SUCCESS == dsp_utilization.status()) { + performance_stats.dsp_utilization = dsp_utilization.release(); + } + + auto ddr_noc_utilization = QueryStatsUtils::get_ddr_noc_utilization(); + if (HAILO_SUCCESS == ddr_noc_utilization.status()) { + performance_stats.ddr_noc_total_transactions = ddr_noc_utilization.release(); + } + + auto id_info_str = get_dev_id(); + auto device_arch_str = 
HailoRTCommon::get_device_arch_str(device_arch); + auto nnc_utilization = QueryStatsUtils::get_nnc_utilization(id_info_str, device_arch_str); + if (HAILO_SUCCESS == nnc_utilization.status()) { + performance_stats.nnc_utilization = nnc_utilization.release(); + } + + return performance_stats; +} + hailo_status Device::test_chip_memories() { return Control::test_chip_memories(*this); diff --git a/hailort/libhailort/src/device_common/device_internal.cpp b/hailort/libhailort/src/device_common/device_internal.cpp index fea54ab..7f4918e 100644 --- a/hailort/libhailort/src/device_common/device_internal.cpp +++ b/hailort/libhailort/src/device_common/device_internal.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -601,7 +601,7 @@ hailo_status DeviceBase::check_hef_is_compatible(Hef &hef) LOGGER__ERROR("HEF format is not compatible with device. Device arch: {}, HEF arch: {}", device_arch_str.c_str(), hef_arch_str.c_str()); - return HAILO_INVALID_HEF; + return HAILO_HEF_NOT_COMPATIBLE_WITH_DEVICE; } // TODO: MSW-227 check clock rate for hailo15 as well. @@ -758,6 +758,8 @@ hailo_device_architecture_t DeviceBase::hef_arch_to_device_arch(HEFHwArch hef_ar return HAILO_ARCH_HAILO15M; case HEFHwArch::HW_ARCH__HAILO10H: return HAILO_ARCH_HAILO10H; + case HEFHwArch::HW_ARCH__MARS: + return HAILO_ARCH_MARS; default: return HAILO_ARCH_MAX_ENUM; diff --git a/hailort/libhailort/src/device_common/device_internal.hpp b/hailort/libhailort/src/device_common/device_internal.hpp index af464bc..268b210 100644 --- a/hailort/libhailort/src/device_common/device_internal.hpp +++ b/hailort/libhailort/src/device_common/device_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -59,6 +59,7 @@ enum class HEFHwArch // Must be aligned to ProtoHEFHwArch HW_ARCH__GINGER = 104, HW_ARCH__LAVENDER = 105, HW_ARCH__PLUTO = 106, + HW_ARCH__MARS = 108, }; class DeviceBase : public Device @@ -108,6 +109,21 @@ public: return Expected(m_device_architecture); } + virtual hailo_status before_fork() override + { + return HAILO_SUCCESS; + } + + virtual hailo_status after_fork_in_parent() override + { + return HAILO_SUCCESS; + } + + virtual hailo_status after_fork_in_child() override + { + return HAILO_SUCCESS; + } + protected: struct NotificationThreadSharedParams { NotificationThreadSharedParams() : is_running(false) {} diff --git a/hailort/libhailort/src/eth/eth_device.cpp b/hailort/libhailort/src/eth/eth_device.cpp index f439df5..4e07b66 100644 --- a/hailort/libhailort/src/eth/eth_device.cpp +++ b/hailort/libhailort/src/eth/eth_device.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/eth/eth_device.hpp b/hailort/libhailort/src/eth/eth_device.hpp index 6865a19..f6f156d 100644 --- a/hailort/libhailort/src/eth/eth_device.hpp +++ b/hailort/libhailort/src/eth/eth_device.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/eth/eth_stream.cpp b/hailort/libhailort/src/eth/eth_stream.cpp index 84f24cb..88a8a49 100644 --- a/hailort/libhailort/src/eth/eth_stream.cpp +++ b/hailort/libhailort/src/eth/eth_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. 
All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/eth/eth_stream.hpp b/hailort/libhailort/src/eth/eth_stream.hpp index 1896e23..67d08ec 100644 --- a/hailort/libhailort/src/eth/eth_stream.hpp +++ b/hailort/libhailort/src/eth/eth_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.cpp b/hailort/libhailort/src/eth/hcp_config_core_op.cpp index ead7fea..0e3910b 100644 --- a/hailort/libhailort/src/eth/hcp_config_core_op.cpp +++ b/hailort/libhailort/src/eth/hcp_config_core_op.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ @@ -104,10 +104,9 @@ Expected HcpConfigCoreOp::get_cache_entry_size(uint32_t cache_id) cons return make_unexpected(HAILO_INVALID_OPERATION); } -hailo_status HcpConfigCoreOp::init_cache(uint32_t read_offset, int32_t write_offset_delta) +hailo_status HcpConfigCoreOp::init_cache(uint32_t read_offset) { (void) read_offset; - (void) write_offset_delta; LOGGER__ERROR("init_cache function is not supported on ETH core-ops"); return HAILO_INVALID_OPERATION; } diff --git a/hailort/libhailort/src/eth/hcp_config_core_op.hpp b/hailort/libhailort/src/eth/hcp_config_core_op.hpp index a76b4d5..95d60ba 100644 --- a/hailort/libhailort/src/eth/hcp_config_core_op.hpp +++ b/hailort/libhailort/src/eth/hcp_config_core_op.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -58,7 +58,7 @@ public: virtual Expected get_cache_read_length() const override; virtual Expected get_cache_write_length() const override; virtual Expected get_cache_entry_size(uint32_t cache_id) const override; - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override; + virtual hailo_status init_cache(uint32_t read_offset) override; virtual hailo_status update_cache_offset(int32_t offset_delta_entries) override; virtual Expected> get_cache_ids() const override; virtual Expected read_cache_buffer(uint32_t cache_id) override; diff --git a/hailort/libhailort/src/eth/network_rate_calculator.cpp b/hailort/libhailort/src/eth/network_rate_calculator.cpp index 005ddab..42d901a 100644 --- a/hailort/libhailort/src/eth/network_rate_calculator.cpp +++ b/hailort/libhailort/src/eth/network_rate_calculator.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/eth/udp.cpp b/hailort/libhailort/src/eth/udp.cpp index e464e9b..5db69f7 100644 --- a/hailort/libhailort/src/eth/udp.cpp +++ b/hailort/libhailort/src/eth/udp.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/eth/udp.hpp b/hailort/libhailort/src/eth/udp.hpp index 8c7533d..e299bdd 100644 --- a/hailort/libhailort/src/eth/udp.hpp +++ b/hailort/libhailort/src/eth/udp.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/genai/CMakeLists.txt b/hailort/libhailort/src/genai/CMakeLists.txt index 5d206dd..eb726ec 100644 --- a/hailort/libhailort/src/genai/CMakeLists.txt +++ b/hailort/libhailort/src/genai/CMakeLists.txt @@ -1,8 +1,12 @@ cmake_minimum_required(VERSION 3.5.0) set(SRC_FILES - ${CMAKE_CURRENT_SOURCE_DIR}/llm/llm.cpp ${CMAKE_CURRENT_SOURCE_DIR}/vdevice_genai.cpp + + ${HAILORT_COMMON_DIR}/common/genai/serializer/serializer.cpp + + ${CMAKE_CURRENT_SOURCE_DIR}/llm/llm.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/text2image/text2image.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) \ No newline at end of file diff --git a/hailort/libhailort/src/genai/llm/llm.cpp b/hailort/libhailort/src/genai/llm/llm.cpp index 0c94179..69e28db 100644 --- a/hailort/libhailort/src/genai/llm/llm.cpp +++ b/hailort/libhailort/src/genai/llm/llm.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -15,7 +15,8 @@ #include "common/filesystem.hpp" #include "common/utils.hpp" #include "common/file_utils.hpp" -#include "common/utils.hpp" + +#include "common/genai/serializer/serializer.hpp" #include @@ -26,68 +27,46 @@ namespace genai constexpr std::chrono::milliseconds LLMGeneratorCompletion::DEFAULT_READ_TIMEOUT; const uint16_t DEFAULT_LLM_CONNECTION_PORT = 12145; -const std::string FILE_NOT_FOUND = ""; + +static const auto LONG_TIMEOUT = std::chrono::seconds(45); // TODO (HRT-15334): Move the logic to server side const std::string EOF_TOEKN = "<|endoftext|>"; -const std::string IM_END_TOEKN = "<|im_end|>"; - -// TODO (HRT-15334): - adjusting all ack's once server is written in cpp -const size_t SERVER_ACK_SIZE = 32; hailo_status LLMParams::set_model(const std::string &hef_path, const std::string &lora_name) { - auto status = set_hef(hef_path); - CHECK_SUCCESS(status); - + m_hef_path = hef_path; m_lora = lora_name; - CHECK(lora_name.empty(), HAILO_NOT_IMPLEMENTED, "Setting LoRA is not implemented."); + + if (BUILTIN != hef_path) { + CHECK((Filesystem::does_file_exists(hef_path)), HAILO_OPEN_FILE_FAILURE, + "Hef file '{}' does not exist", hef_path); + // LoRA is supported only when working with BUILTIN HEF + CHECK(lora_name.empty(), HAILO_NOT_IMPLEMENTED, "Setting LoRA is not implemented."); + } else { + // When using BUILTIN HEF, LoRA must be set + CHECK(!(lora_name.empty()), HAILO_INVALID_OPERATION, + "When using '{}' model, LoRA name must be set.", BUILTIN); + } return HAILO_SUCCESS; } -hailo_status LLMParams::set_hef(const std::string &path) -{ - m_hef_path = path; - - CHECK(((BUILTIN == path) || Filesystem::does_file_exists(path)), HAILO_OPEN_FILE_FAILURE, - "Hef file {} does not exist", path); - - return HAILO_SUCCESS; -} - -std::string LLMParams::hef() const +const std::string& LLMParams::hef() const { return m_hef_path; } -std::string LLMParams::lora() const +const 
std::string& LLMParams::lora() const { return m_lora; } -hailo_status LLMParams::set_vocabulary(const std::string &vocabulary_path) -{ - m_vocabulary_path = vocabulary_path; - - CHECK((BUILTIN == vocabulary_path) || Filesystem::does_file_exists(vocabulary_path), HAILO_OPEN_FILE_FAILURE, - "vocabulary file {} does not exist", vocabulary_path); - - return HAILO_SUCCESS; -} - -std::string LLMParams::vocabulary() const -{ - return m_vocabulary_path; -} - hailo_status LLMGeneratorParams::set_temperature(float32_t temperature) { m_temperature = temperature; - // TODO (HRT-15334): Implement when server is in C++ - LOGGER__ERROR("`set_temperature` function is not supported yet"); - return HAILO_NOT_IMPLEMENTED; + return HAILO_SUCCESS; } float32_t LLMGeneratorParams::temperature() const @@ -99,9 +78,7 @@ hailo_status LLMGeneratorParams::set_top_p(float32_t top_p) { m_top_p = top_p; - // TODO (HRT-15334): Implement when server is in C++ - LOGGER__ERROR("`set_top_p` function is not supported yet"); - return HAILO_NOT_IMPLEMENTED; + return HAILO_SUCCESS; } float32_t LLMGeneratorParams::top_p() const @@ -109,6 +86,66 @@ float32_t LLMGeneratorParams::top_p() const return m_top_p; } +hailo_status LLMGeneratorParams::set_top_k(uint32_t top_k) +{ + m_top_k = top_k; + + return HAILO_SUCCESS; +} + +uint32_t LLMGeneratorParams::top_k() const +{ + return m_top_k; +} + +hailo_status LLMGeneratorParams::set_frequency_penalty(float32_t frequency_penalty) +{ + m_frequency_penalty = frequency_penalty; + + return HAILO_SUCCESS; +} + +float32_t LLMGeneratorParams::frequency_penalty() const +{ + return m_frequency_penalty; +} + +hailo_status LLMGeneratorParams::set_max_generated_tokens(uint32_t max_generated_tokens) +{ + m_max_generated_tokens = max_generated_tokens; + + return HAILO_SUCCESS; +} + +uint32_t LLMGeneratorParams::max_generated_tokens() const +{ + return m_max_generated_tokens; +} + +hailo_status LLMGeneratorParams::set_do_sample(bool do_sample) +{ + m_do_sample = do_sample; + + 
return HAILO_SUCCESS; +} + +bool LLMGeneratorParams::do_sample() const +{ + return m_do_sample; +} + +hailo_status LLMGeneratorParams::set_seed(uint32_t seed) +{ + m_seed = seed; + + return HAILO_SUCCESS; +} + +uint32_t LLMGeneratorParams::seed() const +{ + return m_seed; +} + Expected LLM::create(std::shared_ptr vdevice, const LLMParams &llm_params) { TRY(auto pimpl, Impl::create_unique(vdevice, llm_params)); @@ -117,32 +154,44 @@ Expected LLM::create(std::shared_ptr vdevice, const LLMParams Expected> LLM::Impl::create_unique(std::shared_ptr vdevice, const LLMParams &llm_params) { - CHECK(llm_params.lora().empty(), HAILO_NOT_IMPLEMENTED, "Failed to create LLM. Setting LoRA is not Implemented."); CHECK(!llm_params.hef().empty(), HAILO_INVALID_OPERATION, "Failed to create LLM. HEF was not set."); - CHECK(!llm_params.vocabulary().empty(), HAILO_INVALID_OPERATION, "Failed to create LLM. Vocabulary was not set."); + + // LoRA is supported only when working with BUILTIN HEF + if (BUILTIN != llm_params.hef()) { + CHECK(llm_params.lora().empty(), HAILO_NOT_IMPLEMENTED, "Failed to create LLM. Setting LoRA is not Implemented."); + } else { + // When using BUILTIN HEF, LoRA must be set + CHECK(!llm_params.lora().empty(), HAILO_INVALID_OPERATION, + "Failed to create LLM. 
When using '{}' model, LoRA name must be set.", BUILTIN); + } TRY(auto session, vdevice->create_session(DEFAULT_LLM_CONNECTION_PORT)); - auto status = send_data_file(session, llm_params.hef()); - CHECK_SUCCESS(status, "Failed to load LLM hef"); + auto vdevice_params = vdevice->get_params(); + TRY(auto create_llm_request, LLMCreateSerializer::serialize_request(vdevice_params, llm_params)); + CHECK_SUCCESS(session->write(MemoryView(create_llm_request)), "Failed to load LLM hef"); + // If HEF is not builtin, write it to the server + if (BUILTIN != llm_params.hef()) { + TRY(auto file_data, read_binary_file(llm_params.hef(), BufferStorageParams::create_dma())); + CHECK_SUCCESS(session->write(MemoryView(file_data))); + } + TRY(auto create_llm_reply, session->read(LONG_TIMEOUT)); // TODO (HRT-16302): Reduce timeout once configure is faster + CHECK_SUCCESS(LLMCreateSerializer::deserialize_reply(MemoryView(*create_llm_reply)), "Failed to create LLM"); - status = send_data_file(session, llm_params.vocabulary()); - CHECK_SUCCESS(status, "Failed to load LLM vocabulary"); + TRY(auto get_generator_default_params_request, LLMGetDefaultGeneratorParamsSerializer::serialize_request()); + CHECK_SUCCESS(session->write(MemoryView(get_generator_default_params_request)), "Failed to get default generator params"); + TRY(auto get_generator_default_params, session->read(LONG_TIMEOUT)); // TODO (HRT-16302): Reduce timeout once configure is faster + TRY(auto default_generator_params, LLMGetDefaultGeneratorParamsSerializer::deserialize_reply(MemoryView(*get_generator_default_params))); - // Ack from server - finished hef configure - uint8_t server_ack[SERVER_ACK_SIZE] = {}; - TRY(auto size, session->read(server_ack, SERVER_ACK_SIZE)); - std::string config_finished_ack = (0 == size)? 
"" : std::string(reinterpret_cast(server_ack), size); - LOGGER__INFO("Got ack from server: {}", config_finished_ack); - - auto llm = Impl(session, llm_params); + auto llm = Impl(session, llm_params, default_generator_params); auto llm_ptr = std::make_unique(std::move(llm)); CHECK_NOT_NULL_AS_EXPECTED(llm_ptr, HAILO_OUT_OF_HOST_MEMORY); return llm_ptr; } -LLM::Impl::Impl(std::shared_ptr session, const LLMParams &llm_params) : - m_session(session), m_llm_params(llm_params) +LLM::Impl::Impl(std::shared_ptr session, const LLMParams &llm_params, + const LLMGeneratorParams &default_generator_params) : + m_session(session), m_llm_params(llm_params), m_default_generator_params(default_generator_params) {} LLM::LLM(std::unique_ptr pimpl) : @@ -154,21 +203,50 @@ Expected LLM::create_generator(const LLMGeneratorParams ¶ms) return m_pimpl->create_generator(params); } +Expected LLM::create_generator() +{ + TRY(auto generator_params, create_generator_params()); + return m_pimpl->create_generator(generator_params); +} + +Expected LLM::create_generator_params() +{ + return m_pimpl->create_generator_params(); +} + +Expected LLM::Impl::create_generator_params() +{ + auto generator_params = m_default_generator_params; + + return generator_params; +} + Expected LLM::Impl::create_generator(const LLMGeneratorParams ¶ms) { - CHECK_SUCCESS(LLMGenerator::Impl::validate_params(params)); + CHECK_SUCCESS(validate_generator_params(params)); + + TRY(auto create_generator_request, LLMGeneratorCreateSerializer::serialize_request(params)); + CHECK_SUCCESS(m_session->write(MemoryView(create_generator_request)), "Failed to create LLM generator"); + TRY(auto create_generator_reply, m_session->read()); + CHECK_SUCCESS(LLMGeneratorCreateSerializer::deserialize_reply(MemoryView(*create_generator_reply)), "Failed to create LLM generator"); + auto pimpl = std::make_unique(m_session); CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY); return LLMGenerator(std::move(pimpl)); } -hailo_status 
LLMGenerator::Impl::validate_params(const LLMGeneratorParams ¶ms) +hailo_status LLM::Impl::validate_generator_params(const LLMGeneratorParams ¶ms) { - CHECK_AS_EXPECTED(0 == params.temperature(), HAILO_NOT_IMPLEMENTED, - "Setting generator's temperature is not implemented."); - CHECK_AS_EXPECTED(0 == params.top_p(), HAILO_NOT_IMPLEMENTED, - "Setting generator's temperature is not implemented."); - + CHECK_AS_EXPECTED(0 < params.temperature(), HAILO_INVALID_ARGUMENT, + "Temperature should be higher than '0'. received: '{}'", params.temperature()); + CHECK_AS_EXPECTED((0 <= params.top_p()) && (params.top_p() <= 1), HAILO_INVALID_ARGUMENT, + "top_p should be in range [0, 1]. received: '{}'", params.top_p()); + CHECK_AS_EXPECTED(0 < params.top_k(), HAILO_INVALID_ARGUMENT, + "top_k should be greater than or equal to '1'. received: '{}'", params.top_k()); + CHECK_AS_EXPECTED(0 != params.frequency_penalty(), HAILO_INVALID_ARGUMENT, + "frequency_penalty must be a nonzero value. received: '{}'", params.frequency_penalty()); + CHECK_AS_EXPECTED(2 <= params.max_generated_tokens(), HAILO_INVALID_ARGUMENT, + "max_generated_tokens should be greater than '1'. 
received: '{}'", params.max_generated_tokens()); return HAILO_SUCCESS; } @@ -222,8 +300,16 @@ Expected LLMGenerator::Impl::generate() auto prompt = concat_prompts(m_prompts); CHECK_AS_EXPECTED(!prompt.empty(), HAILO_INVALID_OPERATION, "Generate on empty prompt is invalid"); - auto status = m_session->write(reinterpret_cast(prompt.c_str()), prompt.size()); - CHECK_SUCCESS(status); + TRY(auto generator_write_request, LLMGeneratorWriteSerializer::serialize_request()); + CHECK_SUCCESS(m_session->write(MemoryView(generator_write_request)), "Failed to write prompt"); + CHECK_SUCCESS(m_session->write(MemoryView(prompt)), "Failed to write prompt"); + TRY(auto generator_write_reply, m_session->read()); + CHECK_SUCCESS(LLMGeneratorWriteSerializer::deserialize_reply(MemoryView(*generator_write_reply)), "Failed to write prompt"); + + TRY(auto generator_generate_request, LLMGeneratorGenerateSerializer::serialize_request()); + CHECK_SUCCESS(m_session->write(MemoryView(generator_generate_request)), "Failed to generate"); + TRY(auto generator_generate_reply, m_session->read()); + CHECK_SUCCESS(LLMGeneratorGenerateSerializer::deserialize_reply(MemoryView(*generator_generate_reply)), "Failed to generate"); auto pimpl = std::make_unique(m_session); CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY); @@ -237,7 +323,7 @@ LLMGeneratorCompletion::LLMGeneratorCompletion(std::unique_ptr pimpl) : LLMGeneratorCompletion::Impl::Impl(std::shared_ptr session) : m_session(session), m_mutex(), - m_end_of_generation(false) + m_generation_status(Status::GENERATING) {} Expected LLMGeneratorCompletion::read(char *output, size_t output_size, std::chrono::milliseconds timeout) @@ -247,25 +333,11 @@ Expected LLMGeneratorCompletion::read(char *output, size_t output_size, Expected LLMGeneratorCompletion::Impl::read(char *output, size_t output_size, std::chrono::milliseconds timeout) { - auto start_time = std::chrono::steady_clock::now(); - CHECK(!m_end_of_generation, HAILO_INVALID_OPERATION, 
"read() cannot be called after generation completed!"); - TRY(auto bytes_read, m_session->read(reinterpret_cast(output), output_size, timeout)); + TRY(auto str, read(timeout)); + CHECK(output_size > str.size(), HAILO_INSUFFICIENT_BUFFER, "Output buffer is too small. received token: '{}' is too large.", str); + std::strncpy(output, str.c_str(), str.size()); - // TODO (HRT-15334): Move logic to server - // TODO: if IM_END_TOEKN is splitted acroos multiple reads we wont know it - if ((bytes_read == IM_END_TOEKN.size()) && (0 == memcmp(output, IM_END_TOEKN.c_str(), IM_END_TOEKN.size()))) { - std::vector eof_token(EOF_TOEKN.size()); - auto elapsed_time = - std::chrono::duration_cast(std::chrono::steady_clock::now() - start_time); - TRY(auto res_eof_size, m_session->read(eof_token.data(), eof_token.size(), (timeout - elapsed_time))); - CHECK((res_eof_size == EOF_TOEKN.size() && (0 == memcmp(eof_token.data(), EOF_TOEKN.c_str(), EOF_TOEKN.size()))), - HAILO_INTERNAL_FAILURE, "EOF token {} should come after IM_END token {}", EOF_TOEKN, IM_END_TOEKN); - - m_end_of_generation = true; - return 0; - } - - return bytes_read; + return str.size(); } Expected LLMGeneratorCompletion::read(std::chrono::milliseconds timeout) @@ -275,47 +347,28 @@ Expected LLMGeneratorCompletion::read(std::chrono::milliseconds tim Expected LLMGeneratorCompletion::Impl::read(std::chrono::milliseconds timeout) { - const size_t READ_CHUNK_MAX_SIZE = 1024; // High number to make sure EOF_TOEKNis not splitted across multiple reads - char res[READ_CHUNK_MAX_SIZE] = {}; + TimeoutGuard timeout_guard(timeout); + CHECK((m_generation_status == Status::GENERATING), HAILO_INVALID_OPERATION, + "read() cannot be called after generation completed!"); - TRY(auto size, read(res, READ_CHUNK_MAX_SIZE, timeout)); - if (0 == size) { - return std::string(""); - } - return std::string(res, size); + TRY(auto read_request, LLMGeneratorReadSerializer::serialize_request()); + 
CHECK_SUCCESS(m_session->write(MemoryView(read_request)), "Failed to read"); + TRY(auto read_reply, m_session->read()); + TRY(auto pair, LLMGeneratorReadSerializer::deserialize_reply(MemoryView(*read_reply))); + auto next_token = pair.first; + m_generation_status = pair.second; + + return next_token; } -bool LLMGeneratorCompletion::end_of_generation() const +LLMGeneratorCompletion::Status LLMGeneratorCompletion::generation_status() const { - return m_pimpl->end_of_generation(); + return m_pimpl->generation_status(); } -bool LLMGeneratorCompletion::Impl::end_of_generation() const +LLMGeneratorCompletion::Status LLMGeneratorCompletion::Impl::generation_status() const { - return m_end_of_generation; -} - -hailo_status LLM::Impl::send_data_file(std::shared_ptr session, const std::string &path) -{ - if ((BUILTIN == path)) { - // Write the `BUILTIN` indicator - auto status = session->write(reinterpret_cast(path.c_str()), path.size()); - CHECK_SUCCESS(status); - } else { - // Send file bytes - TRY(auto file_data, read_binary_file(path, BufferStorageParams::create_dma())); - auto status = session->write(file_data.data(), file_data.size()); - CHECK_SUCCESS(status); - } - - // TODO (HRT-15334): - adjusting all ack's once server is written in cpp - uint8_t server_ack[SERVER_ACK_SIZE] = {}; // Ack is "HEF Config done, ack returned") - TRY(auto size, session->read(server_ack, SERVER_ACK_SIZE)); - std::string output = (0 == size) ? 
"" : std::string(reinterpret_cast(server_ack), size); - CHECK(output != FILE_NOT_FOUND, HAILO_NOT_FOUND, "Builtin file does not exist"); - LOGGER__INFO("Sent {}, Got ack: {}", path, output); - - return HAILO_SUCCESS; + return m_generation_status; } // https://stackoverflow.com/questions/71104545/constructor-and-destructor-in-c-when-using-the-pimpl-idiom @@ -324,7 +377,7 @@ LLM::~LLM() = default; LLM::LLM(LLM &&) = default; LLMGenerator::~LLMGenerator() = default; -LLMGenerator::LLMGenerator(LLMGenerator &&other) = default; +LLMGenerator::LLMGenerator(LLMGenerator &&) = default; LLMGeneratorCompletion::~LLMGeneratorCompletion() = default; LLMGeneratorCompletion::LLMGeneratorCompletion(LLMGeneratorCompletion &&) = default; diff --git a/hailort/libhailort/src/genai/llm/llm_internal.hpp b/hailort/libhailort/src/genai/llm/llm_internal.hpp index 98f4f1b..c0be5c6 100644 --- a/hailort/libhailort/src/genai/llm/llm_internal.hpp +++ b/hailort/libhailort/src/genai/llm/llm_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -30,12 +30,12 @@ public: Expected read(char *output, size_t output_size, std::chrono::milliseconds timeout); Expected read(std::chrono::milliseconds timeout); - bool end_of_generation() const; + Status generation_status() const; private: std::shared_ptr m_session; std::mutex m_mutex; - bool m_end_of_generation; + Status m_generation_status; }; @@ -47,8 +47,6 @@ public: hailo_status write(const std::string &prompt); Expected generate(); - static hailo_status validate_params(const LLMGeneratorParams ¶ms); - private: std::shared_ptr m_session; std::vector m_prompts; @@ -64,14 +62,16 @@ public: static Expected> create_unique(std::shared_ptr vdevice, const LLMParams &llm_params); Expected create_generator(const LLMGeneratorParams ¶ms); + Expected create_generator_params(); private: - Impl(std::shared_ptr session, const LLMParams &llm_params); - - static hailo_status send_data_file(std::shared_ptr session, const std::string &path); + Impl(std::shared_ptr session, const LLMParams &llm_params, + const LLMGeneratorParams &default_generator_params); + hailo_status validate_generator_params(const LLMGeneratorParams ¶ms); std::shared_ptr m_session; LLMParams m_llm_params; + LLMGeneratorParams m_default_generator_params; }; } /* namespace genai */ diff --git a/hailort/libhailort/src/genai/text2image/text2image.cpp b/hailort/libhailort/src/genai/text2image/text2image.cpp new file mode 100644 index 0000000..6ef619a --- /dev/null +++ b/hailort/libhailort/src/genai/text2image/text2image.cpp @@ -0,0 +1,619 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file text2image.cpp + * @brief Text2Image Implementation + **/ + +#include "genai/text2image/text2image_internal.hpp" +#include "hailo/genai/common.hpp" +#include "hailo/hef.hpp" +#include "hailo/hailort_common.hpp" +#include "common/utils.hpp" +#include "common/filesystem.hpp" + + +namespace hailort +{ +namespace genai +{ + +/*! Indicates if the action of the current `write`, was successful on server side */ +const std::string SERVER_SUCCESS_ACK = ""; +constexpr std::chrono::milliseconds Text2ImageGenerator::DEFAULT_OPERATION_TIMEOUT; +constexpr uint16_t DEFAULT_TEXT2IMAGE_CONNECTION_PORT = 12144; + +constexpr uint32_t TEXT2IMAGE_SAMPLES_COUNT_DEFAULT_VALUE = 1; +constexpr uint32_t TEXT2IMAGE_STEPS_COUNT_DEFAULT_VALUE = 20; +constexpr float32_t TEXT2IMAGE_GUIDANCE_SCALE_DEFAULT_VALUE = 7.5f; +constexpr uint32_t TEXT2IMAGE_SEED_DEFAULT_VALUE = 0; + +hailo_status write_and_validate_ack(std::shared_ptr session, uint8_t *buffer, + size_t size, std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT) +{ + TimeoutGuard timeout_guard(timeout); + auto status = session->write(MemoryView(buffer, size), timeout_guard.get_remaining_timeout()); + CHECK_SUCCESS(status); + + TRY(auto ack, session->get_ack(timeout_guard.get_remaining_timeout())); + CHECK((ack.find(SERVER_SUCCESS_ACK) != std::string::npos), HAILO_INTERNAL_FAILURE, "Transfer failed, got error: {}", ack); + LOGGER__INFO("Received ack from server - '{}'", ack); + + return HAILO_SUCCESS; +} + +hailo_status write_and_validate_ack(std::shared_ptr session, MemoryView buffer, + std::chrono::milliseconds timeout = Session::DEFAULT_READ_TIMEOUT) +{ + return write_and_validate_ack(session, buffer.data(), buffer.size(), timeout); +} + +hailo_status validate_hef_path(const std::string &hef_path) +{ + CHECK(((BUILTIN == hef_path) || Filesystem::does_file_exists(hef_path)), HAILO_OPEN_FILE_FAILURE, + "Hef file {} does not 
exist", hef_path); + + return HAILO_SUCCESS; +} + +Text2ImageParams::Text2ImageParams() : + m_denoise_hef(""), + m_denoise_lora(""), + m_text_encoder_hef(""), + m_text_encoder_lora(""), + m_image_decoder_hef(""), + m_image_decoder_lora(""), + m_ip_adapter_hef(""), + m_ip_adapter_lora(""), + m_scheduler_type(HailoDiffuserSchedulerType::EULER) +{} + +hailo_status Text2ImageParams::set_denoise_model(const std::string &hef_path, const std::string &lora_name) +{ + m_denoise_hef = hef_path; + m_denoise_lora = lora_name; + + auto status = validate_hef_path(hef_path); + CHECK_SUCCESS(status); + CHECK(lora_name.empty(), HAILO_NOT_IMPLEMENTED, "Setting LoRA is not implemented."); + + return HAILO_SUCCESS; +} + +hailo_status Text2ImageParams::set_text_encoder_model(const std::string &hef_path, const std::string &lora_name) +{ + m_text_encoder_hef = hef_path; + m_text_encoder_lora = lora_name; + + auto status = validate_hef_path(hef_path); + CHECK_SUCCESS(status); + CHECK(lora_name.empty(), HAILO_NOT_IMPLEMENTED, "Setting LoRA is not implemented."); + + return HAILO_SUCCESS; +} + +hailo_status Text2ImageParams::set_image_decoder_model(const std::string &hef_path, const std::string &lora_name) +{ + m_image_decoder_hef = hef_path; + m_image_decoder_lora = lora_name; + + auto status = validate_hef_path(hef_path); + CHECK_SUCCESS(status); + CHECK(lora_name.empty(), HAILO_NOT_IMPLEMENTED, "Setting LoRA is not implemented."); + + return HAILO_SUCCESS; +} + +hailo_status Text2ImageParams::set_ip_adapter_model(const std::string &hef_path, const std::string &lora_name) +{ + m_ip_adapter_hef = hef_path; + m_ip_adapter_lora = lora_name; + + auto status = validate_hef_path(hef_path); + CHECK_SUCCESS(status); + CHECK(lora_name.empty(), HAILO_NOT_IMPLEMENTED, "Setting LoRA is not implemented."); + + return HAILO_SUCCESS; +} + +hailo_status Text2ImageParams::set_scheduler(HailoDiffuserSchedulerType scheduler_type) +{ + m_scheduler_type = scheduler_type; + return HAILO_SUCCESS; +} + +const 
std::string& Text2ImageParams::denoise_hef() const +{ + return m_denoise_hef; +} + +const std::string& Text2ImageParams::denoise_lora() const +{ + return m_denoise_lora; +} + +const std::string& Text2ImageParams::text_encoder_hef() const +{ + return m_text_encoder_hef; +} + +const std::string& Text2ImageParams::text_encoder_lora() const +{ + return m_text_encoder_lora; +} + +const std::string& Text2ImageParams::image_decoder_hef() const +{ + return m_image_decoder_hef; +} + +const std::string& Text2ImageParams::image_decoder_lora() const +{ + return m_image_decoder_lora; +} + +const std::string& Text2ImageParams::ip_adapter_hef() const +{ + return m_ip_adapter_hef; +} + +const std::string& Text2ImageParams::ip_adapter_lora() const +{ + return m_ip_adapter_lora; +} + +HailoDiffuserSchedulerType Text2ImageParams::scheduler() const +{ + return m_scheduler_type; +} + +hailo_status Text2ImageGeneratorParams::set_samples_count(uint32_t samples_count) +{ + m_samples_count = samples_count; + + LOGGER__ERROR("`set_samples_count` function is not supported yet"); + return HAILO_NOT_IMPLEMENTED; +} + +uint32_t Text2ImageGeneratorParams::samples_count() const +{ + return m_samples_count; +} + +hailo_status Text2ImageGeneratorParams::set_steps_count(uint32_t steps_count) +{ + m_steps_count = steps_count; + + LOGGER__ERROR("`set_steps_count` function is not supported yet"); + return HAILO_NOT_IMPLEMENTED; +} + +uint32_t Text2ImageGeneratorParams::steps_count() const +{ + return m_steps_count; +} + +hailo_status Text2ImageGeneratorParams::set_guidance_scale(float32_t guidance_scale) +{ + m_guidance_scale = guidance_scale; + + LOGGER__ERROR("`set_guidance_scale` function is not supported yet"); + return HAILO_NOT_IMPLEMENTED; +} + +float32_t Text2ImageGeneratorParams::guidance_scale() const +{ + return m_guidance_scale; +} + +hailo_status Text2ImageGeneratorParams::set_seed(uint32_t seed) +{ + m_seed = seed; + + return HAILO_SUCCESS; +} + +uint32_t Text2ImageGeneratorParams::seed() 
const +{ + return m_seed; +} + +Text2Image::Text2Image(std::unique_ptr pimpl) : + m_pimpl(std::move(pimpl)) +{} + +Expected Text2Image::create(std::shared_ptr vdevice, const Text2ImageParams ¶ms) +{ + TRY(auto pimpl, Text2Image::Impl::create_unique(vdevice, params)); + return Text2Image(std::move(pimpl)); +} + +Expected Text2Image::create_generator(const Text2ImageGeneratorParams ¶ms) +{ + return m_pimpl->create_generator(params); +} + +Expected Text2Image::create_generator() +{ + TRY(auto generator_params, create_generator_params()); + return m_pimpl->create_generator(generator_params); +} + +Expected Text2Image::create_generator_params() +{ + return m_pimpl->create_generator_params(); +} + +Expected Text2Image::Impl::create_generator_params() +{ + // TODO: Use a getter from server + auto generator_params = hailort::genai::Text2ImageGeneratorParams(); + generator_params.m_samples_count = TEXT2IMAGE_SAMPLES_COUNT_DEFAULT_VALUE; + generator_params.m_steps_count = TEXT2IMAGE_STEPS_COUNT_DEFAULT_VALUE; + generator_params.m_guidance_scale = TEXT2IMAGE_GUIDANCE_SCALE_DEFAULT_VALUE; + generator_params.m_seed = TEXT2IMAGE_SEED_DEFAULT_VALUE; + + return generator_params; +} + +uint32_t Text2Image::output_sample_frame_size() const +{ + return m_pimpl->output_sample_frame_size(); +} + +hailo_3d_image_shape_t Text2Image::output_sample_shape() const +{ + return m_pimpl->output_sample_shape(); +} + +hailo_format_type_t Text2Image::output_sample_format_type() const +{ + return m_pimpl->output_sample_format_type(); +} + +hailo_format_order_t Text2Image::output_sample_format_order() const +{ + return m_pimpl->output_sample_format_order(); +} + +Expected Text2Image::ip_adapter_frame_size() const +{ + return m_pimpl->ip_adapter_frame_size(); +} + +Expected Text2Image::ip_adapter_shape() const +{ + return m_pimpl->ip_adapter_shape(); +} + +Expected Text2Image::ip_adapter_format_type() const +{ + return m_pimpl->ip_adapter_format_type(); +} + +Expected 
Text2Image::ip_adapter_format_order() const +{ + return m_pimpl->ip_adapter_format_order(); +} + +Text2ImageGenerator::Text2ImageGenerator(std::unique_ptr pimpl) : + m_pimpl(std::move(pimpl)) +{} + +Expected> Text2ImageGenerator::generate(const std::string &positive_prompt, + const std::string &negative_prompt, std::chrono::milliseconds timeout) +{ + return m_pimpl->generate(positive_prompt, negative_prompt, timeout); +} + +Expected> Text2ImageGenerator::generate(const std::string &positive_prompt, + const std::string &negative_prompt, const MemoryView &ip_adapter, std::chrono::milliseconds timeout) +{ + return m_pimpl->generate(positive_prompt, negative_prompt, ip_adapter, timeout); +} + +hailo_status Text2ImageGenerator::generate(std::vector &output_images, const std::string &positive_prompt, + const std::string &negative_prompt, std::chrono::milliseconds timeout) +{ + return m_pimpl->generate(output_images, positive_prompt, negative_prompt, timeout); +} + +hailo_status Text2ImageGenerator::generate(std::vector &output_images, const std::string &positive_prompt, + const std::string &negative_prompt, const MemoryView &ip_adapter, std::chrono::milliseconds timeout) +{ + return m_pimpl->generate(output_images, positive_prompt, negative_prompt, ip_adapter, timeout); +} + +hailo_status Text2ImageGenerator::stop() +{ + return m_pimpl->stop(); +} + +Text2Image::Impl::Impl(std::shared_ptr session, const Text2ImageParams ¶ms, + const frame_info_t &output_sample_frame_info, const bool is_ip_adapter_supported, const frame_info_t &ip_adapter_frame_info) : + m_session(session), + m_params(params), + m_output_sample_frame_info(output_sample_frame_info), + m_is_ip_adapter_supported(is_ip_adapter_supported), + m_ip_adapter_frame_info(ip_adapter_frame_info) +{} + +hailo_status Text2Image::Impl::validate_params(const Text2ImageParams ¶ms) +{ + CHECK(!params.denoise_hef().empty(), HAILO_INVALID_OPERATION, "Failed to create Text2Image model. 
`denoise_hef` was not set."); + CHECK(!params.text_encoder_hef().empty(), HAILO_INVALID_OPERATION, "Failed to create Text2Image model. `text_encoder_hef` was not set."); + CHECK(!params.image_decoder_hef().empty(), HAILO_INVALID_OPERATION, "Failed to create Text2Image model. `image_decoder_hef` was not set."); + + // TODO: HRT-15973 - Remove after supporting no IP Adapter flow + CHECK(!params.ip_adapter_hef().empty(), HAILO_NOT_IMPLEMENTED, + "Failed to create Text2Image model. `ip_adapter_hef` was not set. Running without `ip_adapter` is not implemented yet."); + + CHECK(params.denoise_lora().empty(), HAILO_NOT_IMPLEMENTED, "Failed to create Text2Image model. Setting `denoise_lora` is not implemented yet."); + CHECK(params.text_encoder_lora().empty(), HAILO_NOT_IMPLEMENTED, "Failed to create Text2Image model. Setting `text_encoder_lora` is not implemented yet."); + CHECK(params.image_decoder_lora().empty(), HAILO_NOT_IMPLEMENTED, "Failed to create Text2Image model. Setting `image_decoder_lora` is not implemented yet."); + CHECK(params.ip_adapter_lora().empty(), HAILO_NOT_IMPLEMENTED, "Failed to create Text2Image model. 
Setting `ip_adapter_lora` is not implemented yet."); + + return HAILO_SUCCESS; +} + +hailo_status Text2Image::Impl::load_params(std::shared_ptr session, const Text2ImageParams ¶ms) +{ + auto status = session->send_file(params.denoise_hef()); + CHECK_SUCCESS(status, "Failed to load Text2Image `denoise_hef`"); + + status = session->send_file(params.text_encoder_hef()); + CHECK_SUCCESS(status, "Failed to load Text2Image `text_encoder_hef`"); + + status = session->send_file(params.image_decoder_hef()); + CHECK_SUCCESS(status, "Failed to load Text2Image `image_decoder_hef`"); + + // TODO: HRT-15799 - Adjust sending ip_adapter (if necessary) according to server integration + status = session->send_file(params.ip_adapter_hef()); + CHECK_SUCCESS(status, "Failed to load Text2Image `ip_adapter_hef`"); + + auto scheduler_type = params.scheduler(); + status = write_and_validate_ack(session, reinterpret_cast(&scheduler_type), sizeof(scheduler_type)); + CHECK_SUCCESS(status, "Failed to configure Text2Image scheduler type"); + + // Ack from server - finished configuring model + TRY(auto ack, session->get_ack()); + LOGGER__INFO("Received ack from server - '{}'", ack); + + return HAILO_SUCCESS; +} + +Expected> Text2Image::Impl::create_unique(std::shared_ptr vdevice, const Text2ImageParams ¶ms) +{ + CHECK_SUCCESS(validate_params(params)); + + TRY(auto session, vdevice->create_session(DEFAULT_TEXT2IMAGE_CONNECTION_PORT)); + CHECK_SUCCESS(load_params(session, params)); + + // Output sample info (TODO: HRT-15869 - Get info from server side) + frame_info_t output_sample_frame_info = {}; + output_sample_frame_info.format = {HAILO_FORMAT_TYPE_UINT8, HAILO_FORMAT_ORDER_NHWC, HAILO_FORMAT_FLAGS_NONE}; + output_sample_frame_info.shape = {1024, 1024, 3}; + + bool is_ip_adapter_supported = false; + frame_info_t ip_adapter_frame_info = {}; + if (!params.ip_adapter_hef().empty()) { + // Input Ip Adapter info (TODO: HRT-15869 - Get info from server side) + is_ip_adapter_supported = true; + 
ip_adapter_frame_info.format = {HAILO_FORMAT_TYPE_UINT8, HAILO_FORMAT_ORDER_NHWC, HAILO_FORMAT_FLAGS_NONE}; + ip_adapter_frame_info.shape = {112, 112, 3}; + } + + auto text2image = Impl(session, params, output_sample_frame_info, is_ip_adapter_supported, ip_adapter_frame_info); + auto text2image_ptr = std::make_unique(std::move(text2image)); + CHECK_NOT_NULL_AS_EXPECTED(text2image_ptr, HAILO_OUT_OF_HOST_MEMORY); + return text2image_ptr; +} + +Expected Text2Image::Impl::ip_adapter_frame_size() const +{ + CHECK_AS_EXPECTED(m_is_ip_adapter_supported, HAILO_INVALID_OPERATION, + "Failed to get `ip_adapter_frame_size`. Ip Adapter was not set"); + + return HailoRTCommon::get_frame_size(m_ip_adapter_frame_info.shape, m_ip_adapter_frame_info.format); +} + +Expected Text2Image::Impl::ip_adapter_shape() const +{ + CHECK_AS_EXPECTED(m_is_ip_adapter_supported, HAILO_INVALID_OPERATION, + "Failed to get `ip_adapter_shape`. Ip Adapter was not set"); + + auto ip_adapter_frame_shape = m_ip_adapter_frame_info.shape; + return ip_adapter_frame_shape; +} + +Expected Text2Image::Impl::ip_adapter_format_type() const +{ + CHECK_AS_EXPECTED(m_is_ip_adapter_supported, HAILO_INVALID_OPERATION, + "Failed to get `ip_adapter_format_type`. Ip Adapter was not set"); + auto type = m_ip_adapter_frame_info.format.type; + return type; +} + +Expected Text2Image::Impl::ip_adapter_format_order() const +{ + CHECK_AS_EXPECTED(m_is_ip_adapter_supported, HAILO_INVALID_OPERATION, + "Failed to get `ip_adapter_format_order`. 
Ip Adapter was not set"); + + auto order = m_ip_adapter_frame_info.format.order; + return order; +} + +uint32_t Text2Image::Impl::output_sample_frame_size() const +{ + return HailoRTCommon::get_frame_size(m_output_sample_frame_info.shape, m_output_sample_frame_info.format); +} + +hailo_3d_image_shape_t Text2Image::Impl::output_sample_shape() const +{ + return m_output_sample_frame_info.shape; +} + +hailo_format_type_t Text2Image::Impl::output_sample_format_type() const +{ + return m_output_sample_frame_info.format.type; +} + +hailo_format_order_t Text2Image::Impl::output_sample_format_order() const +{ + return m_output_sample_frame_info.format.order; +} + +Expected Text2Image::Impl::create_generator(const Text2ImageGeneratorParams ¶ms) +{ + CHECK_SUCCESS(validate_generator_params(params)); + CHECK_SUCCESS(load_generator_params(params)); + + TRY(auto ip_adapter_frame_size, ip_adapter_frame_size()); + auto pimpl = std::make_unique(m_session, params, output_sample_frame_size(), + m_is_ip_adapter_supported, ip_adapter_frame_size); + CHECK_NOT_NULL_AS_EXPECTED(pimpl, HAILO_OUT_OF_HOST_MEMORY); + return Text2ImageGenerator(std::move(pimpl)); +} + +Text2ImageGenerator::Impl::Impl(std::shared_ptr session, const Text2ImageGeneratorParams ¶ms, + uint32_t output_sample_frame_size, bool is_ip_adapter_supported, uint32_t ip_adapter_frame_size) : + m_session(session), + m_params(params), + m_output_sample_frame_size(output_sample_frame_size), + m_is_ip_adapter_supported(is_ip_adapter_supported), + m_ip_adapter_frame_size(ip_adapter_frame_size) +{} + +hailo_status Text2Image::Impl::validate_generator_params(const Text2ImageGeneratorParams ¶ms) +{ + CHECK_AS_EXPECTED(TEXT2IMAGE_SAMPLES_COUNT_DEFAULT_VALUE == params.samples_count(), HAILO_NOT_IMPLEMENTED, + "Setting generator's samples_count is not implemented."); + CHECK_AS_EXPECTED(TEXT2IMAGE_STEPS_COUNT_DEFAULT_VALUE == params.steps_count(), HAILO_NOT_IMPLEMENTED, + "Setting generator's steps_count is not implemented."); + 
CHECK_AS_EXPECTED(TEXT2IMAGE_GUIDANCE_SCALE_DEFAULT_VALUE == params.guidance_scale(), HAILO_NOT_IMPLEMENTED, + "Setting generator's guidance_scale is not implemented."); + + return HAILO_SUCCESS; +} + +hailo_status Text2Image::Impl::load_generator_params(const Text2ImageGeneratorParams ¶ms) +{ + // TODO: Use serialization functions + text2image_generator_params_t packed_params = { + params.steps_count(), + params.samples_count(), + params.guidance_scale(), + params.seed() + }; + + auto status = write_and_validate_ack(m_session, reinterpret_cast(&packed_params), sizeof(packed_params)); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +Expected> Text2ImageGenerator::Impl::generate(const std::string &positive_prompt, + const std::string &negative_prompt, const MemoryView &ip_adapter, std::chrono::milliseconds timeout) +{ + // TODO: HRT-15972 - Create `m_samples_count` buffers + std::vector output_images_memview; + TRY(auto buffer, Buffer::create(m_output_sample_frame_size, BufferStorageParams::create_dma())); + output_images_memview.emplace_back(buffer); + + auto status = generate(output_images_memview, positive_prompt, negative_prompt, ip_adapter, timeout); + CHECK_SUCCESS(status); + + // TODO: HRT-15972 - use m_params.samples_count() + (void)m_params; // not used yet, added to fix android compilation + std::vector result; + result.reserve(TEXT2IMAGE_SAMPLES_COUNT_DEFAULT_VALUE); + result.emplace_back(std::move(buffer)); + + return result; +} + +hailo_status Text2ImageGenerator::Impl::generate(std::vector &output_images, const std::string &positive_prompt, + const std::string &negative_prompt, const MemoryView &ip_adapter, std::chrono::milliseconds timeout) +{ + TimeoutGuard timeout_guard(timeout); + + CHECK_AS_EXPECTED(!positive_prompt.empty(), HAILO_INVALID_ARGUMENT, "`generate` failed. `positive_prompt` cannot be empty"); + CHECK_AS_EXPECTED(m_is_ip_adapter_supported, HAILO_INVALID_OPERATION, "`generate` failed. 
Ip Adapter was not set"); + CHECK(ip_adapter.size() == m_ip_adapter_frame_size, HAILO_INVALID_OPERATION, + "`generate` failed. IP Aapter frame size is not as expected ({}), got {}", m_ip_adapter_frame_size, ip_adapter.size()); + CHECK(output_images.size() == TEXT2IMAGE_SAMPLES_COUNT_DEFAULT_VALUE, HAILO_INVALID_OPERATION, + "`generate` failed. Samples count must be {}, got {}", TEXT2IMAGE_SAMPLES_COUNT_DEFAULT_VALUE, output_images.size()); + for (auto &out_img : output_images) { + CHECK(out_img.size() == m_output_sample_frame_size, HAILO_INVALID_OPERATION, + "`generate` failed. Output sample frame size is not as expected ({}), got {}", m_output_sample_frame_size, out_img.size()); + } + + // Send info before generation + text2image_generation_info_t packed_info = {}; + packed_info.has_negative_prompt = !negative_prompt.empty(); + packed_info.has_ip_adapter = true; + + auto status = write_and_validate_ack(m_session, reinterpret_cast(&packed_info), sizeof(packed_info), timeout_guard.get_remaining_timeout()); + CHECK_SUCCESS(status); + + status = write_and_validate_ack(m_session, positive_prompt, timeout_guard.get_remaining_timeout()); + CHECK_SUCCESS(status); + + if (packed_info.has_negative_prompt) { + status = write_and_validate_ack(m_session, negative_prompt, timeout_guard.get_remaining_timeout()); + CHECK_SUCCESS(status); + } + + status = write_and_validate_ack(m_session, ip_adapter, timeout_guard.get_remaining_timeout()); + CHECK_SUCCESS(status); + + for (auto &out_img : output_images) { + TRY(auto bytes_read, m_session->read(out_img, timeout_guard.get_remaining_timeout())); + CHECK(bytes_read == out_img.size(), HAILO_INTERNAL_FAILURE, + "Failed to read output sample frame. 
Expected frame size {}, got {}", out_img.size(), bytes_read); + } + + return HAILO_SUCCESS; +} + +hailo_status Text2ImageGenerator::Impl::generate(std::vector &output_images, const std::string &positive_prompt, + const std::string &negative_prompt, std::chrono::milliseconds timeout) +{ + (void)output_images; + (void)positive_prompt; + (void)negative_prompt; + (void)timeout; + + LOGGER__ERROR("`Text2ImageGenerator::generate()` function without ip-adapter is not supported yet"); + return HAILO_NOT_IMPLEMENTED; +} + +Expected> Text2ImageGenerator::Impl::generate(const std::string &positive_prompt, + const std::string &negative_prompt, std::chrono::milliseconds timeout) +{ + (void)positive_prompt; + (void)negative_prompt; + (void)timeout; + + LOGGER__ERROR("`Text2ImageGenerator::generate()` function without ip-adapter is not supported yet"); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +hailo_status Text2ImageGenerator::Impl::stop() +{ + LOGGER__ERROR("`Text2ImageGenerator::stop()` function is not supported yet"); + return HAILO_NOT_IMPLEMENTED; +} + +// https://stackoverflow.com/questions/71104545/constructor-and-destructor-in-c-when-using-the-pimpl-idiom +// All member functions shoud be implemented in the cpp module +Text2Image::~Text2Image() = default; +Text2Image::Text2Image(Text2Image &&) = default; + +Text2ImageGenerator::~Text2ImageGenerator() = default; +Text2ImageGenerator::Text2ImageGenerator(Text2ImageGenerator &&) = default; + +} /* namespace genai */ +} /* namespace hailort */ diff --git a/hailort/libhailort/src/genai/text2image/text2image_internal.hpp b/hailort/libhailort/src/genai/text2image/text2image_internal.hpp new file mode 100644 index 0000000..74c44fa --- /dev/null +++ b/hailort/libhailort/src/genai/text2image/text2image_internal.hpp @@ -0,0 +1,95 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file text2image_internal.hpp + * @brief HailoRT Text to Image internal implementation. + **/ + +#ifndef _HAILO_GENAI_TEXT2IMAGE_INTERNAL_HPP_ +#define _HAILO_GENAI_TEXT2IMAGE_INTERNAL_HPP_ + +#include "hailo/hailort.h" +#include "hailo/genai/text2image/text2image.hpp" +#include "common/genai/serializer/serializer.hpp" + +namespace hailort +{ +namespace genai +{ + +class Text2ImageGenerator::Impl final +{ +public: + Impl(std::shared_ptr session, const Text2ImageGeneratorParams ¶ms, uint32_t output_sample_frame_size, + bool is_ip_adapter_supported, uint32_t ip_adapter_frame_size); + + Expected> generate(const std::string &positive_prompt, const std::string &negative_prompt, + std::chrono::milliseconds timeout); + + Expected> generate(const std::string &positive_prompt, const std::string &negative_prompt, + const MemoryView &ip_adapter, std::chrono::milliseconds timeout); + + hailo_status generate(std::vector &output_images, const std::string &positive_prompt, + const std::string &negative_prompt, std::chrono::milliseconds timeout); + + hailo_status generate(std::vector &output_images, const std::string &positive_prompt, + const std::string &negative_prompt, const MemoryView &ip_adapter, + std::chrono::milliseconds timeout); + + hailo_status stop(); + +private: + std::shared_ptr m_session; + Text2ImageGeneratorParams m_params; + uint32_t m_output_sample_frame_size; + bool m_is_ip_adapter_supported; + uint32_t m_ip_adapter_frame_size; +}; + + +typedef struct { + hailo_format_t format; + hailo_3d_image_shape_t shape; +} frame_info_t; + +class Text2Image::Impl final +{ +public: + static Expected> create_unique(std::shared_ptr vdevice, const Text2ImageParams ¶ms); + + Expected create_generator(const Text2ImageGeneratorParams ¶ms = Text2ImageGeneratorParams()); + Expected create_generator_params(); + + uint32_t output_sample_frame_size() const; + hailo_3d_image_shape_t 
output_sample_shape() const; + hailo_format_type_t output_sample_format_type() const; + hailo_format_order_t output_sample_format_order() const; + Expected ip_adapter_frame_size() const; + Expected ip_adapter_shape() const; + Expected ip_adapter_format_type() const; + Expected ip_adapter_format_order() const; + +private: + Impl(std::shared_ptr session, const Text2ImageParams ¶ms, const frame_info_t &output_sample_frame_info, + const bool is_ip_adapter_supported, const frame_info_t &ip_adapter_frame_info = {}); + + static hailo_status validate_params(const Text2ImageParams ¶ms); + static hailo_status load_params(std::shared_ptr session, const Text2ImageParams ¶ms); + + hailo_status validate_generator_params(const Text2ImageGeneratorParams ¶ms); + hailo_status load_generator_params(const Text2ImageGeneratorParams ¶ms); + + std::shared_ptr m_session; + Text2ImageParams m_params; + + frame_info_t m_output_sample_frame_info; + bool m_is_ip_adapter_supported; + frame_info_t m_ip_adapter_frame_info; +}; + +} /* namespace genai */ +} /* namespace hailort */ + +#endif /* _HAILO_GENAI_TEXT2IMAGE_INTERNAL_HPP_ */ diff --git a/hailort/libhailort/src/genai/vdevice_genai.cpp b/hailort/libhailort/src/genai/vdevice_genai.cpp index 8afae44..5e160e1 100644 --- a/hailort/libhailort/src/genai/vdevice_genai.cpp +++ b/hailort/libhailort/src/genai/vdevice_genai.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -14,12 +14,16 @@ #include "hrpc/connection_context.hpp" #include "hrpc/rpc_connection.hpp" #include "vdevice/vdevice_hrpc_client.hpp" +#include "common/genai/session_wrapper/session_wrapper.hpp" + namespace hailort { namespace genai { +const std::string FILE_NOT_FOUND = ""; + Expected> VDeviceGenAI::create_shared() { hailo_vdevice_params_t params {}; @@ -30,7 +34,8 @@ Expected> VDeviceGenAI::create_shared() hailo_status VDeviceGenAI::validate_params(const hailo_vdevice_params_t ¶ms) { CHECK_AS_EXPECTED(params.device_count == 1, HAILO_OUT_OF_PHYSICAL_DEVICES, "Only single device is supported!"); - CHECK_AS_EXPECTED(params.multi_process_service == false, HAILO_NOT_SUPPORTED, "Multi proc service is not supported for GenAI"); + CHECK_AS_EXPECTED(params.multi_process_service == false, HAILO_NOT_SUPPORTED, "Working with multi-process service is not supported for GenAI"); + CHECK_AS_EXPECTED(params.scheduling_algorithm != HAILO_SCHEDULING_ALGORITHM_NONE, HAILO_NOT_SUPPORTED, "Working without schecduler is not supported for GenAI"); return HAILO_SUCCESS; } @@ -39,9 +44,8 @@ Expected get_device_id(const hailo_vdevice_params_t ¶ms) { hailo_device_id_t device_id = {}; - TRY(auto device_id_str, VDeviceHrpcClient::get_device_id(params)); - std::strncpy(device_id.id, device_id_str.c_str(), - (device_id_str.length() + 1)); + TRY(auto device_ids, VDeviceHrpcClient::get_device_ids(params)); + std::strncpy(device_id.id, device_ids[0].c_str(), (device_ids[0].length() + 1)); return device_id; } @@ -54,14 +58,14 @@ Expected> VDeviceGenAI::create_shared(const hailo_ if (!hailort::VDevice::should_force_hrpc_client()) { TRY(device_id, get_device_id(params)); } - auto vdevice_genai = make_shared_nothrow(device_id); + auto vdevice_genai = make_shared_nothrow(device_id, params); CHECK_NOT_NULL(vdevice_genai, HAILO_OUT_OF_HOST_MEMORY); return vdevice_genai; } -VDeviceGenAI::VDeviceGenAI(hailo_device_id_t 
device_id) : - m_device_id(device_id) +VDeviceGenAI::VDeviceGenAI(hailo_device_id_t device_id, const hailo_vdevice_params_t ¶ms) : + m_device_id(device_id), m_vdevice_params(params) {} Expected> VDeviceGenAI::create_session(uint16_t port) @@ -72,46 +76,61 @@ Expected> VDeviceGenAI::create_session(uint16_t po Expected> GenAISession::create_shared(uint16_t port, const std::string &device_id) { TRY(auto session, Session::connect(port, device_id)); - auto ptr = make_shared_nothrow(session); + auto session_wrapper_ptr = make_shared_nothrow(session); + CHECK_NOT_NULL_AS_EXPECTED(session_wrapper_ptr, HAILO_OUT_OF_HOST_MEMORY); + + auto ptr = make_shared_nothrow(session_wrapper_ptr); CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY); return ptr; } -GenAISession::GenAISession(std::shared_ptr session) : - m_session(session) +GenAISession::GenAISession(std::shared_ptr session_wrapper) : + m_session_wrapper(session_wrapper) {} -hailo_status GenAISession::write(const uint8_t *buffer, size_t size, std::chrono::milliseconds timeout) +hailo_status GenAISession::write(MemoryView buffer, std::chrono::milliseconds timeout) { - // First we send the buffer's size. Then the buffer itself. - // TODO: Use hrpc protocol - auto status = m_session->write(reinterpret_cast(&size), sizeof(size), timeout); - CHECK_SUCCESS(status); + return m_session_wrapper->write(buffer, timeout); +} - status = m_session->write(buffer, size, timeout); - CHECK_SUCCESS(status); +Expected GenAISession::read(MemoryView buffer, std::chrono::milliseconds timeout) +{ + return m_session_wrapper->read(buffer, timeout); +} + +Expected> GenAISession::read(std::chrono::milliseconds timeout) +{ + return m_session_wrapper->read(timeout); +} + +Expected GenAISession::get_ack(std::chrono::milliseconds timeout) +{ + // TODO (HRT-15334): - adjusting all ack's once server is written in cpp, validate the ack + std::string server_ack(SERVER_ACK_SIZE, '\0'); + TRY(auto size, read(server_ack, timeout)); + return (0 == size)? 
"" : server_ack; +} + +hailo_status GenAISession::send_file(const std::string &path) +{ + if ((BUILTIN == path)) { + // Write the `BUILTIN` indicator + auto status = write(path); + CHECK_SUCCESS(status); + } else { + // Send file bytes + TRY(auto file_data, read_binary_file(path, BufferStorageParams::create_dma())); + auto status = write(MemoryView(file_data)); + CHECK_SUCCESS(status); + } + + // Ack from server - finished sending data file + TRY(auto ack, get_ack()); + CHECK(ack != FILE_NOT_FOUND, HAILO_NOT_FOUND, "Builtin file does not exist"); + LOGGER__INFO("Sent file - '{}', Received ack from server - '{}'", path, ack); return HAILO_SUCCESS; } -Expected GenAISession::read(uint8_t *buffer, size_t size, std::chrono::milliseconds timeout) -{ - auto start_time = std::chrono::steady_clock::now(); - size_t size_to_read = 0; - auto status = m_session->read(reinterpret_cast(&size_to_read), sizeof(size_to_read), timeout); - CHECK_SUCCESS(status); - - CHECK(size_to_read <= size, HAILO_INVALID_OPERATION, - "Read buffer is smaller then necessary. Buffer size = {}, generation size = {}", size, size_to_read); - - auto elapsed_time = - std::chrono::duration_cast(std::chrono::steady_clock::now() - start_time); - status = m_session->read(buffer, size_to_read, (timeout - elapsed_time)); - CHECK_SUCCESS(status); - - return size_to_read; -} - - } /* namespace genai */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/hailort.cpp b/hailort/libhailort/src/hailort.cpp index 07af238..d0df007 100644 --- a/hailort/libhailort/src/hailort.cpp +++ b/hailort/libhailort/src/hailort.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/hailort_defaults.cpp b/hailort/libhailort/src/hailort_defaults.cpp index 3911a01..867ed83 100644 --- a/hailort/libhailort/src/hailort_defaults.cpp +++ b/hailort/libhailort/src/hailort_defaults.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -316,7 +316,7 @@ hailo_network_parameters_t HailoRTDefaults::get_network_parameters(uint16_t batc params.batch_size = batch_size; return params; -} +} std::string HailoRTDefaults::get_network_name(const std::string &net_group_name) { diff --git a/hailort/libhailort/src/hef/CMakeLists.txt b/hailort/libhailort/src/hef/CMakeLists.txt index 2483ba6..9c85e2d 100644 --- a/hailort/libhailort/src/hef/CMakeLists.txt +++ b/hailort/libhailort/src/hef/CMakeLists.txt @@ -4,6 +4,7 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/hef.cpp ${CMAKE_CURRENT_SOURCE_DIR}/core_op_metadata.cpp ${CMAKE_CURRENT_SOURCE_DIR}/context_switch_actions.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/memory_requirements_calculator.cpp ) set(HAILORT_CPP_SOURCES ${HAILORT_CPP_SOURCES} ${SRC_FILES} PARENT_SCOPE) diff --git a/hailort/libhailort/src/hef/context_switch_actions.cpp b/hailort/libhailort/src/hef/context_switch_actions.cpp index cdde0ae..902d8bb 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.cpp +++ b/hailort/libhailort/src/hef/context_switch_actions.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,6 +11,7 @@ #include "context_switch_actions.hpp" #include "core_op/resource_manager/resource_manager.hpp" #include "hef/hef_internal.hpp" +#include "vdma/memory/descriptor_list.hpp" #include "context_switch_defs.h" @@ -701,10 +702,13 @@ Expected AllowInputDataflowAction::serialize_params(const ContextResourc edge_layer.layer_info.nn_stream_config.periph_buffers_per_frame; break; case LayerType::INTER_CONTEXT: - case LayerType::CACHE: params.credit_type = CONTEXT_SWITCH_DEFS__CREDIT_IN_DESCRIPTORS; params.frame_periph_size = ((edge_layer.buffer_info.bytes_in_pattern - 1) / (edge_layer.buffer_info.desc_page_size)) + 1; break; + case LayerType::CACHE: + params.credit_type = CONTEXT_SWITCH_DEFS__CREDIT_IN_DESCRIPTORS; + params.frame_periph_size = edge_layer.buffer_info.total_desc_count - 1; + break; default: LOGGER__ERROR("Invalid layer type {} for stream {}", static_cast(edge_layer.layer_info.type), m_stream_index); return make_unexpected(HAILO_INTERNAL_FAILURE); @@ -996,7 +1000,6 @@ static CONTEXT_SWITCH_DEFS__stream_reg_info_t parse_nn_config(const CONTROL_PROT reg_info.buffer_padding = nn_config.buffer_padding; reg_info.periph_bytes_per_buffer = nn_config.periph_bytes_per_buffer; reg_info.periph_buffers_per_frame = nn_config.periph_buffers_per_frame; - reg_info.is_periph_calculated_in_hailort = nn_config.is_periph_calculated_in_hailort; reg_info.is_core_hw_padding_config_in_dfc = nn_config.is_core_hw_padding_config_in_dfc; return reg_info; } @@ -1280,24 +1283,25 @@ Expected ActivateCacheInputChannelAction::serialize_params(const Context Expected ActivateCacheOutputChannelAction::create(const vdma::ChannelId &channel_id, uint8_t stream_index, uint8_t network_index, const CONTROL_PROTOCOL__nn_stream_config_t &nn_stream_config, - const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info) + const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info, uint16_t batch_size) { auto 
result = ContextSwitchConfigActionPtr(new (std::nothrow) ActivateCacheOutputChannelAction(channel_id, - stream_index, network_index, nn_stream_config, host_buffer_info)); + stream_index, network_index, nn_stream_config, host_buffer_info, batch_size)); CHECK_NOT_NULL_AS_EXPECTED(result, HAILO_OUT_OF_HOST_MEMORY); return result; } ActivateCacheOutputChannelAction::ActivateCacheOutputChannelAction(const vdma::ChannelId &channel_id, uint8_t stream_index, uint8_t network_index, const CONTROL_PROTOCOL__nn_stream_config_t &nn_stream_config, - const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info) : + const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info, uint16_t batch_size) : ContextSwitchConfigAction(ContextSwitchConfigAction::Type::ActivateCacheOutputChannel, CONTEXT_SWITCH_DEFS__ACTION_TYPE_ACTIVATE_CACHE_OUTPUT), m_channel_id(channel_id), m_stream_index(stream_index), m_network_index(network_index), m_nn_stream_config(nn_stream_config), - m_host_buffer_info(host_buffer_info) + m_host_buffer_info(host_buffer_info), + m_batch_size(batch_size) {} bool ActivateCacheOutputChannelAction::supports_repeated_block() const @@ -1314,6 +1318,7 @@ Expected ActivateCacheOutputChannelAction::serialize_params(const Contex params.network_index = m_network_index; params.stream_reg_info = parse_nn_config(m_nn_stream_config); params.host_buffer_info = m_host_buffer_info; + params.batch_size = m_batch_size; return Buffer::create(reinterpret_cast(¶ms), sizeof(params)); } diff --git a/hailort/libhailort/src/hef/context_switch_actions.hpp b/hailort/libhailort/src/hef/context_switch_actions.hpp index d9d520f..cc10496 100644 --- a/hailort/libhailort/src/hef/context_switch_actions.hpp +++ b/hailort/libhailort/src/hef/context_switch_actions.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -20,12 +20,12 @@ #include "device_common/control_protocol.hpp" #include "context_switch_defs.h" -#include "core_op/resource_manager/config_buffer.hpp" namespace hailort { +class ResourcesManager; class ContextResources; struct EdgeLayer; #pragma pack(push, 1) @@ -168,6 +168,8 @@ private: const vdma::ChannelId m_channel_id; }; +class ConfigBuffer; + class WriteDataCcwAction : public ContextSwitchConfigAction { public: @@ -791,7 +793,7 @@ class ActivateCacheOutputChannelAction : public ContextSwitchConfigAction public: static Expected create(const vdma::ChannelId &channel_id, uint8_t stream_index, uint8_t network_index, const CONTROL_PROTOCOL__nn_stream_config_t &nn_stream_config, - const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info); + const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info, uint16_t batch_size); virtual bool supports_repeated_block() const override; virtual Expected serialize_params(const ContextResources &context_resources) const override; @@ -799,13 +801,14 @@ public: private: ActivateCacheOutputChannelAction(const vdma::ChannelId &channel_id, uint8_t stream_index, uint8_t network_index, const CONTROL_PROTOCOL__nn_stream_config_t &nn_stream_config, - const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info); + const CONTROL_PROTOCOL__host_buffer_info_t &host_buffer_info, uint16_t batch_size); const vdma::ChannelId m_channel_id; const uint8_t m_stream_index; const uint8_t m_network_index; const CONTROL_PROTOCOL__nn_stream_config_t m_nn_stream_config; const CONTROL_PROTOCOL__host_buffer_info_t m_host_buffer_info; + uint16_t m_batch_size; }; class ValidateChannelAction : public ContextSwitchConfigAction diff --git a/hailort/libhailort/src/hef/core_op_metadata.cpp b/hailort/libhailort/src/hef/core_op_metadata.cpp index ded0449..28a209a 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.cpp +++ b/hailort/libhailort/src/hef/core_op_metadata.cpp 
@@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -51,9 +51,10 @@ static bool is_edge_under_mux(const LayerInfo &info, const std::string &edge_nam } ContextMetadata::ContextMetadata(std::vector &&actions, - ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found) : + ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found, CcwDmaTransfersInfoMap&& ccws_dma_transfers_info) : m_actions(std::move(actions)), m_config_buffers_info(std::move(config_buffers_info)), + m_ccws_dma_transfers_info(std::move(ccws_dma_transfers_info)), m_const_input_layer_found(const_input_layer_found) {} @@ -608,4 +609,22 @@ Expected> NetworkGroupMetadata::get_network_in return network_infos; } + +Expected get_network_batch_size(const ConfigureNetworkParams& params, const std::string &network_name) +{ + for (auto const &network_map : params.network_params_by_name) { + auto const network_name_from_params = network_map.first; + if (network_name_from_params == network_name) { + auto actual_batch_size = network_map.second.batch_size; + if (HAILO_DEFAULT_BATCH_SIZE == actual_batch_size) { + actual_batch_size = DEFAULT_ACTUAL_BATCH_SIZE; + } + return actual_batch_size; + } + } + + LOGGER__ERROR("Failed to find network with network name {}", network_name); + return make_unexpected(HAILO_NOT_FOUND); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/hef/core_op_metadata.hpp b/hailort/libhailort/src/hef/core_op_metadata.hpp index 407b563..2419c7d 100644 --- a/hailort/libhailort/src/hef/core_op_metadata.hpp +++ b/hailort/libhailort/src/hef/core_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -32,8 +32,10 @@ struct SupportedFeatures { bool periph_calculation_in_hailort = false; bool core_hw_padding_config_in_dfc = false; bool batch_register_config = false; + bool aligned_ccws = false; }; +// TODO: HRT-16585 - Remove duplication in struct ConfigBufferInfo - we don't need both bursts_sizes and ccw_dma_transfers struct ConfigBufferInfo { /** * Sizes of all the successive ccw's (ccw burst). @@ -45,16 +47,24 @@ struct ConfigBufferInfo { * we use this var to get the config buffer offset from the beginning of the hef user address. */ uint64_t offset_from_hef_base = 0; + /** + * In case of shared_weights (alligned ccws) - we use this vector to perform the dma transfers. + */ + std::vector> ccw_dma_transfers; }; // For each config_stream_index we store vector of all ccw write length. The vector is used to build the config buffer.g using ConfigBufferInfoMap = std::unordered_map; +// List of dma transfers for each config channel index +using CcwDmaTransfersInfoMap = std::unordered_map>>; + class ContextMetadata final { public: ContextMetadata(std::vector &&actions, - ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found); + ConfigBufferInfoMap&& config_buffers_info, bool const_input_layer_found, CcwDmaTransfersInfoMap&& ccws_dma_transfers_info = {}); + ContextMetadata() = default; const std::vector &get_actions() const; std::vector get_actions_of_type( @@ -83,6 +93,7 @@ public: private: std::vector m_actions; ConfigBufferInfoMap m_config_buffers_info; + CcwDmaTransfersInfoMap m_ccws_dma_transfers_info; bool m_const_input_layer_found; std::vector m_boundary_input_layers; @@ -131,12 +142,12 @@ public: size_t get_contexts_count(); size_t get_dynamic_contexts_count(); - const std::string &core_op_name() const + const std::string& core_op_name() const { return m_core_op_name; } - const SupportedFeatures &supported_features() const + const SupportedFeatures& 
supported_features() const { return m_supported_features; } @@ -144,7 +155,7 @@ public: Expected get_total_transfer_size(); // TODO: Remove - const std::vector &get_network_names() const + const std::vector& get_network_names() const { return m_sorted_network_names; } @@ -216,7 +227,7 @@ public: Expected> get_network_infos() const; - const std::string &name() const + const std::string& name() const { return m_network_group_name; } @@ -259,6 +270,9 @@ private: friend class ConfiguredNetworkGroupBase; }; +Expected get_network_batch_size(const ConfigureNetworkParams& params, const std::string &network_name); + + } /* namespace hailort */ #endif /* _HAILO_CORE_OP_METADATA_HPP_ */ diff --git a/hailort/libhailort/src/hef/hef.cpp b/hailort/libhailort/src/hef/hef.cpp index 2053022..bbffb18 100644 --- a/hailort/libhailort/src/hef/hef.cpp +++ b/hailort/libhailort/src/hef/hef.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -84,7 +84,6 @@ static std::string get_shape_str(const hailo_stream_info_t &stream_info) { switch (stream_info.format.order) { - case HAILO_FORMAT_ORDER_HAILO_NMS: case HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP: return HailoRTCommon::get_format_type_str(stream_info.format.type) + ", " + HailoRTCommon::get_format_order_str(stream_info.format.order) + "(maximum frame size: " + std::to_string(HailoRTCommon::get_nms_hw_frame_size(stream_info.nms_info)) + ")"; @@ -106,14 +105,13 @@ static std::string get_shape_str(const hailo_vstream_info_t &vstream_info) { switch (vstream_info.format.order) { - case HAILO_FORMAT_ORDER_HAILO_NMS: case HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS: - case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: // TODO: HRT-15612 (byte_mask order to use new field max_proposals_total and not max_proposals_per_class) return HailoRTCommon::get_format_type_str(vstream_info.format.type) + ", " + HailoRTCommon::get_format_order_str(vstream_info.format.order) + "(number of classes: " + std::to_string(vstream_info.nms_shape.number_of_classes) + ", maximum bounding boxes per class: " + std::to_string(vstream_info.nms_shape.max_bboxes_per_class) + ", maximum frame size: " + std::to_string(HailoRTCommon::get_nms_host_frame_size(vstream_info.nms_shape, vstream_info.format)) + ")"; case HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE: + case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: return HailoRTCommon::get_format_type_str(vstream_info.format.type) + ", " + HailoRTCommon::get_format_order_str(vstream_info.format.order) + "(number of classes: " + std::to_string(vstream_info.nms_shape.number_of_classes) + ", maximum bounding boxes total: " + std::to_string(vstream_info.nms_shape.max_bboxes_total) + @@ -169,19 +167,33 @@ Expected Hef::create(const std::string &hef_path) { TRY(auto impl, Hef::Impl::create(hef_path)); - // TODO: can we do this without the copy ctor here (i.e. 
make the impl as a unique_ptr to begin with) - return Hef(make_unique_nothrow(std::move(impl))); + auto impl_ptr = make_shared_nothrow(std::move(impl)); + CHECK_NOT_NULL_AS_EXPECTED(impl_ptr, HAILO_OUT_OF_HOST_MEMORY); + return Hef(std::move(impl_ptr)); } Expected Hef::create(const MemoryView &hef_buffer) { - TRY(auto impl, Hef::Impl::create(hef_buffer)); + TRY(auto hef_shared_buffer, Buffer::create_shared(hef_buffer.data(), hef_buffer.size(), + BufferStorageParams::create_dma())); + + TRY(auto impl, Hef::Impl::create(hef_shared_buffer)); - // TODO: can we do this without the copy ctor here (i.e. make the impl as a unique_ptr to begin with) - return Hef(make_unique_nothrow(std::move(impl))); + auto impl_ptr = make_shared_nothrow(std::move(impl)); + CHECK_NOT_NULL_AS_EXPECTED(impl_ptr, HAILO_OUT_OF_HOST_MEMORY); + return Hef(std::move(impl_ptr)); } -Hef::Hef(std::unique_ptr pimpl) : +Expected Hef::create(std::shared_ptr hef_buffer) +{ + TRY(auto impl, Hef::Impl::create(hef_buffer)); + + auto impl_ptr = make_shared_nothrow(std::move(impl)); + CHECK_NOT_NULL_AS_EXPECTED(impl_ptr, HAILO_OUT_OF_HOST_MEMORY); + return Hef(std::move(impl_ptr)); +} + +Hef::Hef(std::shared_ptr pimpl) : pimpl(std::move(pimpl)) {} @@ -307,18 +319,25 @@ Expected> Hef::get_vstream_names_from_stream_name(const Expected Hef::Impl::create(const std::string &hef_path) { - hailo_status status = HAILO_UNINITIALIZED; + if (is_env_variable_on(HAILO_COPY_HEF_CONTENT_TO_A_MAPPED_BUFFER_PRE_CONFIGURE_ENV_VAR)) { + TRY(auto buffer, read_binary_file(hef_path, BufferStorageParams::create_dma())); + auto hef_buffer_ptr = make_shared_nothrow(std::move(buffer)); + CHECK_NOT_NULL_AS_EXPECTED(hef_buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); - Impl hef(hef_path, status); - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed creating HEF"); - return make_unexpected(status); - } + return create(hef_buffer_ptr); + } else { + hailo_status status = HAILO_UNINITIALIZED; + Impl hef(hef_path, status); + if (HAILO_SUCCESS != 
status) { + LOGGER__ERROR("Failed creating HEF"); + return make_unexpected(status); + } return hef; + } } -Expected Hef::Impl::create(const MemoryView &hef_buffer) +Expected Hef::Impl::create(std::shared_ptr hef_buffer) { hailo_status status = HAILO_UNINITIALIZED; @@ -341,9 +360,11 @@ Expected calc_hef_residue_size(std::shared_ptr hef_ return total_size - HEF_HEADER_SIZE_V1; case HEADER_VERSION_2: return total_size - HEF_HEADER_SIZE_V2; + case HEADER_VERSION_3: + return total_size - HEF_HEADER_SIZE_V3; default: LOGGER__ERROR("Unsupported hef version {}", version); - return make_unexpected(HAILO_INVALID_HEF); + return make_unexpected(HAILO_HEF_NOT_SUPPORTED); } } @@ -361,10 +382,8 @@ static hailo_status calc_istream_md5(std::ifstream &s, MD5_SUM_t &calculated_md5 { char md5_buffer[HEF__MD5_BUFFER_SIZE] = {}; MD5_CTX md5 = {}; - auto beg_pos = s.tellg(); CHECK(-1 != beg_pos, HAILO_FILE_OPERATION_FAILURE, "ifstream::tellg() failed"); - MD5_Init(&md5); while (!s.eof()) { s.read(md5_buffer, HEF__MD5_BUFFER_SIZE); @@ -372,26 +391,24 @@ static hailo_status calc_istream_md5(std::ifstream &s, MD5_SUM_t &calculated_md5 MD5_Update(&md5, &md5_buffer, s.gcount()); } MD5_Final(calculated_md5, &md5); - s.clear(); s.seekg(beg_pos, s.beg); CHECK(s.good(), HAILO_FILE_OPERATION_FAILURE, "ifstream::seekg() failed"); - return HAILO_SUCCESS; } hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, MD5_SUM_t &calculated_md5, size_t hef_file_residue_size) { - CHECK(HEADER_MAGIC == header.magic, HAILO_INVALID_HEF, + CHECK(HEADER_MAGIC == header.magic, HAILO_HEF_NOT_SUPPORTED, "HEF magic does not match. detected magic - {:x}", header.magic); CHECK((HEADER_VERSION_0 == header.version) , HAILO_INTERNAL_FAILURE, "HEF version does not match. 
Should be {} but detected {}", HEADER_VERSION_0, header.version); - CHECK(hef_file_residue_size == header.hef_proto_size, HAILO_INVALID_HEF, + CHECK(hef_file_residue_size == header.hef_proto_size, HAILO_HEF_FILE_CORRUPTED, "HEF file length does not match"); - CHECK(0 == memcmp(&calculated_md5, &header.distinct.v0.expected_md5, sizeof(MD5_SUM_t)), HAILO_INVALID_HEF, + CHECK(0 == memcmp(&calculated_md5, &header.distinct.v0.expected_md5, sizeof(MD5_SUM_t)), HAILO_HEF_FILE_CORRUPTED, "HEF md5 does not match"); return HAILO_SUCCESS; @@ -399,34 +416,43 @@ hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, MD5_SUM hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, const uint32_t &crc_32, size_t hef_file_residue_size) { - CHECK(HEADER_MAGIC == header.magic, HAILO_INVALID_HEF, + CHECK(HEADER_MAGIC == header.magic, HAILO_HEF_NOT_SUPPORTED, "HEF magic does not match. Should be {:x} but detected magic - {:x}", HEADER_MAGIC, header.magic); CHECK((HEADER_VERSION_1 == header.version), HAILO_INTERNAL_FAILURE, "HEF version does not match. 
Should be {} but detected {}", HEADER_VERSION_1, header.version); - CHECK(hef_file_residue_size == header.hef_proto_size + header.distinct.v1.ccws_size, HAILO_INVALID_HEF, + CHECK(hef_file_residue_size == header.hef_proto_size + header.distinct.v1.ccws_size, HAILO_HEF_FILE_CORRUPTED, "HEF file length does not match"); - CHECK(0 == memcmp(&crc_32, &header.distinct.v1.crc, sizeof(crc_32)), HAILO_INVALID_HEF, + CHECK(0 == memcmp(&crc_32, &header.distinct.v1.crc, sizeof(crc_32)), HAILO_HEF_FILE_CORRUPTED, "HEF crc does not match"); return HAILO_SUCCESS; } -hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, const uint64_t &xxh3_64bits, size_t hef_file_residue_size) +hailo_status Hef::Impl::validate_hef_header(const hef__header_t &header, const uint64_t &calculated_xxh3_64bits, size_t hef_file_residue_size) { - CHECK(HEADER_MAGIC == header.magic, HAILO_INVALID_HEF, + CHECK(HEADER_MAGIC == header.magic, HAILO_HEF_NOT_SUPPORTED, "HEF magic does not match. Should be {:x} but detected magic - {:x}", HEADER_MAGIC, header.magic); - CHECK((HEADER_VERSION_2 == header.version), HAILO_INTERNAL_FAILURE, - "HEF version does not match. 
Should be {} but detected {}", HEADER_VERSION_2, header.version); + uint64_t non_proto_size = 0; + uint64_t xxh3_64bits_from_hef = 0; + if (HEADER_VERSION_2 == header.version) { + non_proto_size = header.distinct.v2.ccws_size; + xxh3_64bits_from_hef = header.distinct.v2.xxh3_64bits; + } else if (HEADER_VERSION_3 == header.version) { + non_proto_size = header.distinct.v3.ccws_size_with_padding + header.distinct.v3.additional_info_size; + xxh3_64bits_from_hef = header.distinct.v3.xxh3_64bits; + } else { + LOGGER__ERROR("Invalid HEF version"); + return HAILO_HEF_NOT_SUPPORTED; + } + CHECK(hef_file_residue_size == header.hef_proto_size + non_proto_size, HAILO_HEF_FILE_CORRUPTED, + "HEF file length does not match"); - CHECK(hef_file_residue_size == header.hef_proto_size + header.distinct.v2.ccws_size, HAILO_INVALID_HEF, - "HEF file length does not match"); - - CHECK(0 == memcmp(&xxh3_64bits, &header.distinct.v2.xxh3_64bits, sizeof(xxh3_64bits)), HAILO_INVALID_HEF, - "HEF xxhash does not match, calculated: {}, expected: {}", xxh3_64bits, header.distinct.v2.xxh3_64bits); + CHECK(0 == memcmp(&calculated_xxh3_64bits, &xxh3_64bits_from_hef, sizeof(calculated_xxh3_64bits)), HAILO_HEF_FILE_CORRUPTED, + "HEF xxhash does not match, calculated: {}, expected: {}", calculated_xxh3_64bits, xxh3_64bits_from_hef); return HAILO_SUCCESS; } @@ -440,10 +466,13 @@ hailo_status Hef::Impl::validate_hef_extensions() } } - CHECK(unsupported_extensions.empty(), HAILO_INVALID_HEF, "Failed opening non-compatible HEF with the following unsupported extensions: {}", + CHECK(unsupported_extensions.empty(), HAILO_HEF_NOT_SUPPORTED, "Failed opening non-compatible HEF with the following unsupported extensions: {}", std::accumulate(std::next(unsupported_extensions.begin()), unsupported_extensions.end(), unsupported_extensions[0], [] (std::string a, std::string b) { return std::move(a) + ", " + b; })); + CHECK_AS_EXPECTED(m_supported_features.periph_calculation_in_hailort, HAILO_HEF_NOT_SUPPORTED, + "Hef 
has periph_calculation_in_hailort feature disabled - this HEF is outdated and no longer supported. Please update HEF"); + return HAILO_SUCCESS; } @@ -462,13 +491,6 @@ void Hef::Impl::init_hef_version(uint32_t version) m_hef_version = version; } -void Hef::Impl::clear_hef_buffer() -{ -#ifdef HAILO_SUPPORT_MULTI_PROCESS - m_hef_buffer = Buffer(); -#endif // HAILO_SUPPORT_MULTI_PROCESS -} - Expected Hef::Impl::parse_hef_header_before_distinct(std::shared_ptr hef_reader) { hef__header_t hef_header = {}; @@ -504,6 +526,19 @@ hailo_status Hef::Impl::fill_v2_hef_header(hef__header_t &hef_header, std::share return HAILO_SUCCESS; } +hailo_status Hef::Impl::fill_v3_hef_header(hef__header_t &hef_header, std::shared_ptr hef_reader) +{ + auto status = hef_reader->read(reinterpret_cast(&hef_header.distinct), sizeof(hef__header_distinct_t::v3)); + CHECK_SUCCESS(status); + + hef_header.distinct.v3.ccws_size_with_padding = BYTE_ORDER__htonll(hef_header.distinct.v3.ccws_size_with_padding); + hef_header.distinct.v3.xxh3_64bits = BYTE_ORDER__htonll(hef_header.distinct.v3.xxh3_64bits); + hef_header.distinct.v3.hef_padding_size = BYTE_ORDER__htonl(hef_header.distinct.v3.hef_padding_size); + hef_header.distinct.v3.additional_info_size = BYTE_ORDER__htonll(hef_header.distinct.v3.additional_info_size); + + return HAILO_SUCCESS; +} + hailo_status Hef::Impl::fill_core_ops_and_networks_metadata(uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset) { fill_core_ops(); @@ -518,64 +553,56 @@ hailo_status Hef::Impl::fill_core_ops_and_networks_metadata(uint32_t hef_version return HAILO_SUCCESS; } -// TODO HRT-13920: remove duplications between parse_hef_file and parse_hef_memview hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) { -#ifdef HAILO_SUPPORT_MULTI_PROCESS - TRY(m_hef_buffer, read_binary_file(hef_path)); -#endif // HAILO_SUPPORT_MULTI_PROCESS - TRY(auto hef_reader, SeekableBytesReader::create_reader(hef_path)); auto status = hef_reader->open(); 
CHECK_SUCCESS(status); m_hef_reader = hef_reader; - TRY(auto hef_header, parse_hef_header_before_distinct(hef_reader)); init_hef_version(hef_header.version); - - m_ccws_offset = 0; // Relevant only for HEADER_VERSION_1 - + m_offset_zero_point = 0; // Not relevant for HEADER_VERSION_0 switch (hef_header.version) { case HEADER_VERSION_0: { status = hef_reader->read(reinterpret_cast(&hef_header.distinct), sizeof(hef__header_distinct_t::v0)); CHECK_SUCCESS(status); - MD5_SUM_t calculated_md5 = {}; status = calc_istream_md5(*hef_reader->get_fstream(), calculated_md5); CHECK_SUCCESS(status); - TRY(const auto hef_file_residue_size, hef_reader->calculate_remaining_size()); - status = validate_hef_header(hef_header, calculated_md5, hef_file_residue_size); CHECK_SUCCESS(status); - init_md5(calculated_md5); break; } case HEADER_VERSION_1: { status = fill_v1_hef_header(hef_header, hef_reader); CHECK_SUCCESS(status); - - m_ccws_offset = HEF_HEADER_SIZE_V1 + hef_header.hef_proto_size; - + m_offset_zero_point = HEF_HEADER_SIZE_V1 + hef_header.hef_proto_size; TRY(auto calculated_residue_size, calc_hef_residue_size(hef_reader, hef_header.version)); TRY(auto calculated_crc, CRC32::calc_crc_on_stream(hef_reader->get_fstream(), calculated_residue_size)); - status = validate_hef_header(hef_header, calculated_crc, calculated_residue_size); CHECK_SUCCESS(status); - init_crc(calculated_crc); break; } case HEADER_VERSION_2: { status = fill_v2_hef_header(hef_header, hef_reader); CHECK_SUCCESS(status); - - m_ccws_offset = HEF_HEADER_SIZE_V2 + hef_header.hef_proto_size; - + m_offset_zero_point = HEF_HEADER_SIZE_V2 + hef_header.hef_proto_size; + TRY(auto calculated_residue_size, calc_hef_residue_size(hef_reader, hef_header.version)); + TRY(auto calculated_xxh3_64bits, Xxhash::calc_xxh3_on_stream(hef_reader->get_fstream(), calculated_residue_size)); + status = validate_hef_header(hef_header, calculated_xxh3_64bits, calculated_residue_size); + CHECK_SUCCESS(status); + m_xxh3_64bits = 
calculated_xxh3_64bits; + break; + } + case HEADER_VERSION_3: { + status = fill_v3_hef_header(hef_header, hef_reader); + CHECK_SUCCESS(status); + m_offset_zero_point = HEF_HEADER_SIZE_V3 + hef_header.hef_proto_size + hef_header.distinct.v3.hef_padding_size; TRY(auto calculated_residue_size, calc_hef_residue_size(hef_reader, hef_header.version)); TRY(auto calculated_xxh3_64bits, Xxhash::calc_xxh3_on_stream(hef_reader->get_fstream(), calculated_residue_size)); - status = validate_hef_header(hef_header, calculated_xxh3_64bits, calculated_residue_size); CHECK_SUCCESS(status); m_xxh3_64bits = calculated_xxh3_64bits; @@ -583,24 +610,20 @@ hailo_status Hef::Impl::parse_hef_file(const std::string &hef_path) } default: LOGGER__ERROR("Unsupported hef version {}", hef_header.version); - return HAILO_INVALID_HEF; + return HAILO_HEF_NOT_SUPPORTED; } - ProtoHEFHef hef_message; google::protobuf::io::IstreamInputStream zero_copy_input(hef_reader->get_fstream().get()); auto rb = hef_message.ParseFromBoundedZeroCopyStream(&zero_copy_input, hef_header.hef_proto_size); // This line corrupts the file - CHECK(rb, HAILO_INVALID_HEF, "Failed parsing HEF file"); + CHECK(rb, HAILO_HEF_FILE_CORRUPTED, "Failed parsing HEF file"); hef_reader->get_fstream()->clear(); // The call to ParseFromBoundedZeroCopyStream might corrupt the file, so we need to clear it's error flags // TODO: Remove this reset after stopping support for V0 (in the new format (V1), the file is not corrupted after parsing the protobuf message). 
status = transfer_protobuf_field_ownership(hef_message); CHECK_SUCCESS(status); - - status = fill_core_ops_and_networks_metadata(hef_header.version, hef_reader, m_ccws_offset); + status = fill_core_ops_and_networks_metadata(hef_header.version, hef_reader, m_offset_zero_point); CHECK_SUCCESS(status); - status = hef_reader->close(); CHECK_SUCCESS(status); - TRACE(HefLoadedTrace, hef_path, m_header.sdk_version(), m_md5); return HAILO_SUCCESS; } @@ -610,7 +633,7 @@ hailo_status Hef::Impl::parse_hef_memview_internal(const size_t proto_size, cons { ProtoHEFHef hef_message; auto rb = hef_message.ParseFromArray(proto_buffer, static_cast(proto_size)); - CHECK(rb, HAILO_INVALID_HEF, "Failed parsing HEF buffer"); + CHECK(rb, HAILO_HEF_FILE_CORRUPTED, "Failed parsing HEF buffer"); auto status = transfer_protobuf_field_ownership(hef_message); CHECK_SUCCESS(status); @@ -620,22 +643,17 @@ hailo_status Hef::Impl::parse_hef_memview_internal(const size_t proto_size, cons return HAILO_SUCCESS; } -// TODO HRT-13920: remove duplications between parse_hef_file and parse_hef_memview hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) { -#ifdef HAILO_SUPPORT_MULTI_PROCESS - TRY(m_hef_buffer, Buffer::create(hef_memview.data(), hef_memview.size())); -#endif // HAILO_SUPPORT_MULTI_PROCESS - TRY(auto hef_reader, SeekableBytesReader::create_reader(hef_memview)); m_hef_reader = hef_reader; TRY(auto hef_header, parse_hef_header_before_distinct(hef_reader)); init_hef_version(hef_header.version); - CHECK(hef_memview.size() >= sizeof(hef__header_t), HAILO_INVALID_HEF, "Invalid HEF header"); + CHECK(hef_memview.size() >= sizeof(hef__header_t), HAILO_HEF_FILE_CORRUPTED, "Invalid HEF header"); - m_ccws_offset = 0; // Not relevant for HEADER_VERSION_0 + m_offset_zero_point = 0; // Not relevant for HEADER_VERSION_0 switch (hef_header.version) { case HEADER_VERSION_0: { @@ -654,7 +672,7 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) 
init_md5(calculated_md5); - return parse_hef_memview_internal(proto_size, proto_buffer, hef_header.version, hef_reader, m_ccws_offset); + return parse_hef_memview_internal(proto_size, proto_buffer, hef_header.version, hef_reader, m_offset_zero_point); } case HEADER_VERSION_1: { auto status = fill_v1_hef_header(hef_header, hef_reader); @@ -663,7 +681,7 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) auto proto_and_ccw_buffer = hef_memview.data() + HEF_HEADER_SIZE_V1; auto proto_size = hef_memview.size() - HEF_HEADER_SIZE_V1 - hef_header.distinct.v1.ccws_size; - m_ccws_offset = HEF_HEADER_SIZE_V1 + hef_header.hef_proto_size; + m_offset_zero_point = HEF_HEADER_SIZE_V1 + hef_header.hef_proto_size; TRY(auto proto_and_ccws_size, calc_hef_residue_size(hef_reader, hef_header.version)); auto proto_and_ccws_buffer = MemoryView::create_const(hef_memview.data() + HEF_HEADER_SIZE_V1, proto_and_ccws_size); @@ -674,7 +692,7 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) init_crc(calculated_crc); - return parse_hef_memview_internal(static_cast(proto_size), proto_and_ccw_buffer, hef_header.version, hef_reader, m_ccws_offset); + return parse_hef_memview_internal(static_cast(proto_size), proto_and_ccw_buffer, hef_header.version, hef_reader, m_offset_zero_point); } case HEADER_VERSION_2: { auto status = fill_v2_hef_header(hef_header, hef_reader); @@ -683,7 +701,7 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) auto proto_and_ccw_buffer = hef_memview.data() + HEF_HEADER_SIZE_V2; auto proto_size = hef_memview.size() - HEF_HEADER_SIZE_V2 - hef_header.distinct.v2.ccws_size; - m_ccws_offset = HEF_HEADER_SIZE_V2 + hef_header.hef_proto_size; + m_offset_zero_point = HEF_HEADER_SIZE_V2 + hef_header.hef_proto_size; TRY(auto proto_and_ccws_size, calc_hef_residue_size(hef_reader, hef_header.version)); auto proto_and_ccws_buffer = MemoryView::create_const(hef_memview.data() + HEF_HEADER_SIZE_V2, 
proto_and_ccws_size); @@ -693,11 +711,33 @@ hailo_status Hef::Impl::parse_hef_memview(const MemoryView &hef_memview) CHECK_SUCCESS(status); m_xxh3_64bits = calculated_xxh3_64bits; - return parse_hef_memview_internal(static_cast(proto_size), proto_and_ccw_buffer, hef_header.version, hef_reader, m_ccws_offset); + return parse_hef_memview_internal(static_cast(proto_size), proto_and_ccw_buffer, hef_header.version, hef_reader, m_offset_zero_point); + } + case HEADER_VERSION_3: + { + auto status = fill_v3_hef_header(hef_header, hef_reader); + CHECK_SUCCESS(status); + + auto proto_and_ccw_buffer = hef_memview.data() + HEF_HEADER_SIZE_V3; + auto proto_size = hef_memview.size() - HEF_HEADER_SIZE_V3 - hef_header.distinct.v3.ccws_size_with_padding - hef_header.distinct.v3.additional_info_size; + + CHECK(hef_header.distinct.v3.ccws_size_with_padding >= hef_header.distinct.v3.hef_padding_size, HAILO_HEF_FILE_CORRUPTED, "Invalid HEF - ccws size is smaller than padding size"); + m_ccws_section_size = hef_header.distinct.v3.ccws_size_with_padding - hef_header.distinct.v3.hef_padding_size; + m_offset_zero_point = HEF_HEADER_SIZE_V3 + hef_header.hef_proto_size + hef_header.distinct.v3.hef_padding_size; + + TRY(auto proto_and_ccws_size, calc_hef_residue_size(hef_reader, hef_header.version)); + auto proto_and_ccws_buffer = MemoryView::create_const(hef_memview.data() + HEF_HEADER_SIZE_V3, proto_and_ccws_size); + TRY(auto calculated_xxh3_64bits, Xxhash::calc_xxh3_on_buffer(proto_and_ccws_buffer)); + + status = validate_hef_header(hef_header, calculated_xxh3_64bits, proto_and_ccws_size); + CHECK_SUCCESS(status); + m_xxh3_64bits = calculated_xxh3_64bits; + + return parse_hef_memview_internal(static_cast(proto_size), proto_and_ccw_buffer, hef_header.version, hef_reader, m_offset_zero_point); } default: LOGGER__ERROR("Unsupported hef version {}", hef_header.version); - return HAILO_INVALID_HEF; + return HAILO_HEF_NOT_SUPPORTED; } } @@ -864,8 +904,10 @@ Expected 
Hef::Impl::create_metadata_per_arch(const ProtoHEFCo { // TODO: validate that there's a read+write layer for each cache + no cache_id is only read or written without the // other. They can be across different contexts (HRT-13655) - TRY(auto preliminary_context, HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features, hef_version, hef_reader, ccws_offset)); - TRY_V(auto dynamic_contexts, HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch(), hef_version, hef_reader, ccws_offset)); + TRY(auto preliminary_context, HefUtils::parse_preliminary_context(core_op.preliminary_config, m_supported_features, hef_version, hef_reader, ccws_offset, + is_aligned_ccws_on())); + TRY_V(auto dynamic_contexts, HefUtils::parse_dynamic_contexts(core_op, m_supported_features, get_device_arch(), hef_version, hef_reader, ccws_offset, + is_aligned_ccws_on())); TRY(auto config_channels_info, parse_config_channels_info(core_op)); // If const input layer is found in the preliminary context, or first dynamic context we can't use fast batch switch @@ -982,15 +1024,31 @@ hailo_status Hef::Impl::transfer_protobuf_field_ownership(ProtoHEFHef &hef_messa m_supported_features = get_supported_features(m_header, m_hef_extensions, m_included_features, m_hef_optional_extensions); + m_hef_external_resources.reserve(hef_message.external_resources().size()); + for (const auto &external_resouce : hef_message.external_resources()) { + ExternalResourceInfo external_resource_info{external_resouce.name(), external_resouce.size(), external_resouce.offset()}; + m_hef_external_resources.emplace_back(external_resource_info); + } + return HAILO_SUCCESS; } -#ifdef HAILO_SUPPORT_MULTI_PROCESS -const MemoryView Hef::Impl::get_hef_memview() +Expected> Hef::Impl::get_hef_as_buffer() { - return MemoryView(m_hef_buffer); + if (m_hef_buffer) { + auto ptr = m_hef_buffer; + return ptr; + } + + auto hef_reader = get_hef_reader(); + 
CHECK_SUCCESS_AS_EXPECTED(hef_reader->open()); + TRY(auto size, hef_reader->get_size()); + TRY(auto buffer_ptr, Buffer::create_shared(size, BufferStorageParams::create_dma())); + + CHECK_SUCCESS(hef_reader->read(buffer_ptr->data(), size)); + CHECK_SUCCESS(hef_reader->close()); + return buffer_ptr; } -#endif // HAILO_SUPPORT_MULTI_PROCESS Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) { @@ -1006,12 +1064,14 @@ Hef::Impl::Impl(const std::string &hef_path, hailo_status &status) status = HAILO_SUCCESS; } -Hef::Impl::Impl(const MemoryView &hef_memview, hailo_status &status) +Hef::Impl::Impl(std::shared_ptr hef_buffer, hailo_status &status) { status = HAILO_UNINITIALIZED; GOOGLE_PROTOBUF_VERIFY_VERSION; - status = parse_hef_memview(hef_memview); + m_hef_buffer = hef_buffer; + + status = parse_hef_memview(MemoryView(*m_hef_buffer)); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed parsing HEF buffer"); return; @@ -1054,6 +1114,8 @@ SupportedFeatures Hef::Impl::get_supported_features(const ProtoHEFHeader &header header, hef_optional_extensions); supported_features.batch_register_config = check_hef_extension(ProtoHEFExtensionType::BATCH_REGISTER_CONFIG, header, hef_extensions, included_features); + supported_features.aligned_ccws = check_hef_extension(ProtoHEFExtensionType::SHARED_CONFIG, + header, hef_extensions, included_features); return supported_features; } @@ -1065,10 +1127,10 @@ net_flow::NmsPostProcessConfig create_post_process_nms_config(const ProtoHEFOp & nms_config.nms_iou_th = (float32_t)op_proto.nms_op().nms_iou_th(); nms_config.max_proposals_per_class = op_proto.nms_op().max_proposals_per_class(); nms_config.number_of_classes = op_proto.nms_op().classes(); + nms_config.max_proposals_total = nms_config.max_proposals_per_class * nms_config.number_of_classes; nms_config.background_removal = op_proto.nms_op().background_removal(); nms_config.background_removal_index = op_proto.nms_op().background_removal_index(); nms_config.bbox_only = 
op_proto.nms_op().bbox_decoding_only(); - nms_config.order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; return nms_config; } @@ -1182,6 +1244,7 @@ Expected create_yolov5_seg_op_metadata(const const std::string &network_name) { auto nms_config = create_post_process_nms_config(op_proto); + TRY(auto yolov5_config, create_yolov5_config(op_proto.nms_op().yolo_seg_op().bbox_decoders(), op_proto.nms_op().yolo_seg_op().image_height(), op_proto.nms_op().yolo_seg_op().image_width(), pad_index_to_streams_info)); TRY(auto inputs_metadata, create_inputs_metadata(op_proto, pad_index_to_streams_info, input_to_output_pads)); @@ -1716,6 +1779,8 @@ Expected HefConfigurator::max_periph_bytes_value(const hailo_device_ar case HAILO_ARCH_HAILO15M: case HAILO_ARCH_HAILO15L: case HAILO_ARCH_HAILO10H: + // TODO: HRT-15000: Fix according to MARS hw consts + case HAILO_ARCH_MARS: return HAILO1X_PERIPH_BYTES_PER_BUFFER_MAX_SIZE; default: LOGGER__ERROR("Unknown device architecture!"); @@ -2059,7 +2124,7 @@ hailo_status HefUtils::fill_layer_info_with_base_info(const ProtoHEFEdgeLayerBas if (base_info.host_argmax()) { LOGGER__ERROR("Using legacy implementation of Argmax in host. 
Please re-compile your model with latest DFC version"); - return HAILO_INVALID_HEF; + return HAILO_HEF_NOT_SUPPORTED; } TRY(layer_info.format.type, HailoRTCommon::get_format_type(layer_info.hw_data_bytes)); @@ -2822,9 +2887,8 @@ static hailo_status build_write_ccw_actions( return HAILO_SUCCESS; } -static hailo_status parse_hef_v1_actions(const ProtoHEFOperation &operation_proto, - std::vector &actions, ConfigBufferInfoMap &config_buffer_infos, - const SupportedFeatures &supported_features, bool &const_input_layer_found, +static hailo_status parse_hef_actions(const ProtoHEFOperation &operation_proto, std::vector &actions, + ConfigBufferInfoMap &config_buffer_infos, const SupportedFeatures &supported_features, bool &const_input_layer_found, std::shared_ptr hef_reader, size_t ccws_offset) { std::vector current_write_ccw_ptr_actions; @@ -2859,7 +2923,39 @@ static hailo_status parse_hef_v1_actions(const ProtoHEFOperation &operation_prot return HAILO_SUCCESS; } -static hailo_status parse_hef_v0_actions(const ProtoHEFOperation &operation_proto, +static hailo_status prepare_aligned_ccws_transfers(const ProtoHEFOperation &operation_proto, + ConfigBufferInfoMap &config_buffer_infos, + CcwDmaTransfersInfoMap& ccw_dma_transfers_infos, + std::unordered_map &next_offset_per_config_channel) +{ + for (int action_index = 0; action_index < operation_proto.actions_size(); action_index++) { + const auto &proto_action = operation_proto.actions(action_index); + if (proto_action.action_case() == ProtoHEFAction::kWriteDataCcwPtr) { + auto ccw_ptr_action_proto = proto_action.write_data_ccw_ptr(); + auto const ¤t_config_channel = static_cast(ccw_ptr_action_proto.cfg_channel_index()); + if (0 == next_offset_per_config_channel[current_config_channel]) { + // Meaning it's the first write_ccw in a burst + ccw_dma_transfers_infos[current_config_channel].emplace_back(ccw_ptr_action_proto.offset(), ccw_ptr_action_proto.size()); + 
config_buffer_infos[current_config_channel].ccw_dma_transfers.emplace_back(ccw_ptr_action_proto.offset(), ccw_ptr_action_proto.size()); + next_offset_per_config_channel[current_config_channel] = ccw_ptr_action_proto.offset() + ccw_ptr_action_proto.size(); + } else if (next_offset_per_config_channel[current_config_channel] == ccw_ptr_action_proto.offset()) { + // consecutive in memory => concating + next_offset_per_config_channel[current_config_channel] += ccw_ptr_action_proto.size(); + ccw_dma_transfers_infos[current_config_channel].back().second += ccw_ptr_action_proto.size(); + config_buffer_infos[current_config_channel].ccw_dma_transfers.back().second += ccw_ptr_action_proto.size(); + } else { + // Next write is not consecutive in memory => saving burst + starting a new burst + ccw_dma_transfers_infos[current_config_channel].emplace_back(ccw_ptr_action_proto.offset(), ccw_ptr_action_proto.size()); + config_buffer_infos[current_config_channel].ccw_dma_transfers.emplace_back(ccw_ptr_action_proto.offset(), ccw_ptr_action_proto.size()); + next_offset_per_config_channel[current_config_channel] = ccw_ptr_action_proto.offset() + ccw_ptr_action_proto.size(); + } + } + } + + return HAILO_SUCCESS; +} + +static hailo_status parse_hef_actions(const ProtoHEFOperation &operation_proto, std::vector &actions, ConfigBufferInfoMap &config_buffer_infos, const SupportedFeatures &supported_features, bool &const_input_layer_found) { @@ -2894,10 +2990,12 @@ static hailo_status parse_hef_v0_actions(const ProtoHEFOperation &operation_prot static hailo_status parse_operation(std::vector &actions, ConfigBufferInfoMap &config_buffer_infos, + CcwDmaTransfersInfoMap& ccw_dma_transfers_infos, const ProtoHEFOperation &operation_proto, const SupportedFeatures &supported_features, bool &const_input_layer_found, uint32_t hef_version, - std::shared_ptr hef_reader, size_t ccws_offset) + std::shared_ptr hef_reader, size_t ccws_offset, + std::unordered_map &next_offset_per_config_channel, bool 
is_aligned_ccws_on) { TRY(auto trigger_action, parse_trigger_action(operation_proto.trigger())); actions.emplace_back(std::move(trigger_action)); @@ -2906,17 +3004,24 @@ static hailo_status parse_operation(std::vector &a switch (hef_version) { case HEADER_VERSION_0: - status = parse_hef_v0_actions(operation_proto, actions, config_buffer_infos, supported_features, const_input_layer_found); + status = parse_hef_actions(operation_proto, actions, config_buffer_infos, supported_features, const_input_layer_found); CHECK_SUCCESS(status); break; case HEADER_VERSION_1: case HEADER_VERSION_2: - status = parse_hef_v1_actions(operation_proto, actions, config_buffer_infos, supported_features, const_input_layer_found, hef_reader, ccws_offset); + case HEADER_VERSION_3: + status = parse_hef_actions(operation_proto, actions, config_buffer_infos, supported_features, const_input_layer_found, hef_reader, ccws_offset); CHECK_SUCCESS(status); + if (is_aligned_ccws_on) { + CHECK(hef_version == HEADER_VERSION_3, HAILO_HEF_NOT_SUPPORTED, "Aligned_ccws is not supported on hef version {}", hef_version); + status = prepare_aligned_ccws_transfers(operation_proto, config_buffer_infos, ccw_dma_transfers_infos, + next_offset_per_config_channel); + CHECK_SUCCESS(status); + } break; default: LOGGER__ERROR("Unsupported hef version {}", hef_version); - return HAILO_INVALID_HEF; + return HAILO_HEF_NOT_SUPPORTED; } return HAILO_SUCCESS; @@ -2925,33 +3030,41 @@ static hailo_status parse_operation(std::vector &a static Expected parse_operations( const google::protobuf::RepeatedPtrField &operations_proto, const SupportedFeatures &supported_features, - uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset) + uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset, bool is_aligned_ccws_on) { std::vector actions; ConfigBufferInfoMap config_buffer_infos; + CcwDmaTransfersInfoMap ccw_dma_transfers_infos; bool const_input_layer_found = false; + std::unordered_map 
next_offset_per_config_channel; + for (auto &config_buffer_info : config_buffer_infos) { + next_offset_per_config_channel[config_buffer_info.first] = 0; + } + for (const auto &operation_proto : operations_proto) { - auto status = parse_operation(actions, config_buffer_infos, operation_proto, supported_features, - const_input_layer_found, hef_version, hef_reader, ccws_offset); + auto status = parse_operation(actions, config_buffer_infos, ccw_dma_transfers_infos, operation_proto, supported_features, + const_input_layer_found, hef_version, hef_reader, ccws_offset, next_offset_per_config_channel, is_aligned_ccws_on); CHECK_SUCCESS_AS_EXPECTED(status); } - return ContextMetadata(std::move(actions), std::move(config_buffer_infos), const_input_layer_found); + return ContextMetadata(std::move(actions), std::move(config_buffer_infos), const_input_layer_found, std::move(ccw_dma_transfers_infos)); } Expected HefUtils::parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto, - const SupportedFeatures &supported_features, uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset) + const SupportedFeatures &supported_features, uint32_t hef_version, std::shared_ptr hef_reader, + size_t ccws_offset, bool is_aligned_ccws_on) { - return parse_operations(preliminary_proto.operation(), supported_features, hef_version, hef_reader, ccws_offset); + return parse_operations(preliminary_proto.operation(), supported_features, hef_version, hef_reader, ccws_offset, is_aligned_ccws_on); } Expected HefUtils::parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op, const ProtoHEFContext &context_proto, uint16_t context_index, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch, uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset) + const ProtoHEFHwArch &hef_arch, uint32_t hef_version, std::shared_ptr hef_reader, + size_t ccws_offset, bool is_aligned_ccws_on) { - TRY(auto context_metadata, - 
parse_operations(context_proto.operations(), supported_features, hef_version, hef_reader, ccws_offset)); + ContextMetadata context_metadata; + TRY(context_metadata, parse_operations(context_proto.operations(), supported_features, hef_version, hef_reader, ccws_offset, is_aligned_ccws_on)); for (const auto &edge_layer : context_proto.metadata().edge_layers()) { if (ProtoHEFEdgeConnectionType::PROTO__EDGE_CONNECTION_TYPE__BOUNDARY == @@ -3007,13 +3120,13 @@ static hailo_status validate_unique_boundary_names(const std::vector> HefUtils::parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch, uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset) + const ProtoHEFHwArch &hef_arch, uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset, bool is_aligned_ccws_on) { std::vector contexts_metadata; for (uint16_t context_index = 0; context_index < core_op.contexts.size(); context_index++) { auto &context_proto = core_op.contexts[context_index]; TRY(auto context_metadata, parse_single_dynamic_context(core_op, context_proto, context_index, supported_features, - hef_arch, hef_version, hef_reader, ccws_offset)); + hef_arch, hef_version, hef_reader, ccws_offset, is_aligned_ccws_on)); contexts_metadata.emplace_back(std::move(context_metadata)); } @@ -3099,8 +3212,8 @@ Expected HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo nms_info.number_of_classes = static_cast(proto_nms_info.number_of_classes()); nms_info.bbox_size = static_cast(proto_nms_info.bbox_size()); nms_info.max_bboxes_per_class = static_cast(proto_nms_info.max_output_size()); + nms_info.max_bboxes_total = nms_info.max_bboxes_per_class * nms_info.number_of_classes; nms_info.chunks_per_frame = static_cast(proto_nms_info.input_division_factor()); - nms_info.order_type = HAILO_NMS_RESULT_ORDER_HW; if (burst_mode_enabled) { nms_info.burst_size = static_cast(proto_nms_info.burst_size()); @@ -3130,7 +3243,6 @@ 
Expected HefUtils::parse_proto_nms_info(const ProtoHEFNmsInfo "original_name field '{}' has a too long name (max is HAILO_MAX_STREAM_NAME_SIZE including the null terminated character)", original_name); strncpy(nms_info.defuse_info.original_name, original_name.c_str(), original_name.length() + 1); - nms_info.order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; // We don't support NMS by score if NMS on chip return nms_info; } @@ -3348,7 +3460,7 @@ Expected HefUtils::get_cache_layer_info( // Negative cache_id means that the cache is not used const int32_t cache_id = layer.context_switch_info().cache_id(); CHECK_AS_EXPECTED(cache_id >= 0, HAILO_INVALID_HEF, "Invalid cache_id: {}", cache_id); - result.cache_id = static_cast(cache_id); + result.cache_info.cache_id = static_cast(cache_id); return result; } @@ -3494,7 +3606,7 @@ Expected> Hef::Impl::create_single_context_core_op_ case ProtoHEFAction::kWriteDataCcwPtr :{ CHECK(HEADER_VERSION_0 != hef.pimpl->m_hef_version, HAILO_INVALID_HEF, "WriteDataCcwPtr is not supported on V0 HEF"); const auto size = action.write_data_ccw_ptr().size(); - const auto offset = action.write_data_ccw_ptr().offset() + hef.pimpl->get_ccws_offset(); + const auto offset = action.write_data_ccw_ptr().offset() + hef.pimpl->get_offset_zero_point(); auto hef_reader = hef.pimpl->get_hef_reader(); TRY(auto config_buffer, parse_ccw_buffer_from_ptr(size, offset, hef_reader)); config_buffers.emplace_back(std::move(config_buffer)); @@ -3547,14 +3659,24 @@ ProtoHEFHwArch Hef::Impl::get_device_arch() return m_header.hw_arch(); } -std::shared_ptr Hef::Impl::get_hef_reader() +std::shared_ptr Hef::Impl::get_hef_reader() const { return m_hef_reader; } -size_t Hef::Impl::get_ccws_offset() +size_t Hef::Impl::get_offset_zero_point() const { - return m_ccws_offset; + return m_offset_zero_point; +} + +uint64_t Hef::Impl::get_ccws_section_size() const +{ + return m_ccws_section_size; +} + +bool Hef::Impl::is_aligned_ccws_on() const +{ + return 
(m_supported_features.aligned_ccws) && (!is_env_variable_on(HAILO_DISABLE_ALIGNED_CCWS_ENV_VAR)); } Expected Hef::Impl::get_bottleneck_fps(const std::string &net_group_name) @@ -3956,6 +4078,28 @@ Expected Hef::Impl::get_description(bool stream_infos, bool vstream return hef_infos; } +Expected> Hef::get_external_resources() const +{ + return pimpl->get_external_resources(); +} + +Expected> Hef::Impl::get_external_resources() const +{ + std::map external_resources; + auto hef_reader = get_hef_reader(); + CHECK_SUCCESS(hef_reader->open()); + for (auto &external_resource_info : m_hef_external_resources) { + const auto offset = external_resource_info.offset + get_offset_zero_point(); + const auto size = external_resource_info.size; + std::string resource_data(size, '\0'); + + CHECK_SUCCESS(hef_reader->read_from_offset(offset, resource_data, size)); + external_resources[external_resource_info.name] = std::move(resource_data); + } + CHECK_SUCCESS(hef_reader->close()); + return external_resources; +} + Expected> Hef::Impl::get_network_groups_infos() { std::vector results; diff --git a/hailort/libhailort/src/hef/hef_internal.hpp b/hailort/libhailort/src/hef/hef_internal.hpp index f222846..bf447b8 100644 --- a/hailort/libhailort/src/hef/hef_internal.hpp +++ b/hailort/libhailort/src/hef/hef_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -131,6 +131,15 @@ typedef union { uint64_t reserved1; uint64_t reserved2; } v2; + struct { + uint64_t xxh3_64bits; + // ccws_size_with_padding includes the padding for 4k alignment - the real ccws size is (ccws_size_with_padding - hef_padding_size) + uint64_t ccws_size_with_padding; + uint32_t hef_padding_size; + uint64_t additional_info_size; + uint64_t reserved1; + uint64_t reserved2; + } v3; } hef__header_distinct_t; typedef struct { @@ -145,6 +154,7 @@ static const size_t HEF_COMMON_SIZE = sizeof(hef__header_t) - sizeof(hef__header static const size_t HEF_HEADER_SIZE_V0 = HEF_COMMON_SIZE + sizeof(hef__header_distinct_t::v0); static const size_t HEF_HEADER_SIZE_V1 = HEF_COMMON_SIZE + sizeof(hef__header_distinct_t::v1); static const size_t HEF_HEADER_SIZE_V2 = HEF_COMMON_SIZE + sizeof(hef__header_distinct_t::v2); +static const size_t HEF_HEADER_SIZE_V3 = HEF_COMMON_SIZE + sizeof(hef__header_distinct_t::v3); typedef enum { HEF__FORMAT__TF_RGB = 0, @@ -170,6 +180,7 @@ typedef enum { #define HEADER_VERSION_0 (0) #define HEADER_VERSION_1 (1) #define HEADER_VERSION_2 (2) +#define HEADER_VERSION_3 (3) const static uint32_t SUPPORTED_EXTENSIONS_BITSET_SIZE = 1000; static const std::vector SUPPORTED_EXTENSIONS = { @@ -184,23 +195,27 @@ static const std::vector SUPPORTED_EXTENSIONS = { OFFLOAD_ARGMAX, KO_RUN_ASAP, HAILO_NET_FLOW, - HAILO_NET_FLOW_YOLOV5_NMS, // Extension added in platform 4.12 release - HAILO_NET_FLOW_SSD_NMS, // Extension added in platform 4.14 release - WRITE_DATA_BY_TYPE, // Extension added in platform 4.14 release - NMS_OUTPUT_BURST, // Extension added in platform 4.14 release - DUAL_DIRECTION_STREAM_INDEX, // Extension added in platform 4.14 release - HAILO_NET_FLOW_ARGMAX, // Extension added in platform 4.14 release - HAILO_NET_FLOW_SOFTMAX, // Extension added in platform 4.14 release - ALIGNED_FORMAT_TYPE, // Extension added in platform 4.14 release - 
HAILO_NET_FLOW_YOLOX_NMS, // Extension added in platform 4.14 release - OUTPUT_SCALE_PER_FEATURE, // Extension added in platform 4.14 release - PERIPH_CALCULATION_IN_HAILORT, // Extension added in platform 4.14 release - HAILO_NET_FLOW_YOLOV5_SEG_NMS, // Extension added in platform 4.15 release - HAILO_NET_FLOW_IOU_NMS, // Extension added in platform 4.15 release - HW_PADDING, // Extension added in platform 4.16 release - HAILO_NET_FLOW_YOLOV8_NMS, // Extension added in platform 4.16 release - BATCH_REGISTER_CONFIG, // Extension added in platform 4.17 release - HAILO_NET_FLOW_BBOX_DECODING // Extension added in platform 4.18 release + HAILO_NET_FLOW_YOLOV5_NMS, // Extension added in platform 4.12 release + HAILO_NET_FLOW_SSD_NMS, // Extension added in platform 4.14 release + WRITE_DATA_BY_TYPE, // Extension added in platform 4.14 release + NMS_OUTPUT_BURST, // Extension added in platform 4.14 release + DUAL_DIRECTION_STREAM_INDEX, // Extension added in platform 4.14 release + HAILO_NET_FLOW_ARGMAX, // Extension added in platform 4.14 release + HAILO_NET_FLOW_SOFTMAX, // Extension added in platform 4.14 release + ALIGNED_FORMAT_TYPE, // Extension added in platform 4.14 release + HAILO_NET_FLOW_YOLOX_NMS, // Extension added in platform 4.14 release + OUTPUT_SCALE_PER_FEATURE, // Extension added in platform 4.14 release + PERIPH_CALCULATION_IN_HAILORT, // Extension added in platform 4.14 release + HAILO_NET_FLOW_YOLOV5_SEG_NMS, // Extension added in platform 4.15 release + HAILO_NET_FLOW_IOU_NMS, // Extension added in platform 4.15 release + HW_PADDING, // Extension added in platform 4.16 release + HAILO_NET_FLOW_YOLOV8_NMS, // Extension added in platform 4.16 release + BATCH_REGISTER_CONFIG, // Extension added in platform 4.17 release + HAILO_NET_FLOW_BBOX_DECODING, // Extension added in platform 4.18 release + CCW_PTR_SQUEEZE, // Currently this extension is always off, will be renamed and re-purposed under HRT-13205 + EXTERNAL_RESOURCES, // Extension added in 
platform 4.21 release + SHARED_CONFIG // Extension added in platform 4.21 release + }; static inline bool is_h2d_boundary_info_layer(const ProtoHEFEdgeLayer& layer) @@ -254,12 +269,19 @@ class VdmaConfigCoreOp; class VdmaDevice; class HailoRTDriver; +struct ExternalResourceInfo +{ + std::string name; + uint64_t size; + uint64_t offset; +}; + class Hef::Impl final { public: static Expected create(const std::string &hef_path); - static Expected create(const MemoryView &hef_buffer); + static Expected create(std::shared_ptr hef_buffer); const std::vector& network_groups() const; const std::vector& core_ops(const std::string &net_group_name) const; @@ -267,8 +289,6 @@ public: Expected> get_network_group_and_network_name(const std::string &name); - void clear_hef_buffer(); - Expected> get_core_op_by_net_group_name(const std::string &net_group_name=""); Expected> get_network_infos(const std::string &net_group_name=""); @@ -291,8 +311,9 @@ public: Expected get_number_of_input_streams(const std::string &net_group_name=""); Expected get_number_of_output_streams(const std::string &net_group_name=""); ProtoHEFHwArch get_device_arch(); - std::shared_ptr get_hef_reader(); - size_t get_ccws_offset(); + uint64_t get_ccws_section_size() const; + std::shared_ptr get_hef_reader() const; + size_t get_offset_zero_point() const; Expected get_bottleneck_fps(const std::string &net_group_name=""); static bool contains_ddr_layers(const ProtoHEFCoreOpMock &core_op); static hailo_status validate_core_op_unique_layer_names(const ProtoHEFCoreOpMock &core_op); @@ -356,6 +377,8 @@ public: Expected get_core_op_metadata(const std::string &network_group_name, uint32_t partial_clusters_layout_bitmap = PARTIAL_CLUSTERS_LAYOUT_IGNORE); Expected get_description(bool stream_infos, bool vstream_infos, hailo_device_architecture_t device_arch); + Expected> get_external_resources() const; // Key is reosucre name, value is resource data in bytes + const MemoryView get_hash_as_memview() const { @@ -363,6 
+386,7 @@ public: case HEADER_VERSION_0: return MemoryView::create_const(m_md5, sizeof(m_md5)); case HEADER_VERSION_2: + case HEADER_VERSION_3: return MemoryView::create_const(&m_xxh3_64bits, sizeof(m_xxh3_64bits)); case HEADER_VERSION_1: default: @@ -403,13 +427,13 @@ public: hailo_status validate_boundary_streams_were_created(const std::string &network_group_name, std::shared_ptr core_op); -#ifdef HAILO_SUPPORT_MULTI_PROCESS - const MemoryView get_hef_memview(); -#endif // HAILO_SUPPORT_MULTI_PROCESS + Expected> get_hef_as_buffer(); + + bool is_aligned_ccws_on() const; private: Impl(const std::string &hef_path, hailo_status &status); - Impl(const MemoryView &hef_memview, hailo_status &status); + Impl(std::shared_ptr hef_buffer, hailo_status &status); hailo_status parse_hef_file(const std::string &hef_path); hailo_status parse_hef_memview(const MemoryView &hef_memview); @@ -418,6 +442,7 @@ private: Expected parse_hef_header_before_distinct(std::shared_ptr hef_reader); hailo_status fill_v1_hef_header(hef__header_t &hef_header, std::shared_ptr hef_reader); hailo_status fill_v2_hef_header(hef__header_t &hef_header, std::shared_ptr hef_reader); + hailo_status fill_v3_hef_header(hef__header_t &hef_header, std::shared_ptr hef_reader); hailo_status fill_core_ops_and_networks_metadata(uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset); hailo_status transfer_protobuf_field_ownership(ProtoHEFHef &hef_message); void fill_core_ops(); @@ -459,6 +484,7 @@ private: ProtoHEFHeader m_header; ProtoHEFIncludedFeatures m_included_features; SupportedFeatures m_supported_features; + std::vector m_hef_external_resources; std::vector m_groups; std::map> m_core_ops_per_group; std::map> m_post_process_ops_metadata_per_group; @@ -470,11 +496,10 @@ private: uint32_t m_crc; uint64_t m_xxh3_64bits; std::shared_ptr m_hef_reader; - size_t m_ccws_offset; + uint64_t m_ccws_section_size; + size_t m_offset_zero_point; -#ifdef HAILO_SUPPORT_MULTI_PROCESS - Buffer m_hef_buffer; 
-#endif // HAILO_SUPPORT_MULTI_PROCESS + std::shared_ptr m_hef_buffer; // Only used if Hef is created from memory std::map m_network_group_metadata; // Key is NG name }; @@ -493,7 +518,6 @@ public: static Expected max_periph_bytes_value(const hailo_device_architecture_t hw_arch); static Expected max_periph_padding_payload_value(const hailo_device_architecture_t hw_arch); - static bool is_core_hw_padding_supported(const LayerInfo &layer_info, const uint32_t max_periph_bytes_value, const bool is_core_hw_padding_config_in_dfc); }; @@ -542,13 +566,15 @@ public: const ProtoHEFCoreOpMock &core_op, const uint16_t context_index, const ProtoHEFEdgeLayer &layer, const SupportedFeatures &supported_features); static Expected parse_preliminary_context(const ProtoHEFPreliminaryConfig &preliminary_proto, - const SupportedFeatures &supported_features, const uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset); + const SupportedFeatures &supported_features, const uint32_t hef_version, std::shared_ptr hef_reader, + size_t ccws_offset, bool is_aligned_ccws_on); static Expected parse_single_dynamic_context(const ProtoHEFCoreOpMock &core_op, const ProtoHEFContext &context_proto, uint16_t context_index, const SupportedFeatures &supported_features, - const ProtoHEFHwArch &hef_arch, const uint32_t hef_version, std::shared_ptr hef_reader, size_t ccws_offset); + const ProtoHEFHwArch &hef_arch, const uint32_t hef_version, std::shared_ptr hef_reader, + size_t ccws_offset, bool is_aligned_ccws_on); static Expected> parse_dynamic_contexts(const ProtoHEFCoreOpMock &core_op, const SupportedFeatures &supported_features, const ProtoHEFHwArch &hef_arch, const uint32_t hef_version, - std::shared_ptr hef_reader, size_t ccws_offset); + std::shared_ptr hef_reader, size_t ccws_offset, bool is_aligned_ccws_on); static Expected parse_proto_nms_info(const ProtoHEFNmsInfo &proto_nms_info, const bool burst_mode_enabled, const ProtoHEFHwArch &hef_arch); static Expected 
get_boundary_layer_info(const ProtoHEFCoreOpMock &core_op, diff --git a/hailort/libhailort/src/hef/layer_info.hpp b/hailort/libhailort/src/hef/layer_info.hpp index 2240363..1d64853 100644 --- a/hailort/libhailort/src/hef/layer_info.hpp +++ b/hailort/libhailort/src/hef/layer_info.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -58,6 +58,11 @@ struct DdrInfo { uint16_t min_buffered_rows; }; +struct CacheBufferInfo { + uint32_t cache_id; + uint16_t batch_size; +}; + struct LayerInfo { LayerType type = LayerType::NOT_SET; hailo_stream_direction_t direction; @@ -102,7 +107,7 @@ struct LayerInfo { // Context switch info TODO: we should use std::optional for this structures (or implement our self). ConnectedContextInfo connected_context_info; DdrInfo ddr_info; - uint32_t cache_id; + CacheBufferInfo cache_info; }; // LayerIdentifier = @@ -127,7 +132,6 @@ public: stream_info.format = layer.format; if (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == stream_info.format.order) { stream_info.nms_info = layer.nms_info; - stream_info.nms_info.order_type = HAILO_NMS_RESULT_ORDER_HW; stream_info.hw_frame_size = HailoRTCommon::get_nms_hw_frame_size(stream_info.nms_info); } else { @@ -314,9 +318,9 @@ private: // If a layer is multi-planar, its format_order is already the host-side format order res.format.order = (layer_info.is_multi_planar) ? 
layer_info.format.order : HailoRTDefaults::get_default_host_format_order(layer_info.format); if (HailoRTCommon::is_nms(res)) { - // TODO: HRT-15612 - consider changes here in case of order nms by score, or byte_mask res.nms_shape.max_bboxes_per_class = layer_info.nms_info.max_bboxes_per_class * layer_info.nms_info.chunks_per_frame; res.nms_shape.number_of_classes = layer_info.nms_info.number_of_classes; + res.nms_shape.max_bboxes_total = res.nms_shape.max_bboxes_per_class * layer_info.nms_info.number_of_classes; res.format.type = HAILO_FORMAT_TYPE_FLOAT32; // NMS on vstream is always float32s } else { res.shape.height = layer_info.shape.height; diff --git a/hailort/libhailort/src/hef/memory_requirements_calculator.cpp b/hailort/libhailort/src/hef/memory_requirements_calculator.cpp new file mode 100644 index 0000000..9f9f5cc --- /dev/null +++ b/hailort/libhailort/src/hef/memory_requirements_calculator.cpp @@ -0,0 +1,125 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file memory_requirements_calculator.cpp + **/ + +#include "memory_requirements_calculator.hpp" +#include "common/utils.hpp" + +#include "hef/hef_internal.hpp" +#include "core_op/resource_manager/internal_buffer_planner.hpp" +#include "core_op/resource_manager/config_buffer.hpp" +#include "common/internal_env_vars.hpp" +#include "vdma/memory/buffer_requirements.hpp" +#include "vdma/memory/descriptor_list.hpp" + +#include + +namespace hailort +{ + +static EdgeTypeMemoryRequirements join_requirements(const EdgeTypeMemoryRequirements &a, const EdgeTypeMemoryRequirements &b) +{ + return EdgeTypeMemoryRequirements{a.cma_memory + b.cma_memory, + a.cma_memory_for_descriptors + b.cma_memory_for_descriptors, + a.pinned_memory + b.pinned_memory}; +} + +static Expected get_context_cfg_requirements(const ContextMetadata &context_metadata, + HailoRTDriver::DmaType dma_type) +{ + EdgeTypeMemoryRequirements requirment{}; + for (const auto &cfg_info : context_metadata.config_buffers_info()) { + TRY(auto requirements, ConfigBuffer::get_buffer_requirements(cfg_info.second, dma_type, vdma::MAX_SG_PAGE_SIZE)); + if (ConfigBuffer::should_use_ccb(dma_type)) { + requirment.cma_memory += requirements.buffer_size(); + } else { + requirment.pinned_memory += requirements.buffer_size(); + requirment.cma_memory_for_descriptors += + vdma::DescriptorList::descriptors_buffer_allocation_size(requirements.descs_count()); + } + } + return requirment; +} + +// Gets the memory requirements for the configuration buffers (weights and layer configurations) +static Expected get_cfg_requirements(const CoreOpMetadata &core_op_metadata, + HailoRTDriver::DmaType dma_type) +{ + TRY(auto requirment, get_context_cfg_requirements(core_op_metadata.preliminary_context(), dma_type)); + for (const auto& context : core_op_metadata.dynamic_contexts()) { + TRY(auto context_requirment, get_context_cfg_requirements(context, 
dma_type)); + requirment = join_requirements(requirment, context_requirment); + } + return requirment; +} + +// Gets the memory requirements for intermediate buffers (including inter-context and ddr buffers) +static Expected get_intermediate_requirements(const CoreOpMetadata &core_op_metadata, + uint16_t batch_size, HailoRTDriver::DmaType dma_type) +{ + batch_size = (batch_size == HAILO_DEFAULT_BATCH_SIZE) ? 1 : batch_size; + TRY(auto plan, InternalBufferPlanner::create_buffer_planning(core_op_metadata, batch_size, + InternalBufferPlanner::Type::SINGLE_BUFFER_PER_BUFFER_TYPE, dma_type, vdma::MAX_SG_PAGE_SIZE)); + auto report = InternalBufferPlanner::report_planning_info(plan); + return EdgeTypeMemoryRequirements{report.cma_memory, report.cma_memory_for_descriptors, report.pinned_memory}; +} + +// Gets the memory requirements for a single model +static Expected get_model_memory_requirements(const CoreOpMetadata &core_op_metadata, uint16_t batch_size, + HailoRTDriver::DmaType dma_type) +{ + TRY(auto intermediate, get_intermediate_requirements(core_op_metadata, batch_size, dma_type)); + TRY(auto config, get_cfg_requirements(core_op_metadata, dma_type)); + return MemoryRequirements{intermediate, config}; +} + +static Expected get_dma_type(Hef &hef) +{ + TRY(auto hef_arch, hef.get_hef_device_arch()); + switch (hef_arch) { + case HAILO_ARCH_HAILO8_A0: + case HAILO_ARCH_HAILO8: + case HAILO_ARCH_HAILO8L: + return HailoRTDriver::DmaType::PCIE; + case HAILO_ARCH_HAILO15H: + case HAILO_ARCH_HAILO15L: + case HAILO_ARCH_HAILO15M: + case HAILO_ARCH_HAILO10H: + case HAILO_ARCH_MARS: + return HailoRTDriver::DmaType::DRAM; + case HAILO_ARCH_MAX_ENUM: + break; + }; + + LOGGER__ERROR("Unsupported Hailo device architecture: {}", static_cast(hef_arch)); + return make_unexpected(HAILO_NOT_IMPLEMENTED); +} + +Expected MemoryRequirementsCalculator::get_memory_requirements( + const std::vector &models) +{ + FullMemoryRequirements full_memory_requirements{}; + for (const auto &model : 
models) { + TRY(auto hef, Hef::create(model.hef_path)); + TRY(const auto network_pair, hef.pimpl->get_network_group_and_network_name(model.network_group_name)); + TRY(auto core_op_metadata, hef.pimpl->get_core_op_metadata(network_pair.first)); + + TRY(const auto dma_type, get_dma_type(hef)); + TRY(auto req, get_model_memory_requirements(*core_op_metadata, model.batch_size, dma_type)); + full_memory_requirements.hefs_memory_requirements.push_back(req); + + // Add to total + full_memory_requirements.total_memory_requirements.intermediate_buffers = join_requirements( + full_memory_requirements.total_memory_requirements.intermediate_buffers, req.intermediate_buffers); + full_memory_requirements.total_memory_requirements.config_buffers = join_requirements( + full_memory_requirements.total_memory_requirements.config_buffers, req.config_buffers); + } + + return full_memory_requirements; +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/hef/memory_requirements_calculator.hpp b/hailort/libhailort/src/hef/memory_requirements_calculator.hpp new file mode 100644 index 0000000..69d8b7b --- /dev/null +++ b/hailort/libhailort/src/hef/memory_requirements_calculator.hpp @@ -0,0 +1,66 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file memory_requirements_calculator.hpp + * @brief Calculates the memory requirements used for running one or more models + **/ +#ifndef _HAILO_MEMORY_REQUIREMENTS_CALCULATOR_HPP_ +#define _HAILO_MEMORY_REQUIREMENTS_CALCULATOR_HPP_ + +#include "hailo/expected.hpp" +#include "hailo/hef.hpp" + +#include +#include + + +namespace hailort +{ + +struct EdgeTypeMemoryRequirements { + // Amount of CMA memory (Physically continous) in bytes needed for execution + size_t cma_memory; + + // Amount of CMA memory (Physically continous) in bytes needed for creating descriptors list. 
+ size_t cma_memory_for_descriptors; + + // Amount of pinned memory (Memory pinned to physical memory) in bytes needed for execution + size_t pinned_memory; +}; + +// Memory requirements for one model +struct MemoryRequirements { + EdgeTypeMemoryRequirements intermediate_buffers; + EdgeTypeMemoryRequirements config_buffers; +}; + +// Memory requirements for several models +struct FullMemoryRequirements { + std::vector hefs_memory_requirements; + + // Notice that the total memory requirements is not the sum of all the memory requirements, since some of the memory + // is shared between the models. + MemoryRequirements total_memory_requirements; +}; + +/** + * Used to calculate the memory requirements for running one or more models. + * This class is exported as an internal API to be used by internal tools (so the header is not public, but the code + * is exported) + */ +class HAILORTAPI MemoryRequirementsCalculator final { +public: + struct HefParams { + std::string hef_path; + std::string network_group_name; + uint16_t batch_size; + }; + + static Expected get_memory_requirements(const std::vector &models); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_MEMORY_REQUIREMENTS_CALCULATOR_HPP_ */ diff --git a/hailort/libhailort/src/hw_consts.hpp b/hailort/libhailort/src/hw_consts.hpp index fa5ddfc..272c4f4 100644 --- a/hailort/libhailort/src/hw_consts.hpp +++ b/hailort/libhailort/src/hw_consts.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/mipi/mipi_stream.cpp b/hailort/libhailort/src/mipi/mipi_stream.cpp index d49ac25..7efc27c 100644 --- a/hailort/libhailort/src/mipi/mipi_stream.cpp +++ b/hailort/libhailort/src/mipi/mipi_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/mipi/mipi_stream.hpp b/hailort/libhailort/src/mipi/mipi_stream.hpp index a137207..9dc39fd 100644 --- a/hailort/libhailort/src/mipi/mipi_stream.hpp +++ b/hailort/libhailort/src/mipi/mipi_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp index b03904c..574d9ee 100644 --- a/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/argmax_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp index 3d29b56..0c40679 100644 --- a/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/argmax_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp index 013b5a4..7de01f8 100644 --- a/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -39,7 +39,7 @@ std::string NmsOpMetadata::get_op_description() hailo_status NmsOpMetadata::validate_format_info() { for (const auto& output_metadata : m_outputs_metadata) { - CHECK(((HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS == output_metadata.second.format.order) || (HAILO_FORMAT_ORDER_HAILO_NMS == output_metadata.second.format.order) + CHECK(((HailoRTCommon::is_nms_by_class(output_metadata.second.format.order)) || (HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE == output_metadata.second.format.order)), HAILO_INVALID_ARGUMENT, "The given output format order {} is not supported, " "should be HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS or HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE", @@ -50,10 +50,6 @@ hailo_status NmsOpMetadata::validate_format_info() CHECK(!(HAILO_FORMAT_FLAGS_TRANSPOSED & output_metadata.second.format.flags), HAILO_INVALID_ARGUMENT, "Output {} is marked as transposed, which is not supported for this model.", output_metadata.first); - - if (HAILO_FORMAT_ORDER_HAILO_NMS == output_metadata.second.format.order) { - LOGGER__WARNING("Using a deprecated format order HAILO_FORMAT_ORDER_HAILO_NMS. 
Use HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS instead"); - } } if (m_type == OperationType::IOU) { assert(1 == m_inputs_metadata.size()); @@ -85,12 +81,13 @@ hailo_status NmsOpMetadata::validate_params() std::string NmsOpMetadata::get_nms_config_description() { - auto config_info = fmt::format("Score threshold: {:.3f}, IoU threshold: {:.2f}, Classes: {}, Cross classes: {}, NMS results order: {}", - m_nms_config.nms_score_th, m_nms_config.nms_iou_th, m_nms_config.number_of_classes, m_nms_config.cross_classes, - HailoRTCommon::get_nms_result_order_type_str(m_nms_config.order_type)); - if (m_nms_config.order_type != HAILO_NMS_RESULT_ORDER_BY_SCORE) { + auto config_info = fmt::format("Score threshold: {:.3f}, IoU threshold: {:.2f}, Classes: {}", + m_nms_config.nms_score_th, m_nms_config.nms_iou_th, m_nms_config.number_of_classes); + if ((HailoRTCommon::is_nms_by_class(m_outputs_metadata.begin()->second.format.order)) || + (HAILO_FORMAT_ORDER_NHWC == m_outputs_metadata.begin()->second.format.order)){ config_info += fmt::format(", Max bboxes per class: {}", m_nms_config.max_proposals_per_class); - } else { + } + if (HailoRTCommon::is_nms_by_score(m_outputs_metadata.begin()->second.format.order)){ config_info += fmt::format(", Max bboxes total: {}", m_nms_config.max_proposals_total); } if (m_nms_config.background_removal) { @@ -197,11 +194,13 @@ void NmsPostProcessOp::fill_nms_by_class_format_buffer(MemoryView &buffer, const } void NmsPostProcessOp::fill_nms_by_score_format_buffer(MemoryView &buffer, std::vector &detections, - const NmsPostProcessConfig &nms_config) + const NmsPostProcessConfig &nms_config, const bool should_sort) { - std::sort(detections.begin(), detections.end(), + if (should_sort) { + std::sort(detections.begin(), detections.end(), [](DetectionBbox a, DetectionBbox b) { return a.m_bbox.score > b.m_bbox.score; }); + } uint16_t total_detections_count = 0; for (auto detection_bbox : detections) { @@ -211,7 +210,7 @@ void 
NmsPostProcessOp::fill_nms_by_score_format_buffer(MemoryView &buffer, std:: } if (total_detections_count > nms_config.max_proposals_total) { - LOGGER__INFO("{} Detections were ignored, due to `max_bboxes_total` defined as {}.", + LOGGER__INFO("{} detections were ignored, due to `max_bboxes_total` defined as {}.", detections.size() - nms_config.max_proposals_total, nms_config.max_proposals_total); break; } @@ -228,7 +227,6 @@ void NmsPostProcessOp::fill_nms_by_score_format_buffer(MemoryView &buffer, std:: detection.y_max = detection_bbox.m_bbox.y_max; *(hailo_detection_t*)(buffer.data() + buffer_offset) = detection; - total_detections_count++; } *(uint16_t*)(buffer.data()) = total_detections_count; @@ -237,15 +235,14 @@ void NmsPostProcessOp::fill_nms_by_score_format_buffer(MemoryView &buffer, std:: hailo_status NmsPostProcessOp::hailo_nms_format(MemoryView dst_view) { remove_overlapping_boxes(m_detections, m_classes_detections_count, m_nms_metadata->nms_config().nms_iou_th); - switch (m_nms_metadata->nms_config().order_type) { - case HAILO_NMS_RESULT_ORDER_BY_CLASS: + if ((HailoRTCommon::is_nms_by_class(m_nms_metadata->outputs_metadata().begin()->second.format.order)) || + (HAILO_FORMAT_ORDER_NHWC == m_nms_metadata->outputs_metadata().begin()->second.format.order)) { fill_nms_by_class_format_buffer(dst_view, m_detections, m_classes_detections_count, m_nms_metadata->nms_config()); - break; - case HAILO_NMS_RESULT_ORDER_BY_SCORE: + } else if (HailoRTCommon::is_nms_by_score(m_nms_metadata->outputs_metadata().begin()->second.format.order)) { fill_nms_by_score_format_buffer(dst_view, m_detections, m_nms_metadata->nms_config()); - break; - default: - LOGGER__ERROR("NMS result order type not supported: {}", HailoRTCommon::get_nms_result_order_type_str(m_nms_metadata->nms_config().order_type)); + } else { + LOGGER__ERROR("Unsupported output format order for NmsPostProcessOp: {}", + 
HailoRTCommon::get_format_order_str(m_nms_metadata->outputs_metadata().begin()->second.format.order)); return HAILO_INVALID_ARGUMENT; } return HAILO_SUCCESS; @@ -283,19 +280,8 @@ Expected NmsOpMetadata::get_output_vstream_info() vstream_info.format.order = m_outputs_metadata.begin()->second.format.order; vstream_info.format.type = m_outputs_metadata.begin()->second.format.type; vstream_info.format.flags = HAILO_FORMAT_FLAGS_NONE; - - if (HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE == vstream_info.format.order) { - nms_config().order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; - } else { - nms_config().order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; - } - - vstream_info.nms_shape.order_type = nms_config().order_type; - if (HAILO_NMS_RESULT_ORDER_BY_SCORE == nms_config().order_type) { - vstream_info.nms_shape.max_bboxes_total = nms_config().max_proposals_total; - } else { - vstream_info.nms_shape.max_bboxes_per_class = nms_config().max_proposals_per_class; - } + vstream_info.nms_shape.max_bboxes_total = nms_config().max_proposals_total; + vstream_info.nms_shape.max_bboxes_per_class = nms_config().max_proposals_per_class; vstream_info.nms_shape.number_of_classes = nms_config().number_of_classes; if (nms_config().background_removal) { vstream_info.nms_shape.number_of_classes--; @@ -309,17 +295,16 @@ Expected NmsOpMetadata::get_output_vstream_info() hailo_nms_info_t NmsOpMetadata::nms_info() { - uint32_t max_proposals = (HAILO_NMS_RESULT_ORDER_BY_SCORE == m_nms_config.order_type) ? 
nms_config().max_proposals_total : nms_config().max_proposals_per_class; hailo_nms_info_t nms_info = { nms_config().number_of_classes, - {max_proposals}, + nms_config().max_proposals_per_class, + nms_config().max_proposals_total, sizeof(hailo_bbox_float32_t), 1, // input_division_factor false, hailo_nms_defuse_info_t(), DEFAULT_NMS_NO_BURST_SIZE, - HAILO_BURST_TYPE_H8_BBOX, - m_nms_config.order_type + HAILO_BURST_TYPE_H8_BBOX }; if (nms_config().background_removal) { nms_info.number_of_classes--; diff --git a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp index 037fcac..c7f3ebe 100644 --- a/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/nms_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -162,9 +162,6 @@ public: std::vector detections; std::vector classes_detection_count(nms_info.number_of_classes, 0); - // TODO: HRT-15612 support BY_SCORE order_type in this function - assert(HAILO_NMS_RESULT_ORDER_BY_SCORE != nms_info.order_type); - detections.reserve(nms_info.max_bboxes_per_class * nms_info.number_of_classes); const uint32_t bbox_size = sizeof(hailo_bbox_float32_t); @@ -211,7 +208,7 @@ public: * \endcode */ static void fill_nms_by_score_format_buffer(MemoryView &buffer, std::vector &detections, - const NmsPostProcessConfig &nms_config); + const NmsPostProcessConfig &nms_config, const bool should_sort = false); protected: NmsPostProcessOp(std::shared_ptr metadata) @@ -224,16 +221,14 @@ protected: void reserve_detections() { - switch (m_nms_metadata->nms_config().order_type) { - case HAILO_NMS_RESULT_ORDER_BY_CLASS: - m_detections.reserve(m_nms_metadata->nms_config().max_proposals_per_class * 
m_nms_metadata->nms_config().number_of_classes); - break; - case HAILO_NMS_RESULT_ORDER_BY_SCORE: - m_detections.reserve(m_nms_metadata->nms_config().max_proposals_total); - break; - default: - LOGGER__ERROR("Unsupported NMS format order type for NmsPostProcessOp: {}", - HailoRTCommon::get_nms_result_order_type_str(m_nms_metadata->nms_config().order_type)); + if ((HailoRTCommon::is_nms_by_class(m_nms_metadata->outputs_metadata().begin()->second.format.order)) || + (HAILO_FORMAT_ORDER_NHWC == m_nms_metadata->outputs_metadata().begin()->second.format.order)) { + m_detections.reserve(m_nms_metadata->nms_config().max_proposals_per_class * m_nms_metadata->nms_config().number_of_classes); + } else if (HailoRTCommon::is_nms_by_score(m_nms_metadata->outputs_metadata().begin()->second.format.order)) { + m_detections.reserve(m_nms_metadata->nms_config().max_proposals_total); + } else { + LOGGER__WARNING("Unsupported output format order for NmsPostProcessOp: {}", + HailoRTCommon::get_format_order_str(m_nms_metadata->outputs_metadata().begin()->second.format.order)); } } diff --git a/hailort/libhailort/src/net_flow/ops/op.hpp b/hailort/libhailort/src/net_flow/ops/op.hpp index e9fd3d8..f3328d9 100644 --- a/hailort/libhailort/src/net_flow/ops/op.hpp +++ b/hailort/libhailort/src/net_flow/ops/op.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp index 074692f..7e7ff4a 100644 --- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp index 6e287a5..9d3843f 100644 --- a/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/softmax_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp index db89abf..8c94488 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp index efdbbd5..15dad21 100644 --- a/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/ssd_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -44,43 +44,31 @@ private: void extract_bbox_classes(const hailo_bbox_float32_t &dims_bbox, SrcType *cls_data, const BufferMetaData &cls_metadata, uint32_t cls_index) { const auto &nms_config = m_metadata->nms_config(); - if (nms_config.cross_classes) { - // Pre-NMS optimization. If NMS checks IoU over different classes, only the maximum class is relevant - auto max_id_score_pair = get_max_class(cls_data, cls_index, 0, 1, - cls_metadata.quant_info, cls_metadata.padded_shape.width); - auto bbox = dims_bbox; - bbox.score = max_id_score_pair.second; - if (max_id_score_pair.second >= nms_config.nms_score_th) { - m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); - m_classes_detections_count[max_id_score_pair.first]++; - } - } else { - for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; class_index++) { - auto class_id = class_index; - if (nms_config.background_removal) { - if (nms_config.background_removal_index == class_index) { - // Ignore if class_index is background_removal_index - continue; - } - else if (0 == nms_config.background_removal_index) { - // background_removal_index will always be the first or last index. - // If it is the first one we need to reduce all classes id's in 1. - // If it is the last one we just ignore it in the previous if case. 
- class_id--; - } - } - - auto class_entry_idx = cls_index + (class_index * cls_metadata.padded_shape.width); - auto class_score = Quantization::dequantize_output(cls_data[class_entry_idx], - cls_metadata.quant_info); - if (class_score < nms_config.nms_score_th) { + for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; class_index++) { + auto class_id = class_index; + if (nms_config.background_removal) { + if (nms_config.background_removal_index == class_index) { + // Ignore if class_index is background_removal_index continue; } - auto bbox = dims_bbox; - bbox.score = class_score; - m_detections.emplace_back(bbox, class_id); - m_classes_detections_count[class_id]++; + else if (0 == nms_config.background_removal_index) { + // background_removal_index will always be the first or last index. + // If it is the first one we need to reduce all classes id's in 1. + // If it is the last one we just ignore it in the previous if case. + class_id--; + } } + + auto class_entry_idx = cls_index + (class_index * cls_metadata.padded_shape.width); + auto class_score = Quantization::dequantize_output(cls_data[class_entry_idx], + cls_metadata.quant_info); + if (class_score < nms_config.nms_score_th) { + continue; + } + auto bbox = dims_bbox; + bbox.score = class_score; + m_detections.emplace_back(bbox, class_id); + m_classes_detections_count[class_id]++; } } diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp index 7113ff5..42a9bc3 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp index 41b7c39..dce98b5 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_bbox_only_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp index 30ab7af..357bb69 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp index 16933f0..ea4f49d 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -94,24 +94,14 @@ protected: DstType objectness, uint32_t padded_width) { const auto &nms_config = m_metadata->nms_config(); - - if (nms_config.cross_classes) { - // Pre-NMS optimization. 
If NMS checks IoU over different classes, only the maximum class is relevant - auto max_id_score_pair = get_max_class(data, entry_idx, class_start_idx, objectness, quant_info, padded_width); - bbox.score = max_id_score_pair.second; - check_threshold_and_add_detection(bbox, quant_info, max_id_score_pair.first, + for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; class_index++) { + auto class_entry_idx = entry_idx + ((class_start_idx + class_index) * padded_width); + auto class_confidence = dequantize_and_sigmoid( + data[class_entry_idx], quant_info); + bbox.score = class_confidence * objectness; + check_threshold_and_add_detection(bbox, quant_info, class_index, data, entry_idx, padded_width, objectness); } - else { - for (uint32_t class_index = 0; class_index < nms_config.number_of_classes; class_index++) { - auto class_entry_idx = entry_idx + ((class_start_idx + class_index) * padded_width); - auto class_confidence = dequantize_and_sigmoid( - data[class_entry_idx], quant_info); - bbox.score = class_confidence * objectness; - check_threshold_and_add_detection(bbox, quant_info, class_index, - data, entry_idx, padded_width, objectness); - } - } } /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp index 810746d..6e648bd 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -273,26 +273,26 @@ hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &bu { auto status = HAILO_SUCCESS; const auto &nms_config = m_metadata->nms_config(); - uint32_t ignored_detections_count = 0; uint16_t detections_count = 0; // The beginning of the output buffer will contain the detections_count first, here we save space for it. uint32_t buffer_offset = sizeof(detections_count); + // Note: Assuming the m_detections is sorted by score (it's done in remove_overlapping_boxes()) for (auto &detection : m_detections) { if (REMOVED_CLASS_SCORE == detection.m_bbox.score) { - // Detection was removed in remove_overlapping_boxes() - continue; - } - if (0 == m_classes_detections_count[detection.m_class_id]) { - // This class' detections count is higher then m_nms_config.max_proposals_per_class. - // This detection is ignored due to having lower score (detections vector is sorted by score). + // Detection overlapped with a higher score detection and removed in remove_overlapping_boxes() continue; } - // If class's detections count is higher then max_proposals_per_class we set the detection count of that class to the max - // and ignore the rest by reducing the m_classes_detections_count[detection.m_class_id] after copying the bbox to result buffer. 
- if (nms_config.max_proposals_per_class < m_classes_detections_count[detection.m_class_id]) { - ignored_detections_count += (m_classes_detections_count[detection.m_class_id] - nms_config.max_proposals_per_class); - m_classes_detections_count[detection.m_class_id] = nms_config.max_proposals_per_class; + detections_count++; + uint32_t max_proposals_total = nms_config.max_proposals_total; + // TODO: HRT-15885 remove support for max_proposals_per_class in YOLOv5Seg + if (HailoRTCommon::is_nms_by_class(m_metadata->outputs_metadata().begin()->second.format.order)) { + max_proposals_total = nms_config.max_proposals_per_class * nms_config.number_of_classes; + } + if (detections_count > max_proposals_total) { + LOGGER__INFO("{} detections were ignored, due to `max_bboxes_total` defined as {}.", + detections_count - max_proposals_total, max_proposals_total); + break; } auto copied_bytes_amount = copy_detection_to_result_buffer(buffer, detection, buffer_offset); @@ -302,17 +302,11 @@ hailo_status Yolov5SegPostProcess::fill_nms_with_byte_mask_format(MemoryView &bu } CHECK_EXPECTED_AS_STATUS(copied_bytes_amount); // TODO (HRT-13278): Figure out how to remove CHECK_EXPECTED here buffer_offset += copied_bytes_amount.release(); - detections_count++; } // Copy detections count to the beginning of the buffer *(uint16_t*)buffer.data() = detections_count; - if (0 != ignored_detections_count) { - LOGGER__INFO("{} Detections were ignored, due to `max_bboxes_per_class` defined as {}.", - ignored_detections_count, nms_config.max_proposals_per_class); - } - return status; } diff --git a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp index 3992962..61d1e01 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov5_seg_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp index f3aa78d..31744bf 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.hpp index 2accc56..1b45bbe 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_bbox_only_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp index c84bd5b..279afc3 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp index 5f15861..2615967 100644 --- a/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolov8_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -137,36 +137,18 @@ private: for (uint32_t row = 0; row < cls_shape.height; row++) { for (uint32_t col = 0; col < cls_shape.width; col++) { auto cls_idx = (cls_row_size * row) + col; - - if (nms_config.cross_classes) { - // Pre-NMS optimization. If NMS checks IoU over different classes, only the maximum class is relevant - auto max_id_score_pair = get_max_class(cls_data, cls_idx, CLASSES_START_INDEX, - NO_OBJECTNESS, cls_quant_info, cls_padded_shape.width); - if (max_id_score_pair.second >= nms_config.nms_score_th) { + for (uint32_t curr_class_idx = 0; curr_class_idx < nms_config.number_of_classes; curr_class_idx++) { + auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width); + auto class_confidence = Quantization::dequantize_output( + cls_data[class_entry_idx], cls_quant_info); + if (class_confidence >= nms_config.nms_score_th) { // If passes threshold - get the relevant bbox and add this detection assert(contains(m_d_matrix, layers_names.reg)); auto &d_matrix = m_d_matrix.at(layers_names.reg); auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_shape, reg_quant_info, - (SrcType*)reg_data, d_matrix, max_id_score_pair.second); - m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); - m_classes_detections_count[max_id_score_pair.first]++; - } - } - else { - // No optimization - it's possible that a 
specific bbox will hold more then 1 class - for (uint32_t curr_class_idx = 0; curr_class_idx < nms_config.number_of_classes; curr_class_idx++) { - auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width); - auto class_confidence = Quantization::dequantize_output( - cls_data[class_entry_idx], cls_quant_info); - if (class_confidence >= nms_config.nms_score_th) { - // If passes threshold - get the relevant bbox and add this detection - assert(contains(m_d_matrix, layers_names.reg)); - auto &d_matrix = m_d_matrix.at(layers_names.reg); - auto bbox = get_bbox(row, col, stride, reg_padded_shape, reg_shape, reg_quant_info, - (SrcType*)reg_data, d_matrix, class_confidence); - m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); - m_classes_detections_count[curr_class_idx]++; - } + (SrcType*)reg_data, d_matrix, class_confidence); + m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); + m_classes_detections_count[curr_class_idx]++; } } } diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp index 5459250..b959a24 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp index 7161d0b..3e15f24 100644 --- a/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp +++ b/hailort/libhailort/src/net_flow/ops/yolox_post_process.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -62,8 +62,6 @@ private: const uint32_t W_OFFSET = W_INDEX * reg_padded_shape.width; const uint32_t H_OFFSET = H_INDEX * reg_padded_shape.width; - static const uint32_t CLASSES_START_INDEX = 0; - // Validate regression buffer size static const uint32_t reg_entry_size = 4; auto number_of_entries = reg_padded_shape.height * reg_padded_shape.width; @@ -112,26 +110,15 @@ private: auto th = Quantization::dequantize_output(reg_data[reg_idx + H_OFFSET], reg_quant_info); auto bbox = decode(tx, ty, tw, th, col, row, static_cast(reg_shape.width), static_cast(reg_shape.height)); - if (nms_config.cross_classes) { - // Pre-NMS optimization. If NMS checks IoU over different classes, only the maximum class is relevant - auto max_id_score_pair = get_max_class(cls_data, cls_idx, CLASSES_START_INDEX, objectness, cls_quant_info, cls_padded_shape.width); - bbox.score = max_id_score_pair.second; - if (max_id_score_pair.second >= nms_config.nms_score_th) { - m_detections.emplace_back(DetectionBbox(bbox, max_id_score_pair.first)); - m_classes_detections_count[max_id_score_pair.first]++; - } - } - else { - for (uint32_t curr_class_idx = 0; curr_class_idx < nms_config.number_of_classes; curr_class_idx++) { - auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width); - auto class_confidence = Quantization::dequantize_output( - cls_data[class_entry_idx], cls_quant_info); - auto class_score = class_confidence * objectness; - if (class_score >= nms_config.nms_score_th) { - bbox.score = class_score; - m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); - m_classes_detections_count[curr_class_idx]++; - } + for (uint32_t curr_class_idx = 0; curr_class_idx < nms_config.number_of_classes; curr_class_idx++) { + auto class_entry_idx = cls_idx + (curr_class_idx * cls_padded_shape.width); + auto class_confidence = Quantization::dequantize_output( + cls_data[class_entry_idx], cls_quant_info); 
+ auto class_score = class_confidence * objectness; + if (class_score >= nms_config.nms_score_th) { + bbox.score = class_score; + m_detections.emplace_back(DetectionBbox(bbox, curr_class_idx)); + m_classes_detections_count[curr_class_idx]++; } } } diff --git a/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp index ef4d46f..6f92889 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/argmax_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp index 8ba4a5f..d2a2026 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/nms_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -22,24 +22,17 @@ namespace net_flow struct NmsPostProcessConfig { - NmsPostProcessConfig(double nms_score_th = 0, double nms_iou_th = 0, uint32_t max_proposals = 0, uint32_t number_of_classes = 0, bool background_removal = false, - uint32_t background_removal_index = 0, bool cross_classes = false, bool bbox_only = false, - hailo_nms_result_order_type_t order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS) : + NmsPostProcessConfig(double nms_score_th = 0, double nms_iou_th = 0, uint32_t max_proposals_per_class = 0, uint32_t max_proposals_total = 0, + uint32_t number_of_classes = 0, bool background_removal = false, uint32_t background_removal_index = 0, bool bbox_only = false) : nms_score_th(nms_score_th), nms_iou_th(nms_iou_th), + max_proposals_per_class(max_proposals_per_class), + max_proposals_total(max_proposals_total), number_of_classes(number_of_classes), background_removal(background_removal), background_removal_index(background_removal_index), - cross_classes(cross_classes), - bbox_only(bbox_only), - order_type(order_type) - { - if (HAILO_NMS_RESULT_ORDER_BY_SCORE == order_type) { - max_proposals_total = max_proposals; - } else { - max_proposals_per_class = max_proposals; - } - } + bbox_only(bbox_only) + {} // User given confidence threshold for a bbox. A bbox will be consider as detection if the // (objectness * class_score) is higher then the confidence_threshold. @@ -49,14 +42,11 @@ struct NmsPostProcessConfig // Non-maximum suppression (Removing overlapping boxes). double nms_iou_th; - union - { - // Maximum amount of bboxes per nms class. - uint32_t max_proposals_per_class; + // Maximum amount of bboxes per nms class. + uint32_t max_proposals_per_class; - // Maximum amount of bboxes in total. - uint32_t max_proposals_total; - }; + // Maximum amount of bboxes in total. + uint32_t max_proposals_total; // The model's number of classes. 
(This depends on the dataset that the model trained on). uint32_t number_of_classes; @@ -67,15 +57,8 @@ struct NmsPostProcessConfig // Index of background class for background removal uint32_t background_removal_index; - // Indicates whether or not NMS performs IoU over different classes for the same box. - // If set to false - NMS won't intersect different classes, and a box could have multiple labels. - bool cross_classes; - // Indicates whether only the bbox decoding is being done bool bbox_only; - - // Order of NMS results - hailo_nms_result_order_type_t order_type; }; static const float32_t REMOVED_CLASS_SCORE = 0.0f; @@ -108,23 +91,7 @@ protected: const OperationType type) : OpMetadata(inputs_metadata, outputs_metadata, name, network_name, type), m_nms_config(nms_post_process_config) - { - switch (outputs_metadata.begin()->second.format.order) { - case HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE: - m_nms_config.order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; - m_nms_config.max_proposals_total = m_nms_config.max_proposals_per_class * m_nms_config.number_of_classes; - break; - case HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS: - m_nms_config.order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; - break; - case HAILO_FORMAT_ORDER_NHWC: - // In case of bbox only - break; - default: - LOGGER__WARNING("Unsupported NMS format order type for NmsOpMetadata: {}", - HailoRTCommon::get_format_order_str(outputs_metadata.begin()->second.format.order)); - } - } + {} hailo_status validate_params() override; diff --git a/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp index 80ee411..87b5817 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp index f6441db..fa5b1a4 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/softmax_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp index 4b97c72..b3890b1 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/ssd_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp index 5fd3c85..d66e281 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_bbox_only_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp index b984229..3295b2b 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp index be44d24..31e9867 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov5_seg_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_bbox_only_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_bbox_only_op_metadata.hpp index d99529a..c57c622 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_bbox_only_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_bbox_only_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp index 3b20989..9ed2da1 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolov8_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp b/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp index 4eaa84a..4b25e63 100644 --- a/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp +++ b/hailort/libhailort/src/net_flow/ops_metadata/yolox_op_metadata.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp index 1e6136f..7ace6d8 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp index f4ff7c3..c5f351a 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_infer_runner.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp index f3a0871..9b7c53e 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -240,14 +240,14 @@ Expected> AsyncPipelineBuilder:: Expected> AsyncPipelineBuilder::add_fill_nms_format_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - std::shared_ptr final_elem, const uint32_t final_elem_index) + std::shared_ptr final_elem, const hailo_format_order_t &dst_format_order, const uint32_t final_elem_index) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); TRY(auto fill_nms_format_element, FillNmsFormatElement::create(metadata->nms_config(), PipelineObject::create_element_name(element_name, output_stream_name, stream_index), - async_pipeline->get_build_params(), PipelineDirection::PUSH, async_pipeline)); + async_pipeline->get_build_params(), dst_format_order, PipelineDirection::PUSH, async_pipeline)); async_pipeline->add_element_to_pipeline(fill_nms_format_element); @@ -327,7 +327,7 @@ hailo_status AsyncPipelineBuilder::add_output_demux_flow(const std::string &outp async_pipeline, edge_info.hw_shape, edge_info.format, edge_info.shape, {edge_info.quant_info}, demux_queue_elem)); auto post_transform_frame_size = (HailoRTCommon::is_nms(edge_info.format.order)) ? 
- HailoRTCommon::get_nms_host_frame_size(edge_info.nms_info, output_format.second) : + HailoRTCommon::get_nms_by_class_host_frame_size(edge_info.nms_info, output_format.second) : HailoRTCommon::get_frame_size(edge_info.shape, output_format.second); TRY(auto last_async_element, add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, @@ -403,7 +403,7 @@ hailo_status AsyncPipelineBuilder::add_nms_fuse_flow(const std::vector auto is_empty = false; auto interacts_with_hw = false; - const auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(output_stream_info.nms_info, output_format.second); + const auto post_transform_frame_size = HailoRTCommon::get_nms_by_class_host_frame_size(output_stream_info.nms_info, output_format.second); TRY(auto pre_nms_convert_queue_element, add_push_queue_element(PipelineObject::create_element_name("PushQEl_pre_nms_convert", output_stream_name, output_stream_info.index), async_pipeline, post_transform_frame_size, is_empty, interacts_with_hw, post_infer_element)); @@ -650,7 +650,7 @@ hailo_status AsyncPipelineBuilder::add_iou_flow( std::shared_ptr TRY(auto fill_nms_format_element, add_fill_nms_format_element(async_pipeline, output_stream_name, output_stream_info.index, - "FillNmsFormatEl", iou_op_metadata, pre_fill_nms_format_element_queue_element)); + "FillNmsFormatEl", iou_op_metadata, pre_fill_nms_format_element_queue_element, output_format.second.order)); TRY(const auto output_vstream_info, iou_op_metadata->get_output_vstream_info()); const auto final_frame_size = HailoRTCommon::get_frame_size(output_vstream_info, output_format.second); @@ -840,7 +840,7 @@ hailo_status AsyncPipelineBuilder::create_post_async_hw_elements(std::shared_ptr final_elem_source_index)); const auto post_transform_frame_size = (HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == first_stream_info.format.order) ? 
- HailoRTCommon::get_nms_host_frame_size(first_stream_info.nms_info, output_format.second) : + HailoRTCommon::get_nms_by_class_host_frame_size(first_stream_info.nms_info, output_format.second) : HailoRTCommon::get_frame_size(first_stream_info.shape, output_format.second); TRY(auto last_async_element, add_last_async_element(async_pipeline, output_format.first, post_transform_frame_size, @@ -867,7 +867,7 @@ Expected> AsyncPipelineBuilder::create_pipeline(s // Buffer pool sizes for pipeline elements should be: // * The minimum of the maximum queue size of all LL streams (input and output) - for edge elements // * HAILO_DEFAULT_ASYNC_INFER_QUEUE_SIZE - for internal elements - TRY(build_params.buffer_pool_size_edges, net_group->get_min_buffer_pool_size()); + TRY(build_params.buffer_pool_size_edges, net_group->infer_queue_size()); build_params.buffer_pool_size_internal = std::min(static_cast(build_params.buffer_pool_size_edges), static_cast(HAILO_DEFAULT_ASYNC_INFER_QUEUE_SIZE)); build_params.elem_stats_flags = HAILO_PIPELINE_ELEM_STATS_NONE; diff --git a/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp index 06dd392..9714bcf 100644 --- a/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/async_pipeline_builder.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -89,7 +89,7 @@ public: std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); static Expected> add_fill_nms_format_element(std::shared_ptr async_pipeline, const std::string &output_stream_name, uint8_t stream_index, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - std::shared_ptr final_elem, const uint32_t final_elem_source_index = 0); + std::shared_ptr final_elem, const hailo_format_order_t &dst_format_order, const uint32_t final_elem_source_index = 0); static Expected> create_multi_plane_splitter_element(const std::string &input_name, hailo_format_order_t order, std::shared_ptr> pipeline_status, std::shared_ptr async_pipeline); diff --git a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp index 5253168..1b0e369 100644 --- a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -13,16 +13,36 @@ namespace hailort { +AsyncInferJobHrpcClient::AsyncInferJobHrpcClient(EventPtr event) : m_event(event), m_job_status(HAILO_UNINITIALIZED) +{ +} + +hailo_status AsyncInferJobHrpcClient::wait(std::chrono::milliseconds timeout) +{ + auto status = m_event->wait(timeout); + if (HAILO_UNINITIALIZED != m_job_status) { + return m_job_status; + } + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; +} + +hailo_status AsyncInferJobHrpcClient::set_status(hailo_status status) +{ + m_job_status = status; + return m_event->signal(); +} + Expected> ConfiguredInferModelHrpcClient::create(std::shared_ptr client, rpc_object_handle_t handle_id, std::vector &&input_vstream_infos, - std::vector &&output_vstream_infos, uint32_t max_ongoing_transfers, - std::shared_ptr callbacks_queue, rpc_object_handle_t infer_model_id, - const std::unordered_map inputs_frame_sizes, - const std::unordered_map outputs_frame_sizes) + std::vector &&output_vstream_infos, uint32_t max_ongoing_transfers, rpc_object_handle_t infer_model_id, + const std::unordered_map inputs_frame_sizes, const std::unordered_map outputs_frame_sizes) { // TODO: consider create a separate client object here - HRT-13687 + TRY(auto callback_dispatcher, client->callback_dispatcher_manager()->new_dispatcher(RpcCallbackType::RUN_ASYNC, true)); auto ptr = make_shared_nothrow(client, handle_id, std::move(input_vstream_infos), - std::move(output_vstream_infos), max_ongoing_transfers, callbacks_queue, infer_model_id, inputs_frame_sizes, + std::move(output_vstream_infos), max_ongoing_transfers, callback_dispatcher, infer_model_id, inputs_frame_sizes, outputs_frame_sizes); CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); @@ -35,43 +55,68 @@ ConfiguredInferModelHrpcClient::~ConfiguredInferModelHrpcClient() return; } - auto request = DestroyConfiguredInferModelSerializer::serialize_request(m_handle_id); - if (!request) { - 
LOGGER__CRITICAL("Failed to serialize ConfiguredInferModel_release request"); - return; - } - auto client = m_client.lock(); if (client) { - auto result = client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__DESTROY, MemoryView(*request)); - if (!result) { - LOGGER__CRITICAL("Failed to destroy configured infer model! status = {}", result.status()); + auto request_buffer = client->allocate_request_buffer(); + if (!request_buffer) { + LOGGER__CRITICAL("Failed to create buffer for ConfiguredInferModel_release request"); return; } - auto status = DestroyConfiguredInferModelSerializer::deserialize_reply(MemoryView(*result)); + auto request_size = DestroyConfiguredInferModelSerializer::serialize_request(m_handle_id, MemoryView(**request_buffer)); + if (!request_size) { + LOGGER__CRITICAL("Failed to serialize ConfiguredInferModel_release request"); + return; + } + + auto expected_result = client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__DESTROY, MemoryView(request_buffer.value()->data(), *request_size)); + if (!expected_result) { + LOGGER__CRITICAL("Failed to destroy configured infer model! status = {}", expected_result.status()); + return; + } + auto result = expected_result.release(); + + auto status = DestroyConfiguredInferModelSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); if (HAILO_SUCCESS != status) { LOGGER__CRITICAL("Failed to destroy configured infer model! status = {}", status); } + + status = client->callback_dispatcher_manager()->remove_dispatcher(m_callback_dispatcher->id()); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to remove callback dispatcher! 
status = {}", status); + } } } -Expected ConfiguredInferModelHrpcClient::create_bindings() +Expected ConfiguredInferModelHrpcClient::create_bindings(const std::map &buffers) { std::unordered_map inputs; std::unordered_map outputs; + uint32_t used_buffers = 0; + for (const auto &vstream_info : m_input_vstream_infos) { TRY(auto stream, ConfiguredInferModelBase::create_infer_stream(vstream_info)); - inputs.emplace(vstream_info.name, std::move(stream)); + auto name = std::string(vstream_info.name); + inputs.emplace(name, std::move(stream)); + if (contains(buffers, name)) { + inputs.at(name).set_buffer(buffers.at(name)); + used_buffers++; + } } for (const auto &vstream_info : m_output_vstream_infos) { TRY(auto stream, ConfiguredInferModelBase::create_infer_stream(vstream_info)); - outputs.emplace(vstream_info.name, std::move(stream)); + auto name = std::string(vstream_info.name); + outputs.emplace(name, std::move(stream)); + if (contains(buffers, name)) { + outputs.at(name).set_buffer(buffers.at(name)); + used_buffers++; + } } TRY(auto bindings, ConfiguredInferModelBase::create_bindings(std::move(inputs), std::move(outputs))); + CHECK_AS_EXPECTED(used_buffers == buffers.size(), HAILO_INVALID_ARGUMENT, "Given 'buffers' contains names which arent model edges."); return bindings; } @@ -102,45 +147,70 @@ Expected ConfiguredInferModelHrpcClient::run_async_impl(const Con { CHECK_SUCCESS_AS_EXPECTED(validate_bindings(bindings)); std::unique_lock lock(m_infer_mutex); - m_callbacks_counter++; - auto callback_wrapper = [this, callback] (const AsyncInferCompletionInfo &info) { - { - std::unique_lock transfers_lock(m_ongoing_transfers_mutex); - m_ongoing_transfers--; - } - m_cv.notify_one(); - if (callback) { - callback(info); - } - }; + auto callback_id = m_callbacks_counter++; TRY(auto input_buffer_sizes, get_input_buffer_sizes(bindings)); - TRY(auto request, RunAsyncSerializer::serialize_request({m_handle_id, m_infer_model_handle_id, - m_callbacks_counter, input_buffer_sizes})); 
- auto request_ptr = make_shared_nothrow(std::move(request)); - CHECK_NOT_NULL(request_ptr, HAILO_OUT_OF_HOST_MEMORY); - TRY(auto job_ptr, m_callbacks_queue->register_callback(m_callbacks_counter, bindings, callback_wrapper)); + TRY(auto event_ptr, Event::create_shared(Event::State::not_signalled)); + auto job_ptr = make_shared_nothrow(event_ptr); + CHECK_NOT_NULL(job_ptr, HAILO_OUT_OF_HOST_MEMORY); + + m_callback_dispatcher->add_additional_reads(callback_id, + [this, bindings] (const RpcCallback &rpc_callback) -> Expected> { + std::vector buffers; + if (HAILO_SUCCESS == rpc_callback.data.run_async.status) { + buffers.reserve(m_output_vstream_infos.size()); + for (const auto &vstream_info : m_output_vstream_infos) { + TRY(auto buffer, bindings.output(vstream_info.name)->get_buffer()); + buffers.emplace_back(MemoryView(buffer)); + } + } + return buffers; + }); + m_callback_dispatcher->register_callback(callback_id, + [this, callback, event_ptr] + (const RpcCallback &rpc_callback, hailo_status shutdown_status) { + { + std::unique_lock transfers_lock(m_ongoing_transfers_mutex); + m_ongoing_transfers--; + } + m_cv.notify_one(); + if (!callback) { + return; + } + + hailo_status status = (shutdown_status != HAILO_UNINITIALIZED) ? shutdown_status : rpc_callback.data.run_async.status; + AsyncInferCompletionInfo info(status); + callback(info); + status = event_ptr->signal(); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to signal event, status = {}", status); + } + }); auto client = m_client.lock(); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the ConfiguredInferModel is in use."); - auto status = client->wait_for_execute_request_ready(MemoryView(*request_ptr), REQUEST_TIMEOUT); + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, RunAsyncSerializer::serialize_request({m_handle_id, m_infer_model_handle_id, + callback_id, m_callback_dispatcher->id(), input_buffer_sizes}, MemoryView(*serialized_request))); + + auto status = client->wait_for_execute_request_ready(MemoryView(serialized_request->data(), request_size), REQUEST_TIMEOUT); CHECK_SUCCESS(status); - auto request_sent_callback = [request_ptr] (hailo_status status) { + auto request_sent_callback = [serialized_request] (hailo_status status) { if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to send request, status = {}", status); } }; - auto reply_received_callback = [job_ptr] (hailo_status status, Buffer &&reply) { + auto reply_received_callback = [job_ptr] (hailo_status status, rpc_message_t reply) { if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed getting reply, status = {}", status); return; } - status = RunAsyncSerializer::deserialize_reply(MemoryView(reply)); + status = RunAsyncSerializer::deserialize_reply(MemoryView(reply.buffer->data(), reply.header.size)); if (HAILO_SUCCESS != status) { LOGGER__ERROR("Failed to run async, status = {}", status); hailo_status job_status = status; @@ -150,11 +220,11 @@ Expected ConfiguredInferModelHrpcClient::run_async_impl(const Con } } }; - auto additional_writes_lambda = [this, &bindings] (RpcConnection connection) -> hailo_status { - return write_async_inputs(bindings, connection); - }; - status = client->execute_request_async(HailoRpcActionID::CONFIGURED_INFER_MODEL__RUN_ASYNC, MemoryView(*request_ptr), - request_sent_callback, reply_received_callback, additional_writes_lambda); + + TRY(auto additional_buffers, get_async_inputs(bindings)); + status = 
client->execute_request_async(HailoRpcActionID::CONFIGURED_INFER_MODEL__RUN_ASYNC, + MemoryView(serialized_request->data(), request_size), + request_sent_callback, reply_received_callback, std::move(additional_buffers)); CHECK_SUCCESS(status); { @@ -199,9 +269,10 @@ Expected> ConfiguredInferModelHrpcClient::get_input_buffer return buffer_sizes; } -hailo_status ConfiguredInferModelHrpcClient::write_async_inputs(const ConfiguredInferModel::Bindings &bindings, - RpcConnection connection) +Expected> ConfiguredInferModelHrpcClient::get_async_inputs( + const ConfiguredInferModel::Bindings &bindings) { + std::vector inputs; for (const auto &input_vstream : m_input_vstream_infos) { TRY(auto input, bindings.input(input_vstream.name)); auto buffer_type = ConfiguredInferModelBase::get_infer_stream_buffer_type(input); @@ -209,12 +280,7 @@ hailo_status ConfiguredInferModelHrpcClient::write_async_inputs(const Configured case BufferType::VIEW: { TRY(auto buffer, input.get_buffer()); - auto status = connection.write_buffer_async(MemoryView(buffer), [] (hailo_status status) { - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to write buffer, status = {}", status); - } - }); - CHECK_SUCCESS(status); + inputs.push_back(MemoryView(buffer)); break; } case BufferType::PIX_BUFFER: @@ -223,72 +289,74 @@ hailo_status ConfiguredInferModelHrpcClient::write_async_inputs(const Configured CHECK(HAILO_PIX_BUFFER_MEMORY_TYPE_USERPTR == pix_buffer.memory_type, HAILO_NOT_SUPPORTED, "Currently, only userptr pix buffers are supported in HRPC!"); // TODO: HRT-14391 for (uint32_t i = 0; i < pix_buffer.number_of_planes; i++) { - auto status = connection.write_buffer_async(MemoryView(pix_buffer.planes[i].user_ptr, pix_buffer.planes[i].bytes_used), - [] (hailo_status status) { - if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to write buffer, status = {}", status); - } - }); - CHECK_SUCCESS(status); + inputs.push_back(MemoryView(pix_buffer.planes[i].user_ptr, 
pix_buffer.planes[i].bytes_used)); } break; } case BufferType::DMA_BUFFER: LOGGER__CRITICAL("DMA_BUFFER is not supported in HRPC"); - return HAILO_NOT_IMPLEMENTED; + return make_unexpected(HAILO_NOT_IMPLEMENTED); default: LOGGER__CRITICAL("Unknown buffer type"); - return HAILO_INTERNAL_FAILURE; + return make_unexpected(HAILO_INTERNAL_FAILURE); } } - return HAILO_SUCCESS; + return inputs; } hailo_status ConfiguredInferModelHrpcClient::set_scheduler_timeout(const std::chrono::milliseconds &timeout) { - TRY(auto serialized_request, SetSchedulerTimeoutSerializer::serialize_request(m_handle_id, timeout)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, SetSchedulerTimeoutSerializer::serialize_request(m_handle_id, timeout, MemoryView(*serialized_request))); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_TIMEOUT, MemoryView(serialized_request))); - CHECK_SUCCESS(SetSchedulerTimeoutSerializer::deserialize_reply(MemoryView(result))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_TIMEOUT, MemoryView(serialized_request->data(), request_size))); + CHECK_SUCCESS(SetSchedulerTimeoutSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); return HAILO_SUCCESS; } hailo_status ConfiguredInferModelHrpcClient::set_scheduler_threshold(uint32_t threshold) { - TRY(auto serialized_request, SetSchedulerThresholdSerializer::serialize_request(m_handle_id, threshold)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, SetSchedulerThresholdSerializer::serialize_request(m_handle_id, threshold, MemoryView(*serialized_request))); 
CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_THRESHOLD, MemoryView(serialized_request))); - CHECK_SUCCESS(SetSchedulerThresholdSerializer::deserialize_reply(MemoryView(result))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_THRESHOLD, MemoryView(serialized_request->data(), request_size))); + CHECK_SUCCESS(SetSchedulerThresholdSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); return HAILO_SUCCESS; } hailo_status ConfiguredInferModelHrpcClient::set_scheduler_priority(uint8_t priority) { - TRY(auto serialized_request, SetSchedulerPrioritySerializer::serialize_request(m_handle_id, priority)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, SetSchedulerPrioritySerializer::serialize_request(m_handle_id, priority, MemoryView(*serialized_request))); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_PRIORITY, MemoryView(serialized_request))); - CHECK_SUCCESS(SetSchedulerPrioritySerializer::deserialize_reply(MemoryView(result))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SET_SCHEDULER_PRIORITY, MemoryView(serialized_request->data(), request_size))); + CHECK_SUCCESS(SetSchedulerPrioritySerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); return HAILO_SUCCESS; } Expected ConfiguredInferModelHrpcClient::get_hw_latency_measurement() { - TRY(auto serialized_request, GetHwLatencyMeasurementSerializer::serialize_request(m_handle_id)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, GetHwLatencyMeasurementSerializer::serialize_request(m_handle_id, MemoryView(*serialized_request))); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__GET_HW_LATENCY_MEASUREMENT, MemoryView(serialized_request))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__GET_HW_LATENCY_MEASUREMENT, MemoryView(serialized_request->data(), request_size))); - TRY(auto tuple, GetHwLatencyMeasurementSerializer::deserialize_reply(MemoryView(result))); + TRY(auto tuple, GetHwLatencyMeasurementSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); auto status = std::get<0>(tuple); if (HAILO_NOT_AVAILABLE == status) { @@ -304,31 +372,35 @@ Expected ConfiguredInferModelHrpcClient::get_hw_latenc hailo_status ConfiguredInferModelHrpcClient::activate() { - TRY(auto serialized_request, ActivateSerializer::serialize_request(m_handle_id)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, ActivateSerializer::serialize_request(m_handle_id, MemoryView(*serialized_request))); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__ACTIVATE, MemoryView(serialized_request))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__ACTIVATE, MemoryView(serialized_request->data(), request_size))); - CHECK_SUCCESS(ActivateSerializer::deserialize_reply(MemoryView(result))); + CHECK_SUCCESS(ActivateSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); return HAILO_SUCCESS; }; hailo_status ConfiguredInferModelHrpcClient::deactivate() { - TRY(auto serialized_request, DeactivateSerializer::serialize_request(m_handle_id)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, DeactivateSerializer::serialize_request(m_handle_id, MemoryView(*serialized_request))); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__DEACTIVATE, MemoryView(serialized_request))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__DEACTIVATE, MemoryView(serialized_request->data(), request_size))); - CHECK_SUCCESS(DeactivateSerializer::deserialize_reply(MemoryView(result))); + CHECK_SUCCESS(DeactivateSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); return HAILO_SUCCESS; }; -Expected ConfiguredInferModelHrpcClient::get_async_queue_size() +Expected ConfiguredInferModelHrpcClient::get_async_queue_size() const { size_t queue_size = m_max_ongoing_transfers; return queue_size; @@ -409,13 +481,15 @@ hailo_status ConfiguredInferModelHrpcClient::validate_bindings(const ConfiguredI hailo_status ConfiguredInferModelHrpcClient::shutdown_impl() { - TRY(auto serialized_request, ShutdownSerializer::serialize_request(m_handle_id)); auto client = m_client.lock(); + + TRY(auto serialized_request, client->allocate_request_buffer()); + TRY(auto request_size, ShutdownSerializer::serialize_request(m_handle_id, MemoryView(*serialized_request))); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the ConfiguredInferModel is in use."); - TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SHUTDOWN, MemoryView(serialized_request))); + TRY(auto result, client->execute_request(HailoRpcActionID::CONFIGURED_INFER_MODEL__SHUTDOWN, MemoryView(serialized_request->data(), request_size))); - CHECK_SUCCESS(ShutdownSerializer::deserialize_reply(MemoryView(result))); + CHECK_SUCCESS(ShutdownSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); return HAILO_SUCCESS; } @@ -425,10 +499,16 @@ hailo_status ConfiguredInferModelHrpcClient::shutdown() auto status = shutdown_impl(); if (status != HAILO_SUCCESS) { - CHECK_SUCCESS(m_callbacks_queue->shutdown(status)); + CHECK_SUCCESS(m_callback_dispatcher->shutdown(status)); } return status; } +hailo_status ConfiguredInferModelHrpcClient::update_cache_offset(int32_t /*offset_delta_entries*/) +{ + LOGGER__ERROR("update_cache_offset is not supported for HrpcClient"); + return HAILO_NOT_IMPLEMENTED; +} + } // namespace hailort diff --git a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp index 290d7ef..dc20e99 100644 --- a/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/configured_infer_model_hrpc_client.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -18,24 +18,36 @@ namespace hailort { +class AsyncInferJobHrpcClient : public AsyncInferJobBase +{ +public: + AsyncInferJobHrpcClient(EventPtr event); + + virtual hailo_status wait(std::chrono::milliseconds timeout) override; + hailo_status set_status(hailo_status status); + +private: + EventPtr m_event; + std::atomic m_job_status; +}; + class ConfiguredInferModelHrpcClient : public ConfiguredInferModelBase { public: static Expected> create(std::shared_ptr client, rpc_object_handle_t handle_id, std::vector &&input_vstream_infos, std::vector &&output_vstream_infos, uint32_t max_ongoing_transfers, - std::shared_ptr callbacks_queue, rpc_object_handle_t infer_model_handle_id, - const std::unordered_map inputs_frame_sizes, + rpc_object_handle_t infer_model_handle_id, const std::unordered_map inputs_frame_sizes, const std::unordered_map outputs_frame_sizes); ConfiguredInferModelHrpcClient(std::shared_ptr client, rpc_object_handle_t handle_id, std::vector &&input_vstream_infos, std::vector &&output_vstream_infos, - uint32_t max_ongoing_transfers, std::shared_ptr callbacks_queue, rpc_object_handle_t infer_model_handle_id, + uint32_t max_ongoing_transfers, std::shared_ptr callback_dispatcher, rpc_object_handle_t infer_model_handle_id, const std::unordered_map inputs_frame_sizes, const std::unordered_map outputs_frame_sizes) : ConfiguredInferModelBase(inputs_frame_sizes, outputs_frame_sizes), m_client(client), m_handle_id(handle_id), m_input_vstream_infos(std::move(input_vstream_infos)), m_output_vstream_infos(std::move(output_vstream_infos)), m_max_ongoing_transfers(max_ongoing_transfers), - m_ongoing_transfers(0), m_callbacks_queue(std::move(callbacks_queue)), m_infer_model_handle_id(infer_model_handle_id), + m_ongoing_transfers(0), m_callback_dispatcher(std::move(callback_dispatcher)), m_infer_model_handle_id(infer_model_handle_id), m_callbacks_counter(0) {} virtual 
~ConfiguredInferModelHrpcClient(); @@ -44,7 +56,7 @@ public: ConfiguredInferModelHrpcClient(ConfiguredInferModelHrpcClient &&) = delete; ConfiguredInferModelHrpcClient &operator=(ConfiguredInferModelHrpcClient &&) = delete; - virtual Expected create_bindings() override; + virtual Expected create_bindings(const std::map &buffers) override; virtual hailo_status wait_for_async_ready(std::chrono::milliseconds timeout, uint32_t frames_count) override; virtual hailo_status activate() override; @@ -59,17 +71,17 @@ public: virtual hailo_status set_scheduler_threshold(uint32_t threshold) override; virtual hailo_status set_scheduler_priority(uint8_t priority) override; - virtual Expected get_async_queue_size() override; + virtual Expected get_async_queue_size() const override; virtual hailo_status shutdown() override; + virtual hailo_status update_cache_offset(int32_t offset_delta_entries) override; private: virtual hailo_status validate_bindings(const ConfiguredInferModel::Bindings &bindings) override; Expected run_async_impl(const ConfiguredInferModel::Bindings &bindings, std::function callback); Expected> get_input_buffer_sizes(const ConfiguredInferModel::Bindings &bindings); - hailo_status write_async_inputs(const ConfiguredInferModel::Bindings &bindings, - RpcConnection connection); + Expected> get_async_inputs(const ConfiguredInferModel::Bindings &bindings); hailo_status shutdown_impl(); std::weak_ptr m_client; @@ -80,7 +92,7 @@ private: std::mutex m_ongoing_transfers_mutex; std::condition_variable m_cv; std::atomic_uint32_t m_ongoing_transfers; - std::shared_ptr m_callbacks_queue; + std::shared_ptr m_callback_dispatcher; rpc_object_handle_t m_infer_model_handle_id; std::atomic_uint32_t m_callbacks_counter; std::mutex m_infer_mutex; diff --git a/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp index 632c393..2de7eb5 100644 --- a/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp +++ 
b/hailort/libhailort/src/net_flow/pipeline/edge_elements.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp index a836578..95dfe19 100644 --- a/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/edge_elements.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp index affa02a..7c3e7ba 100644 --- a/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/filter_elements.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -256,10 +256,6 @@ Expected ConvertNmsToDetectionsElement::action(PipelineBuffer && m_duration_collector.start_measurement(); - // TODO: HRT-15612 support BY_SCORE order_type in this function - CHECK_AS_EXPECTED(HAILO_NMS_RESULT_ORDER_BY_SCORE != m_nms_info.order_type, HAILO_INVALID_ARGUMENT, - "HAILO_NMS_RESULT_ORDER_BY_SCORE is not supported NMS result order type for ConvertNmsToDetectionsElement"); - auto detections_pair = net_flow::NmsPostProcessOp::transform__d2h_NMS_DETECTIONS(input.data(), m_nms_info); auto detections_pipeline_data = make_shared_nothrow (std::move(detections_pair.first),std::move(detections_pair.second)); @@ -272,12 +268,13 @@ Expected ConvertNmsToDetectionsElement::action(PipelineBuffer && Expected> FillNmsFormatElement::create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) + std::chrono::milliseconds timeout, const hailo_format_order_t format_order, PipelineDirection pipeline_direction, + std::shared_ptr async_pipeline) { TRY(auto duration_collector, DurationCollector::create(elem_flags)); auto fill_nms_format_element = make_shared_nothrow(std::move(nms_config), - name, std::move(duration_collector), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline); + name, std::move(duration_collector), std::move(pipeline_status), timeout, pipeline_direction, async_pipeline, format_order); CHECK_AS_EXPECTED(nullptr != fill_nms_format_element, HAILO_OUT_OF_HOST_MEMORY); LOGGER__INFO("Created {}", fill_nms_format_element->description()); @@ -286,18 +283,20 @@ Expected> FillNmsFormatElement::create(con } Expected> FillNmsFormatElement::create(const net_flow::NmsPostProcessConfig nms_config, - const std::string &name, const ElementBuildParams 
&build_params, PipelineDirection pipeline_direction, - std::shared_ptr async_pipeline) + const std::string &name, const ElementBuildParams &build_params, const hailo_format_order_t format_order, + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) { return FillNmsFormatElement::create(nms_config, name, build_params.elem_stats_flags, - build_params.pipeline_status, build_params.timeout, pipeline_direction, async_pipeline); + build_params.pipeline_status, build_params.timeout, format_order, pipeline_direction, async_pipeline); } FillNmsFormatElement::FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline) : + std::chrono::milliseconds timeout, PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, + const hailo_format_order_t format_order) : FilterElement(name, std::move(duration_collector), std::move(pipeline_status), pipeline_direction, timeout, async_pipeline), - m_nms_config(std::move(nms_config)) + m_nms_config(std::move(nms_config)), + m_format_order(format_order) {} hailo_status FillNmsFormatElement::run_push(PipelineBuffer &&buffer, const PipelinePad &sink) @@ -340,16 +339,14 @@ Expected FillNmsFormatElement::action(PipelineBuffer &&input, Pi auto detections = input.get_metadata().get_additional_data(); TRY(auto dst, buffer.as_view(BufferProtection::WRITE)); - switch (m_nms_config.order_type) { - case HAILO_NMS_RESULT_ORDER_BY_CLASS: + if (HailoRTCommon::is_nms_by_class(m_format_order)) { net_flow::NmsPostProcessOp::fill_nms_by_class_format_buffer(dst, detections->m_detections, detections->m_detections_classes_count, m_nms_config); - break; - case HAILO_NMS_RESULT_ORDER_BY_SCORE: - net_flow::NmsPostProcessOp::fill_nms_by_score_format_buffer(dst, detections->m_detections, m_nms_config); - break; - default: 
- LOGGER__ERROR("NMS result order type not supported: {}", HailoRTCommon::get_nms_result_order_type_str(m_nms_config.order_type)); + } else if (HailoRTCommon::is_nms_by_score(m_format_order)) { + net_flow::NmsPostProcessOp::fill_nms_by_score_format_buffer(dst, detections->m_detections, m_nms_config, true); + } else { + LOGGER__ERROR("Unsupported output format order for NmsPostProcessOp: {}", + HailoRTCommon::get_format_order_str(m_format_order)); return make_unexpected(HAILO_INVALID_ARGUMENT); } diff --git a/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp index e496a63..1dbe142 100644 --- a/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/filter_elements.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -167,14 +167,14 @@ class FillNmsFormatElement : public FilterElement public: static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, hailo_pipeline_elem_stats_flags_t elem_flags, std::shared_ptr> pipeline_status, - std::chrono::milliseconds timeout, PipelineDirection pipeline_direction = PipelineDirection::PULL, + std::chrono::milliseconds timeout, const hailo_format_order_t format_order, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); static Expected> create(const net_flow::NmsPostProcessConfig nms_config, const std::string &name, - const ElementBuildParams &build_params, PipelineDirection pipeline_direction = PipelineDirection::PULL, + const ElementBuildParams &build_params, const hailo_format_order_t format_order, PipelineDirection pipeline_direction = PipelineDirection::PULL, std::shared_ptr async_pipeline = nullptr); 
FillNmsFormatElement(const net_flow::NmsPostProcessConfig &&nms_config, const std::string &name, DurationCollector &&duration_collector, std::shared_ptr> &&pipeline_status, std::chrono::milliseconds timeout, - PipelineDirection pipeline_direction, std::shared_ptr async_pipeline); + PipelineDirection pipeline_direction, std::shared_ptr async_pipeline, const hailo_format_order_t format_order); virtual ~FillNmsFormatElement() = default; virtual hailo_status run_push(PipelineBuffer &&buffer, const PipelinePad &sink) override; virtual PipelinePad &next_pad() override; @@ -188,7 +188,6 @@ public: virtual hailo_status set_nms_max_proposals_total(uint32_t max_proposals_total) override { m_nms_config.max_proposals_total = max_proposals_total; - m_nms_config.order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; return HAILO_SUCCESS; } @@ -197,6 +196,7 @@ protected: private: net_flow::NmsPostProcessConfig m_nms_config; + hailo_format_order_t m_format_order; }; class ArgmaxPostProcessElement : public FilterElement diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp index 65e2e10..2e16626 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -63,19 +63,6 @@ void InferModelBase::InferStream::Impl::set_format_type(hailo_format_type_t type void InferModelBase::InferStream::Impl::set_format_order(hailo_format_order_t order) { m_user_buffer_format.order = order; - switch (order) - { - case HAILO_FORMAT_ORDER_HAILO_NMS: - case HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS: - case HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK: - m_vstream_info.nms_shape.order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; - break; - case HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE: - m_vstream_info.nms_shape.order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; - break; - default: - break; - } } bool InferModelBase::InferStream::Impl::is_nms() const @@ -97,14 +84,12 @@ void InferModelBase::InferStream::Impl::set_nms_max_proposals_per_class(uint32_t { m_nms_max_proposals_per_class = max_proposals_per_class; m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; - m_vstream_info.nms_shape.order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; } void InferModelBase::InferStream::Impl::set_nms_max_proposals_total(uint32_t max_proposals_total) { m_nms_max_proposals_total = max_proposals_total; m_vstream_info.nms_shape.max_bboxes_total = max_proposals_total; - m_vstream_info.nms_shape.order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; } void InferModelBase::InferStream::Impl::set_nms_max_accumulated_mask_size(uint32_t max_accumulated_mask_size) @@ -237,9 +222,8 @@ uint32_t InferModelBase::InferStream::nms_max_accumulated_mask_size() const return m_pimpl->nms_max_accumulated_mask_size(); } -Expected> InferModelBase::create(VDevice &vdevice, const std::string &hef_path, const std::string &network_name) +Expected> InferModelBase::create(VDevice &vdevice, Hef hef, const std::string &network_name) { - TRY(auto hef, Hef::create(hef_path)); TRY(auto inputs, create_infer_stream_inputs(hef, network_name)); TRY(auto outputs, create_infer_stream_outputs(hef, network_name)); @@ -258,18 
+242,6 @@ Expected> InferModelBase::create(VDevice &vdevic return ptr; } -Expected> InferModelBase::create(VDevice &vdevice, const MemoryView hef_buffer, const std::string &network_name) -{ - TRY(auto hef, Hef::create(hef_buffer)); - TRY(auto inputs, create_infer_stream_inputs(hef, network_name)); - TRY(auto outputs, create_infer_stream_outputs(hef, network_name)); - - auto ptr = make_shared_nothrow(vdevice, std::move(hef), network_name, std::move(inputs), std::move(outputs)); - CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY); - - return ptr; -} - InferModelBase::InferModelBase(VDevice &vdevice, Hef &&hef, const std::string &network_name, std::vector &&inputs, std::vector &&outputs) : m_vdevice(vdevice), m_hef(std::move(hef)), m_network_name(network_name), m_inputs_vector(std::move(inputs)), @@ -359,7 +331,7 @@ Expected InferModelBase::configure() } // internal_queue_size should be derived from batch_size, keeping this validation to make sure the logic doesnt change - TRY(auto internal_queue_size, network_groups.value()[0]->get_min_buffer_pool_size()); + TRY(auto internal_queue_size, network_groups.value()[0]->infer_queue_size()); CHECK_AS_EXPECTED(internal_queue_size >= m_config_params.batch_size, HAILO_INVALID_OPERATION, "Trying to configure a model with a batch={} bigger than internal_queue_size={}, which is not supported. 
Try using a smaller batch.", m_config_params.batch_size, internal_queue_size); @@ -398,11 +370,8 @@ Expected InferModelBase::configure() for (const auto &output_pair : m_outputs) { auto &edge_name = output_pair.first; - if ((HailoRTCommon::is_nms(output_pair.second.m_pimpl->format().order)) && - (HAILO_NMS_RESULT_ORDER_HW != output_pair.second.m_pimpl->get_nms_shape()->order_type)) { - auto status = network_groups.value()[0]->set_nms_result_order_type(edge_name, output_pair.second.m_pimpl->get_nms_shape()->order_type); - CHECK_SUCCESS_AS_EXPECTED(status); - } + auto stream_names = network_groups.value()[0]->get_stream_names_from_vstream_name(edge_name); + CHECK_EXPECTED(stream_names); if ((output_pair.second.m_pimpl->m_nms_score_threshold == INVALID_NMS_CONFIG) && (output_pair.second.m_pimpl->m_nms_iou_threshold == INVALID_NMS_CONFIG) && @@ -420,10 +389,22 @@ Expected InferModelBase::configure() CHECK_SUCCESS_AS_EXPECTED(status); } if (output_pair.second.m_pimpl->m_nms_max_proposals_per_class != static_cast(INVALID_NMS_CONFIG)) { + // TODO: HRT-15885 remove support for max_proposals_per_class in BYTE_MASK_NMS (warning below should be error) + if (HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK == output_pair.second.m_pimpl->format().order) { + LOGGER__WARNING("Setting NMS max proposals per class is deprecated for format order HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK. 
" + "Please set max proposals total instead."); + } + + CHECK_AS_EXPECTED((HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE != output_pair.second.m_pimpl->format().order), + HAILO_INVALID_ARGUMENT, "NMS Format order is HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE while setting max proposals per class"); + auto status = network_groups.value()[0]->set_nms_max_bboxes_per_class(edge_name, output_pair.second.m_pimpl->m_nms_max_proposals_per_class); CHECK_SUCCESS_AS_EXPECTED(status); } if (output_pair.second.m_pimpl->m_nms_max_proposals_total != static_cast(INVALID_NMS_CONFIG)) { + CHECK_AS_EXPECTED((!HailoRTCommon::is_nms_by_class(output_pair.second.m_pimpl->format().order)), + HAILO_INVALID_ARGUMENT, "NMS Format order is not HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE or HAILO_FORMAT_ORDER_HAILO_NMS_WITH_BYTE_MASK while setting " + "max proposals total"); auto status = network_groups.value()[0]->set_nms_max_bboxes_total(edge_name, output_pair.second.m_pimpl->m_nms_max_proposals_total); CHECK_SUCCESS_AS_EXPECTED(status); } @@ -441,11 +422,6 @@ Expected InferModelBase::configure() get_input_names(), get_output_names(), m_vdevice, inputs_frame_sizes, outputs_frame_sizes); CHECK_EXPECTED(configured_infer_model_pimpl); - // The hef buffer is being used only when working with the service. - // TODO HRT-12636 - Besides clearing the hef buffer, clear also unnecessary members of Hef object. - // After HRT-12636 is done - The user can configure an infer model only once, with or without the service. 
- m_hef.pimpl->clear_hef_buffer(); - return ConfiguredInferModel(configured_infer_model_pimpl.release()); } @@ -582,7 +558,13 @@ ConfiguredInferModel ConfiguredInferModelBase::create(std::shared_ptr ConfiguredInferModel::create_bindings() { - return m_pimpl->create_bindings(); + std::map buffers; + return m_pimpl->create_bindings(buffers); +} + +Expected ConfiguredInferModel::create_bindings(const std::map &buffers) +{ + return m_pimpl->create_bindings(buffers); } hailo_status ConfiguredInferModel::wait_for_async_ready(std::chrono::milliseconds timeout, uint32_t frames_count) @@ -637,7 +619,7 @@ hailo_status ConfiguredInferModel::set_scheduler_priority(uint8_t priority) return m_pimpl->set_scheduler_priority(priority); } -Expected ConfiguredInferModel::get_async_queue_size() +Expected ConfiguredInferModel::get_async_queue_size() const { return m_pimpl->get_async_queue_size(); } @@ -673,6 +655,11 @@ Expected ConfiguredInferModel::run_async(const std::vectorupdate_cache_offset(offset_delta_entries); +} + Expected ConfiguredInferModelBase::create_bindings( std::unordered_map &&inputs, std::unordered_map &&outputs) @@ -774,17 +761,24 @@ ConfiguredInferModelImpl::~ConfiguredInferModelImpl() shutdown(); } -Expected ConfiguredInferModelImpl::create_bindings() +Expected ConfiguredInferModelImpl::create_bindings(const std::map &buffers) { std::unordered_map inputs; std::unordered_map outputs; + uint32_t used_buffers = 0; + auto input_vstream_infos = m_cng->get_input_vstream_infos(); CHECK_EXPECTED(input_vstream_infos); for (const auto &vstream_info : input_vstream_infos.value()) { TRY(auto stream, ConfiguredInferModelBase::create_infer_stream(vstream_info)); - inputs.emplace(vstream_info.name, std::move(stream)); + auto name = std::string(vstream_info.name); + inputs.emplace(name, std::move(stream)); + if (contains(buffers, name)) { + inputs.at(name).set_buffer(buffers.at(name)); + used_buffers++; + } } auto output_vstream_infos = m_cng->get_output_vstream_infos(); @@ 
-792,10 +786,16 @@ Expected ConfiguredInferModelImpl::create_bindin for (const auto &vstream_info : output_vstream_infos.value()) { TRY(auto stream, ConfiguredInferModelBase::create_infer_stream(vstream_info)); + auto name = std::string(vstream_info.name); outputs.emplace(vstream_info.name, std::move(stream)); + if (contains(buffers, name)) { + outputs.at(name).set_buffer(buffers.at(name)); + used_buffers++; + } } TRY(auto bindings, ConfiguredInferModelBase::create_bindings(std::move(inputs), std::move(outputs))); + CHECK_AS_EXPECTED(used_buffers == buffers.size(), HAILO_INVALID_ARGUMENT, "Given 'buffers' contains names which arent model edges."); return bindings; } @@ -833,6 +833,12 @@ hailo_status ConfiguredInferModelImpl::shutdown() return deactivate(); } +hailo_status ConfiguredInferModelImpl::update_cache_offset(int32_t offset_delta_entries) +{ + return m_cng->update_cache_offset(offset_delta_entries); +} + + hailo_status ConfiguredInferModelImpl::activate() { auto activated_ng = m_cng->activate(); @@ -976,9 +982,9 @@ hailo_status ConfiguredInferModelImpl::set_scheduler_priority(uint8_t priority) return m_cng->set_scheduler_priority(priority); } -Expected ConfiguredInferModelImpl::get_async_queue_size() +Expected ConfiguredInferModelImpl::get_async_queue_size() const { - return m_cng->get_min_buffer_pool_size(); + return m_cng->infer_queue_size(); } AsyncInferJob::AsyncInferJob(std::shared_ptr pimpl) : m_pimpl(pimpl), m_should_wait_in_dtor(true) diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp index 5404e93..de1ca24 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -15,26 +15,26 @@ namespace hailort Expected> InferModelHrpcClient::create(Hef &&hef, const std::string &network_name, std::shared_ptr client, uint32_t infer_model_handle_id, uint32_t vdevice_handle, VDevice &vdevice, - std::shared_ptr callbacks_dispatcher) + std::shared_ptr callback_dispatcher_manager) { TRY(auto inputs, create_infer_stream_inputs(hef, network_name)); TRY(auto outputs, create_infer_stream_outputs(hef, network_name)); auto ptr = make_shared_nothrow(client, infer_model_handle_id, - vdevice_handle, vdevice, callbacks_dispatcher, std::move(hef), network_name, std::move(inputs), std::move(outputs)); + vdevice_handle, vdevice, callback_dispatcher_manager, std::move(hef), network_name, std::move(inputs), std::move(outputs)); CHECK_NOT_NULL_AS_EXPECTED(ptr, HAILO_OUT_OF_HOST_MEMORY); return ptr; } InferModelHrpcClient::InferModelHrpcClient(std::shared_ptr client, uint32_t handle, - uint32_t vdevice_handle, VDevice &vdevice, std::shared_ptr callbacks_dispatcher, + uint32_t vdevice_handle, VDevice &vdevice, std::shared_ptr callback_dispatcher_manager, Hef &&hef, const std::string &network_name, std::vector &&inputs, std::vector &&outputs) : InferModelBase(vdevice, std::move(hef), network_name, std::move(inputs), std::move(outputs)), m_client(client), m_handle(handle), m_vdevice_handle(vdevice_handle), - m_callbacks_dispatcher(callbacks_dispatcher) + m_callback_dispatcher_manager(callback_dispatcher_manager) { } @@ -44,21 +44,27 @@ InferModelHrpcClient::~InferModelHrpcClient() return; } - auto request = DestroyInferModelSerializer::serialize_request(m_handle); - if (!request) { - LOGGER__CRITICAL("Failed to serialize InferModel_release request"); - return; - } - auto client = m_client.lock(); if (client) { - auto execute_request_result = client->execute_request(HailoRpcActionID::INFER_MODEL__DESTROY, MemoryView(*request)); + auto request_buffer = 
client->allocate_request_buffer(); + if (!request_buffer) { + LOGGER__CRITICAL("Failed to create buffer for InferModel_release request"); + return; + } + + auto request_size = DestroyInferModelSerializer::serialize_request(m_handle, MemoryView(**request_buffer)); + if (!request_size) { + LOGGER__CRITICAL("Failed to serialize InferModel_release request"); + return; + } + + auto execute_request_result = client->execute_request(HailoRpcActionID::INFER_MODEL__DESTROY, MemoryView(request_buffer.value()->data(), *request_size)); if (!execute_request_result) { LOGGER__CRITICAL("Failed to destroy infer model! status = {}", execute_request_result.status()); return; } - auto deserialize_reply_result = DestroyInferModelSerializer::deserialize_reply(MemoryView(*execute_request_result)); + auto deserialize_reply_result = DestroyInferModelSerializer::deserialize_reply(MemoryView(execute_request_result->buffer->data(), execute_request_result->header.size)); if (HAILO_SUCCESS != deserialize_reply_result) { LOGGER__CRITICAL("Failed to destroy infer model! status = {}", deserialize_reply_result); return; @@ -101,13 +107,15 @@ Expected InferModelHrpcClient::configure() request_params.infer_model_handle = m_handle; request_params.vdevice_handle = m_vdevice_handle; - TRY(auto request, CreateConfiguredInferModelSerializer::serialize_request(request_params)); + // Not using allocator because CREATE_CONFIGURED_INFER_MODEL protobuf size is a lot bigger than the other requests and happens only once. + TRY(auto request_buffer, Buffer::create_shared(CREATE_CONFIGURED_INFER_MODEL_PROTO_MAX_SIZE, BufferStorageParams::create_dma())); + TRY(auto request_size, CreateConfiguredInferModelSerializer::serialize_request(request_params, MemoryView(*request_buffer))); auto client = m_client.lock(); CHECK_AS_EXPECTED(nullptr != client, HAILO_INTERNAL_FAILURE, "Lost comunication with the server. 
This may happen if VDevice is released while the InferModel is in use."); TRY(auto result, client->execute_request(HailoRpcActionID::INFER_MODEL__CREATE_CONFIGURED_INFER_MODEL, - MemoryView(request))); - TRY(auto tuple, CreateConfiguredInferModelSerializer::deserialize_reply(MemoryView(result))); + MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, CreateConfiguredInferModelSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto configured_infer_model_handle = std::get<1>(tuple); auto async_queue_size = std::get<2>(tuple); @@ -121,18 +129,12 @@ Expected InferModelHrpcClient::configure() outputs_frame_sizes.emplace(output.second.name(), output.second.get_frame_size()); } - auto callbacks_queue = make_shared_nothrow(m_output_names); - CHECK_NOT_NULL_AS_EXPECTED(callbacks_queue, HAILO_OUT_OF_HOST_MEMORY); - - m_callbacks_dispatcher->add(configured_infer_model_handle, callbacks_queue); - - TRY(auto input_vstream_infos, m_hef.get_input_vstream_infos()); - TRY(auto output_vstream_infos, m_hef.get_output_vstream_infos()); + TRY(auto input_vstream_infos, m_hef.get_input_vstream_infos(m_network_name)); + TRY(auto output_vstream_infos, m_hef.get_output_vstream_infos(m_network_name)); TRY(auto cim_client_ptr, ConfiguredInferModelHrpcClient::create(client, configured_infer_model_handle, std::move(input_vstream_infos), std::move(output_vstream_infos), - async_queue_size, callbacks_queue, m_handle, - inputs_frame_sizes, outputs_frame_sizes)); + async_queue_size, m_handle, inputs_frame_sizes, outputs_frame_sizes)); return ConfiguredInferModelBase::create(cim_client_ptr); } diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp index 6614615..9667bfc 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp +++ 
b/hailort/libhailort/src/net_flow/pipeline/infer_model_hrpc_client.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -16,6 +16,8 @@ #include "net_flow/pipeline/infer_model_internal.hpp" #include "rpc_callbacks/rpc_callbacks_dispatcher.hpp" +#define CREATE_CONFIGURED_INFER_MODEL_PROTO_MAX_SIZE (2048) + namespace hailort { @@ -24,10 +26,10 @@ class InferModelHrpcClient : public InferModelBase public: static Expected> create(Hef &&hef, const std::string &network_name, std::shared_ptr client, uint32_t infer_model_handle_id, uint32_t vdevice_handle, VDevice &vdevice, - std::shared_ptr callbacks_dispatcher); + std::shared_ptr callback_dispatcher_manager); InferModelHrpcClient(std::shared_ptr client, uint32_t id, - uint32_t vdevice_handle, VDevice &vdevice, std::shared_ptr callbacks_dispatcher, + uint32_t vdevice_handle, VDevice &vdevice, std::shared_ptr callback_dispatcher_manager, Hef &&hef, const std::string &network_name, std::vector &&inputs, std::vector &&outputs); virtual ~InferModelHrpcClient(); @@ -48,7 +50,7 @@ private: std::weak_ptr m_client; uint32_t m_handle; uint32_t m_vdevice_handle; - std::shared_ptr m_callbacks_dispatcher; + std::shared_ptr m_callback_dispatcher_manager; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp index 742061b..f04f2a4 100644 --- a/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/infer_model_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -50,8 +50,7 @@ private: class InferModelBase : public InferModel { public: - static Expected> create(VDevice &vdevice, const std::string &hef_path, const std::string &network_name); - static Expected> create(VDevice &vdevice, const MemoryView hef_buffer, const std::string &network_name); + static Expected> create(VDevice &vdevice, Hef hef, const std::string &network_name); InferModelBase(VDevice &vdevice, Hef &&hef, const std::string &network_name, std::vector &&inputs, std::vector &&outputs); @@ -185,7 +184,7 @@ public: ConfiguredInferModelBase(const std::unordered_map inputs_frame_sizes, const std::unordered_map outputs_frame_sizes); virtual ~ConfiguredInferModelBase() = default; - virtual Expected create_bindings() = 0; + virtual Expected create_bindings(const std::map &buffers) = 0; virtual hailo_status wait_for_async_ready(std::chrono::milliseconds timeout, uint32_t frames_count = 1) = 0; virtual hailo_status activate() = 0; virtual hailo_status deactivate() = 0; @@ -196,8 +195,9 @@ public: virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout) = 0; virtual hailo_status set_scheduler_threshold(uint32_t threshold) = 0; virtual hailo_status set_scheduler_priority(uint8_t priority) = 0; - virtual Expected get_async_queue_size() = 0; + virtual Expected get_async_queue_size() const = 0; virtual hailo_status shutdown() = 0; + virtual hailo_status update_cache_offset(int32_t offset_delta_entries) = 0; static Expected create_bindings( std::unordered_map &&inputs, @@ -230,7 +230,7 @@ public: const std::vector &input_names, const std::vector &output_names, const std::unordered_map inputs_frame_sizes, const std::unordered_map outputs_frame_sizes); ~ConfiguredInferModelImpl(); - virtual Expected create_bindings() override; + virtual Expected create_bindings(const std::map &buffers) override; virtual hailo_status wait_for_async_ready(std::chrono::milliseconds 
timeout, uint32_t frames_count) override; virtual hailo_status activate() override; virtual hailo_status deactivate() override; @@ -240,8 +240,9 @@ public: virtual hailo_status set_scheduler_timeout(const std::chrono::milliseconds &timeout) override; virtual hailo_status set_scheduler_threshold(uint32_t threshold) override; virtual hailo_status set_scheduler_priority(uint8_t priority) override; - virtual Expected get_async_queue_size() override; + virtual Expected get_async_queue_size() const override; virtual hailo_status shutdown() override; + virtual hailo_status update_cache_offset(int32_t offset_delta_entries) override; static Expected> create_for_ut(std::shared_ptr net_group, std::shared_ptr async_infer_runner, const std::vector &input_names, const std::vector &output_names, diff --git a/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp index 77350bc..c3f1f19 100644 --- a/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/inference_pipeline.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp index 5ff6932..b29cefd 100644 --- a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -221,7 +221,6 @@ static hailo_nms_info_t fuse_nms_info(const std::vector &nms_i hailo_nms_info_t fused_info = nms_infos[0]; fused_info.is_defused = false; fused_info.number_of_classes = 0; - fused_info.order_type = HAILO_NMS_RESULT_ORDER_HW; for (const auto &nms_info : nms_infos) { fused_info.number_of_classes += nms_info.number_of_classes; assert(nms_infos[0].max_bboxes_per_class == nms_info.max_bboxes_per_class); @@ -743,13 +742,12 @@ Expected> AsyncHwElement::create(const std::unor auto duration_collector = DurationCollector::create(elem_flags); CHECK_EXPECTED(duration_collector); - auto min_buffer_pool_size = net_group->get_min_buffer_pool_size(); - CHECK_EXPECTED(min_buffer_pool_size); + TRY(auto queue_size, net_group->infer_queue_size()); auto status = HAILO_UNINITIALIZED; auto elem_ptr = make_shared_nothrow(named_stream_infos, timeout, name, duration_collector.release(), std::move(pipeline_status), pipeline_direction, async_pipeline, net_group, - min_buffer_pool_size.release(), status); + queue_size, status); CHECK_AS_EXPECTED(nullptr != elem_ptr, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status); diff --git a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp index cc2f389..b75879f 100644 --- a/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/multi_io_elements.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -97,7 +97,6 @@ public: auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); assert(nullptr != nms_metadata); nms_metadata->nms_config().max_proposals_per_class = max_proposals_per_class; - nms_metadata->nms_config().order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; return HAILO_SUCCESS; } @@ -107,7 +106,6 @@ public: auto nms_metadata = std::dynamic_pointer_cast(get_op()->metadata()); assert(nullptr != nms_metadata); nms_metadata->nms_config().max_proposals_total = max_proposals_total; - nms_metadata->nms_config().order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; return HAILO_SUCCESS; } diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp index 1978abc..9af4ba2 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp index e506012..8ec4bd0 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -269,7 +269,7 @@ public: PipelineObject(PipelineObject &&) noexcept = default; PipelineObject& operator=(PipelineObject &&) noexcept = default; - const std::string &name() const; + const std::string& name() const; static std::string create_element_name(const std::string &element_name, const std::string &stream_name, uint8_t stream_index); diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp index 565c8b4..5724bf6 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp index c164cca..5de6f22 100644 --- a/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/pipeline_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp index 6496455..7f51ddf 100644 --- a/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp index ca4851d..5c42684 100644 --- a/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/queue_elements.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp index d0bf00e..85a5fbd 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -1185,7 +1185,6 @@ hailo_status OutputVStreamImpl::set_nms_max_proposals_per_class(uint32_t max_pro // Update vstream info and frame size m_vstream_info.nms_shape.max_bboxes_per_class = max_proposals_per_class; - m_vstream_info.nms_shape.order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; auto set_buffer_size_status = user_buffer_queue_element->set_buffer_pool_buffer_size(HailoRTCommon::get_frame_size(m_vstream_info, m_vstream_params.user_buffer_format)); CHECK_SUCCESS(set_buffer_size_status, "Failed to update buffer size in {}", name()); diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp index d556af4..d26733b 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -268,7 +268,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr CHECK_EXPECTED(post_infer_element); CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(hw_read_queue_element.value(), post_infer_element.value())); - auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(output_stream->get_info().nms_info, vstream_params.user_buffer_format); + auto post_transform_frame_size = HailoRTCommon::get_nms_by_class_host_frame_size(output_stream->get_info().nms_info, vstream_params.user_buffer_format); auto pre_nms_convert_queue_element = add_pull_queue_element(output_stream, pipeline_status, elements, "PullQEl_pre_nms_convert", vstream_params, post_transform_frame_size); CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(post_infer_element.value(), pre_nms_convert_queue_element.value())); @@ -292,7 +292,7 @@ Expected> VStreamsBuilderUtils::create_output_post_pr CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(remove_overlapping_bboxes_element.value(), pre_fill_nms_format_element_queue_element.value())); auto fill_nms_format_element = add_fill_nms_format_element(output_stream, elements, "FillNmsFormatEl", - iou_op_metadata, build_params); + iou_op_metadata, build_params, vstream_params.user_buffer_format.order); CHECK_EXPECTED(fill_nms_format_element); CHECK_SUCCESS_AS_EXPECTED(PipelinePad::link_pads(pre_fill_nms_format_element_queue_element.value(), fill_nms_format_element.value())); @@ -733,14 +733,14 @@ Expected> VStreamsBuilderUtils:: Expected> VStreamsBuilderUtils::add_fill_nms_format_element(std::shared_ptr &output_stream, std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &op_metadata, - const ElementBuildParams &build_params) + const ElementBuildParams &build_params, const hailo_format_order_t &dst_format_order) { auto metadata = std::dynamic_pointer_cast(op_metadata); assert(nullptr != metadata); auto 
fill_nms_format_element = FillNmsFormatElement::create(metadata->nms_config(), PipelineObject::create_element_name(element_name, output_stream->name(), output_stream->get_info().index), - build_params); + build_params, dst_format_order); CHECK_EXPECTED(fill_nms_format_element); elements.push_back(fill_nms_format_element.value()); return fill_nms_format_element; @@ -1139,7 +1139,7 @@ hailo_status VStreamsBuilderUtils::add_nms_fuse(OutputStreamPtrVector &output_st elements.push_back(post_infer_elem.value()); CHECK_SUCCESS(PipelinePad::link_pads(nms_queue_elem.value(), post_infer_elem.value())); - auto post_transform_frame_size = HailoRTCommon::get_nms_host_frame_size(fused_layer_nms_info, vstreams_params.user_buffer_format); + auto post_transform_frame_size = HailoRTCommon::get_nms_by_class_host_frame_size(fused_layer_nms_info, vstreams_params.user_buffer_format); auto post_infer_queue_elem = UserBufferQueueElement::create( PipelineObject::create_element_name("UserBufQEl_post_infer", fused_layer_name, 0), vstreams_params, post_transform_frame_size, pipeline_status); diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp index d4ba89d..e5448b0 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_builder.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -61,7 +61,7 @@ public: static Expected> add_fill_nms_format_element(std::shared_ptr &output_stream, std::vector> &elements, const std::string &element_name, const net_flow::PostProcessOpMetadataPtr &iou_op_metadata, - const ElementBuildParams &build_params); + const ElementBuildParams &build_params, const hailo_format_order_t &dst_format_order); static Expected> add_user_buffer_queue_element(std::shared_ptr &output_stream, std::shared_ptr> &pipeline_status, std::vector> &elements, diff --git a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp index bbcbd9e..55b4a7e 100644 --- a/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp +++ b/hailort/libhailort/src/net_flow/pipeline/vstream_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/network_group/network_group.cpp b/hailort/libhailort/src/network_group/network_group.cpp index aeb671c..84bf935 100644 --- a/hailort/libhailort/src/network_group/network_group.cpp +++ b/hailort/libhailort/src/network_group/network_group.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -366,7 +366,6 @@ hailo_status ConfiguredNetworkGroupBase::set_nms_max_bboxes_per_class(const std: auto expected_nms_op_metadata = get_nms_meta_data(edge_name); CHECK_EXPECTED_AS_STATUS(expected_nms_op_metadata); expected_nms_op_metadata.value()->nms_config().max_proposals_per_class = max_bboxes_per_class; - expected_nms_op_metadata.value()->nms_config().order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; return HAILO_SUCCESS; } @@ -375,15 +374,6 @@ hailo_status ConfiguredNetworkGroupBase::set_nms_max_bboxes_total(const std::str auto expected_nms_op_metadata = get_nms_meta_data(edge_name); CHECK_EXPECTED_AS_STATUS(expected_nms_op_metadata); expected_nms_op_metadata.value()->nms_config().max_proposals_total = max_bboxes_total; - expected_nms_op_metadata.value()->nms_config().order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; - return HAILO_SUCCESS; -} - -hailo_status ConfiguredNetworkGroupBase::set_nms_result_order_type(const std::string &edge_name, hailo_nms_result_order_type_t order_type) -{ - auto expected_nms_op_metadata = get_nms_meta_data(edge_name); - CHECK_EXPECTED_AS_STATUS(expected_nms_op_metadata); - expected_nms_op_metadata.value()->nms_config().order_type = order_type; return HAILO_SUCCESS; } @@ -410,30 +400,6 @@ ConfiguredNetworkGroupBase::ConfiguredNetworkGroupBase( m_is_forked(false) {} -// static func -uint16_t ConfiguredNetworkGroupBase::get_smallest_configured_batch_size(const ConfigureNetworkParams &config_params) -{ - // There are two possible situations: - // 1) All networks in the network group have the same configured (and hence smallest) batch_size => - // We return that batch size. - // 2) Not all of the networks have the same configured (and hence smallest) batch_size. Currently, when - // using dynamic_batch_sizes, all networks will use the same dynamic_batch_size (until HRT-6535 is done). 
- // Hence, we must not set a dynamic_batch_size to a value greater than the smallest configured network - // batch_size (e.g. all the resources allocated are for at most the configured network batch_size). - - /* We iterate over all network's batch_sizes to get the non-default min. - Ignoring HAILO_DEFAULT_BATCH_SIZE as it is not a real batch-value, - but indicating the scheduler should optimize batches by himself */ - uint16_t min_batch_size = UINT16_MAX; - for (const auto &network_params_pair : config_params.network_params_by_name) { - if ((HAILO_DEFAULT_BATCH_SIZE != network_params_pair.second.batch_size) && - (network_params_pair.second.batch_size < min_batch_size)) { - min_batch_size = network_params_pair.second.batch_size; - } - } - return (UINT16_MAX == min_batch_size) ? DEFAULT_ACTUAL_BATCH_SIZE : min_batch_size; -} - const std::string &ConfiguredNetworkGroupBase::get_network_group_name() const { return m_network_group_metadata.name(); @@ -809,29 +775,9 @@ Expected ConfiguredNetworkGroupBase::get_intermediate_buffer(const Inter return get_core_op()->get_intermediate_buffer(key); } -Expected ConfiguredNetworkGroupBase::get_min_buffer_pool_size() +Expected ConfiguredNetworkGroupBase::infer_queue_size() const { - uint32_t buffer_pool_size = UINT32_MAX; - - auto input_streams = get_input_streams(); - for (const auto &input_stream : input_streams) { - auto async_max_queue_size = input_stream.get().get_async_max_queue_size(); - CHECK_EXPECTED(async_max_queue_size); - if (buffer_pool_size > async_max_queue_size.value()) { - buffer_pool_size = static_cast(async_max_queue_size.value()); - } - } - - auto output_streams = get_output_streams(); - for (const auto &output_stream : output_streams) { - auto async_max_queue_size = output_stream.get().get_async_max_queue_size(); - CHECK_EXPECTED(async_max_queue_size); - if (buffer_pool_size > async_max_queue_size.value()) { - buffer_pool_size = static_cast(async_max_queue_size.value()); - } - } - - return buffer_pool_size; 
+ return get_core_op()->infer_queue_size(); } hailo_status ConfiguredNetworkGroupBase::infer_async(const NamedBuffersCallbacks &named_buffers_callbacks, @@ -910,12 +856,12 @@ Expected ConfiguredNetworkGroupBase::get_cache_entry_size(uint32_t cac return m_core_ops[0]->get_cache_entry_size(cache_id); } -hailo_status ConfiguredNetworkGroupBase::init_cache(uint32_t read_offset, int32_t write_offset_delta) +hailo_status ConfiguredNetworkGroupBase::init_cache(uint32_t read_offset) { CHECK(m_core_ops.size() == 1, HAILO_INVALID_OPERATION, "init_cache() is not supported for multi core-op network groups"); - return m_core_ops[0]->init_cache(read_offset, write_offset_delta); + return m_core_ops[0]->init_cache(read_offset); } hailo_status ConfiguredNetworkGroupBase::update_cache_offset(int32_t offset_delta_entries) diff --git a/hailort/libhailort/src/network_group/network_group_internal.hpp b/hailort/libhailort/src/network_group/network_group_internal.hpp index 2691114..7b9d250 100644 --- a/hailort/libhailort/src/network_group/network_group_internal.hpp +++ b/hailort/libhailort/src/network_group/network_group_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -65,8 +65,8 @@ public: virtual hailo_status shutdown() override; - virtual const std::string &get_network_group_name() const override; - virtual const std::string &name() const override; + virtual const std::string& get_network_group_name() const override; + virtual const std::string& name() const override; virtual Expected get_input_streams_by_network(const std::string &network_name="") override; virtual Expected get_output_streams_by_network(const std::string &network_name="") override; @@ -120,7 +120,7 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params) override; virtual Expected> create_output_vstreams(const std::map &outputs_params) override; - virtual Expected get_min_buffer_pool_size() override; + virtual Expected infer_queue_size() const override; Expected> get_shared_input_stream_by_name(const std::string &stream_name) { @@ -193,12 +193,11 @@ public: virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; virtual hailo_status set_nms_max_bboxes_total(const std::string &edge_name, uint32_t max_bboxes_total) override; - virtual hailo_status set_nms_result_order_type(const std::string &edge_name, hailo_nms_result_order_type_t order_type) override; virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override; Expected> get_nms_meta_data(const std::string &edge_name); - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override; + virtual hailo_status init_cache(uint32_t read_offset) override; virtual hailo_status update_cache_offset(int32_t offset_delta_entries) override; virtual Expected> get_cache_ids() const override; virtual Expected read_cache_buffer(uint32_t cache_id) 
override; @@ -208,7 +207,6 @@ private: ConfiguredNetworkGroupBase(const ConfigureNetworkParams &config_params, std::vector> &&core_ops, NetworkGroupMetadata &&metadata); - static uint16_t get_smallest_configured_batch_size(const ConfigureNetworkParams &config_params); hailo_status add_mux_streams_by_edges_names(OutputStreamWithParamsVector &result, const std::unordered_map &outputs_edges_params); diff --git a/hailort/libhailort/src/os/microsec_timer.hpp b/hailort/libhailort/src/os/microsec_timer.hpp index 5b45847..e074556 100644 --- a/hailort/libhailort/src/os/microsec_timer.hpp +++ b/hailort/libhailort/src/os/microsec_timer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp b/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp index 4ffa432..e893664 100644 --- a/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp +++ b/hailort/libhailort/src/os/posix/linux/dma_buffer_utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/posix/linux/event.cpp b/hailort/libhailort/src/os/posix/linux/event.cpp index d0232d0..9027502 100644 --- a/hailort/libhailort/src/os/posix/linux/event.cpp +++ b/hailort/libhailort/src/os/posix/linux/event.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/posix/microsec_timer.cpp b/hailort/libhailort/src/os/posix/microsec_timer.cpp index fdfb816..c1765ed 100644 --- a/hailort/libhailort/src/os/posix/microsec_timer.cpp +++ b/hailort/libhailort/src/os/posix/microsec_timer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp b/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp index ba2a5a9..480b9a5 100644 --- a/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp +++ b/hailort/libhailort/src/os/posix/qnx/dma_buffer_utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/posix/qnx/event.cpp b/hailort/libhailort/src/os/posix/qnx/event.cpp index 893b656..cf1594e 100644 --- a/hailort/libhailort/src/os/posix/qnx/event.cpp +++ b/hailort/libhailort/src/os/posix/qnx/event.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp b/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp index 5b002c3..3a9771e 100644 --- a/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp +++ b/hailort/libhailort/src/os/windows/dma_buffer_utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/windows/event.cpp b/hailort/libhailort/src/os/windows/event.cpp index 2b28638..e8ce59e 100644 --- a/hailort/libhailort/src/os/windows/event.cpp +++ b/hailort/libhailort/src/os/windows/event.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/windows/microsec_timer.cpp b/hailort/libhailort/src/os/windows/microsec_timer.cpp index 8a7f060..fdc85bb 100644 --- a/hailort/libhailort/src/os/windows/microsec_timer.cpp +++ b/hailort/libhailort/src/os/windows/microsec_timer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/os/windows/osdep.hpp b/hailort/libhailort/src/os/windows/osdep.hpp index ac148cf..71aff99 100644 --- a/hailort/libhailort/src/os/windows/osdep.hpp +++ b/hailort/libhailort/src/os/windows/osdep.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ diff --git a/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp index 62840b9..501b8ae 100644 --- a/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp +++ b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -12,63 +12,47 @@ namespace hailort { -AsyncInferJobHrpcClient::AsyncInferJobHrpcClient(EventPtr event) : m_event(event), m_job_status(HAILO_UNINITIALIZED) -{ -} - -hailo_status AsyncInferJobHrpcClient::wait(std::chrono::milliseconds timeout) -{ - auto status = m_event->wait(timeout); - if (HAILO_UNINITIALIZED != m_job_status) { - return m_job_status; - } - CHECK_SUCCESS(status); - - return HAILO_SUCCESS; -} - -hailo_status AsyncInferJobHrpcClient::set_status(hailo_status status) -{ - m_job_status = status; - return m_event->signal(); -} - -CallbacksQueue::CallbacksQueue(const std::vector &outputs_names) : m_outputs_names(outputs_names) +ClientCallbackDispatcher::ClientCallbackDispatcher(uint32_t dispatcher_id, RpcCallbackType callback_type, + bool should_remove_callback_after_trigger) : m_dispatcher_id(dispatcher_id), m_callback_type(callback_type) { m_is_running = true; - m_callback_thread = std::thread([this] { + m_callback_thread = std::thread([this, should_remove_callback_after_trigger] { while (true) { - callback_id_t callback_id; - hailo_status info_status = HAILO_UNINITIALIZED; - std::function cb; + RpcCallback rpc_callback = {}; + ClientCallbackDispatcher::CallbackFunc cb; { std::unique_lock lock(m_mutex); - m_cv.wait(lock, [this] { return !m_is_running || !m_callbacks_queue.empty(); }); + m_cv.wait(lock, [this] { + if 
(!m_is_running) { + return true; + } + + if (m_triggered_callbacks.empty()) { + return false; + } + + auto rpc_callback = m_triggered_callbacks.front(); + return contains(m_registered_callbacks, rpc_callback.callback_id); + }); if (!m_is_running) { break; } - callback_id = m_callbacks_queue.front(); - m_callbacks_queue.pop(); + rpc_callback = m_triggered_callbacks.front(); + m_triggered_callbacks.pop(); - m_cv.wait(lock, [this, callback_id] { return !m_is_running || (m_callbacks.find(callback_id) != m_callbacks.end()); }); - if (!m_is_running) { - break; + auto callback_id = rpc_callback.callback_id; + cb = m_registered_callbacks[callback_id]; + if (should_remove_callback_after_trigger) { + m_registered_callbacks.erase(callback_id); } - - info_status = m_callbacks_status[callback_id]; - cb = m_callbacks[callback_id]; - m_callbacks.erase(callback_id); - m_callbacks_status.erase(callback_id); - m_bindings.erase(callback_id); } - AsyncInferCompletionInfo info(info_status); - cb(info); + cb(rpc_callback, HAILO_UNINITIALIZED); } }); } -hailo_status CallbacksQueue::shutdown(hailo_status status) +hailo_status ClientCallbackDispatcher::shutdown(hailo_status status) { if (!m_is_running) { return HAILO_SUCCESS; @@ -78,20 +62,20 @@ hailo_status CallbacksQueue::shutdown(hailo_status status) std::unique_lock lock(m_mutex); m_is_running = false; - for (const auto &callback : m_callbacks) { - m_callbacks_status.erase(callback.first); - m_bindings.erase(callback.first); - AsyncInferCompletionInfo info(status); - callback.second(info); + m_additional_reads_funcs.clear(); + for (const auto &callback : m_registered_callbacks) { + RpcCallback rpc_callback = {}; + rpc_callback.type = RpcCallbackType::INVALID; + callback.second(rpc_callback, status); } - m_callbacks.clear(); + m_registered_callbacks.clear(); } m_cv.notify_one(); return HAILO_SUCCESS; } -CallbacksQueue::~CallbacksQueue() +ClientCallbackDispatcher::~ClientCallbackDispatcher() { auto status = 
shutdown(HAILO_COMMUNICATION_CLOSED); if (HAILO_SUCCESS != status) { @@ -103,55 +87,81 @@ CallbacksQueue::~CallbacksQueue() } } -Expected> CallbacksQueue::register_callback(callback_id_t id, - const ConfiguredInferModel::Bindings &bindings, - std::function callback) +void ClientCallbackDispatcher::add_additional_reads(uint32_t callback_id, AdditionalReadsFunc additional_reads_func) { - TRY(auto event_ptr, Event::create_shared(Event::State::not_signalled)); - - { - std::unique_lock lock(m_mutex); - m_bindings[id] = bindings; - m_callbacks_status[id] = HAILO_SUCCESS; - m_callbacks[id] = [callback, event_ptr] (const AsyncInferCompletionInfo &info) { - callback(info); - auto status = event_ptr->signal(); - if (HAILO_SUCCESS != status) { - LOGGER__CRITICAL("Could not signal event! status = {}", status); - } - }; - } - m_cv.notify_one(); - - auto ptr = make_shared_nothrow(event_ptr); - CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); - - return ptr; + std::unique_lock lock(m_mutex); + m_additional_reads_funcs[callback_id] = additional_reads_func; } -hailo_status CallbacksQueue::push_callback(hailo_status callback_status, rpc_object_handle_t callback_handle_id, - RpcConnection connection) +void ClientCallbackDispatcher::register_callback(uint32_t callback_id, ClientCallbackDispatcher::CallbackFunc callback_func) { { std::unique_lock lock(m_mutex); - CHECK(contains(m_callbacks, callback_handle_id), HAILO_NOT_FOUND, "Callback handle (id={}) not found!", callback_handle_id); - m_callbacks_status[callback_handle_id] = callback_status; + m_registered_callbacks[callback_id] = callback_func; + } + m_cv.notify_one(); +} - if (HAILO_SUCCESS == callback_status) { - CHECK(contains(m_bindings, callback_handle_id), HAILO_NOT_FOUND, "Callback handle not found!"); - for (const auto &output_name : m_outputs_names) { - TRY(auto buffer, m_bindings[callback_handle_id].output(output_name)->get_buffer()); - auto status = connection.read_buffer(buffer); +hailo_status 
ClientCallbackDispatcher::remove_callback(uint32_t callback_id) +{ + std::unique_lock lock(m_mutex); + CHECK(contains(m_registered_callbacks, callback_id), HAILO_NOT_FOUND, "Did not find callback with id {}", callback_id); + m_registered_callbacks.erase(callback_id); + if (contains(m_additional_reads_funcs, callback_id)) { + m_additional_reads_funcs.erase(callback_id); + } + return HAILO_SUCCESS; +} + +hailo_status ClientCallbackDispatcher::trigger_callback(const RpcCallback &callback, RpcConnection connection) +{ + { + std::unique_lock lock(m_mutex); + CHECK(callback.type == m_callback_type, HAILO_INTERNAL_FAILURE, "Callback type mismatch!, expected = {}, got = {}", + static_cast(m_callback_type), static_cast(callback.type)); + + if (contains(m_additional_reads_funcs, callback.callback_id)) { + auto additional_reads_func = m_additional_reads_funcs[callback.callback_id]; + m_additional_reads_funcs.erase(callback.callback_id); + TRY(auto transfers, additional_reads_func(callback)); + if (!transfers.empty()) { + auto status = connection.read_buffers(std::move(transfers)); // TODO: Errors here should be unrecoverable (HRT-14275) CHECK_SUCCESS(status); } } - m_callbacks_queue.push(callback_handle_id); + + m_triggered_callbacks.push(callback); } m_cv.notify_one(); return HAILO_SUCCESS; } +Expected> ClientCallbackDispatcherManager::new_dispatcher(RpcCallbackType callback_type, + bool should_remove_callback_after_trigger) +{ + std::unique_lock lock(m_mutex); + auto dispatcher_id = m_dispatcher_count++; + auto ptr = make_shared_nothrow(dispatcher_id, callback_type, should_remove_callback_after_trigger); + CHECK_NOT_NULL(ptr, HAILO_OUT_OF_HOST_MEMORY); + m_dispatchers[dispatcher_id] = ptr; + return ptr; +} + +hailo_status ClientCallbackDispatcherManager::remove_dispatcher(uint32_t dispatcher_id) +{ + std::unique_lock lock(m_mutex); + auto dispatcher = m_dispatchers.find(dispatcher_id); + CHECK(dispatcher != m_dispatchers.end(), HAILO_NOT_FOUND, "Did not find dispatcher with 
id {}", dispatcher_id); + m_dispatchers.erase(dispatcher); + return HAILO_SUCCESS; +} + +std::shared_ptr ClientCallbackDispatcherManager::at(uint32_t dispatcher_id) +{ + std::unique_lock lock(m_mutex); + return m_dispatchers.at(dispatcher_id); +} } // namespace hailort diff --git a/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp index a29c076..e6462a8 100644 --- a/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp +++ b/hailort/libhailort/src/rpc_callbacks/rpc_callbacks_dispatcher.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,72 +11,59 @@ #define _HAILO_RPC_CALLBACKS_DISPATCHER_HPP_ #include "hailo/infer_model.hpp" -#include "net_flow/pipeline/infer_model_internal.hpp" #include "hrpc_protocol/serializer.hpp" +#include "vdma/channel/transfer_common.hpp" +#include "hrpc/rpc_connection.hpp" namespace hailort { -using callback_id_t = uint32_t; -class CallbacksQueue; -class CallbacksDispatcher +class ClientCallbackDispatcher { public: - void add(rpc_object_handle_t cim_handle, std::shared_ptr callbacks_queue) - { - m_callbacks_dispatcher[cim_handle] = callbacks_queue; - } + using CallbackFunc = std::function; + using AdditionalReadsFunc = std::function>(const RpcCallback&)>; - std::shared_ptr at(rpc_object_handle_t cim_handle) - { - return m_callbacks_dispatcher.at(cim_handle); - } + ClientCallbackDispatcher(uint32_t dispatcher_id, RpcCallbackType callback_type, + bool should_remove_callback_after_trigger); + ~ClientCallbackDispatcher(); -private: - std::unordered_map> m_callbacks_dispatcher; -}; + ClientCallbackDispatcher(const ClientCallbackDispatcher &other) = delete; + ClientCallbackDispatcher& operator=(const ClientCallbackDispatcher &other) = delete; + 
ClientCallbackDispatcher(ClientCallbackDispatcher &&other) = delete; + ClientCallbackDispatcher& operator=(ClientCallbackDispatcher &&other) = delete; -class AsyncInferJobHrpcClient : public AsyncInferJobBase -{ -public: - AsyncInferJobHrpcClient(EventPtr event); - - virtual hailo_status wait(std::chrono::milliseconds timeout) override; - hailo_status set_status(hailo_status status); - -private: - EventPtr m_event; - std::atomic m_job_status; -}; - -class CallbacksQueue -{ -public: - CallbacksQueue(const std::vector &outputs_names); - ~CallbacksQueue(); - - CallbacksQueue(const CallbacksQueue &other) = delete; - CallbacksQueue& operator=(const CallbacksQueue &other) = delete; - CallbacksQueue(CallbacksQueue &&other) = delete; - CallbacksQueue& operator=(CallbacksQueue &&other) = delete; - - Expected> register_callback(callback_id_t id, - const ConfiguredInferModel::Bindings &bindings, - std::function callback); - hailo_status push_callback(hailo_status callback_status, rpc_object_handle_t callback_handle_id, - RpcConnection connection); + void add_additional_reads(uint32_t callback_id, AdditionalReadsFunc additional_reads_func); + void register_callback(uint32_t callback_id, CallbackFunc callback); + hailo_status remove_callback(uint32_t callback_id); + hailo_status trigger_callback(const RpcCallback &rpc_callback, RpcConnection connection); hailo_status shutdown(hailo_status status); + uint32_t id() const { return m_dispatcher_id; } private: - const std::vector m_outputs_names; + const uint32_t m_dispatcher_id; + const RpcCallbackType m_callback_type; std::mutex m_mutex; std::condition_variable m_cv; - std::queue m_callbacks_queue; - std::unordered_map> m_callbacks; + std::queue m_triggered_callbacks; + std::unordered_map m_registered_callbacks; + std::unordered_map m_additional_reads_funcs; std::atomic_bool m_is_running; std::thread m_callback_thread; - std::unordered_map m_bindings; - std::unordered_map m_callbacks_status; +}; + +class 
ClientCallbackDispatcherManager +{ +public: + ClientCallbackDispatcherManager() : m_dispatcher_count(0) {} // TODO: move this module near client.cpp and add it to its CMake + Expected> new_dispatcher(RpcCallbackType callback_type, bool should_remove_callback_after_trigger); + hailo_status remove_dispatcher(uint32_t dispatcher_id); + std::shared_ptr at(uint32_t dispatcher_id); + +private: + std::unordered_map> m_dispatchers; + uint32_t m_dispatcher_count; + std::mutex m_mutex; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/service/buffer_pool_per_stream.cpp b/hailort/libhailort/src/service/buffer_pool_per_stream.cpp index 695ed21..62b64de 100644 --- a/hailort/libhailort/src/service/buffer_pool_per_stream.cpp +++ b/hailort/libhailort/src/service/buffer_pool_per_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/service/buffer_pool_per_stream.hpp b/hailort/libhailort/src/service/buffer_pool_per_stream.hpp index 6b667e9..5b64adf 100644 --- a/hailort/libhailort/src/service/buffer_pool_per_stream.hpp +++ b/hailort/libhailort/src/service/buffer_pool_per_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/service/hailort_rpc_client.cpp b/hailort/libhailort/src/service/hailort_rpc_client.cpp index a468a7b..17d9bd4 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.cpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -226,8 +226,8 @@ Expected> HailoRtRpcClient::VDevice_configure(const VDevic auto proto_identifier = request.mutable_identifier(); VDevice_convert_identifier_to_proto(identifier, proto_identifier); request.set_pid(pid); - auto hef_memview = hef.pimpl->get_hef_memview(); - request.set_hef(hef_memview.data(), hef_memview.size()); + TRY(auto hef_buffer, hef.pimpl->get_hef_as_buffer()); + request.set_hef(hef_buffer->data(), hef_buffer->size()); // Serialize NetworkGroupsParamsMap for (const auto &name_params_pair : configure_params) { @@ -580,13 +580,13 @@ Expected> HailoRtRpcClient::ConfiguredNetworkGr hailo_nms_info_t nms_info{ proto_stream_info.nms_info().number_of_classes(), proto_stream_info.nms_info().max_bboxes_per_class(), + proto_stream_info.nms_info().max_bboxes_total(), proto_stream_info.nms_info().bbox_size(), proto_stream_info.nms_info().chunks_per_frame(), proto_stream_info.nms_info().is_defused(), nms_defuse_info, proto_stream_info.nms_info().burst_size(), - static_cast(proto_stream_info.nms_info().burst_type()), - HAILO_NMS_RESULT_ORDER_HW + static_cast(proto_stream_info.nms_info().burst_type()) }; hailo_format_t format{ static_cast(proto_stream_info.format().type()), @@ -834,17 +834,14 @@ Expected> deserialize_ops_metada (op_metadata_proto.type() == static_cast(net_flow::OperationType::YOLOV5SEG))) { // In case this is an NMS PP - initilize the values for the nms post process config auto &nms_config_proto = op_metadata_proto.nms_post_process_config(); - auto max_proposals = (HAILO_NMS_RESULT_ORDER_BY_SCORE == static_cast(nms_config_proto.order_type())) ? 
- nms_config_proto.max_proposals_total() :nms_config_proto.max_proposals_per_class(); nms_post_process_config = {nms_config_proto.nms_score_th(), nms_config_proto.nms_iou_th(), - max_proposals, + nms_config_proto.max_proposals_per_class(), + nms_config_proto.max_proposals_total(), nms_config_proto.number_of_classes(), nms_config_proto.background_removal(), nms_config_proto.background_removal_index(), - nms_config_proto.cross_classes(), - nms_config_proto.bbox_only(), - static_cast(nms_config_proto.order_type())}; + nms_config_proto.bbox_only()}; } switch (static_cast(op_metadata_proto.type())) { @@ -1016,13 +1013,13 @@ LayerInfo deserialize_layer_info(const ProtoLayerInfo &info_proto) hailo_nms_info_t nms_info{ info_proto.nms_info().number_of_classes(), info_proto.nms_info().max_bboxes_per_class(), + info_proto.nms_info().max_bboxes_total(), info_proto.nms_info().bbox_size(), info_proto.nms_info().chunks_per_frame(), info_proto.nms_info().is_defused(), nms_defuse_info, info_proto.nms_info().burst_size(), - static_cast(info_proto.nms_info().burst_type()), - HAILO_NMS_RESULT_ORDER_HW + static_cast(info_proto.nms_info().burst_type()) }; info.nms_info = nms_info; @@ -1069,20 +1066,11 @@ hailo_vstream_info_t deserialize_vstream_info(const ProtoVStreamInfo &info_proto }; info.format = format; if (HailoRTCommon::is_nms(format.order)) { - uint32_t max_bboxes; - hailo_nms_result_order_type_t order_type; - if (HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE == format.order) { - max_bboxes = info_proto.nms_shape().max_bboxes_total(); - order_type = HAILO_NMS_RESULT_ORDER_BY_SCORE; - } else { - max_bboxes = info_proto.nms_shape().max_bboxes_per_class(); - order_type = HAILO_NMS_RESULT_ORDER_BY_CLASS; - } hailo_nms_shape_t nms_shape = { info_proto.nms_shape().number_of_classes(), - max_bboxes, - info_proto.nms_shape().max_accumulated_mask_size(), - order_type + info_proto.nms_shape().max_bboxes_per_class(), + info_proto.nms_shape().max_bboxes_total(), + 
info_proto.nms_shape().max_accumulated_mask_size() }; info.nms_shape = nms_shape; } else { @@ -1325,19 +1313,19 @@ Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_ return result; } -Expected HailoRtRpcClient::ConfiguredNetworkGroup_get_min_buffer_pool_size(const NetworkGroupIdentifier &identifier) +Expected HailoRtRpcClient::ConfiguredNetworkGroup_infer_queue_size(const NetworkGroupIdentifier &identifier) { - ConfiguredNetworkGroup_get_min_buffer_pool_size_Request request; - ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply reply; + ConfiguredNetworkGroup_infer_queue_size_Request request; + ConfiguredNetworkGroup_infer_queue_size_Reply reply; auto proto_identifier = request.mutable_identifier(); ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); ClientContextWithTimeout context; - grpc::Status status = m_stub->ConfiguredNetworkGroup_get_min_buffer_pool_size(&context, request, &reply); + grpc::Status status = m_stub->ConfiguredNetworkGroup_infer_queue_size(&context, request, &reply); CHECK_GRPC_STATUS_AS_EXPECTED(status); assert(reply.status() < HAILO_STATUS_COUNT); CHECK_SUCCESS_AS_EXPECTED(static_cast(reply.status())); - auto min_buffer_pool_size = reply.min_buffer_pool_size(); - return min_buffer_pool_size; + auto queue_size = reply.infer_queue_size(); + return queue_size; } Expected> HailoRtRpcClient::ConfiguredNetworkGroup_get_layer_info(const NetworkGroupIdentifier &identifier, const std::string &stream_name) @@ -1442,22 +1430,6 @@ hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_bboxes_total(c return static_cast(reply.status()); } -hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_result_order_type(const NetworkGroupIdentifier &identifier, - const std::string &edge_name, hailo_nms_result_order_type_t order_type) -{ - ConfiguredNetworkGroup_set_nms_result_order_type_Request request; - auto proto_identifier = request.mutable_identifier(); - 
ConfiguredNetworkGroup_convert_identifier_to_proto(identifier, proto_identifier); - request.set_edge_name(edge_name); - request.set_nms_result_order_type(static_cast(order_type)); - ConfiguredNetworkGroup_set_nms_result_order_type_Reply reply; - ClientContextWithTimeout context; - grpc::Status status = m_stub->ConfiguredNetworkGroup_set_nms_result_order_type(&context, request, &reply); - CHECK_GRPC_STATUS(status); - assert(reply.status() < HAILO_STATUS_COUNT); - return static_cast(reply.status()); -} - hailo_status HailoRtRpcClient::ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t max_accumulated_mask_size) { diff --git a/hailort/libhailort/src/service/hailort_rpc_client.hpp b/hailort/libhailort/src/service/hailort_rpc_client.hpp index 37ff9fd..bedcabc 100644 --- a/hailort/libhailort/src/service/hailort_rpc_client.hpp +++ b/hailort/libhailort/src/service/hailort_rpc_client.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -72,7 +72,19 @@ class ClientContextWithTimeout : public grpc::ClientContext { public: ClientContextWithTimeout(const std::chrono::milliseconds context_timeout = CONTEXT_TIMEOUT) { - set_deadline(std::chrono::system_clock::now() + context_timeout); + auto timeout = get_request_timeout(context_timeout); + + set_deadline(std::chrono::system_clock::now() + timeout); + } + + // TODO: HRT-16034: make common function with hrpc client. 
+ std::chrono::milliseconds get_request_timeout(const std::chrono::milliseconds default_timeout) + { + auto timeout_seconds = get_env_variable(HAILO_REQUEST_TIMEOUT_SECONDS); + if (timeout_seconds) { + return std::chrono::seconds(std::stoi(timeout_seconds.value())); + } + return default_timeout; } }; @@ -122,14 +134,13 @@ public: Expected ConfiguredNetworkGroup_is_multi_context(const NetworkGroupIdentifier &identifier); Expected ConfiguredNetworkGroup_get_config_params(const NetworkGroupIdentifier &identifier); Expected> ConfiguredNetworkGroup_get_sorted_output_names(const NetworkGroupIdentifier &identifier); - Expected ConfiguredNetworkGroup_get_min_buffer_pool_size(const NetworkGroupIdentifier &identifier); + Expected ConfiguredNetworkGroup_infer_queue_size(const NetworkGroupIdentifier &identifier); Expected> ConfiguredNetworkGroup_get_layer_info(const NetworkGroupIdentifier &identifier, const std::string &stream_name); Expected> ConfiguredNetworkGroup_get_ops_metadata(const NetworkGroupIdentifier &identifier); hailo_status ConfiguredNetworkGroup_set_nms_score_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t nms_score_th); hailo_status ConfiguredNetworkGroup_set_nms_iou_threshold(const NetworkGroupIdentifier &identifier, const std::string &edge_name, float32_t iou_th); hailo_status ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t max_bboxes); hailo_status ConfiguredNetworkGroup_set_nms_max_bboxes_total(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t max_bboxes); - hailo_status ConfiguredNetworkGroup_set_nms_result_order_type(const NetworkGroupIdentifier &identifier, const std::string &edge_name, hailo_nms_result_order_type_t order_type); hailo_status ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(const NetworkGroupIdentifier &identifier, const std::string &edge_name, uint32_t 
max_accumulated_mask_size); Expected> ConfiguredNetworkGroup_get_stream_names_from_vstream_name(const NetworkGroupIdentifier &identifier, const std::string &vstream_name); Expected> ConfiguredNetworkGroup_get_vstream_names_from_stream_name(const NetworkGroupIdentifier &identifier, const std::string &stream_name); diff --git a/hailort/libhailort/src/service/network_group_client.cpp b/hailort/libhailort/src/service/network_group_client.cpp index f052ee0..89be01f 100644 --- a/hailort/libhailort/src/service/network_group_client.cpp +++ b/hailort/libhailort/src/service/network_group_client.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -36,7 +36,7 @@ Expected> ConfiguredNetworkGroupCl { TRY(auto ng_name, client->ConfiguredNetworkGroup_name(identifier)); TRY(auto streams_infos, client->ConfiguredNetworkGroup_get_all_stream_infos(identifier, ng_name)); - TRY(auto min_buffer_pool_size, client->ConfiguredNetworkGroup_get_min_buffer_pool_size(identifier)); + TRY(auto min_buffer_pool_size, client->ConfiguredNetworkGroup_infer_queue_size(identifier)); std::unordered_set input_streams_names; std::unordered_set output_streams_names; @@ -170,12 +170,12 @@ Expected ConfiguredNetworkGroupClient::get_latency_mea return m_client->ConfiguredNetworkGroup_get_latency_measurement(m_identifier, network_name); } -const std::string &ConfiguredNetworkGroupClient::get_network_group_name() const +const std::string& ConfiguredNetworkGroupClient::get_network_group_name() const { return m_network_group_name; } -const std::string &ConfiguredNetworkGroupClient::name() const +const std::string& ConfiguredNetworkGroupClient::name() const { return m_network_group_name; } @@ -438,9 +438,9 @@ Expected> ConfiguredNetworkGroupClient::create_output return vstreams; } -Expected 
ConfiguredNetworkGroupClient::get_min_buffer_pool_size() +Expected ConfiguredNetworkGroupClient::infer_queue_size() const { - return m_client->ConfiguredNetworkGroup_get_min_buffer_pool_size(m_identifier); + return m_client->ConfiguredNetworkGroup_infer_queue_size(m_identifier); } Expected> ConfiguredNetworkGroupClient::get_layer_info(const std::string &stream_name) @@ -473,18 +473,13 @@ hailo_status ConfiguredNetworkGroupClient::set_nms_max_bboxes_total(const std::s return m_client->ConfiguredNetworkGroup_set_nms_max_bboxes_total(m_identifier, edge_name, max_bboxes_total); } -hailo_status ConfiguredNetworkGroupClient::set_nms_result_order_type(const std::string &edge_name, hailo_nms_result_order_type_t order_type) -{ - return m_client->ConfiguredNetworkGroup_set_nms_result_order_type(m_identifier, edge_name, order_type); -} - hailo_status ConfiguredNetworkGroupClient::set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) { return m_client->ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(m_identifier, edge_name, max_accumulated_mask_size); } // TODO: support kv-cache over service (HRT-13968) -hailo_status ConfiguredNetworkGroupClient::init_cache(uint32_t /* read_offset */, int32_t /* write_offset_delta */) +hailo_status ConfiguredNetworkGroupClient::init_cache(uint32_t /* read_offset */) { return HAILO_NOT_IMPLEMENTED; } diff --git a/hailort/libhailort/src/service/network_group_client.hpp b/hailort/libhailort/src/service/network_group_client.hpp index fd632fc..70303ca 100644 --- a/hailort/libhailort/src/service/network_group_client.hpp +++ b/hailort/libhailort/src/service/network_group_client.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -34,8 +34,8 @@ public: ConfiguredNetworkGroupClient &operator=(ConfiguredNetworkGroupClient &&other) = delete; ConfiguredNetworkGroupClient(ConfiguredNetworkGroupClient &&other) = delete; - virtual const std::string &get_network_group_name() const override; - virtual const std::string &name() const override; + virtual const std::string& get_network_group_name() const override; + virtual const std::string& name() const override; virtual Expected get_default_streams_interface() override; virtual std::vector> get_input_streams_by_interface(hailo_stream_interface_t stream_interface) override; virtual std::vector> get_output_streams_by_interface(hailo_stream_interface_t stream_interface) override; @@ -88,7 +88,7 @@ public: virtual Expected> create_input_vstreams(const std::map &inputs_params); virtual Expected> create_output_vstreams(const std::map &outputs_params); - virtual Expected get_min_buffer_pool_size() override; + virtual Expected infer_queue_size() const override; virtual hailo_status before_fork() override; virtual hailo_status after_fork_in_parent() override; @@ -122,10 +122,9 @@ public: virtual hailo_status set_nms_iou_threshold(const std::string &edge_name, float32_t iou_threshold) override; virtual hailo_status set_nms_max_bboxes_per_class(const std::string &edge_name, uint32_t max_bboxes_per_class) override; virtual hailo_status set_nms_max_bboxes_total(const std::string &edge_name, uint32_t max_bboxes_total) override; - virtual hailo_status set_nms_result_order_type(const std::string &edge_name, hailo_nms_result_order_type_t order_type) override; virtual hailo_status set_nms_max_accumulated_mask_size(const std::string &edge_name, uint32_t max_accumulated_mask_size) override; - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override; + virtual hailo_status init_cache(uint32_t read_offset) override; virtual hailo_status 
update_cache_offset(int32_t offset_delta_entries) override; virtual Expected> get_cache_ids() const override; virtual Expected read_cache_buffer(uint32_t cache_id) override; diff --git a/hailort/libhailort/src/service/rpc_client_utils.hpp b/hailort/libhailort/src/service/rpc_client_utils.hpp index a6363cb..b02e9f9 100644 --- a/hailort/libhailort/src/service/rpc_client_utils.hpp +++ b/hailort/libhailort/src/service/rpc_client_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/async_stream_base.cpp b/hailort/libhailort/src/stream_common/async_stream_base.cpp index 059f407..504a248 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.cpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -184,6 +184,9 @@ hailo_status AsyncInputStreamBase::activate_stream() { std::unique_lock lock(m_stream_mutex); + // Clear old abort state + m_is_aborted = false; + auto status = activate_stream_impl(); CHECK_SUCCESS(status); @@ -230,6 +233,13 @@ hailo_status AsyncInputStreamBase::call_write_async_impl(TransferRequest &&trans { std::lock_guard lock(m_stream_mutex); m_ongoing_transfers--; + + if (HAILO_SUCCESS != callback_status) { + if (m_is_stream_activated) { + // Need to abort only if we are active! 
+ m_is_aborted = true; + } + } } m_has_ready_buffer.notify_all(); @@ -329,6 +339,13 @@ hailo_status AsyncOutputStreamBase::call_read_async_impl(TransferRequest &&trans { std::lock_guard lock(m_stream_mutex); m_ongoing_transfers--; + + if (HAILO_SUCCESS != callback_status) { + if (m_is_stream_activated) { + // Need to abort only if we are active! + m_is_aborted = true; + } + } } m_has_ready_buffer.notify_all(); @@ -349,6 +366,9 @@ hailo_status AsyncOutputStreamBase::activate_stream() { std::unique_lock lock(m_stream_mutex); + // Clear old abort state + m_is_aborted = false; + auto status = activate_stream_impl(); CHECK_SUCCESS(status); diff --git a/hailort/libhailort/src/stream_common/async_stream_base.hpp b/hailort/libhailort/src/stream_common/async_stream_base.hpp index 0b8fda5..2deccdf 100644 --- a/hailort/libhailort/src/stream_common/async_stream_base.hpp +++ b/hailort/libhailort/src/stream_common/async_stream_base.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/nms_stream.cpp b/hailort/libhailort/src/stream_common/nms_stream.cpp index 23490fe..704abb0 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.cpp +++ b/hailort/libhailort/src/stream_common/nms_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/nms_stream.hpp b/hailort/libhailort/src/stream_common/nms_stream.hpp index 786ff70..50d121f 100644 --- a/hailort/libhailort/src/stream_common/nms_stream.hpp +++ b/hailort/libhailort/src/stream_common/nms_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp index b47a12d..8c32ba4 100644 --- a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp index b7611ac..d4e2407 100644 --- a/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/stream_common/queued_stream_buffer_pool.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.cpp b/hailort/libhailort/src/stream_common/remote_process_stream.cpp index 1738b44..9f0c820 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.cpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/remote_process_stream.hpp b/hailort/libhailort/src/stream_common/remote_process_stream.hpp index 7b69914..60f1a1d 100644 --- a/hailort/libhailort/src/stream_common/remote_process_stream.hpp +++ b/hailort/libhailort/src/stream_common/remote_process_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/stream.cpp b/hailort/libhailort/src/stream_common/stream.cpp index 15304a2..c14c8fa 100644 --- a/hailort/libhailort/src/stream_common/stream.cpp +++ b/hailort/libhailort/src/stream_common/stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp b/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp index 625c713..a1faebe 100644 --- a/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp +++ b/hailort/libhailort/src/stream_common/stream_buffer_pool.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/stream_internal.cpp b/hailort/libhailort/src/stream_common/stream_internal.cpp index 0eb70e3..c300003 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.cpp +++ b/hailort/libhailort/src/stream_common/stream_internal.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/stream_common/stream_internal.hpp b/hailort/libhailort/src/stream_common/stream_internal.hpp index 9e4028a..1ee6e10 100644 --- a/hailort/libhailort/src/stream_common/stream_internal.hpp +++ b/hailort/libhailort/src/stream_common/stream_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/transform/eigen.hpp b/hailort/libhailort/src/transform/eigen.hpp index 15dc901..a6a5181 100644 --- a/hailort/libhailort/src/transform/eigen.hpp +++ b/hailort/libhailort/src/transform/eigen.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. 
All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/transform/transform.cpp b/hailort/libhailort/src/transform/transform.cpp index 72c6d3b..bb4e228 100644 --- a/hailort/libhailort/src/transform/transform.cpp +++ b/hailort/libhailort/src/transform/transform.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -107,7 +107,6 @@ bool TransformContextUtils::should_reorder(const hailo_3d_image_shape_t &src_ima } else { return true; } - case HAILO_FORMAT_ORDER_HAILO_NMS: case HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS: case HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE: return true; @@ -1928,20 +1927,20 @@ Expected> NMSOutputTransformContext::cre const auto internal_dst_format = HailoRTDefaults::expand_auto_format(dst_format, src_format); - CHECK_AS_EXPECTED((HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS == internal_dst_format.order) || (HAILO_FORMAT_ORDER_HAILO_NMS == internal_dst_format.order), + CHECK_AS_EXPECTED(HailoRTCommon::is_nms_by_class(internal_dst_format.order), HAILO_INVALID_ARGUMENT, "Format order should be HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS"); CHECK_AS_EXPECTED(HAILO_FORMAT_TYPE_FLOAT32 == internal_dst_format.type, HAILO_INVALID_ARGUMENT, "Format type of HAILO_FORMAT_TYPE_FLOAT32"); const auto src_frame_size = HailoRTCommon::get_nms_hw_frame_size(nms_info); - auto dst_frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, internal_dst_format); + auto dst_frame_size = HailoRTCommon::get_nms_by_class_host_frame_size(nms_info, internal_dst_format); Buffer quant_buffer; auto should_quantize = TransformContextUtils::should_quantize(HAILO_D2H_STREAM, src_format, internal_dst_format); CHECK_EXPECTED(should_quantize); if 
(*should_quantize) { - dst_frame_size = HailoRTCommon::get_nms_host_frame_size(nms_info, internal_dst_format); + dst_frame_size = HailoRTCommon::get_nms_by_class_host_frame_size(nms_info, internal_dst_format); auto expected_nms_quant_buffer = Buffer::create(dst_frame_size, 0); CHECK_EXPECTED(expected_nms_quant_buffer); quant_buffer = expected_nms_quant_buffer.release(); @@ -1979,12 +1978,12 @@ hailo_status NMSOutputTransformContext::transform(const MemoryView src, MemoryVi CHECK(dst.size() == m_dst_frame_size, HAILO_INVALID_ARGUMENT, "dst_size must be {}. passed size - {}", m_dst_frame_size, dst.size()); - CHECK(((HAILO_FORMAT_ORDER_HAILO_NMS == m_dst_format.order)|| (HAILO_FORMAT_ORDER_HAILO_NMS_BY_CLASS == m_dst_format.order)), + CHECK(HailoRTCommon::is_nms_by_class(m_dst_format.order), HAILO_INVALID_ARGUMENT, "Wrong format order {}", HailoRTCommon::get_format_order_str(m_dst_format.order)); assert(HAILO_FORMAT_ORDER_HAILO_NMS_ON_CHIP == m_src_format.order); - auto shape_size = HailoRTCommon::get_nms_host_shape_size(m_nms_info); + auto shape_size = HailoRTCommon::get_nms_by_class_host_shape_size(m_nms_info); if ((HAILO_FORMAT_FLAGS_TRANSPOSED & m_src_format.flags) || (HAILO_FORMAT_FLAGS_TRANSPOSED & m_dst_format.flags)) { LOGGER__ERROR("NMS doesn't support transposed format"); diff --git a/hailort/libhailort/src/transform/transform_internal.hpp b/hailort/libhailort/src/transform/transform_internal.hpp index be5d7e1..5f28add 100644 --- a/hailort/libhailort/src/transform/transform_internal.hpp +++ b/hailort/libhailort/src/transform/transform_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/CMakeLists.txt b/hailort/libhailort/src/utils/CMakeLists.txt index a299800..f68525c 100644 --- a/hailort/libhailort/src/utils/CMakeLists.txt +++ b/hailort/libhailort/src/utils/CMakeLists.txt @@ -9,6 +9,8 @@ set(SRC_FILES ${CMAKE_CURRENT_SOURCE_DIR}/sensor_config_utils.cpp ${CMAKE_CURRENT_SOURCE_DIR}/soc_utils/partial_cluster_reader.cpp ${CMAKE_CURRENT_SOURCE_DIR}/measurement_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/query_stats_utils.cpp + ${CMAKE_CURRENT_SOURCE_DIR}/pool_allocator.cpp ) add_subdirectory(profiler) diff --git a/hailort/libhailort/src/utils/buffer.cpp b/hailort/libhailort/src/utils/buffer.cpp index fd248b2..9e27419 100644 --- a/hailort/libhailort/src/utils/buffer.cpp +++ b/hailort/libhailort/src/utils/buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -145,6 +145,14 @@ Expected Buffer::create(BufferStoragePtr storage, bool register_storage return Buffer(std::move(storage_impl)); } +Expected Buffer::create_shared(BufferStoragePtr storage, bool register_storage) +{ + TRY(auto buffer, create(storage, register_storage)); + auto buffer_ptr = make_shared_nothrow(std::move(buffer)); + CHECK_NOT_NULL_AS_EXPECTED(buffer_ptr, HAILO_OUT_OF_HOST_MEMORY); + return buffer_ptr; +} + Expected Buffer::copy() const { return Buffer::create(m_data, m_size); @@ -332,6 +340,11 @@ MemoryView::MemoryView(void *data, size_t size) noexcept : m_size(size) {} +MemoryView::MemoryView(const std::string &data) noexcept : + m_data(const_cast(data.data())), + m_size(data.size()) +{} + const MemoryView MemoryView::create_const(const void *data, size_t size) noexcept { return MemoryView(const_cast(data), size); diff --git a/hailort/libhailort/src/utils/buffer_storage.cpp b/hailort/libhailort/src/utils/buffer_storage.cpp index 35fcd53..f3aad61 100644 --- a/hailort/libhailort/src/utils/buffer_storage.cpp +++ b/hailort/libhailort/src/utils/buffer_storage.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -255,4 +255,56 @@ Expected SharedMemoryStorage::shm_name() return m_shm_buffer->shm_name(); } +PooledBufferStorage::PooledBufferStorage(BufferPtr buffer, BasicBufferPoolPtr buffer_pool) : + m_buffer_pool(buffer_pool), + m_buffer(buffer) +{} + +PooledBufferStorage::~PooledBufferStorage() +{ + auto status = m_buffer_pool->return_to_pool(m_buffer); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to return buffer to pool: {}", status); + } +} + +size_t PooledBufferStorage::size() const +{ + return m_buffer->size(); +} + +void *PooledBufferStorage::user_address() +{ + return m_buffer->data(); +} + +Expected PooledBufferStorage::release() noexcept +{ + return make_unexpected(HAILO_INVALID_OPERATION); +} + +DmaMappedBufferStorage::DmaMappedBufferStorage(Buffer buffer, vdma::MappedBufferPtr mapped_buffer) : + m_buffer(std::move(buffer)), m_mapped_buffer(mapped_buffer) +{} + +DmaMappedBufferStorage::DmaMappedBufferStorage(DmaMappedBufferStorage&& other) noexcept : + BufferStorage(std::move(other)), m_buffer(std::move(other.m_buffer)), + m_mapped_buffer(std::move(other.m_mapped_buffer)) +{} + +size_t DmaMappedBufferStorage::size() const +{ + return m_buffer.size(); +} + +void *DmaMappedBufferStorage::user_address() +{ + return m_buffer.data(); +} + +Expected DmaMappedBufferStorage::release() noexcept +{ + return make_unexpected(HAILO_INVALID_OPERATION); +} + } /* namespace hailort */ diff --git a/hailort/libhailort/src/utils/buffer_storage.hpp b/hailort/libhailort/src/utils/buffer_storage.hpp index 2b438ce..f50269b 100644 --- a/hailort/libhailort/src/utils/buffer_storage.hpp +++ b/hailort/libhailort/src/utils/buffer_storage.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -14,10 +14,12 @@ #include "hailo/expected.hpp" #include "hailo/buffer.hpp" +#include "common/buffer_pool.hpp" #include "common/shared_memory_buffer.hpp" #include "utils/exported_resource_manager.hpp" #include "vdma/memory/continuous_buffer.hpp" +#include "vdma/memory/mapped_buffer.hpp" #include #include @@ -202,6 +204,46 @@ private: SharedMemoryBufferPtr m_shm_buffer; }; +class PooledBufferStorage : public BufferStorage +{ +public: + ~PooledBufferStorage(); + + PooledBufferStorage(BufferPtr buffer, BasicBufferPoolPtr buffer_pool); + PooledBufferStorage(PooledBufferStorage&& other) noexcept; + PooledBufferStorage(const PooledBufferStorage &) = delete; + PooledBufferStorage &operator=(PooledBufferStorage &&) = delete; + PooledBufferStorage &operator=(const PooledBufferStorage &) = delete; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected release() noexcept override; + +private: + BasicBufferPoolPtr m_buffer_pool; + BufferPtr m_buffer; +}; + +class DmaMappedBufferStorage : public BufferStorage +{ +public: + virtual ~DmaMappedBufferStorage() = default; + + DmaMappedBufferStorage(Buffer buffer, vdma::MappedBufferPtr mapped_buffer); + DmaMappedBufferStorage(DmaMappedBufferStorage&& other) noexcept; + DmaMappedBufferStorage(const DmaMappedBufferStorage &) = delete; + DmaMappedBufferStorage &operator=(DmaMappedBufferStorage &&) = delete; + DmaMappedBufferStorage &operator=(const DmaMappedBufferStorage &) = delete; + + virtual size_t size() const override; + virtual void *user_address() override; + virtual Expected release() noexcept override; + +private: + Buffer m_buffer; + vdma::MappedBufferPtr m_mapped_buffer; +}; + } /* namespace hailort */ #endif /* _HAILO_BUFFER_STORAGE_HPP_ */ diff --git a/hailort/libhailort/src/utils/dma_buffer_utils.hpp b/hailort/libhailort/src/utils/dma_buffer_utils.hpp index 1a22bc7..4aca27f 100644 --- 
a/hailort/libhailort/src/utils/dma_buffer_utils.hpp +++ b/hailort/libhailort/src/utils/dma_buffer_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/exported_resource_manager.hpp b/hailort/libhailort/src/utils/exported_resource_manager.hpp index 3222700..c8a3bde 100644 --- a/hailort/libhailort/src/utils/exported_resource_manager.hpp +++ b/hailort/libhailort/src/utils/exported_resource_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/hailort_common.cpp b/hailort/libhailort/src/utils/hailort_common.cpp index 35e1aa9..9b36851 100644 --- a/hailort/libhailort/src/utils/hailort_common.cpp +++ b/hailort/libhailort/src/utils/hailort_common.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -21,6 +21,20 @@ const uint32_t HailoRTCommon::MAX_NMS_BURST_SIZE; const size_t HailoRTCommon::DMA_ABLE_ALIGNMENT_WRITE_HW_LIMITATION; const size_t HailoRTCommon::DMA_ABLE_ALIGNMENT_READ_HW_LIMITATION; +// TODO: HRT-15885 - remove this function +uint32_t HailoRTCommon::get_nms_host_shape_size(const hailo_nms_info_t &nms_info) +{ + LOGGER__WARNING("get_nms_host_shape_size is deprecated, use get_nms_by_class_host_shape_size instead"); + return get_nms_by_class_host_shape_size(nms_info); +} + +// TODO: HRT-15885 - remove this function +uint32_t HailoRTCommon::get_nms_host_shape_size(const hailo_nms_shape_t &nms_shape) +{ + LOGGER__WARNING("get_nms_host_shape_size is deprecated, use get_nms_by_class_host_shape_size instead"); + return get_nms_by_class_host_shape_size(nms_shape); +} + Expected HailoRTCommon::to_device_id(const std::string &device_id) { hailo_device_id_t id = {}; @@ -54,7 +68,7 @@ uint32_t HailoRTCommon::get_nms_host_frame_size(const hailo_nms_shape_t &nms_sha } else if (HAILO_FORMAT_ORDER_HAILO_NMS_BY_SCORE == format.order) { frame_size = get_nms_by_score_host_frame_size(nms_shape); } else { - auto shape_size = get_nms_host_shape_size(nms_shape); + auto shape_size = get_nms_by_class_host_shape_size(nms_shape); frame_size = shape_size * get_format_data_bytes(format); } if (frame_size < UINT32_MAX) { diff --git a/hailort/libhailort/src/utils/hailort_logger.cpp b/hailort/libhailort/src/utils/hailort_logger.cpp index edbd7da..0828850 100644 --- a/hailort/libhailort/src/utils/hailort_logger.cpp +++ b/hailort/libhailort/src/utils/hailort_logger.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/hailort_logger.hpp b/hailort/libhailort/src/utils/hailort_logger.hpp index 9cc420c..f3ee260 100644 --- a/hailort/libhailort/src/utils/hailort_logger.hpp +++ b/hailort/libhailort/src/utils/hailort_logger.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/measurement_utils.cpp b/hailort/libhailort/src/utils/measurement_utils.cpp index 287b460..bd637c9 100644 --- a/hailort/libhailort/src/utils/measurement_utils.cpp +++ b/hailort/libhailort/src/utils/measurement_utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/measurement_utils.hpp b/hailort/libhailort/src/utils/measurement_utils.hpp index 2c4044d..f677417 100644 --- a/hailort/libhailort/src/utils/measurement_utils.hpp +++ b/hailort/libhailort/src/utils/measurement_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/measurement_utils_internal.hpp b/hailort/libhailort/src/utils/measurement_utils_internal.hpp index a93e06b..4a737ae 100644 --- a/hailort/libhailort/src/utils/measurement_utils_internal.hpp +++ b/hailort/libhailort/src/utils/measurement_utils_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/pool_allocator.cpp b/hailort/libhailort/src/utils/pool_allocator.cpp new file mode 100644 index 0000000..d58f218 --- /dev/null +++ b/hailort/libhailort/src/utils/pool_allocator.cpp @@ -0,0 +1,37 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file pool_allocator.cpp + * @brief Pool allocator + **/ + +#include "pool_allocator.hpp" + +namespace hailort +{ + +Expected<std::shared_ptr<PoolAllocator>> PoolAllocator::create_shared(size_t pool_size, size_t buffer_size, std::function<Expected<Buffer>(size_t)> allocate_func) +{ + TRY(auto buffer_pool, BasicBufferPool::create_shared(buffer_size, pool_size, allocate_func)); + + auto allocator = make_shared_nothrow<PoolAllocator>(buffer_pool); + CHECK_NOT_NULL(allocator, HAILO_OUT_OF_HOST_MEMORY); + return allocator; +} + +PoolAllocator::PoolAllocator(BasicBufferPoolPtr buffer_pool) : m_buffer_pool(buffer_pool) {} + +Expected<BufferPtr> PoolAllocator::allocate() +{ + std::lock_guard<std::mutex> lock(m_mutex); + TRY(auto buffer, m_buffer_pool->acquire_buffer()); + + auto pooled_buffer_storage = make_shared_nothrow<PooledBufferStorage>(buffer, m_buffer_pool); + CHECK_NOT_NULL(pooled_buffer_storage, HAILO_OUT_OF_HOST_MEMORY); + + return Buffer::create_shared(pooled_buffer_storage, false); +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/utils/pool_allocator.hpp b/hailort/libhailort/src/utils/pool_allocator.hpp new file mode 100644 index 0000000..2b659af --- /dev/null +++ b/hailort/libhailort/src/utils/pool_allocator.hpp @@ -0,0 +1,36 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file pool_allocator.hpp + * @brief Pool allocator + **/ + +#ifndef _HAILO_POOL_ALLOCATOR_HPP_ +#define _HAILO_POOL_ALLOCATOR_HPP_ + +#include "buffer_storage.hpp" +#include "common/buffer_pool.hpp" + +#include <mutex> + +namespace hailort +{ + +class PoolAllocator final +{ +public: + static Expected<std::shared_ptr<PoolAllocator>> create_shared(size_t pool_size, size_t buffer_size, std::function<Expected<Buffer>(size_t)> allocate_func); + + PoolAllocator(BasicBufferPoolPtr buffer_pool); + Expected<BufferPtr> allocate(); + +private: + BasicBufferPoolPtr m_buffer_pool; + std::mutex m_mutex; +}; + +} /* namespace hailort */ + +#endif /* _HAILO_POOL_ALLOCATOR_HPP_ */ \ No newline at end of file diff --git a/hailort/libhailort/src/utils/profiler/handler.hpp b/hailort/libhailort/src/utils/profiler/handler.hpp index 1cd04a8..0b5aee5 100644 --- a/hailort/libhailort/src/utils/profiler/handler.hpp +++ b/hailort/libhailort/src/utils/profiler/handler.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp index 88be045..4779b54 100644 --- a/hailort/libhailort/src/utils/profiler/monitor_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved.
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -11,11 +11,30 @@ #include "common/logger_macros.hpp" #include "common/os_utils.hpp" +#include "common/env_vars.hpp" namespace hailort { MonitorHandler::MonitorHandler() -{} +{ + auto env_val = get_env_variable(SCHEDULER_MON_TIME_INTERVAL_IN_MILLISECONDS_ENV_VAR); + if (HAILO_SUCCESS == env_val.status()) { + + std::stringstream ss(env_val.value()); + int env_val_int = 0; + ss >> env_val_int; + + // Check if the entire string was consumed and there were no errors + if (ss.fail() || !ss.eof()) { + LOGGER__WARNING("Failed to convert HAILO_MONITOR_TIME_INTERVAL env var to int, using default value {} sec", DEFAULT_SCHEDULER_MON_INTERVAL.count()); + m_mon_interval = DEFAULT_SCHEDULER_MON_INTERVAL; + return; + } + m_mon_interval = std::chrono::milliseconds(static_cast(env_val_int)); + } else { + m_mon_interval = DEFAULT_SCHEDULER_MON_INTERVAL; + } +} MonitorHandler::~MonitorHandler() { @@ -188,10 +207,14 @@ hailo_status MonitorHandler::start_mon(const std::string &unique_vdevice_hash) CHECK_EXPECTED_AS_STATUS(tmp_file); m_mon_tmp_output = tmp_file.release(); + auto tmp_nnc_utilization_file_exp = open_temp_nnc_utilization_file(); + CHECK_EXPECTED_AS_STATUS(tmp_nnc_utilization_file_exp); + m_nnc_utilization_tmp_output = tmp_nnc_utilization_file_exp.release(); + m_mon_thread = std::thread([this] () { while (true) { - auto status = m_mon_shutdown_event->wait(DEFAULT_SCHEDULER_MON_INTERVAL); + auto status = m_mon_shutdown_event->wait(m_mon_interval); if (HAILO_TIMEOUT == status) { dump_state(); } else if (HAILO_SUCCESS == status) { @@ -229,6 +252,33 @@ Expected> MonitorHandler::open_temp_mon_file() return tmp_file_ptr; } +Expected> MonitorHandler::open_temp_nnc_utilization_file() +{ + auto tmp_file = TempFile::create(NNC_UTILIZATION_FILE_NAME, NNC_UTILIZATION_TMP_DIR); + CHECK_EXPECTED(tmp_file); + + auto tmp_file_ptr = make_shared_nothrow(tmp_file.release()); + CHECK_AS_EXPECTED(nullptr != 
tmp_file_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return tmp_file_ptr; +} + +void MonitorHandler::write_utilization_to_file(const double utilization_percentage) +{ + auto locked_file = LockedFile::create(m_nnc_utilization_tmp_output->name(), "w"); + if (locked_file.status() != HAILO_SUCCESS) { + LOGGER__ERROR("Failed to open and lock file {}, with status: {}", m_nnc_utilization_tmp_output->name(), locked_file.status()); + return; + } + + std::string utilization_percentage_str = std::to_string(utilization_percentage) + "\n"; + auto ret = write(locked_file->get_fd(), utilization_percentage_str.c_str(), utilization_percentage_str.size()); + if (-1 == ret) { + LOGGER__ERROR("Failed to write nnc utilization file, errno={}", errno); + return; + } +} + void MonitorHandler::dump_state() { auto file = LockedFile::create(m_mon_tmp_output->name(), "w"); @@ -270,6 +320,9 @@ void MonitorHandler::log_monitor_device_infos(ProtoMon &mon) for (auto const &device_info_pair : m_devices_info) { auto curr_device_utilization = device_info_pair.second.device_utilization_duration; auto utilization_percentage = ((curr_device_utilization * 100) / m_last_measured_time_duration); +#if defined(__GNUC__) + write_utilization_to_file(utilization_percentage); +#endif auto device_infos = mon.add_device_infos(); device_infos->set_device_id(device_info_pair.second.device_id); diff --git a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp index 1247549..7ec7c2a 100644 --- a/hailort/libhailort/src/utils/profiler/monitor_handler.hpp +++ b/hailort/libhailort/src/utils/profiler/monitor_handler.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -44,7 +44,10 @@ namespace hailort { -#define SCHEDULER_MON_TMP_DIR ("/tmp/hmon_files/") +#define SCHEDULER_MON_TMP_DIR "/tmp/hmon_files/" +#define NNC_UTILIZATION_TMP_DIR "/tmp/nnc_utilization/" +#define NNC_UTILIZATION_FILE_NAME "nnc_utilization" +#define NNC_UTILIZATION_FILE_PATH (NNC_UTILIZATION_TMP_DIR NNC_UTILIZATION_FILE_NAME) #define DEFAULT_SCHEDULER_MON_INTERVAL (std::chrono::seconds(1)) #define SCHEDULER_MON_NAN_VAL (-1) @@ -179,6 +182,8 @@ private: hailo_status start_mon(const std::string &unique_vdevice_hash); #if defined(__GNUC__) Expected> open_temp_mon_file(); + Expected> open_temp_nnc_utilization_file(); + void write_utilization_to_file(const double utilization_percentage); void dump_state(); #endif void time_dependent_events_cycle_calc(); @@ -198,6 +203,7 @@ private: EventPtr m_mon_shutdown_event; #if defined(__GNUC__) std::shared_ptr m_mon_tmp_output; + std::shared_ptr m_nnc_utilization_tmp_output; #endif std::chrono::time_point m_last_measured_timestamp; double m_last_measured_time_duration; @@ -205,6 +211,7 @@ private: std::unordered_map m_core_ops_info; std::unordered_map m_devices_info; std::string m_unique_vdevice_hash; // only one vdevice is allowed at a time. vdevice will be unregistered in its destruction. + std::chrono::milliseconds m_mon_interval; }; } diff --git a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp index 584fbed..d96991c 100644 --- a/hailort/libhailort/src/utils/profiler/profiler_utils.hpp +++ b/hailort/libhailort/src/utils/profiler/profiler_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp index cabf38a..cc54ca3 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp index 1bbae6d..e441a6b 100644 --- a/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp +++ b/hailort/libhailort/src/utils/profiler/scheduler_profiler_handler.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/profiler/tracer.cpp b/hailort/libhailort/src/utils/profiler/tracer.cpp index 7fa77ff..d6a7995 100644 --- a/hailort/libhailort/src/utils/profiler/tracer.cpp +++ b/hailort/libhailort/src/utils/profiler/tracer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/profiler/tracer.hpp b/hailort/libhailort/src/utils/profiler/tracer.hpp index 4b6634e..c5e480b 100644 --- a/hailort/libhailort/src/utils/profiler/tracer.hpp +++ b/hailort/libhailort/src/utils/profiler/tracer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -14,6 +14,7 @@ #include "scheduler_profiler_handler.hpp" #include "monitor_handler.hpp" +#include "utils/query_stats_utils.hpp" namespace hailort { class Tracer diff --git a/hailort/libhailort/src/utils/profiler/tracer_macros.hpp b/hailort/libhailort/src/utils/profiler/tracer_macros.hpp index 5b58186..bb71032 100644 --- a/hailort/libhailort/src/utils/profiler/tracer_macros.hpp +++ b/hailort/libhailort/src/utils/profiler/tracer_macros.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/query_stats_utils.cpp b/hailort/libhailort/src/utils/query_stats_utils.cpp new file mode 100644 index 0000000..2e53ebd --- /dev/null +++ b/hailort/libhailort/src/utils/query_stats_utils.cpp @@ -0,0 +1,316 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
+ * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file query_stats_utils.cpp + * @brief Stats query utils module implementation + **/ + +#include "hailo/hailort.h" +#include "common/logger_macros.hpp" +#include "common/process.hpp" +#include "common/filesystem.hpp" +#include "utils/query_stats_utils.hpp" +#include "utils/profiler/tracer_macros.hpp" +#include "common/env_vars.hpp" + +#include <fstream> +#include <sstream> +#include <regex> +#include <thread> +#include <chrono> +#include <cinttypes> +#include <vector> +#include <tuple> +#include <cstdio> + +namespace hailort { + +// Platform-specific macros for popen and pclose +#ifdef _WIN32 +#define popen _popen +#define pclose _pclose +#endif + +#define MEM_INFO_PATH ("/proc/meminfo") +#define CPU_INFO_PATH ("/proc/stat") +#define PERFORMANCE_QUERY_SAMPLING_TIME_WINDOW (std::chrono::milliseconds(100)) +#define MAX_COMMAND_OUTPUT_LENGTH (UINT32_MAX) +#define HAILO_NOC_PERF_FILE_PATH "/etc/hailo_noc_perf.sh" +#define HAILO_NOC_MEASURE_OUTPUT_FILE_PATH "/tmp/hailo_noc_measure_output" + + +Expected<float32_t> QueryStatsUtils::calculate_cpu_utilization() +{ + // First sample + uint64_t user1, nice1, system1, idle1, iowait1, irq1, softirq1, steal1; + auto status = parse_cpu_stats(user1, nice1, system1, idle1, iowait1, irq1, softirq1, steal1); + CHECK_SUCCESS_AS_EXPECTED(status); + + std::this_thread::sleep_for(PERFORMANCE_QUERY_SAMPLING_TIME_WINDOW); + + // Second sample + uint64_t user2, nice2, system2, idle2, iowait2, irq2, softirq2, steal2; + status = parse_cpu_stats(user2, nice2, system2, idle2, iowait2, irq2, softirq2, steal2); + CHECK_SUCCESS_AS_EXPECTED(status); + + // Calculate deltas + uint64_t total1 = user1 + nice1 + system1 + idle1 + iowait1 + irq1 + softirq1 + steal1; + uint64_t total2 = user2 + nice2 + system2 + idle2 + iowait2 + irq2 + softirq2 + steal2; + uint64_t totalDelta = total2 - total1; + + uint64_t idleDelta = (idle2 + iowait2) - (idle1 + iowait1); + + // Calculate utilization percentage + float32_t utilization = (static_cast<float32_t>(totalDelta - idleDelta) 
/ static_cast(totalDelta)) * static_cast(100.0); + return utilization; +} + + +// Function parses the first line of /proc/stat +hailo_status QueryStatsUtils::parse_cpu_stats(uint64_t &user, uint64_t &nice, uint64_t &system, uint64_t &idle, + uint64_t &iowait, uint64_t &irq, uint64_t &softirq, uint64_t &steal) +{ + std::ifstream procStat(CPU_INFO_PATH); + if (!procStat.is_open()) { + LOGGER__ERROR("Error: Unable to open {}", CPU_INFO_PATH); + return HAILO_OPEN_FILE_FAILURE; + } + + std::string line; + char cpuLabel[16]; + + getline(procStat, line); // Read the first line (starts with "cpu") + int matches = sscanf(line.c_str(), "%s %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64 + " %" PRIu64 " %" PRIu64 " %" PRIu64 " %" PRIu64, + cpuLabel, &user, &nice, &system, &idle, &iowait, &irq, &softirq, &steal); + + procStat.close(); + if (matches != 9 || std::string(cpuLabel).substr(0, 3) != "cpu") { + LOGGER__ERROR("Error: Failed to parse CPU stats from {}", CPU_INFO_PATH); + return HAILO_INTERNAL_FAILURE; + } + + return HAILO_SUCCESS; +} + +Expected> QueryStatsUtils::calculate_ram_sizes() +{ + // function is based on Linux 'free' command + int64_t total_ram = -1; + int64_t used_ram = -1; + const auto output = run_command("free"); + CHECK_EXPECTED(output); + + std::istringstream stream(output.value().second); + + std::string label; + long long total, used, freeMem, shared, buffCache, available; + // Parse the output, searching for the line that starts with "Mem:" + while (stream >> label) { + if (label == "Mem:") { + if (stream >> total >> used >> freeMem >> shared >> buffCache >> available) { + total_ram = static_cast(total); + used_ram = static_cast(used); + } + break; + } + } + + if (total_ram == -1 || used_ram == -1) { + LOGGER__ERROR("Error: Failed to parse RAM stats from 'free' command"); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + return std::make_tuple(total_ram, used_ram); +} + +std::string QueryStatsUtils::get_sampling_time_window_as_string() +{ + 
std::ostringstream oss; + oss.precision(1); // Set precision to 1 decimal place + oss << std::fixed << std::chrono::duration(PERFORMANCE_QUERY_SAMPLING_TIME_WINDOW).count(); + return oss.str(); +} + +Expected QueryStatsUtils::get_dsp_utilization() +{ + std::string delay_str = get_sampling_time_window_as_string(); + + const std::string dsp_utilization_command = "dsp-utilization -i 1 -b --delay " + delay_str; + + const auto output = run_command(dsp_utilization_command); + CHECK_EXPECTED(output); + + // Use regex to extract the percentage value (e.g., %15) + std::regex percentageRegex(R"((\d+)%)"); + std::smatch match; + + if (!regex_search(output.value().second, match, percentageRegex)) { + LOGGER__ERROR("Error: No percentage found in output of '{}' command", dsp_utilization_command); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + return stoi(match[1]); +} + +Expected> QueryStatsUtils::read_ddr_noc_output_file(const std::string &filename) +{ + LOGGER__INFO("Opening ddr_noc file output in path: {}", filename); + std::ifstream file(filename); + std::vector data; + + if (!file.is_open()) { + LOGGER__ERROR("Error: Could not open file {}", filename); + return make_unexpected(HAILO_OPEN_FILE_FAILURE); + } + + std::string line; + bool headerSkipped = false; + + while (std::getline(file, line)) { + if (!headerSkipped) { + headerSkipped = true; // Skip the header line + continue; + } + + std::istringstream ss(line); + ddr_noc_row_data_t row; + int index; + std::string note; + + ss >> index >> row.time >> row.counter0 >> row.counter1 >> row.counter2; + data.push_back(row); + } + + file.close(); + return data; +} + +int32_t QueryStatsUtils::calculate_ddr_noc_data_per_second(const std::vector &data, int ddr_noc_row_data_t::*member, + const float32_t duration) +{ + double sum = 0.0; + for (const auto &row : data) { + sum += row.*member; + } + return static_cast(data.empty() ? 
0.0 : sum / static_cast(duration)); +} + +hailo_status QueryStatsUtils::execute_noc_command(const std::string &command) +{ + if (!Filesystem::does_file_exists(HAILO_NOC_PERF_FILE_PATH)) { + LOGGER__ERROR("Error: File {} does not exist", HAILO_NOC_PERF_FILE_PATH); + return HAILO_FILE_OPERATION_FAILURE; + } + + const std::string command_with_source = std::string(". ") + HAILO_NOC_PERF_FILE_PATH + " && " + command; + LOGGER__INFO("Run the following DDR NOC command: {}", command_with_source); + + auto ret_val = system(command_with_source.c_str()); + if (0 != ret_val) { + LOGGER__ERROR("Error: Failed to execute DDR NOC command: {}", command_with_source); + return HAILO_INTERNAL_FAILURE; + } + + return HAILO_SUCCESS; +} + +Expected> QueryStatsUtils::run_command(const std::string &cmd) +{ + static const std::string COMMAND_OUTPUT_FILE = "/tmp/command_output"; + const std::string command_with_stdout = "sh -c \"" + cmd + "\" > " + COMMAND_OUTPUT_FILE; + auto ret_val = system(command_with_stdout.c_str()); + if (0 != ret_val) { + return std::make_pair(ret_val, std::string()); + } + + FileReader file_reader(COMMAND_OUTPUT_FILE); + CHECK_SUCCESS(file_reader.open()); + TRY(auto file_size, file_reader.get_size()); + TRY(auto buffer, Buffer::create(file_size)); + CHECK_SUCCESS(file_reader.read(buffer.data(), file_size)); + + std::string output(buffer.to_string()); + return std::make_pair(ret_val, output); +} + +Expected QueryStatsUtils::get_ddr_noc_utilization() +{ + std::string delay_str = get_sampling_time_window_as_string(); + + if (Filesystem::does_file_exists(HAILO_NOC_MEASURE_OUTPUT_FILE_PATH)) { + std::remove(HAILO_NOC_MEASURE_OUTPUT_FILE_PATH); + } + + auto status = execute_noc_command("noc_set_counter_total 0"); + CHECK_SUCCESS_AS_EXPECTED(status); + + status = execute_noc_command("noc_measure_sleep " + delay_str + " 50 0 " + HAILO_NOC_MEASURE_OUTPUT_FILE_PATH); + CHECK_SUCCESS_AS_EXPECTED(status); + + auto data = 
read_ddr_noc_output_file(HAILO_NOC_MEASURE_OUTPUT_FILE_PATH); + CHECK_EXPECTED(data); + + if (data.value().empty()) { + LOGGER__ERROR("Error: No data available to process to get ddr_noc_utilization."); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + int32_t total_transactions = calculate_ddr_noc_data_per_second(data.value(), &ddr_noc_row_data_t::counter0, std::stof(delay_str)); + + return total_transactions; +} + +Expected QueryStatsUtils::read_nnc_utilization_file() +{ + const std::string get_nnc_utilization_command = std::string("cat ") + NNC_UTILIZATION_FILE_PATH + + std::string("* && rm -f ") + NNC_UTILIZATION_FILE_PATH + std::string("*"); + + const auto output = run_command(get_nnc_utilization_command); + CHECK_EXPECTED(output); + + std::istringstream stream(output.value().second); + return stream; +} + +Expected QueryStatsUtils::get_nnc_utilization(const std::string &id_info_str, const std::string &device_arch_str) +{ + auto unique_vdevice_hash = std::to_string(std::chrono::duration_cast( + std::chrono::system_clock::now().time_since_epoch()).count()); + TRACE(MonitorStartTrace, unique_vdevice_hash); + TRACE(AddDeviceTrace, id_info_str, device_arch_str); + + std::this_thread::sleep_for(PERFORMANCE_QUERY_SAMPLING_TIME_WINDOW); + + TRACE(DumpProfilerStateTrace); + + const uint32_t max_retries = 5; + uint32_t retry = 0; + std::string ret_val_str = ""; + + while (retry < max_retries) { + auto stream = read_nnc_utilization_file(); + CHECK_EXPECTED(stream); + ret_val_str = stream.value().str(); + + if (ret_val_str.empty()) { + retry++; + std::this_thread::sleep_for(PERFORMANCE_QUERY_SAMPLING_TIME_WINDOW); + } else { + break; + } + } + + TRACE(MonitorEndTrace, unique_vdevice_hash); + + if (ret_val_str.empty()) { + LOGGER__ERROR("Error: No data available to process to get nnc_utilization."); + return make_unexpected(HAILO_INTERNAL_FAILURE); + } + + float32_t utilization = static_cast(std::stof(ret_val_str)); + return utilization; +} + +} /* namespace 
hailort */ diff --git a/hailort/libhailort/src/utils/query_stats_utils.hpp b/hailort/libhailort/src/utils/query_stats_utils.hpp new file mode 100644 index 0000000..02f1c45 --- /dev/null +++ b/hailort/libhailort/src/utils/query_stats_utils.hpp @@ -0,0 +1,51 @@ +/** + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. + * Distributed under the MIT license (https://opensource.org/licenses/MIT) + **/ +/** + * @file query_stats_utils.hpp + * @brief QueryStatsUtils is a class for querying the system for performance and health information. + **/ + +#ifndef _HAILO_QUERY_STATS_UTILS_HPP_ +#define _HAILO_QUERY_STATS_UTILS_HPP_ + +#include "hailo/hailort.h" +#include "hailo/expected.hpp" +#include "common/utils.hpp" + +#include <string> + +namespace hailort { + +typedef struct { + double time; + int counter0; + int counter1; + int counter2; +} ddr_noc_row_data_t; + +class QueryStatsUtils +{ +public: + static Expected<float32_t> calculate_cpu_utilization(); + static Expected<std::tuple<int64_t, int64_t>> calculate_ram_sizes(); + static Expected<int32_t> get_dsp_utilization(); + static Expected<float32_t> get_nnc_utilization(const std::string &id_info_str, const std::string &device_arch_str); + static Expected<int32_t> get_ddr_noc_utilization(); + +private: + static hailo_status parse_cpu_stats(uint64_t &user, uint64_t &nice, uint64_t &system, uint64_t &idle, + uint64_t &iowait, uint64_t &irq, uint64_t &softirq, uint64_t &steal); + static Expected<std::vector<ddr_noc_row_data_t>> read_ddr_noc_output_file(const std::string &filename); + static int32_t calculate_ddr_noc_data_per_second(const std::vector<ddr_noc_row_data_t> &data, int ddr_noc_row_data_t::*member, + const float32_t duration); + static hailo_status execute_noc_command(const std::string &command); + static Expected<std::pair<int32_t, std::string>> run_command(const std::string &cmd); + static Expected<std::istringstream> read_nnc_utilization_file(); + static std::string get_sampling_time_window_as_string(); +}; + +} /* namespace hailort */ + +#endif /* _HAILO_QUERY_STATS_UTILS_HPP_ */ \ No newline at end of file diff --git 
a/hailort/libhailort/src/utils/sensor_config_utils.cpp b/hailort/libhailort/src/utils/sensor_config_utils.cpp index 1f723f1..3610cb7 100644 --- a/hailort/libhailort/src/utils/sensor_config_utils.cpp +++ b/hailort/libhailort/src/utils/sensor_config_utils.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/sensor_config_utils.hpp b/hailort/libhailort/src/utils/sensor_config_utils.hpp index 628bf34..17f1b71 100644 --- a/hailort/libhailort/src/utils/sensor_config_utils.hpp +++ b/hailort/libhailort/src/utils/sensor_config_utils.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/shared_resource_manager.hpp b/hailort/libhailort/src/utils/shared_resource_manager.hpp index 8e4e394..2a501e4 100644 --- a/hailort/libhailort/src/utils/shared_resource_manager.hpp +++ b/hailort/libhailort/src/utils/shared_resource_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp index 9a55118..b20a18b 100644 --- a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp +++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -17,10 +17,13 @@ namespace hailort { +//TODO: HRT-16652 - support more architecture's SKU // SKU is three bit value in fuse file in order to differentiate the different kind of boards -#define SKU_VALUE_BITMAP (0x7) -#define HAILO15H_SKU_VALUE (0x0) -#define HAILO15M_SKU_VALUE (0x3) +#define SKU_VALUE_BITMAP (0x7) +#define HAILO15H_SKU_VALUE (0x0) +#define HAILO10H_SKU_VALUE (0x1) +#define HAILO15M_SKU_VALUE (0x3) + // SKU and partial cluster layout bitmap are located at specific locations in the fuse file according to the spec // Located in issue HRT-12971 @@ -34,6 +37,7 @@ Expected PartialClusterReader::get_arch_default_bitmap(hailo_device_ar // Currently only supported architectures for this function are HAILO15H and HAILO15M - but in future can add case HAILO_ARCH_HAILO15H: case HAILO_ARCH_HAILO15M: + case HAILO_ARCH_HAILO10H: return static_cast(PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15_DEFAULT); default: LOGGER__ERROR("Error, Given architecture {} doesnt support partial cluster layout", @@ -44,9 +48,9 @@ Expected PartialClusterReader::get_arch_default_bitmap(hailo_device_ar bool PartialClusterReader::validate_arch_partial_clusters_bitmap(uint32_t bitmap, uint8_t sku_value) { - // Currently only supported architectures for this function are HAILO15H and HAILO15M - but in future can add switch (sku_value) { case HAILO15H_SKU_VALUE: + case HAILO10H_SKU_VALUE: return (PARTIAL_CLUSTERS_LAYOUT_BITMAP__HAILO15_DEFAULT == bitmap); case HAILO15M_SKU_VALUE: return (std::find(HAILO15M__PARTIAL_CLUSTERS_LAYOUT_BITMAP_ARRAY.begin(), @@ -97,11 +101,12 @@ Expected> PartialClusterReader::read_fuse_file() Expected PartialClusterReader::get_sku_value_from_arch(hailo_device_architecture_t dev_arch) { switch(dev_arch) { - // Currently only supported architectures for this function are HAILO15H and HAILO15M - but in future can 
add case HAILO_ARCH_HAILO15H: return HAILO15H_SKU_VALUE; case HAILO_ARCH_HAILO15M: return HAILO15M_SKU_VALUE; + case HAILO_ARCH_HAILO10H: + return HAILO10H_SKU_VALUE; default: LOGGER__ERROR("Error, Unknown sku value for Given architecture {}", HailoRTCommon::get_device_arch_str(dev_arch)); @@ -126,11 +131,15 @@ Expected PartialClusterReader::get_partial_clusters_layout_bitmap(hail const auto sku_value = fuse_file_data.second; switch (dev_arch) { case HAILO_ARCH_HAILO15H: - CHECK_AS_EXPECTED((HAILO15H_SKU_VALUE == sku_value), HAILO_INTERNAL_FAILURE, + CHECK(HAILO15H_SKU_VALUE == sku_value, HAILO_INTERNAL_FAILURE, "Device arch is of type {} but sku is {}", static_cast(dev_arch), sku_value); break; case HAILO_ARCH_HAILO15M: - CHECK_AS_EXPECTED((HAILO15M_SKU_VALUE == sku_value), HAILO_INTERNAL_FAILURE, + CHECK(HAILO15M_SKU_VALUE == sku_value, HAILO_INTERNAL_FAILURE, + "Device arch is of type {} but sku is {}", static_cast(dev_arch), sku_value); + break; + case HAILO_ARCH_HAILO10H: + CHECK(HAILO10H_SKU_VALUE == sku_value, HAILO_INTERNAL_FAILURE, "Device arch is of type {} but sku is {}", static_cast(dev_arch), sku_value); break; default: @@ -154,6 +163,8 @@ Expected PartialClusterReader::get_actual_dev_arch_ return HAILO_ARCH_HAILO15M; } else if (HAILO15H_SKU_VALUE == sku_value) { return HAILO_ARCH_HAILO15H; + } else if (HAILO10H_SKU_VALUE == sku_value) { + return HAILO_ARCH_HAILO10H; } else { LOGGER__ERROR("Error, Invalid sku received {}", sku_value); return make_unexpected(HAILO_INVALID_ARGUMENT); diff --git a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp index 7f454d2..e643ac1 100644 --- a/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp +++ b/hailort/libhailort/src/utils/soc_utils/partial_cluster_reader.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/utils/thread_safe_map.hpp b/hailort/libhailort/src/utils/thread_safe_map.hpp index f48293e..48e8463 100644 --- a/hailort/libhailort/src/utils/thread_safe_map.hpp +++ b/hailort/libhailort/src/utils/thread_safe_map.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp index aee37cc..8b79e97 100644 --- a/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp +++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp index e9b27e5..0f5df14 100644 --- a/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp +++ b/hailort/libhailort/src/vdevice/callback_reorder_queue.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp index 153e059..e638ece 100644 --- a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp index 2d56245..8663481 100644 --- a/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/infer_request_accumulator.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp index 05785fd..61f4821 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -39,9 +39,7 @@ Expected> ScheduledCoreOp::create(std::shared_p CHECK_EXPECTED(batch_size_expected); const auto max_batch_size = batch_size_expected.release(); - auto max_queue_size_per_device_expected = added_core_op->get_async_max_queue_size_per_device(); - CHECK_EXPECTED(max_queue_size_per_device_expected); - const auto max_queue_size_per_device = max_queue_size_per_device_expected.release(); + TRY(auto max_queue_size_per_device, added_core_op->get_infer_queue_size_per_device()); // DEFAULT_BATCH_SIZE and SINGLE_CONTEXT_BATCH_SIZE support streaming and therfore we are not using dynamic batch flow auto use_dynamic_batch_flow = added_core_op->get_supported_features().multi_context && (max_batch_size > SINGLE_CONTEXT_BATCH_SIZE); @@ -97,6 +95,9 @@ bool ScheduledCoreOp::is_over_threshold() const bool ScheduledCoreOp::is_over_threshold_timeout() const { + if (HAILO_INFINITE_TIMEOUT == m_timeout) { + return false; + } return m_timeout <= (std::chrono::steady_clock::now() - m_last_run_time_stamp); } diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp index 78bbd9b..b58ece3 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_core_op_state.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp index 94a3645..ce26acf 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp index 6c70395..335c962 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduled_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp index 3424618..dd03371 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -296,6 +296,7 @@ hailo_status CoreOpsScheduler::set_priority(const scheduler_core_op_handle_t &co hailo_status CoreOpsScheduler::bind_buffers() { + hailo_status status = HAILO_SUCCESS; // For now, binding buffers will take place only on one device if (m_devices.size() > 1) { return HAILO_SUCCESS; @@ -312,11 +313,15 @@ hailo_status CoreOpsScheduler::bind_buffers() TRY(auto infer_request, m_infer_requests.at(core_op_pair.first).dequeue()); TRY(auto vdma_core_op, get_vdma_core_op(core_op_pair.first, m_devices.begin()->second->device_id)); - CHECK_SUCCESS(vdma_core_op->bind_buffers(infer_request.transfers)); + status = (vdma_core_op->bind_buffers(infer_request.transfers)); m_bounded_infer_requests[core_op_pair.first].enqueue(std::move(infer_request)); + if (HAILO_SUCCESS != status) { + LOGGER__ERROR("Failed to bind buffers for core op {}", core_op_pair.first); + return status; + } } - return HAILO_SUCCESS; + return status; } hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_core_op_handle_t &core_op_handle) @@ -328,10 +333,12 @@ hailo_status CoreOpsScheduler::optimize_streaming_if_enabled(const scheduler_cor next_pair = m_devices.begin(); } auto &device_info = next_pair->second; + // if HAILO_DISABLE_IDLE_OPT_ENV_VAR then we want the burst size to be the threshold + auto burst_size = is_env_variable_on(HAILO_DISABLE_IDLE_OPT_ENV_VAR)? 
scheduled_core_op->get_threshold() : DEFAULT_BURST_SIZE; if (device_info->current_core_op_handle == core_op_handle && !device_info->is_switching_core_op && !CoreOpsSchedulerOracle::should_stop_streaming(*this, scheduled_core_op->get_priority(), device_info->device_id) && - (get_frames_ready_to_transfer(core_op_handle, device_info->device_id) >= DEFAULT_BURST_SIZE)) { - auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, DEFAULT_BURST_SIZE); + (get_frames_ready_to_transfer(core_op_handle, device_info->device_id) >= burst_size)) { + auto status = send_all_pending_buffers(core_op_handle, device_info->device_id, burst_size); CHECK_SUCCESS(status); } } diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp index 9bd0b97..e98f20d 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp index e9040db..47396b7 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_base.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp index ce5f583..76d2aa3 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp index 110dfe4..cd8f74f 100644 --- a/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp +++ b/hailort/libhailort/src/vdevice/scheduler/scheduler_oracle.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/vdevice.cpp b/hailort/libhailort/src/vdevice/vdevice.cpp index c3922de..dadc331 100644 --- a/hailort/libhailort/src/vdevice/vdevice.cpp +++ b/hailort/libhailort/src/vdevice/vdevice.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -114,7 +114,8 @@ hailo_status VDevice::after_fork_in_child() return HAILO_SUCCESS; } -VDeviceHandle::VDeviceHandle(uint32_t handle) : m_handle(handle) +VDeviceHandle::VDeviceHandle(const hailo_vdevice_params_t ¶ms, uint32_t handle) : + VDevice(params), m_handle(handle) {} VDeviceHandle::~VDeviceHandle() @@ -138,7 +139,7 @@ Expected> VDeviceHandle::create(const hailo_vdevice_par release_resource_if(same_vdevice_status != HAILO_SUCCESS, expected_handle.value()); CHECK_SUCCESS_AS_EXPECTED(same_vdevice_status); - auto handle_vdevice = std::unique_ptr(new VDeviceHandle(expected_handle.value())); + auto handle_vdevice = std::unique_ptr(new VDeviceHandle(params, expected_handle.value())); CHECK_AS_EXPECTED(handle_vdevice != nullptr, HAILO_OUT_OF_HOST_MEMORY); return std::unique_ptr(std::move(handle_vdevice)); @@ -244,8 +245,9 @@ bool VDevice::should_force_hrpc_client() #ifdef HAILO_SUPPORT_MULTI_PROCESS -VDeviceClient::VDeviceClient(std::unique_ptr client, uint32_t client_utils_handle, VDeviceIdentifier &&identifier, - std::vector> &&devices) : +VDeviceClient::VDeviceClient(const hailo_vdevice_params_t ¶ms, std::unique_ptr client, uint32_t client_utils_handle, + VDeviceIdentifier &&identifier, std::vector> &&devices) : + VDevice(params), m_client(std::move(client)), m_client_utils_handle(client_utils_handle), m_identifier(std::move(identifier)), @@ -284,6 +286,16 @@ hailo_status VDeviceClient::before_fork() { m_is_listener_thread_running = false; + const char* grpc_fork_support = std::getenv("GRPC_ENABLE_FORK_SUPPORT"); + const char* grpc_poll_strategy = std::getenv("GRPC_POLL_STRATEGY"); + + bool is_fork_supported = (grpc_fork_support && std::string(grpc_fork_support) == "1") && + (grpc_poll_strategy && std::string(grpc_poll_strategy) == "poll"); + + if (!is_fork_supported) { + LOGGER__WARNING("Using the same VDeviceClient instance after fork is supported only when setting the env vars 
GRPC_ENABLE_FORK_SUPPORT=1 and GRPC_POLL_STRATEGY=poll."); + } + TRY(auto instance, HailoRtRpcClientUtils::get_instance(m_client_utils_handle)); instance->before_fork(); m_client.reset(); @@ -351,7 +363,7 @@ Expected> VDeviceClient::create(const hailo_vdevice_par auto devices = (VDevice::service_over_ip_mode()) ? std::vector>() : client->VDevice_get_physical_devices(vdevice_handle); CHECK_EXPECTED(devices); - auto client_vdevice = std::unique_ptr(new VDeviceClient(std::move(client), client_utils_handle, VDeviceIdentifier(vdevice_handle), devices.release())); + auto client_vdevice = std::unique_ptr(new VDeviceClient(params, std::move(client), client_utils_handle, VDeviceIdentifier(vdevice_handle), devices.release())); CHECK_AS_EXPECTED(client_vdevice != nullptr, HAILO_OUT_OF_HOST_MEMORY); return std::unique_ptr(std::move(client_vdevice)); @@ -559,12 +571,24 @@ Expected> VDevice::create(const hailo_vdevice_params_t return vdevice_ptr; } +Expected> VDevice::create_shared(const hailo_vdevice_params_t ¶ms) +{ + TRY(std::shared_ptr vdevice, VDevice::create(params)); + return vdevice; +} + Expected> VDevice::create() { auto params = HailoRTDefaults::get_vdevice_params(); return create(params); } +Expected> VDevice::create_shared() +{ + TRY(std::shared_ptr vdevice, VDevice::create()); + return vdevice; +} + Expected> VDevice::create(const std::vector &device_ids) { auto params = HailoRTDefaults::get_vdevice_params(); @@ -578,6 +602,12 @@ Expected> VDevice::create(const std::vector> VDevice::create_shared(const std::vector &device_ids) +{ + TRY(std::shared_ptr vdevice, VDevice::create(device_ids)); + return vdevice; +} + Expected VDeviceBase::get_accelerator_type(hailo_device_id_t *device_ids, size_t device_count) { auto acc_type = HailoRTDriver::AcceleratorType::ACC_TYPE_MAX_VALUE; @@ -662,7 +692,7 @@ Expected> VDeviceBase::create(const hailo_vdevice_p } } - auto vdevice = std::unique_ptr(new (std::nothrow) VDeviceBase(std::move(devices), scheduler_ptr, 
unique_vdevice_hash)); + auto vdevice = std::unique_ptr(new (std::nothrow) VDeviceBase(params, std::move(devices), scheduler_ptr, unique_vdevice_hash)); CHECK_AS_EXPECTED(nullptr != vdevice, HAILO_OUT_OF_HOST_MEMORY); return vdevice; @@ -753,13 +783,25 @@ Expected VDeviceBase::configure(Hef &hef, Expected> VDevice::create_infer_model(const std::string &hef_path, const std::string &name) { - TRY(auto infer_model_base, InferModelBase::create(*this, hef_path, name)); - return std::shared_ptr(std::move(infer_model_base)); + TRY(auto hef, Hef::create(hef_path)); + return create_infer_model(hef, name); } Expected> VDevice::create_infer_model(const MemoryView hef_buffer, const std::string &name) { - TRY(auto infer_model_base, InferModelBase::create(*this, hef_buffer, name)); + TRY(auto hef, Hef::create(hef_buffer)); + return create_infer_model(hef, name); +} + +Expected> VDevice::create_infer_model(std::shared_ptr hef_buffer, const std::string &name) +{ + TRY(auto hef, Hef::create(hef_buffer)); + return create_infer_model(hef, name); +} + +Expected> VDevice::create_infer_model(Hef hef, const std::string &name) +{ + TRY(auto infer_model_base, InferModelBase::create(*this, hef, name)); return std::shared_ptr(std::move(infer_model_base)); } @@ -829,8 +871,7 @@ Expected> VDeviceBase::get_device_ids(const hailo_vdevi if (params.device_ids == nullptr) { // Use device scan pool return Device::scan(); - } - else { + } else { std::vector device_ids; device_ids.reserve(params.device_count); diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp index 799fa81..315a3c4 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -39,12 +39,12 @@ Expected> VDeviceCoreOp::create(VDevice &vdevice, } } - // On HcpConfigCoreOp, we don't support get_async_max_queue_size (and the core op doesn't use the queue). + // On HcpConfigCoreOp, we don't support infer_queue_size (and the core op doesn't use the queue). size_t queue_size = 0; auto iface = core_ops.begin()->second->get_default_streams_interface(); CHECK_EXPECTED(iface); if ((iface.value() != HAILO_STREAM_INTERFACE_ETH) && (iface.value() != HAILO_STREAM_INTERFACE_MIPI)) { - auto per_device_queue_size = core_ops.begin()->second->get_async_max_queue_size(); + auto per_device_queue_size = core_ops.begin()->second->infer_queue_size(); CHECK_EXPECTED(per_device_queue_size); queue_size = *per_device_queue_size * core_ops.size(); } @@ -89,7 +89,7 @@ Expected> VDeviceCoreOp::duplicate(std::shared_pt VDeviceCoreOp::~VDeviceCoreOp() { - (void)shutdown(); + (void)shutdown_impl(); } VDeviceCoreOp::VDeviceCoreOp(VDevice &vdevice, @@ -382,6 +382,11 @@ hailo_status VDeviceCoreOp::deactivate_impl() } hailo_status VDeviceCoreOp::shutdown() +{ + return shutdown_impl(); +} + +hailo_status VDeviceCoreOp::shutdown_impl() { hailo_status status = HAILO_SUCCESS; // Success oriented @@ -406,10 +411,9 @@ hailo_status VDeviceCoreOp::shutdown() m_core_ops_scheduler.lock()->remove_core_op(m_core_op_handle); assert(m_infer_requests_accumulator); - auto queue_size = get_async_max_queue_size(); - assert(queue_size); + TRY(auto queue_size, infer_queue_size()); - const auto timeout = DEFAULT_TRANSFER_TIMEOUT * (*queue_size); + const auto timeout = DEFAULT_TRANSFER_TIMEOUT * queue_size; auto accumulator_shutdown_status = m_infer_requests_accumulator->shutdown(timeout); if (HAILO_SUCCESS != accumulator_shutdown_status) { status = accumulator_shutdown_status; @@ -441,9 +445,9 @@ Expected> VDeviceCoreOp::get_core_op_by_device return core_op; } -Expected 
VDeviceCoreOp::get_async_max_queue_size_per_device() const +Expected VDeviceCoreOp::get_infer_queue_size_per_device() const { - return m_core_ops.begin()->second->get_async_max_queue_size(); + return m_core_ops.begin()->second->infer_queue_size(); } Expected VDeviceCoreOp::run_hw_infer_estimator() @@ -499,11 +503,11 @@ bool VDeviceCoreOp::has_caches() const return false; } -hailo_status VDeviceCoreOp::init_cache(uint32_t read_offset, int32_t write_offset_delta) +hailo_status VDeviceCoreOp::init_cache(uint32_t read_offset) { CHECK(1 == m_core_ops.size(), HAILO_INVALID_OPERATION, "init_cache function is not supported on more than 1 physical device."); - return m_core_ops.begin()->second->init_cache(read_offset, write_offset_delta); + return m_core_ops.begin()->second->init_cache(read_offset); } hailo_status VDeviceCoreOp::update_cache_offset(int32_t offset_delta_entries) @@ -547,7 +551,7 @@ hailo_status VDeviceCoreOp::add_to_trace() if (*stream_interface != HAILO_STREAM_INTERFACE_ETH) { for (const auto &core_op : m_core_ops) { - auto queue_size_exp = core_op.second->get_async_max_queue_size(); + auto queue_size_exp = core_op.second->infer_queue_size(); CHECK_EXPECTED_AS_STATUS(queue_size_exp); const uint32_t queue_size = static_cast(*queue_size_exp); diff --git a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp index 021665c..6ba531a 100644 --- a/hailort/libhailort/src/vdevice/vdevice_core_op.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_core_op.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -89,7 +89,7 @@ public: size_t devices_count() const { return m_core_ops.size(); } Expected> get_core_op_by_device_id(const device_id_t &device_bdf_id); - Expected get_async_max_queue_size_per_device() const; + Expected get_infer_queue_size_per_device() const; virtual Expected run_hw_infer_estimator() override; virtual Expected get_intermediate_buffer(const IntermediateBufferKey &) override; @@ -98,7 +98,7 @@ public: virtual Expected get_cache_read_length() const override; virtual Expected get_cache_write_length() const override; virtual Expected get_cache_entry_size(uint32_t cache_id) const override; - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override; + virtual hailo_status init_cache(uint32_t read_offset) override; virtual hailo_status update_cache_offset(int32_t offset_delta_entries) override; virtual Expected> get_cache_ids() const override; virtual Expected read_cache_buffer(uint32_t cache_id) override; @@ -114,6 +114,7 @@ public: hailo_status &status); private: + hailo_status shutdown_impl(); hailo_status create_vdevice_streams_from_config_params(); hailo_status create_input_vdevice_stream_from_config_params( const hailo_stream_parameters_t &stream_params, const std::string &stream_name); diff --git a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp index 66e7c96..c7ef87e 100644 --- a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -16,65 +16,85 @@ namespace hailort { -Expected VDeviceHrpcClient::get_device_id(const hailo_vdevice_params_t ¶ms) +Expected> VDeviceHrpcClient::get_device_ids(const hailo_vdevice_params_t ¶ms) { // TODO: Validate the chosen device-id is of the requested type (eiter soc-acc or nnc-acc)? - std::string device_id; - if (nullptr != params.device_ids) { - return std::string(params.device_ids[0].id); - } else { + if (nullptr == params.device_ids) { auto acc_type = HailoRTDriver::AcceleratorType::SOC_ACCELERATOR; // If forcing hrpc service, we assume here that there is a NNC-acc connected as we use sockets if (VDevice::should_force_hrpc_client()) { acc_type = HailoRTDriver::AcceleratorType::NNC_ACCELERATOR; } - - TRY(auto scan_results, HailoRTDriver::scan_devices(acc_type)); - CHECK_AS_EXPECTED(scan_results.size() > 0, HAILO_OUT_OF_PHYSICAL_DEVICES, "No devices found"); - - return std::string(scan_results[0].device_id); + TRY(auto device_infos, HailoRTDriver::scan_devices(acc_type)); + std::vector device_ids; + device_ids.reserve(device_infos.size()); + for (const auto &device_info : device_infos) { + device_ids.push_back(device_info.device_id); + } + return device_ids; + } else { + std::vector device_ids; + device_ids.reserve(params.device_count); + for (uint32_t i = 0; i < params.device_count; i++) { + device_ids.push_back(std::string(params.device_ids[i].id)); + } + return device_ids; } } +Expected, rpc_object_handle_t>> +VDeviceHrpcClient::create_available_vdevice(const std::vector &device_ids, const hailo_vdevice_params_t ¶ms) +{ + const bool is_user_specific_devices = (params.device_ids != nullptr); + + for (const auto &device_id : device_ids) { + auto client = make_shared_nothrow(device_id); + CHECK_NOT_NULL(client, HAILO_INTERNAL_FAILURE); + + auto status = client->connect(); + CHECK_SUCCESS(status, "Failed to connect to server"); + + TRY(auto request_buffer, 
client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, CreateVDeviceSerializer::serialize_request(params, MemoryView(*request_buffer))); + TRY(auto result, client->execute_request(HailoRpcActionID::VDEVICE__CREATE, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, CreateVDeviceSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); + status = std::get<0>(tuple); + if (!is_user_specific_devices && (HAILO_DEVICE_IN_USE == status)) { + continue; + } + CHECK_SUCCESS(status); + + return std::make_tuple(client, std::get<1>(tuple)); // Only single device is supported + } + + LOGGER__ERROR("Failed to create vdevice. there are not enough free devices. requested: 1, found: 0"); + return make_unexpected(HAILO_OUT_OF_PHYSICAL_DEVICES); +} + Expected> VDeviceHrpcClient::create(const hailo_vdevice_params_t ¶ms) { - CHECK_AS_EXPECTED(params.device_count == 1, HAILO_OUT_OF_PHYSICAL_DEVICES, "Only single device is supported!"); + CHECK(params.device_count == 1, HAILO_OUT_OF_PHYSICAL_DEVICES, "Only single device is supported!"); - TRY(auto device_id, get_device_id(params)); - auto client = make_shared_nothrow(device_id); - CHECK_NOT_NULL(client, HAILO_INTERNAL_FAILURE); - - auto status = client->connect(); - CHECK_SUCCESS_AS_EXPECTED(status, "Failed to connect to server"); - - auto callbacks_dispatcher = make_shared_nothrow(); - CHECK_NOT_NULL_AS_EXPECTED(callbacks_dispatcher, HAILO_OUT_OF_HOST_MEMORY); + TRY(auto device_ids, get_device_ids(params)); + TRY(auto tuple, create_available_vdevice(device_ids, params)); + auto client = std::get<0>(tuple); client->register_custom_reply(HailoRpcActionID::CALLBACK_CALLED, - [callbacks_dispatcher] (const MemoryView &serialized_reply, RpcConnection connection) -> hailo_status { - TRY(auto tuple, CallbackCalledSerializer::deserialize_reply(serialized_reply)); - auto callback_status = std::get<0>(tuple); - auto callback_handle_id = std::get<1>(tuple); - 
auto cim_handle = std::get<2>(tuple); - - auto status = callbacks_dispatcher->at(cim_handle)->push_callback(callback_status, callback_handle_id, connection); + [callback_dispatcher_manager = client->callback_dispatcher_manager()] (const MemoryView &serialized_reply, RpcConnection connection) -> hailo_status { + TRY(auto rpc_callback, CallbackCalledSerializer::deserialize_reply(serialized_reply)); + auto status = callback_dispatcher_manager->at(rpc_callback.dispatcher_id)->trigger_callback(rpc_callback, connection); CHECK_SUCCESS(status); return HAILO_SUCCESS; }); - TRY(auto request, CreateVDeviceSerializer::serialize_request(params)); - TRY(auto result, client->execute_request(HailoRpcActionID::VDEVICE__CREATE, MemoryView(request))); - TRY(auto tuple, CreateVDeviceSerializer::deserialize_reply(MemoryView(result))); - status = std::get<0>(tuple); - CHECK_SUCCESS_AS_EXPECTED(status); - + auto device_id = client->device_id(); TRY(auto device, PcieDeviceHrpcClient::create(device_id, client)); auto vdevice_handle = std::get<1>(tuple); - auto vdevice_client = make_unique_nothrow(std::move(client), vdevice_handle, callbacks_dispatcher, - std::move(device), device_id); + auto vdevice_client = make_unique_nothrow(params, std::move(client), vdevice_handle, + client->callback_dispatcher_manager(), std::move(device), device_id); CHECK_NOT_NULL(vdevice_client, HAILO_OUT_OF_HOST_MEMORY); return std::unique_ptr(std::move(vdevice_client)); @@ -86,42 +106,46 @@ VDeviceHrpcClient::~VDeviceHrpcClient() return; } - auto request = DestroyVDeviceSerializer::serialize_request(m_handle); - if (!request) { + auto request_buffer = m_client->allocate_request_buffer(); + if (!request_buffer) { + LOGGER__CRITICAL("Failed to create buffer for VDevice_release request"); + return; + } + + auto request_size = DestroyVDeviceSerializer::serialize_request(m_handle, MemoryView(**request_buffer)); + if (!request_size) { LOGGER__CRITICAL("Failed to serialize VDevice_release request"); return; } - auto 
result = m_client->execute_request(HailoRpcActionID::VDEVICE__DESTROY, MemoryView(*request)); - if (!result) { - LOGGER__CRITICAL("Failed to destroy VDevice! status = {}", result.status()); + auto result_expected = m_client->execute_request(HailoRpcActionID::VDEVICE__DESTROY, MemoryView(request_buffer.value()->data(), *request_size)); + if (!result_expected) { + LOGGER__CRITICAL("Failed to destroy VDevice! status = {}", result_expected.status()); return; } + auto result = result_expected.release(); - if (HAILO_SUCCESS != DestroyVDeviceSerializer::deserialize_reply(MemoryView(*result))) { - LOGGER__CRITICAL("Failed to destroy VDevice! status = {}", result.status()); + auto status = DestroyVDeviceSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to destroy VDevice! status = {}", status); } } Expected> VDeviceHrpcClient::create_infer_model(const MemoryView hef_buffer, const std::string &name) { - TRY(auto request, CreateInferModelSerializer::serialize_request(m_handle, hef_buffer.size(), name)); - TRY(auto result, m_client->execute_request(HailoRpcActionID::VDEVICE__CREATE_INFER_MODEL, - MemoryView(request), [&hef_buffer] (RpcConnection connection) -> hailo_status { - // TODO: change write to accept uint64_t, or accept file stream instead or write in chunks - auto status = connection.write_buffer(hef_buffer); - CHECK_SUCCESS(status); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); - return HAILO_SUCCESS; - })); - TRY(auto tuple, CreateInferModelSerializer::deserialize_reply(MemoryView(result))); + TRY(auto request_size, CreateInferModelSerializer::serialize_request(m_handle, hef_buffer.size(), name, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(HailoRpcActionID::VDEVICE__CREATE_INFER_MODEL, + MemoryView(request_buffer->data(), request_size), std::vector{hef_buffer})); + TRY(auto tuple, 
CreateInferModelSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto infer_model_handle = std::get<1>(tuple); TRY(auto hef, Hef::create(hef_buffer)); TRY(auto infer_model, InferModelHrpcClient::create(std::move(hef), name, m_client, infer_model_handle, m_handle, - *this, m_callbacks_dispatcher)); + *this, m_callback_dispatcher_manager)); return std::shared_ptr(std::move(infer_model)); } diff --git a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp index 12a9756..78fb358 100644 --- a/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_hrpc_client.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -23,13 +23,13 @@ class VDeviceHrpcClient : public VDevice { public: static Expected> create(const hailo_vdevice_params_t ¶ms); + static Expected> get_device_ids(const hailo_vdevice_params_t ¶ms); - static Expected get_device_id(const hailo_vdevice_params_t ¶ms); - - VDeviceHrpcClient(std::shared_ptr client, uint32_t handle, std::shared_ptr callbacks_dispatcher, + VDeviceHrpcClient(const hailo_vdevice_params_t ¶ms, std::shared_ptr client, uint32_t handle, + std::shared_ptr callback_dispatcher_manager, std::unique_ptr &&device, std::string device_id) - : m_client(client), m_handle(handle), m_callbacks_dispatcher(callbacks_dispatcher), m_device(std::move(device)), - m_device_id(device_id) {} + : VDevice(params), m_client(client), m_handle(handle), m_callback_dispatcher_manager(callback_dispatcher_manager), + m_device(std::move(device)), m_device_id(device_id) {} VDeviceHrpcClient(VDeviceHrpcClient &&) = delete; VDeviceHrpcClient(const VDeviceHrpcClient &) = delete; @@ -51,9 +51,12 @@ 
public: virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override; private: + static Expected, rpc_object_handle_t>> + create_available_vdevice(const std::vector &device_ids, const hailo_vdevice_params_t ¶ms); + std::shared_ptr m_client; uint32_t m_handle; - std::shared_ptr m_callbacks_dispatcher; + std::shared_ptr m_callback_dispatcher_manager; std::unique_ptr m_device; std::string m_device_id; }; diff --git a/hailort/libhailort/src/vdevice/vdevice_internal.hpp b/hailort/libhailort/src/vdevice/vdevice_internal.hpp index 4dfd743..0029564 100644 --- a/hailort/libhailort/src/vdevice/vdevice_internal.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_internal.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -144,9 +144,9 @@ public: static Expected device_ids_contains_eth(const hailo_vdevice_params_t ¶ms); private: - VDeviceBase(std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler, - const std::string &unique_vdevice_hash="") : - m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler), m_next_core_op_handle(0), m_unique_vdevice_hash(unique_vdevice_hash) + VDeviceBase(const hailo_vdevice_params_t ¶ms, std::map> &&devices, CoreOpsSchedulerPtr core_ops_scheduler, + const std::string &unique_vdevice_hash="") : VDevice(params), + m_devices(std::move(devices)), m_core_ops_scheduler(core_ops_scheduler), m_next_core_op_handle(0), m_unique_vdevice_hash(unique_vdevice_hash) {} static Expected>> create_devices(const hailo_vdevice_params_t ¶ms); @@ -199,7 +199,8 @@ public: virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override; private: - VDeviceClient(std::unique_ptr client, uint32_t client_handle, VDeviceIdentifier &&identifier, std::vector> 
&&devices); + VDeviceClient(const hailo_vdevice_params_t ¶ms, std::unique_ptr client, + uint32_t client_handle, VDeviceIdentifier &&identifier, std::vector> &&devices); hailo_status create_client(); hailo_status start_listener_thread(VDeviceIdentifier identifier); @@ -246,7 +247,7 @@ public: virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override; private: - VDeviceHandle(uint32_t handle); + VDeviceHandle(const hailo_vdevice_params_t ¶ms, uint32_t handle); uint32_t m_handle; }; diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp index a07aeff..dda0953 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp b/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp index 20d0530..13cb858 100644 --- a/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp +++ b/hailort/libhailort/src/vdevice/vdevice_native_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp index f3c0f0c..bce89c8 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.cpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -25,11 +25,11 @@ namespace vdma { Expected BoundaryChannel::create(HailoRTDriver &driver, vdma::ChannelId channel_id, Direction direction, vdma::DescriptorList &&desc_list, TransferLauncher &transfer_launcher, - size_t ongoing_transfers, size_t pending_transfers, bool split_transfer, const std::string &stream_name, LatencyMeterPtr latency_meter) + size_t queue_size, bool split_transfer, const std::string &stream_name, LatencyMeterPtr latency_meter) { hailo_status status = HAILO_UNINITIALIZED; auto channel_ptr = make_shared_nothrow(driver, channel_id, direction, std::move(desc_list), - transfer_launcher, ongoing_transfers, pending_transfers, split_transfer, stream_name, latency_meter, status); + transfer_launcher, queue_size, split_transfer, stream_name, latency_meter, status); CHECK_NOT_NULL_AS_EXPECTED(channel_ptr, HAILO_OUT_OF_HOST_MEMORY); CHECK_SUCCESS_AS_EXPECTED(status, "Failed creating BoundaryChannel"); return channel_ptr; @@ -42,7 +42,7 @@ size_t BoundaryChannel::get_chunk_size() const BoundaryChannel::BoundaryChannel(HailoRTDriver &driver, vdma::ChannelId channel_id, Direction direction, DescriptorList &&desc_list, TransferLauncher &transfer_launcher, - size_t ongoing_transfers_queue_size, size_t pending_transfers_queue_size, + size_t queue_size, bool split_transfer, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status) : m_channel_id(channel_id), @@ -54,9 +54,10 @@ BoundaryChannel::BoundaryChannel(HailoRTDriver &driver, vdma::ChannelId channel_ m_descs(m_desc_list.count()), m_is_channel_activated(false), m_channel_mutex(), + // When measuring latency, we use 2 interrupts per transfer, so we have half the space for ongoing transfers + m_ongoing_transfers((latency_meter == nullptr ) ? 
ONGOING_TRANSFERS_SIZE : ONGOING_TRANSFERS_SIZE / 2), // CircularArrays with storage_size x can store x-1 elements, hence the +1 - m_ongoing_transfers(ongoing_transfers_queue_size + 1), - m_pending_transfers(pending_transfers_queue_size + 1), + m_pending_transfers(queue_size + 1), m_latency_meter(latency_meter), m_pending_latency_measurements(ONGOING_TRANSFERS_SIZE), // Make sure there will always be place for latency measure m_last_timestamp_num_processed(0), @@ -93,10 +94,6 @@ static hailo_status get_callback_status(vdma::ChannelId channel_id, const Channe } else if (!irq_data.validation_success) { LOGGER__WARNING("Channel {} validation failed", channel_id); status = HAILO_INTERNAL_FAILURE; - } else if ((0 != irq_data.host_error) || (0 != irq_data.device_error)) { - LOGGER__WARNING("Channel {} completed with errors: host_error {} device_error {}", - channel_id, irq_data.host_error, irq_data.device_error); - status = HAILO_INTERNAL_FAILURE; } else { status = HAILO_SUCCESS; } @@ -211,34 +208,57 @@ uint16_t BoundaryChannel::free_descs() return static_cast(num_free); } -std::vector BoundaryChannel::split_messages(TransferRequest &&transfer_request) +Expected> BoundaryChannel::split_messages(TransferRequest &&original_request) { const auto chunk_size = get_chunk_size(); - if ((transfer_request.get_total_transfer_size() <= chunk_size) || (!m_split_transfer) || - (transfer_request.transfer_buffers.at(0).type() != TransferBufferType::MEMORYVIEW)) { - return std::vector{transfer_request}; + if (!m_split_transfer || (original_request.transfer_buffers.at(0).type() != TransferBufferType::MEMORYVIEW)) { + // Split not supported + return std::vector{original_request}; } - - auto total_transfers_count = DIV_ROUND_UP(transfer_request.get_total_transfer_size(), chunk_size); - std::vector transfer_request_split(total_transfers_count); - uint32_t split_transfer_idx = 0; - for (auto &buffer : transfer_request.transfer_buffers) { + // From original_request, create a vector of several 
TransferRequests. + // Each TransferRequest may be splitted into serveral buffers, but the total size of the buffers in each + // TransferRequest will not exceed chunk_size (which is the optimal amount of bytes for single transfer). + // In addition, each TransferRequest should hold no more than MAX_TRANSFER_BUFFERS_IN_REQUEST buffers. + // Notice that each new transfer will consume a full descriptor in bytes (even if the size is smaller than + // descriptors size). + std::vector transfer_request_split; + TransferRequest current_transfer{}; + size_t current_transfer_consumed_bytes = 0; + + for (auto &buffer : original_request.transfer_buffers) { size_t bytes_processed = 0; - auto size = buffer.size(); - auto transfers_count = DIV_ROUND_UP(size, chunk_size); - for (; split_transfer_idx < transfers_count; split_transfer_idx++) { - size_t amount_to_read = std::min(size - bytes_processed, chunk_size); - void *buffer_w_offset = static_cast(const_cast(buffer.base_buffer().value().data()) - + bytes_processed); - transfer_request_split.at(split_transfer_idx).transfer_buffers.push_back(MemoryView(buffer_w_offset, amount_to_read)); - transfer_request_split.at(split_transfer_idx).callback = [](hailo_status) {}; - bytes_processed += amount_to_read; + while (bytes_processed < buffer.size()) { + assert(chunk_size > current_transfer_consumed_bytes); + const auto size_left_in_transfer = chunk_size - current_transfer_consumed_bytes; + size_t amount_to_transfer = std::min(buffer.size() - bytes_processed, size_left_in_transfer); + assert(amount_to_transfer > 0); + + TRY(auto base_buffer, buffer.base_buffer()); + auto sub_buffer = MemoryView(base_buffer.data() + bytes_processed, amount_to_transfer); + bytes_processed += amount_to_transfer; + current_transfer.transfer_buffers.push_back(TransferBuffer{sub_buffer}); + const auto desc_consumed = m_desc_list.descriptors_in_buffer(amount_to_transfer); + current_transfer_consumed_bytes += desc_consumed * m_desc_list.desc_page_size(); + + // 
Start a new trasnfer if reach the limit. + if ((current_transfer_consumed_bytes >= chunk_size) || + current_transfer.transfer_buffers.size() >= MAX_TRANSFER_BUFFERS_IN_REQUEST) { + transfer_request_split.emplace_back(std::move(current_transfer)); + current_transfer = TransferRequest{}; + current_transfer_consumed_bytes = 0; + } } } + + if (current_transfer.get_total_transfer_size() > 0) { + transfer_request_split.emplace_back(std::move(current_transfer)); + } + // Setting the original callback for the last transfer - transfer_request_split.back().callback = std::move(transfer_request.callback); + transfer_request_split.back().callback = original_request.callback; + // Removing previous bounded buffers since transfer now is split m_bounded_buffer = nullptr; @@ -252,7 +272,7 @@ hailo_status BoundaryChannel::launch_transfer(TransferRequest &&transfer_request return HAILO_STREAM_NOT_ACTIVATED; } - auto transfer_request_split = split_messages(std::move(transfer_request)); + TRY(auto transfer_request_split, split_messages(std::move(transfer_request))); for (auto &transfer : transfer_request_split) { const auto desired_desc_num = m_desc_list.descriptors_in_buffer(transfer.get_total_transfer_size()); if ((m_ongoing_transfers.size() < m_ongoing_transfers.capacity()) && @@ -316,11 +336,8 @@ Expected BoundaryChannel::launch_transfer_impl(TransferRequest &transf auto current_num_available = num_available; for (auto &transfer_buffer : transfer_request.transfer_buffers) { TRY(auto mapped_buffer, transfer_buffer.map_buffer(m_driver, m_direction)); - driver_transfer_buffers.emplace_back(HailoRTDriver::TransferBuffer{ - mapped_buffer->handle(), - transfer_buffer.offset(), - transfer_buffer.size() - }); + TRY(auto driver_buffers, transfer_buffer.to_driver_buffers()); + driver_transfer_buffers.insert(driver_transfer_buffers.end(), driver_buffers.begin(), driver_buffers.end()); const auto desired_desc_num = m_desc_list.descriptors_in_buffer(transfer_buffer.size()); 
CHECK(desired_desc_num <= MAX_SG_DESCS_COUNT, HAILO_INTERNAL_FAILURE); @@ -349,17 +366,15 @@ Expected BoundaryChannel::launch_transfer_impl(TransferRequest &transf } m_descs.enqueue(total_descs_count); - TRY_WITH_ACCEPTABLE_STATUS(HAILO_STREAM_ABORT, const auto desc_programmed, m_driver.launch_transfer( + auto status = m_driver.launch_transfer( m_channel_id, m_desc_list.handle(), num_available, driver_transfer_buffers, should_bind, first_desc_interrupts, - last_desc_interrupts - )); - CHECK(total_descs_count == desc_programmed, HAILO_INTERNAL_FAILURE, - "Inconsistent desc programed expecting {} got {}", total_descs_count, desc_programmed); + last_desc_interrupts); + CHECK_SUCCESS_WITH_ACCEPTABLE_STATUS(HAILO_STREAM_ABORT, status); return last_desc; } @@ -415,7 +430,7 @@ void BoundaryChannel::cancel_pending_transfers() size_t BoundaryChannel::get_max_ongoing_transfers(size_t transfer_size) const { size_t divide_factor = m_split_transfer ? DIV_ROUND_UP(transfer_size, get_chunk_size()) : 1; - return std::max(m_pending_transfers.capacity() / divide_factor, m_ongoing_transfers.capacity() / divide_factor); + return m_pending_transfers.capacity() / divide_factor; } bool BoundaryChannel::is_ready(size_t transfer_size) const @@ -423,14 +438,6 @@ bool BoundaryChannel::is_ready(size_t transfer_size) const return DIV_ROUND_UP(transfer_size, get_chunk_size()) < (m_pending_transfers.capacity() - m_pending_transfers.size()); } -// TODO: try and get rid of this func and merge with get_max_ongoing_transfers (HRT-13557) -size_t BoundaryChannel::get_max_aligned_transfers_in_desc_list(size_t transfer_size) const -{ - // Since this calc if for aligned transfers, we don't need to factor in the bounce buffer - static const auto NO_BOUNCE_BUFFER = false; - return m_desc_list.max_transfers(static_cast(transfer_size), NO_BOUNCE_BUFFER); -} - hailo_status BoundaryChannel::update_latency_meter() { TRY(auto timestamp_list, m_driver.vdma_interrupts_read_timestamps(m_channel_id)); diff --git 
a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp index 3e0bd10..d0b7863 100644 --- a/hailort/libhailort/src/vdma/channel/boundary_channel.hpp +++ b/hailort/libhailort/src/vdma/channel/boundary_channel.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -38,11 +38,11 @@ public: using Direction = HailoRTDriver::DmaDirection; static Expected create(HailoRTDriver &driver, vdma::ChannelId channel_id, Direction direction, - vdma::DescriptorList &&desc_list, TransferLauncher &transfer_launcher, size_t ongoing_transfers, - size_t pending_transfers = 0, bool split_transfer = false, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr); + vdma::DescriptorList &&desc_list, TransferLauncher &transfer_launcher, size_t queue_size, + bool split_transfer = false, const std::string &stream_name = "", LatencyMeterPtr latency_meter = nullptr); BoundaryChannel(HailoRTDriver &driver, vdma::ChannelId channel_id, Direction direction, DescriptorList &&desc_list, - TransferLauncher &transfer_launcher, size_t ongoing_transfers_queue_size, size_t pending_transfers_queue_size, bool split_transfer, + TransferLauncher &transfer_launcher, size_t queue_size, bool split_transfer, const std::string &stream_name, LatencyMeterPtr latency_meter, hailo_status &status); BoundaryChannel(const BoundaryChannel &other) = delete; BoundaryChannel &operator=(const BoundaryChannel &other) = delete; @@ -81,7 +81,6 @@ public: // TODO: rename BoundaryChannel::get_max_ongoing_transfers to BoundaryChannel::get_max_parallel_transfers (HRT-13513) size_t get_max_ongoing_transfers(size_t transfer_size) const; - size_t get_max_aligned_transfers_in_desc_list(size_t transfer_size) const; vdma::ChannelId get_channel_id() const { @@ 
-120,7 +119,7 @@ private: Expected should_bind_buffer(TransferRequest &transfer_request); static Expected is_same_buffer(MappedBufferPtr mapped_buff, TransferBuffer &transfer_buffer); - std::vector split_messages(TransferRequest &&transfer_request); + Expected> split_messages(TransferRequest &&transfer_request); size_t get_chunk_size() const; diff --git a/hailort/libhailort/src/vdma/channel/channel_id.hpp b/hailort/libhailort/src/vdma/channel/channel_id.hpp index 0290ae9..e9c8de2 100644 --- a/hailort/libhailort/src/vdma/channel/channel_id.hpp +++ b/hailort/libhailort/src/vdma/channel/channel_id.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/channels_group.cpp b/hailort/libhailort/src/vdma/channel/channels_group.cpp index fd8a5e0..b5063f6 100644 --- a/hailort/libhailort/src/vdma/channel/channels_group.cpp +++ b/hailort/libhailort/src/vdma/channel/channels_group.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/channels_group.hpp b/hailort/libhailort/src/vdma/channel/channels_group.hpp index e6f7717..79b0d88 100644 --- a/hailort/libhailort/src/vdma/channel/channels_group.hpp +++ b/hailort/libhailort/src/vdma/channel/channels_group.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp index 06913bb..c853977 100644 --- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp +++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp index 0188f1f..335a85e 100644 --- a/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp +++ b/hailort/libhailort/src/vdma/channel/interrupts_dispatcher.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/transfer_common.cpp b/hailort/libhailort/src/vdma/channel/transfer_common.cpp index 2ac8ab9..8de93cc 100644 --- a/hailort/libhailort/src/vdma/channel/transfer_common.cpp +++ b/hailort/libhailort/src/vdma/channel/transfer_common.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -7,7 +7,6 @@ **/ #include "transfer_common.hpp" -#include "vdma/memory/mapped_buffer.hpp" namespace hailort { @@ -65,8 +64,18 @@ Expected TransferBuffer::map_buffer(HailoRTDriver &driver if (TransferBufferType::DMABUF == m_type) { TRY(m_mappings, vdma::MappedBuffer::create_shared_from_dmabuf(m_dmabuf.fd, m_dmabuf.size, driver, direction)); } else { - TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_from_user_address(m_base_buffer.data(), m_base_buffer.size())); - TRY(m_mappings, vdma::MappedBuffer::create_shared(std::move(dma_able_buffer), driver, direction)); + if (is_aligned_for_dma()) { + TRY(auto dma_able_buffer, vdma::DmaAbleBuffer::create_from_user_address(m_base_buffer.data(), m_base_buffer.size())); + TRY(m_mappings, vdma::MappedBuffer::create_shared(std::move(dma_able_buffer), driver, direction)); + } else { + // Allocate a new bounce buffer for the mapping. + // On H2D dir, copy the data on the map + // On D2H dir, copy the data on the unmap + TRY(m_mappings, vdma::MappedBuffer::create_shared_by_allocation(m_base_buffer.size(), driver, direction)); + if (HailoRTDriver::DmaDirection::H2D == direction) { + (void)copy_to(MemoryView(m_mappings->user_address(), m_mappings->size())); + } + } } return Expected{m_mappings}; @@ -74,9 +83,47 @@ Expected TransferBuffer::map_buffer(HailoRTDriver &driver void TransferBuffer::unmap_buffer() { + const bool is_bounce_buffer = !is_aligned_for_dma(); + if (is_bounce_buffer && m_mappings && (HailoRTDriver::DmaDirection::D2H == m_mappings->direction())) { + (void)copy_from(MemoryView(m_mappings->user_address(), m_mappings->size())); + } m_mappings.reset(); } +Expected> TransferBuffer::to_driver_buffers() +{ + CHECK(m_mappings, HAILO_INTERNAL_FAILURE, "transfer-buffer must be mapped before launch-transfer"); + + std::vector res; + HailoRTDriver::TransferBuffer buf; + + if (TransferBufferType::DMABUF == m_type) { + CHECK(0 == m_offset, 
HAILO_INTERNAL_FAILURE, "no support for non-zero offset for dmabuf"); + buf.is_dma_buf = true; + buf.size = m_size; + buf.addr_or_fd = static_cast(m_dmabuf.fd); + res.emplace_back(buf); + + return Expected>{res}; + } else { + auto parts = get_continuous_parts(); + + buf.is_dma_buf = false; + buf.size = parts.first.size(); + buf.addr_or_fd = reinterpret_cast(m_mappings->user_address()) + static_cast(m_offset); + res.emplace_back(buf); + + if (!parts.second.empty()) { + buf.size = parts.second.size(); + buf.addr_or_fd = reinterpret_cast(m_mappings->user_address()); + res.emplace_back(buf); + } + + return Expected>{res}; + } + +} + hailo_status TransferBuffer::copy_to(MemoryView buffer) { CHECK(buffer.size() == m_size, HAILO_INTERNAL_FAILURE, "buffer size {} must be {}", buffer.size(), m_size); @@ -108,6 +155,16 @@ hailo_status TransferBuffer::copy_from(const MemoryView buffer) return HAILO_SUCCESS; } +bool TransferBuffer::is_aligned_for_dma() const +{ + if (TransferBufferType::DMABUF == m_type) { + return true; + } + + const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); + return (0 == reinterpret_cast(m_base_buffer.data()) % dma_able_alignment); +} + bool TransferBuffer::is_wrap_around() const { return (m_offset + m_size) > m_base_buffer.size(); diff --git a/hailort/libhailort/src/vdma/channel/transfer_common.hpp b/hailort/libhailort/src/vdma/channel/transfer_common.hpp index 553caaa..627c64b 100644 --- a/hailort/libhailort/src/vdma/channel/transfer_common.hpp +++ b/hailort/libhailort/src/vdma/channel/transfer_common.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -44,9 +44,17 @@ public: Expected map_buffer(HailoRTDriver &driver, HailoRTDriver::DmaDirection direction); void unmap_buffer(); + // Assumes map_buffer() has already been called. 
+ // Returns the transfer-buffer as the driver expects to receive it. + // If the buffer is circular with wrap-around, will return two buffers + // similar to get_continuos_parts(). Otherwise, we return only one. + Expected> to_driver_buffers(); + hailo_status copy_to(MemoryView buffer); hailo_status copy_from(const MemoryView buffer); + bool is_aligned_for_dma() const; + TransferBufferType type () const { return m_type; } private: @@ -79,7 +87,7 @@ using TransferDoneCallback = std::function; struct TransferRequest { // Initialization dependency - callback must be before transfer_buffers to avoid race condition - TransferDoneCallback callback; + TransferDoneCallback callback = [](hailo_status) {}; std::vector transfer_buffers; TransferRequest() = default; TransferRequest(TransferBuffer &&transfer_buffers_arg, const TransferDoneCallback &callback_arg): @@ -104,16 +112,6 @@ struct TransferRequest { return total_transfer_size; } - Expected is_request_aligned() { - CHECK(!transfer_buffers.empty(), HAILO_INVALID_ARGUMENT, "TransferRequest is empty"); - CHECK(TransferBufferType::MEMORYVIEW == transfer_buffers[0].type(), HAILO_INVALID_ARGUMENT, - "get_aligned_request is only supported in MEMORYVIEW type TransferBuffer"); - - const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); - TRY(auto base_buffer, transfer_buffers[0].base_buffer()); - return (0 == reinterpret_cast(base_buffer.data()) % dma_able_alignment); - } - Expected is_request_end_aligned() { const auto dma_able_alignment = OsUtils::get_dma_able_alignment(); TRY(auto base_buffer, transfer_buffers[0].base_buffer()); @@ -122,6 +120,11 @@ struct TransferRequest { } }; +// Helper function to create TransferRequest from buffer and callback +inline TransferRequest to_request(void *buffer, size_t size, const TransferDoneCallback &callback) { + return TransferRequest(TransferBuffer(MemoryView(buffer, size)), callback); +} + struct InferRequest { // Transfer for each stream std::unordered_map transfers; diff 
--git a/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp b/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp index 14034dd..1a83e9d 100644 --- a/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp +++ b/hailort/libhailort/src/vdma/channel/transfer_launcher.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp b/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp index 2c71c98..a647ca3 100644 --- a/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp +++ b/hailort/libhailort/src/vdma/channel/transfer_launcher.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp index 4a5b6f4..331fa46 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp index d89dd3f..618164f 100644 --- a/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp +++ b/hailort/libhailort/src/vdma/circular_stream_buffer_pool.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. 
All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp b/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp index 260e0b0..5c44f5e 100644 --- a/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp +++ b/hailort/libhailort/src/vdma/dma_mapped_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/driver/hailort_driver.cpp b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp index 30c2bff..c8b6d4a 100755 --- a/hailort/libhailort/src/vdma/driver/hailort_driver.cpp +++ b/hailort/libhailort/src/vdma/driver/hailort_driver.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -35,6 +35,8 @@ static_assert(MIN_D2H_CHANNEL_INDEX == VDMA_DEST_CHANNELS_START, "Driver and lib static_assert(ONGOING_TRANSFERS_SIZE == HAILO_VDMA_MAX_ONGOING_TRANSFERS, "Driver and libhailort parameters mismatch"); static_assert(MAX_IRQ_TIMESTAMPS_SIZE == CHANNEL_IRQ_TIMESTAMPS_SIZE, "Driver and libhailort parameters mismatch"); +static_assert(MAX_TRANSFER_BUFFERS_IN_REQUEST == HAILO_MAX_BUFFERS_PER_SINGLE_TRANSFER, "Driver and libhailort parameters mismatch"); + static_assert(static_cast(InterruptsDomain::NONE) == HAILO_VDMA_INTERRUPTS_DOMAIN_NONE, "Driver and libhailort parameters mismatch"); static_assert(static_cast(InterruptsDomain::HOST) == HAILO_VDMA_INTERRUPTS_DOMAIN_HOST, "Driver and libhailort parameters mismatch"); static_assert(static_cast(InterruptsDomain::DEVICE) == HAILO_VDMA_INTERRUPTS_DOMAIN_DEVICE, "Driver and libhailort parameters mismatch"); @@ -148,6 +150,8 @@ static HailoRTDriver::DeviceBoardType board_type_to_device_board_type(enum hailo return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_HAILO10H; case HAILO_BOARD_TYPE_HAILO10H_LEGACY: return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_HAILO10H_LEGACY; + case HAILO_BOARD_TYPE_MARS: + return HailoRTDriver::DeviceBoardType::DEVICE_BOARD_TYPE_MARS; default: LOGGER__ERROR("Invalid board type from ioctl {}", static_cast(board_type)); break; @@ -304,9 +308,9 @@ HailoRTDriver::HailoRTDriver(const std::string &device_id, FileDescriptor &&fd, HailoRTDriver::~HailoRTDriver() { for (const auto &buffer_info : m_mapped_buffer) { - auto status = vdma_buffer_unmap_ioctl(buffer_info.handle); + auto status = vdma_buffer_unmap_ioctl(buffer_info.second.handle); if (HAILO_SUCCESS != status) { - LOGGER__ERROR("Failed to unmap buffer handle {} status {}", buffer_info.handle, status); + LOGGER__ERROR("Failed to unmap buffer handle {} status {}", buffer_info.second.handle, status); } } } @@ -451,11 +455,16 @@ static 
Expected to_irq_data(const hailo_vdma_interrupts_wait_params& pa irq.channels_irq_data[i].channel_id.engine_index = engine_index; irq.channels_irq_data[i].channel_id.channel_index = channel_index; - irq.channels_irq_data[i].is_active = params.irq_data[i].is_active; - irq.channels_irq_data[i].transfers_completed = params.irq_data[i].transfers_completed; - irq.channels_irq_data[i].host_error = params.irq_data[i].host_error; - irq.channels_irq_data[i].device_error = params.irq_data[i].device_error; - irq.channels_irq_data[i].validation_success = params.irq_data[i].validation_success; + irq.channels_irq_data[i].validation_success = true; + irq.channels_irq_data[i].is_active = true; + + if (params.irq_data[i].data == HAILO_VDMA_TRANSFER_DATA_CHANNEL_WITH_ERROR) { + irq.channels_irq_data[i].validation_success = false; + } else if (params.irq_data[i].data == HAILO_VDMA_TRANSFER_DATA_CHANNEL_NOT_ACTIVE) { + irq.channels_irq_data[i].is_active = false; + } else { + irq.channels_irq_data[i].transfers_completed = params.irq_data[i].data; + } } return irq; } @@ -577,14 +586,14 @@ Expected HailoRTDriver::write_action_list(uint8_t *data, size_t size) return dma_address; } -Expected HailoRTDriver::vdma_buffer_map_dmabuf(int dmabuf_fd, size_t required_size, DmaDirection data_direction, - DmaBufferType buffer_type) +Expected HailoRTDriver::vdma_buffer_map_dmabuf(int dmabuf_fd, size_t required_size, + DmaDirection data_direction, DmaBufferType buffer_type) { CHECK_AS_EXPECTED (DmaBufferType::DMABUF_BUFFER == buffer_type, HAILO_INVALID_ARGUMENT, "Error, Invalid buffer type given, buffer type {}", static_cast(buffer_type)); - return vdma_buffer_map(dmabuf_fd, required_size, data_direction, INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER, - buffer_type); + return vdma_buffer_map(static_cast(dmabuf_fd), required_size, data_direction, + INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER, buffer_type); } Expected HailoRTDriver::vdma_buffer_map(uintptr_t user_address, size_t required_size, @@ -592,23 +601,20 
@@ Expected HailoRTDriver::vdma_buffer_map(uintptr DmaBufferType buffer_type) { std::unique_lock mapping_lock(m_mapped_buffer_lock); - auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), - [user_address, required_size, data_direction](const auto& mapped_buffer_info) { - return (mapped_buffer_info.address == user_address) && - (mapped_buffer_info.size == required_size) && - (mapped_buffer_info.direction == data_direction); - }); + auto mapped_buffer_key = MappedBufferKey{user_address, data_direction, required_size}; + auto mapped_buffer = m_mapped_buffer.find(mapped_buffer_key); + if (mapped_buffer != m_mapped_buffer.end()) { // Buffer already mapped, increase ref count and use it. - assert(mapped_buffer->mapped_count > 0); + assert(mapped_buffer->second.mapped_count > 0); const bool mismatched_driver_handle = (driver_buff_handle != INVALID_MAPPED_BUFFER_DRIVER_IDENTIFIER) && - (mapped_buffer->driver_buff_handle != driver_buff_handle); + (mapped_buffer->second.driver_buff_handle != driver_buff_handle); CHECK(!mismatched_driver_handle, HAILO_INVALID_ARGUMENT, - "Mapped buffer driver handle 0x{:x} is different than required handle 0x{:x}", mapped_buffer->driver_buff_handle, + "Mapped buffer driver handle 0x{:x} is different than required handle 0x{:x}", mapped_buffer->second.driver_buff_handle, driver_buff_handle); - mapped_buffer->mapped_count++; - return Expected(mapped_buffer->handle); + mapped_buffer->second.mapped_count++; + return Expected(mapped_buffer->second.handle); } else { // Buffer not mapped, map it now auto handle = vdma_buffer_map_ioctl(user_address, required_size, data_direction, @@ -616,53 +622,29 @@ Expected HailoRTDriver::vdma_buffer_map(uintptr CHECK_EXPECTED(handle); const auto mapping_count = 1; - m_mapped_buffer.emplace_back(MappedBufferInfo { + m_mapped_buffer[mapped_buffer_key] = MappedBufferInfo { handle.value(), - user_address, - data_direction, - required_size, driver_buff_handle, mapping_count - }); + }; 
return handle.release(); } } -hailo_status HailoRTDriver::vdma_buffer_unmap(VdmaBufferHandle handle) { - std::unique_lock mapping_lock(m_mapped_buffer_lock); - auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), - [handle](const auto& mapped_buffer_info) { - return mapped_buffer_info.handle == handle; - }); - CHECK(mapped_buffer != m_mapped_buffer.end(), HAILO_NOT_FOUND, "Mapped buffer handle {} not found", handle); - - assert(mapped_buffer->mapped_count > 0); - mapped_buffer->mapped_count--; - if (mapped_buffer->mapped_count == 0) { - m_mapped_buffer.erase(mapped_buffer); - return vdma_buffer_unmap_ioctl(handle); - } - return HAILO_SUCCESS; -} - hailo_status HailoRTDriver::vdma_buffer_unmap(uintptr_t user_address, size_t size, DmaDirection data_direction) { std::unique_lock mapping_lock(m_mapped_buffer_lock); - auto mapped_buffer = std::find_if(m_mapped_buffer.begin(), m_mapped_buffer.end(), - [user_address, size, data_direction](const auto& mapped_buffer_info) { - return (mapped_buffer_info.address == user_address) && - (mapped_buffer_info.size == size) && - (mapped_buffer_info.direction == data_direction); - }); + auto mapped_buffer_key = MappedBufferKey{user_address, data_direction, size}; + auto mapped_buffer = m_mapped_buffer.find(mapped_buffer_key); CHECK(mapped_buffer != m_mapped_buffer.end(), HAILO_NOT_FOUND, "Mapped buffer {} {} not found", user_address, size); - assert(mapped_buffer->mapped_count > 0); - mapped_buffer->mapped_count--; - if (mapped_buffer->mapped_count == 0) { - const auto handle = mapped_buffer->handle; - m_mapped_buffer.erase(mapped_buffer); + assert(mapped_buffer->second.mapped_count > 0); + mapped_buffer->second.mapped_count--; + if (mapped_buffer->second.mapped_count == 0) { + const auto handle = mapped_buffer->second.handle; + m_mapped_buffer.erase(mapped_buffer_key); return vdma_buffer_unmap_ioctl(handle); } return HAILO_SUCCESS; @@ -690,19 +672,21 @@ hailo_status 
HailoRTDriver::vdma_buffer_sync(VdmaBufferHandle handle, DmaSyncDir } hailo_status HailoRTDriver::descriptors_list_program(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, - size_t buffer_size, size_t buffer_offset, uint8_t channel_index, uint32_t starting_desc, bool should_bind, - InterruptsDomain last_desc_interrupts) + size_t buffer_size, size_t buffer_offset, uint8_t channel_index, uint32_t starting_desc, uint32_t batch_size, + bool should_bind, InterruptsDomain last_desc_interrupts, uint32_t stride) { hailo_desc_list_program_params params{}; params.buffer_handle = buffer_handle; params.buffer_size = buffer_size; params.buffer_offset = buffer_offset; + params.batch_size = batch_size; params.desc_handle = desc_handle; params.channel_index = channel_index; params.starting_desc = starting_desc; params.should_bind = should_bind; params.last_interrupts_domain = (hailo_vdma_interrupts_domain)last_desc_interrupts; + params.stride = stride; #ifdef NDEBUG params.is_debug = false; @@ -714,7 +698,7 @@ hailo_status HailoRTDriver::descriptors_list_program(uintptr_t desc_handle, Vdma return HAILO_SUCCESS; } -Expected HailoRTDriver::launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle, +hailo_status HailoRTDriver::launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle, uint32_t starting_desc, const std::vector &transfer_buffers, bool should_bind, InterruptsDomain first_desc_interrupts, InterruptsDomain last_desc_interrupts) { @@ -729,8 +713,9 @@ Expected HailoRTDriver::launch_transfer(vdma::ChannelId channel_id, ui params.starting_desc = starting_desc; params.buffers_count = static_cast(transfer_buffers.size()); for (size_t i = 0; i < transfer_buffers.size(); i++) { - params.buffers[i].mapped_buffer_handle = transfer_buffers[i].buffer_handle; - params.buffers[i].offset = static_cast(transfer_buffers[i].offset); + params.buffers[i].buffer_type = transfer_buffers[i].is_dma_buf ? 
+ HAILO_DMA_DMABUF_BUFFER : HAILO_DMA_USER_PTR_BUFFER; + params.buffers[i].addr_or_fd = transfer_buffers[i].addr_or_fd; params.buffers[i].size = static_cast(transfer_buffers[i].size); } params.should_bind = should_bind; @@ -745,7 +730,7 @@ Expected HailoRTDriver::launch_transfer(vdma::ChannelId channel_id, ui RUN_AND_CHECK_IOCTL_RESULT(HAILO_VDMA_LAUNCH_TRANSFER, ¶ms, "Failed launch transfer"); - return Expected(params.descs_programed); + return HAILO_SUCCESS; } #if defined(__linux__) @@ -1131,4 +1116,9 @@ bool HailoRTDriver::is_valid_channel_id(const vdma::ChannelId &channel_id) return (channel_id.engine_index < m_dma_engines_count) && (channel_id.channel_index < MAX_VDMA_CHANNELS_PER_ENGINE); } +hailo_status HailoRTDriver::reset_chip() +{ + RUN_AND_CHECK_IOCTL_RESULT(HAILO_SOC_POWER_OFF, nullptr, "Failed poweroff"); + return HAILO_SUCCESS; +} } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/driver/hailort_driver.hpp b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp index 13e6284..9c86181 100755 --- a/hailort/libhailort/src/vdma/driver/hailort_driver.hpp +++ b/hailort/libhailort/src/vdma/driver/hailort_driver.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -41,7 +41,12 @@ constexpr size_t ONGOING_TRANSFERS_SIZE = 128; static_assert((0 == ((ONGOING_TRANSFERS_SIZE - 1) & ONGOING_TRANSFERS_SIZE)), "ONGOING_TRANSFERS_SIZE must be a power of 2"); #define MIN_ACTIVE_TRANSFERS_SCALE (2) -#define MAX_ACTIVE_TRANSFERS_SCALE (4) + +#if defined(HAILO_SUPPORT_MULTI_PROCESS) || defined(_WIN32) +#define MAX_ACTIVE_TRANSFERS_SCALE (8) +#else +#define MAX_ACTIVE_TRANSFERS_SCALE (32) +#endif #define HAILO_MAX_BATCH_SIZE ((ONGOING_TRANSFERS_SIZE / MIN_ACTIVE_TRANSFERS_SCALE) - 1) @@ -50,13 +55,16 @@ static_assert((0 == ((ONGOING_TRANSFERS_SIZE - 1) & ONGOING_TRANSFERS_SIZE)), "O #define PCIE_EXPECTED_MD5_LENGTH (16) -constexpr size_t VDMA_CHANNELS_PER_ENGINE = 32; -constexpr size_t MAX_VDMA_ENGINES_COUNT = 3; -constexpr size_t MAX_VDMA_CHANNELS_COUNT = MAX_VDMA_ENGINES_COUNT * VDMA_CHANNELS_PER_ENGINE; -constexpr uint8_t MIN_H2D_CHANNEL_INDEX = 0; -constexpr uint8_t MAX_H2D_CHANNEL_INDEX = 15; -constexpr uint8_t MIN_D2H_CHANNEL_INDEX = MAX_H2D_CHANNEL_INDEX + 1; -constexpr uint8_t MAX_D2H_CHANNEL_INDEX = 31; +constexpr size_t VDMA_CHANNELS_PER_ENGINE = 32; +constexpr size_t MAX_VDMA_ENGINES_COUNT = 3; +constexpr size_t MAX_VDMA_CHANNELS_COUNT = MAX_VDMA_ENGINES_COUNT * VDMA_CHANNELS_PER_ENGINE; +constexpr uint8_t MIN_H2D_CHANNEL_INDEX = 0; +constexpr uint8_t MAX_H2D_CHANNEL_INDEX = 15; +constexpr uint8_t MIN_D2H_CHANNEL_INDEX = MAX_H2D_CHANNEL_INDEX + 1; +constexpr uint8_t MAX_D2H_CHANNEL_INDEX = 31; +constexpr uint8_t MIN_ENHANCED_D2H_CHANNEL_INDEX = 28; + +constexpr size_t MAX_TRANSFER_BUFFERS_IN_REQUEST = 8; // NOTE: don't change members from this struct without updating all code using it (platform specific) struct ChannelInterruptTimestamp { @@ -73,8 +81,6 @@ struct ChannelIrqData { vdma::ChannelId channel_id; bool is_active; uint8_t transfers_completed; - uint8_t host_error; - uint8_t device_error; bool validation_success; }; @@ -172,6 +178,7 @@ 
public: DEVICE_BOARD_TYPE_HAILO15L, DEVICE_BOARD_TYPE_HAILO10H, DEVICE_BOARD_TYPE_HAILO10H_LEGACY, + DEVICE_BOARD_TYPE_MARS, DEVICE_BOARD_TYPE_COUNT, }; @@ -248,6 +255,8 @@ public: hailo_status reset_nn_core(); + hailo_status reset_chip(); + Expected write_action_list(uint8_t *data, size_t size); /** @@ -275,7 +284,6 @@ public: /** * Unmaps user buffer mapped using HailoRTDriver::map_buffer. */ - hailo_status vdma_buffer_unmap(VdmaBufferHandle handle); hailo_status vdma_buffer_unmap(uintptr_t user_address, size_t size, DmaDirection data_direction); hailo_status vdma_buffer_sync(VdmaBufferHandle buffer, DmaSyncDirection sync_direction, size_t offset, size_t count); @@ -301,18 +309,20 @@ public: */ hailo_status descriptors_list_program(uintptr_t desc_handle, VdmaBufferHandle buffer_handle, size_t buffer_size, size_t buffer_offset, uint8_t channel_index, - uint32_t starting_desc, bool should_bind, InterruptsDomain last_desc_interrupts); + uint32_t starting_desc, uint32_t batch_size, bool should_bind, InterruptsDomain last_desc_interrupts, + uint32_t stride); struct TransferBuffer { - VdmaBufferHandle buffer_handle; - size_t offset; + bool is_dma_buf; + uintptr_t addr_or_fd; size_t size; }; /** * Launches some transfer on the given channel. + * The maximum number of transfer buffers is MAX_TRANSFER_BUFFERS_IN_REQUEST. 
*/ - Expected launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle, + hailo_status launch_transfer(vdma::ChannelId channel_id, uintptr_t desc_handle, uint32_t starting_desc, const std::vector &transfer_buffer, bool should_bind, InterruptsDomain first_desc_interrupts, InterruptsDomain last_desc_interrupts); @@ -347,7 +357,7 @@ public: hailo_status close_connection(vdma::ChannelId input_channel, vdma::ChannelId output_channel, PcieSessionType session_type); - const std::string &device_id() const + const std::string& device_id() const { return m_device_id; } @@ -454,15 +464,31 @@ private: // TODO HRT-11937: when ioctl is combined, move caching to driver struct MappedBufferInfo { VdmaBufferHandle handle; - uintptr_t address; - DmaDirection direction; - size_t size; vdma_mapped_buffer_driver_identifier driver_buff_handle; size_t mapped_count; }; + struct MappedBufferKey { + uintptr_t address; + DmaDirection direction; + size_t size; + + bool operator==(const MappedBufferKey &other) const + { + return address == other.address && direction == other.direction && size >= other.size; + } + }; + + struct MappedBufferKeyHash { + std::size_t operator()(const MappedBufferKey &key) const { + return std::hash()(key.address) ^ + std::hash()(key.size) ^ + std::hash()(static_cast(key.direction)); + } + }; + std::mutex m_mapped_buffer_lock; - std::list m_mapped_buffer; + std::unordered_map m_mapped_buffer; }; diff --git a/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp index b30018a..d243884 100644 --- a/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp +++ b/hailort/libhailort/src/vdma/driver/os/driver_os_specific.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp b/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp index 51bdf56..135b70e 100644 --- a/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp +++ b/hailort/libhailort/src/vdma/driver/os/posix/linux/driver_os_specific.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp b/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp index f7080de..12f0478 100644 --- a/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp +++ b/hailort/libhailort/src/vdma/driver/os/posix/qnx/driver_os_specific.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp index 700bc50..07d86b1 100644 --- a/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp +++ b/hailort/libhailort/src/vdma/driver/os/windows/driver_os_specific.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/integrated/integrated_device.cpp b/hailort/libhailort/src/vdma/integrated/integrated_device.cpp index af1a17a..c16ce68 100644 --- a/hailort/libhailort/src/vdma/integrated/integrated_device.cpp +++ b/hailort/libhailort/src/vdma/integrated/integrated_device.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ @@ -13,7 +13,11 @@ #include #ifdef __linux__ #include -#endif +#ifdef GPIO_V2_GET_LINE_IOCTL +#include +#include +#endif // GPIO_V2_GET_LINE_IOCTL +#endif // __linux__ #include namespace hailort @@ -147,8 +151,14 @@ Expected IntegratedDevice::get_extended_dev constexpr auto STATUS_ENABLED = "okay"; constexpr auto ETH_STATUS_FILE = "/proc/device-tree/ethernet@1b5000/status"; constexpr auto PCI_STATUS_FILE = "/proc/device-tree/hailo_pci_ep_driver/status"; + constexpr auto FUSE_FILE = "/sys/devices/soc0/fuse"; + constexpr auto ULT_OFFSET_IN_FUSE = sizeof(uint32_t); // crypto_dummy field before the ULT + constexpr auto LOT_ID_SIZE = 8; constexpr auto NOT_AVAILABLE = 0; + // LOT_ID_SIZE + sizeof(wafer_info) = HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH + static_assert(LOT_ID_SIZE == HAILO_UNIT_LEVEL_TRACKING_BYTES_LENGTH - sizeof(uint32_t), "LOT_ID_SIZE is not as expected!"); + hailo_extended_device_information_t info = {}; auto compare_file_content = [](const std::string &file_path, const std::string &expected_value) -> Expected { @@ -166,7 +176,7 @@ Expected IntegratedDevice::get_extended_dev TRY(auto is_eth_supported, compare_file_content(ETH_STATUS_FILE, STATUS_ENABLED)); TRY(auto is_pci_supported, compare_file_content(PCI_STATUS_FILE, STATUS_ENABLED)); - info.boot_source = HAILO_DEVICE_BOOT_SOURCE_PCIE; // TODO: HRT-15562 + info.boot_source = 
HAILO_DEVICE_BOOT_SOURCE_INVALID; // TODO: HRT-15562 info.eth_mac_address[0] = NOT_AVAILABLE; // TODO: HRT-15562 info.lcs = NOT_AVAILABLE; // TODO: HRT-15562 info.neural_network_core_clock_rate = NOT_AVAILABLE; // TODO: HRT-15562 @@ -177,7 +187,30 @@ Expected IntegratedDevice::get_extended_dev info.supported_features.mdio = NOT_AVAILABLE; // TODO: HRT-15562 info.supported_features.mipi = NOT_AVAILABLE; // TODO: HRT-15562 info.supported_features.pcie = is_pci_supported; - info.unit_level_tracking_id[0] = NOT_AVAILABLE; // TODO: HRT-15562 + + { + FileReader reader(FUSE_FILE); + auto status = reader.open(); + CHECK_SUCCESS(status, "Failed to open file {}", FUSE_FILE); + + status = reader.seek(ULT_OFFSET_IN_FUSE); + CHECK_SUCCESS(status, "Failed to seek to offset {} in file {}", ULT_OFFSET_IN_FUSE, FUSE_FILE); + + status = reader.read(info.unit_level_tracking_id, LOT_ID_SIZE); + CHECK_SUCCESS(status, "Failed to read {} bytes from file {}", LOT_ID_SIZE, FUSE_FILE); + + // Reverse the bytes to get the correct order - same is done in hailo8 + std::reverse(info.unit_level_tracking_id, info.unit_level_tracking_id + LOT_ID_SIZE); + + status = reader.read(info.unit_level_tracking_id + LOT_ID_SIZE, sizeof(info.unit_level_tracking_id) - LOT_ID_SIZE); + CHECK_SUCCESS(status, "Failed to read {} bytes from file {}", sizeof(info.unit_level_tracking_id) - LOT_ID_SIZE, FUSE_FILE); + } + +#if defined(__linux__) && defined(GPIO_V2_GET_LINE_IOCTL) + if (m_device_architecture == HAILO_ARCH_HAILO10H) { + TRY(info.gpio_mask, GpioReader().read()); + } +#endif return info; } @@ -208,4 +241,43 @@ Expected IntegratedDevice::has_INA231() return has_INA231; } +#if defined(__linux__) && defined(GPIO_V2_GET_LINE_IOCTL) +IntegratedDevice::GpioReader::~GpioReader() +{ + if (m_request_fd >= 0) { + (void)close(m_request_fd); + } + + if (m_fd >= 0) { + (void)close(m_fd); + } +} + +Expected IntegratedDevice::GpioReader::read() +{ + constexpr auto GPIO_MASK_FILE = "/dev/gpiochip1"; + m_fd = 
open(GPIO_MASK_FILE, O_RDONLY); + CHECK(m_fd >= 0, HAILO_FILE_OPERATION_FAILURE, "Failed to open {}", GPIO_MASK_FILE); + + struct gpio_v2_line_request req = {}; + req.num_lines = HAILO_GPIO_MASK_VALUES_LENGTH; + req.config.flags = GPIO_V2_LINE_FLAG_INPUT; + for (uint32_t i = 0; i < HAILO_GPIO_MASK_VALUES_LENGTH; i++) { + req.offsets[i] = i; + } + + int ret = ioctl(m_fd, GPIO_V2_GET_LINE_IOCTL, &req); + CHECK(ret >= 0, HAILO_FILE_OPERATION_FAILURE, "Failed to get line from ioctl, errno = {}", errno); + m_request_fd = req.fd; + + struct gpio_v2_line_values values = {}; + values.mask = UINT16_MAX; + + ret = ioctl(m_request_fd, GPIO_V2_LINE_GET_VALUES_IOCTL, &values); + CHECK(ret >= 0, HAILO_FILE_OPERATION_FAILURE, "Failed to get line values from ioctl"); + + return static_cast(values.bits); +} +#endif // __linux__ + } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/integrated/integrated_device.hpp b/hailort/libhailort/src/vdma/integrated/integrated_device.hpp index a04cd69..edcd39e 100644 --- a/hailort/libhailort/src/vdma/integrated/integrated_device.hpp +++ b/hailort/libhailort/src/vdma/integrated/integrated_device.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -18,6 +18,10 @@ #include +#ifdef __linux__ +#include +#endif + namespace hailort @@ -57,6 +61,18 @@ protected: private: IntegratedDevice(std::unique_ptr &&driver, hailo_status &status); std::shared_ptr m_power_measurement_data; + +#if defined(__linux__) && defined(GPIO_V2_GET_LINE_IOCTL) + class GpioReader final { + public: + GpioReader() : m_fd(-1), m_request_fd(-1) {} + Expected read(); + ~GpioReader(); + private: + int m_fd; + int m_request_fd; + }; +#endif }; diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp index d4f8a38..7501f94 100644 --- a/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp +++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -44,15 +44,15 @@ Expected BufferSizesRequirements::get_buffer_requiremen const auto DONT_FORCE_BATCH_SIZE = false; static const bool IS_CIRCULAR = true; static const bool IS_VDMA_ALIGNED_BUFFER = false; + static const bool IS_NOT_DDR = false; auto buffer_sizes_requirements_exp = vdma::BufferSizesRequirements::get_buffer_requirements_single_transfer( vdma::VdmaBuffer::Type::SCATTER_GATHER, static_cast(max_page_size), min_active_trans, max_active_trans, transfer_size, IS_CIRCULAR, DONT_FORCE_DEFAULT_PAGE_SIZE, DONT_FORCE_BATCH_SIZE, - IS_VDMA_ALIGNED_BUFFER); + IS_VDMA_ALIGNED_BUFFER, IS_NOT_DDR); if (HAILO_SUCCESS == buffer_sizes_requirements_exp.status()) { // We found a valid page size const auto desc_page_size = buffer_sizes_requirements_exp->desc_page_size(); - const auto descs_count = (is_env_variable_on(HAILO_CONFIGURE_FOR_HW_INFER_ENV_VAR)) ? 
- MAX_SG_DESCS_COUNT : buffer_sizes_requirements_exp->descs_count(); + const auto descs_count = buffer_sizes_requirements_exp->descs_count(); return BufferSizesRequirements(descs_count, desc_page_size); } else if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == buffer_sizes_requirements_exp.status()) { // If we can't meet the requirements, try to double the page size and try again @@ -67,7 +67,7 @@ Expected BufferSizesRequirements::get_buffer_requiremen Expected BufferSizesRequirements::get_buffer_requirements_multiple_transfers( vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t batch_size, const std::vector &transfer_sizes, bool is_circular, bool force_default_page_size, - bool force_batch_size) + bool force_batch_size, bool is_ddr) { const uint32_t MAX_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ? MAX_SG_DESCS_COUNT : MAX_CCB_DESCS_COUNT; @@ -94,14 +94,14 @@ Expected BufferSizesRequirements::get_buffer_requiremen CHECK_AS_EXPECTED(initial_desc_page_size >= MIN_PAGE_SIZE, HAILO_INTERNAL_FAILURE, "Initial descriptor page size ({}) is smaller than minimum descriptor page size ({})", initial_desc_page_size, MIN_PAGE_SIZE); - if (get_required_descriptor_count(transfer_sizes, max_desc_page_size) > MAX_DESCS_COUNT) { + if (get_required_descriptor_count(transfer_sizes, max_desc_page_size, is_ddr) > MAX_DESCS_COUNT) { return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS); } // Defined as uint32_t to prevent overflow (as we multiply it by two in each iteration of the while loop bellow) auto local_desc_page_size = static_cast(initial_desc_page_size); - auto descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size); + auto descs_count = get_required_descriptor_count(transfer_sizes, initial_desc_page_size, is_ddr); // Too many descriptors; try a larger desc_page_size which will lead to less descriptors used while ((descs_count * batch_size) > (MAX_DESCS_COUNT - 1)) { 
CHECK_AS_EXPECTED(IS_FIT_IN_UINT16(local_desc_page_size << 1), HAILO_INTERNAL_FAILURE, @@ -115,12 +115,14 @@ Expected BufferSizesRequirements::get_buffer_requiremen // If not forcing minimum batch (It's acceptable to run infer on lower batch instead of returning error) // once reached over the max page size, stop local_desc_page_size = max_desc_page_size; - descs_count = get_required_descriptor_count(transfer_sizes, static_cast(local_desc_page_size)); + descs_count = get_required_descriptor_count(transfer_sizes, static_cast(local_desc_page_size), + is_ddr); break; } } - descs_count = get_required_descriptor_count(transfer_sizes, static_cast(local_desc_page_size)); + descs_count = get_required_descriptor_count(transfer_sizes, static_cast(local_desc_page_size), + is_ddr); } // Found desc_page_size and descs_count @@ -141,7 +143,8 @@ Expected BufferSizesRequirements::get_buffer_requiremen Expected BufferSizesRequirements::get_buffer_requirements_single_transfer( vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, - uint32_t transfer_size, bool is_circular, bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer) + uint32_t transfer_size, bool is_circular, bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer, + bool is_ddr) { const uint32_t MAX_DESCS_COUNT = (buffer_type == vdma::VdmaBuffer::Type::SCATTER_GATHER) ? 
MAX_SG_DESCS_COUNT : MAX_CCB_DESCS_COUNT; @@ -150,7 +153,7 @@ Expected BufferSizesRequirements::get_buffer_requiremen // First, get the result for the min size auto results = get_buffer_requirements_multiple_transfers(buffer_type, max_desc_page_size, - min_batch_size, {transfer_size}, is_circular, force_default_page_size, force_batch_size); + min_batch_size, {transfer_size}, is_circular, force_default_page_size, force_batch_size, is_ddr); if (HAILO_CANT_MEET_BUFFER_REQUIREMENTS == results.status()) { // In case of failure to meet requirements, return without error printed to the prompt. return make_unexpected(HAILO_CANT_MEET_BUFFER_REQUIREMENTS); @@ -199,14 +202,17 @@ uint16_t BufferSizesRequirements::find_initial_desc_page_size( } uint32_t BufferSizesRequirements::get_required_descriptor_count(const std::vector &transfer_sizes, - uint16_t desc_page_size) + uint16_t desc_page_size, bool is_ddr_layer) { uint32_t desc_count = 0; for (auto &transfer_size : transfer_sizes) { desc_count += DIV_ROUND_UP(transfer_size, desc_page_size); } - // One extra descriptor is needed, because the amount of available descriptors is (desc_count - 1) - return desc_count + 1; + // In case of is_circular don't add extra descriptor - because amount of descs needs to be power of 2 and then + // will round up to next power of 2 (and we anyways make sure we are smaller than MAX_DESCS which is 64K - 1) + // Otherwise - one extra descriptor is "required", because the amount of available descriptors is (desc_count - 1) + // TODO HRT-15875: check if we need this other descriptor in non circular cases + return is_ddr_layer ? 
desc_count : (desc_count + 1); } } /* namespace vdma */ diff --git a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp index 4319db9..f484357 100644 --- a/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp +++ b/hailort/libhailort/src/vdma/memory/buffer_requirements.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -42,20 +42,21 @@ public: static Expected get_buffer_requirements_multiple_transfers( vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t batch_size, const std::vector &transfer_sizes, bool is_circular, - bool force_default_page_size, bool force_batch_size); + bool force_default_page_size, bool force_batch_size, bool is_ddr); static Expected get_buffer_requirements_single_transfer( vdma::VdmaBuffer::Type buffer_type, uint16_t max_desc_page_size, uint16_t min_batch_size, uint16_t max_batch_size, uint32_t transfer_size, bool is_circular, - bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer); + bool force_default_page_size, bool force_batch_size, bool is_vdma_aligned_buffer, bool is_ddr); private: static uint16_t find_initial_desc_page_size(vdma::VdmaBuffer::Type buffer_type, const std::vector &transfer_sizes, uint16_t max_desc_page_size, bool force_default_page_size, uint16_t min_page_size); - static uint32_t get_required_descriptor_count(const std::vector &transfer_sizes, uint16_t desc_page_size); + static uint32_t get_required_descriptor_count(const std::vector &transfer_sizes, uint16_t desc_page_size, + bool is_ddr_layer); - const uint32_t m_descs_count; - const uint16_t m_desc_page_size; + uint32_t m_descs_count; + uint16_t m_desc_page_size; }; } /* namespace vdma */ diff --git 
a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp index 8e21b6d..d5ccc86 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp index 1defeac..8157636 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/continuous_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp index 4ee7f94..73f6d6f 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp +++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -50,12 +50,14 @@ uint32_t ContinuousEdgeLayer::descs_count() const } Expected ContinuousEdgeLayer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, size_t buffer_offset, bool should_bind) + size_t desc_offset, size_t buffer_offset, uint32_t batch_size, bool should_bind, uint32_t stride) { (void)last_desc_interrupts_domain; (void)desc_offset; (void)buffer_offset; + (void)batch_size; (void)should_bind; + (void)stride; // The descriptors in continuous mode are programmed by the hw, nothing to do here. return descriptors_in_buffer(transfer_size); diff --git a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp index ccc761d..44b1d07 100644 --- a/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp +++ b/hailort/libhailort/src/vdma/memory/continuous_edge_layer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -40,7 +40,7 @@ public: virtual uint32_t descs_count() const override; virtual Expected program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, size_t buffer_offset = 0, bool should_bind = false) override; + size_t desc_offset, size_t buffer_offset = 0, uint32_t batch_size = 1, bool should_bind = false, uint32_t stride = 0) override; private: ContinuousEdgeLayer(std::shared_ptr &&buffer, size_t size, size_t offset, diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp index 41cb467..7ee47de 100644 --- a/hailort/libhailort/src/vdma/memory/descriptor_list.cpp +++ b/hailort/libhailort/src/vdma/memory/descriptor_list.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -81,8 +81,8 @@ DescriptorList::DescriptorList(DescriptorList &&other) noexcept : } hailo_status DescriptorList::program(MappedBuffer& buffer, size_t buffer_size, - size_t buffer_offset, ChannelId channel_id, uint32_t starting_desc, bool should_bind /* = true */, - InterruptsDomain last_desc_interrupts /* = InterruptsDomain::NONE */) + size_t buffer_offset, ChannelId channel_id, uint32_t starting_desc, uint32_t batch_size /* = 1 */, + bool should_bind /* = true */, InterruptsDomain last_desc_interrupts /* = InterruptsDomain::NONE */, uint32_t stride /* = 0 */) { const auto desc_list_capacity = m_desc_page_size * count(); CHECK(buffer_size <= desc_list_capacity, HAILO_INVALID_ARGUMENT, @@ -90,7 +90,7 @@ hailo_status DescriptorList::program(MappedBuffer& buffer, size_t buffer_size, buffer_size, desc_list_capacity); return m_driver.descriptors_list_program(m_desc_list_info.handle, buffer.handle(), 
buffer_size, - buffer_offset, channel_id.channel_index, starting_desc, should_bind, last_desc_interrupts); + buffer_offset, channel_id.channel_index, starting_desc, batch_size, should_bind, last_desc_interrupts, stride); } uint32_t DescriptorList::descriptors_in_buffer(size_t buffer_size) const @@ -115,5 +115,17 @@ uint32_t DescriptorList::calculate_descriptors_count(uint32_t buffer_size, uint1 return get_nearest_powerof_2(descs_count, MIN_SG_DESCS_COUNT); } +size_t DescriptorList::descriptors_buffer_allocation_size(uint32_t desc_count) +{ + // based on hailo_desc_list_create from linux driver + auto ALIGN = [](size_t size, size_t alignment) { + const auto mask = alignment - 1; + return (size + mask) & ~mask; + }; + + const auto total_size = vdma::SINGLE_DESCRIPTOR_SIZE * desc_count; + return ALIGN(total_size, vdma::DESCRIPTOR_LIST_ALIGN); +} + } /* namespace vdma */ } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp index c967d8a..9d297e8 100644 --- a/hailort/libhailort/src/vdma/memory/descriptor_list.hpp +++ b/hailort/libhailort/src/vdma/memory/descriptor_list.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -51,6 +51,9 @@ static_assert(MAX_SG_PAGE_SIZE > 0, "MAX_SG_PAGE_SIZE must be larger then 0"); static_assert(is_powerof2(DEFAULT_SG_PAGE_SIZE), "DEFAULT_SG_PAGE_SIZE must be a power of 2"); static_assert(DEFAULT_SG_PAGE_SIZE > 0, "DEFAULT_SG_PAGE_SIZE must be larger then 0"); +static constexpr size_t SINGLE_DESCRIPTOR_SIZE = 0x10; +static constexpr size_t DESCRIPTOR_LIST_ALIGN = 1 << 16; + class DescriptorList { @@ -95,13 +98,21 @@ public: // Map descriptors starting at offset to the start of buffer, wrapping around the descriptor list as needed // On hailo8, we allow configuring buffer without specific channel index (default is INVALID_VDMA_CHANNEL_INDEX). hailo_status program(MappedBuffer& buffer, size_t buffer_size, size_t buffer_offset, - ChannelId channel_id, uint32_t starting_desc = 0, bool should_bind = true, - InterruptsDomain last_desc_interrupts = InterruptsDomain::NONE); + ChannelId channel_id, uint32_t starting_desc = 0, + uint32_t batch_size = 1, + bool should_bind = true, + InterruptsDomain last_desc_interrupts = InterruptsDomain::NONE, + uint32_t stride = 0); uint32_t descriptors_in_buffer(size_t buffer_size) const; static uint32_t descriptors_in_buffer(size_t buffer_size, uint16_t desc_page_size); static uint32_t calculate_descriptors_count(uint32_t buffer_size, uint16_t batch_size, uint16_t desc_page_size); + /** + * Returns the size of the buffer needed to allocate the descriptors list. 
+ */ + static size_t descriptors_buffer_allocation_size(uint32_t desc_count); + private: DescriptorList(uint32_t desc_count, uint16_t desc_page_size, bool is_circular, HailoRTDriver &driver, hailo_status &status); diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp index f13bb53..e65a92f 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp index 1827b05..45b269b 100644 --- a/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/dma_able_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp index 1c31dfd..45d54d9 100644 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -60,7 +60,8 @@ MappedBuffer::MappedBuffer(HailoRTDriver &driver, DmaAbleBufferPtr buffer, Hailo MappedBuffer::~MappedBuffer() { if (HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE != m_mapping_handle) { - m_driver.vdma_buffer_unmap(m_mapping_handle); + auto address = INVALID_FD != m_fd ? static_cast(m_fd) : reinterpret_cast(user_address()); + m_driver.vdma_buffer_unmap(address, size(), m_data_direction); m_mapping_handle = HailoRTDriver::INVALID_DRIVER_VDMA_MAPPING_HANDLE_VALUE; } } @@ -75,6 +76,7 @@ MappedBuffer::MappedBuffer(MappedBuffer &&other) noexcept : void* MappedBuffer::user_address() { + assert (m_buffer); // On dmabuf, m_buffer does not exist return m_buffer->user_address(); } @@ -83,6 +85,11 @@ size_t MappedBuffer::size() const return m_size; } +HailoRTDriver::DmaDirection MappedBuffer::direction() const +{ + return m_data_direction; +} + Expected MappedBuffer::fd() { CHECK(INVALID_FD != m_fd, HAILO_INTERNAL_FAILURE, "fd is only supported for DMABUF type MappedBuffer"); diff --git a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp index 00ea958..0bbedae 100644 --- a/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/mapped_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -61,6 +61,7 @@ public: size_t size() const; void *user_address(); + HailoRTDriver::DmaDirection direction() const; HailoRTDriver::VdmaBufferHandle handle(); hailo_status synchronize(HailoRTDriver::DmaSyncDirection sync_direction); // TODO: validate that offset is cache aligned (HRT-9811) diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp index 7cb3ffb..9b83325 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp index 30a4b6c..e51efa4 100644 --- a/hailort/libhailort/src/vdma/memory/sg_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/sg_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp index 2eac464..ddbd2a8 100644 --- a/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp +++ b/hailort/libhailort/src/vdma/memory/sg_edge_layer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -62,11 +62,12 @@ uint32_t SgEdgeLayer::descs_count() const } Expected SgEdgeLayer::program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, size_t buffer_offset, bool should_bind) + size_t desc_offset, size_t buffer_offset, uint32_t batch_size, bool should_bind, uint32_t stride) { CHECK_SUCCESS(m_desc_list.program(*get_mapped_buffer(), transfer_size, buffer_offset, m_channel_id, - static_cast(desc_offset), should_bind, last_desc_interrupts_domain)); - return descriptors_in_buffer(transfer_size); + static_cast(desc_offset), batch_size, should_bind, + last_desc_interrupts_domain, stride)); + return descriptors_in_buffer(transfer_size) * batch_size; } } diff --git a/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp index 54bb768..324a6eb 100644 --- a/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp +++ b/hailort/libhailort/src/vdma/memory/sg_edge_layer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -47,7 +47,7 @@ public: virtual uint32_t descs_count() const override; virtual Expected program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, size_t buffer_offset = 0, bool should_bind = false) override; + size_t desc_offset, size_t buffer_offset = 0, uint32_t batch_size = 1, bool should_bind = false, uint32_t stride = 0) override; private: SgEdgeLayer(std::shared_ptr &&buffer, DescriptorList &&desc_list, diff --git a/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp index 0a40f16..2858710 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp index 25a95a6..5631dd1 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp +++ b/hailort/libhailort/src/vdma/memory/vdma_buffer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp index 3b953e1..ca66a35 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp +++ b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. 
All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -9,10 +9,46 @@ #include "vdma_edge_layer.hpp" #include "control_protocol.h" +#include "sg_edge_layer.hpp" +#include "continuous_edge_layer.hpp" namespace hailort { namespace vdma { + +Expected> VdmaEdgeLayer::create(HailoRTDriver &driver, + std::shared_ptr backing_buffer, size_t buffer_offset, size_t size, + uint16_t desc_page_size, uint32_t total_desc_count, bool is_circular, ChannelId channel_id) +{ + switch (backing_buffer->type()) { + case Type::SCATTER_GATHER: + { + auto sg_buffer = std::static_pointer_cast(backing_buffer); + TRY(auto sg_edge_layer, SgEdgeLayer::create(std::move(sg_buffer), size, buffer_offset, driver, + total_desc_count, desc_page_size, is_circular, channel_id)); + + auto edge_layer_ptr = make_unique_nothrow(std::move(sg_edge_layer)); + CHECK_NOT_NULL(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(edge_layer_ptr)); + } + case Type::CONTINUOUS: + { + auto continuous_buffer = std::static_pointer_cast(backing_buffer); + TRY(auto continuous_edge_layer, ContinuousEdgeLayer::create(std::move(continuous_buffer), size, buffer_offset, + desc_page_size, total_desc_count)); + auto edge_layer_ptr = make_unique_nothrow(std::move(continuous_edge_layer)); + CHECK_NOT_NULL(edge_layer_ptr, HAILO_OUT_OF_HOST_MEMORY); + + return std::unique_ptr(std::move(edge_layer_ptr)); + } + } + + LOGGER__ERROR("Unsupported buffer type: {}", static_cast(backing_buffer->type())); + return make_unexpected(HAILO_INTERNAL_FAILURE); +} + + VdmaEdgeLayer::VdmaEdgeLayer(std::shared_ptr &&buffer, const size_t size, const size_t offset) : m_buffer(std::move(buffer)), m_size(size), @@ -29,7 +65,7 @@ CONTROL_PROTOCOL__host_buffer_info_t VdmaEdgeLayer::get_host_buffer_info(Type ty { CONTROL_PROTOCOL__host_buffer_info_t buffer_info{}; buffer_info.buffer_type = static_cast((type == vdma::VdmaEdgeLayer::Type::SCATTER_GATHER) ? 
- CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC : + CONTROL_PROTOCOL__HOST_BUFFER_TYPE_EXTERNAL_DESC : CONTROL_PROTOCOL__HOST_BUFFER_TYPE_CCB); buffer_info.dma_address = dma_address; buffer_info.desc_page_size = desc_page_size; diff --git a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp index 4ab3058..9d8a574 100644 --- a/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp +++ b/hailort/libhailort/src/vdma/memory/vdma_edge_layer.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -22,10 +22,11 @@ namespace vdma { class VdmaEdgeLayer { public: - enum class Type { - SCATTER_GATHER, - CONTINUOUS - }; + using Type = VdmaBuffer::Type; + + static Expected> create(HailoRTDriver &driver, + std::shared_ptr backing_buffer, size_t buffer_offset, size_t size, + uint16_t desc_page_size, uint32_t total_desc_count, bool is_circular, ChannelId channel_id); virtual ~VdmaEdgeLayer() = default; @@ -60,7 +61,7 @@ public: hailo_status write(const void *buf_src, size_t count, size_t offset); virtual Expected program_descriptors(size_t transfer_size, InterruptsDomain last_desc_interrupts_domain, - size_t desc_offset, size_t buffer_offset = 0, bool should_bind = false) = 0; + size_t desc_offset, size_t buffer_offset = 0, uint32_t batch_size = 1, bool should_bind = false, uint32_t stride = 0) = 0; CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(uint32_t transfer_size); static CONTROL_PROTOCOL__host_buffer_info_t get_host_buffer_info(Type type, uint64_t dma_address, diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp index 961e0e0..1ab9234 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device.cpp +++ 
b/hailort/libhailort/src/vdma/pcie/pcie_device.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device.hpp b/hailort/libhailort/src/vdma/pcie/pcie_device.hpp index 03119c6..03c96f1 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device.hpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp index 0ca92e0..b628678 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -17,28 +17,56 @@ namespace hailort { -Expected> PcieDeviceHrpcClient::create(const std::string &device_id) +Expected> PcieDeviceHrpcClient::get_connected_client(const std::string &device_id) { auto client = make_shared_nothrow(device_id); - CHECK_NOT_NULL(client, HAILO_INTERNAL_FAILURE); + CHECK_NOT_NULL(client, HAILO_OUT_OF_HOST_MEMORY); auto status = client->connect(); - CHECK_SUCCESS_AS_EXPECTED(status, "Failed to connect to server"); + CHECK_SUCCESS(status, "Failed to connect to server"); - return PcieDeviceHrpcClient::create(device_id, client); + client->register_custom_reply(HailoRpcActionID::CALLBACK_CALLED, + [callback_dispatcher_manager = client->callback_dispatcher_manager()] (const MemoryView &serialized_reply, RpcConnection connection) -> hailo_status { + TRY(auto rpc_callback, CallbackCalledSerializer::deserialize_reply(serialized_reply)); + auto status = callback_dispatcher_manager->at(rpc_callback.dispatcher_id)->trigger_callback(rpc_callback, connection); + CHECK_SUCCESS(status); + + return HAILO_SUCCESS; + }); + + return client; +} + +Expected> PcieDeviceHrpcClient::create(const std::string &device_id) +{ + auto client = get_connected_client(device_id); + return PcieDeviceHrpcClient::create(device_id, client ? 
client.release() : nullptr); +} + +Expected PcieDeviceHrpcClient::create_remote_device(std::shared_ptr client) +{ + TRY(auto request_buffer, client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, CreateDeviceSerializer::serialize_request(MemoryView(*request_buffer))); + TRY(auto result, client->execute_request(HailoRpcActionID::DEVICE__CREATE, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, CreateDeviceSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); + auto status = std::get<0>(tuple); + CHECK_SUCCESS(status); + + auto device_handle = std::get<1>(tuple); + return device_handle; } Expected> PcieDeviceHrpcClient::create(const std::string &device_id, std::shared_ptr client) { - TRY(auto request, CreateDeviceSerializer::serialize_request()); - TRY(auto result, client->execute_request(HailoRpcActionID::DEVICE__CREATE, MemoryView(request))); - TRY(auto tuple, CreateDeviceSerializer::deserialize_reply(MemoryView(result))); - auto status = std::get<0>(tuple); - CHECK_SUCCESS_AS_EXPECTED(status); + auto device_handle = INVALID_HANDLE_ID; + std::shared_ptr callback_dispatcher = nullptr; + if (client) { + TRY(device_handle, create_remote_device(client), "Failed to create device"); + TRY(callback_dispatcher, client->callback_dispatcher_manager()->new_dispatcher(RpcCallbackType::DEVICE_NOTIFICATION, false)); + } - auto device_handle = std::get<1>(tuple); - auto device = make_unique_nothrow(device_id, client, device_handle); + auto device = make_unique_nothrow(device_id, client, device_handle, callback_dispatcher); CHECK_NOT_NULL(device, HAILO_OUT_OF_HOST_MEMORY); return std::unique_ptr(std::move(device)); @@ -46,32 +74,47 @@ Expected> PcieDeviceHrpcClient::create(con PcieDeviceHrpcClient::~PcieDeviceHrpcClient() { - if (INVALID_HANDLE_ID == m_handle) { + if ((INVALID_HANDLE_ID == m_handle) || !m_client) { return; } - auto request = 
DestroyDeviceSerializer::serialize_request(m_handle); - if (!request) { + auto request_buffer = m_client->allocate_request_buffer(); + if (!request_buffer) { + LOGGER__CRITICAL("Failed to create buffer for Device_release request"); + return; + } + + auto request_size = DestroyDeviceSerializer::serialize_request(m_handle, MemoryView(**request_buffer)); + if (!request_size) { LOGGER__CRITICAL("Failed to serialize Device_release request"); return; } - auto result = m_client->execute_request(HailoRpcActionID::DEVICE__DESTROY, MemoryView(*request)); - if (!result) { - LOGGER__CRITICAL("Failed to destroy Device! status = {}", result.status()); + auto result_expected = m_client->execute_request(HailoRpcActionID::DEVICE__DESTROY, MemoryView(request_buffer.value()->data(), *request_size)); + if (!result_expected) { + LOGGER__CRITICAL("Failed to destroy Device! status = {}", result_expected.status()); return; } + auto result = result_expected.release(); - if (HAILO_SUCCESS != DestroyDeviceSerializer::deserialize_reply(MemoryView(*result))) { - LOGGER__CRITICAL("Failed to destroy Device! status = {}", result.status()); + auto status = DestroyDeviceSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to destroy Device! status = {}", status); + } + + status = m_client->callback_dispatcher_manager()->remove_dispatcher(m_callback_dispatcher->id()); + if (HAILO_SUCCESS != status) { + LOGGER__CRITICAL("Failed to remove callback dispatcher! 
status = {}", status); } } Expected PcieDeviceHrpcClient::identify() { - TRY(auto request, IdentifyDeviceSerializer::serialize_request(m_handle)); - TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__IDENTIFY, MemoryView(request))); - TRY(auto tuple, IdentifyDeviceSerializer::deserialize_reply(MemoryView(result))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, IdentifyDeviceSerializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__IDENTIFY, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, IdentifyDeviceSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto identity = std::get<1>(tuple); @@ -81,9 +124,11 @@ Expected PcieDeviceHrpcClient::identify() Expected PcieDeviceHrpcClient::get_extended_device_information() { - TRY(auto request, ExtendedDeviceInfoSerializer::serialize_request(m_handle)); - TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__EXTENDED_INFO, MemoryView(request))); - TRY(auto tuple, ExtendedDeviceInfoSerializer::deserialize_reply(MemoryView(result))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, ExtendedDeviceInfoSerializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__EXTENDED_INFO, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, ExtendedDeviceInfoSerializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto extended_info = std::get<1>(tuple); @@ -96,9 +141,43 @@ Expected 
PcieDeviceHrpcClient::get_chip_temperatu using Serializer = GetChipTemperatureSerializer; constexpr auto ActionID = HailoRpcActionID::DEVICE__GET_CHIP_TEMPERATURE; - TRY(auto request, Serializer::serialize_request(m_handle)); - TRY(auto result, m_client->execute_request(ActionID, MemoryView(request))); - TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); + + CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); + auto info = std::get<1>(tuple); + + return info; +} + +Expected PcieDeviceHrpcClient::query_health_stats() +{ + using Serializer = QueryHealthStatsSerializer; + constexpr auto ActionID = HailoRpcActionID::DEVICE__QUERY_HEALTH_STATS; + + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); + + CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); + auto info = std::get<1>(tuple); + + return info; +} + +Expected PcieDeviceHrpcClient::query_performance_stats() +{ + using Serializer = QueryPerformanceStatsSerializer; + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, 
Serializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__QUERY_PERFORMANCE_STATS, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto info = std::get<1>(tuple); @@ -113,9 +192,11 @@ Expected PcieDeviceHrpcClient::power_measurement( using Serializer = PowerMeasurementSerializer; constexpr auto ActionID = HailoRpcActionID::DEVICE__POWER_MEASUREMENT; - TRY(auto request, Serializer::serialize_request(m_handle, dvm, measurement_type)); - TRY(auto result, m_client->execute_request(ActionID, MemoryView(request))); - TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, dvm, measurement_type, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto power = std::get<1>(tuple); @@ -130,9 +211,11 @@ hailo_status PcieDeviceHrpcClient::start_power_measurement( using Serializer = StartPowerMeasurementSerializer; constexpr auto ActionID = HailoRpcActionID::DEVICE__START_POWER_MEASUREMENT; - TRY(auto request, Serializer::serialize_request(m_handle, averaging_factor, sampling_period)); - TRY(auto result, m_client->execute_request(ActionID, MemoryView(request))); - return Serializer::deserialize_reply(MemoryView(result)); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + 
TRY(auto request_size, Serializer::serialize_request(m_handle, averaging_factor, sampling_period, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); + return Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); } Expected PcieDeviceHrpcClient::get_power_measurement( @@ -144,9 +227,11 @@ Expected PcieDeviceHrpcClient::get_power_measure using Serializer = GetPowerMeasurementSerializer; constexpr auto ActionID = HailoRpcActionID::DEVICE__GET_POWER_MEASUREMENT; - TRY(auto request, Serializer::serialize_request(m_handle, should_clear)); - TRY(auto result, m_client->execute_request(ActionID, MemoryView(request))); - TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, should_clear, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); auto data = std::get<1>(tuple); @@ -163,10 +248,12 @@ hailo_status PcieDeviceHrpcClient::set_power_measurement( using Serializer = SetPowerMeasurementSerializer; constexpr auto ActionID = HailoRpcActionID::DEVICE__SET_POWER_MEASUREMENT; - TRY(auto request, Serializer::serialize_request(m_handle, dvm, measurement_type)); - TRY(auto result, m_client->execute_request(ActionID, MemoryView(request))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, dvm, measurement_type, 
MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); - return Serializer::deserialize_reply(MemoryView(result)); + return Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); } hailo_status PcieDeviceHrpcClient::stop_power_measurement() @@ -174,14 +261,33 @@ hailo_status PcieDeviceHrpcClient::stop_power_measurement() using Serializer = StopPowerMeasurementSerializer; constexpr auto ActionID = HailoRpcActionID::DEVICE__STOP_POWER_MEASUREMENT; - TRY(auto request, Serializer::serialize_request(m_handle)); - TRY(auto result, m_client->execute_request(ActionID, MemoryView(request))); + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); - return Serializer::deserialize_reply(MemoryView(result)); + return Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); +} + +Expected PcieDeviceHrpcClient::get_architecture() const +{ + using Serializer = GetArchitectureSerializer; + constexpr auto ActionID = HailoRpcActionID::DEVICE__GET_ARCHITECTURE; + + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + TRY(auto request_buffer, m_client->allocate_request_buffer(), "Failed to allocate request buffer"); + TRY(auto request_size, Serializer::serialize_request(m_handle, MemoryView(*request_buffer))); + TRY(auto result, m_client->execute_request(ActionID, MemoryView(request_buffer->data(), request_size))); + TRY(auto tuple, Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size))); + + CHECK_SUCCESS_AS_EXPECTED(std::get<0>(tuple)); + auto device_arch = std::get<1>(tuple); + return device_arch; } hailo_status 
PcieDeviceHrpcClient::dma_map(void *address, size_t size, hailo_dma_buffer_direction_t data_direction) { + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); auto driver = m_client->get_driver(); if (nullptr == driver) { return HAILO_SUCCESS; @@ -191,6 +297,7 @@ hailo_status PcieDeviceHrpcClient::dma_map(void *address, size_t size, hailo_dma hailo_status PcieDeviceHrpcClient::dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t data_direction) { + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); auto driver = m_client->get_driver(); if (nullptr == driver) { return HAILO_SUCCESS; @@ -200,6 +307,7 @@ hailo_status PcieDeviceHrpcClient::dma_unmap(void *address, size_t size, hailo_d hailo_status PcieDeviceHrpcClient::dma_map_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t data_direction) { + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); auto driver = m_client->get_driver(); if (nullptr == driver) { return HAILO_SUCCESS; @@ -209,6 +317,7 @@ hailo_status PcieDeviceHrpcClient::dma_map_dmabuf(int dmabuf_fd, size_t size, ha hailo_status PcieDeviceHrpcClient::dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t data_direction) { + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); auto driver = m_client->get_driver(); if (nullptr == driver) { return HAILO_SUCCESS; @@ -216,4 +325,81 @@ hailo_status PcieDeviceHrpcClient::dma_unmap_dmabuf(int dmabuf_fd, size_t size, return VdmaDevice::dma_unmap_dmabuf_impl(*driver.get(), dmabuf_fd, size, data_direction); } -} /* namespace hailort */ +hailo_status PcieDeviceHrpcClient::reset(hailo_reset_device_mode_t mode) +{ + CHECK_NOT_NULL(m_client, HAILO_INVALID_OPERATION); + auto driver = m_client->get_driver(); + CHECK_NOT_NULL(driver, HAILO_NOT_IMPLEMENTED); + + if (mode != HAILO_RESET_DEVICE_MODE_CHIP) { + return HAILO_NOT_IMPLEMENTED; + } + + // Disconnect client before reset + m_client = nullptr; + return driver->reset_chip(); +} + +hailo_status 
PcieDeviceHrpcClient::set_notification_callback(const NotificationCallback &func, hailo_notification_id_t notification_id, + void *opaque) +{ + switch (notification_id) { + case HAILO_NOTIFICATION_ID_HEALTH_MONITOR_TEMPERATURE_ALARM: + case HAILO_NOTIFICATION_ID_HEALTH_MONITOR_OVERCURRENT_ALARM: + break; + default: + LOGGER__ERROR("Unsupported notification id = {}", static_cast(notification_id)); + return HAILO_NOT_IMPLEMENTED; + } + + m_callback_dispatcher->register_callback(notification_id, + [this, func, opaque = opaque] + (const RpcCallback &rpc_callback, hailo_status shutdown_status) { + if (shutdown_status != HAILO_UNINITIALIZED) { + return; + } + func(*this, rpc_callback.data.device_notification.notification, opaque); + }); + using Serializer = SetNotificationCallbackSerializer; + TRY(auto serialized_request, m_client->allocate_request_buffer()); + TRY(auto request_size, Serializer::serialize_request({m_handle, notification_id, static_cast(notification_id), + m_callback_dispatcher->id()}, MemoryView(*serialized_request))); + TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__SET_NOTIFICATION_CALLBACK, MemoryView(serialized_request->data(), request_size))); + return Serializer::deserialize_reply(MemoryView(result.buffer->data(), result.header.size)); +} + +hailo_status PcieDeviceHrpcClient::remove_notification_callback(hailo_notification_id_t notification_id) +{ + auto status = m_callback_dispatcher->remove_callback(notification_id); + CHECK_SUCCESS(status); + + using Serializer = RemoveNotificationCallbackSerializer; + TRY(auto serialized_request, m_client->allocate_request_buffer()); + TRY(auto request_size, Serializer::serialize_request(m_handle, notification_id, MemoryView(*serialized_request))); + TRY(auto result, m_client->execute_request(HailoRpcActionID::DEVICE__REMOVE_NOTIFICATION_CALLBACK, MemoryView(serialized_request->data(), request_size))); + return Serializer::deserialize_reply(MemoryView(result.buffer->data(), 
result.header.size)); +} + +hailo_status PcieDeviceHrpcClient::before_fork() +{ + m_client.reset(); + return HAILO_SUCCESS; +} + +hailo_status PcieDeviceHrpcClient::after_fork_in_parent() +{ + std::this_thread::sleep_for(std::chrono::milliseconds(100)); // TODO: remove this after HRT-14998 is fixed + TRY(m_client, get_connected_client(m_device_id), "Failed to create client"); + // Keeping the same device handle + return HAILO_SUCCESS; +} + +hailo_status PcieDeviceHrpcClient::after_fork_in_child() +{ + std::this_thread::sleep_for(std::chrono::milliseconds(200)); // TODO: remove this after HRT-14998 is fixed + TRY(m_client, get_connected_client(m_device_id), "Failed to create client"); + TRY(m_handle, create_remote_device(m_client), "Failed to create device"); + return HAILO_SUCCESS; +} + +} /* namespace hailort */ \ No newline at end of file diff --git a/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp index 71bbbba..b0c6229 100644 --- a/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp +++ b/hailort/libhailort/src/vdma/pcie/pcie_device_hrpc_client.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -13,6 +13,7 @@ #include "hailo/device.hpp" #include "hailo/hailort.h" #include "hrpc/client.hpp" +#include "rpc_callbacks/rpc_callbacks_dispatcher.hpp" namespace hailort @@ -24,17 +25,15 @@ public: static Expected> create(const std::string &device_id, std::shared_ptr client); - PcieDeviceHrpcClient(const std::string &device_id, std::shared_ptr client, uint32_t handle) : - Device(Device::Type::PCIE), m_device_id(device_id), m_client(client), m_handle(handle) {} + PcieDeviceHrpcClient(const std::string &device_id, std::shared_ptr client, uint32_t handle, + std::shared_ptr callback_dispatcher) : + Device(Device::Type::PCIE), m_device_id(device_id), m_client(client), m_handle(handle), m_callback_dispatcher(callback_dispatcher) {} virtual ~PcieDeviceHrpcClient(); virtual Expected configure(Hef &/*hef*/, const NetworkGroupsParamsMap &configure_params={}) override { (void)configure_params; return make_unexpected(HAILO_NOT_IMPLEMENTED); } virtual Expected read_log(MemoryView &/*buffer*/, hailo_cpu_id_t /*cpu_id*/) override { return make_unexpected(HAILO_NOT_IMPLEMENTED); } - virtual hailo_status reset(hailo_reset_device_mode_t /*mode*/) override { return HAILO_NOT_IMPLEMENTED; } - virtual hailo_status set_notification_callback(const NotificationCallback &/*func*/, hailo_notification_id_t /*notification_id*/, - void */*opaque*/) override { return HAILO_NOT_IMPLEMENTED; } - virtual hailo_status remove_notification_callback(hailo_notification_id_t /*notification_id*/) override { return HAILO_NOT_IMPLEMENTED; } + virtual hailo_status reset(hailo_reset_device_mode_t mode) override; virtual hailo_status firmware_update(const MemoryView &/*firmware_binary*/, bool /*should_reset*/) override { return HAILO_NOT_IMPLEMENTED; } virtual hailo_status second_stage_update(uint8_t */*second_stage_binary*/, uint32_t /*second_stage_binary_length*/) override { return HAILO_NOT_IMPLEMENTED; } virtual 
hailo_status store_sensor_config(uint32_t /*section_index*/, hailo_sensor_types_t /*sensor_type*/, @@ -55,7 +54,6 @@ public: virtual Expected read_user_config() override { return make_unexpected(HAILO_NOT_IMPLEMENTED); } virtual hailo_status write_user_config(const MemoryView &/*buffer*/) override { return HAILO_NOT_IMPLEMENTED; } virtual hailo_status erase_user_config() override { return HAILO_NOT_IMPLEMENTED; } - virtual Expected get_architecture() const override { return make_unexpected(HAILO_NOT_IMPLEMENTED); } virtual const char* get_dev_id() const override { return m_device_id.c_str(); } virtual bool is_stream_interface_supported(const hailo_stream_interface_t &/*stream_interface*/) const override { return false; } @@ -67,21 +65,35 @@ public: virtual Expected identify() override; virtual Expected get_extended_device_information() override; virtual Expected get_chip_temperature() override; + virtual Expected query_health_stats() override; + virtual Expected query_performance_stats() override; virtual Expected power_measurement(hailo_dvm_options_t dvm, hailo_power_measurement_types_t measurement_type) override; virtual hailo_status start_power_measurement(hailo_averaging_factor_t averaging_factor, hailo_sampling_period_t sampling_period) override; virtual Expected get_power_measurement(hailo_measurement_buffer_index_t buffer_index, bool should_clear) override; virtual hailo_status set_power_measurement(hailo_measurement_buffer_index_t buffer_index, hailo_dvm_options_t dvm, hailo_power_measurement_types_t measurement_type) override; virtual hailo_status stop_power_measurement() override; + virtual Expected get_architecture() const override; + virtual hailo_status set_notification_callback(const NotificationCallback &func, hailo_notification_id_t notification_id, + void *opaque) override; + virtual hailo_status remove_notification_callback(hailo_notification_id_t notification_id) override; virtual hailo_status dma_map(void *address, size_t size, 
hailo_dma_buffer_direction_t direction) override; virtual hailo_status dma_unmap(void *address, size_t size, hailo_dma_buffer_direction_t direction) override; virtual hailo_status dma_map_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override; virtual hailo_status dma_unmap_dmabuf(int dmabuf_fd, size_t size, hailo_dma_buffer_direction_t direction) override; + virtual hailo_status before_fork() override; + virtual hailo_status after_fork_in_parent() override; + virtual hailo_status after_fork_in_child() override; + private: + static Expected> get_connected_client(const std::string &device_id); + static Expected create_remote_device(std::shared_ptr client); + std::string m_device_id; std::shared_ptr m_client; uint32_t m_handle; + std::shared_ptr m_callback_dispatcher; }; } /* namespace hailort */ diff --git a/hailort/libhailort/src/vdma/pcie_session.cpp b/hailort/libhailort/src/vdma/pcie_session.cpp index e262dd3..94ad207 100644 --- a/hailort/libhailort/src/vdma/pcie_session.cpp +++ b/hailort/libhailort/src/vdma/pcie_session.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -44,7 +44,7 @@ Expected PcieSession::create(std::shared_ptr driver, auto create_channel = [&](vdma::ChannelId id, vdma::BoundaryChannel::Direction dir, vdma::DescriptorList &&desc_list) { // TODO: HRT-15701 : remove 4 return vdma::BoundaryChannel::create(*driver, id, dir, std::move(desc_list), *transfer_launcher, - MAX_ONGOING_TRANSFERS - 1, 4 * MAX_ONGOING_TRANSFERS, true); + 4 * MAX_ONGOING_TRANSFERS, true); }; TRY(auto input_channel, create_channel(input_channel_id, vdma::BoundaryChannel::Direction::H2D, std::move(input_desc_list))); @@ -82,12 +82,22 @@ hailo_status PcieSession::read(void *buffer, size_t size, std::chrono::milliseco hailo_status PcieSession::write_async(const void *buffer, size_t size, std::function &&callback) { - return launch_transfer_async(*m_input, const_cast(buffer), size, std::move(callback)); + return m_input->launch_transfer(to_request(const_cast(buffer), size, std::move(callback))); +} + +hailo_status PcieSession::write_async(TransferRequest &&request) +{ + return m_input->launch_transfer(std::move(request)); } hailo_status PcieSession::read_async(void *buffer, size_t size, std::function &&callback) { - return launch_transfer_async(*m_output, buffer, size, std::move(callback)); + return m_output->launch_transfer(to_request(buffer, size, std::move(callback))); +} + +hailo_status PcieSession::read_async(TransferRequest &&request) +{ + return m_output->launch_transfer(std::move(request)); } hailo_status PcieSession::close() @@ -142,7 +152,7 @@ hailo_status PcieSession::launch_transfer_sync(vdma::BoundaryChannel &channel, cb_params.cv.notify_one(); }; - auto status = launch_transfer_async(channel, buffer, size, std::move(callback)); + auto status = channel.launch_transfer(to_request(buffer, size, callback)); if (HAILO_STREAM_ABORT == status) { return status; } @@ -154,17 +164,6 @@ hailo_status PcieSession::launch_transfer_sync(vdma::BoundaryChannel 
&channel, return cb_params.status; } -hailo_status PcieSession::launch_transfer_async(vdma::BoundaryChannel &channel, - void *buffer, size_t size, std::function &&callback) -{ - TransferRequest request{ - {TransferBuffer(MemoryView(buffer, size))}, - std::move(callback) - }; - - return channel.launch_transfer(std::move(request)); -} - Expected PcieSession::create_desc_list(HailoRTDriver &driver) { const bool circular = true; diff --git a/hailort/libhailort/src/vdma/pcie_session.hpp b/hailort/libhailort/src/vdma/pcie_session.hpp index 0746399..6205879 100644 --- a/hailort/libhailort/src/vdma/pcie_session.hpp +++ b/hailort/libhailort/src/vdma/pcie_session.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -75,6 +75,9 @@ public: hailo_status write_async(const void *buffer, size_t size, std::function &&callback); hailo_status read_async(void *buffer, size_t size, std::function &&callback); + hailo_status write_async(TransferRequest &&request); + hailo_status read_async(TransferRequest &&request); + hailo_status close(); inline PcieSessionType session_type() const @@ -115,8 +118,6 @@ private: hailo_status launch_transfer_sync(vdma::BoundaryChannel &channel, void *buffer, size_t size, std::chrono::milliseconds timeout, CbParams &cb_params); - static hailo_status launch_transfer_async(vdma::BoundaryChannel &channel, - void *buffer, size_t size, std::function &&callback); static Expected create_desc_list(HailoRTDriver &driver); std::atomic m_should_close {true}; diff --git a/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp index 1aa317b..beef102 100644 --- a/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_activated_core_op.cpp @@ -1,5 
+1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp index c6805cc..d20ba66 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ @@ -31,7 +31,7 @@ Expected> VdmaConfigCoreOp::create_shared(Acti VdmaConfigCoreOp::~VdmaConfigCoreOp() { - (void)shutdown(); + (void)shutdown_impl(); } VdmaConfigCoreOp::VdmaConfigCoreOp(ActiveCoreOpHolder &active_core_op_holder, const ConfigureNetworkParams &config_params, @@ -164,6 +164,11 @@ hailo_status VdmaConfigCoreOp::unregister_cache_update_callback() } hailo_status VdmaConfigCoreOp::shutdown() +{ + return shutdown_impl(); +} + +hailo_status VdmaConfigCoreOp::shutdown_impl() { hailo_status status = HAILO_SUCCESS; // Success oriented @@ -328,10 +333,10 @@ Expected VdmaConfigCoreOp::get_cache_entry_size(uint32_t cache_id) con return cache_buffer_it->second.entry_size(); } -hailo_status VdmaConfigCoreOp::init_cache(uint32_t read_offset, int32_t write_offset_delta) +hailo_status VdmaConfigCoreOp::init_cache(uint32_t read_offset) { CHECK(has_caches(), HAILO_INVALID_OPERATION, "No caches in core-op"); - return m_cache_manager->init_caches(read_offset, write_offset_delta); + return m_cache_manager->init_caches(read_offset); } hailo_status VdmaConfigCoreOp::update_cache_offset(int32_t offset_delta_entries) diff --git a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp index 
8a78745..4a34fde 100644 --- a/hailort/libhailort/src/vdma/vdma_config_core_op.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_core_op.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -49,7 +49,7 @@ public: virtual hailo_status activate_impl(uint16_t dynamic_batch_size) override; // Will first deactivate host resources (via deactivate_host_resources) and then reset the core-op on the fw virtual hailo_status deactivate_impl() override; - virtual hailo_status shutdown() override final; // Final since called from destructor + virtual hailo_status shutdown() override; // Activate all resources related to the core-op on the host. hailo_status activate_host_resources(); @@ -81,7 +81,7 @@ public: virtual Expected get_cache_read_length() const override; virtual Expected get_cache_write_length() const override; virtual Expected get_cache_entry_size(uint32_t cache_id) const override; - virtual hailo_status init_cache(uint32_t read_offset, int32_t write_offset_delta) override; + virtual hailo_status init_cache(uint32_t read_offset) override; virtual hailo_status update_cache_offset(int32_t offset_delta_entries) override; virtual Expected> get_cache_ids() const override; virtual Expected read_cache_buffer(uint32_t cache_id) override; @@ -101,6 +101,7 @@ public: private: Expected get_cache_length_impl(std::function length_getter, const std::string &length_type) const; + hailo_status shutdown_impl(); std::shared_ptr m_resources_manager; std::shared_ptr m_cache_manager; diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.cpp b/hailort/libhailort/src/vdma/vdma_config_manager.cpp index 055e18c..8316460 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.cpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 
Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/vdma_config_manager.hpp b/hailort/libhailort/src/vdma/vdma_config_manager.hpp index 9c182ff..38bfe99 100644 --- a/hailort/libhailort/src/vdma/vdma_config_manager.hpp +++ b/hailort/libhailort/src/vdma/vdma_config_manager.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/vdma_device.cpp b/hailort/libhailort/src/vdma/vdma_device.cpp index 0bb4b3f..d7534fd 100644 --- a/hailort/libhailort/src/vdma/vdma_device.cpp +++ b/hailort/libhailort/src/vdma/vdma_device.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -164,7 +164,7 @@ Expected> VdmaDevice::create_configured_ TRY(auto resource_manager, ResourcesManagerBuilder::build(current_core_op_index, *this, get_driver(), m_cache_manager, config_params, core_op_metadata, - static_cast(hef.pimpl->get_device_arch()))); + static_cast(hef.pimpl->get_device_arch()), hef)); TRY(auto core_op_ptr, VdmaConfigCoreOp::create_shared(m_active_core_op_holder, config_params, resource_manager, m_cache_manager, core_op_metadata)); diff --git a/hailort/libhailort/src/vdma/vdma_device.hpp b/hailort/libhailort/src/vdma/vdma_device.hpp index 558df1d..1743ee3 100644 --- a/hailort/libhailort/src/vdma/vdma_device.hpp +++ b/hailort/libhailort/src/vdma/vdma_device.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. 
+ * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/libhailort/src/vdma/vdma_stream.cpp b/hailort/libhailort/src/vdma/vdma_stream.cpp index f92a7c6..79dc33d 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.cpp +++ b/hailort/libhailort/src/vdma/vdma_stream.cpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. * Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** @@ -101,7 +101,11 @@ void VdmaInputStream::set_vdevice_core_op_handle(vdevice_core_op_handle_t core_o Expected> VdmaInputStream::allocate_buffer_pool() { const auto frame_size = get_frame_size(); - const auto max_transfers_in_desc_list = m_channel->get_max_aligned_transfers_in_desc_list(frame_size); + + // Since this calc if for aligned transfers, we don't need to factor in the bounce buffer + constexpr auto NO_BOUNCE_BUFFER = false; + const auto max_transfers_in_desc_list = m_channel->get_desc_list().max_transfers(static_cast(frame_size), NO_BOUNCE_BUFFER); + const auto max_ongoing_transfers = m_channel->get_max_ongoing_transfers(frame_size); if (max_transfers_in_desc_list < max_ongoing_transfers) { // In this case we don't bind, since the descriptor list isn't big enough to hold all the buffers. 
@@ -172,7 +176,7 @@ hailo_status VdmaInputStream::bind_buffer(TransferRequest &&transfer_request) { m_channel->remove_buffer_binding(); if (TransferBufferType::MEMORYVIEW == transfer_request.transfer_buffers[0].type()) { - TRY(auto is_request_aligned, transfer_request.is_request_aligned()); + const auto is_request_aligned = transfer_request.transfer_buffers[0].is_aligned_for_dma(); if (!is_request_aligned) { // Best effort, if buffer is not aligned - will program descriptors later return HAILO_SUCCESS; @@ -189,7 +193,7 @@ hailo_status VdmaInputStream::write_async_impl(TransferRequest &&transfer_reques if (transfer_request.transfer_buffers[0].type() == TransferBufferType::DMABUF) { return m_channel->launch_transfer(std::move(transfer_request)); } else { - TRY(auto is_request_aligned, transfer_request.is_request_aligned()); + const auto is_request_aligned = transfer_request.transfer_buffers[0].is_aligned_for_dma(); if (is_request_aligned) { return m_channel->launch_transfer(std::move(transfer_request)); } else { @@ -266,7 +270,10 @@ hailo_stream_interface_t VdmaOutputStream::get_interface() const Expected> VdmaOutputStream::allocate_buffer_pool() { - const auto max_transfers_in_desc_list = m_channel->get_max_aligned_transfers_in_desc_list(m_transfer_size); + // Since this calc if for aligned transfers, we don't need to factor in the bounce buffer + constexpr auto NO_BOUNCE_BUFFER = false; + const auto max_transfers_in_desc_list = m_channel->get_desc_list().max_transfers(m_transfer_size, NO_BOUNCE_BUFFER); + const auto max_ongoing_transfers = m_channel->get_max_ongoing_transfers(m_transfer_size); if (max_transfers_in_desc_list < max_ongoing_transfers) { // In this case we don't bind, since the descriptor list isn't big enough to hold all the buffers. 
@@ -357,7 +364,7 @@ hailo_status VdmaOutputStream::read_async_impl(TransferRequest &&transfer_reques if (transfer_request.transfer_buffers[0].type() == TransferBufferType::DMABUF) { return m_channel->launch_transfer(std::move(transfer_request)); } else { - TRY(auto is_request_aligned, transfer_request.is_request_aligned()); + const auto is_request_aligned = transfer_request.transfer_buffers[0].is_aligned_for_dma(); if (is_request_aligned) { bool can_skip_alignment = true; // TODO :change back to false when HRT-15741 is resolved, then implement HRT-15731 bool owned_by_user = (StreamBufferMode::OWNING != buffer_mode()); // Buffers owned by HRT are always aligned @@ -383,7 +390,7 @@ hailo_status VdmaOutputStream::bind_buffer(TransferRequest &&transfer_request) { m_channel->remove_buffer_binding(); if (TransferBufferType::MEMORYVIEW == transfer_request.transfer_buffers[0].type()) { - TRY(auto is_request_aligned, transfer_request.is_request_aligned()); + const auto is_request_aligned = transfer_request.transfer_buffers[0].is_aligned_for_dma(); TRY(auto is_request_end_aligned, transfer_request.is_request_end_aligned()); if (!is_request_aligned || !is_request_end_aligned) { // Best effort, if buffer is not aligned - will program descriptors later diff --git a/hailort/libhailort/src/vdma/vdma_stream.hpp b/hailort/libhailort/src/vdma/vdma_stream.hpp index c7f7496..274d0e7 100644 --- a/hailort/libhailort/src/vdma/vdma_stream.hpp +++ b/hailort/libhailort/src/vdma/vdma_stream.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2019-2024 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/prepare_externals/CMakeLists.txt b/hailort/prepare_externals/CMakeLists.txt index a4c8716..927b2e1 100644 --- a/hailort/prepare_externals/CMakeLists.txt +++ b/hailort/prepare_externals/CMakeLists.txt @@ -8,6 +8,7 @@ include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/pybind11.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/catch2.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/spdlog.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/json.cmake) +include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/cpp-httplib.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/dotwriter.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/benchmark.cmake) include(${CMAKE_CURRENT_LIST_DIR}/../cmake/external/readerwriterqueue.cmake) diff --git a/hailort/rpc/hailort_rpc.proto b/hailort/rpc/hailort_rpc.proto index 3bb9702..211f396 100644 --- a/hailort/rpc/hailort_rpc.proto +++ b/hailort/rpc/hailort_rpc.proto @@ -36,7 +36,7 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_is_multi_context (ConfiguredNetworkGroup_is_multi_context_Request) returns (ConfiguredNetworkGroup_is_multi_context_Reply) {} rpc ConfiguredNetworkGroup_get_config_params(ConfiguredNetworkGroup_get_config_params_Request) returns (ConfiguredNetworkGroup_get_config_params_Reply) {} rpc ConfiguredNetworkGroup_get_sorted_output_names(ConfiguredNetworkGroup_get_sorted_output_names_Request) returns (ConfiguredNetworkGroup_get_sorted_output_names_Reply) {} - rpc ConfiguredNetworkGroup_get_min_buffer_pool_size(ConfiguredNetworkGroup_get_min_buffer_pool_size_Request) returns (ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply) {} + rpc ConfiguredNetworkGroup_infer_queue_size(ConfiguredNetworkGroup_infer_queue_size_Request) returns (ConfiguredNetworkGroup_infer_queue_size_Reply) {} rpc 
ConfiguredNetworkGroup_get_stream_names_from_vstream_name(ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Request) returns (ConfiguredNetworkGroup_get_stream_names_from_vstream_name_Reply) {} rpc ConfiguredNetworkGroup_get_vstream_names_from_stream_name(ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Request) returns (ConfiguredNetworkGroup_get_vstream_names_from_stream_name_Reply) {} rpc ConfiguredNetworkGroup_infer_async(ConfiguredNetworkGroup_infer_async_Request) returns (ConfiguredNetworkGroup_infer_async_Reply) {} @@ -46,7 +46,6 @@ service ProtoHailoRtRpc { rpc ConfiguredNetworkGroup_set_nms_iou_threshold(ConfiguredNetworkGroup_set_nms_iou_threshold_Request) returns (ConfiguredNetworkGroup_set_nms_iou_threshold_Reply) {} rpc ConfiguredNetworkGroup_set_nms_max_bboxes_per_class(ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Request) returns (ConfiguredNetworkGroup_set_nms_max_bboxes_per_class_Reply) {} rpc ConfiguredNetworkGroup_set_nms_max_bboxes_total(ConfiguredNetworkGroup_set_nms_max_bboxes_total_Request) returns (ConfiguredNetworkGroup_set_nms_max_bboxes_total_Reply) {} - rpc ConfiguredNetworkGroup_set_nms_result_order_type(ConfiguredNetworkGroup_set_nms_result_order_type_Request) returns (ConfiguredNetworkGroup_set_nms_result_order_type_Reply) {} rpc ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size(ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request) returns (ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Reply) {} @@ -272,7 +271,6 @@ message ProtoNmsInfo { uint32 burst_size = 7; ProtoNmsBurstType burst_type = 8; uint32 max_bboxes_total = 9; - uint32 order_type = 10; } message ProtoQuantInfo { @@ -436,10 +434,8 @@ message ProtoNmsPostProcessConfig { uint32 number_of_classes = 4; bool background_removal = 5; uint32 background_removal_index = 6; - bool cross_classes = 7; - bool bbox_only = 8; - uint32 max_proposals_total = 9; - uint32 order_type = 10; + bool bbox_only = 7; + uint32 max_proposals_total = 
8; } message ProtoYolov8MatchingLayersNames { @@ -737,13 +733,13 @@ message ConfiguredNetworkGroup_get_sorted_output_names_Reply { repeated string sorted_output_names = 2; } -message ConfiguredNetworkGroup_get_min_buffer_pool_size_Request { +message ConfiguredNetworkGroup_infer_queue_size_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; } -message ConfiguredNetworkGroup_get_min_buffer_pool_size_Reply { +message ConfiguredNetworkGroup_infer_queue_size_Reply { uint32 status = 1; - uint32 min_buffer_pool_size = 2; + uint32 infer_queue_size = 2; } message ConfiguredNetworkGroup_get_layer_info_Request { @@ -805,16 +801,6 @@ message ConfiguredNetworkGroup_set_nms_max_bboxes_total_Reply { uint32 status = 1; } -message ConfiguredNetworkGroup_set_nms_result_order_type_Request { - ProtoConfiguredNetworkGroupIdentifier identifier = 1; - string edge_name = 2; - uint32 nms_result_order_type = 3; -} - -message ConfiguredNetworkGroup_set_nms_result_order_type_Reply { - uint32 status = 1; -} - message ConfiguredNetworkGroup_set_nms_max_accumulated_mask_size_Request { ProtoConfiguredNetworkGroupIdentifier identifier = 1; string edge_name = 2; diff --git a/hailort/rpc/rpc_definitions.hpp b/hailort/rpc/rpc_definitions.hpp index b0a6c81..10f99e8 100644 --- a/hailort/rpc/rpc_definitions.hpp +++ b/hailort/rpc/rpc_definitions.hpp @@ -1,5 +1,5 @@ /** - * Copyright (c) 2020-2022 Hailo Technologies Ltd. All rights reserved. + * Copyright (c) 2019-2025 Hailo Technologies Ltd. All rights reserved. 
* Distributed under the MIT license (https://opensource.org/licenses/MIT) **/ /** diff --git a/hailort/scripts/download_hefs.cmd b/hailort/scripts/download_hefs.cmd index 4c91acf..72c6f54 100644 --- a/hailort/scripts/download_hefs.cmd +++ b/hailort/scripts/download_hefs.cmd @@ -1,7 +1,7 @@ :: cmd @ECHO OFF set BASE_URI=https://hailo-hailort.s3.eu-west-2.amazonaws.com -set HRT_VERSION=4.20.1 +set HRT_VERSION=4.21.0 set REMOTE_HEF_DIR=Hailo8/%HRT_VERSION%/HEFS set LOCAL_EXAMPLES_HEF_DIR=..\libhailort\examples\hefs set LOCAL_TUTORIALS_HEF_DIR=..\libhailort\bindings\python\platform\hailo_tutorials\hefs diff --git a/hailort/scripts/download_hefs.sh b/hailort/scripts/download_hefs.sh index 9451216..f02b5d2 100755 --- a/hailort/scripts/download_hefs.sh +++ b/hailort/scripts/download_hefs.sh @@ -2,7 +2,7 @@ set -e readonly BASE_URI="https://hailo-hailort.s3.eu-west-2.amazonaws.com" -readonly HRT_VERSION=4.20.1 +readonly HRT_VERSION=4.21.0 readonly REMOTE_HEF_DIR="Hailo8/${HRT_VERSION}/HEFS" readonly LOCAL_EXAMPLES_HEF_DIR="../libhailort/examples/hefs" readonly LOCAL_TUTORIALS_HEF_DIR="../libhailort/bindings/python/platform/hailo_tutorials/hefs"