diff --git a/src/arena.c b/src/arena.c
index ce117226..a809b85f 100644
--- a/src/arena.c
+++ b/src/arena.c
@@ -799,11 +799,23 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id, mi_stats_t*
 static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept
 {
   if (arena_id != NULL) *arena_id = _mi_arena_id_none();
-  if (size < MI_ARENA_BLOCK_SIZE) return false;
-
+  if (size < MI_ARENA_BLOCK_SIZE) {
+    _mi_warning_message("the arena size is too small (memory at %p with size %zu)\n", start, size);
+    return false;
+  }
   if (is_large) {
     mi_assert_internal(memid.initially_committed && memid.is_pinned);
   }
+  if (!_mi_is_aligned(start, MI_SEGMENT_ALIGN)) {
+    void* const aligned_start = mi_align_up_ptr(start, MI_SEGMENT_ALIGN);
+    const size_t diff = (uint8_t*)aligned_start - (uint8_t*)start;
+    if (diff >= size || (size - diff) < MI_ARENA_BLOCK_SIZE) {
+      _mi_warning_message("after alignment, the size of the arena becomes too small (memory at %p with size %zu)\n", start, size);
+      return false;
+    }
+    start = aligned_start;
+    size = size - diff;
+  }
 
   const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
   const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
diff --git a/test/main-override-static.c b/test/main-override-static.c
index 4ead333a..1fee2452 100644
--- a/test/main-override-static.c
+++ b/test/main-override-static.c
@@ -1,3 +1,6 @@
+#if _WIN32
+#include <windows.h>
+#endif
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@@ -23,11 +26,13 @@ static void test_heap_walk(void);
 static void test_heap_arena(void);
 static void test_align(void);
 static void test_canary_leak(void);
+static void test_manage_os_memory(void);
 // static void test_large_pages(void);
 
 int main() {
   mi_version();
   mi_stats_reset();
+  test_manage_os_memory();
   // test_large_pages();
   // detect double frees and heap corruption
   // double_free1();
@@ -263,6 +268,35 @@ static void test_canary_leak(void) {
   free(p);
 }
 
+#if _WIN32
+static void test_manage_os_memory(void) {
+  size_t size = 256 * 1024 * 1024;
+  void* ptr = VirtualAlloc(NULL, size, MEM_RESERVE | MEM_COMMIT, PAGE_READWRITE);
+  mi_arena_id_t arena_id;
+  mi_manage_os_memory_ex(ptr, size, true /* committed */, true /* pinned */, false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id);
+  mi_heap_t* cuda_heap = mi_heap_new_in_arena(arena_id); // you can do this in any thread
+
+  // now allocate only in the cuda arena
+  void* p1 = mi_heap_malloc(cuda_heap, 8);
+  int* p2 = mi_heap_malloc_tp(cuda_heap, int);
+  *p2 = 42;
+
+  // and maybe set the cuda heap as the default heap? (but careful as now `malloc` will allocate in the cuda heap as well)
+  {
+    mi_heap_t* prev_default_heap = mi_heap_set_default(cuda_heap);
+    void* p3 = mi_malloc(8); // allocate in the cuda heap
+    mi_free(p3);
+    mi_heap_set_default(prev_default_heap); // restore the previous default heap
+  }
+  mi_free(p1);
+  mi_free(p2);
+}
+#else
+static void test_manage_os_memory(void) {
+  // empty
+}
+#endif
+
 // Experiment with huge OS pages
 #if 0
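
Note (a reviewer sketch, not part of the patch): the new alignment path in mi_manage_os_memory_ex2 is only exercised by the Windows-specific test above, since it depends on VirtualAlloc. Below is a minimal POSIX equivalent, assuming mmap is available; the function name test_manage_os_memory_posix and the one-byte offset (used purely to force an unaligned start) are illustrative and not part of mimalloc.

#include <stdint.h>
#include <sys/mman.h>
#include <mimalloc.h>

static void test_manage_os_memory_posix(void) {
  size_t size = 256 * 1024 * 1024;
  void* ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
  if (ptr == MAP_FAILED) return;
  mi_arena_id_t arena_id;
  // pass a deliberately unaligned start so the arena code must align it up
  // to MI_SEGMENT_ALIGN and shrink the usable size by the difference
  if (mi_manage_os_memory_ex((uint8_t*)ptr + 1, size - 1, true /* committed */, false /* pinned */,
                             false /* is zero */, -1 /* numa node */, true /* exclusive */, &arena_id)) {
    mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
    void* p = mi_heap_malloc(heap, 8);
    mi_free(p);
  }
}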