////////////////////////////////////////////////////////////////////////////
//
//  Crytek Engine Source File.
//  Copyright (C), Crytek Studios, 2004.
// -------------------------------------------------------------------------
//  File name:   PS3MemoryManagement
//  Version:     v1.00
//  Created:     8/8/2009 by Chris Raine
//  Compilers:   GCC 4.1.1
//  Description: System Memory Management routines for ps3
// -------------------------------------------------------------------------
////////////////////////////////////////////////////////////////////////////

#include <sys/memory.h>
#include <cstdio>
#include <cstring>
#include <pthread.h>
#include <mspace.h>

#include "PS3MemoryManagement.h"

  // MEM_FATAL(reason, x): halts the program when the condition `x` is false.
  // `reason` is stored in a local pointer that is handed to the inline asm as
  // a memory operand (presumably so that the message can be inspected from a
  // debugger at the trap site - confirm). "tw 31,1,1" is a PowerPC trap
  // instruction; the endless loop keeps execution from continuing past it.
  // NOTE(review): the expansion is a bare `if` block rather than
  // do { } while (0), so be careful when using it inside an unbraced if/else.
#define MEM_FATAL(reason, x)                    \
  if (!(x))                                     \
  {                                             \
    const char* __r = reason;                   \
    __asm__ volatile ( "tw 31,1,1"              \
                       : : "m"(__r): );         \
    while(true);                                \
  }                                             \

  // MEM_DEBUG(reason, x): debugging check. The expansion is currently
  // identical to MEM_FATAL.
#define MEM_DEBUG(reason, x)                    \
  if (!(x))                                     \
  {                                             \
    const char* __r = reason;                   \
    __asm__ volatile ( "tw 31,1,1"              \
                       : : "m"(__r): );         \
    while(true);                                \
  }                                             \

  // MEM_ASSERT(x): same trap-and-halt behaviour as above, without a reason
  // string.
#define MEM_ASSERT(x)                           \
  if (!(x))                                     \
  {                                             \
    __asm__ volatile ( "tw 31,1,1");            \
    while(true);                                \
  }                                             \


// Address ranges for the various allocators - change with care!
#define MINUMUM_ALIGNMENT 4 

// Define this to make the allocators dump lots of (mostly useless)
// debugging information onto the tty
//#define MEM_DEBUG_LOG 

// Entry points of the underlying C runtime allocator. The routines in this
// file forward to them for everything they do not service themselves.
// NOTE(review): the __REAL__ prefix suggests that these symbols are produced
// by wrapping the C runtime allocator at link time - confirm against the
// build settings.
extern "C"
{
	void *__REAL__calloc(size_t, size_t);
	void *__REAL__malloc(size_t);
	void __REAL__free(void *);
	void *__REAL__realloc(void *, size_t);
	void *__REAL__reallocalign(void *, size_t, size_t);
	void *__REAL__memalign(size_t, size_t);
	void __REAL___malloc_init(void);
	void __REAL___malloc_finalize(void);
}

// Handle returned by BitmapAllocator::contains(): the index of the bank that
// holds an allocation, or all bits set if the pointer is not managed by the
// bitmap allocator.
typedef uint32_t handle_t;

////////////////////////////////////////////////////////////////////////////////
namespace 
{ 
  ////////////////////////////////////////////////////////////////////////////////
  // Small helper routines 
  ////////////////////////////////////////////////////////////////////////////////
  // Rounds value up to the nearest power of two. Values that already are a
  // power of two are returned unchanged, zero yields zero and inputs above
  // 2^31 wrap around to zero.
  static inline uint32_t next_power_of_two(uint32_t value)
  {
    // Smear the highest set bit of (value - 1) into all lower bit positions;
    // adding one afterwards carries into the next power of two.
    uint32_t rounded = value - 1;
    for (uint32_t shift = 1; shift < 32; shift <<= 1)
      rounded |= rounded >> shift;
    return rounded + 1;
  }
  // Base-2 logarithm of a power of two: each mask selects the bit positions
  // whose index has a given binary digit set, so testing the five masks
  // assembles the index of the (single) set bit. Returns 0 for an input of 0.
  static inline uint32_t log2(uint32_t value)
  {
    static const uint32_t digit_masks[5] = {
      0xAAAAAAAA, 0xCCCCCCCC, 0xF0F0F0F0, 0xFF00FF00, 0xFFFF0000 };

    uint32_t exponent = 0;
    for (uint32_t digit = 0; digit < 5; ++digit)
    {
      if ((value & digit_masks[digit]) != 0)
        exponent |= 1U << digit;
    }
    return exponent;
  }

  // Variant of log2 that takes its (power of two) argument as a template
  // parameter so that the whole computation can be folded by the compiler.
  template<uint32_t value>
  static inline uint32_t log2()
  {
    static const uint32_t digit_masks[5] = {
      0xAAAAAAAA, 0xCCCCCCCC, 0xF0F0F0F0, 0xFF00FF00, 0xFFFF0000 };

    uint32_t exponent = 0;
    for (uint32_t digit = 0; digit < 5; ++digit)
    {
      if ((value & digit_masks[digit]) != 0)
        exponent |= 1U << digit;
    }
    return exponent;
  }

 	// Global lock - REMOVE ME! Make me more threading aware
 	// A single mutex that every LOCK user in this file acquires.
	static pthread_mutex_t g_lock;
  // Set once InitLock() has run; until then the Lock guard does not touch
  // the mutex at all.
  static bool g_lock_initialized = false;

  // Creates the global mutex as a recursive mutex (so nested LOCK scopes on
  // one thread do not deadlock) and then marks it as usable. The return
  // values of the pthread calls are not checked.
  static inline void InitLock()
  {
    pthread_mutexattr_t attr;
    pthread_mutexattr_init(&attr);
    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_RECURSIVE);
    pthread_mutex_init(&g_lock, &attr);
    pthread_mutexattr_destroy(&attr);
    // NOTE(review): the mutex is locked and unlocked once right after its
    // creation; the reason is not visible here - confirm whether this is
    // still needed.
    pthread_mutex_lock(&g_lock);
    pthread_mutex_unlock(&g_lock);
    g_lock_initialized = true;
  }

	// Scoped guard for the global allocator mutex. The constructor acquires
	// g_lock if the lock has already been initialized, the destructor
	// releases it again.
	//
	// Whether the mutex was taken is remembered per guard: if InitLock() runs
	// while a guard created before initialization is still alive, its
	// destructor must not unlock a mutex that it never locked.
	struct Lock
	{
		Lock() : m_locked(g_lock_initialized)
		{
			if (m_locked)
				pthread_mutex_lock(&g_lock);
		}

		~Lock()
		{
			if (m_locked)
				pthread_mutex_unlock(&g_lock);
		}

	private:
		// Not copyable: a copy would release the mutex a second time.
		Lock(const Lock&);
		Lock& operator=(const Lock&);

		// True if the constructor acquired g_lock.
		const bool m_locked;
	};

  // Holds the global allocator lock until the end of the enclosing scope.
  #define LOCK Lock __l


  ////////////////////////////////////////////////////////////////////////////////
  // Virtual Memory Interface 
  // 
  ////////////////////////////////////////////////////////////////////////////////
 
  // Represents a set of physical pages mapped to a virtual address
  // range. Please note that although this structure is called page,
  // it does not represent a single page, but rather a set of memory
  // pages (with either 64kb or 1mb granularity) spanning the given
  // allocation size.
  struct page_t
  {
    // Virtual address at which the page set is mapped
    sys_addr_t       start;
    // Size of the mapping in bytes (rounded up to the page granularity by
    // alloc_pages)
    size_t           size;
    // Identifier of the physical memory as returned by
    // sys_mmapper_allocate_memory
    sys_memory_t     memory_id;
    // Next page set within the owning address space
    page_t*          next; 
  }; 

  // A virtual address mapping. Physical memory is requested from the
  // operating system and mapped into the virtual address range
  // represented by this structure. 
  struct address_space_t
  {
    // First address of the range as returned by
    // sys_mmapper_allocate_address
    sys_addr_t          base_address;
    // End of the highest mapping made so far (equal to base_address while
    // nothing is mapped); maintained by alloc_pages and unmap_pages
    sys_addr_t          end_address;
    // Page granularity used for every mapping within this range
    PAGESIZE            granularity;
    // Number of page sets currently mapped
    size_t              page_count; 
    // Sum of the sizes of all page sets currently mapped
    size_t              mapped_bytes; 
    // Singly linked list of the page sets mapped into this range
    page_t*             page_list;
    // Link to the address space created after this one (set by create_space
    // when a previous space is passed in)
    address_space_t*    next; 
  }; 

  // The list of system wide address spaces 
  // NOTE(review): not referenced by any code visible in this file.
  static address_space_t sys_address_spaces[3];

  // Create an address space to which physical memory can be allocated.
  //
  // space          - descriptor to initialize; all bookkeeping fields are
  //                  reset so the caller does not have to clear it first
  // granularity    - page granularity used for mappings within the space
  // size           - size of the range in bytes (rounded up to 1 MB)
  // previous_space - optional space whose `next` link is pointed at `space`
  //
  // Returns true on success. Every failure reported by the operating system
  // triggers MEM_FATAL; `false` is only returned should that ever continue.
  bool create_space(
    address_space_t* space, 
    PAGESIZE granularity, 
    size_t size, 
    address_space_t* previous_space = NULL)
  {
    // Default flags for the mappings include access from ppu, spu and
    // raw spu threads to the address space 
    signed flags = 
      SYS_MEMORY_ACCESS_RIGHT_PPU_THR |
      SYS_MEMORY_ACCESS_RIGHT_SPU_THR | 
      SYS_MEMORY_ACCESS_RIGHT_RAW_SPU;

    switch (granularity)
    {
    case PAGESIZE_64KB: 
      flags |= SYS_MEMORY_GRANULARITY_64K;
      break;
    case PAGESIZE_1M: 
      flags |= SYS_MEMORY_GRANULARITY_1M;
      break;
    }

    // If the size of the address space is a multiple of 1 mb the
    // lv2 page management mechanism somehow performs an order of
    // magnitude better
    size = (size + (PAGESIZE_1M-1)) & ~(PAGESIZE_1M-1);
    
    // Request the address space from the system 
    sys_addr_t base_address = 0; 
    signed result = sys_mmapper_allocate_address(
      size, flags, 0x0UL, &base_address);
    if (result != CELL_OK) 
    {
      switch(result)
      {
      case EINVAL: 
        MEM_FATAL("flags are invalid", false); 
        break;
      case EALIGN: 
        MEM_FATAL("size and alignment are invalid\n", false); 
        break;
      case ENOMEM: 
        MEM_FATAL(
          "The requested area cannot be allocated because of "
          "user address space shortage", false); 
        break;
      case EAGAIN: 
        MEM_FATAL(
          "The requested area cannot be allocated because of kernel "
          "resource shortage", false); 
        break;
      case EFAULT: 
        MEM_FATAL("the requested address range is invalid",false); 
        break;
      default:
        // Never treat an unknown error code as success
        MEM_FATAL("unexpected error while allocating address space", false);
        break;
      }
      return false; 
    }

    // Start out with an empty address space. The descriptor may live in
    // uninitialized memory, so none of the fields can be assumed to be zero.
    space->base_address = space->end_address = base_address; 
    space->granularity = granularity; 
    space->page_count = 0;
    space->mapped_bytes = 0;
    space->page_list = NULL;
    space->next = NULL;

    if (previous_space)
      previous_space->next = space; 

    return true; 
  }
  
  // Reports whether the address space can be torn down: succeeds only if no
  // page set is mapped into it any more. No system call is made here.
  bool destroy_space(address_space_t* space)
  {
    const bool still_mapped = (space->page_count != 0);
    return !still_mapped;
  }

  // Map a contiguous range of physical memory onto a virtual address
  // space.
  //
  // range        - address space that receives the mapping
  // size         - number of bytes to map; rounded up to the granularity of
  //                the address space
  // base_address - virtual address to map at, or ~0U to let the routine pick
  //                the first gap within the address space that is large
  //                enough (appending at the end if there is none)
  //
  // Returns the descriptor of the newly mapped page set. Every failure is
  // reported through MEM_FATAL. The page list of the address space is kept
  // sorted by start address, which the gap search relies on.
  page_t* alloc_pages(address_space_t* range, size_t size, 
                      sys_addr_t base_address = ~0U)
  {
    signed result = 0; 
    size_t alignment = 0;
    uint64_t alloc_flags = 0; 

    switch (range->granularity)
    {
    case PAGESIZE_64KB:
      alignment = 64<<10;
      alloc_flags |= SYS_MEMORY_GRANULARITY_64K;
      break;
    case PAGESIZE_1M:
      alignment = 1<<20;
      alloc_flags |= SYS_MEMORY_GRANULARITY_1M;
      break;
    default:
      // An alignment of zero would silently turn the request into 0 bytes
      MEM_FATAL("unsupported page granularity", false);
      break;
    }
    size = (size + (alignment-1)) & ~(alignment-1);

    // The page set after which the new one gets linked in (NULL: new head)
    page_t* prev_page = NULL; 
    if (base_address == ~0U)
    {
      // Walk the (sorted) page list looking for the first gap that is large
      // enough to satisfy the request, starting with the gap in front of
      // the first page set. If there is none, the request is appended
      // behind the last page set.
      sys_addr_t candidate = range->base_address;
      for (page_t* iter = range->page_list; iter; iter = iter->next)
      {
        if (iter->start >= candidate &&
            (size_t)(iter->start - candidate) >= size)
          break; 
        prev_page = iter; 
        candidate = iter->start + iter->size; 
      }
      base_address = candidate;
    }
    else
    {
      // If the base address has been specified we find the last page set
      // that starts below it, so that the page list stays sorted by address
      for (page_t* iter = range->page_list;
           iter && iter->start < base_address; iter = iter->next)
        prev_page = iter; 
    }

    // Allocate memory for the page (this has to go!)
    page_t* new_page = (page_t*)__REAL__malloc(sizeof(page_t));
    MEM_FATAL("could not allocate page descriptor", new_page);
    std::memset(new_page, 0, sizeof(page_t));
    new_page->start = base_address;
    new_page->size = size; 

    // Request the operating system to allocate memory to the 
    // given address range 
    result = sys_mmapper_allocate_memory(
      size, alloc_flags, &new_page->memory_id);
    if( result != CELL_OK ) 
    {
      switch(result)
      {
      case EINVAL: 
        MEM_FATAL("flags are invalid", false); 
        break;
      case EALIGN: 
        MEM_FATAL(
          "size is not the multiple of granularity that is "
          "configured by flags", false); 
        break;
      case EFAULT: 
        MEM_FATAL("memory id is invalid", false); 
        break;
      case ENOMEM: 
        MEM_FATAL(
          "The requested memory cannot be "
          "allocated because of user memory shortage",false); 
        break;
      case EAGAIN: 
        MEM_FATAL(
          "The requested memory cannot be allocated because of "
          "kernel resource shortage",false); 
        break;
      default:
        MEM_FATAL("unexpected error while allocating memory", false);
        break;
      }
    }

    // Request the operating system to map the allocated memory to the
    // the given base address
    result  = sys_mmapper_map_memory(
      base_address, new_page->memory_id, SYS_MEMORY_PROT_READ_WRITE);
    if(result != CELL_OK ) 
    {
      switch(result)
      {
      case EINVAL: 
        MEM_FATAL("start_addr or flags are invalid", false); 
        break;
      case ESRCH: 
        MEM_FATAL("memory id is invalid", false); 
        break;
      case EALIGN: 
        MEM_FATAL(
          "start_addr is invalid (not the multiple of page size)",
          false); 
        break;
      case EAGAIN: 
        MEM_FATAL(
          "The mapping fails because of kernel resource "
          "shortage",false); 
        break;
      case EBUSY: 
        MEM_FATAL(
          "The specified address range overlaps with an area "
          "which is already mapped\n", false); 
        break;
      default:
        MEM_FATAL("unexpected error while mapping memory", false);
        break;
      }
    }	

    // Insert the newly created page into the list of pages this
    // address space manages 
    if (prev_page)
    {
      new_page->next = prev_page->next; 
      prev_page->next = new_page; 
    }
    else
    {
      new_page->next = range->page_list; 
      range->page_list = new_page; 
    }

    // If the address space has grown because we could not find a
    // large enough gap within the address space, the end address of
    // the allocation is adjusted here.
    if (range->end_address < (new_page->start + size))
      range->end_address = new_page->start + size;

    // Increment the number of allocated pages 
    ++range->page_count; 
    
    // Adjust the size of the address space 
    range->mapped_bytes += size; 

    return new_page; 
  }
  
  // Unmap a set of pages from the given address range and hand the
  // physical memory back to the operating system.
  //
  // range - address space the page set belongs to
  // page  - page set to release; its descriptor is freed on success
  //
  // Returns false (without touching anything) if `page` is not part of the
  // page list of `range`, true once the page set has been released. Errors
  // reported by the operating system trigger MEM_FATAL.
  bool unmap_pages(address_space_t* range, page_t* page)
  {
    // Locate the page and its predecessor first: unlinking a page that is
    // not a member of the list would otherwise corrupt the list head.
    page_t* prev_page = NULL;
    page_t* iter = range->page_list;
    while (iter && iter != page)
    {
      prev_page = iter;
      iter = iter->next;
    }
    if (!iter)
      return false;

    signed result = 0; 
    sys_memory_t memory_id; 

    // Request the operating system to unmap the memory of the page
    // set we gave
    result = sys_mmapper_unmap_memory( 
      page->start, &memory_id);
    if( result != CELL_OK ) 
    {
      switch(result)
      {
      case EINVAL: 
        MEM_FATAL("start_addr is invalid", false); 
        break;
      case EFAULT: 
        MEM_FATAL("mem_id is invalid", false); 
        break;
      default:
        // memory_id has not been written, do not carry on with it
        MEM_FATAL("unexpected error while unmapping memory", false);
        break;
      }
    }
    
    // Request the operating system to free the memory we have
    // previously requested
    result = sys_mmapper_free_memory( memory_id );
    if( result != CELL_OK ) 
    {
      switch(result)
      {			
      case ESRCH: 
        MEM_FATAL("mem_id is invalid", false); 
        break;
      case EBUSY: 
        MEM_FATAL("The memory specified by mem_id is still mapped",false); 
        break;
      default:
        MEM_FATAL("unexpected error while freeing memory", false);
        break;
      }
    }

    // Remove the page from the list of pages within the address space 
    if (prev_page)
      prev_page->next = page->next;
    else
      range->page_list = page->next;

    // Decrease the page count and the allocated size of the address range 
    --range->page_count; 
    range->mapped_bytes -= page->size; 

    // If this is the last page within the address space, then we need
    // to adjust the address range
    if (range->end_address == (page->start + page->size))
      range->end_address -= page->size;

    // Free the memory from the actual page (This needs to go)
    __REAL__free(page);
    return true; 
  }
}

////////////////////////////////////////////////////////////////////////////////
// Bitmap/Slab based allocator for small allocations (very little
// internal and external fragmentation) 
////////////////////////////////////////////////////////////////////////////////

// A block of memory: one 64kb page set of the bitmap allocator that is
// carved into equally sized objects.
struct bank_t 
{ 
  // The page set backing this bank
  page_t*   page; 
  // Number of objects currently allocated from the bank
  uint32_t  refcount  : 16;
  // Size in bytes of every object within the bank (11 bits: up to 2047)
  uint32_t  obj_size  : 11;
  // Alignment passed to allocate_bank. NOTE(review): only 4 bits wide, so
  // values of 16 and above are truncated; the field is not read anywhere in
  // this file.
  uint32_t  obj_align : 4;
  // NOTE(review): assigned `index & 0x3` in allocate_bank but only 1 bit
  // wide; not read anywhere in this file.
  uint32_t  colour    : 1;
  // Bitmap at the start of the page: one bit per object, set while the
  // object is allocated
  uint64_t* allocated_bin; 
  // Second bitmap, located directly behind allocated_bin; set and cleared
  // together with the allocation bit
  uint64_t* unused_bin; 
}; 

// Bitmap Allocator 
class BitmapAllocator 
{ 
  // The banks of the allocator. Bank `n` is backed by the 64kb page set
  // mapped at `space.base_address | n<<16` (see allocate_bank).
  bank_t   banks[4096];
  // One bit per bank (128 * 32 = 4096 bits), set while the bank is mapped
  uint32_t used_banks[128];

  // Per size class list of the banks that still have room for at least one
  // object. The lists are indexed by log2 of the object size.
  struct _free_list
  {
    bank_t*     entry;
    _free_list* next; 
  } *free_list[10];

  // The address range of the allocator
  address_space_t space;

  // The number of bytes wasted due to internal fragmentation
  // (accumulated size of the internal structures)
  // NOTE(review): never updated by the code visible in this file.
  size_t wasted_internal;

  // The number of bytes wasted due to external fragmentation (due
  // to alignment)
  // NOTE(review): never updated by the code visible in this file.
  size_t wasted_external;


  // Builds a flat index out of a bin index `i` and a bit index `j`, where
  // every bin holds `bitcount` (a power of two) bits.
  template<uint32_t bitcount>
  inline uint32_t combine(uint32_t i, uint32_t j) const
  {
    const uint32_t shift = log2<bitcount>();
    return (j) | (i << shift);
  }

  // Splits a flat index into its bin index `i` and its bit index `j`, where
  // every bin holds `bitcount` (a power of two) bits.
  template<uint32_t bitcount>
  inline void extract(
    uint32_t index, uint32_t& i, uint32_t& j) const
  {
    const uint32_t shift = log2<bitcount>();
    const uint32_t bit_mask = (1<<shift)-1;
    i = index >> shift; 
    j = index & bit_mask; 
  }

  // Returns the bin index encoded in a flat index, where every bin holds
  // `bitcount` (a power of two) bits.
  template<uint32_t bitcount>
  inline uint32_t extract_i(uint32_t index) const
  {
    const uint32_t shift = log2<bitcount>();
    const uint32_t bin = index >> shift;
    return bin; 
  }

  // Returns the bit index encoded in a flat index, where every bin holds
  // `bitcount` (a power of two) bits.
  template<uint32_t bitcount>
  inline uint32_t extract_j(uint32_t index) const
  {
    const uint32_t shift = log2<bitcount>();
    const uint32_t bit_mask = (1<<shift)-1;
    return index & bit_mask;
  }

  // Number of 64 bit bitmap words a bank uses per bitmap: the number of
  // objects that fit into 0xffff bytes divided by 64 (rounded down).
  inline size_t num_bit_bins(bank_t* bank) const 
  {
    const size_t objects_per_page = 0xffff / bank->obj_size;
    return objects_per_page >> 6;
  }

  // Retrieve the maximum refcount for the bank, i.e. the number of objects
  // that can be allocated from it at the same time.
  //
  // The capacity is bounded twice: by the space that is left behind the
  // bitmap header and by the number of bits within the bitmaps. For larger
  // objects the bitmap is the tighter bound (num_bit_bins rounds down), and
  // ignoring it would keep a bank on the free list although every bit of its
  // bitmap is already taken.
  inline size_t max_refcount(bank_t* bank) const
  {
    const size_t obj_size = bank->obj_size; 
    const size_t bit_bins = num_bit_bins(bank);

    // Size of the two bitmaps, rounded up to the object size
    size_t offset = bit_bins * sizeof(uint64_t) * 2;
    offset = (offset + (obj_size-1)) & ~(obj_size-1);

    const size_t fits_in_page = (0xffff-offset) / obj_size;
    const size_t bits_in_bitmap = bit_bins * 64;
    return (fits_in_page < bits_in_bitmap) ? fits_in_page : bits_in_bitmap;
  }

  // Offset of the first object within a bank: the size of the two bitmaps
  // at the start of the page, rounded up to a multiple of the object size so
  // that every object is aligned to its own size.
  inline size_t bank_header_size(bank_t* bank) const
  {
    const size_t align_mask = static_cast<size_t>(bank->obj_size) - 1;
    const size_t bitmap_bytes = num_bit_bins(bank) * sizeof(uint64_t) * 2;
    return (bitmap_bytes + align_mask) & ~align_mask;
  }

// Debugging helper that dumps one free list to stderr. It is compiled out,
// although the MEM_DEBUG_LOG code paths in this class still call it.
// NOTE(review): the format specifiers do not match the argument types
// (size_t and pointers are printed with %d / %x).
#if 0
  inline void print_free_list(size_t index)
  {
    // Stay silent until the global lock (and with it the system) is set up
    if (!g_lock_initialized)
      return; 
    fprintf(stderr, "free list %d: \n", index);
    _free_list* iter = free_list[index];
    while (iter) 
    { 
      MEM_ASSERT(iter->entry);
      fprintf(stderr, "\t bank 0x%08x refcount %4d obj size %4d\n", 
              iter->entry, iter->entry->refcount, iter->entry->obj_size);        
      iter = iter->next; 
    } 
  }
#endif

  // Find an unused bank within the list of banks and map a chunk of
  // physical memory onto it. The bank will be subsequently used to
  // satisfy allocations of the given size and boundary.
  //
  // head     - head of the free list of the size class; the new bank is
  //            pushed to its front so that it gets filled first
  // size     - object size of the bank
  // boundary - alignment of the objects
  //
  // Returns true on success; every failure triggers MEM_FATAL.
  bool allocate_bank(_free_list*& head, size_t size, size_t boundary)
  {
    // Sentinel of the same width as the index. (Comparing a 32 bit index
    // against ~0UL never matches where long is 64 bits wide.)
    const uint32_t no_bank = ~0U;

    // Claim the first bank whose usage bit is clear
    uint32_t index = no_bank;
    for (uint32_t i=0; i<128 && index==no_bank; ++i)
    {
      uint32_t& usage = used_banks[i];
      if (usage == ~0U)
        continue;
      for (uint32_t j=0; j<32 && index==no_bank; ++j)
      {
        if ((usage & (1U<<j)) == 0)
        {
          index = combine<32>(i,j);
          usage |= (1U<<j);
        }
      }
      MEM_ASSERT(index != no_bank);
    }
    MEM_FATAL("bitmap allocator out of banks!", index != no_bank);

    // Map memory from operating system to bank 
    uintptr_t page_address = space.base_address | index<<16;
    page_t* new_page = alloc_pages(
      &space, PAGESIZE_64KB, page_address);
    MEM_FATAL("bitmap allocator could not map memor!", new_page);

    // Initialize the bank with the value found in the address 
    bank_t* bank = &banks[index];
    bank->page = new_page; 
    bank->refcount = 0;
    bank->obj_size = size; 
    // NOTE(review): obj_align and colour are narrower than the values
    // assigned here; neither field is read anywhere in this file.
    bank->obj_align = boundary;
    bank->colour  = index & 0x3;
    bank->allocated_bin = reinterpret_cast<uint64_t*>(new_page->start);
    bank->unused_bin = reinterpret_cast<uint64_t*>(
      new_page->start + num_bit_bins(bank)*sizeof(uint64_t));

    // Both bitmaps live at the start of the freshly mapped page. Clear them
    // explicitly instead of relying on the contents of the new mapping.
    std::memset(bank->allocated_bin, 0,
                2 * num_bit_bins(bank) * sizeof(uint64_t));
      
    // We insert the newly allocated block at the front of the
    // free list so that it gets filled first.
    _free_list* new_head =
      (_free_list*)__REAL__malloc(sizeof(_free_list));
    MEM_FATAL(
      "bitmap allocator could not create entry for free list",
      new_head);
    new_head->entry = bank;
    new_head->next = head;
    head = new_head;

#if defined(MEM_DEBUG_LOG)
    print_free_list(log2(size));
    fprintf( stderr, "bank %4d created\n",index);
#endif
      
    return true; 
  }

  // Free the address space occupied by the bank so it can be reused
  // for something else: the bank is taken off the free list of its size
  // class, its page set is unmapped and its usage bit is cleared.
  //
  // Returns false if `bank` is not an element of the bank table.
  bool deallocate_bank(bank_t* bank) 
  { 
    // Derive the bank index from the position within the bank table
    const uintptr_t table_begin = reinterpret_cast<uintptr_t>(&banks[0]);
    const uintptr_t bank_address = reinterpret_cast<uintptr_t>(bank);
    if (bank_address < table_begin ||
        bank_address >= table_begin + sizeof(banks) ||
        ((bank_address - table_begin) % sizeof(bank_t)) != 0)
      return false;
    const size_t i = (bank_address - table_begin) / sizeof(bank_t);

    // Unlink the entry that refers to the bank. Walking the links (instead
    // of special casing the head) keeps the remainder of the list intact
    // when the bank happens to be the first entry.
    size_t index = log2(bank->obj_size);
    MEM_ASSERT(free_list[index]);
    _free_list** link = &free_list[index];
    while (*link && (*link)->entry != bank)
      link = &(*link)->next;
    if (*link)
    {
      _free_list* node = *link;
      *link = node->next;
      __REAL__free(node);
    }

    // Give the memory back and mark the bank as unused
    unmap_pages(&space, bank->page);
    uint32_t& usage = used_banks[extract_i<32>(i)];
    usage &= ~(1U<<extract_j<32>(i));
    std::memset(bank, 0, sizeof(bank_t));
#if defined(MEM_DEBUG_LOG)
    print_free_list(index);
    fprintf( stderr, "bank %4d destroyed\n",i);
#endif
    return true;
  } 

public:

  // 512 byte maximum size, 16 byte maximum alignment 
  // The sys:: entry points use these limits to decide whether a request is
  // routed to the bitmap allocator.
  enum { MAX_ALLOC_SIZE = 0x200, MAX_ALIGN = 0x10 };

  // Resets the allocator state and reserves the address space for the banks.
  // Returns false if the address space could not be created.
  bool initialize()
  {
    // Clear the whole object. (sizeof(this) would only cover the size of a
    // pointer and thus just the first few bytes of the bank table.)
    std::memset(this, 0, sizeof(*this)); 
    if (!create_space(
          &space, PAGESIZE_64KB, 512<<20, NULL))
      return false; 
    return true; 
  }

  // Shuts the allocator down. Succeeds only if destroy_space accepts the
  // address space, i.e. if no bank is mapped any more.
  bool finalize()
  {
    const bool space_released = destroy_space(&space);
    return space_released;
  }

  // Allocate memory from the bitmap allocator.
  //
  // size     - requested size in bytes (at most MAX_ALLOC_SIZE)
  // boundary - requested alignment in bytes
  //
  // The request is served from the size class of the next power of two that
  // is at least 4 bytes and at least `boundary`; objects are aligned to the
  // size of their class. Returns the address of the object. Requests that
  // end up above 512 bytes as well as internal inconsistencies trap.
  void* allocate(size_t size, size_t boundary)
  {
    // Pick the size class. Objects are aligned to their own size, so a
    // larger alignment is satisfied by moving to a larger class, and tiny
    // (or empty) requests are rounded up to the smallest class instead of
    // being rejected.
    size_t aligned_size = next_power_of_two(size);
    if (aligned_size < boundary)
      aligned_size = next_power_of_two(boundary);
    if (aligned_size < 0x4)
      aligned_size = 0x4;

    MEM_DEBUG("too large allocation for bitmap allocator", 
              aligned_size <= 0x200);

    // Make sure there is a bank with room left for this size class
    size_t index = log2(aligned_size);
    if (!free_list[index] && !allocate_bank(
          free_list[index], aligned_size, aligned_size))
    {
      MEM_FATAL("bitmap allocator no free blocks!", false);
    }

    bank_t* bank = free_list[index]->entry; 
    MEM_ASSERT(aligned_size == bank->obj_size);
    MEM_ASSERT(boundary <= bank->obj_size);
    MEM_ASSERT(bank->refcount < max_refcount(bank));

    size_t obj_size = bank->obj_size; 
    size_t offset = bank_header_size(bank);

    // Claim the first object whose allocation bit is clear
    void* ptr = NULL;
    for (uint32_t i=0; i<num_bit_bins(bank) && !ptr; ++i)
    {
      uint64_t& allocbin = bank->allocated_bin[i];
      uint64_t& wastebin = bank->unused_bin[i];
      if (allocbin == ~0ULL)
        continue;
      for (uint32_t j=0; j<64 && !ptr; ++j)
      {
        if (allocbin & (1ULL<<j))
          continue;
        // Calculate the (aligned) offset to the first object
        size_t data_offset = offset + combine<64>(i,j) * obj_size;
        ptr = (void*)(bank->page->start + data_offset); 
        allocbin |= 1ULL<<j;
        wastebin |= 1ULL<<j;
#if defined(MEM_DEBUG_LOG)
        fprintf(
          stderr, 
          "0x%04x bytes at 0x%08x (i = %8d, j = %8d) allocated\n", 
          obj_size, ptr, i, j);
#endif
      }
      MEM_ASSERT(ptr);
    }
    MEM_ASSERT(ptr != NULL);
    ++bank->refcount; 

    // Remove the bank from the the free list if it is full. The bank is
    // always the head of its list at this point.
    if (bank->refcount == max_refcount(bank))
    {
      _free_list* head = free_list[index];
      free_list[index] = free_list[index]->next;
      __REAL__free(head);
    }

    return ptr; 
  }

  // Deallocate memory from the bitmap allocator
  void deallocate(void* ptr, handle_t handle)
  {
    uintptr_t offset = reinterpret_cast<uintptr_t>(ptr);
    uint32_t index = handle, i=0, j=0;
    extract<32>(index, i, j);
    MEM_ASSERT((index >= 0 && index < 0x100));
    MEM_ASSERT(((offset>>16) & 0xfff) == index);
    MEM_ASSERT(used_banks[index>>5] & (1<<(index&0x1f)));
    bank_t* bank  = &banks[index];
    MEM_ASSERT(bank->refcount > 0);

    // Find the bit bin that marks the allocation
    size_t bank_header = bank_header_size(bank);
    size_t obj_size = bank->obj_size; 
    MEM_ASSERT((offset&0xffff) - bank_header >= 0);
    extract<64>(((offset&0xffff) - bank_header) / obj_size, i, j);
#if defined(MEM_DEBUG_LOG)
    fprintf(
      stderr, 
      "0x%04x bytes at 0x%08x (i = %8d, j = %8d) deallocated\n", 
      bank->obj_size, ptr, i, j);
#endif
    MEM_FATAL(
      "bitmap allocator trying to deallocate unsed memory",
      bank->allocated_bin[i] & (1ULL<<j));

    // Clear the allocation flags 
    bank->allocated_bin[i] &= ~(1ULL<<j);
    bank->unused_bin[j] &= ~(1ULL<<j);
    if (bank->refcount == max_refcount(bank))
    {
      size_t free_list_index = log2(obj_size);
      _free_list *tail = free_list[free_list_index];
      if (!tail) 
      {
        free_list[free_list_index] =
          (_free_list*)__REAL__malloc(sizeof(_free_list));
        free_list[free_list_index]->entry = bank;
        free_list[free_list_index]->next = NULL;
      }
      else
      {
        while (tail && tail->next)
          tail = tail->next; 
        tail->next =
          (_free_list*)__REAL__malloc(sizeof(_free_list));
        tail->next->entry = bank;
        tail->next->next = NULL;
      }
    }
      
    --bank->refcount;

    // If the bank has no further allocations now would be a great
    // time to give it back to the operating system. And ensure that
    // we remove it from the free list.
    if (bank->refcount == 0)
    {
      MEM_FATAL(
        "bitmap allocator could not deallocate bank",
        deallocate_bank(bank));
    }
  }

  // Updates a previous allocation to a new size (realloc semantics).
  //
  // ptr      - address previously returned by allocate()
  // size     - new size in bytes
  // boundary - alignment for the new allocation
  // handle   - bank index of the allocation as returned by contains()
  //
  // Returns `ptr` if the request still fits into the existing object,
  // otherwise the address of a new allocation that holds the old contents.
  // Returns NULL (leaving the old allocation untouched) if the new memory
  // could not be obtained.
  void* update(
    void* ptr, size_t size, size_t boundary, handle_t handle)
  {
    uintptr_t offset = reinterpret_cast<uintptr_t>(ptr);
    uint32_t index = handle;
    // There are 4096 banks (see banks / used_banks)
    MEM_ASSERT(index < 0x1000);
    MEM_ASSERT(((offset>>16) & 0xfff) == index);
    MEM_ASSERT(used_banks[index>>5] & (1U<<(index&0x1f)));
    bank_t* bank  = &banks[index];
    const size_t old_size = bank->obj_size;

    // Anything that fits into the existing object is a no-op
    if (size <= old_size)
      return ptr; 

    // Allocate a new pointer
    void* new_ptr = NULL;
    if (size > MAX_ALLOC_SIZE)
      new_ptr = sys::malloc(size);
    else 
      new_ptr = allocate(size, boundary);
    if (!new_ptr)
      return NULL;

    // Move the previous contents over to the new pointer (the new
    // allocation is larger, so the whole old object is copied)
    std::memmove(new_ptr, ptr, old_size);
    // .... and deallocate the old pointer 
    deallocate(ptr, handle);

    return new_ptr; 
  }

  // Returns the size of an allocation, which is the object size of the bank
  // that holds it (not the size that was originally requested).
  //
  // ptr    - address previously returned by allocate()
  // handle - bank index of the allocation as returned by contains()
  size_t size(void* ptr, handle_t handle )
  {
    uintptr_t offset = reinterpret_cast<uintptr_t>(ptr);
    uint32_t index = handle;
    // There are 4096 banks (see banks / used_banks)
    MEM_ASSERT(index < 0x1000);
    MEM_ASSERT(((offset>>16) & 0xfff) == index);
    MEM_ASSERT(used_banks[index>>5] & (1U<<(index&0x1f)));
    return banks[index].obj_size;
  }

  // Maps a pointer to the handle (bank index) of the bank it lies in.
  // Returns all bits set if the pointer is outside of the first 256 mb of
  // the address space of this allocator.
  handle_t contains(void* ptr)
  {
    const uintptr_t address = reinterpret_cast<uintptr_t>(ptr); 
    const bool below_range = address < space.base_address;
    const bool above_range = address >= space.base_address+(256<<20);
    if (below_range || above_range)
      return ~0UL;

    // Bits 16..27 of the address select the 64kb bank
    return (address>>16) & 0xfff;
  }
}; 
// Work In Progress. Note that the sys:: entry points at the end of this file
// already route small allocations through this instance.
static BitmapAllocator sys_bitmap_alloc; 


////////////////////////////////////////////////////////////////////////////////
// Scratch memory allocator for non persistent allocations 
////////////////////////////////////////////////////////////////////////////////

// Allocation internals: the private state of a scratch allocator, created
// by CScratchMemoryAllocator::create.
struct CScratchMemoryAllocator::internals
{
  // The address space for the scratch memory allocator
  address_space_t address_space;

  // The allocator statistics for the scratch allocator 
  // NOTE(review): only read (get_statistics) by the code visible in this
  // file, never updated.
  allocator_stats stats; 
};

// Copies the current allocator statistics into `stats` while holding the
// global lock.
void CScratchMemoryAllocator::get_statistics(allocator_stats& stats) const
{    
  LOCK;
  const allocator_stats& current = m_internals->stats;
  std::memcpy(&stats, &current, sizeof(allocator_stats));
}

// Maps a fresh set of pages into the scratch address space.
//
// size     - number of bytes requested
// boundary - the size is rounded up to a multiple of this (power of two)
//            value; 0 means that no rounding is requested
//
// Returns the start address of the mapping; failures trigger MEM_FATAL.
// alloc_pages additionally rounds the size up to the page granularity.
void* CScratchMemoryAllocator::allocate(size_t size, size_t boundary)
{
  LOCK;
  // Round the size up to the requested boundary. A boundary of zero must
  // not go through the mask arithmetic, it would turn every size into 0.
  size_t aligned_size = size;
  if (boundary != 0)
    aligned_size = (size + (boundary-1)) & ~(boundary-1);
  
  page_t* page_set = alloc_pages(
    &(m_internals->address_space), aligned_size);
  MEM_FATAL("CScratchAllocator allocation error : out of memory", 
            page_set);
  return (void*)page_set->start; 
}

void CScratchMemoryAllocator::deallocate(void* _ptr)
{
  LOCK;
  for (page_t* iter = m_internals->address_space.page_list; 
       iter; iter=iter->next)
  {
    if ((void*)iter->start == _ptr)
    {
      MEM_FATAL("CScratchAllocator deallocation error: "
        "could not unmap page range",
        unmap_pages(&m_internals->address_space, iter)); 
      return;
    }
  }
  MEM_FATAL("CScratchAllocator deallocation error: "
            "memory not contained in allocator?", false);
}

// Moves the allocation that starts at `_ptr` into a new mapping of `size`
// bytes and releases the old one.
//
// _ptr     - start address of an existing mapping (traps otherwise)
// size     - new size in bytes (rounded up to the page granularity)
// boundary - currently not taken into account
//
// Returns the start address of the new mapping.
void* CScratchMemoryAllocator::update(
  void* _ptr, size_t size, size_t boundary)
{
  LOCK;
  page_t* old_pages = NULL;
  for (page_t* iter = m_internals->address_space.page_list; 
       iter; iter=iter->next)
  {
    if ((void*)iter->start == _ptr)
    {
      old_pages = iter;
      break;
    }
  }
  MEM_ASSERT(old_pages); 

  page_t* new_pages = alloc_pages(&m_internals->address_space, size); 
  MEM_ASSERT(new_pages);

  // Copy no more than fits into the new mapping: when the allocation
  // shrinks, copying the full old size would write past its end.
  const size_t copy_bytes = (old_pages->size < new_pages->size)
    ? old_pages->size : new_pages->size;
  std::memcpy((void*)new_pages->start, (void*)old_pages->start, copy_bytes);  

  MEM_FATAL("CScratchAllocator deallocation error: "
    "could not unmap page range",
    unmap_pages(&(m_internals->address_space), old_pages)); 

  return (void*)new_pages->start;
}

void* CScratchMemoryAllocator::contains(void* _ptr) const
{
  LOCK;
  if (!m_internals) return 0;
  for (page_t* iter = m_internals->address_space.page_list; 
       iter; iter=iter->next)
    if ((void*)iter->start == _ptr)
      return _ptr;
  return NULL;
}

size_t CScratchMemoryAllocator::msize(void* _ptr) const
{
  LOCK;
  if (!m_internals) return 0;
  for (page_t* iter = m_internals->address_space.page_list; 
       iter; iter=iter->next)
    if ((void*)iter->start == _ptr)
      return iter->size;
  return 0;
}

// Create a scratch memory allocator for the system.
//
// page_size - page granularity of the address space of the allocator
//
// Both the allocator and its internals are taken from the underlying
// runtime allocator; a 256 mb address space is reserved for the scratch
// mappings. Failures trap.
CScratchMemoryAllocator* CScratchMemoryAllocator::create(
  PAGESIZE page_size)
{
  LOCK;

  CScratchMemoryAllocator* scratch_alloc = (CScratchMemoryAllocator*)
    __REAL__malloc(sizeof(CScratchMemoryAllocator));
  MEM_ASSERT(scratch_alloc);

  scratch_alloc->m_internals =(CScratchMemoryAllocator::internals*)
    __REAL__malloc(sizeof(CScratchMemoryAllocator::internals));
  MEM_ASSERT(scratch_alloc->m_internals);

  // malloc hands out uninitialized memory: start with an empty page list,
  // zeroed counters and zeroed statistics.
  std::memset(scratch_alloc->m_internals, 0,
              sizeof(CScratchMemoryAllocator::internals));

  MEM_FATAL("CScratchMemoryAllocator could not create address space",
    create_space(&scratch_alloc->m_internals->address_space, 
      page_size, 256<<20));

  return scratch_alloc;
}

namespace sys
{
  // Brings up the memory system: the underlying runtime allocator first,
  // then the bitmap allocator and finally the global lock. Returns false
  // (without creating the lock) if the bitmap allocator fails to initialize.
  bool initialize()
  {
    __REAL___malloc_init();

    const bool bitmap_ready = sys_bitmap_alloc.initialize();
    if (bitmap_ready)
      InitLock();

    return bitmap_ready; 
  }
  
  // Shuts the memory system down. The underlying runtime allocator is only
  // finalized if the bitmap allocator could be finalized before.
  bool finalize()
  {
    const bool bitmap_closed = sys_bitmap_alloc.finalize();
    if (bitmap_closed)
      __REAL___malloc_finalize();

    return bitmap_closed; 
  }

  size_t _msize(void* __p)
  {
    LOCK;
    handle_t handle = sys_bitmap_alloc.contains(__p);
    if (handle != ~0UL)
      return sys_bitmap_alloc.size(__p, handle); 
    return malloc_usable_size(__p);
  }
  
  // Allocates zero initialized memory for `nmemb` elements of `size` bytes.
  // Totals below BitmapAllocator::MAX_ALLOC_SIZE are served by the bitmap
  // allocator, everything else by the underlying runtime allocator.
  void *calloc(size_t nmemb, size_t size)
  {
    LOCK;
    // A product that overflows must not be mistaken for a small request;
    // such requests are left to the underlying calloc.
    const bool overflows =
      (size != 0) && (nmemb > (~static_cast<size_t>(0)) / size);
    if (!overflows)
    {
      const size_t actual_size = nmemb*size; 
      if (actual_size < BitmapAllocator::MAX_ALLOC_SIZE)
      {
        void* ptr = sys_bitmap_alloc.allocate(
          actual_size, MINUMUM_ALIGNMENT);
        std::memset(ptr, 0, actual_size);
        return ptr; 
      }
    }

    return __REAL__calloc(nmemb, size);
  }
  
	// Allocates `size` bytes. Requests of more than
	// BitmapAllocator::MAX_ALLOC_SIZE bytes go to the underlying runtime
	// allocator, all others to the bitmap allocator.
	void *malloc(size_t size)
  {
    LOCK;
    const bool too_large = size > BitmapAllocator::MAX_ALLOC_SIZE;
    if (too_large)
      return __REAL__malloc(size);

    return sys_bitmap_alloc.allocate(size, MINUMUM_ALIGNMENT);
  }

  void free(void * __p)
  {
    LOCK;
    handle_t handle = sys_bitmap_alloc.contains(__p);
    if (handle != ~0UL)
    {
      sys_bitmap_alloc.deallocate(__p, handle); 
      return; 
    }

    __REAL__free(__p);
  }
  
	void *realloc(void* __p, size_t size)
  {
    LOCK;
    handle_t handle = sys_bitmap_alloc.contains(__p);
    if (handle != ~0UL)
      return sys_bitmap_alloc.update(
        __p, size, MINUMUM_ALIGNMENT, handle);


    return __REAL__realloc(__p, size);
  }
  
	void *reallocalign(void *__p, size_t size, size_t align)
  {
    LOCK;
    handle_t handle = sys_bitmap_alloc.contains(__p);
    if (handle != ~0UL)
      return sys_bitmap_alloc.update(
        __p, size, align, handle);

    return __REAL__reallocalign(__p, size, align);
  }

  
	void *memalign(size_t boundary, size_t size)
  {

    LOCK;
    if (boundary <= BitmapAllocator::MAX_ALIGN && 
        size <= BitmapAllocator::MAX_ALLOC_SIZE)
      return sys_bitmap_alloc.allocate(size, boundary);
        
    return __REAL__memalign(boundary, size);
  }
}
