dm: add hugetlb memory management support

Pass '-T' on the DM command line to enable hugetlb memory allocation.
It enables two levels of huge pages:
Level 1: size 2M, path: /run/hugepage/acrn/huge_lv1/<VM UUID>
Level 2: size 1G, path: /run/hugepage/acrn/huge_lv2/<VM UUID>

NOTE:
Before running, make sure the system already has enough hugepages
reserved under:
/sys/kernel/mm/hugepages/hugepages-xxxxkB/nr_hugepages
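
For reference, a minimal sketch of how the reserved count can be read from C
before launching the DM; the helper name and buffer size are illustrative and
not part of this commit:

/* Hypothetical helper: read the number of reserved hugepages of a given
 * size from sysfs, e.g. "2048kB" for 2M pages or "1048576kB" for 1G pages.
 * Returns -1 if the sysfs node cannot be read.
 */
#include <stdio.h>

static long read_nr_hugepages(const char *size_dir)
{
	char path[128];
	FILE *fp;
	long nr = -1;

	snprintf(path, sizeof(path),
		"/sys/kernel/mm/hugepages/hugepages-%s/nr_hugepages",
		size_dir);
	fp = fopen(path, "r");
	if (fp == NULL)
		return -1;
	if (fscanf(fp, "%ld", &nr) != 1)
		nr = -1;
	fclose(fp);
	return nr;
}

Reserving the pages themselves (writing the desired count into the same
nr_hugepages node as root) must still be done before the DM starts; this
commit only consumes pages that are already reserved.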

Signed-off-by: Jason Chen CJ <jason.cj.chen@intel.com>
Acked-by: Xu, Anthony <anthony.xu@intel.com>
Author: Jason Chen CJ <jason.cj.chen@intel.com>, 2018-03-14 22:03:52 +08:00; committed by Jack Ren
parent ac5da17e52
commit 4cad694be2
6 changed files with 546 additions and 9 deletions


@@ -100,7 +100,7 @@ SRCS += core/consport.c
 SRCS += core/vmmapi.c
 SRCS += core/mptbl.c
 SRCS += core/main.c
+SRCS += core/hugetlb.c
 OBJS := $(patsubst %.c,$(DM_OBJDIR)/%.o,$(SRCS))

devicemodel/core/hugetlb.c (new file, 518 lines)

@@ -0,0 +1,518 @@
/*-
* Copyright (c) 2018 Intel Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <ctype.h>
#include <fcntl.h>
#include <errno.h>
#include <assert.h>
#include "vmm.h"
#include "vhm_ioctl_defs.h"
#include "vmmapi.h"
#define HUGETLB_LV1 0
#define HUGETLB_LV2 1
#define HUGETLB_LV_MAX 2
#define MAX_PATH_LEN 128
#define HUGETLBFS_MAGIC 0x958458f6
/* HugePage Level 1 for 2M page, Level 2 for 1G page*/
#define PATH_HUGETLB_LV1 "/run/hugepage/acrn/huge_lv1/"
#define OPT_HUGETLB_LV1 "pagesize=2M"
#define PATH_HUGETLB_LV2 "/run/hugepage/acrn/huge_lv2/"
#define OPT_HUGETLB_LV2 "pagesize=1G"
/* hugetlb_info records private information for one specific hugetlbfs:
 * - mounted: whether hugetlbfs is mounted at mount_path below
 * - mount_path: hugetlbfs mount path
 * - mount_opt: hugetlbfs mount option
 * - node_path: path of the hugetlbfs node backing this VM
 * - fd: file descriptor of the opened node, -1 when closed
 * - pg_size: page size of this hugetlbfs
 * - lowmem: lowmem to be allocated from this hugetlbfs
 * - highmem: highmem to be allocated from this hugetlbfs
 */
struct hugetlb_info {
bool mounted;
char *mount_path;
char *mount_opt;
char node_path[MAX_PATH_LEN];
int fd;
int pg_size;
size_t lowmem;
size_t highmem;
};
static struct hugetlb_info hugetlb_priv[HUGETLB_LV_MAX] = {
{
.mounted = false,
.mount_path = PATH_HUGETLB_LV1,
.mount_opt = OPT_HUGETLB_LV1,
.fd = -1,
.pg_size = 0,
.lowmem = 0,
.highmem = 0,
},
{
.mounted = false,
.mount_path = PATH_HUGETLB_LV2,
.mount_opt = OPT_HUGETLB_LV2,
.fd = -1,
.pg_size = 0,
.lowmem = 0,
.highmem = 0,
},
};
static void *ptr;
static size_t total_size;
static int hugetlb_lv_max;
static int open_hugetlbfs(struct vmctx *ctx, int level)
{
char uuid_str[48];
uint8_t UUID[16];
char *path;
struct statfs fs;
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return -EINVAL;
}
path = hugetlb_priv[level].node_path;
strncpy(path, hugetlb_priv[level].mount_path, MAX_PATH_LEN);
/* UUID will use 32 bytes */
if (strlen(path) + 32 > MAX_PATH_LEN) {
perror("PATH overflow");
return -ENOMEM;
}
uuid_copy(UUID, ctx->vm_uuid);
sprintf(uuid_str, "%02X%02X%02X%02X%02X%02X%02X%02X"
"%02X%02X%02X%02X%02X%02X%02X%02X",
UUID[0], UUID[1], UUID[2], UUID[3],
UUID[4], UUID[5], UUID[6], UUID[7],
UUID[8], UUID[9], UUID[10], UUID[11],
UUID[12], UUID[13], UUID[14], UUID[15]);
strncat(path, uuid_str, strlen(uuid_str));
printf("open hugetlbfs file %s\n", path);
hugetlb_priv[level].fd = open(path, O_CREAT | O_RDWR, 0644);
if (hugetlb_priv[level].fd < 0) {
perror("Open hugetlbfs failed");
return -EINVAL;
}
/* get the pagesize */
if (fstatfs(hugetlb_priv[level].fd, &fs) != 0) {
perror("Failed to get statfs for hugetlbfs");
return -EINVAL;
}
if (fs.f_type == HUGETLBFS_MAGIC) {
/* get hugepage size from fstatfs */
hugetlb_priv[level].pg_size = fs.f_bsize;
} else {
close(hugetlb_priv[level].fd);
unlink(hugetlb_priv[level].node_path);
hugetlb_priv[level].fd = -1;
return -EINVAL;
}
return 0;
}
static void close_hugetlbfs(int level)
{
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return;
}
if (hugetlb_priv[level].fd >= 0) {
close(hugetlb_priv[level].fd);
hugetlb_priv[level].fd = -1;
unlink(hugetlb_priv[level].node_path);
hugetlb_priv[level].pg_size = 0;
}
}
static bool should_enable_hugetlb_level(int level)
{
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return false;
}
return (hugetlb_priv[level].lowmem > 0 ||
hugetlb_priv[level].highmem > 0);
}
/*
* level : hugepage level
* len : region length for mmap
* offset : region start offset from ctx->baseaddr
* skip : skip offset in different level hugetlbfs fd
*/
static int mmap_hugetlbfs(struct vmctx *ctx, int level, size_t len,
size_t offset, size_t skip)
{
char *addr;
size_t pagesz = 0;
int fd, i;
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return -EINVAL;
}
fd = hugetlb_priv[level].fd;
addr = mmap(ctx->baseaddr + offset, len, PROT_READ | PROT_WRITE,
MAP_SHARED | MAP_FIXED, fd, skip);
if (addr == MAP_FAILED)
return -ENOMEM;
printf("mmap 0x%lx@%p\n", len, addr);
/* pre-allocate hugepages by touching them */
pagesz = hugetlb_priv[level].pg_size;
printf("touch %ld pages with pagesz 0x%lx\n", len/pagesz, pagesz);
for (i = 0; i < len/pagesz; i++) {
*(volatile char *)addr = *addr;
addr += pagesz;
}
return 0;
}
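/* the lowmem/highmem mappers below walk the levels from the largest page
 * size down to the smallest: when mmap fails at the current level, one page
 * worth of memory is moved down to the next smaller level and the mmap is
 * retried, so a shortage of 1G pages falls back to extra 2M pages rather
 * than failing the whole allocation */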
static int mmap_hugetlbfs_lowmem(struct vmctx *ctx)
{
size_t len, offset, skip;
int level, ret = 0, pg_size;
offset = skip = 0;
for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
len = hugetlb_priv[level].lowmem;
pg_size = hugetlb_priv[level].pg_size;
while (len > 0) {
ret = mmap_hugetlbfs(ctx, level, len, offset, skip);
if (ret < 0 && level > HUGETLB_LV1) {
len -= pg_size;
hugetlb_priv[level].lowmem = len;
hugetlb_priv[level-1].lowmem += pg_size;
} else if (ret < 0 && level == HUGETLB_LV1)
return ret;
else {
offset += len;
break;
}
}
}
return 0;
}
static int mmap_hugetlbfs_highmem(struct vmctx *ctx)
{
size_t len, offset, skip;
int level, ret = 0, pg_size;
offset = 4 * GB;
for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
skip = hugetlb_priv[level].lowmem;
len = hugetlb_priv[level].highmem;
pg_size = hugetlb_priv[level].pg_size;
while (len > 0) {
ret = mmap_hugetlbfs(ctx, level, len, offset, skip);
if (ret < 0 && level > HUGETLB_LV1) {
len -= pg_size;
hugetlb_priv[level].highmem = len;
hugetlb_priv[level-1].highmem += pg_size;
} else if (ret < 0 && level == HUGETLB_LV1)
return ret;
else {
offset += len;
break;
}
}
}
return 0;
}
static int create_hugetlb_dirs(int level)
{
char tmp_path[MAX_PATH_LEN], *path;
int i, len;
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return -EINVAL;
}
path = hugetlb_priv[level].mount_path;
len = strlen(path);
if (len >= MAX_PATH_LEN) {
perror("exceed max path len");
return -EINVAL;
}
strcpy(tmp_path, path);
if (tmp_path[len - 1] != '/')
strcat(tmp_path, "/");
len = strlen(tmp_path);
for (i = 1; i < len; i++) {
if (tmp_path[i] == '/') {
tmp_path[i] = 0;
if (access(tmp_path, F_OK) != 0) {
if (mkdir(tmp_path, 0755) < 0) {
perror("mkdir failed");
return -1;
}
}
tmp_path[i] = '/';
}
}
return 0;
}
static int mount_hugetlbfs(int level)
{
int ret;
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return -EINVAL;
}
if (hugetlb_priv[level].mounted)
return 0;
/* only supported on x86: hugetlb level 1 is the 2M page, level 2 is the 1G page */
ret = mount("none", hugetlb_priv[level].mount_path, "hugetlbfs",
0, hugetlb_priv[level].mount_opt);
if (ret == 0)
hugetlb_priv[level].mounted = true;
return ret;
}
static void umount_hugetlbfs(int level)
{
if (level >= HUGETLB_LV_MAX) {
perror("exceed max hugetlb level");
return;
}
if (hugetlb_priv[level].mounted) {
umount(hugetlb_priv[level].mount_path);
hugetlb_priv[level].mounted = false;
}
}
bool check_hugetlb_support(void)
{
int level;
for (level = HUGETLB_LV1; level < HUGETLB_LV_MAX; level++) {
if (create_hugetlb_dirs(level) < 0)
return false;
}
for (level = HUGETLB_LV1; level < HUGETLB_LV_MAX; level++) {
if (mount_hugetlbfs(level) < 0) {
level--;
break;
}
}
if (level < HUGETLB_LV1) /* mount fail for level 1 */
return false;
else if (level == HUGETLB_LV1) /* mount fail for level 2 */
printf("WARNING: only level 1 hugetlb supported\n");
hugetlb_lv_max = level;
return true;
}
int hugetlb_setup_memory(struct vmctx *ctx)
{
int level;
size_t lowmem, highmem;
/* The first time the DM starts the UOS, hugetlbfs is already mounted by
 * check_hugetlb_support; on reboot it must be re-mounted here, since it
 * was unmounted by hugetlb_unsetup_memory.
 * TODO: a correct reboot flow should not change the memory layout, so
 * setup_memory should be removed from the reboot path.
 */
for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++)
mount_hugetlbfs(level);
/* open hugetlbfs and get pagesize for two level */
for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
if (open_hugetlbfs(ctx, level) < 0) {
perror("failed to open hugetlbfs");
goto err;
}
}
/* all memory should be at least aligned with
* hugetlb_priv[HUGETLB_LV1].pg_size */
ctx->lowmem =
ALIGN_DOWN(ctx->lowmem, hugetlb_priv[HUGETLB_LV1].pg_size);
ctx->highmem =
ALIGN_DOWN(ctx->highmem, hugetlb_priv[HUGETLB_LV1].pg_size);
if (ctx->highmem > 0)
total_size = 4 * GB + ctx->highmem;
else
total_size = ctx->lowmem;
if (total_size == 0) {
perror("vm request 0 memory");
goto err;
}
/* check & set hugetlb level memory size for lowmem & highmem */
highmem = ctx->highmem;
lowmem = ctx->lowmem;
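/* split memory across the levels from the largest page size down: each
 * level keeps the part of the remaining size that is aligned to its page
 * size and hands the remainder to the next smaller level.
 * Example: lowmem of 1234 MB with 1G/2M levels gives level 2
 * ALIGN_DOWN(1234M, 1G) = 1024 MB and level 1 the remaining 210 MB. */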
for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
hugetlb_priv[level].lowmem =
ALIGN_DOWN(lowmem, hugetlb_priv[level].pg_size);
hugetlb_priv[level].highmem =
ALIGN_DOWN(highmem, hugetlb_priv[level].pg_size);
if (level > HUGETLB_LV1) {
hugetlb_priv[level-1].lowmem = lowmem =
lowmem - hugetlb_priv[level].lowmem;
hugetlb_priv[level-1].highmem = highmem =
highmem - hugetlb_priv[level].highmem;
}
}
/* align up total size with huge page size for vma alignment */
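/* the extra page gives ALIGN_UP room: baseaddr is rounded up to the largest
 * enabled page size below, and the guest memory still fits inside the
 * anonymous reservation */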
for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
if (should_enable_hugetlb_level(level)) {
total_size += hugetlb_priv[level].pg_size;
break;
}
}
/* dump the hugepage layout we are trying to set up */
printf("\ntry to setup hugepage with:\n");
for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
printf("\tlevel %d - lowmem 0x%lx, highmem 0x%lx\n", level,
hugetlb_priv[level].lowmem,
hugetlb_priv[level].highmem);
}
printf("total_size 0x%lx\n\n", total_size);
/* reserve the overall VMA with an anonymous mapping first */
ptr = mmap(NULL, total_size, PROT_NONE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
if (ptr == MAP_FAILED) {
perror("anony mmap fail");
goto err;
}
/* align up baseaddr according to hugepage level size */
for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
if (should_enable_hugetlb_level(level)) {
ctx->baseaddr = (void *)ALIGN_UP((size_t)ptr,
hugetlb_priv[level].pg_size);
break;
}
}
printf("mmap ptr 0x%p -> baseaddr 0x%p\n", ptr, ctx->baseaddr);
/* mmap lowmem */
if (mmap_hugetlbfs_lowmem(ctx) < 0)
goto err;
/* mmap highmem */
if (mmap_hugetlbfs_highmem(ctx) < 0)
goto err;
/* dump the hugepage layout actually set up */
printf("\nreally setup hugepage with:\n");
for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
printf("\tlevel %d - lowmem 0x%lx, highmem 0x%lx\n", level,
hugetlb_priv[level].lowmem,
hugetlb_priv[level].highmem);
}
printf("total_size 0x%lx\n\n", total_size);
return 0;
err:
if (ptr) {
munmap(ptr, total_size);
ptr = NULL;
}
for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
close_hugetlbfs(level);
umount_hugetlbfs(level);
}
return -ENOMEM;
}
void hugetlb_unsetup_memory(struct vmctx *ctx)
{
int level;
if (total_size > 0) {
munmap(ptr, total_size);
total_size = 0;
ptr = NULL;
}
for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
close_hugetlbfs(level);
umount_hugetlbfs(level);
}
}
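
As a side note on the VMA alignment above: one extra huge page is added to
total_size so that baseaddr can be rounded up inside the anonymous
reservation without overrunning it. A small self-contained sketch of that
arithmetic, reusing the ALIGN_UP macro this commit adds to vmmapi.h (the
address and sizes below are made-up example values, 64-bit Linux assumed):

#include <stdio.h>

#define GB (1024 * 1024 * 1024UL)
#define ALIGN_UP(x, align) (((x) + ((align)-1)) & ~((align)-1))

int main(void)
{
	/* pretend this is what the anonymous PROT_NONE mmap returned */
	unsigned long ptr = 0x7f3a2468d000UL;
	unsigned long pg_size = 1 * GB;	/* largest enabled hugepage level */
	unsigned long request = 2 * GB;	/* guest memory to back */

	/* reserve one extra huge page of slack for the round-up */
	unsigned long total_size = request + pg_size;
	unsigned long baseaddr = ALIGN_UP(ptr, pg_size);

	printf("vma      0x%lx - 0x%lx\n", ptr, ptr + total_size);
	printf("baseaddr 0x%lx (slack 0x%lx), guest end 0x%lx\n",
		baseaddr, baseaddr - ptr, baseaddr + request);
	return 0;
}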


@@ -78,6 +78,7 @@ char *guest_uuid_str;
 char *vsbl_file_name;
 uint8_t trusty_enabled;
 bool stdio_in_use;
+bool hugetlb;
 static int guest_vmexit_on_hlt, guest_vmexit_on_pause;
 static int virtio_msix = 1;

@@ -127,7 +128,7 @@ static void
 usage(int code)
 {
 fprintf(stderr,
-"Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
+"Usage: %s [-abehuwxACHPSTWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
 " %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] \n"
 " %*s [--vsbl vsbl_file_name] [--part_info part_info_name]\n"
 " %*s [--enable_trusty] <vm>\n"

@@ -150,6 +151,7 @@ usage(int code)
 " -U: uuid\n"
 " -w: ignore unimplemented MSRs\n"
 " -W: force virtio to use single-vector MSI\n"
+" -T: use hugetlb for memory allocation\n"
 " -x: local apic is in x2APIC mode\n"
 " -Y: disable MPtable generation\n"
 " -k: kernel image path\n"

@@ -601,11 +603,12 @@ main(int argc, char *argv[])
 rtc_localtime = 1;
 memflags = 0;
 quit_vm_loop = 0;
+hugetlb = 0;
 if (signal(SIGINT, sig_handler_term) == SIG_ERR)
 fprintf(stderr, "cannot register handler for SIGINT\n");
-optstr = "abehuwxACHIMPSWYvk:r:B:p:g:c:s:m:l:U:G:i:";
+optstr = "abehuwxACHIMPSTWYvk:r:B:p:g:c:s:m:l:U:G:i:";
 while ((c = getopt_long(argc, argv, optstr, long_options,
 &option_idx)) != -1) {
 switch (c) {

@@ -689,6 +692,10 @@ main(int argc, char *argv[])
 case 'W':
 virtio_msix = 0;
 break;
+case 'T':
+if (check_hugetlb_support())
+hugetlb = 1;
+break;
 case 'x':
 x2apic_mode = 1;
 break;


@@ -39,9 +39,6 @@
 #include "dm.h"
 #include "smbiostbl.h"
-#define MB (1024*1024)
-#define GB (1024ULL*1024*1024)
 #define SMBIOS_BASE 0xF1000
 /* ACRN-DM_ACPI_BASE - SMBIOS_BASE) */


@@ -58,9 +58,6 @@
 #include "dm.h"
-#define MB (1024 * 1024UL)
-#define GB (1024 * 1024 * 1024UL)
 #define MAP_NOCORE 0
 #define MAP_ALIGNED_SUPER 0

@@ -387,6 +384,9 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 objsize = ctx->lowmem;
 }
+if (hugetlb)
+return hugetlb_setup_memory(ctx);
 /*
 * Stake out a contiguous region covering the guest physical memory
 * and the adjoining guard regions.

@@ -430,6 +430,11 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
 void
 vm_unsetup_memory(struct vmctx *ctx)
 {
+if (hugetlb) {
+hugetlb_unsetup_memory(ctx);
+return;
+}
 if (ctx->lowmem > 0)
 munmap(ctx->mmap_lowmem, ctx->lowmem);


@@ -41,6 +41,12 @@
 */
 #define VMMAPI_VERSION 0103 /* 2 digit major followed by 2 digit minor */
+#define MB (1024 * 1024UL)
+#define GB (1024 * 1024 * 1024UL)
+#define ALIGN_UP(x, align) (((x) + ((align)-1)) & ~((align)-1))
+#define ALIGN_DOWN(x, align) ((x) & ~((align)-1))
 struct iovec;
 struct vmctx {

@@ -116,6 +122,9 @@ void vm_destroy(struct vmctx *ctx);
 int vm_parse_memsize(const char *optarg, size_t *memsize);
 int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
 void vm_unsetup_memory(struct vmctx *ctx);
+bool check_hugetlb_support(void);
+int hugetlb_setup_memory(struct vmctx *ctx);
+void hugetlb_unsetup_memory(struct vmctx *ctx);
 void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
 uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
 void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);

@@ -150,4 +159,5 @@ int vm_create_vcpu(struct vmctx *ctx, int vcpu_id);
 int vm_get_cpu_state(struct vmctx *ctx, void *state_buf);
+extern bool hugetlb;
 #endif /* _VMMAPI_H_ */