mirror of
https://github.com/projectacrn/acrn-hypervisor.git
synced 2025-08-16 23:29:50 +00:00
Check whether there are enough free huge pages (currently 1GB and 2MB) for the UOS; if not, try to reserve more pages from the system. Two examples:

1. The system has 1 free 1GB page and 0 free 2MB pages, and a UOS needs 2GB + 500MB of memory. The DM tries to reserve one more 1GB page plus 250 2MB pages from the system; given enough free system memory, this succeeds.

2. The system has 4 free 1GB pages and 0 free 2MB pages, and a UOS needs 2GB + 500MB. Four 1GB pages cover the needed 2GB (4 > 2), leaving 2 spare. The DM tries to reserve 250 2MB pages from free system memory; if that fails, it releases one of the 2 spare 1GB pages and then retries reserving the 250 2MB pages.

Signed-off-by: Minggui Cao <minggui.cao@intel.com>
Reviewed-by: Jason Chen CJ <jason.cj.chen@intel.com>
Acked-by: Eddie Dong <eddie.dong@intel.com>
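As a rough sketch of the arithmetic described above (an illustration added here, not part of the patch; the constants and the main() wrapper are hypothetical), the per-level page needs for a 2GB + 500MB UOS work out as follows:

/* illustration only: per-level huge page needs for 2GB + 500MB */
#include <stdio.h>

int main(void)
{
	unsigned long mem = 2UL * 1024 * 1024 * 1024 + 500UL * 1024 * 1024;
	unsigned long pg_1g = 1UL << 30;	/* 1GB page size */
	unsigned long pg_2m = 2UL << 20;	/* 2MB page size */
	unsigned long need_1g = mem / pg_1g;		/* -> 2 pages   */
	unsigned long need_2m = (mem % pg_1g) / pg_2m;	/* -> 250 pages */

	printf("need %lu x 1GB pages, %lu x 2MB pages\n", need_1g, need_2m);
	return 0;
}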
740 lines
19 KiB
C
/*-
 * Copyright (c) 2018 Intel Corporation
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 */

#include <stdlib.h>
#include <stdio.h>
#include <stdbool.h>
#include <unistd.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/vfs.h>
#include <sys/mount.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <fcntl.h>
#include <errno.h>

#include "vmmapi.h"

#define HUGETLB_LV1 0
#define HUGETLB_LV2 1
#define HUGETLB_LV_MAX 2

#define MAX_PATH_LEN 128

#define HUGETLBFS_MAGIC 0x958458f6

/* huge page level 1 is the 2M page, level 2 is the 1G page */
#define PATH_HUGETLB_LV1 "/run/hugepage/acrn/huge_lv1/"
#define OPT_HUGETLB_LV1 "pagesize=2M"
#define PATH_HUGETLB_LV2 "/run/hugepage/acrn/huge_lv2/"
#define OPT_HUGETLB_LV2 "pagesize=1G"

#define SYS_PATH_LV1 "/sys/kernel/mm/hugepages/hugepages-2048kB/"
#define SYS_PATH_LV2 "/sys/kernel/mm/hugepages/hugepages-1048576kB/"
#define SYS_NR_HUGEPAGES "nr_hugepages"
#define SYS_FREE_HUGEPAGES "free_hugepages"

/* hugetlb_info records the private information of one specific hugetlbfs:
 * - mounted: whether hugetlbfs is mounted at mount_path below
 * - mount_path: hugetlbfs mount path
 * - mount_opt: hugetlbfs mount option
 * - node_path: record of the hugetlbfs node path
 * - fd: file descriptor of the opened hugetlbfs node
 * - pg_size: this hugetlbfs's page size
 * - lowmem: lowmem this hugetlbfs needs to allocate
 * - highmem: highmem this hugetlbfs needs to allocate
 * - pages_delta: equals needed pages - free pages;
 *   if > 0, it is the gap of needed pages; if < 0, more are free than needed
 * - nr_pages_path: sys path for the total number of pages
 * - free_pages_path: sys path for the number of free pages
 */
struct hugetlb_info {
	bool mounted;
	char *mount_path;
	char *mount_opt;

	char node_path[MAX_PATH_LEN];
	int fd;
	int pg_size;
	size_t lowmem;
	size_t highmem;

	int pages_delta;
	char *nr_pages_path;
	char *free_pages_path;
};

static struct hugetlb_info hugetlb_priv[HUGETLB_LV_MAX] = {
	{
		.mounted = false,
		.mount_path = PATH_HUGETLB_LV1,
		.mount_opt = OPT_HUGETLB_LV1,
		.fd = -1,
		.pg_size = 0,
		.lowmem = 0,
		.highmem = 0,

		.pages_delta = 0,
		.nr_pages_path = SYS_PATH_LV1 SYS_NR_HUGEPAGES,
		.free_pages_path = SYS_PATH_LV1 SYS_FREE_HUGEPAGES,
	},
	{
		.mounted = false,
		.mount_path = PATH_HUGETLB_LV2,
		.mount_opt = OPT_HUGETLB_LV2,
		.fd = -1,
		.pg_size = 0,
		.lowmem = 0,
		.highmem = 0,

		.pages_delta = 0,
		.nr_pages_path = SYS_PATH_LV2 SYS_NR_HUGEPAGES,
		.free_pages_path = SYS_PATH_LV2 SYS_FREE_HUGEPAGES,
	},
};

static void *ptr;
static size_t total_size;
static int hugetlb_lv_max;

static int open_hugetlbfs(struct vmctx *ctx, int level)
{
	char uuid_str[48];
	uint8_t UUID[16];
	char *path;
	size_t len;
	struct statfs fs;

	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return -EINVAL;
	}

	path = hugetlb_priv[level].node_path;
	memset(path, '\0', MAX_PATH_LEN);
	strncpy(path, hugetlb_priv[level].mount_path, MAX_PATH_LEN);

	len = strnlen(path, MAX_PATH_LEN);
	/* the UUID takes 32 bytes */
	if (len + 32 > MAX_PATH_LEN) {
		perror("PATH overflow");
		return -ENOMEM;
	}

	uuid_copy(UUID, ctx->vm_uuid);
	sprintf(uuid_str, "%02X%02X%02X%02X%02X%02X%02X%02X"
		"%02X%02X%02X%02X%02X%02X%02X%02X\n",
		UUID[0], UUID[1], UUID[2], UUID[3],
		UUID[4], UUID[5], UUID[6], UUID[7],
		UUID[8], UUID[9], UUID[10], UUID[11],
		UUID[12], UUID[13], UUID[14], UUID[15]);

	*(path + len) = '\0';
	strncat(path, uuid_str, strlen(uuid_str));

	printf("open hugetlbfs file %s\n", path);

	hugetlb_priv[level].fd = open(path, O_CREAT | O_RDWR, 0644);
	if (hugetlb_priv[level].fd < 0) {
		perror("Open hugetlbfs failed");
		return -EINVAL;
	}

	/* get the page size */
	if (fstatfs(hugetlb_priv[level].fd, &fs) != 0) {
		perror("Failed to get statfs of hugetlbfs");
		return -EINVAL;
	}

	if (fs.f_type == HUGETLBFS_MAGIC) {
		/* get the huge page size from fstatfs */
		hugetlb_priv[level].pg_size = fs.f_bsize;
	} else {
		close(hugetlb_priv[level].fd);
		unlink(hugetlb_priv[level].node_path);
		hugetlb_priv[level].fd = -1;
		return -EINVAL;
	}

	return 0;
}

static void close_hugetlbfs(int level)
{
	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return;
	}

	if (hugetlb_priv[level].fd >= 0) {
		close(hugetlb_priv[level].fd);
		hugetlb_priv[level].fd = -1;
		unlink(hugetlb_priv[level].node_path);
		hugetlb_priv[level].pg_size = 0;
	}
}

static bool should_enable_hugetlb_level(int level)
{
	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return false;
	}

	return (hugetlb_priv[level].lowmem > 0 ||
		hugetlb_priv[level].highmem > 0);
}

/*
 * level  : hugepage level
 * len    : region length for mmap
 * offset : region start offset from ctx->baseaddr
 * skip   : skip offset within the hugetlbfs fd of this level
 */
static int mmap_hugetlbfs(struct vmctx *ctx, int level, size_t len,
		size_t offset, size_t skip)
{
	char *addr;
	size_t pagesz = 0;
	int fd, i;

	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return -EINVAL;
	}

	fd = hugetlb_priv[level].fd;
	addr = mmap(ctx->baseaddr + offset, len, PROT_READ | PROT_WRITE,
			MAP_SHARED | MAP_FIXED, fd, skip);
	if (addr == MAP_FAILED)
		return -ENOMEM;

	printf("mmap 0x%lx@%p\n", len, addr);

	/* pre-allocate the huge pages by touching them */
	pagesz = hugetlb_priv[level].pg_size;

	printf("touch %ld pages with pagesz 0x%lx\n", len/pagesz, pagesz);

	for (i = 0; i < len/pagesz; i++) {
		*(volatile char *)addr = *addr;
		addr += pagesz;
	}

	return 0;
}

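/* Note (annotation, not upstream code): the touch loop above pre-faults
 * every huge page so that allocation failures surface here, at setup
 * time, rather than as a SIGBUS when the guest later accesses memory.
 */
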
static int mmap_hugetlbfs_lowmem(struct vmctx *ctx)
{
	size_t len, offset, skip;
	int level, ret = 0, pg_size;

	offset = skip = 0;
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		len = hugetlb_priv[level].lowmem;
		pg_size = hugetlb_priv[level].pg_size;
		while (len > 0) {
			ret = mmap_hugetlbfs(ctx, level, len, offset, skip);
			if (ret < 0 && level > HUGETLB_LV1) {
				/* fall back: move one page of this level
				 * down to the next lower level, then retry
				 */
				len -= pg_size;
				hugetlb_priv[level].lowmem = len;
				hugetlb_priv[level-1].lowmem += pg_size;
			} else if (ret < 0 && level == HUGETLB_LV1)
				return ret;
			else {
				offset += len;
				break;
			}
		}
	}

	return 0;
}

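/* Worked example (annotation, not upstream code): with 2GB + 500MB of
 * lowmem split as 2GB at LV2 and 500MB at LV1, the LV2 pass mmaps 2GB at
 * offset 0; each failed LV2 mmap moves one 1GB page down to LV1 before
 * retrying, and only a failure at LV1 itself is fatal.
 */
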
static int mmap_hugetlbfs_highmem(struct vmctx *ctx)
{
	size_t len, offset, skip;
	int level, ret = 0, pg_size;

	offset = 4 * GB;
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		skip = hugetlb_priv[level].lowmem;
		len = hugetlb_priv[level].highmem;
		pg_size = hugetlb_priv[level].pg_size;
		while (len > 0) {
			ret = mmap_hugetlbfs(ctx, level, len, offset, skip);
			if (ret < 0 && level > HUGETLB_LV1) {
				len -= pg_size;
				hugetlb_priv[level].highmem = len;
				hugetlb_priv[level-1].highmem += pg_size;
			} else if (ret < 0 && level == HUGETLB_LV1)
				return ret;
			else {
				offset += len;
				break;
			}
		}
	}

	return 0;
}

static int create_hugetlb_dirs(int level)
{
	char tmp_path[MAX_PATH_LEN], *path;
	int i;
	size_t len;

	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return -EINVAL;
	}

	path = hugetlb_priv[level].mount_path;
	len = strlen(path);
	if (len >= MAX_PATH_LEN || len == 0) {
		perror("invalid path len");
		return -EINVAL;
	}

	memset(tmp_path, '\0', MAX_PATH_LEN);
	strncpy(tmp_path, path, MAX_PATH_LEN - 1);

	if ((tmp_path[len - 1] != '/') && (strlen(tmp_path) < MAX_PATH_LEN - 1))
		strcat(tmp_path, "/");

	len = strlen(tmp_path);
	for (i = 1; i < len; i++) {
		if (tmp_path[i] == '/') {
			tmp_path[i] = 0;
			if (access(tmp_path, F_OK) != 0) {
				if (mkdir(tmp_path, 0755) < 0) {
					perror("mkdir failed");
					return -1;
				}
			}
			tmp_path[i] = '/';
		}
	}

	return 0;
}

static int mount_hugetlbfs(int level)
{
	int ret;

	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return -EINVAL;
	}

	if (hugetlb_priv[level].mounted)
		return 0;

	/* only x86 is supported: HUGETLB level 1 is the 2M page,
	 * level 2 the 1G page
	 */
	ret = mount("none", hugetlb_priv[level].mount_path, "hugetlbfs",
			0, hugetlb_priv[level].mount_opt);
	if (ret == 0)
		hugetlb_priv[level].mounted = true;

	return ret;
}

static void umount_hugetlbfs(int level)
{
	if (level >= HUGETLB_LV_MAX) {
		perror("exceed max hugetlb level");
		return;
	}

	if (hugetlb_priv[level].mounted) {
		umount(hugetlb_priv[level].mount_path);
		hugetlb_priv[level].mounted = false;
	}
}

static int read_sys_info(const char *sys_path)
{
	FILE *fp;
	char tmp_buf[12];
	int pages = 0;
	int result;

	fp = fopen(sys_path, "r");
	if (fp == NULL) {
		printf("can't open: %s, err: %s\n", sys_path, strerror(errno));
		return 0;
	}

	memset(tmp_buf, 0, 12);
	result = fread(&tmp_buf, sizeof(char), 8, fp);
	if (result <= 0)
		printf("read %s, error: %s, please check!\n",
			sys_path, strerror(errno));
	else
		pages = strtol(tmp_buf, NULL, 10);

	fclose(fp);
	return pages;
}

/* check if there are enough free huge pages for the UOS */
static bool hugetlb_check_memgap(void)
{
	int lvl, free_pages, need_pages;
	bool has_gap = false;

	for (lvl = HUGETLB_LV1; lvl < hugetlb_lv_max; lvl++) {
		free_pages = read_sys_info(hugetlb_priv[lvl].free_pages_path);
		need_pages = (hugetlb_priv[lvl].lowmem +
			hugetlb_priv[lvl].highmem) / hugetlb_priv[lvl].pg_size;

		hugetlb_priv[lvl].pages_delta = need_pages - free_pages;
		/* if delta > 0, it is a gap of needed pages to be handled */
		if (hugetlb_priv[lvl].pages_delta > 0)
			has_gap = true;

		printf("level %d free/need pages:%d/%d page size:0x%x\n", lvl,
			free_pages, need_pages, hugetlb_priv[lvl].pg_size);
	}

	return has_gap;
}

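/* Example (annotation, not upstream code), using the numbers from the
 * commit message: if LV2 needs 2 x 1GB pages and 4 are free, its
 * pages_delta = -2 (surplus); if LV1 needs 250 x 2MB pages and none are
 * free, its pages_delta = 250 and has_gap becomes true.
 */
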
/* try to reserve more huge pages at this level */
static void reserve_more_pages(int level)
{
	int total_pages, orig_pages, cur_pages;
	char cmd_buf[MAX_PATH_LEN];
	FILE *fp;

	orig_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);
	total_pages = orig_pages + hugetlb_priv[level].pages_delta;
	snprintf(cmd_buf, MAX_PATH_LEN, "echo %d > %s",
		total_pages, hugetlb_priv[level].nr_pages_path);

	/* shell command to reserve the needed huge pages */
	fp = popen(cmd_buf, "r");
	if (fp == NULL) {
		printf("cmd: %s failed!\n", cmd_buf);
		return;
	}
	pclose(fp);

	printf("to reserve pages (+orig %d): %s\n", orig_pages, cmd_buf);
	cur_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);
	hugetlb_priv[level].pages_delta = total_pages - cur_pages;
}

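/* Equivalent shell operation (annotation, not upstream code):
 *   echo <total_pages> > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
 * the kernel may reserve fewer pages than requested, which is why
 * pages_delta is re-computed from nr_hugepages afterwards.
 */
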
/* try to release a larger free page */
static bool release_larger_freepage(int level_limit)
{
	int level;
	int total_pages, orig_pages, cur_pages;
	char cmd_buf[MAX_PATH_LEN];
	FILE *fp;

	for (level = hugetlb_lv_max - 1; level >= level_limit; level--) {
		if (hugetlb_priv[level].pages_delta >= 0)
			continue;

		/* free one unused larger page */
		orig_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);
		total_pages = orig_pages - 1;
		snprintf(cmd_buf, MAX_PATH_LEN, "echo %d > %s",
			total_pages, hugetlb_priv[level].nr_pages_path);

		fp = popen(cmd_buf, "r");
		if (fp == NULL) {
			printf("cmd to free mem: %s failed!\n", cmd_buf);
			return false;
		}
		pclose(fp);

		cur_pages = read_sys_info(hugetlb_priv[level].nr_pages_path);

		/* page released successfully */
		if (cur_pages < orig_pages) {
			hugetlb_priv[level].pages_delta++;
			break;
		}
	}

	if (level < level_limit)
		return false;

	return true;
}

/* reserve more free huge pages at the different levels.
 * it needs to handle the following cases:
 * A. not enough free memory to reserve the gap pages: just fail.
 * B. enough free memory to reserve the gap pages of each level.
 * C. enough free memory, but not enough can be reserved at a higher level,
 *    so the lower level needs to handle that by reserving more free pages.
 * D. enough free pages at a higher level, but not enough free memory for
 *    the lower level gap pages, so release some higher level free pages.
 * other info:
 *    even with enough free memory, it is easier to reserve smaller pages
 *    than larger ones, for example 2MB is easier than 1GB. One flaw of the
 *    current solution: it can leave the SOS with very little free memory.
 * return value: true on success; false on failure
 */
static bool hugetlb_reserve_pages(void)
{
	int left_gap = 0, pg_size;
	int level;

	printf("to reserve more free pages:\n");
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (hugetlb_priv[level].pages_delta <= 0)
			continue;

		/* if there is a gap, try to reserve more pages */
		reserve_more_pages(level);

		/* check if enough pages were reserved */
		if (hugetlb_priv[level].pages_delta == 0)
			continue;

		/* the system probably allocates fewer pages than needed,
		 * especially for larger pages like 1GB: even with enough
		 * free memory, allocating a 1GB huge page can still fail.
		 * if so, the next lower level needs to handle it.
		 */
		if (level > HUGETLB_LV1) {
			left_gap = hugetlb_priv[level].pages_delta;
			pg_size = hugetlb_priv[level].pg_size;
			hugetlb_priv[level - 1].pages_delta += (size_t)left_gap
				* pg_size / hugetlb_priv[level - 1].pg_size;
			continue;
		}

		/* now, for level == HUGETLB_LV1, it still can't allocate
		 * enough pages; go back to the larger pages to check if
		 * some unused free pages can be released. if the release
		 * succeeds, try LV1 again.
		 */
		if (release_larger_freepage(level + 1))
			level++;
		else
			break;
	}

	if (level >= HUGETLB_LV1) {
		printf("level %d pages gap: %d failed to reserve!\n",
			level, left_gap);
		return false;
	}

	printf("now enough free pages are reserved!\n");
	return true;
}

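/* Example of cases C/D above (annotation, not upstream code): if LV2 has a
 * gap of 1 x 1GB page that reserve_more_pages() cannot satisfy, LV1's
 * delta grows by 1GB/2MB = 512 pages; conversely, if LV1's 2MB reservation
 * fails while LV2 holds surplus pages, release_larger_freepage() frees one
 * 1GB page and the LV1 reservation is retried.
 */
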
bool check_hugetlb_support(void)
{
	int level;

	for (level = HUGETLB_LV1; level < HUGETLB_LV_MAX; level++) {
		if (create_hugetlb_dirs(level) < 0)
			return false;
	}

	for (level = HUGETLB_LV1; level < HUGETLB_LV_MAX; level++) {
		if (mount_hugetlbfs(level) < 0) {
			level--;
			break;
		}
	}

	if (level < HUGETLB_LV1) /* mount failed for level 1 */
		return false;
	else if (level == HUGETLB_LV1) /* mount failed for level 2 */
		printf("WARNING: only level 1 hugetlb is supported\n");

	hugetlb_lv_max = level;

	return true;
}

int hugetlb_setup_memory(struct vmctx *ctx)
{
	int level;
	size_t lowmem, highmem;
	bool has_gap;

	/* when the DM starts the UOS the first time, hugetlbfs is already
	 * mounted by check_hugetlb_support; but on reboot it needs to be
	 * re-mounted here, as it was umounted by hugetlb_unsetup_memory.
	 * TODO: a correct reboot process should not change the memory
	 * layout; setup_memory should be removed from the reboot process.
	 */
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++)
		mount_hugetlbfs(level);

	/* open hugetlbfs and get the page size for both levels */
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		if (open_hugetlbfs(ctx, level) < 0) {
			perror("failed to open hugetlbfs");
			goto err;
		}
	}

	/* all memory should be aligned with at least
	 * hugetlb_priv[HUGETLB_LV1].pg_size
	 */
	ctx->lowmem =
		ALIGN_DOWN(ctx->lowmem, hugetlb_priv[HUGETLB_LV1].pg_size);
	ctx->highmem =
		ALIGN_DOWN(ctx->highmem, hugetlb_priv[HUGETLB_LV1].pg_size);

	if (ctx->highmem > 0)
		total_size = 4 * GB + ctx->highmem;
	else
		total_size = ctx->lowmem;

	if (total_size == 0) {
		perror("vm requests 0 memory");
		goto err;
	}

	/* check & set each hugetlb level's memory size for lowmem & highmem */
	highmem = ctx->highmem;
	lowmem = ctx->lowmem;
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		hugetlb_priv[level].lowmem =
			ALIGN_DOWN(lowmem, hugetlb_priv[level].pg_size);
		hugetlb_priv[level].highmem =
			ALIGN_DOWN(highmem, hugetlb_priv[level].pg_size);
		if (level > HUGETLB_LV1) {
			hugetlb_priv[level-1].lowmem = lowmem =
				lowmem - hugetlb_priv[level].lowmem;
			hugetlb_priv[level-1].highmem = highmem =
				highmem - hugetlb_priv[level].highmem;
		}
	}

	/* check each level's memory need against the free pages */
	has_gap = hugetlb_check_memgap();
	if (has_gap) {
		if (!hugetlb_reserve_pages())
			goto err;
	}

	/* align up total size with the huge page size for vma alignment */
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (should_enable_hugetlb_level(level)) {
			total_size += hugetlb_priv[level].pg_size;
			break;
		}
	}

	/* dump the hugepage layout about to be set up */
	printf("\ntry to setup hugepage with:\n");
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		printf("\tlevel %d - lowmem 0x%lx, highmem 0x%lx\n", level,
			hugetlb_priv[level].lowmem,
			hugetlb_priv[level].highmem);
	}
	printf("total_size 0x%lx\n\n", total_size);

	/* reserve the overall vma with an anonymous mapping */
	ptr = mmap(NULL, total_size, PROT_NONE,
			MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
	if (ptr == MAP_FAILED) {
		perror("anonymous mmap failed");
		goto err;
	}

	/* align up baseaddr according to the hugepage level size */
	for (level = hugetlb_lv_max - 1; level >= HUGETLB_LV1; level--) {
		if (should_enable_hugetlb_level(level)) {
			ctx->baseaddr = (void *)ALIGN_UP((size_t)ptr,
					hugetlb_priv[level].pg_size);
			break;
		}
	}
	printf("mmap ptr 0x%p -> baseaddr 0x%p\n", ptr, ctx->baseaddr);

	/* mmap lowmem */
	if (mmap_hugetlbfs_lowmem(ctx) < 0)
		goto err;

	/* mmap highmem */
	if (mmap_hugetlbfs_highmem(ctx) < 0)
		goto err;

	/* dump the hugepage layout actually set up */
	printf("\nreally setup hugepage with:\n");
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		printf("\tlevel %d - lowmem 0x%lx, highmem 0x%lx\n", level,
			hugetlb_priv[level].lowmem,
			hugetlb_priv[level].highmem);
	}
	printf("total_size 0x%lx\n\n", total_size);

	/* map ept for lowmem */
	if (vm_map_memseg_vma(ctx, ctx->lowmem, 0,
		(uint64_t)ctx->baseaddr, PROT_ALL) < 0)
		goto err;

	/* map ept for highmem */
	if (ctx->highmem > 0) {
		if (vm_map_memseg_vma(ctx, ctx->highmem, 4 * GB,
			(uint64_t)(ctx->baseaddr + 4 * GB), PROT_ALL) < 0)
			goto err;
	}

	return 0;

err:
	if (ptr) {
		munmap(ptr, total_size);
		ptr = NULL;
	}
	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		close_hugetlbfs(level);
		umount_hugetlbfs(level);
	}
	return -ENOMEM;
}

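/* Typical call sequence from the DM (annotation; a sketch under the
 * assumption that the caller sets ctx->lowmem/highmem first):
 *
 *   if (!check_hugetlb_support())        // mkdir + mount hugetlbfs
 *       return -1;
 *   if (hugetlb_setup_memory(ctx) < 0)   // reserve, mmap, and map EPT
 *       return -1;
 *   ...                                  // run the UOS
 *   hugetlb_unsetup_memory(ctx);         // munmap, close, umount
 */
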
void hugetlb_unsetup_memory(struct vmctx *ctx)
{
	int level;

	if (total_size > 0) {
		munmap(ptr, total_size);
		total_size = 0;
		ptr = NULL;
	}

	for (level = HUGETLB_LV1; level < hugetlb_lv_max; level++) {
		close_hugetlbfs(level);
		umount_hugetlbfs(level);
	}
}