From 5c8d55c63fdb06d042932394c0b60a7b30aeb132 Mon Sep 17 00:00:00 2001 From: Julio Montes Date: Mon, 29 Apr 2019 11:13:07 -0500 Subject: [PATCH] qemu-vanilla: add patches required in qemu 4.0 to run kata containers Add kata specific patches for qemu 4.0: * 9p optimization * vm-templating patch fixes #471 Signed-off-by: Julio Montes --- ...utines-of-9p-to-increase-the-I-O-per.patch | 98 +++++++ ...pability-to-bypass-the-shared-memory.patch | 257 ++++++++++++++++++ 2 files changed, 355 insertions(+) create mode 100644 obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch create mode 100644 obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch diff --git a/obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch b/obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch new file mode 100644 index 0000000000..a888663354 --- /dev/null +++ b/obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch @@ -0,0 +1,98 @@ +From 894a8ae5524fff4ad9d3551c515788c5650e1fc7 Mon Sep 17 00:00:00 2001 +From: Yang Zhong +Date: Wed, 28 Mar 2018 20:14:53 +0800 +Subject: [PATCH 1/2] 9p: removing coroutines of 9p to increase the I/O + performance + +This is a quick workaround, need to be fixed. 
+ +Signed-off-by: Chao Peng +--- + hw/9pfs/9p.c | 12 +++++------- + hw/9pfs/9p.h | 6 +++--- + hw/9pfs/coth.h | 3 +++ + 3 files changed, 11 insertions(+), 10 deletions(-) + +diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c +index 55821343e5..c5f089860d 100644 +--- a/hw/9pfs/9p.c ++++ b/hw/9pfs/9p.c +@@ -690,10 +690,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len) + out_notify: + pdu->s->transport->push_and_notify(pdu); + +- /* Now wakeup anybody waiting in flush for this request */ +- if (!qemu_co_queue_next(&pdu->complete)) { +- pdu_free(pdu); +- } ++ pdu_free(pdu); + } + + static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension) +@@ -3525,7 +3522,7 @@ static inline bool is_read_only_op(V9fsPDU *pdu) + + void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) + { +- Coroutine *co; ++// Coroutine *co; + CoroutineEntry *handler; + V9fsState *s = pdu->s; + +@@ -3543,8 +3540,9 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr) + } + + qemu_co_queue_init(&pdu->complete); +- co = qemu_coroutine_create(handler, pdu); +- qemu_coroutine_enter(co); ++ handler(pdu); ++ //co = qemu_coroutine_create(handler, pdu); ++ //qemu_coroutine_enter(co); + } + + /* Returns 0 on success, 1 on failure. 
*/ +diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h +index 8883761b2c..24aeba03f7 100644 +--- a/hw/9pfs/9p.h ++++ b/hw/9pfs/9p.h +@@ -320,21 +320,21 @@ extern int total_open_fd; + static inline void v9fs_path_write_lock(V9fsState *s) + { + if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { +- qemu_co_rwlock_wrlock(&s->rename_lock); ++ // qemu_co_rwlock_wrlock(&s->rename_lock); + } + } + + static inline void v9fs_path_read_lock(V9fsState *s) + { + if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { +- qemu_co_rwlock_rdlock(&s->rename_lock); ++ // qemu_co_rwlock_rdlock(&s->rename_lock); + } + } + + static inline void v9fs_path_unlock(V9fsState *s) + { + if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) { +- qemu_co_rwlock_unlock(&s->rename_lock); ++ // qemu_co_rwlock_unlock(&s->rename_lock); + } + } + +diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h +index 19e4d9287e..728a25865d 100644 +--- a/hw/9pfs/coth.h ++++ b/hw/9pfs/coth.h +@@ -47,6 +47,9 @@ + qemu_coroutine_yield(); \ + } while (0) + ++#undef v9fs_co_run_in_worker ++#define v9fs_co_run_in_worker(code_block) do {code_block} while(0); ++ + void co_run_in_worker_bh(void *); + int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *); + int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **); +-- +2.17.2 + diff --git a/obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch b/obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch new file mode 100644 index 0000000000..891ada0bbe --- /dev/null +++ b/obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch @@ -0,0 +1,257 @@ +From ea692fc6ff15a231acd2d7396166bef8e49dab05 Mon Sep 17 00:00:00 2001 +From: Lai Jiangshan +Date: Fri, 30 Mar 2018 18:09:54 +0800 +Subject: [PATCH 2/2] migration: add capability to bypass the shared memory +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +1) What's this + +When the migration capability 'bypass-shared-memory' +is set, the shared memory will be bypassed during migration. + +It is the key feature to enable several excellent features for +qemu, such as qemu-local-migration, qemu-live-update, +extremely-fast-save-restore, vm-template, vm-fast-live-clone, +yet-another-post-copy-migration, etc. + +The philosophy behind this key feature, including the resulting +advanced key features, is that a part of the memory management +is separated out from qemu, letting other toolkits +such as libvirt, kata-containers (https://github.com/kata-containers), +runv (https://github.com/hyperhq/runv/) or multiple cooperating +qemu commands directly access it, manage it, and provide features on it. + +2) Status in real world + +hyperhq (http://hyper.sh http://hypercontainer.io/) +introduced the feature vm-template (vm-fast-live-clone) +to the hyper container several years ago, and it works perfectly +(see https://github.com/hyperhq/runv/pull/297). + +The feature vm-template allows containers (VMs) to +be started in 130ms and saves 80M of memory for every +container (VM), so that hyper containers are as fast +and high-density as normal containers. + +The kata-containers project (https://github.com/kata-containers), +which was launched by hyper, intel and friends and which descended +from runv (and clear-container), should have this feature enabled. +Unfortunately, due to code conflicts between runv and cc, +this feature was temporarily disabled and it is being brought +back by the hyper and intel teams. + +3) How to use and bring up advanced features. + +In the current qemu command line, shared memory has +to be configured via memory-object. + +a) feature: qemu-local-migration, qemu-live-update +Set the mem-path on the tmpfs and set share=on for it when +starting the vm. 
example: +-object \ +memory-backend-file,id=mem,size=128M,mem-path=/dev/shm/memory,share=on \ +-numa node,nodeid=0,cpus=0-7,memdev=mem + +when you want to migrate the vm locally (after fixing a security bug +in the qemu binary, or for another reason), you can start a new qemu with +the same command line and -incoming, then you can migrate the +vm from the old qemu to the new qemu with the migration capability +'bypass-shared-memory' set. The migration will migrate the device-state +*ONLY*; the memory is the original memory backed by the tmpfs file. + +b) feature: extremely-fast-save-restore +the same as above, but the mem-path is on a persistent file system. + +c) feature: vm-template, vm-fast-live-clone +the template vm is started as in a), and paused when the guest reaches +the template point (example: the guest app is ready), then the template +vm is saved. (the qemu process of the template can be killed now, because +we need only the memory and the device state files (in tmpfs)). + +Then we can launch one or multiple VMs based on the template vm states; +the new VMs are started without the “share=on”, all the new VMs share +the initial memory from the memory file, so they save a lot of memory. +All the new VMs start from the template point, so the guest app can go to +work quickly. + +A new VM booted from the template vm can’t become a template again; +if you need this unusual chained-template feature, you can write +a cloneable-tmpfs kernel module for it. + +The libvirt toolkit can’t manage vm-template currently; in +hyperhq/runv, we use a qemu wrapper script to do it. I hope someone adds a +“libvirt managed template” feature to libvirt. + +d) feature: yet-another-post-copy-migration +It is a possible feature, but no toolkit can do it well now. +Using an nbd server/client on the memory file is reluctantly OK but +inconvenient. A special feature for tmpfs might be needed to +fully complete this feature. 
+No one need yet another post copy migration method, +but it is possible when some crazy man need it. + +Cc: Samuel Ortiz +Cc: Sebastien Boeuf +Cc: James O. D. Hunt +Cc: Xu Wang +Cc: Peng Tao +Cc: Xiao Guangrong +Cc: Xiao Guangrong +Signed-off-by: Lai Jiangshan +--- + migration/migration.c | 14 ++++++++++++++ + migration/migration.h | 1 + + migration/ram.c | 27 ++++++++++++++++++--------- + qapi/migration.json | 9 +++++++-- + 4 files changed, 40 insertions(+), 11 deletions(-) + +diff --git a/migration/migration.c b/migration/migration.c +index 609e0df5d0..02c96aadb1 100644 +--- a/migration/migration.c ++++ b/migration/migration.c +@@ -2010,6 +2010,20 @@ bool migrate_release_ram(void) + return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM]; + } + ++bool migrate_bypass_shared_memory(void) ++{ ++ MigrationState *s; ++ ++ /* it is not workable with postcopy yet. */ ++ if (migrate_postcopy_ram()) { ++ return false; ++ } ++ ++ s = migrate_get_current(); ++ ++ return s->enabled_capabilities[MIGRATION_CAPABILITY_BYPASS_SHARED_MEMORY]; ++} ++ + bool migrate_postcopy_ram(void) + { + MigrationState *s; +diff --git a/migration/migration.h b/migration/migration.h +index 438f17edad..e8eae82910 100644 +--- a/migration/migration.h ++++ b/migration/migration.h +@@ -261,6 +261,7 @@ MigrationState *migrate_get_current(void); + + bool migrate_postcopy(void); + ++bool migrate_bypass_shared_memory(void); + bool migrate_release_ram(void); + bool migrate_postcopy_ram(void); + bool migrate_zero_blocks(void); +diff --git a/migration/ram.c b/migration/ram.c +index 1ca9ba77b6..1b35b4a30c 100644 +--- a/migration/ram.c ++++ b/migration/ram.c +@@ -1646,6 +1646,11 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, + unsigned long *bitmap = rb->bmap; + unsigned long next; + ++ /* when this ramblock is requested bypassing */ ++ if (!bitmap) { ++ return size; ++ } ++ + if (ramblock_is_ignored(rb)) { + return size; + } +@@ -1773,7 +1778,9 @@ static void 
migration_bitmap_sync(RAMState *rs) + qemu_mutex_lock(&rs->bitmap_mutex); + rcu_read_lock(); + RAMBLOCK_FOREACH_NOT_IGNORED(block) { +- migration_bitmap_sync_range(rs, block, 0, block->used_length); ++ if (!migrate_bypass_shared_memory() || !qemu_ram_is_shared(block)) { ++ migration_bitmap_sync_range(rs, block, 0, block->used_length); ++ } + } + ram_counters.remaining = ram_bytes_remaining(); + rcu_read_unlock(); +@@ -3183,18 +3190,12 @@ static int ram_state_init(RAMState **rsp) + qemu_mutex_init(&(*rsp)->src_page_req_mutex); + QSIMPLEQ_INIT(&(*rsp)->src_page_requests); + +- /* +- * Count the total number of pages used by ram blocks not including any +- * gaps due to alignment or unplugs. +- */ +- (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS; +- + ram_state_reset(*rsp); + + return 0; + } + +-static void ram_list_init_bitmaps(void) ++static void ram_list_init_bitmaps(RAMState *rs) + { + RAMBlock *block; + unsigned long pages; +@@ -3202,9 +3203,17 @@ static void ram_list_init_bitmaps(void) + /* Skip setting bitmap if there is no RAM */ + if (ram_bytes_total()) { + RAMBLOCK_FOREACH_NOT_IGNORED(block) { ++ if (migrate_bypass_shared_memory() && qemu_ram_is_shared(block)) { ++ continue; ++ } + pages = block->max_length >> TARGET_PAGE_BITS; + block->bmap = bitmap_new(pages); + bitmap_set(block->bmap, 0, pages); ++ /* ++ * Count the total number of pages used by ram blocks not ++ * including any gaps due to alignment or unplugs. 
++ */ ++ rs->migration_dirty_pages += pages; + if (migrate_postcopy_ram()) { + block->unsentmap = bitmap_new(pages); + bitmap_set(block->unsentmap, 0, pages); +@@ -3220,7 +3229,7 @@ static void ram_init_bitmaps(RAMState *rs) + qemu_mutex_lock_ramlist(); + rcu_read_lock(); + +- ram_list_init_bitmaps(); ++ ram_list_init_bitmaps(rs); + memory_global_dirty_log_start(); + migration_bitmap_sync_precopy(rs); + +diff --git a/qapi/migration.json b/qapi/migration.json +index 9cfbaf8c6c..4194fdd5a4 100644 +--- a/qapi/migration.json ++++ b/qapi/migration.json +@@ -415,6 +415,10 @@ + # + # @x-ignore-shared: If enabled, QEMU will not migrate shared memory (since 4.0) + # ++# @bypass-shared-memory: the shared memory region will be bypassed on migration. ++# This feature allows the memory region to be reused by new qemu(s) ++# or be migrated separately. (since 2.12) ++# + # Since: 1.2 + ## + { 'enum': 'MigrationCapability', +@@ -422,7 +426,7 @@ + 'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram', + 'block', 'return-path', 'pause-before-switchover', 'multifd', + 'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate', +- 'x-ignore-shared' ] } ++ 'bypass-shared-memory', 'x-ignore-shared' ] } + + ## + # @MigrationCapabilityStatus: +@@ -476,7 +480,8 @@ + # {"state": false, "capability": "compress"}, + # {"state": true, "capability": "events"}, + # {"state": false, "capability": "postcopy-ram"}, +-# {"state": false, "capability": "x-colo"} ++# {"state": false, "capability": "x-colo"}, ++# {"state": false, "capability": "bypass-shared-memory"} + # ]} + # + ## +-- +2.17.2 +