From 5c8d55c63fdb06d042932394c0b60a7b30aeb132 Mon Sep 17 00:00:00 2001
From: Julio Montes <julio.montes@intel.com>
Date: Mon, 29 Apr 2019 11:13:07 -0500
Subject: [PATCH] qemu-vanilla: add patches required in qemu 4.0 to run kata
 containers

Add kata specific patches for qemu 4.0:
* 9p optimization
* vm-templating patch

fixes #471

Signed-off-by: Julio Montes <julio.montes@intel.com>
---
 ...utines-of-9p-to-increase-the-I-O-per.patch |  98 +++++++
 ...pability-to-bypass-the-shared-memory.patch | 257 ++++++++++++++++++
 2 files changed, 355 insertions(+)
 create mode 100644 obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch
 create mode 100644 obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch

diff --git a/obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch b/obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch
new file mode 100644
index 0000000000..a888663354
--- /dev/null
+++ b/obs-packaging/qemu-vanilla/patches/0001-9p-removing-coroutines-of-9p-to-increase-the-I-O-per.patch
@@ -0,0 +1,98 @@
+From 894a8ae5524fff4ad9d3551c515788c5650e1fc7 Mon Sep 17 00:00:00 2001
+From: Yang Zhong <yang.zhong@intel.com>
+Date: Wed, 28 Mar 2018 20:14:53 +0800
+Subject: [PATCH 1/2] 9p: removing coroutines of 9p to increase the I/O
+ performance
+
+This is a quick workaround and still needs a proper fix.
+
+Signed-off-by: Chao Peng <chao.p.peng@linux.intel.com>
+---
+ hw/9pfs/9p.c   | 12 +++++-------
+ hw/9pfs/9p.h   |  6 +++---
+ hw/9pfs/coth.h |  3 +++
+ 3 files changed, 11 insertions(+), 10 deletions(-)
+
+diff --git a/hw/9pfs/9p.c b/hw/9pfs/9p.c
+index 55821343e5..c5f089860d 100644
+--- a/hw/9pfs/9p.c
++++ b/hw/9pfs/9p.c
+@@ -690,10 +690,7 @@ static void coroutine_fn pdu_complete(V9fsPDU *pdu, ssize_t len)
+ out_notify:
+     pdu->s->transport->push_and_notify(pdu);
+ 
+-    /* Now wakeup anybody waiting in flush for this request */
+-    if (!qemu_co_queue_next(&pdu->complete)) {
+-        pdu_free(pdu);
+-    }
++    pdu_free(pdu);
+ }
+ 
+ static mode_t v9mode_to_mode(uint32_t mode, V9fsString *extension)
+@@ -3525,7 +3522,7 @@ static inline bool is_read_only_op(V9fsPDU *pdu)
+ 
+ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
+ {
+-    Coroutine *co;
++//    Coroutine *co;
+     CoroutineEntry *handler;
+     V9fsState *s = pdu->s;
+ 
+@@ -3543,8 +3540,9 @@ void pdu_submit(V9fsPDU *pdu, P9MsgHeader *hdr)
+     }
+ 
+     qemu_co_queue_init(&pdu->complete);
+-    co = qemu_coroutine_create(handler, pdu);
+-    qemu_coroutine_enter(co);
++    handler(pdu);
++    //co = qemu_coroutine_create(handler, pdu);
++    //qemu_coroutine_enter(co);
+ }
+ 
+ /* Returns 0 on success, 1 on failure. */
+diff --git a/hw/9pfs/9p.h b/hw/9pfs/9p.h
+index 8883761b2c..24aeba03f7 100644
+--- a/hw/9pfs/9p.h
++++ b/hw/9pfs/9p.h
+@@ -320,21 +320,21 @@ extern int total_open_fd;
+ static inline void v9fs_path_write_lock(V9fsState *s)
+ {
+     if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
+-        qemu_co_rwlock_wrlock(&s->rename_lock);
++    //    qemu_co_rwlock_wrlock(&s->rename_lock);
+     }
+ }
+ 
+ static inline void v9fs_path_read_lock(V9fsState *s)
+ {
+     if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
+-        qemu_co_rwlock_rdlock(&s->rename_lock);
++     //   qemu_co_rwlock_rdlock(&s->rename_lock);
+     }
+ }
+ 
+ static inline void v9fs_path_unlock(V9fsState *s)
+ {
+     if (s->ctx.export_flags & V9FS_PATHNAME_FSCONTEXT) {
+-        qemu_co_rwlock_unlock(&s->rename_lock);
++     //   qemu_co_rwlock_unlock(&s->rename_lock);
+     }
+ }
+ 
+diff --git a/hw/9pfs/coth.h b/hw/9pfs/coth.h
+index 19e4d9287e..728a25865d 100644
+--- a/hw/9pfs/coth.h
++++ b/hw/9pfs/coth.h
+@@ -47,6 +47,9 @@
+         qemu_coroutine_yield();                                         \
+     } while (0)
+ 
++#undef v9fs_co_run_in_worker
++#define v9fs_co_run_in_worker(code_block) do {code_block} while(0);
++
+ void co_run_in_worker_bh(void *);
+ int coroutine_fn v9fs_co_readlink(V9fsPDU *, V9fsPath *, V9fsString *);
+ int coroutine_fn v9fs_co_readdir(V9fsPDU *, V9fsFidState *, struct dirent **);
+-- 
+2.17.2
+
diff --git a/obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch b/obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch
new file mode 100644
index 0000000000..891ada0bbe
--- /dev/null
+++ b/obs-packaging/qemu-vanilla/patches/0002-migration-add-capability-to-bypass-the-shared-memory.patch
@@ -0,0 +1,257 @@
+From ea692fc6ff15a231acd2d7396166bef8e49dab05 Mon Sep 17 00:00:00 2001
+From: Lai Jiangshan <jiangshanlai@gmail.com>
+Date: Fri, 30 Mar 2018 18:09:54 +0800
+Subject: [PATCH 2/2] migration: add capability to bypass the shared memory
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+1) What's this
+
+When the migration capability 'bypass-shared-memory'
+is set, the shared memory will be bypassed during migration.
+
+It is the key feature to enable several excellent features for
+the qemu, such as qemu-local-migration, qemu-live-update,
+extremely-fast-save-restore, vm-template, vm-fast-live-clone,
+yet-another-post-copy-migration, etc.
+
+The philosophy behind this key feature, including the resulting
+advanced key features, is that a part of the memory management
+is separated out from qemu, letting other toolkits
+such as libvirt, kata-containers (https://github.com/kata-containers),
+runv (https://github.com/hyperhq/runv/) or multiple cooperating
+qemu commands directly access it, manage it, and provide features on it.
+
+2) Status in real world
+
+Hyperhq (http://hyper.sh, http://hypercontainer.io/)
+introduced the vm-template (vm-fast-live-clone) feature
+to the hyper container several years ago, and it works perfectly
+(see https://github.com/hyperhq/runv/pull/297).
+
+The vm-template feature lets containers (VMs)
+start in 130ms and saves 80M of memory for every
+container (VM), so hyper containers are as fast
+and high-density as normal containers.
+
+kata-containers project (https://github.com/kata-containers)
+which was launched by hyper, intel and friends and which descended
+from runv (and clear-container) should have this feature enabled.
+Unfortunately, due to code conflicts between runv and cc,
+this feature was temporarily disabled and it is being brought
+back by the hyper and intel teams.
+
+3) How to use and bring up advanced features.
+
+In current qemu command line, shared memory has
+to be configured via memory-object.
+
+a) feature: qemu-local-migration, qemu-live-update
+Set the mem-path on the tmpfs and set share=on for it when
+starting the vm. Example:
+-object \
+memory-backend-file,id=mem,size=128M,mem-path=/dev/shm/memory,share=on \
+-numa node,nodeid=0,cpus=0-7,memdev=mem
+
+When you want to migrate the vm locally (after fixing a security bug
+in the qemu binary, or for another reason), you can start a new qemu with
+the same command line and -incoming, then you can migrate the
+vm from the old qemu to the new qemu with the migration capability
+'bypass-shared-memory' set. The migration will migrate the device state
+*ONLY*; the memory is the original memory backed by the tmpfs file.
+
+b) feature: extremely-fast-save-restore
+The same as above, but the mem-path is on a persistent file system.
+
+c)  feature: vm-template, vm-fast-live-clone
+the template vm is started as 1), and paused when the guest reaches
+the template point(example: the guest app is ready), then the template
+vm is saved. (the qemu process of the template can be killed now, because
+we need only the memory and the device state files (in tmpfs)).
+
+Then we can launch one or multiple VMs based on the template vm state.
+The new VMs are started without “share=on”; all the new VMs share
+the initial memory from the memory file, so they save a lot of memory.
+All the new VMs start from the template point, so the guest app can go to
+work quickly.
+
+The new VM booted from template vm can’t become template again,
+if you need this unusual chained-template feature, you can write
+a cloneable-tmpfs kernel module for it.
+
+The libvirt toolkit can’t manage vm-template currently; in
+hyperhq/runv, we use a qemu wrapper script to do it. I hope someone adds
+a “libvirt managed template” feature to libvirt.
+
+d) feature: yet-another-post-copy-migration
+It is a possible feature, no toolkit can do it well now.
+Using nbd server/client on the memory file is reluctantly Ok but
+inconvenient. A special feature for tmpfs might be needed to
+fully complete this feature.
+No one needs yet another post-copy migration method,
+but it is possible when some crazy man needs it.
+
+Cc: Samuel Ortiz <sameo@linux.intel.com>
+Cc: Sebastien Boeuf <sebastien.boeuf@intel.com>
+Cc: James O. D. Hunt <james.o.hunt@intel.com>
+Cc: Xu Wang <gnawux@gmail.com>
+Cc: Peng Tao <bergwolf@gmail.com>
+Cc: Xiao Guangrong <xiaoguangrong@tencent.com>
+Cc: Xiao Guangrong <xiaoguangrong.eric@gmail.com>
+Signed-off-by: Lai Jiangshan <jiangshanlai@gmail.com>
+---
+ migration/migration.c | 14 ++++++++++++++
+ migration/migration.h |  1 +
+ migration/ram.c       | 27 ++++++++++++++++++---------
+ qapi/migration.json   |  9 +++++++--
+ 4 files changed, 40 insertions(+), 11 deletions(-)
+
+diff --git a/migration/migration.c b/migration/migration.c
+index 609e0df5d0..02c96aadb1 100644
+--- a/migration/migration.c
++++ b/migration/migration.c
+@@ -2010,6 +2010,20 @@ bool migrate_release_ram(void)
+     return s->enabled_capabilities[MIGRATION_CAPABILITY_RELEASE_RAM];
+ }
+ 
++bool migrate_bypass_shared_memory(void)
++{
++    MigrationState *s;
++
++    /* it is not workable with postcopy yet. */
++    if (migrate_postcopy_ram()) {
++        return false;
++    }
++
++    s = migrate_get_current();
++
++    return s->enabled_capabilities[MIGRATION_CAPABILITY_BYPASS_SHARED_MEMORY];
++}
++
+ bool migrate_postcopy_ram(void)
+ {
+     MigrationState *s;
+diff --git a/migration/migration.h b/migration/migration.h
+index 438f17edad..e8eae82910 100644
+--- a/migration/migration.h
++++ b/migration/migration.h
+@@ -261,6 +261,7 @@ MigrationState *migrate_get_current(void);
+ 
+ bool migrate_postcopy(void);
+ 
++bool migrate_bypass_shared_memory(void);
+ bool migrate_release_ram(void);
+ bool migrate_postcopy_ram(void);
+ bool migrate_zero_blocks(void);
+diff --git a/migration/ram.c b/migration/ram.c
+index 1ca9ba77b6..1b35b4a30c 100644
+--- a/migration/ram.c
++++ b/migration/ram.c
+@@ -1646,6 +1646,11 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb,
+     unsigned long *bitmap = rb->bmap;
+     unsigned long next;
+ 
++    /* when this ramblock is requested bypassing */
++    if (!bitmap) {
++        return size;
++    }
++
+     if (ramblock_is_ignored(rb)) {
+         return size;
+     }
+@@ -1773,7 +1778,9 @@ static void migration_bitmap_sync(RAMState *rs)
+     qemu_mutex_lock(&rs->bitmap_mutex);
+     rcu_read_lock();
+     RAMBLOCK_FOREACH_NOT_IGNORED(block) {
+-        migration_bitmap_sync_range(rs, block, 0, block->used_length);
++        if (!migrate_bypass_shared_memory() || !qemu_ram_is_shared(block)) {
++            migration_bitmap_sync_range(rs, block, 0, block->used_length);
++        }
+     }
+     ram_counters.remaining = ram_bytes_remaining();
+     rcu_read_unlock();
+@@ -3183,18 +3190,12 @@ static int ram_state_init(RAMState **rsp)
+     qemu_mutex_init(&(*rsp)->src_page_req_mutex);
+     QSIMPLEQ_INIT(&(*rsp)->src_page_requests);
+ 
+-    /*
+-     * Count the total number of pages used by ram blocks not including any
+-     * gaps due to alignment or unplugs.
+-     */
+-    (*rsp)->migration_dirty_pages = ram_bytes_total() >> TARGET_PAGE_BITS;
+-
+     ram_state_reset(*rsp);
+ 
+     return 0;
+ }
+ 
+-static void ram_list_init_bitmaps(void)
++static void ram_list_init_bitmaps(RAMState *rs)
+ {
+     RAMBlock *block;
+     unsigned long pages;
+@@ -3202,9 +3203,17 @@ static void ram_list_init_bitmaps(void)
+     /* Skip setting bitmap if there is no RAM */
+     if (ram_bytes_total()) {
+         RAMBLOCK_FOREACH_NOT_IGNORED(block) {
++            if (migrate_bypass_shared_memory() && qemu_ram_is_shared(block)) {
++                continue;
++            }
+             pages = block->max_length >> TARGET_PAGE_BITS;
+             block->bmap = bitmap_new(pages);
+             bitmap_set(block->bmap, 0, pages);
++            /*
++             * Count the total number of pages used by ram blocks not
++             * including any gaps due to alignment or unplugs.
++             */
++            rs->migration_dirty_pages += pages;
+             if (migrate_postcopy_ram()) {
+                 block->unsentmap = bitmap_new(pages);
+                 bitmap_set(block->unsentmap, 0, pages);
+@@ -3220,7 +3229,7 @@ static void ram_init_bitmaps(RAMState *rs)
+     qemu_mutex_lock_ramlist();
+     rcu_read_lock();
+ 
+-    ram_list_init_bitmaps();
++    ram_list_init_bitmaps(rs);
+     memory_global_dirty_log_start();
+     migration_bitmap_sync_precopy(rs);
+ 
+diff --git a/qapi/migration.json b/qapi/migration.json
+index 9cfbaf8c6c..4194fdd5a4 100644
+--- a/qapi/migration.json
++++ b/qapi/migration.json
+@@ -415,6 +415,10 @@
+ #
+ # @x-ignore-shared: If enabled, QEMU will not migrate shared memory (since 4.0)
+ #
++# @bypass-shared-memory: the shared memory region will be bypassed on migration.
++#          This feature allows the memory region to be reused by new qemu(s)
++#          or be migrated separately. (since 2.12)
++#
+ # Since: 1.2
+ ##
+ { 'enum': 'MigrationCapability',
+@@ -422,7 +426,7 @@
+            'compress', 'events', 'postcopy-ram', 'x-colo', 'release-ram',
+            'block', 'return-path', 'pause-before-switchover', 'multifd',
+            'dirty-bitmaps', 'postcopy-blocktime', 'late-block-activate',
+-           'x-ignore-shared' ] }
++           'bypass-shared-memory', 'x-ignore-shared' ] }
+ 
+ ##
+ # @MigrationCapabilityStatus:
+@@ -476,7 +480,8 @@
+ #       {"state": false, "capability": "compress"},
+ #       {"state": true, "capability": "events"},
+ #       {"state": false, "capability": "postcopy-ram"},
+-#       {"state": false, "capability": "x-colo"}
++#       {"state": false, "capability": "x-colo"},
++#       {"state": false, "capability": "bypass-shared-memory"}
+ #    ]}
+ #
+ ##
+-- 
+2.17.2
+