moby: Add a 'run' command to execute an image on hyperkit

'moby run' will use the kernel and initrd image produced
by 'moby build' and, on macOS, will run it inside a
hyperkit VM. This assumes that you have a recent version
of Docker for Mac installed, as it reuses the hyperkit
and VPNKit binaries shipped with it.

Signed-off-by: Rolf Neugebauer <rolf.neugebauer@docker.com>
This commit is contained in:
Rolf Neugebauer
2017-03-22 14:27:56 +00:00
parent 759637b3f0
commit ea4ceab3f8
123 changed files with 33850 additions and 0 deletions

1125
vendor/github.com/docker/hyperkit/src/lib/acpitbl.c generated vendored Normal file

File diff suppressed because it is too large Load Diff

81
vendor/github.com/docker/hyperkit/src/lib/atkbdc.c generated vendored Normal file
View File

@@ -0,0 +1,81 @@
/*-
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <xhyve/support/misc.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/inout.h>
#include <xhyve/pci_lpc.h>
/* i8042 keyboard controller I/O ports. */
#define KBD_DATA_PORT 0x60
#define KBD_STS_CTL_PORT 0x64
/* Status register bit: system passed power-on self test. */
#define KBD_SYS_FLAG 0x4
/* Controller command: pulse the reset line. */
#define KBDC_RESET 0xfe
/*
 * I/O handler for the keyboard controller data port (0x60).
 * Only single-byte accesses are valid; reads always return 0 and
 * writes are silently discarded.
 */
static int
atkbdc_data_handler(UNUSED int vcpu, UNUSED int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	if (bytes == 1) {
		*eax = 0;
		return (0);
	}
	return (-1);
}
/*
 * I/O handler for the keyboard controller status/command port (0x64).
 * Reads report only "system passed POST"; the only emulated command
 * is KBDC_RESET, which requests a VM reset.
 */
static int
atkbdc_sts_ctl_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	int error;

	if (bytes != 1)
		return (-1);

	if (in) {
		*eax = KBD_SYS_FLAG; /* system passed POST */
		return (0);
	}

	/* Command write: only the reset pulse is emulated. */
	if (*eax == KBDC_RESET) {
		error = xh_vm_suspend(VM_SUSPEND_RESET);
		assert(error == 0 || errno == EALREADY);
	}
	return (0);
}
/*
 * Register both keyboard controller ports with the inout framework and
 * reserve them in the LPC system resource map.
 * Fix: the data-port entry was registered under the misspelled name
 * "atkdbc" (a typo inherited from bhyve); both entries now use "atkbdc".
 */
INOUT_PORT(atkbdc, KBD_DATA_PORT, IOPORT_F_INOUT, atkbdc_data_handler);
SYSRES_IO(KBD_DATA_PORT, 1);
INOUT_PORT(atkbdc, KBD_STS_CTL_PORT, IOPORT_F_INOUT, atkbdc_sts_ctl_handler);
SYSRES_IO(KBD_STS_CTL_PORT, 1);

1041
vendor/github.com/docker/hyperkit/src/lib/block_if.c generated vendored Normal file

File diff suppressed because it is too large Load Diff

152
vendor/github.com/docker/hyperkit/src/lib/consport.c generated vendored Normal file
View File

@@ -0,0 +1,152 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <termios.h>
#include <unistd.h>
#include <stdbool.h>
#include <sys/types.h>
#include <sys/select.h>
#include <xhyve/support/misc.h>
#include <xhyve/inout.h>
#include <xhyve/pci_lpc.h>
/* bvm console: a simple paravirtual console on a fixed I/O port. */
#define BVM_CONSOLE_PORT 0x220
/* Signature returned on 2-byte reads so guests can detect the device. */
#define BVM_CONS_SIG ('b' << 8 | 'v')
/* Saved original and raw-mode terminal attributes for stdin. */
static struct termios tio_orig, tio_new;
/*
 * atexit() hook: restore the terminal attributes saved by ttyopen()
 * so the user's shell gets a sane tty back when the process exits.
 */
static void
ttyclose(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &tio_orig);
}
/*
 * Put stdin into raw mode for use as the guest console, saving the
 * original attributes first and arranging for them to be restored at
 * process exit.
 *
 * NOTE(review): tio_new is not seeded from tio_orig before
 * cfmakeraw(); it relies on static zero-initialization, so fields not
 * set by cfmakeraw() (e.g. speeds) stay zero — confirm intended.
 */
static void
ttyopen(void)
{
	tcgetattr(STDIN_FILENO, &tio_orig);
	cfmakeraw(&tio_new);
	tcsetattr(STDIN_FILENO, TCSANOW, &tio_new);
	atexit(ttyclose);
}
/*
 * Non-blocking poll: report whether at least one byte is ready to be
 * read from stdin.
 */
static bool
tty_char_available(void)
{
	fd_set readfds;
	struct timeval timeout = { .tv_sec = 0, .tv_usec = 0 };

	FD_ZERO(&readfds);
	FD_SET(STDIN_FILENO, &readfds);
	return (select(STDIN_FILENO + 1, &readfds, NULL, NULL, &timeout) > 0);
}
/*
 * Read one byte from stdin without blocking.
 *
 * Returns the byte (0-255) if one was available, or -1 if no data was
 * pending or the read failed/hit EOF.  Fix: the original ignored the
 * read(2) return value, so a short or failed read returned an
 * uninitialized byte (undefined behavior).
 */
static int
ttyread(void)
{
	unsigned char rb;

	if (!tty_char_available())
		return (-1);
	if (read(STDIN_FILENO, &rb, 1) != 1)
		return (-1);
	return (rb);
}
/* Emit one byte of guest console output on stdout; errors are ignored. */
static void
ttywrite(unsigned char wb)
{
	ssize_t rc;

	rc = write(STDOUT_FILENO, &wb, 1);
	(void) rc;
}
/*
 * I/O handler for the bvm console port (0x220).
 *
 * Access protocol:
 *  - 2-byte reads return the 'bv' signature so guests can probe for
 *    the device;
 *  - 1-byte reads return 0xff (guests probing for old ISA devices);
 *  - 4-byte accesses transfer data: reads fetch one input byte (or -1
 *    if none pending), writes emit one output byte.
 * The tty is switched to raw mode on the first 4-byte access.
 */
static int
console_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	static int opened;

	switch (bytes) {
	case 2:
		if (!in)
			return (-1);
		*eax = BVM_CONS_SIG;
		return (0);
	case 1:
		if (!in)
			return (-1);
		*eax = 0xff;
		return (0);
	case 4:
		break;
	default:
		return (-1);
	}

	if (!opened) {
		ttyopen();
		opened = 1;
	}

	if (in)
		*eax = (uint32_t) ttyread();
	else
		ttywrite((unsigned char) *eax);
	return (0);
}
SYSRES_IO(BVM_CONSOLE_PORT, 4);
/*
 * Port descriptor for the bvm console.  Registered at runtime from
 * init_bvmcons() rather than via INOUT_PORT(), so the console is only
 * active when explicitly enabled.
 */
static struct inout_port consport = {
	"bvmcons",
	BVM_CONSOLE_PORT,
	1,
	IOPORT_F_INOUT,
	console_handler,
	NULL
};
/* Enable the bvm console by registering its I/O port handler. */
void
init_bvmcons(void)
{
	register_inout(&consport);
}

142
vendor/github.com/docker/hyperkit/src/lib/dbgport.c generated vendored Normal file
View File

@@ -0,0 +1,142 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <sys/uio.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <xhyve/support/misc.h>
#include <xhyve/inout.h>
#include <xhyve/dbgport.h>
#include <xhyve/pci_lpc.h>
/* bvm debug port: a fixed I/O port bridged to a TCP socket for gdb. */
#define BVM_DBG_PORT 0x224
/* Signature returned on 2-byte reads so guests can detect the device. */
#define BVM_DBG_SIG ('B' << 8 | 'V')
/* Listening socket, and the single accepted gdb connection (-1 if none). */
static int listen_fd, conn_fd;
static struct sockaddr_in saddrin;
/*
 * I/O handler for the bvm gdb debug port (0x224).
 *
 * 2-byte reads return the 'BV' signature so a guest can probe for the
 * device; all data transfers are 4-byte accesses moving one character
 * at a time over a TCP connection.  If no client is connected the
 * handler blocks in accept(), stalling the vCPU until gdb attaches.
 */
static int
dbg_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes, uint32_t *eax,
	UNUSED void *arg)
{
	char ch;
	int nwritten, nread, printonce;

	if (bytes == 2 && in) {
		*eax = BVM_DBG_SIG;
		return (0);
	}

	if (bytes != 4)
		return (-1);

again:
	printonce = 0;
	/* Block until a gdb client is connected. */
	while (conn_fd < 0) {
		if (!printonce) {
			printf("Waiting for connection from gdb\r\n");
			printonce = 1;
		}
		conn_fd = accept(listen_fd, NULL, NULL);
		if (conn_fd >= 0)
			fcntl(conn_fd, F_SETFL, O_NONBLOCK); /* reads must not block */
		else if (errno != EINTR)
			perror("accept");
	}

	if (in) {
		nread = (int) read(conn_fd, &ch, 1);
		if (nread == -1 && errno == EAGAIN)
			*eax = (uint32_t) (-1); /* no data pending */
		else if (nread == 1)
			*eax = (uint32_t) ch;
		else {
			/* EOF or hard error: drop the client, wait for a new one. */
			close(conn_fd);
			conn_fd = -1;
			goto again;
		}
	} else {
		ch = (char) *eax;
		nwritten = (int) write(conn_fd, &ch, 1);
		if (nwritten != 1) {
			/* Write failed: drop the client, wait for a new one. */
			close(conn_fd);
			conn_fd = -1;
			goto again;
		}
	}
	return (0);
}
/* Port descriptor for the gdb debug port; registered by init_dbgport(). */
static struct inout_port dbgport = {
	"bvmdbg",
	BVM_DBG_PORT,
	1,
	IOPORT_F_INOUT,
	dbg_handler,
	NULL
};
SYSRES_IO(BVM_DBG_PORT, 4);
/*
 * Create the TCP listening socket for the gdb debug port and register
 * the I/O port handler.  'sport' is the TCP port to listen on (bound
 * on INADDR_ANY).  Exits the process on any socket-setup failure.
 *
 * NOTE(review): SO_REUSEADDR is not set, so a quick restart may fail
 * in bind() while the old socket lingers in TIME_WAIT — confirm this
 * is acceptable.
 */
void
init_dbgport(int sport)
{
	conn_fd = -1;

	if ((listen_fd = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		exit(1);
	}

	saddrin.sin_len = sizeof(saddrin);
	saddrin.sin_family = AF_INET;
	saddrin.sin_addr.s_addr = htonl(INADDR_ANY);
	saddrin.sin_port = htons(sport);

	if (bind(listen_fd, (struct sockaddr *)&saddrin, sizeof(saddrin)) < 0) {
		perror("bind");
		exit(1);
	}

	if (listen(listen_fd, 1) < 0) {
		perror("listen");
		exit(1);
	}

	register_inout(&dbgport);
}

315
vendor/github.com/docker/hyperkit/src/lib/inout.c generated vendored Normal file
View File

@@ -0,0 +1,315 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <assert.h>
#include <sys/uio.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/linker_set.h>
#include <xhyve/support/psl.h>
#include <xhyve/support/segments.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/xhyve.h>
#include <xhyve/inout.h>
/* Linker set of I/O ports registered statically via INOUT_PORT(). */
SET_DECLARE(inout_port_set, struct inout_port);
/* x86 has a 16-bit I/O port address space. */
#define MAX_IOPORTS (1 << 16)
#define VERIFY_IOPORT(port, size) \
assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
/* One entry per port: the currently registered handler and its state. */
static struct {
	const char *name;
	int flags;
	inout_func_t handler;
	void *arg;
} inout_handlers[MAX_IOPORTS];
/*
 * Fallback handler for ports with no registered device: reads float
 * high (all ones for the access width), writes are discarded.
 */
static int
default_inout(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	if (!in)
		return (0);

	switch (bytes) {
	case 1:
		*eax = 0xff;
		break;
	case 2:
		*eax = 0xffff;
		break;
	case 4:
		*eax = 0xffffffff;
		break;
	}
	return (0);
}
static void
register_default_iohandler(int start, int size)
{
struct inout_port iop;
VERIFY_IOPORT(start, size);
bzero(&iop, sizeof(iop));
iop.name = "default";
iop.port = start;
iop.size = size;
iop.flags = (int)(IOPORT_F_INOUT | IOPORT_F_DEFAULT);
iop.handler = default_inout;
register_inout(&iop);
}
/*
 * Write 'val' into guest register 'reg' with x86 width semantics:
 * 1- and 2-byte writes preserve the untouched high bits, 4-byte
 * writes zero-extend, 8-byte writes store as-is.
 * Returns 0 or an error from the vmm API; EINVAL for a bad size.
 */
static int
update_register(int vcpuid, enum vm_reg_name reg,
	uint64_t val, int size)
{
	uint64_t origval, mask;
	int error;

	switch (size) {
	case 1:
	case 2:
		error = xh_vm_get_register(vcpuid, reg, &origval);
		if (error)
			return (error);
		mask = vie_size2mask(size);
		val = (val & mask) | (origval & ~mask);
		break;
	case 4:
		val &= 0xffffffffUL;
		break;
	case 8:
		break;
	default:
		return (EINVAL);
	}

	return xh_vm_set_register(vcpuid, reg, val);
}
/*
 * Emulate the I/O port access described by a VM exit.
 *
 * Dispatches to the handler registered for the port, honoring the
 * IN/OUT direction flags.  Handles both register-based accesses and
 * string instructions (INS/OUTS) including REP prefixes, the
 * direction flag, segmentation and paging.
 *
 * Returns 0 on success (guest may resume), -1 on an unhandled or
 * failed access.  With 'strict' set, ports that only have the default
 * handler are rejected.
 */
int
emulate_inout(int vcpu, struct vm_exit *vmexit, int strict)
{
	int addrsize, bytes, flags, in, port, prot, rep;
	uint32_t eax, val;
	inout_func_t handler;
	void *arg;
	int error, fault, retval;
	enum vm_reg_name idxreg;
	uint64_t gla, index, iterations, count;
	struct vm_inout_str *vis;
	struct iovec iov[2];

	bytes = vmexit->u.inout.bytes;
	in = vmexit->u.inout.in;
	port = vmexit->u.inout.port;

	assert(port < MAX_IOPORTS);
	assert(bytes == 1 || bytes == 2 || bytes == 4);

	handler = inout_handlers[port].handler;
	/* In strict mode, ports nobody registered are not emulated. */
	if (strict && handler == default_inout)
		return (-1);

	flags = inout_handlers[port].flags;
	arg = inout_handlers[port].arg;

	/* Reject accesses in a direction the handler did not ask for. */
	if (in) {
		if (!(flags & IOPORT_F_IN))
			return (-1);
	} else {
		if (!(flags & IOPORT_F_OUT))
			return (-1);
	}

	retval = 0;
	if (vmexit->u.inout.string) {
		/* INS/OUTS: data moves between the port and guest memory. */
		vis = &vmexit->u.inout_str;
		rep = vis->inout.rep;
		addrsize = vis->addrsize;
		prot = in ? XHYVE_PROT_WRITE : XHYVE_PROT_READ;
		assert(addrsize == 2 || addrsize == 4 || addrsize == 8);

		/* Index register */
		idxreg = in ? VM_REG_GUEST_RDI : VM_REG_GUEST_RSI;
		index = vis->index & vie_size2mask(addrsize);

		/* Count register */
		count = vis->count & vie_size2mask(addrsize);

		/* Limit number of back-to-back in/out emulations to 16 */
		iterations = min(count, 16);
		while (iterations > 0) {
			assert(retval == 0);

			/* Translate the current element's linear address. */
			if (vie_calculate_gla(vis->paging.cpu_mode,
			    vis->seg_name, &vis->seg_desc, index, bytes,
			    addrsize, prot, &gla)) {
				vm_inject_gp(vcpu);
				break;
			}

			error = xh_vm_copy_setup(vcpu, &vis->paging, gla,
			    (size_t)bytes, prot, iov, nitems(iov), &fault);
			if (error) {
				retval = -1; /* Unrecoverable error */
				break;
			} else if (fault) {
				retval = 0; /* Resume guest to handle fault */
				break;
			}

			if (vie_alignment_check(vis->paging.cpl, bytes,
			    vis->cr0, vis->rflags, gla)) {
				vm_inject_ac(vcpu, 0);
				break;
			}

			val = 0;
			/* OUTS: fetch the datum from guest memory first. */
			if (!in)
				xh_vm_copyin(iov, &val, (size_t)bytes);

			retval = handler(vcpu, in, port, bytes, &val, arg);
			if (retval != 0)
				break;

			/* INS: store what the handler produced. */
			if (in)
				xh_vm_copyout(&val, iov, (size_t)bytes);

			/* Update index */
			if (vis->rflags & PSL_D)
				index -= (uint64_t)bytes;
			else
				index += (uint64_t)bytes;

			count--;
			iterations--;
		}

		/* Update index register */
		error = update_register(vcpu, idxreg, index, addrsize);
		assert(error == 0);

		/*
		 * Update count register only if the instruction had a repeat
		 * prefix.
		 */
		if (rep) {
			error = update_register(vcpu, VM_REG_GUEST_RCX, count, addrsize);
			assert(error == 0);
		}

		/* Restart the instruction if more iterations remain */
		if (retval == 0 && count != 0) {
			error = xh_vm_restart_instruction(vcpu);
			assert(error == 0);
		}
	} else {
		/* Simple IN/OUT via (E)AX. */
		eax = vmexit->u.inout.eax;
		val = eax & vie_size2mask(bytes);
		retval = handler(vcpu, in, port, bytes, &val, arg);
		if (retval == 0 && in) {
			/* Merge the input into the low 'bytes' of RAX. */
			eax &= ~vie_size2mask(bytes);
			eax |= val & vie_size2mask(bytes);
			error = xh_vm_set_register(vcpu, VM_REG_GUEST_RAX, eax);
			assert(error == 0);
		}
	}
	return (retval);
}
/*
 * Initialize the port table: install the default handler everywhere,
 * then overwrite entries for handlers registered at link time via
 * INOUT_PORT() (collected in the 'inout_port_set' linker set).
 */
void
init_inout(void)
{
	struct inout_port **iopp, *iop;

	/*
	 * Set up the default handler for all ports
	 */
	register_default_iohandler(0, MAX_IOPORTS);
	/*
	 * Overwrite with specified handlers
	 */
	SET_FOREACH(iopp, inout_port_set) {
		iop = *iopp;
		assert(iop->port < MAX_IOPORTS);
		inout_handlers[iop->port].name = iop->name;
		inout_handlers[iop->port].flags = iop->flags;
		inout_handlers[iop->port].handler = iop->handler;
		/* NOTE(review): arg is set to NULL here, not iop->arg —
		 * linker-set entries appear to carry no argument; confirm. */
		inout_handlers[iop->port].arg = NULL;
	}
}
/*
 * Register a handler for the port range described by 'iop'.
 * Returns 0 on success, -1 if any port in the range is already owned
 * by a non-default handler (unless this registration is itself the
 * default handler).
 */
int
register_inout(struct inout_port *iop)
{
	int p;

	VERIFY_IOPORT(iop->port, iop->size);

	/*
	 * A non-default registration must not claim any port that is
	 * already owned by a real device.
	 */
	if (((unsigned)iop->flags & IOPORT_F_DEFAULT) == 0) {
		for (p = iop->port; p < iop->port + iop->size; p++)
			if (((unsigned)inout_handlers[p].flags &
			    IOPORT_F_DEFAULT) == 0)
				return (-1);
	}

	for (p = iop->port; p < iop->port + iop->size; p++) {
		inout_handlers[p].name = iop->name;
		inout_handlers[p].flags = iop->flags;
		inout_handlers[p].handler = iop->handler;
		inout_handlers[p].arg = iop->arg;
	}

	return (0);
}
/*
 * Remove a previously registered handler, restoring the default
 * handler over its range.  Asserts that the caller owns the range
 * (name pointer comparison).  Always returns 0.
 */
int
unregister_inout(struct inout_port *iop)
{
	VERIFY_IOPORT(iop->port, iop->size);
	assert(inout_handlers[iop->port].name == iop->name);
	register_default_iohandler(iop->port, iop->size);
	return (0);
}

68
vendor/github.com/docker/hyperkit/src/lib/ioapic.c generated vendored Normal file
View File

@@ -0,0 +1,68 @@
/*-
* Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/ioapic.h>
/*
* Assign PCI INTx interrupts to I/O APIC pins in a round-robin
* fashion. Note that we have no idea what the HPET is using, but the
* HPET is also programmable whereas this is intended for hardwired
* PCI interrupts.
*
* This assumes a single I/O APIC where pins >= 16 are permitted for
* PCI devices.
*/
static int pci_pins;
/*
 * Discover how many I/O APIC pins are available for PCI INTx routing.
 * The first 16 pins are reserved (legacy ISA IRQs); on query failure
 * or when no pins remain beyond those, PCI allocation is disabled.
 */
void
ioapic_init(void)
{
	int pins;

	if (xh_vm_ioapic_pincount(&pins) < 0 || pins <= 16) {
		pci_pins = 0;
		return;
	}

	pci_pins = pins - 16;
}
/*
 * Hand out the next PCI INTx pin in round-robin order, starting at
 * pin 16.  Returns -1 when no PCI-capable pins exist.
 */
int
ioapic_pci_alloc_irq(void)
{
	static int next_pin;

	if (pci_pins == 0)
		return (-1);

	return (16 + (next_pin++ % pci_pins));
}

284
vendor/github.com/docker/hyperkit/src/lib/md5c.c generated vendored Normal file
View File

@@ -0,0 +1,284 @@
/*-
* MD5C.C - RSA Data Security, Inc., MD5 message-digest algorithm
*
* Copyright (C) 1991-2, RSA Data Security, Inc. Created 1991. All
* rights reserved.
*
* License to copy and use this software is granted provided that it
* is identified as the "RSA Data Security, Inc. MD5 Message-Digest
* Algorithm" in all material mentioning or referencing this software
* or this function.
*
* License is also granted to make and use derivative works provided
* that such works are identified as "derived from the RSA Data
* Security, Inc. MD5 Message-Digest Algorithm" in all material
* mentioning or referencing the derived work.
*
* RSA Data Security, Inc. makes no representations concerning either
* the merchantability of this software or the suitability of this
* software for any particular purpose. It is provided "as is"
* without express or implied warranty of any kind.
*
* These notices must be retained in any copies of any part of this
* documentation and/or software.
*
* This code is the same as the code published by RSA Inc. It has been
* edited for clarity and style only.
*/
#include <stdint.h>
#include <string.h>
#include <xhyve/support/md5.h>
/* Core 64-byte block transform, defined at the bottom of this file. */
static void MD5Transform(u_int32_t [4], const unsigned char [64]);
/* Padding: a single 0x80 byte followed by zeros (RFC 1321, step 3.1). */
static unsigned char PADDING[64] = {
	0x80, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
};
/* F, G, H and I are basic MD5 functions. */
#define F(x, y, z) (((x) & (y)) | ((~x) & (z)))
#define G(x, y, z) (((x) & (z)) | ((y) & (~z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))
#define I(x, y, z) ((y) ^ ((x) | (~z)))
/* ROTATE_LEFT rotates x left n bits. */
#define ROTATE_LEFT(x, n) (((x) << (n)) | ((x) >> (32-(n))))
/*
 * FF, GG, HH, and II transformations for rounds 1, 2, 3, and 4.
 * Rotation is separate from addition to prevent recomputation.
 */
#define FF(a, b, c, d, x, s, ac) { \
	(a) += F ((b), (c), (d)) + (x) + (u_int32_t)(ac); \
	(a) = ROTATE_LEFT ((a), (s)); \
	(a) += (b); \
}
#define GG(a, b, c, d, x, s, ac) { \
	(a) += G ((b), (c), (d)) + (x) + (u_int32_t)(ac); \
	(a) = ROTATE_LEFT ((a), (s)); \
	(a) += (b); \
}
#define HH(a, b, c, d, x, s, ac) { \
	(a) += H ((b), (c), (d)) + (x) + (u_int32_t)(ac); \
	(a) = ROTATE_LEFT ((a), (s)); \
	(a) += (b); \
}
#define II(a, b, c, d, x, s, ac) { \
	(a) += I ((b), (c), (d)) + (x) + (u_int32_t)(ac); \
	(a) = ROTATE_LEFT ((a), (s)); \
	(a) += (b); \
}
/*
 * MD5 initialization.  Begins an MD5 operation, writing a new context.
 * (Converted from an identifier-list (K&R) definition to an ANSI
 * prototype; K&R definitions are obsolescent and removed in C23.)
 */
void
MD5Init(MD5_CTX *context)
{
	context->count[0] = context->count[1] = 0;

	/* Load magic initialization constants (RFC 1321). */
	context->state[0] = 0x67452301;
	context->state[1] = 0xefcdab89;
	context->state[2] = 0x98badcfe;
	context->state[3] = 0x10325476;
}
/*
 * MD5 block update operation.  Continues an MD5 message-digest
 * operation, processing another message block and updating the
 * context.  (Converted from a K&R definition to an ANSI prototype;
 * K&R definitions are obsolescent and removed in C23.)
 */
void
MD5Update(MD5_CTX *context, const void *in, unsigned int inputLen)
{
	unsigned int i, index, partLen;
	const unsigned char *input = in;

	/* Number of bytes already buffered, mod 64. */
	index = (unsigned int)((context->count[0] >> 3) & 0x3F);

	/* Update the 64-bit bit count, carrying into count[1]. */
	if ((context->count[0] += ((u_int32_t)inputLen << 3))
	    < ((u_int32_t)inputLen << 3))
		context->count[1]++;
	context->count[1] += ((u_int32_t)inputLen >> 29);

	partLen = 64 - index;

	/* Transform as many complete 64-byte blocks as possible. */
	i = 0;
	if (inputLen >= partLen) {
		memcpy((void *)&context->buffer[index], (const void *)input,
		    partLen);
		MD5Transform(context->state, context->buffer);
		for (i = partLen; i + 63 < inputLen; i += 64)
			MD5Transform(context->state, &input[i]);
		index = 0;
	}

	/* Buffer remaining input */
	memcpy((void *)&context->buffer[index], (const void *)&input[i],
	    inputLen - i);
}
/*
 * MD5 padding. Adds padding followed by original length.
 */
static void
MD5Pad(MD5_CTX *context)
{
	unsigned char bits[8];
	unsigned int index, padLen;

	/* Save number of bits */
	memcpy(bits, context->count, 8);

	/* Pad out to 56 mod 64. */
	index = (unsigned int)((context->count[0] >> 3) & 0x3f);
	padLen = (index < 56) ? (56 - index) : (120 - index);
	/* Feed pad bytes through MD5Update so state and count stay
	 * consistent. */
	MD5Update(context, PADDING, padLen);

	/* Append length (before padding) */
	MD5Update(context, bits, 8);
}
/*
 * MD5 finalization.  Ends an MD5 message-digest operation, writing
 * the message digest and zeroizing the context.  (Converted from a
 * K&R definition to an ANSI prototype; K&R definitions are
 * obsolescent and removed in C23.)
 */
void
MD5Final(unsigned char digest[16], MD5_CTX *context)
{
	/* Do padding. */
	MD5Pad(context);

	/* Copy the 128-bit state out as the digest.  NOTE(review): a raw
	 * memcpy is byte-order dependent — assumes a little-endian host
	 * (true for xhyve's x86-only targets); confirm if ported. */
	memcpy(digest, context->state, 16);

	/* Zeroize sensitive information.  NOTE(review): a plain memset of
	 * a dead object may be elided by the optimizer; consider
	 * explicit_bzero/memset_s if this zeroization matters. */
	memset((void *)context, 0, sizeof (*context));
}
/*
 * MD5 basic transformation.  Transforms 'state' based on a 64-byte
 * 'block' (RFC 1321, four rounds of sixteen operations each).
 * (Converted from a K&R definition to an ANSI prototype; K&R
 * definitions are obsolescent and removed in C23.  Round schedule is
 * unchanged.)
 */
static void
MD5Transform(u_int32_t state[4], const unsigned char block[64])
{
	u_int32_t a = state[0], b = state[1], c = state[2], d = state[3], x[16];

	/* Decode the block into 16 32-bit words.  NOTE(review): raw
	 * memcpy assumes a little-endian host — TODO confirm for any
	 * non-x86 port. */
	memcpy(x, block, 64);

	/* Round 1 */
#define S11 7
#define S12 12
#define S13 17
#define S14 22
	FF (a, b, c, d, x[ 0], S11, 0xd76aa478); /* 1 */
	FF (d, a, b, c, x[ 1], S12, 0xe8c7b756); /* 2 */
	FF (c, d, a, b, x[ 2], S13, 0x242070db); /* 3 */
	FF (b, c, d, a, x[ 3], S14, 0xc1bdceee); /* 4 */
	FF (a, b, c, d, x[ 4], S11, 0xf57c0faf); /* 5 */
	FF (d, a, b, c, x[ 5], S12, 0x4787c62a); /* 6 */
	FF (c, d, a, b, x[ 6], S13, 0xa8304613); /* 7 */
	FF (b, c, d, a, x[ 7], S14, 0xfd469501); /* 8 */
	FF (a, b, c, d, x[ 8], S11, 0x698098d8); /* 9 */
	FF (d, a, b, c, x[ 9], S12, 0x8b44f7af); /* 10 */
	FF (c, d, a, b, x[10], S13, 0xffff5bb1); /* 11 */
	FF (b, c, d, a, x[11], S14, 0x895cd7be); /* 12 */
	FF (a, b, c, d, x[12], S11, 0x6b901122); /* 13 */
	FF (d, a, b, c, x[13], S12, 0xfd987193); /* 14 */
	FF (c, d, a, b, x[14], S13, 0xa679438e); /* 15 */
	FF (b, c, d, a, x[15], S14, 0x49b40821); /* 16 */

	/* Round 2 */
#define S21 5
#define S22 9
#define S23 14
#define S24 20
	GG (a, b, c, d, x[ 1], S21, 0xf61e2562); /* 17 */
	GG (d, a, b, c, x[ 6], S22, 0xc040b340); /* 18 */
	GG (c, d, a, b, x[11], S23, 0x265e5a51); /* 19 */
	GG (b, c, d, a, x[ 0], S24, 0xe9b6c7aa); /* 20 */
	GG (a, b, c, d, x[ 5], S21, 0xd62f105d); /* 21 */
	GG (d, a, b, c, x[10], S22, 0x2441453); /* 22 */
	GG (c, d, a, b, x[15], S23, 0xd8a1e681); /* 23 */
	GG (b, c, d, a, x[ 4], S24, 0xe7d3fbc8); /* 24 */
	GG (a, b, c, d, x[ 9], S21, 0x21e1cde6); /* 25 */
	GG (d, a, b, c, x[14], S22, 0xc33707d6); /* 26 */
	GG (c, d, a, b, x[ 3], S23, 0xf4d50d87); /* 27 */
	GG (b, c, d, a, x[ 8], S24, 0x455a14ed); /* 28 */
	GG (a, b, c, d, x[13], S21, 0xa9e3e905); /* 29 */
	GG (d, a, b, c, x[ 2], S22, 0xfcefa3f8); /* 30 */
	GG (c, d, a, b, x[ 7], S23, 0x676f02d9); /* 31 */
	GG (b, c, d, a, x[12], S24, 0x8d2a4c8a); /* 32 */

	/* Round 3 */
#define S31 4
#define S32 11
#define S33 16
#define S34 23
	HH (a, b, c, d, x[ 5], S31, 0xfffa3942); /* 33 */
	HH (d, a, b, c, x[ 8], S32, 0x8771f681); /* 34 */
	HH (c, d, a, b, x[11], S33, 0x6d9d6122); /* 35 */
	HH (b, c, d, a, x[14], S34, 0xfde5380c); /* 36 */
	HH (a, b, c, d, x[ 1], S31, 0xa4beea44); /* 37 */
	HH (d, a, b, c, x[ 4], S32, 0x4bdecfa9); /* 38 */
	HH (c, d, a, b, x[ 7], S33, 0xf6bb4b60); /* 39 */
	HH (b, c, d, a, x[10], S34, 0xbebfbc70); /* 40 */
	HH (a, b, c, d, x[13], S31, 0x289b7ec6); /* 41 */
	HH (d, a, b, c, x[ 0], S32, 0xeaa127fa); /* 42 */
	HH (c, d, a, b, x[ 3], S33, 0xd4ef3085); /* 43 */
	HH (b, c, d, a, x[ 6], S34, 0x4881d05); /* 44 */
	HH (a, b, c, d, x[ 9], S31, 0xd9d4d039); /* 45 */
	HH (d, a, b, c, x[12], S32, 0xe6db99e5); /* 46 */
	HH (c, d, a, b, x[15], S33, 0x1fa27cf8); /* 47 */
	HH (b, c, d, a, x[ 2], S34, 0xc4ac5665); /* 48 */

	/* Round 4 */
#define S41 6
#define S42 10
#define S43 15
#define S44 21
	II (a, b, c, d, x[ 0], S41, 0xf4292244); /* 49 */
	II (d, a, b, c, x[ 7], S42, 0x432aff97); /* 50 */
	II (c, d, a, b, x[14], S43, 0xab9423a7); /* 51 */
	II (b, c, d, a, x[ 5], S44, 0xfc93a039); /* 52 */
	II (a, b, c, d, x[12], S41, 0x655b59c3); /* 53 */
	II (d, a, b, c, x[ 3], S42, 0x8f0ccc92); /* 54 */
	II (c, d, a, b, x[10], S43, 0xffeff47d); /* 55 */
	II (b, c, d, a, x[ 1], S44, 0x85845dd1); /* 56 */
	II (a, b, c, d, x[ 8], S41, 0x6fa87e4f); /* 57 */
	II (d, a, b, c, x[15], S42, 0xfe2ce6e0); /* 58 */
	II (c, d, a, b, x[ 6], S43, 0xa3014314); /* 59 */
	II (b, c, d, a, x[13], S44, 0x4e0811a1); /* 60 */
	II (a, b, c, d, x[ 4], S41, 0xf7537e82); /* 61 */
	II (d, a, b, c, x[11], S42, 0xbd3af235); /* 62 */
	II (c, d, a, b, x[ 2], S43, 0x2ad7d2bb); /* 63 */
	II (b, c, d, a, x[ 9], S44, 0xeb86d391); /* 64 */

	state[0] += a;
	state[1] += b;
	state[2] += c;
	state[3] += d;

	/* Zeroize sensitive information. */
	memset((void *)x, 0, sizeof (x));
}

288
vendor/github.com/docker/hyperkit/src/lib/mem.c generated vendored Normal file
View File

@@ -0,0 +1,288 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Memory ranges are represented with an RB tree. On insertion, the range
* is checked for overlaps. On lookup, the key has the same base and limit
* so it can be searched within the range.
*/
#include <stdint.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <assert.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/tree.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/mem.h>
/* A registered guest-physical MMIO range and its RB-tree linkage. */
struct mmio_rb_range {
	RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
	struct mem_range mr_param; /* caller-supplied handler and flags */
	uint64_t mr_base; /* first byte of the range */
	uint64_t mr_end; /* last byte of the range (inclusive) */
};
struct mmio_rb_tree;
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
/* Primary ranges, plus a fallback tree consulted when no primary matches. */
static RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
/*
 * Per-vCPU cache. Since most accesses from a vCPU will be to
 * consecutive addresses in a range, it makes sense to cache the
 * result of a lookup.
 */
static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
static pthread_rwlock_t mmio_rwlock;
/*
 * RB-tree ordering: disjoint ranges compare by position; any two
 * overlapping ranges compare equal, which is what makes point lookups
 * with a degenerate [addr, addr] key work.
 */
static int
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
{
	int cmp = 0;

	if (a->mr_end < b->mr_base)
		cmp = -1;
	else if (a->mr_base > b->mr_end)
		cmp = 1;
	return (cmp);
}
static int
mmio_rb_lookup(struct mmio_rb_tree *rbt, uint64_t addr,
struct mmio_rb_range **entry)
{
struct mmio_rb_range find, *res;
find.mr_base = find.mr_end = addr;
res = RB_FIND(mmio_rb_tree, rbt, &find);
if (res != NULL) {
*entry = res;
return (0);
}
return (ENOENT);
}
/*
 * Insert a new range into the tree.  Returns 0 on success, or EEXIST
 * (leaving the tree unchanged) if the new range overlaps an existing
 * one.
 */
static int
mmio_rb_add(struct mmio_rb_tree *rbt, struct mmio_rb_range *new)
{
	struct mmio_rb_range *overlap;

	overlap = RB_INSERT(mmio_rb_tree, rbt, new);
	if (overlap != NULL) {
#ifdef RB_DEBUG
		printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
		    new->mr_base, new->mr_end,
		    overlap->mr_base, overlap->mr_end);
#endif
		return (EEXIST);
	}
	return (0);
}
#if 0
/* Debug helper (compiled out): print every registered range. */
static void
mmio_rb_dump(struct mmio_rb_tree *rbt)
{
	struct mmio_rb_range *np;

	pthread_rwlock_rdlock(&mmio_rwlock);
	RB_FOREACH(np, mmio_rb_tree, rbt) {
		printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
		    np->mr_param.name);
	}
	pthread_rwlock_unlock(&mmio_rwlock);
}
#endif
/* Generate the RB-tree implementation for the comparator above. */
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare)
/*
 * Read callback handed to the instruction emulator: forwards the
 * access to the owning range's handler (passed via 'arg').
 */
static int
mem_read(UNUSED void *unused, int vcpu, uint64_t gpa, uint64_t *rval, int size,
	void *arg)
{
	struct mem_range *mr = arg;

	return ((*mr->handler)(vcpu, MEM_F_READ, gpa, size, rval, mr->arg1,
	    mr->arg2));
}
/*
 * Write callback handed to the instruction emulator: forwards the
 * access to the owning range's handler (passed via 'arg').
 */
static int
mem_write(UNUSED void* unused, int vcpu, uint64_t gpa, uint64_t wval, int size,
	void *arg)
{
	struct mem_range *mr = arg;

	return ((*mr->handler)(vcpu, MEM_F_WRITE, gpa, size, &wval, mr->arg1,
	    mr->arg2));
}
/*
 * Emulate a trapped MMIO access at guest-physical address 'paddr'.
 * Looks up the registered range (per-vCPU cache first, then the main
 * tree, then the fallback tree) and hands the decoded instruction to
 * xh_vm_emulate_instruction() with mem_read/mem_write as callbacks.
 * Returns ESRCH when no range covers the address.
 */
int
emulate_mem(int vcpu, uint64_t paddr, struct vie *vie,
	struct vm_guest_paging *paging)
{
	struct mmio_rb_range *entry;
	int err, immutable;
	pthread_rwlock_rdlock(&mmio_rwlock);
	/*
	 * First check the per-vCPU cache
	 */
	if (mmio_hint[vcpu] &&
	    paddr >= mmio_hint[vcpu]->mr_base &&
	    paddr <= mmio_hint[vcpu]->mr_end) {
		entry = mmio_hint[vcpu];
	} else
		entry = NULL;
	if (entry == NULL) {
		if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
			/* Update the per-vCPU cache */
			mmio_hint[vcpu] = entry;
		} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
			pthread_rwlock_unlock(&mmio_rwlock);
			return (ESRCH);
		}
	}
	assert(entry != NULL);
	/*
	 * An 'immutable' memory range is guaranteed to be never removed
	 * so there is no need to hold 'mmio_rwlock' while calling the
	 * handler.
	 *
	 * XXX writes to the PCIR_COMMAND register can cause register_mem()
	 * to be called. If the guest is using PCI extended config space
	 * to modify the PCIR_COMMAND register then register_mem() can
	 * deadlock on 'mmio_rwlock'. However by registering the extended
	 * config space window as 'immutable' the deadlock can be avoided.
	 */
	immutable = (entry->mr_param.flags & MEM_F_IMMUTABLE);
	if (immutable)
		pthread_rwlock_unlock(&mmio_rwlock);
	err = xh_vm_emulate_instruction(vcpu, paddr, vie, paging, mem_read,
		mem_write, &entry->mr_param);
	if (!immutable)
		pthread_rwlock_unlock(&mmio_rwlock);
	return (err);
}
static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
struct mmio_rb_range *entry, *mrp;
int err;
err = 0;
mrp = malloc(sizeof(struct mmio_rb_range));
if (mrp != NULL) {
mrp->mr_param = *memp;
mrp->mr_base = memp->base;
mrp->mr_end = memp->base + memp->size - 1;
pthread_rwlock_wrlock(&mmio_rwlock);
if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
err = mmio_rb_add(rbt, mrp);
pthread_rwlock_unlock(&mmio_rwlock);
if (err)
free(mrp);
} else
err = ENOMEM;
return (err);
}
/* Register an MMIO range in the primary lookup tree. */
int
register_mem(struct mem_range *memp)
{
	return (register_mem_int(&mmio_rb_root, memp));
}
/* Register an MMIO range consulted only when no primary range matches. */
int
register_mem_fallback(struct mem_range *memp)
{
	return (register_mem_int(&mmio_rb_fallback, memp));
}
/*
 * Remove the range starting at memp->base from the primary tree and
 * invalidate any per-vCPU cache entries pointing at it.  Returns the
 * mmio_rb_lookup() error (ENOENT) if the range is not registered.
 */
int
unregister_mem(struct mem_range *memp)
{
	struct mem_range *mr;
	struct mmio_rb_range *entry = NULL;
	int err, i;
	pthread_rwlock_wrlock(&mmio_rwlock);
	err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
	if (err == 0) {
		mr = &entry->mr_param;
		/* NOTE(review): pointer equality, not strcmp — assumes the
		 * caller passes the identical name string used at
		 * registration time. */
		assert(mr->name == memp->name);
		assert(mr->base == memp->base && mr->size == memp->size);
		/* immutable ranges may be used without the lock held and
		 * therefore must never be removed */
		assert((mr->flags & MEM_F_IMMUTABLE) == 0);
		RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
		/* flush Per-vCPU cache */
		for (i=0; i < VM_MAXCPU; i++) {
			if (mmio_hint[i] == entry)
				mmio_hint[i] = NULL;
		}
	}
	pthread_rwlock_unlock(&mmio_rwlock);
	if (entry)
		free(entry);
	return (err);
}
/* One-time initialisation of the MMIO trees and their rwlock. */
void
init_mem(void)
{
	RB_INIT(&mmio_rb_root);
	RB_INIT(&mmio_rb_fallback);
	pthread_rwlock_init(&mmio_rwlock, NULL);
}

448
vendor/github.com/docker/hyperkit/src/lib/mevent.c generated vendored Normal file
View File

@@ -0,0 +1,448 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Micro event library for FreeBSD, designed for a single i/o thread
* using kqueue, and having events be persistent by default.
*/
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/types.h>
#include <sys/event.h>
#include <sys/time.h>
#include <xhyve/support/misc.h>
#include <xhyve/mevent.h>
#define MEVENT_MAX 64
#define MEV_ADD 1
#define MEV_ENABLE 2
#define MEV_DISABLE 3
#define MEV_DEL_PENDING 4
extern char *vmname;
static pthread_t mevent_tid;
static int mevent_timid = 43;
static int mevent_pipefd[2];
static pthread_mutex_t mevent_lmutex = PTHREAD_MUTEX_INITIALIZER;
/* One registered event: a callback bound to an fd, timer or signal. */
struct mevent {
	void (*me_func)(int, enum ev_type, void *); /* user callback */
#define me_msecs me_fd  /* for EVF_TIMER the fd slot holds the period (ms) */
	int me_fd;          /* file descriptor (or timer period, see above) */
	int me_timid;       /* unique kqueue ident for timer events */
	enum ev_type me_type;
	void *me_param;     /* opaque argument passed to me_func */
	int me_cq;          /* 1 while queued on change_head */
	int me_state;       /* MEV_ADD/MEV_ENABLE/MEV_DISABLE/MEV_DEL_PENDING */
	int me_closefd;     /* close me_fd instead of building a kevent */
	LIST_ENTRY(mevent) me_list;
};
static LIST_HEAD(listhead, mevent) global_head, change_head;
/* Acquire the lock protecting global_head/change_head and entry state. */
static void
mevent_qlock(void)
{
	pthread_mutex_lock(&mevent_lmutex);
}
/* Release the event-queue lock. */
static void
mevent_qunlock(void)
{
	pthread_mutex_unlock(&mevent_lmutex);
}
/*
 * Handler for the internal wakeup pipe: drain the bytes written by
 * mevent_notify() so the pipe does not remain readable.
 */
static void
mevent_pipe_read(int fd, UNUSED enum ev_type type, UNUSED void *param)
{
	char buf[MEVENT_MAX];
	ssize_t status;
	/*
	 * Drain the pipe read side. The fd is non-blocking so this is
	 * safe to do.
	 */
	do {
		status = read(fd, buf, sizeof(buf));
	} while (status == MEVENT_MAX); /* keep reading while the buffer came back full */
}
/*
 * Wake the dispatch loop so it re-reads the change list.  A no-op when
 * called from the i/o thread itself or before the pipe exists
 * (mevent_pipefd[1] is still 0).
 */
static void
mevent_notify(void)
{
	char c;
	/*
	 * If calling from outside the i/o thread, write a byte on the
	 * pipe to force the i/o thread to exit the blocking kevent call.
	 */
	if (mevent_pipefd[1] != 0 && pthread_self() != mevent_tid) {
		/* best-effort: the write result is intentionally ignored */
		write(mevent_pipefd[1], &c, 1);
	}
}
/* Map an mevent type to the corresponding kqueue filter (0 if unknown). */
static int
mevent_kq_filter(struct mevent *mevp)
{
	switch (mevp->me_type) {
	case EVF_READ:
		return (EVFILT_READ);
	case EVF_WRITE:
		return (EVFILT_WRITE);
	case EVF_TIMER:
		return (EVFILT_TIMER);
	case EVF_SIGNAL:
		return (EVFILT_SIGNAL);
	default:
		return (0);
	}
}
/* Translate a pending mevent state change into kevent action flags. */
static int
mevent_kq_flags(struct mevent *mevp)
{
	switch (mevp->me_state) {
	case MEV_ADD:
		return (EV_ADD);	/* implicitly enabled */
	case MEV_ENABLE:
		return (EV_ENABLE);
	case MEV_DISABLE:
		return (EV_DISABLE);
	case MEV_DEL_PENDING:
		return (EV_DELETE);
	default:
		assert(0);
		return (0);
	}
}
/* Filter-specific kevent flags; none are used yet. */
static int
mevent_kq_fflags(UNUSED struct mevent *mevp)
{
	/* XXX nothing yet, perhaps EV_EOF for reads ? */
	return (0);
}
/*
 * Convert the queued change list into a kevent changelist in 'kev'.
 * Entries marked me_closefd have their fd closed instead (the close
 * removes the kqueue registration implicitly).  Entries pending
 * deletion are freed; all others move back to the global list.
 * Returns the number of kevents filled in.
 */
static int
mevent_build(UNUSED int mfd, struct kevent *kev)
{
	struct mevent *mevp, *tmpp;
	int i;
	i = 0;
	mevent_qlock();
	LIST_FOREACH_SAFE(mevp, &change_head, me_list, tmpp) {
		if (mevp->me_closefd) {
			/*
			 * A close of the file descriptor will remove the
			 * event
			 */
			close(mevp->me_fd);
		} else {
			if (mevp->me_type == EVF_TIMER) {
				/* timers are identified by me_timid, with the
				 * period (ms) in the data field */
				kev[i].ident = (uintptr_t) mevp->me_timid;
				kev[i].data = mevp->me_msecs;
			} else {
				kev[i].ident = (uintptr_t) mevp->me_fd;
				kev[i].data = 0;
			}
			kev[i].filter = (int16_t) mevent_kq_filter(mevp);
			kev[i].flags = (uint16_t) mevent_kq_flags(mevp);
			kev[i].fflags = (uint32_t) mevent_kq_fflags(mevp);
			kev[i].udata = mevp;
			i++;
		}
		mevp->me_cq = 0;
		LIST_REMOVE(mevp, me_list);
		if (mevp->me_state == MEV_DEL_PENDING) {
			free(mevp);
		} else {
			LIST_INSERT_HEAD(&global_head, mevp, me_list);
		}
		assert(i < MEVENT_MAX);
	}
	mevent_qunlock();
	return (i);
}
/* Invoke the registered callback for each kevent returned by the kernel. */
static void
mevent_handle(struct kevent *kev, int numev)
{
	int idx;

	for (idx = 0; idx < numev; idx++) {
		struct mevent *ev = kev[idx].udata;
		/* XXX check for EV_ERROR ? */
		(*ev->me_func)(ev->me_fd, ev->me_type, ev->me_param);
	}
}
/*
 * Register a new event.  For EVF_TIMER, 'tfd' is the timer period in
 * milliseconds; otherwise it is the file descriptor to watch.  Returns
 * the new entry, or NULL if tfd/func is invalid, the fd/type pair is
 * already registered, or allocation fails.
 */
struct mevent *
mevent_add(int tfd, enum ev_type type,
	void (*func)(int, enum ev_type, void *), void *param)
{
	struct mevent *lp, *mevp;
	if (tfd < 0 || func == NULL) {
		return (NULL);
	}
	mevp = NULL;
	mevent_qlock();
	/*
	 * Verify that the fd/type tuple is not present in any list
	 */
	LIST_FOREACH(lp, &global_head, me_list) {
		if (type != EVF_TIMER && lp->me_fd == tfd &&
			lp->me_type == type) {
			goto exit;
		}
	}
	LIST_FOREACH(lp, &change_head, me_list) {
		if (type != EVF_TIMER && lp->me_fd == tfd &&
			lp->me_type == type) {
			goto exit;
		}
	}
	/*
	 * Allocate an entry, populate it, and add it to the change list.
	 */
	mevp = calloc(1, sizeof(struct mevent));
	if (mevp == NULL) {
		goto exit;
	}
	if (type == EVF_TIMER) {
		/* timers get a unique ident; tfd carries the period */
		mevp->me_msecs = tfd;
		mevp->me_timid = mevent_timid++;
	} else
		mevp->me_fd = tfd;
	mevp->me_type = type;
	mevp->me_func = func;
	mevp->me_param = param;
	LIST_INSERT_HEAD(&change_head, mevp, me_list);
	mevp->me_cq = 1;
	mevp->me_state = MEV_ADD;
	mevent_notify();
exit:
	mevent_qunlock();
	return (mevp);
}
/*
 * Change an event's state to 'newstate' (MEV_ENABLE/MEV_DISABLE) and
 * queue it for the dispatch loop.  Returns EINVAL for entries already
 * pending deletion, 0 otherwise.
 */
static int
mevent_update(struct mevent *evp, int newstate)
{
	/*
	 * It's not possible to enable/disable a deleted event
	 */
	if (evp->me_state == MEV_DEL_PENDING)
		return (EINVAL);
	/*
	 * No update needed if state isn't changing
	 *
	 * NOTE(review): this check and the one above read me_state before
	 * mevent_qlock() is taken — confirm callers serialise updates per
	 * event.
	 */
	if (evp->me_state == newstate)
		return (0);
	mevent_qlock();
	evp->me_state = newstate;
	/*
	 * Place the entry onto the changed list if not already there.
	 */
	if (evp->me_cq == 0) {
		evp->me_cq = 1;
		LIST_REMOVE(evp, me_list);
		LIST_INSERT_HEAD(&change_head, evp, me_list);
		mevent_notify();
	}
	mevent_qunlock();
	return (0);
}
/* Re-enable delivery of a previously disabled event. */
int
mevent_enable(struct mevent *evp)
{
	return (mevent_update(evp, MEV_ENABLE));
}
/* Temporarily suppress delivery of an event without deleting it. */
int
mevent_disable(struct mevent *evp)
{
	return (mevent_update(evp, MEV_DISABLE));
}
/*
 * Mark an event for deletion (and optionally close its fd) and queue it
 * so the dispatch loop performs the actual removal and free.
 */
static int
mevent_delete_event(struct mevent *evp, int closefd)
{
	mevent_qlock();
	/*
	 * Place the entry onto the changed list if not already there, and
	 * mark as to be deleted.
	 */
	if (evp->me_cq == 0) {
		evp->me_cq = 1;
		LIST_REMOVE(evp, me_list);
		LIST_INSERT_HEAD(&change_head, evp, me_list);
		mevent_notify();
	}
	evp->me_state = MEV_DEL_PENDING;
	if (closefd)
		evp->me_closefd = 1;
	mevent_qunlock();
	return (0);
}
/* Delete an event; the caller retains ownership of the fd. */
int
mevent_delete(struct mevent *evp)
{
	return (mevent_delete_event(evp, 0));
}
/* Delete an event and close its file descriptor. */
int
mevent_delete_close(struct mevent *evp)
{
	return (mevent_delete_event(evp, 1));
}
/*
 * Intentionally empty stub — presumably thread naming (e.g.
 * pthread_setname_np) was dropped in this port; kept so
 * mevent_dispatch's call site is unchanged.
 */
static void
mevent_set_name(void)
{
}
/*
 * The i/o thread main loop: create the kqueue and the self-wakeup pipe,
 * then forever apply queued changes and block in kevent() dispatching
 * callbacks.  Never returns.
 */
__attribute__ ((noreturn)) void
mevent_dispatch(void)
{
	struct kevent changelist[MEVENT_MAX];
	struct kevent eventlist[MEVENT_MAX];
	struct mevent *pipev;
	int mfd;
	int numev;
	int ret;
	mevent_tid = pthread_self();
	mevent_set_name();
	mfd = kqueue();
	assert(mfd > 0);
	/*
	 * Open the pipe that will be used for other threads to force
	 * the blocking kqueue call to exit by writing to it. Set the
	 * descriptor to non-blocking.
	 */
	ret = pipe(mevent_pipefd);
	if (ret < 0) {
		perror("pipe");
		exit(0);
	}
	/*
	 * Add internal event handler for the pipe write fd
	 */
	pipev = mevent_add(mevent_pipefd[0], EVF_READ, mevent_pipe_read, NULL);
	assert(pipev != NULL);
	for (;;) {
		/*
		 * Build changelist if required.
		 * XXX the changelist can be put into the blocking call
		 * to eliminate the extra syscall. Currently better for
		 * debug.
		 */
		numev = mevent_build(mfd, changelist);
		if (numev) {
			ret = kevent(mfd, changelist, numev, NULL, 0, NULL);
			if (ret == -1) {
				perror("Error return from kevent change");
			}
		}
		/*
		 * Block awaiting events
		 */
		ret = kevent(mfd, NULL, 0, eventlist, MEVENT_MAX, NULL);
		if (ret == -1 && errno != EINTR) {
			perror("Error return from kevent monitor");
		}
		/*
		 * Handle reported events
		 */
		mevent_handle(eventlist, ret);
	}
}

267
vendor/github.com/docker/hyperkit/src/lib/mevent_test.c generated vendored Normal file
View File

@@ -0,0 +1,267 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Test program for the micro event library. Set up a simple TCP echo
* service.
*
* cc mevent_test.c mevent.c -lpthread
*/
#include <stdint.h>
#include <sys/types.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <stdio.h>
#include <stdlib.h>
#include <pthread.h>
#include <unistd.h>
#include <xhyve/mevent.h>
#define TEST_PORT 4321
static pthread_mutex_t accept_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t accept_condvar = PTHREAD_COND_INITIALIZER;
static struct mevent *tevp;
char *vmname = "test vm";
#define MEVENT_ECHO
/* Number of timer events to capture */
#define TEVSZ 4096
uint64_t tevbuf[TEVSZ];
/*
 * Read the CPU timestamp counter, preceded by cpuid as a serializing
 * instruction.  NOTE(review): the asm statements declare no clobbers
 * for the registers cpuid overwrites (ebx/ecx) — acceptable for this
 * test program, but confirm before reusing elsewhere.
 */
static __inline uint64_t rdtsc(void)
{
	unsigned a, d;
	__asm__ __volatile__ ("cpuid");
	__asm__ __volatile__ ("rdtsc" : "=a" (a), "=d" (d));
	return (((uint64_t) a) | (((uint64_t) d) << 32));
}
/*
 * Summarise the captured timer timestamps: convert successive tsc
 * deltas to microseconds (using the machdep.tsc_freq sysctl, a
 * BSD/macOS interface) and print min/max/mean.
 */
static void
timer_print(void)
{
	uint64_t min, max, diff, sum, tsc_freq;
	size_t len;
	int j;
	min = UINT64_MAX;
	max = 0;
	sum = 0;
	len = sizeof(tsc_freq);
	/* return value unchecked: tsc_freq is garbage if the sysctl fails */
	sysctlbyname("machdep.tsc_freq", &tsc_freq, &len, NULL, 0);
	for (j = 1; j < TEVSZ; j++) {
		/* Convert a tsc diff into microseconds */
		diff = (tevbuf[j] - tevbuf[j-1]) * 1000000 / tsc_freq;
		sum += diff;
		if (min > diff)
			min = diff;
		if (max < diff)
			max = diff;
	}
	printf("timers done: usecs, min %llu, max %llu, mean %llu\n", min, max,
	sum/(TEVSZ - 1));
}
/*
 * Timer event handler: record a timestamp per tick; after TEVSZ ticks
 * delete the timer and print the statistics.  fd/type/param are unused.
 */
static void
timer_callback(int fd, enum ev_type type, void *param)
{
	static int i; /* tick counter, persists across calls */
	if (i >= TEVSZ)
		abort();
	tevbuf[i++] = rdtsc();
	if (i == TEVSZ) {
		mevent_delete(tevp);
		timer_print();
	}
}
#ifdef MEVENT_ECHO
/* Mutex/condvar pair used to signal an echoer thread that data arrived. */
struct esync {
	pthread_mutex_t e_mt;
	pthread_cond_t e_cond;
};
/*
 * Read-event handler: wake the echoer thread waiting on the condvar.
 * fd and type are unused; param is the thread's struct esync.
 */
static void
echoer_callback(int fd, enum ev_type type, void *param)
{
	struct esync *sync = param;
	pthread_mutex_lock(&sync->e_mt);
	pthread_cond_signal(&sync->e_cond);
	pthread_mutex_unlock(&sync->e_mt);
}
/*
 * Per-connection thread: register a read event for the socket, then
 * sleep on the condvar; on each wakeup echo the received bytes back to
 * the client and to stdout (fd 0).  Exits when read() reports EOF or
 * error, closing the socket via mevent_delete_close().
 */
static void *
echoer(void *param)
{
	struct esync sync;
	struct mevent *mev;
	char buf[128];
	int fd = (int)(uintptr_t) param;
	int len;
	pthread_mutex_init(&sync.e_mt, NULL);
	pthread_cond_init(&sync.e_cond, NULL);
	pthread_mutex_lock(&sync.e_mt);
	mev = mevent_add(fd, EVF_READ, echoer_callback, &sync);
	if (mev == NULL) {
		printf("Could not allocate echoer event\n");
		exit(1);
	}
	while (!pthread_cond_wait(&sync.e_cond, &sync.e_mt)) {
		len = read(fd, buf, sizeof(buf));
		if (len > 0) {
			/* best-effort echo: write results are ignored */
			write(fd, buf, len);
			write(0, buf, len);
		} else {
			break;
		}
	}
	mevent_delete_close(mev);
	pthread_mutex_unlock(&sync.e_mt);
	pthread_mutex_destroy(&sync.e_mt);
	pthread_cond_destroy(&sync.e_cond);
	return (NULL);
}
#else
/*
 * Non-echo variant: blocking pump that copies everything read from the
 * socket to stdout until EOF or error.
 */
static void *
echoer(void *param)
{
	int sock = (int)(uintptr_t) param;
	char chunk[128];
	int nread;

	while ((nread = read(sock, chunk, sizeof(chunk))) > 0)
		write(1, chunk, nread);

	return (NULL);
}
#endif /* MEVENT_ECHO */
/*
 * Read-event handler on the listening socket: wake the acceptor thread,
 * which performs the actual accept().  All parameters are unused.
 */
static void
acceptor_callback(int fd, enum ev_type type, void *param)
{
	pthread_mutex_lock(&accept_mutex);
	pthread_cond_signal(&accept_condvar);
	pthread_mutex_unlock(&accept_mutex);
}
/*
 * Listener thread: bind/listen on TEST_PORT, register a read event on
 * the listening socket, then loop accepting connections signalled by
 * acceptor_callback, spawning an echoer thread per connection.  The
 * first connection also starts the 1 ms test timer.  'param' is unused.
 */
static void *
acceptor(void *param)
{
	struct sockaddr_in sin;
	pthread_t tid;
	int news;
	int s;
	/* Fix: removed the function-scope 'static int first;' — it was
	 * never used, being shadowed by the block-scope declaration in
	 * the accept loop below. */
	if ((s = socket(AF_INET, SOCK_STREAM, 0)) < 0) {
		perror("socket");
		exit(1);
	}
	sin.sin_len = sizeof(sin);
	sin.sin_family = AF_INET;
	sin.sin_addr.s_addr = htonl(INADDR_ANY);
	sin.sin_port = htons(TEST_PORT);
	if (bind(s, (struct sockaddr *)&sin, sizeof(sin)) < 0) {
		perror("bind");
		exit(1);
	}
	if (listen(s, 1) < 0) {
		perror("listen");
		exit(1);
	}
	(void) mevent_add(s, EVF_READ, acceptor_callback, NULL);
	pthread_mutex_lock(&accept_mutex);
	while (!pthread_cond_wait(&accept_condvar, &accept_mutex)) {
		news = accept(s, NULL, NULL);
		if (news < 0) {
			perror("accept error");
		} else {
			static int first = 1;
			if (first) {
				/*
				 * Start a timer
				 */
				first = 0;
				tevp = mevent_add(1, EVF_TIMER, timer_callback,
					NULL);
			}
			printf("incoming connection, spawning thread\n");
			pthread_create(&tid, NULL, echoer,
				(void *)(uintptr_t)news);
		}
	}
	return (NULL);
}
/* Start the acceptor thread, then run the event loop on this thread. */
int
main(void)
{
	pthread_t tid;
	pthread_create(&tid, NULL, acceptor, NULL);
	/* never returns */
	mevent_dispatch();
	return (0);
}

View File

@@ -0,0 +1,273 @@
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <errno.h>
#include <sys/stat.h>
#include <caml/alloc.h>
#include <caml/threads.h>
#include <caml/mlvalues.h>
#include <caml/memory.h>
#include <caml/callback.h>
#include <caml/bigarray.h>
#include "mirage_block_c.h"
/* Register the calling thread with the OCaml runtime (required before
   any other mirage_block_* call on this thread). */
void
mirage_block_register_thread(){
  caml_c_thread_register();
}
/* Unregister the calling thread from the OCaml runtime when done. */
void
mirage_block_unregister_thread(){
  caml_c_thread_unregister();
}
/* Convenience macro to cache the OCaml callback and immediately abort()
if it can't be found -- this would indicate a fundamental linking error. */
#define OCAML_NAMED_FUNCTION(name) \
static value *fn = NULL; \
if (fn == NULL) { \
fn = caml_named_value(name); \
} \
if (fn == NULL) { \
fprintf(stderr, "Callback.register for " name " not called: are all objects linked?\n"); \
abort(); \
}
/* Every call has 2 C functions:
1. static void ocaml_FOO: using the CAMLparam/CAMLreturn macros. This assumes
the runtime system lock is held. Errors are propagated back by out
parameters, since we must use CAMLreturn and yet we cannot use CAMLreturn
for regular C values.
2. plain C FOO: this acquires the runtime lock and calls the ocaml_FOO
function.
An alternative design would be to use Begin_roots and End_roots after
acquiring the runtime lock. */
/* Call the registered OCaml "mirage_block_open" with the config string
   and an optional options string; on success *out receives the handle.
   Must be called with the OCaml runtime lock held. */
static void
ocaml_mirage_block_open(const char *config, const char *options, int *out, int *err) {
  CAMLparam0();
  CAMLlocal4(ocaml_config, ocaml_options_opt, ocaml_string, handle);
  ocaml_config = caml_copy_string(config);
  if (options == NULL) {
    ocaml_options_opt = Val_int(0); /* None */
  } else {
    ocaml_string = caml_copy_string(options);
    ocaml_options_opt = caml_alloc(1, 0); /* Some */
    Store_field (ocaml_options_opt, 0, ocaml_string);
  }
  OCAML_NAMED_FUNCTION("mirage_block_open")
  handle = caml_callback2_exn(*fn, ocaml_config, ocaml_options_opt);
  if (Is_exception_result(handle)){
    *err = 1;
  } else {
    *err = 0;
    *out = Int_val(handle);
  }
  CAMLreturn0;
}
/* Public wrapper: acquires the OCaml runtime lock, opens the device,
   and returns its handle; returns -1 with errno = EINVAL on failure. */
mirage_block_handle
mirage_block_open(const char *config, const char *options) {
  int result;
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_open(config, options, &result, &err);
  caml_release_runtime_system();
  if (err){
    errno = EINVAL;
    return (-1);
  } else {
    return result;
  }
}
/* Call the registered OCaml "mirage_block_stat" and translate the
   returned tuple (read_write, sector_size, size_sectors, candelete)
   into a synthetic struct stat plus mirage_block_stat.  Must be called
   with the OCaml runtime lock held. */
static void
ocaml_mirage_block_stat(mirage_block_handle h, struct stat *stat, struct mirage_block_stat *mbs, int *err) {
  CAMLparam0();
  CAMLlocal2(ocaml_handle, result);
  ocaml_handle = Val_int(h);
  OCAML_NAMED_FUNCTION("mirage_block_stat")
  result = caml_callback_exn(*fn, ocaml_handle);
  if (Is_exception_result(result)){
    *err = 1;
  } else {
    /* Fix: the tuple fields were previously read from 'result' before
       the Is_exception_result check; an exception result is not a
       valid tuple, so those Field() reads were invalid.  Only read the
       fields on the success path. */
    int read_write = Int_val(Field(result, 0)) != 0;
    unsigned int sector_size = (unsigned int)Int_val(Field(result, 1));
    uint64_t size_sectors = (uint64_t)Int64_val(Field(result, 2));
    int candelete = Bool_val(Field(result, 3));
    *err = 0;
    bzero(stat, sizeof(struct stat));
    stat->st_dev = 0;
    stat->st_ino = 0;
    /* world/group/user-readable; writable bits only if the device is RW */
    stat->st_mode = S_IFREG | S_IROTH | S_IRGRP | S_IRUSR | (read_write?(S_IWOTH | S_IWGRP | S_IWUSR): 0);
    stat->st_nlink = 1;
    stat->st_uid = 0;
    stat->st_gid = 0;
    stat->st_rdev = 0;
    stat->st_size = (off_t)(sector_size * size_sectors);
    stat->st_blocks = (blkcnt_t)size_sectors;
    stat->st_blksize = (blksize_t)sector_size;
    stat->st_flags = 0;
    stat->st_gen = 0;
    mbs->candelete = candelete;
  }
  CAMLreturn0;
}
/* Public wrapper: query the device under the OCaml runtime lock.
   Returns 0 on success, -1 with errno = EINVAL on failure. */
int
mirage_block_stat(mirage_block_handle h, struct stat *stat, struct mirage_block_stat *mbs) {
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_stat(h, stat, mbs, &err);
  caml_release_runtime_system();
  if (err){
    errno = EINVAL;
    return (-1);
  } else {
    return 0;
  }
}
/* Call the registered OCaml "mirage_block_close"; sets *err to 1 only
   if the callback raised.  Runtime lock must be held. */
static void
ocaml_mirage_block_close(int handle, int *err) {
  CAMLparam0();
  CAMLlocal2(ocaml_handle, result);
  ocaml_handle = Val_int(handle);
  OCAML_NAMED_FUNCTION("mirage_block_close")
  result = caml_callback_exn(*fn, ocaml_handle);
  *err = 0;
  if (Is_exception_result(result)){
    *err = 1;
  }
  CAMLreturn0;
}
/* Public wrapper: close the device; returns 0 on success, 1 on error
   (note: unlike open/stat, errno is not set here). */
int mirage_block_close(int handle){
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_close(handle, &err);
  caml_release_runtime_system();
  return err;
}
/* Wrap each iovec as an OCaml bigarray view (no copy) and call the
   registered "mirage_block_preadv"; *out receives the byte count.
   NOTE(review): the offset is boxed with Val_int rather than
   caml_copy_int64 — confirm offsets always fit the OCaml int range.
   Runtime lock must be held. */
static void
ocaml_mirage_block_preadv(const int handle, const struct iovec *iov, int iovcnt, off_t ofs, ssize_t *out, int *err) {
  CAMLparam0();
  CAMLlocal4(ocaml_handle, ocaml_bufs, ocaml_ofs, ocaml_result);
  ocaml_handle = Val_int(handle);
  ocaml_bufs = caml_alloc_tuple((mlsize_t)iovcnt);
  ocaml_ofs = Val_int(ofs);
  for (int i = 0; i < iovcnt; i++ ){
    Store_field(ocaml_bufs, (mlsize_t)i, caml_ba_alloc_dims(CAML_BA_CHAR | CAML_BA_C_LAYOUT,
    1, (*(iov+i)).iov_base, (*(iov+i)).iov_len));
  }
  OCAML_NAMED_FUNCTION("mirage_block_preadv")
  ocaml_result = caml_callback3_exn(*fn, ocaml_handle, ocaml_bufs, ocaml_ofs);
  if (Is_exception_result(ocaml_result)) {
    *err = 1;
  } else {
    *err = 0;
    *out = Int_val(ocaml_result);
  }
  CAMLreturn0;
}
/* Public wrapper: scatter-read into iov at 'offset'.  Returns bytes
   read, or -1 with errno = EINVAL on failure. */
ssize_t
mirage_block_preadv(mirage_block_handle h, const struct iovec *iov, int iovcnt, off_t offset) {
  ssize_t len;
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_preadv(h, iov, iovcnt, offset, &len, &err);
  caml_release_runtime_system();
  if (err){
    errno = EINVAL;
    return (-1);
  }
  return len;
}
/* Wrap each iovec as an OCaml bigarray view (no copy) and call the
   registered "mirage_block_pwritev"; *out receives the byte count.
   NOTE(review): as with preadv, the offset is boxed with Val_int —
   confirm offsets always fit the OCaml int range.  Runtime lock must
   be held. */
static void
ocaml_mirage_block_pwritev(const int handle, const struct iovec *iov, int iovcnt, off_t ofs, ssize_t *out, int *err) {
  CAMLparam0();
  CAMLlocal4(ocaml_handle, ocaml_bufs, ocaml_ofs, ocaml_result);
  ocaml_handle = Val_int(handle);
  ocaml_bufs = caml_alloc_tuple((mlsize_t)iovcnt);
  ocaml_ofs = Val_int(ofs);
  for (int i = 0; i < iovcnt; i++ ){
    Store_field(ocaml_bufs, (mlsize_t)i, caml_ba_alloc_dims(CAML_BA_CHAR | CAML_BA_C_LAYOUT,
    1, (*(iov+i)).iov_base, (*(iov+i)).iov_len));
  }
  OCAML_NAMED_FUNCTION("mirage_block_pwritev")
  ocaml_result = caml_callback3_exn(*fn, ocaml_handle, ocaml_bufs, ocaml_ofs);
  if (Is_exception_result(ocaml_result)) {
    *err = 1;
  } else {
    *err = 0;
    *out = Int_val(ocaml_result);
  }
  CAMLreturn0;
}
/* Public wrapper: gather-write from iov at 'offset'.  Returns bytes
   written, or -1 with errno = EINVAL on failure. */
ssize_t
mirage_block_pwritev(mirage_block_handle h, const struct iovec *iov, int iovcnt, off_t offset) {
  ssize_t len;
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_pwritev(h, iov, iovcnt, offset, &len, &err);
  caml_release_runtime_system();
  if (err){
    errno = EINVAL;
    return (-1);
  }
  return len;
}
/* Call the registered OCaml "mirage_block_delete" to TRIM/DISCARD the
   byte range [offset, offset+len); offset and len are boxed as int64.
   Runtime lock must be held. */
static void
ocaml_mirage_block_delete(int handle, off_t offset, ssize_t len, int *err) {
  CAMLparam0();
  CAMLlocal4(ocaml_handle, result, ocaml_offset, ocaml_len);
  ocaml_handle = Val_int(handle);
  ocaml_offset = caml_copy_int64(offset);
  ocaml_len = caml_copy_int64(len);
  OCAML_NAMED_FUNCTION("mirage_block_delete")
  result = caml_callback3_exn(*fn, ocaml_handle, ocaml_offset, ocaml_len);
  *err = 0;
  if (Is_exception_result(result)){
    errno = EINVAL;
    *err = 1;
  }
  CAMLreturn0;
}
/* Public wrapper: returns 0 on success, 1 on error (errno = EINVAL). */
int
mirage_block_delete(mirage_block_handle handle, off_t offset, ssize_t len) {
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_delete(handle, offset, len, &err);
  caml_release_runtime_system();
  return err;
}
/* Call the registered OCaml "mirage_block_flush" to flush outstanding
   I/O.  Runtime lock must be held. */
static void
ocaml_mirage_block_flush(int handle, int *err) {
  CAMLparam0();
  CAMLlocal2(ocaml_handle, result);
  ocaml_handle = Val_int(handle);
  OCAML_NAMED_FUNCTION("mirage_block_flush")
  result = caml_callback_exn(*fn, ocaml_handle);
  *err = 0;
  if (Is_exception_result(result)){
    errno = EINVAL;
    *err = 1;
  }
  CAMLreturn0;
}
/* Public wrapper: returns 0 on success, 1 on error (errno = EINVAL). */
int mirage_block_flush(int handle){
  int err = 1;
  caml_acquire_runtime_system();
  ocaml_mirage_block_flush(handle, &err);
  caml_release_runtime_system();
  return err;
}

View File

@@ -0,0 +1,69 @@
/*
* C interface to the mirage-block subsystem
*
* Rules for usage:
* - do not mix with other libraries which embed OCaml runtimes
* - before calling any other function, call `mirage_block_init` from one
* thread: this initialises the runtime
* - before calling open, register your thread with `mirage_block_register_thread`
*/
#include <sys/types.h>
#include <sys/uio.h>
#include <unistd.h>
/* Initialise the mirage-block subsystem. This must be called before calling
mirage_block_register_thread. */
extern void
mirage_block_init(void);
/* Every thread that uses a mirage-block device must be registered with the
runtime system. Call this on every thread you will use, after calling
mirage_block_init once in one thread. */
extern void
mirage_block_register_thread(void);
/* When a thread has finished using mirage-block devices, call this to free
associated resources. */
extern void
mirage_block_unregister_thread(void);
/* An opened mirage-block device */
typedef int mirage_block_handle;
/* Open a mirage block device with the given optional string configuration.
To use the default configuration, pass NULL for options. */
extern mirage_block_handle
mirage_block_open(const char *config, const char *options);
struct mirage_block_stat {
int candelete; /* 1 if the device supports TRIM/DELETE/DISCARD */
};
/* Query a mirage block device. */
extern int
mirage_block_stat(mirage_block_handle h, struct stat *stat, struct mirage_block_stat *buf);
/* Read data from a mirage block device. Note the offset must be sector-aligned
and the memory buffers must also be sector-aligned. */
extern ssize_t
mirage_block_preadv(mirage_block_handle h,
const struct iovec *iov, int iovcnt, off_t offset);
/* Write data to a mirage block device. Note the offset must be sector-aligned
and the memory buffers must also be sector-aligned. */
extern ssize_t
mirage_block_pwritev(mirage_block_handle h,
const struct iovec *iov, int iovcnt, off_t offset);
/* TRIM/DELETE/DISCARD the range of sectors */
extern int
mirage_block_delete(mirage_block_handle h, off_t offset, ssize_t len);
/* Flush any outstanding I/O */
extern
int mirage_block_flush(mirage_block_handle h);
/* Close an open device; subsequent I/O requests will fail. */
extern
int mirage_block_close(mirage_block_handle h);

377
vendor/github.com/docker/hyperkit/src/lib/mptbl.c generated vendored Normal file
View File

@@ -0,0 +1,377 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
// #include <x86/mptable.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/mptable.h>
#include <xhyve/acpi.h>
#include <xhyve/xhyve.h>
#include <xhyve/mptbl.h>
#include <xhyve/pci_emul.h>
#define MPTABLE_BASE 0xF0000
/* floating pointer length + maximum length of configuration table */
#define MPTABLE_MAX_LENGTH (65536 + 16)
#define LAPIC_PADDR 0xFEE00000
#define LAPIC_VERSION 16
#define IOAPIC_PADDR 0xFEC00000
#define IOAPIC_VERSION 0x11
#define MP_SPECREV 4
#define MPFP_SIG "_MP_"
/* Configuration header defines */
#define MPCH_SIG "PCMP"
#define MPCH_OEMID "BHyVe "
#define MPCH_OEMID_LEN 8
#define MPCH_PRODID "Hypervisor "
#define MPCH_PRODID_LEN 12
/* Processor entry defines */
#define MPEP_SIG_FAMILY 6 /* XXX bhyve should supply this */
#define MPEP_SIG_MODEL 26
#define MPEP_SIG_STEPPING 5
#define MPEP_SIG \
((MPEP_SIG_FAMILY << 8) | \
(MPEP_SIG_MODEL << 4) | \
(MPEP_SIG_STEPPING))
#define MPEP_FEATURES (0xBFEBFBFF) /* XXX Intel i7 */
/* Number of local intr entries */
#define MPEII_NUM_LOCAL_IRQ 2
/* Bus entry defines */
#define MPE_NUM_BUSES 2
#define MPE_BUSNAME_LEN 6
#define MPE_BUSNAME_ISA "ISA "
#define MPE_BUSNAME_PCI "PCI "
static void *oem_tbl_start;
static int oem_tbl_size;
/*
 * MP-table checksum: returns the byte that makes the sum of all bytes
 * in [base, base+len) plus the checksum itself equal zero (mod 256).
 */
static uint8_t
mpt_compute_checksum(void *base, size_t len)
{
	uint8_t *p = base;
	uint8_t total = 0;
	size_t i;

	for (i = 0; i < len; i++)
		total = (uint8_t) (total + p[i]);

	return ((uint8_t) (256 - total));
}
/*
 * Fill in the MP floating pointer structure at guest address 'gpa';
 * the configuration table is placed immediately after it (pap).
 */
static void
mpt_build_mpfp(mpfps_t mpfp, uint64_t gpa)
{
	memset(mpfp, 0, sizeof(*mpfp));
	memcpy(mpfp->signature, MPFP_SIG, 4);
	mpfp->pap = (uint32_t) (gpa + sizeof(*mpfp));
	mpfp->length = 1; /* length in 16-byte paragraphs */
	mpfp->spec_rev = MP_SPECREV;
	mpfp->checksum = mpt_compute_checksum(mpfp, sizeof(*mpfp));
}
/*
 * Fill in the static parts of the MP configuration table header; the
 * caller fills in entry counts, length and checksum afterwards.
 */
static void
mpt_build_mpch(mpcth_t mpch)
{
	memset(mpch, 0, sizeof(*mpch));
	memcpy(mpch->signature, MPCH_SIG, 4);
	mpch->spec_rev = MP_SPECREV;
	memcpy(mpch->oem_id, MPCH_OEMID, MPCH_OEMID_LEN);
	memcpy(mpch->product_id, MPCH_PRODID, MPCH_PRODID_LEN);
	mpch->apic_address = LAPIC_PADDR;
}
/*
 * Emit one processor entry per vCPU.  APIC IDs are assigned 0..ncpu-1
 * and CPU 0 is marked as the bootstrap processor.
 */
static void
mpt_build_proc_entries(proc_entry_ptr mpep, int ncpu)
{
	int i;
	for (i = 0; i < ncpu; i++) {
		memset(mpep, 0, sizeof(*mpep));
		mpep->type = MPCT_ENTRY_PROCESSOR;
		mpep->apic_id = (uint8_t) i; // XXX
		mpep->apic_version = LAPIC_VERSION;
		mpep->cpu_flags = PROCENTRY_FLAG_EN;
		if (i == 0)
			mpep->cpu_flags |= PROCENTRY_FLAG_BP;
		mpep->cpu_signature = MPEP_SIG;
		mpep->feature_flags = MPEP_FEATURES;
		mpep++;
	}
}
/*
 * Emit the MPEII_NUM_LOCAL_IRQ local interrupt entries (LINT0/LINT1),
 * broadcast to all CPUs via destination APIC ID 0xff.
 */
static void
mpt_build_localint_entries(int_entry_ptr mpie)
{
	/* Hardcode LINT0 as ExtINT on all CPUs. */
	memset(mpie, 0, sizeof(*mpie));
	mpie->type = MPCT_ENTRY_LOCAL_INT;
	mpie->int_type = INTENTRY_TYPE_EXTINT;
	mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
	INTENTRY_FLAGS_TRIGGER_CONFORM;
	mpie->dst_apic_id = 0xff;
	mpie->dst_apic_int = 0;
	mpie++;
	/* Hardcode LINT1 as NMI on all CPUs. */
	memset(mpie, 0, sizeof(*mpie));
	mpie->type = MPCT_ENTRY_LOCAL_INT;
	mpie->int_type = INTENTRY_TYPE_NMI;
	mpie->int_flags = INTENTRY_FLAGS_POLARITY_CONFORM |
	INTENTRY_FLAGS_TRIGGER_CONFORM;
	mpie->dst_apic_id = 0xff;
	mpie->dst_apic_int = 1;
}
/* Emit the MPE_NUM_BUSES bus entries: bus 0 is PCI, bus 1 is ISA. */
static void
mpt_build_bus_entries(bus_entry_ptr mpeb)
{
	memset(mpeb, 0, sizeof(*mpeb));
	mpeb->type = MPCT_ENTRY_BUS;
	mpeb->bus_id = 0;
	memcpy(mpeb->bus_type, MPE_BUSNAME_PCI, MPE_BUSNAME_LEN);
	mpeb++;
	memset(mpeb, 0, sizeof(*mpeb));
	mpeb->type = MPCT_ENTRY_BUS;
	mpeb->bus_id = 1;
	memcpy(mpeb->bus_type, MPE_BUSNAME_ISA, MPE_BUSNAME_LEN);
}
/* Emit a single I/O APIC entry with the given APIC id. */
static void
mpt_build_ioapic_entries(io_apic_entry_ptr mpei, int id)
{
	memset(mpei, 0, sizeof(*mpei));
	mpei->type = MPCT_ENTRY_IOAPIC;
	mpei->apic_id = (uint8_t) id;
	mpei->apic_version = IOAPIC_VERSION;
	mpei->apic_flags = IOAPICENTRY_FLAG_EN;
	mpei->apic_address = IOAPIC_PADDR;
}
/*
 * Number of I/O interrupt entries to emit: one for each of the 16
 * legacy pins plus one per active PCI INTx pin on every bus.
 */
static int
mpt_count_ioint_entries(void)
{
	int bus;
	int total = 16;

	for (bus = 0; bus <= PCI_BUSMAX; bus++)
		total += pci_count_lintr(bus);

	return (total);
}
/*
 * pci_walk_lintr() callback: append one I/O interrupt entry for a PCI
 * INTx pin.  'arg' points at the cursor into the entry array, which is
 * advanced on return.
 */
static void
mpt_generate_pci_int(int bus, int slot, int pin, UNUSED int pirq_pin,
	int ioapic_irq, void *arg)
{
	int_entry_ptr *mpiep, mpie;
	mpiep = arg;
	mpie = *mpiep;
	memset(mpie, 0, sizeof(*mpie));
	/*
	 * This is always after another I/O interrupt entry, so cheat
	 * and fetch the I/O APIC ID from the prior entry.
	 */
	mpie->type = MPCT_ENTRY_INT;
	mpie->int_type = INTENTRY_TYPE_INT;
	mpie->src_bus_id = (uint8_t) bus;
	/* source IRQ encodes slot in bits 2+ and pin (0-based) in bits 0-1 */
	mpie->src_bus_irq = (uint8_t) (slot << 2 | (pin - 1));
	mpie->dst_apic_id = mpie[-1].dst_apic_id;
	mpie->dst_apic_int = (uint8_t) ioapic_irq;
	*mpiep = mpie + 1;
}
/*
 * Emit the I/O interrupt entries: the 16 legacy ISA pins first, then
 * (via mpt_generate_pci_int) one entry per routed PCI INTx pin.
 */
static void
mpt_build_ioint_entries(int_entry_ptr mpie, int id)
{
	int pin, bus;

	/*
	 * The following config is taken from kernel mptable.c
	 * mptable_parse_default_config_ints(...), for now
	 * just use the default config, tweak later if needed.
	 */

	/* First, generate the first 16 pins. */
	for (pin = 0; pin < 16; pin++) {
		memset(mpie, 0, sizeof(*mpie));
		mpie->type = MPCT_ENTRY_INT;
		mpie->src_bus_id = 1;	/* bus 1 is the ISA bus (see bus entries) */
		mpie->dst_apic_id = (uint8_t) id;

		/*
		 * All default configs route IRQs from bus 0 to the first 16
		 * pins of the first I/O APIC with an APIC ID of 2.
		 */
		mpie->dst_apic_int = (uint8_t) pin;
		switch (pin) {
		case 0:
			/* Pin 0 is an ExtINT pin. */
			mpie->int_type = INTENTRY_TYPE_EXTINT;
			break;
		case 2:
			/* IRQ 0 is routed to pin 2. */
			mpie->int_type = INTENTRY_TYPE_INT;
			mpie->src_bus_irq = 0;
			break;
		case SCI_INT:
			/* ACPI SCI is level triggered and active-lo. */
			mpie->int_flags = INTENTRY_FLAGS_POLARITY_ACTIVELO |
			    INTENTRY_FLAGS_TRIGGER_LEVEL;
			mpie->int_type = INTENTRY_TYPE_INT;
			mpie->src_bus_irq = SCI_INT;
			break;
		default:
			/* All other pins are identity mapped. */
			mpie->int_type = INTENTRY_TYPE_INT;
			mpie->src_bus_irq = (uint8_t) pin;
			break;
		}
		mpie++;
	}

	/* Next, generate entries for any PCI INTx interrupts. */
	for (bus = 0; bus <= PCI_BUSMAX; bus++)
		pci_walk_lintr(bus, mpt_generate_pci_int, &mpie);
}
/* Record an OEM table that mptable_build() will append after the base table. */
void
mptable_add_oemtbl(void *tbl, int tblsz)
{
	oem_tbl_size = tblsz;
	oem_tbl_start = tbl;
}
/*
 * Build the full MP table at MPTABLE_BASE in guest memory: floating
 * pointer, config header, processor entries, bus entries, one I/O APIC
 * entry, I/O interrupt entries, local interrupt entries, and an
 * optional OEM table registered via mptable_add_oemtbl().
 * Returns 0 on success or an errno value on failure.
 */
int
mptable_build(int ncpu)
{
	mpcth_t mpch;
	bus_entry_ptr mpeb;
	io_apic_entry_ptr mpei;
	proc_entry_ptr mpep;
	mpfps_t mpfp;
	int_entry_ptr mpie;
	int ioints, bus;
	char *curraddr;
	char *startaddr;

	startaddr = paddr_guest2host(MPTABLE_BASE, MPTABLE_MAX_LENGTH);
	if (startaddr == NULL) {
		fprintf(stderr, "mptable requires mapped mem\n");
		return (ENOMEM);
	}

	/*
	 * There is no way to advertise multiple PCI hierarchies via MPtable
	 * so require that there is no PCI hierarchy with a non-zero bus
	 * number.
	 */
	for (bus = 1; bus <= PCI_BUSMAX; bus++) {
		if (pci_bus_configured(bus)) {
			fprintf(stderr, "MPtable is incompatible with "
			    "multiple PCI hierarchies.\r\n");
			fprintf(stderr, "MPtable generation can be disabled "
			    "by passing the -Y option to bhyve(8).\r\n");
			return (EINVAL);
		}
	}

	curraddr = startaddr;

	/* Floating pointer structure. */
	mpfp = (mpfps_t)curraddr;
	mpt_build_mpfp(mpfp, MPTABLE_BASE);
	curraddr += sizeof(*mpfp);

	/* Config table header; entry_count is accumulated as entries go in. */
	mpch = (mpcth_t)curraddr;
	mpt_build_mpch(mpch);
	curraddr += sizeof(*mpch);

	mpep = (proc_entry_ptr)curraddr;
	mpt_build_proc_entries(mpep, ncpu);
	curraddr += sizeof(*mpep) * ((uint64_t) ncpu);
	mpch->entry_count += ncpu;

	mpeb = (bus_entry_ptr) curraddr;
	mpt_build_bus_entries(mpeb);
	curraddr += sizeof(*mpeb) * MPE_NUM_BUSES;
	mpch->entry_count += MPE_NUM_BUSES;

	mpei = (io_apic_entry_ptr)curraddr;
	mpt_build_ioapic_entries(mpei, 0);
	curraddr += sizeof(*mpei);
	mpch->entry_count++;

	/* I/O interrupt entries: count must match what the builder emits. */
	mpie = (int_entry_ptr) curraddr;
	ioints = mpt_count_ioint_entries();
	mpt_build_ioint_entries(mpie, 0);
	curraddr += sizeof(*mpie) * ((uint64_t) ioints);
	mpch->entry_count += ioints;

	mpie = (int_entry_ptr)curraddr;
	mpt_build_localint_entries(mpie);
	curraddr += sizeof(*mpie) * MPEII_NUM_LOCAL_IRQ;
	mpch->entry_count += MPEII_NUM_LOCAL_IRQ;

	/* Append the optional OEM table and point the header at it. */
	if (oem_tbl_start) {
		mpch->oem_table_pointer =
		    (uint32_t) (curraddr - startaddr + MPTABLE_BASE);
		mpch->oem_table_size = (uint16_t) oem_tbl_size;
		memcpy(curraddr, oem_tbl_start, oem_tbl_size);
	}

	/* Finalize length and checksum now that every entry is in place. */
	mpch->base_table_length = (uint16_t) (curraddr - (char *)mpch);
	mpch->checksum = mpt_compute_checksum(mpch, mpch->base_table_length);

	return (0);
}

2399
vendor/github.com/docker/hyperkit/src/lib/pci_ahci.c generated vendored Normal file

File diff suppressed because it is too large Load Diff

2138
vendor/github.com/docker/hyperkit/src/lib/pci_emul.c generated vendored Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,68 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <xhyve/support/misc.h>
#include <xhyve/pci_emul.h>
/*
 * Generic host bridge: a config-space-only device carrying a PCIe
 * root-port capability.  Always succeeds.
 */
static int
pci_hostbridge_init(struct pci_devinst *pi, UNUSED char *opts)
{
	/* config space */
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1275); /* NetApp */
	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x1275); /* NetApp */
	pci_set_cfgdata8(pi, PCIR_HDRTYPE, PCIM_HDRTYPE_NORMAL);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_HOST);

	pci_emul_add_pciecap(pi, PCIEM_TYPE_ROOT_PORT);

	return (0);
}
/*
 * AMD flavour of the host bridge: identical to the generic bridge
 * except for the vendor/device IDs it presents.
 */
static int
pci_amd_hostbridge_init(struct pci_devinst *pi, char *opts)
{
	(void) pci_hostbridge_init(pi, opts);

	/* Override the IDs set by the generic init. */
	pci_set_cfgdata16(pi, PCIR_DEVICE, 0x7432); /* made up */
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x1022); /* AMD */

	return (0);
}
/* Register both host-bridge personalities with the PCI emulation core. */
static struct pci_devemu pci_de_amd_hostbridge = {
	.pe_emu = "amd_hostbridge",
	.pe_init = pci_amd_hostbridge_init,
};
PCI_EMUL_SET(pci_de_amd_hostbridge);

static struct pci_devemu pci_de_hostbridge = {
	.pe_emu = "hostbridge",
	.pe_init = pci_hostbridge_init,
};
PCI_EMUL_SET(pci_de_hostbridge);

339
vendor/github.com/docker/hyperkit/src/lib/pci_irq.c generated vendored Normal file
View File

@@ -0,0 +1,339 @@
/*-
* Copyright (c) 2014 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdbool.h>
#include <stdlib.h>
#include <pthread.h>
#include <errno.h>
#include <assert.h>
#include <xhyve/support/misc.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/acpi.h>
#include <xhyve/inout.h>
#include <xhyve/pci_emul.h>
#include <xhyve/pci_irq.h>
#include <xhyve/pci_lpc.h>
/*
* Implement an 8 pin PCI interrupt router compatible with the router
* present on Intel's ICH10 chip.
*/
/* Fields in each PIRQ register. */
#define PIRQ_DIS 0x80
#define PIRQ_IRQ 0x0f

/* Only IRQs 3-7, 9-12, and 14-15 are permitted. */
#define PERMITTED_IRQS 0xdef8
#define IRQ_PERMITTED(irq) (((1U << (irq)) & PERMITTED_IRQS) != 0)

/* IRQ count to disable an IRQ. */
#define IRQ_DISABLED 0xff

/* Per-pin router state; 'lock' serializes reg/active_count updates. */
static struct pirq {
	uint8_t reg;		/* PIRQ_DIS bit plus routed IRQ number */
	int use_count;		/* devices allocated to this pin */
	int active_count;	/* sources currently asserting this pin */
	pthread_mutex_t lock;
} pirqs[8];

/* Per-IRQ use counts; IRQ_DISABLED marks reserved/forbidden IRQs. */
static u_char irq_counts[16];
/* Non-zero during cold configuration; cleared by pirq_alloc_pin(). */
static int pirq_cold = 1;
/*
 * A pin is usable only when it is not disabled and its routed IRQ is
 * one of the permitted values; a reserved IRQ number behaves exactly
 * like a disabled pin.
 */
static bool
pirq_valid_irq(int reg)
{
	return (!(reg & PIRQ_DIS) && IRQ_PERMITTED(reg & PIRQ_IRQ));
}
uint8_t
pirq_read(int pin)
{
assert(pin > 0 && (unsigned)pin <= nitems(pirqs));
return (pirqs[pin - 1].reg);
}
/*
 * Route 1-based 'pin' to the IRQ (and disable bit) in 'val'.  If the
 * pin currently has active sources, the assertion is migrated from the
 * old IRQ to the new one so no interrupt is lost across the reroute.
 */
void
pirq_write(int pin, uint8_t val)
{
	struct pirq *pirq;

	assert(pin > 0 && (unsigned)pin <= nitems(pirqs));
	pirq = &pirqs[pin - 1];
	pthread_mutex_lock(&pirq->lock);
	if (pirq->reg != (val & (PIRQ_DIS | PIRQ_IRQ))) {
		/* Drop the assertion on the previously routed IRQ... */
		if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg))
			xh_vm_isa_deassert_irq(pirq->reg & PIRQ_IRQ, -1);
		pirq->reg = val & (PIRQ_DIS | PIRQ_IRQ);
		/* ...and re-raise it on the new routing, if usable. */
		if (pirq->active_count != 0 && pirq_valid_irq(pirq->reg))
			xh_vm_isa_assert_irq(pirq->reg & PIRQ_IRQ, -1);
	}
	pthread_mutex_unlock(&pirq->lock);
}
/*
 * Permanently reserve 'irq' for a non-PCI use (e.g. an LPC UART) so
 * the router will never hand it to a PIRQ pin.  Cold-config only.
 */
void
pci_irq_reserve(int irq)
{
	assert(irq >= 0 && (unsigned)irq < nitems(irq_counts));
	assert(pirq_cold);
	assert(irq_counts[irq] == IRQ_DISABLED || irq_counts[irq] == 0);
	irq_counts[irq] = IRQ_DISABLED;
}
/* Account for one additional user of 'irq' during cold configuration. */
void
pci_irq_use(int irq)
{
	assert(irq >= 0 && (unsigned)irq < nitems(irq_counts));
	assert(pirq_cold);
	assert(irq_counts[irq] != IRQ_DISABLED);
	irq_counts[irq] += 1;
}
void
pci_irq_init(void)
{
unsigned i;
for (i = 0; i < nitems(pirqs); i++) {
pirqs[i].reg = PIRQ_DIS;
pirqs[i].use_count = 0;
pirqs[i].active_count = 0;
pthread_mutex_init(&pirqs[i].lock, NULL);
}
for (i = 0; i < nitems(irq_counts); i++) {
if (IRQ_PERMITTED(i))
irq_counts[i] = 0;
else
irq_counts[i] = IRQ_DISABLED;
}
}
/*
 * Assert a device's legacy interrupt.  A device routed through a PIRQ
 * pin raises the shared ISA IRQ only on the 0->1 active_count
 * transition; otherwise (or when the pin's IRQ is invalid) only the
 * I/O APIC pin is asserted.
 */
void
pci_irq_assert(struct pci_devinst *pi)
{
	struct pirq *pirq;

	if (pi->pi_lintr.pirq_pin > 0) {
		assert((unsigned)pi->pi_lintr.pirq_pin <= nitems(pirqs));
		pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
		pthread_mutex_lock(&pirq->lock);
		pirq->active_count++;
		if (pirq->active_count == 1 && pirq_valid_irq(pirq->reg)) {
			/*
			 * NOTE(review): the ioapic_irq is passed here and the
			 * explicit ioapic assert below is skipped — presumably
			 * xh_vm_isa_assert_irq covers both; confirm.
			 */
			xh_vm_isa_assert_irq(pirq->reg & PIRQ_IRQ, pi->pi_lintr.ioapic_irq);
			pthread_mutex_unlock(&pirq->lock);
			return;
		}
		pthread_mutex_unlock(&pirq->lock);
	}
	xh_vm_ioapic_assert_irq(pi->pi_lintr.ioapic_irq);
}
/*
 * Deassert a device's legacy interrupt: the mirror of pci_irq_assert().
 * The shared ISA IRQ is dropped only on the 1->0 active_count
 * transition; otherwise only the I/O APIC pin is deasserted.
 */
void
pci_irq_deassert(struct pci_devinst *pi)
{
	struct pirq *pirq;

	if (pi->pi_lintr.pirq_pin > 0) {
		assert((unsigned)pi->pi_lintr.pirq_pin <= nitems(pirqs));
		pirq = &pirqs[pi->pi_lintr.pirq_pin - 1];
		pthread_mutex_lock(&pirq->lock);
		pirq->active_count--;
		if (pirq->active_count == 0 && pirq_valid_irq(pirq->reg)) {
			xh_vm_isa_deassert_irq(pirq->reg & PIRQ_IRQ,
			    pi->pi_lintr.ioapic_irq);
			pthread_mutex_unlock(&pirq->lock);
			return;
		}
		pthread_mutex_unlock(&pirq->lock);
	}
	xh_vm_ioapic_deassert_irq(pi->pi_lintr.ioapic_irq);
}
/*
 * Allocate the least-used PIRQ pin for a new device (returned 1-based).
 * If the chosen pin is not yet routed, route it to the least-used
 * permitted IRQ and make that IRQ level triggered.  Ends the "cold"
 * configuration phase.
 */
int
pirq_alloc_pin(void)
{
	int best_count, best_irq, best_pin, irq, pin;

	pirq_cold = 0;

	/* First, find the least-used PIRQ pin. */
	best_pin = 0;
	best_count = pirqs[0].use_count;
	for (pin = 1; (unsigned)pin < nitems(pirqs); pin++) {
		if (pirqs[pin].use_count < best_count) {
			best_pin = pin;
			best_count = pirqs[pin].use_count;
		}
	}
	pirqs[best_pin].use_count++;

	/* Second, route this pin to an IRQ. */
	if (pirqs[best_pin].reg == PIRQ_DIS) {
		best_irq = -1;
		best_count = 0;
		for (irq = 0; (unsigned)irq < nitems(irq_counts); irq++) {
			/* Skip IRQs reserved for non-PCI devices. */
			if (irq_counts[irq] == IRQ_DISABLED)
				continue;
			if (best_irq == -1 || irq_counts[irq] < best_count) {
				best_irq = irq;
				best_count = irq_counts[irq];
			}
		}
		assert(best_irq >= 0);
		irq_counts[best_irq]++;
		pirqs[best_pin].reg = (uint8_t)best_irq;
		/* PCI INTx is level triggered; program the PIC/ELCR to match. */
		xh_vm_isa_set_irq_trigger(best_irq, LEVEL_TRIGGER);
	}

	return (best_pin + 1);
}
/* Return the ISA IRQ currently routed to 1-based 'pin'. */
int
pirq_irq(int pin)
{
	assert(pin > 0 && (unsigned)pin <= nitems(pirqs));

	return (PIRQ_IRQ & pirqs[pin - 1].reg);
}
/* XXX: Generate $PIR table. */
/*
 * Emit the ACPI DSDT objects for the PIRQ router: a PIRV validation
 * helper, then one link device LNKA..LNKH per pin with _STA/_PRS/_CRS/
 * _DIS/_SRS methods operating on the PIRx config registers declared in
 * the LPC bridge's DSDT node.
 */
static void
pirq_dsdt(void)
{
	char *irq_prs, *old;
	int irq, pin;

	/* Build the comma-separated list of permitted IRQs used by _PRS. */
	irq_prs = NULL;
	for (irq = 0; (unsigned)irq < nitems(irq_counts); irq++) {
		if (!IRQ_PERMITTED(irq))
			continue;
		if (irq_prs == NULL)
			asprintf(&irq_prs, "%d", irq);
		else {
			old = irq_prs;
			asprintf(&irq_prs, "%s,%d", old, irq);
			free(old);
		}
	}

	/*
	 * A helper method to validate a link register's value. This
	 * duplicates pirq_valid_irq().
	 */
	dsdt_line("");
	dsdt_line("Method (PIRV, 1, NotSerialized)");
	dsdt_line("{");
	dsdt_line(" If (And (Arg0, 0x%02X))", PIRQ_DIS);
	dsdt_line(" {");
	dsdt_line(" Return (0x00)");
	dsdt_line(" }");
	dsdt_line(" And (Arg0, 0x%02X, Local0)", PIRQ_IRQ);
	dsdt_line(" If (LLess (Local0, 0x03))");
	dsdt_line(" {");
	dsdt_line(" Return (0x00)");
	dsdt_line(" }");
	dsdt_line(" If (LEqual (Local0, 0x08))");
	dsdt_line(" {");
	dsdt_line(" Return (0x00)");
	dsdt_line(" }");
	dsdt_line(" If (LEqual (Local0, 0x0D))");
	dsdt_line(" {");
	dsdt_line(" Return (0x00)");
	dsdt_line(" }");
	dsdt_line(" Return (0x01)");
	dsdt_line("}");

	for (pin = 0; (unsigned)pin < nitems(pirqs); pin++) {
		dsdt_line("");
		dsdt_line("Device (LNK%c)", 'A' + pin);
		dsdt_line("{");
		dsdt_line(" Name (_HID, EisaId (\"PNP0C0F\"))");
		dsdt_line(" Name (_UID, 0x%02X)", pin + 1);
		dsdt_line(" Method (_STA, 0, NotSerialized)");
		dsdt_line(" {");
		dsdt_line(" If (PIRV (PIR%c))", 'A' + pin);
		dsdt_line(" {");
		dsdt_line(" Return (0x0B)");
		dsdt_line(" }");
		dsdt_line(" Else");
		dsdt_line(" {");
		dsdt_line(" Return (0x09)");
		dsdt_line(" }");
		dsdt_line(" }");
		dsdt_line(" Name (_PRS, ResourceTemplate ()");
		dsdt_line(" {");
		dsdt_line(" IRQ (Level, ActiveLow, Shared, )");
		dsdt_line(" {%s}", irq_prs);
		dsdt_line(" })");
		dsdt_line(" Name (CB%02X, ResourceTemplate ()", pin + 1);
		dsdt_line(" {");
		dsdt_line(" IRQ (Level, ActiveLow, Shared, )");
		dsdt_line(" {}");
		dsdt_line(" })");
		dsdt_line(" CreateWordField (CB%02X, 0x01, CIR%c)",
		    pin + 1, 'A' + pin);
		dsdt_line(" Method (_CRS, 0, NotSerialized)");
		dsdt_line(" {");
		dsdt_line(" And (PIR%c, 0x%02X, Local0)", 'A' + pin,
		    PIRQ_DIS | PIRQ_IRQ);
		dsdt_line(" If (PIRV (Local0))");
		dsdt_line(" {");
		dsdt_line(" ShiftLeft (0x01, Local0, CIR%c)", 'A' + pin);
		dsdt_line(" }");
		dsdt_line(" Else");
		dsdt_line(" {");
		dsdt_line(" Store (0x00, CIR%c)", 'A' + pin);
		dsdt_line(" }");
		dsdt_line(" Return (CB%02X)", pin + 1);
		dsdt_line(" }");
		dsdt_line(" Method (_DIS, 0, NotSerialized)");
		dsdt_line(" {");
		dsdt_line(" Store (0x80, PIR%c)", 'A' + pin);
		dsdt_line(" }");
		dsdt_line(" Method (_SRS, 1, NotSerialized)");
		dsdt_line(" {");
		dsdt_line(" CreateWordField (Arg0, 0x01, SIR%c)", 'A' + pin);
		dsdt_line(" FindSetRightBit (SIR%c, Local0)", 'A' + pin);
		dsdt_line(" Store (Decrement (Local0), PIR%c)", 'A' + pin);
		dsdt_line(" }");
		dsdt_line("}");
	}
	free(irq_prs);
}
LPC_DSDT(pirq_dsdt);

425
vendor/github.com/docker/hyperkit/src/lib/pci_lpc.c generated vendored Normal file
View File

@@ -0,0 +1,425 @@
/*-
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* Copyright (c) 2013 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/acpi.h>
#include <xhyve/inout.h>
#include <xhyve/dbgport.h>
#include <xhyve/pci_emul.h>
#include <xhyve/pci_irq.h>
#include <xhyve/pci_lpc.h>
#include <xhyve/uart_emul.h>
/* 8259A master/slave PIC I/O bases. */
#define IO_ICU1 0x20
#define IO_ICU2 0xA0

/* Linker sets collecting DSDT and system-resource contributors. */
SET_DECLARE(lpc_dsdt_set, struct lpc_dsdt);
SET_DECLARE(lpc_sysres_set, struct lpc_sysres);

/* PIC edge/level control register. */
#define ELCR_PORT 0x4d0
SYSRES_IO(ELCR_PORT, 2);

#define IO_TIMER1_PORT 0x40

/* NMI status/control port. */
#define NMISC_PORT 0x61
SYSRES_IO(NMISC_PORT, 1);

/* The single LPC bridge instance; only one may be configured. */
static struct pci_devinst *lpc_bridge;

#define LPC_UART_NUM 2
/* Per-COM-port state for the two legacy UARTs. */
static struct lpc_uart_softc {
	struct uart_softc *uart_softc;	/* underlying 16550 emulation */
	const char *opts;		/* backend options, e.g. "stdio" */
	const char *name;		/* "COM1" / "COM2" */
	int iobase;			/* legacy I/O base port */
	int irq;			/* legacy ISA IRQ */
	int enabled;			/* set once fully initialized */
} lpc_uart_softc[LPC_UART_NUM];

static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" };
/*
 * LPC device configuration is in the following form:
 * <lpc_device_name>[,<options>]
 * For e.g. "com1,stdio"
 *
 * On success the options substring (which points into the strdup'd
 * copy) is stored in lpc_uart_softc[], so the copy is deliberately
 * kept alive.  Returns 0 on success, -1 on failure.
 */
int
lpc_device_parse(const char *opts)
{
	int unit, error;
	char *str, *cpy, *lpcdev;

	error = -1;
	str = cpy = strdup(opts);
	if (cpy == NULL)	/* out of memory: strsep(NULL-deref) otherwise */
		return (error);
	lpcdev = strsep(&str, ",");
	if (lpcdev != NULL) {
		for (unit = 0; unit < LPC_UART_NUM; unit++) {
			if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) {
				/* 'opts'/'name' keep pointing into 'cpy'. */
				lpc_uart_softc[unit].opts = str;
				lpc_uart_softc[unit].name = lpc_uart_names[unit];
				error = 0;
				goto done;
			}
		}
	}

done:
	if (error)
		free(cpy);

	return (error);
}
/* Pulse the UART's edge-triggered ISA interrupt. */
static void
lpc_uart_intr_assert(void *arg)
{
	struct lpc_uart_softc *sc;

	sc = arg;
	assert(sc->irq >= 0);
	xh_vm_isa_pulse_irq(sc->irq, sc->irq);
}
/* Deassert callback: intentionally a no-op for the LPC COM ports. */
static void
lpc_uart_intr_deassert(UNUSED void *arg)
{
	/*
	 * The COM devices on the LPC bus generate edge triggered interrupts,
	 * so nothing more to do here.
	 */
}
/*
 * I/O port handler for an LPC COM device.  Accepts 1- and 2-byte
 * accesses at offsets relative to the UART's base port; any other
 * access width is rejected.
 */
static int
lpc_uart_io_handler(UNUSED int vcpu, int in, int port, int bytes, uint32_t *eax,
    void *arg)
{
	struct lpc_uart_softc *sc = arg;
	int reg;

	reg = port - sc->iobase;

	if (bytes == 1) {
		if (in)
			*eax = uart_read(sc->uart_softc, reg);
		else
			uart_write(sc->uart_softc, reg, (uint8_t)*eax);
	} else if (bytes == 2) {
		/* Split a word access into two consecutive byte accesses. */
		if (in) {
			*eax = (uint32_t)uart_read(sc->uart_softc, reg);
			*eax |= (uint32_t)(uart_read(sc->uart_softc, reg + 1) << 8);
		} else {
			uart_write(sc->uart_softc, reg, (uint8_t)*eax);
			uart_write(sc->uart_softc, reg + 1, (uint8_t)(*eax >> 8));
		}
	} else {
		return (-1);
	}

	return (0);
}
/*
 * Bring up the legacy COM1/COM2 devices: allocate their fixed I/O
 * base/IRQ, reserve the IRQ from the PIRQ router, attach the backend
 * selected by lpc_device_parse(), and register the port handlers.
 * Returns 0 on success, -1 on any failure.
 */
static int
lpc_init(void)
{
	struct lpc_uart_softc *sc;
	struct inout_port iop;
	int unit, error;

	/* COM1 and COM2 */
	for (unit = 0; unit < LPC_UART_NUM; unit++) {
		sc = &lpc_uart_softc[unit];

		if (uart_legacy_alloc(unit, &sc->iobase, &sc->irq) != 0) {
			fprintf(stderr, "Unable to allocate resources for "
			    "LPC device %s\n", sc->name);
			return (-1);
		}

		/* Keep the PIRQ router from routing PCI pins to this IRQ. */
		pci_irq_reserve(sc->irq);

		sc->uart_softc = uart_init(lpc_uart_intr_assert,
		    lpc_uart_intr_deassert, sc);

		if (uart_set_backend(sc->uart_softc, sc->opts, sc->name) != 0) {
			fprintf(stderr, "Unable to initialize backend '%s' "
			    "for LPC device %s\n", sc->opts, sc->name);
			return (-1);
		}

		bzero(&iop, sizeof(struct inout_port));
		iop.name = sc->name;
		iop.port = sc->iobase;
		iop.size = UART_IO_BAR_SIZE;
		iop.flags = IOPORT_F_INOUT;
		iop.handler = lpc_uart_io_handler;
		iop.arg = sc;

		error = register_inout(&iop);
		assert(error == 0);
		sc->enabled = 1;
	}

	return (0);
}
/*
 * Emit the DSDT node for the ISA (LPC) bridge: the PIRA-PIRH
 * config-space fields used by the PIRQ link devices, every registered
 * LPC_DSDT contributor, and the legacy PIC and timer devices.
 */
static void
pci_lpc_write_dsdt(struct pci_devinst *pi)
{
	struct lpc_dsdt **ldpp, *ldp;

	dsdt_line("");
	dsdt_line("Device (ISA)");
	dsdt_line("{");
	dsdt_line(" Name (_ADR, 0x%04X%04X)", pi->pi_slot, pi->pi_func);
	dsdt_line(" OperationRegion (LPCR, PCI_Config, 0x00, 0x100)");
	dsdt_line(" Field (LPCR, AnyAcc, NoLock, Preserve)");
	dsdt_line(" {");
	dsdt_line(" Offset (0x60),");
	dsdt_line(" PIRA, 8,");
	dsdt_line(" PIRB, 8,");
	dsdt_line(" PIRC, 8,");
	dsdt_line(" PIRD, 8,");
	dsdt_line(" Offset (0x68),");
	dsdt_line(" PIRE, 8,");
	dsdt_line(" PIRF, 8,");
	dsdt_line(" PIRG, 8,");
	dsdt_line(" PIRH, 8");
	dsdt_line(" }");
	dsdt_line("");

	/* Pull in all LPC_DSDT() contributors (UARTs, PIRQ links, SIO). */
	dsdt_indent(1);
	SET_FOREACH(ldpp, lpc_dsdt_set) {
		ldp = *ldpp;
		ldp->handler();
	}

	dsdt_line("");
	dsdt_line("Device (PIC)");
	dsdt_line("{");
	dsdt_line(" Name (_HID, EisaId (\"PNP0000\"))");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");
	dsdt_indent(2);
	dsdt_fixed_ioport(IO_ICU1, 2);
	dsdt_fixed_ioport(IO_ICU2, 2);
	dsdt_fixed_irq(2);
	dsdt_unindent(2);
	dsdt_line(" })");
	dsdt_line("}");

	dsdt_line("");
	dsdt_line("Device (TIMR)");
	dsdt_line("{");
	dsdt_line(" Name (_HID, EisaId (\"PNP0100\"))");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");
	dsdt_indent(2);
	dsdt_fixed_ioport(IO_TIMER1_PORT, 4);
	dsdt_fixed_irq(0);
	dsdt_unindent(2);
	dsdt_line(" })");
	dsdt_line("}");
	dsdt_unindent(1);

	dsdt_line("}");
}
/*
 * Emit a PNP0C02 "SIO" device whose _CRS claims all registered legacy
 * system resources (SYSRES_IO/SYSRES_MEM) so the guest OS leaves them
 * alone.
 */
static void
pci_lpc_sysres_dsdt(void)
{
	struct lpc_sysres **lspp, *lsp;

	dsdt_line("");
	dsdt_line("Device (SIO)");
	dsdt_line("{");
	dsdt_line(" Name (_HID, EisaId (\"PNP0C02\"))");
	dsdt_line(" Name (_CRS, ResourceTemplate ()");
	dsdt_line(" {");

	dsdt_indent(2);
	SET_FOREACH(lspp, lpc_sysres_set) {
		lsp = *lspp;
		switch (lsp->type) {
		case LPC_SYSRES_IO:
			dsdt_fixed_ioport((uint16_t)lsp->base,
			    (uint16_t)lsp->length);
			break;
		case LPC_SYSRES_MEM:
			dsdt_fixed_mem32(lsp->base, lsp->length);
			break;
		}
	}
	dsdt_unindent(2);

	dsdt_line(" })");
	dsdt_line("}");
}
LPC_DSDT(pci_lpc_sysres_dsdt);
/*
 * Emit a PNP0501 device per enabled COM port with its fixed I/O range
 * and IRQ.
 */
static void
pci_lpc_uart_dsdt(void)
{
	struct lpc_uart_softc *sc;
	int unit;

	for (unit = 0; unit < LPC_UART_NUM; unit++) {
		sc = &lpc_uart_softc[unit];
		/* Skip ports that lpc_init() did not bring up. */
		if (!sc->enabled)
			continue;
		dsdt_line("");
		dsdt_line("Device (%s)", lpc_uart_names[unit]);
		dsdt_line("{");
		dsdt_line(" Name (_HID, EisaId (\"PNP0501\"))");
		dsdt_line(" Name (_UID, %d)", unit + 1);
		dsdt_line(" Name (_CRS, ResourceTemplate ()");
		dsdt_line(" {");
		dsdt_indent(2);
		dsdt_fixed_ioport((uint16_t)sc->iobase, UART_IO_BAR_SIZE);
		dsdt_fixed_irq((uint8_t) sc->irq);
		dsdt_unindent(2);
		dsdt_line(" })");
		dsdt_line("}");
	}
}
LPC_DSDT(pci_lpc_uart_dsdt);
/*
 * Config-space write hook: only single-byte writes to the PIRQ routing
 * registers are intercepted (0x60-0x63 -> pins 1-4, 0x68-0x6b ->
 * pins 5-8); everything else falls through to the default handling.
 */
static int
pci_lpc_cfgwrite(UNUSED int vcpu, struct pci_devinst *pi, int coff, int bytes,
    uint32_t val)
{
	int pin;

	if (bytes != 1)
		return (-1);

	if (coff >= 0x60 && coff <= 0x63)
		pin = coff - 0x60 + 1;
	else if (coff >= 0x68 && coff <= 0x6b)
		pin = coff - 0x68 + 5;
	else
		return (-1);

	pirq_write(pin, (uint8_t)val);
	/* Reflect the (possibly masked) routed value back to config space. */
	pci_set_cfgdata8(pi, coff, pirq_read(pin));
	return (0);
}
/* The LPC bridge exposes no BARs; BAR writes are ignored. */
static void
pci_lpc_write(UNUSED int vcpu, UNUSED struct pci_devinst *pi, UNUSED int baridx,
    UNUSED uint64_t offset, UNUSED int size, UNUSED uint64_t value)
{
}
/* The LPC bridge exposes no BARs; BAR reads return 0. */
static uint64_t
pci_lpc_read(UNUSED int vcpu, UNUSED struct pci_devinst *pi, UNUSED int baridx,
    UNUSED uint64_t offset, UNUSED int size)
{
	return (0);
}
/* Config-space IDs presented for the LPC bridge. */
#define LPC_DEV 0x7000
#define LPC_VENDOR 0x8086

/*
 * Create the (single) LPC bridge on bus 0 and bring up the legacy COM
 * devices.  Returns 0 on success, -1 on configuration errors.
 */
static int
pci_lpc_init(struct pci_devinst *pi, UNUSED char *opts)
{
	/*
	 * Do not allow more than one LPC bridge to be configured.
	 */
	if (lpc_bridge != NULL) {
		fprintf(stderr, "Only one LPC bridge is allowed.\n");
		return (-1);
	}

	/*
	 * Enforce that the LPC can only be configured on bus 0. This
	 * simplifies the ACPI DSDT because it can provide a decode for
	 * all legacy i/o ports behind bus 0.
	 */
	if (pi->pi_bus != 0) {
		fprintf(stderr, "LPC bridge can be present only on bus 0.\n");
		return (-1);
	}

	if (lpc_init() != 0)
		return (-1);

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, LPC_DEV);
	pci_set_cfgdata16(pi, PCIR_VENDOR, LPC_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_BRIDGE);
	pci_set_cfgdata8(pi, PCIR_SUBCLASS, PCIS_BRIDGE_ISA);

	lpc_bridge = pi;

	return (0);
}
/*
 * Return a malloc'd ACPI path (with trailing comma) naming the link
 * device for 1-based 'pin', or NULL when no LPC bridge is configured.
 * The caller frees the result.
 */
char *
lpc_pirq_name(int pin)
{
	char *name = NULL;

	if (lpc_bridge != NULL)
		asprintf(&name, "\\_SB.PC00.ISA.LNK%c,", 'A' + pin - 1);
	return (name);
}
/* Mirror the routed PIRQ registers into the bridge's config space. */
void
lpc_pirq_routed(void)
{
	int i;

	if (lpc_bridge == NULL)
		return;

	/* Pins 1-4 live at 0x60-0x63, pins 5-8 at 0x68-0x6b. */
	for (i = 0; i < 4; i++) {
		pci_set_cfgdata8(lpc_bridge, 0x60 + i, pirq_read(i + 1));
		pci_set_cfgdata8(lpc_bridge, 0x68 + i, pirq_read(i + 5));
	}
}
/* LPC bridge registration with the PCI emulation framework. */
static struct pci_devemu pci_de_lpc = {
	.pe_emu = "lpc",
	.pe_init = pci_lpc_init,
	.pe_write_dsdt = pci_lpc_write_dsdt,
	.pe_cfgwrite = pci_lpc_cfgwrite,
	.pe_barwrite = pci_lpc_write,
	.pe_barread = pci_lpc_read
};
PCI_EMUL_SET(pci_de_lpc);

119
vendor/github.com/docker/hyperkit/src/lib/pci_uart.c generated vendored Normal file
View File

@@ -0,0 +1,119 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <stdio.h>
#include <xhyve/support/misc.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/uart_emul.h>
/*
* Pick a PCI vid/did of a chip with a single uart at
* BAR0, that most versions of FreeBSD can understand:
* Siig CyberSerial 1-port.
*/
#define COM_VENDOR 0x131f
#define COM_DEV 0x2000
/* UART interrupt raised: assert the device's PCI legacy interrupt. */
static void
pci_uart_intr_assert(void *arg)
{
	pci_lintr_assert((struct pci_devinst *)arg);
}
/* UART interrupt cleared: deassert the device's PCI legacy interrupt. */
static void
pci_uart_intr_deassert(void *arg)
{
	pci_lintr_deassert((struct pci_devinst *)arg);
}
/* BAR0 write: single-byte access into the emulated 16550 registers. */
static void
pci_uart_write(UNUSED int vcpu, struct pci_devinst *pi, int baridx, uint64_t offset,
    int size, uint64_t value)
{
	assert(baridx == 0);
	assert(size == 1);

	uart_write(pi->pi_arg, (int)offset, (uint8_t)value);
}
/* BAR0 read: single-byte access from the emulated 16550 registers. */
static uint64_t
pci_uart_read(UNUSED int vcpu, struct pci_devinst *pi, int baridx,
    uint64_t offset, int size)
{
	assert(baridx == 0);
	assert(size == 1);

	return ((uint64_t)uart_read(pi->pi_arg, (int)offset));
}
/*
 * Instantiate one PCI serial port: an I/O BAR for the 16550 registers,
 * a legacy interrupt, and a backend chosen by 'opts' (e.g. "stdio").
 * Returns 0 on success, -1 on failure.
 */
static int
pci_uart_init(struct pci_devinst *pi, char *opts)
{
	struct uart_softc *sc;
	char *name;

	pci_emul_alloc_bar(pi, 0, PCIBAR_IO, UART_IO_BAR_SIZE);
	pci_lintr_request(pi);

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, COM_DEV);
	pci_set_cfgdata16(pi, PCIR_VENDOR, COM_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM);

	sc = uart_init(pci_uart_intr_assert, pci_uart_intr_deassert, pi);
	pi->pi_arg = sc;

	/* asprintf leaves 'name' undefined on failure, so check it. */
	if (asprintf(&name, "pci uart at %d:%d", pi->pi_slot, pi->pi_func) == -1) {
		fprintf(stderr, "Unable to allocate name for pci uart\n");
		return (-1);
	}
	if (uart_set_backend(sc, opts, name) != 0) {
		fprintf(stderr, "Unable to initialize backend '%s' for %s\n", opts, name);
		free(name);
		return (-1);
	}
	free(name);
	return (0);
}
/* Single-port PCI serial device registration. */
static struct pci_devemu pci_de_com = {
	.pe_emu = "uart",
	.pe_init = pci_uart_init,
	.pe_barwrite = pci_uart_write,
	.pe_barread = pci_uart_read
};
PCI_EMUL_SET(pci_de_com);

View File

@@ -0,0 +1,546 @@
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/uio.h>
#include <arpa/inet.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/linker_set.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/virtio.h>
/* Feature bit: a mount tag is present in the config space. */
#define VIRTIO_9P_MOUNT_TAG 1

static int pci_vt9p_debug = 0;
#define DPRINTF(params) if (pci_vt9p_debug) printf params

/* XXX issues with larger buffers elsewhere in stack */
#define BUFSIZE (1 << 18)
/* NOTE(review): appears to bound descriptors per BUFSIZE transfer — confirm */
#define MAXDESC (BUFSIZE / 4096 + 4)
#define VT9P_RINGSZ (BUFSIZE / 4096 * 4)

/* Virtio config space: length-prefixed mount tag shown to the guest. */
struct virtio_9p_config {
	uint16_t tag_len;
	uint8_t tag[256];
};
/*
 * Per-device softc
 */
/* Bookkeeping for one in-flight request's writeable (response) side. */
struct pci_vt9p_out {
	struct iovec wiov[MAXDESC];	/* guest buffers for the response */
	struct vqueue_info *vq;		/* ring the chain came from */
	int inuse;
	uint16_t tag;			/* 9p tag of the request */
	uint16_t idx;			/* descriptor chain head index */
	uint16_t otag;
};

struct pci_vt9p_softc {
	struct virtio_softc v9sc_vs;
	struct vqueue_info v9sc_vq;
	pthread_mutex_t v9sc_mtx;	/* NOTE(review): lock usage not in view */
	pthread_mutex_t v9sc_mtx2;
	pthread_t v9sc_thread;		/* started by the lazy socket init */
	struct virtio_9p_config v9sc_cfg;
	struct pci_vt9p_out v9sc_out[VT9P_RINGSZ];
	/* -1 means not connected yet */
	int v9sc_sock;
	int v9sc_inflight;
	int port;	/* loopback TCP port, or -1 to use 'path' */
	char *path;	/* Unix socket path, used when port == -1 */
};
/* Forward declarations for the virtio ops table below. */
static void pci_vt9p_reset(void *);
static void pci_vt9p_notify(void *, struct vqueue_info *);
static int pci_vt9p_cfgread(void *, int, int, uint32_t *);
static int pci_vt9p_cfgwrite(void *, int, int, uint32_t);
static void pci_vt9p_lazy_initialise_socket(struct pci_vt9p_softc *sc);
static void *pci_vt9p_thread(void *vsc);

static struct virtio_consts vt9p_vi_consts = {
	"vt9p", /* our name */
	1, /* we support 1 virtqueue */
	0, /* config reg size */
	pci_vt9p_reset, /* reset */
	pci_vt9p_notify, /* device-wide qnotify */
	pci_vt9p_cfgread, /* read virtio config */
	pci_vt9p_cfgwrite, /* write virtio config */
	NULL, /* apply negotiated features */
	VIRTIO_9P_MOUNT_TAG, /* our capabilities */
};
/* Device reset: return the virtio transport state to its initial values. */
static void
pci_vt9p_reset(void *vsc)
{
	struct pci_vt9p_softc *sc = vsc;

	DPRINTF(("vt9p: device reset requested !\n"));
	vi_reset_dev(&sc->v9sc_vs);
}
/*
 * Lazily connect the 9p transport: either a loopback TCP port
 * (sc->port != -1) or a Unix domain socket at sc->path.  Retries the
 * connect for up to ~10s, then grows the socket buffers and starts the
 * reader thread.  On failure the device is left non-functional
 * (v9sc_sock stays -1 and no thread is created).
 */
static void
pci_vt9p_lazy_initialise_socket(struct pci_vt9p_softc *sc)
{
	struct sockaddr_in sa_in;
	struct sockaddr_un sa_un;
	int so;
	socklen_t sol = (socklen_t) sizeof(int);

	if (sc->v9sc_sock != -1)
		return;

	/* TCP endpoint, used when sc->port != -1. */
	memset(&sa_in, 0, sizeof(sa_in));
	sa_in.sin_family = AF_INET;
	sa_in.sin_port = htons((uint16_t)sc->port);
	sa_in.sin_addr.s_addr = htonl(INADDR_LOOPBACK);

	/*
	 * Unix domain endpoint, used when sc->port == -1.  Zero the
	 * structure BEFORE filling it in: the previous code memset() it
	 * after assigning sun_family, wiping the address family.
	 */
	memset(&sa_un, 0, sizeof(sa_un));
	sa_un.sun_family = AF_UNIX;
	strncpy(sa_un.sun_path, sc->path, sizeof(sa_un.sun_path)-1);

	int domain = (sc->port != -1)?AF_INET:AF_UNIX;
	struct sockaddr *sa = (sc->port != -1)?(struct sockaddr*)&sa_in:(struct sockaddr*)&sa_un;
	size_t sa_len = (sc->port != -1)?sizeof(sa_in):sizeof(sa_un);
	int max_attempts = 200; /* 200 * 50ms = 10s */
	do {
		sc->v9sc_sock = socket(domain, SOCK_STREAM, 0);
		if (sc->v9sc_sock == -1) {
			if (sc->port != -1) {
				fprintf(stderr, "virtio-9p: failed to connect to port %d: out of file descriptors\n", sc->port);
			} else {
				fprintf(stderr, "virtio-9p: failed to connect to path %s: out of file descriptors\n", sc->path);
			}
			/* The device won't work */
			return;
		}
		if (connect(sc->v9sc_sock, sa, (socklen_t)sa_len) == -1) {
			close(sc->v9sc_sock);
			sc->v9sc_sock = -1;
			usleep(50000); /* wait 50ms before retrying */
		}
	} while ((sc->v9sc_sock == -1) && (--max_attempts > 0));
	if (sc->v9sc_sock == -1) {
		if (sc->port != -1) {
			fprintf(stderr, "virtio-9p: failed to connect to port %d\n", sc->port);
		} else {
			fprintf(stderr, "virtio-9p: failed to connect to path %s\n", sc->path);
		}
		/*
		 * The device won't work; bail out before touching the
		 * invalid descriptor or starting the reader thread.
		 */
		return;
	}
	/* Grow the socket buffers so a whole request fits, if possible. */
	if (getsockopt(sc->v9sc_sock, SOL_SOCKET, SO_SNDBUF, &so, &sol) != -1) {
		if (so < 2 * BUFSIZE) {
			so = 2 * BUFSIZE;
			(void)setsockopt(sc->v9sc_sock, SOL_SOCKET, SO_SNDBUF, &so, sol);
			(void)setsockopt(sc->v9sc_sock, SOL_SOCKET, SO_RCVBUF, &so, sol);
		}
	}
	/* pthread_create returns an errno value, not -1, on failure. */
	if (pthread_create(&sc->v9sc_thread, NULL, pci_vt9p_thread, sc) != 0) {
		perror("pthread_create");
		/* The device won't work */
	}
}
/*
 * Queue-notify handler for virtio-9p: pull one descriptor chain off the
 * virtqueue, record it in the v9sc_out bookkeeping ring, and forward the
 * request bytes to the 9P server socket. The matching response is consumed
 * asynchronously by pci_vt9p_thread().
 */
static void
pci_vt9p_notify(void *vsc, struct vqueue_info *vq)
{
	struct iovec iov[MAXDESC];
	uint16_t flags[MAXDESC];
	struct pci_vt9p_softc *sc = vsc;
	uint16_t idx;
	ssize_t n;
	int nvec, i, freevec;	/* NOTE(review): freevec is set below but never used */
	struct iovec *wiov;	/* first guest-writeable iovec (response landing area) */
	int nread, nwrite;	/* counts of readable (request) / writeable (response) iovecs */
	size_t readbytes;	/* total request bytes to push to the server */
	uint16_t tag;		/* 9P request tag (bytes 5..6 of the header) */
	uint32_t len;		/* 9P message length (bytes 0..3, little-endian) */
	uint8_t command;	/* 9P message type (byte 4) */
	uint16_t otag = 0;	/* for Tflush (108): tag of the request being flushed */
	int used = 0;		/* number of in-use bookkeeping slots seen */
	sc = vsc;
	/* Connect to the 9P server on first use. */
	pci_vt9p_lazy_initialise_socket(sc);
	/* will be a socket here */
	if (sc->v9sc_sock < 0) {
		DPRINTF(("vt9p socket invalid\r\n"));
		vq_endchains(vq, 0);
		return;
	}
	nvec = vq_getchain(vq, &idx, iov, MAXDESC, flags);
	if (nvec == -1) {
		DPRINTF(("vt9p bad descriptors\r\n"));
		return; /* what to do? */
	}
	if (nvec == 0) {
		DPRINTF(("vt9p got all the descriptors\r\n"));
		return;
	}
	DPRINTF(("vt9p got %d descriptors\r\n", nvec));
	wiov = NULL;
	nwrite = 0;
	nread = 0;
	readbytes = 0;
	tag = 0;
	freevec = -1;
	DPRINTF(("vtrnd: vt9p_notify(): %d count %d\r\n", (int)idx, nvec));
	/*
	 * Classify the chain: readable iovecs (device reads the request from
	 * them) must precede writeable ones (device writes the response into
	 * them); readable buffers appearing after a writeable one are ignored.
	 */
	for (i = 0; i < nvec; i++) {
		DPRINTF(("vt9p iovec %d len %d\r\n", i, (int)iov[i].iov_len));
		if (flags[i] & VRING_DESC_F_WRITE) {
			DPRINTF(("writeable\r\n"));
			nwrite++;
		} else {
			if (nwrite == 0) {
				nread++;
				readbytes += iov[i].iov_len;
				DPRINTF(("readable\r\n"));
			} else {
				DPRINTF(("ignoring readable buffers after writeable ones\r\n"));
			}
		}
		/* Remember the first writeable iovec: responses start there. */
		if (wiov == NULL && (flags[i] & VRING_DESC_F_WRITE)) {
			wiov = &iov[i];
			DPRINTF(("vt9p wiov is %p\r\n", (void*)wiov));
		}
	}
	/*
	 * Parse the fixed 9P header (size[4] type[1] tag[2], little-endian).
	 * do this properly -- assumes the whole header fits in iov[0].
	 */
	if (iov[0].iov_len >= 7) {
		uint8_t *ptr = (uint8_t *)iov[0].iov_base;
		len = (uint32_t)ptr[0] | ((uint32_t)ptr[1] << 8) | ((uint32_t)ptr[2] << 16) | ((uint32_t)ptr[3] << 24);
		command = ptr[4];
		tag = (uint16_t)((uint16_t)ptr[5] | ((uint16_t)ptr[6] << 8));
		DPRINTF(("vt9p len %d\r\n", (int)len));
		DPRINTF(("vt9p command %d\r\n", (int)command));
		DPRINTF(("vt9p tag %d\r\n", (int)tag));
		otag = 0;
		/* command 108 is Tflush: extract the tag being cancelled. */
		if (command == 108 && iov[0].iov_len >= 9) {
			otag = (uint16_t)((uint16_t)ptr[7] | ((uint16_t)ptr[8] << 8));
			DPRINTF(("TFlush otag %d\r\n", (int)otag));
		}
		/* Linux is buggy with writes over 1k, has a buggy zero copy codepath, fix up */
		/* command 118 is Twrite: rewrite size/count fields to match
		 * the bytes actually present in the readable iovecs. */
		if (command == 118 && iov[0].iov_len >= 23) {
			uint32_t wlen = (uint32_t)ptr[19] | ((uint32_t)ptr[20] << 8) | ((uint32_t)ptr[21] << 16) | ((uint32_t)ptr[22] << 24);
			DPRINTF(("Twrite wlen %d readbytes %d len %d\r\n", (int)wlen, (int)readbytes, (int)len));
			if (readbytes != len) {
				DPRINTF(("FIXUP! len\n"));
				ptr[0] = (uint8_t)(readbytes & 0xff);
				ptr[1] = (uint8_t)((readbytes >> 8) & 0xff);
				ptr[2] = (uint8_t)((readbytes >> 16) & 0xff);
				ptr[3] = (uint8_t)((readbytes >> 24) & 0xff);
			}
			/* XXX not sure seeing this now */
			if (wlen != readbytes - 23) {
				DPRINTF(("FIXUP! wlen\n"));
				wlen = (uint32_t) (readbytes - 23);
				ptr[19] = (uint8_t)(wlen & 0xff);
				ptr[20] = (uint8_t)((wlen >> 8) & 0xff);
				ptr[21] = (uint8_t)((wlen >> 16) & 0xff);
				ptr[22] = (uint8_t)((wlen >> 24) & 0xff);
			}
		}
	} else {
		DPRINTF(("vt9p oops split iovec for command - do this properly\r\n"));
	}
	if (nwrite == 0) {
		DPRINTF(("Nowhere to write to!!\r\n"));
	}
	/*
	 * Claim a free bookkeeping slot and record where the response for
	 * this tag must be copied (wiov) and which chain to release (idx).
	 */
	pthread_mutex_lock(&sc->v9sc_mtx2);
	sc->v9sc_inflight++;
	for (i = 0; i < VT9P_RINGSZ; i++) {
		if (sc->v9sc_out[i].inuse == 1) {
			used++;
			continue;
		}
		sc->v9sc_out[i].inuse = 1;
		memcpy(sc->v9sc_out[i].wiov, wiov, (size_t)(sizeof(struct iovec) * (size_t)nwrite));
		sc->v9sc_out[i].vq = vq;
		sc->v9sc_out[i].tag = tag;
		sc->v9sc_out[i].idx = idx;
		sc->v9sc_out[i].otag = otag;
		break;
	}
	/* Every slot busy: fatal -- crash so the VM supervisor restarts us. */
	if (used == VT9P_RINGSZ) {
		fprintf(stderr, "virtio-9p: Ring full!\n");
		_exit(1);
	}
	pthread_mutex_unlock(&sc->v9sc_mtx2);
	/*
	 * Push the request to the server, advancing through the iovec array
	 * by hand on short writes.
	 */
	i = 0;
	while (readbytes) {
		n = writev(sc->v9sc_sock, &iov[i], nread);
		if (n <= 0) {
			fprintf(stderr, "virtio-9p: unexpected EOF writing to server-- did the 9P server crash?\n");
			/* Fatal error, crash VM, let us be restarted */
			_exit(1);
		}
		DPRINTF(("vt9p: wrote to sock %d bytes\r\n", (int)n));
		readbytes -= (size_t)n;
		if (readbytes != 0) {
			/* Skip iovecs fully consumed by the short write... */
			while ((size_t)n >= iov[i].iov_len) {
				n -= iov[i].iov_len;
				i++;
			}
			/* ...and trim the partially-consumed one. */
			iov[i].iov_len -= (size_t) n;
			iov[i].iov_base = (char *) iov[i].iov_base + n;
		}
	}
}
/*
 * Reader thread: runs forever, pulling complete 9P responses off the server
 * socket, matching them by tag against the v9sc_out bookkeeping ring, copying
 * the payload into the guest's writeable iovecs and releasing the chain.
 * Fatal socket errors _exit(1) the whole VM so it can be restarted.
 */
static void *
pci_vt9p_thread(void *vsc)
{
	struct pci_vt9p_softc *sc = vsc;
	ssize_t ret;
	size_t n;
	size_t minlen = 7;	/* fixed 9P header: size[4] type[1] tag[2] */
	uint32_t len;
	uint16_t tag, otag;
	uint8_t command;
	uint8_t *ptr;
	int i, ii, j;
	struct iovec *wiov;
	uint8_t *buf;
	char ident[16];
	snprintf(ident, sizeof(ident), "9p:%s", sc->v9sc_cfg.tag);
	pthread_setname_np(ident);
	/* Staging buffer for one full response; responses must fit BUFSIZE. */
	buf = calloc(1, BUFSIZE);
	if (! buf) {
		fprintf(stderr, "virtio-p9: memory allocation failed\n");
		_exit(1);
	}
	while (1) {
		/* Read exactly the 7-byte header, looping over short reads. */
		ptr = buf;
		n = 0;
		while (n < minlen) {
			ret = read(sc->v9sc_sock, ptr, minlen - n);
			if (ret <= 0) {
				fprintf(stderr, "virtio-9p: unexpected EOF reading -- did the 9P server crash?\n");
				/* Fatal error, crash VM, let us be restarted */
				_exit(1);
			}
			n += (size_t) ret;
			ptr += ret;
		}
		/* Decode the little-endian header fields. */
		len = (uint32_t)buf[0] | ((uint32_t)buf[1] << 8) | ((uint32_t)buf[2] << 16) | ((uint32_t)buf[3] << 24);
		command = buf[4];
		tag = (uint16_t)((uint16_t)buf[5] | ((uint16_t)buf[6] << 8));
		DPRINTF(("[thread]Got response for tag %d command %d len %d\r\n", (int)tag, (int)command, (int)len));
		/* Read the remaining len - 7 payload bytes into buf. */
		n = (size_t)(len - minlen);
		ptr = buf + minlen;
		while (n) {
			assert(len <= BUFSIZE);
			ret = read(sc->v9sc_sock, ptr, n);
			if (ret <= 0) {
				fprintf(stderr, "virtio-9p: unexpected EOF reading-- did the 9P server crash?\n");
				/* Fatal error, crash the VM, let us be restarted */
				_exit(1);
			}
			n -= (size_t) ret;
			ptr += ret;
		}
		DPRINTF(("[thread]got complete response for tag %d len %d\r\n", (int)tag, (int)len));
		/* command 107 is Rerror: log the server's error string. */
		if (command == 107) {
			char msg[128];
			uint16_t slen = (uint16_t)((uint16_t)buf[7] | ((uint16_t)buf[8] << 8));
			/* NOTE(review): slen is not clamped to sizeof(msg)-1
			 * before the memcpy -- assumes server strings < 128. */
			memcpy(msg, &buf[9], slen);
			msg[slen] = 0;
			DPRINTF(("[thread]Rerror: %s\r\n", msg));
		}
		if (command == 109) { /* Rflush */
			/*
			 * The flushed request (otag) will get no reply of its
			 * own: find its slot and release its chain with a
			 * zero-length response.
			 * NOTE(review): the tag scan happens before taking
			 * v9sc_mtx2 -- presumably racing pci_vt9p_notify();
			 * verify the intended locking discipline.
			 */
			for (i = 0; i < VT9P_RINGSZ; i++) {
				if (sc->v9sc_out[i].tag == tag) {
					otag = sc->v9sc_out[i].otag;
					for (j = 0; j < VT9P_RINGSZ; j++) {
						if (sc->v9sc_out[j].tag == otag && sc->v9sc_out[j].inuse) {
							pthread_mutex_lock(&sc->v9sc_mtx2);
							sc->v9sc_out[j].inuse = 0;
							sc->v9sc_inflight--;
							vq_relchain(sc->v9sc_out[j].vq, sc->v9sc_out[j].idx, ((uint32_t) 0));
							pthread_mutex_unlock(&sc->v9sc_mtx2);
							break;
						}
					}
					break;
				}
			}
		}
		/*
		 * Normal completion path: copy the whole response (header
		 * included) into the guest's writeable iovecs for this tag,
		 * then release the chain and raise an interrupt.
		 */
		for (i = 0; i < VT9P_RINGSZ; i++) {
			if (sc->v9sc_out[i].tag == tag) {
				wiov = sc->v9sc_out[i].wiov;
				ii = 0;
				ptr = buf;
				n = len;
				while (n) {
					/* Copy at most one iovec's worth per pass. */
					size_t m = n;
					if (m > wiov[ii].iov_len)
						m = wiov[ii].iov_len;
					DPRINTF(("[thread]copy %d bytes to iov at %p\r\n", (int)m, wiov[ii].iov_base));
					memcpy(wiov[ii].iov_base, ptr, m);
					ptr += m;
					n -= (size_t)m;
					ii++;
				}
				DPRINTF(("[thread]release\r\n"));
				pthread_mutex_lock(&sc->v9sc_mtx2);
				vq_relchain(sc->v9sc_out[i].vq, sc->v9sc_out[i].idx, ((uint32_t) len));
				sc->v9sc_out[i].inuse = 0;
				sc->v9sc_inflight--;
				/* Generate interrupt even if some requests are outstanding, because
				   if we're using a blocking poll then we expect one request to be
				   permanently outstanding at all times. */
				DPRINTF(("[thread]endchain\r\n"));
				vq_endchains(sc->v9sc_out[i].vq, 1);
				pthread_mutex_unlock(&sc->v9sc_mtx2);
				break;
			}
		}
	}
	return NULL;
}
/*
 * Duplicate 'from' up to (but not including) the first comma, or the whole
 * string when no comma is present. The caller owns the returned heap buffer.
 * Returns NULL if the underlying allocation fails.
 */
static char *
copy_up_to_comma(const char *from)
{
	const char *comma = strchr(from, ',');

	if (comma == NULL)
		return strdup(from); /* rest of string */
	return strndup(from, (size_t)(comma - from));
}
/*
 * Device init for virtio-9p. Parses the comma-separated option string
 * (port=<n> | path=<p> | tag=<t>; exactly one of port/path is required),
 * allocates the softc and sets up the virtio/PCI plumbing. The socket
 * connection itself is deferred to pci_vt9p_lazy_initialise_socket().
 * Returns 0 on success, 1 on failure.
 */
static int
pci_vt9p_init(struct pci_devinst *pi, char *opts)
{
	struct pci_vt9p_softc *sc;
	int port = -1; /* if != -1, the port is valid. path is valid otherwise */
	char *path = "";
	char *tag = "plan9";
	sc = calloc(1, sizeof(struct pci_vt9p_softc));
	if (! sc) {
		return 1;
	}
	sc->v9sc_sock = -1;
	fprintf(stdout, "virtio-9p: initialising %s\n", opts);
	/* Walk the option string, splitting in place on commas. */
	while (1) {
		char *next;
		if (! opts)
			break;
		next = strchr(opts, ',');
		if (next)
			next[0] = '\0';
		if (strncmp(opts, "port=", 5) == 0) {
			port = atoi(&opts[5]);
			if (port == 0) {
				fprintf(stderr, "bad port: %s\r\n", opts);
				free(sc);	/* BUGFIX: don't leak the softc */
				return 1;
			}
		} else if (strncmp(opts, "path=", 5) == 0) {
			path = copy_up_to_comma(opts + 5);
		} else if (strncmp(opts, "tag=", 4) == 0) {
			tag = copy_up_to_comma(opts + 4);
		} else {
			fprintf(stderr, "invalid option: %s\r\n", opts);
			free(sc);	/* BUGFIX: don't leak the softc */
			return 1;
		}
		if (! next)
			break;
		opts = &next[1];
	}
	/* Exactly one of port / path must have been supplied. */
	if (!((port == -1) != (strcmp(path, "") == 0))) {
		fprintf(stderr, "Please pass *either* a port *or* a path. You must pass one, you must not pass both.\n");
		free(sc);	/* BUGFIX: don't leak the softc */
		return 1;
	}
	sc->port = port;
	sc->path = path;
	sc->v9sc_cfg.tag_len = (uint16_t) strlen(tag);
	/* NOTE(review): assumes v9sc_cfg.tag holds >= 256 bytes -- confirm
	 * against the struct declaration before relying on the bound. */
	if (sc->v9sc_cfg.tag_len > 256) {
		free(sc);	/* BUGFIX: don't leak the softc */
		return 1;
	}
	memcpy(sc->v9sc_cfg.tag, tag, sc->v9sc_cfg.tag_len);
	pthread_mutex_init(&sc->v9sc_mtx, NULL);
	pthread_mutex_init(&sc->v9sc_mtx2, NULL);
	vi_softc_linkup(&sc->v9sc_vs, &vt9p_vi_consts, sc, pi, &sc->v9sc_vq);
	sc->v9sc_vs.vs_mtx = &sc->v9sc_mtx;
	sc->v9sc_vq.vq_qsize = VT9P_RINGSZ;
	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_9P);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_OTHER);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_9P);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
	if (vi_intr_init(&sc->v9sc_vs, 1, fbsdrun_virtio_msix()))
		return (1);
	vi_set_io_bar(&sc->v9sc_vs, 0);
	return (0);
}
/*
 * The virtio-9p config space is read-only; refuse every write.
 * Returning non-zero tells the virtio layer the write was rejected.
 */
static int
pci_vt9p_cfgwrite(UNUSED void *vsc, int offset, UNUSED int size,
	UNUSED uint32_t value)
{
	DPRINTF(("vt9p: write to reg %d\n\r", offset));
	return (1);
}
/*
 * Copy 'size' bytes of the virtio-9p config structure, starting at
 * 'offset', into *retval. Always succeeds.
 * NOTE(review): offset/size are not range-checked here -- presumably the
 * virtio layer validates them before calling (as the vtblk twin states).
 */
static int
pci_vt9p_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vt9p_softc *sc = vsc;
	const uint8_t *src = (const uint8_t *)&sc->v9sc_cfg + offset;

	DPRINTF(("vt9p: read to reg %d\n\r", offset));
	memcpy(retval, src, (size_t)size);
	return (0);
}
/*
 * PCI device-emulation registration for virtio-9p. BAR reads/writes go
 * through the generic virtio handlers; PCI_EMUL_SET adds the entry to the
 * linker set scanned at startup.
 */
static struct pci_devemu pci_de_v9p = {
	.pe_emu = "virtio-9p",
	.pe_init = pci_vt9p_init,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_v9p);

View File

@@ -0,0 +1,423 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <sys/disk.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/linker_set.h>
#include <xhyve/support/md5.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/virtio.h>
#include <xhyve/block_if.h>
#define VTBLK_RINGSZ 128
#define VTBLK_S_OK 0
#define VTBLK_S_IOERR 1
#define VTBLK_S_UNSUPP 2
#define VTBLK_BLK_ID_BYTES 20 + 1
/* Capability bits */
#define VTBLK_F_SEG_MAX (1 << 2) /* Maximum request segments */
#define VTBLK_F_BLK_SIZE (1 << 6) /* cfg block size valid */
#define VTBLK_F_FLUSH (1 << 9) /* Cache flush support */
#define VTBLK_F_TOPOLOGY (1 << 10) /* Optimal I/O alignment */
/*
* Host capabilities
*/
#define VTBLK_S_HOSTCAPS \
( VTBLK_F_SEG_MAX | \
VTBLK_F_BLK_SIZE | \
VTBLK_F_FLUSH | \
VTBLK_F_TOPOLOGY | \
VIRTIO_RING_F_INDIRECT_DESC ) /* indirect descriptors */
/*
* Config space "registers"
*/
/* Layout mirrors the virtio-blk config space exposed to the guest. */
struct vtblk_config {
	uint64_t vbc_capacity;	/* device size in 512-byte sectors (set from blockif_size / DEV_BSIZE) */
	uint32_t vbc_size_max;	/* max segment size; 0 = not negotiated */
	uint32_t vbc_seg_max;	/* max segments per request (BLOCKIF_IOV_MAX) */
	struct {
		uint16_t cylinders;	/* legacy geometry; left zeroed here */
		uint8_t heads;
		uint8_t sectors;
	} vbc_geometry;
	uint32_t vbc_blk_size;	/* logical sector size reported by blockif */
	struct {
		uint8_t physical_block_exp;	/* log2(physical/logical sector ratio) */
		uint8_t alignment_offset;
		uint16_t min_io_size;
		uint32_t opt_io_size;
	} vbc_topology;
	uint8_t vbc_writeback;	/* writeback cache mode flag; 0 here */
} __packed;
/*
* Fixed-size block header
*/
/* Fixed header at the start of every virtio-blk request (descriptor 0). */
struct virtio_blk_hdr {
#define VBH_OP_READ 0
#define VBH_OP_WRITE 1
#define VBH_OP_FLUSH 4
#define VBH_OP_FLUSH_OUT 5
#define VBH_OP_IDENT 8
#define VBH_FLAG_BARRIER 0x80000000 /* OR'ed into vbh_type */
	uint32_t vbh_type;	/* one of the VBH_OP_* codes (barrier bit masked off) */
	uint32_t vbh_ioprio;	/* request priority; unused by this backend */
	uint64_t vbh_sector;	/* starting sector, in 512-byte units */
} __packed;
/*
* Debug printf
*/
static int pci_vtblk_debug;
#define DPRINTF(params) if (pci_vtblk_debug) printf params
/* Per-slot I/O tracking: one entry per ring index, reused across requests. */
struct pci_vtblk_ioreq {
	struct blockif_req io_req;	/* request handed to the blockif backend */
	struct pci_vtblk_softc *io_sc;	/* back-pointer to the owning device */
	uint8_t *io_status;		/* guest-visible 1-byte status location */
	uint16_t io_idx;		/* virtqueue descriptor index to release */
};
/*
* Per-device softc
*/
struct pci_vtblk_softc {
	struct virtio_softc vbsc_vs;	/* generic virtio state (must be first for linkup) */
	pthread_mutex_t vsc_mtx;	/* serialises completions (see pci_vtblk_done) */
	struct vqueue_info vbsc_vq;	/* the single request virtqueue */
	struct vtblk_config vbsc_cfg;	/* guest-visible config space */
	struct blockif_ctxt *bc;	/* backing-file context */
	char vbsc_ident[VTBLK_BLK_ID_BYTES];	/* serial string for VBH_OP_IDENT */
	struct pci_vtblk_ioreq vbsc_ios[VTBLK_RINGSZ];	/* one ioreq per ring slot */
};
static void pci_vtblk_reset(void *);
static void pci_vtblk_notify(void *, struct vqueue_info *);
static int pci_vtblk_cfgread(void *, int, int, uint32_t *);
static int pci_vtblk_cfgwrite(void *, int, int, uint32_t);
static struct virtio_consts vtblk_vi_consts = {
"vtblk", /* our name */
1, /* we support 1 virtqueue */
sizeof(struct vtblk_config), /* config reg size */
pci_vtblk_reset, /* reset */
pci_vtblk_notify, /* device-wide qnotify */
pci_vtblk_cfgread, /* read PCI config */
pci_vtblk_cfgwrite, /* write PCI config */
NULL, /* apply negotiated features */
VTBLK_S_HOSTCAPS, /* our capabilities */
};
/* Guest-initiated device reset: delegate to the generic virtio reset,
 * which clears rings, MSI-X vectors and negotiated features. */
static void
pci_vtblk_reset(void *vsc)
{
	struct pci_vtblk_softc *sc;

	sc = vsc;
	DPRINTF(("vtblk: device reset requested !\n"));
	vi_reset_dev(&sc->vbsc_vs);
}
/* xhyve: FIXME
 *
 * pci_vtblk_done seems to deadlock when called from pci_vtblk_proc?
 */
/*
 * Completion path shared by the async callback and the synchronous
 * IDENT/unsupported cases in pci_vtblk_proc(). Translates the backend's
 * errno into a virtio status byte, then returns the chain to the guest.
 * Caller is responsible for any locking (see the FIXME above -- the
 * mutex calls are deliberately commented out here).
 */
static void
pci_vtblk_done_locked(struct blockif_req *br, int err)
{
	struct pci_vtblk_ioreq *io = br->br_param;
	struct pci_vtblk_softc *sc = io->io_sc;
	/* convert errno into a virtio block error return */
	if (err == EOPNOTSUPP || err == ENOSYS)
		*io->io_status = VTBLK_S_UNSUPP;
	else if (err != 0)
		*io->io_status = VTBLK_S_IOERR;
	else
		*io->io_status = VTBLK_S_OK;
	/*
	 * Return the descriptor back to the host.
	 * We wrote 1 byte (our status) to host.
	 */
	//pthread_mutex_lock(&sc->vsc_mtx);
	vq_relchain(&sc->vbsc_vq, io->io_idx, 1);
	vq_endchains(&sc->vbsc_vq, 0);
	//pthread_mutex_unlock(&sc->vsc_mtx);
}
/*
 * blockif completion callback: take the softc mutex and run the common
 * completion path under it.
 */
static void
pci_vtblk_done(struct blockif_req *br, int err) {
	struct pci_vtblk_ioreq *io = br->br_param;
	struct pci_vtblk_softc *sc;

	sc = io->io_sc;
	pthread_mutex_lock(&sc->vsc_mtx);
	pci_vtblk_done_locked(br, err);
	pthread_mutex_unlock(&sc->vsc_mtx);
}
/*
 * Process one request chain from the virtqueue: parse the fixed header,
 * wire the data iovecs into the per-slot ioreq and dispatch to the blockif
 * backend. READ/WRITE/FLUSH complete asynchronously via pci_vtblk_done();
 * IDENT and unsupported ops complete synchronously here.
 */
static void
pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
{
	struct virtio_blk_hdr *vbh;
	struct pci_vtblk_ioreq *io;
	int i, n;
	int err;
	ssize_t iolen;
	int writeop, type;
	struct iovec iov[BLOCKIF_IOV_MAX + 2];
	uint16_t idx, flags[BLOCKIF_IOV_MAX + 2];
	n = vq_getchain(vq, &idx, iov, BLOCKIF_IOV_MAX + 2, flags);
	/*
	 * The first descriptor will be the read-only fixed header,
	 * and the last is for status (hence +2 above and below).
	 * The remaining iov's are the actual data I/O vectors.
	 *
	 * XXX - note - this fails on crash dump, which does a
	 * VIRTIO_BLK_T_FLUSH with a zero transfer length
	 */
	assert(n >= 2 && n <= BLOCKIF_IOV_MAX + 2);
	/* One ioreq per descriptor index -- safe because each index can only
	 * be in flight once. */
	io = &sc->vbsc_ios[idx];
	assert((flags[0] & VRING_DESC_F_WRITE) == 0);
	assert(iov[0].iov_len == sizeof(struct virtio_blk_hdr));
	vbh = iov[0].iov_base;
	/* Copy the data iovecs (everything between header and status byte). */
	memcpy(&io->io_req.br_iov, &iov[1],
	    sizeof(struct iovec) * ((size_t)n - 2));
	io->io_req.br_iovcnt = n - 2;
	io->io_req.br_offset = (off_t)(vbh->vbh_sector * DEV_BSIZE);
	/* The last descriptor is the guest-writeable 1-byte status. Note that
	 * n is decremented here so the loop below skips it. */
	io->io_status = iov[--n].iov_base;
	assert(iov[n].iov_len == 1);
	assert(flags[n] & VRING_DESC_F_WRITE);
	/*
	 * XXX
	 * The guest should not be setting the BARRIER flag because
	 * we don't advertise the capability.
	 */
	type = vbh->vbh_type & ~VBH_FLAG_BARRIER;
	writeop = (type == VBH_OP_WRITE);
	iolen = 0;
	for (i = 1; i < n; i++) {
		/*
		 * - write op implies read-only descriptor,
		 * - read/ident op implies write-only descriptor,
		 * therefore test the inverse of the descriptor bit
		 * to the op.
		 */
		assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
		iolen += iov[i].iov_len;
	}
	io->io_req.br_resid = iolen;
	DPRINTF(("virtio-block: %s op, %zd bytes, %d segs\n\r",
	    writeop ? "write" : "read/ident", iolen, i - 1));
	switch (type) {
	case VBH_OP_READ:
		err = blockif_read(sc->bc, &io->io_req);
		break;
	case VBH_OP_WRITE:
		err = blockif_write(sc->bc, &io->io_req);
		break;
	case VBH_OP_FLUSH:
	case VBH_OP_FLUSH_OUT:
		err = blockif_flush(sc->bc, &io->io_req);
		break;
	case VBH_OP_IDENT:
		/* Assume a single buffer */
		/* S/n equal to buffer is not zero-terminated. */
		memset(iov[1].iov_base, 0, iov[1].iov_len);
		strncpy(iov[1].iov_base, sc->vbsc_ident,
		    MIN(iov[1].iov_len, sizeof(sc->vbsc_ident)));
		/* xhyve: FIXME */
		pci_vtblk_done_locked(&io->io_req, 0);
		return;
	default:
		/* xhyve: FIXME */
		pci_vtblk_done_locked(&io->io_req, EOPNOTSUPP);
		return;
	}
	/* blockif_* only queues the request; queuing itself must not fail. */
	assert(err == 0);
}
/* Queue-notify handler: drain every available descriptor chain. */
static void
pci_vtblk_notify(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtblk_softc *sc = vsc;

	for (; vq_has_descs(vq); )
		pci_vtblk_proc(sc, vq);
}
/*
 * Device init for virtio-blk. 'opts' names the backing file; opens it via
 * blockif, allocates the softc, derives an IDENT serial from the filename's
 * MD5, and fills in the virtio/PCI config. Returns 0 on success, 1 on error.
 */
static int
pci_vtblk_init(struct pci_devinst *pi, char *opts)
{
	char bident[sizeof("XX:X:X")];
	struct blockif_ctxt *bctxt;
	MD5_CTX mdctx;
	u_char digest[16];
	struct pci_vtblk_softc *sc;
	off_t size;
	int i, sectsz, sts, sto;
	if (opts == NULL) {
		printf("virtio-block: backing device required\n");
		return (1);
	}
	/*
	 * The supplied backing file has to exist
	 */
	snprintf(bident, sizeof(bident), "%d:%d", pi->pi_slot, pi->pi_func);
	bctxt = blockif_open(opts, bident);
	if (bctxt == NULL) {
		perror("Could not open backing file");
		return (1);
	}
	size = blockif_size(bctxt);
	sectsz = blockif_sectsz(bctxt);
	blockif_psectsz(bctxt, &sts, &sto);
	sc = calloc(1, sizeof(struct pci_vtblk_softc));
	if (sc == NULL) {
		/* BUGFIX: the original dereferenced sc without a NULL check
		 * and leaked the blockif context on allocation failure. */
		blockif_close(bctxt);
		return (1);
	}
	sc->bc = bctxt;
	/* Pre-wire one ioreq per ring slot; slot index == descriptor index. */
	for (i = 0; i < VTBLK_RINGSZ; i++) {
		struct pci_vtblk_ioreq *io = &sc->vbsc_ios[i];
		io->io_req.br_callback = pci_vtblk_done;
		io->io_req.br_param = io;
		io->io_sc = sc;
		io->io_idx = (uint16_t)i;
	}
	pthread_mutex_init(&sc->vsc_mtx, NULL);
	/* init virtio softc and virtqueues */
	vi_softc_linkup(&sc->vbsc_vs, &vtblk_vi_consts, sc, pi, &sc->vbsc_vq);
	sc->vbsc_vs.vs_mtx = &sc->vsc_mtx;
	sc->vbsc_vq.vq_qsize = VTBLK_RINGSZ;
	/* sc->vbsc_vq.vq_notify = we have no per-queue notify */
	/*
	 * Create an identifier for the backing file. Use parts of the
	 * md5 sum of the filename
	 */
	MD5Init(&mdctx);
	MD5Update(&mdctx, opts, (unsigned)strlen(opts));
	MD5Final(digest, &mdctx);
	snprintf(sc->vbsc_ident, VTBLK_BLK_ID_BYTES, "BHYVE-%02X%02X-%02X%02X-%02X%02X",
	    digest[0], digest[1], digest[2], digest[3], digest[4], digest[5]);
	/* setup virtio block config space */
	sc->vbsc_cfg.vbc_capacity =
	    (uint64_t)(size / DEV_BSIZE); /* 512-byte units */
	sc->vbsc_cfg.vbc_size_max = 0;	/* not negotiated */
	sc->vbsc_cfg.vbc_seg_max = BLOCKIF_IOV_MAX;
	sc->vbsc_cfg.vbc_geometry.cylinders = 0;	/* no geometry */
	sc->vbsc_cfg.vbc_geometry.heads = 0;
	sc->vbsc_cfg.vbc_geometry.sectors = 0;
	sc->vbsc_cfg.vbc_blk_size = (uint32_t)sectsz;
	sc->vbsc_cfg.vbc_topology.physical_block_exp =
	    (uint8_t)((sts > sectsz) ? (ffsll(sts / sectsz) - 1) : 0);
	sc->vbsc_cfg.vbc_topology.alignment_offset =
	    (uint8_t)((sto != 0) ? ((sts - sto) / sectsz) : 0);
	sc->vbsc_cfg.vbc_topology.min_io_size = 0;
	sc->vbsc_cfg.vbc_topology.opt_io_size = 0;
	sc->vbsc_cfg.vbc_writeback = 0;
	/*
	 * Should we move some of this into virtio.c? Could
	 * have the device, class, and subdev_0 as fields in
	 * the virtio constants structure.
	 */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_BLOCK);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
	if (vi_intr_init(&sc->vbsc_vs, 1, fbsdrun_virtio_msix())) {
		blockif_close(sc->bc);
		free(sc);
		return (1);
	}
	vi_set_io_bar(&sc->vbsc_vs, 0);
	return (0);
}
/* Config space is read-only; reject the write (non-zero return). */
static int
pci_vtblk_cfgwrite(UNUSED void *vsc, int offset, UNUSED int size,
	UNUSED uint32_t value)
{
	DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
	return 1;
}
/* Copy 'size' bytes of config space at 'offset' into *retval. */
static int
pci_vtblk_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtblk_softc *sc = vsc;
	const uint8_t *src;

	/* our caller has already verified offset and size */
	src = (const uint8_t *)&sc->vbsc_cfg + offset;
	memcpy(retval, src, (size_t)size);
	return (0);
}
/*
 * PCI device-emulation registration for virtio-blk; BAR access goes through
 * the generic virtio handlers.
 */
static struct pci_devemu pci_de_vblk = {
	.pe_emu = "virtio-blk",
	.pe_init = pci_vtblk_init,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_vblk);

View File

@@ -0,0 +1,774 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <sys/select.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <net/ethernet.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/atomic.h>
#include <xhyve/support/linker_set.h>
#include <xhyve/support/md5.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/mevent.h>
#include <xhyve/virtio.h>
#define USE_MEVENT 0
#define VTNET_RINGSZ 1024
#define VTNET_MAXSEGS 32
/*
* Host capabilities. Note that we only offer a few of these.
*/
#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
#define VIRTIO_NET_F_GUEST_ANNOUNCE \
(1 << 21) /* guest can send gratuitous pkts */
#define VTNET_S_HOSTCAPS \
(VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
VIRTIO_F_NOTIFY_ON_EMPTY)
#define ETHER_IS_MULTICAST(addr) (*(addr) & 0x01) /* is address mcast/bcast? */
/*
* PCI config-space "registers"
*/
struct virtio_net_config {
uint8_t mac[6];
uint16_t status;
} __packed;
/*
* Queue definitions.
*/
#define VTNET_RXQ 0
#define VTNET_TXQ 1
#define VTNET_CTLQ 2 /* NB: not yet supported */
#define VTNET_MAXQ 3
/*
* Fixed network header size
*/
struct virtio_net_rxhdr {
uint8_t vrh_flags;
uint8_t vrh_gso_type;
uint16_t vrh_hdr_len;
uint16_t vrh_gso_size;
uint16_t vrh_csum_start;
uint16_t vrh_csum_offset;
uint16_t vrh_bufs;
} __packed;
/*
* Debug printf
*/
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) printf params
#define WPRINTF(params) printf params
/*
* Per-device softc
*/
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;	/* generic virtio state */
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];	/* rx + tx queues (no ctrl queue) */
	pthread_mutex_t vsc_mtx;	/* virtio-layer lock */
	struct mevent *vsc_mevp;	/* mevent handle (USE_MEVENT build only) */
	int vsc_tapfd;	/* tap device fd; -1 when absent */
	int vsc_rx_ready;	/* set on first rx-queue kick (pci_vtnet_ping_rxq) */
	volatile int resetting; /* set and checked outside lock */
	uint64_t vsc_features; /* negotiated features */
	struct virtio_net_config vsc_config;	/* MAC + status config space */
	pthread_mutex_t rx_mtx;	/* protects rx_in_progress / rx path */
	int rx_in_progress;	/* rx thread busy flag, polled by pci_vtnet_rxwait */
	int rx_vhdrlen;	/* per-packet rx header length in use */
	int rx_merge; /* merged rx bufs in use */
	pthread_t tx_tid;	/* tx worker thread */
	pthread_mutex_t tx_mtx;	/* protects tx_in_progress and tx_cond */
	pthread_cond_t tx_cond;	/* signalled by pci_vtnet_ping_txq */
	int tx_in_progress;	/* tx thread busy flag, polled by pci_vtnet_txwait */
};
static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
static struct virtio_consts vtnet_vi_consts = {
"vtnet", /* our name */
VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
sizeof(struct virtio_net_config), /* config reg size */
pci_vtnet_reset, /* reset */
NULL, /* device-wide qnotify -- not used */
pci_vtnet_cfgread, /* read PCI config */
pci_vtnet_cfgwrite, /* write PCI config */
pci_vtnet_neg_features, /* apply negotiated features */
VTNET_S_HOSTCAPS, /* our capabilities */
};
/*
* If the transmit thread is active then stall until it is done.
*/
static void
pci_vtnet_txwait(struct pci_vtnet_softc *sc)
{
	/*
	 * Poll tx_in_progress under tx_mtx, dropping the lock while we sleep
	 * so the tx thread can run and clear the flag.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	for (;;) {
		if (!sc->tx_in_progress)
			break;
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
	pthread_mutex_unlock(&sc->tx_mtx);
}
/*
* If the receive thread is active then stall until it is done.
*/
static void
pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
{
	/*
	 * Mirror of pci_vtnet_txwait() for the receive side: poll the busy
	 * flag under rx_mtx, releasing the lock around each sleep.
	 */
	pthread_mutex_lock(&sc->rx_mtx);
	for (;;) {
		if (!sc->rx_in_progress)
			break;
		pthread_mutex_unlock(&sc->rx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->rx_mtx);
	}
	pthread_mutex_unlock(&sc->rx_mtx);
}
/*
 * Guest-initiated device reset. Sets the 'resetting' flag (checked by the
 * rx/tx paths outside any lock), quiesces both worker paths, restores rx
 * defaults, and lets the generic virtio layer reset the rings.
 */
static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;
	DPRINTF(("vtnet: device reset requested !\n"));
	sc->resetting = 1;
	/*
	 * Wait for the transmit and receive threads to finish their
	 * processing.
	 */
	pci_vtnet_txwait(sc);
	pci_vtnet_rxwait(sc);
	sc->vsc_rx_ready = 0;
	/* Default back to merged rx buffers with the full header length;
	 * feature negotiation will adjust these again. */
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
	/* now reset rings, MSI-X vectors, and negotiated capabilities */
	vi_reset_dev(&sc->vsc_vs);
	sc->resetting = 0;
}
/*
 * Called to send a buffer chain out to the tap device
 */
static void
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
	int len)
{
	static char pad[60]; /* all zero bytes */
	/* No tap configured: silently drop the frame. */
	if (sc->vsc_tapfd == -1)
		return;
	/*
	 * If the length is < 60, pad out to that and add the
	 * extra zero'd segment to the iov. It is guaranteed that
	 * there is always an extra iov available by the caller.
	 * (60 presumably == minimum Ethernet frame size sans FCS.)
	 */
	if (len < 60) {
		iov[iovcnt].iov_base = pad;
		iov[iovcnt].iov_len = (size_t)(60 - len);
		iovcnt++;
	}
	/* Best-effort: a failed/short writev just drops the frame. */
	(void) writev(sc->vsc_tapfd, iov, iovcnt);
}
/*
* Called when there is read activity on the tap file descriptor.
* Each buffer posted by the guest is assumed to be able to contain
* an entire ethernet frame + rx header.
* MP note: the dummybuf is only used for discarding frames, so there
* is no need for it to be per-vtnet or locked.
*/
static uint8_t dummybuf[2048];
/*
 * Strip 'tlen' bytes (the rx header) off the front of the descriptor chain
 * and return a pointer to the first iovec that carries packet data.
 * Adjusts *niov when the first segment is consumed entirely.
 * XXX short-cut: assume first segment is >= tlen
 */
static __inline struct iovec *
rx_iov_trim(struct iovec *iov, int *niov, int tlen)
{
	assert(iov[0].iov_len >= (size_t)tlen);
	iov[0].iov_len -= (size_t)tlen;
	if (iov[0].iov_len != 0) {
		/* Segment 0 still has data: advance its base past the header. */
		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base +
		    (size_t)tlen);
		return (&iov[0]);
	}
	/* Segment 0 held exactly the header: drop it from the chain. */
	assert(*niov > 1);
	*niov -= 1;
	return (&iov[1]);
}
/*
 * Drain frames from the tap fd into posted guest rx buffers. Each guest
 * buffer is assumed large enough for a whole frame plus the rx header.
 * Frames arriving before the ring is ready, or while resetting, are read
 * into dummybuf and discarded.
 */
static void
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
{
	struct iovec iov[VTNET_MAXSEGS], *riov;
	struct vqueue_info *vq;
	void *vrx;
	int len, n;
	uint16_t idx;
	/*
	 * Should never be called without a valid tap fd
	 */
	assert(sc->vsc_tapfd != -1);
	/*
	 * But, will be called when the rx ring hasn't yet
	 * been set up or the guest is resetting the device.
	 */
	if (!sc->vsc_rx_ready || sc->resetting) {
		/*
		 * Drop the packet and try later.
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		return;
	}
	/*
	 * Check for available rx buffers
	 */
	vq = &sc->vsc_queues[VTNET_RXQ];
	if (!vq_has_descs(vq)) {
		/*
		 * Drop the packet and try later. Interrupt on
		 * empty, if that's negotiated.
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		vq_endchains(vq, 1);
		return;
	}
	do {
		/*
		 * Get descriptor chain.
		 */
		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
		assert(n >= 1 && n <= VTNET_MAXSEGS);
		/*
		 * Get a pointer to the rx header, and use the
		 * data immediately following it for the packet buffer.
		 */
		vrx = iov[0].iov_base;
		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
		len = (int) readv(sc->vsc_tapfd, riov, n);
		if (len < 0 && errno == EWOULDBLOCK) {
			/*
			 * No more packets, but still some avail ring
			 * entries. Interrupt if needed/appropriate.
			 */
			vq_retchain(vq);
			vq_endchains(vq, 0);
			return;
		}
		/*
		 * NOTE(review): a readv failure with errno != EWOULDBLOCK
		 * falls through with len < 0 and releases the chain with a
		 * bogus length -- looks unintended; confirm.
		 */
		/*
		 * The only valid field in the rx packet header is the
		 * number of buffers if merged rx bufs were negotiated.
		 */
		memset(vrx, 0, sc->rx_vhdrlen);
		if (sc->rx_merge) {
			struct virtio_net_rxhdr *vrxh;
			vrxh = vrx;
			vrxh->vrh_bufs = 1;
		}
		/*
		 * Release this chain and handle more chains.
		 */
		vq_relchain(vq, idx, (uint32_t)(len + sc->rx_vhdrlen));
	} while (vq_has_descs(vq));
	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
	vq_endchains(vq, 1);
}
#if USE_MEVENT
/*
 * mevent read callback for the tap fd (USE_MEVENT build): run the rx path
 * with rx_in_progress raised so pci_vtnet_rxwait() can quiesce us.
 */
static void
pci_vtnet_tap_callback(UNUSED int fd, UNUSED enum ev_type type, void *param)
{
	struct pci_vtnet_softc *sc = param;
	pthread_mutex_lock(&sc->rx_mtx);
	sc->rx_in_progress = 1;
	pci_vtnet_tap_rx(sc);
	sc->rx_in_progress = 0;
	pthread_mutex_unlock(&sc->rx_mtx);
}
#else /* !USE_MEVENT */
/*
 * Dedicated rx thread (non-mevent build): block in select() on the tap fd
 * and run the rx path, with rx_in_progress raised so pci_vtnet_rxwait()
 * can quiesce us. Never returns.
 */
static void *
pci_vtnet_tap_select_func(void *vsc) {
	struct pci_vtnet_softc *sc;
	fd_set rfd;
	pthread_setname_np("net:tap:rx");
	sc = vsc;
	assert(sc);
	assert(sc->vsc_tapfd != -1);
	while (1) {
		/*
		 * BUGFIX: select() modifies the fd_set in place, so it must
		 * be rebuilt on every iteration, not once before the loop.
		 */
		FD_ZERO(&rfd);
		FD_SET(sc->vsc_tapfd, &rfd);
		if (select((sc->vsc_tapfd + 1), &rfd, NULL, NULL, NULL) == -1) {
			if (errno == EINTR)
				continue; /* interrupted by a signal: retry */
			abort();
		}
		pthread_mutex_lock(&sc->rx_mtx);
		sc->rx_in_progress = 1;
		pci_vtnet_tap_rx(sc);
		sc->rx_in_progress = 0;
		pthread_mutex_unlock(&sc->rx_mtx);
	}
	return (NULL);
}
#endif
/*
 * rx queue notify: the first kick marks the rx path ready and suppresses
 * further avail-ring notifications (the tap is polled instead).
 */
static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	if (sc->vsc_rx_ready != 0)
		return;
	sc->vsc_rx_ready = 1;
	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
}
/*
 * Transmit one descriptor chain: descriptor 0 is the virtio-net header
 * (not sent to the tap), the rest is the packet. 'plen' is the packet
 * length handed to the tap; 'tlen' is the full transfer length reported
 * back to the guest.
 */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	int i, n;
	int plen, tlen;
	uint16_t idx;
	/*
	 * Obtain chain of descriptors. The first one is
	 * really the header descriptor, so we need to sum
	 * up two lengths: packet length and transfer length.
	 */
	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
	assert(n >= 1 && n <= VTNET_MAXSEGS);
	plen = 0;
	tlen = (int)iov[0].iov_len;
	for (i = 1; i < n; i++) {
		/* Explicit size_t -> int casts, consistent with the iov[0]
		 * cast above (the original accumulated implicitly). */
		plen += (int)iov[i].iov_len;
		tlen += (int)iov[i].iov_len;
	}
	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
	/* chain is processed, release it and set tlen */
	vq_relchain(vq, idx, (uint32_t)tlen);
}
/*
 * Queue notification for the tx ring: hand any pending descriptors
 * off to the tx processing thread.
 */
static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *softc = vsc;

	/* Nothing to do when the guest posted no descriptors. */
	if (vq_has_descs(vq) == 0)
		return;

	/* Suppress further kicks and wake the tx thread if it is idle. */
	pthread_mutex_lock(&softc->tx_mtx);
	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	if (softc->tx_in_progress == 0)
		pthread_cond_signal(&softc->tx_cond);
	pthread_mutex_unlock(&softc->tx_mtx);
}
/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;
	pthread_setname_np("net:tap:tx");
	vq = &sc->vsc_queues[VTNET_TXQ];
	/*
	 * Let us wait till the tx queue pointers get initialised &
	 * first tx signaled
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);
	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			/* Re-enable guest kicks while we have nothing to do. */
			vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
			mb();	/* flag flip must be visible before the re-check */
			if (!sc->resetting && vq_has_descs(vq))
				break;	/* work raced in; skip the sleep */
			sc->tx_in_progress = 0;	/* pci_vtnet_txwait() watches this */
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		/* Suppress kicks while we actively drain the ring. */
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);
		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));
		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, 1);
		pthread_mutex_lock(&sc->tx_mtx);
	}
}
#ifdef notyet
/* Control-queue notify stub; the control virtqueue is not implemented yet. */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{
	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif
/*
 * Parse a "mac=xx:xx:xx:xx:xx:xx" option string.  On success the parsed
 * address is stored in mac_addr.  Multicast and all-zero addresses are
 * rejected with EINVAL; anything that does not look like a mac= option
 * is ignored and treated as success.
 */
static int
pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
{
	static const char zero_addr[ETHER_ADDR_LEN] = { 0 };
	struct ether_addr *ea;
	char *key;

	key = strsep(&mac_str, "=");
	if (mac_str == NULL || strcmp(key, "mac") != 0)
		return (0);

	ea = ether_aton(mac_str);
	if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
	    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
		fprintf(stderr, "Invalid MAC %s\n", mac_str);
		return (EINVAL);
	}
	memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
	return (0);
}
/*
 * Device initialisation for the tap-backed virtio-net device.
 *
 * opts has the form "<tapname>[,mac=xx:xx:xx:xx:xx:xx]".  /dev/<tapname>
 * is opened non-blocking and its read events are wired up either to the
 * mevent loop or to a dedicated select() thread.  When no MAC is given,
 * one is synthesised from the PCI slot/function and the VM name.
 * Returns 0 on success, non-zero on error.
 */
static int
pci_vtnet_init(struct pci_devinst *pi, char *opts)
{
	MD5_CTX mdctx;
	unsigned char digest[16];
	char nstr[80];
	struct pci_vtnet_softc *sc;
	char *devname;
	char *vtopts;
	int mac_provided;
#if !USE_MEVENT
	pthread_t sthrd;
#endif
	sc = calloc(1, sizeof(struct pci_vtnet_softc));
	if (sc == NULL) {
		/* Fail the device instead of crashing on the NULL deref below. */
		WPRINTF(("virtio-net: calloc failed\n"));
		return (1);
	}
	pthread_mutex_init(&sc->vsc_mtx, NULL);
	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif
	/*
	 * Attempt to open the tap device and read the MAC address
	 * if specified
	 */
	mac_provided = 0;
	sc->vsc_tapfd = -1;
	if (opts != NULL) {
		char tbuf[80];
		int err;
		devname = vtopts = strdup(opts);
		(void) strsep(&vtopts, ",");
		if (vtopts != NULL) {
			err = pci_vtnet_parsemac(vtopts, sc->vsc_config.mac);
			if (err != 0) {
				/* NOTE(review): sc is leaked here (pre-existing). */
				free(devname);
				return (err);
			}
			mac_provided = 1;
		}
		/* Build "/dev/<name>"; strlcat bounds the copy to tbuf. */
		strcpy(tbuf, "/dev/");
		strlcat(tbuf, devname, sizeof(tbuf));
		free(devname);
		sc->vsc_tapfd = open(tbuf, O_RDWR);
		if (sc->vsc_tapfd == -1) {
			WPRINTF(("open of tap device %s failed\n", tbuf));
		} else {
			/*
			 * Set non-blocking and register for read
			 * notifications with the event loop
			 */
			int opt = 1;
			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
				WPRINTF(("tap device O_NONBLOCK failed\n"));
				close(sc->vsc_tapfd);
				sc->vsc_tapfd = -1;
			}
#if USE_MEVENT
			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
				EVF_READ,
				pci_vtnet_tap_callback,
				sc);
			if (sc->vsc_mevp == NULL) {
				WPRINTF(("Could not register event\n"));
				close(sc->vsc_tapfd);
				sc->vsc_tapfd = -1;
			}
#else /* !USE_MEVENT */
			if (pthread_create(&sthrd, NULL, pci_vtnet_tap_select_func, sc)) {
				WPRINTF(("Could not create tap receive thread\n"));
				close(sc->vsc_tapfd);
				sc->vsc_tapfd = -1;
			}
#endif
		}
	}
	/*
	 * The default MAC address is the standard NetApp OUI of 00-a0-98,
	 * followed by an MD5 of the PCI slot/func number and dev name
	 */
	if (!mac_provided) {
		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
		    pi->pi_func, vmname);
		MD5Init(&mdctx);
		MD5Update(&mdctx, nstr, (unsigned int)strlen(nstr));
		MD5Final(digest, &mdctx);
		sc->vsc_config.mac[0] = 0x00;
		sc->vsc_config.mac[1] = 0xa0;
		sc->vsc_config.mac[2] = 0x98;
		sc->vsc_config.mac[3] = digest[0];
		sc->vsc_config.mac[4] = digest[1];
		sc->vsc_config.mac[5] = digest[2];
	}
	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
	/* Link is up if we managed to open tap device. */
	sc->vsc_config.status = (opts == NULL || sc->vsc_tapfd >= 0);
	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
		return (1);
	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);
	sc->resetting = 0;
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
	sc->rx_in_progress = 0;
	pthread_mutex_init(&sc->rx_mtx, NULL);
	/*
	 * Initialize tx semaphore & spawn TX processing thread.
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	return (0);
}
/*
 * Handle a guest write to virtio config space.  Only the first six
 * bytes (the MAC address) are writable; all other offsets are
 * read-only and the write is silently dropped.
 */
static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;

	if (offset >= 6) {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
		return (0);
	}

	assert(offset + size <= 6);
	/* The driver is allowed to change the MAC address. */
	memcpy(&sc->vsc_config.mac[offset], &value, size);
	return (0);
}
/*
 * Handle a guest read from virtio config space by copying the
 * requested bytes straight out of the shadow config structure.
 */
static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;

	memcpy(retval, (uint8_t *)&sc->vsc_config + offset, size);
	return (0);
}
/*
 * Record the feature bits negotiated by the guest.  Without
 * VIRTIO_NET_F_MRG_RXBUF the rx header shrinks by the two bytes that
 * carry the merged-buffer count.
 */
static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;

	sc->vsc_features = negotiated_features;
	if ((sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF) == 0) {
		sc->rx_merge = 0;
		/* non-merge rx header is 2 bytes shorter */
		sc->rx_vhdrlen -= 2;
	}
}
/*
 * PCI emulation glue: registers the "virtio-tap" device model with the
 * device framework via the pci_devemu linker set.
 */
static struct pci_devemu pci_de_vnet_tap = {
	.pe_emu = "virtio-tap",
	.pe_init = pci_vtnet_init,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_vnet_tap);

View File

@@ -0,0 +1,834 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
*
* The vmnet support is ported from the MirageOS project:
*
* https://github.com/mirage/ocaml-vmnet
*
* Copyright (C) 2014 Anil Madhavapeddy <anil@recoil.org>
*
* Permission to use, copy, modify, and distribute this software for any
* purpose with or without fee is hereby granted, provided that the above
* copyright notice and this permission notice appear in all copies.
*
* THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
* WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
* ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
* WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <sys/select.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <net/ethernet.h>
#include <dispatch/dispatch.h>
#include <vmnet/vmnet.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/atomic.h>
#include <xhyve/support/linker_set.h>
#include <xhyve/support/md5.h>
#include <xhyve/support/uuid.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/mevent.h>
#include <xhyve/virtio.h>
#define VTNET_RINGSZ 1024
#define VTNET_MAXSEGS 32
/*
* Host capabilities. Note that we only offer a few of these.
*/
#define VIRTIO_NET_F_CSUM (1 << 0) /* host handles partial cksum */
#define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* guest handles partial cksum */
#define VIRTIO_NET_F_MAC (1 << 5) /* host supplies MAC */
#define VIRTIO_NET_F_GSO_DEPREC (1 << 6) /* deprecated: host handles GSO */
#define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* guest can rcv TSOv4 */
#define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* guest can rcv TSOv6 */
#define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* guest can rcv TSO with ECN */
#define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* guest can rcv UFO */
#define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* host can rcv TSOv4 */
#define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* host can rcv TSOv6 */
#define VIRTIO_NET_F_HOST_ECN (1 << 13) /* host can rcv TSO with ECN */
#define VIRTIO_NET_F_HOST_UFO (1 << 14) /* host can rcv UFO */
#define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* host can merge RX buffers */
#define VIRTIO_NET_F_STATUS (1 << 16) /* config status field available */
#define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* control channel available */
#define VIRTIO_NET_F_CTRL_RX (1 << 18) /* control channel RX mode support */
#define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* control channel VLAN filtering */
#define VIRTIO_NET_F_GUEST_ANNOUNCE \
(1 << 21) /* guest can send gratuitous pkts */
#define VTNET_S_HOSTCAPS \
(VIRTIO_NET_F_MAC | VIRTIO_NET_F_MRG_RXBUF | VIRTIO_NET_F_STATUS | \
VIRTIO_F_NOTIFY_ON_EMPTY)
/*
 * PCI config-space "registers"
 */
/* Guest-visible virtio-net device config area. */
struct virtio_net_config {
	uint8_t mac[6];		/* station MAC address */
	uint16_t status;	/* link status; set to 1 once the backend exists */
} __packed;
/*
* Queue definitions.
*/
#define VTNET_RXQ 0
#define VTNET_TXQ 1
#define VTNET_CTLQ 2 /* NB: not yet supported */
#define VTNET_MAXQ 3
/*
 * Fixed network header size
 */
/*
 * Virtio-net rx header prepended to every received frame.  This device
 * only fills in vrh_bufs (set to 1 when merged rx buffers have been
 * negotiated); the other fields are zeroed.
 */
struct virtio_net_rxhdr {
	uint8_t vrh_flags;
	uint8_t vrh_gso_type;
	uint16_t vrh_hdr_len;
	uint16_t vrh_gso_size;
	uint16_t vrh_csum_start;
	uint16_t vrh_csum_offset;
	uint16_t vrh_bufs;	/* merged-buffer count; last 2 bytes dropped when !rx_merge */
} __packed;
/*
* Debug printf
*/
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) printf params
/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;	/* common virtio device state */
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
	pthread_mutex_t vsc_mtx;
	struct vmnet_state *vms;	/* vmnet backend handle */
	int vsc_rx_ready;		/* set on first rx-queue kick */
	volatile int resetting; /* set and checked outside lock */
	uint64_t vsc_features; /* negotiated features */
	struct virtio_net_config vsc_config;	/* guest-visible config area */
	pthread_mutex_t rx_mtx;		/* serialises rx processing vs. reset */
	int rx_in_progress;		/* rx path active; reset waits on this */
	int rx_vhdrlen;			/* current rx header length */
	int rx_merge; /* merged rx bufs in use */
	pthread_t tx_tid;		/* tx processing thread */
	pthread_mutex_t tx_mtx;		/* protects tx_cond / tx_in_progress */
	pthread_cond_t tx_cond;
	int tx_in_progress;		/* tx path active; reset waits on this */
};
static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
/* Callback table handed to the generic virtio layer for this device. */
static struct virtio_consts vtnet_vi_consts = {
	"vtnet", /* our name */
	VTNET_MAXQ - 1, /* we currently support 2 virtqueues */
	sizeof(struct virtio_net_config), /* config reg size */
	pci_vtnet_reset, /* reset */
	NULL, /* device-wide qnotify -- not used */
	pci_vtnet_cfgread, /* read PCI config */
	pci_vtnet_cfgwrite, /* write PCI config */
	pci_vtnet_neg_features, /* apply negotiated features */
	VTNET_S_HOSTCAPS, /* our capabilities */
};
/* State for the vmnet.framework interface backing one device. */
struct vmnet_state {
	interface_ref iface;		/* handle from vmnet_start_interface() */
	uint8_t mac[6];			/* MAC assigned by the vmnet framework */
	unsigned int mtu;
	unsigned int max_packet_size;	/* largest frame vmnet will carry */
};
static void pci_vtnet_tap_callback(struct pci_vtnet_softc *sc);
/*
 * Create an interface for the guest using Apple's vmnet framework.
 *
 * The interface works in VMNET_SHARED_MODE which allows for packets
 * of the guest to reach other guests and the Internet.
 *
 * See also: https://developer.apple.com/library/mac/documentation/vmnet/Reference/vmnet_Reference/index.html
 */
static int
vmn_create(struct pci_vtnet_softc *sc)
{
	xpc_object_t interface_desc;
	uuid_t uuid;
	__block interface_ref iface;
	__block vmnet_return_t iface_status;
	dispatch_semaphore_t iface_created;
	dispatch_queue_t if_create_q;
	dispatch_queue_t if_q;
	struct vmnet_state *vms;
	uint32_t uuid_status;
	interface_desc = xpc_dictionary_create(NULL, NULL, 0);
	xpc_dictionary_set_uint64(interface_desc, vmnet_operation_mode_key,
	    VMNET_SHARED_MODE);
	/* A caller-supplied guest UUID yields a stable vmnet MAC across runs. */
	if (guest_uuid_str != NULL) {
		uuid_from_string(guest_uuid_str, &uuid, &uuid_status);
		if (uuid_status != uuid_s_ok) {
			return (-1);
		}
	} else {
		uuid_generate_random(uuid);
	}
	xpc_dictionary_set_uuid(interface_desc, vmnet_interface_id_key, uuid);
	iface = NULL;
	iface_status = 0;
	vms = malloc(sizeof(struct vmnet_state));
	if (!vms) {
		return (-1);
	}
	if_create_q = dispatch_queue_create("org.xhyve.vmnet.create",
	    DISPATCH_QUEUE_SERIAL);
	iface_created = dispatch_semaphore_create(0);
	/*
	 * The completion block runs on if_create_q: it records the
	 * assigned MAC, MTU and max packet size, then signals the
	 * semaphore we block on below.
	 */
	iface = vmnet_start_interface(interface_desc, if_create_q,
	    ^(vmnet_return_t status, xpc_object_t interface_param)
	{
		iface_status = status;
		if (status != VMNET_SUCCESS || !interface_param) {
			dispatch_semaphore_signal(iface_created);
			return;
		}
		if (sscanf(xpc_dictionary_get_string(interface_param,
		    vmnet_mac_address_key),
		    "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
		    &vms->mac[0], &vms->mac[1], &vms->mac[2], &vms->mac[3],
		    &vms->mac[4], &vms->mac[5]) != 6)
		{
			assert(0);
		}
		vms->mtu = (unsigned)
		    xpc_dictionary_get_uint64(interface_param, vmnet_mtu_key);
		vms->max_packet_size = (unsigned)
		    xpc_dictionary_get_uint64(interface_param,
		    vmnet_max_packet_size_key);
		dispatch_semaphore_signal(iface_created);
	});
	dispatch_semaphore_wait(iface_created, DISPATCH_TIME_FOREVER);
	dispatch_release(if_create_q);
	/*
	 * NOTE(review): interface_desc and iface_created appear to be
	 * leaked on every path -- confirm whether xpc_release() /
	 * dispatch_release() should be added here.
	 */
	if (iface == NULL || iface_status != VMNET_SUCCESS) {
		printf("virtio_net: Could not create vmnet interface, "
		    "permission denied or no entitlement?\n");
		free(vms);
		return (-1);
	}
	vms->iface = iface;
	sc->vms = vms;
	/* Route packets-available events straight into the rx path. */
	if_q = dispatch_queue_create("org.xhyve.vmnet.iface_q", 0);
	vmnet_interface_set_event_callback(iface, VMNET_INTERFACE_PACKETS_AVAILABLE,
	    if_q, ^(UNUSED interface_event_t event_id, UNUSED xpc_object_t event)
	{
		pci_vtnet_tap_callback(sc);
	});
	return (0);
}
/*
 * Read one frame from the vmnet interface into the supplied iovec.
 * Returns the frame length, or -1 when no packet was available
 * (errno is NOT set in that case).
 */
static ssize_t
vmn_read(struct vmnet_state *vms, struct iovec *iov, int n) {
	vmnet_return_t r;
	struct vmpktdesc v;
	int pktcnt;
	int i;
	v.vm_pkt_size = 0;
	for (i = 0; i < n; i++) {
		v.vm_pkt_size += iov[i].iov_len;
	}
	/* The buffer must be able to hold a maximum-sized frame. */
	assert(v.vm_pkt_size >= vms->max_packet_size);
	v.vm_pkt_iov = iov;
	v.vm_pkt_iovcnt = (uint32_t) n;
	v.vm_flags = 0; /* TODO no clue what this is */
	pktcnt = 1;	/* ask vmnet for a single packet */
	r = vmnet_read(vms->iface, &v, &pktcnt);
	assert(r == VMNET_SUCCESS);
	if (pktcnt < 1) {
		return (-1);
	}
	return ((ssize_t) v.vm_pkt_size);
}
static void
vmn_write(struct vmnet_state *vms, struct iovec *iov, int n) {
vmnet_return_t r;
struct vmpktdesc v;
int pktcnt;
int i;
v.vm_pkt_size = 0;
for (i = 0; i < n; i++) {
v.vm_pkt_size += iov[i].iov_len;
}
assert(v.vm_pkt_size <= vms->max_packet_size);
v.vm_pkt_iov = iov;
v.vm_pkt_iovcnt = (uint32_t) n;
v.vm_flags = 0; /* TODO no clue what this is */
pktcnt = 1;
r = vmnet_write(vms->iface, &v, &pktcnt);
assert(r == VMNET_SUCCESS);
}
/*
 * If the transmit thread is active then stall until it is done.
 */
static void
pci_vtnet_txwait(struct pci_vtnet_softc *sc)
{
	pthread_mutex_lock(&sc->tx_mtx);
	while (sc->tx_in_progress) {
		/* Drop the lock so the tx thread can clear the flag. */
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
	pthread_mutex_unlock(&sc->tx_mtx);
}
/*
 * If the receive thread is active then stall until it is done.
 * Polls rx_in_progress under rx_mtx, dropping the lock between
 * checks so the rx path can make progress.
 */
static void
pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
{
	pthread_mutex_lock(&sc->rx_mtx);
	for (;;) {
		if (!sc->rx_in_progress)
			break;
		pthread_mutex_unlock(&sc->rx_mtx);
		usleep(10000);	/* 10 ms poll interval */
		pthread_mutex_lock(&sc->rx_mtx);
	}
	pthread_mutex_unlock(&sc->rx_mtx);
}
/*
 * Virtio reset handler: quiesce the rx and tx paths, then return
 * rings, MSI-X state and negotiated features to their defaults.
 */
static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;
	DPRINTF(("vtnet: device reset requested !\n"));
	sc->resetting = 1;
	/*
	 * Wait for the transmit and receive threads to finish their
	 * processing.
	 */
	pci_vtnet_txwait(sc);
	pci_vtnet_rxwait(sc);
	sc->vsc_rx_ready = 0;
	/* Back to the merged-rx-buffer default until features renegotiate. */
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
	/* now reset rings, MSI-X vectors, and negotiated capabilities */
	vi_reset_dev(&sc->vsc_vs);
	sc->resetting = 0;
}
/*
 * Called to send a buffer chain out to the tap device
 */
static void
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
		 int len)
{
	static char pad[60]; /* all zero bytes */
	/* No backend: the vmnet interface was never created. */
	if (!sc->vms)
		return;
	/*
	 * If the length is < 60, pad out to that and add the
	 * extra zero'd segment to the iov. It is guaranteed that
	 * there is always an extra iov available by the caller.
	 * (60 is the minimum Ethernet frame size excluding the FCS.)
	 */
	if (len < 60) {
		iov[iovcnt].iov_base = pad;
		iov[iovcnt].iov_len = (size_t)(60 - len);
		iovcnt++;
	}
	vmn_write(sc->vms, iov, iovcnt);
}
/*
 * Called when there is read activity on the tap file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 * MP note: the dummybuf is only used for discarding frames, so there
 * is no need for it to be per-vtnet or locked.
 */
static uint8_t dummybuf[2048];
/*
 * Advance past the rx header at the front of a guest buffer chain:
 * shrink the first iovec by tlen bytes and return a pointer to the
 * first iovec that still has payload room.  *niov is decremented when
 * the first segment is consumed entirely.
 */
static __inline struct iovec *
rx_iov_trim(struct iovec *iov, int *niov, int tlen)
{
	struct iovec *riov;
	/* XXX short-cut: assume first segment is >= tlen */
	assert(iov[0].iov_len >= (size_t)tlen);
	iov[0].iov_len -= (size_t)tlen;
	if (iov[0].iov_len == 0) {
		assert(*niov > 1);
		*niov -= 1;
		riov = &iov[1];
	} else {
		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base +
		    (size_t)tlen);
		riov = &iov[0];
	}
	return (riov);
}
/*
 * Drain pending frames from the vmnet interface into the guest rx
 * ring.  Frames are dropped while the ring is not yet ready, the
 * device is resetting, or no rx buffers are available.
 */
static void
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
{
	struct iovec iov[VTNET_MAXSEGS], *riov;
	struct vqueue_info *vq;
	void *vrx;
	int len, n;
	uint16_t idx;
	/*
	 * Should never be called without a valid tap fd
	 */
	assert(sc->vms);
	/*
	 * But, will be called when the rx ring hasn't yet
	 * been set up or the guest is resetting the device.
	 */
	if (!sc->vsc_rx_ready || sc->resetting) {
		/*
		 * Drop the packet and try later.
		 */
		iov[0].iov_base = dummybuf;
		iov[0].iov_len = sizeof(dummybuf);
		(void) vmn_read(sc->vms, iov, 1);
		return;
	}
	/*
	 * Check for available rx buffers
	 */
	vq = &sc->vsc_queues[VTNET_RXQ];
	if (!vq_has_descs(vq)) {
		/*
		 * Drop the packet and try later. Interrupt on
		 * empty, if that's negotiated.
		 */
		iov[0].iov_base = dummybuf;
		iov[0].iov_len = sizeof(dummybuf);
		(void) vmn_read(sc->vms, iov, 1);
		vq_endchains(vq, 1);
		return;
	}
	do {
		/*
		 * Get descriptor chain.
		 */
		n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
		assert(n >= 1 && n <= VTNET_MAXSEGS);
		/*
		 * Get a pointer to the rx header, and use the
		 * data immediately following it for the packet buffer.
		 */
		vrx = iov[0].iov_base;
		riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
		len = (int) vmn_read(sc->vms, riov, n);
		/*
		 * NOTE(review): vmn_read() returns -1 without setting
		 * errno, so the errno == EWOULDBLOCK test below looks
		 * like a leftover from the tap backend -- confirm.
		 */
		if (len < 0 && errno == EWOULDBLOCK) {
			/*
			 * No more packets, but still some avail ring
			 * entries. Interrupt if needed/appropriate.
			 */
			vq_retchain(vq);
			vq_endchains(vq, 0);
			return;
		}
		/*
		 * The only valid field in the rx packet header is the
		 * number of buffers if merged rx bufs were negotiated.
		 */
		memset(vrx, 0, sc->rx_vhdrlen);
		if (sc->rx_merge) {
			struct virtio_net_rxhdr *vrxh;
			vrxh = vrx;
			vrxh->vrh_bufs = 1;
		}
		/*
		 * Release this chain and handle more chains.
		 */
		vq_relchain(vq, idx, (uint32_t)(len + sc->rx_vhdrlen));
	} while (vq_has_descs(vq));
	/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
	vq_endchains(vq, 1);
}
/*
 * Invoked from the vmnet event callback when packets are available.
 * rx_in_progress is raised under rx_mtx so pci_vtnet_rxwait() can
 * stall a device reset until this pass completes.
 */
static void
pci_vtnet_tap_callback(struct pci_vtnet_softc *sc)
{
	pthread_mutex_lock(&sc->rx_mtx);
	sc->rx_in_progress = 1;
	pci_vtnet_tap_rx(sc);
	sc->rx_in_progress = 0;
	pthread_mutex_unlock(&sc->rx_mtx);
}
/*
 * Queue notification for the rx ring: the first kick marks the ring
 * ready and suppresses further rx notifications from the guest.
 */
static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;
	/*
	 * A qnotify means that the rx process can now begin
	 */
	if (sc->vsc_rx_ready == 0) {
		sc->vsc_rx_ready = 1;
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	}
}
/*
 * Pull one descriptor chain off the tx queue and transmit it.  The
 * first descriptor carries the virtio-net header and is skipped for
 * transmission; the remaining descriptors carry the frame.
 */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	int i, n;
	int plen, tlen;
	uint16_t idx;
	/*
	 * Obtain chain of descriptors. The first one is
	 * really the header descriptor, so we need to sum
	 * up two lengths: packet length and transfer length.
	 */
	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
	assert(n >= 1 && n <= VTNET_MAXSEGS);
	plen = 0;
	tlen = (int)iov[0].iov_len;
	for (i = 1; i < n; i++) {
		plen += iov[i].iov_len;	/* frame bytes only */
		tlen += iov[i].iov_len;	/* frame + header bytes */
	}
	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
	/* iov[0] is the virtio-net header; hand only payload segments down. */
	pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
	/* chain is processed, release it and set tlen */
	vq_relchain(vq, idx, (uint32_t)tlen);
}
/*
 * Queue notification for the tx ring: suppress further kicks and wake
 * the tx thread if it is idle.
 */
static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;
	/*
	 * Any ring entries to process?
	 */
	if (!vq_has_descs(vq))
		return;
	/* Signal the tx thread for processing */
	pthread_mutex_lock(&sc->tx_mtx);
	vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}
/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;
	pthread_setname_np("net:vmnet:tx");
	vq = &sc->vsc_queues[VTNET_TXQ];
	/*
	 * Let us wait till the tx queue pointers get initialised &
	 * first tx signaled
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);
	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			/* Re-enable guest kicks while we have nothing to do. */
			vq->vq_used->vu_flags &= ~VRING_USED_F_NO_NOTIFY;
			mb();	/* flag flip must be visible before the re-check */
			if (!sc->resetting && vq_has_descs(vq))
				break;	/* work raced in; skip the sleep */
			sc->tx_in_progress = 0;	/* pci_vtnet_txwait() watches this */
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		/* Suppress kicks while we actively drain the ring. */
		vq->vq_used->vu_flags |= VRING_USED_F_NO_NOTIFY;
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);
		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));
		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, 1);
		pthread_mutex_lock(&sc->tx_mtx);
	}
}
#ifdef notyet
/* Control-queue notify stub; the control virtqueue is not implemented yet. */
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{
	DPRINTF(("vtnet: control qnotify!\n\r"));
}
#endif
/*
 * Device initialisation for the vmnet-backed virtio-net device.
 *
 * Creates a vmnet.framework interface (which assigns the MAC address),
 * optionally prints that MAC and exits when print_mac is set, and wires
 * up the virtio queues and PCI config space.  Returns 0 on success,
 * non-zero on error.
 */
static int
pci_vtnet_init(struct pci_devinst *pi, UNUSED char *opts)
{
	struct pci_vtnet_softc *sc;
	sc = calloc(1, sizeof(struct pci_vtnet_softc));
	if (sc == NULL) {
		/* Fail the device instead of crashing on the NULL deref below. */
		fprintf(stderr, "virtio-net: calloc failed\n");
		return (-1);
	}
	pthread_mutex_init(&sc->vsc_mtx, NULL);
	vi_softc_linkup(&sc->vsc_vs, &vtnet_vi_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;
	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif
	/*
	 * Create the vmnet backend; the framework supplies the MAC, so no
	 * tap device or mac= option is involved here.
	 */
	if (vmn_create(sc) == -1) {
		return (-1);
	}
	if (print_mac == 1)
	{
		printf("MAC: %02x:%02x:%02x:%02x:%02x:%02x\n",
		    sc->vms->mac[0], sc->vms->mac[1], sc->vms->mac[2],
		    sc->vms->mac[3], sc->vms->mac[4], sc->vms->mac[5]);
		exit(0);
	}
	/* Expose the vmnet-assigned MAC through virtio config space. */
	memcpy(sc->vsc_config.mac, sc->vms->mac, 6);
	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
	/* Link is always up once the vmnet interface exists. */
	sc->vsc_config.status = 1;
	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix()))
		return (1);
	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);
	sc->resetting = 0;
	sc->rx_merge = 1;
	sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
	sc->rx_in_progress = 0;
	pthread_mutex_init(&sc->rx_mtx, NULL);
	/*
	 * Initialize tx semaphore & spawn TX processing thread.
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	return (0);
}
/*
 * Guest write to virtio config space.  Only the 6-byte MAC address is
 * writable; writes elsewhere are silently dropped.
 */
static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;
	if (offset < 6) {
		assert(offset + size <= 6);
		/*
		 * The driver is allowed to change the MAC address
		 */
		ptr = &sc->vsc_config.mac[offset];
		memcpy(ptr, &value, size);
	} else {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
	}
	return (0);
}
/*
 * Guest read from virtio config space: copy the requested bytes out
 * of the shadow config structure.
 */
static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;
	ptr = (uint8_t *)&sc->vsc_config + offset;
	memcpy(retval, ptr, size);
	return (0);
}
/*
 * Record the feature bits negotiated by the guest.  Without merged rx
 * buffers the rx header loses the 2-byte vrh_bufs field.
 */
static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;
	sc->vsc_features = negotiated_features;
	if (!(sc->vsc_features & VIRTIO_NET_F_MRG_RXBUF)) {
		sc->rx_merge = 0;
		/* non-merge rx header is 2 bytes shorter */
		sc->rx_vhdrlen -= 2;
	}
}
/*
 * PCI emulation glue: registers the vmnet-backed "virtio-net" device
 * model via the pci_devemu linker set.
 */
static struct pci_devemu pci_de_vnet_vmnet = {
	.pe_emu = "virtio-net",
	.pe_init = pci_vtnet_init,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_vnet_vmnet);

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,187 @@
/*-
* Copyright (c) 2014 Nahanni Systems Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer
* in this position and unchanged.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* virtio entropy device emulation.
* Randomness is sourced from /dev/random which does not block
* once it has been seeded at bootup.
*/
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <pthread.h>
#include <fcntl.h>
#include <unistd.h>
#include <errno.h>
#include <assert.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/linker_set.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/virtio.h>
#define VTRND_RINGSZ 64
static int pci_vtrnd_debug;
#define DPRINTF(params) if (pci_vtrnd_debug) printf params
#define WPRINTF(params) printf params
/*
 * Per-device softc
 */
struct pci_vtrnd_softc {
	struct virtio_softc vrsc_vs;	/* common virtio device state */
	struct vqueue_info vrsc_vq;	/* the single request queue */
	pthread_mutex_t vrsc_mtx;
	uint64_t vrsc_cfg;
	int vrsc_fd;			/* open fd on /dev/random */
};
static void pci_vtrnd_reset(void *);
static void pci_vtrnd_notify(void *, struct vqueue_info *);
/* Callback table handed to the generic virtio layer for this device. */
static struct virtio_consts vtrnd_vi_consts = {
	"vtrnd", /* our name */
	1, /* we support 1 virtqueue */
	0, /* config reg size */
	pci_vtrnd_reset, /* reset */
	pci_vtrnd_notify, /* device-wide qnotify */
	NULL, /* read virtio config */
	NULL, /* write virtio config */
	NULL, /* apply negotiated features */
	0, /* our capabilities */
};
/*
 * Virtio reset handler: return rings, MSI-X state and negotiated
 * features to their power-on defaults.
 */
static void
pci_vtrnd_reset(void *vsc)
{
	struct pci_vtrnd_softc *sc = vsc;

	DPRINTF(("vtrnd: device reset requested !\n"));
	vi_reset_dev(&sc->vrsc_vs);
}
/*
 * Queue notification: satisfy each posted descriptor with bytes read
 * from /dev/random, then return the chains to the guest.
 */
static void
pci_vtrnd_notify(void *vsc, struct vqueue_info *vq)
{
	struct iovec iov;
	struct pci_vtrnd_softc *sc;
	int len;
	uint16_t idx;
	sc = vsc;
	if (sc->vrsc_fd < 0) {
		/* No entropy source available: complete nothing. */
		vq_endchains(vq, 0);
		return;
	}
	while (vq_has_descs(vq)) {
		vq_getchain(vq, &idx, &iov, 1, NULL);
		len = (int) read(sc->vrsc_fd, iov.iov_base, iov.iov_len);
		DPRINTF(("vtrnd: vtrnd_notify(): %d\r\n", len));
		/* Catastrophe if unable to read from /dev/random */
		assert(len > 0);
		/*
		 * Release this chain and handle more
		 */
		vq_relchain(vq, idx, (uint32_t)len);
	}
	vq_endchains(vq, 1); /* Generate interrupt if appropriate. */
}
/*
 * Device initialisation for the virtio entropy device.
 *
 * Opens /dev/random, verifies it is seeded and non-blocking, and keeps
 * the descriptor open for the lifetime of the emulation.  Returns 0 on
 * success, non-zero on failure.
 */
static int
pci_vtrnd_init(struct pci_devinst *pi, UNUSED char *opts)
{
	struct pci_vtrnd_softc *sc;
	int fd;
	int len;
	uint8_t v;
	/*
	 * Should always be able to open /dev/random.
	 */
	fd = open("/dev/random", O_RDONLY | O_NONBLOCK);
	assert(fd >= 0);
	/*
	 * Check that device is seeded and non-blocking.
	 */
	len = (int) read(fd, &v, sizeof(v));
	if (len <= 0) {
		WPRINTF(("vtrnd: /dev/random not ready, read(): %d", len));
		close(fd);	/* don't leak the descriptor on failure */
		return (1);
	}
	sc = calloc(1, sizeof(struct pci_vtrnd_softc));
	if (sc == NULL) {
		/* Fail the device instead of crashing on the NULL deref below. */
		WPRINTF(("vtrnd: calloc failed\n"));
		close(fd);
		return (1);
	}
	vi_softc_linkup(&sc->vrsc_vs, &vtrnd_vi_consts, sc, pi, &sc->vrsc_vq);
	sc->vrsc_vs.vs_mtx = &sc->vrsc_mtx;
	sc->vrsc_vq.vq_qsize = VTRND_RINGSZ;
	/* keep /dev/random opened while emulating */
	sc->vrsc_fd = fd;
	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_RANDOM);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_CRYPTO);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_ENTROPY);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);
	if (vi_intr_init(&sc->vrsc_vs, 1, fbsdrun_virtio_msix()))
		return (1);
	vi_set_io_bar(&sc->vrsc_vs, 0);
	return (0);
}
/*
 * PCI emulation glue: registers the "virtio-rnd" entropy device model
 * via the pci_devemu linker set.
 */
static struct pci_devemu pci_de_vrnd = {
	.pe_emu = "virtio-rnd",
	.pe_init = pci_vtrnd_init,
	.pe_barwrite = vi_pci_write,
	.pe_barread = vi_pci_read
};
PCI_EMUL_SET(pci_de_vrnd);

File diff suppressed because it is too large Load Diff

308
vendor/github.com/docker/hyperkit/src/lib/pm.c generated vendored Normal file
View File

@@ -0,0 +1,308 @@
/*-
* Copyright (c) 2013 Hudson River Trading LLC
* Written by: John H. Baldwin <jhb@FreeBSD.org>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <pthread.h>
#include <signal.h>
#include <xhyve/support/misc.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/acpi.h>
#include <xhyve/inout.h>
#include <xhyve/mevent.h>
#include <xhyve/pci_irq.h>
#include <xhyve/pci_lpc.h>
static pthread_mutex_t pm_lock = PTHREAD_MUTEX_INITIALIZER;
static struct mevent *power_button;
static sig_t old_power_handler;
/*
* Reset Control register at I/O port 0xcf9. Bit 2 forces a system
* reset when it transitions from 0 to 1. Bit 1 selects the type of
* reset to attempt: 0 selects a "soft" reset, and 1 selects a "hard"
* reset.
*/
/*
 * I/O handler for the Reset Control register (port 0xCF9).
 * Reads return the last value written; a write with bit 2 (0x4) set
 * requests a system reset via xh_vm_suspend().  Hard vs. soft reset
 * (bit 1) is deliberately not distinguished.
 */
static int
reset_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	int error;

	static uint8_t reset_control;	/* persists across calls: readable register state */

	if (bytes != 1)
		return (-1);
	if (in)
		*eax = reset_control;
	else {
		reset_control = (uint8_t) *eax;

		/* Treat hard and soft resets the same. */
		if (reset_control & 0x4) {
			error = xh_vm_suspend(VM_SUSPEND_RESET);
			/* EALREADY: a suspend is already in flight — benign */
			assert(error == 0 || errno == EALREADY);
		}
	}
	return (0);
}
INOUT_PORT(reset_reg, 0xCF9, IOPORT_F_INOUT, reset_handler);
/*
* ACPI's SCI is a level-triggered interrupt.
*/
static int sci_active;
/*
 * Raise the level-triggered SCI line.  Only acts on the inactive ->
 * active edge; repeated calls while already asserted are no-ops.
 */
static void
sci_assert(void)
{
	if (!sci_active) {
		xh_vm_isa_assert_irq(SCI_INT, SCI_INT);
		sci_active = 1;
	}
}
/*
 * Lower the SCI line.  Only acts on the active -> inactive edge;
 * repeated calls while already deasserted are no-ops.
 */
static void
sci_deassert(void)
{
	if (sci_active) {
		xh_vm_isa_deassert_irq(SCI_INT, SCI_INT);
		sci_active = 0;
	}
}
/*
* Power Management 1 Event Registers
*
* The only power management event supported is a power button upon
* receiving SIGTERM.
*/
static uint16_t pm1_enable, pm1_status;
#define PM1_TMR_STS 0x0001
#define PM1_BM_STS 0x0010
#define PM1_GBL_STS 0x0020
#define PM1_PWRBTN_STS 0x0100
#define PM1_SLPBTN_STS 0x0200
#define PM1_RTC_STS 0x0400
#define PM1_WAK_STS 0x8000
#define PM1_TMR_EN 0x0001
#define PM1_GBL_EN 0x0020
#define PM1_PWRBTN_EN 0x0100
#define PM1_SLPBTN_EN 0x0200
#define PM1_RTC_EN 0x0400
/*
 * Recompute the SCI line level: it must be asserted exactly when some
 * PM1 event is both pending (status bit set) and enabled (enable bit
 * set).  Callers hold pm_lock.
 */
static void
sci_update(void)
{
	int need_sci;

	/* See if the SCI should be active or not. */
	need_sci =
	    ((pm1_enable & PM1_TMR_EN) && (pm1_status & PM1_TMR_STS)) ||
	    ((pm1_enable & PM1_GBL_EN) && (pm1_status & PM1_GBL_STS)) ||
	    ((pm1_enable & PM1_PWRBTN_EN) && (pm1_status & PM1_PWRBTN_STS)) ||
	    ((pm1_enable & PM1_SLPBTN_EN) && (pm1_status & PM1_SLPBTN_STS)) ||
	    ((pm1_enable & PM1_RTC_EN) && (pm1_status & PM1_RTC_STS));

	if (need_sci)
		sci_assert();
	else
		sci_deassert();
}
/*
 * I/O handler for the PM1a event status register (16-bit).
 * Reads return the pending-event bits; writes are write-1-to-clear for
 * the supported event bits, after which the SCI level is recomputed.
 */
static int
pm1_status_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	if (bytes != 2)
		return (-1);

	pthread_mutex_lock(&pm_lock);
	if (in)
		*eax = pm1_status;
	else {
		/*
		 * Writes are only permitted to clear certain bits by
		 * writing 1 to those flags.
		 */
		pm1_status &= ~(*eax & (PM1_WAK_STS | PM1_RTC_STS |
		    PM1_SLPBTN_STS | PM1_PWRBTN_STS | PM1_BM_STS));
		sci_update();
	}
	pthread_mutex_unlock(&pm_lock);
	return (0);
}
/*
 * I/O handler for the PM1a event enable register (16-bit).
 * Reads return the current enable mask; writes may set only the
 * power-button and global-enable bits, then the SCI level is
 * recomputed.
 */
static int
pm1_enable_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	if (bytes != 2)
		return (-1);

	pthread_mutex_lock(&pm_lock);
	if (!in) {
		/*
		 * Only permit certain bits to be set.  We never use
		 * the global lock, but ACPI-CA whines profusely if it
		 * can't set GBL_EN.
		 */
		pm1_enable = *eax & (PM1_PWRBTN_EN | PM1_GBL_EN);
		sci_update();
	} else {
		*eax = pm1_enable;
	}
	pthread_mutex_unlock(&pm_lock);
	return (0);
}
INOUT_PORT(pm1_status, PM1A_EVT_ADDR, IOPORT_F_INOUT, pm1_status_handler);
INOUT_PORT(pm1_enable, PM1A_EVT_ADDR2, IOPORT_F_INOUT, pm1_enable_handler);
/*
 * Latch a power-button press into PM1 status and recompute the SCI
 * level.  A press that is already pending is not re-latched.
 */
void
push_power_button(void)
{
	pthread_mutex_lock(&pm_lock);
	if ((pm1_status & PM1_PWRBTN_STS) == 0) {
		pm1_status |= PM1_PWRBTN_STS;
		sci_update();
	}
	pthread_mutex_unlock(&pm_lock);
}
/*
 * mevent callback fired when SIGTERM is delivered: emulate a press of
 * the virtual power button so the guest can shut down cleanly.
 */
static void
power_button_handler(UNUSED int signal, UNUSED enum ev_type type,
	UNUSED void *arg)
{
	push_power_button();
}
/*
* Power Management 1 Control Register
*
* This is mostly unimplemented except that we wish to handle writes that
 * set SLP_EN to handle S5 (soft power off).
*/
static uint16_t pm1_control;
#define PM1_SCI_EN 0x0001
#define PM1_SLP_TYP 0x1c00
#define PM1_SLP_EN 0x2000
#define PM1_ALWAYS_ZERO 0xc003
/*
 * I/O handler for the PM1a control register (16-bit).
 * Reads return the register; writes update it (masking write-only and
 * reserved bits, preserving SCI_EN) and, when SLP_EN is written with
 * SLP_TYP == 5 (the value our _S5_ object advertises), power off the VM.
 *
 * NOTE(review): unlike the status/enable handlers this one does not take
 * pm_lock while touching pm1_control — confirm whether that is safe.
 */
static int
pm1_control_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	int error;

	if (bytes != 2)
		return (-1);
	if (in)
		*eax = pm1_control;
	else {
		/*
		 * Various bits are write-only or reserved, so force them
		 * to zero in pm1_control. Always preserve SCI_EN as OSPM
		 * can never change it.
		 */
		pm1_control = (uint16_t) ((pm1_control & PM1_SCI_EN) |
		    (*eax & ~((unsigned) (PM1_SLP_EN | PM1_ALWAYS_ZERO))));

		/*
		 * If SLP_EN is set, check for S5. Bhyve's _S5_ method
		 * says that '5' should be stored in SLP_TYP for S5.
		 */
		if (*eax & PM1_SLP_EN) {
			/* SLP_TYP occupies bits 12:10 of the register */
			if ((pm1_control & PM1_SLP_TYP) >> 10 == 5) {
				error = xh_vm_suspend(VM_SUSPEND_POWEROFF);
				/* EALREADY: suspend already in flight — benign */
				assert(error == 0 || errno == EALREADY);
			}
		}
	}
	return (0);
}
INOUT_PORT(pm1_control, PM1A_CNT_ADDR, IOPORT_F_INOUT, pm1_control_handler);
SYSRES_IO(PM1A_EVT_ADDR, 8);
/*
* ACPI SMI Command Register
*
* This write-only register is used to enable and disable ACPI.
*/
/*
 * Write-only handler for the ACPI SMI command port.
 * BHYVE_ACPI_ENABLE switches the chipset into ACPI mode (sets SCI_EN)
 * and installs a SIGTERM-driven power-button event; BHYVE_ACPI_DISABLE
 * reverses both.  Unknown command bytes are silently ignored (no
 * default case).
 */
static int
smi_cmd_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	/* registered IOPORT_F_OUT, so reads must never reach this handler */
	assert(!in);
	if (bytes != 1)
		return (-1);

	pthread_mutex_lock(&pm_lock);
	switch (*eax) {
	case BHYVE_ACPI_ENABLE:
		pm1_control |= PM1_SCI_EN;
		/* Install the power-button hook only once. */
		if (power_button == NULL) {
			/* route SIGTERM through mevent, and stop the default handler */
			power_button = mevent_add(SIGTERM, EVF_SIGNAL,
			    power_button_handler, NULL);
			old_power_handler = signal(SIGTERM, SIG_IGN);
		}
		break;
	case BHYVE_ACPI_DISABLE:
		pm1_control &= ~PM1_SCI_EN;
		if (power_button != NULL) {
			mevent_delete(power_button);
			power_button = NULL;
			/* restore whatever SIGTERM disposition we displaced */
			signal(SIGTERM, old_power_handler);
		}
		break;
	}
	pthread_mutex_unlock(&pm_lock);
	return (0);
}
INOUT_PORT(smi_cmd, SMI_CMD, IOPORT_F_OUT, smi_cmd_handler);
SYSRES_IO(SMI_CMD, 1);
/*
 * One-time setup of the System Control Interrupt: register SCI_INT with
 * the PIRQ router and configure it as level-triggered, matching ACPI's
 * requirement for the SCI.
 */
void
sci_init(void)
{
	/*
	 * Mark ACPI's SCI as level trigger and bump its use count
	 * in the PIRQ router.
	 */
	pci_irq_use(SCI_INT);
	xh_vm_isa_set_irq_trigger(SCI_INT, LEVEL_TRIGGER);
}

50
vendor/github.com/docker/hyperkit/src/lib/post.c generated vendored Normal file
View File

@@ -0,0 +1,50 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <assert.h>
#include <xhyve/support/misc.h>
#include <xhyve/inout.h>
#include <xhyve/pci_lpc.h>
/*
 * Read-only handler for the POST diagnostic port (0x84): there is no
 * real POST code to report, so any 1-byte read returns 0xff.
 */
static int
post_data_handler(UNUSED int vcpu, int in, UNUSED int port, int bytes,
	uint32_t *eax, UNUSED void *arg)
{
	/* registered IOPORT_F_IN, so writes must never reach this handler */
	assert(in == 1);

	if (bytes != 1)
		return (-1);

	*eax = 0xff;		/* return some garbage */
	return (0);
}
INOUT_PORT(post, 0x84, IOPORT_F_IN, post_data_handler);
SYSRES_IO(0x84, 1);

120
vendor/github.com/docker/hyperkit/src/lib/rtc.c generated vendored Normal file
View File

@@ -0,0 +1,120 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <time.h>
#include <assert.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/acpi.h>
#include <xhyve/pci_lpc.h>
#include <xhyve/rtc.h>
#define IO_RTC 0x70
#define RTC_LMEM_LSB 0x34
#define RTC_LMEM_MSB 0x35
#define RTC_HMEM_LSB 0x5b
#define RTC_HMEM_SB 0x5c
#define RTC_HMEM_MSB 0x5d
#define m_64KB (64*1024)
#define m_16MB (16*1024*1024)
/*
* Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970
*/
/*
 * Returns the current RTC time as number of seconds since 00:00:00 Jan 1, 1970.
 *
 * With use_localtime set, the wall-clock local time is re-encoded as if
 * it were UTC (localtime_r followed by timegm), which is how a guest
 * expecting the hardware clock to hold local time sees it.
 */
static time_t
rtc_time(int use_localtime)
{
	time_t now;

	time(&now);
	if (!use_localtime)
		return (now);

	struct tm local;
	localtime_r(&now, &local);
	return (timegm(&local));
}
/*
 * Program the emulated RTC's NVRAM with the guest memory size (as UEFI
 * expects to find it) and set the RTC to the current host time.
 */
void
rtc_init(int use_localtime)
{
	size_t himem;
	size_t lomem;
	int err;

	/* XXX init diag/reset code/equipment/checksum ? */

	/*
	 * Report guest memory size in nvram cells as required by UEFI.
	 * Little-endian encoding.
	 * 0x34/0x35 - 64KB chunks above 16MB, below 4GB
	 * 0x5b/0x5c/0x5d - 64KB chunks above 4GB
	 */
	/* NOTE(review): assumes lowmem >= 16MB; size_t underflows otherwise — confirm caller guarantees this */
	lomem = (xh_vm_get_lowmem_size() - m_16MB) / m_64KB;
	err = xh_vm_rtc_write(RTC_LMEM_LSB, ((uint8_t) lomem));
	assert(err == 0);
	err = xh_vm_rtc_write(RTC_LMEM_MSB, ((uint8_t) (lomem >> 8)));
	assert(err == 0);

	himem = xh_vm_get_highmem_size() / m_64KB;
	err = xh_vm_rtc_write(RTC_HMEM_LSB, ((uint8_t) himem));
	assert(err == 0);
	err = xh_vm_rtc_write(RTC_HMEM_SB, ((uint8_t) (himem >> 8)));
	assert(err == 0);
	err = xh_vm_rtc_write(RTC_HMEM_MSB, ((uint8_t) (himem >> 16)));
	assert(err == 0);

	err = xh_vm_rtc_settime(rtc_time(use_localtime));
	assert(err == 0);
}
/*
 * Emit the ACPI DSDT fragment describing the RTC device: PNP0B00 with
 * its fixed I/O ports (base IO_RTC, 2 ports) and IRQ 8.
 */
static void
rtc_dsdt(void)
{
	dsdt_line("");
	dsdt_line("Device (RTC)");
	dsdt_line("{");
	dsdt_line("  Name (_HID, EisaId (\"PNP0B00\"))");
	dsdt_line("  Name (_CRS, ResourceTemplate ()");
	dsdt_line("  {");
	dsdt_indent(2);
	dsdt_fixed_ioport(IO_RTC, 2);
	dsdt_fixed_irq(8);
	dsdt_unindent(2);
	dsdt_line("  })");
	dsdt_line("}");
}
LPC_DSDT(rtc_dsdt);
/*
* Reserve the extended RTC I/O ports although they are not emulated at this
* time.
*/
SYSRES_IO(0x72, 6);

823
vendor/github.com/docker/hyperkit/src/lib/smbiostbl.c generated vendored Normal file
View File

@@ -0,0 +1,823 @@
/*-
* Copyright (c) 2014 Tycho Nightingale <tycho.nightingale@pluribusnetworks.com>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/param.h>
#include <xhyve/support/misc.h>
#include <xhyve/support/md5.h>
#include <xhyve/support/uuid.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/xhyve.h>
#include <xhyve/smbiostbl.h>
#define GB (1024ULL*1024*1024)
#define SMBIOS_BASE 0xF1000
/* (BHYVE_ACPI_BASE - SMBIOS_BASE) */
#define SMBIOS_MAX_LENGTH (0xF2400 - 0xF1000)
#define SMBIOS_TYPE_BIOS 0
#define SMBIOS_TYPE_SYSTEM 1
#define SMBIOS_TYPE_CHASSIS 3
#define SMBIOS_TYPE_PROCESSOR 4
#define SMBIOS_TYPE_MEMARRAY 16
#define SMBIOS_TYPE_MEMDEVICE 17
#define SMBIOS_TYPE_MEMARRAYMAP 19
#define SMBIOS_TYPE_BOOT 32
#define SMBIOS_TYPE_EOT 127
struct smbios_structure {
uint8_t type;
uint8_t length;
uint16_t handle;
} __packed;
typedef int (*initializer_func_t)(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
struct smbios_template_entry {
struct smbios_structure *entry;
const char **strings;
initializer_func_t initializer;
};
/*
* SMBIOS Structure Table Entry Point
*/
#define SMBIOS_ENTRY_EANCHOR "_SM_"
#define SMBIOS_ENTRY_EANCHORLEN 4
#define SMBIOS_ENTRY_IANCHOR "_DMI_"
#define SMBIOS_ENTRY_IANCHORLEN 5
struct smbios_entry_point {
char eanchor[4]; /* anchor tag */
uint8_t echecksum; /* checksum of entry point structure */
uint8_t eplen; /* length in bytes of entry point */
uint8_t major; /* major version of the SMBIOS spec */
uint8_t minor; /* minor version of the SMBIOS spec */
uint16_t maxssize; /* maximum size in bytes of a struct */
uint8_t revision; /* entry point structure revision */
uint8_t format[5]; /* entry point rev-specific data */
char ianchor[5]; /* intermediate anchor tag */
uint8_t ichecksum; /* intermediate checksum */
uint16_t stlen; /* len in bytes of structure table */
uint32_t staddr; /* physical addr of structure table */
uint16_t stnum; /* number of structure table entries */
uint8_t bcdrev; /* BCD value representing DMI ver */
} __packed;
/*
* BIOS Information
*/
#define SMBIOS_FL_ISA 0x00000010 /* ISA is supported */
#define SMBIOS_FL_PCI 0x00000080 /* PCI is supported */
#define SMBIOS_FL_SHADOW 0x00001000 /* BIOS shadowing is allowed */
#define SMBIOS_FL_CDBOOT 0x00008000 /* Boot from CD is supported */
// #define SMBIOS_FL_SELBOOT 0x00010000 /* Selectable Boot supported */
#define SMBIOS_FL_EDD 0x00080000 /* EDD Spec is supported */
#define SMBIOS_XB1_FL_ACPI 0x00000001 /* ACPI is supported */
#define SMBIOS_XB2_FL_BBS 0x00000001 /* BIOS Boot Specification */
#define SMBIOS_XB2_FL_VM 0x00000010 /* Virtual Machine */
struct smbios_table_type0 {
struct smbios_structure header;
uint8_t vendor; /* vendor string */
uint8_t version; /* version string */
uint16_t segment; /* address segment location */
uint8_t rel_date; /* release date */
uint8_t size; /* rom size */
uint64_t cflags; /* characteristics */
uint8_t xc_bytes[2]; /* characteristics ext bytes */
uint8_t sb_major_rel; /* system bios version */
uint8_t sb_minor_rele;
uint8_t ecfw_major_rel; /* embedded ctrl fw version */
uint8_t ecfw_minor_rel;
} __packed;
/*
* System Information
*/
#define SMBIOS_WAKEUP_SWITCH 0x06 /* power switch */
struct smbios_table_type1 {
struct smbios_structure header;
uint8_t manufacturer; /* manufacturer string */
uint8_t product; /* product name string */
uint8_t version; /* version string */
uint8_t serial; /* serial number string */
uint8_t uuid[16]; /* uuid byte array */
uint8_t wakeup; /* wake-up event */
uint8_t sku; /* sku number string */
uint8_t family; /* family name string */
} __packed;
/*
* System Enclosure or Chassis
*/
#define SMBIOS_CHT_UNKNOWN 0x02 /* unknown */
#define SMBIOS_CHST_SAFE 0x03 /* safe */
#define SMBIOS_CHSC_NONE 0x03 /* none */
struct smbios_table_type3 {
struct smbios_structure header;
uint8_t manufacturer; /* manufacturer string */
uint8_t type; /* type */
uint8_t version; /* version string */
uint8_t serial; /* serial number string */
uint8_t asset; /* asset tag string */
uint8_t bustate; /* boot-up state */
uint8_t psstate; /* power supply state */
uint8_t tstate; /* thermal state */
uint8_t security; /* security status */
uint8_t uheight; /* height in 'u's */
uint8_t cords; /* number of power cords */
uint8_t elems; /* number of element records */
uint8_t elemlen; /* length of records */
uint8_t sku; /* sku number string */
} __packed;
/*
* Processor Information
*/
#define SMBIOS_PRT_CENTRAL 0x03 /* central processor */
#define SMBIOS_PRF_OTHER 0x01 /* other */
#define SMBIOS_PRS_PRESENT 0x40 /* socket is populated */
#define SMBIOS_PRS_ENABLED 0x1 /* enabled */
#define SMBIOS_PRU_NONE 0x06 /* none */
#define SMBIOS_PFL_64B 0x04 /* 64-bit capable */
struct smbios_table_type4 {
struct smbios_structure header;
uint8_t socket; /* socket designation string */
uint8_t type; /* processor type */
uint8_t family; /* processor family */
uint8_t manufacturer; /* manufacturer string */
uint64_t cpuid; /* processor cpuid */
uint8_t version; /* version string */
uint8_t voltage; /* voltage */
uint16_t clkspeed; /* ext clock speed in mhz */
uint16_t maxspeed; /* maximum speed in mhz */
uint16_t curspeed; /* current speed in mhz */
uint8_t status; /* status */
uint8_t upgrade; /* upgrade */
uint16_t l1handle; /* l1 cache handle */
uint16_t l2handle; /* l2 cache handle */
uint16_t l3handle; /* l3 cache handle */
uint8_t serial; /* serial number string */
uint8_t asset; /* asset tag string */
uint8_t part; /* part number string */
uint8_t cores; /* cores per socket */
uint8_t ecores; /* enabled cores */
uint8_t threads; /* threads per socket */
uint16_t cflags; /* processor characteristics */
uint16_t family2; /* processor family 2 */
} __packed;
/*
* Physical Memory Array
*/
#define SMBIOS_MAL_SYSMB 0x03 /* system board or motherboard */
#define SMBIOS_MAU_SYSTEM 0x03 /* system memory */
#define SMBIOS_MAE_NONE 0x03 /* none */
struct smbios_table_type16 {
struct smbios_structure header;
uint8_t location; /* physical device location */
uint8_t use; /* device functional purpose */
uint8_t ecc; /* err detect/correct method */
uint32_t size; /* max mem capacity in kb */
uint16_t errhand; /* handle of error (if any) */
uint16_t ndevs; /* num of slots or sockets */
uint64_t xsize; /* max mem capacity in bytes */
} __packed;
/*
* Memory Device
*/
#define SMBIOS_MDFF_UNKNOWN 0x02 /* unknown */
#define SMBIOS_MDT_UNKNOWN 0x02 /* unknown */
#define SMBIOS_MDF_UNKNOWN 0x0004 /* unknown */
struct smbios_table_type17 {
struct smbios_structure header;
uint16_t arrayhand; /* handle of physl mem array */
uint16_t errhand; /* handle of mem error data */
uint16_t twidth; /* total width in bits */
uint16_t dwidth; /* data width in bits */
uint16_t size; /* size in bytes */
uint8_t form; /* form factor */
uint8_t set; /* set */
uint8_t dloc; /* device locator string */
uint8_t bloc; /* phys bank locator string */
uint8_t type; /* memory type */
uint16_t flags; /* memory characteristics */
uint16_t maxspeed; /* maximum speed in mhz */
uint8_t manufacturer; /* manufacturer string */
uint8_t serial; /* serial number string */
uint8_t asset; /* asset tag string */
uint8_t part; /* part number string */
uint8_t attributes; /* attributes */
uint32_t xsize; /* extended size in mbs */
uint16_t curspeed; /* current speed in mhz */
uint16_t minvoltage; /* minimum voltage */
uint16_t maxvoltage; /* maximum voltage */
uint16_t curvoltage; /* configured voltage */
} __packed;
/*
* Memory Array Mapped Address
*/
struct smbios_table_type19 {
struct smbios_structure header;
uint32_t saddr; /* start phys addr in kb */
uint32_t eaddr; /* end phys addr in kb */
uint16_t arrayhand; /* physical mem array handle */
uint8_t width; /* num of dev in row */
uint64_t xsaddr; /* start phys addr in bytes */
uint64_t xeaddr; /* end phys addr in bytes */
} __packed;
/*
* System Boot Information
*/
#define SMBIOS_BOOT_NORMAL 0 /* no errors detected */
struct smbios_table_type32 {
struct smbios_structure header;
uint8_t reserved[6];
uint8_t status; /* boot status */
} __packed;
/*
* End-of-Table
*/
struct smbios_table_type127 {
struct smbios_structure header;
} __packed;
static struct smbios_table_type0 smbios_type0_template = {
{ SMBIOS_TYPE_BIOS, sizeof (struct smbios_table_type0), 0 },
1, /* bios vendor string */
2, /* bios version string */
0xF000, /* bios address segment location */
3, /* bios release date */
0x0, /* bios size (64k * (n + 1) is the size in bytes) */
SMBIOS_FL_ISA | SMBIOS_FL_PCI | SMBIOS_FL_SHADOW |
SMBIOS_FL_CDBOOT | SMBIOS_FL_EDD,
{ SMBIOS_XB1_FL_ACPI, SMBIOS_XB2_FL_BBS | SMBIOS_XB2_FL_VM },
0x0, /* bios major release */
0x0, /* bios minor release */
0xff, /* embedded controller firmware major release */
0xff /* embedded controller firmware minor release */
};
static const char *smbios_type0_strings[] = {
"BHYVE", /* vendor string */
"1.00", /* bios version string */
"03/14/2014", /* bios release date string */
NULL
};
static struct smbios_table_type1 smbios_type1_template = {
{ SMBIOS_TYPE_SYSTEM, sizeof (struct smbios_table_type1), 0 },
1, /* manufacturer string */
2, /* product string */
3, /* version string */
4, /* serial number string */
{ 0 },
SMBIOS_WAKEUP_SWITCH,
5, /* sku string */
6 /* family string */
};
static int smbios_type1_initializer(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
static const char *smbios_type1_strings[] = {
" ", /* manufacturer string */
"BHYVE", /* product name string */
"1.0", /* version string */
"None", /* serial number string */
"None", /* sku string */
" ", /* family name string */
NULL
};
static struct smbios_table_type3 smbios_type3_template = {
{ SMBIOS_TYPE_CHASSIS, sizeof (struct smbios_table_type3), 0 },
1, /* manufacturer string */
SMBIOS_CHT_UNKNOWN,
2, /* version string */
3, /* serial number string */
4, /* asset tag string */
SMBIOS_CHST_SAFE,
SMBIOS_CHST_SAFE,
SMBIOS_CHST_SAFE,
SMBIOS_CHSC_NONE,
0, /* height in 'u's (0=enclosure height unspecified) */
0, /* number of power cords (0=number unspecified) */
0, /* number of contained element records */
0, /* length of records */
5 /* sku number string */
};
static const char *smbios_type3_strings[] = {
" ", /* manufacturer string */
"1.0", /* version string */
"None", /* serial number string */
"None", /* asset tag string */
"None", /* sku number string */
NULL
};
static struct smbios_table_type4 smbios_type4_template = {
{ SMBIOS_TYPE_PROCESSOR, sizeof (struct smbios_table_type4), 0 },
1, /* socket designation string */
SMBIOS_PRT_CENTRAL,
SMBIOS_PRF_OTHER,
2, /* manufacturer string */
0, /* cpuid */
3, /* version string */
0, /* voltage */
0, /* external clock frequency in mhz (0=unknown) */
0, /* maximum frequency in mhz (0=unknown) */
0, /* current frequency in mhz (0=unknown) */
SMBIOS_PRS_PRESENT | SMBIOS_PRS_ENABLED,
SMBIOS_PRU_NONE,
(uint16_t)-1, /* l1 cache handle */
(uint16_t)-1, /* l2 cache handle */
(uint16_t)-1, /* l3 cache handle */
4, /* serial number string */
5, /* asset tag string */
6, /* part number string */
0, /* cores per socket (0=unknown) */
0, /* enabled cores per socket (0=unknown) */
0, /* threads per socket (0=unknown) */
SMBIOS_PFL_64B,
SMBIOS_PRF_OTHER
};
static const char *smbios_type4_strings[] = {
" ", /* socket designation string */
" ", /* manufacturer string */
" ", /* version string */
"None", /* serial number string */
"None", /* asset tag string */
"None", /* part number string */
NULL
};
static int smbios_type4_initializer(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
static struct smbios_table_type16 smbios_type16_template = {
{ SMBIOS_TYPE_MEMARRAY, sizeof (struct smbios_table_type16), 0 },
SMBIOS_MAL_SYSMB,
SMBIOS_MAU_SYSTEM,
SMBIOS_MAE_NONE,
0x80000000, /* max mem capacity in kb (0x80000000=use extended) */
(uint16_t)-1, /* handle of error (if any) */
0, /* number of slots or sockets (TBD) */
0 /* extended maximum memory capacity in bytes (TBD) */
};
static int smbios_type16_initializer(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
static struct smbios_table_type17 smbios_type17_template = {
{ SMBIOS_TYPE_MEMDEVICE, sizeof (struct smbios_table_type17), 0 },
(uint16_t)-1, /* handle of physical memory array */
(uint16_t)-1, /* handle of memory error data */
64, /* total width in bits including ecc */
64, /* data width in bits */
0x7fff, /* size in bytes (0x7fff=use extended)*/
SMBIOS_MDFF_UNKNOWN,
0, /* set (0x00=none, 0xff=unknown) */
1, /* device locator string */
2, /* physical bank locator string */
SMBIOS_MDT_UNKNOWN,
SMBIOS_MDF_UNKNOWN,
0, /* maximum memory speed in mhz (0=unknown) */
3, /* manufacturer string */
4, /* serial number string */
5, /* asset tag string */
6, /* part number string */
0, /* attributes (0=unknown rank information) */
0, /* extended size in mb (TBD) */
0, /* current speed in mhz (0=unknown) */
0, /* minimum voltage in mv (0=unknown) */
0, /* maximum voltage in mv (0=unknown) */
0 /* configured voltage in mv (0=unknown) */
};
static const char *smbios_type17_strings[] = {
" ", /* device locator string */
" ", /* physical bank locator string */
" ", /* manufacturer string */
"None", /* serial number string */
"None", /* asset tag string */
"None", /* part number string */
NULL
};
static int smbios_type17_initializer(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
static struct smbios_table_type19 smbios_type19_template = {
{ SMBIOS_TYPE_MEMARRAYMAP, sizeof (struct smbios_table_type19), 0 },
0xffffffff, /* starting phys addr in kb (0xffffffff=use ext) */
0xffffffff, /* ending phys addr in kb (0xffffffff=use ext) */
(uint16_t)-1, /* physical memory array handle */
1, /* number of devices that form a row */
0, /* extended starting phys addr in bytes (TDB) */
0 /* extended ending phys addr in bytes (TDB) */
};
static int smbios_type19_initializer(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
static struct smbios_table_type32 smbios_type32_template = {
{ SMBIOS_TYPE_BOOT, sizeof (struct smbios_table_type32), 0 },
{ 0, 0, 0, 0, 0, 0 },
SMBIOS_BOOT_NORMAL
};
static struct smbios_table_type127 smbios_type127_template = {
{ SMBIOS_TYPE_EOT, sizeof (struct smbios_table_type127), 0 }
};
static int smbios_generic_initializer(struct smbios_structure *template_entry,
const char **template_strings, char *curaddr, char **endaddr,
uint16_t *n, uint16_t *size);
static struct smbios_template_entry smbios_template[] = {
{ (struct smbios_structure *)&smbios_type0_template,
smbios_type0_strings,
smbios_generic_initializer },
{ (struct smbios_structure *)&smbios_type1_template,
smbios_type1_strings,
smbios_type1_initializer },
{ (struct smbios_structure *)&smbios_type3_template,
smbios_type3_strings,
smbios_generic_initializer },
{ (struct smbios_structure *)&smbios_type4_template,
smbios_type4_strings,
smbios_type4_initializer },
{ (struct smbios_structure *)&smbios_type16_template,
NULL,
smbios_type16_initializer },
{ (struct smbios_structure *)&smbios_type17_template,
smbios_type17_strings,
smbios_type17_initializer },
{ (struct smbios_structure *)&smbios_type19_template,
NULL,
smbios_type19_initializer },
{ (struct smbios_structure *)&smbios_type32_template,
NULL,
smbios_generic_initializer },
{ (struct smbios_structure *)&smbios_type127_template,
NULL,
smbios_generic_initializer },
{ NULL,NULL, NULL }
};
static uint64_t guest_lomem, guest_himem;
static uint16_t type16_handle;
/*
 * Copy a template SMBIOS structure plus its string-set into the guest
 * table at 'curaddr', assign it the next sequential handle, bump the
 * structure count *n, and return the first free byte via *endaddr.
 */
static int
smbios_generic_initializer(struct smbios_structure *template_entry,
	const char **template_strings, char *curaddr, char **endaddr, uint16_t *n,
	UNUSED uint16_t *size)
{
	struct smbios_structure *entry;

	memcpy(curaddr, template_entry, template_entry->length);
	entry = (struct smbios_structure *)curaddr;
	/* handles are assigned in table order, starting at 1 */
	entry->handle = *n + 1;
	curaddr += entry->length;
	if (template_strings != NULL) {
		int i;

		/* append each NUL-terminated string, then the set terminator */
		for (i = 0; template_strings[i] != NULL; i++) {
			const char *string;
			int len;

			string = template_strings[i];
			len = (int)(strlen(string) + 1);
			memcpy(curaddr, string, len);
			curaddr += len;
		}
		*curaddr = '\0';
		curaddr++;
	} else {
		/* Minimum string section is double nul */
		*curaddr = '\0';
		curaddr++;
		*curaddr = '\0';
		curaddr++;
	}
	(*n)++;
	*endaddr = curaddr;

	return (0);
}
/*
 * Emit the Type 1 (System Information) structure.  The UUID field is
 * either decoded from the user-supplied guest_uuid_str, or synthesized
 * deterministically from an MD5 of the VM name and host hostname so the
 * same VM gets the same UUID across runs.
 */
static int
smbios_type1_initializer(struct smbios_structure *template_entry,
	const char **template_strings, char *curaddr, char **endaddr,
	uint16_t *n, uint16_t *size)
{
	struct smbios_table_type1 *type1;

	smbios_generic_initializer(template_entry, template_strings,
	    curaddr, endaddr, n, size);
	type1 = (struct smbios_table_type1 *)curaddr;

	if (guest_uuid_str != NULL) {
		uuid_t uuid;
		uint32_t status;

		uuid_from_string(guest_uuid_str, &uuid, &status);
		if (status != uuid_s_ok)
			return (-1);

		uuid_enc_le(&type1->uuid, &uuid);
	} else {
		MD5_CTX mdctx;
		u_char digest[16];
		char hostname[MAXHOSTNAMELEN];

		/*
		 * Universally unique and yet reproducible are an
		 * oxymoron, however reproducible is desirable in
		 * this case.
		 */
		if (gethostname(hostname, sizeof(hostname)))
			return (-1);

		MD5Init(&mdctx);
		MD5Update(&mdctx, vmname, (unsigned)strlen(vmname));
		MD5Update(&mdctx, hostname, (unsigned)sizeof(hostname));
		MD5Final(digest, &mdctx);

		/*
		 * Set the variant and version number.
		 */
		digest[6] &= 0x0F;
		digest[6] |= 0x30;	/* version 3 (name/MD5-based UUID) */
		digest[8] &= 0x3F;
		digest[8] |= 0x80;	/* RFC 4122 variant bits */
		memcpy(&type1->uuid, digest, sizeof (digest));
	}

	return (0);
}
/*
 * Emit one Type 4 (Processor Information) structure per guest vCPU.
 * Each copy gets an extra "CPU #<i>" string appended to its string-set
 * and its socket-designation index pointed at that new string.
 */
static int
smbios_type4_initializer(struct smbios_structure *template_entry,
	const char **template_strings, char *curaddr, char **endaddr,
	uint16_t *n, uint16_t *size)
{
	int i;

	for (i = 0; i < guest_ncpus; i++) {
		struct smbios_table_type4 *type4;
		char *p;
		int nstrings, len;

		smbios_generic_initializer(template_entry, template_strings,
		    curaddr, endaddr, n, size);
		type4 = (struct smbios_table_type4 *)curaddr;

		/* count the strings the template already emitted */
		nstrings = 0;
		p = curaddr + sizeof (struct smbios_table_type4);
		while (p < *endaddr - 1) {
			if (*p++ == '\0')
				nstrings++;
		}

		/*
		 * Overwrite the string-set terminator with the new string,
		 * then re-terminate the set.
		 * NOTE(review): sprintf is unbounded here — presumably the
		 * SMBIOS region sizing guarantees room; verify against
		 * SMBIOS_MAX_LENGTH handling in the caller.
		 */
		len = sprintf(*endaddr - 1, "CPU #%d", i) + 1;
		*endaddr += len - 1;
		*(*endaddr) = '\0';
		(*endaddr)++;
		type4->socket = (uint8_t)(nstrings + 1);
		curaddr = *endaddr;
	}

	return (0);
}
/*
 * Populate the SMBIOS type 16 (Physical Memory Array) table and record its
 * handle so the type 17/19 tables can refer back to it.
 */
static int
smbios_type16_initializer(struct smbios_structure *template_entry,
	const char **template_strings, char *curaddr, char **endaddr,
	uint16_t *n, uint16_t *size)
{
	struct smbios_table_type16 *t16;

	/* The handle assigned by the generic initializer will be *n + 1. */
	type16_handle = *n;
	smbios_generic_initializer(template_entry, template_strings,
	    curaddr, endaddr, n, size);
	t16 = (struct smbios_table_type16 *)curaddr;

	/* Total installed memory; one device per populated region. */
	t16->xsize = guest_lomem + guest_himem;
	if (guest_himem > 0)
		t16->ndevs = 2;
	else
		t16->ndevs = 1;
	return (0);
}
/*
 * Populate the SMBIOS type 17 (Memory Device) tables: one for the low
 * (below 4GB) region, and a second one only when high memory is present.
 */
static int
smbios_type17_initializer(struct smbios_structure *template_entry,
	const char **template_strings, char *curaddr, char **endaddr,
	uint16_t *n, uint16_t *size)
{
	struct smbios_table_type17 *dev;

	/* Low memory device. */
	smbios_generic_initializer(template_entry, template_strings,
	    curaddr, endaddr, n, size);
	dev = (struct smbios_table_type17 *)curaddr;
	dev->arrayhand = type16_handle;
	dev->xsize = (uint32_t)guest_lomem;

	/* High memory device, if any. */
	if (guest_himem > 0) {
		curaddr = *endaddr;
		smbios_generic_initializer(template_entry, template_strings,
		    curaddr, endaddr, n, size);
		dev = (struct smbios_table_type17 *)curaddr;
		dev->arrayhand = type16_handle;
		dev->xsize = (uint32_t)guest_himem;
	}
	return (0);
}
/*
 * Populate the SMBIOS type 19 (Memory Array Mapped Address) tables, one per
 * populated address range: [0, guest_lomem) and, when high memory exists,
 * [4GB, guest_himem).
 */
static int
smbios_type19_initializer(struct smbios_structure *template_entry,
	const char **template_strings, char *curaddr, char **endaddr,
	uint16_t *n, uint16_t *size)
{
	struct smbios_table_type19 *map;

	/* Mapping for the low region. */
	smbios_generic_initializer(template_entry, template_strings,
	    curaddr, endaddr, n, size);
	map = (struct smbios_table_type19 *)curaddr;
	map->arrayhand = type16_handle;
	map->xsaddr = 0;
	map->xeaddr = guest_lomem;

	/* Mapping for the high region, if any. */
	if (guest_himem > 0) {
		curaddr = *endaddr;
		smbios_generic_initializer(template_entry, template_strings,
		    curaddr, endaddr, n, size);
		map = (struct smbios_table_type19 *)curaddr;
		map->arrayhand = type16_handle;
		map->xsaddr = 4*GB;
		map->xeaddr = guest_himem;
	}
	return (0);
}
/*
 * Initialize the SMBIOS 2.6 entry point structure. 'staddr' is the guest
 * physical address at which the structure table begins.
 */
static void
smbios_ep_initializer(struct smbios_entry_point *smbios_ep, uint32_t staddr)
{
	memset(smbios_ep, 0, sizeof(*smbios_ep));

	/* Anchor strings identify the entry point and its intermediate part. */
	memcpy(smbios_ep->eanchor, SMBIOS_ENTRY_EANCHOR,
	    SMBIOS_ENTRY_EANCHORLEN);
	memcpy(smbios_ep->ianchor, SMBIOS_ENTRY_IANCHOR,
	    SMBIOS_ENTRY_IANCHORLEN);

	/* The 2.x entry point is exactly 0x1F bytes long. */
	smbios_ep->eplen = 0x1F;
	assert(sizeof (struct smbios_entry_point) == smbios_ep->eplen);

	smbios_ep->major = 2;
	smbios_ep->minor = 6;
	smbios_ep->revision = 0;
	smbios_ep->staddr = staddr;
	smbios_ep->bcdrev = 0x24;	/* BCD-encoded version 2.4 */
}
/*
 * Fill in the table length/count fields of the entry point and compute the
 * two checksums. Each checksum byte is chosen so the covered byte range sums
 * to zero (mod 256); the intermediate checksum must be stored before the
 * outer one is computed since the outer range includes it.
 */
static void
smbios_ep_finalizer(struct smbios_entry_point *smbios_ep, uint16_t len,
	uint16_t num, uint16_t maxssize)
{
	const uint8_t *raw = (const uint8_t *)smbios_ep;
	uint8_t sum;
	int i;

	smbios_ep->maxssize = maxssize;
	smbios_ep->stlen = len;
	smbios_ep->stnum = num;

	/* Intermediate checksum: bytes 0x10 .. 0x1e. */
	sum = 0;
	for (i = 0x10; i < 0x1f; i++)
		sum = (uint8_t)(sum - raw[i]);
	smbios_ep->ichecksum = sum;

	/* Entry point checksum: the full 0x1f-byte structure. */
	sum = 0;
	for (i = 0; i < 0x1f; i++)
		sum = (uint8_t)(sum - raw[i]);
	smbios_ep->echecksum = sum;
}
/*
 * Build the guest's SMBIOS tables in mapped guest memory at SMBIOS_BASE:
 * write the entry point, run every initializer in smbios_template[] to emit
 * the structure table, and finalize the entry point checksums.
 *
 * Returns 0 on success, ENOMEM if SMBIOS_BASE is not mapped, or the first
 * non-zero error from a table initializer.
 */
int
smbios_build(void)
{
	struct smbios_entry_point *smbios_ep;
	uint16_t n;		/* running count of emitted structures */
	uint16_t maxssize;	/* size of the largest structure */
	char *curaddr, *startaddr, *ststartaddr;
	int i;
	int err;

	guest_lomem = xh_vm_get_lowmem_size();
	guest_himem = xh_vm_get_highmem_size();

	startaddr = paddr_guest2host(SMBIOS_BASE, SMBIOS_MAX_LENGTH);
	if (startaddr == NULL) {
		fprintf(stderr, "smbios table requires mapped mem\n");
		return (ENOMEM);
	}

	curaddr = startaddr;

	/* Entry point first; the structure table follows immediately. */
	smbios_ep = (struct smbios_entry_point *)curaddr;
	smbios_ep_initializer(smbios_ep, SMBIOS_BASE +
	    sizeof(struct smbios_entry_point));
	curaddr += sizeof(struct smbios_entry_point);
	ststartaddr = curaddr;

	n = 0;
	maxssize = 0;
	for (i = 0; smbios_template[i].entry != NULL; i++) {
		struct smbios_structure *entry;
		const char **strings;
		initializer_func_t initializer;
		char *endaddr;
		uint16_t size;

		entry = smbios_template[i].entry;
		strings = smbios_template[i].strings;
		initializer = smbios_template[i].initializer;

		/* Each initializer advances endaddr past what it emitted. */
		err = (*initializer)(entry, strings, curaddr, &endaddr,
		    &n, &size);
		if (err != 0)
			return (err);

		if (size > maxssize)
			maxssize = size;

		curaddr = endaddr;
	}

	assert(curaddr - startaddr < SMBIOS_MAX_LENGTH);
	smbios_ep_finalizer(smbios_ep, ((uint16_t) (curaddr - ststartaddr)), n,
	    maxssize);

	return (0);
}

938
vendor/github.com/docker/hyperkit/src/lib/task_switch.c generated vendored Normal file
View File

@@ -0,0 +1,938 @@
/*-
* Copyright (c) 2014 Neel Natu <neel@freebsd.org>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <xhyve/support/psl.h>
#include <xhyve/support/segments.h>
#include <xhyve/support/specialreg.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/xhyve.h>
/*
* Using 'struct i386tss' is tempting but causes myriad sign extension
* issues because all of its fields are defined as signed integers.
*/
/*
 * Hardware-defined layout of a 32-bit task-state segment (field order and
 * padding must not change). The rsvdN fields are the reserved high halves
 * of 32-bit slots that hold 16-bit values.
 */
struct tss32 {
	uint16_t tss_link;	/* selector of the previous task's TSS */
	uint16_t rsvd1;
	uint32_t tss_esp0;	/* ring 0-2 stack pointers/selectors */
	uint16_t tss_ss0;
	uint16_t rsvd2;
	uint32_t tss_esp1;
	uint16_t tss_ss1;
	uint16_t rsvd3;
	uint32_t tss_esp2;
	uint16_t tss_ss2;
	uint16_t rsvd4;
	uint32_t tss_cr3;	/* page directory base */
	uint32_t tss_eip;
	uint32_t tss_eflags;
	uint32_t tss_eax;	/* general purpose registers */
	uint32_t tss_ecx;
	uint32_t tss_edx;
	uint32_t tss_ebx;
	uint32_t tss_esp;
	uint32_t tss_ebp;
	uint32_t tss_esi;
	uint32_t tss_edi;
	uint16_t tss_es;	/* segment selectors */
	uint16_t rsvd5;
	uint16_t tss_cs;
	uint16_t rsvd6;
	uint16_t tss_ss;
	uint16_t rsvd7;
	uint16_t tss_ds;
	uint16_t rsvd8;
	uint16_t tss_fs;
	uint16_t rsvd9;
	uint16_t tss_gs;
	uint16_t rsvd10;
	uint16_t tss_ldt;	/* LDT selector */
	uint16_t rsvd11;
	uint16_t tss_trap;	/* debug trap (T) flag in bit 0 */
	uint16_t tss_iomap;	/* I/O permission bitmap base offset */
};
CTASSERT(sizeof(struct tss32) == 104);
#define SEL_START(sel) (((sel) & ~0x7))
#define SEL_LIMIT(sel) (((sel) | 0x7))
#define TSS_BUSY(type) (((type) & 0x2) != 0)
/* Read a vcpu register, asserting that the accessor cannot fail. */
static uint64_t
GETREG(int vcpu, int reg)
{
	uint64_t value;
	int rc;

	rc = xh_vm_get_register(vcpu, reg, &value);
	assert(rc == 0);

	return (value);
}
/* Write a vcpu register, asserting that the accessor cannot fail. */
static void
SETREG(int vcpu, int reg, uint64_t val)
{
	int rc;

	rc = xh_vm_set_register(vcpu, reg, val);
	assert(rc == 0);
}
static struct seg_desc
usd_to_seg_desc(struct user_segment_descriptor *usd)
{
struct seg_desc seg_desc;
seg_desc.base = (u_int)USD_GETBASE(usd);
if (usd->sd_gran)
seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
else
seg_desc.limit = (u_int)USD_GETLIMIT(usd);
seg_desc.access = (uint32_t) (usd->sd_type | (usd->sd_dpl << 5) | (usd->sd_p << 7));
seg_desc.access |= (uint32_t) (usd->sd_xx << 12);
seg_desc.access |= (uint32_t) (usd->sd_def32 << 14);
seg_desc.access |= (uint32_t) (usd->sd_gran << 15);
return (seg_desc);
}
/*
* Inject an exception with an error code that is a segment selector.
* The format of the error code is described in section 6.13, "Error Code",
* Intel SDM volume 3.
*
* Bit 0 (EXT) denotes whether the exception occurred during delivery
* of an external event like an interrupt.
*
* Bit 1 (IDT) indicates whether the selector points to a gate descriptor
* in the IDT.
*
* Bit 2(GDT/LDT) has the usual interpretation of Table Indicator (TI).
*/
static void
sel_exception(int vcpu, int vector, uint16_t sel, int ext)
{
	/*
	 * Build the selector error code (Intel SDM Vol 3, "Error Code"):
	 * keep bit 2 (TI) from the selector, clear bit 1 (IDT) since task
	 * switch emulation never references an IDT gate here, and set
	 * bit 0 (EXT) when the fault occurred during external event
	 * delivery.
	 */
	uint16_t errcode = sel & (uint16_t)~0x3;

	if (ext)
		errcode |= 0x1;

	xh_vm_inject_fault(vcpu, vector, 1, errcode);
}
/*
* Return 0 if the selector 'sel' in within the limits of the GDT/LDT
* and non-zero otherwise.
*/
/*
 * Return 0 if selector 'sel' lies within the limit of the table it refers
 * to (GDT or LDT), and -1 otherwise. An unusable or non-present LDT fails
 * the check outright.
 */
static int
desc_table_limit_check(int vcpu, uint16_t sel)
{
	uint64_t base;
	uint32_t limit, access;
	int rc, table_reg;

	table_reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	rc = xh_vm_get_desc(vcpu, table_reg, &base, &limit, &access);
	assert(rc == 0);

	if (table_reg == VM_REG_GUEST_LDTR &&
	    (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access)))
		return (-1);

	return (limit < SEL_LIMIT(sel) ? -1 : 0);
}
/*
* Read/write the segment descriptor 'desc' into the GDT/LDT slot referenced
* by the selector 'sel'.
*
* Returns 0 on success.
* Returns 1 if an exception was injected into the guest.
* Returns -1 otherwise.
*/
/*
 * Read or write the 8-byte segment descriptor slot that selector 'sel'
 * names in the GDT/LDT, through guest paging 'paging'.
 *
 * Returns 0 on success, 1 if an exception was injected into the guest
 * (*faultptr set), and -1 on other errors.
 */
static int
desc_table_rw(int vcpu, struct vm_guest_paging *paging,
	uint16_t sel, struct user_segment_descriptor *desc, bool doread,
	int *faultptr)
{
	struct iovec iov[2];
	uint64_t base;
	uint32_t limit, access;
	int error, reg;

	reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
	error = xh_vm_get_desc(vcpu, reg, &base, &limit, &access);
	assert(error == 0);
	/* Caller is expected to have run desc_table_limit_check() first. */
	assert(limit >= SEL_LIMIT(sel));

	/* Map the descriptor slot; it may straddle two pages (2 iovecs). */
	error = xh_vm_copy_setup(vcpu, paging, base + SEL_START(sel),
	    sizeof(*desc), doread ? XHYVE_PROT_READ : XHYVE_PROT_WRITE, iov, nitems(iov),
	    faultptr);
	if (error || *faultptr)
		return (error);

	if (doread)
		xh_vm_copyin(iov, desc, sizeof(*desc));
	else
		xh_vm_copyout(desc, iov, sizeof(*desc));
	return (0);
}
/* Convenience wrapper: read the descriptor slot named by 'sel'. */
static int
desc_table_read(int vcpu, struct vm_guest_paging *paging, uint16_t sel,
	struct user_segment_descriptor *desc, int *faultptr)
{
	return (desc_table_rw(vcpu, paging, sel, desc, true, faultptr));
}
/* Convenience wrapper: write 'desc' back to the slot named by 'sel'. */
static int
desc_table_write(int vcpu, struct vm_guest_paging *paging, uint16_t sel,
	struct user_segment_descriptor *desc, int *faultptr)
{
	return (desc_table_rw(vcpu, paging, sel, desc, false, faultptr));
}
/*
* Read the TSS descriptor referenced by 'sel' into 'desc'.
*
* Returns 0 on success.
* Returns 1 if an exception was injected into the guest.
* Returns -1 otherwise.
*/
/*
 * Read the TSS descriptor referenced by 'sel' into 'desc'.
 *
 * Returns 0 on success, 1 if an exception was injected into the guest,
 * and -1 otherwise.
 */
static int
read_tss_descriptor(int vcpu, struct vm_task_switch *ts, uint16_t sel,
	struct user_segment_descriptor *desc, int *faultptr)
{
	struct vm_guest_paging sup_paging;
	int vector;

	assert(!ISLDT(sel));
	assert(IDXSEL(sel) != 0);

	/* A selector past the GDT limit raises #TS on IRET, #GP otherwise. */
	if (desc_table_limit_check(vcpu, sel)) {
		vector = (ts->reason == TSR_IRET) ? IDT_TS : IDT_GP;
		sel_exception(vcpu, vector, sel, ts->ext);
		return (1);
	}

	sup_paging = ts->paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */
	return (desc_table_read(vcpu, &sup_paging, sel, desc, faultptr));
}
/* True when 'sd_type' encodes a code segment (bits 4:3 both set). */
static bool
code_desc(int sd_type)
{
	return (sd_type & 0x18) == 0x18;
}
/* True when 'sd_type' encodes a writable data segment (valid for SS). */
static bool
stack_desc(int sd_type)
{
	return (sd_type & 0x1A) == 0x12;
}
/* True for any data segment, or for a readable code segment. */
static bool
data_desc(int sd_type)
{
	bool is_data = (sd_type & 0x18) == 0x10;
	bool is_readable_code = (sd_type & 0x1A) == 0x1A;

	return is_data || is_readable_code;
}
/* True when 'sd_type' is the system LDT descriptor type. */
static bool
ldt_desc(int sd_type)
{
	return sd_type == SDT_SYSLDT;
}
CTASSERT(sizeof(struct user_segment_descriptor) == 8);
/*
* Validate the descriptor 'seg_desc' associated with 'segment'.
*/
/*
 * Validate the descriptor named by the selector currently loaded in
 * 'segment', applying the per-segment checks from "Checks on Guest Segment
 * Registers" (Intel SDM Vol 3), and return the converted descriptor in
 * *seg_desc.
 *
 * Returns 0 on success, 1 if an exception was injected into the guest,
 * and passes through errors/faults from the descriptor-table read via the
 * return value and *faultptr.
 */
static int
validate_seg_desc(int vcpu, struct vm_task_switch *ts, int segment,
	struct seg_desc *seg_desc, int *faultptr)
{
	struct vm_guest_paging sup_paging;
	struct user_segment_descriptor usd;
	int error, idtvec;
	int cpl, dpl, rpl;
	uint16_t sel, cs;
	bool ldtseg, codeseg, stackseg, dataseg, conforming;

	/* Classify the register so the right descriptor checks apply. */
	ldtseg = codeseg = stackseg = dataseg = false;
	switch (segment) {
	case VM_REG_GUEST_LDTR:
		ldtseg = true;
		break;
	case VM_REG_GUEST_CS:
		codeseg = true;
		break;
	case VM_REG_GUEST_SS:
		stackseg = true;
		break;
	case VM_REG_GUEST_DS:
	case VM_REG_GUEST_ES:
	case VM_REG_GUEST_FS:
	case VM_REG_GUEST_GS:
		dataseg = true;
		break;
	default:
		assert(0);
	}

	/* Get the segment selector */
	sel = (uint16_t) GETREG(vcpu, segment);

	/* LDT selector must point into the GDT */
	if (ldtseg && ISLDT(sel)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	/* Descriptor table limit check */
	if (desc_table_limit_check(vcpu, sel)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	/* NULL selector */
	if (IDXSEL(sel) == 0) {
		/* Code and stack segment selectors cannot be NULL */
		if (codeseg || stackseg) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
			return (1);
		}
		/* NULL selector loads an explicitly unusable descriptor. */
		seg_desc->base = 0;
		seg_desc->limit = 0;
		seg_desc->access = 0x10000;	/* unusable */
		return (0);
	}

	/* Read the descriptor from the GDT/LDT */
	sup_paging = ts->paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */
	error = desc_table_read(vcpu, &sup_paging, sel, &usd, faultptr);
	if (error || *faultptr)
		return (error);

	/* Verify that the descriptor type is compatible with the segment */
	if ((ldtseg && !ldt_desc(usd.sd_type)) ||
	    (codeseg && !code_desc(usd.sd_type)) ||
	    (dataseg && !data_desc(usd.sd_type)) ||
	    (stackseg && !stack_desc(usd.sd_type))) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}

	/* Segment must be marked present; vector depends on segment kind. */
	if (!usd.sd_p) {
		if (ldtseg)
			idtvec = IDT_TS;
		else if (stackseg)
			idtvec = IDT_SS;
		else
			idtvec = IDT_NP;
		sel_exception(vcpu, idtvec, sel, ts->ext);
		return (1);
	}

	/* Privilege checks: SS requires RPL == DPL == CPL. */
	cs = (uint16_t) GETREG(vcpu, VM_REG_GUEST_CS);
	cpl = cs & SEL_RPL_MASK;
	rpl = sel & SEL_RPL_MASK;
	dpl = usd.sd_dpl;
	if (stackseg && (rpl != cpl || dpl != cpl)) {
		sel_exception(vcpu, IDT_TS, sel, ts->ext);
		return (1);
	}
	if (codeseg) {
		/* Conforming code: CPL >= DPL; non-conforming: CPL == DPL. */
		conforming = (usd.sd_type & 0x4) ? true : false;
		if ((conforming && (cpl < dpl)) ||
		    (!conforming && (cpl != dpl))) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
			return (1);
		}
	}
	if (dataseg) {
		/*
		 * A data segment is always non-conforming except when it's
		 * descriptor is a readable, conforming code segment.
		 */
		if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
			conforming = true;
		else
			conforming = false;
		if (!conforming && (rpl > dpl || cpl > dpl)) {
			sel_exception(vcpu, IDT_TS, sel, ts->ext);
			return (1);
		}
	}
	*seg_desc = usd_to_seg_desc(&usd);
	return (0);
}
/*
 * Snapshot the outgoing task's processor state into the old TSS image and
 * write it back to guest memory through the iovec previously set up by the
 * caller. 'eip' is the instruction pointer to record for the old task.
 */
static void
tss32_save(int vcpu, struct vm_task_switch *task_switch,
	uint32_t eip, struct tss32 *tss, struct iovec *iov)
{
	/* General purpose registers */
	tss->tss_eax = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RAX);
	tss->tss_ecx = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RCX);
	tss->tss_edx = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RDX);
	tss->tss_ebx = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RBX);
	tss->tss_esp = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RSP);
	tss->tss_ebp = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RBP);
	tss->tss_esi = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RSI);
	tss->tss_edi = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RDI);
	/* Segment selectors */
	tss->tss_es = (uint16_t) GETREG(vcpu, VM_REG_GUEST_ES);
	tss->tss_cs = (uint16_t) GETREG(vcpu, VM_REG_GUEST_CS);
	tss->tss_ss = (uint16_t) GETREG(vcpu, VM_REG_GUEST_SS);
	tss->tss_ds = (uint16_t) GETREG(vcpu, VM_REG_GUEST_DS);
	tss->tss_fs = (uint16_t) GETREG(vcpu, VM_REG_GUEST_FS);
	tss->tss_gs = (uint16_t) GETREG(vcpu, VM_REG_GUEST_GS);
	/* eflags and eip */
	tss->tss_eflags = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RFLAGS);
	/* IRET leaves the nested-task chain, so clear NT in the saved image */
	if (task_switch->reason == TSR_IRET)
		tss->tss_eflags &= ~((unsigned) PSL_NT);
	tss->tss_eip = eip;
	/* Copy updated old TSS into guest memory */
	xh_vm_copyout(tss, iov, sizeof(struct tss32));
}
/* Load the hidden (cached) part of segment register 'reg' from 'sd'. */
static void
update_seg_desc(int vcpu, int reg, struct seg_desc *sd)
{
	int rc;

	rc = xh_vm_set_desc(vcpu, reg, sd->base, sd->limit, sd->access);
	assert(rc == 0);
}
/*
* Update the vcpu registers to reflect the state of the new task.
*/
/*
 * Load the vcpu's register and segment state from the new task's TSS image
 * 'tss'. 'ot_sel' is the old task's TSS selector, stored in the link field
 * for nested (CALL/interrupt/exception) switches. 'iov' maps the new TSS in
 * guest memory so the updated link field can be written back.
 *
 * Returns 0 on success, 1 if an exception was injected into the guest, and
 * propagates errors/faults from descriptor validation via the return value
 * and *faultptr.
 */
static int
tss32_restore(int vcpu, struct vm_task_switch *ts, uint16_t ot_sel,
	struct tss32 *tss, struct iovec *iov, int *faultptr)
{
	struct seg_desc seg_desc, seg_desc2;
	uint64_t *pdpte, maxphyaddr, reserved;
	uint32_t eflags;
	int error, i;
	bool nested;

	/* CALL/interrupt/exception switches chain back to the old task. */
	nested = false;
	if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
		tss->tss_link = ot_sel;
		nested = true;
	}
	eflags = tss->tss_eflags;
	if (nested)
		eflags |= PSL_NT;
	/* LDTR */
	SETREG(vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
	/* PBDR */
	if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
		if (ts->paging.paging_mode == PAGING_MODE_PAE) {
			/*
			 * XXX Assuming 36-bit MAXPHYADDR.
			 */
			maxphyaddr = (1UL << 36) - 1;
			/* PAE: CR3 points at 4 PDPTEs on a 32-byte boundary. */
			pdpte = paddr_guest2host(tss->tss_cr3 & ~((unsigned) 0x1f), 32);
			for (i = 0; i < 4; i++) {
				/* Check reserved bits if the PDPTE is valid */
				if (!(pdpte[i] & 0x1))
					continue;
				/*
				 * Bits 2:1, 8:5 and bits above the processor's
				 * maximum physical address are reserved.
				 */
				reserved = ~maxphyaddr | 0x1E6;
				if (pdpte[i] & reserved) {
					vm_inject_gp(vcpu);
					return (1);
				}
			}
			SETREG(vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
			SETREG(vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
		}
		SETREG(vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
		ts->paging.cr3 = tss->tss_cr3;
	}
	/* eflags and eip */
	SETREG(vcpu, VM_REG_GUEST_RFLAGS, eflags);
	SETREG(vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
	/* General purpose registers */
	SETREG(vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
	SETREG(vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
	SETREG(vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
	SETREG(vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
	SETREG(vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
	SETREG(vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
	SETREG(vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
	SETREG(vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
	/* Segment selectors */
	SETREG(vcpu, VM_REG_GUEST_ES, tss->tss_es);
	SETREG(vcpu, VM_REG_GUEST_CS, tss->tss_cs);
	SETREG(vcpu, VM_REG_GUEST_SS, tss->tss_ss);
	SETREG(vcpu, VM_REG_GUEST_DS, tss->tss_ds);
	SETREG(vcpu, VM_REG_GUEST_FS, tss->tss_fs);
	SETREG(vcpu, VM_REG_GUEST_GS, tss->tss_gs);
	/*
	 * If this is a nested task then write out the new TSS to update
	 * the previous link field.
	 */
	if (nested)
		xh_vm_copyout(tss, iov, sizeof(*tss));
	/* Validate segment descriptors */
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_LDTR, &seg_desc);
	/*
	 * Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
	 *
	 * The SS and CS attribute checks on VM-entry are inter-dependent so
	 * we need to make sure that both segments are valid before updating
	 * either of them. This ensures that the VMCS state can pass the
	 * VM-entry checks so the guest can handle any exception injected
	 * during task switch emulation.
	 */
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_CS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_SS, &seg_desc2,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_CS, &seg_desc);
	update_seg_desc(vcpu, VM_REG_GUEST_SS, &seg_desc2);
	/* CPL for subsequent checks comes from the new CS selector's RPL. */
	ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_DS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_DS, &seg_desc);
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_ES, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_ES, &seg_desc);
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_FS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_FS, &seg_desc);
	error = validate_seg_desc(vcpu, ts, VM_REG_GUEST_GS, &seg_desc,
	    faultptr);
	if (error || *faultptr)
		return (error);
	update_seg_desc(vcpu, VM_REG_GUEST_GS, &seg_desc);
	return (0);
}
/*
* Push an error code on the stack of the new task. This is needed if the
* task switch was triggered by a hardware exception that causes an error
* code to be saved (e.g. #PF).
*/
/*
 * Push an error code on the stack of the new task. This is needed if the
 * task switch was triggered by a hardware exception that causes an error
 * code to be saved (e.g. #PF).
 *
 * Returns 0 (with *faultptr set to 1 if an exception was injected) or an
 * error from the guest-memory copy setup.
 */
static int
push_errcode(int vcpu, struct vm_guest_paging *paging, int task_type,
	uint32_t errcode, int *faultptr)
{
	struct iovec iov[2];
	struct seg_desc seg_desc;
	int stacksize, bytes, error;
	uint64_t gla, cr0, rflags;
	uint32_t esp;
	uint16_t stacksel;

	*faultptr = 0;
	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
	rflags = GETREG(vcpu, VM_REG_GUEST_RFLAGS);
	stacksel = (uint16_t) GETREG(vcpu, VM_REG_GUEST_SS);
	error = xh_vm_get_desc(vcpu, VM_REG_GUEST_SS, &seg_desc.base,
	    &seg_desc.limit, &seg_desc.access);
	assert(error == 0);
	/*
	 * Section "Error Code" in the Intel SDM vol 3: the error code is
	 * pushed on the stack as a doubleword or word (depending on the
	 * default interrupt, trap or task gate size).
	 */
	if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
		bytes = 4;
	else
		bytes = 2;
	/*
	 * PUSH instruction from Intel SDM vol 2: the 'B' flag in the
	 * stack-segment descriptor determines the size of the stack
	 * pointer outside of 64-bit mode.
	 */
	if (SEG_DESC_DEF32(seg_desc.access))
		stacksize = 4;
	else
		stacksize = 2;
	esp = (uint32_t) GETREG(vcpu, VM_REG_GUEST_RSP);
	esp -= (uint32_t) bytes;
	/* Translate the new stack top; a bad address raises #SS. */
	if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &seg_desc, esp,
	    bytes, stacksize, XHYVE_PROT_WRITE, &gla))
	{
		sel_exception(vcpu, IDT_SS, stacksel, 1);
		*faultptr = 1;
		return (0);
	}
	if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
		vm_inject_ac(vcpu, 1);
		*faultptr = 1;
		return (0);
	}
	error = xh_vm_copy_setup(vcpu, paging, gla, ((size_t) bytes),
	    XHYVE_PROT_WRITE, iov, nitems(iov), faultptr);
	if (error || *faultptr)
		return (error);
	/* Commit the push: write the error code, then update ESP. */
	xh_vm_copyout(&errcode, iov, ((size_t) bytes));
	SETREG(vcpu, VM_REG_GUEST_RSP, esp);
	return (0);
}
/*
* Evaluate return value from helper functions and potentially return to
* the VM run loop.
*/
#define CHKERR(error,fault) \
do { \
assert((error == 0) || (error == EFAULT)); \
if (error) \
return (VMEXIT_ABORT); \
else if (fault) \
return (VMEXIT_CONTINUE); \
} while (0)
int vmexit_task_switch(struct vm_exit *vmexit, int *pvcpu);
/*
 * Emulate a hardware task switch (Intel SDM Vol 3, "Task Management") on a
 * task-switch VM exit: validate the new TSS descriptor, save the outgoing
 * task's state into the old TSS, update busy bits and the task register,
 * then load the incoming task's state from the new TSS. Only 32-bit TSSs
 * are supported.
 *
 * Returns VMEXIT_CONTINUE (exceptions injected along the way are handled by
 * the guest) or VMEXIT_ABORT on emulation errors.
 */
int
vmexit_task_switch(struct vm_exit *vmexit, int *pvcpu)
{
	struct seg_desc nt;
	struct tss32 oldtss, newtss;
	struct vm_task_switch *task_switch;
	struct vm_guest_paging *paging, sup_paging;
	struct user_segment_descriptor nt_desc, ot_desc;
	struct iovec nt_iov[2], ot_iov[2];
	uint64_t cr0, ot_base;
	uint32_t eip, ot_lim, access;
	int error, ext, fault, minlimit, nt_type, ot_type, vcpu;
	enum task_switch_reason reason;
	uint16_t nt_sel, ot_sel;

	task_switch = &vmexit->u.task_switch;
	nt_sel = task_switch->tsssel;
	ext = vmexit->u.task_switch.ext;
	reason = vmexit->u.task_switch.reason;
	paging = &vmexit->u.task_switch.paging;
	vcpu = *pvcpu;
	assert(paging->cpu_mode == CPU_MODE_PROTECTED);
	/*
	 * Calculate the instruction pointer to store in the old TSS.
	 */
	eip = (uint32_t) (vmexit->rip + ((uint64_t) vmexit->inst_length));
	/*
	 * Section 4.6, "Access Rights" in Intel SDM Vol 3.
	 * The following page table accesses are implicitly supervisor mode:
	 * - accesses to GDT or LDT to load segment descriptors
	 * - accesses to the task state segment during task switch
	 */
	sup_paging = *paging;
	sup_paging.cpl = 0;	/* implicit supervisor mode */
	/* Fetch the new TSS descriptor */
	error = read_tss_descriptor(vcpu, task_switch, nt_sel, &nt_desc,
	    &fault);
	CHKERR(error, fault);
	nt = usd_to_seg_desc(&nt_desc);
	/* Verify the type of the new TSS */
	nt_type = SEG_DESC_TYPE(nt.access);
	if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
	    nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}
	/* TSS descriptor must have present bit set */
	if (!SEG_DESC_PRESENT(nt.access)) {
		sel_exception(vcpu, IDT_NP, nt_sel, ext);
		goto done;
	}
	/*
	 * TSS must have a minimum length of 104 bytes for a 32-bit TSS and
	 * 44 bytes for a 16-bit TSS.
	 */
	if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
		minlimit = 104 - 1;
	else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
		minlimit = 44 - 1;
	else
		minlimit = 0;	/* unreachable: nt_type checked above */
	assert(minlimit > 0);
	if (nt.limit < ((uint32_t) minlimit)) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}
	/* TSS must be busy if task switch is due to IRET */
	if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
		sel_exception(vcpu, IDT_TS, nt_sel, ext);
		goto done;
	}
	/*
	 * TSS must be available (not busy) if task switch reason is
	 * CALL, JMP, exception or interrupt.
	 */
	if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
		sel_exception(vcpu, IDT_GP, nt_sel, ext);
		goto done;
	}
	/* Fetch the new TSS */
	error = xh_vm_copy_setup(vcpu, &sup_paging, nt.base,
	    ((size_t) (minlimit + 1)), (XHYVE_PROT_READ | XHYVE_PROT_WRITE), nt_iov,
	    nitems(nt_iov), &fault);
	CHKERR(error, fault);
	xh_vm_copyin(nt_iov, &newtss, ((size_t) (minlimit + 1)));
	/* Get the old TSS selector from the guest's task register */
	ot_sel = (uint16_t) GETREG(vcpu, VM_REG_GUEST_TR);
	if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
		/*
		 * This might happen if a task switch was attempted without
		 * ever loading the task register with LTR. In this case the
		 * TR would contain the values from power-on:
		 * (sel = 0, base = 0, limit = 0xffff).
		 */
		sel_exception(vcpu, IDT_TS, ot_sel, task_switch->ext);
		goto done;
	}
	/* Get the old TSS base and limit from the guest's task register */
	error = xh_vm_get_desc(vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
	    &access);
	assert(error == 0);
	assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
	ot_type = SEG_DESC_TYPE(access);
	assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
	/* Fetch the old TSS descriptor */
	error = read_tss_descriptor(vcpu, task_switch, ot_sel, &ot_desc,
	    &fault);
	CHKERR(error, fault);
	/* Get the old TSS */
	error = xh_vm_copy_setup(vcpu, &sup_paging, ot_base,
	    ((size_t) (minlimit + 1)), (XHYVE_PROT_READ | XHYVE_PROT_WRITE),
	    ot_iov, nitems(ot_iov), &fault);
	CHKERR(error, fault);
	xh_vm_copyin(ot_iov, &oldtss, ((size_t) (minlimit + 1)));
	/*
	 * Clear the busy bit in the old TSS descriptor if the task switch
	 * due to an IRET or JMP instruction.
	 */
	if (reason == TSR_IRET || reason == TSR_JMP) {
		ot_desc.sd_type &= ~0x2;
		error = desc_table_write(vcpu, &sup_paging, ot_sel,
		    &ot_desc, &fault);
		CHKERR(error, fault);
	}
	if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
		fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
		return (VMEXIT_ABORT);
	}
	/* Save processor state in old TSS */
	tss32_save(vcpu, task_switch, eip, &oldtss, ot_iov);
	/*
	 * If the task switch was triggered for any reason other than IRET
	 * then set the busy bit in the new TSS descriptor.
	 */
	if (reason != TSR_IRET) {
		nt_desc.sd_type |= 0x2;
		error = desc_table_write(vcpu, &sup_paging, nt_sel,
		    &nt_desc, &fault);
		CHKERR(error, fault);
	}
	/* Update task register to point at the new TSS */
	SETREG(vcpu, VM_REG_GUEST_TR, nt_sel);
	/* Update the hidden descriptor state of the task register */
	nt = usd_to_seg_desc(&nt_desc);
	update_seg_desc(vcpu, VM_REG_GUEST_TR, &nt);
	/* Set CR0.TS */
	cr0 = GETREG(vcpu, VM_REG_GUEST_CR0);
	SETREG(vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
	/*
	 * We are now committed to the task switch. Any exceptions encountered
	 * after this point will be handled in the context of the new task and
	 * the saved instruction pointer will belong to the new task.
	 */
	error = xh_vm_set_register(vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
	assert(error == 0);
	/* Load processor state from new TSS */
	error = tss32_restore(vcpu, task_switch, ot_sel, &newtss, nt_iov,
	    &fault);
	CHKERR(error, fault);
	/*
	 * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
	 * caused an error code to be generated, this error code is copied
	 * to the stack of the new task.
	 */
	if (task_switch->errcode_valid) {
		assert(task_switch->ext);
		assert(task_switch->reason == TSR_IDT_GATE);
		error = push_errcode(vcpu, &task_switch->paging, nt_type,
		    task_switch->errcode, &fault);
		CHKERR(error, fault);
	}
	/*
	 * Treatment of virtual-NMI blocking if NMI is delivered through
	 * a task gate.
	 *
	 * Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
	 * If the virtual NMIs VM-execution control is 1, VM entry injects
	 * an NMI, and delivery of the NMI causes a task switch that causes
	 * a VM exit, virtual-NMI blocking is in effect before the VM exit
	 * commences.
	 *
	 * Thus, virtual-NMI blocking is in effect at the time of the task
	 * switch VM exit.
	 */
	/*
	 * Treatment of virtual-NMI unblocking on IRET from NMI handler task.
	 *
	 * Section "Changes to Instruction Behavior in VMX Non-Root Operation"
	 * If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
	 * This unblocking of virtual-NMI occurs even if IRET causes a fault.
	 *
	 * Thus, virtual-NMI blocking is cleared at the time of the task switch
	 * VM exit.
	 */
	/*
	 * If the task switch was triggered by an event delivered through
	 * the IDT then extinguish the pending event from the vcpu's
	 * exitintinfo.
	 */
	if (task_switch->reason == TSR_IDT_GATE) {
		error = xh_vm_set_intinfo(vcpu, 0);
		assert(error == 0);
	}
	/*
	 * XXX should inject debug exception if 'T' bit is 1
	 */
done:
	return (VMEXIT_CONTINUE);
}

797
vendor/github.com/docker/hyperkit/src/lib/uart_emul.c generated vendored Normal file
View File

@@ -0,0 +1,797 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* Copyright (c) 2013 Neel Natu <neel@freebsd.org>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <strings.h>
#include <unistd.h>
#include <fcntl.h>
#include <pthread.h>
#include <termios.h>
#include <assert.h>
#include <errno.h>
#include <sys/mman.h>
#include <xhyve/support/ns16550.h>
#include <xhyve/mevent.h>
#include <xhyve/uart_emul.h>
#define COM1_BASE 0x3F8
#define COM1_IRQ 4
#define COM2_BASE 0x2F8
#define COM2_IRQ 3
#define DEFAULT_RCLK 1843200
#define DEFAULT_BAUD 9600
#define FCR_RX_MASK 0xC0
#define MCR_OUT1 0x04
#define MCR_OUT2 0x08
#define MSR_DELTA_MASK 0x0f
#ifndef REG_SCR
#define REG_SCR com_scr
#endif
#define FIFOSZ 16
static bool uart_stdio; /* stdio in use for i/o */
static struct termios tio_stdio_orig;
static struct {
int baseaddr;
int irq;
bool inuse;
} uart_lres[] = {
{ COM1_BASE, COM1_IRQ, false},
{ COM2_BASE, COM2_IRQ, false},
};
#define UART_NLDEVS (sizeof(uart_lres) / sizeof(uart_lres[0]))
/* Fixed-size ring buffer backing the emulated UART's receive FIFO. */
struct fifo {
	uint8_t buf[FIFOSZ];
	int rindex;	/* index to read from */
	int windex;	/* index to write to */
	int num;	/* number of characters in the fifo */
	int size;	/* size of the fifo */
};
struct ttyfd {
bool opened;
int fd; /* tty device file descriptor */
int sfd;
char *name; /* slave pty name when using autopty*/
struct termios tio_orig, tio_new; /* I/O Terminals */
};
struct log {
unsigned char *ring; /* array used as a ring */
size_t next; /* offset of the next free byte */
size_t length; /* total length of the ring */
};
struct uart_softc {
pthread_mutex_t mtx; /* protects all softc elements */
uint8_t data; /* Data register (R/W) */
uint8_t ier; /* Interrupt enable register (R/W) */
uint8_t lcr; /* Line control register (R/W) */
uint8_t mcr; /* Modem control register (R/W) */
uint8_t lsr; /* Line status register (R/W) */
uint8_t msr; /* Modem status register (R/W) */
uint8_t fcr; /* FIFO control register (W) */
uint8_t scr; /* Scratch register (R/W) */
uint8_t dll; /* Baudrate divisor latch LSB */
uint8_t dlh; /* Baudrate divisor latch MSB */
struct fifo rxfifo;
struct mevent *mev;
struct ttyfd tty;
struct log log;
bool thre_int_pending; /* THRE interrupt pending */
void *arg;
uart_intr_func_t intr_assert;
uart_intr_func_t intr_deassert;
};
static void uart_drain(int fd, enum ev_type ev, void *arg);
/*
 * atexit() hook: restore the termios settings on stdin that ttyopen()
 * saved before switching it into raw mode.
 */
static void
ttyclose(void)
{
	tcsetattr(STDIN_FILENO, TCSANOW, &tio_stdio_orig);
}
/*
 * Put the backing tty into raw mode so the guest gets an 8-bit clean
 * channel.  If the tty is stdin, remember the original settings and
 * arrange for them to be restored when the process exits.
 */
static void
ttyopen(struct ttyfd *tf)
{
	tcgetattr(tf->fd, &tf->tio_orig);
	tf->tio_new = tf->tio_orig;
	cfmakeraw(&tf->tio_new);
	tf->tio_new.c_cflag |= CLOCAL; /* ignore modem control lines */
	tcsetattr(tf->fd, TCSANOW, &tf->tio_new);
	if (tf->fd == STDIN_FILENO) {
		tio_stdio_orig = tf->tio_orig;
		atexit(ttyclose);
	}
}
/*
 * Read one byte from the backing tty.  Returns the byte (0-255), or -1
 * if nothing is available.  For autopty backends (tf->name != NULL) a
 * zero-length read means no slave is connected; reopen our own slave fd
 * so we stop getting end-of-file in a busy loop.
 */
static int
ttyread(struct ttyfd *tf)
{
	unsigned char rb;
	ssize_t n = read(tf->fd, &rb, 1);
	if (n == 1)
		return (rb);
	if (n == 0 && tf->name) {
		/* We will get end of file in a loop until a slave is opened,
		   so open a slave ourselves here. */
		if (tf->sfd != -1) close(tf->sfd);
		fprintf(stdout, "Reopening slave pty\n");
		tf->sfd = open(tf->name, O_RDONLY | O_NONBLOCK);
	}
	return (-1);
}
/*
 * Best-effort write of a single byte to the backing tty.  The result is
 * deliberately ignored: a real 16550 has no way to report a transmit
 * failure to the guest either.
 */
static void
ttywrite(struct ttyfd *tf, unsigned char wb)
{
	ssize_t rc;

	rc = write(tf->fd, &wb, 1);
	(void)rc;
}
/* Append one byte to the console log ring, wrapping at the end. */
static void
ringwrite(struct log *log, unsigned char wb)
{
	log->ring[log->next] = wb;
	log->next = (log->next + 1) % log->length;
}
/*
 * (Re)initialize the receive FIFO to the given depth (1 in 16450 mode,
 * FIFOSZ once the guest enables the FIFO via the FCR).  Any stale input
 * buffered in the backing tty is drained, and the read event is
 * re-enabled since the FIFO is now empty.
 */
static void
rxfifo_reset(struct uart_softc *sc, int size)
{
	char flushbuf[32];
	struct fifo *fifo;
	ssize_t nread;
	int error;
	fifo = &sc->rxfifo;
	bzero(fifo, sizeof(struct fifo));
	fifo->size = size;
	if (sc->tty.opened) {
		/*
		 * Flush any unread input from the tty buffer.
		 */
		while (1) {
			nread = read(sc->tty.fd, flushbuf, sizeof(flushbuf));
			if (nread != sizeof(flushbuf))
				break;
		}
		/*
		 * Enable mevent to trigger when new characters are available
		 * on the tty fd.
		 */
		error = mevent_enable(sc->mev);
		assert(error == 0);
	}
}
/* Return non-zero when the receive FIFO has room for another byte. */
static int
rxfifo_available(struct uart_softc *sc)
{
	return (sc->rxfifo.num < sc->rxfifo.size);
}
/*
 * Queue one received byte.  Returns 0 on success, -1 if the FIFO is
 * full (the caller then raises the overrun bit).  When this byte fills
 * the FIFO, the tty read event is masked so no more input is pulled in
 * until the guest drains a character.
 */
static int
rxfifo_putchar(struct uart_softc *sc, uint8_t ch)
{
	struct fifo *fifo = &sc->rxfifo;
	int error;

	if (fifo->num >= fifo->size)
		return (-1);

	fifo->buf[fifo->windex] = ch;
	fifo->windex = (fifo->windex + 1) % fifo->size;
	fifo->num++;
	if (!rxfifo_available(sc) && sc->tty.opened) {
		/* FIFO just became full: stop listening for tty input. */
		error = mevent_disable(sc->mev);
		assert(error == 0);
	}
	return (0);
}
/*
 * Dequeue one byte for the guest.  Returns the byte or -1 if the FIFO
 * is empty.  If the FIFO was full, removing a byte frees space, so the
 * tty read event is re-enabled.
 */
static int
rxfifo_getchar(struct uart_softc *sc)
{
	struct fifo *fifo = &sc->rxfifo;
	int c, error, wasfull;

	if (fifo->num <= 0)
		return (-1);

	wasfull = !rxfifo_available(sc);
	c = fifo->buf[fifo->rindex];
	fifo->rindex = (fifo->rindex + 1) % fifo->size;
	fifo->num--;
	if (wasfull && sc->tty.opened) {
		error = mevent_enable(sc->mev);
		assert(error == 0);
	}
	return (c);
}
/* Number of bytes currently queued in the receive FIFO. */
static int
rxfifo_numchars(struct uart_softc *sc)
{
	return (sc->rxfifo.num);
}
/*
 * Switch the backing tty into raw mode and register a read-readiness
 * callback (uart_drain) with the mevent loop.
 */
static void
uart_opentty(struct uart_softc *sc)
{
	ttyopen(&sc->tty);
	sc->mev = mevent_add(sc->tty.fd, EVF_READ, uart_drain, sc);
	assert(sc->mev != NULL);
}
/*
 * Create (truncating) the file at 'path' and mmap() it as a 64KiB ring
 * into which all console output is mirrored (see ringwrite()).
 * Returns 0 on success, -1 on failure.  The fd is no longer needed once
 * the mapping exists, so it is closed on every path.
 */
static int
uart_mapring(struct uart_softc *sc, const char *path)
{
	int retval = -1, fd = -1;
	sc->log.length = 65536;
	if ((fd = open(path, O_CREAT | O_TRUNC | O_RDWR, 0644)) == -1) {
		perror("open console-ring");
		goto out;
	}
	if (ftruncate(fd, (off_t)sc->log.length) == -1){
		perror("ftruncate console-ring");
		goto out;
	}
	if ((sc->log.ring = (unsigned char*)mmap(NULL, sc->log.length, PROT_WRITE, MAP_SHARED, fd, 0)) == MAP_FAILED) {
		perror("mmap console-ring");
		goto out;
	}
	sc->log.next = 0;
	retval = 0;
out:
	if (fd != -1) close(fd);
	return retval;
}
/*
 * The IIR returns a prioritized interrupt reason:
 * - receive data available
 * - transmit holding register empty
 * - modem status change
 *
 * Return an interrupt reason if one is available.
 *
 * The cascade below encodes the priority: receiver line status
 * (overrun) first, then RX data ready, then TX holding register empty,
 * then modem status delta.  A condition only fires if its enable bit is
 * set in the IER; IIR_NOPEND means nothing enabled is pending.
 */
static int
uart_intr_reason(struct uart_softc *sc)
{
	if ((sc->lsr & LSR_OE) != 0 && (sc->ier & IER_ERLS) != 0)
		return (IIR_RLS);
	else if (rxfifo_numchars(sc) > 0 && (sc->ier & IER_ERXRDY) != 0)
		return (IIR_RXTOUT);
	else if (sc->thre_int_pending && (sc->ier & IER_ETXRDY) != 0)
		return (IIR_TXRDY);
	else if ((sc->msr & MSR_DELTA_MASK) != 0 && (sc->ier & IER_EMSC) != 0)
		return (IIR_MLSC);
	else
		return (IIR_NOPEND);
}
/*
 * Reset the UART to power-on defaults: program the divisor latch pair
 * for DEFAULT_BAUD and shrink the RX FIFO to a single byte (16450
 * mode) until the guest enables the FIFO via the FCR.
 */
static void
uart_reset(struct uart_softc *sc)
{
	uint16_t divisor;

	divisor = DEFAULT_RCLK / DEFAULT_BAUD / 16;
	sc->dll = (uint8_t) divisor;
	/*
	 * DLH holds the upper 8 bits of the 16-bit divisor.  The
	 * original shifted by 16, which always produced 0 for any
	 * divisor value.
	 */
	sc->dlh = (uint8_t) (divisor >> 8);
	rxfifo_reset(sc, 1);	/* no fifo until enabled by software */
}
/*
 * Toggle the COM port's intr pin depending on whether or not we have an
 * interrupt condition to report to the processor.
 *
 * Callers in this file invoke it with sc->mtx held.
 */
static void
uart_toggle_intr(struct uart_softc *sc)
{
	uint8_t intr_reason;
	intr_reason = (uint8_t) uart_intr_reason(sc);
	if (intr_reason == IIR_NOPEND)
		(*sc->intr_deassert)(sc->arg);
	else
		(*sc->intr_assert)(sc->arg);
}
/*
 * mevent callback: the backing tty has data to read.  Pull in as many
 * bytes as the RX FIFO will hold and recompute the interrupt pin.  In
 * loopback mode tty input is read and discarded — the guest only sees
 * the bytes it writes to the data register itself.
 */
static void
uart_drain(int fd, enum ev_type ev, void *arg)
{
	struct uart_softc *sc;
	int ch;
	sc = arg;
	assert(fd == sc->tty.fd);
	assert(ev == EVF_READ);
	/*
	 * This routine is called in the context of the mevent thread
	 * to take out the softc lock to protect against concurrent
	 * access from a vCPU i/o exit
	 */
	pthread_mutex_lock(&sc->mtx);
	if ((sc->mcr & MCR_LOOPBACK) != 0) {
		(void) ttyread(&sc->tty);
	} else {
		while (rxfifo_available(sc) &&
			((ch = ttyread(&sc->tty)) != -1)) {
			rxfifo_putchar(sc, ((uint8_t) ch));
		}
		uart_toggle_intr(sc);
	}
	pthread_mutex_unlock(&sc->mtx);
}
/*
 * Emulate a guest write of 'value' to the UART register at 'offset'
 * (one of the REG_* indices).  DLAB-mapped divisor-latch accesses are
 * handled first; everything else dispatches on the register.  The
 * interrupt pin is recomputed before returning.
 */
void
uart_write(struct uart_softc *sc, int offset, uint8_t value)
{
	int fifosz;
	uint8_t msr;
	pthread_mutex_lock(&sc->mtx);
	/*
	 * Take care of the special case DLAB accesses first
	 */
	if ((sc->lcr & LCR_DLAB) != 0) {
		if (offset == REG_DLL) {
			sc->dll = value;
			goto done;
		}
		if (offset == REG_DLH) {
			sc->dlh = value;
			goto done;
		}
	}
	switch (offset) {
	case REG_DATA:
		if (sc->mcr & MCR_LOOPBACK) {
			/* Loopback: the byte re-appears in the RX FIFO. */
			if (rxfifo_putchar(sc, value) != 0)
				sc->lsr |= LSR_OE;
		} else if (sc->tty.opened) {
			ttywrite(&sc->tty, value);
			if (sc->log.ring)
				ringwrite(&sc->log, value);
		} /* else drop on floor */
		/* Transmit completes instantly, so THRE fires at once. */
		sc->thre_int_pending = true;
		break;
	case REG_IER:
		/*
		 * Apply mask so that bits 4-7 are 0
		 * Also enables bits 0-3 only if they're 1
		 */
		sc->ier = value & 0x0F;
		break;
	case REG_FCR:
		/*
		 * When moving from FIFO and 16450 mode and vice versa,
		 * the FIFO contents are reset.
		 */
		if ((sc->fcr & FCR_ENABLE) ^ (value & FCR_ENABLE)) {
			fifosz = (value & FCR_ENABLE) ? FIFOSZ : 1;
			rxfifo_reset(sc, fifosz);
		}
		/*
		 * The FCR_ENABLE bit must be '1' for the programming
		 * of other FCR bits to be effective.
		 */
		if ((value & FCR_ENABLE) == 0) {
			sc->fcr = 0;
		} else {
			if ((value & FCR_RCV_RST) != 0)
				rxfifo_reset(sc, FIFOSZ);
			sc->fcr = value &
				(FCR_ENABLE | FCR_DMA | FCR_RX_MASK);
		}
		break;
	case REG_LCR:
		sc->lcr = value;
		break;
	case REG_MCR:
		/* Apply mask so that bits 5-7 are 0 */
		sc->mcr = value & 0x1F;
		msr = 0;
		if (sc->mcr & MCR_LOOPBACK) {
			/*
			 * In the loopback mode certain bits from the
			 * MCR are reflected back into MSR
			 */
			if (sc->mcr & MCR_RTS)
				msr |= MSR_CTS;
			if (sc->mcr & MCR_DTR)
				msr |= MSR_DSR;
			if (sc->mcr & MCR_OUT1)
				msr |= MSR_RI;
			if (sc->mcr & MCR_OUT2)
				msr |= MSR_DCD;
		}
		/*
		 * Detect if there has been any change between the
		 * previous and the new value of MSR. If there is
		 * then assert the appropriate MSR delta bit.
		 */
		if ((msr & MSR_CTS) ^ (sc->msr & MSR_CTS))
			sc->msr |= MSR_DCTS;
		if ((msr & MSR_DSR) ^ (sc->msr & MSR_DSR))
			sc->msr |= MSR_DDSR;
		if ((msr & MSR_DCD) ^ (sc->msr & MSR_DCD))
			sc->msr |= MSR_DDCD;
		if ((sc->msr & MSR_RI) != 0 && (msr & MSR_RI) == 0)
			sc->msr |= MSR_TERI;
		/*
		 * Update the value of MSR while retaining the delta
		 * bits.
		 */
		sc->msr &= MSR_DELTA_MASK;
		sc->msr |= msr;
		break;
	case REG_LSR:
		/*
		 * Line status register is not meant to be written to
		 * during normal operation.
		 */
		break;
	case REG_MSR:
		/*
		 * As far as I can tell MSR is a read-only register.
		 */
		break;
	case REG_SCR:
		sc->scr = value;
		break;
	default:
		break;
	}
done:
	uart_toggle_intr(sc);
	pthread_mutex_unlock(&sc->mtx);
}
/*
 * Emulate a guest read of the UART register at 'offset'.  DLAB-mapped
 * divisor-latch accesses are handled first.  Several reads have side
 * effects (IIR clears THRE, LSR clears overrun, MSR clears the delta
 * bits, ISR-style data reads pop the FIFO), so the interrupt pin is
 * recomputed before returning.
 */
uint8_t
uart_read(struct uart_softc *sc, int offset)
{
	uint8_t iir, intr_reason, reg;
	pthread_mutex_lock(&sc->mtx);
	/*
	 * Take care of the special case DLAB accesses first
	 */
	if ((sc->lcr & LCR_DLAB) != 0) {
		if (offset == REG_DLL) {
			reg = sc->dll;
			goto done;
		}
		if (offset == REG_DLH) {
			reg = sc->dlh;
			goto done;
		}
	}
	switch (offset) {
	case REG_DATA:
		reg = (uint8_t) rxfifo_getchar(sc);
		break;
	case REG_IER:
		reg = sc->ier;
		break;
	case REG_IIR:
		iir = (sc->fcr & FCR_ENABLE) ? IIR_FIFO_MASK : 0;
		intr_reason = (uint8_t) uart_intr_reason(sc);
		/*
		 * Deal with side effects of reading the IIR register
		 */
		if (intr_reason == IIR_TXRDY)
			sc->thre_int_pending = false;
		iir |= intr_reason;
		reg = iir;
		break;
	case REG_LCR:
		reg = sc->lcr;
		break;
	case REG_MCR:
		reg = sc->mcr;
		break;
	case REG_LSR:
		/* Transmitter is always ready for more data */
		sc->lsr |= LSR_TEMT | LSR_THRE;
		/* Check for new receive data */
		if (rxfifo_numchars(sc) > 0)
			sc->lsr |= LSR_RXRDY;
		else
			sc->lsr &= ~LSR_RXRDY;
		reg = sc->lsr;
		/* The LSR_OE bit is cleared on LSR read */
		sc->lsr &= ~LSR_OE;
		break;
	case REG_MSR:
		/*
		 * MSR delta bits are cleared on read
		 */
		reg = sc->msr;
		sc->msr &= ~MSR_DELTA_MASK;
		break;
	case REG_SCR:
		reg = sc->scr;
		break;
	default:
		/* Unimplemented register: float high. */
		reg = 0xFF;
		break;
	}
done:
	uart_toggle_intr(sc);
	pthread_mutex_unlock(&sc->mtx);
	return (reg);
}
/*
 * Claim the legacy port/IRQ resources of COM<which+1>.  On success the
 * base address and IRQ are returned through the out parameters and the
 * slot is marked busy.  Returns 0 on success, -1 if 'which' is out of
 * range or the slot was already claimed.
 */
int
uart_legacy_alloc(int which, int *baseaddr, int *irq)
{
	if (which < 0)
		return (-1);
	if (((unsigned) which) >= UART_NLDEVS)
		return (-1);
	if (uart_lres[which].inuse)
		return (-1);

	uart_lres[which].inuse = true;
	*baseaddr = uart_lres[which].baseaddr;
	*irq = uart_lres[which].irq;
	return (0);
}
/*
 * Allocate and initialize a UART softc.  'intr_assert'/'intr_deassert'
 * are invoked with 'arg' to raise/lower the interrupt line.  Returns
 * the new softc, or NULL on allocation failure (the calloc result was
 * previously dereferenced without a check).
 */
struct uart_softc *
uart_init(uart_intr_func_t intr_assert, uart_intr_func_t intr_deassert,
	void *arg)
{
	struct uart_softc *sc;

	sc = calloc(1, sizeof(struct uart_softc));
	if (sc == NULL)
		return (NULL);
	sc->arg = arg;
	sc->intr_assert = intr_assert;
	sc->intr_deassert = intr_deassert;
	pthread_mutex_init(&sc->mtx, NULL);
	uart_reset(sc);
	return (sc);
}
/*
 * Try to use 'backend' as the pathname of a tty-like device.  Returns
 * 0 and records the fd if the path opens and is a tty, -1 otherwise.
 * Unlike the original, a descriptor that opens but is not a tty is
 * closed instead of leaked, and fd 0 is accepted as a valid descriptor.
 */
static int
uart_tty_backend(struct uart_softc *sc, const char *backend)
{
	int fd;

	fd = open(backend, O_RDWR | O_NONBLOCK);
	if (fd < 0)
		return (-1);
	if (!isatty(fd)) {
		close(fd);
		return (-1);
	}
	sc->tty.fd = fd;
	sc->tty.opened = true;
	return (0);
}
/*
 * Duplicate the prefix of 'from' that precedes the first comma, or the
 * whole string when it contains no comma.  The caller owns (and must
 * free) the returned heap copy.
 */
static char *
copy_up_to_comma(const char *from)
{
	const char *sep = strchr(from, ',');

	if (sep == NULL)
		return strdup(from); /* rest of string */
	return strndup(from, (size_t)(sep - from));
}
/*
 * Configure the UART's backend from a comma-separated option string,
 * e.g. "stdio", "autopty[=linkname]", "log=path", or a tty device
 * path.  Options are processed left to right; "log=" maps a console
 * ring file, the others select the tty that backs the port.  Returns 0
 * on success (and opens the tty), -1 on failure.
 *
 * NOTE(review): strchr() + next[0]='\0' writes through the const
 * 'backend' parameter, so the caller's string is modified in place —
 * callers must pass writable storage; verify against call sites.
 */
int
uart_set_backend(struct uart_softc *sc, const char *backend, const char *devname)
{
	int retval;
	char *linkname = NULL;
	char *logname = NULL;
	int ptyfd;
	char *ptyname;
	retval = -1;
	/* No backend requested: nothing to do, report success. */
	if (backend == NULL)
		return (0);
	sc->tty.fd = -1;
	sc->tty.sfd = -1;
	sc->tty.name = NULL;
	while (1) {
		char *next;
		if (!backend)
			break;
		/* Split off the current option at the next comma. */
		next = strchr(backend, ',');
		if (next)
			next[0] = '\0';
		if (strcmp("stdio", backend) == 0 && !uart_stdio) {
			/* Only one UART may claim stdio. */
			sc->tty.fd = STDIN_FILENO;
			sc->tty.opened = true;
			uart_stdio = true;
			retval = fcntl(sc->tty.fd, F_SETFL, O_NONBLOCK);
		} else if (strcmp("autopty", backend) == 0 ||
			strncmp("autopty=", backend, 8) == 0) {
			/* Allocate a pty pair; optionally symlink the slave. */
			linkname = NULL;
			if (strncmp("autopty=", backend, 8) == 0)
				linkname = copy_up_to_comma(backend + 8);
			fprintf(stdout, "linkname %s\n", linkname);
			if ((ptyfd = open("/dev/ptmx", O_RDWR | O_NONBLOCK)) == -1) {
				fprintf(stderr, "error opening /dev/ptmx char device");
				goto err;
			}
			if ((ptyname = ptsname(ptyfd)) == NULL) {
				perror("ptsname: error getting name for slave pseudo terminal");
				goto err;
			}
			if ((retval = grantpt(ptyfd)) == -1) {
				perror("error setting up ownership and permissions on slave pseudo terminal");
				goto err;
			}
			if ((retval = unlockpt(ptyfd)) == -1) {
				perror("error unlocking slave pseudo terminal, to allow its usage");
				goto err;
			}
			fprintf(stdout, "%s connected to %s\n", devname, ptyname);
			if (linkname) {
				if ((unlink(linkname) == -1) && (errno != ENOENT)) {
					perror("unlinking autopty symlink");
					goto err;
				}
				if (symlink(ptyname, linkname) == -1){
					perror("creating autopty symlink");
					goto err;
				}
				fprintf(stdout, "%s linked to %s\n", devname, linkname);
			}
			sc->tty.fd = ptyfd;
			sc->tty.name = ptyname;
			sc->tty.opened = true;
			retval = 0;
		} else if (strncmp("log=", backend, 4) == 0) {
			logname = copy_up_to_comma(backend + 4);
			if (uart_mapring(sc, logname) == -1) {
				goto err;
			}
		} else if (uart_tty_backend(sc, backend) == 0) {
			retval = 0;
		} else {
			goto err;
		}
		/* Advance past the comma to the next option. */
		if (!next)
			break;
		backend = &next[1];
	}
	if (retval == 0)
		uart_opentty(sc);
	goto out;
err:
	if (sc->tty.fd != -1) close(sc->tty.fd);
out:
	if (linkname) free(linkname);
	if (logname) free(logname);
	return (retval);
}

768
vendor/github.com/docker/hyperkit/src/lib/virtio.c generated vendored Normal file
View File

@@ -0,0 +1,768 @@
/*-
* Copyright (c) 2013 Chris Torek <torek @ torek net>
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <stdio.h>
#include <stdint.h>
#include <pthread.h>
#include <sys/param.h>
#include <sys/uio.h>
#include <xhyve/support/misc.h>
#include <xhyve/xhyve.h>
#include <xhyve/pci_emul.h>
#include <xhyve/virtio.h>
/*
* Functions for dealing with generalized "virtual devices" as
* defined by <https://www.google.com/#output=search&q=virtio+spec>
*/
/*
 * In case we decide to relax the "virtio softc comes at the
 * front of virtio-based device softc" constraint, let's use
 * this to convert.
 *
 * (Currently an identity cast: the two pointers are the same.)
 */
#define DEV_SOFTC(vs) ((void *)(vs))
/*
 * Link a virtio_softc to its constants, the device softc, and
 * the PCI emulation.
 *
 * Also stamps each vqueue_info with a back-pointer to the softc and
 * its queue number so queue code can find its owner.
 */
void
vi_softc_linkup(struct virtio_softc *vs, struct virtio_consts *vc,
		void *dev_softc, struct pci_devinst *pi,
		struct vqueue_info *queues)
{
	int i;
	/* vs and dev_softc addresses must match */
	assert((void *)vs == dev_softc);
	vs->vs_vc = vc;
	vs->vs_pi = pi;
	pi->pi_arg = vs;
	vs->vs_queues = queues;
	for (i = 0; i < vc->vc_nvq; i++) {
		queues[i].vq_vs = vs;
		queues[i].vq_num = (uint16_t) i;
	}
}
/*
 * Reset device (device-wide). This erases all queues, i.e.,
 * all the queues become invalid (though we don't wipe out the
 * internal pointers, we just clear the VQ_ALLOC flag).
 *
 * It resets negotiated features to "none".
 *
 * If MSI-X is enabled, this also resets all the vectors to NO_VECTOR.
 */
void
vi_reset_dev(struct virtio_softc *vs)
{
	struct vqueue_info *vq;
	int i, nvq;
	nvq = vs->vs_vc->vc_nvq;
	for (vq = vs->vs_queues, i = 0; i < nvq; vq++, i++) {
		vq->vq_flags = 0;
		vq->vq_last_avail = 0;
		vq->vq_save_used = 0;
		vq->vq_pfn = 0;
		vq->vq_msix_idx = VIRTIO_MSI_NO_VECTOR;
	}
	vs->vs_negotiated_caps = 0;
	vs->vs_curq = 0;
	/* vs->vs_status = 0; -- redundant */
	/* Drop a pending legacy interrupt along with its ISR bit. */
	if (vs->vs_isr)
		pci_lintr_deassert(vs->vs_pi);
	vs->vs_isr = 0;
	vs->vs_msix_cfg_idx = VIRTIO_MSI_NO_VECTOR;
}
/*
 * Set I/O BAR (usually 0) to map PCI config registers.
 *
 * The BAR is sized for the standard virtio header plus the
 * device-specific config area (vc_cfgsize).
 */
void
vi_set_io_bar(struct virtio_softc *vs, int barnum)
{
	size_t size;
	/*
	 * ??? should we use CFG0 if MSI-X is disabled?
	 * Existing code did not...
	 */
	size = VTCFG_R_CFG1 + vs->vs_vc->vc_cfgsize;
	pci_emul_alloc_bar(vs->vs_pi, barnum, PCIBAR_IO, size);
}
/*
 * Initialize MSI-X vector capabilities if we're to use MSI-X,
 * or MSI capabilities if not.
 *
 * We assume we want one MSI-X vector per queue, here, plus one
 * for the config vec.
 *
 * Returns 0 on success, 1 if the MSI-X capability could not be added.
 */
int
vi_intr_init(struct virtio_softc *vs, int barnum, int use_msix)
{
	int nvec;
	if (use_msix) {
		vs->vs_flags |= VIRTIO_USE_MSIX;
		VS_LOCK(vs);
		vi_reset_dev(vs); /* set all vectors to NO_VECTOR */
		VS_UNLOCK(vs);
		nvec = vs->vs_vc->vc_nvq + 1;
		if (pci_emul_add_msixcap(vs->vs_pi, nvec, barnum))
			return (1);
	} else
		vs->vs_flags &= ~VIRTIO_USE_MSIX;
	/* Only 1 MSI vector for bhyve */
	pci_emul_add_msicap(vs->vs_pi, 1);
	/* Legacy interrupts are mandatory for virtio devices */
	pci_lintr_request(vs->vs_pi);
	return (0);
}
/*
 * Initialize the currently-selected virtio queue (vs->vs_curq).
 * The guest just gave us a page frame number, from which we can
 * calculate the addresses of the queue.
 *
 * Per the legacy virtio layout the descriptor table, avail ring and
 * used ring live back-to-back in guest memory, with the used ring
 * aligned up to VRING_ALIGN.
 */
static void
vi_vq_init(struct virtio_softc *vs, uint32_t pfn)
{
	struct vqueue_info *vq;
	uint64_t phys;
	size_t size;
	char *base;
	vq = &vs->vs_queues[vs->vs_curq];
	vq->vq_pfn = pfn;
	phys = (uint64_t)pfn << VRING_PFN;
	size = vring_size(vq->vq_qsize);
	base = paddr_guest2host(phys, size);
	/* First page(s) are descriptors... */
	vq->vq_desc = (struct virtio_desc *)base;
	base += vq->vq_qsize * sizeof(struct virtio_desc);
	/* ... immediately followed by "avail" ring (entirely uint16_t's) */
	vq->vq_avail = (struct vring_avail *)base;
	base += (2 + vq->vq_qsize + 1) * sizeof(uint16_t);
	/* Then it's rounded up to the next page... */
	base = (char *) roundup2(((uintptr_t) base), ((uintptr_t) VRING_ALIGN));
	/* ... and the last page(s) are the used ring. */
	vq->vq_used = (struct vring_used *)base;
	/* Mark queue as allocated, and start at 0 when we use it. */
	vq->vq_flags = VQ_ALLOC;
	vq->vq_last_avail = 0;
	vq->vq_save_used = 0;
}
/*
 * Helper inline for vq_getchain(): record the i'th "real" descriptor,
 * translating its guest-physical buffer address to a host pointer.
 * Entries past the caller's iov capacity are silently skipped so the
 * caller can keep counting descriptors.
 */
static inline void
_vq_record(int i, volatile struct virtio_desc *vd, struct iovec *iov, int n_iov,
	uint16_t *flags)
{
	if (i < n_iov) {
		iov[i].iov_base = paddr_guest2host(vd->vd_addr, vd->vd_len);
		iov[i].iov_len = vd->vd_len;
		if (flags != NULL)
			flags[i] = vd->vd_flags;
	}
}
#define VQ_MAX_DESCRIPTORS 512 /* see below */
/*
 * Examine the chain of descriptors starting at the "next one" to
 * make sure that they describe a sensible request. If so, return
 * the number of "real" descriptors that would be needed/used in
 * acting on this request. This may be smaller than the number of
 * available descriptors, e.g., if there are two available but
 * they are two separate requests, this just returns 1. Or, it
 * may be larger: if there are indirect descriptors involved,
 * there may only be one descriptor available but it may be an
 * indirect pointing to eight more. We return 8 in this case,
 * i.e., we do not count the indirect descriptors, only the "real"
 * ones.
 *
 * Basically, this vets the vd_flags and vd_next field of each
 * descriptor and tells you how many are involved. Since some may
 * be indirect, this also needs the vmctx (in the pci_devinst
 * at vs->vs_pi) so that it can find indirect descriptors.
 *
 * As we process each descriptor, we copy and adjust it (guest to
 * host address wise, also using the vmtctx) into the given iov[]
 * array (of the given size). If the array overflows, we stop
 * placing values into the array but keep processing descriptors,
 * up to VQ_MAX_DESCRIPTORS, before giving up and returning -1.
 * So you, the caller, must not assume that iov[] is as big as the
 * return value (you can process the same thing twice to allocate
 * a larger iov array if needed, or supply a zero length to find
 * out how much space is needed).
 *
 * If you want to verify the WRITE flag on each descriptor, pass a
 * non-NULL "flags" pointer to an array of "uint16_t" of the same size
 * as n_iov and we'll copy each vd_flags field after unwinding any
 * indirects.
 *
 * If some descriptor(s) are invalid, this prints a diagnostic message
 * and returns -1. If no descriptors are ready now it simply returns 0.
 *
 * You are assumed to have done a vq_ring_ready() if needed (note
 * that vq_has_descs() does one).
 */
int
vq_getchain(struct vqueue_info *vq, uint16_t *pidx, struct iovec *iov,
	int n_iov, uint16_t *flags)
{
	int i;
	u_int ndesc, n_indir;
	u_int idx, next;
	volatile struct virtio_desc *vdir, *vindir, *vp;
	struct virtio_softc *vs;
	const char *name;
	vs = vq->vq_vs;
	name = vs->vs_vc->vc_name;
	/*
	 * Note: it's the responsibility of the guest not to
	 * update vq->vq_avail->va_idx until all of the descriptors
	 * the guest has written are valid (including all their
	 * vd_next fields and vd_flags).
	 *
	 * Compute (last_avail - va_idx) in integers mod 2**16. This is
	 * the number of descriptors the device has made available
	 * since the last time we updated vq->vq_last_avail.
	 *
	 * We just need to do the subtraction as an unsigned int,
	 * then trim off excess bits.
	 */
	idx = vq->vq_last_avail;
	ndesc = (uint16_t)((u_int)vq->vq_avail->va_idx - idx);
	if (ndesc == 0)
		return (0);
	if (ndesc > vq->vq_qsize) {
		/* XXX need better way to diagnose issues */
		fprintf(stderr,
			"%s: ndesc (%u) out of range, driver confused?\r\n",
			name, (u_int)ndesc);
		return (-1);
	}
	/*
	 * Now count/parse "involved" descriptors starting from
	 * the head of the chain.
	 *
	 * To prevent loops, we could be more complicated and
	 * check whether we're re-visiting a previously visited
	 * index, but we just abort if the count gets excessive.
	 */
	*pidx = next = vq->vq_avail->va_ring[idx & (vq->vq_qsize - 1)];
	vq->vq_last_avail++;
	for (i = 0; i < VQ_MAX_DESCRIPTORS; next = vdir->vd_next) {
		if (next >= vq->vq_qsize) {
			fprintf(stderr,
				"%s: descriptor index %u out of range, "
				"driver confused?\r\n",
				name, next);
			return (-1);
		}
		vdir = &vq->vq_desc[next];
		if ((vdir->vd_flags & VRING_DESC_F_INDIRECT) == 0) {
			/* Direct descriptor: record and count it. */
			_vq_record(i, vdir, iov, n_iov, flags);
			i++;
		} else if ((vs->vs_vc->vc_hv_caps &
			VIRTIO_RING_F_INDIRECT_DESC) == 0) {
			fprintf(stderr,
				"%s: descriptor has forbidden INDIRECT flag, "
				"driver confused?\r\n",
				name);
			return (-1);
		} else {
			/* vd_len must be a non-zero multiple of 16 (the
			 * size of one descriptor) for a valid indirect
			 * table. */
			n_indir = vdir->vd_len / 16;
			if ((vdir->vd_len & 0xf) || n_indir == 0) {
				fprintf(stderr,
					"%s: invalid indir len 0x%x, "
					"driver confused?\r\n",
					name, (u_int)vdir->vd_len);
				return (-1);
			}
			vindir = paddr_guest2host(vdir->vd_addr, vdir->vd_len);
			/*
			 * Indirects start at the 0th, then follow
			 * their own embedded "next"s until those run
			 * out. Each one's indirect flag must be off
			 * (we don't really have to check, could just
			 * ignore errors...).
			 */
			next = 0;
			for (;;) {
				vp = &vindir[next];
				if (vp->vd_flags & VRING_DESC_F_INDIRECT) {
					fprintf(stderr,
						"%s: indirect desc has INDIR flag,"
						" driver confused?\r\n",
						name);
					return (-1);
				}
				_vq_record(i, vp, iov, n_iov, flags);
				if (++i > VQ_MAX_DESCRIPTORS)
					goto loopy;
				if ((vp->vd_flags & VRING_DESC_F_NEXT) == 0)
					break;
				next = vp->vd_next;
				if (next >= n_indir) {
					fprintf(stderr,
						"%s: invalid next %u > %u, "
						"driver confused?\r\n",
						name, (u_int)next, n_indir);
					return (-1);
				}
			}
		}
		if ((vdir->vd_flags & VRING_DESC_F_NEXT) == 0)
			return (i);
	}
loopy:
	fprintf(stderr,
		"%s: descriptor loop? count > %d - driver confused?\r\n",
		name, i);
	return (-1);
}
/*
 * Return the currently-first request chain back to the available queue.
 *
 * (This chain is the one you handled when you called vq_getchain()
 * and used its positive return value.)
 *
 * Implemented by stepping last_avail back one slot, so the next
 * vq_getchain() sees the same head again.
 */
void
vq_retchain(struct vqueue_info *vq)
{
	vq->vq_last_avail--;
}
/*
 * Return specified request chain to the guest, setting its I/O length
 * to the provided value.
 *
 * (This chain is the one you handled when you called vq_getchain()
 * and used its positive return value.)
 */
void
vq_relchain(struct vqueue_info *vq, uint16_t idx, uint32_t iolen)
{
	uint16_t uidx, mask;
	volatile struct vring_used *vuh;
	volatile struct virtio_used *vue;
	/*
	 * Notes:
	 *  - mask is N-1 where N is a power of 2 so computes x % N
	 *  - vuh points to the "used" data shared with guest
	 *  - vue points to the "used" ring entry we want to update
	 *  - head is the same value we compute in vq_iovecs().
	 *
	 * (I apologize for the two fields named vu_idx; the
	 * virtio spec calls the one that vue points to, "id"...)
	 */
	mask = vq->vq_qsize - 1;
	vuh = vq->vq_used;
	uidx = vuh->vu_idx;
	vue = &vuh->vu_ring[uidx++ & mask];
	vue->vu_idx = idx;
	vue->vu_tlen = iolen;
	/* Publish the new used index last so the guest never sees a
	 * partially-filled entry. */
	vuh->vu_idx = uidx;
}
/*
 * Driver has finished processing "available" chains and calling
 * vq_relchain on each one. If driver used all the available
 * chains, used_all should be set.
 *
 * If the "used" index moved we may need to inform the guest, i.e.,
 * deliver an interrupt. Even if the used index did NOT move we
 * may need to deliver an interrupt, if the avail ring is empty and
 * we are supposed to interrupt on empty.
 *
 * Note that used_all_avail is provided by the caller because it's
 * a snapshot of the ring state when he decided to finish interrupt
 * processing -- it's possible that descriptors became available after
 * that point. (It's also typically a constant 1/True as well.)
 */
void
vq_endchains(struct vqueue_info *vq, int used_all_avail)
{
	struct virtio_softc *vs;
	uint16_t event_idx, new_idx, old_idx;
	int intr;
	/*
	 * Interrupt generation: if we're using EVENT_IDX,
	 * interrupt if we've crossed the event threshold.
	 * Otherwise interrupt is generated if we added "used" entries,
	 * but suppressed by VRING_AVAIL_F_NO_INTERRUPT.
	 *
	 * In any case, though, if NOTIFY_ON_EMPTY is set and the
	 * entire avail was processed, we need to interrupt always.
	 */
	vs = vq->vq_vs;
	old_idx = vq->vq_save_used;
	vq->vq_save_used = new_idx = vq->vq_used->vu_idx;
	if (used_all_avail &&
		(vs->vs_negotiated_caps & VIRTIO_F_NOTIFY_ON_EMPTY))
		intr = 1;
	else if (vs->vs_negotiated_caps & VIRTIO_RING_F_EVENT_IDX) {
		event_idx = VQ_USED_EVENT_IDX(vq);
		/*
		 * This calculation is per docs and the kernel
		 * (see src/sys/dev/virtio/virtio_ring.h).
		 */
		intr = (uint16_t)(new_idx - event_idx - 1) <
			(uint16_t)(new_idx - old_idx);
	} else {
		intr = new_idx != old_idx &&
			!(vq->vq_avail->va_flags & VRING_AVAIL_F_NO_INTERRUPT);
	}
	if (intr)
		vq_interrupt(vs, vq);
}
/* Note: these are in sorted order to make for a fast search */
static struct config_reg {
	uint16_t cr_offset; /* register offset */
	uint8_t cr_size; /* size (bytes) */
	uint8_t cr_ro; /* true => reg is read only */
	const char *cr_name; /* name of reg */
} config_regs[] = {
	{ VTCFG_R_HOSTCAP, 4, 1, "HOSTCAP" },
	{ VTCFG_R_GUESTCAP, 4, 0, "GUESTCAP" },
	{ VTCFG_R_PFN, 4, 0, "PFN" },
	{ VTCFG_R_QNUM, 2, 1, "QNUM" },
	{ VTCFG_R_QSEL, 2, 0, "QSEL" },
	{ VTCFG_R_QNOTIFY, 2, 0, "QNOTIFY" },
	{ VTCFG_R_STATUS, 1, 0, "STATUS" },
	{ VTCFG_R_ISR, 1, 0, "ISR" },
	{ VTCFG_R_CFGVEC, 2, 0, "CFGVEC" },
	{ VTCFG_R_QVEC, 2, 0, "QVEC" },
};
/*
 * Binary search config_regs[] for the standard register at 'offset';
 * returns NULL when no register lives there.
 *
 * The indices are deliberately signed: with the unsigned locals this
 * file used before, probing an offset below the first table entry made
 * "hi = mid - 1" wrap to UINT_MAX when mid was 0, so the loop kept
 * running and read far outside config_regs[].
 */
static inline struct config_reg *
vi_find_cr(int offset) {
	int hi, lo, mid;
	struct config_reg *cr;

	lo = 0;
	hi = (int)(sizeof(config_regs) / sizeof(*config_regs)) - 1;
	while (hi >= lo) {
		mid = (hi + lo) >> 1;
		cr = &config_regs[mid];
		if (cr->cr_offset == offset)
			return (cr);
		if (cr->cr_offset < offset)
			lo = mid + 1;
		else
			hi = mid - 1;
	}
	return (NULL);
}
/*
 * Handle pci config space reads.
 * If it's to the MSI-X info, do that.
 * If it's part of the virtio standard stuff, do that.
 * Otherwise dispatch to the actual driver.
 *
 * Reads past the standard header go to the device's vc_cfgread hook;
 * the standard registers are handled inline below (note the ISR read
 * side effect: it clears the ISR and deasserts the legacy interrupt).
 */
uint64_t
vi_pci_read(UNUSED int vcpu, struct pci_devinst *pi, int baridx,
	uint64_t offset, int size)
{
	struct virtio_softc *vs = pi->pi_arg;
	struct virtio_consts *vc;
	struct config_reg *cr;
	uint64_t virtio_config_size, max;
	const char *name;
	uint32_t newoff;
	uint32_t value;
	int error;
	if (vs->vs_flags & VIRTIO_USE_MSIX) {
		if (baridx == pci_msix_table_bar(pi) ||
			baridx == pci_msix_pba_bar(pi)) {
			return (pci_emul_msix_tread(pi, offset, size));
		}
	}
	/* XXX probably should do something better than just assert() */
	assert(baridx == 0);
	if (vs->vs_mtx)
		pthread_mutex_lock(vs->vs_mtx);
	vc = vs->vs_vc;
	name = vc->vc_name;
	/* Default: all-ones, as returned for an unhandled read. */
	value = size == 1 ? 0xff : size == 2 ? 0xffff : 0xffffffff;
	if (size != 1 && size != 2 && size != 4)
		goto bad;
	if (pci_msix_enabled(pi))
		virtio_config_size = VTCFG_R_CFG1;
	else
		virtio_config_size = VTCFG_R_CFG0;
	if (offset >= virtio_config_size) {
		/*
		 * Subtract off the standard size (including MSI-X
		 * registers if enabled) and dispatch to underlying driver.
		 * If that fails, fall into general code.
		 */
		newoff = (uint32_t) (offset - virtio_config_size);
		max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
		if ((newoff + ((unsigned) size)) > max)
			goto bad;
		error = (*vc->vc_cfgread)(DEV_SOFTC(vs), ((int) newoff), size, &value);
		if (!error)
			goto done;
	}
bad:
	cr = vi_find_cr((int) offset);
	if (cr == NULL || cr->cr_size != size) {
		if (cr != NULL) {
			/* offset must be OK, so size must be bad */
			fprintf(stderr,
				"%s: read from %s: bad size %d\r\n",
				name, cr->cr_name, size);
		} else {
			fprintf(stderr,
				"%s: read from bad offset/size %jd/%d\r\n",
				name, (uintmax_t)offset, size);
		}
		goto done;
	}
	switch (offset) {
	case VTCFG_R_HOSTCAP:
		value = (uint32_t) vc->vc_hv_caps;
		break;
	case VTCFG_R_GUESTCAP:
		value = vs->vs_negotiated_caps;
		break;
	case VTCFG_R_PFN:
		if (vs->vs_curq < vc->vc_nvq)
			value = vs->vs_queues[vs->vs_curq].vq_pfn;
		break;
	case VTCFG_R_QNUM:
		value = vs->vs_curq < vc->vc_nvq ?
			vs->vs_queues[vs->vs_curq].vq_qsize : 0;
		break;
	case VTCFG_R_QSEL:
		value = (uint32_t) (vs->vs_curq);
		break;
	case VTCFG_R_QNOTIFY:
		value = 0; /* XXX */
		break;
	case VTCFG_R_STATUS:
		value = vs->vs_status;
		break;
	case VTCFG_R_ISR:
		value = vs->vs_isr;
		vs->vs_isr = 0; /* a read clears this flag */
		if (value)
			pci_lintr_deassert(pi);
		break;
	case VTCFG_R_CFGVEC:
		value = vs->vs_msix_cfg_idx;
		break;
	case VTCFG_R_QVEC:
		value = vs->vs_curq < vc->vc_nvq ?
			vs->vs_queues[vs->vs_curq].vq_msix_idx :
			VIRTIO_MSI_NO_VECTOR;
		break;
	}
done:
	if (vs->vs_mtx)
		pthread_mutex_unlock(vs->vs_mtx);
	return (value);
}
/*
 * Handle pci config space writes.
 * If it's to the MSI-X info, do that.
 * If it's part of the virtio standard stuff, do that.
 * Otherwise dispatch to the actual driver.
 */
void
vi_pci_write(UNUSED int vcpu, struct pci_devinst *pi, int baridx,
	uint64_t offset, int size, uint64_t value)
{
	struct virtio_softc *vs = pi->pi_arg;
	struct vqueue_info *vq;
	struct virtio_consts *vc;
	struct config_reg *cr;
	uint64_t virtio_config_size, max;
	const char *name;
	uint32_t newoff;
	int error;

	/* MSI-X table/PBA accesses go to the generic PCI MSI-X emulation. */
	if (vs->vs_flags & VIRTIO_USE_MSIX) {
		if (baridx == pci_msix_table_bar(pi) ||
		    baridx == pci_msix_pba_bar(pi)) {
			pci_emul_msix_twrite(pi, offset, size, value);
			return;
		}
	}

	/* XXX probably should do something better than just assert() */
	assert(baridx == 0);

	/* NOTE(review): vs_mtx may be NULL for single-threaded devices —
	 * locking is only done when the device supplied a mutex. */
	if (vs->vs_mtx)
		pthread_mutex_lock(vs->vs_mtx);

	vc = vs->vs_vc;
	name = vc->vc_name;

	/* Only 1/2/4 byte accesses are valid for the virtio registers. */
	if (size != 1 && size != 2 && size != 4)
		goto bad;

	/*
	 * The standard register window is larger when MSI-X is enabled,
	 * because the MSI-X vector registers sit between the common
	 * config and the device-specific config.
	 */
	if (pci_msix_enabled(pi))
		virtio_config_size = VTCFG_R_CFG1;
	else
		virtio_config_size = VTCFG_R_CFG0;

	if (offset >= virtio_config_size) {
		/*
		 * Subtract off the standard size (including MSI-X
		 * registers if enabled) and dispatch to underlying driver.
		 */
		newoff = (uint32_t) (offset - virtio_config_size);
		/* vc_cfgsize == 0 means "no limit" (full 4 GiB range). */
		max = vc->vc_cfgsize ? vc->vc_cfgsize : 0x100000000;
		if ((newoff + ((unsigned) size)) > max)
			goto bad;
		error = (*vc->vc_cfgwrite)(DEV_SOFTC(vs), ((int) newoff), size,
			((uint32_t) value));
		if (!error)
			goto done;
	}

bad:
	/* Validate offset/size against the standard register table. */
	cr = vi_find_cr((int) offset);
	if (cr == NULL || cr->cr_size != size || cr->cr_ro) {
		if (cr != NULL) {
			/* offset must be OK, wrong size and/or reg is R/O */
			if (cr->cr_size != size)
				fprintf(stderr,
					"%s: write to %s: bad size %d\r\n",
					name, cr->cr_name, size);
			if (cr->cr_ro)
				fprintf(stderr,
					"%s: write to read-only reg %s\r\n",
					name, cr->cr_name);
		} else {
			fprintf(stderr,
				"%s: write to bad offset/size %jd/%d\r\n",
				name, (uintmax_t)offset, size);
		}
		goto done;
	}

	switch (offset) {
	case VTCFG_R_GUESTCAP:
		/* Guest feature negotiation: mask against host capabilities. */
		vs->vs_negotiated_caps = (uint32_t) (value & vc->vc_hv_caps);
		if (vc->vc_apply_features)
			(*vc->vc_apply_features)(DEV_SOFTC(vs),
			    vs->vs_negotiated_caps);
		break;
	case VTCFG_R_PFN:
		/* Guest supplies the page frame number of the current queue. */
		if (vs->vs_curq >= vc->vc_nvq)
			goto bad_qindex;
		vi_vq_init(vs, ((uint32_t) value));
		break;
	case VTCFG_R_QSEL:
		/*
		 * Note that the guest is allowed to select an
		 * invalid queue; we just need to return a QNUM
		 * of 0 while the bad queue is selected.
		 */
		vs->vs_curq = (int) value;
		break;
	case VTCFG_R_QNOTIFY:
		/* Guest kick: dispatch to the per-queue or per-device notify. */
		if (value >= ((uint64_t) vc->vc_nvq)) {
			fprintf(stderr, "%s: queue %d notify out of range\r\n",
				name, (int)value);
			goto done;
		}
		vq = &vs->vs_queues[value];
		if (vq->vq_notify)
			(*vq->vq_notify)(DEV_SOFTC(vs), vq);
		else if (vc->vc_qnotify)
			(*vc->vc_qnotify)(DEV_SOFTC(vs), vq);
		else
			fprintf(stderr,
			    "%s: qnotify queue %d: missing vq/vc notify\r\n",
				name, (int)value);
		break;
	case VTCFG_R_STATUS:
		vs->vs_status = (uint8_t) value;
		/* Writing 0 to STATUS is a device reset request. */
		if (value == 0)
			(*vc->vc_reset)(DEV_SOFTC(vs));
		break;
	case VTCFG_R_CFGVEC:
		/* MSI-X vector for config-change interrupts. */
		vs->vs_msix_cfg_idx = (uintint16_t) value;
		break;
	case VTCFG_R_QVEC:
		/* MSI-X vector for the currently selected queue. */
		if (vs->vs_curq >= vc->vc_nvq)
			goto bad_qindex;
		vq = &vs->vs_queues[vs->vs_curq];
		vq->vq_msix_idx = (uint16_t) value;
		break;
	}
	goto done;

bad_qindex:
	fprintf(stderr,
	    "%s: write config reg %s: curq %d >= max %d\r\n",
	    name, cr->cr_name, vs->vs_curq, vc->vc_nvq);

done:
	if (vs->vs_mtx)
		pthread_mutex_unlock(vs->vs_mtx);
}

104
vendor/github.com/docker/hyperkit/src/lib/xmsr.c generated vendored Normal file
View File

@@ -0,0 +1,104 @@
/*-
* Copyright (c) 2011 NetApp, Inc.
* Copyright (c) 2015 xhyve developers
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <xhyve/support/misc.h>
#include <xhyve/support/specialreg.h>
#include <xhyve/vmm/vmm_api.h>
#include <xhyve/xhyve.h>
#include <xhyve/xmsr.h>
/*
 * Emulate a guest WRMSR.
 *
 * Writes to the listed MSRs are silently discarded (returns 0);
 * any other MSR is unhandled and reported via the -1 return.
 */
int
emulate_wrmsr(UNUSED int vcpu, uint32_t num, UNUSED uint64_t val)
{
	switch (num) {
	case 0xd04:		/* Sandy Bridge uncore PMCs */
	case 0xc24:
	case MSR_BIOS_UPDT_TRIG:
	case MSR_BIOS_SIGN:
		/* Accept and drop the write. */
		return (0);
	}

	/* Unknown MSR: let the caller decide how to fault. */
	return (-1);
}
/*
 * Emulate a guest RDMSR.
 *
 * Stores the emulated register value in *val and returns 0 for the
 * MSRs handled here; returns -1 (and leaves *val untouched) for any
 * MSR we do not know about.
 */
int
emulate_rdmsr(UNUSED int vcpu, uint32_t num, uint64_t *val)
{
	switch (num) {
	case MSR_BIOS_SIGN:
	case MSR_IA32_PLATFORM_ID:
	case MSR_PKG_ENERGY_STATUS:
	case MSR_PP0_ENERGY_STATUS:
	case MSR_PP1_ENERGY_STATUS:
	case MSR_DRAM_ENERGY_STATUS:
		/* Report all of these as zero. */
		*val = 0;
		return (0);
	case MSR_RAPL_POWER_UNIT:
		/*
		 * Use the default value documented in section
		 * "RAPL Interfaces" in Intel SDM vol3.
		 */
		*val = 0x000a1003;
		return (0);
	default:
		return (-1);
	}
}
/*
 * Verify that the host CPU is an Intel part, since the MSR emulation
 * above only handles Intel-specific MSRs.
 *
 * Returns 0 on a "GenuineIntel" host, -1 (with a diagnostic on
 * stderr) otherwise.
 *
 * Fix vs. original: relies on <string.h> for strcmp() — the file
 * previously called it without a prototype (implicit declaration,
 * invalid since C99). Also normalized the return style to the
 * file's (value) convention.
 */
int
init_msr(void)
{
	u_int regs[4];
	u_int cpu_vendor[4];

	/*
	 * CPUID leaf 0 returns the 12-byte vendor string in
	 * EBX, EDX, ECX — reassemble it in that order.
	 */
	do_cpuid(0, regs);
	cpu_vendor[0] = regs[1];
	cpu_vendor[1] = regs[3];
	cpu_vendor[2] = regs[2];
	cpu_vendor[3] = 0;	/* NUL-terminate so it reads as a C string */

	if (strcmp(((char *) cpu_vendor), "GenuineIntel") == 0)
		return (0);

	fprintf(stderr, "Unknown cpu vendor \"%s\"\n", ((char *) cpu_vendor));
	return (-1);
}