diff --git a/cli/config/configuration.toml.in b/cli/config/configuration.toml.in
index 7ed5141248..be3086ac12 100644
--- a/cli/config/configuration.toml.in
+++ b/cli/config/configuration.toml.in
@@ -234,6 +234,10 @@ path = "@NETMONPATH@"
 #   - none
 #     Used when customize network. Only creates a tap device. No veth pair.
 #
+#   - tcfilter
+#     Uses tc filter rules to redirect traffic from the network interface
+#     provided by the plugin to a tap interface connected to the VM.
+#
 internetworking_model="@DEFNETWORKMODEL@"
 
 # If enabled, the runtime will create opentracing.io traces and spans.
diff --git a/virtcontainers/network.go b/virtcontainers/network.go
index a583f28ceb..86f5a3b345 100644
--- a/virtcontainers/network.go
+++ b/virtcontainers/network.go
@@ -51,6 +51,11 @@ const (
 	// This will be used for vethtap, macvtap, ipvtap
 	NetXConnectEnlightenedModel
 
+	// NetXConnectTCFilterModel redirects traffic from the network interface
+	// provided by the network plugin to a tap interface.
+	// This works for ipvlan and macvlan as well.
+	NetXConnectTCFilterModel
+
 	// NetXConnectNoneModel can be used when the VM is in the host network namespace
 	NetXConnectNoneModel
 
@@ -63,21 +68,38 @@ func (n NetInterworkingModel) IsValid() bool {
 	return 0 <= int(n) && int(n) < int(NetXConnectInvalidModel)
 }
 
+const (
+	defaultNetModelStr = "default"
+
+	bridgedNetModelStr = "bridged"
+
+	macvtapNetModelStr = "macvtap"
+
+	enlightenedNetModelStr = "enlightened"
+
+	tcFilterNetModelStr = "tcfilter"
+
+	noneNetModelStr = "none"
+)
+
 //SetModel change the model string value
 func (n *NetInterworkingModel) SetModel(modelName string) error {
 	switch modelName {
-	case "default":
+	case defaultNetModelStr:
 		*n = DefaultNetInterworkingModel
 		return nil
-	case "bridged":
+	case bridgedNetModelStr:
 		*n = NetXConnectBridgedModel
 		return nil
-	case "macvtap":
+	case macvtapNetModelStr:
 		*n = NetXConnectMacVtapModel
 		return nil
-	case "enlightened":
+	case enlightenedNetModelStr:
 		*n = NetXConnectEnlightenedModel
 		return nil
+	case tcFilterNetModelStr:
+		*n = NetXConnectTCFilterModel
+		return nil
-	case "none":
+	case noneNetModelStr:
 		*n = NetXConnectNoneModel
 		return nil
@@ -493,6 +515,11 @@ func xconnectVMNetwork(endpoint Endpoint, connect bool, numCPUs uint32, disableV
 			return tapNetworkPair(endpoint, numCPUs, disableVhostNet)
 		}
 		return untapNetworkPair(endpoint)
+	case NetXConnectTCFilterModel:
+		if connect {
+			return setupTCFiltering(endpoint, numCPUs, disableVhostNet)
+		}
+		return removeTCFiltering(endpoint)
 	case NetXConnectEnlightenedModel:
 		return fmt.Errorf("Unsupported networking model")
 	default:
@@ -757,6 +784,188 @@ func bridgeNetworkPair(endpoint Endpoint, numCPUs uint32, disableVhostNet bool)
 	return nil
 }
 
+// setupTCFiltering connects the endpoint to the VM using tc filter rules.
+// It creates the TAP device named in the endpoint's network pair, stores the
+// resulting fds (and vhost fds unless disableVhostNet is set) in the pair,
+// copies the MTU of the endpoint's link to the TAP and brings the TAP up.
+// Ingress qdiscs and redirect filters are then added on both interfaces so
+// that traffic arriving on either one is redirected to the other.
+//
+// NOTE(review): the error paths do not roll anything back, so a failure
+// after the TAP has been created leaves it (and any qdisc already added)
+// in place; confirm that the caller tears the endpoint down in that case.
+func setupTCFiltering(endpoint Endpoint, numCPUs uint32, disableVhostNet bool) error {
+	netHandle, err := netlink.NewHandle()
+	if err != nil {
+		return err
+	}
+	defer netHandle.Delete()
+
+	netPair := endpoint.NetworkPair()
+
+	tapLink, fds, err := createLink(netHandle, netPair.TAPIface.Name, &netlink.Tuntap{}, int(numCPUs))
+	if err != nil {
+		return fmt.Errorf("Could not create TAP interface: %s", err)
+	}
+	netPair.VMFds = fds
+
+	if !disableVhostNet {
+		vhostFds, err := createVhostFds(int(numCPUs))
+		if err != nil {
+			return fmt.Errorf("Could not setup vhost fds %s : %s", netPair.VirtIface.Name, err)
+		}
+		netPair.VhostFds = vhostFds
+	}
+
+	var attrs *netlink.LinkAttrs
+	var link netlink.Link
+
+	link, err = getLinkForEndpoint(endpoint, netHandle)
+	if err != nil {
+		return err
+	}
+
+	attrs = link.Attrs()
+
+	// Save the veth MAC address to the TAP so that it can later be used
+	// to build the hypervisor command line. This MAC address has to be
+	// the one inside the VM in order to avoid any firewall issues. The
+	// bridge created by the network plugin on the host actually expects
+	// to see traffic from this MAC address and not another one.
+	netPair.TAPIface.HardAddr = attrs.HardwareAddr.String()
+
+	if err := netHandle.LinkSetMTU(tapLink, attrs.MTU); err != nil {
+		return fmt.Errorf("Could not set TAP MTU %d: %s", attrs.MTU, err)
+	}
+
+	if err := netHandle.LinkSetUp(tapLink); err != nil {
+		return fmt.Errorf("Could not enable TAP %s: %s", netPair.TAPIface.Name, err)
+	}
+
+	tapAttrs := tapLink.Attrs()
+
+	if err := addQdiscIngress(tapAttrs.Index); err != nil {
+		return err
+	}
+
+	if err := addQdiscIngress(attrs.Index); err != nil {
+		return err
+	}
+
+	if err := addRedirectTCFilter(attrs.Index, tapAttrs.Index); err != nil {
+		return err
+	}
+
+	if err := addRedirectTCFilter(tapAttrs.Index, attrs.Index); err != nil {
+		return err
+	}
+
+	return nil
+}
+
+// addQdiscIngress creates a new qdisc for network interface with the specified network index
+// on "ingress". qdiscs normally don't work on ingress so this is really a special qdisc
+// that you can consider an "alternate root" for inbound packets.
+// Handle for ingress qdisc defaults to "ffff:"
+//
+// This is equivalent to calling `tc qdisc add dev eth0 ingress`
+func addQdiscIngress(index int) error {
+	qdisc := &netlink.Ingress{
+		QdiscAttrs: netlink.QdiscAttrs{
+			LinkIndex: index,
+			Parent:    netlink.HANDLE_INGRESS,
+		},
+	}
+
+	err := netlink.QdiscAdd(qdisc)
+	if err != nil {
+		return fmt.Errorf("Failed to add qdisc for network index %d : %s", index, err)
+	}
+
+	return nil
+}
+
+// addRedirectTCFilter adds a tc filter for device with index "sourceIndex".
+// All traffic for interface with index "sourceIndex" is redirected to interface with
+// index "destIndex"
+//
+// This is equivalent to calling:
+// `tc filter add dev source parent ffff: protocol all u32 match u8 0 0 action mirred egress redirect dev dest`
+func addRedirectTCFilter(sourceIndex, destIndex int) error {
+	filter := &netlink.U32{
+		FilterAttrs: netlink.FilterAttrs{
+			LinkIndex: sourceIndex,
+			Parent:    netlink.MakeHandle(0xffff, 0),
+			Protocol:  unix.ETH_P_ALL,
+		},
+		Actions: []netlink.Action{
+			&netlink.MirredAction{
+				ActionAttrs: netlink.ActionAttrs{
+					Action: netlink.TC_ACT_STOLEN,
+				},
+				MirredAction: netlink.TCA_EGRESS_REDIR,
+				Ifindex:      destIndex,
+			},
+		},
+	}
+
+	if err := netlink.FilterAdd(filter); err != nil {
+		return fmt.Errorf("Failed to add filter for index %d : %s", sourceIndex, err)
+	}
+
+	return nil
+}
+
+// removeRedirectTCFilter removes all tc u32 filters created on ingress qdisc for "link".
+func removeRedirectTCFilter(link netlink.Link) error {
+	if link == nil {
+		return nil
+	}
+
+	// Handle 0xffff is used for ingress
+	filters, err := netlink.FilterList(link, netlink.MakeHandle(0xffff, 0))
+	if err != nil {
+		return err
+	}
+
+	for _, f := range filters {
+		u32, ok := f.(*netlink.U32)
+
+		if !ok {
+			continue
+		}
+
+		if err := netlink.FilterDel(u32); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// removeQdiscIngress removes the ingress qdisc previously created on "link".
+func removeQdiscIngress(link netlink.Link) error {
+	if link == nil {
+		return nil
+	}
+
+	qdiscs, err := netlink.QdiscList(link)
+	if err != nil {
+		return err
+	}
+
+	for _, qdisc := range qdiscs {
+		ingress, ok := qdisc.(*netlink.Ingress)
+		if !ok {
+			continue
+		}
+
+		if err := netlink.QdiscDel(ingress); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
 func untapNetworkPair(endpoint Endpoint) error {
 	netHandle, err := netlink.NewHandle()
 	if err != nil {
@@ -844,6 +1053,52 @@ func unBridgeNetworkPair(endpoint Endpoint) error {
 	return nil
 }
 
+// removeTCFiltering undoes setupTCFiltering: it brings down and deletes the
+// TAP device named in the endpoint's network pair, then removes the u32
+// redirect filters and the ingress qdisc from the endpoint's link and
+// brings that link down.
+func removeTCFiltering(endpoint Endpoint) error {
+	netHandle, err := netlink.NewHandle()
+	if err != nil {
+		return err
+	}
+	defer netHandle.Delete()
+
+	netPair := endpoint.NetworkPair()
+
+	tapLink, err := getLinkByName(netHandle, netPair.TAPIface.Name, &netlink.Tuntap{})
+	if err != nil {
+		return fmt.Errorf("Could not get TAP interface: %s", err)
+	}
+
+	if err := netHandle.LinkSetDown(tapLink); err != nil {
+		return fmt.Errorf("Could not disable TAP %s: %s", netPair.TAPIface.Name, err)
+	}
+
+	if err := netHandle.LinkDel(tapLink); err != nil {
+		return fmt.Errorf("Could not remove TAP %s: %s", netPair.TAPIface.Name, err)
+	}
+
+	link, err := getLinkForEndpoint(endpoint, netHandle)
+	if err != nil {
+		return err
+	}
+
+	if err := removeRedirectTCFilter(link); err != nil {
+		return err
+	}
+
+	if err := removeQdiscIngress(link); err != nil {
+		return err
+	}
+
+	if err := netHandle.LinkSetDown(link); err != nil {
+		return fmt.Errorf("Could not disable veth %s: %s", netPair.VirtIface.Name, err)
+	}
+
+	return nil
+}
+
 func createNetNS() (string, error) {
 	n, err := ns.NewNS()
 	if err != nil {
diff --git a/virtcontainers/network_test.go b/virtcontainers/network_test.go
index 6f640b7861..658df417ac 100644
--- a/virtcontainers/network_test.go
+++ b/virtcontainers/network_test.go
@@ -192,6 +192,7 @@ func TestNetInterworkingModelIsValid(t *testing.T) {
 		{"Invalid Model", NetXConnectInvalidModel, false},
 		{"Default Model", NetXConnectDefaultModel, true},
 		{"Bridged Model", NetXConnectBridgedModel, true},
+		{"TC Filter Model", NetXConnectTCFilterModel, true},
 		{"Macvtap Model", NetXConnectMacVtapModel, true},
 		{"Enlightened Model", NetXConnectEnlightenedModel, true},
 	}
@@ -212,11 +213,12 @@ func TestNetInterworkingModelSetModel(t *testing.T) {
 		wantErr   bool
 	}{
 		{"Invalid Model", "Invalid", true},
-		{"default Model", "default", false},
-		{"bridged Model", "bridged", false},
-		{"macvtap Model", "macvtap", false},
-		{"enlightened Model", "enlightened", false},
-		{"none Model", "none", false},
+		{"default Model", defaultNetModelStr, false},
+		{"bridged Model", bridgedNetModelStr, false},
+		{"macvtap Model", macvtapNetModelStr, false},
+		{"enlightened Model", enlightenedNetModelStr, false},
+		{"tcfilter Model", tcFilterNetModelStr, false},
+		{"none Model", noneNetModelStr, false},
 	}
 
 	for _, tt := range tests {
@@ -338,3 +340,44 @@ func TestCreateMacVtap(t *testing.T) {
 	err = netHandle.LinkDel(brLink)
 	assert.NoError(err)
 }
+
+// TestTcRedirectNetwork sets up and then removes tc filtering on a veth
+// endpoint created for the test. It needs root privileges and is skipped
+// otherwise.
+func TestTcRedirectNetwork(t *testing.T) {
+	if os.Geteuid() != 0 {
+		t.Skip(testDisabledAsNonRoot)
+	}
+
+	assert := assert.New(t)
+
+	netHandle, err := netlink.NewHandle()
+	assert.NoError(err)
+	defer netHandle.Delete()
+
+	// Create a test veth interface.
+	vethName := "foo"
+	veth := &netlink.Veth{LinkAttrs: netlink.LinkAttrs{Name: vethName, TxQLen: 200, MTU: 1400}, PeerName: "bar"}
+
+	err = netlink.LinkAdd(veth)
+	assert.NoError(err)
+
+	endpoint, err := createVethNetworkEndpoint(1, vethName, NetXConnectTCFilterModel)
+	assert.NoError(err)
+
+	link, err := netlink.LinkByName(vethName)
+	assert.NoError(err)
+
+	err = netHandle.LinkSetUp(link)
+	assert.NoError(err)
+
+	err = setupTCFiltering(endpoint, 1, true)
+	assert.NoError(err)
+
+	err = removeTCFiltering(endpoint)
+	assert.NoError(err)
+
+	// Remove the veth created for testing.
+	err = netHandle.LinkDel(link)
+	assert.NoError(err)
+}