Merge pull request #827 from amshinde/tc-filtering

network: Use tc filtering rules in bridge mode
This commit is contained in:
Sebastien Boeuf 2018-10-24 15:21:54 -07:00 committed by GitHub
commit c7a9e454ac
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 294 additions and 9 deletions

View File

@ -234,6 +234,10 @@ path = "@NETMONPATH@"
# - none # - none
# Used when customize network. Only creates a tap device. No veth pair. # Used when customize network. Only creates a tap device. No veth pair.
# #
# - tcfilter
# Uses tc filter rules to redirect traffic from the network interface
# provided by plugin to a tap interface connected to the VM.
#
internetworking_model="@DEFNETWORKMODEL@" internetworking_model="@DEFNETWORKMODEL@"
# If enabled, the runtime will create opentracing.io traces and spans. # If enabled, the runtime will create opentracing.io traces and spans.

View File

@ -51,6 +51,11 @@ const (
// This will be used for vethtap, macvtap, ipvtap // This will be used for vethtap, macvtap, ipvtap
NetXConnectEnlightenedModel NetXConnectEnlightenedModel
// NetXConnectTCFilterModel redirects traffic from the network interface
// provided by the network plugin to a tap interface.
// This works for ipvlan and macvlan as well.
NetXConnectTCFilterModel
// NetXConnectNoneModel can be used when the VM is in the host network namespace // NetXConnectNoneModel can be used when the VM is in the host network namespace
NetXConnectNoneModel NetXConnectNoneModel
@ -63,21 +68,38 @@ func (n NetInterworkingModel) IsValid() bool {
return 0 <= int(n) && int(n) < int(NetXConnectInvalidModel) return 0 <= int(n) && int(n) < int(NetXConnectInvalidModel)
} }
// String names of the network interworking models, as accepted by
// NetInterworkingModel.SetModel.
const (
	// defaultNetModelStr selects DefaultNetInterworkingModel.
	defaultNetModelStr = "default"

	// bridgedNetModelStr selects NetXConnectBridgedModel.
	bridgedNetModelStr = "bridged"

	// macvtapNetModelStr selects NetXConnectMacVtapModel.
	macvtapNetModelStr = "macvtap"

	// enlightenedNetModelStr selects NetXConnectEnlightenedModel.
	enlightenedNetModelStr = "enlightened"

	// tcFilterNetModelStr selects NetXConnectTCFilterModel.
	tcFilterNetModelStr = "tcfilter"

	// noneNetModelStr selects NetXConnectNoneModel.
	noneNetModelStr = "none"
)
//SetModel change the model string value //SetModel change the model string value
func (n *NetInterworkingModel) SetModel(modelName string) error { func (n *NetInterworkingModel) SetModel(modelName string) error {
switch modelName { switch modelName {
case "default": case defaultNetModelStr:
*n = DefaultNetInterworkingModel *n = DefaultNetInterworkingModel
return nil return nil
case "bridged": case bridgedNetModelStr:
*n = NetXConnectBridgedModel *n = NetXConnectBridgedModel
return nil return nil
case "macvtap": case macvtapNetModelStr:
*n = NetXConnectMacVtapModel *n = NetXConnectMacVtapModel
return nil return nil
case "enlightened": case enlightenedNetModelStr:
*n = NetXConnectEnlightenedModel *n = NetXConnectEnlightenedModel
return nil return nil
case tcFilterNetModelStr:
*n = NetXConnectTCFilterModel
return nil
case "none": case "none":
*n = NetXConnectNoneModel *n = NetXConnectNoneModel
return nil return nil
@ -493,6 +515,11 @@ func xconnectVMNetwork(endpoint Endpoint, connect bool, numCPUs uint32, disableV
return tapNetworkPair(endpoint, numCPUs, disableVhostNet) return tapNetworkPair(endpoint, numCPUs, disableVhostNet)
} }
return untapNetworkPair(endpoint) return untapNetworkPair(endpoint)
case NetXConnectTCFilterModel:
if connect {
return setupTCFiltering(endpoint, numCPUs, disableVhostNet)
}
return removeTCFiltering(endpoint)
case NetXConnectEnlightenedModel: case NetXConnectEnlightenedModel:
return fmt.Errorf("Unsupported networking model") return fmt.Errorf("Unsupported networking model")
default: default:
@ -757,6 +784,178 @@ func bridgeNetworkPair(endpoint Endpoint, numCPUs uint32, disableVhostNet bool)
return nil return nil
} }
// setupTCFiltering wires the endpoint's network interface to a freshly
// created TAP interface using tc redirect filters.
//
// Steps, in order:
//   - create the TAP link named after the pair's TAPIface and store the
//     returned fds in the network pair;
//   - unless disableVhostNet is set, create vhost fds and store them too;
//   - look up the endpoint's own link and record its MAC address on the
//     pair's TAPIface, then apply its MTU to the TAP and bring the TAP up;
//   - add an ingress qdisc to both interfaces and a redirect filter in each
//     direction.
//
// The first failing step aborts the setup and its error is returned.
func setupTCFiltering(endpoint Endpoint, numCPUs uint32, disableVhostNet bool) error {
	handle, err := netlink.NewHandle()
	if err != nil {
		return err
	}
	defer handle.Delete()

	pair := endpoint.NetworkPair()
	queues := int(numCPUs)

	tap, vmFds, err := createLink(handle, pair.TAPIface.Name, &netlink.Tuntap{}, queues)
	if err != nil {
		return fmt.Errorf("Could not create TAP interface: %s", err)
	}
	pair.VMFds = vmFds

	if !disableVhostNet {
		vhostFds, vhostErr := createVhostFds(queues)
		if vhostErr != nil {
			return fmt.Errorf("Could not setup vhost fds %s : %s", pair.VirtIface.Name, vhostErr)
		}
		pair.VhostFds = vhostFds
	}

	epLink, err := getLinkForEndpoint(endpoint, handle)
	if err != nil {
		return err
	}
	epAttrs := epLink.Attrs()

	// Record the MAC address of the endpoint's interface on the TAP side of
	// the pair; it is used later when building the hypervisor command line.
	// The VM must present this exact address, otherwise traffic may be
	// rejected: the host-side bridge set up by the network plugin expects to
	// see frames coming from this MAC and no other.
	pair.TAPIface.HardAddr = epAttrs.HardwareAddr.String()

	if err := handle.LinkSetMTU(tap, epAttrs.MTU); err != nil {
		return fmt.Errorf("Could not set TAP MTU %d: %s", epAttrs.MTU, err)
	}

	if err := handle.LinkSetUp(tap); err != nil {
		return fmt.Errorf("Could not enable TAP %s: %s", pair.TAPIface.Name, err)
	}

	tapIndex, epIndex := tap.Attrs().Index, epAttrs.Index

	// Both interfaces need an ingress qdisc before filters can be attached.
	for _, idx := range []int{tapIndex, epIndex} {
		if err := addQdiscIngress(idx); err != nil {
			return err
		}
	}

	// Redirect endpoint -> TAP first, then TAP -> endpoint.
	if err := addRedirectTCFilter(epIndex, tapIndex); err != nil {
		return err
	}

	return addRedirectTCFilter(tapIndex, epIndex)
}
// addQdiscIngress attaches an "ingress" qdisc to the network interface whose
// link index is "index". Regular qdiscs do not apply to inbound traffic, so
// this is a special qdisc that can be thought of as an "alternate root" for
// incoming packets. The handle of the ingress qdisc defaults to "ffff:".
//
// Equivalent to running `tc qdisc add dev eth0 ingress`.
func addQdiscIngress(index int) error {
	attrs := netlink.QdiscAttrs{
		LinkIndex: index,
		Parent:    netlink.HANDLE_INGRESS,
	}

	if err := netlink.QdiscAdd(&netlink.Ingress{QdiscAttrs: attrs}); err != nil {
		return fmt.Errorf("Failed to add qdisc for network index %d : %s", index, err)
	}

	return nil
}
// addRedirectTCFilter installs a tc u32 filter on the ingress qdisc of the
// interface with index "sourceIndex". The filter carries a mirred action that
// redirects traffic to the egress of the interface with index "destIndex".
//
// Equivalent to running:
// `tc filter add dev source parent ffff: protocol all u32 match u8 0 0 action mirred egress redirect dev dest`
func addRedirectTCFilter(sourceIndex, destIndex int) error {
	redirect := &netlink.MirredAction{
		ActionAttrs: netlink.ActionAttrs{
			Action: netlink.TC_ACT_STOLEN,
		},
		MirredAction: netlink.TCA_EGRESS_REDIR,
		Ifindex:      destIndex,
	}

	// The filter is parented on handle ffff:, i.e. the ingress qdisc.
	attrs := netlink.FilterAttrs{
		LinkIndex: sourceIndex,
		Parent:    netlink.MakeHandle(0xffff, 0),
		Protocol:  unix.ETH_P_ALL,
	}

	err := netlink.FilterAdd(&netlink.U32{
		FilterAttrs: attrs,
		Actions:     []netlink.Action{redirect},
	})
	if err != nil {
		return fmt.Errorf("Failed to add filter for index %d : %s", sourceIndex, err)
	}

	return nil
}
// removeRedirectTCFilter deletes every tc u32 filter found on the ingress
// qdisc of "link". A nil link is a no-op. Filters of any other type are left
// untouched. The first list or delete failure is returned as-is.
func removeRedirectTCFilter(link netlink.Link) error {
	if link == nil {
		return nil
	}

	// 0xffff is the handle used for the ingress qdisc.
	ingressParent := netlink.MakeHandle(0xffff, 0)

	filters, err := netlink.FilterList(link, ingressParent)
	if err != nil {
		return err
	}

	for _, filter := range filters {
		switch u32Filter := filter.(type) {
		case *netlink.U32:
			if err := netlink.FilterDel(u32Filter); err != nil {
				return err
			}
		}
	}

	return nil
}
// removeQdiscIngress deletes every ingress qdisc listed for "link". A nil
// link is a no-op. Qdiscs of any other type are skipped. The first list or
// delete failure is returned as-is.
func removeQdiscIngress(link netlink.Link) error {
	if link == nil {
		return nil
	}

	attached, err := netlink.QdiscList(link)
	if err != nil {
		return err
	}

	for _, q := range attached {
		if ingressQdisc, isIngress := q.(*netlink.Ingress); isIngress {
			if err := netlink.QdiscDel(ingressQdisc); err != nil {
				return err
			}
		}
	}

	return nil
}
func untapNetworkPair(endpoint Endpoint) error { func untapNetworkPair(endpoint Endpoint) error {
netHandle, err := netlink.NewHandle() netHandle, err := netlink.NewHandle()
if err != nil { if err != nil {
@ -844,6 +1043,48 @@ func unBridgeNetworkPair(endpoint Endpoint) error {
return nil return nil
} }
// removeTCFiltering tears down what setupTCFiltering put in place for the
// endpoint.
//
// It looks up the TAP interface by the name stored in the network pair, sets
// it down and deletes it. It then looks up the endpoint's own link, removes
// the u32 redirect filters and the ingress qdisc from it, and finally sets
// that link down. The first failing step aborts the teardown and its error is
// returned.
func removeTCFiltering(endpoint Endpoint) error {
	handle, err := netlink.NewHandle()
	if err != nil {
		return err
	}
	defer handle.Delete()

	pair := endpoint.NetworkPair()
	tapName := pair.TAPIface.Name

	tap, err := getLinkByName(handle, tapName, &netlink.Tuntap{})
	if err != nil {
		return fmt.Errorf("Could not get TAP interface: %s", err)
	}

	if err = handle.LinkSetDown(tap); err != nil {
		return fmt.Errorf("Could not disable TAP %s: %s", tapName, err)
	}

	if err = handle.LinkDel(tap); err != nil {
		return fmt.Errorf("Could not remove TAP %s: %s", tapName, err)
	}

	epLink, err := getLinkForEndpoint(endpoint, handle)
	if err != nil {
		return err
	}

	// Filters are removed before the qdisc they are attached to.
	if err = removeRedirectTCFilter(epLink); err != nil {
		return err
	}

	if err = removeQdiscIngress(epLink); err != nil {
		return err
	}

	if err = handle.LinkSetDown(epLink); err != nil {
		return fmt.Errorf("Could not disable veth %s: %s", pair.VirtIface.Name, err)
	}

	return nil
}
func createNetNS() (string, error) { func createNetNS() (string, error) {
n, err := ns.NewNS() n, err := ns.NewNS()
if err != nil { if err != nil {

View File

@ -192,6 +192,7 @@ func TestNetInterworkingModelIsValid(t *testing.T) {
{"Invalid Model", NetXConnectInvalidModel, false}, {"Invalid Model", NetXConnectInvalidModel, false},
{"Default Model", NetXConnectDefaultModel, true}, {"Default Model", NetXConnectDefaultModel, true},
{"Bridged Model", NetXConnectBridgedModel, true}, {"Bridged Model", NetXConnectBridgedModel, true},
{"TC Filter Model", NetXConnectTCFilterModel, true},
{"Macvtap Model", NetXConnectMacVtapModel, true}, {"Macvtap Model", NetXConnectMacVtapModel, true},
{"Enlightened Model", NetXConnectEnlightenedModel, true}, {"Enlightened Model", NetXConnectEnlightenedModel, true},
} }
@ -212,11 +213,12 @@ func TestNetInterworkingModelSetModel(t *testing.T) {
wantErr bool wantErr bool
}{ }{
{"Invalid Model", "Invalid", true}, {"Invalid Model", "Invalid", true},
{"default Model", "default", false}, {"default Model", defaultNetModelStr, false},
{"bridged Model", "bridged", false}, {"bridged Model", bridgedNetModelStr, false},
{"macvtap Model", "macvtap", false}, {"macvtap Model", macvtapNetModelStr, false},
{"enlightened Model", "enlightened", false}, {"enlightened Model", enlightenedNetModelStr, false},
{"none Model", "none", false}, {"tcfilter Model", tcFilterNetModelStr, false},
{"none Model", noneNetModelStr, false},
} }
for _, tt := range tests { for _, tt := range tests {
@ -338,3 +340,41 @@ func TestCreateMacVtap(t *testing.T) {
err = netHandle.LinkDel(brLink) err = netHandle.LinkDel(brLink)
assert.NoError(err) assert.NoError(err)
} }
// TestTcRedirectNetwork exercises setupTCFiltering and removeTCFiltering
// against a real veth interface. It is skipped unless run as root.
func TestTcRedirectNetwork(t *testing.T) {
	if os.Geteuid() != 0 {
		t.Skip(testDisabledAsNonRoot)
	}

	check := assert.New(t)

	netHandle, err := netlink.NewHandle()
	check.NoError(err)
	defer netHandle.Delete()

	// Create a test veth interface.
	vethName := "foo"
	vethAttrs := netlink.LinkAttrs{Name: vethName, TxQLen: 200, MTU: 1400}
	check.NoError(netlink.LinkAdd(&netlink.Veth{LinkAttrs: vethAttrs, PeerName: "bar"}))

	endpoint, err := createVethNetworkEndpoint(1, vethName, NetXConnectTCFilterModel)
	check.NoError(err)

	link, err := netlink.LinkByName(vethName)
	check.NoError(err)

	check.NoError(netHandle.LinkSetUp(link))

	// Set up tc filtering with vhost-net disabled, then tear it down again.
	check.NoError(setupTCFiltering(endpoint, 1, true))
	check.NoError(removeTCFiltering(endpoint))

	// Remove the veth created for testing.
	check.NoError(netHandle.LinkDel(link))
}