Merge pull request #59678 from Cynerva/gkk/kube-proxy-node-name

Automatic merge from submit-queue. If you want to cherry-pick this change to another branch, please follow the instructions <a href="https://github.com/kubernetes/community/blob/master/contributors/devel/cherry-picks.md">here</a>.

juju: Fix Services unreachable when externalTrafficPolicy=Local on AWS

**What this PR does / why we need it**:

This fixes Services being unreachable when externalTrafficPolicy=Local in Juju-based AWS deployments.

The root cause is that kube-proxy is unable to identify local endpoints because it is using an incorrect node name. The fix is to specify the node name explicitly via `--hostname-override`.

**Which issue(s) this PR fixes**:

https://github.com/juju-solutions/bundle-canonical-kubernetes/issues/489

**Special notes for your reviewer**:

**Release note**:

```release-note
NONE
```
This commit is contained in:
Kubernetes Submit Queue
2018-02-10 04:43:33 -08:00
committed by GitHub

View File

@@ -24,7 +24,7 @@ import time
from shlex import split from shlex import split
from subprocess import check_call, check_output from subprocess import check_call, check_output
from subprocess import CalledProcessError from subprocess import CalledProcessError
from socket import gethostname from socket import gethostname, getfqdn
from charms import layer from charms import layer
from charms.layer import snap from charms.layer import snap
@@ -614,6 +614,7 @@ def configure_kube_proxy(api_servers, cluster_cidr):
kube_proxy_opts['logtostderr'] = 'true' kube_proxy_opts['logtostderr'] = 'true'
kube_proxy_opts['v'] = '0' kube_proxy_opts['v'] = '0'
kube_proxy_opts['master'] = random.choice(api_servers) kube_proxy_opts['master'] = random.choice(api_servers)
kube_proxy_opts['hostname-override'] = get_node_name()
if b'lxc' in check_output('virt-what', shell=True): if b'lxc' in check_output('virt-what', shell=True):
kube_proxy_opts['conntrack-max-per-core'] = '0' kube_proxy_opts['conntrack-max-per-core'] = '0'
@@ -989,44 +990,13 @@ def _systemctl_is_active(application):
return False return False
class GetNodeNameFailed(Exception):
pass
def get_node_name(): def get_node_name():
# Get all the nodes in the cluster kubelet_extra_args = parse_extra_args('kubelet-extra-args')
cmd = 'kubectl --kubeconfig={} get no -o=json'.format(kubeconfig_path) cloud_provider = kubelet_extra_args.get('cloud-provider', '')
cmd = cmd.split() if cloud_provider == 'aws':
deadline = time.time() + 180 return getfqdn()
while time.time() < deadline: else:
try: return gethostname()
raw = check_output(cmd)
except CalledProcessError:
hookenv.log('Failed to get node name for node %s.'
' Will retry.' % (gethostname()))
time.sleep(1)
continue
result = json.loads(raw.decode('utf-8'))
if 'items' in result:
for node in result['items']:
if 'status' not in node:
continue
if 'addresses' not in node['status']:
continue
# find the hostname
for address in node['status']['addresses']:
if address['type'] == 'Hostname':
if address['address'] == gethostname():
return node['metadata']['name']
# if we didn't match, just bail to the next node
break
time.sleep(1)
msg = 'Failed to get node name for node %s' % gethostname()
raise GetNodeNameFailed(msg)
class ApplyNodeLabelFailed(Exception): class ApplyNodeLabelFailed(Exception):