This commit is contained in:
Maisem Ali 2016-02-08 15:38:56 -08:00
commit 1168df93f6
41 changed files with 3423 additions and 2586 deletions

View File

@ -46,6 +46,8 @@ for ((i=0; i < NUM_NODES; i++)) do
VAGRANT_NODE_NAMES[$i]="node-$((i+1))"
done
CLUSTER_IP_RANGE="${CLUSTER_IP_RANGE:-10.246.0.0/16}"
SERVICE_CLUSTER_IP_RANGE=10.247.0.0/16 # formerly PORTAL_NET
# Since this isn't exposed on the network, default to a simple user/passwd

View File

@ -71,6 +71,7 @@ func (s *ProxyServerConfig) AddFlags(fs *pflag.FlagSet) {
fs.Var(componentconfig.PortRangeVar{&s.PortRange}, "proxy-port-range", "Range of host ports (beginPort-endPort, inclusive) that may be consumed in order to proxy service traffic. If unspecified (0-0) then ports will be randomly chosen.")
fs.StringVar(&s.HostnameOverride, "hostname-override", s.HostnameOverride, "If non-empty, will use this string as identification instead of the actual hostname.")
fs.Var(&s.Mode, "proxy-mode", "Which proxy mode to use: 'userspace' (older) or 'iptables' (faster). If blank, look at the Node object on the Kubernetes API and respect the '"+ExperimentalProxyModeAnnotation+"' annotation if provided. Otherwise use the best-available proxy (currently iptables). If the iptables proxy is selected, regardless of how, but the system's kernel or iptables versions are insufficient, this always falls back to the userspace proxy.")
fs.IntVar(s.IPTablesMasqueradeBit, "iptables-masquerade-bit", util.IntPtrDerefOr(s.IPTablesMasqueradeBit, 14), "If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].")
fs.DurationVar(&s.IPTablesSyncPeriod.Duration, "iptables-sync-period", s.IPTablesSyncPeriod.Duration, "How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.")
fs.DurationVar(&s.ConfigSyncPeriod, "config-sync-period", s.ConfigSyncPeriod, "How often configuration from the apiserver is refreshed. Must be greater than 0.")
fs.BoolVar(&s.MasqueradeAll, "masquerade-all", s.MasqueradeAll, "If using the pure iptables proxy, SNAT everything")
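The new `--iptables-masquerade-bit` flag selects which bit of the 32-bit fwmark the iptables proxier uses to tag packets that need SNAT. As a minimal standalone Go sketch (not the code added in this commit), this is how a bit index maps to the mark/mask pair that later appears in the `iptables-save` output in the debugging guide below; the default of 14 yields `0x00004000/0x00004000`:

```go
package main

import "fmt"

// markFromMasqueradeBit shows, as an illustrative sketch (not the kube-proxy
// implementation), how a single fwmark bit index becomes the mark/mask pair
// used in rules like "-j MARK --set-xmark 0x00004000/0x00004000".
// The flag documents the valid range as [0, 31], i.e. one bit of a 32-bit mark.
func markFromMasqueradeBit(bit uint) string {
	mark := uint32(1) << bit // bit 14 -> 0x4000
	return fmt.Sprintf("0x%08x/0x%08x", mark, mark)
}

func main() {
	fmt.Println(markFromMasqueradeBit(14)) // 0x00004000/0x00004000
}
```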

View File

@ -191,18 +191,23 @@ func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, err
proxyMode := getProxyMode(string(config.Mode), client.Nodes(), hostname, iptInterface, iptables.LinuxKernelCompatTester{})
if proxyMode == proxyModeIptables {
glog.V(2).Info("Using iptables Proxier.")
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll)
glog.V(0).Info("Using iptables Proxier.")
if config.IPTablesMasqueradeBit == nil {
// IPTablesMasqueradeBit must be specified or defaulted.
return nil, fmt.Errorf("Unable to read IPTablesMasqueradeBit from config")
}
proxierIptables, err := iptables.NewProxier(iptInterface, execer, config.IPTablesSyncPeriod.Duration, config.MasqueradeAll, *config.IPTablesMasqueradeBit)
if err != nil {
glog.Fatalf("Unable to create proxier: %v", err)
}
proxier = proxierIptables
endpointsHandler = proxierIptables
// No turning back. Remove artifacts that might still exist from the userspace Proxier.
glog.V(2).Info("Tearing down userspace rules. Errors here are acceptable.")
glog.V(0).Info("Tearing down userspace rules.")
userspace.CleanupLeftovers(iptInterface)
} else {
glog.V(2).Info("Using userspace Proxier.")
glog.V(0).Info("Using userspace Proxier.")
// This is a proxy.LoadBalancer which NewProxier needs but has methods we don't need for
// our config.EndpointsConfigHandler.
loadBalancer := userspace.NewLoadBalancerRR()
@ -222,7 +227,7 @@ func NewProxyServerDefault(config *options.ProxyServerConfig) (*ProxyServer, err
}
proxier = proxierUserspace
// Remove artifacts from the pure-iptables Proxier.
glog.V(2).Info("Tearing down pure-iptables proxy rules. Errors here are acceptable.")
glog.V(0).Info("Tearing down pure-iptables proxy rules.")
iptables.CleanupLeftovers(iptInterface)
}
iptInterface.AddReloadFunc(proxier.Sync)

View File

@ -63,6 +63,7 @@ kube-proxy
--healthz-bind-address=127.0.0.1: The IP address for the health check server to serve on, defaulting to 127.0.0.1 (set to 0.0.0.0 for all interfaces)
--healthz-port=10249: The port to bind the health check server. Use 0 to disable.
--hostname-override="": If non-empty, will use this string as identification instead of the actual hostname.
--iptables-masquerade-bit=14: If using the pure iptables proxy, the bit of the fwmark space to mark packets requiring SNAT with. Must be within the range [0, 31].
--iptables-sync-period=30s: How often iptables rules are refreshed (e.g. '5s', '1m', '2h22m'). Must be greater than 0.
--kube-api-burst=10: Burst to use while talking with kubernetes apiserver
--kube-api-qps=5: QPS to use while talking with kubernetes apiserver
@ -76,7 +77,7 @@ kube-proxy
--udp-timeout=250ms: How long an idle UDP connection will be kept open (e.g. '250ms', '2s'). Must be greater than 0. Only applicable for proxy-mode=userspace
```
###### Auto generated by spf13/cobra on 1-Feb-2016
###### Auto generated by spf13/cobra on 7-Feb-2016
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@ -48,6 +48,8 @@ Guide](../admin/README.md).
* **Pull Request Process** ([pull-requests.md](pull-requests.md)): When and why pull requests are closed.
* **Kubernetes On-Call Rotations** ([on-call-rotations.md](on-call-rotations.md)): Descriptions of on-call rotations for build and end-user support
* **Faster PR reviews** ([faster_reviews.md](faster_reviews.md)): How to get faster PR reviews.
* **Getting Recent Builds** ([getting-builds.md](getting-builds.md)): How to get recent builds including the latest builds that pass CI.
@ -73,6 +75,9 @@ Guide](../admin/README.md).
* **Coding Conventions** ([coding-conventions.md](coding-conventions.md)):
Coding style advice for contributors.
* **Document Conventions** ([how-to-doc.md](how-to-doc.md))
Document style advice for contributors.
* **Running a cluster locally** ([running-locally.md](running-locally.md)):
A fast and lightweight local cluster deployment for development.

View File

@ -0,0 +1,105 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
Kubernetes "Github and Build-cop" Rotation
==========================================
Prerequisites
--------------
* Ensure you have [write access to http://github.com/kubernetes/kubernetes](https://github.com/orgs/kubernetes/teams/kubernetes-maintainers)
* Test your admin access by e.g. adding a label to an issue.
Traffic sources and responsibilities
------------------------------------
* GitHub [https://github.com/kubernetes/kubernetes/issues](https://github.com/kubernetes/kubernetes/issues) and [https://github.com/kubernetes/kubernetes/pulls](https://github.com/kubernetes/kubernetes/pulls): Your job is to be the first responder to all new issues and PRs. If you are not equipped to do this (which is fine!), it is your job to seek guidance!
* Support issues should be closed and redirected to Stackoverflow (see example response below).
* All incoming issues should be tagged with a team label (team/{api,ux,control-plane,node,cluster,csi,redhat,mesosphere,gke,release-infra,test-infra,none}); for issues that overlap teams, you can use multiple team labels
* There is a related concept of "Github teams" which allow you to @ mention a set of people; feel free to @ mention a Github team if you wish, but this is not a substitute for adding a team/* label, which is required
* [Google teams](https://github.com/orgs/kubernetes/teams?utf8=%E2%9C%93&query=goog-)
* [Redhat teams](https://github.com/orgs/kubernetes/teams?utf8=%E2%9C%93&query=rh-)
* [SIGs](https://github.com/orgs/kubernetes/teams?utf8=%E2%9C%93&query=sig-)
* If the issue is reporting broken builds, broken e2e tests, or other obvious P0 issues, label the issue with priority/P0 and assign it to someone. This is the only situation in which you should add a priority/* label
* non-P0 issues do not need a reviewer assigned initially
* Assign any issues related to Vagrant to @derekwaynecarr (and @mention him in the issue)
* All incoming PRs should be assigned a reviewer.
* unless it is a WIP (Work in Progress), RFC (Request for Comments), or design proposal.
* An auto-assigner [should do this for you](https://github.com/kubernetes/kubernetes/pull/12365/files)
* When in doubt, choose a TL or team maintainer of the most relevant team; they can delegate
* Keep in mind that you can @ mention people in an issue/PR to bring it to their attention without assigning it to them. You can also @ mention github teams, such as @kubernetes/goog-ux or @kubernetes/kubectl
* If you need help triaging an issue or PR, consult with (or assign it to) @brendandburns, @thockin, @bgrant0607, @quinton-hoole, @davidopp, @dchen1107, @lavalamp (all U.S. Pacific Time) or @fgrzadkowski (Central European Time).
* At the beginning of your shift, please add team/* labels to any issues that have fallen through the cracks and don't have one. Likewise, be fair to the next person in rotation: try to ensure that every issue that gets filed while you are on duty is handled. The Github query to find issues with no team/* label is: [here](https://github.com/kubernetes/kubernetes/issues?utf8=%E2%9C%93&q=is%3Aopen+is%3Aissue+-label%3Ateam%2Fcontrol-plane+-label%3Ateam%2Fmesosphere+-label%3Ateam%2Fredhat+-label%3Ateam%2Frelease-infra+-label%3Ateam%2Fnone+-label%3Ateam%2Fnode+-label%3Ateam%2Fcluster+-label%3Ateam%2Fux+-label%3Ateam%2Fcsi+-label%3Ateam%2Fapi+-label%3Ateam%2Ftest-infra+).
Example response for support issues:
Please re-post your question to [stackoverflow](http://stackoverflow.com/questions/tagged/kubernetes).
We are trying to consolidate the channels to which questions for help/support are posted so that we can improve our efficiency in responding to your requests, and to make it easier for you to find answers to frequently asked questions and how to address common use cases.
We regularly see messages posted in multiple forums, with the full response thread only in one place or, worse, spread across multiple forums. Also, the large volume of support issues on github is making it difficult for us to use issues to identify real bugs.
The Kubernetes team scans stackoverflow on a regular basis, and will try to ensure your questions don't go unanswered.
Before posting a new question, please search stackoverflow for answers to similar questions, and also familiarize yourself with:
* [the user guide](http://kubernetes.io/v1.0/)
* [the troubleshooting guide](http://kubernetes.io/v1.0/docs/troubleshooting.html)
Again, thanks for using Kubernetes.
The Kubernetes Team
Build-copping
-------------
* The [merge-bot submit queue](http://submit-queue.k8s.io/) ([source](https://github.com/kubernetes/contrib/tree/master/submit-queue)) should auto-merge all eligible PRs for you once they've passed all the relevant checks mentioned below and all [critical e2e tests](https://goto.google.com/k8s-test/view/Critical%20Builds/) are passing. If the merge-bot has been disabled for some reason, or tests are failing, you might need to do some manual merging to get things back on track.
* Once a day or so, look at the [flaky test builds](https://goto.google.com/k8s-test/view/Flaky/); if they are timing out, clusters are failing to start, or tests are consistently failing (instead of just flaking), file an issue to get things back on track.
* Jobs that are not in [critical e2e tests](https://goto.google.com/k8s-test/view/Critical%20Builds/) or [flaky test builds](https://goto.google.com/k8s-test/view/Flaky/) are not your responsibility to monitor. The `Test owner:` in the job description will be automatically emailed if the job is failing.
* If you are a weekday oncall, ensure that PRs conforming to the following prerequisites are being merged at a reasonable rate:
* [Have been LGTMd](https://github.com/kubernetes/kubernetes/labels/lgtm)
* Pass Travis and Shippable.
* Author has signed CLA if applicable.
* If you are a weekend oncall, [never merge PRs manually](collab.md), instead add the label "lgtm" to the PRs once they have been LGTMd and passed Travis and Shippable; this will cause merge-bot to merge them automatically (or make them easy to find by the next oncall, who will merge them).
* When the build is broken, roll back the PRs responsible ASAP
* When E2E tests are unstable, a "merge freeze" may be instituted. During a merge freeze:
* Oncall should slowly merge LGTMd changes throughout the day while monitoring E2E to ensure stability.
* Ideally the E2E run should be green, but some tests are flaky and can fail randomly (not as a result of a particular change).
* If a large number of tests fail, or tests that normally pass fail, that is an indication that one or more of the PR(s) in that build might be problematic (and should be reverted).
* Use the Test Results Analyzer to see individual test history over time.
* Flake mitigation
* Tests that flake (fail a small percentage of the time) need an issue filed against them. Please read [this](https://github.com/kubernetes/kubernetes/blob/doc-flaky-test/docs/devel/flaky-tests.md#filing-issues-for-flaky-tests); the build cop is expected to file issues for any flaky tests they encounter.
* It's reasonable to manually merge PRs that fix a flake or otherwise mitigate it.
Contact information
-------------------
[@k8s-oncall](https://github.com/k8s-oncall) will reach the current person on call.
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/devel/on-call-build-cop.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

View File

@ -0,0 +1,52 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
Kubernetes On-Call Rotations
====================
Kubernetes "first responder" rotations
--------------------------------------
Kubernetes has generated a lot of public traffic: email, pull-requests, bugs, etc. So much traffic that it's becoming impossible to keep up with it all! This is a fantastic problem to have. In order to be sure that SOMEONE, but not EVERYONE on the team is paying attention to public traffic, we have instituted two "first responder" rotations, listed below. Please read this page before proceeding to the pages linked below, which are specific to each rotation.
Please also read our [notes on OSS collaboration](collab.md), particularly the bits about hours. Specifically, each rotation is expected to be active primarily during work hours, less so off hours.
During regular workday work hours of your shift, your primary responsibility is to monitor the traffic sources specific to your rotation. You can check traffic in the evenings if you feel so inclined, but it is not expected to be as highly focused as work hours. For weekends, you should check traffic very occasionally (e.g. once or twice a day). Again, it is not expected to be as highly focused as workdays. It is assumed that over time, everyone will get weekday and weekend shifts, so the workload will balance out.
If you cannot serve your shift, and you know this ahead of time, it is your responsibility to find someone to cover and to change the rotation. If you have an emergency, your responsibilities fall on the primary of the other rotation, who acts as your secondary. If you need help covering all of the tasks, reach out to partners that run their own on-call rotations (e.g., [Redhat](https://github.com/orgs/kubernetes/teams/rh-oncall)).
If you are not on duty you DO NOT need to do these things. You are free to focus on "real work".
Note that Kubernetes will occasionally enter code slush/freeze, prior to milestones. When it does, there might be changes in the instructions (assigning milestones, for instance).
* [Github and Build Cop Rotation](on-call-build-cop.md)
* [User Support Rotation](on-call-user-support.md)
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/devel/on-call-rotations.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

View File

@ -0,0 +1,83 @@
<!-- BEGIN MUNGE: UNVERSIONED_WARNING -->
<!-- BEGIN STRIP_FOR_RELEASE -->
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<img src="http://kubernetes.io/img/warning.png" alt="WARNING"
width="25" height="25">
<h2>PLEASE NOTE: This document applies to the HEAD of the source tree</h2>
If you are using a released version of Kubernetes, you should
refer to the docs that go with that version.
Documentation for other releases can be found at
[releases.k8s.io](http://releases.k8s.io).
</strong>
--
<!-- END STRIP_FOR_RELEASE -->
<!-- END MUNGE: UNVERSIONED_WARNING -->
Kubernetes "User Support" Rotation
==================================
Traffic sources and responsibilities
------------------------------------
* [StackOverflow](http://stackoverflow.com/questions/tagged/kubernetes) and [ServerFault](http://serverfault.com/questions/tagged/google-kubernetes): Respond to any thread that has no responses and is more than 6 hours old (over time we will lengthen this timeout to allow community responses). If you are not equipped to respond, it is your job to redirect to someone who can.
* [Query for unanswered Kubernetes StackOverflow questions](http://stackoverflow.com/search?q=%5Bkubernetes%5D+answers%3A0)
* [Query for unanswered Kubernetes ServerFault questions](http://serverfault.com/questions/tagged/google-kubernetes?sort=unanswered&pageSize=15)
* Direct poorly formulated questions to [stackoverflow's tips about how to ask](http://stackoverflow.com/help/how-to-ask)
* Direct off-topic questions to [stackoverflow's policy](http://stackoverflow.com/help/on-topic)
* [Slack](https://kubernetes.slack.com) ([registration](http://slack.k8s.io)): Your job is to be on Slack, watching for questions and answering or redirecting as needed. Also check out the [Slack Archive](http://kubernetes.slackarchive.io/).
* [Email/Groups](https://groups.google.com/forum/#!forum/google-containers): Respond to any thread that has no responses and is more than 6 hours old (over time we will lengthen this timeout to allow community responses). If you are not equipped to respond, it is your job to redirect to someone who can.
* [Legacy] [IRC](irc://irc.freenode.net/#google-containers) (irc.freenode.net #google-containers): watch IRC for questions and try to redirect users to Slack. Also check out the [IRC logs](https://botbot.me/freenode/google-containers/).
In general, try to direct support questions to:
1. Documentation, such as the [user guide](../user-guide/README.md) and [troubleshooting guide](../troubleshooting.md)
2. Stackoverflow
If you see questions on a forum other than Stackoverflow, try to redirect them to Stackoverflow. Example response:
Please re-post your question to [stackoverflow](http://stackoverflow.com/questions/tagged/kubernetes).
We are trying to consolidate the channels to which questions for help/support are posted so that we can improve our efficiency in responding to your requests, and to make it easier for you to find answers to frequently asked questions and how to address common use cases.
We regularly see messages posted in multiple forums, with the full response thread only in one place or, worse, spread across multiple forums. Also, the large volume of support issues on github is making it difficult for us to use issues to identify real bugs.
The Kubernetes team scans stackoverflow on a regular basis, and will try to ensure your questions don't go unanswered.
Before posting a new question, please search stackoverflow for answers to similar questions, and also familiarize yourself with:
* [the user guide](http://kubernetes.io/v1.1/)
* [the troubleshooting guide](http://kubernetes.io/v1.1/docs/troubleshooting.html)
Again, thanks for using Kubernetes.
The Kubernetes Team
If you answer a question (in any of the above forums) that you think might be useful for someone else in the future, *please add it to one of the FAQs in the wiki*:
* [User FAQ](https://github.com/kubernetes/kubernetes/wiki/User-FAQ)
* [Developer FAQ](https://github.com/kubernetes/kubernetes/wiki/Developer-FAQ)
* [Debugging FAQ](https://github.com/kubernetes/kubernetes/wiki/Debugging-FAQ).
Getting it into the FAQ is more important than polish. Please indicate the date it was added, so people can judge the likelihood that it is out-of-date (and please correct any FAQ entries that you see contain out-of-date information).
Contact information
-------------------
[@k8s-support-oncall](https://github.com/k8s-support-oncall) will reach the current person on call.
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/devel/on-call-user-support.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

BIN
docs/devel/pr_workflow.dia Normal file

Binary file not shown.

BIN
docs/devel/pr_workflow.png Normal file

Binary file not shown.

View File

@ -34,34 +34,54 @@ Documentation for other releases can be found at
Pull Request Process
====================
An overview of how we will manage old or out-of-date pull requests.
Process
-------
We will close any pull requests older than two weeks.
Exceptions can be made for PRs that have active review comments, or that are awaiting other dependent PRs. Closed pull requests are easy to recreate, and little work is lost by closing a pull request that subsequently needs to be reopened.
We want to limit the total number of PRs in flight to:
* Maintain a clean project
* Remove old PRs that would be difficult to rebase as the underlying code has changed over time
* Encourage code velocity
An overview of how pull requests are managed for kubernetes. This document
assumes the reader has already followed the [development guide](development.md)
to set up their environment.
Life of a Pull Request
----------------------
Except during the last few weeks of a milestone, when we need to reduce churn and stabilize, we aim to always be accepting pull requests.
Either the [on call](https://github.com/kubernetes/kubernetes/wiki/Kubernetes-on-call-rotations) manually or the [github "munger"](https://github.com/kubernetes/contrib/tree/master/mungegithub) submit-queue plugin automatically will manage merging PRs.
Either the [on call](on-call-rotations.md) manually or the [github "munger"](https://github.com/kubernetes/contrib/tree/master/mungegithub) submit-queue plugin automatically will manage merging PRs.
There are several requirements for the submit-queue to work:
* Author must have signed CLA ("cla: yes" label added to PR)
* No changes can be made since last lgtm label was applied
* k8s-bot must have reported the GCE E2E build and test steps passed (Travis, Shippable and Jenkins build)
* k8s-bot must have reported the GCE E2E build and test steps passed (Travis, Jenkins unit/integration, Jenkins e2e)
Additionally, for infrequent or new contributors, we require the on call to apply the "ok-to-merge" label manually. This is gated by the [whitelist](https://github.com/kubernetes/contrib/blob/master/mungegithub/whitelist.txt).
### Before sending a pull request
The following will save time for both you and your reviewer:
* Enable [pre-commit hooks](development.md#committing-changes-to-your-fork) and verify they pass.
* Verify `hack/verify-generated-docs.sh` passes.
* Verify `hack/test-go.sh` passes.
### Visual overview
![PR workflow](pr_workflow.png)
Other notes
-----------
Pull requests that are purely support questions will be closed and
redirected to [stackoverflow](http://stackoverflow.com/questions/tagged/kubernetes).
We do this to consolidate help/support questions into a single channel,
improve efficiency in responding to requests and make FAQs easier
to find.
Pull requests older than 2 weeks will be closed. Exceptions can be made
for PRs that have active review comments, or that are awaiting other dependent PRs.
Closed pull requests are easy to recreate, and little work is lost by closing a pull
request that subsequently needs to be reopened. We want to limit the total number of PRs in flight to:
* Maintain a clean project
* Remove old PRs that would be difficult to rebase as the underlying code has changed over time
* Encourage code velocity
Automation
----------

View File

@ -173,14 +173,14 @@ Bare-metal | custom | Fedora | _none_ | [docs](fedora/fedor
Bare-metal | custom | Fedora | flannel | [docs](fedora/flannel_multi_node_cluster.md) | | Community ([@aveshagarwal](https://github.com/aveshagarwal))
libvirt | custom | Fedora | flannel | [docs](fedora/flannel_multi_node_cluster.md) | | Community ([@aveshagarwal](https://github.com/aveshagarwal))
KVM | custom | Fedora | flannel | [docs](fedora/flannel_multi_node_cluster.md) | | Community ([@aveshagarwal](https://github.com/aveshagarwal))
Mesos/Docker | custom | Ubuntu | Docker | [docs](mesos-docker.md) | [✓][4] | Community ([Kubernetes-Mesos Authors](https://github.com/mesosphere/kubernetes-mesos/blob/master/AUTHORS.md))
Mesos/Docker | custom | Ubuntu | Docker | [docs](mesos-docker.md) | [✓][4] | Community ([Kubernetes-Mesos Authors](https://github.com/mesosphere/kubernetes-mesos/blob/master/AUTHORS.md))
Mesos/GCE | | | | [docs](mesos.md) | | Community ([Kubernetes-Mesos Authors](https://github.com/mesosphere/kubernetes-mesos/blob/master/AUTHORS.md))
DCOS | Marathon | CoreOS/Alpine | custom | [docs](dcos.md) | | Community ([Kubernetes-Mesos Authors](https://github.com/mesosphere/kubernetes-mesos/blob/master/AUTHORS.md))
AWS | CoreOS | CoreOS | flannel | [docs](coreos.md) | | Community
GCE | CoreOS | CoreOS | flannel | [docs](coreos.md) | | Community ([@pires](https://github.com/pires))
Vagrant | CoreOS | CoreOS | flannel | [docs](coreos.md) | | Community ([@pires](https://github.com/pires), [@AntonioMeireles](https://github.com/AntonioMeireles))
Bare-metal (Offline) | CoreOS | CoreOS | flannel | [docs](coreos/bare_metal_offline.md) | | Community ([@jeffbean](https://github.com/jeffbean))
Bare-metal | CoreOS | CoreOS | Calico | [docs](coreos/bare_metal_calico.md) | | Community ([@caseydavenport](https://github.com/caseydavenport))
Bare-metal | CoreOS | CoreOS | Calico | [docs](coreos/bare_metal_calico.md) | [✓][5] | Community ([@caseydavenport](https://github.com/caseydavenport))
CloudStack | Ansible | CoreOS | flannel | [docs](cloudstack.md) | | Community ([@runseb](https://github.com/runseb))
Vmware | | Debian | OVS | [docs](vsphere.md) | | Community ([@pietern](https://github.com/pietern))
Bare-metal | custom | CentOS | _none_ | [docs](centos/centos_manual_config.md) | | Community ([@coolsvap](https://github.com/coolsvap))
@ -226,6 +226,8 @@ Definition of columns:
[3]: https://gist.github.com/erictune/2f39b22f72565365e59b
<!-- Mesos/Docker conformance test result -->
[4]: https://gist.github.com/sttts/d27f3b879223895494d4
<!-- Calico/CoreOS conformance test result -->
[5]: https://gist.github.com/caseydavenport/98ca87e709b21f03d195
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->

View File

@ -34,91 +34,202 @@ Documentation for other releases can be found at
Bare Metal Kubernetes on CoreOS with Calico Networking
------------------------------------------
This document describes how to deploy Kubernetes with Calico networking on _bare metal_ CoreOS. For more information on Project Calico, visit [projectcalico.org](http://projectcalico.org) and the [calico-docker repository](https://github.com/projectcalico/calico-docker).
This document describes how to deploy Kubernetes with Calico networking on _bare metal_ CoreOS. For more information on Project Calico, visit [projectcalico.org](http://projectcalico.org) and the [calico-containers repository](https://github.com/projectcalico/calico-containers).
To install Calico on an existing Kubernetes cluster, or for more information on deploying Calico with Kubernetes in a number of other environments take a look at our supported [deployment guides](https://github.com/projectcalico/calico-docker/tree/master/docs/kubernetes).
To install Calico on an existing Kubernetes cluster, or for more information on deploying Calico with Kubernetes in a number of other environments take a look at our supported [deployment guides](https://github.com/projectcalico/calico-containers/tree/master/docs/cni/kubernetes).
Specifically, this guide will have you do the following:
- Deploy a Kubernetes master node on CoreOS using cloud-config
- Deploy two Kubernetes compute nodes with Calico Networking using cloud-config
- Deploy a Kubernetes master node on CoreOS using cloud-config.
- Deploy two Kubernetes compute nodes with Calico Networking using cloud-config.
- Configure `kubectl` to access your cluster.
## Prerequisites
The resulting cluster will use SSL between Kubernetes components. It will run the SkyDNS service and kube-ui, and be fully conformant with the Kubernetes v1.1 conformance tests.
1. At least three bare-metal machines (or VMs) to work with. This guide will configure them as follows:
- 1 Kubernetes Master
- 2 Kubernetes Nodes
2. Your nodes should have IP connectivity.
## Prerequisites and Assumptions
- At least three bare-metal machines (or VMs) to work with. This guide will configure them as follows:
- 1 Kubernetes Master
- 2 Kubernetes Nodes
- Your nodes should have IP connectivity to each other and the internet.
- This guide assumes a DHCP server on your network to assign server IPs.
- This guide uses `192.168.0.0/16` as the subnet from which pod IP addresses are assigned. If this overlaps with your host subnet, you will need to configure Calico to use a different [IP pool](https://github.com/projectcalico/calico-containers/blob/master/docs/calicoctl/pool.md#calicoctl-pool-commands).
## Cloud-config
This guide will use [cloud-config](https://coreos.com/docs/cluster-management/setup/cloudinit-cloud-config/) to configure each of the nodes in our Kubernetes cluster.
We'll use two cloud-config files:
- `master-config.yaml`: Cloud-config for the Kubernetes master
- `node-config.yaml`: Cloud-config for each Kubernetes node
- `master-config.yaml`: cloud-config for the Kubernetes master
- `node-config.yaml`: cloud-config for each Kubernetes node
## Download CoreOS
Let's download the CoreOS bootable ISO. We'll use this image to boot and install CoreOS on each server.
```
wget http://stable.release.core-os.net/amd64-usr/current/coreos_production_iso_image.iso
```
> You can also download the ISO from the [CoreOS website](https://coreos.com/docs/running-coreos/platforms/iso/).
Download the stable CoreOS bootable ISO from the [CoreOS website](https://coreos.com/docs/running-coreos/platforms/iso/).
## Configure the Kubernetes Master
Once you've downloaded the image, use it to boot your Kubernetes master. Once booted, you should be automatically logged in as the `core` user.
1. Once you've downloaded the ISO image, burn the ISO to a CD/DVD/USB key and boot from it (if using a virtual machine you can boot directly from the ISO). Once booted, you should be automatically logged in as the `core` user at the terminal. At this point CoreOS is running from the ISO and it hasn't been installed yet.
*On another machine*, download the `calico-kubernetes` repository, which contains the necessary cloud-config files for this guide, and make a copy of the file `master-config-template.yaml`.
2. *On another machine*, download the [master cloud-config template](https://raw.githubusercontent.com/projectcalico/calico-cni/k8s-1.1-docs/samples/kubernetes/cloud-config/master-config-template.yaml) and save it as `master-config.yaml`.
3. Replace the following variables in the `master-config.yaml` file.
- `<SSH_PUBLIC_KEY>`: The public key you will use for SSH access to this server. See [generating ssh keys](https://help.github.com/articles/generating-ssh-keys/)
4. Copy the edited `master-config.yaml` to your Kubernetes master machine (using a USB stick, for example).
5. The CoreOS bootable ISO comes with a tool called `coreos-install` which will allow us to install CoreOS and configure the machine using a cloud-config file. The following command will download and install stable CoreOS using the `master-config.yaml` file we just created for configuration. Run this on the Kubernetes master.
> **Warning:** this is a destructive operation that erases disk `sda` on your server.
```
sudo coreos-install -d /dev/sda -C stable -c master-config.yaml
```
6. Once complete, restart the server and boot from `/dev/sda` (you may need to remove the ISO image). When it comes back up, you should have SSH access as the `core` user using the public key provided in the `master-config.yaml` file.
### Configure TLS
The master requires the CA certificate, `ca.pem`; its own certificate, `apiserver.pem` and its private key, `apiserver-key.pem`. This [CoreOS guide](https://coreos.com/kubernetes/docs/latest/openssl.html) explains how to generate these.
1. Generate the necessary certificates for the master. This [guide for generating Kubernetes TLS Assets](https://coreos.com/kubernetes/docs/latest/openssl.html) explains how to use OpenSSL to generate the required assets.
2. Send the three files to your master host (using `scp` for example).
3. Move them to the `/etc/kubernetes/ssl` folder and ensure that only the root user can read the key:
```
# Move keys
sudo mkdir -p /etc/kubernetes/ssl/
sudo mv -t /etc/kubernetes/ssl/ ca.pem apiserver.pem apiserver-key.pem
# Set Permissions
sudo chmod 600 /etc/kubernetes/ssl/apiserver-key.pem
sudo chown root:root /etc/kubernetes/ssl/apiserver-key.pem
```
4. Restart the kubelet to pick up the changes:
```
sudo systemctl restart kubelet
```
## Configure the compute nodes
The following steps will set up a single Kubernetes node for use as a compute host. Run these steps to deploy each Kubernetes node in your cluster.
1. Boot up the node machine using the bootable ISO we downloaded earlier. You should be automatically logged in as the `core` user.
2. Make a copy of the [node cloud-config template](https://raw.githubusercontent.com/projectcalico/calico-cni/k8s-1.1-docs/samples/kubernetes/cloud-config/node-config-template.yaml) for this machine.
3. Replace the following placeholders in the `node-config.yaml` file to match your deployment.
- `<HOSTNAME>`: Hostname for this node (e.g. kube-node1, kube-node2)
- `<SSH_PUBLIC_KEY>`: The public key you will use for SSH access to this server.
- `<KUBERNETES_MASTER>`: The IPv4 address of the Kubernetes master.
4. Replace the following placeholders with the contents of their respective files.
- `<CA_CERT>`: Complete contents of `ca.pem`
- `<CA_KEY_CERT>`: Complete contents of `ca-key.pem`
> **Important:** in a production deployment, embedding the secret key in cloud-config is a bad idea! In production you should use an appropriate secret manager.
> **Important:** Make sure you indent the entire file to match the indentation of the placeholder. For example:
>
> ```
>   - path: /etc/kubernetes/ssl/ca.pem
>     owner: core
>     permissions: 0644
>     content: |
>       <CA_CERT>
> ```
>
> should look like this once the certificate is in place:
>
> ```
>   - path: /etc/kubernetes/ssl/ca.pem
>     owner: core
>     permissions: 0644
>     content: |
>       -----BEGIN CERTIFICATE-----
>       MIIC9zCCAd+gAwIBAgIJAJMnVnhVhy5pMA0GCSqGSIb3DQEBCwUAMBIxEDAOBgNV
>       ...<snip>...
>       QHwi1rNc8eBLNrd4BM/A1ZeDVh/Q9KxN+ZG/hHIXhmWKgN5wQx6/81FIFg==
>       -----END CERTIFICATE-----
> ```
5. Move the modified `node-config.yaml` to your Kubernetes node machine and install and configure CoreOS on the node using the following command.
> **Warning:** this is a destructive operation that erases disk `sda` on your server.
```
sudo coreos-install -d /dev/sda -C stable -c node-config.yaml
```
6. Once complete, restart the server and boot into `/dev/sda`. When it comes back up, you should have SSH access as the `core` user using the public key provided in the `node-config.yaml` file. It will take some time for the node to be fully configured.
## Configure Kubeconfig
To administer your cluster from a separate host, you will need the client and admin certificates generated earlier (`ca.pem`, `admin.pem`, `admin-key.pem`). With certificates in place, run the following commands with the appropriate filepaths.
```
wget https://github.com/projectcalico/calico-kubernetes/archive/master.tar.gz
tar -xvf master.tar.gz
cp calico-kubernetes-master/config/cloud-config/master-config-template.yaml master-config.yaml
kubectl config set-cluster calico-cluster --server=https://<KUBERNETES_MASTER> --certificate-authority=<CA_CERT_PATH>
kubectl config set-credentials calico-admin --certificate-authority=<CA_CERT_PATH> --client-key=<ADMIN_KEY_PATH> --client-certificate=<ADMIN_CERT_PATH>
kubectl config set-context calico --cluster=calico-cluster --user=calico-admin
kubectl config use-context calico
```
You'll need to replace the following variables in the `master-config.yaml` file.
- `<SSH_PUBLIC_KEY>`: The public key you will use for SSH access to this server.
Check your work with `kubectl get nodes`.
Move the edited `master-config.yaml` to your Kubernetes master machine. The CoreOS bootable ISO comes with a tool called `coreos-install` which will allow us to install CoreOS and configure the machine using a cloud-config file. The following command will download and install stable CoreOS using the `master-config.yaml` file we just created for configuration. Run this on the Kubernetes master.
## Install the DNS Addon
Most Kubernetes deployments will require the DNS addon for service discovery. To install DNS, create the skydns service and replication controller provided.
```
sudo coreos-install -d /dev/sda -C stable -c master-config.yaml
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico-cni/k8s-1.1-docs/samples/kubernetes/master/dns/skydns.yaml
```
Once complete, eject the bootable ISO and restart the server. When it comes back up, you should have SSH access as the `core` user using the public key provided in the `master-config.yaml` file. It may take a few minutes for the machine to be fully configured with Kubernetes and Calico.
## Install the Kubernetes UI Addon (Optional)
## Configure the compute hosts
>The following steps will set up a single Kubernetes node for use as a compute host. Run these steps to deploy each Kubernetes node in your cluster.
First, boot up the node machine using the bootable ISO we downloaded earlier. You should be automatically logged in as the `core` user.
Make a copy of the `node-config-template.yaml` in the `calico-kubernetes` repository for this machine.
The Kubernetes UI can be installed using `kubectl` to run the following manifest file.
```
cp calico-kubernetes-master/config/cloud-config/node-config-template.yaml node-config.yaml
kubectl create -f https://raw.githubusercontent.com/projectcalico/calico-cni/k8s-1.1-docs/samples/kubernetes/master/kube-ui/kube-ui.yaml
```
You'll need to replace the following variables in the `node-config.yaml` file to match your deployment.
- `<HOSTNAME>`: Hostname for this node (e.g. kube-node1, kube-node2)
- `<SSH_PUBLIC_KEY>`: The public key you will use for SSH access to this server.
- `<KUBERNETES_MASTER>`: The IPv4 address of the Kubernetes master.
## Launch other Services With Calico-Kubernetes
Move the modified `node-config.yaml` to your Kubernetes node machine and install and configure CoreOS on the node using the following command.
At this point, you have a fully functioning cluster running on Kubernetes with a master and two nodes networked with Calico. You can now follow any of the [standard documentation](../../../examples/) to set up other services on your cluster.
## Connectivity to outside the cluster
Because containers in this guide have private `192.168.0.0/16` IPs, you will need NAT to allow connectivity between containers and the internet. However, in a production data center deployment, NAT is not always necessary, since Calico can peer with the data center's border routers over BGP.
### NAT on the nodes
The simplest method for enabling connectivity from containers to the internet is to use outgoing NAT on your Kubernetes nodes.
Calico can provide outgoing NAT for containers. To enable it, use the following `calicoctl` command:
```
sudo coreos-install -d /dev/sda -C stable -c node-config.yaml
ETCD_AUTHORITY=<master_ip:6666> calicoctl pool add <CONTAINER_SUBNET> --nat-outgoing
```
Once complete, eject the bootable disc and restart the server. When it comes back up, you should have SSH access as the `core` user using the public key provided in the `node-config.yaml` file. It will take some time for the node to be fully configured. Once fully configured, you can check that the node is running with the following command on the Kubernetes master.
By default, `<CONTAINER_SUBNET>` will be `192.168.0.0/16`. You can find out which pools have been configured with the following command:
```
kubectl get nodes
ETCD_AUTHORITY=<master_ip:6666> calicoctl pool show
```
### NAT at the border router
In a data center environment, it is recommended to configure Calico to peer with the border routers over BGP. This means that the container IPs will be routable anywhere in the data center, and so NAT is not needed on the nodes (though it may be enabled at the data center edge to allow outbound-only internet connectivity).
The Calico documentation contains more information on how to configure Calico to [peer with existing infrastructure](https://github.com/projectcalico/calico-containers/blob/master/docs/ExternalConnectivity.md).
[![Analytics](https://ga-beacon.appspot.com/UA-52125893-3/kubernetes/docs/getting-started-guides/coreos/bare_metal_calico.md?pixel)](https://github.com/igrigorik/ga-beacon)
<!-- BEGIN MUNGE: GENERATED_ANALYTICS -->
[![Analytics](https://kubernetes-site.appspot.com/UA-36037335-10/GitHub/docs/getting-started-guides/coreos/bare_metal_calico.md?pixel)]()
<!-- END MUNGE: GENERATED_ANALYTICS -->

View File

@ -513,11 +513,11 @@ then look at the logs again.
```console
u@node$ iptables-save | grep hostnames
-A KUBE-SEP-57KPRZ3JQVENLNBR -s 10.244.3.6/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff
-A KUBE-SEP-57KPRZ3JQVENLNBR -s 10.244.3.6/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-57KPRZ3JQVENLNBR -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.3.6:9376
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -s 10.244.1.7/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -s 10.244.1.7/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-WNBA2IHDGP2BOBGZ -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.1.7:9376
-A KUBE-SEP-X3P2623AGDH6CDF3 -s 10.244.2.3/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x4d415351/0xffffffff
-A KUBE-SEP-X3P2623AGDH6CDF3 -s 10.244.2.3/32 -m comment --comment "default/hostnames:" -j MARK --set-xmark 0x00004000/0x00004000
-A KUBE-SEP-X3P2623AGDH6CDF3 -p tcp -m comment --comment "default/hostnames:" -m tcp -j DNAT --to-destination 10.244.2.3:9376
-A KUBE-SERVICES -d 10.0.1.175/32 -p tcp -m comment --comment "default/hostnames: cluster IP" -m tcp --dport 80 -j KUBE-SVC-NWV5X2332I4OT4T3
-A KUBE-SVC-NWV5X2332I4OT4T3 -m comment --comment "default/hostnames:" -m statistic --mode random --probability 0.33332999982 -j KUBE-SEP-WNBA2IHDGP2BOBGZ

View File

@ -47,19 +47,20 @@ a docker image. See [Secrets design document](../design/secrets.md) for more inf
- [Built-in Secrets](#built-in-secrets)
- [Service Accounts Automatically Create and Attach Secrets with API Credentials](#service-accounts-automatically-create-and-attach-secrets-with-api-credentials)
- [Creating your own Secrets](#creating-your-own-secrets)
- [Creating a Secret Using kubectl secret](#creating-a-secret-using-kubectl-secret)
- [Creating a Secret Using kubectl create secret](#creating-a-secret-using-kubectl-create-secret)
- [Creating a Secret Manually](#creating-a-secret-manually)
- [Decoding a Secret](#decoding-a-secret)
- [Manually specifying a Secret to be Mounted on a Pod](#manually-specifying-a-secret-to-be-mounted-on-a-pod)
- [Using Secrets](#using-secrets)
- [Using Secrets as Files from a Pod](#using-secrets-as-files-from-a-pod)
- [Consuming Secret Values from Volumes](#consuming-secret-values-from-volumes)
- [Using Secrets as Environment Variables](#using-secrets-as-environment-variables)
- [Consuming Secret Values from Environment Variables](#consuming-secret-values-from-environment-variables)
- [Using imagePullSecrets](#using-imagepullsecrets)
- [Manually specifying an imagePullSecret](#manually-specifying-an-imagepullsecret)
- [Arranging for imagePullSecrets to be Automatically Attached](#arranging-for-imagepullsecrets-to-be-automatically-attached)
- [Using Secrets as Files from a Pod](#using-secrets-as-files-from-a-pod)
- [Automatic Mounting of Manually Created Secrets](#automatic-mounting-of-manually-created-secrets)
- [Details](#details)
- [Restrictions](#restrictions)
- [Consuming Secret Values](#consuming-secret-values)
- [Secret and Pod Lifetime interaction](#secret-and-pod-lifetime-interaction)
- [Use cases](#use-cases)
- [Use-Case: Pod with ssh keys](#use-case-pod-with-ssh-keys)
@ -82,8 +83,8 @@ more control over how it is used, and reduces the risk of accidental exposure.
Users can create secrets, and the system also creates some secrets.
To use a secret, a pod needs to reference the secret.
A secret can be used with a pod in two ways: either as files in a [volume](volumes.md) mounted on one or more of
its containers, or used by kubelet when pulling images for the pod.
A secret can be used with a pod in three ways: as files in a [volume](volumes.md) mounted on one or more of
its containers, as environment variables exposed to its containers, or by the kubelet when pulling images for the pod.
### Built-in Secrets
@ -102,7 +103,7 @@ information on how Service Accounts work.
### Creating your own Secrets
#### Creating a Secret Using kubectl secret
#### Creating a Secret Using kubectl create secret
Say that some pods need to access a database. The
username and password that the pods should use is in the files
@ -219,9 +220,22 @@ $ echo "MWYyZDFlMmU2N2RmCg==" | base64 -D
1f2d1e2e67df
```
### Manually specifying a Secret to be Mounted on a Pod
### Using Secrets
Once the secret is created, you can create a pod that consumes that secret.
Secrets can be mounted as data volumes or be exposed as environment variables to
be used by a container in a pod. They can also be used by other parts of the
system, without being directly exposed to the pod. For example, they can hold
credentials that other parts of the system should use to interact with external
systems on your behalf.
#### Using Secrets as Files from a Pod
To consume a Secret in a volume in a Pod:
1. Create a secret or use an existing one. Multiple pods can reference the same secret.
1. Modify your Pod definition to add a volume under `spec.volumes[]`. Name the volume anything, and have a `spec.volumes[].secret.secretName` field equal to the name of the secret object.
1. Add a `spec.containers[].volumeMounts[]` to each container that needs the secret. Specify `spec.containers[].volumeMounts[].readOnly = true` and `spec.containers[].volumeMounts[].mountPath` to an unused directory name where you would like the secrets to appear.
1. Modify your image and/or command line so that the program looks for files in that directory. Each key in the secret `data` map becomes the filename under `mountPath`.
This is an example of a pod that mounts a secret in a volume:
@ -253,22 +267,80 @@ This is an example of a pod that mounts a secret in a volume:
}
```
Each secret you want to use needs its own `spec.volumes`.
Each secret you want to use needs to be referred to in `spec.volumes`.
If there are multiple containers in the pod, then each container needs its
own `volumeMounts` block, but only one `spec.volumes` is needed per secret.
You can package many files into one secret, or use many secrets,
whichever is convenient.
You can package many files into one secret, or use many secrets, whichever is convenient.
See another example of creating a secret and a pod that consumes that secret in a volume [here](secrets/).
### Using Secrets
##### Consuming Secret Values from Volumes
Secrets can be mounted as data volumes to be used by a container in a pod.
They can also be used by other parts of the system, without being directly
exposed to the pod. For example, they can hold credentials that other
parts of the system should use to interact with external systems on your behalf.
Inside the container that mounts a secret volume, the secret keys appear as
files and the secret values are base-64 decoded and stored inside these files.
This is the result of commands
executed inside the container from the example above:
```console
$ ls /etc/foo/
username
password
$ cat /etc/foo/username
admin
$ cat /etc/foo/password
1f2d1e2e67df
```
The program in a container is responsible for reading the secret(s) from the
files.
#### Using Secrets as Environment Variables
To use a secret in an environment variable in a pod:
1. Create a secret or use an existing one. Multiple pods can reference the same secret.
1. Modify your Pod definition in each container that you wish to consume the value of a secret key to add an environment variable for each secret key you wish to consume. The environment variable that consumes the secret key should populate the secret's name and key in `env[x].valueFrom.secretKeyRef`.
1. Modify your image and/or command line so that the program looks for values in the specified environment variables.
This is an example of a pod that uses secrets from environment variables:
```yaml
apiVersion: v1
kind: Pod
metadata:
name: secret-env-pod
spec:
containers:
- name: mycontainer
image: redis
env:
- name: SECRET_USERNAME
valueFrom:
secretKeyRef:
name: mysecret
key: username
- name: SECRET_PASSWORD
valueFrom:
secretKeyRef:
name: mysecret
key: password
restartPolicy: Never
```
##### Consuming Secret Values from Environment Variables
Inside a container that consumes a secret in environment variables, the secret keys appear as
normal environment variables containing the base-64 decoded values of the secret data.
This is the result of commands executed inside the container from the example above:
```console
$ echo $SECRET_USERNAME
admin
$ echo $SECRET_PASSWORD
1f2d1e2e67df
```
#### Using imagePullSecrets
@ -288,17 +360,6 @@ field set to that of the service account.
See [here](service-accounts.md#adding-imagepullsecrets-to-a-service-account)
for a detailed explanation of that process.
#### Using Secrets as Files from a Pod
To use a secret from a Pod:
1. Create a secret or use an existing one. Multiple pods can reference the same secret.
1. Modify your Pod definition to add a volume under `spec.volumes[]`. Name the volume anything, and have a `spec.volumes[].secret.secretName` field equal to the name of the secret object.
1. Add a `spec.containers[].volumeMounts[]` to each container that needs the secret. Specify `spec.containers[].volumeMounts[].readOnly = true` and `spec.containers[].volumeMounts[].mountPath` to an unused directory name where you would like the secrets to appear.
1. Modify your image and/or command line so that the program looks for files in that directory. Each key in the secret `data` map becomes the filename under `mountPath`.
See the [Use Cases](#use-cases) section for detailed examples.
#### Automatic Mounting of Manually Created Secrets
We plan to extend the service account behavior so that manually created
@ -328,31 +389,6 @@ controller. It does not include pods created via the kubelets
`--manifest-url` flag, its `--config` flag, or its REST API (these are
not common ways to create pods.)
### Consuming Secret Values
Inside the container that mounts a secret volume, the secret keys appear as
files and the secret values are base-64 decoded and stored inside these files.
This is the result of commands
executed inside the container from the example above:
```console
$ ls /etc/foo/
username
password
$ cat /etc/foo/username
value-1
$ cat /etc/foo/password
value-2
```
The program in a container is responsible for reading the secret(s) from the
files. Currently, if a program expects a secret to be stored in an environment
variable, then the user needs to modify the image to populate the environment
variable from the file as a step before running the main program. Future
versions of Kubernetes are expected to provide more automation for populating
environment variables from files.
### Secret and Pod Lifetime interaction
When a pod is created via the API, there is no check whether a referenced

View File

@ -148,6 +148,7 @@ input-dirs
insecure-bind-address
insecure-port
insecure-skip-tls-verify
iptables-masquerade-bit
iptables-sync-period
ir-data-source
ir-dbname

File diff suppressed because it is too large.

View File

@ -31,6 +31,9 @@ type KubeProxyConfiguration struct {
HealthzPort int `json:"healthzPort"`
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string `json:"hostnameOverride"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
IPTablesMasqueradeBit *int `json:"iptablesMasqueradeBit"`
// iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"`
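The comment above pins the field to the range [0, 31], but this diff does not show where that constraint is enforced. The following is an illustrative (hypothetical) validation helper for the field, treating a nil pointer as "not set":

```go
package config

import "fmt"

// validateMasqueradeBit is an illustrative helper, not code from this commit:
// it enforces the documented [0, 31] constraint and treats a nil pointer as
// "not set" (the v1alpha1 defaulting later fills in 14).
func validateMasqueradeBit(bit *int32) error {
	if bit == nil {
		return nil
	}
	if *bit < 0 || *bit > 31 {
		return fmt.Errorf("iptablesMasqueradeBit %d is outside the valid range [0, 31]", *bit)
	}
	return nil
}
```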

View File

@ -37,6 +37,12 @@ func autoConvert_componentconfig_KubeProxyConfiguration_To_v1alpha1_KubeProxyCon
out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = int32(in.HealthzPort)
out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int32)
*out.IPTablesMasqueradeBit = int32(*in.IPTablesMasqueradeBit)
} else {
out.IPTablesMasqueradeBit = nil
}
if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil {
return err
}
@ -127,6 +133,12 @@ func autoConvert_v1alpha1_KubeProxyConfiguration_To_componentconfig_KubeProxyCon
out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = int(in.HealthzPort)
out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int)
*out.IPTablesMasqueradeBit = int(*in.IPTablesMasqueradeBit)
} else {
out.IPTablesMasqueradeBit = nil
}
if err := s.Convert(&in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, 0); err != nil {
return err
}

View File

@ -43,6 +43,12 @@ func deepCopy_v1alpha1_KubeProxyConfiguration(in KubeProxyConfiguration, out *Ku
out.HealthzBindAddress = in.HealthzBindAddress
out.HealthzPort = in.HealthzPort
out.HostnameOverride = in.HostnameOverride
if in.IPTablesMasqueradeBit != nil {
out.IPTablesMasqueradeBit = new(int32)
*out.IPTablesMasqueradeBit = *in.IPTablesMasqueradeBit
} else {
out.IPTablesMasqueradeBit = nil
}
if err := deepCopy_unversioned_Duration(in.IPTablesSyncPeriod, &out.IPTablesSyncPeriod, c); err != nil {
return err
}

View File

@ -55,6 +55,10 @@ func addDefaultingFuncs(scheme *runtime.Scheme) {
if obj.ConntrackMax == 0 {
obj.ConntrackMax = 256 * 1024 // 4x default (64k)
}
if obj.IPTablesMasqueradeBit == nil {
temp := int32(14)
obj.IPTablesMasqueradeBit = &temp
}
if obj.ConntrackTCPEstablishedTimeout == zero {
obj.ConntrackTCPEstablishedTimeout = unversioned.Duration{Duration: 24 * time.Hour} // 1 day (1/5 default)
}

File diff suppressed because it is too large.

View File

@ -31,6 +31,9 @@ type KubeProxyConfiguration struct {
HealthzPort int32 `json:"healthzPort"`
// hostnameOverride, if non-empty, will be used as the identity instead of the actual hostname.
HostnameOverride string `json:"hostnameOverride"`
// iptablesMasqueradeBit is the bit of the iptables fwmark space to use for SNAT if using
// the pure iptables proxy mode. Values must be within the range [0, 31].
IPTablesMasqueradeBit *int32 `json:"iptablesMasqueradeBit"`
// iptablesSyncPeriod is the period that iptables rules are refreshed (e.g. '5s', '1m',
// '2h22m'). Must be greater than 0.
IPTablesSyncPeriod unversioned.Duration `json:"iptablesSyncPeriodSeconds"`

View File

@ -81,6 +81,17 @@ type KubeletContainerName struct {
ContainerName string
}
// containerNamePrefix is used to identify the containers on the node managed by this
// process.
var containerNamePrefix = "k8s"
// SetContainerNamePrefix allows the container name prefix for this process to be changed.
// This is intended to support testing and bootstrapping experimentation. It cannot be
// changed once the Kubelet starts.
func SetContainerNamePrefix(prefix string) {
containerNamePrefix = prefix
}
// DockerPuller is an abstract interface for testability. It abstracts image pull operations.
type DockerPuller interface {
Pull(image string, secrets []api.Secret) error
@ -209,8 +220,6 @@ func (p throttledDockerPuller) IsImagePresent(name string) (bool, error) {
return p.puller.IsImagePresent(name)
}
const containerNamePrefix = "k8s"
// Creates a name which can be reversed to identify both full pod name and container name.
func BuildDockerName(dockerName KubeletContainerName, container *api.Container) (string, string) {
containerName := dockerName.ContainerName + "." + strconv.FormatUint(kubecontainer.HashContainer(container), 16)

View File

@ -1357,6 +1357,38 @@ func containerAndPodFromLabels(inspect *docker.Container) (pod *api.Pod, contain
return
}
func (dm *DockerManager) applyOOMScoreAdj(container *api.Container, containerInfo *docker.Container) error {
cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid)
if err != nil {
if err == os.ErrNotExist {
// Container exited. We cannot do anything about it. Ignore this error.
glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID)
return nil
}
return err
}
// Set OOM score of the container based on the priority of the container.
// Processes in lower-priority pods should be killed first if the system runs out of memory.
// The main pod infrastructure container is considered high priority, since if it is killed the
// whole pod will die.
// TODO: Cache this value.
var oomScoreAdj int
if containerInfo.Name == PodInfraContainerName {
oomScoreAdj = qos.PodInfraOOMAdj
} else {
oomScoreAdj = qos.GetContainerOOMScoreAdjust(container, int64(dm.machineInfo.MemoryCapacity))
}
if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
if err == os.ErrNotExist {
// Container exited. We cannot do anything about it. Ignore this error.
glog.V(2).Infof("Failed to apply OOM score adj on container %q with ID %q. Init process does not exist.", containerInfo.Name, containerInfo.ID)
return nil
}
return err
}
return nil
}
// Run a single container from a pod. Returns the docker container ID
// If do not need to pass labels, just pass nil.
func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Container, netMode, ipcMode, pidMode string, restartCount int) (kubecontainer.ContainerID, error) {
@ -1418,24 +1450,9 @@ func (dm *DockerManager) runContainerInPod(pod *api.Pod, container *api.Containe
return kubecontainer.ContainerID{}, fmt.Errorf("can't get init PID for container %q", id)
}
// Set OOM score of the container based on the priority of the container.
// Processes in lower-priority pods should be killed first if the system runs out of memory.
// The main pod infrastructure container is considered high priority, since if it is killed the
// whole pod will die.
var oomScoreAdj int
if container.Name == PodInfraContainerName {
oomScoreAdj = qos.PodInfraOOMAdj
} else {
oomScoreAdj = qos.GetContainerOOMScoreAdjust(container, int64(dm.machineInfo.MemoryCapacity))
if err := dm.applyOOMScoreAdj(container, containerInfo); err != nil {
return kubecontainer.ContainerID{}, fmt.Errorf("failed to apply oom-score-adj to container %q- %v", err, containerInfo.Name)
}
cgroupName, err := dm.procFs.GetFullContainerName(containerInfo.State.Pid)
if err != nil {
return kubecontainer.ContainerID{}, fmt.Errorf("GetFullContainerName: %v", err)
}
if err = dm.oomAdjuster.ApplyOOMScoreAdjContainer(cgroupName, oomScoreAdj, 5); err != nil {
return kubecontainer.ContainerID{}, fmt.Errorf("ApplyOOMScoreAdjContainer: %v", err)
}
// The addNDotsOption call appends the ndots option to the resolv.conf file generated by docker.
// This resolv.conf file is shared by all containers of the same pod, and needs to be modified only once per pod.
// we modify it when the pause container is created since it is the first container created in the pod since it holds

View File

@ -171,7 +171,7 @@ func (m *manager) RemovePod(pod *api.Pod) {
for _, probeType := range [...]probeType{readiness, liveness} {
key.probeType = probeType
if worker, ok := m.workers[key]; ok {
close(worker.stop)
worker.stop()
}
}
}
@ -188,7 +188,7 @@ func (m *manager) CleanupPods(activePods []*api.Pod) {
for key, worker := range m.workers {
if _, ok := desiredPods[key.podUID]; !ok {
close(worker.stop)
worker.stop()
}
}
}

View File

@ -18,6 +18,7 @@ package prober
import (
"fmt"
"strconv"
"testing"
"time"
@ -26,6 +27,7 @@ import (
kubecontainer "k8s.io/kubernetes/pkg/kubelet/container"
"k8s.io/kubernetes/pkg/kubelet/prober/results"
"k8s.io/kubernetes/pkg/probe"
"k8s.io/kubernetes/pkg/types"
"k8s.io/kubernetes/pkg/util/runtime"
"k8s.io/kubernetes/pkg/util/wait"
)
@ -173,6 +175,31 @@ func TestCleanupPods(t *testing.T) {
}
}
func TestCleanupRepeated(t *testing.T) {
m := newTestManager()
defer cleanup(t, m)
podTemplate := api.Pod{
Spec: api.PodSpec{
Containers: []api.Container{{
Name: "prober1",
ReadinessProbe: defaultProbe,
LivenessProbe: defaultProbe,
}},
},
}
const numTestPods = 100
for i := 0; i < numTestPods; i++ {
pod := podTemplate
pod.UID = types.UID(strconv.Itoa(i))
m.AddPod(&pod)
}
for i := 0; i < 10; i++ {
m.CleanupPods([]*api.Pod{})
}
}
func TestUpdatePodStatus(t *testing.T) {
unprobed := api.ContainerStatus{
Name: "unprobed_container",

View File

@ -32,8 +32,8 @@ import (
// stop channel is closed. The worker uses the probe Manager's statusManager to get up-to-date
// container IDs.
type worker struct {
// Channel for stopping the probe, it should be closed to trigger a stop.
stop chan struct{}
// Channel for stopping the probe.
stopCh chan struct{}
// The pod containing this probe (read-only)
pod *api.Pod
@ -70,7 +70,7 @@ func newWorker(
container api.Container) *worker {
w := &worker{
stop: make(chan struct{}),
stopCh: make(chan struct{}, 1), // Buffer so stop() can be non-blocking.
pod: pod,
container: container,
probeType: probeType,
@ -109,7 +109,7 @@ probeLoop:
for w.doProbe() {
// Wait for next probe tick.
select {
case <-w.stop:
case <-w.stopCh:
break probeLoop
case <-probeTicker.C:
// continue
@ -117,6 +117,15 @@ probeLoop:
}
}
// stop stops the probe worker. The worker handles cleanup and removes itself from its manager.
// It is safe to call stop multiple times.
func (w *worker) stop() {
select {
case w.stopCh <- struct{}{}:
default: // Non-blocking.
}
}
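
For context (this aside is not part of the diff), the buffered channel combined with a non-blocking send is a standard Go idiom for making a stop signal idempotent; a minimal standalone sketch of the same pattern:

```go
package main

import "fmt"

type worker struct {
	stopCh chan struct{} // buffered with capacity 1, as in the diff above
}

// stop signals shutdown; extra calls fall through the default case,
// so it is safe to call any number of times without blocking.
func (w *worker) stop() {
	select {
	case w.stopCh <- struct{}{}:
	default:
	}
}

func main() {
	w := &worker{stopCh: make(chan struct{}, 1)}
	w.stop()
	w.stop() // second call is a no-op rather than a panic or a block
	<-w.stopCh
	fmt.Println("stopped")
}
```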
// doProbe probes the container once and records the result.
// Returns whether the worker should continue.
func (w *worker) doProbe() (keepGoing bool) {

View File

@ -236,7 +236,9 @@ func TestCleanUp(t *testing.T) {
}
}
close(w.stop)
for i := 0; i < 10; i++ {
w.stop() // Stop should be callable multiple times without consequence.
}
if err := waitForWorkerExit(m, []probeKey{key}); err != nil {
t.Fatalf("[%s] error waiting for worker exit: %v", probeType, err)
}

View File

@ -54,13 +54,20 @@ import (
const iptablesMinVersion = utiliptables.MinCheckVersion
// the services chain
const iptablesServicesChain utiliptables.Chain = "KUBE-SERVICES"
const kubeServicesChain utiliptables.Chain = "KUBE-SERVICES"
// the nodeports chain
const iptablesNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"
const kubeNodePortsChain utiliptables.Chain = "KUBE-NODEPORTS"
// the kubernetes postrouting chain
const kubePostroutingChain utiliptables.Chain = "KUBE-POSTROUTING"
// the mark-for-masquerade chain
const kubeMarkMasqChain utiliptables.Chain = "KUBE-MARK-MASQ"
// the mark we apply to traffic needing SNAT
const iptablesMasqueradeMark = "0x4d415351"
// TODO(thockin): Remove this for v1.3 or v1.4.
const oldIptablesMasqueradeMark = "0x4d415351"
// IptablesVersioner can query the current iptables version.
type IptablesVersioner interface {
@ -126,8 +133,7 @@ type serviceInfo struct {
loadBalancerStatus api.LoadBalancerStatus
sessionAffinityType api.ServiceAffinity
stickyMaxAgeSeconds int
// Deprecated, but required for back-compat (including e2e)
externalIPs []string
externalIPs []string
}
// returns a new serviceInfo struct
@ -149,9 +155,10 @@ type Proxier struct {
haveReceivedEndpointsUpdate bool // true once we've seen an OnEndpointsUpdate event
// These are effectively const and do not need the mutex to be held.
syncPeriod time.Duration
iptables utiliptables.Interface
masqueradeAll bool
syncPeriod time.Duration
iptables utiliptables.Interface
masqueradeAll bool
masqueradeMark string
}
type localPort struct {
@ -177,7 +184,7 @@ var _ proxy.ProxyProvider = &Proxier{}
// An error will be returned if iptables fails to update or acquire the initial lock.
// Once a proxier is created, it will keep iptables up to date in the background and
// will not terminate if a particular iptables call fails.
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool) (*Proxier, error) {
func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod time.Duration, masqueradeAll bool, masqueradeBit int) (*Proxier, error) {
// Set the route_localnet sysctl we need for
if err := utilsysctl.SetSysctl(sysctlRouteLocalnet, 1); err != nil {
return nil, fmt.Errorf("can't set sysctl %s: %v", sysctlRouteLocalnet, err)
@ -191,50 +198,122 @@ func NewProxier(ipt utiliptables.Interface, exec utilexec.Interface, syncPeriod
glog.Warningf("can't set sysctl %s: %v", sysctlBridgeCallIptables, err)
}
// Generate the masquerade mark to use for SNAT rules.
if masqueradeBit < 0 || masqueradeBit > 31 {
return nil, fmt.Errorf("invalid iptables-masquerade-bit %v not in [0, 31]", masqueradeBit)
}
masqueradeValue := 1 << uint(masqueradeBit)
masqueradeMark := fmt.Sprintf("%#08x/%#08x", masqueradeValue, masqueradeValue)
return &Proxier{
serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
endpointsMap: make(map[proxy.ServicePortName][]string),
portsMap: make(map[localPort]closeable),
syncPeriod: syncPeriod,
iptables: ipt,
masqueradeAll: masqueradeAll,
serviceMap: make(map[proxy.ServicePortName]*serviceInfo),
endpointsMap: make(map[proxy.ServicePortName][]string),
portsMap: make(map[localPort]closeable),
syncPeriod: syncPeriod,
iptables: ipt,
masqueradeAll: masqueradeAll,
masqueradeMark: masqueradeMark,
}, nil
}
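
As an aside (not part of this diff), the mark string built above is simply the configured bit set in both the value and the mask of an iptables fwmark expression. A minimal sketch, assuming the default bit of 14:

```go
package main

import "fmt"

func main() {
	masqueradeBit := 14 // default for --iptables-masquerade-bit
	masqueradeValue := 1 << uint(masqueradeBit)
	// 1<<14 is 0x4000; the same %#08x verb as above renders it as a
	// value/mask pair with the single masquerade bit set in both halves.
	fmt.Printf("%#08x/%#08x\n", masqueradeValue, masqueradeValue)
}
```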
// CleanupLeftovers removes all iptables rules and chains created by the Proxier
// It returns true if an error was encountered. Errors are logged.
func CleanupLeftovers(ipt utiliptables.Interface) (encounteredError bool) {
//TODO: actually tear down all rules and chains.
args := []string{"-m", "comment", "--comment", "kubernetes service portals", "-j", string(iptablesServicesChain)}
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainOutput, args...); err != nil {
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
encounteredError = true
// Unlink the services chain.
args := []string{
"-m", "comment", "--comment", "kubernetes service portals",
"-j", string(kubeServicesChain),
}
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPrerouting, args...); err != nil {
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
encounteredError = true
tableChainsWithJumpServices := []struct {
table utiliptables.Table
chain utiliptables.Chain
}{
{utiliptables.TableFilter, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainPrerouting},
}
args = []string{"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT", "-m", "mark", "--mark", iptablesMasqueradeMark, "-j", "MASQUERADE"}
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
encounteredError = true
}
// flush and delete chains.
chains := []utiliptables.Chain{iptablesServicesChain, iptablesNodePortsChain}
for _, c := range chains {
// flush chain, then if successful delete, delete will fail if flush fails.
if err := ipt.FlushChain(utiliptables.TableNAT, c); err != nil {
glog.Errorf("Error flushing pure-iptables proxy chain: %v", err)
encounteredError = true
} else {
if err = ipt.DeleteChain(utiliptables.TableNAT, c); err != nil {
glog.Errorf("Error deleting pure-iptables proxy chain: %v", err)
for _, tc := range tableChainsWithJumpServices {
if err := ipt.DeleteRule(tc.table, tc.chain, args...); err != nil {
if !utiliptables.IsNotFoundError(err) {
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
encounteredError = true
}
}
}
// Unlink the postrouting chain.
args = []string{
"-m", "comment", "--comment", "kubernetes postrouting rules",
"-j", string(kubePostroutingChain),
}
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
if !utiliptables.IsNotFoundError(err) {
glog.Errorf("Error removing pure-iptables proxy rule: %v", err)
encounteredError = true
}
}
// Flush and remove all of our chains.
if iptablesSaveRaw, err := ipt.Save(utiliptables.TableNAT); err != nil {
glog.Errorf("Failed to execute iptables-save for %s: %v", utiliptables.TableNAT, err)
encounteredError = true
} else {
existingNATChains := getChainLines(utiliptables.TableNAT, iptablesSaveRaw)
natChains := bytes.NewBuffer(nil)
natRules := bytes.NewBuffer(nil)
writeLine(natChains, "*nat")
// Start with chains we know we need to remove.
for _, chain := range []utiliptables.Chain{kubeServicesChain, kubeNodePortsChain, kubePostroutingChain, kubeMarkMasqChain} {
if _, found := existingNATChains[chain]; found {
chainString := string(chain)
writeLine(natChains, existingNATChains[chain]) // flush
writeLine(natRules, "-X", chainString) // delete
}
}
// Hunt for service and endpoint chains.
for chain := range existingNATChains {
chainString := string(chain)
if strings.HasPrefix(chainString, "KUBE-SVC-") || strings.HasPrefix(chainString, "KUBE-SEP-") {
writeLine(natChains, existingNATChains[chain]) // flush
writeLine(natRules, "-X", chainString) // delete
}
}
writeLine(natRules, "COMMIT")
natLines := append(natChains.Bytes(), natRules.Bytes()...)
// Write it.
err = ipt.Restore(utiliptables.TableNAT, natLines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
glog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableNAT, err)
encounteredError = true
}
}
{
filterBuf := bytes.NewBuffer(nil)
writeLine(filterBuf, "*filter")
writeLine(filterBuf, fmt.Sprintf(":%s - [0:0]", kubeServicesChain))
writeLine(filterBuf, fmt.Sprintf("-X %s", kubeServicesChain))
writeLine(filterBuf, "COMMIT")
// Write it.
if err := ipt.Restore(utiliptables.TableFilter, filterBuf.Bytes(), utiliptables.NoFlushTables, utiliptables.RestoreCounters); err != nil {
glog.Errorf("Failed to execute iptables-restore for %s: %v", utiliptables.TableFilter, err)
encounteredError = true
}
}
// Clean up the older SNAT rule which was directly in POSTROUTING.
// TODO(thockin): Remove this for v1.3 or v1.4.
args = []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", oldIptablesMasqueradeMark,
"-j", "MASQUERADE",
}
if err := ipt.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
if !utiliptables.IsNotFoundError(err) {
glog.Errorf("Error removing old-style SNAT rule: %v", err)
encounteredError = true
}
}
return encounteredError
}
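
For reference (not part of this diff), the chain and rule buffers assembled above are concatenated into an iptables-restore payload. The sketch below mimics that construction for a single chain, using a hypothetical stand-in for the writeLine helper; per the comments above, the chain declaration line flushes the chain and the -X line deletes it:

```go
package main

import (
	"bytes"
	"fmt"
	"strings"
)

// writeLine joins words with spaces and terminates with a newline,
// mirroring how the proxier builds its restore buffers.
func writeLine(buf *bytes.Buffer, words ...string) {
	buf.WriteString(strings.Join(words, " ") + "\n")
}

func main() {
	natChains := bytes.NewBuffer(nil)
	natRules := bytes.NewBuffer(nil)
	writeLine(natChains, "*nat")
	writeLine(natChains, ":KUBE-SERVICES - [0:0]") // flush
	writeLine(natRules, "-X", "KUBE-SERVICES")     // delete
	writeLine(natRules, "COMMIT")
	fmt.Print(natChains.String() + natRules.String())
	// Output:
	// *nat
	// :KUBE-SERVICES - [0:0]
	// -X KUBE-SERVICES
	// COMMIT
}
```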
@ -481,37 +560,45 @@ func (proxier *Proxier) syncProxyRules() {
}
glog.V(3).Infof("Syncing iptables rules")
// Ensure main chains and rules are installed.
tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT}
for _, table := range tablesNeedServicesChain {
if _, err := proxier.iptables.EnsureChain(table, iptablesServicesChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, iptablesServicesChain, err)
return
}
}
// Link the services chain.
tableChainsNeedJumpServices := []struct {
table utiliptables.Table
chain utiliptables.Chain
}{
{utiliptables.TableFilter, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainPrerouting},
}
comment := "kubernetes service portals"
args := []string{"-m", "comment", "--comment", comment, "-j", string(iptablesServicesChain)}
for _, tc := range tableChainsNeedJumpServices {
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, iptablesServicesChain, err)
return
}
}
// Link the output rules.
// Create and link the kube services chain.
{
comment := "kubernetes service traffic requiring SNAT"
args := []string{"-m", "comment", "--comment", comment, "-m", "mark", "--mark", iptablesMasqueradeMark, "-j", "MASQUERADE"}
if _, err := proxier.iptables.EnsureRule(utiliptables.Append, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
glog.Errorf("Failed to ensure that chain %s obeys MASQUERADE mark: %v", utiliptables.ChainPostrouting, err)
tablesNeedServicesChain := []utiliptables.Table{utiliptables.TableFilter, utiliptables.TableNAT}
for _, table := range tablesNeedServicesChain {
if _, err := proxier.iptables.EnsureChain(table, kubeServicesChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", table, kubeServicesChain, err)
return
}
}
tableChainsNeedJumpServices := []struct {
table utiliptables.Table
chain utiliptables.Chain
}{
{utiliptables.TableFilter, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainOutput},
{utiliptables.TableNAT, utiliptables.ChainPrerouting},
}
comment := "kubernetes service portals"
args := []string{"-m", "comment", "--comment", comment, "-j", string(kubeServicesChain)}
for _, tc := range tableChainsNeedJumpServices {
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, tc.table, tc.chain, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", tc.table, tc.chain, kubeServicesChain, err)
return
}
}
}
// Create and link the kube postrouting chain.
{
if _, err := proxier.iptables.EnsureChain(utiliptables.TableNAT, kubePostroutingChain); err != nil {
glog.Errorf("Failed to ensure that %s chain %s exists: %v", utiliptables.TableNAT, kubePostroutingChain, err)
return
}
comment := "kubernetes postrouting rules"
args := []string{"-m", "comment", "--comment", comment, "-j", string(kubePostroutingChain)}
if _, err := proxier.iptables.EnsureRule(utiliptables.Prepend, utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
glog.Errorf("Failed to ensure that %s chain %s jumps to %s: %v", utiliptables.TableNAT, utiliptables.ChainPostrouting, kubePostroutingChain, err)
return
}
}
@ -521,7 +608,7 @@ func (proxier *Proxier) syncProxyRules() {
existingFilterChains := make(map[utiliptables.Chain]string)
iptablesSaveRaw, err := proxier.iptables.Save(utiliptables.TableFilter)
if err != nil { // if we failed to get any rules
glog.Errorf("Failed to execute iptables-save, syncing all rules. %s", err.Error())
glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err)
} else { // otherwise parse the output
existingFilterChains = getChainLines(utiliptables.TableFilter, iptablesSaveRaw)
}
@ -529,7 +616,7 @@ func (proxier *Proxier) syncProxyRules() {
existingNATChains := make(map[utiliptables.Chain]string)
iptablesSaveRaw, err = proxier.iptables.Save(utiliptables.TableNAT)
if err != nil { // if we failed to get any rules
glog.Errorf("Failed to execute iptables-save, syncing all rules. %s", err.Error())
glog.Errorf("Failed to execute iptables-save, syncing all rules: %v", err)
} else { // otherwise parse the output
existingNATChains = getChainLines(utiliptables.TableNAT, iptablesSaveRaw)
}
@ -544,24 +631,52 @@ func (proxier *Proxier) syncProxyRules() {
writeLine(natChains, "*nat")
// Make sure we keep stats for the top-level chains, if they existed
// (which they should have because we created them above).
if chain, ok := existingFilterChains[iptablesServicesChain]; ok {
// (which most should have because we created them above).
if chain, ok := existingFilterChains[kubeServicesChain]; ok {
writeLine(filterChains, chain)
} else {
writeLine(filterChains, makeChainLine(iptablesServicesChain))
writeLine(filterChains, makeChainLine(kubeServicesChain))
}
if chain, ok := existingNATChains[iptablesServicesChain]; ok {
if chain, ok := existingNATChains[kubeServicesChain]; ok {
writeLine(natChains, chain)
} else {
writeLine(natChains, makeChainLine(iptablesServicesChain))
writeLine(natChains, makeChainLine(kubeServicesChain))
}
if chain, ok := existingNATChains[iptablesNodePortsChain]; ok {
if chain, ok := existingNATChains[kubeNodePortsChain]; ok {
writeLine(natChains, chain)
} else {
writeLine(natChains, makeChainLine(iptablesNodePortsChain))
writeLine(natChains, makeChainLine(kubeNodePortsChain))
}
if chain, ok := existingNATChains[kubePostroutingChain]; ok {
writeLine(natChains, chain)
} else {
writeLine(natChains, makeChainLine(kubePostroutingChain))
}
if chain, ok := existingNATChains[kubeMarkMasqChain]; ok {
writeLine(natChains, chain)
} else {
writeLine(natChains, makeChainLine(kubeMarkMasqChain))
}
// Accumulate nat chains to keep.
// Install the kubernetes-specific postrouting rules. We use a whole chain for
// this so that it is easier to flush and change, for example if the mark
// value should ever change.
writeLine(natRules, []string{
"-A", string(kubePostroutingChain),
"-m", "comment", "--comment", `"kubernetes service traffic requiring SNAT"`,
"-m", "mark", "--mark", proxier.masqueradeMark,
"-j", "MASQUERADE",
}...)
// Install the kubernetes-specific masquerade mark rule. We use a whole chain for
// this so that it is easier to flush and change, for example if the mark
// value should ever change.
writeLine(natRules, []string{
"-A", string(kubeMarkMasqChain),
"-j", "MARK", "--set-xmark", proxier.masqueradeMark,
}...)
// Accumulate NAT chains to keep.
activeNATChains := map[utiliptables.Chain]bool{} // use a map as a set
// Accumulate new local ports that we have opened.
@ -582,18 +697,16 @@ func (proxier *Proxier) syncProxyRules() {
// Capture the clusterIP.
args := []string{
"-A", string(iptablesServicesChain),
"-m", "comment", "--comment", fmt.Sprintf("\"%s cluster IP\"", svcName.String()),
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s cluster IP"`, svcName.String()),
"-m", protocol, "-p", protocol,
"-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
"--dport", fmt.Sprintf("%d", svcInfo.port),
}
if proxier.masqueradeAll {
writeLine(natRules, append(args,
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
writeLine(natRules, append(args, "-j", string(kubeMarkMasqChain))...)
}
writeLine(natRules, append(args,
"-j", string(svcChain))...)
writeLine(natRules, append(args, "-j", string(svcChain))...)
// Capture externalIPs.
for _, externalIP := range svcInfo.externalIPs {
@ -621,15 +734,14 @@ func (proxier *Proxier) syncProxyRules() {
}
} // We're holding the port, so it's OK to install iptables rules.
args := []string{
"-A", string(iptablesServicesChain),
"-m", "comment", "--comment", fmt.Sprintf("\"%s external IP\"", svcName.String()),
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s external IP"`, svcName.String()),
"-m", protocol, "-p", protocol,
"-d", fmt.Sprintf("%s/32", externalIP),
"--dport", fmt.Sprintf("%d", svcInfo.port),
}
// We have to SNAT packets to external IPs.
writeLine(natRules, append(args,
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
writeLine(natRules, append(args, "-j", string(kubeMarkMasqChain))...)
// Allow traffic for external IPs that does not come from a bridge (i.e. not from a container)
// nor from a local process to be forwarded to the service.
@ -638,30 +750,26 @@ func (proxier *Proxier) syncProxyRules() {
externalTrafficOnlyArgs := append(args,
"-m", "physdev", "!", "--physdev-is-in",
"-m", "addrtype", "!", "--src-type", "LOCAL")
writeLine(natRules, append(externalTrafficOnlyArgs,
"-j", string(svcChain))...)
writeLine(natRules, append(externalTrafficOnlyArgs, "-j", string(svcChain))...)
dstLocalOnlyArgs := append(args, "-m", "addrtype", "--dst-type", "LOCAL")
// Allow traffic bound for external IPs that happen to be recognized as local IPs to stay local.
// This covers cases like GCE load-balancers which get added to the local routing table.
writeLine(natRules, append(dstLocalOnlyArgs,
"-j", string(svcChain))...)
writeLine(natRules, append(dstLocalOnlyArgs, "-j", string(svcChain))...)
}
// Capture load-balancer ingress.
for _, ingress := range svcInfo.loadBalancerStatus.Ingress {
if ingress.IP != "" {
args := []string{
"-A", string(iptablesServicesChain),
"-m", "comment", "--comment", fmt.Sprintf("\"%s loadbalancer IP\"", svcName.String()),
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s loadbalancer IP"`, svcName.String()),
"-m", protocol, "-p", protocol,
"-d", fmt.Sprintf("%s/32", ingress.IP),
"--dport", fmt.Sprintf("%d", svcInfo.port),
}
// We have to SNAT packets from external IPs.
writeLine(natRules, append(args,
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
writeLine(natRules, append(args,
"-j", string(svcChain))...)
writeLine(natRules, append(args, "-j", string(kubeMarkMasqChain))...)
writeLine(natRules, append(args, "-j", string(svcChain))...)
}
}
@ -687,27 +795,24 @@ func (proxier *Proxier) syncProxyRules() {
}
newLocalPorts[lp] = socket
} // We're holding the port, so it's OK to install iptables rules.
args := []string{
"-A", string(kubeNodePortsChain),
"-m", "comment", "--comment", svcName.String(),
"-m", protocol, "-p", protocol,
"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
}
// Nodeports need SNAT.
writeLine(natRules,
"-A", string(iptablesNodePortsChain),
"-m", "comment", "--comment", svcName.String(),
"-m", protocol, "-p", protocol,
"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))
writeLine(natRules, append(args, "-j", string(kubeMarkMasqChain))...)
// Jump to the service chain.
writeLine(natRules,
"-A", string(iptablesNodePortsChain),
"-m", "comment", "--comment", svcName.String(),
"-m", protocol, "-p", protocol,
"--dport", fmt.Sprintf("%d", svcInfo.nodePort),
"-j", string(svcChain))
writeLine(natRules, append(args, "-j", string(svcChain))...)
}
// If the service has no endpoints then reject packets.
if len(proxier.endpointsMap[svcName]) == 0 {
writeLine(filterRules,
"-A", string(iptablesServicesChain),
"-m", "comment", "--comment", fmt.Sprintf("\"%s has no endpoints\"", svcName.String()),
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", fmt.Sprintf(`"%s has no endpoints"`, svcName.String()),
"-m", protocol, "-p", protocol,
"-d", fmt.Sprintf("%s/32", svcInfo.clusterIP.String()),
"--dport", fmt.Sprintf("%d", svcInfo.port),
@ -777,16 +882,14 @@ func (proxier *Proxier) syncProxyRules() {
// TODO: if we grow logic to get this node's pod CIDR, we can use it.
writeLine(natRules, append(args,
"-s", fmt.Sprintf("%s/32", strings.Split(endpoints[i], ":")[0]),
"-j", "MARK", "--set-xmark", fmt.Sprintf("%s/0xffffffff", iptablesMasqueradeMark))...)
"-j", string(kubeMarkMasqChain))...)
// Update client-affinity lists.
if svcInfo.sessionAffinityType == api.ServiceAffinityClientIP {
args = append(args, "-m", "recent", "--name", string(endpointChain), "--set")
}
// DNAT to final destination.
args = append(args,
"-m", protocol, "-p", protocol,
"-j", "DNAT", "--to-destination", endpoints[i])
args = append(args, "-m", protocol, "-p", protocol, "-j", "DNAT", "--to-destination", endpoints[i])
writeLine(natRules, args...)
}
}
@ -810,10 +913,10 @@ func (proxier *Proxier) syncProxyRules() {
// Finally, tail-call to the nodeports chain. This needs to be after all
// other service portal rules.
writeLine(natRules,
"-A", string(iptablesServicesChain),
"-m", "comment", "--comment", "\"kubernetes service nodeports; NOTE: this must be the last rule in this chain\"",
"-A", string(kubeServicesChain),
"-m", "comment", "--comment", `"kubernetes service nodeports; NOTE: this must be the last rule in this chain"`,
"-m", "addrtype", "--dst-type", "LOCAL",
"-j", string(iptablesNodePortsChain))
"-j", string(kubeNodePortsChain))
// Write the end-of-table markers.
writeLine(filterRules, "COMMIT")
@ -825,23 +928,37 @@ func (proxier *Proxier) syncProxyRules() {
natLines := append(natChains.Bytes(), natRules.Bytes()...)
lines := append(filterLines, natLines...)
glog.V(3).Infof("Syncing iptables rules: %s", lines)
glog.V(3).Infof("Restoring iptables rules: %s", lines)
err = proxier.iptables.RestoreAll(lines, utiliptables.NoFlushTables, utiliptables.RestoreCounters)
if err != nil {
glog.Errorf("Failed to sync iptables rules: %v", err)
glog.Errorf("Failed to execute iptables-restore: %v", err)
// Revert new local ports.
for k, v := range newLocalPorts {
glog.Errorf("Closing local port %s", k.String())
v.Close()
}
} else {
// Close old local ports and save new ones.
for k, v := range proxier.portsMap {
if newLocalPorts[k] == nil {
v.Close()
}
return
}
// Close old local ports and save new ones.
for k, v := range proxier.portsMap {
if newLocalPorts[k] == nil {
v.Close()
}
}
proxier.portsMap = newLocalPorts
// Clean up the older SNAT rule which was directly in POSTROUTING.
// TODO(thockin): Remove this for v1.3 or v1.4.
args := []string{
"-m", "comment", "--comment", "kubernetes service traffic requiring SNAT",
"-m", "mark", "--mark", oldIptablesMasqueradeMark,
"-j", "MASQUERADE",
}
if err := proxier.iptables.DeleteRule(utiliptables.TableNAT, utiliptables.ChainPostrouting, args...); err != nil {
if !utiliptables.IsNotFoundError(err) {
glog.Errorf("Error removing old-style SNAT rule: %v", err)
}
proxier.portsMap = newLocalPorts
}
}

View File

@ -196,27 +196,37 @@ func CleanupLeftovers(ipt iptables.Interface) (encounteredError bool) {
// Delete Rules first, then Flush and Delete Chains
args := []string{"-m", "comment", "--comment", "handle ClusterIPs; NOTE: this must be before the NodePort rules"}
if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostPortalChain))...); err != nil {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
}
}
if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerPortalChain))...); err != nil {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
}
}
args = []string{"-m", "addrtype", "--dst-type", "LOCAL"}
args = append(args, "-m", "comment", "--comment", "handle service NodePorts; NOTE: this must be the last rule in the chain")
if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainOutput, append(args, "-j", string(iptablesHostNodePortChain))...); err != nil {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
}
}
if err := ipt.DeleteRule(iptables.TableNAT, iptables.ChainPrerouting, append(args, "-j", string(iptablesContainerNodePortChain))...); err != nil {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
}
}
args = []string{"-m", "comment", "--comment", "Ensure that non-local NodePort traffic can flow"}
if err := ipt.DeleteRule(iptables.TableFilter, iptables.ChainInput, append(args, "-j", string(iptablesNonLocalNodePortChain))...); err != nil {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error removing userspace rule: %v", err)
encounteredError = true
}
}
// flush and delete chains.
@ -228,12 +238,16 @@ func CleanupLeftovers(ipt iptables.Interface) (encounteredError bool) {
for _, c := range chains {
// flush chain, then if successful delete, delete will fail if flush fails.
if err := ipt.FlushChain(table, c); err != nil {
glog.Errorf("Error flushing userspace chain: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error flushing userspace chain: %v", err)
encounteredError = true
}
} else {
if err = ipt.DeleteChain(table, c); err != nil {
glog.Errorf("Error deleting userspace chain: %v", err)
encounteredError = true
if !iptables.IsNotFoundError(err) {
glog.Errorf("Error deleting userspace chain: %v", err)
encounteredError = true
}
}
}
}

View File

@ -22,6 +22,7 @@ import (
"net/http"
"net/url"
"strconv"
"strings"
"time"
"k8s.io/kubernetes/pkg/api"
@ -231,6 +232,15 @@ func ResourceLocation(getter ResourceGetter, rt http.RoundTripper, ctx api.Conte
return loc, rt, nil
}
// getContainerNames returns a formatted string containing the container names
func getContainerNames(pod *api.Pod) string {
names := []string{}
for _, c := range pod.Spec.Containers {
names = append(names, c.Name)
}
return strings.Join(names, " ")
}
// LogLocation returns the log URL for a pod container. If opts.Container is blank
// and only one container is present in the pod, that container is used.
func LogLocation(
@ -249,10 +259,14 @@ func LogLocation(
// If a container was provided, it must be valid
container := opts.Container
if len(container) == 0 {
if len(pod.Spec.Containers) == 1 {
switch len(pod.Spec.Containers) {
case 1:
container = pod.Spec.Containers[0].Name
} else {
case 0:
return nil, nil, errors.NewBadRequest(fmt.Sprintf("a container name must be specified for pod %s", name))
default:
containerNames := getContainerNames(pod)
return nil, nil, errors.NewBadRequest(fmt.Sprintf("a container name must be specified for pod %s, choose one of: [%s]", name, containerNames))
}
} else {
if !podHasContainerWithName(pod, container) {
@ -386,10 +400,14 @@ func streamLocation(
// Try to figure out a container
// If a container was provided, it must be valid
if container == "" {
if len(pod.Spec.Containers) == 1 {
switch len(pod.Spec.Containers) {
case 1:
container = pod.Spec.Containers[0].Name
} else {
case 0:
return nil, nil, errors.NewBadRequest(fmt.Sprintf("a container name must be specified for pod %s", name))
default:
containerNames := getContainerNames(pod)
return nil, nil, errors.NewBadRequest(fmt.Sprintf("a container name must be specified for pod %s, choose one of: [%s]", name, containerNames))
}
} else {
if !podHasContainerWithName(pod, container) {

View File

@ -129,7 +129,7 @@ func TestCheckLogLocation(t *testing.T) {
Status: api.PodStatus{},
},
opts: &api.PodLogOptions{},
expectedErr: errors.NewBadRequest("a container name must be specified for pod test"),
expectedErr: errors.NewBadRequest("a container name must be specified for pod test, choose one of: [container1 container2]"),
},
{
in: &api.Pod{

View File

@ -296,6 +296,7 @@ func (runner *runner) Save(table Table) ([]byte, error) {
// run and return
args := []string{"-t", string(table)}
glog.V(4).Infof("running iptables-save %v", args)
return runner.exec.Command(cmdIptablesSave, args...).CombinedOutput()
}
@ -305,6 +306,7 @@ func (runner *runner) SaveAll() ([]byte, error) {
defer runner.mu.Unlock()
// run and return
glog.V(4).Infof("running iptables-save")
return runner.exec.Command(cmdIptablesSave, []string{}...).CombinedOutput()
}
@ -354,6 +356,7 @@ func (runner *runner) restoreInternal(args []string, data []byte, flush FlushFla
return err
}
// run the command and return the output or an error including the output and error
glog.V(4).Infof("running iptables-restore %v", args)
b, err := runner.exec.Command(cmdIptablesRestore, args...).CombinedOutput()
if err != nil {
return fmt.Errorf("%v (%s)", err, b)
@ -576,3 +579,17 @@ func (runner *runner) reload() {
f()
}
}
// IsNotFoundError returns true if the error indicates "not found". It parses
// the error string looking for known values, which is imperfect but works in
// practice.
func IsNotFoundError(err error) bool {
es := err.Error()
if strings.Contains(es, "No such file or directory") {
return true
}
if strings.Contains(es, "No chain/target/match by that name") {
return true
}
return false
}

View File

@ -21,6 +21,7 @@ package oom
import (
"fmt"
"io/ioutil"
"os"
"path"
"strconv"
@ -48,7 +49,18 @@ func getPids(cgroupName string) ([]int, error) {
return fsManager.GetPids()
}
func syscallNotExists(err error) bool {
if err == nil {
return false
}
if e, ok := err.(*os.SyscallError); ok && os.IsNotExist(e) {
return true
}
return false
}
// Writes 'value' to /proc/<pid>/oom_score_adj. PID = 0 means self
// Returns os.ErrNotExist if the `pid` does not exist.
func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
if pid < 0 {
return fmt.Errorf("invalid PID %d specified for oom_score_adj", pid)
@ -61,20 +73,18 @@ func applyOOMScoreAdj(pid int, oomScoreAdj int) error {
pidStr = strconv.Itoa(pid)
}
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
maxTries := 2
oomScoreAdjPath := path.Join("/proc", pidStr, "oom_score_adj")
value := strconv.Itoa(oomScoreAdj)
var err error
for i := 0; i < maxTries; i++ {
_, readErr := ioutil.ReadFile(oomScoreAdjPath)
if readErr != nil {
err = fmt.Errorf("failed to read oom_score_adj: %v", readErr)
} else if writeErr := ioutil.WriteFile(oomScoreAdjPath, []byte(strconv.Itoa(oomScoreAdj)), 0700); writeErr != nil {
err = fmt.Errorf("failed to set oom_score_adj to %d: %v", oomScoreAdj, writeErr)
} else {
return nil
if err = ioutil.WriteFile(oomScoreAdjPath, []byte(value), 0700); err != nil {
if syscallNotExists(err) {
return os.ErrNotExist
}
err = fmt.Errorf("failed to apply oom-score-adj to pid %d (%v)", pid, err)
}
}
return err
}
@ -86,6 +96,10 @@ func (oomAdjuster *OOMAdjuster) applyOOMScoreAdjContainer(cgroupName string, oom
continueAdjusting := false
pidList, err := oomAdjuster.pidLister(cgroupName)
if err != nil {
if syscallNotExists(err) {
// Nothing to do since the container doesn't exist anymore.
return os.ErrNotExist
}
continueAdjusting = true
glog.Errorf("Error getting process list for cgroup %s: %+v", cgroupName, err)
} else if len(pidList) == 0 {
@ -97,6 +111,7 @@ func (oomAdjuster *OOMAdjuster) applyOOMScoreAdjContainer(cgroupName string, oom
if err = oomAdjuster.ApplyOOMScoreAdj(pid, oomScoreAdj); err == nil {
adjustedProcessSet[pid] = true
}
// Processes can come and go while we try to apply oom score adjust value. So ignore errors here.
}
}
}

View File

@ -19,6 +19,7 @@ package procfs
import (
"fmt"
"io/ioutil"
"os"
"path"
"strconv"
"strings"
@ -48,6 +49,9 @@ func (pfs *ProcFS) GetFullContainerName(pid int) (string, error) {
filePath := path.Join("/proc", strconv.Itoa(pid), "cgroup")
content, err := ioutil.ReadFile(filePath)
if err != nil {
if e, ok := err.(*os.SyscallError); ok && os.IsNotExist(e) {
return "", os.ErrNotExist
}
return "", err
}
return containerNameFromProcCgroup(string(content))

View File

@ -20,7 +20,6 @@ import (
"bytes"
"fmt"
"math/rand"
"sort"
"strings"
"sync"
@ -101,20 +100,31 @@ func (g *genericScheduler) Schedule(pod *api.Pod, nodeLister algorithm.NodeListe
return g.selectHost(priorityList)
}
// This method takes a prioritized list of nodes and sorts them in reverse order based on scores
// and then picks one randomly from the nodes that had the highest score
// selectHost takes a prioritized list of nodes and then picks one
// randomly from the nodes that had the highest score.
func (g *genericScheduler) selectHost(priorityList schedulerapi.HostPriorityList) (string, error) {
if len(priorityList) == 0 {
return "", fmt.Errorf("empty priorityList")
}
sort.Sort(sort.Reverse(priorityList))
hosts := getBestHosts(priorityList)
maxScore := priorityList[0].Score
// idx contains indices of elements with score == maxScore.
idx := []int{}
for i, entry := range priorityList {
if entry.Score > maxScore {
maxScore = entry.Score
idx = []int{i}
} else if entry.Score == maxScore {
idx = append(idx, i)
}
}
g.randomLock.Lock()
defer g.randomLock.Unlock()
ix := g.random.Int() % len(idx)
g.randomLock.Unlock()
ix := g.random.Int() % len(hosts)
return hosts[ix], nil
return priorityList[idx[ix]].Host, nil
}
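
For illustration (not part of this diff), a standalone sketch of the same tie-breaking approach; because the pick among equal top scores is random, the corresponding test below now accepts a set of expected hosts rather than a single one:

```go
package main

import (
	"fmt"
	"math/rand"
)

type hostPriority struct {
	Host  string
	Score int
}

// selectHost mirrors the logic above: one pass to track the maximum score
// and the indices holding it, then a uniform random pick among those indices.
// Error handling for an empty list is omitted for brevity.
func selectHost(list []hostPriority, r *rand.Rand) string {
	maxScore := list[0].Score
	idx := []int{}
	for i, entry := range list {
		if entry.Score > maxScore {
			maxScore = entry.Score
			idx = []int{i}
		} else if entry.Score == maxScore {
			idx = append(idx, i)
		}
	}
	return list[idx[r.Intn(len(idx))]].Host
}

func main() {
	r := rand.New(rand.NewSource(42))
	list := []hostPriority{{"machine1", 1}, {"machine2", 1}}
	seen := map[string]bool{}
	for i := 0; i < 100; i++ {
		seen[selectHost(list, r)] = true
	}
	fmt.Println(seen) // over repeated picks, both tied hosts show up
}
```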
// Filters the nodes to find the ones that fit based on the given predicate functions
@ -258,18 +268,6 @@ func PrioritizeNodes(pod *api.Pod, machinesToPods map[string][]*api.Pod, podList
return result, nil
}
func getBestHosts(list schedulerapi.HostPriorityList) []string {
result := []string{}
for _, hostEntry := range list {
if hostEntry.Score == list[0].Score {
result = append(result, hostEntry.Host)
} else {
break
}
}
return result
}
// EqualPriority is a prioritizer function that gives an equal weight of one to all nodes
func EqualPriority(_ *api.Pod, machinesToPods map[string][]*api.Pod, podLister algorithm.PodLister, nodeLister algorithm.NodeLister) (schedulerapi.HostPriorityList, error) {
nodes, err := nodeLister.List()

View File

@ -166,14 +166,14 @@ func TestSelectHost(t *testing.T) {
func TestGenericScheduler(t *testing.T) {
tests := []struct {
name string
predicates map[string]algorithm.FitPredicate
prioritizers []algorithm.PriorityConfig
nodes []string
pod *api.Pod
pods []*api.Pod
expectedHost string
expectsErr bool
name string
predicates map[string]algorithm.FitPredicate
prioritizers []algorithm.PriorityConfig
nodes []string
pod *api.Pod
pods []*api.Pod
expectedHosts sets.String
expectsErr bool
}{
{
predicates: map[string]algorithm.FitPredicate{"false": falsePredicate},
@ -183,44 +183,43 @@ func TestGenericScheduler(t *testing.T) {
name: "test 1",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
// Random choice between both, the rand seeded above with zero, chooses "machine1"
expectedHost: "machine1",
name: "test 2",
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
expectedHosts: sets.NewString("machine1", "machine2"),
name: "test 2",
},
{
// Fits on a machine where the pod ID matches the machine name
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
prioritizers: []algorithm.PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Name: "machine2"}},
expectedHost: "machine2",
name: "test 3",
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
prioritizers: []algorithm.PriorityConfig{{Function: EqualPriority, Weight: 1}},
nodes: []string{"machine1", "machine2"},
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Name: "machine2"}},
expectedHosts: sets.NewString("machine2"),
name: "test 3",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
expectedHost: "3",
name: "test 4",
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
expectedHosts: sets.NewString("3"),
name: "test 4",
},
{
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Name: "2"}},
expectedHost: "2",
name: "test 5",
predicates: map[string]algorithm.FitPredicate{"matches": matchesPredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}},
nodes: []string{"3", "2", "1"},
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Name: "2"}},
expectedHosts: sets.NewString("2"),
name: "test 5",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}, {Function: reverseNumericPriority, Weight: 2}},
nodes: []string{"3", "2", "1"},
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Name: "2"}},
expectedHost: "1",
name: "test 6",
predicates: map[string]algorithm.FitPredicate{"true": truePredicate},
prioritizers: []algorithm.PriorityConfig{{Function: numericPriority, Weight: 1}, {Function: reverseNumericPriority, Weight: 2}},
nodes: []string{"3", "2", "1"},
pod: &api.Pod{ObjectMeta: api.ObjectMeta{Name: "2"}},
expectedHosts: sets.NewString("1"),
name: "test 6",
},
{
predicates: map[string]algorithm.FitPredicate{"true": truePredicate, "false": falsePredicate},
@ -266,8 +265,8 @@ func TestGenericScheduler(t *testing.T) {
if err != nil {
t.Errorf("Unexpected error: %v", err)
}
if test.expectedHost != machine {
t.Errorf("Failed : %s, Expected: %s, Saw: %s", test.name, test.expectedHost, machine)
if !test.expectedHosts.Has(machine) {
t.Errorf("Failed : %s, Expected: %s, Saw: %s", test.name, test.expectedHosts, machine)
}
}
}

View File

@ -57,6 +57,7 @@ func CoreDump(dir string) {
cmds = append(cmds, []command{
{"cat /var/log/kubelet.log", "kubelet"},
{"cat /var/log/supervisor/supervisord.log", "supervisord"},
{"cat /var/log/dmesg", "dmesg"},
}...)
}

View File

@ -184,13 +184,9 @@ var _ = Describe("Kubelet", func() {
Expect(vs.CapacityBytes).NotTo(BeZero())
Expect(vs.AvailableBytes).NotTo(BeZero())
Expect(vs.UsedBytes).NotTo(BeZero())
if strings.HasPrefix(vs.Name, "default-token-") {
volumeNames = append(volumeNames, "default-token-")
} else {
volumeNames = append(volumeNames, vs.Name)
}
volumeNames = append(volumeNames, vs.Name)
}
Expect(volumeNames).To(ConsistOf("default-token-", "test-empty-dir"))
Expect(volumeNames).To(ConsistOf("test-empty-dir"))
// fs usage (not for system containers)
Expect(container.Rootfs).NotTo(BeNil(), spew.Sdump(container))

View File

@ -52,7 +52,7 @@ var kubeOutputRelPath = flag.String("k8s-build-output", "_output/local/bin/linux
var kubeRoot = ""
const buildScriptRelPath = "hack/build-go.sh"
const ginkoTestRelPath = "test/e2e_node"
const ginkgoTestRelPath = "test/e2e_node"
const healthyTimeoutDuration = time.Minute * 3
func main() {
@ -158,9 +158,9 @@ func runTests(fullhost string) ([]byte, error) {
glog.Infof("Kubelet host %s tunnel running on port %s", host, ah.LPort)
u.Wait()
glog.Infof("Running ginkgo tests against host %s", host)
ginkoTests := filepath.Join(kubeRoot, ginkoTestRelPath)
ginkgoTests := filepath.Join(kubeRoot, ginkgoTestRelPath)
return exec.Command(
"ginkgo", ginkoTests, "--",
"ginkgo", ginkgoTests, "--",
"--kubelet-address", fmt.Sprintf("http://127.0.0.1:%s", kh.LPort),
"--api-server-address", fmt.Sprintf("http://127.0.0.1:%s", ah.LPort),
"--node-name", fullhost,