Merge "Add memory awareness to system load governor"
diff --git a/doc/source/admin/components.rst b/doc/source/admin/components.rst
index d6b0984..2e18b51 100644
--- a/doc/source/admin/components.rst
+++ b/doc/source/admin/components.rst
@@ -575,6 +575,16 @@
       The executor will observe system load and determine whether
       to accept more jobs every 30 seconds.
 
+   .. attr:: min_avail_mem
+      :default: 5.0
+
+      This is the minimum percentage of system RAM available. The
+      executor will stop accepting more than 1 job at a time until
+      more memory is available. The available memory percentage is
+      calculated from the total available memory divided by the
+      total real memory multiplied by 100. Buffers and cache are
+      considered available in the calculation.
+
    .. attr:: hostname
       :default: hostname of the server
 
diff --git a/requirements.txt b/requirements.txt
index 39a2b02..3ab5850 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -27,3 +27,4 @@
 iso8601
 aiohttp
 uvloop;python_version>='3.5'
+psutil
diff --git a/zuul/executor/server.py b/zuul/executor/server.py
index e72fc13..d2c04ac 100644
--- a/zuul/executor/server.py
+++ b/zuul/executor/server.py
@@ -18,6 +18,7 @@
 import logging
 import multiprocessing
 import os
+import psutil
 import shutil
 import signal
 import shlex
@@ -1952,6 +1953,7 @@
         ''' Apply some heuristics to decide whether or not we should
             be askign for more jobs '''
         load_avg = os.getloadavg()[0]
+        avail_mem_pct = 100.0 - psutil.virtual_memory().percent
         if self.accepting_work:
             # Don't unregister if we don't have any active jobs.
             if load_avg > self.max_load_avg and self.job_workers:
@@ -1959,10 +1961,19 @@
                     "Unregistering due to high system load {} > {}".format(
                         load_avg, self.max_load_avg))
                 self.unregister_work()
-        elif load_avg <= self.max_load_avg:
+            elif avail_mem_pct < self.min_avail_mem:
+                self.log.info(
+                    "Unregistering due to low memory {:3.1f}% < {}".format(
+                        avail_mem_pct, self.min_avail_mem))
+                self.unregister_work()
+        elif (load_avg <= self.max_load_avg and
+                avail_mem_pct >= self.min_avail_mem):
             self.log.info(
-                "Re-registering as load is within limits {} <= {}".format(
-                    load_avg, self.max_load_avg))
+                "Re-registering as job is within limits "
+                "{} <= {} {:3.1f}% <= {}".format(load_avg,
+                                                 self.max_load_avg,
+                                                 avail_mem_pct,
+                                                 self.min_avail_mem))
             self.register_work()
         if self.statsd:
             base_key = 'zuul.executor.%s' % self.hostname