Home | History | Annotate | Download | only in scheduler
      1 #!/usr/bin/python
      2 #pylint: disable-msg=C0111
      3 
      4 import gc, time
      5 import common
      6 from autotest_lib.frontend import setup_django_environment
      7 from autotest_lib.frontend.afe import frontend_test_utils
      8 from autotest_lib.client.common_lib.test_utils import mock
      9 from autotest_lib.client.common_lib.test_utils import unittest
     10 from autotest_lib.database import database_connection
     11 from autotest_lib.frontend.afe import models
     12 from autotest_lib.scheduler import agent_task
     13 from autotest_lib.scheduler import monitor_db, drone_manager, email_manager
     14 from autotest_lib.scheduler import pidfile_monitor
     15 from autotest_lib.scheduler import scheduler_config, gc_stats, host_scheduler
     16 from autotest_lib.scheduler import monitor_db_functional_test
     17 from autotest_lib.scheduler import monitor_db_unittest
     18 from autotest_lib.scheduler import scheduler_models
     19 
     20 _DEBUG = False
     21 
     22 
     23 class AtomicGroupTest(monitor_db_unittest.DispatcherSchedulingTest):
     24 
     25     def test_atomic_group_hosts_blocked_from_non_atomic_jobs(self):
     26         # Create a job scheduled to run on label6.
     27         self._create_job(metahosts=[self.label6.id])
     28         self._run_scheduler()
     29         # label6 only has hosts that are in atomic groups associated with it,
     30         # there should be no scheduling.
     31         self._check_for_extra_schedulings()
     32 
     33 
     34     def test_atomic_group_hosts_blocked_from_non_atomic_jobs_explicit(self):
     35         # Create a job scheduled to run on label5.  This is an atomic group
     36         # label but this job does not request atomic group scheduling.
     37         self._create_job(metahosts=[self.label5.id])
     38         self._run_scheduler()
     39         # label6 only has hosts that are in atomic groups associated with it,
     40         # there should be no scheduling.
     41         self._check_for_extra_schedulings()
     42 
     43 
     44     def test_atomic_group_scheduling_basics(self):
     45         # Create jobs scheduled to run on an atomic group.
     46         job_a = self._create_job(synchronous=True, metahosts=[self.label4.id],
     47                          atomic_group=1)
     48         job_b = self._create_job(synchronous=True, metahosts=[self.label5.id],
     49                          atomic_group=1)
     50         self._run_scheduler()
     51         # atomic_group.max_number_of_machines was 2 so we should run on 2.
     52         self._assert_job_scheduled_on_number_of(job_a.id, (5, 6, 7), 2)
     53         self._assert_job_scheduled_on(job_b.id, 8)  # label5
     54         self._assert_job_scheduled_on(job_b.id, 9)  # label5
     55         self._check_for_extra_schedulings()
     56 
     57         # The three host label4 atomic group still has one host available.
     58         # That means a job with a synch_count of 1 asking to be scheduled on
     59         # the atomic group can still use the final machine.
     60         #
     61         # This may seem like a somewhat odd use case.  It allows the use of an
     62         # atomic group as a set of machines to run smaller jobs within (a set
     63         # of hosts configured for use in network tests with eachother perhaps?)
     64         onehost_job = self._create_job(atomic_group=1)
     65         self._run_scheduler()
     66         self._assert_job_scheduled_on_number_of(onehost_job.id, (5, 6, 7), 1)
     67         self._check_for_extra_schedulings()
     68 
     69         # No more atomic groups have hosts available, no more jobs should
     70         # be scheduled.
     71         self._create_job(atomic_group=1)
     72         self._run_scheduler()
     73         self._check_for_extra_schedulings()
     74 
     75 
     76     def test_atomic_group_scheduling_obeys_acls(self):
     77         # Request scheduling on a specific atomic label but be denied by ACLs.
     78         self._do_query('DELETE FROM afe_acl_groups_hosts '
     79                        'WHERE host_id in (8,9)')
     80         job = self._create_job(metahosts=[self.label5.id], atomic_group=1)
     81         self._run_scheduler()
     82         self._check_for_extra_schedulings()
     83 
     84 
     85     def test_atomic_group_scheduling_dependency_label_exclude(self):
     86         # A dependency label that matches no hosts in the atomic group.
     87         job_a = self._create_job(atomic_group=1)
     88         job_a.dependency_labels.add(self.label3)
     89         self._run_scheduler()
     90         self._check_for_extra_schedulings()
     91 
     92 
     93     def test_atomic_group_scheduling_metahost_dependency_label_exclude(self):
     94         # A metahost and dependency label that excludes too many hosts.
     95         job_b = self._create_job(synchronous=True, metahosts=[self.label4.id],
     96                                  atomic_group=1)
     97         job_b.dependency_labels.add(self.label7)
     98         self._run_scheduler()
     99         self._check_for_extra_schedulings()
    100 
    101 
    102     def test_atomic_group_scheduling_dependency_label_match(self):
    103         # A dependency label that exists on enough atomic group hosts in only
    104         # one of the two atomic group labels.
    105         job_c = self._create_job(synchronous=True, atomic_group=1)
    106         job_c.dependency_labels.add(self.label7)
    107         self._run_scheduler()
    108         self._assert_job_scheduled_on_number_of(job_c.id, (8, 9), 2)
    109         self._check_for_extra_schedulings()
    110 
    111 
    112     def test_atomic_group_scheduling_no_metahost(self):
    113         # Force it to schedule on the other group for a reliable test.
    114         self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=9')
    115         # An atomic job without a metahost.
    116         job = self._create_job(synchronous=True, atomic_group=1)
    117         self._run_scheduler()
    118         self._assert_job_scheduled_on_number_of(job.id, (5, 6, 7), 2)
    119         self._check_for_extra_schedulings()
    120 
    121 
    122     def test_atomic_group_scheduling_partial_group(self):
    123         # Make one host in labels[3] unavailable so that there are only two
    124         # hosts left in the group.
    125         self._do_query('UPDATE afe_hosts SET status="Repair Failed" WHERE id=5')
    126         job = self._create_job(synchronous=True, metahosts=[self.label4.id],
    127                          atomic_group=1)
    128         self._run_scheduler()
    129         # Verify that it was scheduled on the 2 ready hosts in that group.
    130         self._assert_job_scheduled_on(job.id, 6)
    131         self._assert_job_scheduled_on(job.id, 7)
    132         self._check_for_extra_schedulings()
    133 
    134 
    135     def test_atomic_group_scheduling_not_enough_available(self):
    136         # Mark some hosts in each atomic group label as not usable.
    137         # One host running, another invalid in the first group label.
    138         self._do_query('UPDATE afe_hosts SET status="Running" WHERE id=5')
    139         self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=6')
    140         # One host invalid in the second group label.
    141         self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id=9')
    142         # Nothing to schedule when no group label has enough (2) good hosts..
    143         self._create_job(atomic_group=1, synchronous=True)
    144         self._run_scheduler()
    145         # There are not enough hosts in either atomic group,
    146         # No more scheduling should occur.
    147         self._check_for_extra_schedulings()
    148 
    149         # Now create an atomic job that has a synch count of 1.  It should
    150         # schedule on exactly one of the hosts.
    151         onehost_job = self._create_job(atomic_group=1)
    152         self._run_scheduler()
    153         self._assert_job_scheduled_on_number_of(onehost_job.id, (7, 8), 1)
    154 
    155 
    156     def test_atomic_group_scheduling_no_valid_hosts(self):
    157         self._do_query('UPDATE afe_hosts SET invalid=1 WHERE id in (8,9)')
    158         self._create_job(synchronous=True, metahosts=[self.label5.id],
    159                          atomic_group=1)
    160         self._run_scheduler()
    161         # no hosts in the selected group and label are valid.  no schedulings.
    162         self._check_for_extra_schedulings()
    163 
    164 
    165     def test_atomic_group_scheduling_metahost_works(self):
    166         # Test that atomic group scheduling also obeys metahosts.
    167         self._create_job(metahosts=[0], atomic_group=1)
    168         self._run_scheduler()
    169         # There are no atomic group hosts that also have that metahost.
    170         self._check_for_extra_schedulings()
    171 
    172         job_b = self._create_job(metahosts=[self.label5.id], atomic_group=1)
    173         self._run_scheduler()
    174         self._assert_job_scheduled_on(job_b.id, 8)
    175         self._assert_job_scheduled_on(job_b.id, 9)
    176         self._check_for_extra_schedulings()
    177 
    178 
    179     def test_atomic_group_skips_ineligible_hosts(self):
    180         # Test hosts marked ineligible for this job are not eligible.
    181         # How would this ever happen anyways?
    182         job = self._create_job(metahosts=[self.label4.id], atomic_group=1)
    183         models.IneligibleHostQueue.objects.create(job=job, host_id=5)
    184         models.IneligibleHostQueue.objects.create(job=job, host_id=6)
    185         models.IneligibleHostQueue.objects.create(job=job, host_id=7)
    186         self._run_scheduler()
    187         # No scheduling should occur as all desired hosts were ineligible.
    188         self._check_for_extra_schedulings()
    189 
    190 
    191     def test_atomic_group_scheduling_fail(self):
    192         # If synch_count is > the atomic group number of machines, the job
    193         # should be aborted immediately.
    194         model_job = self._create_job(synchronous=True, atomic_group=1)
    195         model_job.synch_count = 4
    196         model_job.save()
    197         job = scheduler_models.Job(id=model_job.id)
    198         self._run_scheduler()
    199         self._check_for_extra_schedulings()
    200         queue_entries = job.get_host_queue_entries()
    201         self.assertEqual(1, len(queue_entries))
    202         self.assertEqual(queue_entries[0].status,
    203                          models.HostQueueEntry.Status.ABORTED)
    204 
    205 
    206     def test_atomic_group_no_labels_no_scheduling(self):
    207         # Never schedule on atomic groups marked invalid.
    208         job = self._create_job(metahosts=[self.label5.id], synchronous=True,
    209                                atomic_group=1)
    210         # Deleting an atomic group via the frontend marks it invalid and
    211         # removes all label references to the group.  The job now references
    212         # an invalid atomic group with no labels associated with it.
    213         self.label5.atomic_group.invalid = True
    214         self.label5.atomic_group.save()
    215         self.label5.atomic_group = None
    216         self.label5.save()
    217 
    218         self._run_scheduler()
    219         self._check_for_extra_schedulings()
    220 
    221 
    222     def test_schedule_directly_on_atomic_group_host_fail(self):
    223         # Scheduling a job directly on hosts in an atomic group must
    224         # fail to avoid users inadvertently holding up the use of an
    225         # entire atomic group by using the machines individually.
    226         job = self._create_job(hosts=[5])
    227         self._run_scheduler()
    228         self._check_for_extra_schedulings()
    229 
    230 
    231     def test_schedule_directly_on_atomic_group_host(self):
    232         # Scheduling a job directly on one host in an atomic group will
    233         # work when the atomic group is listed on the HQE in addition
    234         # to the host (assuming the sync count is 1).
    235         job = self._create_job(hosts=[5], atomic_group=1)
    236         self._run_scheduler()
    237         self._assert_job_scheduled_on(job.id, 5)
    238         self._check_for_extra_schedulings()
    239 
    240 
    241     def test_schedule_directly_on_atomic_group_hosts_sync2(self):
    242         job = self._create_job(hosts=[5,8], atomic_group=1, synchronous=True)
    243         self._run_scheduler()
    244         self._assert_job_scheduled_on(job.id, 5)
    245         self._assert_job_scheduled_on(job.id, 8)
    246         self._check_for_extra_schedulings()
    247 
    248 
    249     def test_schedule_directly_on_atomic_group_hosts_wrong_group(self):
    250         job = self._create_job(hosts=[5,8], atomic_group=2, synchronous=True)
    251         self._run_scheduler()
    252         self._check_for_extra_schedulings()
    253 
    254 
    255     # TODO(gps): These should probably live in their own TestCase class
    256     # specific to testing HostScheduler methods directly.  It was convenient
    257     # to put it here for now to share existing test environment setup code.
    258     def test_HostScheduler_check_atomic_group_labels(self):
    259         normal_job = self._create_job(metahosts=[0])
    260         atomic_job = self._create_job(atomic_group=1)
    261         # Indirectly initialize the internal state of the host scheduler.
    262         self._dispatcher._refresh_pending_queue_entries()
    263 
    264         atomic_hqe = scheduler_models.HostQueueEntry.fetch(where='job_id=%d' %
    265                                                      atomic_job.id)[0]
    266         normal_hqe = scheduler_models.HostQueueEntry.fetch(where='job_id=%d' %
    267                                                      normal_job.id)[0]
    268 
    269         host_scheduler = self._dispatcher._host_scheduler
    270         self.assertTrue(host_scheduler._check_atomic_group_labels(
    271                 [self.label4.id], atomic_hqe))
    272         self.assertFalse(host_scheduler._check_atomic_group_labels(
    273                 [self.label4.id], normal_hqe))
    274         self.assertFalse(host_scheduler._check_atomic_group_labels(
    275                 [self.label5.id, self.label6.id, self.label7.id], normal_hqe))
    276         self.assertTrue(host_scheduler._check_atomic_group_labels(
    277                 [self.label4.id, self.label6.id], atomic_hqe))
    278         self.assertTrue(host_scheduler._check_atomic_group_labels(
    279                         [self.label4.id, self.label5.id],
    280                         atomic_hqe))
    281 
    282 
    283 class OnlyIfNeededTest(monitor_db_unittest.DispatcherSchedulingTest):
    284 
    285     def _setup_test_only_if_needed_labels(self):
    286         # apply only_if_needed label3 to host1
    287         models.Host.smart_get('host1').labels.add(self.label3)
    288         return self._create_job_simple([1], use_metahost=True)
    289 
    290 
    291     def test_only_if_needed_labels_avoids_host(self):
    292         job = self._setup_test_only_if_needed_labels()
    293         # if the job doesn't depend on label3, there should be no scheduling
    294         self._run_scheduler()
    295         self._check_for_extra_schedulings()
    296 
    297 
    298     def test_only_if_needed_labels_schedules(self):
    299         job = self._setup_test_only_if_needed_labels()
    300         job.dependency_labels.add(self.label3)
    301         self._run_scheduler()
    302         self._assert_job_scheduled_on(1, 1)
    303         self._check_for_extra_schedulings()
    304 
    305 
    306     def test_only_if_needed_labels_via_metahost(self):
    307         job = self._setup_test_only_if_needed_labels()
    308         job.dependency_labels.add(self.label3)
    309         # should also work if the metahost is the only_if_needed label
    310         self._do_query('DELETE FROM afe_jobs_dependency_labels')
    311         self._create_job(metahosts=[3])
    312         self._run_scheduler()
    313         self._assert_job_scheduled_on(2, 1)
    314         self._check_for_extra_schedulings()
    315 
    316 
    317     def test_metahosts_obey_blocks(self):
    318         """
    319         Metahosts can't get scheduled on hosts already scheduled for
    320         that job.
    321         """
    322         self._create_job(metahosts=[1], hosts=[1])
    323         # make the nonmetahost entry complete, so the metahost can try
    324         # to get scheduled
    325         self._update_hqe(set='complete = 1', where='host_id=1')
    326         self._run_scheduler()
    327         self._check_for_extra_schedulings()
    328 
    329 
    330