1## @file
2#  Check a patch for various format issues
3#
4#  Copyright (c) 2015 - 2016, Intel Corporation. All rights reserved.<BR>
5#
6#  This program and the accompanying materials are licensed and made
7#  available under the terms and conditions of the BSD License which
8#  accompanies this distribution. The full text of the license may be
9#  found at http://opensource.org/licenses/bsd-license.php
10#
11#  THE PROGRAM IS DISTRIBUTED UNDER THE BSD LICENSE ON AN "AS IS"
12#  BASIS, WITHOUT WARRANTIES OR REPRESENTATIONS OF ANY KIND, EITHER
13#  EXPRESS OR IMPLIED.
14#
15
16from __future__ import print_function
17
# Version string reported by the --version command line option.
VersionNumber = '0.1'
# Copyright notice; also passed to argparse as the program description.
__copyright__ = "Copyright (c) 2015 - 2016, Intel Corporation  All rights reserved."
20
21import email
22import argparse
23import os
24import re
25import subprocess
26import sys
27
class Verbose:
    """Output verbosity shared by all of the checkers.

    The levels are ordered, so callers compare them with < and >.
    """

    # Print nothing at all.
    SILENT = 0
    # Print a single result line per patch.
    ONELINE = 1
    # Print every problem found, plus a summary.
    NORMAL = 2

    # Currently selected level; NORMAL unless changed by the caller.
    level = NORMAL
31
class CommitMessageCheck:
    """Checks the contents of a git commit message.

    All of the checks run from the constructor.  Afterwards ``ok`` is
    True only if none of them reported a problem.  Problems are printed
    through error(), subject to Verbose.level.
    """

    url = 'https://github.com/tianocore/tianocore.github.io/wiki/Commit-Message-Format'

    def __init__(self, subject, message):
        """Run every commit message check.

        subject -- the subject line of the commit message
        message -- the commit message text that follows the subject

        When both are None the message is reported as missing and no
        other check is made.
        """
        self.ok = True

        if subject is None and message is None:
            self.error('Commit message is missing!')
            return

        self.subject = subject
        self.msg = message

        self.check_contributed_under()
        self.check_signed_off_by()
        self.check_misc_signatures()
        self.check_overall_format()
        self.report_message_result()

    def report_message_result(self):
        """Print the summary line (or the format guide URL on failure)."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The commit message format passed all checks.')
        else:
            print(self.url)

    def error(self, *err):
        """Record a failure and print it.

        err -- one or more text lines; the first is printed as a bullet
               and the others as continuation lines.
        """
        # The banner is only printed ahead of the first problem.
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('The commit message format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            prefix = ' *' if index == 0 else '  '
            print(prefix, line)

    def check_contributed_under(self):
        """Require the TianoCore Contributed-under line."""
        cu_msg = 'Contributed-under: TianoCore Contribution Agreement 1.0'
        if cu_msg not in self.msg:
            self.error('Missing Contributed-under! (Note: this must be ' +
                       'added by the code contributor!)')

    @staticmethod
    def make_signature_re(sig, re_input=False):
        """Build the regex that finds '<sig>: <value>' lines.

        sig      -- the signature tag, for example 'Signed-off-by'
        re_input -- True if sig is already a regular expression

        The match is case insensitive and lets '-' in the tag also match
        whitespace, so that badly written tags are found and can be
        reported.  The groups are: tag, whitespace before ':', whitespace
        after ':', value.
        """
        if re_input:
            sub_re = sig
        else:
            sub_re = sig.replace('-', r'[-\s]+')
        re_str = (r'^(?P<tag>' + sub_re +
                  r')(\s*):(\s*)(?P<value>\S.*?)(?:\s*)$')
        try:
            return re.compile(re_str, re.MULTILINE|re.IGNORECASE)
        except Exception:
            print("Tried to compile re:", re_str)
            raise

    # Matches one line of the signature block: either 'Tag: value' or a
    # bracketed '[updater: note]' annotation.
    sig_block_re = \
        re.compile(r'''^
                        (?: (?P<tag>[^:]+) \s* : \s*
                            (?P<value>\S.*?) )
                            |
                        (?: \[ (?P<updater>[^:]+) \s* : \s*
                               (?P<note>.+?) \s* \] )
                    \s* $''',
                   re.VERBOSE | re.MULTILINE)

    def find_signatures(self, sig):
        """Find and validate every '<sig>-by:' line in the message.

        sig -- the signature type; '-by' is appended unless it is already
               present or the type is 'Cc'

        Returns the list of (tag, space before ':', space after ':',
        value) tuples that were found.
        """
        if not sig.endswith('-by') and sig != 'Cc':
            sig += '-by'
        regex = self.make_signature_re(sig)

        sigs = regex.findall(self.msg)

        # Spelling problems for all of the signatures are reported first.
        for found in sigs:
            if found[0] != sig:
                self.error("'" + found[0] + "' should be '" + sig + "'")

        for _tag, before_colon, after_colon, value in sigs:
            if before_colon != '':
                self.error('There should be no spaces between ' + sig +
                           " and the ':'")
            if after_colon != ' ':
                self.error("There should be a space after '" + sig + ":'")

            self.check_email_address(value)

        return sigs

    # Splits 'Name <address>' into name, separating whitespace, address.
    email_re1 = re.compile(r'(?:\s*)(.*?)(\s*)<(.+)>\s*$',
                           re.MULTILINE|re.IGNORECASE)

    def check_email_address(self, email):
        """Check that email has the form 'Name <address>'."""
        email = email.strip()
        mo = self.email_re1.match(email)
        if mo is None:
            self.error("Email format is invalid: " + email.strip())
            return

        name = mo.group(1).strip()
        if name == '':
            self.error("Name is not provided with email address: " +
                       email)
        else:
            quoted = len(name) > 2 and name[0] == '"' and name[-1] == '"'
            if ',' in name and not quoted:
                self.error('Add quotes (") around name with a comma: ' +
                           name)

        if mo.group(2) == '':
            self.error("There should be a space between the name and " +
                       "email address: " + email)

        if ' ' in mo.group(3):
            self.error("The email address cannot contain a space: " +
                       mo.group(3))

    def check_signed_off_by(self):
        """Require at least one well formed Signed-off-by line."""
        if 'Signed-off-by' not in self.msg:
            self.error('Missing Signed-off-by! (Note: this must be ' +
                       'added by the code contributor!)')
            return

        if not self.find_signatures('Signed-off'):
            self.error('Invalid Signed-off-by format!')

    sig_types = (
        'Reviewed',
        'Reported',
        'Tested',
        'Suggested',
        'Acked',
        'Cc'
        )

    def check_misc_signatures(self):
        """Validate the optional signature types listed in sig_types."""
        for sig in self.sig_types:
            self.find_signatures(sig)

    def check_overall_format(self):
        """Check line lengths and the position of the signature block.

        The subject and a blank separator line are put in front of the
        message lines, so the line numbers in the error text count from
        the subject line.
        """
        # splitlines() drops the line terminators, so the subject and the
        # separator are added without one as well.  Every length below is
        # therefore measured on the text alone.
        lines = [self.subject, ''] + self.msg.splitlines()
        count = len(lines)

        # Trailing whitespace does not count towards the subject length.
        subject = lines[0].rstrip()
        if len(subject) >= 72:
            self.error('First line of commit message (subject line) ' +
                       'is too long.')

        if len(subject) == 0:
            self.error('First line of commit message (subject line) ' +
                       'is empty.')

        # The separator is generated above, so this only guards against a
        # future change in how the lines are assembled.
        if lines[1].strip() != '':
            self.error('Second line of commit message should be ' +
                       'empty.')

        for line_number, text in enumerate(lines[2:], 3):
            # A single long word (such as a URL) cannot be wrapped.
            if (len(text) >= 76 and
                len(text.split()) > 1 and
                not text.startswith('git-svn-id:')):
                self.error('Line %d of commit message is too long.' %
                           line_number)

        # Walk up from the last line: the signature block has to be
        # preceded by an empty line.
        last_sig_line = None
        for i in range(count - 1, 0, -1):
            line = lines[i]
            mo = self.sig_block_re.match(line)
            if mo is None:
                if line.strip() == '':
                    break
                elif last_sig_line is not None:
                    err2 = 'Add empty line before "%s"?' % last_sig_line
                    self.error('The line before the signature block ' +
                               'should be empty', err2)
                else:
                    self.error('The signature block was not found')
                break
            last_sig_line = line.strip()
233
# Parser states used by GitDiffCheck:
#   START     - expecting a 'diff --git' line
#   PRE_PATCH - reading the per-file header lines that precede a hunk
#   PATCH     - reading the lines of a hunk (or of a binary payload)
(START, PRE_PATCH, PATCH) = range(3)

class GitDiffCheck:
    """Checks the contents of a git diff.

    The diff is parsed line by line from the constructor.  ``ok`` is
    False if any problem was reported.  ``format_ok`` is False if the
    diff itself could not be parsed, which also stops the parsing.
    """

    def __init__(self, diff):
        """Parse and check diff, the text of a git diff."""
        self.ok = True
        self.format_ok = True
        self.lines = diff.splitlines(True)
        self.count = len(self.lines)
        self.line_num = 0
        self.state = START
        # Give the per-file state defined values before any line is read.
        self.binary = False
        self.set_filename(None)
        while self.line_num < self.count and self.format_ok:
            line_num = self.line_num
            self.run()
            # Every step has to consume at least one line.
            assert(self.line_num > line_num)
        self.report_message_result()

    def report_message_result(self):
        """Print the summary line when everything passed."""
        if Verbose.level < Verbose.NORMAL:
            return
        if self.ok:
            print('The code passed all checks.')

    def run(self):
        """Process the line at self.line_num and advance past it."""
        line = self.lines[self.line_num]

        # A new 'diff --git' line always starts the next file.
        if self.state in (PRE_PATCH, PATCH):
            if line.startswith('diff --git'):
                self.state = START
        if self.state == PATCH:
            if line.startswith('@@ '):
                self.state = PRE_PATCH
            elif (not self.binary and
                  len(line) >= 1 and line[0] not in ' -+' and
                  not line.startswith(r'\ No newline ')):
                # A text hunk line must start with ' ', '-' or '+', so
                # this is the end of the patch.  Binary payload lines
                # are excluded: they start with arbitrary characters and
                # do not mark the end of anything.
                remaining = self.lines[self.line_num + 1:]
                if any(text.startswith('diff --git') for text in remaining):
                    self.format_error('diff found after end of patch')
                self.line_num = self.count
                return

        if self.state == START:
            if line.startswith('diff --git'):
                self.state = PRE_PATCH
                self.set_filename(None)
            elif len(line.rstrip()) != 0:
                self.format_error("didn't find diff command")
            self.line_num += 1
        elif self.state == PRE_PATCH:
            if line.startswith('+++ b/'):
                self.set_filename(line[6:].rstrip())
            if line.startswith('@@ '):
                self.state = PATCH
                self.binary = False
            elif line.startswith('GIT binary patch'):
                self.state = PATCH
                self.binary = True
            elif not line.startswith(self.pre_patch_prefixes):
                self.format_error("didn't find diff hunk marker (@@)")
            self.line_num += 1
        elif self.state == PATCH:
            if self.binary:
                # Encoded binary data: there is no source text to check.
                pass
            elif line.startswith('-'):
                pass
            elif line.startswith('+'):
                self.check_added_line(line[1:])
            elif line.startswith(r'\ No newline '):
                pass
            elif not line.startswith(' '):
                self.format_error("unexpected patch line")
            self.line_num += 1

    # Header lines that may appear between 'diff --git' and the first hunk.
    pre_patch_prefixes = (
        '--- ',
        '+++ ',
        'index ',
        'new file ',
        'deleted file ',
        'old mode ',
        'new mode ',
        'similarity index ',
        'rename ',
        'Binary files ',
        )

    # Ordered so that the two character endings are tried first.
    line_endings = ('\r\n', '\n\r', '\n', '\r')

    def set_filename(self, filename):
        """Record the file being patched (None when not yet known).

        CRLF line endings are required for every file except those whose
        name ends with '.sh', and also while the name is unknown.
        """
        self.hunk_filename = filename
        if filename:
            self.force_crlf = not filename.endswith('.sh')
        else:
            self.force_crlf = True

    def added_line_error(self, msg, line):
        """Report msg together with the file name and the offending line."""
        lines = [ msg ]
        if self.hunk_filename is not None:
            lines.append('File: ' + self.hunk_filename)
        lines.append('Line: ' + line)

        self.error(*lines)

    # Finds DEBUG ((... EFI_D_xxx and captures the xxx part.
    old_debug_re = \
        re.compile(r'''
                        DEBUG \s* \( \s* \( \s*
                        (?: DEBUG_[A-Z_]+ \s* \| \s*)*
                        EFI_D_ ([A-Z_]+)
                   ''',
                   re.VERBOSE)

    def check_added_line(self, line):
        """Check one added line (without its leading '+') for style issues.

        Reports a line ending other than CRLF where CRLF is required, tab
        characters, trailing whitespace and use of the EFI_D_* debug
        masks.
        """
        eol = ''
        for an_eol in self.line_endings:
            if line.endswith(an_eol):
                eol = an_eol
                line = line[:-len(eol)]
                # The first (longest) ending that matches is the one.
                break

        stripped = line.rstrip()

        if self.force_crlf and eol != '\r\n':
            self.added_line_error('Line ending (%s) is not CRLF' % repr(eol),
                                  line)
        if '\t' in line:
            self.added_line_error('Tab character used', line)
        if len(stripped) < len(line):
            self.added_line_error('Trailing whitespace found', line)

        mo = self.old_debug_re.search(line)
        if mo is not None:
            self.added_line_error('EFI_D_' + mo.group(1) + ' was used, '
                                  'but DEBUG_' + mo.group(1) +
                                  ' is now recommended', line)

    split_diff_re = re.compile(r'''
                                   (?P<cmd>
                                       ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                                   )
                                   (?P<index>
                                       ^ index \s+ .+ $
                                   )
                               ''',
                               re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    def format_error(self, err):
        """Report a problem with the diff format itself and stop parsing."""
        self.format_ok = False
        err = 'Patch format error: ' + err
        err2 = 'Line: ' + self.lines[self.line_num].rstrip()
        self.error(err, err2)

    def error(self, *err):
        """Record a failure and print it.

        err -- one or more text lines; the first is printed as a bullet
               and the others as continuation lines.
        """
        # The banner is only printed ahead of the first problem.
        if self.ok and Verbose.level > Verbose.ONELINE:
            print('Code format is not valid:')
        self.ok = False
        if Verbose.level < Verbose.NORMAL:
            return
        for index, line in enumerate(err):
            prefix = ' *' if index == 0 else '  '
            print(prefix, line)
401
class CheckOnePatch:
    """Checks the contents of a git email formatted patch.

    Various checks are performed on both the commit message and the
    patch content.  ``ok`` is True only if both of them passed.
    """

    def __init__(self, name, patch):
        """Check one patch.

        name  -- the label printed with the result in one-line mode
        patch -- the text of the patch: a git formatted email, or a bare
                 diff that starts with 'diff --git'
        """
        self.patch = patch
        self.find_patch_pieces()

        msg_check = CommitMessageCheck(self.commit_subject, self.commit_msg)
        msg_ok = msg_check.ok

        diff_ok = True
        if self.diff is not None:
            diff_check = GitDiffCheck(self.diff)
            diff_ok = diff_check.ok

        self.ok = msg_ok and diff_ok

        if Verbose.level == Verbose.ONELINE:
            if self.ok:
                result = 'ok'
            else:
                problems = []
                if not msg_ok:
                    problems.append('commit message')
                if not diff_ok:
                    problems.append('diff content')
                result = 'bad ' + ' and '.join(problems)
            print(name, result)


    # Finds the first 'diff --git a/... b/...' line.
    git_diff_re = re.compile(r'''
                                 ^ diff \s+ --git \s+ a/.+ \s+ b/.+ $
                             ''',
                             re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Separates the commit message from the '---' line and the diffstat
    # lines that follow it.
    stat_re = \
        re.compile(r'''
                       (?P<commit_message> [\s\S\r\n]* )
                       (?P<stat>
                           ^ --- $ [\r\n]+
                           (?: ^ \s+ .+ \s+ \| \s+ \d+ \s+ \+* \-*
                               $ [\r\n]+ )+
                           [\s\S\r\n]+
                       )
                   ''',
                   re.IGNORECASE | re.VERBOSE | re.MULTILINE)

    # Matches the leading '[...]' groups of a subject, e.g. '[PATCH 1/2]'.
    subject_prefix_re = \
        re.compile(r'''^
                       \s* (\[
                        [^\[\]]* # Allow all non-brackets
                       \])* \s*
                   ''',
                   re.VERBOSE)

    def find_patch_pieces(self):
        """Split self.patch into subject, commit message, stat and diff.

        Sets commit_subject, commit_msg, stat, commit_prefix and diff.
        Each one stays None if that piece was not found.  A patch that
        starts with 'diff --git' is taken to be a bare diff with no
        commit message at all.
        """
        if sys.version_info < (3, 0):
            patch = self.patch.encode('ascii', 'ignore')
        else:
            patch = self.patch

        self.commit_msg = None
        self.stat = None
        self.commit_subject = None
        self.commit_prefix = None
        self.diff = None

        if patch.startswith('diff --git'):
            self.diff = patch
            return

        pmail = email.message_from_string(patch)
        parts = list(pmail.walk())
        # Only single part, plain text emails are supported.
        assert(len(parts) == 1)
        assert(parts[0].get_content_type() == 'text/plain')
        content = parts[0].get_payload(decode=True).decode('utf-8', 'ignore')

        mo = self.git_diff_re.search(content)
        if mo is not None:
            self.diff = content[mo.start():]
            content = content[:mo.start()]

        mo = self.stat_re.search(content)
        if mo is None:
            self.commit_msg = content
        else:
            self.stat = mo.group('stat')
            self.commit_msg = mo.group('commit_message')

        subject = pmail['subject']
        if subject is None:
            # There is no Subject header.  Use an empty subject so the
            # commit message check can report it, rather than failing
            # here on None.
            subject = ''
        # Join a subject that was folded over several header lines.
        subject = subject.replace('\r\n', '').replace('\n', '')
        self.commit_subject = self.subject_prefix_re.sub('', subject, 1)
498
class CheckGitCommits:
    """Reads patches from git based on the specified git revision range.

    The patches are read from git, and then checked.  ``ok`` is False if
    any patch failed its checks, or if git could not list the commits.
    """

    def __init__(self, rev_spec, max_count):
        """Check the commits selected by rev_spec.

        rev_spec  -- a git revision or revision range
        max_count -- the largest number of commits to check, or None for
                     no limit
        """
        # Set before listing the commits, which clears it on failure.
        self.ok = True
        commits = self.read_commit_list_from_git(rev_spec, max_count)
        if len(commits) == 1 and Verbose.level > Verbose.ONELINE:
            # Show the name the user gave instead of the abbreviated id.
            commits = [ rev_spec ]
        blank_line = False
        for commit in commits:
            if Verbose.level > Verbose.ONELINE:
                # Separate the reports of successive commits.
                if blank_line:
                    print()
                else:
                    blank_line = True
                print('Checking git commit:', commit)
            patch = self.read_patch_from_git(commit)
            self.ok &= CheckOnePatch(commit, patch).ok

    def read_commit_list_from_git(self, rev_spec, max_count):
        """Return the abbreviated ids of the commits named by rev_spec.

        If git fails, the failure is reported, self.ok is cleared and an
        empty list is returned.  git's error text is never treated as a
        list of commits.
        """
        # Run git to get the list of commits
        cmd = [ 'rev-list', '--abbrev-commit', '--no-walk' ]
        if max_count is not None:
            cmd.append('--max-count=' + str(max_count))
        cmd.append(rev_spec)
        status, out = self._run_git_status(*cmd)
        if status != 0:
            self.ok = False
            if Verbose.level == Verbose.ONELINE:
                print(rev_spec, 'bad git revision')
            elif Verbose.level > Verbose.ONELINE:
                print('Unable to read git commits for:', rev_spec)
                # stderr is merged into the output, so this is git's
                # own explanation of the failure.
                print(out.rstrip())
            return []
        return out.split()

    def read_patch_from_git(self, commit):
        """Return commit as the text of a git formatted email."""
        # Run git to get the commit patch
        return self.run_git('show', '--pretty=email', commit)

    def run_git(self, *args):
        """Run 'git' with args and return its combined output as text."""
        return self._run_git_status(*args)[1]

    def _run_git_status(self, *args):
        """Run 'git' with args; return (exit status, combined output)."""
        cmd = [ 'git' ]
        cmd += args
        p = subprocess.Popen(cmd,
                     stdout=subprocess.PIPE,
                     stderr=subprocess.STDOUT)
        out = p.communicate()[0].decode('utf-8', 'ignore')
        return p.returncode, out
541
class CheckOnePatchFile:
    """Performs a patch check for a single file.

    stdin is used when the filename is '-'.
    """

    def __init__(self, patch_filename):
        """Read the patch from patch_filename and check it.

        The result of the check is stored in ``ok``.
        """
        if patch_filename == '-':
            patch = sys.stdin.read()
            patch_filename = 'stdin'
        else:
            # Read bytes and decode leniently so that a stray invalid
            # byte does not abort the check.  The file is closed even if
            # the read fails.
            with open(patch_filename, 'rb') as f:
                patch = f.read().decode('utf-8', 'ignore')
        if Verbose.level > Verbose.ONELINE:
            print('Checking patch file:', patch_filename)
        self.ok = CheckOnePatch(patch_filename, patch).ok
559
class CheckOneArg:
    """Performs a patch check for a single command line argument.

    An argument that is '-' or that names an existing path is checked as
    a patch file.  Anything else is handed to git as a revision.
    """

    def __init__(self, param, max_count=None):
        """Check param and store the result in ``ok``.

        max_count is only used when param is checked as a git revision.
        """
        is_patch_file = param == '-' or os.path.exists(param)
        if is_patch_file:
            outcome = CheckOnePatchFile(param).ok
        else:
            outcome = CheckGitCommits(param, max_count).ok
        self.ok = outcome
574
class PatchCheckApp:
    """Checks patches based on the command line arguments.

    After construction ``retval`` holds the process exit status: 0 if
    every check passed and -1 otherwise.
    """

    def __init__(self):
        """Parse the command line and check every argument given."""
        self.parse_options()
        self.ok = True
        self.count = None

        # With nothing named on the command line, check HEAD.
        targets = self.args.patches
        if not targets:
            targets = [ 'HEAD' ]

        for target in targets:
            self.process_one_arg(target)

        # A trailing '-N' with nothing after it applies to HEAD.
        if self.count is not None:
            self.process_one_arg('HEAD')

        self.retval = 0 if self.ok else -1

    def process_one_arg(self, arg):
        """Handle one argument: either a '-N' count or something to check.

        A '-N' argument is stored as the commit limit for the argument
        that follows it.  Any other argument is checked, after which the
        limit is cleared.
        """
        if arg.startswith('-') and len(arg) >= 2:
            try:
                self.count = int(arg[1:])
            except ValueError:
                # Not a number, so treat it like any other argument.
                pass
            else:
                return
        checked = CheckOneArg(arg, self.count)
        self.ok &= checked.ok
        self.count = None

    def parse_options(self):
        """Parse sys.argv into self.args and set the verbosity level."""
        arg_parser = argparse.ArgumentParser(description=__copyright__)
        arg_parser.add_argument('--version', action='version',
                                version='%(prog)s ' + VersionNumber)
        arg_parser.add_argument('patches', nargs='*',
                                help='[patch file | git rev list]')
        verbosity = arg_parser.add_mutually_exclusive_group()
        verbosity.add_argument("--oneline",
                               action="store_true",
                               help="Print one result per line")
        verbosity.add_argument("--silent",
                               action="store_true",
                               help="Print nothing")
        self.args = arg_parser.parse_args()
        if self.args.silent:
            Verbose.level = Verbose.SILENT
        elif self.args.oneline:
            Verbose.level = Verbose.ONELINE
626
if __name__ == "__main__":
    # Run the checks and hand the result to the shell as the exit status.
    app = PatchCheckApp()
    sys.exit(app.retval)
629