mklog.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392
  1. #!/usr/bin/env python3
  2. # Copyright (C) 2020 Free Software Foundation, Inc.
  3. #
  4. # This file is part of GCC.
  5. #
  6. # GCC is free software; you can redistribute it and/or modify
  7. # it under the terms of the GNU General Public License as published by
  8. # the Free Software Foundation; either version 3, or (at your option)
  9. # any later version.
  10. #
  11. # GCC is distributed in the hope that it will be useful,
  12. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. # GNU General Public License for more details.
  15. #
  16. # You should have received a copy of the GNU General Public License
  17. # along with GCC; see the file COPYING. If not, write to
  18. # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  19. # Boston, MA 02110-1301, USA.
  20. # This script parses a .diff file generated with 'diff -up' or 'diff -cp'
  21. # and generates a skeleton ChangeLog entry for it. It does not try to be
  22. # too smart when parsing function names, but it produces a reasonable
  23. # approximation.
  24. #
  25. # Author: Martin Liska <mliska@suse.cz>
  26. import argparse
  27. import datetime
  28. import os
  29. import re
  30. import subprocess
  31. import sys
  32. from itertools import takewhile
  33. import requests
  34. from unidiff import PatchSet
  35. LINE_LIMIT = 100
  36. TAB_WIDTH = 8
  37. CO_AUTHORED_BY_PREFIX = 'co-authored-by: '
  38. pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
  39. prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
  40. dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
  41. dg_regex = re.compile(r'{\s+dg-(error|warning)')
  42. pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
  43. identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
  44. comment_regex = re.compile(r'^\/\*')
  45. struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
  46. r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
  47. macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
  48. super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
  49. fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
  50. template_and_param_regex = re.compile(r'<[^<>]*>')
  51. md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
  52. bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
  53. 'include_fields=summary,component'
  54. function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}
  55. # NB: Makefile.in isn't listed as it's not always generated.
  56. generated_files = {'aclocal.m4', 'config.h.in', 'configure'}
  57. help_message = """\
  58. Generate ChangeLog template for PATCH.
  59. PATCH must be generated using diff(1)'s -up or -cp options
  60. (or their equivalent in git).
  61. """
  62. script_folder = os.path.realpath(__file__)
  63. root = os.path.dirname(os.path.dirname(script_folder))
  64. firstpr = ''
  65. def find_changelog(path):
  66. folder = os.path.split(path)[0]
  67. while True:
  68. if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
  69. return folder
  70. folder = os.path.dirname(folder)
  71. if folder == '':
  72. return folder
  73. raise AssertionError()
  74. def extract_function_name(line):
  75. if comment_regex.match(line):
  76. return None
  77. m = struct_regex.search(line)
  78. if m:
  79. # Struct declaration
  80. return m.group(1) + ' ' + m.group(3)
  81. m = macro_regex.search(line)
  82. if m:
  83. # Macro definition
  84. return m.group(2)
  85. m = super_macro_regex.search(line)
  86. if m:
  87. # Supermacro
  88. return m.group(1)
  89. m = fn_regex.search(line)
  90. if m:
  91. # Discard template and function parameters.
  92. fn = m.group(1)
  93. fn = re.sub(template_and_param_regex, '', fn)
  94. return fn.rstrip()
  95. return None
  96. def try_add_function(functions, line):
  97. fn = extract_function_name(line)
  98. if fn and fn not in functions:
  99. functions.append(fn)
  100. return bool(fn)
  101. def sort_changelog_files(changed_file):
  102. return (changed_file.is_added_file, changed_file.is_removed_file)
  103. def get_pr_titles(prs):
  104. output = []
  105. for idx, pr in enumerate(prs):
  106. pr_id = pr.split('/')[-1]
  107. r = requests.get(bugzilla_url % pr_id)
  108. bugs = r.json()['bugs']
  109. if len(bugs) == 1:
  110. prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
  111. out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
  112. if out not in output:
  113. output.append(out)
  114. if output:
  115. output.append('')
  116. return '\n'.join(output)
  117. def append_changelog_line(out, relative_path, text):
  118. line = f'\t* {relative_path}:'
  119. if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
  120. out += f'{line} {text}\n'
  121. else:
  122. out += f'{line}\n'
  123. out += f'\t{text}\n'
  124. return out
  125. def get_rel_path_if_prefixed(path, folder):
  126. if path.startswith(folder):
  127. return path[len(folder):].lstrip('/')
  128. else:
  129. return path
  130. def generate_changelog(data, no_functions=False, fill_pr_titles=False,
  131. additional_prs=None):
  132. changelogs = {}
  133. changelog_list = []
  134. prs = []
  135. out = ''
  136. diff = PatchSet(data)
  137. global firstpr
  138. if additional_prs:
  139. for apr in additional_prs:
  140. if not apr.startswith('PR ') and '/' in apr:
  141. apr = 'PR ' + apr
  142. if apr not in prs:
  143. prs.append(apr)
  144. for file in diff:
  145. # skip files that can't be parsed
  146. if file.path == '/dev/null':
  147. continue
  148. changelog = find_changelog(file.path)
  149. if changelog not in changelogs:
  150. changelogs[changelog] = []
  151. changelog_list.append(changelog)
  152. changelogs[changelog].append(file)
  153. # Extract PR entries from newly added tests
  154. if 'testsuite' in file.path and file.is_added_file:
  155. # Only search first ten lines as later lines may
  156. # contains commented code which a note that it
  157. # has not been tested due to a certain PR or DR.
  158. this_file_prs = []
  159. for line in list(file)[0][0:10]:
  160. m = pr_regex.search(line.value)
  161. if m:
  162. pr = m.group('pr')
  163. if pr not in prs:
  164. prs.append(pr)
  165. this_file_prs.append(pr.split('/')[-1])
  166. else:
  167. m = dr_regex.search(line.value)
  168. if m:
  169. dr = m.group('dr')
  170. if dr not in prs:
  171. prs.append(dr)
  172. this_file_prs.append(dr.split('/')[-1])
  173. elif dg_regex.search(line.value):
  174. # Found dg-warning/dg-error line
  175. break
  176. # PR number in the file name
  177. fname = os.path.basename(file.path)
  178. m = pr_filename_regex.search(fname)
  179. if m:
  180. pr = m.group('pr')
  181. pr2 = 'PR ' + pr
  182. if pr not in this_file_prs and pr2 not in prs:
  183. prs.append(pr2)
  184. if prs:
  185. firstpr = prs[0]
  186. if fill_pr_titles:
  187. out += get_pr_titles(prs)
  188. # print list of PR entries before ChangeLog entries
  189. if prs:
  190. if not out:
  191. out += '\n'
  192. for pr in prs:
  193. out += '\t%s\n' % pr
  194. out += '\n'
  195. # sort ChangeLog so that 'testsuite' is at the end
  196. for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
  197. files = changelogs[changelog]
  198. out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
  199. out += '\n'
  200. # new and deleted files should be at the end
  201. for file in sorted(files, key=sort_changelog_files):
  202. assert file.path.startswith(changelog)
  203. in_tests = 'testsuite' in changelog or 'testsuite' in file.path
  204. relative_path = get_rel_path_if_prefixed(file.path, changelog)
  205. functions = []
  206. if file.is_added_file:
  207. msg = 'New test.' if in_tests else 'New file.'
  208. out = append_changelog_line(out, relative_path, msg)
  209. elif file.is_removed_file:
  210. out = append_changelog_line(out, relative_path, 'Removed.')
  211. elif hasattr(file, 'is_rename') and file.is_rename:
  212. # A file can be theoretically moved to a location that
  213. # belongs to a different ChangeLog. Let user fix it.
  214. #
  215. # Since unidiff 0.7.0, path.file == path.target_file[2:],
  216. # it used to be path.source_file[2:]
  217. relative_path = get_rel_path_if_prefixed(file.source_file[2:],
  218. changelog)
  219. out = append_changelog_line(out, relative_path, 'Moved to...')
  220. new_path = get_rel_path_if_prefixed(file.target_file[2:],
  221. changelog)
  222. out += f'\t* {new_path}: ...here.\n'
  223. elif os.path.basename(file.path) in generated_files:
  224. out += '\t* %s: Regenerate.\n' % (relative_path)
  225. append_changelog_line(out, relative_path, 'Regenerate.')
  226. else:
  227. if not no_functions:
  228. for hunk in file:
  229. # Do not add function names for testsuite files
  230. extension = os.path.splitext(relative_path)[1]
  231. if not in_tests and extension in function_extensions:
  232. last_fn = None
  233. modified_visited = False
  234. success = False
  235. for line in hunk:
  236. m = identifier_regex.match(line.value)
  237. if line.is_added or line.is_removed:
  238. # special-case definition in .md files
  239. m2 = md_def_regex.match(line.value)
  240. if extension == '.md' and m2:
  241. fn = m2.group(1)
  242. if fn not in functions:
  243. functions.append(fn)
  244. last_fn = None
  245. success = True
  246. if not line.value.strip():
  247. continue
  248. modified_visited = True
  249. if m and try_add_function(functions,
  250. m.group(1)):
  251. last_fn = None
  252. success = True
  253. elif line.is_context:
  254. if last_fn and modified_visited:
  255. try_add_function(functions, last_fn)
  256. last_fn = None
  257. modified_visited = False
  258. success = True
  259. elif m:
  260. last_fn = m.group(1)
  261. modified_visited = False
  262. if not success:
  263. try_add_function(functions,
  264. hunk.section_header)
  265. if functions:
  266. out += '\t* %s (%s):\n' % (relative_path, functions[0])
  267. for fn in functions[1:]:
  268. out += '\t(%s):\n' % fn
  269. else:
  270. out += '\t* %s:\n' % relative_path
  271. out += '\n'
  272. return out
  273. def update_copyright(data):
  274. current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
  275. username = subprocess.check_output('git config user.name', shell=True,
  276. encoding='utf8').strip()
  277. email = subprocess.check_output('git config user.email', shell=True,
  278. encoding='utf8').strip()
  279. changelogs = set()
  280. diff = PatchSet(data)
  281. for file in diff:
  282. changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
  283. if changelog not in changelogs:
  284. changelogs.add(changelog)
  285. with open(changelog) as f:
  286. content = f.read()
  287. with open(changelog, 'w+') as f:
  288. f.write(f'{current_timestamp} {username} <{email}>\n\n')
  289. f.write('\tUpdate copyright years.\n\n')
  290. f.write(content)
  291. def skip_line_in_changelog(line):
  292. if line.lower().startswith(CO_AUTHORED_BY_PREFIX) or line.startswith('#'):
  293. return False
  294. return True
  295. if __name__ == '__main__':
  296. parser = argparse.ArgumentParser(description=help_message)
  297. parser.add_argument('input', nargs='?',
  298. help='Patch file (or missing, read standard input)')
  299. parser.add_argument('-b', '--pr-numbers', action='store',
  300. type=lambda arg: arg.split(','), nargs='?',
  301. help='Add the specified PRs (comma separated)')
  302. parser.add_argument('-s', '--no-functions', action='store_true',
  303. help='Do not generate function names in ChangeLogs')
  304. parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
  305. help='Download title of mentioned PRs')
  306. parser.add_argument('-d', '--directory',
  307. help='Root directory where to search for ChangeLog '
  308. 'files')
  309. parser.add_argument('-c', '--changelog',
  310. help='Append the ChangeLog to a git commit message '
  311. 'file')
  312. parser.add_argument('--update-copyright', action='store_true',
  313. help='Update copyright in ChangeLog files')
  314. args = parser.parse_args()
  315. if args.input == '-':
  316. args.input = None
  317. if args.directory:
  318. root = args.directory
  319. data = open(args.input) if args.input else sys.stdin
  320. if args.update_copyright:
  321. update_copyright(data)
  322. else:
  323. output = generate_changelog(data, args.no_functions,
  324. args.fill_up_bug_titles, args.pr_numbers)
  325. if args.changelog:
  326. lines = open(args.changelog).read().split('\n')
  327. start = list(takewhile(skip_line_in_changelog, lines))
  328. end = lines[len(start):]
  329. with open(args.changelog, 'w') as f:
  330. if not start or not start[0]:
  331. # initial commit subject line 'component: [PRnnnnn]'
  332. m = prnum_regex.match(firstpr)
  333. if m:
  334. title = f'{m.group("comp")}: [PR{m.group("num")}]'
  335. start.insert(0, title)
  336. if start:
  337. # append empty line
  338. if start[-1] != '':
  339. start.append('')
  340. else:
  341. # append 2 empty lines
  342. start = 2 * ['']
  343. f.write('\n'.join(start))
  344. f.write('\n')
  345. f.write(output)
  346. f.write('\n'.join(end))
  347. else:
  348. print(output, end='')