"""
Angular commit style parser
https://github.com/angular/angular/blob/master/CONTRIBUTING.md#-commit-message-guidelines
"""
from __future__ import annotations
import logging
import re
from functools import reduce
from itertools import zip_longest
from re import compile as regexp
from textwrap import dedent
from typing import TYPE_CHECKING, Tuple
from git.objects.commit import Commit
from pydantic.dataclasses import dataclass
from semantic_release.commit_parser._base import CommitParser, ParserOptions
from semantic_release.commit_parser.token import (
ParsedCommit,
ParsedMessageResult,
ParseError,
ParseResult,
)
from semantic_release.commit_parser.util import (
breaking_re,
deep_copy_commit,
force_str,
parse_paragraphs,
)
from semantic_release.enums import LevelBump
from semantic_release.errors import InvalidParserOptions
from semantic_release.helpers import sort_numerically, text_reducer
if TYPE_CHECKING: # pragma: no cover
from git.objects.commit import Commit
logger = logging.getLogger(__name__)
def _logged_parse_error(commit: Commit, error: str) -> ParseError:
    """
    Log a parse failure at debug level and wrap it in a ParseError.

    :param commit: the commit whose message could not be parsed
    :param error: human-readable description of the failure
    :return: a ParseError built from the given commit and error text
    """
    logger.debug(error)
    failure = ParseError(commit, error=error)
    return failure
# TODO: Remove from here, allow for user customization instead via options
# Maps a commit type to the longer name used for it in the changelog.
# Types missing from this table fall back to the bare type name wherever the
# lookup is done with ``LONG_TYPE_NAMES.get(type, type)``.
LONG_TYPE_NAMES: dict[str, str] = {
    "build": "build system",
    "ci": "continuous integration",
    "chore": "chores",
    "docs": "documentation",
    "feat": "features",
    "fix": "bug fixes",
    "perf": "performance improvements",
    "refactor": "refactoring",
    "style": "code style",
    "test": "testing",
}
[docs]
@dataclass
class AngularParserOptions(ParserOptions):
    """Options dataclass for AngularCommitParser"""

    minor_tags: Tuple[str, ...] = ("feat",)
    """Commit-type prefixes that should result in a minor release bump."""

    patch_tags: Tuple[str, ...] = ("fix", "perf")
    """Commit-type prefixes that should result in a patch release bump."""

    other_allowed_tags: Tuple[str, ...] = (
        "build",
        "chore",
        "ci",
        "docs",
        "style",
        "refactor",
        "test",
    )
    """Commit-type prefixes that are allowed but do not result in a version bump."""

    allowed_tags: Tuple[str, ...] = (
        *minor_tags,
        *patch_tags,
        *other_allowed_tags,
    )
    """
    All commit-type prefixes that are allowed.

    These are used to identify a valid commit message. If a commit message does not start with
    one of these prefixes, it will not be considered a valid commit message.
    """

    default_bump_level: LevelBump = LevelBump.NO_RELEASE
    """The minimum bump level to apply to valid commit message."""

    # TODO: breaking change v10, change default to True
    parse_squash_commits: bool = False
    """Toggle flag for whether or not to parse squash commits"""

    def __post_init__(self) -> None:
        # Groups are applied in order, so a tag listed in several groups ends up
        # with the level of the last group that mentions it:
        # default level < patch < minor.
        level_groups = (
            (self.allowed_tags, self.default_bump_level),
            (self.patch_tags, LevelBump.PATCH),
            (self.minor_tags, LevelBump.MINOR),
        )
        # Tags containing "|" are left out of the mapping.
        self._tag_to_level: dict[str, LevelBump] = {
            str(tag): level
            for tags, level in level_groups
            for tag in tags
            if "|" not in str(tag)
        }

    @property
    def tag_to_level(self) -> dict[str, LevelBump]:
        """A mapping of commit tags to the level bump they should result in."""
        return self._tag_to_level
[docs]
class AngularCommitParser(CommitParser[ParseResult, AngularParserOptions]):
    """
    A commit parser for projects conforming to the angular style of conventional
    commits. See https://www.conventionalcommits.org/en/v1.0.0-beta.4/

    Commit messages are recognised with regular expressions built from the
    commit types listed in the options' ``allowed_tags``. When the options'
    ``parse_squash_commits`` flag is set, ``parse`` first splits a squashed
    commit message into its individual commit messages and parses each one.
    """

    # TODO: Deprecate in lieu of get_default_options()
    # Options class associated with this parser.
    parser_options = AngularParserOptions
def __init__(self, options: AngularParserOptions | None = None) -> None:
    """
    Compile the regular expressions this parser uses on commit messages.

    :param options: parser options, forwarded as-is to the base class initializer
    :raises InvalidParserOptions: if the configured commit types cannot be
        compiled into a regular expression
    """
    super().__init__(options)

    try:
        commit_type_pattern = regexp(
            r"(?P<type>%s)" % "|".join(self.options.allowed_tags)
        )
    except re.error as err:
        error_lines = (
            f"Invalid options for {self.__class__.__name__}",
            "Unable to create regular expression from configured commit-types.",
            "Please check the configured commit-types and remove or escape any regular expression characters.",
        )
        raise InvalidParserOptions("\n".join(error_lines)) from err

    # "<type>(<scope>)!: " -- scope and "!" are both optional
    self.commit_prefix = regexp(
        f"^{commit_type_pattern.pattern}"
        r"(?:\((?P<scope>[^\n]+)\))?"
        # TODO: remove ! support as it is not part of the angular commit spec (its part of conventional commits spec)
        r"(?P<break>!)?:\s+"
    )

    # prefix + subject line + optional body separated by a blank line
    self.re_parser = regexp(
        self.commit_prefix.pattern
        + r"(?P<subject>[^\n]+)"
        + r"(?:\n\n(?P<text>.+))?",  # commit body
        flags=re.DOTALL,
    )

    # GitHub & Gitea use (#123), GitLab uses (!123), and BitBucket uses (pull request #123)
    self.mr_selector = regexp(
        r"[\t ]+\((?:pull request )?(?P<mr_number>[#!]\d+)\)[\t ]*$"
    )

    # issue-closing footers such as "Closes: #123" or "fixed: #1, #2"
    self.issue_selector = regexp(
        r"^(?:clos(?:e|es|ed|ing)|fix(?:es|ed|ing)?|resolv(?:e|es|ed|ing)|implement(?:s|ed|ing)?):"
        r"[\t ]+(?P<issue_predicate>.+)[\t ]*$",
        flags=re.MULTILINE | re.IGNORECASE,
    )

    def _line_remover(pattern: str) -> tuple[re.Pattern[str], str]:
        # (multiline pattern, replacement) pair that replaces matches with nothing
        return regexp(pattern, flags=re.MULTILINE), ""

    # Insertion order matters: the filters are applied in this order when
    # squashed commit messages are normalized.
    self.filters = {
        "typo-extra-spaces": (regexp(r"(\S) +(\S)"), r"\1 \2"),
        "git-header-commit": _line_remover(r"^[\t ]*commit [0-9a-f]+$\n?"),
        "git-header-author": _line_remover(r"^[\t ]*Author: .+$\n?"),
        "git-header-date": _line_remover(r"^[\t ]*Date: .+$\n?"),
        "git-squash-heading": _line_remover(
            r"^[\t ]*Squashed commit of the following:.*$\n?"
        ),
        "git-squash-commit-prefix": (
            regexp(
                r"^(?:[\t ]*[*-][\t ]+|[\t ]+)?"  # bullet points or indentation
                + commit_type_pattern.pattern  # prior to commit type
                + r"\b",
                flags=re.MULTILINE,
            ),
            # move commit type to the start of the line
            r"\1",
        ),
    }
[docs]
@staticmethod
def get_default_options() -> AngularParserOptions:
    """Return a new AngularParserOptions instance built with no arguments."""
    default_options = AngularParserOptions()
    return default_options
[docs]
def commit_body_components_separator(
    self, accumulator: dict[str, list[str]], text: str
) -> dict[str, list[str]]:
    """
    Fold one paragraph of a commit message into the accumulated components.

    Written as a reducer: it receives the running ``accumulator`` and the next
    paragraph, updates the accumulator in place and returns it.

    :param accumulator: dict with the list-valued keys ``breaking_descriptions``,
        ``descriptions`` and ``linked_issues``
    :param text: the paragraph to classify
    :return: the same ``accumulator`` object after the update
    """
    breaking_match = breaking_re.match(text)

    if breaking_match:
        accumulator["breaking_descriptions"].append(breaking_match.group(1) or "")
        # TODO: breaking change v10, removes breaking change footers from descriptions
        # return accumulator
    else:
        issue_match = self.issue_selector.search(text)
        if issue_match:
            # Collapse every supported separator (", and", commas, semicolons,
            # slashes, ampersands, spaces) into a plain comma before splitting
            separators = regexp(r",? and | *[,;/& ] *")
            candidates = separators.sub(
                ",", issue_match.group("issue_predicate") or ""
            ).split(",")

            # Almost all issue trackers use a number to reference an issue so
            # we use a simple regexp to validate the existence of a number which helps filter out
            # any non-issue references that don't fit our expected format
            has_number = regexp(r"\d+")
            new_issue_refs: set[str] = {
                candidate for candidate in candidates if has_number.search(candidate)
            }

            accumulator["linked_issues"] = sort_numerically(
                set(accumulator["linked_issues"]) | new_issue_refs
            )
            # TODO: breaking change v10, removes resolution footers from descriptions
            # return accumulator

    # Prevent appending duplicate descriptions
    if text not in accumulator["descriptions"]:
        accumulator["descriptions"].append(text)

    return accumulator
[docs]
def parse_message(self, message: str) -> ParsedMessageResult | None:
    """
    Parse a raw commit message into its structured components.

    :param message: the full commit message text
    :return: a ParsedMessageResult, or ``None`` when the message does not match
        the commit pattern compiled by this parser
    """
    parsed = self.re_parser.match(message)
    if not parsed:
        return None

    parsed_type = parsed.group("type")
    parsed_scope = parsed.group("scope") or ""
    parsed_break = parsed.group("break")
    parsed_subject = parsed.group("subject")
    parsed_text = parsed.group("text")

    mr_match = self.mr_selector.search(parsed_subject)
    linked_merge_request = mr_match.group("mr_number") if mr_match else ""
    # TODO: breaking change v10, removes PR number from subject/descriptions
    # expects changelog template to format the line accordingly
    # parsed_subject = self.mr_selector.sub("", parsed_subject).strip()

    body_components: dict[str, list[str]] = {
        "breaking_descriptions": [],
        "descriptions": [],
        "linked_issues": [],
    }
    # The subject is processed first, followed by each paragraph of the body
    for paragraph in (parsed_subject, *parse_paragraphs(parsed_text or "")):
        body_components = self.commit_body_components_separator(
            body_components, paragraph
        )

    # TODO: remove parsed break support as it is not part of the angular commit spec (its part of conventional commits spec)
    if body_components["breaking_descriptions"] or parsed_break:
        level_bump = LevelBump.MAJOR
    else:
        level_bump = self.options.tag_to_level.get(
            parsed_type, self.options.default_bump_level
        )

    return ParsedMessageResult(
        bump=level_bump,
        type=parsed_type,
        category=LONG_TYPE_NAMES.get(parsed_type, parsed_type),
        scope=parsed_scope,
        descriptions=tuple(body_components["descriptions"]),
        breaking_descriptions=tuple(body_components["breaking_descriptions"]),
        linked_issues=tuple(body_components["linked_issues"]),
        linked_merge_request=linked_merge_request,
    )
[docs]
def parse_commit(self, commit: Commit) -> ParseResult:
    """
    Parse the message of a single commit.

    :param commit: the commit to parse
    :return: a ParsedCommit when the message could be parsed, otherwise a
        ParseError (which is also logged at debug level)
    """
    parsed_msg_result = self.parse_message(force_str(commit.message))

    if parsed_msg_result:
        return ParsedCommit.from_parsed_message_result(commit, parsed_msg_result)

    return _logged_parse_error(
        commit,
        f"Unable to parse commit message: {commit.message!r}",
    )
# Maybe this can be cached as an optimization, similar to how
# mypy/pytest use their own caching directories, for very large commit
# histories?
# The problem is the cache likely won't be present in CI environments
[docs]
def parse(self, commit: Commit) -> ParseResult | list[ParseResult]:
    """
    Parse a commit message

    If squash commit parsing is enabled in the options, the commit is first
    handed to ``unsquash_commit`` and every resulting commit is parsed
    separately; otherwise only the given commit is parsed. The results are
    always returned as a list, even when it holds a single ParseResult.

    When the first result carries a merge request reference, or the first
    result is a ParseError whose message matches the merge request selector,
    that reference is copied onto the other successfully parsed results.
    """
    if self.options.parse_squash_commits:
        separate_commits: list[Commit] = self.unsquash_commit(commit)
    else:
        separate_commits = [commit]

    # Parse each commit individually if there were more than one
    parsed_commits: list[ParseResult] = [
        self.parse_commit(separate_commit) for separate_commit in separate_commits
    ]

    def add_linked_merge_request(
        parsed_result: ParseResult, mr_number: str
    ) -> ParseResult:
        # Parse errors are passed through untouched; parsed commits are rebuilt
        # with the merge request reference replaced
        if not isinstance(parsed_result, ParsedCommit):
            return parsed_result

        fields = parsed_result._asdict()
        fields["linked_merge_request"] = mr_number
        return ParsedCommit(**fields)

    # TODO: improve this for other VCS systems other than GitHub & BitBucket
    # Github works as the first commit in a squash merge commit has the PR number
    # appended to the first line of the commit message
    lead_commit = next(iter(parsed_commits))

    if isinstance(lead_commit, ParsedCommit) and lead_commit.linked_merge_request:
        # If the first commit has linked merge requests, assume all commits
        # are part of the same PR and add the linked merge requests to all
        # parsed commits
        lead_mr = lead_commit.linked_merge_request
        parsed_commits = [
            lead_commit,
            *(
                add_linked_merge_request(parsed_result, lead_mr)
                for parsed_result in parsed_commits[1:]
            ),
        ]

    elif isinstance(lead_commit, ParseError) and (
        mr_match := self.mr_selector.search(force_str(lead_commit.message))
    ):
        # Handle BitBucket Squash Merge Commits (see #1085), which have non angular commit
        # format but include the PR number in the commit subject that we want to extract
        linked_merge_request = mr_match.group("mr_number")

        # apply the linked MR to all commits
        parsed_commits = [
            add_linked_merge_request(parsed_result, linked_merge_request)
            for parsed_result in parsed_commits
        ]

    return parsed_commits
[docs]
def unsquash_commit(self, commit: Commit) -> list[Commit]:
    """
    Split a squashed commit into one artificial commit per contained message.

    :param commit: the (possibly squashed) commit to split
    :return: copies of ``commit`` that differ only in their message, or
        ``[commit]`` itself when no separate messages were found
    """
    # GitHub EXAMPLE:
    # feat(changelog): add autofit_text_width filter to template environment (#1062)
    #
    # This change adds an equivalent style formatter that can apply a text alignment
    # to a maximum width and also maintain an indent over paragraphs of text
    #
    # * docs(changelog-templates): add definition & usage of autofit_text_width template filter
    #
    # * test(changelog-context): add test cases to check autofit_text_width filter use
    #
    # `git merge --squash` EXAMPLE:
    # Squashed commit of the following:
    #
    # commit 63ec09b9e844e616dcaa7bae35a0b66671b59fbb
    # Author: codejedi365 <codejedi365@gmail.com>
    # Date: Sun Oct 13 12:05:23 2024 -0600
    #
    # feat(release-config): some commit subject
    #
    separate_messages = self.unsquash_commit_message(force_str(commit.message))

    # Each artificial commit is built from its own copy of the original's
    # attributes with only the message swapped out
    artificial_commits = [
        Commit(
            **{
                **deep_copy_commit(commit),
                "message": commit_msg,
            }
        )
        for commit_msg in separate_messages
    ]

    return artificial_commits or [commit]
[docs]
def unsquash_commit_message(self, message: str) -> list[str]:
    """
    Split a squashed commit message into its individual commit messages.

    Carriage returns are removed and the message is stripped, then it is cut
    at every ``commit <hash>`` header line. Each resulting piece is searched
    for further commit messages and all findings are returned as one flat
    list, in order.

    :param message: the raw commit message
    :return: the separate commit messages found (may be empty)
    """
    normalized_message = message.replace("\r", "").strip()

    # split by obvious separate commits (applies to manual git squash merges)
    header_pattern = self.filters["git-header-commit"][0]
    obvious_squashed_commits = header_pattern.split(normalized_message)

    return [
        commit_msg
        for chunk in obvious_squashed_commits
        for commit_msg in self._find_squashed_commits_in_str(chunk)
    ]
def _find_squashed_commits_in_str(self, message: str) -> list[str]:
    """
    Collect the individual commit messages contained in a block of text.

    The text is processed paragraph by paragraph (split on blank lines). Every
    paragraph is passed through each configured filter via ``text_reducer``. A
    paragraph that starts with a commit prefix opens a new commit message; any
    other paragraph is appended to the message currently being built.

    :param message: text that may hold several commit messages
    :return: the commit messages found, in order of appearance
    """
    found_msgs: list[str] = []
    pending_msg = ""

    for paragraph in message.strip().split("\n\n"):
        if not paragraph:
            continue

        # Apply filters to normalize the paragraph
        clean_paragraph = paragraph
        for text_filter in self.filters.values():
            clean_paragraph = text_reducer(clean_paragraph, text_filter)

        # remove any filtered (and now empty) paragraphs (ie. the git headers)
        if not clean_paragraph.strip():
            continue

        if self.commit_prefix.search(clean_paragraph):
            # Start of a new angular commit: store the message built so far
            # and begin a new one
            if pending_msg:
                found_msgs.append(pending_msg)
            pending_msg = clean_paragraph
        elif pending_msg:
            # continuation of the message currently being built
            pending_msg += f"\n\n{dedent(clean_paragraph)}"
        elif not found_msgs:
            # nothing collected yet, so this paragraph starts the first message
            pending_msg = dedent(clean_paragraph)
        # else: drop the paragraph

    # Store the last commit message (if its not empty)
    if pending_msg:
        found_msgs.append(pending_msg)

    return found_msgs