Source code for fgen.fortran_parsing

"""
Support for parsing Fortran type declarations

We just want to make this as helpful as possible
when auto-generating our wrappers
while keeping the implementation here as minimal as possible
because duplicating the Fortran implementation logic is not the point of this module.
The fortran compiler will catch any odd type declarations at compile-time.

To understand what is going on,
we start with the syntax for declaring fortran types as defined in
Section 5.1 of the `Fortran 2003 standard <https://j3-fortran.org/doc/year/04/04-007.pdf>`_.
It looks like the below:

.. syntax highlighting below is wrong, not sure what language the specs are given in
.. code-block:: fortran

    R501 type-declaration-stmt  is declaration-type-spec [ [ , attr-spec ] ... :: ] entity-decl-list
    R502 declaration-type-spec  is intrinsic-type-spec
                                or TYPE ( derived-type-spec )
                                or CLASS ( derived-type-spec )
                                or CLASS ( * )

In short, a type declaration statement is composed of a declaration type specification,
an optional collection of attribute specifications
and then an entity declaration list.
The declaration type specification can be either an intrinsic type
that has been defined by the language,
or a derived type which has been declared using a ``type`` block elsewhere.

We focus on the combination of the declaration type specification
(which we typically shorten to type specification)
and the attribute specifications i.e. we largely ignore the entity declarations.
In the absence of a defined term in the Fortran standard,
we call this combination the type attribute declaration throughout.

To illustrate, consider the following example

.. code-block:: fortran

    real(8), dimension(3), pointer :: heat_uptake

Here is how we would refer to the parts of this example:

- type declaration statement: ``real(8), dimension(3), pointer :: heat_uptake``
- declaration type specification (or simply type specification): ``real(8)``
- attribute specifications: ``dimension(3), pointer``
- type attribute declaration: ``real(8), dimension(3), pointer``
- entity declaration (largely ignored by this module, we treat the double colon ``::`` as part of the entity declaration): ``:: heat_uptake``

One other example using a derived type

.. code-block:: fortran

    type(my_calculator) :: heat_uptake_calculator

Here is how we would refer to the parts of this example:

- type declaration statement: ``type(my_calculator) :: heat_uptake_calculator``
- declaration type specification (or simply type specification): ``type(my_calculator)``
- attribute specifications: none
- type attribute declaration (same as type specification in this case): ``type(my_calculator)``
- entity declaration (largely ignored by this module, we treat the double colon ``::`` as part of the entity declaration): ``:: heat_uptake_calculator``

Notes
-----
Fortran type declaration statements (and Fortran in general) are case-insensitive

This module relies heavily on regular expressions. If needed, use an online tool like
`regex101.com <https://regex101.com/>`_ to help you understand them.
"""  # noqa: E501
from __future__ import annotations

import re
from collections.abc import Sequence
from typing import Any, Optional, Union

from attrs import define, field

_AT_LEAST_ONE_ALPHANUMERIC_REGEXP: str = r"[a-z0-9_]+"
"""
Regex representing a search for at least one alphanumeric character or underscore

Underscores are included because the names we need to match
(e.g. "n_points", "my_calculator") routinely contain them.
The regex is written in lower case, it must be used with ``re.IGNORECASE``.
"""

_DIMENSION_SPECIFICATION: str = f"(\\*|:|{_AT_LEAST_ONE_ALPHANUMERIC_REGEXP})"
"""
Regex representing a possible dimension attribute specification in a Fortran definition

This also captures deferred and assumed shaped arrays
which are not currently supported by fgen,
but may be supported in future.

e.g. "(4, 3)", "(n, m)", "(5)", "(n, 4)", "(n_points)"
"""

_DIMENSION_REGEX: str = (
    r"DIMENSION\("
    r"(?P<dimension>"
    f"{_DIMENSION_SPECIFICATION}(,\\s*{_DIMENSION_SPECIFICATION})*"
    ")"
    r"\)"
)
"""
Regex string used to check for dimension information within attribute specifications

The content between the brackets is captured in a group called "dimension".

e.g. "dimension(4, 3)", "dimension(n)"
"""

_KIND_SPECIFICATION: str = r"(\(.*\)|)$"
"""
Regex string to capture optional type modifiers

e.g. "(kind=8)" in "real(kind=8)"
"""

_INTRINSIC_TYPE_SPECIFICATIONS: tuple[str, ...] = (
    "INTEGER",
    "REAL",
    "COMPLEX",
    "CHARACTER",
    "LOGICAL",
)
"""Intrinsic Fortran type specifications that we support (case insensitive)"""

_ENUM_LIKE_REGEX: str = r"integer\(\s?kind\s?\(\s?(?P<enum_name>[a-z0-9_]+)\s?\)\s?\)"
"""
Regex string to capture Fortran enum-like specifications

Matches things like "integer(kind(Enum))" and "integer(kind(enum_value))".
The enum type is captured in a group called "enum_name".
"""

_DERIVED_TYPE_REGEX: str = r"type\(\s?(?P<derived_type_name>[a-z0-9_]+)\s?\)"
"""
Regex string to capture derived type specifications

Matches things like "type(my_derived_type)" and "type(calculator)".
The derived type is captured in a group called "derived_type_name".
We currently don't support "class(abc)" specifications.
"""

SUPPORTED_TYPE_SPECIFICATIONS: tuple[str, ...] = (
    *[
        f"^{intrinsic_type_spec}{_KIND_SPECIFICATION}"
        for intrinsic_type_spec in _INTRINSIC_TYPE_SPECIFICATIONS
    ],
    _DERIVED_TYPE_REGEX,
    _ENUM_LIKE_REGEX,
)
"""
The collection of valid type specifications that are supported

This covers intrinsic type specifications, derived type specifications
and enum-like specifications.
Each entry is a regex that must be matched case-insensitively.
"""

SUPPORTED_ATTRIBUTE_SPECIFICATIONS: tuple[Union[str, re.Pattern[str]], ...] = (
    "ALLOCATABLE",
    # "ASYNCHRONOUS",
    _DIMENSION_REGEX,
    # "INTENT",
    # "INTRINSIC",
    # "language-binding-spec"
    # "OPTIONAL",
    "POINTER",
    # "PROTECTED",
    # "SAVE",
    "TARGET",
    # "VALUE",
    # "VOLATILE",
)
"""
The collection of valid attribute specifications that are supported

In the type declaration, "type(my_calculator), pointer",
the attribute specification is "pointer"
(the other part, "type(my_calculator)",
is simply the type specification
(or declaration type specification if you're being very precise))

Note that only a subset of valid attributes can be specified.
The more esoteric attributes have been excluded
until the use case for these attributes is better understood.
"""


def _get_type_attribute_declaration(type_declaration_statement: str) -> str:
    """
    Get type attribute declaration from a type declaration statement

    Removes the entity declaration
    (everything including and after the first "::")
    as well as any whitespace surrounding what remains.

    Parameters
    ----------
    type_declaration_statement
        Type declaration statement, the entity declaration is optional

    Returns
    -------
        The type attribute declaration without surrounding whitespace

    Examples
    --------
    >>> _get_type_attribute_declaration("real(8), pointer :: heat_uptake")
    'real(8), pointer'
    >>> _get_type_attribute_declaration("real")
    'real'
    """
    type_attribute_declaration, _, _ = type_declaration_statement.partition("::")

    # Strip so that the whitespace which typically precedes "::"
    # does not end up in the last part of the declaration
    return type_attribute_declaration.strip()


def _get_parts(type_declaration_statement: str) -> tuple[str, tuple[str, ...]]:
    """
    Split a type declaration statement into its parts

    Specifically, into a type specification and a collection of attribute specifications

    This function is used below by :meth:`FortranDataType.from_str`.
    That method takes care of
    validating the different parts of a Fortran type declaration statement.

    See the docstring of :mod:`fgen.fortran_parsing`
    for further details on how we interpret Fortran type declaration statements.

    Parameters
    ----------
    type_declaration_statement
        Type declaration statement to parse

        The entity declaration component
        (any parts including and after the "::")
        is optional and will be ignored

    Returns
    -------
        The type specification and any additional attribute specifications

    Raises
    ------
    ValueError
        No parts could be found in the type attribute declaration

    Examples
    --------
    >>> _get_parts("real :: my_variable")
    ('real', ())
    >>> _get_parts("real, dimension(5)")
    ('real', ('dimension(5)',))
    >>> _get_parts("real, dimension(5, 3)")
    ('real', ('dimension(5, 3)',))
    >>> _get_parts("integer, dimension(:, :)")
    ('integer', ('dimension(:, :)',))
    >>> _get_parts("logical, dimension(2, :)")
    ('logical', ('dimension(2, :)',))
    >>> _get_parts("logical, dimension(2, :), pointer")
    ('logical', ('dimension(2, :)', 'pointer'))
    >>> _get_parts("integer(kind(EnumValue))")
    ('integer(kind(EnumValue))', ())
    >>> _get_parts("integer(kind(EnumValue)), dimension(3) :: values")
    ('integer(kind(EnumValue))', ('dimension(3)',))
    """
    # Strip here too so the enum check below (which is anchored at the start
    # of the string) is not defeated by leading whitespace
    type_attribute_declaration = _get_type_attribute_declaration(
        type_declaration_statement
    ).strip()

    # Regular expression that captures the parts of the type declaration statement.
    #
    # It effectively just splits the string into comma separated pieces without
    # surrounding whitespace (with a little extra piece to make sure you don't
    # get nested brackets (e.g. "dimension((4, 3))") in any part, which is invalid
    # Fortran as far as we know anyway)
    #
    # e.g. this can be used to split "real, dimension(2, 3, 4)"
    # into ["real", "dimension(2, 3, 4)"]
    part_regexp_including_brackets = r"[^,\s]+(?:\([^\(]*?\))"
    part_regexp_no_brackets = r"[^,\s]+"
    part_regexp: str = "|".join(
        [part_regexp_including_brackets, part_regexp_no_brackets]
    )

    # Enum-like type specifications contain nested brackets
    # which the splitting regex above cannot handle.
    # Hence we peel the type specification off first
    # and only split whatever follows it into attribute specifications.
    enum_regex_match = re.match(
        _ENUM_LIKE_REGEX, type_attribute_declaration, flags=re.IGNORECASE
    )
    if enum_regex_match:
        remainder = type_attribute_declaration[enum_regex_match.end() :]
        return enum_regex_match.group(0), tuple(re.findall(part_regexp, remainder))

    parts = re.findall(part_regexp, type_attribute_declaration)

    if not parts:
        raise ValueError(  # noqa: TRY003
            "An invalid type attribute declaration was provided: "
            f"{type_attribute_declaration!r}"
        )

    # The Fortran spec states that the first part must be the type specification and
    # the rest are attribute specifications so we can safely assume order here
    return parts[0], tuple(parts[1:])


def _validate_fortran_type_attribute_declaration(
    type_specification: str, attribute_specifications: tuple[str, ...]
) -> None:
    """
    Check that an already-split fortran type attribute declaration is supported

    The declaration has to be split into its type specification
    and attribute specifications
    (e.g. with :func:`_get_parts`) before it is passed in.

    Only a subset of valid fortran is supported by fgen.
    The check is deliberately crude:
    each part only has to match (case-insensitively, from its start)
    one of the supported regular expressions.
    Invalid Fortran will be caught at compile time anyway.

    See the docstring of :mod:`fgen.fortran_parsing`
    for further details on how we interpret Fortran type declaration statements.

    Parameters
    ----------
    type_specification
        Type specification

        Checked against :data:`SUPPORTED_TYPE_SPECIFICATIONS`

    attribute_specifications
        Attribute specifications

        Each one is checked against :data:`SUPPORTED_ATTRIBUTE_SPECIFICATIONS`

    Raises
    ------
    ValueError
        The type specification,
        or the first unsupported attribute specification encountered,
        is not supported

    Examples
    --------
    The following (split) fortran type declarations are all accepted

    >>> _validate_fortran_type_attribute_declaration("integer", ())
    >>> _validate_fortran_type_attribute_declaration("real", ("dimension(5)",))
    >>> _validate_fortran_type_attribute_declaration(
    ...     "type(calculator)", ("dimension(5)", "pointer")
    ... )

    See Also
    --------
    :func:`_get_parts`
    """

    def _matches_any(candidate: str, supported_regexes: Any) -> bool:
        # True as soon as one of the supported regexes matches the candidate
        for supported_regex in supported_regexes:
            if re.match(supported_regex, candidate, flags=re.IGNORECASE):
                return True

        return False

    # The type specification must match the regex of a supported type
    if not _matches_any(type_specification, SUPPORTED_TYPE_SPECIFICATIONS):
        raise ValueError(  # noqa: TRY003
            f"Unsupported type specification: {type_specification}"
        )

    # Every attribute must match the regex of a supported attribute,
    # we fail on the first one that doesn't
    for attribute_specification in attribute_specifications:
        if _matches_any(attribute_specification, SUPPORTED_ATTRIBUTE_SPECIFICATIONS):
            continue

        raise ValueError(  # noqa: TRY003
            f"Unsupported attribute specification: {attribute_specification}"
        )


def _convert_complex_attribute_specifications(
    attribute_specifications: Sequence[str],
) -> tuple[Union[str, DimensionAttributeSpecification], ...]:
    """
    Swap complex attribute specifications for their supporting classes

    Any attribute specification in which a dimension attribute is found
    is replaced by a :class:`DimensionAttributeSpecification`
    built from the content between the brackets.
    Every other attribute specification is passed through as the string it was.
    The order of the attribute specifications is retained.

    Parameters
    ----------
    attribute_specifications
        Collection of attribute specifications

    Returns
    -------
        Tuple with one entry per input attribute specification,
        being either the original string
        or the class that represents the attribute
    """

    def _convert(
        attribute_specification: str,
    ) -> Union[str, DimensionAttributeSpecification]:
        dimension_match = re.search(
            _DIMENSION_REGEX, attribute_specification, flags=re.IGNORECASE
        )
        if dimension_match is None:
            # Not a dimension attribute, nothing to convert
            return attribute_specification

        return DimensionAttributeSpecification.from_dimension_info(
            dimension_match.group("dimension")
        )

    return tuple(_convert(spec) for spec in attribute_specifications)


@define
class DimensionAttributeSpecification:
    """
    Dimension attribute specification

    Defines the shape of a data type.

    See Section 5.1.2.5 of the
    `Fortran 2003 standard <https://j3-fortran.org/doc/year/04/04-007.pdf>`_
    for a description of the dimension attribute specification.

    Currently, we only support explicit dimensions
    where the size of the dimension is explicitly declared at compile-time
    with an integer or execution-time using a variable provided to a procedure
    (e.g. using "n" which is a variable that is also provided to a procedure).
    The later form is an automatic explicit dimension
    and will rely upon the Fortran compiler to validate the attribute declaration
    since additional context is required about where the attribute is used.

    Assumed-sized dimensions ("*") are not currently supported.
    """

    dimensions: tuple[Union[str, int], ...] = field()
    """
    Collection of dimensions of the attribute specification

    Can include explicit dimensions (e.g. 5)
    or automatic explicit dimensions (e.g. "n").
    Deferred dimensions (e.g. ":") are allowed
    i.e. they are not rejected by the validation.
    """

    @dimensions.validator
    def _check_dimensions(
        self, attribute: Any, value: tuple[Union[str, int], ...]
    ) -> None:
        """
        Reject assumed-sized dimensions

        Raises
        ------
        ValueError
            Any of the dimensions is "*"
        """
        for dimension in value:
            if dimension == "*":
                raise ValueError(  # noqa: TRY003
                    "Assumed-sized dimensions are not supported"
                )

    @classmethod
    def from_dimension_info(
        cls, dimension_info: str
    ) -> DimensionAttributeSpecification:
        """
        Create a DimensionAttributeSpecification from the dimension information

        The dimension information describes
        the shape and dimensionality of a variable.

        Parameters
        ----------
        dimension_info
            The dimension information is the content between the ()
            for a dimension attribute.
            For example, for an attribute "dimension(8, n)"
            the dimension_info would be `"8, n"`.

            Static dimensions are converted to integers before initialising.

        Returns
        -------
            New instance of the class this method is called on

        Raises
        ------
        ValueError
            Any of the dimensions is assumed-sized ("*")
        """
        type_info: list[Union[str, int]] = []
        for tok in dimension_info.split(","):
            try:
                size: int | str = int(tok)
            except ValueError:
                # assume : or n or something i.e. not static size
                size = tok.strip()

            type_info.append(size)

        # Use cls rather than the hard-coded class name
        # so that sub-classes get instances of themselves
        return cls(dimensions=tuple(type_info))

    @property
    def ndim(self) -> int:
        """
        Number of dimensions of the attribute
        """
        return len(self.dimensions)

    def __str__(self) -> str:
        dimensions_as_str = ", ".join(str(dim) for dim in self.dimensions)

        return f"dimension({dimensions_as_str})"
def _get_python_type_with_dimensions(
    dimension_info: Sequence[Union[str, int]],
    base: str,
) -> str:
    """
    Get a Python type that supports dimension information

    See [#1](https://gitlab.com/magicc/fgen/-/issues/1) for ongoing discussions about
    updating this to return more sophisticated Python types.

    Parameters
    ----------
    dimension_info
        Dimension information
        (e.g. extracted elsewhere from a Fortran type declaration statement)

        Integers are treated as a fixed size,
        strings (e.g. "n", ":") as an unknown size.

    base
        Base Python type to which the dimension information applies
        (e.g. "float", "int", "str")

    Returns
    -------
        Python type with dimension information

        If ``dimension_info`` is empty, this is simply ``base``.

    Examples
    --------
    >>> _get_python_type_with_dimensions([2], "float")
    'tuple[float, float]'
    >>> _get_python_type_with_dimensions(["n"], "int")
    'tuple[int, ...]'
    >>> _get_python_type_with_dimensions([2, 3], "float")
    'tuple[tuple[float, float, float], tuple[float, float, float]]'
    >>> _get_python_type_with_dimensions([2, 1], "float")
    'tuple[tuple[float], tuple[float]]'
    >>> _get_python_type_with_dimensions(["n", "m", 2], "Quantity")
    'tuple[tuple[tuple[Quantity, Quantity], ...], ...]'
    >>> _get_python_type_with_dimensions([0], "float")
    'tuple[()]'
    """

    def _dimension(inner_type: str, dimension_len: Union[int, str]) -> str:
        if isinstance(dimension_len, str):
            # Size not known statically, any number of elements
            content = [inner_type, "..."]
        elif dimension_len > 0:
            content = [inner_type] * dimension_len
        else:
            # Zero-sized dimension: "tuple[]" is not a valid annotation,
            # the empty tuple is spelt "tuple[()]"
            content = ["()"]

        return f"tuple[{', '.join(content)}]"

    # Start with base
    res = base
    # then wrap dimension information on top as it applies,
    # innermost (i.e. last) dimension first
    for dim in reversed(dimension_info):
        res = _dimension(res, dim)

    return res
@define
class FortranDataType:
    """
    A fortran data type including its attributes

    See the docstring of :mod:`fgen.fortran_parsing`
    for further details on Fortran type declaration statements
    and how we handle and describe them.

    We are only interested in the type attribute declaration
    so anything including and after a "::" should be stripped
    before initialising this class.

    As Fortran is case-insensitive,
    all the checks on this class ignore the case of the type specification
    and of the attribute specifications.
    """

    type_specification: str
    """
    This can be either an intrinsic type, or a derived type.

    For example, "real(8)", "logical", "type(my_calculator)"
    """

    attribute_specifications: tuple[
        Union[str, DimensionAttributeSpecification], ...
    ] = field()
    """
    List of attribute specifications that apply to the Fortran data type

    These are our internal representation of attribute specifications
    e.g. "pointer", "target", "dimension(3, 3)"
    """

    def __str__(self) -> str:
        return self.fortran_type_attribute_declaration

    @attribute_specifications.validator
    def _check_attribute_specifications(
        self,
        attribute: Any,
        value: tuple[Union[str, DimensionAttributeSpecification], ...],
    ) -> None:
        """
        Check that there is at most one dimension attribute specification

        Raises
        ------
        ValueError
            More than one of the values
            is a :class:`DimensionAttributeSpecification`
        """
        dimension_count = sum(
            isinstance(a, DimensionAttributeSpecification) for a in value
        )
        if dimension_count > 1:
            raise ValueError(  # noqa: TRY003
                "More than one dimension attribute specification "
                "(can Fortran be compiled "
                "with more than one dimension attribute specification?)"
            )

    @classmethod
    def from_str(cls, fortran_type_attribute_declaration: str) -> FortranDataType:
        """
        Create :obj:`FortranDataType` from type declaration statement

        Parses and validates a type declaration statement

        Parameters
        ----------
        fortran_type_attribute_declaration
            Fortran type declaration statement

            For example, "real(8), dimension(2) :: heat_uptake_sensitivity"

            The entity declaration component (including and after "::") is ignored
            hence is optional.
            See the docstring of :mod:`fgen.fortran_parsing` for further details.

        Returns
        -------
            New instance initialised from the parsed statement

        Raises
        ------
        ValueError
            The type declaration statement is invalid
        """
        type_specification, attribute_specifications = _get_parts(
            fortran_type_attribute_declaration
        )

        _validate_fortran_type_attribute_declaration(
            type_specification, attribute_specifications
        )

        return cls(
            type_specification=type_specification,
            attribute_specifications=_convert_complex_attribute_specifications(
                attribute_specifications
            ),
        )

    @property
    def _lowered_type_specification(self) -> str:
        """
        Lower-case type specification, used for case-insensitive checks
        """
        return self.type_specification.lower()

    @property
    def fortran_type_attribute_declaration(self) -> str:
        """
        Fortran type attribute declaration, including attribute specifications.

        Attribute specifications are only included if they are applicable.
        See the docstring of :mod:`fgen.fortran_parsing` for further details.
        """
        if not self.attribute_specifications:
            return self.type_specification

        attribute_specifications = ", ".join(
            str(item) for item in self.attribute_specifications
        )

        return f"{self.type_specification}, {attribute_specifications}"

    @property
    def python_equivalent_type_annotation(self) -> str:
        """
        Type annotation for the python-equivalent of this Fortran type
        """
        base_type = self.base_python_type
        dimension_attribute_specification = self.dimension_attribute_specification

        if not dimension_attribute_specification:
            return base_type

        # TODO: Fix this mess.
        # The custom handling of shapes is silly if we have an array of say,
        # size 100, so it makes more sense to just use numpy return types here.
        return _get_python_type_with_dimensions(
            dimension_attribute_specification.dimensions, base_type
        )

    @property
    def has_deferred_size(self) -> bool:
        """
        Whether this instance represents a type with a deferred size or not.

        This is a simple check for a ":" anywhere in
        :attr:`fortran_type_attribute_declaration`.

        Returns
        -------
            ``True`` if self represents a type with a deferred size,
            ``False`` otherwise.
        """
        return ":" in self.fortran_type_attribute_declaration

    @property
    def is_array(self) -> bool:
        """
        Whether this instance represents an array or not.

        Returns
        -------
            ``True`` if self represents an array
            (i.e. has a dimension attribute specification),
            ``False`` otherwise.
        """
        return self.dimension_attribute_specification is not None

    @property
    def is_array_of_derived_type(self) -> bool:
        """
        Whether this instance represents an array of a derived type or not.

        Returns
        -------
            ``True`` if self represents an array of a derived type,
            ``False`` otherwise.
        """
        return self._lowered_type_specification.startswith("type(") and self.is_array

    @property
    def is_array_of_float_double(self) -> bool:
        """
        Whether this instance represents an array of floating double-point type.

        This is a crude check:
        the type specification must contain "real" and "8".

        Returns
        -------
            ``True`` if self represents a type
            which is an array of floating double-point type,
            ``False`` otherwise.
        """
        return (
            "real" in self._lowered_type_specification
            and "8" in self.type_specification
            and self.is_array
        )

    @property
    def is_character(self) -> bool:
        """
        Whether this instance represents a character type

        Returns
        -------
            ``True`` if self represents a character type,
            ``False`` otherwise.
        """
        return self._lowered_type_specification.startswith("character")

    @property
    def is_deferred_array(self) -> bool:
        """
        Whether this instance represents a deferred size array

        Returns
        -------
            ``True`` if self represents a type which is a deferred size array
            (i.e. at least one of its dimensions is ":"),
            ``False`` otherwise.
        """
        dim_attrs = self.dimension_attribute_specification
        if dim_attrs is None:
            return False

        return ":" in dim_attrs.dimensions

    @property
    def is_deferred_array_of_derived_type(self) -> bool:
        """
        Whether this instance represents a deferred size array of a derived type

        Returns
        -------
            ``True`` if self represents a type
            which is a deferred size array of a derived type,
            ``False`` otherwise.
        """
        return self.is_array_of_derived_type and self.is_deferred_array

    @property
    def is_derived_type(self) -> bool:
        """
        Whether this instance represents a type which is a (scalar) derived type

        Returns
        -------
            ``True`` if self represents a type which is a derived type
            and is not an array,
            ``False`` otherwise
            (intrinsic types, and arrays of derived types
            for which :attr:`is_array_of_derived_type` should be used).
        """
        return (
            self._lowered_type_specification.startswith("type(")
            and not self.is_array
        )

    @property
    def is_enum(self) -> bool:
        """
        Whether this instance represents a type which is an enum.

        Returns
        -------
            ``True`` if self represents a type which is an enum.
            ``False`` otherwise.
        """
        # Hmm, not sure whether integer(kind(8)) is valid Fortran
        # and would trip this up....
        type_specification = self._lowered_type_specification

        return "integer" in type_specification and "kind(" in type_specification

    @property
    def is_logical(self) -> bool:
        """
        Whether this instance represents a logical type

        Returns
        -------
            ``True`` if self represents a logical type,
            ``False`` otherwise.
        """
        return self._lowered_type_specification.startswith("logical")

    @property
    def is_pointer(self) -> bool:
        """
        Whether this instance represents a type which is a pointer

        Returns
        -------
            ``True`` if self represents a type which is a pointer
            ``False`` otherwise.
        """
        # Only plain string attributes can be "pointer",
        # the comparison ignores case because Fortran does
        return any(
            isinstance(attribute, str) and attribute.lower() == "pointer"
            for attribute in self.attribute_specifications
        )

    @property
    def equivalent_python_type(self) -> str:
        """
        Python type-hint for the data type

        The "base" type of the equivalent Python type
        is given by :attr:`base_python_type`.

        If the Fortran type has a "dimension" attribute specification,
        this modifies the "base" type to a `tuple`
        that respects the intended shape of the value.
        n-dimensional variables are supported.
        The exception is a "base" type of "Quantity",
        which is returned as is even if there is a dimension attribute.

        TODO: remove this behaviour, doesn't scale well to long arrays
        and doesn't help with deferred shape arrays.
        """
        base_type = self.base_python_type
        dimension_attribute_specification = self.dimension_attribute_specification

        if base_type != "Quantity" and dimension_attribute_specification:
            # Pint doesn't support dimension attributes
            # so we only do this if the Python type is not a pint type
            return _get_python_type_with_dimensions(
                dimension_attribute_specification.dimensions, base_type
            )

        return base_type

    @property
    def base_python_type(self) -> str:
        """
        Determine the "base" Python type that represents the Fortran type declaration

        This focuses on the base type (float, int, DerivedType etc.)
        and ignores the modification of this type
        from attributes such as "dimension"
        (which mean the 'full' python type is some sort of array or iterable).
        Such dimension handling is done in :attr:`~equivalent_python_type`.

        Returns
        -------
            The string representation of the python type
            that is equivalent to the base type
            (i.e. ignoring any array or other container) of ``self``

        Raises
        ------
        NotImplementedError
            ``self`` represents a complex type
        ValueError
            The python type could not be determined
        """
        if self.is_derived_type or self.is_array_of_derived_type:
            match = re.match(
                _DERIVED_TYPE_REGEX, self.type_specification, flags=re.IGNORECASE
            )
            if not match:
                raise ValueError(  # noqa: TRY003 # pragma: no cover
                    # should be impossible to get here
                    f"Could not determine python type for {self.type_specification}"
                )

            return match.group("derived_type_name").strip()

        tsl = self._lowered_type_specification

        if tsl.startswith("real"):
            return "float"

        if tsl.startswith("integer"):
            # Enum-like integers map to the enum rather than a plain int
            match = re.match(
                _ENUM_LIKE_REGEX, self.type_specification, flags=re.IGNORECASE
            )
            if match:
                return match.group("enum_name").strip()

            return "int"

        if tsl.startswith("character"):
            return "str"

        if tsl.startswith("logical"):
            return "bool"

        if tsl.startswith("complex"):
            msg = (
                "We have not yet worked out how to support "
                f"{self.type_specification}, please raise an issue"
            )
            raise NotImplementedError(msg)

        raise ValueError(  # pragma: no cover
            # not sure how you could reach here
            self.type_specification
        )

    @property
    def dimension_attribute_specification(
        self,
    ) -> Optional[DimensionAttributeSpecification]:
        """
        Get the dimension attribute of the type if it exists

        Returns
        -------
            The dimension attribute if it is present otherwise None
        """
        for attr in self.attribute_specifications:
            if isinstance(attr, DimensionAttributeSpecification):
                # there can only be one dimension attribute so as soon as we
                # find it, we can stop looking
                return attr

        return None