Skip to content

Python API

xarray_eopf.backend.EopfBackend

Bases: BackendEntrypoint

Backend for EOPF Data Products using the Zarr format.

Note that the chunks parameter passed to the xarray top-level functions xr.open_datatree() and xr.open_dataset() is not passed to the backend. Instead, xarray uses it to (re)chunk the results returned by the backend equivalents — that is, after the backend code has run.

Source code in xarray_eopf\backend.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
class EopfBackend(BackendEntrypoint):
    """Xarray backend entry point for EOPF Data Products in Zarr format.

    Note that the `chunks` argument given to the xarray top-level functions
    `xr.open_datatree()` and `xr.open_dataset()` never reaches this backend:
    xarray applies it itself to (re)chunk whatever the backend returns,
    i.e. _after_ the backend code has run.
    """

    def open_datatree(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        product_type: str | None = None,
        storage_options: Mapping[str, Any] | None = None,
        drop_variables: str | Iterable[str] | None = None,
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.DataTree:
        """Open an EOPF product as a data tree.

        Backend implementation delegated to by
        [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Args:
            filename_or_obj: File path, URL, path-like string, Zarr store,
                or other key-to-object mapping.
            op_mode: Mode of operation, either "analysis" or "native".
                Defaults to "analysis".
            product_type: Optional product type name, such as `"MSIL1C"`.
                Only used if `op_mode="analysis"`; typically not required
                if the filename inherent to `filename_or_obj`
                adheres to EOPF naming conventions.
            storage_options: If `filename_or_obj` is a file path or URL,
                these options specify the source filesystem.
                Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            drop_variables: Variable name or iterable of variable names
                to drop from the underlying file. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
            decode_timedelta: How to decode time-delta units. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Returns:
            A new data-tree instance.
        """
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        src = normalize_source(filename_or_obj, storage_options)
        # Split off the optional subgroup path inside the product.
        _, sub_path = get_source_paths(src)

        # noinspection PyTypeChecker
        tree = xr.open_datatree(
            src,
            engine="zarr",
            # empty dict = prefer the chunking recorded in the Zarr metadata
            chunks={},
            # required parameter for all xarray backends
            drop_variables=drop_variables,
            # forwarded to silence xarray warnings
            decode_timedelta=decode_timedelta,
            # subgroups don't have consolidated metadata
            consolidated=False if sub_path else None,
        )

        _assert_datatree_is_chunked(tree)

        # Native mode, and analysis mode at subgroup level, both pass
        # the tree through unchanged.
        if op_mode == OP_MODE_NATIVE or sub_path:
            return tree

        # Analysis mode at product level: transform the whole tree.
        analysis = AnalysisMode.guess(filename_or_obj, product_type=product_type)
        return analysis.transform_datatree(tree)

    def open_dataset(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        # params for op_mode=native/analysis
        storage_options: Mapping[str, Any] | None = None,
        group_sep: str = "_",
        variables: str | Iterable[str] | None = None,
        # params for op_mode=analysis
        product_type: str | None = None,
        resolution: int | float | None = None,
        spline_orders: SplineOrders | None = None,
        agg_methods: AggMethods | None = None,
        # params required by xarray backend interface
        drop_variables: str | Iterable[str] | None = None,
        # params for other reasons
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.Dataset:
        """Open an EOPF product as a single dataset.

        Backend implementation delegated to by
        [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Args:
            filename_or_obj: File path, or URL, or path-like string.
            op_mode: Mode of operation, either "analysis" or "native".
                Defaults to "analysis".
            storage_options: If `filename_or_obj` is a file path or URL,
                these options specify the source filesystem.
                Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            group_sep: Separator string used to concatenate group names
                to create prefixes for unique variable and dimension names.
                Defaults to the underscore character (`"_"`).
            variables: Variables to include in the dataset. Can be a name or
                regex pattern or iterable of the latter.
            product_type: Optional product type name, such as `"MSIL1C"`.
                Only used if `op_mode="analysis"`; typically not required
                if the filename inherent to `filename_or_obj`
                adheres to EOPF naming conventions.
            resolution: Target resolution for all spatial data variables / bands.
                Must be one of `10`, `20`, or `60`.
                Only used if `op_mode="analysis"`.
            spline_orders: Optional spline orders to be used for upsampling
                spatial data variables / bands. Can be a single spline order
                for all variables or a dictionary that maps a spline order to
                applicable variable names or array data types.
                A spline order is given by one of `0` (nearest neighbor),
                `1` (linear), `2` (bi-linear), or `3` (cubic).
                The default is `3`, except for product specific overrides.
                For example, the Sentinel-2 variable `scl` uses the default `0`.
                Only used if `op_mode="analysis"`.
            agg_methods: Optional aggregation methods to be used for downsampling
                spatial data variables / bands. Can be a single aggregation method
                for all variables or a dictionary that maps an aggregation method to
                applicable variable names or array data types.
                An aggregation method is one of
                `"center"`, `"count"`, `"first"`, `"last"`, `"max"`,
                `"mean"`, `"median"`, `"mode"`, `"min"`, `"prod"`,
                `"std"`, `"sum"`, or `"var"`.
                The default is `"mean"`, except for product specific overrides.
                For example, the Sentinel-2 variable `scl` uses the default `"center"`.
                Only used if `op_mode="analysis"`.
            drop_variables: Variable name or iterable of variable names
                to drop from the underlying file. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
            decode_timedelta: How to decode time-delta units. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Returns:
            A new dataset instance.
        """
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        # Always read the raw tree first; mode-specific handling follows.
        tree = self.open_datatree(
            filename_or_obj,
            op_mode="native",
            storage_options=storage_options,
            # required parameter for all xarray backends
            drop_variables=drop_variables,
            # forwarded to silence xarray warnings
            decode_timedelta=decode_timedelta,
        )

        _assert_datatree_is_chunked(tree)

        if op_mode == OP_MODE_NATIVE:
            if tree.has_data:
                # Subgroup level: the node's own dataset is returned as-is.
                ds = tree.to_dataset()
            else:
                # Product level: flatten all groups into one dataset.
                ds = flatten_datatree(tree, sep=group_sep)
            return filter_dataset(ds, variables)

        # Analysis mode from here on.
        analysis = AnalysisMode.guess(filename_or_obj, product_type=product_type)

        if tree.has_data:
            # Subgroup level: transform the single dataset.
            # NOTE(review): `variables` is not applied in this branch —
            # confirm whether that is intentional.
            return analysis.transform_dataset(tree.to_dataset())

        # Product level: convert the whole tree into one dataset.
        params = analysis.get_applicable_params(
            resolution=resolution,
            spline_orders=spline_orders,
            agg_methods=agg_methods,
        )
        return analysis.convert_datatree(tree, includes=variables, **params)

    def guess_can_open(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    ) -> bool:
        """Report whether this backend should be auto-selected for
        `filename_or_obj`.

        The function returns `False` to indicate that this backend should
        only be used when specified by passing `engine="eopf-zarr"`.

        Args:
            filename_or_obj: File path, or URL, or path-like string.

        Returns:
            Always `False`.
        """
        # Never volunteer for xarray's engine auto-detection.
        return False

open_datatree(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, product_type=None, storage_options=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_datatree().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, a path-like string, or a Zarr store, or other key to object mapping.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
DataTree

A new data-tree instance.

Source code in xarray_eopf\backend.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def open_datatree(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    product_type: str | None = None,
    storage_options: Mapping[str, Any] | None = None,
    drop_variables: str | Iterable[str] | None = None,
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.DataTree:
    """Backend implementation delegated to by
    [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Args:
        filename_or_obj: File path, or URL, a path-like string, or
            a Zarr store, or other key to object mapping.
        op_mode: Mode of operation, either "analysis" or "native".
            Defaults to "analysis".
        product_type: Optional product type name, such as `"MSIL1C"`.
            Only used if `op_mode="analysis"`; typically not required
            if the filename inherent to `filename_or_obj`
            adheres to EOPF naming conventions.
        storage_options: If `filename_or_obj` is a file path or URL,
            these options specify the source filesystem.
            Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        drop_variables: Variable name or iterable of variable names
            to drop from the underlying file. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
        decode_timedelta: How to decode time-delta units. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Returns:
        A new data-tree instance.
    """

    # Fail fast on an invalid op_mode before any I/O happens.
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    source = normalize_source(filename_or_obj, storage_options)
    # Separate the product root from an optional subgroup path inside it.
    _, subgroup_path = get_source_paths(source)

    # noinspection PyTypeChecker
    datatree = xr.open_datatree(
        source,
        engine="zarr",
        # prefer the chunking from the Zarr metadata
        chunks={},
        # here as it is required for all backends
        drop_variables=drop_variables,
        # here to silence xarray warnings
        decode_timedelta=decode_timedelta,
        # subgroups don't have consolidated metadata
        consolidated=False if subgroup_path else None,
    )

    _assert_datatree_is_chunked(datatree)

    if op_mode == OP_MODE_NATIVE:
        # native mode, so we return tree as-is
        return datatree
    else:  # op_mode == OP_MODE_ANALYSIS
        # analysis mode
        if subgroup_path:
            # subgroup level, return subtree as-is
            return datatree
        else:
            # product level, so we transform the tree
            # product type is guessed from the filename unless given explicitly
            analysis_mode = AnalysisMode.guess(
                filename_or_obj, product_type=product_type
            )
            return analysis_mode.transform_datatree(datatree)

open_dataset(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, storage_options=None, group_sep='_', variables=None, product_type=None, resolution=None, spline_orders=None, agg_methods=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_dataset().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
group_sep str

Separator string used to concatenate group names to create prefixes for unique variable and dimension names. Defaults to the underscore character ("_").

'_'
resolution int | float | None

Target resolution for all spatial data variables / bands. Must be one of 10, 20, or 60. Only used if op_mode="analysis".

None
spline_orders SplineOrders | None

Optional spline orders to be used for upsampling spatial data variables / bands. Can be a single spline order for all variables or a dictionary that maps a spline order to applicable variable names or array data types. A spline order is given by one of 0 (nearest neighbor), 1 (linear), 2 (bi-linear), or 3 (cubic). The default is 3, except for product specific overrides. For example, the Sentinel-2 variable scl uses the default 0. Only used if op_mode="analysis"

None
agg_methods AggMethods | None

Optional aggregation methods to be used for downsampling spatial data variables / bands. Can be a single aggregation method for all variables or a dictionary that maps an aggregation method to applicable variable names or array data types. An aggregation method is one of "center", "count", "first", "last", "max", "mean", "median", "mode", "min", "prod", "std", "sum", or "var". The default is "mean", except for product specific overrides. For example, the Sentinel-2 variable scl uses the default "center". Only used if op_mode="analysis"

None
variables str | Iterable[str] | None

Variables to include in the dataset. Can be a name or regex pattern or iterable of the latter.

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
Dataset

A new dataset instance.

Source code in xarray_eopf\backend.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def open_dataset(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    # params for op_mode=native/analysis
    storage_options: Mapping[str, Any] | None = None,
    group_sep: str = "_",
    variables: str | Iterable[str] | None = None,
    # params for op_mode=analysis
    product_type: str | None = None,
    resolution: int | float | None = None,
    spline_orders: SplineOrders | None = None,
    agg_methods: AggMethods | None = None,
    # params required by xarray backend interface
    drop_variables: str | Iterable[str] | None = None,
    # params for other reasons
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.Dataset:
    """Backend implementation delegated to by
    [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Args:
        filename_or_obj: File path, or URL, or path-like string.
        op_mode: Mode of operation, either "analysis" or "native".
            Defaults to "analysis".
        product_type: Optional product type name, such as `"MSIL1C"`.
            Only used if `op_mode="analysis"`; typically not required
            if the filename inherent to `filename_or_obj`
            adheres to EOPF naming conventions.
        storage_options: If `filename_or_obj` is a file path or URL,
            these options specify the source filesystem.
            Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        group_sep: Separator string used to concatenate groups names
            to create prefixes for unique variable and dimension names.
            Defaults to the underscore character (`"_"`)
        resolution: Target resolution for all spatial data variables / bands.
            Must be one of `10`, `20`, or `60`.
            Only used if `op_mode="analysis"`.
        spline_orders: Optional spline orders to be used for upsampling
            spatial data variables / bands. Can be a single spline order
            for all variables or a dictionary that maps a spline order to
            applicable variable names or array data types.
            A spline order is given by one of `0` (nearest neighbor),
            `1` (linear), `2` (bi-linear), or `3` (cubic).
            The default is `3`, except for product specific overrides.
            For example, the Sentinel-2 variable `scl` uses the default `0`.
            Only used if `op_mode="analysis"`
        agg_methods: Optional aggregation methods to be used for downsampling
            spatial data variables / bands. Can be a single aggregation method
            for all variables or a dictionary that maps an aggregation method to
            applicable variable names or array data types.
            An aggregation method is one of
            `"center"`, `"count"`, `"first"`, `"last"`, `"max"`,
            `"mean"`, `"median"`, `"mode"`, `"min"`, `"prod"`,
            `"std"`, `"sum"`, or `"var"`.
            The default is `"mean"`, except for product specific overrides.
            For example, the Sentinel-2 variable `scl` uses the default `"center"`.
            Only used if `op_mode="analysis"`
        variables: Variables to include in the dataset. Can be a name or
            regex pattern or iterable of the latter.
        drop_variables: Variable name or iterable of variable names
            to drop from the underlying file. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
        decode_timedelta: How to decode time-delta units. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Returns:
        A new dataset instance.
    """
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    # Always open the raw tree in native mode first; the requested
    # op_mode is applied to the result below.
    datatree = self.open_datatree(
        filename_or_obj,
        op_mode="native",
        storage_options=storage_options,
        # here as it is required for all backends
        drop_variables=drop_variables,
        # here to silence xarray warnings
        decode_timedelta=decode_timedelta,
    )

    _assert_datatree_is_chunked(datatree)

    if op_mode == OP_MODE_NATIVE:
        # native mode
        if datatree.has_data:
            # subgroup level, so we return dataset as-is
            dataset = datatree.to_dataset()
        else:
            # product level, so we flatten the tree
            dataset = flatten_datatree(datatree, sep=group_sep)
        dataset = filter_dataset(dataset, variables)
    else:
        # analysis mode
        analysis_mode = AnalysisMode.guess(
            filename_or_obj, product_type=product_type
        )
        if datatree.has_data:
            # subgroup level, so we transform the dataset
            # NOTE(review): `variables` is not applied in this branch,
            # unlike the native and product-level paths — confirm whether
            # that is intentional.
            dataset = datatree.to_dataset()
            dataset = analysis_mode.transform_dataset(dataset)
        else:
            # product level, so we convert the tree into a dataset
            params = analysis_mode.get_applicable_params(
                resolution=resolution,
                spline_orders=spline_orders,
                agg_methods=agg_methods,
            )
            dataset = analysis_mode.convert_datatree(
                datatree, includes=variables, **params
            )

    return dataset

guess_can_open(filename_or_obj)

Check if the given filename_or_obj refers to an object that can be opened by this backend.

The function returns False to indicate that this backend should only be used when specified by passing engine="eopf-zarr".

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required

Returns:

Type Description
bool

Always False.

Source code in xarray_eopf\backend.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def guess_can_open(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
) -> bool:
    """Check if the given `filename_or_obj` refers to an object that
    can be opened by this backend.

    The function returns `False` to indicate that this backend should
    only be used when specified by passing `engine="eopf-zarr"`.

    Args:
        filename_or_obj: File path, or URL, or path-like string.

    Returns:
        Always `False`.
    """
    # Opt out of xarray's automatic engine detection; the backend is
    # explicit-use only.
    return False