Skip to content

Python API

xarray_eopf.backend.EopfBackend

Bases: BackendEntrypoint

Backend for EOPF Data Products using the Zarr format.

Note that the chunks parameter passed to the xarray top-level functions xr.open_datatree() and xr.open_dataset() is not passed to the backend. Instead, xarray uses it to (re)chunk the results returned by the backend equivalents, i.e., after the backend code has run.

Source code in xarray_eopf/backend.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
class EopfBackend(BackendEntrypoint):
    """Backend for EOPF Data Products using the Zarr format.

    Note that the `chunks` parameter passed to the xarray top-level
    functions `xr.open_datatree()` and `xr.open_dataset()` is _not_
    passed to the backend. Instead, xarray uses it to (re)chunk the
    results returned by the backend equivalents, i.e., _after_ the
    backend code has run.
    """

    def open_datatree(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        product_type: str | None = None,
        storage_options: Mapping[str, Any] | None = None,
        drop_variables: str | Iterable[str] | None = None,
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.DataTree:
        """Backend implementation delegated to by
        [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Args:
            filename_or_obj: File path, or URL, a path-like string, or
                a Zarr store, or other key to object mapping.
            op_mode: Mode of operation, either "analysis" or "native".
                Defaults to "analysis".
            product_type: Optional product type name, such as `"MSIL1C"`.
                Only used if `op_mode="analysis"`; typically not required
                if the filename inherent to `filename_or_obj`
                adheres to EOPF naming conventions.
            storage_options: If `filename_or_obj` is a file path or URL,
                these options specify the source filesystem.
                Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            drop_variables: Variable name or iterable of variable names
                to drop from the underlying file. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
            decode_timedelta: How to decode time-delta units. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Returns:
            A new data-tree instance. The tree is returned as opened
            if `op_mode="native"` or if `filename_or_obj` refers to
            a subgroup; otherwise it is transformed by the analysis mode
            guessed for the product.
        """
        # Disable attribute expansion for cleaner, more concise rendering
        # in notebooks.
        # NOTE: `xr.set_options()` is called as a plain function, not as
        # a context manager, so the option stays in effect afterwards.
        xr.set_options(display_expand_attrs=False)

        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        # Split the given source into the product root and an optional
        # subgroup path; keep the parameter itself untouched.
        root_path, subgroup_path = normalize_source_path(filename_or_obj)
        source = normalize_source(root_path, storage_options)

        # noinspection PyTypeChecker
        datatree = xr.open_datatree(
            source,
            engine="zarr",
            group=subgroup_path,
            # prefer the chunking from the Zarr metadata
            chunks={},
            # here as it is required for all backends
            drop_variables=drop_variables,
            # here to silence xarray warnings
            decode_timedelta=decode_timedelta,
        )

        _assert_datatree_is_chunked(datatree)

        if op_mode == OP_MODE_NATIVE or subgroup_path:
            # native mode or subgroup level: return the (sub)tree as-is
            return datatree

        # analysis mode at product level: transform the tree
        analysis_mode = AnalysisMode.guess(root_path, product_type=product_type)
        return analysis_mode.transform_datatree(datatree)

    def open_dataset(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        # params for op_mode=native/analysis
        storage_options: Mapping[str, Any] | None = None,
        group_sep: str = "_",
        variables: str | Iterable[str] | None = None,
        # params for op_mode=analysis
        product_type: str | None = None,
        resolution: int | float | None = None,
        interp_methods: SpatialInterpMethods | None = None,
        agg_methods: SpatialAggMethods | None = None,
        # params required by xarray backend interface
        drop_variables: str | Iterable[str] | None = None,
        # params for other reasons
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.Dataset:
        """Backend implementation delegated to by
        [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Args:
            filename_or_obj: File path, or URL, or path-like string.
            op_mode: Mode of operation, either "analysis" or "native".
                Defaults to "analysis".
            product_type: Optional product type name, such as `"MSIL1C"`.
                Only used if `op_mode="analysis"`; typically not required
                if the filename inherent to `filename_or_obj`
                adheres to EOPF naming conventions.
            storage_options: If `filename_or_obj` is a file path or URL,
                these options specify the source filesystem.
                Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            group_sep: Separator string used to concatenate group names
                to create prefixes for unique variable and dimension names.
                Defaults to the underscore character (`"_"`).
                Only used if `op_mode="native"` and the product tree
                is flattened into a dataset.
            resolution: Target resolution for all spatial
                data variables / bands. For Sentinel-2 products it must be
                one of `10`, `20`, or `60`. Only used if `op_mode="analysis"`.
            interp_methods: Optional interpolation method to be used if
                `op_mode="analysis"`, for upsampling / interpolating
                spatial data variables. Can be a single interpolation method for all
                variables or a dictionary mapping variable names or dtypes to
                interpolation method. Supported methods include:

                - `0` (nearest neighbor)
                - `1` (linear / bilinear)
                - `"nearest"`
                - `"triangular"`
                - `"bilinear"`

                The default is `0` for integer arrays (e.g. Sentinel-2 L2A SCL),
                else `1`.
            agg_methods: Optional aggregation methods to be used if
                `op_mode="analysis"`, for downsampling spatial variables.
                Can be a single method for all variables or a dictionary mapping
                variable names or dtypes to methods. Supported methods include:
                    "center", "count", "first", "last", "max", "mean", "median",
                    "mode", "min", "prod", "std", "sum", and "var".
                Defaults to "center" for integer arrays (e.g. Sentinel-2 L2A SCL),
                else "mean".
            variables: Variables to include in the dataset. Can be a name or
                regex pattern or iterable of the latter. Note that it is
                currently not applied if `op_mode="analysis"` and the opened
                node itself holds data (subgroup level).
            drop_variables: Variable name or iterable of variable names
                to drop from the underlying file. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
            decode_timedelta: How to decode time-delta units. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Returns:
            A new dataset instance.
        """
        # Disable attribute expansion for cleaner, more concise rendering
        # in notebooks.
        # NOTE: `xr.set_options()` is called as a plain function, not as
        # a context manager, so the option stays in effect afterwards.
        xr.set_options(display_expand_attrs=False)

        # Validate the caller's op_mode here, because the tree below is
        # always opened in native mode regardless of the requested mode.
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        datatree = self.open_datatree(
            filename_or_obj,
            # always fetch the untransformed tree; the mode-specific
            # conversion into a dataset is done below
            op_mode=OP_MODE_NATIVE,
            storage_options=storage_options,
            # here as it is required for all backends
            drop_variables=drop_variables,
            # here to silence xarray warnings
            decode_timedelta=decode_timedelta,
        )

        # open_datatree() performs the same check; it is kept here so that
        # this method does not rely on that implementation detail
        _assert_datatree_is_chunked(datatree)

        if op_mode == OP_MODE_NATIVE:
            # native mode: a node with data is a subgroup and is returned
            # as-is, a product-level tree is flattened
            dataset = (
                datatree.to_dataset()
                if datatree.has_data
                else flatten_datatree(datatree, sep=group_sep)
            )
            return filter_dataset(dataset, variables)

        # analysis mode
        analysis_mode = AnalysisMode.guess(filename_or_obj, product_type=product_type)

        if datatree.has_data:
            # subgroup level, so we transform the dataset
            return analysis_mode.transform_dataset(datatree.to_dataset())

        # product level, so we convert the tree into a dataset
        params = analysis_mode.get_applicable_params(
            resolution=resolution,
            interp_methods=interp_methods,
            agg_methods=agg_methods,
        )
        dataset = analysis_mode.convert_datatree(
            datatree, includes=variables, **params
        )
        # add preferred chunking to encoding
        return add_chunking_encoding(dataset)

    def guess_can_open(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    ) -> bool:
        """Check if the given `filename_or_obj` refers to an object that
        can be opened by this backend.

        The function returns `False` to indicate that this backend should
        only be used when specified by passing `engine="eopf-zarr"`.

        Args:
            filename_or_obj: File path, or URL, or path-like string.
                It is not inspected.

        Returns:
            Always `False`.
        """
        return False

open_datatree(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, product_type=None, storage_options=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_datatree().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, a path-like string, or a Zarr store, or other key to object mapping.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
DataTree

A new data-tree instance.

Source code in xarray_eopf/backend.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
def open_datatree(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    product_type: str | None = None,
    storage_options: Mapping[str, Any] | None = None,
    drop_variables: str | Iterable[str] | None = None,
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.DataTree:
    """Open an EOPF product or one of its subgroups as a data tree.

    This is the backend implementation delegated to by
    [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Args:
        filename_or_obj: File path, or URL, a path-like string, or
            a Zarr store, or other key to object mapping.
        op_mode: Mode of operation, either "analysis" or "native".
            Defaults to "analysis".
        product_type: Optional product type name, such as `"MSIL1C"`.
            Only used if `op_mode="analysis"`; typically not required
            if the filename inherent to `filename_or_obj`
            adheres to EOPF naming conventions.
        storage_options: If `filename_or_obj` is a file path or URL,
            these options specify the source filesystem.
            Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        drop_variables: Variable name or iterable of variable names
            to drop from the underlying file. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
        decode_timedelta: How to decode time-delta units. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Returns:
        A new data-tree instance. The tree is returned as opened
        if `op_mode="native"` or if `filename_or_obj` refers to
        a subgroup; otherwise it is transformed by the analysis mode
        guessed for the product.
    """
    # Keep attributes collapsed so that notebook output stays compact.
    xr.set_options(display_expand_attrs=False)

    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    # Separate the product root from an optional subgroup path.
    root_path, subgroup_path = normalize_source_path(filename_or_obj)
    store = normalize_source(root_path, storage_options)

    # noinspection PyTypeChecker
    tree = xr.open_datatree(
        store,
        engine="zarr",
        group=subgroup_path,
        # an empty mapping makes xarray use the chunking from the Zarr metadata
        chunks={},
        # must be forwarded, it is required for all backends
        drop_variables=drop_variables,
        # forwarded explicitly to silence xarray warnings
        decode_timedelta=decode_timedelta,
    )

    _assert_datatree_is_chunked(tree)

    if op_mode == OP_MODE_NATIVE or subgroup_path:
        # native mode or subgroup level: hand back the (sub)tree untouched
        return tree

    # analysis mode at product level: let the analysis mode reshape the tree
    mode = AnalysisMode.guess(root_path, product_type=product_type)
    return mode.transform_datatree(tree)

open_dataset(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, storage_options=None, group_sep='_', variables=None, product_type=None, resolution=None, interp_methods=None, agg_methods=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_dataset().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
group_sep str

Separator string used to concatenate group names to create prefixes for unique variable and dimension names. Defaults to the underscore character ("_").

'_'
resolution int | float | None

Target resolution for all spatial data variables / bands. For Sentinel-2 products it must be one of 10, 20, or 60. Only used if op_mode="analysis".

None
interp_methods SpatialInterpMethods | None

Optional interpolation method to be used if op_mode="analysis", for upsampling / interpolating spatial data variables. Can be a single interpolation method for all variables or a dictionary mapping variable names or dtypes to interpolation method. Supported methods include:

  • 0 (nearest neighbor)
  • 1 (linear / bilinear)
  • "nearest"
  • "triangular"
  • "bilinear"

The default is 0 for integer arrays (e.g. Sentinel-2 L2A SCL), else 1.

None
agg_methods SpatialAggMethods | None

Optional aggregation methods to be used if op_mode="analysis", for downsampling spatial variables. Can be a single method for all variables or a dictionary mapping variable names or dtypes to methods. Supported methods include: "center", "count", "first", "last", "max", "mean", "median", "mode", "min", "prod", "std", "sum", and "var". Defaults to "center" for integer arrays (e.g. Sentinel-2 L2A SCL), else "mean".

None
variables str | Iterable[str] | None

Variables to include in the dataset. Can be a name or regex pattern or iterable of the latter.

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
Dataset

A new dataset instance.

Source code in xarray_eopf/backend.py
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
def open_dataset(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    # params for op_mode=native/analysis
    storage_options: Mapping[str, Any] | None = None,
    group_sep: str = "_",
    variables: str | Iterable[str] | None = None,
    # params for op_mode=analysis
    product_type: str | None = None,
    resolution: int | float | None = None,
    interp_methods: SpatialInterpMethods | None = None,
    agg_methods: SpatialAggMethods | None = None,
    # params required by xarray backend interface
    drop_variables: str | Iterable[str] | None = None,
    # params for other reasons
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.Dataset:
    """Backend implementation delegated to by
    [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Args:
        filename_or_obj: File path, or URL, or path-like string.
        op_mode: Mode of operation, either "analysis" or "native".
            Defaults to "analysis".
        product_type: Optional product type name, such as `"MSIL1C"`.
            Only used if `op_mode="analysis"`; typically not required
            if the filename inherent to `filename_or_obj`
            adheres to EOPF naming conventions.
        storage_options: If `filename_or_obj` is a file path or URL,
            these options specify the source filesystem.
            Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        group_sep: Separator string used to concatenate group names
            to create prefixes for unique variable and dimension names.
            Defaults to the underscore character (`"_"`).
            Only used if `op_mode="native"` and the product tree
            is flattened into a dataset.
        resolution: Target resolution for all spatial
            data variables / bands. For Sentinel-2 products it must be
            one of `10`, `20`, or `60`. Only used if `op_mode="analysis"`.
        interp_methods: Optional interpolation method to be used if
            `op_mode="analysis"`, for upsampling / interpolating
            spatial data variables. Can be a single interpolation method for all
            variables or a dictionary mapping variable names or dtypes to
            interpolation method. Supported methods include:

            - `0` (nearest neighbor)
            - `1` (linear / bilinear)
            - `"nearest"`
            - `"triangular"`
            - `"bilinear"`

            The default is `0` for integer arrays (e.g. Sentinel-2 L2A SCL),
            else `1`.
        agg_methods: Optional aggregation methods to be used if
            `op_mode="analysis"`, for downsampling spatial variables.
            Can be a single method for all variables or a dictionary mapping
            variable names or dtypes to methods. Supported methods include:
                "center", "count", "first", "last", "max", "mean", "median",
                "mode", "min", "prod", "std", "sum", and "var".
            Defaults to "center" for integer arrays (e.g. Sentinel-2 L2A SCL),
            else "mean".
        variables: Variables to include in the dataset. Can be a name or
            regex pattern or iterable of the latter. Note that it is
            currently not applied if `op_mode="analysis"` and the opened
            node itself holds data (subgroup level).
        drop_variables: Variable name or iterable of variable names
            to drop from the underlying file. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
        decode_timedelta: How to decode time-delta units. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Returns:
        A new dataset instance.
    """
    # Disable attribute expansion for cleaner, more concise rendering
    # in notebooks.
    # NOTE: `xr.set_options()` is called as a plain function, not as
    # a context manager, so the option stays in effect afterwards.
    xr.set_options(display_expand_attrs=False)

    # Validate the caller's op_mode here, because the tree below is
    # always opened in native mode regardless of the requested mode.
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    datatree = self.open_datatree(
        filename_or_obj,
        # always fetch the untransformed tree; the mode-specific
        # conversion into a dataset is done below
        op_mode=OP_MODE_NATIVE,
        storage_options=storage_options,
        # here as it is required for all backends
        drop_variables=drop_variables,
        # here to silence xarray warnings
        decode_timedelta=decode_timedelta,
    )

    _assert_datatree_is_chunked(datatree)

    if op_mode == OP_MODE_NATIVE:
        # native mode: a node with data is a subgroup and is returned
        # as-is, a product-level tree is flattened
        dataset = (
            datatree.to_dataset()
            if datatree.has_data
            else flatten_datatree(datatree, sep=group_sep)
        )
        return filter_dataset(dataset, variables)

    # analysis mode
    analysis_mode = AnalysisMode.guess(filename_or_obj, product_type=product_type)

    if datatree.has_data:
        # subgroup level, so we transform the dataset
        return analysis_mode.transform_dataset(datatree.to_dataset())

    # product level, so we convert the tree into a dataset
    params = analysis_mode.get_applicable_params(
        resolution=resolution,
        interp_methods=interp_methods,
        agg_methods=agg_methods,
    )
    dataset = analysis_mode.convert_datatree(datatree, includes=variables, **params)
    # add preferred chunking to encoding
    return add_chunking_encoding(dataset)

guess_can_open(filename_or_obj)

Check if the given filename_or_obj refers to an object that can be opened by this backend.

The function returns False to indicate that this backend should only be used when specified by passing engine="eopf-zarr".

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required

Returns:

Type Description
bool

Always False.

Source code in xarray_eopf/backend.py
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
def guess_can_open(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
) -> bool:
    """Tell xarray whether this backend can open `filename_or_obj`.

    The answer is unconditionally `False`, which indicates that this
    backend should only be used when it is requested explicitly by
    passing `engine="eopf-zarr"`.

    Args:
        filename_or_obj: File path, or URL, or path-like string.
            It is not inspected.

    Returns:
        Always `False`.
    """
    return False