Skip to content

Python API

xarray_eopf.backend.EopfBackend

Bases: BackendEntrypoint

Backend for EOPF Data Products using the Zarr format.

Note that the chunks parameter passed to the xarray top-level functions xr.open_datatree() and xr.open_dataset() is not passed to the backend. Instead, xarray uses it to (re)chunk the results returned by the backend equivalents — that is, after the backend code has run.

Source code in xarray_eopf\backend.py
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
class EopfBackend(BackendEntrypoint):
    """Xarray backend entry point for EOPF Data Products in Zarr format.

    Note that the `chunks` argument given to the xarray top-level functions
    `xr.open_datatree()` and `xr.open_dataset()` never reaches this backend:
    xarray applies it itself to (re)chunk whatever the backend returns,
    i.e. _after_ the backend code has run.
    """

    def open_datatree(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        product_type: str | None = None,
        storage_options: Mapping[str, Any] | None = None,
        drop_variables: str | Iterable[str] | None = None,
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.DataTree:
        """Open an EOPF product as a data tree.

        Backend implementation delegated to by
        [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Args:
            filename_or_obj: File path, URL, path-like string, Zarr store,
                or other key-to-object mapping.
            op_mode: Mode of operation, either "analysis" or "native".
                Defaults to "analysis".
            product_type: Optional product type name, such as `"MSIL1C"`.
                Only used if `op_mode="analysis"`; typically not required
                if the filename inherent to `filename_or_obj`
                adheres to EOPF naming conventions.
            storage_options: If `filename_or_obj` is a file path or URL,
                these options specify the source filesystem.
                Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            drop_variables: Variable name or iterable of variable names
                to drop from the underlying file. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
            decode_timedelta: How to decode time-delta units. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Returns:
            A new data-tree instance.
        """
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        src = normalize_source(filename_or_obj, storage_options)
        # Split off the optional subgroup path inside the product.
        _, sub_path = get_source_paths(src)

        # noinspection PyTypeChecker
        tree = xr.open_datatree(
            src,
            engine="zarr",
            # empty dict = prefer the chunking recorded in the Zarr metadata
            chunks={},
            # required parameter for all xarray backends
            drop_variables=drop_variables,
            # forwarded to silence xarray warnings
            decode_timedelta=decode_timedelta,
            # subgroups don't have consolidated metadata
            consolidated=False if sub_path else None,
        )

        _assert_datatree_is_chunked(tree)

        # Native mode, and analysis mode at subgroup level, both pass
        # the tree through unchanged.
        if op_mode == OP_MODE_NATIVE or sub_path:
            return tree

        # Analysis mode at product level: transform the whole tree.
        analysis = AnalysisMode.guess(filename_or_obj, product_type=product_type)
        return analysis.transform_datatree(tree)

    def open_dataset(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        # params for op_mode=native/analysis
        storage_options: Mapping[str, Any] | None = None,
        group_sep: str = "_",
        variables: str | Iterable[str] | None = None,
        # params for op_mode=analysis
        product_type: str | None = None,
        resolution: int | float | None = None,
        spline_orders: SplineOrders | None = None,
        agg_methods: AggMethods | None = None,
        # params required by xarray backend interface
        drop_variables: str | Iterable[str] | None = None,
        # params for other reasons
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.Dataset:
        """Open an EOPF product as a single dataset.

        Backend implementation delegated to by
        [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Args:
            filename_or_obj: File path, or URL, or path-like string.
            op_mode: Mode of operation, either "analysis" or "native".
                Defaults to "analysis".
            storage_options: If `filename_or_obj` is a file path or URL,
                these options specify the source filesystem.
                Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            group_sep: Separator string used to concatenate group names
                to create prefixes for unique variable and dimension names.
                Defaults to the underscore character (`"_"`).
            variables: Variables to include in the dataset. Can be a name or
                regex pattern or iterable of the latter.
            product_type: Optional product type name, such as `"MSIL1C"`.
                Only used if `op_mode="analysis"`; typically not required
                if the filename inherent to `filename_or_obj`
                adheres to EOPF naming conventions.
            resolution: Target resolution for all spatial data variables / bands.
                Must be one of `10`, `20`, or `60`.
                Only used if `op_mode="analysis"`.
            spline_orders: Optional spline orders to be used for upsampling
                spatial data variables / bands. Can be a single spline order
                for all variables or a dictionary that maps a spline order to
                applicable variable names or array data types.
                A spline order is given by one of `0` (nearest neighbor),
                `1` (linear), `2` (bi-linear), or `3` (cubic).
                The default is `3`, except for product specific overrides.
                For example, the Sentinel-2 variable `scl` uses the default `0`.
                Only used if `op_mode="analysis"`.
            agg_methods: Optional aggregation methods to be used for downsampling
                spatial data variables / bands. Can be a single aggregation method
                for all variables or a dictionary that maps an aggregation method to
                applicable variable names or array data types.
                An aggregation method is one of
                `"center"`, `"count"`, `"first"`, `"last"`, `"max"`,
                `"mean"`, `"median"`, `"mode"`, `"min"`, `"prod"`,
                `"std"`, `"sum"`, or `"var"`.
                The default is `"mean"`, except for product specific overrides.
                For example, the Sentinel-2 variable `scl` uses the default `"center"`.
                Only used if `op_mode="analysis"`.
            drop_variables: Variable name or iterable of variable names
                to drop from the underlying file. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
            decode_timedelta: How to decode time-delta units. See
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Returns:
            A new dataset instance.
        """
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        # Always read the raw tree first; mode-specific handling follows.
        tree = self.open_datatree(
            filename_or_obj,
            op_mode="native",
            storage_options=storage_options,
            # required parameter for all xarray backends
            drop_variables=drop_variables,
            # forwarded to silence xarray warnings
            decode_timedelta=decode_timedelta,
        )

        _assert_datatree_is_chunked(tree)

        if op_mode == OP_MODE_NATIVE:
            if tree.has_data:
                # Subgroup level: the node's own dataset is returned as-is.
                ds = tree.to_dataset()
            else:
                # Product level: flatten all groups into one dataset.
                ds = flatten_datatree(tree, sep=group_sep)
            return filter_dataset(ds, variables)

        # Analysis mode from here on.
        analysis = AnalysisMode.guess(filename_or_obj, product_type=product_type)

        if tree.has_data:
            # Subgroup level: transform the single dataset.
            # NOTE(review): `variables` is not applied in this branch —
            # confirm whether that is intentional.
            return analysis.transform_dataset(tree.to_dataset())

        # Product level: convert the whole tree into one dataset.
        params = analysis.get_applicable_params(
            resolution=resolution,
            spline_orders=spline_orders,
            agg_methods=agg_methods,
        )
        return analysis.convert_datatree(tree, includes=variables, **params)

    def guess_can_open(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    ) -> bool:
        """Report whether this backend should be auto-selected for
        `filename_or_obj`.

        The function returns `False` to indicate that this backend should
        only be used when specified by passing `engine="eopf-zarr"`.

        Args:
            filename_or_obj: File path, or URL, or path-like string.

        Returns:
            Always `False`.
        """
        # Never volunteer for xarray's engine auto-detection.
        return False

open_datatree(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, product_type=None, storage_options=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_datatree().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, a path-like string, or a Zarr store, or other key to object mapping.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
DataTree

A new data-tree instance.

Source code in xarray_eopf\backend.py
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
def open_datatree(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    product_type: str | None = None,
    storage_options: Mapping[str, Any] | None = None,
    drop_variables: str | Iterable[str] | None = None,
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.DataTree:
    """Backend implementation delegated to by
    [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Args:
        filename_or_obj: File path, or URL, a path-like string, or
            a Zarr store, or other key to object mapping.
        op_mode: Mode of operation, either "analysis" or "native".
            Defaults to "analysis".
        product_type: Optional product type name, such as `"MSIL1C"`.
            Only used if `op_mode="analysis"`; typically not required
            if the filename inherent to `filename_or_obj`
            adheres to EOPF naming conventions.
        storage_options: If `filename_or_obj` is a file path or URL,
            these options specify the source filesystem.
            Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        drop_variables: Variable name or iterable of variable names
            to drop from the underlying file. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
        decode_timedelta: How to decode time-delta units. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Returns:
        A new data-tree instance.
    """

    # Fail fast on an invalid op_mode before any I/O happens.
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    source = normalize_source(filename_or_obj, storage_options)
    # Separate the product root from an optional subgroup path inside it.
    _, subgroup_path = get_source_paths(source)

    # noinspection PyTypeChecker
    datatree = xr.open_datatree(
        source,
        engine="zarr",
        # prefer the chunking from the Zarr metadata
        chunks={},
        # here as it is required for all backends
        drop_variables=drop_variables,
        # here to silence xarray warnings
        decode_timedelta=decode_timedelta,
        # subgroups don't have consolidated metadata
        consolidated=False if subgroup_path else None,
    )

    _assert_datatree_is_chunked(datatree)

    if op_mode == OP_MODE_NATIVE:
        # native mode, so we return tree as-is
        return datatree
    else:  # op_mode == OP_MODE_ANALYSIS
        # analysis mode
        if subgroup_path:
            # subgroup level, return subtree as-is
            return datatree
        else:
            # product level, so we transform the tree
            # product type is guessed from the filename unless given explicitly
            analysis_mode = AnalysisMode.guess(
                filename_or_obj, product_type=product_type
            )
            return analysis_mode.transform_datatree(datatree)

open_dataset(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, storage_options=None, group_sep='_', variables=None, product_type=None, resolution=None, spline_orders=None, agg_methods=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_dataset().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
group_sep str

Separator string used to concatenate group names to create prefixes for unique variable and dimension names. Defaults to the underscore character ("_").

'_'
resolution int | float | None

Target resolution for all spatial data variables / bands. Must be one of 10, 20, or 60. Only used if op_mode="analysis".

None
spline_orders SplineOrders | None

Optional spline orders to be used for upsampling spatial data variables / bands. Can be a single spline order for all variables or a dictionary that maps a spline order to applicable variable names or array data types. A spline order is given by one of 0 (nearest neighbor), 1 (linear), 2 (bi-linear), or 3 (cubic). The default is 3, except for product specific overrides. For example, the Sentinel-2 variable scl uses the default 0. Only used if op_mode="analysis"

None
agg_methods AggMethods | None

Optional aggregation methods to be used for downsampling spatial data variables / bands. Can be a single aggregation method for all variables or a dictionary that maps an aggregation method to applicable variable names or array data types. An aggregation method is one of "center", "count", "first", "last", "max", "mean", "median", "mode", "min", "prod", "std", "sum", or "var". The default is "mean", except for product specific overrides. For example, the Sentinel-2 variable scl uses the default "center". Only used if op_mode="analysis"

None
variables str | Iterable[str] | None

Variables to include in the dataset. Can be a name or regex pattern or iterable of the latter.

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
Dataset

A new dataset instance.

Source code in xarray_eopf\backend.py
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
def open_dataset(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    # params for op_mode=native/analysis
    storage_options: Mapping[str, Any] | None = None,
    group_sep: str = "_",
    variables: str | Iterable[str] | None = None,
    # params for op_mode=analysis
    product_type: str | None = None,
    resolution: int | float | None = None,
    spline_orders: SplineOrders | None = None,
    agg_methods: AggMethods | None = None,
    # params required by xarray backend interface
    drop_variables: str | Iterable[str] | None = None,
    # params for other reasons
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.Dataset:
    """Backend implementation delegated to by
    [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Args:
        filename_or_obj: File path, or URL, or path-like string.
        op_mode: Mode of operation, either "analysis" or "native".
            Defaults to "analysis".
        product_type: Optional product type name, such as `"MSIL1C"`.
            Only used if `op_mode="analysis"`; typically not required
            if the filename inherent to `filename_or_obj`
            adheres to EOPF naming conventions.
        storage_options: If `filename_or_obj` is a file path or URL,
            these options specify the source filesystem.
            Will be passed to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        group_sep: Separator string used to concatenate groups names
            to create prefixes for unique variable and dimension names.
            Defaults to the underscore character (`"_"`)
        resolution: Target resolution for all spatial data variables / bands.
            Must be one of `10`, `20`, or `60`.
            Only used if `op_mode="analysis"`.
        spline_orders: Optional spline orders to be used for upsampling
            spatial data variables / bands. Can be a single spline order
            for all variables or a dictionary that maps a spline order to
            applicable variable names or array data types.
            A spline order is given by one of `0` (nearest neighbor),
            `1` (linear), `2` (bi-linear), or `3` (cubic).
            The default is `3`, except for product specific overrides.
            For example, the Sentinel-2 variable `scl` uses the default `0`.
            Only used if `op_mode="analysis"`
        agg_methods: Optional aggregation methods to be used for downsampling
            spatial data variables / bands. Can be a single aggregation method
            for all variables or a dictionary that maps an aggregation method to
            applicable variable names or array data types.
            An aggregation method is one of
            `"center"`, `"count"`, `"first"`, `"last"`, `"max"`,
            `"mean"`, `"median"`, `"mode"`, `"min"`, `"prod"`,
            `"std"`, `"sum"`, or `"var"`.
            The default is `"mean"`, except for product specific overrides.
            For example, the Sentinel-2 variable `scl` uses the default `"center"`.
            Only used if `op_mode="analysis"`
        variables: Variables to include in the dataset. Can be a name or
            regex pattern or iterable of the latter.
        drop_variables: Variable name or iterable of variable names
            to drop from the underlying file. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
        decode_timedelta: How to decode time-delta units. See
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Returns:
        A new dataset instance.
    """
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    # Always open the raw tree in native mode first; the requested
    # op_mode is applied to the result below.
    datatree = self.open_datatree(
        filename_or_obj,
        op_mode="native",
        storage_options=storage_options,
        # here as it is required for all backends
        drop_variables=drop_variables,
        # here to silence xarray warnings
        decode_timedelta=decode_timedelta,
    )

    _assert_datatree_is_chunked(datatree)

    if op_mode == OP_MODE_NATIVE:
        # native mode
        if datatree.has_data:
            # subgroup level, so we return dataset as-is
            dataset = datatree.to_dataset()
        else:
            # product level, so we flatten the tree
            dataset = flatten_datatree(datatree, sep=group_sep)
        dataset = filter_dataset(dataset, variables)
    else:
        # analysis mode
        analysis_mode = AnalysisMode.guess(
            filename_or_obj, product_type=product_type
        )
        if datatree.has_data:
            # subgroup level, so we transform the dataset
            # NOTE(review): `variables` is not applied in this branch,
            # unlike the native and product-level paths — confirm whether
            # that is intentional.
            dataset = datatree.to_dataset()
            dataset = analysis_mode.transform_dataset(dataset)
        else:
            # product level, so we convert the tree into a dataset
            params = analysis_mode.get_applicable_params(
                resolution=resolution,
                spline_orders=spline_orders,
                agg_methods=agg_methods,
            )
            dataset = analysis_mode.convert_datatree(
                datatree, includes=variables, **params
            )

    return dataset

guess_can_open(filename_or_obj)

Check if the given filename_or_obj refers to an object that can be opened by this backend.

The function returns False to indicate that this backend should only be used when specified by passing engine="eopf-zarr".

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required

Returns:

Type Description
bool

Always False.

Source code in xarray_eopf\backend.py
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
def guess_can_open(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
) -> bool:
    """Check if the given `filename_or_obj` refers to an object that
    can be opened by this backend.

    The function returns `False` to indicate that this backend should
    only be used when specified by passing `engine="eopf-zarr"`.

    Args:
        filename_or_obj: File path, or URL, or path-like string.

    Returns:
        Always `False`.
    """
    # Opt out of xarray's automatic engine detection; the backend is
    # explicit-use only.
    return False