Skip to content

Python API

xarray_eopf.backend.EopfBackend

Bases: BackendEntrypoint

Backend for EOPF Data Products using the Zarr format.

Note that the chunks parameter passed to the xarray top-level functions xr.open_datatree() and xr.open_dataset() is not passed to the backend. Instead, xarray uses it to (re)chunk the results returned by the equivalent backend methods, i.e., after the backend code has run.

Source code in xarray_eopf/backend.py
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
class EopfBackend(BackendEntrypoint):
    """xarray backend entrypoint for EOPF data products stored as Zarr.

    Be aware that a `chunks` argument given to the xarray top-level
    functions `xr.open_datatree()` and `xr.open_dataset()` does _not_
    reach this backend. xarray applies it on its own to (re)chunk
    whatever the corresponding backend method returned, that is,
    _after_ the backend code has run.
    """

    def open_datatree(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        product_type: str | None = None,
        protocol: str | None = None,
        storage_options: Mapping[str, Any] | None = None,
        drop_variables: str | Iterable[str] | None = None,
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.DataTree:
        """Open an EOPF Zarr product as a data tree.

        This is the backend method that
        [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html)
        delegates to.

        Args:
            filename_or_obj: A file path, a URL, a path-like string,
                a Zarr store, or another key-to-object mapping.
            op_mode: Operation mode, one of "analysis" or "native".
                The default is "analysis".
            product_type: Name of the product type, e.g., `"MSIL1C"`.
                Optional and only used if `op_mode="analysis"`.
                Typically not needed when the filename inherent to
                `filename_or_obj` follows the EOPF naming conventions.
            protocol: Filesystem protocol to enforce when
                `filename_or_obj` is a file path or URL. If not given,
                the protocol is derived from the file path or URL.
                Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            storage_options: Options that specify the source filesystem
                when `filename_or_obj` is a file path or URL.
                Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            drop_variables: A variable name, or an iterable of variable
                names, to drop from the underlying file. Refer to the
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
            decode_timedelta: Controls how time-delta units are decoded.
                Refer to the
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

        Returns:
            A new data-tree instance.
        """
        # Reject unsupported modes before any store is touched.
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        store = open_store(filename_or_obj, protocol, storage_options)
        native_tree = xr.open_datatree(
            store,
            engine="zarr",
            # empty mapping: prefer the chunking from the Zarr metadata
            chunks={},
            # every backend is required to accept this one
            drop_variables=drop_variables,
            # given explicitly to silence xarray warnings
            decode_timedelta=decode_timedelta,
        )
        _assert_datatree_is_chunked(native_tree)

        if op_mode == OP_MODE_NATIVE:
            return native_tree

        # Remaining case: op_mode == OP_MODE_ANALYSIS
        mode = AnalysisMode.guess(filename_or_obj, product_type=product_type)
        return mode.transform_datatree(native_tree)

    def open_dataset(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
        *,
        op_mode: OpMode = OP_MODE_ANALYSIS,
        # params for op_mode=native/analysis
        protocol: str | None = None,
        storage_options: Mapping[str, Any] | None = None,
        group_sep: str = "_",
        variables: str | Iterable[str] | None = None,
        # params for op_mode=analysis
        product_type: str | None = None,
        resolution: int | float | None = None,
        spline_order: int | None = None,
        # params required by xarray backend interface
        drop_variables: str | Iterable[str] | None = None,
        # params for other reasons
        decode_timedelta: (
            bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
        ) = False,
    ) -> xr.Dataset:
        """Open an EOPF Zarr product as a single dataset.

        This is the backend method that
        [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html)
        delegates to.

        Args:
            filename_or_obj: A file path, a URL, or a path-like string.
            op_mode: Operation mode, one of "analysis" or "native".
                The default is "analysis".
            protocol: Filesystem protocol to enforce when
                `filename_or_obj` is a file path or URL. If not given,
                the protocol is derived from the file path or URL.
                Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            storage_options: Options that specify the source filesystem
                when `filename_or_obj` is a file path or URL.
                Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
            group_sep: Separator string used to concatenate group names
                into prefixes that make variable and dimension names
                unique. The default is the underscore character (`"_"`).
            variables: Variables to include in the dataset. Either a
                name, a regex pattern, or an iterable of those.
            product_type: Name of the product type, e.g., `"MSIL1C"`.
                Optional and only used if `op_mode="analysis"`.
                Typically not needed when the filename inherent to
                `filename_or_obj` follows the EOPF naming conventions.
            resolution: Target resolution for all spatial data
                variables / bands. Must be one of `10`, `20`, or `60`.
                Only used if `op_mode="analysis"`.
            spline_order: Spline order used for resampling spatial
                data variables / bands.
                Must be one of `0` (nearest neighbor), `1` (linear),
                `2` (bi-linear), or `3` (cubic).
                Only used if `op_mode="analysis"`.
            drop_variables: A variable name, or an iterable of variable
                names, to drop from the underlying file. Refer to the
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
            decode_timedelta: Controls how time-delta units are decoded.
                Refer to the
                [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

        Returns:
            A new dataset instance.
        """
        # Reject unsupported modes before any store is touched.
        assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

        # The tree is opened with "native" whatever op_mode was requested;
        # the mode-specific conversion into a dataset happens below.
        native_tree = self.open_datatree(
            filename_or_obj,
            op_mode="native",
            protocol=protocol,
            storage_options=storage_options,
            # every backend is required to accept this one
            drop_variables=drop_variables,
            # given explicitly to silence xarray warnings
            decode_timedelta=decode_timedelta,
        )
        _assert_datatree_is_chunked(native_tree)

        if op_mode == OP_MODE_NATIVE:
            flattened = flatten_datatree(native_tree, sep=group_sep)
            return filter_dataset(flattened, variables)

        # Remaining case: op_mode == OP_MODE_ANALYSIS
        mode = AnalysisMode.guess(filename_or_obj, product_type=product_type)
        applicable = mode.get_applicable_params(
            resolution=resolution, spline_order=spline_order
        )
        return mode.convert_datatree(native_tree, includes=variables, **applicable)

    def guess_can_open(
        self,
        filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    ) -> bool:
        """Tell whether this backend can open `filename_or_obj`.

        The answer is `False` for every input, which indicates that
        this backend should only be used when it is requested
        explicitly by passing `engine="eopf-zarr"`.

        Args:
            filename_or_obj: A file path, a URL, or a path-like string.

        Returns:
            Always `False`.
        """
        return False

open_datatree(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, product_type=None, protocol=None, storage_options=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_datatree().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, URL, path-like string, Zarr store, or other key-to-object mapping.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
protocol str | None

If filename_or_obj is a file path or URL, it forces using the specified filesystem protocol. Otherwise, the protocol will be derived from the file path or URL. Will be passed to fsspec.filesystem().

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
DataTree

A new data-tree instance.

Source code in xarray_eopf/backend.py
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
def open_datatree(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    product_type: str | None = None,
    protocol: str | None = None,
    storage_options: Mapping[str, Any] | None = None,
    drop_variables: str | Iterable[str] | None = None,
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.DataTree:
    """Open an EOPF Zarr product as a data tree.

    This is the backend method that
    [`xarray.open_datatree()`](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html)
    delegates to.

    Args:
        filename_or_obj: A file path, a URL, a path-like string,
            a Zarr store, or another key-to-object mapping.
        op_mode: Operation mode, one of "analysis" or "native".
            The default is "analysis".
        product_type: Name of the product type, e.g., `"MSIL1C"`.
            Optional and only used if `op_mode="analysis"`.
            Typically not needed when the filename inherent to
            `filename_or_obj` follows the EOPF naming conventions.
        protocol: Filesystem protocol to enforce when
            `filename_or_obj` is a file path or URL. If not given,
            the protocol is derived from the file path or URL.
            Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        storage_options: Options that specify the source filesystem
            when `filename_or_obj` is a file path or URL.
            Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        drop_variables: A variable name, or an iterable of variable
            names, to drop from the underlying file. Refer to the
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).
        decode_timedelta: Controls how time-delta units are decoded.
            Refer to the
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_datatree.html).

    Returns:
        A new data-tree instance.
    """
    # Reject unsupported modes before any store is touched.
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    store = open_store(filename_or_obj, protocol, storage_options)
    native_tree = xr.open_datatree(
        store,
        engine="zarr",
        # empty mapping: prefer the chunking from the Zarr metadata
        chunks={},
        # every backend is required to accept this one
        drop_variables=drop_variables,
        # given explicitly to silence xarray warnings
        decode_timedelta=decode_timedelta,
    )
    _assert_datatree_is_chunked(native_tree)

    if op_mode == OP_MODE_NATIVE:
        return native_tree

    # Remaining case: op_mode == OP_MODE_ANALYSIS
    mode = AnalysisMode.guess(filename_or_obj, product_type=product_type)
    return mode.transform_datatree(native_tree)

open_dataset(filename_or_obj, *, op_mode=OP_MODE_ANALYSIS, protocol=None, storage_options=None, group_sep='_', variables=None, product_type=None, resolution=None, spline_order=None, drop_variables=None, decode_timedelta=False)

Backend implementation delegated to by xarray.open_dataset().

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required
op_mode OpMode

Mode of operation, either "analysis" or "native". Defaults to "analysis".

OP_MODE_ANALYSIS
product_type str | None

Optional product type name, such as "MSIL1C". Only used if op_mode="analysis"; typically not required if the filename inherent to filename_or_obj adheres to EOPF naming conventions.

None
protocol str | None

If filename_or_obj is a file path or URL, it forces using the specified filesystem protocol. Otherwise, the protocol will be derived from the file path or URL. Will be passed to fsspec.filesystem().

None
storage_options Mapping[str, Any] | None

If filename_or_obj is a file path or URL, these options specify the source filesystem. Will be passed to fsspec.filesystem().

None
group_sep str

Separator string used to concatenate group names to create prefixes for unique variable and dimension names. Defaults to the underscore character ("_").

'_'
resolution int | float | None

Target resolution for all spatial data variables / bands. Must be one of 10, 20, or 60. Only used if op_mode="analysis".

None
spline_order int | None

Spline order to be used for resampling spatial data variables / bands. Must be one of 0 (nearest neighbor), 1 (linear), 2 (bi-linear), or 3 (cubic). Only used if op_mode="analysis".

None
variables str | Iterable[str] | None

Variables to include in the dataset. Can be a name or regex pattern or iterable of the latter.

None
drop_variables str | Iterable[str] | None

Variable name or iterable of variable names to drop from the underlying file. See xarray documentation.

None
decode_timedelta bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None

How to decode time-delta units. See xarray documentation.

False

Returns:

Type Description
Dataset

A new dataset instance.

Source code in xarray_eopf/backend.py
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
def open_dataset(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
    *,
    op_mode: OpMode = OP_MODE_ANALYSIS,
    # params for op_mode=native/analysis
    protocol: str | None = None,
    storage_options: Mapping[str, Any] | None = None,
    group_sep: str = "_",
    variables: str | Iterable[str] | None = None,
    # params for op_mode=analysis
    product_type: str | None = None,
    resolution: int | float | None = None,
    spline_order: int | None = None,
    # params required by xarray backend interface
    drop_variables: str | Iterable[str] | None = None,
    # params for other reasons
    decode_timedelta: (
        bool | CFTimedeltaCoder | Mapping[str, bool | CFTimedeltaCoder] | None
    ) = False,
) -> xr.Dataset:
    """Open an EOPF Zarr product as a single dataset.

    This is the backend method that
    [`xarray.open_dataset()`](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html)
    delegates to.

    Args:
        filename_or_obj: A file path, a URL, or a path-like string.
        op_mode: Operation mode, one of "analysis" or "native".
            The default is "analysis".
        protocol: Filesystem protocol to enforce when
            `filename_or_obj` is a file path or URL. If not given,
            the protocol is derived from the file path or URL.
            Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        storage_options: Options that specify the source filesystem
            when `filename_or_obj` is a file path or URL.
            Passed on to [`fsspec.filesystem()`](https://filesystem-spec.readthedocs.io/en/latest/usage.html).
        group_sep: Separator string used to concatenate group names
            into prefixes that make variable and dimension names
            unique. The default is the underscore character (`"_"`).
        variables: Variables to include in the dataset. Either a
            name, a regex pattern, or an iterable of those.
        product_type: Name of the product type, e.g., `"MSIL1C"`.
            Optional and only used if `op_mode="analysis"`.
            Typically not needed when the filename inherent to
            `filename_or_obj` follows the EOPF naming conventions.
        resolution: Target resolution for all spatial data
            variables / bands. Must be one of `10`, `20`, or `60`.
            Only used if `op_mode="analysis"`.
        spline_order: Spline order used for resampling spatial
            data variables / bands.
            Must be one of `0` (nearest neighbor), `1` (linear),
            `2` (bi-linear), or `3` (cubic).
            Only used if `op_mode="analysis"`.
        drop_variables: A variable name, or an iterable of variable
            names, to drop from the underlying file. Refer to the
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).
        decode_timedelta: Controls how time-delta units are decoded.
            Refer to the
            [xarray documentation](https://docs.xarray.dev/en/stable/generated/xarray.open_dataset.html).

    Returns:
        A new dataset instance.
    """
    # Reject unsupported modes before any store is touched.
    assert_arg_is_one_of(op_mode, "op_mode", OP_MODES)

    # The tree is opened with "native" whatever op_mode was requested;
    # the mode-specific conversion into a dataset happens below.
    native_tree = self.open_datatree(
        filename_or_obj,
        op_mode="native",
        protocol=protocol,
        storage_options=storage_options,
        # every backend is required to accept this one
        drop_variables=drop_variables,
        # given explicitly to silence xarray warnings
        decode_timedelta=decode_timedelta,
    )
    _assert_datatree_is_chunked(native_tree)

    if op_mode == OP_MODE_NATIVE:
        flattened = flatten_datatree(native_tree, sep=group_sep)
        return filter_dataset(flattened, variables)

    # Remaining case: op_mode == OP_MODE_ANALYSIS
    mode = AnalysisMode.guess(filename_or_obj, product_type=product_type)
    applicable = mode.get_applicable_params(
        resolution=resolution, spline_order=spline_order
    )
    return mode.convert_datatree(native_tree, includes=variables, **applicable)

guess_can_open(filename_or_obj)

Check if the given filename_or_obj refers to an object that can be opened by this backend.

The function returns False to indicate that this backend should only be used when specified by passing engine="eopf-zarr".

Parameters:

Name Type Description Default
filename_or_obj str | PathLike[Any] | ReadBuffer | AbstractDataStore

File path, or URL, or path-like string.

required

Returns:

Type Description
bool

Always False.

Source code in xarray_eopf/backend.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
def guess_can_open(
    self,
    filename_or_obj: str | os.PathLike[Any] | ReadBuffer | AbstractDataStore,
) -> bool:
    """Tell whether this backend can open `filename_or_obj`.

    The answer is `False` for every input, which indicates that
    this backend should only be used when it is requested
    explicitly by passing `engine="eopf-zarr"`.

    Args:
        filename_or_obj: A file path, a URL, or a path-like string.

    Returns:
        Always `False`.
    """
    return False