eorst/rasterdataset/
builder.rs

1//! RasterDatasetBuilder for creating and configuring raster datasets.
2//!
3//! This module provides the builder pattern for constructing [RasterDataset](crate::rasterdataset::RasterDataset)
4//! instances from various data sources including files, STAC collections, or from scratch.
5
6use std::collections::HashMap;
7use std::path::{Path, PathBuf};
8use std::sync::{Arc, Mutex};
9
10use log::debug;
11use math::round;
12use num_traits::{NumCast, ToPrimitive};
13use rayon::iter::{IndexedParallelIterator, IntoParallelRefIterator, ParallelIterator};
14use stac::ItemCollection;
15
16use crate::core_types::RasterType;
17use crate::data_sources::{DataSource, DateType};
18use crate::metadata::{Layer, RasterMetadata};
19use crate::rasterdataset::RasterDataset;
20use crate::types::{BlockSize, Dimension, GeoTransform, ImageResolution, ImageSize, Overlap, RasterDataShape, ReadWindow, Offset, Size};
21use crate::gdal_utils::{BasicRasterInfo, create_temp_file, warp, warp_with_te_tr, change_res, extract_band, mosaic, read_basic_raster_info, run_gdal_command};
22
23/// Main builder struct for constructing RasterDataset instances.
24#[derive(Default)]
25pub struct RasterDatasetBuilder<T>
26where
27    T: RasterType,
28{
29    /// Source file paths.
30    pub sources: Vec<PathBuf>,
31    /// Band indices per source.
32    pub bands: HashMap<String, Vec<usize>>,
33    /// Band indices as vector.
34    pub bands_vec: Vec<Vec<usize>>,
35    /// EPSG code for the coordinate reference system.
36    pub epsg: u32,
37    /// Overlap size between blocks.
38    pub overlap_size: usize,
39    /// Block size for reading.
40    pub block_size: BlockSize,
41    /// Dimension for band composition.
42    pub composition_bands: Dimension,
43    /// Dimension for source composition.
44    pub composition_sources: Dimension,
45    /// Image dimensions.
46    pub image_size: ImageSize,
47    /// Geotransform parameters.
48    pub geo_transform: GeoTransform,
49    /// Date indices for temporal data.
50    pub date_indices: Vec<DateType>,
51    /// Image resolution.
52    pub resolution: ImageResolution,
53    /// STAC feature collection.
54    pub feature_collection: Option<ItemCollection>,
55    /// No-data value.
56    pub na_value: T,
57    /// Layer names from DataSource.
58    pub layer_names: Vec<String>,
59}
60
61impl<T> RasterDatasetBuilder<T>
62where
63    T: RasterType,
64{
65    /// Creates a RasterDataset from scratch (empty raster).
66    ///
67    /// Creates a new empty raster dataset with the specified extent, resolution, and CRS.
68    /// Useful for creating output rasters for rasterization or other operations that
69    /// need a pre-defined grid.
70    ///
71    /// # Arguments
72    ///
73    /// * `extent` - Geographic extent as [Extent](crate::metadata::Extent) struct
74    /// * `resolution` - Pixel size (will be converted to f64)
75    /// * `epsg_code` - EPSG coordinate reference system code (e.g., 32755 for UTM zone 55S)
76    /// * `block_size` - Size of processing blocks as [BlockSize]
77    ///
78    /// # Example
79    ///
80    /// ```rust,ignore
81    /// use eorst::rasterdataset::RasterDatasetBuilder;
82    /// use eorst::metadata::Extent;
83    /// use eorst::types::BlockSize;
84    ///
85    /// let extent = Extent { xmin: 0., ymin: 0., xmax: 1000., ymax: 1000. };
86    /// let block_size = BlockSize { cols: 256, rows: 256 };
87    ///
88    /// let rds: RasterDataset<i32> = RasterDatasetBuilder::from_scratch(
89    ///     extent, 30.0, 32755, block_size
90    /// );
91    /// ```
92    pub fn from_scratch<U>(
93        extent: crate::metadata::Extent,
94        resolution: U,
95        epsg_code: u32,
96        block_size: BlockSize,
97    ) -> RasterDataset<T>
98    where
99        U: ToPrimitive + std::fmt::Debug,
100    {
101        debug!("Extent: {:?}", extent);
102        let resolution = resolution.to_f64().expect("Unable to convert");
103        debug!("Resolution {:?}", resolution);
104
105        let layers = Vec::new();
106        let rows = ((extent.ymax - extent.ymin) / resolution) as usize;
107        let cols = ((extent.xmax - extent.xmin) / resolution) as usize;
108        debug!("Creating raster with rows: {rows:?} and cols: {cols:?}");
109
110        let shape = RasterDataShape {
111            times: 1,
112            layers: 1,
113            rows,
114            cols,
115        };
116
117        let geo_transform = GeoTransform {
118            x_ul: extent.xmin,
119            x_res: resolution,
120            x_rot: 0.,
121            y_ul: extent.ymin,
122            y_rot: 0.,
123            y_res: resolution,
124        };
125
126        let overlap_size = 0;
127        let date_indices = Vec::new();
128        let layer_indices = Vec::new();
129
130        let metadata = RasterMetadata {
131            layers: layers.clone(),
132            shape,
133            block_size,
134            epsg_code,
135            geo_transform,
136            overlap_size,
137            date_indices,
138            layer_indices,
139            na_value: T::zero(),
140        };
141
142        let tmp_layers = Vec::new();
143        let image_size = ImageSize { rows, cols };
144
145        let n_block_cols = n_block_cols(image_size, block_size);
146        let n_block_rows = n_block_rows(image_size, block_size);
147        let n_blocks = n_block_rows * n_block_cols;
148
149        let mut blocks: Vec<crate::blocks::RasterBlock<T>> = Vec::new();
150        (0..n_blocks).for_each(|i| {
151            let block_attributes = block_from_id(
152                i,
153                shape,
154                &layers,
155                epsg_code as usize,
156                image_size,
157                block_size,
158                overlap_size,
159                geo_transform,
160            );
161            blocks.push(block_attributes);
162        });
163
164        let rds = RasterDataset {
165            metadata,
166            blocks,
167            tmp_layers,
168        };
169
170        debug!("rds from scratch: {rds}");
171        rds
172    }
173
174    /// Creates a builder from a single data source.
175    ///
176    /// Convenience method that wraps [`from_sources`](Self::from_sources).
177    ///
178    /// # Example
179    ///
180    /// ```rust,ignore
181    /// use eorst::rasterdataset::RasterDatasetBuilder;
182    /// use eorst::data_sources::DataSourceBuilder;
183    ///
184    /// let data_source = DataSourceBuilder::from_file(&path_to_file).build();
185    /// let rds = RasterDatasetBuilder::from_source(&data_source).build();
186    /// ```
187    pub fn from_source(data_source: &DataSource) -> RasterDatasetBuilder<T> {
188        RasterDatasetBuilder::from_sources(&vec![data_source.to_owned()])
189    }
190
191    /// Creates a builder from a STAC item collection.
192    ///
193    /// This is a convenience alias for [from_stac_query](Self::from_stac_query).
194    pub fn from_item_collection(feature_collection: &ItemCollection) -> Self {
195        RasterDatasetBuilder::from_stac_query(feature_collection)
196    }
197
198    /// Creates a RasterDataset from a STAC item collection.
199    ///
200    /// Downloads and mosaics imagery from a STAC ItemCollection. This is the primary way
201    /// to load cloud-optimized satellite imagery from providers like DEA, Element84, or
202    /// Planetary Computer.
203    ///
204    /// The method finds the minimum extent of all images, reprojects and resamples as
205    /// needed, and creates a unified dataset. Tiles with different EPSG codes are
206    /// automatically reprojected to the target CRS. Resolution can also be changed.
207    ///
208    /// Use builder methods to configure the output:
209    /// - [`set_epsg`](Self::set_epsg) — Set the target coordinate reference system
210    /// - [`set_resolution`](Self::set_resolution) — Change the output pixel resolution
211    /// - [`block_size`](Self::block_size) — Set the processing block size
212    ///
213    /// # Example
214    ///
215    /// ```rust,ignore
216    /// use eorst::rasterdataset::RasterDatasetBuilder;
217    /// use stac::ItemCollection;
218    ///
219    /// let items: ItemCollection = /* ... from a STAC API query ... */;
220    /// let rds = RasterDatasetBuilder::from_stac_query(&items)
221    ///     .set_epsg(32755)          // UTM zone 55S
222    ///     .set_resolution(30.0)     // 30m resolution
223    ///     .block_size(BlockSize { cols: 2048, rows: 2048 })
224    ///     .build();
225    /// ```
226    pub fn from_stac_query(feature_collection: &ItemCollection) -> Self {
227        let bands_vec = Vec::new();
228
229        // default behaviour
230        let composition_bands = Dimension::Time;
231        let composition_sources = Dimension::Layer; // as many times as sources
232        let geo_transform = GeoTransform::default();
233        let image_size = ImageSize::default();
234        let block_size = BlockSize::default();
235        let overlap_size = 0;
236        let first_item_assets = feature_collection.items[0].assets.values().next().unwrap();
237        let href = Path::new(&first_item_assets.href);
238        let epsg = Self::get_epsg_code(href);
239        let resolution = Self::get_resolution(href);
240
241        let sources: Vec<PathBuf> = Vec::new();
242        let mut date_indices: Vec<DateType> = Vec::new();
243        let mut bands = HashMap::new();
244        for item in feature_collection.items.iter() {
245            let date = item.properties.datetime.unwrap();
246            date_indices.push(DateType::Date(date.into()));
247            for (name, _) in item.assets.iter() {
248                let title = name.to_owned();
249                let band = 1; // FIX
250                bands.insert(title, vec![band]);
251            }
252        }
253        date_indices.sort();
254
255        let date_indices = date_indices.into_iter().collect();
256
257        RasterDatasetBuilder {
258            sources,
259            bands,
260            bands_vec,
261            epsg,
262            overlap_size,
263            block_size,
264            composition_bands,
265            composition_sources,
266            image_size,
267            geo_transform,
268            date_indices,
269            resolution,
270            feature_collection: Some(feature_collection.clone()),
271            na_value: NumCast::from(0).unwrap(),
272            layer_names: Vec::new(),
273        }
274    }
275
276    /// Creates a builder from multiple data sources.
277    ///
278    /// All sources must have the same shape and GeoTransform. They are stacked along
279    /// the time or layer dimension depending on the composition settings.
280    ///
281    /// # Example
282    ///
283    /// ```rust,ignore
284    /// use eorst::rasterdataset::RasterDatasetBuilder;
285    /// use eorst::data_sources::DataSourceBuilder;
286    ///
287    /// let sources = vec![
288    ///     DataSourceBuilder::from_file(&PathBuf::from("scene_2023.tif")).build(),
289    ///     DataSourceBuilder::from_file(&PathBuf::from("scene_2024.tif")).build(),
290    /// ];
291    /// let rds = RasterDatasetBuilder::from_sources(&sources)
292    ///     .block_size(BlockSize { cols: 2048, rows: 2048 })
293    ///     .build();
294    /// ```
295    pub fn from_sources(data_sources: &Vec<DataSource>) -> RasterDatasetBuilder<T> {
296        let mut sources: Vec<PathBuf> = Vec::new();
297        let mut bands_vec = Vec::new();
298        let bands = HashMap::new();
299
300        let geo_transform = GeoTransform::default();
301        let image_size = ImageSize::default();
302        let block_size = BlockSize::default();
303
304        let overlap_size = 0;
305
306        // default behaviour for compositions
307        let composition_bands = Dimension::Layer;
308        let composition_sources = Dimension::Time; // as many times as sources
309
310        for data_source in data_sources {
311            sources.push(data_source.source.clone());
312            bands_vec.push(data_source.bands.clone());
313        }
314
315        let date_indices = (0..sources.len()).map(DateType::Index).collect();
316
317        // Single GDAL open to extract all metadata (replaces 3 separate opens)
318        let info = read_basic_raster_info(&data_sources[0].source);
319        let epsg = info.epsg_code;
320        let resolution = info.resolution();
321        let na_value = info.na_value::<T>();
322
323        // Propagate layer_names from DataSource
324        let layer_names = if !data_sources.is_empty() {
325            data_sources[0].layer_names.clone()
326        } else {
327            Vec::new()
328        };
329
330        RasterDatasetBuilder {
331            sources,
332            bands,
333            bands_vec,
334            epsg,
335            overlap_size,
336            block_size,
337            composition_bands,
338            composition_sources,
339            image_size,
340            geo_transform,
341            date_indices,
342            resolution,
343            feature_collection: None,
344            na_value,
345            layer_names,
346        }
347    }
348
349    /// Sets the desired EPSG code for the raster dataset.
350    ///
351    /// If the source data has a different CRS, it will be automatically reprojected
352    /// during `build()`.
353    ///
354    /// # Example
355    ///
356    /// ```rust,ignore
357    /// RasterDatasetBuilder::from_source(&data_source)
358    ///     .set_epsg(32755)  // UTM zone 55S
359    ///     .build()
360    /// ```
361    pub fn set_epsg(mut self, epsg_code: u32) -> Self {
362        self.epsg = epsg_code;
363        self
364    }
365
366    /// Sets the image size.
367    pub fn set_image_size(mut self, image_size: ImageSize) -> Self {
368        self.image_size = image_size;
369        self
370    }
371
372    /// Set the desired geo_transform
373    pub fn set_geo_transform(mut self, geo_transform: GeoTransform) -> Self {
374        self.geo_transform = geo_transform;
375        self
376    }
377
378    /// Sets the desired resolution for the raster dataset.
379    ///
380    /// If the source data has a different resolution, it will be resampled
381    /// during `build()`.
382    pub fn set_resolution(mut self, resolution: ImageResolution) -> Self {
383        self.resolution = resolution;
384        self
385    }
386
387    /// Sets the block size for chunked raster operations.
388    ///
389    /// Larger blocks reduce I/O overhead but use more memory. Default is 1024×1024.
390    /// A good choice for Sentinel-2 data is 2048×2048.
391    ///
392    /// # Example
393    ///
394    /// ```rust,ignore
395    /// RasterDatasetBuilder::from_source(&data_source)
396    ///     .block_size(BlockSize { cols: 2048, rows: 2048 })
397    ///     .build()
398    /// ```
399    pub fn block_size(mut self, block_size: BlockSize) -> RasterDatasetBuilder<T> {
400        self.block_size = block_size;
401        self
402    }
403
404    /// Sets the overlap size for block-based reads.
405    ///
406    /// Overlap pixels are included on all sides of each block, useful for
407    /// neighborhood operations that require data from adjacent blocks.
408    pub fn overlap_size(mut self, overlap_size: usize) -> RasterDatasetBuilder<T> {
409        self.overlap_size = overlap_size;
410        self
411    }
412
413    /// Sets the source composition dimension.
414    ///
415    /// Controls how multiple data sources are arranged along the time/layer axes.
416    pub fn source_composition_dimension(
417        mut self,
418        composition_dimension: Dimension,
419    ) -> RasterDatasetBuilder<T> {
420        self.composition_sources = composition_dimension;
421        self
422    }
423
424    /// Sets the band composition dimension.
425    ///
426    /// Controls how bands from each source are arranged along the time/layer axes.
427    pub fn band_composition_dimension(
428        mut self,
429        composition_dimension: Dimension,
430    ) -> RasterDatasetBuilder<T> {
431        self.composition_bands = composition_dimension;
432        self
433    }
434
435    /// Sets the date indices for temporal data.
436    pub fn set_date_indices(mut self, date_indices: &[DateType]) -> Self {
437        self.date_indices = date_indices.to_vec();
438        self
439    }
440
441    /// Sets the band mapping.
442    ///
443    /// Maps band names to their source band indices. Useful when working with
444    /// multi-band files or semantic band names.
445    pub fn bands(mut self, bands: HashMap<String, Vec<usize>>) -> Self {
446        self.bands = bands;
447        self
448    }
449
450    /// Sets the template for the raster dataset.
451    ///
452    /// Copies the image size and GeoTransform from another dataset, ensuring
453    /// the output matches its geometry. The EPSG code is also copied.
454    pub fn set_template<V>(mut self, other: &RasterDataset<V>) -> RasterDatasetBuilder<T>
455    where
456        V: RasterType,
457    {
458        self.image_size = ImageSize {
459            rows: other.metadata.shape.rows,
460            cols: other.metadata.shape.cols,
461        };
462        self.geo_transform = other.metadata.geo_transform;
463        self.epsg = other.metadata.epsg_code;
464        self
465    }
466
467    fn get_resolution(source: &Path) -> ImageResolution {
468        read_basic_raster_info(source).resolution()
469    }
470
471    fn get_geotransform(source: &Path) -> GeoTransform {
472        read_basic_raster_info(source).geo_transform_struct()
473    }
474
475    fn get_max_extent(extents: Vec<crate::metadata::Extent>) -> crate::metadata::Extent {
476        let xmin = extents
477            .iter()
478            .map(|ex| ex.xmin)
479            .min_by(|i, j| i.partial_cmp(j).unwrap())
480            .unwrap();
481        let ymin = extents
482            .iter()
483            .map(|ex| ex.ymin)
484            .min_by(|i, j| i.partial_cmp(j).unwrap())
485            .unwrap();
486        let xmax = extents
487            .iter()
488            .map(|ex| ex.xmax)
489            .max_by(|i, j| i.partial_cmp(j).unwrap())
490            .unwrap();
491        let ymax = extents
492            .iter()
493            .map(|ex| ex.ymax)
494            .max_by(|i, j| i.partial_cmp(j).unwrap())
495            .unwrap();
496        crate::metadata::Extent {
497            xmin,
498            ymin,
499            xmax,
500            ymax,
501        }
502    }
503
504    fn get_extent(source: &Path, target_epsg: u32) -> crate::metadata::Extent {
505        let info = read_basic_raster_info(source);
506        let gt = info.geo_transform_struct();
507        let is = info.image_size();
508
509        if info.epsg_code == target_epsg {
510            let xmin = [gt.x_ul];
511            let xmax = [xmin[0] + gt.x_res * is.cols as f64];
512            let ymax = [gt.y_ul];
513            let ymin = [ymax[0] + gt.y_res * is.rows as f64];
514            crate::metadata::Extent {
515                xmin: xmin[0],
516                ymin: ymin[0],
517                xmax: xmax[0],
518                ymax: ymax[0],
519            }
520        } else {
521            let new_source = create_temp_file("vrt");
522            let epsg_s = format!("EPSG:{}", target_epsg);
523            let source_s = source.to_string_lossy();
524            let argv = vec![
525                "gdalwarp", "-t_srs", &epsg_s, "-q", &source_s, &new_source,
526            ];
527            run_gdal_command(&argv);
528            let new_info = read_basic_raster_info(&PathBuf::from(new_source));
529            let gt = new_info.geo_transform_struct();
530            let is = new_info.image_size();
531            let xmin = [gt.x_ul];
532            let ymax = [gt.y_ul];
533            let ymin = [ymax[0] + gt.y_res * is.rows as f64];
534            let xmax = [xmin[0] + gt.x_res * is.cols as f64];
535            crate::metadata::Extent {
536                xmin: xmin[0],
537                ymin: ymin[0],
538                xmax: xmax[0],
539                ymax: ymax[0],
540            }
541        }
542    }
543
544    fn get_epsg_code(source: &Path) -> u32 {
545        debug!("get_epsg_code: source {:?}", source);
546        let info = read_basic_raster_info(source);
547        info.epsg_code
548    }
549
550    fn get_blocks(
551        &self,
552        block_shape: RasterDataShape,
553        layers: &[Layer],
554        epsg_code: usize,
555    ) -> Vec<crate::blocks::RasterBlock<T>>
556    where
557        T: RasterType,
558    {
559        let n_blocks = self.n_blocks();
560        let mut blocks: Vec<crate::blocks::RasterBlock<T>> = Vec::new();
561        (0..n_blocks).for_each(|i| {
562            let block_attributes = self.block_from_id(i, block_shape, layers, epsg_code);
563            blocks.push(block_attributes)
564        });
565        blocks
566    }
567
568    fn n_block_cols(&self) -> usize {
569        let image_size = self.image_size;
570        let block_size = self.block_size;
571        round::ceil(image_size.cols as f64 / block_size.cols as f64, 0) as usize
572    }
573
574    fn n_block_rows(&self) -> usize {
575        let image_size = self.image_size;
576        let block_size = self.block_size;
577        round::ceil(image_size.rows as f64 / block_size.rows as f64, 0) as usize
578    }
579
580    fn n_blocks(&self) -> usize {
581        self.n_block_cols() * self.n_block_rows()
582    }
583
584    fn block_from_id(
585        &self,
586        id: usize,
587        block_shape: RasterDataShape,
588        _layers: &[Layer],
589        epsg_code: usize,
590    ) -> crate::blocks::RasterBlock<T> {
591        let tile_col = self.block_col_row(id).0;
592        let tile_row = self.block_col_row(id).1;
593        let overlap = get_overlap(
594            self.image_size,
595            self.block_size,
596            self.block_col_row(id),
597            self.overlap_size,
598        );
599
600        let ul_x = (self.block_size.cols * tile_col) as isize;
601        let ul_y = (self.block_size.rows * tile_row) as isize;
602
603        let ul_x_overlap = ul_x - overlap.left as isize;
604        let ul_y_overlap = ul_y - overlap.top as isize;
605        let lr_x_overlap = std::cmp::min(
606            self.image_size.cols as isize,
607            ul_x + self.block_size.cols as isize + overlap.right as isize,
608        );
609        let lr_y_overlap = std::cmp::min(
610            self.image_size.rows as isize,
611            ul_y + self.block_size.rows as isize + overlap.bottom as isize,
612        );
613
614        let win_width = lr_x_overlap - ul_x_overlap;
615        let win_height = lr_y_overlap - ul_y_overlap;
616
617        let read_window = ReadWindow {
618            offset: Offset {
619                cols: ul_x_overlap,
620                rows: ul_y_overlap,
621            },
622            size: Size {
623                cols: win_width,
624                rows: win_height,
625            },
626        };
627
628        let block_geo_transform = get_block_gt(read_window, self.geo_transform);
629        let empty_metadata: RasterMetadata<T> = RasterMetadata::new();
630        crate::blocks::RasterBlock {
631            block_index: id,
632            read_window,
633            overlap_size: self.overlap_size,
634            geo_transform: block_geo_transform,
635            overlap,
636            shape: block_shape,
637            epsg_code,
638            raster_metadata: empty_metadata,
639        }
640    }
641
642    fn block_col_row(&self, id: usize) -> (usize, usize) {
643        let block_row = id / self.n_block_cols();
644        let block_col = id - (block_row * self.n_block_cols());
645        (block_col, block_row)
646    }
647
648
649    /// Builds the RasterDataset.
650    pub fn build(mut self) -> RasterDataset<T> {
651        debug!("Building RasterDataset");
652
653        let raster_dataset = match self.feature_collection {
654            None => {
655                let mut layers = Vec::new();
656                let mut blocks: Vec<crate::blocks::RasterBlock<T>> = Vec::new();
657                let mut tmp_layers: Vec<PathBuf> = Vec::new();
658                // Pre-read metadata for all sources (single open per source instead of 2)
659                let t_meta = std::time::Instant::now();
660                let source_meta = self.sources.iter()
661                    .map(|s| read_basic_raster_info(s))
662                    .collect::<Vec<BasicRasterInfo>>();
663                let t_meta_elapsed = t_meta.elapsed().as_secs_f64() * 1000.0;
664
665                for (i, source) in self.sources.iter_mut().enumerate() {
666                    let target_epsg = self.epsg;
667                    let target_resolution = self.resolution;
668
669                    let current_epsg = source_meta[i].epsg_code;
670                    let current_resolution = source_meta[i].resolution();
671
672                    if current_epsg != target_epsg {
673                        *source = warp(source.to_path_buf(), None, target_epsg);
674                        tmp_layers.push(source.clone());
675                    }
676
677                    if current_resolution != target_resolution {
678                        *source = change_res(source.to_path_buf(), target_resolution);
679                        tmp_layers.push(source.clone());
680                    }
681                }
682
683                let t_loop2 = std::time::Instant::now();
684                for source in self.sources.iter_mut() {
685                    let current_gt = Self::get_geotransform(source);
686                    let target_gt = self.geo_transform;
687
688                    if (current_gt != target_gt) & (target_gt != GeoTransform::default()) {
689                        let current_extent =
690                            Self::get_extent(source, self.epsg);
691                        let x_ll = target_gt.x_ul + (self.image_size.cols as f64 * target_gt.x_res);
692                        let y_ll = target_gt.y_ul + (self.image_size.rows as f64 * target_gt.y_res);
693
694                        let target_extent = crate::metadata::Extent {
695                            xmin: target_gt.x_ul,
696                            ymin: y_ll,
697                            xmax: x_ll,
698                            ymax: target_gt.y_ul,
699                        };
700                        let te = if current_extent != target_extent {
701                            crate::metadata::Extent {
702                                xmin: target_gt.x_ul,
703                                ymin: y_ll,
704                                xmax: x_ll,
705                                ymax: target_gt.y_ul,
706                            }
707                        } else {
708                            current_extent
709                        };
710
711                        let tr = ImageResolution {
712                            x: target_gt.x_res,
713                            y: target_gt.y_res,
714                        };
715
716                        *source = warp_with_te_tr(source.to_path_buf(), &te, tr);
717                        tmp_layers.push(source.clone());
718                    }
719                }
720                let t_loop2_elapsed = t_loop2.elapsed().as_secs_f64() * 1000.0;
721
722                let t_info = std::time::Instant::now();
723                let info = read_basic_raster_info(&self.sources[0]);
724                let t_info_elapsed = t_info.elapsed().as_secs_f64() * 1000.0;
725                let geo_transform = info.geo_transform_struct();
726                debug!("GeoTransform template from {:?}", &self.sources[0]);
727                let image_size = info.image_size();
728
729                self.geo_transform = geo_transform;
730                self.image_size = image_size;
731
732                let n_rows = image_size.rows;
733                let n_cols = image_size.cols;
734                // Validate that all sources match the template geometry.
735                // source[0] is already confirmed by the template read above.
736                for source in self.sources.iter().skip(1) {
737                    debug!("checking {:?}", source);
738                    let src_info = read_basic_raster_info(source);
739                    let gt = src_info.geo_transform_struct();
740                    let is = src_info.image_size();
741                    if gt != geo_transform {
742                        panic!("Sources have different geo_transforms!");
743                    }
744
745                    if is != image_size {
746                        panic!("Sources have different size!")
747                    }
748                }
749                let mut time_pos = 1;
750                let mut layer_pos = 1;
751
752                let t_bands = std::time::Instant::now();
753                for (source_idx, source) in self.sources.iter().enumerate() {
754                    let bands = self.bands_vec[source_idx].clone();
755
756                    for band in bands.iter() {
757                        let source = PathBuf::from(source);
758                        let new_source = extract_band(&source, *band);
759                        tmp_layers.push(new_source.clone());
760                        let source = new_source;
761                        let layer = Layer {
762                            source,
763                            time_pos: time_pos - 1,
764                            layer_pos: layer_pos - 1,
765                        };
766                        layers.push(layer);
767
768                        match self.composition_bands {
769                            Dimension::Layer => layer_pos += 1,
770                            Dimension::Time => time_pos += 1,
771                        }
772                    }
773
774                    match self.composition_sources {
775                        Dimension::Layer => {
776                            if self.composition_bands != Dimension::Layer {
777                                layer_pos += 1
778                            }
779                        }
780                        Dimension::Time => {
781                            if self.composition_bands != Dimension::Time {
782                                time_pos += 1
783                            }
784                        }
785                    }
786
787                    match self.composition_sources {
788                        Dimension::Layer => time_pos = 1,
789                        Dimension::Time => layer_pos = 1,
790                    }
791                }
792
793                let t_bands_elapsed = t_bands.elapsed().as_secs_f64() * 1000.0;
794
795                let n_times = layers.iter().map(|l| l.time_pos).max().unwrap() + 1;
796                let n_layers = layers.iter().map(|l| l.layer_pos).max().unwrap() + 1;
797
798                // Use layer_names from DataSource if provided, otherwise generate defaults
799                let layer_names = if !self.layer_names.is_empty() {
800                    self.layer_names.clone()
801                } else {
802                    (0..n_layers)
803                        .map(|i| format!("Layer_{}", i))
804                        .collect()
805                };
806
807                let block_shape = RasterDataShape {
808                    times: n_times,
809                    layers: n_layers,
810                    rows: n_rows,
811                    cols: n_cols,
812                };
813
814                let t_blocks = std::time::Instant::now();
815                blocks.extend(self.get_blocks(block_shape, &layers, self.epsg.try_into().unwrap()));
816                let t_blocks_elapsed = t_blocks.elapsed().as_secs_f64() * 1000.0;
817
818                debug!("build() timing: meta_read={:.1}ms loop2={:.1}ms template={:.1}ms bands={:.1}ms blocks={:.1}ms",
819                    t_meta_elapsed, t_loop2_elapsed, t_info_elapsed, t_bands_elapsed, t_blocks_elapsed);
820
821                let metadata = RasterMetadata {
822                    layers,
823                    shape: block_shape,
824                    block_size: self.block_size,
825                    epsg_code: self.epsg,
826                    geo_transform,
827                    overlap_size: self.overlap_size,
828                    date_indices: self.date_indices,
829                    layer_indices: layer_names,
830                    na_value: self.na_value,
831                };
832
833                RasterDataset {
834                    metadata,
835                    blocks,
836                    tmp_layers,
837                }
838            }
839            Some(ref feature_collection) => {
840                let mut sources = Vec::new();
841                for item in &feature_collection.items {
842                    for asset in item.assets.values() {
843                        debug!("Asset {:?}", asset);
844
845                        let href = &asset.href;
846                        sources.push(PathBuf::from(href));
847                    }
848                }
849                debug!("Assets added");
850
851                let tmp_layers = Arc::new(Mutex::new(Vec::new()));
852                let layers = Arc::new(Mutex::new(Vec::new()));
853
854                let mut blocks: Vec<crate::blocks::RasterBlock<T>> = Vec::new();
855
856                let extents: Vec<_> = sources
857                    .par_iter()
858                    .map(|source| Self::get_extent(source, self.epsg))
859                    .collect();
860
861                let global_extent = Self::get_max_extent(extents);
862
863                let original_times_indices = crate::stac_helpers::get_sorted_datetimes(feature_collection);
864
865                let target_time_indices = crate::stac_helpers::unique_datetimes_in_range(original_times_indices);
866
867                let asset_names = crate::stac_helpers::get_asset_names(feature_collection);
868                debug!("Asset names: {:?} ", asset_names);
869
870                target_time_indices
871                    .par_iter()
872                    .enumerate()
873                    .for_each(|(time_idx, time)| {
874                        let mut layer_idx = 0;
875
876                        let date_items = crate::stac_helpers::get_items_for_date(feature_collection, time);
877
878                        for asset_name in asset_names.iter() {
879                            let asset_bands = [1];
880
881                            for _ in asset_bands.iter() {
882                                let sources = crate::stac_helpers::get_sources_for_asset(&date_items, asset_name);
883                                let date_mosaic_tmp = PathBuf::from(create_temp_file("vrt"));
884                                mosaic(&sources, &date_mosaic_tmp, self.epsg, None, None).unwrap();
885                                let mut single_band = date_mosaic_tmp;
886                                let raster_info = read_basic_raster_info(&single_band);
887                                let current_epsg = raster_info.epsg_code;
888
889                                let target_epsg = self.epsg;
890                                if current_epsg != target_epsg {
891                                    single_band = warp(single_band, None, target_epsg);
892                                }
893
894                                let current_resoultion = read_basic_raster_info(&single_band).resolution();
895                                let target_resolution = self.resolution;
896                                let tr = if current_resoultion == target_resolution {
897                                    current_resoultion
898                                } else {
899                                    target_resolution
900                                };
901
902                                let current_gt = read_basic_raster_info(&single_band).geo_transform_struct();
903                                let target_gt = self.geo_transform;
904                                let mut local_extent = global_extent.clone();
905                                let mut local_tr = tr;
906
907                                if (current_gt != target_gt)
908                                    & (target_gt != GeoTransform::default())
909                                {
910                                    let x_ll = target_gt.x_ul
911                                        + (self.image_size.cols as f64 * target_gt.x_res);
912                                    let y_ll = target_gt.y_ul
913                                        + (self.image_size.rows as f64 * target_gt.y_res);
914                                    local_extent = crate::metadata::Extent {
915                                        xmin: (target_gt.x_ul * 100.).round() / 100.,
916                                        ymin: (y_ll * 100.).round() / 100.,
917                                        xmax: (x_ll * 100.).round() / 100.,
918                                        ymax: (target_gt.y_ul * 100.).round() / 100.,
919                                    };
920                                    local_tr = ImageResolution {
921                                        x: (target_gt.x_res * 100.).round() / 100.,
922                                        y: (target_gt.y_res * 100.).round() / 100.,
923                                    };
924                                }
925
926                                single_band = warp_with_te_tr(single_band, &local_extent, local_tr);
927
928                                let layer = Layer {
929                                    source: single_band.clone(),
930                                    time_pos: time_idx,
931                                    layer_pos: layer_idx,
932                                };
933
934                                tmp_layers.lock().unwrap().push(single_band);
935                                layers.lock().unwrap().push(layer);
936                                layer_idx += 1;
937                            }
938                        }
939                    });
940                let tmp_layers = Arc::try_unwrap(tmp_layers).unwrap().into_inner().unwrap();
941                let layers = Arc::try_unwrap(layers).unwrap().into_inner().unwrap();
942
943                // update image size and geotransform fields.
944                let info = read_basic_raster_info(&layers[0].source);
945                self.image_size = info.image_size();
946                self.geo_transform = info.geo_transform_struct();
947
948                // Start computing the metadata
949                let n_times = target_time_indices.len();
950
951                let mut n_layers = 0;
952                for (_, v) in self.bands.clone() {
953                    n_layers += v.len();
954                }
955                let block_shape = RasterDataShape {
956                    times: n_times,
957                    layers: n_layers,
958                    rows: self.image_size.rows,
959                    cols: self.image_size.cols,
960                };
961                blocks.extend(self.get_blocks(block_shape, &layers, self.epsg.try_into().unwrap()));
962                let target_time_indices: Vec<DateType> = target_time_indices
963                    .into_iter()
964                    .map(DateType::Date)
965                    .collect();
966
967                let metadata = RasterMetadata {
968                    layers,
969                    shape: block_shape,
970                    block_size: self.block_size,
971                    epsg_code: self.epsg,
972                    geo_transform: self.geo_transform,
973                    overlap_size: self.overlap_size,
974                    date_indices: target_time_indices,
975                    layer_indices: asset_names,
976                    na_value: T::zero(),
977                };
978
979                RasterDataset {
980                    metadata,
981                    blocks,
982                    tmp_layers,
983                }
984            }
985        };
986        raster_dataset
987    }
988}
989
990// ─── Shared helper functions (used by both method and free-function paths) ───
991
992/// Computes overlap for a block based on its position in the grid.
993pub(crate) fn get_overlap(
994    image_size: ImageSize,
995    block_size: BlockSize,
996    block_col_row: (usize, usize),
997    overlap_size: usize,
998) -> Overlap {
999    let mut overlap = Overlap {
1000        left: overlap_size,
1001        top: overlap_size,
1002        right: overlap_size,
1003        bottom: overlap_size,
1004    };
1005    let tile_col = block_col_row.0;
1006    let tile_row = block_col_row.1;
1007    let n_rows_tile = image_size.rows.div_ceil(block_size.rows);
1008    let n_cols_tile = image_size.cols.div_ceil(block_size.cols);
1009    let is_top = tile_row == 0;
1010    let is_bottom = tile_row + 1 == n_rows_tile;
1011    let is_left = tile_col == 0;
1012    let is_right = tile_col + 1 == n_cols_tile;
1013    if is_top {
1014        overlap.top = 0
1015    };
1016    if is_bottom {
1017        overlap.bottom = 0
1018    };
1019    if is_left {
1020        overlap.left = 0
1021    };
1022    if is_right {
1023        overlap.right = 0
1024    };
1025    overlap
1026}
1027
1028/// Computes the number of block columns for the given image and block sizes.
1029pub(crate) fn n_block_cols(image_size: ImageSize, block_size: BlockSize) -> usize {
1030    round::ceil(image_size.cols as f64 / block_size.cols as f64, 0) as usize
1031}
1032
1033/// Computes the number of block rows for the given image and block sizes.
1034pub(crate) fn n_block_rows(image_size: ImageSize, block_size: BlockSize) -> usize {
1035    round::ceil(image_size.rows as f64 / block_size.rows as f64, 0) as usize
1036}
1037
1038/// Converts a block ID to (col, row) position in the block grid.
1039fn block_col_row(id: usize, image_size: ImageSize, block_size: BlockSize) -> (usize, usize) {
1040    let ncols = n_block_cols(image_size, block_size);
1041    let block_row = id / ncols;
1042    let block_col = id - (block_row * ncols);
1043    (block_col, block_row)
1044}
1045
1046/// Computes the block-level GeoTransform from a read window and image GeoTransform.
1047fn get_block_gt(read_window: ReadWindow, geo_transform: GeoTransform) -> GeoTransform {
1048    let x_ul_image = geo_transform.x_ul;
1049    let y_ul_image = geo_transform.y_ul;
1050
1051    let x_res = geo_transform.x_res;
1052    let y_res = geo_transform.y_res;
1053    let x_pos = read_window.offset.cols;
1054    let y_pos = read_window.offset.rows;
1055
1056    let x_ul_block = x_ul_image + x_res * x_pos as f64;
1057    let y_ul_block = y_ul_image + y_res * y_pos as f64;
1058    GeoTransform {
1059        x_ul: x_ul_block,
1060        x_res,
1061        x_rot: geo_transform.x_rot,
1062        y_ul: y_ul_block,
1063        y_rot: geo_transform.x_rot,
1064        y_res,
1065    }
1066}
1067
1068/// Creates a RasterBlock from an ID and parameters.
1069/// Used by both `from_scratch()` and the builder's `block_from_id()` method.
1070fn block_from_id<U>(
1071    id: usize,
1072    block_shape: RasterDataShape,
1073    _layers: &[Layer],
1074    epsg_code: usize,
1075    image_size: ImageSize,
1076    block_size: BlockSize,
1077    overlap_size: usize,
1078    geo_transform: GeoTransform,
1079) -> crate::blocks::RasterBlock<U>
1080where
1081    U: RasterType,
1082{
1083    let tile_col = block_col_row(id, image_size, block_size).0;
1084    let tile_row = block_col_row(id, image_size, block_size).1;
1085    let overlap = get_overlap(
1086        image_size,
1087        block_size,
1088        block_col_row(id, image_size, block_size),
1089        overlap_size,
1090    );
1091
1092    let ul_x = (block_size.cols * tile_col) as isize;
1093    let ul_y = (block_size.rows * tile_row) as isize;
1094
1095    // now compute with overlap
1096    let ul_x_overlap = ul_x - overlap.left as isize;
1097    let ul_y_overlap = ul_y - overlap.top as isize;
1098    let lr_x_overlap = std::cmp::min(
1099        image_size.cols as isize,
1100        ul_x + block_size.cols as isize + overlap.right as isize,
1101    );
1102
1103    let lr_y_overlap = std::cmp::min(
1104        image_size.rows as isize,
1105        ul_y + block_size.rows as isize + overlap.bottom as isize,
1106    );
1107
1108    let win_width = lr_x_overlap - ul_x_overlap;
1109    let win_height = lr_y_overlap - ul_y_overlap;
1110
1111    let arr_width = win_width;
1112    let arr_height = win_height;
1113
1114    let read_window = ReadWindow {
1115        offset: Offset {
1116            cols: ul_x_overlap,
1117            rows: ul_y_overlap,
1118        },
1119        size: Size {
1120            cols: arr_width,
1121            rows: arr_height,
1122        },
1123    };
1124
1125    let block_geo_transform = get_block_gt(read_window, geo_transform);
1126    let empty_metadata: RasterMetadata<U> = RasterMetadata::new();
1127    crate::blocks::RasterBlock {
1128        block_index: id,
1129        read_window,
1130        overlap_size,
1131        geo_transform: block_geo_transform,
1132        overlap,
1133        shape: block_shape,
1134        epsg_code,
1135        raster_metadata: empty_metadata,
1136    }
1137}