Source code for flusstools.fuzzycorr.prepro

"""

Hints:
    - many class methods could be imported from geotools
    - already removed: clip_raster, which duplicated a function in raster_mgmt
"""


from geotools import *


class FuzzyPreProcessor:
    """Parent pre-processing structure for the comparison of numeric maps

    :param df: pandas.DataFrame, can be obtained by reading the textfile as pandas dataframe
    :param attribute (str): name of the attribute to burn in the raster (ex.: deltaZ, Z)
    :param crs (str): coordinate reference system
    :param nodatavalue (float): value to indicate nodata cells
    :param res (float): resolution of the cell (cell size), is the same for x and y
    :param ulc: tuple of floats, upper left corner coordinate, optional
    :param lrc: tuple of floats, lower right corner coordinate, optional
    """

    def __init__(self, df, attribute, crs, nodatavalue, res=None, ulc=(np.nan, np.nan), lrc=(np.nan, np.nan)):
        if not isinstance(attribute, str):
            print("ERROR: attribute must be a string - check the name in your textfile")

        self.crs = pyproj.CRS(crs)
        self.attribute = attribute
        self.nodatavalue = nodatavalue

        # Standardize the dataframe
        df.dropna(how='any', inplace=True, axis=0)
        # Create the dictionary with new label names and then rename for standardization
        new_names = {df.columns[0]: 'x', df.columns[1]: 'y', df.columns[2]: self.attribute}
        self.df = df.rename(columns=new_names)

        # Create geodataframe from the dataframe
        gdf = geopandas.GeoDataFrame(
            self.df, geometry=geopandas.points_from_xy(self.df.x, self.df.y))
        gdf.crs = self.crs
        self.gdf = gdf

        self.x = gdf.geometry.x.values
        self.y = gdf.geometry.y.values
        self.z = gdf[attribute].values

        if np.isfinite(ulc[0]) and np.isfinite(lrc[0]):
            self.xmax = lrc[0]
            self.xmin = ulc[0]
            self.ymax = ulc[1]
            self.ymin = lrc[1]
        else:
            self.xmax = self.gdf.geometry.x.max()
            self.xmin = self.gdf.geometry.x.min()
            self.ymax = self.gdf.geometry.y.max()
            self.ymin = self.gdf.geometry.y.min()

        self.extent = (self.xmin, self.xmax, self.ymin, self.ymax)

        if res is not None and np.isfinite(res):
            self.res = res
        else:
            # if res is not passed, use the x-extent divided by 1000 as cell size
            self.res = (self.xmax - self.xmin) / 1000

        self.ncol = int(np.ceil((self.xmax - self.xmin) / self.res))  # delx
        self.nrow = int(np.ceil((self.ymax - self.ymin) / self.res))  # dely
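    # Illustrative usage sketch (assumption, not part of the original module):
    # instantiate the pre-processor from a point text file. The file name
    # 'vali_meas_2013.csv', the column order (x, y, attribute), and the parameter
    # values below are hypothetical and only serve as an example.
    #
    #   import pandas as pd
    #   df = pd.read_csv('vali_meas_2013.csv')
    #   pre = FuzzyPreProcessor(df, attribute='dz', crs='EPSG:25833',
    #                           nodatavalue=-9999, res=0.5)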
    def points_to_grid(self):
        """Creates a grid of new points in the target resolution

        :returns: array of size nrow, ncol

        Hints:
            Read more at http://chris35wills.github.io/gridding_data/
        """
        # any points outside of this range are considered outliers and not used
        hrange = ((self.ymin, self.ymax), (self.xmin, self.xmax))

        zi, yi, xi = np.histogram2d(self.y, self.x, bins=(int(self.nrow), int(self.ncol)),
                                    weights=self.z, range=hrange)
        counts, _, _ = np.histogram2d(self.y, self.x, bins=(int(self.nrow), int(self.ncol)),
                                      range=hrange)

        # ignore division-by-zero warnings for empty cells, then restore the previous settings
        old_settings = np.seterr(divide='ignore', invalid='ignore')
        zi = np.divide(zi, counts)
        np.seterr(**old_settings)

        zi = np.ma.masked_invalid(zi)
        array = np.flipud(np.array(zi))  # flip rows so the array is oriented north-up
        return array
    def norm_array(self, method='linear'):
        """Normalizes the raw data in equally distanced points depending on the selected resolution

        :returns: interpolated and normalized array with selected resolution

        Hint:
            Read more at https://github.com/rosskush/skspatial
        """
        array = self.points_to_grid()
        x = np.arange(0, self.ncol)  # creates 1d array with values [0, ncol[
        y = np.arange(0, self.nrow)

        # mask invalid values (ex.: np.inf or np.nan)
        array = np.ma.masked_invalid(array)

        # creates a grid of coordinates (x, y) based on the x and y provided
        xx, yy = np.meshgrid(x, y)

        # get only the valid values
        x1 = xx[~array.mask]  # takes only unmasked points
        y1 = yy[~array.mask]
        newarr = array[~array.mask]

        out_array = interpolate.griddata((x1, y1), newarr.ravel(), (xx, yy),
                                         method=method, fill_value=self.nodatavalue)
        return out_array
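    # Illustrative sketch (assumption, not part of the original module): the raw
    # point cloud is first binned onto the target grid and then interpolated to
    # fill empty cells; 'pre' refers to the hypothetical instance created above.
    #
    #   raw_grid = pre.points_to_grid()                   # masked array with gaps
    #   interpolated = pre.norm_array(method='linear')    # gap-free array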
    def random_raster(self, raster_file, save_ascii=True, **kwargs):
        """Creates a raster of randomly generated values

        :param raster_file (str): path and filename of the output raster (*.tif)
        :param save_ascii (bool): True to also save an ASCII raster (*.asc)
        :kwarg minmax: tuple of floats, (zmin, zmax) min and max ranges for random values
        :returns: rasterio dataset of random values within the given range, of the same size and shape as the original
        """
        if kwargs.get('minmax') is None:
            zmin, zmax = self.z.min(), self.z.max()
        else:
            zmin, zmax = kwargs['minmax']

        array = np.random.uniform(low=zmin, high=zmax, size=(self.nrow, self.ncol))

        if '.' not in raster_file[-4:]:
            raster_file += '.tif'

        transform = rio.transform.from_origin(self.xmin, self.ymax, self.res, self.res)
        new_dataset = rio.open(raster_file, 'w', driver='GTiff',
                               height=array.shape[0], width=array.shape[1], count=1,
                               dtype=array.dtype, crs=self.crs, transform=transform,
                               nodata=self.nodatavalue)
        print('The array has size: ', np.shape(array))
        new_dataset.write(array, 1)
        new_dataset.close()

        if save_ascii:
            map_asc = str(Path(raster_file[0:-4] + '.asc'))
            gdal.Translate(map_asc, raster_file, format='AAIGrid')

        return new_dataset
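    # Illustrative sketch (assumption): generate a random benchmark raster either
    # within the data range or within an explicit (zmin, zmax) range via the
    # 'minmax' keyword; file names are hypothetical.
    #
    #   pre.random_raster('random_within_data_range.tif')
    #   pre.random_raster('random_custom_range.tif', minmax=(-0.5, 0.5))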
    def plain_raster(self, shapefile, raster_file, res):
        """Converts a shapefile (*.shp) to a GeoTIFF raster without normalizing

        :param shapefile (str): filename with path of the input shapefile (*.shp)
        :param raster_file (str): filename with path of the output raster (*.tif)
        :param res (float): resolution of the cell
        :returns None: saves the raster in the default directory
        """
        if '.' not in shapefile[-4:]:
            shapefile += '.shp'
        if '.' not in raster_file[-4:]:
            raster_file += '.tif'

        source_ds = ogr.Open(shapefile)
        source_layer = source_ds.GetLayer()
        x_min, x_max, y_min, y_max = source_layer.GetExtent()

        # Create target GeoTIFF
        cols = int((x_max - x_min) / res)
        rows = int((y_max - y_min) / res)
        _raster = gdal.GetDriverByName('GTiff').Create(raster_file, cols, rows, 1, gdal.GDT_Float32)
        # the origin is the upper left corner, so the y pixel size must be negative
        _raster.SetGeoTransform((x_min, res, 0, y_max, 0, -res))
        _band = _raster.GetRasterBand(1)
        _band.SetNoDataValue(self.nodatavalue)

        # Rasterize the selected attribute of the shapefile into the band
        gdal.RasterizeLayer(_raster, [1], source_layer, options=['ATTRIBUTE=' + self.attribute])
    def array2raster(self, array, raster_file, save_ascii=True):
        """Saves an interpolated array as a georeferenced raster

        :param array: numpy.ndarray to be written, e.g. the output of norm_array
        :param raster_file (str): path to save the raster file
        :param save_ascii (bool): True to also save an ASCII raster (*.asc)
        :returns: rasterio dataset of the saved raster

        Hint:
            Function will be moved to ``geotools/raster_mgmt`` in a future release (operated by Bea)
        """
        if '.' not in raster_file[-4:]:
            raster_file += '.tif'

        transform = rio.transform.from_origin(self.xmin, self.ymax, self.res, self.res)
        new_dataset = rio.open(raster_file, 'w', driver='GTiff',
                               height=array.shape[0], width=array.shape[1], count=1,
                               dtype=array.dtype, crs=self.crs, transform=transform,
                               nodata=self.nodatavalue)
        print(np.shape(array))
        new_dataset.write(array, 1)
        new_dataset.close()

        if save_ascii:
            map_asc = str(Path(raster_file[0:-4] + '.asc'))
            gdal.Translate(map_asc, raster_file, format='AAIGrid')

        return new_dataset
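    # Illustrative sketch (assumption): typical workflow for rasterizing the
    # normalized point data; the output file name is hypothetical.
    #
    #   arr = pre.norm_array(method='linear')
    #   pre.array2raster(arr, 'measurement_2013.tif', save_ascii=True)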
    def create_polygon(self, shape_polygon, alpha=np.nan):
        """Creates a polygon surrounding a cloud of shapepoints

        :param shape_polygon (str): path to save the shapefile
        :param alpha (float): alpha value controlling how tightly the alphashape (polygon) fits the point cloud
        :returns None: saves the polygon (*.shp) with the selected filename

        Hint:
            Function can be moved to geotools/shp_mgmt
        """
        if np.isfinite(alpha):
            try:
                polygon = alphashape.alphashape(self.gdf, alpha)
                polygon.crs = self.crs
                polygon.to_file(shape_polygon)
                print('Polygon *.shp saved successfully.')
            except FileNotFoundError as e:
                print(e)
        else:
            try:
                polygon = alphashape.alphashape(self.gdf)
            except FileNotFoundError as e:
                print(e)
            else:
                polygon.crs = self.crs
                polygon.to_file(shape_polygon)
                print('Polygon *.shp saved successfully.')
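# Illustrative sketch (assumption, not part of the original module): derive a
# concave hull around the measurement points, e.g. to later clip rasters to the
# surveyed area; the alpha value and file name are hypothetical.
#
#   pre.create_polygon('survey_area.shp', alpha=4.5)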
class CategorizationPreProcessor:
    """Pre-processing structure for categorizing (classifying) an existing raster into class bins

    :param raster (str): path of the raster to be categorized
    """

    def __init__(self, raster):
        self.raster = raster
        with rio.open(self.raster) as src:
            raster_np = src.read(1, masked=True)
            self.nodatavalue = src.nodata  # store the nodatavalue of the raster
            self.meta = src.meta.copy()
        self.array = raster_np
    def nb_classes(self, n_classes):
        """Generates class bins based on the Natural Breaks method

        :param n_classes: integer, number of classes
        :returns: list of optimized bins
        """
        # Classification based on Natural Breaks
        array_values = self.array[~self.array.mask].ravel()
        breaks = mc.NaturalBreaks(array_values, k=n_classes)
        # bins are half-open intervals (], (], ... (], always including the right edge
        print('The upper bounds of the classes are:', breaks.bins)
        print('Number of counts for each class, respectively:', breaks.counts)
        print('max: ', array_values.max(), 'min: ', array_values.min())
        return breaks.bins
    def categorize_raster(self, class_bins, map_out, save_ascii=True):
        """Classifies the raster according to the classification bins

        :param class_bins: list of floats with the class bin edges
        :param map_out (str): path and filename of the output (classified) raster
        :param save_ascii (bool): True to also save an ASCII raster (*.asc)
        :returns None: saves the classified raster in the chosen directory
        """
        # Classify the original image array (np.digitize assigns nodatavalues to class 0)
        raster_fi = np.ma.filled(self.array, fill_value=-np.inf)
        # bins[i-1] < array <= bins[i]
        raster_class = np.digitize(raster_fi, class_bins, right=True)

        # Assign nodatavalues back to the array
        raster_ma = np.ma.masked_where(raster_class == 0, raster_class, copy=True)

        # Fill nodatavalues into the array
        raster_ma_fi = np.ma.filled(raster_ma, fill_value=self.nodatavalue)

        if raster_ma_fi.min() == self.nodatavalue or type(raster_ma_fi) != np.ma.MaskedArray:
            with rio.open(map_out, 'w', **self.meta) as outf:
                outf.write(raster_ma_fi.astype(rio.float64), 1)
        else:
            raise TypeError("Error filling NoDataValue to raster file")

        if save_ascii:
            map_asc = str(Path(map_out[0:-4] + '.asc'))
            gdal.Translate(map_asc, map_out, format='AAIGrid')
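# Illustrative sketch (assumption, not part of the original module): categorize a
# continuous raster into n classes with Natural Breaks and write the classified
# map; file names and the number of classes are hypothetical.
#
#   cat = CategorizationPreProcessor('measurement_2013.tif')
#   bins = cat.nb_classes(n_classes=5)
#   cat.categorize_raster(bins, 'measurement_2013_classified.tif', save_ascii=True)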