Exercise 6: Advanced Dashboarding
This exercise is entirely freeform. Get into groups of 3-4 people and build a dashboard using everything you have learned in this tutorial. By the end of the exercise you should have a dashboard that:

- Uses datashading to render the whole dataset
- Builds a pipeline using the .apply method
- Filters the data, either by cross-filtering with a linked stream (e.g. a BoundsXY stream) or with a widget (e.g. a RangeSlider)
- Uses a widget to control some aspect of the styling of the plot (e.g. to select a colormap, color, or size)
- Is servable by running panel serve Advanced_Dashboarding.ipynb in the exercise directory
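On the last point: panel serve renders every object you mark with .servable(), so the top-level layout of your dashboard needs a call like the one in this minimal sketch:

import panel as pn

pn.extension()

# Any object marked .servable() is rendered when you run
# `panel serve Advanced_Dashboarding.ipynb` from the exercise directory
pn.Column("## My dashboard").servable()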
import colorcet as cc # noqa
import holoviews as hv # noqa
import numpy as np # noqa
import dask.dataframe as dd
import panel as pn
import xarray as xr
import hvplot.pandas # noqa: API import
import hvplot.xarray # noqa: API import
pn.extension()
# Run this if you haven't already to fetch earthquake and population data files
from fetch_data import *
get_earthquake_data()
get_population_data()
Earthquakes dataset present, skipping download
As a starting point we will load the data; everything else is up to you:
df = (
    dd.read_parquet("../../data/earthquakes.parq", index='index')  # lazily read the earthquake table
    .repartition(npartitions=4)
    .persist()  # keep the partitions in memory for fast interaction
)

# Requires a netCDF backend: if xarray raises a ValueError about IO backends,
# install netcdf4 (or pass engine='netcdf4' explicitly)
ds = xr.open_dataarray(
    "../../data/gpw_v4_population_density_rev11_2010_2pt5_min.nc",
)
cleaned_ds = ds.where(ds.values != ds.nodatavals).sel(band=1)  # mask nodata cells, select the single band
cleaned_ds.name = "population"
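To make the checklist above concrete, here is one possible skeleton, a sketch rather than a reference solution. It assumes the longitude, latitude, and mag columns of the earthquake table loaded above, filters on magnitude with a RangeSlider through the .apply method, datashades the result with rasterize, and links the colormap to a Select widget:

import holoviews as hv
import panel as pn
from holoviews.operation.datashader import rasterize

mag_slider = pn.widgets.RangeSlider(name='Magnitude', start=0, end=10, value=(4, 10))
cmap_select = pn.widgets.Select(name='Colormap', options=['fire', 'bgy', 'kbc'])

points = hv.Points(df, ['longitude', 'latitude'], ['mag'])

def filter_mag(points, mag_range):
    low, high = mag_range
    return points.select(mag=(low, high))

# .apply wraps the widget in a stream, so the filter re-runs whenever the slider moves
filtered = points.apply(filter_mag, mag_range=mag_slider)
rasterized = rasterize(filtered).opts(width=700, height=400, colorbar=True)
# .apply.opts links the cmap option to the Select widget in the same way
shaded = rasterized.apply.opts(cmap=cmap_select)

pn.Column(pn.Row(mag_slider, cmap_select), shaded).servable()

The same .apply pattern extends to any number of widgets; add more keyword arguments to filter on other columns.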
Not sure what to build? Here are some ideas:

- Build a dashboard with a pipeline that filters the data on one or more of the columns (e.g. magnitude using a RangeSlider or time using a DateRangeSlider) and then datashades it
- Build a dashboard with multiple views of the data (e.g. longitude vs. latitude, magnitude vs. depth, etc.) and cross-filter the data using BoundsXY streams (see the Glaciers notebook for reference, and the sketch after this list)
- Build a dashboard that lets you select multiple earthquakes using a 'box_select' tool and a Selection1D stream, and compute statistics on them
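As a hedged sketch of the cross-filtering idea (assuming the longitude, latitude, and depth columns used above), a BoundsXY stream attached to a datashaded scatter can drive a histogram of the depths inside the selected box:

import holoviews as hv
import panel as pn
from holoviews.operation.datashader import rasterize

points = rasterize(hv.Points(df, ['longitude', 'latitude'])).opts(
    width=600, height=400, tools=['box_select'])

# BoundsXY emits (left, bottom, right, top) whenever a box is drawn
bounds = hv.streams.BoundsXY(source=points, bounds=(-180, -90, 180, 90))

def selected_depths(bounds):
    lon0, lat0, lon1, lat1 = bounds
    subset = df[
        (df.longitude > lon0) & (df.longitude < lon1) &
        (df.latitude > lat0) & (df.latitude < lat1)
    ].compute()  # the selection is small enough to pull into pandas
    return subset.hvplot.hist('depth')

pn.Row(points, hv.DynamicMap(selected_depths, streams=[bounds]))

The Selection1D variant is similar, but the stream must be attached to a non-datashaded element so that individual point indices exist.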