Exercise 6: Advanced Dashboarding
This exercise is entirely freeform. Get into groups of 3-4 people and build a dashboard using everything you have learned in this tutorial. By the end of the exercise you should have a dashboard that:

- Uses datashading to render the whole dataset
- Builds a pipeline using the .apply method
- Filters the data, either by cross-filtering with a linked stream (e.g. a BoundsXY stream) or with a widget (e.g. a RangeSlider)
- Uses a widget to control some aspect of the styling of the plot (e.g. to select a colormap, color, or size)
- Is servable by running panel serve Advanced_Dashboarding.ipynb in the exercise directory
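On the last point: panel serve renders every object you mark with .servable(), so the top-level layout of your dashboard needs a call like the one in this minimal sketch:

import panel as pn

pn.extension()

# Any object marked .servable() is rendered when you run
# `panel serve Advanced_Dashboarding.ipynb` from the exercise directory
pn.Column("## My dashboard").servable()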
import colorcet as cc # noqa
import holoviews as hv # noqa
import numpy as np # noqa
import dask.dataframe as dd
import panel as pn
import xarray as xr
import hvplot.pandas # noqa: API import
import hvplot.xarray # noqa: API import
pn.extension()
# Run this if you haven't already to fetch earthquake and population data files
from fetch_data import *
get_earthquake_data()
get_population_data()
Earthquakes dataset present, skipping download
As a starting point we will load the data; everything else is up to you:
df = (
    dd.read_parquet("../../data/earthquakes.parq", index='index')  # lazily read the earthquake table
    .repartition(npartitions=4)
    .persist()  # keep the partitions in memory for fast interaction
)

# Requires a netCDF backend: if xarray raises a ValueError about IO backends,
# install netcdf4 (or pass engine='netcdf4' explicitly)
ds = xr.open_dataarray(
    "../../data/gpw_v4_population_density_rev11_2010_2pt5_min.nc",
)
cleaned_ds = ds.where(ds.values != ds.nodatavals).sel(band=1)  # mask nodata cells, select the single band
cleaned_ds.name = "population"
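To make the checklist above concrete, here is one possible skeleton, a sketch rather than a reference solution. It assumes the longitude, latitude, and mag columns of the earthquake table loaded above, filters on magnitude with a RangeSlider through the .apply method, datashades the result with rasterize, and links the colormap to a Select widget:

import holoviews as hv
import panel as pn
from holoviews.operation.datashader import rasterize

mag_slider = pn.widgets.RangeSlider(name='Magnitude', start=0, end=10, value=(4, 10))
cmap_select = pn.widgets.Select(name='Colormap', options=['fire', 'bgy', 'kbc'])

points = hv.Points(df, ['longitude', 'latitude'], ['mag'])

def filter_mag(points, mag_range):
    low, high = mag_range
    return points.select(mag=(low, high))

# .apply wraps the widget in a stream, so the filter re-runs whenever the slider moves
filtered = points.apply(filter_mag, mag_range=mag_slider)
rasterized = rasterize(filtered).opts(width=700, height=400, colorbar=True)
# .apply.opts links the cmap option to the Select widget in the same way
shaded = rasterized.apply.opts(cmap=cmap_select)

pn.Column(pn.Row(mag_slider, cmap_select), shaded).servable()

The same .apply pattern extends to any number of widgets; add more keyword arguments to filter on other columns.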
Not sure what to build? Here are some ideas:

- Build a dashboard with a pipeline that filters the data on one or more of the columns (e.g. magnitude using a RangeSlider or time using a DateRangeSlider) and then datashades it
- Build a dashboard with multiple views of the data (e.g. longitude vs. latitude, magnitude vs. depth, etc.) and cross-filter the data using BoundsXY streams (see the Glaciers notebook for reference, and the sketch after this list)
- Build a dashboard that lets you select multiple earthquakes using a 'box_select' tool and a Selection1D stream, and compute statistics on them
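As a hedged sketch of the cross-filtering idea (assuming the longitude, latitude, and depth columns used above), a BoundsXY stream attached to a datashaded scatter can drive a histogram of the depths inside the selected box:

import holoviews as hv
import panel as pn
from holoviews.operation.datashader import rasterize

points = rasterize(hv.Points(df, ['longitude', 'latitude'])).opts(
    width=600, height=400, tools=['box_select'])

# BoundsXY emits (left, bottom, right, top) whenever a box is drawn
bounds = hv.streams.BoundsXY(source=points, bounds=(-180, -90, 180, 90))

def selected_depths(bounds):
    lon0, lat0, lon1, lat1 = bounds
    subset = df[
        (df.longitude > lon0) & (df.longitude < lon1) &
        (df.latitude > lat0) & (df.latitude < lat1)
    ].compute()  # the selection is small enough to pull into pandas
    return subset.hvplot.hist('depth')

pn.Row(points, hv.DynamicMap(selected_depths, streams=[bounds]))

The Selection1D variant is similar, but the stream must be attached to a non-datashaded element so that individual point indices exist.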