Collection of public redshift catalogs made available by spectroscopic surveys combined with photometric data from DES DR2.
Contact: Julia Gschwend (julia@linea.org.br)
Latest verified run: 19-Jul-2024
If you use this dataset to generate scientific results, please add a reference to Gschwend et al., 2018 and acknowledge LIneA in the acknowledgments section of your publication. For instance:
'This research used computational resources from the Associação Laboratório Interinstitucional de e-Astronomia (LIneA) with the financial support of INCT do e-Universo (Process no. 465376/2014-2).'
The training set was created based on the spatial correspondence between the objects present in the redshift catalog described above and the object table (coadd_objects) of DES DR2, with a search radius of 1.0 arcsec. The aim was to include the columns of photometric measurements that are useful for calculating photo-z (apparent magnitudes and their respective errors).
The characterization of the spectroscopic redshifts catalog is available in a separate notebook. If you have questions, please get in touch with us.
Imports and configs
# General
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys
# Astropy
from astropy import units as u
from astropy.coordinates import SkyCoord
#from astropy.units.quantity import Quantity
# Bokeh
import bokeh
from bokeh.io import output_notebook, show, output_file, reset_output
#from bokeh.models import ColumnDataSource, Range1d, HoverTool
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, ColorBar
from bokeh.models import CDSView, GroupFilter
from bokeh.plotting import figure, show, gridplot, output_notebook
from bokeh.models import Range1d, LinearColorMapper, ColorBar
from bokeh.transform import factor_cmap
from bokeh.plotting import show
output_notebook()
# HoloViews
import holoviews as hv
from holoviews import streams, opts
from holoviews.operation.datashader import datashade, dynspread
from holoviews.plotting.util import process_cmap
# PZ Server
from pzserver import PzServer
# Read the personal access token from a local file. Surrounding whitespace
# (e.g. the trailing newline most editors append) is stripped so that it is
# not passed on as part of the token.
with open('token.txt', 'r') as file:
    token = file.read().strip()
pz_server = PzServer(token=token, host="pz-dev") # "pz-dev" is the temporary host for test phase
# Configs
import warnings
warnings.filterwarnings('ignore')
sns.set(color_codes=True, font_scale=1.5)
sns.set_style('whitegrid')
plt.rcParams.update({'figure.max_open_warning': 0})
hv.extension('bokeh')
%reload_ext autoreload
%autoreload 2
%matplotlib inline
print('Python version: ' + sys.version)
print('Numpy version: ' + np.__version__)
print('Bokeh version: ' + bokeh.__version__)
print('HoloViews version: ' + hv.__version__)
Python version: 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:36:13) [GCC 12.3.0] Numpy version: 1.26.4 Bokeh version: 3.4.1 HoloViews version: 1.18.3
Auxiliary file: des-round19-poly.txt (contours of the area covered by the survey, i.e., DES footprint, 2019 version)
Download the file from the repository kadrlica/skymap on GitHub:
! wget https://raw.githubusercontent.com/kadrlica/skymap/master/skymap/data/des-round19-poly.txt
--2024-07-19 22:29:46-- https://raw.githubusercontent.com/kadrlica/skymap/master/skymap/data/des-round19-poly.txt Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ... Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected. HTTP request sent, awaiting response... 200 OK Length: 9947 (9.7K) [text/plain] Saving to: ‘des-round19-poly.txt.13’ des-round19-poly.tx 100%[===================>] 9.71K --.-KB/s in 0s 2024-07-19 22:29:46 (37.2 MB/s) - ‘des-round19-poly.txt.13’ saved [9947/9947]
Read DES footprint file des-round19-poly.txt:
# Load the footprint polygon: two columns, used below as RA and Dec in degrees.
foot_ra, foot_dec = np.loadtxt('des-round19-poly.txt', unpack=True)

# The RA sign is flipped when building the coordinates; the resulting RA is
# then wrapped at 180 deg before being stored in the DataFrame.
foot_coords = SkyCoord(ra=-foot_ra * u.degree,
                       dec=foot_dec * u.degree,
                       frame='icrs')
foot_ra_wrapped = foot_coords.ra.wrap_at(180 * u.degree)
foot_df = pd.DataFrame({
    'foot_ra': np.array(foot_ra_wrapped),
    'foot_dec': np.array(foot_coords.dec),
})
Retrieve training set from PZ Server
# Request the product named '27_public_training_set_des_dr2' from the PZ Server.
training_set_obj = pz_server.get_product('27_public_training_set_des_dr2')
Connecting to PZ Server... column_list None Done!
# Show the metadata attached to the retrieved product.
training_set_obj.display_metadata()
| key | value |
|---|---|
| id | 27 |
| release | None |
| product_type | Training Set |
| uploaded_by | gschwend |
| internal_name | 27_public_training_set_des_dr2 |
| product_name | Public Training Set DES DR2 |
| official_product | False |
| pz_code | |
| description | Result of cross-matching the public spec-z compilation with DES DR2 coadd objects catalog. |
| created_at | 2023-10-17T21:32:21.727199Z |
| main_file | public_pz_training_set.pq |
# The product's table is exposed through its `data` attribute.
training_set = training_set_obj.data
type(training_set)  # displayed as the cell output to confirm the container type
pandas.core.frame.DataFrame
# Sanity check: the cell stops here if the table does not have 592493 rows.
assert len(training_set) == 592493
# Column dtypes and non-null counts; "deep" also accounts for the memory held
# by object (string) columns.
training_set.info(memory_usage="deep")
<class 'pandas.core.frame.DataFrame'> RangeIndex: 592493 entries, 0 to 592492 Data columns (total 18 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 coadd_object_id 592493 non-null int64 1 ra 592493 non-null float64 2 dec 592493 non-null float64 3 z 592493 non-null float64 4 err_z 592493 non-null float64 5 flag_des 592493 non-null int64 6 survey 592493 non-null object 7 flag_survey 592493 non-null float64 8 mag_auto_g_dered 592493 non-null float64 9 mag_auto_r_dered 592493 non-null float64 10 mag_auto_i_dered 592493 non-null float64 11 mag_auto_z_dered 592493 non-null float64 12 mag_auto_y_dered 592493 non-null float64 13 magerr_auto_g 592493 non-null float64 14 magerr_auto_r 592493 non-null float64 15 magerr_auto_i 592493 non-null float64 16 magerr_auto_z 592493 non-null float64 17 magerr_auto_y 592493 non-null float64 dtypes: float64(15), int64(2), object(1) memory usage: 113.1 MB
# Preview the first rows of the table.
training_set.head()
| coadd_object_id | ra | dec | z | err_z | flag_des | survey | flag_survey | mag_auto_g_dered | mag_auto_r_dered | mag_auto_i_dered | mag_auto_z_dered | mag_auto_y_dered | magerr_auto_g | magerr_auto_r | magerr_auto_i | magerr_auto_z | magerr_auto_y | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1011353819 | 352.677720 | -41.760673 | 0.2143 | 99.0 | 4 | 2DF | 4.0 | 18.490623 | 17.264482 | 16.840298 | 16.531605 | 16.396919 | 0.004156 | 0.002004 | 0.002048 | 0.002808 | 0.007311 |
| 1 | 1012577456 | 352.603841 | -41.669178 | 0.0867 | 99.0 | 4 | 2DF | 4.0 | 18.502615 | 18.069166 | 17.864685 | 17.740002 | 17.689028 | 0.002561 | 0.002389 | 0.002843 | 0.004955 | 0.013531 |
| 2 | 1012581210 | 352.783951 | -41.707007 | 0.0937 | 99.0 | 4 | 2DF | 4.0 | 19.023310 | 18.133617 | 17.750835 | 17.465248 | 17.351370 | 0.003100 | 0.001907 | 0.002012 | 0.003469 | 0.009423 |
| 3 | 1012565977 | 352.797599 | -41.553470 | 0.1886 | 99.0 | 3 | 2DF | 3.0 | 18.697731 | 17.617779 | 17.249151 | 16.962301 | 16.844101 | 0.004236 | 0.002269 | 0.002361 | 0.003703 | 0.009919 |
| 4 | 1012559214 | 352.703943 | -41.483863 | 0.0425 | 99.0 | 4 | 2DF | 4.0 | 18.547184 | 18.249432 | 18.208136 | 18.075872 | 18.044237 | 0.004910 | 0.004967 | 0.007760 | 0.014178 | 0.040168 |
Meaning of columns:
| Column name | Meaning |
|---|---|
| coadd_object_id | Unique object identifier in the DES DR2 photometric catalog (coadd_objects table). |
| ra | Right Ascension (degrees) |
| dec | Declination (degrees) |
| z | Redshift |
| err_z | Redshift error. When unavailable, replaced by 99.0 |
| flag_des | Standardized quality marker (details above) |
| survey | Name of the project or survey of origin. |
| flag_survey | Original quality flag given by the origin survey. |
| mag_auto_[g,r,i,z,y]_dered | Apparent magnitude in bands [g, r, i, z, y], corrected for reddening |
| magerr_auto_[g,r,i,z,y] | Apparent magnitude error in bands [g, r, i, z, y] |
Compute colors $(g-r)$ and $(r-i)$
# Magnitudes equal to 99. are treated as placeholders for bands without a
# valid measurement (the plotting cells exclude them with `!= 99.`).
# Subtracting a placeholder would yield a meaningless color, so those rows
# get NaN instead.
MISSING_MAG = 99.
g_mag = training_set['mag_auto_g_dered']
r_mag = training_set['mag_auto_r_dered']
i_mag = training_set['mag_auto_i_dered']

# (g - r) and (r - i), defined only where both magnitudes are measured.
training_set['gmr'] = (g_mag - r_mag).where((g_mag != MISSING_MAG) & (r_mag != MISSING_MAG))
training_set['rmi'] = (r_mag - i_mag).where((r_mag != MISSING_MAG) & (i_mag != MISSING_MAG))
Basic statistics
# Summary statistics (count, mean, std, quantiles, extremes) of the numeric columns.
training_set.describe()
| coadd_object_id | ra | dec | z | err_z | flag_des | flag_survey | mag_auto_g_dered | mag_auto_r_dered | mag_auto_i_dered | mag_auto_z_dered | mag_auto_y_dered | magerr_auto_g | magerr_auto_r | magerr_auto_i | magerr_auto_z | magerr_auto_y | gmr | rmi | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 5.924930e+05 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 | 592493.000000 |
| mean | 1.168246e+09 | 102.918168 | -8.280426 | 0.506927 | 88.805911 | 3.889619 | 3.890351 | 21.190736 | 20.096983 | 19.560508 | 19.283521 | 19.909032 | 0.210438 | 0.047986 | 0.042376 | 0.086674 | 1.190375 | 1.093753 | 0.536475 |
| std | 1.538701e+08 | 133.019805 | 14.442487 | 0.364025 | 30.116786 | 0.313364 | 0.591797 | 3.754093 | 2.509359 | 2.358146 | 2.555866 | 8.020347 | 4.822718 | 1.531331 | 1.232518 | 12.413287 | 26.793895 | 2.868310 | 1.260123 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 50% | 1.181015e+09 | 35.338629 | -1.853868 | 0.505440 | 99.000000 | 4.000000 | 4.000000 | 21.853054 | 20.398115 | 19.660595 | 19.293842 | 19.181950 | 0.034565 | 0.013383 | 0.011344 | 0.015025 | 0.046342 | 0.859777 | 0.493069 |
| 75% | 1.278415e+09 | 74.161748 | 0.509743 | 0.754656 | 99.000000 | 4.000000 | 4.000000 | 22.671642 | 21.872482 | 21.251059 | 20.961576 | 20.922132 | 0.061131 | 0.036342 | 0.033264 | 0.046926 | 0.152634 | 1.441675 | 0.695646 |
| max | 1.700549e+09 | 359.999898 | 5.370753 | 5.810000 | 704.016000 | 4.000000 | 14.000000 | 99.000000 | 99.000000 | 99.000000 | 99.000000 | 99.000000 | 2242.325439 | 592.804504 | 416.795624 | 9464.922852 | 14221.452148 | 84.850241 | 84.985229 |
8 rows × 19 columns
Below is a brief characterization of the data contained in the compiled collection of spectroscopic catalogs.
# Fraction of the training set drawn at random for quicker plots.
frac = 0.06
train_sample_for_plots = training_set.sample(frac=frac, axis='index')
assert len(train_sample_for_plots) == round(frac * len(training_set))
print(len(train_sample_for_plots))
# Use an independent copy of the full table: the following cells overwrite
# `ra`/`dec` and filter rows of `train_sample_for_plots`, which would silently
# change `training_set` too if both names pointed to the same DataFrame.
train_sample_for_plots = training_set.copy() # comment this line to use a fraction of the sample
35550
# Build sky coordinates from the sample with the RA sign flipped, then write
# the plotting coordinates back into the sample: RA wrapped at 180 deg and
# Dec as held by `coords`.
ra_deg = np.array(train_sample_for_plots['ra'])
dec_deg = np.array(train_sample_for_plots['dec'])
coords = SkyCoord(ra=-ra_deg * u.degree, dec=dec_deg * u.degree, frame='icrs')

train_sample_for_plots['ra'] = np.array(coords.ra.wrap_at(180 * u.degree))
train_sample_for_plots['dec'] = np.array(coords.dec)
%%time
fig = plt.figure(figsize=[14,6])
ax = fig.add_subplot(111, projection='mollweide')
ra_rad = coords.ra.wrap_at(180 * u.deg).radian
dec_rad = coords.dec.radian
plt.plot(ra_rad, dec_rad, '.', alpha=0.1)
plt.plot(-np.radians(foot_ra), np.radians(foot_dec), '-', color='darkorange')
org=0.0
tick_labels = np.array([150, 120, 90, 60, 30, 0, 330, 300, 270, 240, 210])
tick_labels = np.remainder(tick_labels+360+org,360)
ax.set_xticklabels(tick_labels) # we add the scale on the x axis
ax.set_xlabel('R.A.')
ax.xaxis.label.set_fontsize(14)
ax.set_ylabel('Dec.')
ax.yaxis.label.set_fontsize(14)
ax.grid(True)
plt.tight_layout()
CPU times: user 208 ms, sys: 14 ms, total: 222 ms Wall time: 219 ms
Redshift distribution
# Shared spec-z dimension, reused by the redshift histograms.
redshift = hv.Dimension('z', label='spec-z', range=(0.0, 2.0))

# Histogram of the sample redshifts; 'fd' lets NumPy choose the bin width
# with the Freedman-Diaconis rule.
count, z_bin = np.histogram(train_sample_for_plots['z'], bins='fd')
z_distribution = hv.Histogram((count, z_bin), kdims=redshift)
z_distribution = z_distribution.opts(
    title='Distribuição de redshifts',
    xlabel='spec-z',
    height=400,
    width=800,
)
z_distribution
# Number of objects for each value of flag_des.
training_set.flag_des.value_counts()
flag_des 4 527093 3 65400 Name: count, dtype: int64
def fmt(x):
    """Format a number as a percentage label with one decimal place.

    Used as the ``autopct`` callback of the pie chart.
    """
    return f'{x:.1f}%'
# Count the objects with flag_des equal to 3 and to 4, and show the
# proportions as a pie chart.
flag_values = [3, 4]
counts = pd.DataFrame(
    data={'flag_des': [int((training_set['flag_des'] == flag).sum())
                       for flag in flag_values]},
    index=flag_values,
)
counts.plot.pie(y='flag_des', labels=None, autopct=fmt,
                colors=['darkorange', 'steelblue'])
counts
| flag_des | |
|---|---|
| 3 | 65400 |
| 4 | 527093 |
Redshift distributions depending on the quality flag
# One spec-z histogram per quality flag, laid out side by side.
z_flag4 = train_sample_for_plots.loc[train_sample_for_plots['flag_des'] == 4, 'z']
(count4, z_bin4) = np.histogram(z_flag4, bins='fd')
z_distribution4 = hv.Histogram((count4, z_bin4), kdims=redshift).opts(
    title='flag_des = 4', xlabel='spec-z', height=400, width=400, xlim=(0., 2.))

z_flag3 = train_sample_for_plots.loc[train_sample_for_plots['flag_des'] == 3, 'z']
(count3, z_bin3) = np.histogram(z_flag3, bins='fd')
z_distribution3 = hv.Histogram((count3, z_bin3), kdims=redshift).opts(
    title='flag_des = 3', color='darkorange', xlabel='spec-z', height=400, width=400,
    xlim=(0., 2.))

# Panel size used for the combined layout.
layout_size = dict(height=350, width=450)
z_dist_by_flag = (z_distribution4.options(**layout_size)
                  + z_distribution3.options(**layout_size))
z_dist_by_flag
bands = ['g', 'r', 'i', 'z', 'y']
fig = plt.figure(figsize=[12,4])

# Left panel: magnitude distribution per band, skipping the 99. entries.
ax_mag = plt.subplot(1,2,1)
for band in bands:
    mag = train_sample_for_plots[f'mag_auto_{band}_dered']
    ax_mag.hist(mag[mag != 99.], bins=30, histtype='step', lw=2, log=True)
ax_mag.set_xlabel('magnitude')
ax_mag.set_ylabel('counts')
ax_mag.set_xlim(12, 28)
ax_mag.set_ylim(10)

# Right panel: magnitude-error distribution per band, restricted to
# errors below 1 for objects whose magnitude is not 99.
ax_err = plt.subplot(1,2,2)
for band in bands:
    mag = train_sample_for_plots[f'mag_auto_{band}_dered']
    magerr = train_sample_for_plots[f'magerr_auto_{band}']
    ax_err.hist(magerr[(mag != 99.) & (magerr < 1.)],
                bins=30, label=band, histtype='step', lw=2, log=True)
ax_err.set_xlabel('magnitude error')
ax_err.set_ylabel('counts')
ax_err.set_xlim(0, 1)
ax_err.set_ylim(10)
ax_err.legend(loc='upper right')
plt.tight_layout()
# Magnitude error as a function of magnitude, one panel per band.
plt.figure(figsize=[18,4])
for i, band in enumerate(bands):
    # Explicit (rows, cols, index) form: one column per band, instead of a
    # three-digit code assembled from a string.
    plt.subplot(1, len(bands), i + 1)
    query = f'mag_auto_{band}_dered != 99. & magerr_auto_{band} < 2.'
    # Evaluate the selection once and reuse it for both axes.
    selected = train_sample_for_plots.query(query)
    plt.plot(selected[f'mag_auto_{band}_dered'],
             selected[f'magerr_auto_{band}'],
             '.', alpha=0.3, color='steelblue')
    plt.xlabel(f'mag {band}')
    if i == 0:
        # Only the leftmost panel carries the y-axis label.
        plt.ylabel('error')
    plt.xlim(16, 28)
    plt.ylim(0, 2)
plt.tight_layout()
# Quality cut: small i-band error and g, r, i magnitudes different from 99.
clean = 'magerr_auto_i < 0.1 & mag_auto_g_dered != 99. & mag_auto_r_dered != 99. & mag_auto_i_dered != 99.'
# Rebind the name to the filtered result instead of filtering in place, so
# that a DataFrame shared with another name (e.g. `training_set`) does not
# lose rows as a side effect.
train_sample_for_plots = train_sample_for_plots.query(clean)

# i-band magnitude versus spectroscopic redshift.
mag_vs_z = hv.Scatter(train_sample_for_plots[['z', 'mag_auto_i_dered']]).opts(
    toolbar='above', tools=['hover'], height=400, width=800, alpha=0.5,
    size=2, xlim=(0,2), ylim=(14,24), xlabel='spec-z', ylabel='mag i')
mag_vs_z