# General
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys

# Astropy
from astropy import units as u
from astropy.coordinates import SkyCoord
#from astropy.units.quantity import Quantity

# Bokeh
import bokeh
from bokeh.io import output_notebook, show, output_file, reset_output
#from bokeh.models import ColumnDataSource, Range1d, HoverTool
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, ColorBar
from bokeh.models import CDSView, GroupFilter
from bokeh.plotting import figure, show, gridplot, output_notebook
from bokeh.models import Range1d, LinearColorMapper, ColorBar
from bokeh.transform import factor_cmap
from bokeh.plotting import show
# Send Bokeh output to the notebook (inline) rather than to a standalone file.
output_notebook()

# HoloViews
import holoviews as hv
from holoviews import streams, opts
from holoviews.operation.datashader import datashade, dynspread
from holoviews.plotting.util import process_cmap

# PZ Server
from pzserver import PzServer
# Read the PZ Server API token from a local file. Surrounding whitespace is
# stripped: text files usually end with a newline, which would otherwise be
# sent as part of the credential.
with open('token.txt', 'r') as file:
    token = file.read().strip()
pz_server = PzServer(token=token, host="pz-dev")  # "pz-dev" is the temporary host for test phase

# Configs
import warnings
warnings.filterwarnings('ignore')
sns.set(color_codes=True, font_scale=1.5) 
sns.set_style('whitegrid')
plt.rcParams.update({'figure.max_open_warning': 0})
hv.extension('bokeh')
%reload_ext autoreload 
%autoreload 2 
%matplotlib inline


# Report the interpreter and main library versions used to run the notebook.
for label, version in (
    ('Python', sys.version),
    ('Numpy', np.__version__),
    ('Bokeh', bokeh.__version__),
    ('HoloViews', hv.__version__),
):
    print(f'{label} version: {version}')

Python version: 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:36:13) [GCC 12.3.0]
Numpy version: 1.26.4
Bokeh version: 3.4.1
HoloViews version: 1.18.3


! wget https://raw.githubusercontent.com/kadrlica/skymap/master/skymap/data/des-round19-poly.txt

--2024-07-19 22:29:46--  https://raw.githubusercontent.com/kadrlica/skymap/master/skymap/data/des-round19-poly.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.108.133, 185.199.109.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.108.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9947 (9.7K) [text/plain]
Saving to: ‘des-round19-poly.txt.13’

des-round19-poly.tx 100%[===================>]   9.71K  --.-KB/s    in 0s      

2024-07-19 22:29:46 (37.2 MB/s) - ‘des-round19-poly.txt.13’ saved [9947/9947]


# Footprint polygon vertices: two columns read as (R.A., Dec.).
foot_ra, foot_dec = np.loadtxt('des-round19-poly.txt', unpack=True)

# Sky coordinates with the R.A. sign flipped, then wrapped at 180 degrees.
foot_coords = SkyCoord(
    ra=-foot_ra * u.degree,
    dec=foot_dec * u.degree,
    frame='icrs',
)
foot_ra_wrapped = foot_coords.ra.wrap_at(180 * u.degree)

# Plain-number table of the wrapped footprint vertices.
foot_df = pd.DataFrame(
    {
        'foot_ra': np.array(foot_ra_wrapped),
        'foot_dec': np.array(foot_coords.dec),
    }
)


# Request the product from the PZ Server, identified by its internal name.
training_set_obj = pz_server.get_product('27_public_training_set_des_dr2')

Connecting to PZ Server...
column_list None
Done!


# Show the metadata attached to the retrieved product.
training_set_obj.display_metadata()


# Catalogue rows held by the product object (a pandas DataFrame, see the type check below).
training_set = training_set_obj.data


# Display the container type of the retrieved data.
type(training_set)

pandas.core.frame.DataFrame


# Sanity check: the product is expected to contain exactly this many rows.
assert training_set.shape[0] == 592493


# Column names, non-null counts and dtypes; "deep" also measures the real
# memory taken by object (string) columns instead of estimating it.
training_set.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 592493 entries, 0 to 592492
Data columns (total 18 columns):
 #   Column            Non-Null Count   Dtype  
---  ------            --------------   -----  
 0   coadd_object_id   592493 non-null  int64  
 1   ra                592493 non-null  float64
 2   dec               592493 non-null  float64
 3   z                 592493 non-null  float64
 4   err_z             592493 non-null  float64
 5   flag_des          592493 non-null  int64  
 6   survey            592493 non-null  object 
 7   flag_survey       592493 non-null  float64
 8   mag_auto_g_dered  592493 non-null  float64
 9   mag_auto_r_dered  592493 non-null  float64
 10  mag_auto_i_dered  592493 non-null  float64
 11  mag_auto_z_dered  592493 non-null  float64
 12  mag_auto_y_dered  592493 non-null  float64
 13  magerr_auto_g     592493 non-null  float64
 14  magerr_auto_r     592493 non-null  float64
 15  magerr_auto_i     592493 non-null  float64
 16  magerr_auto_z     592493 non-null  float64
 17  magerr_auto_y     592493 non-null  float64
dtypes: float64(15), int64(2), object(1)
memory usage: 113.1 MB


# Preview the first rows of the catalogue.
training_set.head()


# Colours from the dereddened magnitudes: (g - r) and (r - i).
# NOTE(review): magnitudes equal to 99. look like a missing-value marker (later
# cells filter them out), so colours built from them are not physical — confirm.
g_mag = training_set['mag_auto_g_dered']
r_mag = training_set['mag_auto_r_dered']
i_mag = training_set['mag_auto_i_dered']
training_set['gmr'] = g_mag.sub(r_mag)
training_set['rmi'] = r_mag.sub(i_mag)


# Summary statistics (count, mean, std, quantiles, extrema) of the numeric columns.
training_set.describe()


# Draw a random subsample for quick plotting and check its size.
frac = 0.06
train_sample_for_plots = training_set.sample(frac=frac, axis='index')
assert len(train_sample_for_plots) == round(frac * len(training_set))
print(len(train_sample_for_plots))
# Use the full catalogue for the plots (comment this line to use a fraction of
# the sample). A copy is taken on purpose: the plotting cells overwrite `ra` /
# `dec` and filter rows on `train_sample_for_plots`, and a plain alias would
# apply those edits to `training_set` itself.
train_sample_for_plots = training_set.copy()

35550


# Build sky coordinates with the R.A. sign flipped, then store the R.A.
# wrapped at 180 degrees (and the Dec.) back into the plotting table.
sample_ra = np.array(train_sample_for_plots['ra'])
sample_dec = np.array(train_sample_for_plots['dec'])
coords = SkyCoord(ra=-sample_ra * u.degree, dec=sample_dec * u.degree, frame='icrs')
train_sample_for_plots['ra'] = np.array(coords.ra.wrap_at(180 * u.degree))
train_sample_for_plots['dec'] = np.array(coords.dec)


%%time
fig = plt.figure(figsize=[14,6])
ax = fig.add_subplot(111, projection='mollweide')   
ra_rad = coords.ra.wrap_at(180 * u.deg).radian
dec_rad = coords.dec.radian
plt.plot(ra_rad, dec_rad, '.', alpha=0.1)
plt.plot(-np.radians(foot_ra), np.radians(foot_dec), '-', color='darkorange')
org=0.0
tick_labels = np.array([150, 120, 90, 60, 30, 0, 330, 300, 270, 240, 210])
tick_labels = np.remainder(tick_labels+360+org,360)
ax.set_xticklabels(tick_labels)     # we add the scale on the x axis
ax.set_xlabel('R.A.')
ax.xaxis.label.set_fontsize(14)
ax.set_ylabel('Dec.')
ax.yaxis.label.set_fontsize(14)
ax.grid(True)
plt.tight_layout()

CPU times: user 208 ms, sys: 14 ms, total: 222 ms
Wall time: 219 ms


# Shared redshift axis definition (label and displayed range) for the histograms.
redshift = hv.Dimension('z', label='spec-z', range=(0.0, 2.0))
# Redshift histogram; the bin width is chosen by numpy's 'fd' estimator.
z_values = train_sample_for_plots['z']
count, z_bin = np.histogram(z_values, bins='fd')


# Redshift distribution of the plotting sample.
z_hist_options = dict(
    title='Distribuição de redshifts',
    xlabel='spec-z',
    height=400,
    width=800,
)
z_distribution = hv.Histogram((count, z_bin), kdims=redshift).opts(**z_hist_options)
z_distribution


# Number of objects per flag_des value.
training_set.flag_des.value_counts()

flag_des
4    527093
3     65400
Name: count, dtype: int64


def fmt(x):
    """Return ``x`` formatted with one decimal place followed by ``%``."""
    return f'{x:.1f}%'
# Count the objects carrying each flag_des value and show the split as a pie chart.
flag_values = [3, 4]
rows_per_flag = [
    len(training_set[training_set['flag_des'] == flag]) for flag in flag_values
]
counts = pd.DataFrame(data={'flag_des': rows_per_flag}, index=flag_values)
counts.plot.pie(
    y='flag_des',
    labels=None,
    autopct=fmt,
    colors=['darkorange', 'steelblue'],
)
counts


# Redshift histograms split by quality flag, shown side by side.
panel_options = dict(xlabel='spec-z', height=400, width=400, xlim=(0., 2.))

z_flag4 = train_sample_for_plots.query('flag_des == 4')['z']
count4, z_bin4 = np.histogram(z_flag4, bins='fd')
z_distribution4 = hv.Histogram((count4, z_bin4), kdims=redshift).opts(
    title='flag_des = 4', **panel_options)

z_flag3 = train_sample_for_plots.query('flag_des == 3')['z']
count3, z_bin3 = np.histogram(z_flag3, bins='fd')
z_distribution3 = hv.Histogram((count3, z_bin3), kdims=redshift).opts(
    title='flag_des = 3', color='darkorange', **panel_options)

# The final panel size is set here, overriding the height/width given above.
left_panel = z_distribution4.options(height=350, width=450)
right_panel = z_distribution3.options(height=350, width=450)
z_dist_by_flag = left_panel + right_panel
z_dist_by_flag


# Photometric band suffixes used in the magnitude / magnitude-error column names.
bands = list('grizy')


# Two panels: magnitude distributions (left) and magnitude-error distributions
# (right), one step histogram per band, with logarithmic counts.
fig, (ax_mag, ax_err) = plt.subplots(1, 2, figsize=[12, 4])

for band in bands:
    # Rows whose magnitude equals 99. are left out.
    valid_mag = train_sample_for_plots.query(f'mag_auto_{band}_dered != 99.')
    ax_mag.hist(valid_mag[f'mag_auto_{band}_dered'],
                bins=30, histtype='step', lw=2, log=True)
ax_mag.set_xlabel('magnitude')
ax_mag.set_ylabel('counts')
ax_mag.set_xlim(12, 28)
ax_mag.set_ylim(10)

for band in bands:
    # Same cut, additionally keeping only errors below 1.
    valid_err = train_sample_for_plots.query(
        f'mag_auto_{band}_dered != 99. & magerr_auto_{band} < 1.')
    ax_err.hist(valid_err[f'magerr_auto_{band}'],
                bins=30, label=band, histtype='step', lw=2, log=True)
ax_err.set_xlabel('magnitude error')
ax_err.set_ylabel('counts')
ax_err.set_xlim(0, 1)
ax_err.set_ylim(10)
ax_err.legend(loc='upper right')

fig.tight_layout()


# Magnitude error versus magnitude, one panel per band in a single row.
plt.figure(figsize=[18, 4])
for i, band in enumerate(bands):
    # (rows, columns, index) form: readable, and not limited to 9 panels like
    # the three-digit shorthand.
    plt.subplot(1, len(bands), i + 1)
    query = f'mag_auto_{band}_dered != 99. & magerr_auto_{band} < 2.'
    # Filter once and reuse the result for both axes.
    selected = train_sample_for_plots.query(query)
    plt.plot(selected[f'mag_auto_{band}_dered'],
             selected[f'magerr_auto_{band}'],
             '.', alpha=0.3, color='steelblue')
    plt.xlabel(f'mag {band}')
    if i == 0:
        # Only the left-most panel carries the y-axis label.
        plt.ylabel('error')
    plt.xlim(16, 28)
    plt.ylim(0, 2)
# Lay out the figure once, after all panels exist.
plt.tight_layout()


# Quality cut for the remaining plots: small i-band error and g, r, i
# magnitudes different from 99.
clean = 'magerr_auto_i < 0.1 & mag_auto_g_dered != 99. & mag_auto_r_dered != 99. & mag_auto_i_dered != 99.'
# Rebind to the filtered result instead of filtering in place: the name may
# refer to the very same object as `training_set`, and an in-place query would
# then drop rows from the full catalogue as well.
train_sample_for_plots = train_sample_for_plots.query(clean)


# i-band magnitude versus spectroscopic redshift, with a hover tool.
mag_vs_z_options = dict(
    toolbar='above', tools=['hover'],
    height=400, width=800,
    alpha=0.5, size=2,
    xlim=(0, 2), ylim=(14, 24),
    xlabel='spec-z', ylabel='mag i',
)
z_and_imag = train_sample_for_plots[['z', 'mag_auto_i_dered']]
mag_vs_z = hv.Scatter(z_and_imag).opts(**mag_vs_z_options)
mag_vs_z


# Shared options for the scatter plots drawn with the Bokeh backend.
# (A fixed colour, 'steelblue', is intentionally left disabled.)
plot_style_bkh = {
    'alpha': 0.2,
    'marker': 'triangle',
    'size': 3,
    'xticks': 5,
    'yticks': 5,
    'height': 400,
    'width': 400,
    'toolbar': 'above',
}
# Style actually applied by the plots below.
plot_style = plot_style_bkh


# Table used as the data source for the colour-magnitude and colour-colour plots.
points = train_sample_for_plots


# Colour-magnitude diagram: (g-r) against i magnitude, with a 100-bin
# histogram of each axis adjoined to the scatter plot.
imag = hv.Dimension('mag_auto_i_dered', label='mag i', range=(12, 24))
gmr = hv.Dimension('gmr', label='(g-r)', range=(-0.8, 3.0))
col_mag = (
    hv.Scatter(points, kdims=imag, vdims=gmr)
    .opts(**plot_style)
    .hist(dimension=[imag, gmr], num_bins=100, adjoin=True)
)


# Colour-colour diagram: (g-r) against (r-i), with a 100-bin histogram of each
# axis adjoined. `gmr` is redefined here with a wider displayed range.
rmi = hv.Dimension('rmi', label='(r-i)', range=(-0.8, 2.5))
gmr = hv.Dimension('gmr', label='(g-r)', range=(-0.8, 3.5))
col_col = (
    hv.Scatter(points, kdims=rmi, vdims=gmr)
    .opts(**plot_style)
    .hist(dimension=[rmi, gmr], num_bins=100, adjoin=True)
)


# Show the colour-magnitude and colour-colour plots together in one layout.
col_mag + col_col

key	value
id	27
release	None
product_type	Training Set
uploaded_by	gschwend
internal_name	27_public_training_set_des_dr2
product_name	Public Training Set DES DR2
official_product	False
pz_code
description	Result of cross-matching the public spec-z compilation with DES DR2 coadd objects catalog.
created_at	2023-10-17T21:32:21.727199Z
main_file	public_pz_training_set.pq

	coadd_object_id	ra	dec	z	err_z	flag_des	survey	flag_survey	mag_auto_g_dered	mag_auto_r_dered	mag_auto_i_dered	mag_auto_z_dered	mag_auto_y_dered	magerr_auto_g	magerr_auto_r	magerr_auto_i	magerr_auto_z	magerr_auto_y
0	1011353819	352.677720	-41.760673	0.2143	99.0	4	2DF	4.0	18.490623	17.264482	16.840298	16.531605	16.396919	0.004156	0.002004	0.002048	0.002808	0.007311
1	1012577456	352.603841	-41.669178	0.0867	99.0	4	2DF	4.0	18.502615	18.069166	17.864685	17.740002	17.689028	0.002561	0.002389	0.002843	0.004955	0.013531
2	1012581210	352.783951	-41.707007	0.0937	99.0	4	2DF	4.0	19.023310	18.133617	17.750835	17.465248	17.351370	0.003100	0.001907	0.002012	0.003469	0.009423
3	1012565977	352.797599	-41.553470	0.1886	99.0	3	2DF	3.0	18.697731	17.617779	17.249151	16.962301	16.844101	0.004236	0.002269	0.002361	0.003703	0.009919
4	1012559214	352.703943	-41.483863	0.0425	99.0	4	2DF	4.0	18.547184	18.249432	18.208136	18.075872	18.044237	0.004910	0.004967	0.007760	0.014178	0.040168

Column name	Meaning
coadd_object_id	Unique object identifier in the DES DR2 photometric catalog (coadd_objects table).
ra	Right Ascension (degrees)
dec	Declination (degrees)
z	Redshift
err_z	Redshift error. When unavailable, replaced by 99.0
flag_des	Standardized quality marker (details above)
survey	Name of the project or survey of origin.
flag_survey	Original quality flag given by the origin survey.
mag_auto_[g,r,i,z,y]_dered	Apparent magnitude in bands [g, r, i, z, y], corrected for reddening
magerr_auto_[g,r,i,z,y]	Apparent magnitude error in bands [g, r, i, z, y]

	coadd_object_id	ra	dec	z	err_z	flag_des	flag_survey	mag_auto_g_dered	mag_auto_r_dered	mag_auto_i_dered	mag_auto_z_dered	mag_auto_y_dered	magerr_auto_g	magerr_auto_r	magerr_auto_i	magerr_auto_z	magerr_auto_y	gmr	rmi
count	5.924930e+05	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000	592493.000000
mean	1.168246e+09	102.918168	-8.280426	0.506927	88.805911	3.889619	3.890351	21.190736	20.096983	19.560508	19.283521	19.909032	0.210438	0.047986	0.042376	0.086674	1.190375	1.093753	0.536475
std	1.538701e+08	133.019805	14.442487	0.364025	30.116786	0.313364	0.591797	3.754093	2.509359	2.358146	2.555866	8.020347	4.822718	1.531331	1.232518	12.413287	26.793895	2.868310	1.260123
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
50%	1.181015e+09	35.338629	-1.853868	0.505440	99.000000	4.000000	4.000000	21.853054	20.398115	19.660595	19.293842	19.181950	0.034565	0.013383	0.011344	0.015025	0.046342	0.859777	0.493069
75%	1.278415e+09	74.161748	0.509743	0.754656	99.000000	4.000000	4.000000	22.671642	21.872482	21.251059	20.961576	20.922132	0.061131	0.036342	0.033264	0.046926	0.152634	1.441675	0.695646
max	1.700549e+09	359.999898	5.370753	5.810000	704.016000	4.000000	14.000000	99.000000	99.000000	99.000000	99.000000	99.000000	2242.325439	592.804504	416.795624	9464.922852	14221.452148	84.850241	84.985229

Photo-z Training Set¶

Acknowledgments¶

Notes¶

DES DR2¶

Retrieve data¶

Sample characterization¶

Spatial Distribution¶

Quality Flags¶

Characteristics of the photometric sample¶

Magnitude distributions and their respective errors¶

Magnitude errors¶

Magnitude vs. redshift¶

CMD and color-color plots¶