# General
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import sys

# Astropy
from astropy import units as u
from astropy.coordinates import SkyCoord

# Bokeh
import bokeh
from bokeh.io import output_notebook, show, output_file, reset_output
from bokeh.models import ColumnDataSource, HoverTool, LinearColorMapper, ColorBar
from bokeh.models import CDSView, GroupFilter
from bokeh.plotting import figure, show, gridplot, output_notebook

# HoloViews
import holoviews as hv

# PZ Server
from pzserver import PzServer
# Read the personal access token from a local file and open the PZ Server client.
# The token is stripped because text files usually end with a newline, which
# would otherwise be passed along as part of the token.
with open('token.txt', 'r') as file:
    token = file.read().strip()
pz_server = PzServer(token=token, host="pz-dev") # "pz-dev" is the temporary host for test phase

# Configs
import warnings
# Silence all warnings for the rest of the session.
warnings.filterwarnings('ignore')
# Seaborn look: seaborn colors for matplotlib shorthand codes, fonts scaled by 1.5,
# then the 'whitegrid' style on top.
sns.set(color_codes=True, font_scale=1.5) 
sns.set_style('whitegrid')
# A value of 0 turns off matplotlib's warning about too many open figures.
plt.rcParams.update({'figure.max_open_warning': 0})
# HoloViews renders through Bokeh; Bokeh output is shown inline in the notebook.
hv.extension('bokeh')
output_notebook()
# IPython magics: auto-reload edited modules before each execution and draw
# matplotlib figures inline.
%reload_ext autoreload 
%autoreload 2 
%matplotlib inline


# Record the interpreter and main library versions used for this run.
print(f'Python version: {sys.version}')
print(f'Numpy version: {np.__version__}')
print(f'Bokeh version: {bokeh.__version__}')
print(f'HoloViews version: {hv.__version__}')

Python version: 3.11.9 | packaged by conda-forge | (main, Apr 19 2024, 18:36:13) [GCC 12.3.0]
Numpy version: 1.26.4
Bokeh version: 3.4.1
HoloViews version: 1.18.3


! wget https://raw.githubusercontent.com/kadrlica/skymap/master/skymap/data/des-round19-poly.txt

--2024-07-19 22:30:12--  https://raw.githubusercontent.com/kadrlica/skymap/master/skymap/data/des-round19-poly.txt
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.109.133, 185.199.108.133, 185.199.111.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.109.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 9947 (9.7K) [text/plain]
Saving to: ‘des-round19-poly.txt.14’

des-round19-poly.tx 100%[===================>]   9.71K  --.-KB/s    in 0s      

2024-07-19 22:30:12 (52.5 MB/s) - ‘des-round19-poly.txt.14’ saved [9947/9947]


# Footprint polygon vertices: two columns read as RA and Dec, taken to be in degrees.
foot_ra, foot_dec = np.loadtxt('des-round19-poly.txt', unpack=True)

# RA is negated and then wrapped at 180 deg (i.e. into [-180, 180)) for plotting;
# the sign flip presumably mirrors the RA axis direction - confirm against the plots.
foot_coords = SkyCoord(ra=-foot_ra * u.degree, dec=foot_dec * u.degree, frame='icrs')
wrapped_foot_ra = foot_coords.ra.wrap_at(180 * u.degree)
foot_df = pd.DataFrame({
    'foot_ra': np.array(wrapped_foot_ra),
    'foot_dec': np.array(foot_coords.dec),
})


# Fetch the spec-z compilation from the PZ Server by its internal product name.
specz_catalog_obj = pz_server.get_product('26_public_specz_compilation')

Connecting to PZ Server...
column_list None
Done!


# Display the metadata attached to the product.
specz_catalog_obj.display_metadata()


# Keep a handle on the product's tabular data.
specz_catalog = specz_catalog_obj.data


# Check which container type the data came back as.
type(specz_catalog)

pandas.core.frame.DataFrame


# Sanity check: this notebook expects the compilation to hold exactly 3,661,690 rows.
assert len(specz_catalog) == 3661690


# Column dtypes and memory footprint; "deep" also measures the contents of object columns.
specz_catalog.info(memory_usage="deep")

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3661690 entries, 0 to 3661689
Data columns (total 8 columns):
 #   Column       Dtype  
---  ------       -----  
 0   ra           float64
 1   dec          float64
 2   z            float64
 3   err_z        float64
 4   flag_des     int64  
 5   survey       object 
 6   flag_survey  float64
 7   id_spec      int64  
dtypes: float64(5), int64(2), object(1)
memory usage: 422.3 MB


# Preview the first rows of the catalog.
specz_catalog.head()


# Summary statistics of the numeric columns.
specz_catalog.describe()


# Draw a random subsample of the catalog rows to use in the plots below.
frac = 0.02
expected_size = round(frac * len(specz_catalog))
spec_sample_for_plots = specz_catalog.sample(frac=frac, axis='index')
assert len(spec_sample_for_plots) == expected_size
print(len(spec_sample_for_plots))
# To plot the whole catalog instead of a fraction, uncomment the next line:
#spec_sample_for_plots = specz_catalog

73234


# Plotting coordinates for the subsample: RA is negated, then wrapped at 180 deg
# (into [-180, 180)). The sample's ra/dec columns are overwritten in place, so
# running this cell a second time would negate the RA again.
sample_ra_deg = -np.array(spec_sample_for_plots.ra) * u.degree
sample_dec_deg = np.array(spec_sample_for_plots.dec) * u.degree
coords = SkyCoord(ra=sample_ra_deg, dec=sample_dec_deg, frame='icrs')
spec_sample_for_plots['ra'] = np.array(coords.ra.wrap_at(180 * u.degree))
spec_sample_for_plots['dec'] = np.array(coords.dec)


%%time
fig = plt.figure(figsize=[14,6])
ax = fig.add_subplot(111, projection='mollweide')   
ra_rad = coords.ra.wrap_at(180 * u.deg).radian
dec_rad = coords.dec.radian
plt.plot(ra_rad, dec_rad, '.', alpha=0.3)
plt.plot(-np.radians(foot_ra), np.radians(foot_dec), '-', color='darkorange')
org=0.0
tick_labels = np.array([150, 120, 90, 60, 30, 0, 330, 300, 270, 240, 210])
tick_labels = np.remainder(tick_labels+360+org,360)
ax.set_xticklabels(tick_labels)     # we add the scale on the x axis
ax.set_xlabel('R.A.')
ax.xaxis.label.set_fontsize(14)
ax.set_ylabel('Dec.')
ax.yaxis.label.set_fontsize(14)
ax.grid(True)
plt.tight_layout()

CPU times: user 162 ms, sys: 2.97 ms, total: 165 ms
Wall time: 163 ms


# Redshift axis shared by the HoloViews histograms in this notebook.
redshift = hv.Dimension('z', label='spec-z')
# Bin the sampled redshifts; 'fd' lets numpy pick the bin width (Freedman-Diaconis).
count, z_bin = np.histogram(spec_sample_for_plots.z, bins='fd')


z_hist_opts = dict(
    title='Distribuição de redshifts', xlabel='spec-z', height=400, width=800, xlim=(0., 2.))
z_distribution = hv.Histogram((count, z_bin), kdims=redshift).opts(**z_hist_opts)
z_distribution


# Number of objects per standardized quality flag in the full catalog.
specz_catalog.flag_des.value_counts()

flag_des
4    3358323
3     303367
Name: count, dtype: int64


def fmt(x):
    """Return *x* as a percentage label with one decimal place, e.g. 12.34 -> '12.3%'."""
    return f'{x:.1f}%'
# Objects per quality flag (3 and 4), shown as a pie chart with percentage labels.
flag_values = [3, 4]
flag_sizes = [int((specz_catalog.flag_des == flag).sum()) for flag in flag_values]
counts = pd.DataFrame(data={'flag_des': flag_sizes}, index=flag_values)
counts.plot.pie(y='flag_des', labels=None, autopct=fmt, colors=['darkorange', 'steelblue'])
counts


# Redshift histograms of the plotting sample, one per quality flag, side by side.
z_flag4 = spec_sample_for_plots.query('flag_des == 4').z
z_flag3 = spec_sample_for_plots.query('flag_des == 3').z
count4, z_bin4 = np.histogram(z_flag4, bins='fd')
count3, z_bin3 = np.histogram(z_flag3, bins='fd')

z_distribution4 = hv.Histogram((count4, z_bin4), kdims=redshift).opts(
    title='flag_des = 4', xlabel='spec-z', height=400, width=400, xlim=(0., 2.5))
z_distribution3 = hv.Histogram((count3, z_bin3), kdims=redshift).opts(
    title='flag_des = 3', color='darkorange', xlabel='spec-z', height=400, width=400,
    xlim=(0., 2.5))

# Panel size used in the combined layout; it takes the place of the 400x400 set above.
panel_size = dict(height=350, width=450)
z_dist_by_flag = z_distribution4.options(**panel_size) + z_distribution3.options(**panel_size)
z_dist_by_flag


# Table of surveys with their object counts, largest first.
survey_sizes = specz_catalog.survey.value_counts()
survey_sizes = survey_sizes.sort_values(ascending=False)
counts_table = survey_sizes.reset_index(name='count')
counts_table


# Same coordinate transformation as for the subsample, now on the full catalog:
# RA negated and wrapped at 180 deg (into [-180, 180)).
# NOTE: this overwrites specz_catalog.ra/dec in place, so it is not idempotent -
# running the cell twice negates the RA a second time.
all_ra_deg = -specz_catalog.ra.to_numpy() * u.degree
all_dec_deg = specz_catalog.dec.to_numpy() * u.degree
coords_all = SkyCoord(ra=all_ra_deg, dec=all_dec_deg, frame='icrs')
specz_catalog['ra'] = np.array(coords_all.ra.wrap_at(180 * u.degree))
specz_catalog['dec'] = np.array(coords_all.dec)


# One figure per survey, in the order of counts_table: sky positions with the
# footprint polygon on the left, redshift histogram on the right.
for survey, n_objects in zip(counts_table['survey'], counts_table['count']):
    # Boolean mask rather than a string-built query, so a survey name containing
    # quotes or other special characters cannot break the expression.
    data = specz_catalog[specz_catalog.survey == survey]

    plt.figure(figsize=[15,5])

    # Left panel: positions of the survey's objects.
    plt.subplot(121)
    plt.plot(data.ra, data.dec, '.')
    plt.plot(foot_df.foot_ra, foot_df.foot_dec, '-', color='darkorange')
    plt.xlabel('R.A. (deg)')
    plt.ylabel('Dec. (deg)')
    plt.xlim(-180, 180)

    # Right panel: redshift distribution; the legend carries the object count.
    plt.subplot(122)
    sns.histplot(data.z, bins=50, stat='count', label=f'{survey}: {n_objects} objects')
    plt.xlabel('spec-$z$')
    plt.xlim(0,)
    plt.legend()
    plt.tight_layout()

seq.	Survey name (link to the website)	Number of redshifts in the original sample	Reference (link to the paper)
1	2dF	245,591	Colless et al. 2001
2	2dFLenS	70,079	Blake et al. 2016
3	3DHST	207,967	Momcheva et al. 2016
4	6dF (DR3)	109,831	Jones et al. 2009
5	ACES	13,963	Cooper et al. 2012
6	ATLAS (DR2)	1,074	Mao et al. 2012
7	C3R2 (DR2)	4,525	Masters et al. 2019
8	CDB	541	Sullivan et al. 2011
9	CLASH-VLT	10,183	Biviano et al. 2013 Annunziatella et al. 2016 Balestra et al. 2016 Grillo et al. 2016 Caminha et al. 2017 Karman et al. 2017 Monna et al. 2017
10	DEEP2 (DR4)	50,319	Newman et al. 2013
11	DEIMOS 10K	10,770	Hasinger et al. 2018
12	FMOS-COSMOS	1,153	Silverman et al. 2015
13	GAMA (DR3)	166,332	Baldry et al. 2018
14	GLASS (DR2)	3,289	Abramson et al. 2020
15	MOSFIRE	267	McLean et al. 2012
16	MUSE	1,602	Urrutia et al. 2019
17	SAGA	68,644	Geha et al. 2017
18	SDSS (DR16)	4,613,773	Ahumada et al. 2020
19	SpARCS	410	Muzzin et al. 2012
20	SPT-GMOS	2,243	Bayliss et al. 2016
21	UDS	1,511	Galametz et al. 2013
22	VANDELS	1,362	Pentericci et al. 2018
23	VIPERS	91,507	Garilli et al. 2014
24	VUDS	698	Tasca et al. 2017
25	VVDS	40,927	Le Fèvre et al. 2004 Garilli et al. 2008
26	WiggleZ	81,362	Parkinson et al. 2012
27	zCOSMOS	20,689	Lilly et al. 2009 Knobel et al. 2012 Lilly 2016 (DR description)
28	ZFIRE	216	Nanayakkara et al. 2016

flag_des	Meaning
1	redshift unknown
2	unreliable guess
3	95% confidence
4	99% confidence

Survey	flag_des = 1	flag_des = 2	flag_des = 3	flag_des = 4
2DF	1	2	3	4,5
2dFLenS	1	2	3	4,5
3DHST	-1,1	2	0	-
6DF	1	2	3	4,6
ACES	-2,0,1	2	3	4,-1
ATLAS	-	4	-	-
C3R2	-	-	3,3.5	4
CDB	-	4	-	-
CLASH-VLT	-	2	4,5,6,9	3
DEEP2	-2,0,1	2	3	4,-1
DEIMOS_10K	0.0	1.0	1.5	2.0
FMOS_COSMOS	0,1	2	3	4
GAMA	<0.0,0.68>	<0.68,0.95>	<0.95,0.99>	<0.99,1.0>
GLASS	0,0.5,1,1.5	2,2.5	3 = 3,3.5	4
MOSFIRE	1	-	3	-
MUSE	-	1	2	3
SAGA	-	-	3	4
SDSS_DR16	-	-	-	0
SPARCS	4	3	2	1
SPT_GMOS	0,1	2	3	4
UDS	-	-	3,B,B*	4,A
VANDELS	1	2,9	3	4
VIPERS	0,1,11,211	2,9,12,19,212,213	3 = 3,13	4,14,24
VUDS	-	-	3,B,B*	4,A
VVDS	0,1	2,9	3	4
WIGGLEZ	1	2	3	4,5
ZCOSMOS	0,1,11,20,21,211	2,12,22	9,19,29,18	3,4,13,14,23,24
ZFIRE	0,1	2	3	4

key	value
id	26
release	None
product_type	Spec-z Catalog
uploaded_by	gschwend
internal_name	26_public_specz_compilation
product_name	Public spec-z compilation
official_product	False
pz_code
description	A compilation of public spec-z catalogs collected over the years of operation of the Dark Energy Survey (DES) and systematically grouped by a DES Science Portal tool to form the basis of a training set for photo-z algorithms based on machine learning.
created_at	2023-10-17T21:29:08.341090Z
main_file	public_specz_compilation.pq

	ra	dec	z	err_z	flag_des	survey	flag_survey	id_spec
0	344.762375	-30.936083	0.1141	99.0	4	2DF	4.0	9999
1	353.433375	-18.816306	0.0950	99.0	4	2DF	5.0	9999
2	348.495625	-34.821056	0.2024	99.0	4	2DF	4.0	9999
3	348.505542	-34.805139	0.1424	99.0	4	2DF	4.0	9999
4	348.339167	-34.702556	0.1426	99.0	4	2DF	4.0	9999

Spectroscopic Redshifts Compilation

Acknowledgments

Notes

Spectroscopic surveys

Unified flags system

In case of multiple measurements

Retrieve Data

Sample characterization

Spatial distribution

Quality Flags

Characterization of subsamples (by survey)

Column name	Meaning
ra	Right Ascension (degrees)
dec	Declination (degrees)
z	redshift
err_z	Redshift error. When unavailable, replaced by 99.0
flag_des	Standardized quality marker (details above)
survey	Name of the project or survey of origin.
flag_survey	Original quality flag given by the origin survey.
id_spec	Original unique identifier given by the survey. When unavailable, replaced by 9999.

	ra	dec	z	err_z	flag_des	flag_survey	id_spec
count	3.661690e+06	3.661690e+06	3.661690e+06	3.661690e+06	3.661690e+06	3.661690e+06	3661690.0
mean	1.700340e+02	1.592614e+01	4.642166e-01	9.451649e+01	3.917151e+00	3.844424e+00	9999.0
std	9.680731e+01	2.456493e+01	4.547598e-01	2.073263e+01	2.756537e-01	7.167758e-01	0.0
...	...	...	...	...	...	...	...
50%	1.735271e+02	1.455978e+01	4.316670e-01	9.900000e+01	4.000000e+00	4.000000e+00	9999.0
75%	2.223870e+02	3.359415e+01	6.102090e-01	9.900000e+01	4.000000e+00	4.000000e+00	9999.0
max	3.599998e+02	8.427039e+01	7.010940e+00	4.845040e+03	4.000000e+00	2.900000e+01	9999.0

	survey	count
0	SDSS_DR16	2765946
1	2DF	188825
2	3DHST	178968
...	...	...
25	ZFIRE	191
26	MOSFIRE	134
27	VUDS	127