Python CDO

Python CDO (Climate Data Operations)

1. Documents and Tips on CDO

2. How to install pycdo

$conda install -c ioos pycdo
In [1]:
from cdo import *

#--Initialize CDO
cdo=Cdo()

#print cdo.operators # query operators

3. If the avove initialization does not work, you need to edit the function “getOperators” in cdo.py

  • Modify the function “getOperators” as follows.
    $vi (YOUR_ANACONDA_DIR)/lib/python2.7/site-packages/cdo.py
    def getOperators(self):
      import os
      proc = subprocess.Popen([self.CDO,'--operators'],stderr = subprocess.PIPE,stdout = subprocess.PIPE)
      ret  = proc.communicate()
      ops  = ret[1].decode("utf-8")[0:-1].split(os.linesep)[1:-1]
      opes=[]
      for text in ret[0].decode("utf-8")[0:-1].split("\n"):
         s    = re.sub("\s+" , " ", text)
         opes.append(s.split(" ")[0])
      return opes

4. Basic syntax

cdo.OPERATOR("PARAETERS", input="FILENAME.nc", output="FILENAME.nc")

Parameters and output are not always necessary depending on operators.

In [11]:
#--input file (Monthly HadISST data 1870.01-2016.02)
infile="/work/hnm/dataset/HadISST1_1/netcdf_data_temp.20160412/HadISST1_SST_187001-201602.nc"
#infile="HadISST1_SST_187001-201602.nc"

# sinfo : Display file info
cdo.sinfo(input=infile)
Out[11]:
[u'File format : NetCDF',
 u'-1 : Institut Source   Steptype Levels Num    Points Num Dtype : Parameter ID',
 u'1 : unknown  unknown  instant       1   1     64800   1  F32  : -1',
 u'Grid coordinates :',
 u'1 : lonlat                   : points=64800 (360x180)',
 u'lon : 0.5 to 359.5 by 1 degrees_east  circular',
 u'lat : -89.5 to 89.5 by 1 degrees_north',
 u'Vertical coordinates :',
 u'1 : pressure                 : levels=1',
 u'st_ocean : 0 millibar',
 u'Time coordinate :  1754 steps',
 u'RefTime =  1870-01-16 11:59:00  Units = minutes  Calendar = standard',
 u'YYYY-MM-DD hh:mm:ss  YYYY-MM-DD hh:mm:ss  YYYY-MM-DD hh:mm:ss  YYYY-MM-DD hh:mm:ss',
 u'1870-01-16 11:59:00  1870-02-16 11:59:00  1870-03-16 11:59:00  1870-04-16 11:59:00',
 u'1870-05-16 11:59:00  1870-06-16 11:59:00  1870-07-16 11:59:00  1870-08-16 11:59:00',
 u'1870-09-16 11:59:00  1870-10-16 11:59:00  1870-11-16 11:59:00  1870-12-16 11:59:00',
 u'1871-01-16 11:59:00  1871-02-16 11:59:00  1871-03-16 11:59:00  1871-04-16 11:59:00',
 u'1871-05-16 11:59:00  1871-06-16 11:59:00  1871-07-16 11:59:00  1871-08-16 11:59:00',
 u'1871-09-16 11:59:00  1871-10-16 11:59:00  1871-11-16 11:59:00  1871-12-16 11:59:00',
 u'1872-01-16 11:59:00  1872-02-16 11:59:00  1872-03-16 11:59:00  1872-04-16 11:59:00',
 u'1872-05-16 11:59:00  1872-06-16 11:59:00  1872-07-16 11:59:00  1872-08-16 11:59:00',
 u'1872-09-16 11:59:00  1872-10-16 11:59:00  1872-11-16 11:59:00  1872-12-16 11:59:00',
 u'1873-01-16 11:59:00  1873-02-16 11:59:00  1873-03-16 11:59:00  1873-04-16 11:59:00',
 u'1873-05-16 11:59:00  1873-06-16 11:59:00  1873-07-16 11:59:00  1873-08-16 11:59:00',
 u'1873-09-16 11:59:00  1873-10-16 11:59:00  1873-11-16 11:59:00  1873-12-16 11:59:00',
 u'1874-01-16 11:59:00  1874-02-16 11:59:00  1874-03-16 11:59:00  1874-04-16 11:59:00',
 u'1874-05-16 11:59:00  1874-06-16 11:59:00  1874-07-16 11:59:00  1874-08-16 11:59:00',
 u'1874-09-16 11:59:00  1874-10-16 11:59:00  1874-11-16 11:59:00  1874-12-16 11:59:00',
 u'................................................................................',
 u'................................................................................',
 u'........',
 u'2011-05-17 11:59:00  2011-06-17 11:59:00  2011-07-17 11:59:00  2011-08-17 11:59:00',
 u'2011-09-17 11:59:00  2011-10-17 11:59:00  2011-11-17 11:59:00  2011-12-17 11:59:00',
 u'2012-01-17 11:59:00  2012-02-17 11:59:00  2012-03-17 11:59:00  2012-04-17 11:59:00',
 u'2012-05-17 11:59:00  2012-06-17 11:59:00  2012-07-17 11:59:00  2012-08-17 11:59:00',
 u'2012-09-17 11:59:00  2012-10-17 11:59:00  2012-11-17 11:59:00  2012-12-17 11:59:00',
 u'2013-01-17 11:59:00  2013-02-17 11:59:00  2013-03-17 11:59:00  2013-04-17 11:59:00',
 u'2013-05-17 11:59:00  2013-06-17 11:59:00  2013-07-17 11:59:00  2013-08-17 11:59:00',
 u'2013-09-17 11:59:00  2013-10-17 11:59:00  2013-11-17 11:59:00  2013-12-17 11:59:00',
 u'2014-01-17 11:59:00  2014-02-17 11:59:00  2014-03-17 11:59:00  2014-04-17 11:59:00',
 u'2014-05-17 11:59:00  2014-06-17 11:59:00  2014-07-17 11:59:00  2014-08-17 11:59:00',
 u'2014-09-17 11:59:00  2014-10-17 11:59:00  2014-11-17 11:59:00  2014-12-17 11:59:00',
 u'2015-01-17 11:59:00  2015-02-17 11:59:00  2015-03-17 11:59:00  2015-04-17 11:59:00',
 u'2015-05-17 11:59:00  2015-06-17 11:59:00  2015-07-17 11:59:00  2015-08-17 11:59:00',
 u'2015-09-17 11:59:00  2015-10-17 11:59:00  2015-11-17 11:59:00  2015-12-17 11:59:00',
 u'2016-01-17 11:59:00  2016-02-17 11:59:00']
In [12]:
#--step 1 pick up the data for 1980-2015 from infile
# seldate : give range of dates
cdo.seldate('1980-01-01,2015-12-31', input=infile, output="output1.nc")

# selyear : give range of years (same )
cdo.selyear('1980/2015', input=infile, output="output1.nc")

# showyear : display years in the file
cdo.showyear(input="output1.nc")
Out[12]:
[u'1980 1981 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015']
In [13]:
#--step 2: extract domain of interest
# sellonlatbox
cdo.sellonlatbox('100,280,-50,50', input="output1.nc", output="output2.nc")

cdo.griddes(input="output2.nc") # show grid info
Out[13]:
[u'#',
 u'# gridID 1',
 u'#',
 u'gridtype  = lonlat',
 u'gridsize  = 18000',
 u'xname     = lon',
 u'xlongname = longitude',
 u'xunits    = degrees_east',
 u'yname     = lat',
 u'ylongname = latitude',
 u'yunits    = degrees_north',
 u'xsize     = 180',
 u'ysize     = 100',
 u'xfirst    = 100.5',
 u'xinc      = 1',
 u'yfirst    = -49.5',
 u'yinc      = 1']
In [14]:
#--step 3 make multiple-year monthly average
# ymonavg input output (output: (12,jmax,imax))
cdo.ymonavg(input="output2.nc", output="cmean.nc")

cdo.info(input="cmean.nc") #show data info
Out[14]:
[u'-1 :       Date     Time   Level Gridsize    Miss :     Minimum        Mean     Maximum : Parameter ID',
 u'1 : 2015-01-17 11:59:00       0    18000    2599 :    -0.34528      21.783      30.324 : -1',
 u'2 : 2015-02-17 11:59:00       0    18000    2600 :     -1.3959      21.869      30.138 : -1',
 u'3 : 2015-03-17 11:59:00       0    18000    2599 :     -1.2622      21.920      30.303 : -1',
 u'4 : 2015-04-17 11:59:00       0    18000    2599 :     0.14799      21.940      30.060 : -1',
 u'5 : 2015-05-17 11:59:00       0    18000    2599 :      1.5601      21.939      30.419 : -1',
 u'6 : 2015-06-17 11:59:00       0    18000    2599 :      3.6562      21.983      30.112 : -1',
 u'7 : 2015-07-17 11:59:00       0    18000    2599 :      4.6406      22.129      30.111 : -1',
 u'8 : 2015-08-17 11:59:00       0    18000    2599 :      4.2122      22.272      30.362 : -1',
 u'9 : 2015-09-17 11:59:00       0    18000    2599 :      4.3865      22.219      30.413 : -1',
 u'10 : 2015-10-17 11:59:00       0    18000    2599 :      4.6319      22.014      29.822 : -1',
 u'11 : 2015-11-17 11:59:00       0    18000    2599 :      3.5475      21.839      30.238 : -1',
 u'12 : 2015-12-17 11:59:00       0    18000    2599 :      1.3763      21.751      30.643 : -1']

Piping

Before passing the data to the main operator, some operations can be done beforehand.

cdo.OPERATOR3("PARAM3",input="-OPERATOR2,PARAM2 -OPERATOR1,PARAM1 INPUT.nc", output="OUTPUT.nc")
In [15]:
#--steps 1-3 can be combined using "piping".
cdo.ymonavg(input="-sellonlatbox,100,280,-50,50 -seldate,1980-01-01,2015-12-31 %s" % (infile), output="cmean.nc")
cdo.info(input="cmean.nc") #show data info
Out[15]:
[u'-1 :       Date     Time   Level Gridsize    Miss :     Minimum        Mean     Maximum : Parameter ID',
 u'1 : 2015-01-17 11:59:00       0    18000    2599 :    -0.34528      21.783      30.324 : -1',
 u'2 : 2015-02-17 11:59:00       0    18000    2600 :     -1.3959      21.869      30.138 : -1',
 u'3 : 2015-03-17 11:59:00       0    18000    2599 :     -1.2622      21.920      30.303 : -1',
 u'4 : 2015-04-17 11:59:00       0    18000    2599 :     0.14799      21.940      30.060 : -1',
 u'5 : 2015-05-17 11:59:00       0    18000    2599 :      1.5601      21.939      30.419 : -1',
 u'6 : 2015-06-17 11:59:00       0    18000    2599 :      3.6562      21.983      30.112 : -1',
 u'7 : 2015-07-17 11:59:00       0    18000    2599 :      4.6406      22.129      30.111 : -1',
 u'8 : 2015-08-17 11:59:00       0    18000    2599 :      4.2122      22.272      30.362 : -1',
 u'9 : 2015-09-17 11:59:00       0    18000    2599 :      4.3865      22.219      30.413 : -1',
 u'10 : 2015-10-17 11:59:00       0    18000    2599 :      4.6319      22.014      29.822 : -1',
 u'11 : 2015-11-17 11:59:00       0    18000    2599 :      3.5475      21.839      30.238 : -1',
 u'12 : 2015-12-17 11:59:00       0    18000    2599 :      1.3763      21.751      30.643 : -1']

Reading output as a netcdf file

In [16]:
#--output can be read directory as a netcdf file
import numpy as np

f1 = cdo.ymonavg(input="-sellonlatbox,100,280,-50,50 -seldate,1980-01-01,2015-12-31 %s" % (infile), 
                 options='-f nc', returnCdf=True) # As if f1=Dataset("cmean.nc","r")

clim = f1.variables["temp"][:]
print "np.shape(clim)=",np.shape(clim),"=(12-months, zmax, jmax, imax)"
np.shape(clim)= (12, 1, 100, 180) =(12-months, zmax, jmax, imax)
In [17]:
#--step 4 make anomaly (the raw data is subtracted from the climatological monthly mean)
# ymonsub raw-data mean-data anomaly-data
cdo.ymonsub(input="output2.nc cmean.nc", output="anom.nc")
Out[17]:
'anom.nc'
In [18]:
#-- Do step1-step4 with one line without temporaly files using "piping".
f2 = cdo.ymonsub(input='-sellonlatbox,100,290,-50,50 -seldate,1980-01-01,2015-12-31 %s \
                        -ymonavg -sellonlatbox,100,290,-50,50 -seldate,1980-01-01,2015-12-31 %s' % (infile,infile), 
                         options='-f nc', returnCdf=True)
anom = f2.variables["temp"]
print "np.shape(anom)=",np.shape(anom[:])
np.shape(anom)= (432, 1, 100, 190)
In [20]:
#-- Draw anomalies for 1997 and 1998
%matplotlib inline
import os
import pandas as pd
from netCDF4 import  num2date
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.cm as cm
from mpl_toolkits.basemap import Basemap

anom = f2.variables["temp"] #read anomaly from previous resuts
time = f2.variables["time"] #read time
dlons = f2.variables["lon"][:] # read longitudes
dlats = f2.variables["lat"][:] # read latitudes

ndate = num2date(time[:],units=time.units,calendar=time.calendar) #convert time to date
dates = pd.DatetimeIndex(ndate) # convert date to date object in pandas

syear=1997 # start of plot
eyear=1998 # end of plot

fig = plt.figure(figsize=(24,24)) # set figure environemnt

contours=np.arange(-4.0,4.0,0.5) # set contours
cmap=cm.bwr # set colormap
smon={1:"JAN",2:"FEB",3:"MAR",4:"APR",5:"MAY",6:"JUN",7:"JUL",8:"AUG",9:"SEP",10:"OCT",11:"NOV",12:"DEC"} # set dictionary

ii=1
for year in range(syear,eyear+1): #loop for each year
   idxs = np.where(dates.year == year)[0]

   for im, idx in enumerate(idxs): #loop for each month in a year

     ax = plt.subplot(6,4,ii) # set up panel plot
     m = Basemap(projection='mill',llcrnrlat=dlats[0], urcrnrlat=dlats[-1], llcrnrlon=dlons[0],urcrnrlon=dlons[-1], 
                 lat_ts=5, resolution='c') # draw basemap
     m.drawcoastlines() # draw coast line
    
     flon,flat=np.meshgrid(dlons,dlats) # meshgrid
     x,y=m(flon,flat) # lon,lat => x,y

     cs = m.contourf(x,y,anom[idx,0,:,:],contours, cmap=cmap, extend="both") # plot contours

     ax.text(0.015,0.9, "%4.4i %s" % (year, smon[im+1]), transform=ax.transAxes, fontsize=20, bbox=dict(boxstyle='square',
            fc='w', alpha=1.0), zorder=100) # draw label

     ii=ii+1

# set up label
cax = fig.add_axes([0.2, -0.050, 0.6, 0.03]) 
art = plt.colorbar(cs, cax, orientation='horizontal')
art.set_label('SST Anomaly [K]', fontsize=20)
art.ax.tick_params(labelsize=18)

# adjust layout
plt.tight_layout(pad=0.2, w_pad=0.2, h_pad=0.3)

# show
plt.show()

2. EOF using CDO

Syntax

To compute eigen values and eigen vectors.
cdo.eof("Number of Modes", input="INPUT.nc", output="eval.nc evector.nc")
To compute principal coefficients.
cdo.eofcoeff(input="evector.nc INPUT.nc", output="eof_pc_") # eof_pc_00000.nc, eof_pc_00001.nc, .....
In [22]:
#cdo.eof("3", input="anom.nc", output="eval.nc eof.nc") #error because anom.nc contains missing value
cdo.eof("3", input="-setmisstoc,0 anom.nc", output="eval.nc eof.nc") # compute first 3 modes 
cdo.eofcoeff(input="eof.nc anom.nc", output="eof_pc_") # compute principal coefficients
                                                       #  eof_pc_0000{0,1,2,3}.nc are created
Out[22]:
'eof_pc_'
In [23]:
#--read output data
ff  = cdo.seltimestep("1/3", input="eof.nc", options='-f nc', returnCdf=True) # read eigen vectors for the first 3 modes
eof = ff.variables["temp"][:,0,:,:] # read eigen vectors

print "np.shape(eof)=",np.shape(eof),"=(mode,jmax,imax)"
np.shape(eof)= (3, 100, 180) =(mode,jmax,imax)
In [24]:
#--plot
fig = plt.figure(figsize=(12,16))

contours=np.arange(-4.0,4.0,0.5) # create contours
cmap=cm.bwr #set color bar

dlons = ff.variables["lon"][:] # get longitudes
dlats = ff.variables["lat"][:] # get latitudes

sign=[1,1,-1] # sign for [1st,2nd,3rd mode]

jj=0
for ii in range(3): # loop for first 3 modes
    
    #----draw eigen vector
     jj=jj+1
     ax = plt.subplot(5,2,jj)
     m = Basemap(projection='mill',llcrnrlat=dlats[0], urcrnrlat=dlats[