Merge pull request #142 from JuliaOcean/XBT_batch_processing
Xbt batch processing
gaelforget authored Jan 15, 2025
2 parents ae2e0ef + 97af4ab commit c703fc2
Showing 5 changed files with 454 additions and 327 deletions.
2 changes: 1 addition & 1 deletion examples/XBT_transect.jl
@@ -1,5 +1,5 @@
### A Pluto.jl notebook ###
# v0.20.3
# v0.20.4

using Markdown
using InteractiveUtils
1 change: 1 addition & 0 deletions src/OceanRobots.jl
@@ -8,6 +8,7 @@ import Base: read
include("types.jl")
include("thredds_servers.jl")
include("files.jl")
include("files_XBT.jl")
include("example_GOM.jl")

export GDP, GDP_CloudDrift, NOAA, GliderFiles, ArgoFiles, OceanSites, OceanOPS, CCHDO, XBT
331 changes: 5 additions & 326 deletions src/files.jl
@@ -860,13 +860,13 @@ function index()

#main table
oceansites_index=DataFrame(CSV.File(fil; header=false, skipto=9, silencewarnings=true))

#treat lines which seem mis-formatted
aa=findall((ismissing).(oceansites_index.Column17))
oceansites_index=oceansites_index[aa,:]
# aa=findall((ismissing).(oceansites_index.Column17))
# oceansites_index=oceansites_index[aa,:]

test=sum([sum((!ismissing).(oceansites_index[:,i])) for i in 17:22])
test>0 ? error("unclear lines remain") : oceansites_index=oceansites_index[!,1:16]
# test=sum([sum((!ismissing).(oceansites_index[:,i])) for i in 17:22])
# test>0 ? error("unclear lines remain") : oceansites_index=oceansites_index[!,1:16]

#column names
tmp=readlines(fil)[7]
@@ -1065,324 +1065,3 @@ end

##

module XBT

using TableScraper, HTTP, Downloads, CodecZlib, Dates, Glob, DataFrames, CSV, Dataverse
import OceanRobots: XBTtransect
import Base: read

"""# XBT transect
For more information, [see this page](https://www-hrx.ucsd.edu/index.html).
_Data were made available by the Scripps High Resolution XBT program (www-hrx.ucsd.edu)_
"""

"""
list_transects(; group="SIO")
known groups : AOML, SIO
```
using OceanRobots
OceanRobots.list_transects("SIO")
```
"""
function list_transects(group="SIO")
if group=="AOML"
list_of_transects_AOML
elseif group=="SIO"
list_of_transects_SIO
else
@warn "unknown group"
[]
end
end

list_of_transects_SIO=[
"PX05", "PX06", "PX30", "PX34", "PX37", "PX37-South", "PX38", "PX40",
"PX06-Loop", "PX08", "PX10", "PX25", "PX44", "PX50", "PX81",
"AX22", "IX15", "IX28"]

list_of_transects=list_of_transects_SIO #alias, deprecated

list_of_transects_AOML=[
"AX01","AX02","AX04","AX07","AX08","AX10","AX18","AX20","AX25","AX32","AX90","AX97",
"AXCOAST","AXWBTS","MX01","MX02","MX04"]

### SIO transects

dep = -(5:10:895) # Depth (m), same for all profiles

function get_url_to_download(url1)
r = HTTP.get(url1)
h = String(r.body)
tmp=split(split(h,"../www-hrx/")[2],".gz")[1]
"https://www-hrx.ucsd.edu/www-hrx/"*tmp*".gz"
end

function download_file_if_needed(url2)
path1=joinpath(tempdir(),basename(url2))
isfile(path1) ? nothing : Downloads.download(url2,path1)

path2=path1[1:end-3]*".txt"
open(GzipDecompressorStream, path1) do stream
write(path2,stream)
end

path2
end

function read_SIO_XBT(path2)
txt=readlines(path2)

nlines=parse(Int,txt[1])
T_all=zeros(nlines,length(dep))
meta_all=Array{Any}(undef,nlines,4)

for li in 1:nlines
i=2+(li-1)*9

lat=parse(Float64,txt[i][1:11])
lon=parse(Float64,txt[i][12:19])
lon+=(lon>180 ? -360 : 0)
day=parse(Float64,txt[i][19:21])
mon=parse(Float64,txt[i][23:24])
year=parse(Float64,txt[i][26:27])
hour=parse(Float64,txt[i][29:30])
min=parse(Float64,txt[i][32:33])
sec=parse(Float64,txt[i][35:36])
profile_number=parse(Float64,txt[i][38:40])
year=year+(year > 50 ? 1900 : 2000)
date=DateTime(year,mon,day,hour,min,sec)

meta_all[li,:]=[lon lat date profile_number]

T=[]
for ii in 1:8
push!(T,1/1000*parse.(Int,split(txt[i+ii]))...)
end
T[T.<0.0].=NaN
T_all[li,:].=T
end

T_all,meta_all
# lines(T,dep)
end
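
# Hypothetical end-to-end sketch of the SIO path, mirroring what `read` does
# further below: pick one cruise from the transect table, resolve and download
# the compressed profile file, then parse it into a temperature section.
function demo_read_SIO(transect="PX05",cr=1)
    cruises=list_of_cruises(transect)   # DataFrame with cruise, year, month, url
    url2=get_url_to_download(cruises.url[cr])
    path2=download_file_if_needed(url2)
    T_all,meta_all=read_SIO_XBT(path2)  # size(T_all)=(nprofiles,length(dep)); flagged (negative) values become NaN
    (T=T_all,meta=meta_all,depth=dep)
end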

"""
list_of_cruises(transect)
```
include("parse_xbt_html.jl")
transect="PX05"
cruises,years,months,url_base=list_of_cruises(transect)
CR=cruises[1]
url1=url_base*CR*".html"
url2=get_url_to_download(url1)
path2=download_file(url2)
```
"""
function list_of_cruises(transect="PX05")
PX=transect[3:end]
PX=( transect=="PX06-South" ? "37s" : PX )
PX=( transect=="PX06-Loop" ? "06" : PX )

pp="p"
pp=( transect[1]=='I' ? "i" : pp )
pp=( transect[1]=='A' ? "a" : pp )

url0="https://www-hrx.ucsd.edu/$(pp)x$(PX).html"
url_base=url0[1:end-5]*"/$(pp)$(PX)"
x=scrape_tables(url0)
y=x[4].rows

months=Int[]; years=Int[]; cruises=String[]
for row in 3:length(y)
z=y[row]
a=findall( (z.!==" \n ").&&(z.!==" ") )
if length(a)>1
push!(months,Int.(a[2:end].-1)...)
push!(years,parse(Int,z[1])*ones(length(a)-1)...)
push!(cruises,z[a[2:end]]...)
end
end

DataFrame("cruise" => cruises, "year" => years, "month" => months, "url" => .*(.*(url_base,cruises),".html"))
end
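
# Sketch (hypothetical helper): the table returned by `list_of_cruises` is a
# plain DataFrame, so it can be filtered before choosing which cruise to read,
# e.g. keeping only the cruises from a given year.
function demo_cruises_in_year(transect="PX05",year=2010)
    cruises=list_of_cruises(transect)
    cruises[cruises.year.==year,:]
end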

"""
read(x::XBTtransect;source="SIO",transect="PX05",cr=1,cruise="")
```
using OceanRobots
read(XBTtransect(),source="SIO",transect="PX05",cruise="0910")
```
"""
function read(x::XBTtransect;source="SIO",transect="PX05",cr=1,cruise="")
if source=="SIO"
cruises=list_of_cruises(transect)
CR=(isempty(cruise) ? cr : findall(cruises.cruise.==cruise)[1])
url1=cruises.url[CR]
url2=get_url_to_download(url1)
path2=download_file_if_needed(url2)
T_all,meta_all=read_SIO_XBT(path2)
XBTtransect(source,transect,[T_all,meta_all,cruises.cruise[CR]],path2)
elseif source=="AOML"
list1=XBT.list_files_on_server(transect)
# list2=XBT.get_url_to_transect(transect)
CR=(isempty(cruise) ? cr : findall(list1.==cruise)[1])
files=XBT.download_file_if_needed_AOML(transect,list1[CR])
path=dirname(files[1])
(data,meta)=read_NOAA_XBT(path)
XBTtransect(source,string(transect),[data,meta,list1[CR]],path)
else
@warn "unknown source"
end
end
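
# Usage sketch for the high-level entry point defined above; the cruise
# identifiers are the ones used in the docstring examples and are only
# illustrative (SIO expects a cruise code, AOML expects a *_qc tarball name).
function demo_read_both()
    sio=read(XBTtransect(),source="SIO",transect="PX05",cruise="0910")
    aoml=read(XBTtransect(),source="AOML",transect="AX08",cruise="ax80102_qc.tgz")
    (sio,aoml)
end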

### AOML transects

"""
read_NOAA_XBT(path)
```
using OceanRobots
files=XBT.download_file_if_needed_AOML("AX08","ax80102_qc.tgz")
(data,meta)=XBT.read_NOAA_XBT(dirname(files[1]))
```
List of variables:
- "te" is for in situ temperature
- "th" is for potential temperature
- “sa” for salinity (climatology from WOA)
- “ht” for dynamic height reference to sea surface
- “de” for depth
- “ox” for oxygen
- “Cast” for oxygen
"""
function read_NOAA_XBT(path; silencewarnings=true)
list=glob("*.???",path)
data=DataFrame()
meta=DataFrame()
for ii in 1:length(list)
fil=list[ii]
tmp1=CSV.read(fil,DataFrame,header=1,limit=1,delim=' ',ignorerepeated=true, silencewarnings=silencewarnings)
#
tmp2=tmp1[1,5]
t=(if size(tmp1,2)==7
tmp2*"200"*string(tmp1[1,6])
else
tmp2a=tmp2[end-1:end]
tmp2b=parse(Int,tmp2a)
tmp2[1:end-2]*(tmp2b<50 ? "19"*tmp2a : "20"*tmp2a)
end)
d=Date(t,"mm/dd/yyyy")
h=div(tmp1[1,end],100)
m=rem(tmp1[1,end],100)
t=DateTime(d,Time(h,m,0))
#
append!(meta,DataFrame("lon"=>tmp1.long,"lat"=>tmp1.lat,"time"=>t,"cast"=>tmp1.Cast))
d=CSV.read(fil,DataFrame,header=11,skipto=13,delim=' ',ignorerepeated=true, silencewarnings=silencewarnings)
d.lon.=meta.lon[end]
d.lat.=meta.lat[end]
d.time.=meta.time[end]
d.cast.=meta.cast[end]
append!(data,d)
end
(data,meta)
end
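
# Hypothetical post-processing sketch: extract one cast from the long-form
# `data` table returned above. Column names are assumed to follow the variable
# list in the docstring (e.g. `de` for depth, `te` for temperature), plus the
# lon/lat/time/cast columns appended in the loop.
function demo_one_cast(data::DataFrame,cast)
    d=data[data.cast.==cast,:]
    (depth=d.de,temperature=d.te)   # assumed column names
end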

function get_url_to_transect(transect="AX08")
ax=name_on_API(transect)
url1="https://www.aoml.noaa.gov/phod/hdenxbt/ax_home.php?ax="*string(ax)
r = HTTP.get(url1)
h = String(r.body)
txt0="<select name=\"cnum\">"
txt1="</select></p><br><p>"
h1=split(split(h,txt0)[2],txt1)[1]
h2=split(h1,"value=")[2:end]
[split(split(i,">")[2]," \n")[1] for i in h2]
end

function list_files_on_server(transect="AX08")
ax=name_on_server(transect)
url1="https://www.aoml.noaa.gov/phod/hdenxbt/"*ax*"/"
r = HTTP.get(url1)
h = String(r.body)
#in the html look for "ax*_qc.tgz" etc:
txt0="Parent Directory</a></li>\n"
txt1="</ul>"
h1=split(split(h,txt0)[2],txt1)[1]
txt2="<li><a href=\""
h2=split(h1,txt2)
h2=h2[findall( (!isempty).(h2) )]
h3=[split(i,"\">")[1] for i in h2]
h3[findall(occursin.("_qc.tgz",h3).||occursin.("_qc_2.tgz",h3).||occursin.("_qc_3.tgz",h3))]
end
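
# Sketch (hypothetical helper) contrasting the two AOML listings:
# `get_url_to_transect` scrapes the cruise menu of the AOML web page, while
# `list_files_on_server` returns the *_qc tarball names that
# `download_file_if_needed_AOML` expects.
function demo_AOML_listings(transect="AX08")
    (cruise_labels=get_url_to_transect(transect),
     qc_tarballs=list_files_on_server(transect))
end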

function name_on_server(transect)
ax=if transect=="AXCOAST"
"axcs"
elseif transect=="AXWBTS"
"axwbts"
elseif transect[1:2]=="MX"
"mx"*string(parse(Int,transect[3:end]))
else
"ax"*string(parse(Int,transect[3:end]))
end
end

function name_on_API(transect)
ax=if transect=="AXCOAST"
"cs"
elseif transect=="AXWBTS"
"wbts"
elseif transect[1:2]=="MX"
"1"*transect[3:end]
else
transect[3:end]
end
end
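
# Illustrative check of the two AOML naming conventions handled above: the
# directory name on the file server versus the identifier used by the web API.
function demo_AOML_names()
    [(t,name_on_server(t),name_on_API(t)) for t in ("AX08","MX01","AXCOAST","AXWBTS")]
end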

"""
XBT.download_file_if_needed_AOML(transect="AX08",file="ax80102_qc.tgz")
```
using OceanRobots
list=XBT.list_transects("AOML")
transect="AX08"
list1=XBT.list_files_on_server(transect)
list2=XBT.get_url_to_transect(transect)
files=XBT.download_file_if_needed_AOML(transect,"ax80102_qc.tgz")
path=dirname(files[1])
(data,meta)=XBT.read_NOAA_XBT(path)
```
"""
function download_file_if_needed_AOML(transect="AX08",file="ax80102_qc.tgz")
ax=name_on_server(transect)
url1="https://www.aoml.noaa.gov/phod/hdenxbt/"*ax*"/"*file
path1=joinpath(tempdir(),file)
isfile(path1) ? nothing : Downloads.download(url1,path1)
tmp_path=Dataverse.untargz(path1)

p=[tmp_path]
f=glob("*.???",p[1])
while(isempty(f))
p.=joinpath(p[1],readdir(p[1])[1])
f=glob("*.???",p[1])
end

glob("*.???",p[1])
end

end

##