-
Notifications
You must be signed in to change notification settings - Fork 28
/
Copy pathNmrPreprocessing_script.R
1244 lines (1004 loc) · 41 KB
/
NmrPreprocessing_script.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
## ==========================
# Internal functions
## ==========================
# beginTreatment
beginTreatment <- function(name, Signal_data = NULL, Signal_info = NULL,
force.real = FALSE) {
cat("Begin", name, "\n")
# Formatting the Signal_data and Signal_info -----------------------
vec <- is.vector(Signal_data)
if (vec) {
Signal_data <- vec2mat(Signal_data)
}
if (is.vector(Signal_info)) {
Signal_info <- vec2mat(Signal_info)
}
if (!is.null(Signal_data)) {
if (!is.matrix(Signal_data)) {
stop("Signal_data is not a matrix.")
}
if (!is.complex(Signal_data) && !is.numeric(Signal_data)) {
stop("Signal_data contains non-numerical values.")
}
}
if (!is.null(Signal_info) && !is.matrix(Signal_info)) {
stop("Signal_info is not a matrix.")
}
Original_data <- Signal_data
# Extract the real part of the spectrum ---------------------------
if (force.real) {
if (is.complex(Signal_data)) {
Signal_data <- Re(Signal_data)
} else {
# The signal is numeric Im(Signal_data) is zero anyway so let's avoid
# using complex(real=...,imaginary=0) which would give a complex signal
# in endTreatment()
force.real <- FALSE
}
}
# Return the formatted data and metadata entries --------------------
return(list(start = proc.time(), vec = vec, force.real = force.real,
Original_data = Original_data, Signal_data = Signal_data, Signal_info = Signal_info))
}
# endTreatment
endTreatment <- function(name, begin_info, Signal_data) {
# begin_info: object outputted from beginTreatment
# Formatting the entries and printing process time -----------------------
end_time <- proc.time() # record it as soon as possible
start_time <- begin_info[["start"]]
delta_time <- end_time - start_time
delta <- delta_time[]
cat("End", name, "\n")
cat("It lasted", round(delta["user.self"], 3), "s user time,", round(delta["sys.self"],3),
"s system time and", round(delta["elapsed"], 3), "s elapsed time.\n")
if (begin_info[["force.real"]]) {
# The imaginary part is left untouched
i <- complex(real = 0, imaginary = 1)
Signal_data <- Signal_data + i * Im(begin_info[["Original_data"]])
}
if (begin_info[["vec"]]) {
Signal_data <- Signal_data[1, ]
}
# Return the formatted data and metadata entries --------------------
return(Signal_data)
}
# checkArg
checkArg <- function(arg, checks, can.be.null=FALSE) {
check.list <- list(bool=c(is.logical, "a boolean"),
int =c(function(x){x%%1==0}, "an integer"),
num =c(is.numeric, "a numeric"),
str =c(is.character, "a string"),
pos =c(function(x){x>0}, "positive"),
pos0=c(function(x){x>=0}, "positive or zero"),
l1 =c(function(x){length(x)==1}, "of length 1")
)
if (is.null(arg)) {
if (!can.be.null) {
stop(deparse(substitute(arg)), " is null.")
}
} else {
if (is.matrix(arg)) {
stop(deparse(substitute(arg)), " is not scalar.")
}
for (c in checks) {
if (!check.list[[c]][[1]](arg)) {
stop(deparse(substitute(arg)), " is not ", check.list[[c]][[2]], ".")
}
}
}
}
# getArg
getArg <- function(arg, info, argname, can.be.absent=FALSE) {
if (is.null(arg)) {
start <- paste("impossible to get argument", argname, "it was not given directly and");
if (!is.matrix(info)) {
stop(paste(start, "the info matrix was not given"))
}
if (!(argname %in% colnames(info))) {
if (can.be.absent) {
return(NULL)
} else {
stop(paste(start, "is not in the info matrix"))
}
}
if (nrow(info) < 1) {
stop(paste(start, "the info matrix has no row"))
}
arg <- info[1,argname]
if (is.na(arg)) {
stop(paste(start, "it is NA in the info matrix"))
}
}
return(arg)
}
# binarySearch
binarySearch <- function(a, target, lower = TRUE) {
# search the index i in a such that a[i] == target
# if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
# if !lower, it seraches the closer a[i] such that a[i] > target
# a should be monotone but can be increasing or decreasing
# if a is increasing INVARIANT: a[amin] < target < a[amax]
N <- length(a)
if ((a[N] - target) * (a[N] - a[1]) <= 0) {
return(N)
}
if ((a[1] - target) * (a[N] - a[1]) >= 0) {
return(1)
}
amin <- 1
amax <- N
while (amin + 1 < amax) {
amid <- floor((amin + amax)/2)
if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
amin <- amid
} else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
amax <- amid
} else {
# a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
# target
return(amid)
}
}
if (xor(lower, a[amin] > a[amax])) {
# (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
# If increasing and we want the lower, we take amin
# If decreasing and we want the bigger, we take amin too
return(amin)
} else {
return(amax)
}
}
# Interpol
Interpol <- function(t, y) {
# y: sample
# t : warping function
m <- length(y)
# t <= m-1
# because if t > m-1, y[ti+1] will be NA when we compute g
valid <- 1 <= t & t <= m-1 # FIXME it was '<' in Bubble v2
s <- (1:m)[valid]
ti <- floor(t[s])
tr <- t[s] - ti
g <- y[ti + 1] - y[ti]
f <- y[ti] + tr * g
list(f=f, s=s, g=g)
}
# vec2mat
vec2mat <- function(vec) {
return(matrix(vec, nrow = 1, dimnames = list(c(1), names(vec))))
}
# binarySearch
binarySearch <- function(a, target, lower = TRUE) {
# search the index i in a such that a[i] == target
# if it doesn't exists and lower, it searches the closer a[i] such that a[i] < target
# if !lower, it seraches the closer a[i] such that a[i] > target
# a should be monotone but can be increasing or decreasing
# if a is increasing INVARIANT: a[amin] < target < a[amax]
N <- length(a)
if ((a[N] - target) * (a[N] - a[1]) <= 0) {
return(N)
}
if ((a[1] - target) * (a[N] - a[1]) >= 0) {
return(1)
}
amin <- 1
amax <- N
while (amin + 1 < amax) {
amid <- floor((amin + amax)/2)
if ((a[amid] - target) * (a[amax] - a[amid]) < 0) {
amin <- amid
} else if ((a[amid] - target) * (a[amax] - a[amid]) > 0) {
amax <- amid
} else {
# a[amid] == a[amax] or a[amid] == target In both cases, a[amid] ==
# target
return(amid)
}
}
if (xor(lower, a[amin] > a[amax])) {
# (lower && a[amin] < a[amax]) || (!lower && a[min] > a[max])
# If increasing and we want the lower, we take amin
# If decreasing and we want the bigger, we take amin too
return(amin)
} else {
return(amax)
}
}
# indexInterval
indexInterval <- function (a, from, to, inclusive=TRUE) {
# If inclusive and from <= to, we need to take the lower
# If not inclusive and from > to, we need to take the lower too
lowerFrom <- xor(inclusive, from > to)
fromIndex <- binarySearch(a, from, lowerFrom)
toIndex <- binarySearch(a, to, !lowerFrom)
return(fromIndex:toIndex)
}
## ==========================
# GroupDelayCorrection
## ==========================
GroupDelayCorrection <- function(Fid_data, Fid_info = NULL, group_delay = NULL) {
# Data initialisation and checks ----------------------------------------------
begin_info <- beginTreatment("GroupDelayCorrection", Fid_data, Fid_info)
Fid_data <- begin_info[["Signal_data"]]
dimension_names <- dimnames(Fid_data)
Fid_info <- begin_info[["Signal_info"]]
checkArg(group_delay, c("num", "pos0"), can.be.null = TRUE)
# if Fid_info and group_delay are NULL, getArg will generate an error
group_delay <- getArg(group_delay, Fid_info, "GRPDLY", can.be.absent = TRUE)
if (is.null(group_delay)) {
# See DetermineBrukerDigitalFilter.m in matNMR MATLAB library
group_delay_matrix <- matrix(c(44.75, 46, 46.311, 33.5, 36.5, 36.53, 66.625,
48, 47.87, 59.0833, 50.1667, 50.229, 68.5625, 53.25, 53.289, 60.375,
69.5, 69.551, 69.5313, 72.25, 71.6, 61.0208, 70.1667, 70.184, 70.0156,
72.75, 72.138, 61.3438, 70.5, 70.528, 70.2578, 73, 72.348, 61.5052, 70.6667,
70.7, 70.3789, 72.5, 72.524, 61.5859, 71.3333, NA, 70.4395, 72.25, NA,
61.6263, 71.6667, NA, 70.4697, 72.125, NA, 61.6465, 71.8333, NA, 70.4849,
72.0625, NA, 61.6566, 71.9167, NA, 70.4924, 72.0313, NA), nrow = 21,
ncol = 3, byrow = TRUE, dimnames = list(c(2, 3, 4, 6, 8, 12, 16, 24,
32, 48, 64, 96, 128, 192, 256, 384, 512, 768, 1024, 1536, 2048),
c(10, 11, 12)))
decim <- Fid_info[1, "DECIM"]
dspfvs <- Fid_info[1, "DSPFVS"]
if (!(toString(decim) %in% rownames(group_delay_matrix))) {
stop(paste("Invalid DECIM", decim, "it should be one of", rownames(group_delay_matrix)))
}
if (!(toString(dspfvs) %in% colnames(group_delay_matrix))) {
stop(paste("Invalid DSPFVS", dspfvs, "it should be one of", colnames(group_delay_matrix)))
}
group_delay <- group_delay_matrix[toString(decim), toString(dspfvs)]
if (is.na(group_delay)) {
stop(paste("Invalid DECIM", decim, "for DSPFVS", dspfvs))
}
}
m <- ncol(Fid_data)
n <- nrow(Fid_data)
# GroupDelayCorrection ----------------------------------------------
# We do the shifting in the Fourier domain because the shift can be non-integer.
# That way we automatically have the circular behaviour of the shift and the
# interpolation if it is non-integer.
Spectrum <- t(stats::mvfft(t(Fid_data)))
# Spectrum <- FourierTransform(Fid_data, Fid_info)
p <- ceiling(m/2)
new_index <- c((p + 1):m, 1:p)
Spectrum <- Spectrum[,new_index]
Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
Omega <- (0:(m - 1))/m
i <- complex(real = 0, imaginary = 1)
if (n>1) {
Spectrum <- sweep(Spectrum, MARGIN = 2, exp(i * group_delay * 2 * pi * Omega), `*`)
Spectrum <- Spectrum[,new_index]
}else {
Spectrum <- Spectrum* exp(i * group_delay * 2 * pi * Omega)
Spectrum <- Spectrum[new_index]
Spectrum <- matrix(data = Spectrum, ncol = m, nrow = n)
}
Fid_data <- t(stats::mvfft(t(Spectrum), inverse = TRUE))/m
colnames(Fid_data) <- dimension_names[[2]]
rownames(Fid_data) <- dimension_names[[1]]
# Data finalisation ----------------------------------------------
return(endTreatment("GroupDelayCorrection", begin_info, Fid_data))
}
## ==========================
# SolventSuppression
## ==========================
SolventSuppression <- function(Fid_data, lambda.ss = 1e+06, ptw.ss = TRUE,
plotSolvent = F, returnSolvent = F) {
# Data initialisation and checks ----------------------------------------------
begin_info <- beginTreatment("SolventSuppression", Fid_data)
Fid_data <- begin_info[["Signal_data"]]
checkArg(ptw.ss, c("bool"))
checkArg(lambda.ss, c("num", "pos0"))
# difsm function definition for the smoother -----------------------------------
if (ptw.ss) {
# Use of the function in ptw that smoothes signals with a finite difference
# penalty of order 2
difsm <- ptw::difsm
} else {
# Or manual implementation based on sparse matrices for large data series (cf.
# Eilers, 2003. 'A perfect smoother')
difsm <- function(y, d = 2, lambda) {
m <- length(y)
# Sparse identity matrix m x m
E <- Matrix::Diagonal(m)
D <- Matrix::diff(E, differences = d)
A <- E + lambda.ss * Matrix::t(D) %*% D
# base::chol does not take into account that A is sparse and is extremely slow
C <- Matrix::chol(A)
x <- Matrix::solve(C, Matrix::solve(Matrix::t(C), y))
return(as.numeric(x))
}
}
# Solvent Suppression ----------------------------------------------
n <- dim(Fid_data)[1]
if (returnSolvent) {
SolventRe <- Fid_data
SolventIm <- Fid_data
}
for (i in 1:n) {
FidRe <- Re(Fid_data[i, ])
FidIm <- Im(Fid_data[i, ])
solventRe <- difsm(y = FidRe, lambda = lambda.ss)
solventIm <- difsm(y = FidIm, lambda = lambda.ss)
if (plotSolvent) {
m <- length(FidRe)
graphics::plot(1:m, FidRe, type = "l", col = "red")
graphics::lines(1:m, solventRe, type = "l", col = "blue")
graphics::plot(1:m, FidIm, type = "l", col = "red")
graphics::lines(1:m, solventIm, type = "l", col = "blue")
}
FidRe <- FidRe - solventRe
FidIm <- FidIm - solventIm
Fid_data[i, ] <- complex(real = FidRe, imaginary = FidIm)
if (returnSolvent) {
SolventRe[i, ] <- solventRe
SolventIm[i, ] <- solventIm
}
}
# Data finalisation ----------------------------------------------
Fid_data <- endTreatment("SolventSuppression", begin_info, Fid_data)
if (returnSolvent) {
return(list(Fid_data = Fid_data, SolventRe = SolventRe, SolventIm = SolventIm))
} else {
return(Fid_data)
}
}
## ==========================
# Apodization
# =============================
Apodization <- function(Fid_data, Fid_info = NULL, DT = NULL,
type.apod = c("exp","cos2", "blockexp", "blockcos2",
"gauss", "hanning", "hamming"), phase = 0, rectRatio = 1/2,
gaussLB = 1, expLB = 1, plotWindow = F, returnFactor = F) {
# Data initialisation and checks ----------------------------------------------
begin_info <- beginTreatment("Apodization", Fid_data, Fid_info)
Fid_data <- begin_info[["Signal_data"]]
Fid_info <- begin_info[["Signal_info"]]
# Data check
type.apod <- match.arg(type.apod)
checkArg(DT, c("num", "pos"), can.be.null = TRUE)
checkArg(phase, c("num"))
# Apodization ----------------------------------------------
DT <- getArg(DT, Fid_info, "DT") # Dwell Time
m <- ncol(Fid_data)
t <- (1:m) * DT # Time
rectSize <- ceiling(rectRatio * m)
gaussLB <- (gaussLB/(sqrt(8 * log(2))))
# Define the types of apodization:
switch(type.apod, exp = {
# exponential
Factor <- exp(-expLB * t)
}, cos2 = {
# cos^2
c <- cos((1:m) * pi/(2 * m) - phase * pi/2)
Factor <- c * c
}, blockexp = {
# block and exponential
Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
# | rectSize | 1 ___________ | \ 0 \____
Factor[(rectSize + 1):m] <- exp(-expLB * t[1:(m - rectSize)])
}, blockcos2 = {
# block and cos^2
Factor <- c(rep.int(1, rectSize), rep.int(0, m - rectSize))
c <- cos((1:(m - rectSize)) * pi/(2 * (m - rectSize)))
Factor[(rectSize + 1):m] <- c * c
}, gauss = {
# gaussian
Factor <- exp(-(gaussLB * t)^2/2)
Factor <- Factor/max(Factor)
}, hanning = {
# Hanning
Factor <- 0.5 + 0.5 * cos((1:m) * pi/m - phase * pi)
}, hamming = {
# Hamming
Factor <- 0.54 + 0.46 * cos((1:m) * pi/m - phase * pi)
})
if (plotWindow) {
graphics::plot(1:m, Factor, "l")
# dev.off() # device independent, it is the responsability of the
# caller to do it
}
# Apply the apodization factor on the spectra
Fid_data <- sweep(Fid_data, MARGIN = 2, Factor, `*`)
# Data finalisation ----------------------------------------------
Fid_data <- endTreatment("Apodization", begin_info, Fid_data)
if (returnFactor) {
return(list(Fid_data = Fid_data, Factor = Factor))
} else {
return(Fid_data)
}
}
## ====================================================
# FourierTransform
## ====================================================
# fftshift1D2D
fftshift1D2D <- function(x) {
vec <- F
if (is.vector(x)) {
x <- vec2mat(x)
vec <- T
}
m <- dim(x)[2]
p <- ceiling(m/2)
new_index <- c((p + 1):m, 1:p)
y <- x[, new_index, drop = vec]
}
# FourierTransform
FourierTransform <- function(Fid_data, Fid_info = NULL, SW_h = NULL, SW = NULL, O1 = NULL, reverse.axis = TRUE) {
# Data initialisation and checks ----------------------------------------------
begin_info <- beginTreatment("FourierTransform", Fid_data, Fid_info)
Fid_data <- begin_info[["Signal_data"]]
Fid_info <- begin_info[["Signal_info"]]
m <- ncol(Fid_data)
n <- nrow(Fid_data)
if (is.null(SW_h)) {
SW_h <- getArg(SW_h, Fid_info, "SW_h")
}
if (is.null(SW)) {
SW <- getArg(SW, Fid_info, "SW") # Sweep Width in ppm (semi frequency scale in ppm)
}
if (is.null(O1)) {
O1 <- getArg(O1, Fid_info, "O1")
}
checkArg(reverse.axis, c("bool"))
# Fourier Transformation ----------------------------------------------
# mvfft does the unnormalized fourier transform (see ?mvfft), so we need divide
# by m. It does not matter a lot in our case since the spectrum will be
# normalized.
# FT
RawSpect_data <- fftshift1D2D(t(stats::mvfft(t(Fid_data))))
# recover the frequencies values
f <- ((0:(m - 1)) - floor(m/2)) * Fid_info[1, "SW_h"]/(m-1)
if(reverse.axis == TRUE) {
revind <- rev(1:m)
RawSpect_data <- RawSpect_data[,revind] # reverse the spectrum
}
RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = m)
colnames(RawSpect_data) <- f
rownames(RawSpect_data) <- rownames(Fid_data)
# PPM conversion ----------------------------------------------
# The Sweep Width has to be the same since the column names are the same
ppmInterval <- SW/(m-1)
O1index = round((m+1)/2+O1*(m - 1) / SW_h)
end <- O1index - m
start <- O1index -1
ppmScale <- (start:end) * ppmInterval
RawSpect_data <- matrix(RawSpect_data, nrow = n, ncol = -(end - start) + 1, dimnames =
list(rownames(RawSpect_data), ppmScale))
# Data finalisation ----------------------------------------------
return(endTreatment("FourierTransform", begin_info, RawSpect_data))
}
## ====================================================
# InternalReferencing
## ====================================================
InternalReferencing <- function(Spectrum_data, Fid_info, method = c("max", "thres"),
range = c("nearvalue", "all", "window"), ppm.value = 0,
direction = "left", shiftHandling = c("zerofilling", "cut",
"NAfilling", "circular"), c = 2, pc = 0.02, fromto.RC = NULL,
ppm.ir = TRUE, rowindex_graph = NULL) {
# Data initialisation and checks ----------------------------------------------
begin_info <- beginTreatment("InternalReferencing", Spectrum_data, Fid_info)
Spectrum_data <- begin_info[["Signal_data"]]
Fid_info <- begin_info[["Signal_info"]]
######## Check input arguments
range <- match.arg(range)
shiftHandling <- match.arg(shiftHandling)
method <- match.arg(method)
plots <- NULL
checkArg(ppm.ir, c("bool"))
checkArg(unlist(fromto.RC), c("num"), can.be.null = TRUE)
checkArg(pc, c("num"))
checkArg(ppm.value, c("num"))
checkArg(rowindex_graph, "num", can.be.null = TRUE)
# fromto.RC : if range == "window",
# fromto.RC defines the spectral window where to search for the peak
if (!is.null(fromto.RC)) {
diff <- diff(unlist(fromto.RC))[1:length(diff(unlist(fromto.RC)))%%2 !=0]
for (i in 1:length(diff)) {
if (diff[i] >= 0) {
fromto <- c(fromto.RC[[i]][2], fromto.RC[[i]][1])
fromto.RC[[i]] <- fromto
}
}
}
# findTMSPpeak function ----------------------------------------------
# If method == "tresh", findTMSPpeak will find the position of the first
# peak (from left or right) which is higher than a predefined threshold
# and is computed as: c*(cumulated_mean/cumulated_sd)
findTMSPpeak <- function(ft, c = 2, direction = "left") {
ft <- Re(ft) # extraction de la partie réelle
N <- length(ft)
if (direction == "left") {
newindex <- rev(1:N)
ft <- rev(ft)
}
thres <- 99999
i <- 1000 # Start at point 1000 to find the peak
vect <- ft[1:i]
while (vect[i] <= (c * thres)) {
cumsd <- stats::sd(vect)
cummean <- mean(vect)
thres <- cummean + 3 * cumsd
i <- i + 1
vect <- ft[1:i]
}
if (direction == "left") {
v <- newindex[i]
} else {v <- i}
if (is.na(v)) {
warning("No peak found, need to lower the threshold.")
return(NA)
} else {
# recherche dans les 1% de points suivants du max trouve pour etre au sommet du
# pic
d <- which.max(ft[v:(v + N * 0.01)])
new.peak <- v + d - 1 # nouveau pic du TMSP si d > 0
if (names(which.max(ft[v:(v + N * 0.01)])) != names(which.max(ft[v:(v + N * 0.03)]))) {
# recherche dans les 3% de points suivants du max trouve pour eviter un faux
# positif
warning("the TMSP peak might be located further away, increase the threshold to check.")
}
return(new.peak)
}
}
# Define the search zone ----------------------------------------
n <- nrow(Spectrum_data)
m <- ncol(Spectrum_data)
# The Sweep Width (SW) has to be the same since the column names are the same
SW <- Fid_info[1, "SW"] # Sweep Width in ppm
ppmInterval <- SW/(m-1) # size of a ppm interval
# range: How the search zone is defined ("all", "nearvalue" or "window")
if (range == "all") {
Data <- Spectrum_data
} else { # range = "nearvalue" or "window"
# Need to define colindex (column indexes) to apply indexInterval on it
if (range == "nearvalue") {
fromto.RC <- list(c(-(SW * pc)/2 + ppm.value, (SW * pc)/2 + ppm.value)) # automatic fromto values in ppm
colindex <- as.numeric(colnames(Spectrum_data))
} else {
# range == "window"
# fromto.RC is already user-defined
if (ppm.ir == TRUE) {
colindex <- as.numeric(colnames(Spectrum_data))
} else {
colindex <- 1:m
}
}
# index intervals taking into account the different elements in the list fromto.RC
Int <- vector("list", length(fromto.RC))
for (i in 1:length(fromto.RC)) {
Int[[i]] <- indexInterval(colindex, from = fromto.RC[[i]][1],
to = fromto.RC[[i]][2], inclusive = TRUE)
}
# define Data as the cropped spectrum including the index intervals
# outside the research zone, the intensities are set to the minimal
# intensity of the research zone
if (n > 1){
Data <- apply(Re(Spectrum_data[,unlist(Int)]),1, function(x) rep(min(x), m))
Data <- t(Data)
Data[,unlist(Int)] <- Re(Spectrum_data[,unlist(Int)])
} else {
Data <- rep(min(Re(Spectrum_data)) ,m)
Data[unlist(Int)] <- Re(Spectrum_data[unlist(Int)])
}
}
# Apply the peak location search method ('thres' or 'max') on spectra
# -----------------------------------------------------------------------
if (method == "thres") {
TMSPpeaks <- apply(Data, 1, findTMSPpeak, c = c, direction = direction)
} else { # method == "max
TMSPpeaks <- apply(Re(Data), 1, which.max)
}
# Shift spectra according to the TMSPpeaks found --------------------------------
# Depends on the shiftHandling
# TMSPpeaks is a column index
maxpeak <- max(TMSPpeaks) # max accross spectra
minpeak <- min(TMSPpeaks) # min accross spectra
if (shiftHandling %in% c("zerofilling", "NAfilling", "cut")) {
fill <- NA
if (shiftHandling == "zerofilling") {
fill <- 0
}
start <- maxpeak - 1
end <- minpeak - m
# ppm values of each interval for the whole spectral range of the spectral matrix
ppmScale <- (start:end) * ppmInterval
# check if ppm.value is in the ppmScale interval
if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
ppm.value = 0
}
# if ppm.value != 0, ppmScale is adapted
ppmScale <- ppmScale + ppm.value
# create the spectral matrix with realigned spectra
Spectrum_data_calib <- matrix(fill, nrow = n, ncol = -(end - start) + 1,
dimnames = list(rownames(Spectrum_data), ppmScale))
# fills in Spectrum_data_calib with shifted spectra
for (i in 1:n) {
shift <- (1 - TMSPpeaks[i]) + start
Spectrum_data_calib[i, (1 + shift):(m + shift)] <- Spectrum_data[i, ]
}
if (shiftHandling == "cut") {
Spectrum_data_calib = as.matrix(stats::na.omit(t(Spectrum_data_calib)))
Spectrum_data_calib = t(Spectrum_data_calib)
base::attr(Spectrum_data_calib, "na.action") <- NULL
}
} else {
# circular
start <- 1 - maxpeak
end <- m - maxpeak
ppmScale <- (start:end) * ppmInterval
# check if ppm.value in is the ppmScale interval
if(ppm.value < min(ppmScale) | ppm.value > max(ppmScale)) {
warning("ppm.value = ", ppm.value, " is not in the ppm interval [",
round(min(ppmScale),2), ",", round(max(ppmScale),2), "], and is set to its default ppm.value 0")
ppm.value = 0
}
# if ppm.value != 0, ppmScale is adapted
ppmScale <- ppmScale + ppm.value
# create the spectral matrix with realigned spectra
Spectrum_data_calib <- matrix(nrow=n, ncol=end-start+1,
dimnames=list(rownames(Spectrum_data), ppmScale))
# fills in Spectrum_data_calib with shifted spectra
for (i in 1:n) {
shift <- (maxpeak-TMSPpeaks[i])
Spectrum_data_calib[i,(1+shift):m] <- Spectrum_data[i,1:(m-shift)]
if (shift > 0) {
Spectrum_data_calib[i,1:shift] <- Spectrum_data[i,(m-shift+1):m]
}
}
}
# Plot of the spectra (depending on rowindex_graph) ---------------------------------------------------
ppm = xstart = value = xend = Legend = NULL # only for R CMD check
# with the search zone for TMSP and the location of the peaks just found
if (!is.null(rowindex_graph)) {
if (range == "window") {
if (ppm.ir == TRUE) {
fromto <- fromto.RC
} else {
fromto <- list()
idcol <- as.numeric(colnames(Spectrum_data))
for (i in 1:length(fromto.RC)) {
fromto[[i]] <- as.numeric(colnames(Spectrum_data))[fromto.RC[[i]]]
}
}
} else {
fromto <- fromto.RC
}
# TMSPloc in ppm
TMSPloc <- as.numeric(colnames(Spectrum_data))[TMSPpeaks[rowindex_graph]]
# num plot per window
num.stacked <- 6
# rectanglar bands of color for the search zone
rects <- data.frame(xstart = sapply(fromto, function(x) x[[1]]),
xend = sapply(fromto, function(x) x[[2]]),
Legend = "Peak search zone and location")
# vlines for TMSP peak
addlines <- data.frame(rowname = rownames(Spectrum_data)[rowindex_graph],TMSPloc)
nn <- length(rowindex_graph)
i <- 1
j <- 1
plots <- vector(mode = "list", length = ceiling(nn/num.stacked))
while (i <= nn) {
last <- min(i + num.stacked - 1, nn)
melted <- reshape2::melt(Re(Spectrum_data[i:last, ]),
varnames = c("rowname", "ppm"))
plots[[j]] <- ggplot2::ggplot() + ggplot2::theme_bw() +
ggplot2::geom_line(data = melted,
ggplot2::aes(x = ppm, y = value)) +
ggplot2::geom_rect(data = rects, ggplot2::aes(xmin = xstart, xmax = xend,
ymin = -Inf, ymax = Inf, fill = Legend), alpha = 0.4) +
ggplot2::facet_grid(rowname ~ ., scales = "free_y") +
ggplot2::theme(legend.position = "none") +
ggplot2::geom_vline(data = addlines, ggplot2::aes(xintercept = TMSPloc),
color = "red", show.legend = TRUE) +
ggplot2::ggtitle("Peak search zone and location") +
ggplot2::theme(legend.position = "top", legend.text = ggplot2::element_text())
if ((melted[1, "ppm"] - melted[(dim(melted)[1]), "ppm"]) > 0) {
plots[[j]] <- plots[[j]] + ggplot2::scale_x_reverse()
}
i <- last + 1
j <- j + 1
}
plots
}
# Return the results ----------------------------------------------
Spectrum_data <- endTreatment("InternalReferencing", begin_info, Spectrum_data_calib)
if (is.null(plots)) {
return(Spectrum_data)
} else {
return(list(Spectrum_data = Spectrum_data, plots = plots))
}
}
## ====================================================
# ZeroOrderPhaseCorrection
## ====================================================
ZeroOrderPhaseCorrection <- function(Spectrum_data, type.zopc = c("rms", "manual", "max"),
plot_rms = NULL, returnAngle = FALSE, createWindow = TRUE,
angle = NULL, plot_spectra = FALSE,
ppm.zopc = TRUE, exclude.zopc = list(c(5.1,4.5))) {
# Data initialisation and checks ----------------------------------------------
# Entry arguments definition:
# plot_rms : graph of rms criterion returnAngle : if TRUE, returns avector of
# optimal angles createWindow : for plot_rms plots angle : If angle is not NULL,
# spectra are rotated according to the angle vector values
# plot_spectra : if TRUE, plot rotated spectra
begin_info <- beginTreatment("ZeroOrderPhaseCorrection", Spectrum_data)
Spectrum_data <- begin_info[["Signal_data"]]
n <- nrow(Spectrum_data)
m <- ncol(Spectrum_data)
rnames <- rownames(Spectrum_data)
# Check input arguments
type.zopc <- match.arg(type.zopc)
checkArg(ppm.zopc, c("bool"))
checkArg(unlist(exclude.zopc), c("num"), can.be.null = TRUE)
# type.zopc in c("max", "rms") -----------------------------------------
if (type.zopc %in% c("max", "rms")) {
# angle is found by optimization
# rms function to be optimised
rms <- function(ang, y, meth = c("max", "rms")) {
# if (debug_plot) { graphics::abline(v=ang, col='gray60') }
roty <- y * exp(complex(real = 0, imaginary = ang)) # spectrum rotation
Rey <- Re(roty)
if (meth == "rms") {
ReyPos <- Rey[Rey >= 0] # select positive intensities
POSss <- sum((ReyPos)^2, na.rm = TRUE) # SS for positive intensities
ss <- sum((Rey)^2, na.rm = TRUE) # SS for all intensities
return(POSss/ss) # criterion : SS for positive values / SS for all intensities
} else {
maxi <- max(Rey, na.rm = TRUE)
return(maxi)
}
}
# Define the interval where to search for (by defining Data)
if (is.null(exclude.zopc)) {
Data <- Spectrum_data
} else {
# if ppm.zopc == TRUE, then exclude.zopc is in the colnames values, else, in the column
# index
if (ppm.zopc == TRUE) {
colindex <- as.numeric(colnames(Spectrum_data))
} else {
colindex <- 1:m
}
# Second check for the argument exclude.zopc
diff <- diff(unlist(exclude.zopc))[1:length(diff(unlist(exclude.zopc)))%%2 !=0]
for (i in 1:length(diff)) {
if (ppm.zopc == TRUE & diff[i] >= 0) {
stop(paste("Invalid region removal because from <= to in ppm.zopc"))
} else if (ppm.zopc == FALSE & diff[i] <= 0) {stop(paste("Invalid region removal because from >= to in column index"))}
}
Int <- vector("list", length(exclude.zopc))
for (i in 1:length(exclude.zopc)) {
Int[[i]] <- indexInterval(colindex, from = exclude.zopc[[i]][1],
to = exclude.zopc[[i]][2], inclusive = TRUE)
}
vector <- rep(1, m)
vector[unlist(Int)] <- 0
if (n > 1) {
Data <- sweep(Spectrum_data, MARGIN = 2, FUN = "*", vector) # Cropped_Spectrum
} else {
Data <- Spectrum_data * vector
} # Cropped_Spectrum
}
# angles computation
Angle <- c()
for (k in 1:n)
{
# The function is rms is periodic (period 2pi) and it seems that there is a phase
# x such that rms is unimodal (i.e. decreasing then increasing) on the interval
# [x; x+2pi]. However, if we do the optimization for example on [x-pi; x+pi],
# instead of being decreasing then increasing, it might be increasing then
# decreasing in which case optimize, thinking it is a valley will have to choose
# between the left or the right of this hill and if it chooses wrong, it will end
# up at like x-pi while the minimum is close to x+pi.
# Supposing that rms is unimodal, the classical 1D unimodal optimization will
# work in either [-pi;pi] or [0;2pi] (this is not easy to be convinced by that I
# agree) and we can check which one it is simply by the following trick
f0 <- rms(0, Data[k, ],meth = type.zopc)
fpi <- rms(pi, Data[k, ], meth = type.zopc)
if (f0 < fpi) {
interval <- c(-pi, pi)
} else {
interval <- c(0, 2 * pi)
}