-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathFunction_SCC_by_DFS_in_R_4GitHub.R
586 lines (517 loc) · 33.2 KB
/
Function_SCC_by_DFS_in_R_4GitHub.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
### FUNCTION: Find Strongly Connected Components in a digraph by Depth-First Search [Tarjan]
#### -1.0 - Contacts ####
#
# Author:
# Ettore Settanni
# Cambridge
#
#### -1.1 - Revision notes ####
# Coding timeline
# Start: 13 07 2021
# End: 06 08 2021
# Fixes: 19 08 2021
# for GitHub: 14 09 2021
# REV01 02 11 2021 # loops the lowlink update in backtracking until the node's lowlink value pick up the smallest lowlink amongst its successors
# REV02 10 11 2021 # a node's successor may haves successors but these may be sinks - in which case, don't update lowlink
# REV03 (this) 13 11 2021 # fixes how the sack is popped (REV02 was a patch, but would fail). Keeps the intuition of REV01: the iteration is still needed
# One thing I didn't notice when using 'while loops' is the need for extra iterations when we're done backtracking to ensure a correct lowlink update (affecting stack-popping)
# Iterations may be needed before all nodes' lowlink values are actually set to correspond to the lowest lowlink value among their successors.
# Also until REV02 I've got the stack-popping routine misplaced, which would fail in certain instances.
#### -1.2 - motivation, references & caveats ####
# i have developed this implementation from scratch for self-learning/self-study
# it is based on my own (limited) understadinding of the original pseudocode, and build on flowcharts I sketched during Summer 2021
## PSEUDOCODES
# 1) Original academic paper: Page 157, Tarjan (1972) doi:10.1137/0201010
# 2) Deo (1974) Graph Theory [but for undirected graphs]
# 3) wikipedia: https://en.wikipedia.org/wiki/Tarjan%27s_strongly_connected_components_algorithm#The_algorithm_in_pseudocode
# 4) Princeton web resources linked to the book "Algorithms 4th ed": https://algs4.cs.princeton.edu/42digraph/
## Link with Block triangular permutation
# 5) Duff and Reid (1978) - FORTRAN code link to block triangularisation. But uses 'go to' statements hence cannot replicate with R
# 6) Strang G. (1986) Introduction to Applied Mathematics, Ch. 16 (doesn't implement Tarjan but refers to earlier work)
## Benchmarks - existing BUILT-IN functions
# Gephi - claims it implement Tarjan's algorithm
# igraph - obviously has a fucntion for SCC, not sure what runds underneath (C wrapper)
#### 00.1 - initialise: Clear ####
rm(list = ls())
gc()
#### 00.2 - initialise: data ####
#
# test_m <- matrix(c(
# 0, 1, 0, 0, 0, 0, 0, 0,
# 0, 0, 1, 0, 0, 0, 0, 0,
# 1, 0, 0, 0, 0, 0, 0, 0,
# 0, 1, 1, 0, 1, 0, 0, 0,
# 0, 0, 0, 1, 0, 1, 0, 0,
# 0, 0, 1, 0, 0, 0, 1, 0,
# 0, 0, 0, 0, 0, 1, 0, 0,
# 0, 0, 0, 0, 1, 0, 1, 1
# ),ncol = 8, byrow = TRUE)
## STRANG EXAMPLE intro to applied math p 637
# test_m <- matrix(c(
# 0, 0, 1, 0,
# 1, 0, 0, 1,
# 1, 0, 0, 0,
# 0, 0, 1, 0
# ), ncol = 4, byrow = TRUE)
# Princeton example https://algs4.cs.princeton.edu/42digraph/
test_m <- matrix(c(
0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0,
0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1,
0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0
),ncol = 13, byrow = TRUE)
#### 01.0 - declare FUNCTION: DFS_4_ISM returning DFS_output$SCC, DFS_output$BlockTri_m, and DFS_output$Transitive_Closure - modified DFS (Tarjan) ####
# The below builds on my original code: ISM_vs_Tarjan.R
# I've modified it to add transitive closure as per https://www.cs.princeton.edu/courses/archive/spr03/cs226/lectures/digraph.4up.pdf
# returns 3 output
# --- component
# --- block triangular permutation
# --- transitive closure (alternative to Warshall)
DFS_4_ISM <- function(test_m){
n_nodes <- ncol(test_m)
node_names <- formatC(0:(ncol(test_m)-1),width = 2, flag = "0") # thread: https://stackoverflow.com/questions/8266915/format-number-as-fixed-width-with-leading-zeros
colnames(test_m) <- node_names
rownames(test_m) <- node_names
## step 1: create list of successors for each node
successors <- lapply(node_names, function(x){
current_node <- x
# search the row corresponding to the current node for successors
all_successors_idx <- expand.grid(current_node, node_names, width = 2, flag = "0") # expand on current node to find successors
all_successors_links <- apply(all_successors_idx, 1, function(y){
test_m[y[1], y[2]]
})
if(length(as.character(all_successors_idx[which(all_successors_links != 0),2])) > 0){
as.character(all_successors_idx[which(all_successors_links != 0),2])
} else {
NA
}
})
names(successors) <- node_names
## step 2: lookup table (going to use this for depth descent and backtracking)
util_table <- lapply(1:n_nodes, function(z){
x <- successors[z]
a <- match(names(x), node_names)
b <- match(unlist(x), node_names)
temp_col <- expand.grid(a,b)
tab_head <- c("i",
"j",
"level",
"v_node",
#"v_label",
"v_number",
"v_lowlink",
"successors",
"k_successor_idx",
"w_k_node",
#"w_k_label",
"w_k_number",
"w_k_lowlink")
temp_mat <- matrix(NA, ncol = length(tab_head), nrow = nrow(temp_col))
colnames(temp_mat) <- tab_head
rownames(temp_mat) <- rep(names(x),nrow(temp_mat))
temp_mat[,"v_node"] <- temp_col[,1]
temp_mat[, "w_k_node"] <- temp_col[,2]
return(temp_mat)
})
util_table_df <- do.call(rbind, util_table)
## step 3: initialise stuff
# counters
n <- n_nodes
i <- 0
comp_count <- 0 # component counter
# initialise condition flags
nodes_not_numbered_yet <- n
test0 <- TRUE
# initialise lists
Stack_S <- list()
Component_list <- list()
# initialise table of node numbers
node_numbering <- cbind(rep(NA,length(node_names)),rep(NA,length(node_names)),rep(NA,length(node_names)))
colnames(node_numbering) <- c("node_number","node_lowlink","node_onStack")
rownames(node_numbering) <- node_names
## Step 4: Main loop - the magic happens here (if it works)
# Loop #0: restart at each strongly connected component
dummy4 <- TRUE
while(dummy4){
level <- 1 # I AM NOT ENTIRELY SURE ABOUT THIS SYSTEM I CAME UP WITH. it's like a thread that pulls you back up once the depth-first search reached an end.
j <- 0
# pick the first node not numbered yet
nodes_to_explore <- which(is.na(node_numbering[, "node_number"]))
if(nodes_not_numbered_yet != 0){ # there are nodes not numbered yet
v <- as.numeric(nodes_to_explore[1]) # just picking the first unnumbered node. In the first iteration this will be node 1
v_label <- node_names[v]
}
# REV01
flag_extra_round <- TRUE # for use later during backtracking to add an extra round and avoid some issues with updating values at the very end
extra_round_counter <- 0
# Loop #1: explores successors "depth first", jumping between nodes
dummy0 <- TRUE
while(dummy0){
i <- i + 1 # labels nodes as we visit them
j <- j + 1 # position in stack of nodes
# update node numbering
Stack_S[j] <- v
node_numbering[v,"node_number"] <- i
node_numbering[v,"node_lowlink"] <- i
node_numbering[v,"node_onStack"] <- 1
# successors
w_labels <- unlist(successors[v])
# update records were v features as successor
back_idx <- which(util_table_df[,"w_k_node"] == v)
if (length(back_idx) > 0){
util_table_df[back_idx, "w_k_number"] <- node_numbering[v,"node_number"]
util_table_df[back_idx, "w_k_lowlink"] <- node_numbering[v,"node_lowlink"]
}
# check if successor is a sink node
sink_test <- which(is.na(w_labels))
if (length(sink_test) == 0){
w <- match(w_labels, node_names)
n_successors <- length(w)
} else {
w <- NA
n_successors <- 0
}
# initialise a progressive counter for incident nodes reachable from the current node
k <- 0
# Loop #2: explores incident nodes "sequentially" for a given node
dummy1 <- TRUE
while(dummy1){
if(length(sink_test) == 0){ # the current node has a successor
k <- k + 1
w_k <- w[k] # next incident node (neighbor)
# update main tableau (df version)
util_table_df_subset <- which(rownames(util_table_df) == v_label) # filter for the current node v
if(length(util_table_df_subset) > 1){
if(k == 1){
util_table_df[util_table_df_subset,][k,"i"] <- i
util_table_df[util_table_df_subset,][k,"j"] <- j
util_table_df[util_table_df_subset,][k,"level"] <- level
util_table_df[util_table_df_subset,][k,"v_node"] <- v
util_table_df[util_table_df_subset,][k,"v_number"] <- node_numbering[v,"node_number"]
util_table_df[util_table_df_subset,][k,"v_lowlink"] <- node_numbering[v,"node_lowlink"]
util_table_df[util_table_df_subset,][k,"successors"] <- n_successors
}
util_table_df[util_table_df_subset,][k,"k_successor_idx"] <- k
# util_table_df[util_table_df_subset,][k,"w_k_node"] <- w_k # could be removed...
util_table_df[util_table_df_subset,][k,"w_k_number"] <- node_numbering[w_k,"node_number"]
util_table_df[util_table_df_subset,][k,"w_k_lowlink"] <- node_numbering[w_k, "node_lowlink"]
} else {
if(k == 1){
util_table_df[util_table_df_subset,"i"] <- i
util_table_df[util_table_df_subset,"j"] <- j
util_table_df[util_table_df_subset,"level"] <- level
util_table_df[util_table_df_subset,"v_node"] <- v
util_table_df[util_table_df_subset,"v_number"] <- node_numbering[v,"node_number"]
util_table_df[util_table_df_subset,"v_lowlink"] <- node_numbering[v,"node_lowlink"]
util_table_df[util_table_df_subset,"successors"] <- n_successors
}
util_table_df[util_table_df_subset,"k_successor_idx"] <- k
# util_table_df[util_table_df_subset,"w_k_node"] <- w_k # could be removed...
util_table_df[util_table_df_subset,"w_k_number"] <- node_numbering[w_k,"node_number"]
util_table_df[util_table_df_subset,"w_k_lowlink"] <- node_numbering[w_k, "node_lowlink"]
}
# tests
test0 <- is.na(node_numbering[w_k,"node_number"]) # we can jump onto this node (depth first)
test1 <- node_numbering[w_k,"node_number"] < node_numbering[v,"node_number"] # the next node has been visited already?
test2 <- node_numbering[w_k,"node_onStack"] == 1 # the next node is in the stack already OR has been on the stack
} else { # there is no successor to the current node (sink)
# tests
test0 <- FALSE # forces to backtrack if there is no successor
test1 <- FALSE
test2 <- FALSE
# update main tableau (df version)
util_table_df_subset <- which(rownames(util_table_df) == v_label) # filter for the current node v
util_table_df[util_table_df_subset,"i"] <- i
util_table_df[util_table_df_subset,"j"] <- j
util_table_df[util_table_df_subset,"level"] <- level
util_table_df[util_table_df_subset,"v_node"] <- v
util_table_df[util_table_df_subset,"v_number"] <- node_numbering[v,"node_number"]
util_table_df[util_table_df_subset,"v_lowlink"] <- node_numbering[v,"node_lowlink"]
util_table_df[util_table_df_subset,"successors"] <- n_successors
util_table_df[util_table_df_subset,"k_successor_idx"] <- k
util_table_df[util_table_df_subset,"w_k_node"] <- (-1)
util_table_df[util_table_df_subset,"w_k_number"] <- (-1)
util_table_df[util_table_df_subset,"w_k_lowlink"] <- (-1)
}
test_POP <- FALSE
# THIS IS WHERE WE DECIDE WHETHER TO CONTINUE DEPTH-FIRST, LOOK INTO A NEIGHBOUR, OR BACKTRACK
if (test0){
# continue depth-first search
dummy1 <- FALSE # break Loop 2 (exit sequential exploration of successors for a given node)
v <- w_k # move on to successor
v_label <- node_names[v]
# the below avoids having more than one line with the same level
level_max <- max(util_table_df[,"level"], na.rm = T)
if (level < level_max){ # probably we backtracked and re-winded the level counter.This should get us back where we were before the rewind
level <- level_max + 1
} else {
level <- level + 1
}
} else {
# start "back tracking"
# Loop #3: are there neighbors left to explore?
dummy5 <- TRUE
while(dummy5){
if(k != 0 & k < n_successors) { # there are other adjacent nodes left to explore
dummy5 <- FALSE
# update lowlink values - FIRST TYPE (going downwards)
if (test1 == TRUE & test2 == TRUE){ # the current k successor has been visited PRIOR to v (has lower number), and it is on the stack already
node_numbering[v,"node_lowlink"] <- min(node_numbering[v,"node_lowlink"],node_numbering[w_k,"node_number"])
util_table_df_subset <- which(rownames(util_table_df) == v_label) # filter for the current node v
if(length(util_table_df_subset) > 1){
util_table_df[util_table_df_subset,][1,"v_lowlink"] <- node_numbering[v,"node_lowlink"] # always update the first line, corresponding to v
} else {
util_table_df[util_table_df_subset,"v_lowlink"] <- node_numbering[v,"node_lowlink"]
}
# update lowlink elsewhere
other_idx <- which(util_table_df[,"w_k_node"] == v)
if(length(other_idx)>0){
util_table_df[other_idx, "w_k_lowlink"] <- node_numbering[v,"node_lowlink"]
}
}
} else {
dummy3 <- TRUE
# Loop #4: revisit all the neighbours already visited
while (dummy3) {
if(n_successors !=0 & k !=0){ # we are not at a sink node / we have not re-visited all the neighbors
# re-do test
test0 <- is.na(node_numbering[w_k,"node_number"]) # we can jump onto this node (depth first)
test1 <- node_numbering[w_k,"node_number"] < node_numbering[v,"node_number"] # the next node has been visited already?
test2 <- node_numbering[w_k,"node_onStack"] == 1 # the next node is in the stack already
if (test2){ # the next node is STILL on the stack PLEASE DO NOT ADD TEST 1 OR WILL MESS UP
# update lowlink values - SECOND TYPE (ascending)
old_v_lowlink <- node_numbering[v,"node_lowlink"]
node_numbering[v,"node_lowlink"] <- min(node_numbering[v,"node_lowlink"],node_numbering[w_k,"node_lowlink"])
util_table_df_subset <- which(rownames(util_table_df) == v_label) # filter for the current node v
if(length(util_table_df_subset) > 1){
util_table_df[util_table_df_subset,][1,"v_lowlink"] <- node_numbering[v,"node_lowlink"] # always update the first line, corresponding to v
} else {
util_table_df[util_table_df_subset,"v_lowlink"] <- node_numbering[v,"node_lowlink"]
}
# update lowlink elsewhere
other_idx <- which(util_table_df[,"w_k_node"] == v)
if(length(other_idx)>0){
util_table_df[other_idx, "w_k_lowlink"] <- node_numbering[v,"node_lowlink"]
}
}
# go back to the previous neighbour already visited
k <- k - 1
if (k !=0){
w_k <- w[k]
#re-do the test
test0 <- is.na(node_numbering[w_k,"node_number"]) # we can jump onto this node (depth first)
test1 <- node_numbering[w_k,"node_number"] < node_numbering[v,"node_number"] # the next node has been visited already?
test2 <- node_numbering[w_k,"node_onStack"] == 1 # the next node is in the stack already
} else {
test2 <- FALSE
test1 <- FALSE
test0 <- FALSE
w_k <- (-1)
}
} else {
# either we are at a sink node or we have re-visited all the neighbors
# NEW: WHEN WE ARE HERE, WE ARE FORCING THE BACKTRACKING - THERE WAS NO PATH LEADING US back to previously encountered nodes
# therefore we MUST NOT UPDATE LOWLINK when we encounter a node we visited before
# if v is a root node (lowlink = number), we can pop the stack at this stage
test_POP <- node_numbering[v,"node_number"] == node_numbering[v,"node_lowlink"] # from Tarjan's original paper
# BEFORE POPPING
abort_POP <- (test_POP & level == 1)
if (abort_POP & flag_extra_round){
# we reached the last level (at least until we restart with level 1); WE NEED TO make sure that all nodes' lowlink values matches the lowest lowlink of their successors.
# It happens that the root node gets popped out then the lowlink update occurs, and a bunch of nodes are left without root and can't pop next
# if that's not the case it may be that we need more rounds of update
extra_round_counter <- extra_round_counter + 1
# the below checks that we DON'T HAVE NODES WITH THE LATEST SUCCESSOR'S LOWLINK VALUE BEING lower THAN THE NODE'S LOWLINK
# if a node's LOWLINK is larger than the lowest among its successors' LOWLINK the we need to keep updating
update_needed <- lapply(Stack_S, function(x){
v_lowlink <- node_numbering[x,"node_lowlink"]
v_neigb_lowest_lowlink <- min(util_table_df[which(util_table_df[,"v_node"] == x) , "w_k_lowlink"])
if(v_neigb_lowest_lowlink > 0){ # sink nodes by convention get "-1" in this field which might cause looping forever
if(v_lowlink != v_neigb_lowest_lowlink){
1
} else {0}
} else {0}
})
updates_test <- sum(unlist(update_needed))
if(updates_test > 0 & extra_round_counter < 4){
# reset the level to a max but add one and let the loop later pick which level is righ
level <- max(util_table_df[which(!is.na(util_table_df[,"level"])),"level"]) + 1 # start all over again, hoping we get all the lowlinks updated
test_POP <- FALSE # ABORT POPPING FOR NOW
} else {
flag_extra_round <- FALSE # break out of this
abort_POP <- FALSE
}
}
if (test_POP){
# THIS IS WHERE WE POP THE STACK AND POPULATE THE COMPONENT
# update count of nodes not numbered yet
nodes_not_numbered_yet <- length(which(is.na(node_numbering[,"node_number"])))
# NEW: we want to pop the stack but only using v as a root (leave other nodes alone, if any) - different test to enter the stack-popping stage
temp_idx <- as.numeric(which(!is.na(node_numbering[,"node_number"])))
temp_subset_a <- which(node_numbering[temp_idx, "node_number"] >= node_numbering[v,"node_number"])
nodes_visited_so_far <- match(names(temp_subset_a[as.numeric(which(node_numbering[names(temp_subset_a), "node_onStack"] == 1))]), node_names)
Stack_S_BACKUP <- Stack_S
Component_list_BACKUP <- Component_list
# Start of "Pop nodes" sub, but only if we're done with depth-first search
# NEW: notice that the loop below should go from node "v" onward
for(r in nodes_visited_so_far){
x <- as.numeric(node_numbering[r,])
if (x[1] == x[2]){ # the node is a root node
comp_count <- comp_count + 1 # component number
# put nodes in component
temp_component_list <- lapply(1:length(Stack_S), function(s){
y <- Stack_S[[s]]
pop_test1 <- node_numbering[y, "node_number"] >= x[1]
pop_test2 <- node_numbering[y, "node_lowlink"] == x[2]
if(pop_test1 & pop_test2){
y
}
})
to_delete <- unlist(temp_component_list)
Component_list[[comp_count]] <- to_delete
# pop elements out of stack
#idx_delete <- which(Stack_S == to_delete)
temp_idx_delete <- rep(seq_along(Stack_S), sapply(Stack_S, length)) # thread: https://stackoverflow.com/questions/11002391/fast-way-of-getting-index-of-match-in-list
idx_delete <- temp_idx_delete[match(to_delete, unlist(Stack_S))]
Stack_S[idx_delete] <- NULL
# update j or the size of the stack will change later
j <- j - length(idx_delete)
# update table
node_numbering[to_delete,"node_onStack"] <- (-1) # mark elements that have been on stack, but no longer are
}
}
}
dummy3 <- FALSE # break loop 4 after moving one level up - REGARDLESS of popping the stack
# NEW: modified version of finding the "right level" to go back to
# ignore if we're forcing one last iteration to update lowling - special case
test_skip <- FALSE
while(!test_skip & level > 0){
level <- level - 1 # if there is no successor OR the node at this level is not on the stack, keep going backwards
if(level != 0){
v_temp <- util_table_df[which(util_table_df[,"level"] == level),"v_node"]
level_on_stack <- (node_numbering[v_temp,"node_onStack"] == 1) # skip levels that correspond to nodes that have been removed from stack
test_any_successor <- length(which(util_table_df[,"level"] == level & util_table_df[, "successors"] !=0)) # (no successor going up one level)
test_skip <- (level_on_stack & test_any_successor > 0)
}
}
# GET BACK TO WHERE YOU LEFT THINGS AT THE PREVIOUS LEVEL
if (level > 0){
back_idx <- which(util_table_df[,"level"] == level & util_table_df[, "successors"] !=0) # there may be more than one node with the same level if E.G. ONE NEIGHBOUR IS A SINK but the other isn't. We ignore the sink
v <- util_table_df[back_idx, "v_node"]
v_label <- node_names[v]
n_successors <- util_table_df[back_idx, "successors"]
k_back_idx_a <- as.numeric(which(util_table_df[, "v_node"] == v))
util_table_df_subset <- util_table_df[k_back_idx_a,]
if(length(k_back_idx_a) > 1){
k_back_idx_b <- max(which(!is.na(util_table_df_subset[, "k_successor_idx"]))) # retrieve last successor index explored for the current node
k <- util_table_df_subset[k_back_idx_b , "k_successor_idx"]
w_k <- util_table_df_subset[k_back_idx_b , "w_k_node"]
} else {
k <- as.numeric(util_table_df_subset["k_successor_idx"])
w_k <- as.numeric(util_table_df_subset["w_k_node"])
}
w <- as.numeric(util_table_df[k_back_idx_a, "w_k_node"])
w_labels <- node_names[w]
sink_test <- which(is.na(w_labels)) # check if the successor is a sink node
# re-do test
test0 <- is.na(node_numbering[w_k,"node_number"]) # we can jump onto this node (depth first)
test1 <- node_numbering[w_k,"node_number"] < node_numbering[v,"node_number"] # the next node has been visited already?
test2 <- node_numbering[w_k,"node_onStack"] == 1 # the next node is in the stack already
}
}
} # end of Loop 4 (dummy 3)
if(level == 0) {
dummy5 <- FALSE # break loop 3
dummy1 <- FALSE # break loop 2 (ends up in the same place as if there were no successors)
}
}
} # end of Loop 3 (dummy 5)
} # end of ELSE - "back tracking" sub
} # end of Loop 2 (dummy1)
nodes_not_numbered_yet <- length(which(is.na(node_numbering[,"node_number"])))
nodes_visited_so_far <- as.numeric(which(!is.na(node_numbering[,"node_number"]) & node_numbering[,"node_onStack"] == 1))
if(!test0 & test_POP){
if(nodes_not_numbered_yet == 0){ # break Loop 1 (depths first exploration of successors, jumping between nodes) and move up one level
dummy4 <- FALSE # break Loop 5: YOU'RE DONE
}
dummy0 <- FALSE # break Loop 1
# if we are here, we'll restart the level counter. re-label the LEVELS used so that they do not interefere with the next component
util_table_df_BACKUP <- util_table_df
idx_change_level <- which(!is.na(util_table_df[,"level"]) & util_table_df[,"level"] > 0)
util_table_df[idx_change_level,"level"] <- (-1)*util_table_df[idx_change_level,"level"]
}
} # end of Loop 1 (dummy0)
} # end of Loop 0 (dummy4)
## Step 5: Block triangular form
# Plain english description from: Hume D., Litsey J., and Plemmons (1981) p 272 - see this google book https://books.google.co.uk/books?id=pEMsAQAAIAAJ&pg=PA272&lpg=PA272&dq=duff+and+reid+block+triangularization&source=bl&ots=zlTT95Usx-&sig=ACfU3U3GkBU0av0mI459KObNAqyrc-1sTw&hl=en&sa=X&ved=2ahUKEwiWkKnBlKbyAhUSesAKHXmZArcQ6AF6BAgREAM#v=onepage&q=duff%20and%20reid%20block%20triangularization&f=false
# - all vertices within a strongly connected component are numbered sequentially
# - if there exists and edge from a vertex in component i to a vertex in component j, then all vertices in component i are labelled before all those in component j
n_components <- length(Component_list)
x_components_idx <- expand.grid(1:n_components, 1:n_components) # equivalent to nested for
x_components_idx <- x_components_idx[, ncol(x_components_idx):1] # flip
x_comp_link <- apply(x_components_idx, 1, function(x){
if(x[1] != x[2]){
comp_nodes_a <- Component_list[[as.numeric(x[1])]]
comp_nodes_b <- Component_list[[as.numeric(x[2])]]
sum(test_m[comp_nodes_a, comp_nodes_b])
} else {
NA
}
})
preced_comp <- x_components_idx[which(x_comp_link > 0),]
colnames(preced_comp) <- c("precedent", "consequent")
ord_comp <- c(1:comp_count) # arrange components number in an array
ord_comp_UPDATE <- ord_comp
swap_track <- list()
for(i in 1:nrow(preced_comp)){
if(i ==1){
dummy_ord <- ord_comp
} else {
dummy_ord <- swap_track[[i - 1]]
}
a <- as.numeric(preced_comp[i,1])
b <- as.numeric(preced_comp[i,2])
a_pos <- as.numeric(match(a, dummy_ord))
b_pos <- as.numeric(match(b, dummy_ord))
if (a_pos > b_pos){ # a should precede b but a is currently placed after b
dummy_ord[b_pos] <- a
dummy_ord[a_pos] <- b
}
swap_track[[i]] <- dummy_ord
ord_comp_UPDATE <- dummy_ord
}
# obtain permutation
permuted_lines <- unlist(Component_list[ord_comp_UPDATE]) # re-order components in the list
permuted_m <- test_m[permuted_lines, permuted_lines]
## Step 7: Output list
list(SCC = Component_list,
BlockTri_m = permuted_m
)
}
#### 02.0 - example use & benchmark ####
out_SCC <- DFS_4_ISM(test_m)
Component_list <- out_SCC$SCC
test_SCC_tab <- lapply(1:length(Component_list),function(comp_count){
x <- Component_list[[comp_count]]
temp_rows <- length(x)
Comp_ID <- rep(comp_count,temp_rows)
cbind(Comp_ID, x)
})
tab_comp <- do.call(rbind, test_SCC_tab)
tab_comp_df <- as.data.frame(tab_comp)
tab_comp_df[order(tab_comp_df$x),]
table(tab_comp_df$Comp_ID)
library(igraph)
G_ig <- graph_from_adjacency_matrix(test_m, mode = "directed")
# from instruction: The weakly connected components are found by a simple breadth-first search. The strongly connected components are implemented by two consecutive depth-first searches.
components(G_ig, mode = "strong")