-
Notifications
You must be signed in to change notification settings - Fork 544
/
Copy pathlinkify.js
1848 lines (1712 loc) · 61.3 KB
/
linkify.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
// THIS FILE IS AUTOMATICALLY GENERATED DO NOT EDIT DIRECTLY
// See update-tlds.js for encoding/decoding format
// https://data.iana.org/TLD/tlds-alpha-by-domain.txt
const encodedTlds = 'aaa1rp3bb0ott3vie4c1le2ogado5udhabi7c0ademy5centure6ountant0s9o1tor4d0s1ult4e0g1ro2tna4f0l1rica5g0akhan5ency5i0g1rbus3force5tel5kdn3l0ibaba4pay4lfinanz6state5y2sace3tom5m0azon4ericanexpress7family11x2fam3ica3sterdam8nalytics7droid5quan4z2o0l2partments8p0le4q0uarelle8r0ab1mco4chi3my2pa2t0e3s0da2ia2sociates9t0hleta5torney7u0ction5di0ble3o3spost5thor3o0s4vianca6w0s2x0a2z0ure5ba0by2idu3namex3narepublic11d1k2r0celona5laycard4s5efoot5gains6seball5ketball8uhaus5yern5b0c1t1va3cg1n2d1e0ats2uty4er2ntley5rlin4st0buy5t2f1g1h0arti5i0ble3d1ke2ng0o3o1z2j1lack0friday9ockbuster8g1omberg7ue3m0s1w2n0pparibas9o0ats3ehringer8fa2m1nd2o0k0ing5sch2tik2on4t1utique6x2r0adesco6idgestone9oadway5ker3ther5ussels7s1t1uild0ers6siness6y1zz3v1w1y1z0h3ca0b1fe2l0l1vinklein9m0era3p2non3petown5ital0one8r0avan4ds2e0er0s4s2sa1e1h1ino4t0ering5holic7ba1n1re3c1d1enter4o1rn3f0a1d2g1h0anel2nel4rity4se2t2eap3intai5ristmas6ome4urch5i0priani6rcle4sco3tadel4i0c2y3k1l0aims4eaning6ick2nic1que6othing5ud3ub0med6m1n1o0ach3des3ffee4llege4ogne5m0cast4mbank4unity6pany2re3uter5sec4ndos3struction8ulting7tact3ractors9oking4l1p2rsica5untry4pon0s4rses6pa2r0edit0card4union9icket5own3s1uise0s6u0isinella9v1w1x1y0mru3ou3z2dabur3d1nce3ta1e1ing3sun4y2clk3ds2e0al0er2s3gree4livery5l1oitte5ta3mocrat6ntal2ist5si0gn4v2hl2iamonds6et2gital5rect0ory7scount3ver5h2y2j1k1m1np2o0cs1tor4g1mains5t1wnload7rive4tv2ubai3nlop4pont4rban5vag2r2z2earth3t2c0o2deka3u0cation8e1g1mail3erck5nergy4gineer0ing9terprises10pson4quipment8r0icsson6ni3s0q1tate5t1u0rovision8s2vents5xchange6pert3osed4ress5traspace10fage2il1rwinds6th3mily4n0s2rm0ers5shion4t3edex3edback6rrari3ero6i0delity5o2lm2nal1nce1ial7re0stone6mdale6sh0ing5t0ness6j1k1lickr3ghts4r2orist4wers5y2m1o0o0d1tball6rd1ex2sale4um3undation8x2r0ee1senius7l1ogans4ntier7tr2ujitsu5n0d2rniture7tbol5yi3ga0l0lery3o1up4me0s3p1rden4y2b0iz3d0n2e0a1nt0ing5orge5f1g0ee3h1i0ft0s3ves2ing5l0ass3e1obal2o4m0ail3bh2o1x2n1odaddy5ld0point6f2o0dyear5g0le4p1t1v2p1q1r0ainger5phics5tis4een3ipe3ocery4up4s1t1u0ardian6cci3ge2ide2tars5ru3w1y2hair2mburg5ngout5us3bo2dfc0bank7ealth0care8lp1sinki6re1mes5iphop4samitsu7tachi5v2k0t2m1n1ockey4ldings5iday5medepot5goods5s0ense7nda3rse3spital5t0ing5t0els3mail5use3w2r1sbc3t1u0ghes5yatt3undai7ibm2cbc2e1u2d1e0ee3fm2kano4l1m0amat4db2mo0bilien9n0c1dustries8finiti5o2g1k1stitute6urance4e4t0ernational10uit4vestments10o1piranga7q1r0ish4s0maili5t0anbul7t0au2v3jaguar4va3cb2e0ep2tzt3welry6io2ll2m0p2nj2o0bs1urg4t1y2p0morgan6rs3uegos4niper7kaufen5ddi3e0rryhotels6logistics9properties14fh2g1h1i0a1ds2m1ndle4tchen5wi3m1n1oeln3matsu5sher5p0mg2n2r0d1ed3uokgroup8w1y0oto4z2la0caixa5mborghini8er3ncaster6d0rover6xess5salle5t0ino3robe5w0yer5b1c1ds2ease3clerc5frak4gal2o2xus4gbt3i0dl2fe0insurance9style7ghting6ke2lly3mited4o2ncoln4k2psy3ve1ing5k1lc1p2oan0s3cker3us3l1ndon4tte1o3ve3pl0financial11r1s1t0d0a3u0ndbeck6xe1ury5v1y2ma0drid4if1son4keup4n0agement7go3p1rket0ing3s4riott5shalls7ttel5ba2c0kinsey7d1e0d0ia3et2lbourne7me1orial6n0u2rckmsd7g1h1iami3crosoft7l1ni1t2t0subishi9k1l0b1s2m0a2n1o0bi0le4da2e1i1m1nash3ey2ster5rmon3tgage6scow4to0rcycles9v0ie4p1q1r1s0d2t0n1r2u0seum3ic4v1w1x1y1z2na0b1goya4me2tura4vy3ba2c1e0c1t0bank4flix4work5ustar5w0s2xt0direct7us4f0l2g0o2hk2i0co2ke1on3nja3ssan1y5l1o0kia3rton4w0ruz3tv4p1r0a1w2tt2u1yc2z2obi1server7ffice5kinawa6layan0group9dnavy5lo3m0ega4ne1g1l0ine5oo2pen3racle3nge4g0anic5igins6saka4tsuka4t2vh3pa0ge2nasonic7ris2s1tners4s1y3y2ccw3e0t2f0izer5g1h0armacy6d1ilips5one2to0graphy6s4ysio5ics1tet2ures6d1n0g1k2oneer5zza4k1l0ace2y0station9umbing5s3m1n0c2ohl2ker3litie5rn2st3r0america6xi3ess3ime3o0d0uctions8f1gressive8mo2perties3y5tection8u0dential9s1t1ub2w0c2y2qa1pon3uebec3st5racing4dio4e0ad1lestate6tor2y4cipes5d0stone5umbrella9hab3ise0n3t2liance6n0t0als5pair3ort3ublican8st0aurant8view0s5xroth6ich0ardli6oh3l1o1p2o0cks3deo3gers4om3s0vp3u0gby3hr2n2w0e2yukyu6sa0arland6fe0ty4kura4le1on3msclub4ung5ndvik0coromant12ofi4p1rl2s1ve2xo3b0i1s2c0a1b1haeffler7midt4olarships8ol3ule3warz5ience5ot3d1e0arch3t2cure1ity6ek2lect4ner3rvices6ven3w1x0y3fr2g1h0angrila6rp2w2ell3ia1ksha5oes2p0ping5uji3w3i0lk2na1gles5te3j1k0i0n2y0pe4l0ing4m0art3ile4n0cf3o0ccer3ial4ftbank4ware6hu2lar2utions7ng1y2y2pa0ce3ort2t3r0l2s1t0ada2ples4r1tebank4farm7c0group6ockholm6rage3e3ream4udio2y3yle4u0cks3pplies3y2ort5rf1gery5zuki5v1watch4iss4x1y0dney4stems6z2tab1ipei4lk2obao4rget4tamotors6r2too4x0i3c0i2d0k2eam2ch0nology8l1masek5nnis4va3f1g1h0d1eater2re6iaa2ckets5enda4ps2res2ol4j0maxx4x2k0maxx5l1m0all4n1o0day3kyo3ols3p1ray3shiba5tal3urs3wn2yota3s3r0ade1ing4ining5vel0ers0insurance16ust3v2t1ube2i1nes3shu4v0s2w1z2ua1bank3s2g1k1nicom3versity8o2ol2ps2s1y1z2va0cations7na1guard7c1e0gas3ntures6risign5mögensberater2ung14sicherung10t2g1i0ajes4deo3g1king4llas4n1p1rgin4sa1ion4va1o3laanderen9n1odka3lvo3te1ing3o2yage5u2wales2mart4ter4ng0gou5tch0es6eather0channel12bcam3er2site5d0ding5ibo2r3f1hoswho6ien2ki2lliamhill9n0dows4e1ners6me2olterskluwer11odside6rk0s2ld3w2s1tc1f3xbox3erox4finity6ihuan4n2xx2yz3yachts4hoo3maxun5ndex5e1odobashi7ga2kohama6u0tube6t1un3za0ppos4ra3ero3ip2m1one3uerich6w2';
// Internationalized domain names containing non-ASCII
const encodedUtlds = 'ελ1υ2бг1ел3дети4ею2католик6ом3мкд2он1сква6онлайн5рг3рус2ф2сайт3рб3укр3қаз3հայ3ישראל5קום3ابوظبي5رامكو5لاردن4بحرين5جزائر5سعودية6عليان5مغرب5مارات5یران5بارت2زار4يتك3ھارت5تونس4سودان3رية5شبكة4عراق2ب2مان4فلسطين6قطر3كاثوليك6وم3مصر2ليسيا5وريتانيا7قع4همراه5پاکستان7ڀارت4कॉम3नेट3भारत0म्3ोत5संगठन5বাংলা5ভারত2ৰত4ਭਾਰਤ4ભારત4ଭାରତ4இந்தியா6லங்கை6சிங்கப்பூர்11భారత్5ಭಾರತ4ഭാരതം5ලංකා4คอม3ไทย3ລາວ3გე2みんな3アマゾン4クラウド4グーグル4コム2ストア3セール3ファッション6ポイント4世界2中信1国1國1文网3亚马逊3企业2佛山2信息2健康2八卦2公司1益2台湾1灣2商城1店1标2嘉里0大酒店5在线2大拿2天主教3娱乐2家電2广东2微博2慈善2我爱你3手机2招聘2政务1府2新加坡2闻2时尚2書籍2机构2淡马锡3游戏2澳門2点看2移动2组织机构4网址1店1站1络2联通2谷歌2购物2通販2集团2電訊盈科4飞利浦3食品2餐厅2香格里拉3港2닷넷1컴2삼성2한국2';
/**
* @template A
* @template B
* @param {A} target
* @param {B} properties
* @return {A & B}
*/
const assign = (target, properties) => {
for (const key in properties) {
target[key] = properties[key];
}
return target;
};
/**
* Finite State Machine generation utilities
*/
/**
* @template T
* @typedef {{ [group: string]: T[] }} Collections
*/
/**
* @typedef {{ [group: string]: true }} Flags
*/
// Keys in scanner Collections instances
const numeric = 'numeric';
const ascii = 'ascii';
const alpha = 'alpha';
const asciinumeric = 'asciinumeric';
const alphanumeric = 'alphanumeric';
const domain = 'domain';
const emoji = 'emoji';
const scheme = 'scheme';
const slashscheme = 'slashscheme';
const whitespace = 'whitespace';
/**
* @template T
* @param {string} name
* @param {Collections<T>} groups to register in
* @returns {T[]} Current list of tokens in the given collection
*/
function registerGroup(name, groups) {
if (!(name in groups)) {
groups[name] = [];
}
return groups[name];
}
/**
* @template T
* @param {T} t token to add
* @param {Collections<T>} groups
* @param {Flags} flags
*/
function addToGroups(t, flags, groups) {
if (flags[numeric]) {
flags[asciinumeric] = true;
flags[alphanumeric] = true;
}
if (flags[ascii]) {
flags[asciinumeric] = true;
flags[alpha] = true;
}
if (flags[asciinumeric]) {
flags[alphanumeric] = true;
}
if (flags[alpha]) {
flags[alphanumeric] = true;
}
if (flags[alphanumeric]) {
flags[domain] = true;
}
if (flags[emoji]) {
flags[domain] = true;
}
for (const k in flags) {
const group = registerGroup(k, groups);
if (group.indexOf(t) < 0) {
group.push(t);
}
}
}
/**
* @template T
* @param {T} t token to check
* @param {Collections<T>} groups
* @returns {Flags} group flags that contain this token
*/
function flagsForToken(t, groups) {
const result = {};
for (const c in groups) {
if (groups[c].indexOf(t) >= 0) {
result[c] = true;
}
}
return result;
}
/**
* @template T
* @typedef {null | T } Transition
*/
/**
* Define a basic state machine state. j is the list of character transitions,
* jr is the list of regex-match transitions, jd is the default state to
* transition to t is the accepting token type, if any. If this is the terminal
* state, then it does not emit a token.
*
* The template type T represents the type of the token this state accepts. This
* should be a string (such as of the token exports in `text.js`) or a
* MultiToken subclass (from `multi.js`)
*
* @template T
* @param {T} [token] Token that this state emits
*/
function State(token) {
if (token === void 0) {
token = null;
}
// this.n = null; // DEBUG: State name
/** @type {{ [input: string]: State<T> }} j */
this.j = {}; // IMPLEMENTATION 1
// this.j = []; // IMPLEMENTATION 2
/** @type {[RegExp, State<T>][]} jr */
this.jr = [];
/** @type {?State<T>} jd */
this.jd = null;
/** @type {?T} t */
this.t = token;
}
/**
* Scanner token groups
* @type Collections<string>
*/
State.groups = {};
State.prototype = {
accepts() {
return !!this.t;
},
/**
* Follow an existing transition from the given input to the next state.
* Does not mutate.
* @param {string} input character or token type to transition on
* @returns {?State<T>} the next state, if any
*/
go(input) {
const state = this;
const nextState = state.j[input];
if (nextState) {
return nextState;
}
for (let i = 0; i < state.jr.length; i++) {
const regex = state.jr[i][0];
const nextState = state.jr[i][1]; // note: might be empty to prevent default jump
if (nextState && regex.test(input)) {
return nextState;
}
}
// Nowhere left to jump! Return default, if any
return state.jd;
},
/**
* Whether the state has a transition for the given input. Set the second
* argument to true to only look for an exact match (and not a default or
* regular-expression-based transition)
* @param {string} input
* @param {boolean} exactOnly
*/
has(input, exactOnly) {
if (exactOnly === void 0) {
exactOnly = false;
}
return exactOnly ? input in this.j : !!this.go(input);
},
/**
* Short for "transition all"; create a transition from the array of items
* in the given list to the same final resulting state.
* @param {string | string[]} inputs Group of inputs to transition on
* @param {Transition<T> | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
*/
ta(inputs, next, flags, groups) {
for (let i = 0; i < inputs.length; i++) {
this.tt(inputs[i], next, flags, groups);
}
},
/**
* Short for "take regexp transition"; defines a transition for this state
* when it encounters a token which matches the given regular expression
* @param {RegExp} regexp Regular expression transition (populate first)
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
* @returns {State<T>} taken after the given input
*/
tr(regexp, next, flags, groups) {
groups = groups || State.groups;
let nextState;
if (next && next.j) {
nextState = next;
} else {
// Token with maybe token groups
nextState = new State(next);
if (flags && groups) {
addToGroups(next, flags, groups);
}
}
this.jr.push([regexp, nextState]);
return nextState;
},
/**
* Short for "take transitions", will take as many sequential transitions as
* the length of the given input and returns the
* resulting final state.
* @param {string | string[]} input
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of token groups
* @returns {State<T>} taken after the given input
*/
ts(input, next, flags, groups) {
let state = this;
const len = input.length;
if (!len) {
return state;
}
for (let i = 0; i < len - 1; i++) {
state = state.tt(input[i]);
}
return state.tt(input[len - 1], next, flags, groups);
},
/**
* Short for "take transition", this is a method for building/working with
* state machines.
*
* If a state already exists for the given input, returns it.
*
* If a token is specified, that state will emit that token when reached by
* the linkify engine.
*
* If no state exists, it will be initialized with some default transitions
* that resemble existing default transitions.
*
* If a state is given for the second argument, that state will be
* transitioned to on the given input regardless of what that input
* previously did.
*
* Specify a token group flags to define groups that this token belongs to.
* The token will be added to corresponding entires in the given groups
* object.
*
* @param {string} input character, token type to transition on
* @param {T | State<T>} [next] Transition options
* @param {Flags} [flags] Collections flags to add token to
* @param {Collections<T>} [groups] Master list of groups
* @returns {State<T>} taken after the given input
*/
tt(input, next, flags, groups) {
groups = groups || State.groups;
const state = this;
// Check if existing state given, just a basic transition
if (next && next.j) {
state.j[input] = next;
return next;
}
const t = next;
// Take the transition with the usual default mechanisms and use that as
// a template for creating the next state
let nextState,
templateState = state.go(input);
if (templateState) {
nextState = new State();
assign(nextState.j, templateState.j);
nextState.jr.push.apply(nextState.jr, templateState.jr);
nextState.jd = templateState.jd;
nextState.t = templateState.t;
} else {
nextState = new State();
}
if (t) {
// Ensure newly token is in the same groups as the old token
if (groups) {
if (nextState.t && typeof nextState.t === 'string') {
const allFlags = assign(flagsForToken(nextState.t, groups), flags);
addToGroups(t, allFlags, groups);
} else if (flags) {
addToGroups(t, flags, groups);
}
}
nextState.t = t; // overwrite anything that was previously there
}
state.j[input] = nextState;
return nextState;
}
};
// Helper functions to improve minification (not exported outside linkifyjs module)
/**
* @template T
* @param {State<T>} state
* @param {string | string[]} input
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const ta = (state, input, next, flags, groups) => state.ta(input, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {RegExp} regexp
* @param {T | State<T>} [next]
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const tr = (state, regexp, next, flags, groups) => state.tr(regexp, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {string | string[]} input
* @param {T | State<T>} [next]
* @param {Flags} [flags]
* @param {Collections<T>} [groups]
*/
const ts = (state, input, next, flags, groups) => state.ts(input, next, flags, groups);
/**
* @template T
* @param {State<T>} state
* @param {string} input
* @param {T | State<T>} [next]
* @param {Collections<T>} [groups]
* @param {Flags} [flags]
*/
const tt = (state, input, next, flags, groups) => state.tt(input, next, flags, groups);
/******************************************************************************
Text Tokens
Identifiers for token outputs from the regexp scanner
******************************************************************************/
// A valid web domain token
const WORD = 'WORD'; // only contains a-z
const UWORD = 'UWORD'; // contains letters other than a-z, used for IDN
// Special case of word
const LOCALHOST = 'LOCALHOST';
// Valid top-level domain, special case of WORD (see tlds.js)
const TLD = 'TLD';
// Valid IDN TLD, special case of UWORD (see tlds.js)
const UTLD = 'UTLD';
// The scheme portion of a web URI protocol. Supported types include: `mailto`,
// `file`, and user-defined custom protocols. Limited to schemes that contain
// only letters
const SCHEME = 'SCHEME';
// Similar to SCHEME, except makes distinction for schemes that must always be
// followed by `://`, not just `:`. Supported types include `http`, `https`,
// `ftp`, `ftps`
const SLASH_SCHEME = 'SLASH_SCHEME';
// Any sequence of digits 0-9
const NUM = 'NUM';
// Any number of consecutive whitespace characters that are not newline
const WS = 'WS';
// New line (unix style)
const NL$1 = 'NL'; // \n
// Opening/closing bracket classes
// TODO: Rename OPEN -> LEFT and CLOSE -> RIGHT in v5 to fit with Unicode names
// Also rename angle brackes to LESSTHAN and GREATER THAN
const OPENBRACE = 'OPENBRACE'; // {
const CLOSEBRACE = 'CLOSEBRACE'; // }
const OPENBRACKET = 'OPENBRACKET'; // [
const CLOSEBRACKET = 'CLOSEBRACKET'; // ]
const OPENPAREN = 'OPENPAREN'; // (
const CLOSEPAREN = 'CLOSEPAREN'; // )
const OPENANGLEBRACKET = 'OPENANGLEBRACKET'; // <
const CLOSEANGLEBRACKET = 'CLOSEANGLEBRACKET'; // >
const FULLWIDTHLEFTPAREN = 'FULLWIDTHLEFTPAREN'; // (
const FULLWIDTHRIGHTPAREN = 'FULLWIDTHRIGHTPAREN'; // )
const LEFTCORNERBRACKET = 'LEFTCORNERBRACKET'; // 「
const RIGHTCORNERBRACKET = 'RIGHTCORNERBRACKET'; // 」
const LEFTWHITECORNERBRACKET = 'LEFTWHITECORNERBRACKET'; // 『
const RIGHTWHITECORNERBRACKET = 'RIGHTWHITECORNERBRACKET'; // 』
const FULLWIDTHLESSTHAN = 'FULLWIDTHLESSTHAN'; // <
const FULLWIDTHGREATERTHAN = 'FULLWIDTHGREATERTHAN'; // >
// Various symbols
const AMPERSAND = 'AMPERSAND'; // &
const APOSTROPHE = 'APOSTROPHE'; // '
const ASTERISK = 'ASTERISK'; // *
const AT = 'AT'; // @
const BACKSLASH = 'BACKSLASH'; // \
const BACKTICK = 'BACKTICK'; // `
const CARET = 'CARET'; // ^
const COLON = 'COLON'; // :
const COMMA = 'COMMA'; // ,
const DOLLAR = 'DOLLAR'; // $
const DOT = 'DOT'; // .
const EQUALS = 'EQUALS'; // =
const EXCLAMATION = 'EXCLAMATION'; // !
const HYPHEN = 'HYPHEN'; // -
const PERCENT = 'PERCENT'; // %
const PIPE = 'PIPE'; // |
const PLUS = 'PLUS'; // +
const POUND = 'POUND'; // #
const QUERY = 'QUERY'; // ?
const QUOTE = 'QUOTE'; // "
const SEMI = 'SEMI'; // ;
const SLASH = 'SLASH'; // /
const TILDE = 'TILDE'; // ~
const UNDERSCORE = 'UNDERSCORE'; // _
// Emoji symbol
const EMOJI$1 = 'EMOJI';
// Default token - anything that is not one of the above
const SYM = 'SYM';
var tk = /*#__PURE__*/Object.freeze({
__proto__: null,
WORD: WORD,
UWORD: UWORD,
LOCALHOST: LOCALHOST,
TLD: TLD,
UTLD: UTLD,
SCHEME: SCHEME,
SLASH_SCHEME: SLASH_SCHEME,
NUM: NUM,
WS: WS,
NL: NL$1,
OPENBRACE: OPENBRACE,
CLOSEBRACE: CLOSEBRACE,
OPENBRACKET: OPENBRACKET,
CLOSEBRACKET: CLOSEBRACKET,
OPENPAREN: OPENPAREN,
CLOSEPAREN: CLOSEPAREN,
OPENANGLEBRACKET: OPENANGLEBRACKET,
CLOSEANGLEBRACKET: CLOSEANGLEBRACKET,
FULLWIDTHLEFTPAREN: FULLWIDTHLEFTPAREN,
FULLWIDTHRIGHTPAREN: FULLWIDTHRIGHTPAREN,
LEFTCORNERBRACKET: LEFTCORNERBRACKET,
RIGHTCORNERBRACKET: RIGHTCORNERBRACKET,
LEFTWHITECORNERBRACKET: LEFTWHITECORNERBRACKET,
RIGHTWHITECORNERBRACKET: RIGHTWHITECORNERBRACKET,
FULLWIDTHLESSTHAN: FULLWIDTHLESSTHAN,
FULLWIDTHGREATERTHAN: FULLWIDTHGREATERTHAN,
AMPERSAND: AMPERSAND,
APOSTROPHE: APOSTROPHE,
ASTERISK: ASTERISK,
AT: AT,
BACKSLASH: BACKSLASH,
BACKTICK: BACKTICK,
CARET: CARET,
COLON: COLON,
COMMA: COMMA,
DOLLAR: DOLLAR,
DOT: DOT,
EQUALS: EQUALS,
EXCLAMATION: EXCLAMATION,
HYPHEN: HYPHEN,
PERCENT: PERCENT,
PIPE: PIPE,
PLUS: PLUS,
POUND: POUND,
QUERY: QUERY,
QUOTE: QUOTE,
SEMI: SEMI,
SLASH: SLASH,
TILDE: TILDE,
UNDERSCORE: UNDERSCORE,
EMOJI: EMOJI$1,
SYM: SYM
});
// Note that these two Unicode ones expand into a really big one with Babel
const ASCII_LETTER = /[a-z]/;
const LETTER = /\p{L}/u; // Any Unicode character with letter data type
const EMOJI = /\p{Emoji}/u; // Any Unicode emoji character
const EMOJI_VARIATION$1 = /\ufe0f/;
const DIGIT = /\d/;
const SPACE = /\s/;
var regexp = /*#__PURE__*/Object.freeze({
__proto__: null,
ASCII_LETTER: ASCII_LETTER,
LETTER: LETTER,
EMOJI: EMOJI,
EMOJI_VARIATION: EMOJI_VARIATION$1,
DIGIT: DIGIT,
SPACE: SPACE
});
/**
The scanner provides an interface that takes a string of text as input, and
outputs an array of tokens instances that can be used for easy URL parsing.
*/
const NL = '\n'; // New line character
const EMOJI_VARIATION = '\ufe0f'; // Variation selector, follows heart and others
const EMOJI_JOINER = '\u200d'; // zero-width joiner
let tlds = null,
utlds = null; // don't change so only have to be computed once
/**
* Scanner output token:
* - `t` is the token name (e.g., 'NUM', 'EMOJI', 'TLD')
* - `v` is the value of the token (e.g., '123', '❤️', 'com')
* - `s` is the start index of the token in the original string
* - `e` is the end index of the token in the original string
* @typedef {{t: string, v: string, s: number, e: number}} Token
*/
/**
* @template T
* @typedef {{ [collection: string]: T[] }} Collections
*/
/**
* Initialize the scanner character-based state machine for the given start
* state
* @param {[string, boolean][]} customSchemes List of custom schemes, where each
* item is a length-2 tuple with the first element set to the string scheme, and
* the second element set to `true` if the `://` after the scheme is optional
*/
function init$2(customSchemes) {
if (customSchemes === void 0) {
customSchemes = [];
}
// Frequently used states (name argument removed during minification)
/** @type Collections<string> */
const groups = {}; // of tokens
State.groups = groups;
/** @type State<string> */
const Start = new State();
if (tlds == null) {
tlds = decodeTlds(encodedTlds);
}
if (utlds == null) {
utlds = decodeTlds(encodedUtlds);
}
// States for special URL symbols that accept immediately after start
tt(Start, "'", APOSTROPHE);
tt(Start, '{', OPENBRACE);
tt(Start, '}', CLOSEBRACE);
tt(Start, '[', OPENBRACKET);
tt(Start, ']', CLOSEBRACKET);
tt(Start, '(', OPENPAREN);
tt(Start, ')', CLOSEPAREN);
tt(Start, '<', OPENANGLEBRACKET);
tt(Start, '>', CLOSEANGLEBRACKET);
tt(Start, '(', FULLWIDTHLEFTPAREN);
tt(Start, ')', FULLWIDTHRIGHTPAREN);
tt(Start, '「', LEFTCORNERBRACKET);
tt(Start, '」', RIGHTCORNERBRACKET);
tt(Start, '『', LEFTWHITECORNERBRACKET);
tt(Start, '』', RIGHTWHITECORNERBRACKET);
tt(Start, '<', FULLWIDTHLESSTHAN);
tt(Start, '>', FULLWIDTHGREATERTHAN);
tt(Start, '&', AMPERSAND);
tt(Start, '*', ASTERISK);
tt(Start, '@', AT);
tt(Start, '`', BACKTICK);
tt(Start, '^', CARET);
tt(Start, ':', COLON);
tt(Start, ',', COMMA);
tt(Start, '$', DOLLAR);
tt(Start, '.', DOT);
tt(Start, '=', EQUALS);
tt(Start, '!', EXCLAMATION);
tt(Start, '-', HYPHEN);
tt(Start, '%', PERCENT);
tt(Start, '|', PIPE);
tt(Start, '+', PLUS);
tt(Start, '#', POUND);
tt(Start, '?', QUERY);
tt(Start, '"', QUOTE);
tt(Start, '/', SLASH);
tt(Start, ';', SEMI);
tt(Start, '~', TILDE);
tt(Start, '_', UNDERSCORE);
tt(Start, '\\', BACKSLASH);
const Num = tr(Start, DIGIT, NUM, {
[numeric]: true
});
tr(Num, DIGIT, Num);
// State which emits a word token
const Word = tr(Start, ASCII_LETTER, WORD, {
[ascii]: true
});
tr(Word, ASCII_LETTER, Word);
// Same as previous, but specific to non-fsm.ascii alphabet words
const UWord = tr(Start, LETTER, UWORD, {
[alpha]: true
});
tr(UWord, ASCII_LETTER); // Non-accepting
tr(UWord, LETTER, UWord);
// Whitespace jumps
// Tokens of only non-newline whitespace are arbitrarily long
// If any whitespace except newline, more whitespace!
const Ws = tr(Start, SPACE, WS, {
[whitespace]: true
});
tt(Start, NL, NL$1, {
[whitespace]: true
});
tt(Ws, NL); // non-accepting state to avoid mixing whitespaces
tr(Ws, SPACE, Ws);
// Emoji tokens. They are not grouped by the scanner except in cases where a
// zero-width joiner is present
const Emoji = tr(Start, EMOJI, EMOJI$1, {
[emoji]: true
});
tr(Emoji, EMOJI, Emoji);
tt(Emoji, EMOJI_VARIATION, Emoji);
// tt(Start, EMOJI_VARIATION, Emoji); // This one is sketchy
const EmojiJoiner = tt(Emoji, EMOJI_JOINER);
tr(EmojiJoiner, EMOJI, Emoji);
// tt(EmojiJoiner, EMOJI_VARIATION, Emoji); // also sketchy
// Generates states for top-level domains
// Note that this is most accurate when tlds are in alphabetical order
const wordjr = [[ASCII_LETTER, Word]];
const uwordjr = [[ASCII_LETTER, null], [LETTER, UWord]];
for (let i = 0; i < tlds.length; i++) {
fastts(Start, tlds[i], TLD, WORD, wordjr);
}
for (let i = 0; i < utlds.length; i++) {
fastts(Start, utlds[i], UTLD, UWORD, uwordjr);
}
addToGroups(TLD, {
tld: true,
ascii: true
}, groups);
addToGroups(UTLD, {
utld: true,
alpha: true
}, groups);
// Collect the states generated by different protocols. NOTE: If any new TLDs
// get added that are also protocols, set the token to be the same as the
// protocol to ensure parsing works as expected.
fastts(Start, 'file', SCHEME, WORD, wordjr);
fastts(Start, 'mailto', SCHEME, WORD, wordjr);
fastts(Start, 'http', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'https', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'ftp', SLASH_SCHEME, WORD, wordjr);
fastts(Start, 'ftps', SLASH_SCHEME, WORD, wordjr);
addToGroups(SCHEME, {
scheme: true,
ascii: true
}, groups);
addToGroups(SLASH_SCHEME, {
slashscheme: true,
ascii: true
}, groups);
// Register custom schemes. Assumes each scheme is asciinumeric with hyphens
customSchemes = customSchemes.sort((a, b) => a[0] > b[0] ? 1 : -1);
for (let i = 0; i < customSchemes.length; i++) {
const sch = customSchemes[i][0];
const optionalSlashSlash = customSchemes[i][1];
const flags = optionalSlashSlash ? {
[scheme]: true
} : {
[slashscheme]: true
};
if (sch.indexOf('-') >= 0) {
flags[domain] = true;
} else if (!ASCII_LETTER.test(sch)) {
flags[numeric] = true; // numbers only
} else if (DIGIT.test(sch)) {
flags[asciinumeric] = true;
} else {
flags[ascii] = true;
}
ts(Start, sch, sch, flags);
}
// Localhost token
ts(Start, 'localhost', LOCALHOST, {
ascii: true
});
// Set default transition for start state (some symbol)
Start.jd = new State(SYM);
return {
start: Start,
tokens: assign({
groups
}, tk)
};
}
/**
Given a string, returns an array of TOKEN instances representing the
composition of that string.
@method run
@param {State<string>} start scanner starting state
@param {string} str input string to scan
@return {Token[]} list of tokens, each with a type and value
*/
function run$1(start, str) {
// State machine is not case sensitive, so input is tokenized in lowercased
// form (still returns regular case). Uses selective `toLowerCase` because
// lowercasing the entire string causes the length and character position to
// vary in some non-English strings with V8-based runtimes.
const iterable = stringToArray(str.replace(/[A-Z]/g, c => c.toLowerCase()));
const charCount = iterable.length; // <= len if there are emojis, etc
const tokens = []; // return value
// cursor through the string itself, accounting for characters that have
// width with length 2 such as emojis
let cursor = 0;
// Cursor through the array-representation of the string
let charCursor = 0;
// Tokenize the string
while (charCursor < charCount) {
let state = start;
let nextState = null;
let tokenLength = 0;
let latestAccepting = null;
let sinceAccepts = -1;
let charsSinceAccepts = -1;
while (charCursor < charCount && (nextState = state.go(iterable[charCursor]))) {
state = nextState;
// Keep track of the latest accepting state
if (state.accepts()) {
sinceAccepts = 0;
charsSinceAccepts = 0;
latestAccepting = state;
} else if (sinceAccepts >= 0) {
sinceAccepts += iterable[charCursor].length;
charsSinceAccepts++;
}
tokenLength += iterable[charCursor].length;
cursor += iterable[charCursor].length;
charCursor++;
}
// Roll back to the latest accepting state
cursor -= sinceAccepts;
charCursor -= charsSinceAccepts;
tokenLength -= sinceAccepts;
// No more jumps, just make a new token from the last accepting one
tokens.push({
t: latestAccepting.t,
// token type/name
v: str.slice(cursor - tokenLength, cursor),
// string value
s: cursor - tokenLength,
// start index
e: cursor // end index (excluding)
});
}
return tokens;
}
/**
* Convert a String to an Array of characters, taking into account that some
* characters like emojis take up two string indexes.
*
* Adapted from core-js (MIT license)
* https://github.com/zloirock/core-js/blob/2d69cf5f99ab3ea3463c395df81e5a15b68f49d9/packages/core-js/internals/string-multibyte.js
*
* @function stringToArray
* @param {string} str
* @returns {string[]}
*/
function stringToArray(str) {
const result = [];
const len = str.length;
let index = 0;
while (index < len) {
let first = str.charCodeAt(index);
let second;
let char = first < 0xd800 || first > 0xdbff || index + 1 === len || (second = str.charCodeAt(index + 1)) < 0xdc00 || second > 0xdfff ? str[index] // single character
: str.slice(index, index + 2); // two-index characters
result.push(char);
index += char.length;
}
return result;
}
/**
* Fast version of ts function for when transition defaults are well known
* @param {State<string>} state
* @param {string} input
* @param {string} t
* @param {string} defaultt
* @param {[RegExp, State<string>][]} jr
* @returns {State<string>}
*/
function fastts(state, input, t, defaultt, jr) {
let next;
const len = input.length;
for (let i = 0; i < len - 1; i++) {
const char = input[i];
if (state.j[char]) {
next = state.j[char];
} else {
next = new State(defaultt);
next.jr = jr.slice();
state.j[char] = next;
}
state = next;
}
next = new State(t);
next.jr = jr.slice();
state.j[input[len - 1]] = next;
return next;
}
/**
* Converts a string of Top-Level Domain names encoded in update-tlds.js back
* into a list of strings.
* @param {str} encoded encoded TLDs string
* @returns {str[]} original TLDs list
*/
function decodeTlds(encoded) {
const words = [];
const stack = [];
let i = 0;
let digits = '0123456789';
while (i < encoded.length) {
let popDigitCount = 0;
while (digits.indexOf(encoded[i + popDigitCount]) >= 0) {
popDigitCount++; // encountered some digits, have to pop to go one level up trie
}
if (popDigitCount > 0) {
words.push(stack.join('')); // whatever preceded the pop digits must be a word
for (let popCount = parseInt(encoded.substring(i, i + popDigitCount), 10); popCount > 0; popCount--) {
stack.pop();
}
i += popDigitCount;
} else {
stack.push(encoded[i]); // drop down a level into the trie
i++;
}
}
return words;
}
/**
* An object where each key is a valid DOM Event Name such as `click` or `focus`
* and each value is an event handler function.
*
* https://developer.mozilla.org/en-US/docs/Web/API/Element#events
* @typedef {?{ [event: string]: Function }} EventListeners
*/
/**
* All formatted properties required to render a link, including `tagName`,
* `attributes`, `content` and `eventListeners`.
* @typedef {{ tagName: any, attributes: {[attr: string]: any}, content: string,
* eventListeners: EventListeners }} IntermediateRepresentation
*/
/**
* Specify either an object described by the template type `O` or a function.
*
* The function takes a string value (usually the link's href attribute), the
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
* of the link. It should return an object of the template type `O`
* @template O
* @typedef {O | ((value: string, type: string, token: MultiToken) => O)} OptObj
*/
/**
* Specify either a function described by template type `F` or an object.
*
* Each key in the object should be a link type (`'url'`, `'hashtag`', etc.). Each
* value should be a function with template type `F` that is called when the
* corresponding link type is encountered.
* @template F
* @typedef {F | { [type: string]: F}} OptFn
*/
/**
* Specify either a value with template type `V`, a function that returns `V` or
* an object where each value resolves to `V`.
*
* The function takes a string value (usually the link's href attribute), the
* link type (`'url'`, `'hashtag`', etc.) and an internal token representation
* of the link. It should return an object of the template type `V`
*
* For the object, each key should be a link type (`'url'`, `'hashtag`', etc.).
* Each value should either have type `V` or a function that returns V. This
* function similarly takes a string value and a token.
*
* Example valid types for `Opt<string>`:
*
* ```js
* 'hello'
* (value, type, token) => 'world'
* { url: 'hello', email: (value, token) => 'world'}
* ```
* @template V
* @typedef {V | ((value: string, type: string, token: MultiToken) => V) | { [type: string]: V | ((value: string, token: MultiToken) => V) }} Opt
*/
/**
* See available options: https://linkify.js.org/docs/options.html
* @typedef {{
* defaultProtocol?: string,
* events?: OptObj<EventListeners>,
* format?: Opt<string>,
* formatHref?: Opt<string>,
* nl2br?: boolean,
* tagName?: Opt<any>,
* target?: Opt<string>,
* rel?: Opt<string>,
* validate?: Opt<boolean>,
* truncate?: Opt<number>,
* className?: Opt<string>,
* attributes?: OptObj<({ [attr: string]: any })>,
* ignoreTags?: string[],
* render?: OptFn<((ir: IntermediateRepresentation) => any)>
* }} Opts
*/
/**
* @type Required<Opts>
*/
const defaults = {
defaultProtocol: 'http',
events: null,
format: noop,
formatHref: noop,
nl2br: false,
tagName: 'a',
target: null,
rel: null,
validate: true,
truncate: Infinity,
className: null,
attributes: null,
ignoreTags: [],
render: null
};
/**
* Utility class for linkify interfaces to apply specified
* {@link Opts formatting and rendering options}.
*
* @param {Opts | Options} [opts] Option value overrides.
* @param {(ir: IntermediateRepresentation) => any} [defaultRender] (For
* internal use) default render function that determines how to generate an
* HTML element based on a link token's derived tagName, attributes and HTML.
* Similar to render option
*/
function Options(opts, defaultRender) {
if (defaultRender === void 0) {
defaultRender = null;
}
let o = assign({}, defaults);
if (opts) {
o = assign(o, opts instanceof Options ? opts.o : opts);
}
// Ensure all ignored tags are uppercase