diff --git a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java index d9f2c46006..8f9c12aad1 100755 --- a/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java +++ b/grobid-core/src/main/java/org/grobid/core/document/TEIFormatter.java @@ -923,8 +923,8 @@ else if (biblio.getE_Year().length() == 4) tei.append("\t\t\t\n"); } - if ((abstractText != null) && (abstractText.length() != 0)) { - if ( (biblio.getLabeledAbstract() != null) && (biblio.getLabeledAbstract().length() > 0) ) { + if (StringUtils.isNotBlank(abstractText)) { + if (StringUtils.isNotBlank(biblio.getLabeledAbstract())) { // we have available structured abstract, which can be serialized as a full text "piece" StringBuilder buffer = new StringBuilder(); try { diff --git a/grobid-home/pdfalto/languages/xpdf-others/fitted.nameToUnicode b/grobid-home/pdfalto/languages/xpdf-others/fitted.nameToUnicode new file mode 100644 index 0000000000..9aba076d9f --- /dev/null +++ b/grobid-home/pdfalto/languages/xpdf-others/fitted.nameToUnicode @@ -0,0 +1,7 @@ +0030 zero.fitted +0031 one.fitted +0032 two.fitted +0033 three.fitted +0034 four.fitted +0035 five.fitted +0036 six.fitted \ No newline at end of file diff --git a/grobid-home/pdfalto/languages/xpdf-others/ligatures.nameToUnicode b/grobid-home/pdfalto/languages/xpdf-others/ligatures.nameToUnicode new file mode 100644 index 0000000000..831b07c8ff --- /dev/null +++ b/grobid-home/pdfalto/languages/xpdf-others/ligatures.nameToUnicode @@ -0,0 +1,6 @@ +fb00 f_f +fb01 f_i +fb02 f_l +fb03 f_f_i + + diff --git a/grobid-home/pdfalto/languages/xpdf-others/oldstyle.nameToUnicode b/grobid-home/pdfalto/languages/xpdf-others/oldstyle.nameToUnicode new file mode 100644 index 0000000000..7af14c9a80 --- /dev/null +++ b/grobid-home/pdfalto/languages/xpdf-others/oldstyle.nameToUnicode @@ -0,0 +1,10 @@ +0030 zero.oldstyle +0031 one.oldstyle +0032 two.oldstyle +0033 three.oldstyle 
+0034 four.oldstyle +0035 five.oldstyle +0036 six.oldstyle +0037 seven.oldstyle +0038 eight.oldstyle +0039 nine.oldstyle \ No newline at end of file diff --git a/grobid-home/pdfalto/languages/xpdf-others/others.nameToUnicode b/grobid-home/pdfalto/languages/xpdf-others/others.nameToUnicode new file mode 100644 index 0000000000..9851bb556c --- /dev/null +++ b/grobid-home/pdfalto/languages/xpdf-others/others.nameToUnicode @@ -0,0 +1,4 @@ +2113 lscript +2202 partialdiff +21A9 arrowhookleft +21C0 arrowrighttophalf \ No newline at end of file diff --git a/grobid-home/pdfalto/languages/xpdf-others/sc.nameToUnicode b/grobid-home/pdfalto/languages/xpdf-others/sc.nameToUnicode new file mode 100644 index 0000000000..8418b37b30 --- /dev/null +++ b/grobid-home/pdfalto/languages/xpdf-others/sc.nameToUnicode @@ -0,0 +1,26 @@ +0061 a.sc +0062 b.sc +0063 c.sc +0064 d.sc +0065 e.sc +0066 f.sc +0067 g.sc +0068 h.sc +0069 i.sc +006a j.sc +006c l.sc +006d m.sc +006e n.sc +006f o.sc +0070 p.sc +0071 q.sc +0072 r.sc +0073 s.sc +0074 t.sc +0075 u.sc +0076 v.sc +0077 w.sc +0078 x.sc +0079 y.sc +007a z.sc +002d hyphen.sc \ No newline at end of file diff --git a/grobid-home/pdfalto/languages/xpdf-others/taboldstyle.nameToUnicode b/grobid-home/pdfalto/languages/xpdf-others/taboldstyle.nameToUnicode new file mode 100644 index 0000000000..1093439560 --- /dev/null +++ b/grobid-home/pdfalto/languages/xpdf-others/taboldstyle.nameToUnicode @@ -0,0 +1,10 @@ +0030 zero.taboldstyle +0031 one.taboldstyle +0032 two.taboldstyle +0033 three.taboldstyle +0034 four.taboldstyle +0035 five.taboldstyle +0036 six.taboldstyle +0037 seven.taboldstyle +0038 eight.taboldstyle +0039 nine.taboldstyle \ No newline at end of file diff --git a/grobid-home/pdfalto/languages/xpdfrc b/grobid-home/pdfalto/languages/xpdfrc index 895c5debed..be909550ca 100644 --- a/grobid-home/pdfalto/languages/xpdfrc +++ b/grobid-home/pdfalto/languages/xpdfrc @@ -59,3 +59,11 @@ unicodeMap TIS-620 languages/xpdf-thai/TIS-620.unicodeMap #----- 
begin Turkish support package (2011-aug-15) unicodeMap ISO-8859-9 languages/xpdf-turkish/ISO-8859-9.unicodeMap #----- end Turkish support package +#----- begin oldstyle support package (2024-dec-31) +nameToUnicode languages/xpdf-others/oldstyle.nameToUnicode +nameToUnicode languages/xpdf-others/taboldstyle.nameToUnicode +nameToUnicode languages/xpdf-others/ligatures.nameToUnicode +nameToUnicode languages/xpdf-others/fitted.nameToUnicode +nameToUnicode languages/xpdf-others/others.nameToUnicode +nameToUnicode languages/xpdf-others/sc.nameToUnicode +#----- end oldstyle support package diff --git a/grobid-home/pdfalto/lin-64/xpdfrc b/grobid-home/pdfalto/lin-64/xpdfrc index 5b909e6b33..6c997423ea 100644 --- a/grobid-home/pdfalto/lin-64/xpdfrc +++ b/grobid-home/pdfalto/lin-64/xpdfrc @@ -59,3 +59,12 @@ unicodeMap TIS-620 ../languages/xpdf-thai/TIS-620.unicodeMap #----- begin Turkish support package (2011-aug-15) unicodeMap ISO-8859-9 ../languages/xpdf-turkish/ISO-8859-9.unicodeMap #----- end Turkish support package +#----- begin oldstyle support package (2024-dec-31) +nameToUnicode ../languages/xpdf-others/oldstyle.nameToUnicode +nameToUnicode ../languages/xpdf-others/taboldstyle.nameToUnicode +nameToUnicode ../languages/xpdf-others/ligatures.nameToUnicode +nameToUnicode ../languages/xpdf-others/fitted.nameToUnicode +nameToUnicode ../languages/xpdf-others/others.nameToUnicode +nameToUnicode ../languages/xpdf-others/sc.nameToUnicode +#----- end oldstyle support package + diff --git a/grobid-home/pdfalto/mac-64/xpdfrc b/grobid-home/pdfalto/mac-64/xpdfrc index 5b909e6b33..e0dc52b508 100644 --- a/grobid-home/pdfalto/mac-64/xpdfrc +++ b/grobid-home/pdfalto/mac-64/xpdfrc @@ -59,3 +59,11 @@ unicodeMap TIS-620 ../languages/xpdf-thai/TIS-620.unicodeMap #----- begin Turkish support package (2011-aug-15) unicodeMap ISO-8859-9 ../languages/xpdf-turkish/ISO-8859-9.unicodeMap #----- end Turkish support package +#----- begin oldstyle support package (2024-dec-31) +nameToUnicode 
../languages/xpdf-others/oldstyle.nameToUnicode +nameToUnicode ../languages/xpdf-others/taboldstyle.nameToUnicode +nameToUnicode ../languages/xpdf-others/ligatures.nameToUnicode +nameToUnicode ../languages/xpdf-others/fitted.nameToUnicode +nameToUnicode ../languages/xpdf-others/others.nameToUnicode +nameToUnicode ../languages/xpdf-others/sc.nameToUnicode +#----- end oldstyle support package diff --git a/grobid-home/pdfalto/mac_arm-64/xpdfrc b/grobid-home/pdfalto/mac_arm-64/xpdfrc index 5b909e6b33..e0dc52b508 100644 --- a/grobid-home/pdfalto/mac_arm-64/xpdfrc +++ b/grobid-home/pdfalto/mac_arm-64/xpdfrc @@ -59,3 +59,11 @@ unicodeMap TIS-620 ../languages/xpdf-thai/TIS-620.unicodeMap #----- begin Turkish support package (2011-aug-15) unicodeMap ISO-8859-9 ../languages/xpdf-turkish/ISO-8859-9.unicodeMap #----- end Turkish support package +#----- begin oldstyle support package (2024-dec-31) +nameToUnicode ../languages/xpdf-others/oldstyle.nameToUnicode +nameToUnicode ../languages/xpdf-others/taboldstyle.nameToUnicode +nameToUnicode ../languages/xpdf-others/ligatures.nameToUnicode +nameToUnicode ../languages/xpdf-others/fitted.nameToUnicode +nameToUnicode ../languages/xpdf-others/others.nameToUnicode +nameToUnicode ../languages/xpdf-others/sc.nameToUnicode +#----- end oldstyle support package