Skip to content

Commit

Permalink
Add real EBCDIC build and tests
Browse files Browse the repository at this point in the history
  • Loading branch information
NWilson committed Jan 13, 2025
1 parent 03c0977 commit fdeb41f
Show file tree
Hide file tree
Showing 32 changed files with 2,527 additions and 1,310 deletions.
20 changes: 20 additions & 0 deletions .github/workflows/dev.yml
Original file line number Diff line number Diff line change
Expand Up @@ -439,6 +439,26 @@ jobs:
- name: Test
run: bazelisk test //... --enable_runfiles --incompatible_strict_action_env --test_output=all

ebcdic:
# Tests the full support for EBCDIC on a non-EBCDIC platform, using a
# hardcoded EBCDIC-1047 codepage.
name: EBCDIC
runs-on: ubuntu-24.04
steps:
- name: Checkout
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
with:
submodules: true

# PCRE2_SUPPORT_UNICODE must be OFF: CMakeLists.txt aborts configuration when
# EBCDIC and Unicode are both enabled. PCRE2_EBCDIC_IGNORING_COMPILER selects
# the hardcoded EBCDIC 1047 values, which is what lets this job run on an
# ASCII-based runner.
# NOTE(review): JIT is presumably disabled because it cannot be used with
# EBCDIC - confirm. CFLAGS_GCC_STYLE is not defined in this job; presumably it
# is set at workflow level - confirm.
- name: Configure
run: cmake -DPCRE2_SUPPORT_JIT=OFF -DPCRE2_SUPPORT_UNICODE=OFF -DPCRE2_EBCDIC=ON -DPCRE2_EBCDIC_IGNORING_COMPILER=ON -DPCRE2_DEBUG=ON -DCMAKE_C_FLAGS="$CFLAGS_GCC_STYLE" -DCMAKE_COMPILE_WARNING_AS_ERROR=ON -DCMAKE_BUILD_TYPE=Release -B build

- name: Build
run: cd build && make -j3

# RunTest lives in the source root but is invoked from the build directory.
# NOTE(review): presumably so that it picks up the freshly built pcre2test -
# confirm.
- name: Test
run: cd build && ../RunTest

heron:
# Job to verify that the tasks performed by PrepareRelease have been done. It is
# the committer's responsibility (currently) to run PrepareRelease themselves when
Expand Down
64 changes: 55 additions & 9 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -261,6 +261,10 @@ set(

set(PCRE2_EBCDIC_NL25 OFF CACHE BOOL "Use 0x25 as EBCDIC NL character instead of 0x15; implies EBCDIC.")

set(PCRE2_EBCDIC_IGNORING_COMPILER OFF CACHE BOOL "Force EBCDIC 1047 using numeric literals rather than C character literals; implies EBCDIC.")

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)

set(
PCRE2_LINK_SIZE
"2"
Expand Down Expand Up @@ -568,13 +572,42 @@ if(NEWLINE_DEFAULT STREQUAL "")
)
endif()

# Derive the internal build switches from the user-facing PCRE2_* cache
# options. Every derived switch starts out OFF so that a value inherited from
# an enclosing scope (for example a parent project that pulls this one in with
# add_subdirectory) cannot leak into this configuration.
set(REBUILD_CHARTABLES OFF)
set(EBCDIC OFF)
set(EBCDIC_NL25 OFF)
set(EBCDIC_IGNORING_COMPILER OFF)

if(PCRE2_REBUILD_CHARTABLES)
  set(REBUILD_CHARTABLES ON)
endif()

if(PCRE2_EBCDIC)
  set(EBCDIC ON)
endif()

# PCRE2_EBCDIC_NL25 and PCRE2_EBCDIC_IGNORING_COMPILER both imply EBCDIC, so
# each of them switches EBCDIC on even when PCRE2_EBCDIC itself was left OFF.
if(PCRE2_EBCDIC_NL25)
  set(EBCDIC ON)
  set(EBCDIC_NL25 ON)
endif()

if(PCRE2_EBCDIC_IGNORING_COMPILER)
  set(EBCDIC ON)
  set(EBCDIC_IGNORING_COMPILER ON)
endif()

# Make sure that if EBCDIC is set (without EBCDIC_IGNORING_COMPILER), then
# REBUILD_CHARTABLES is also enabled.
# Also check that UTF support is not requested, because PCRE2 cannot handle
# EBCDIC and UTF in the same build. To do so it would need to use different
# character constants depending on the mode.
# Also, EBCDIC cannot be used with 16-bit and 32-bit libraries.
if(EBCDIC)
  if(NOT EBCDIC_IGNORING_COMPILER)
    set(REBUILD_CHARTABLES ON)
  endif()
  if(PCRE2_SUPPORT_UNICODE)
    message(FATAL_ERROR "Support for EBCDIC and Unicode cannot be enabled at the same time")
  endif()
  if(PCRE2_BUILD_PCRE2_16 OR PCRE2_BUILD_PCRE2_32)
    message(FATAL_ERROR "EBCDIC support is available only for the 8-bit library")
  endif()
endif()

# Output files
Expand Down Expand Up @@ -648,8 +681,7 @@ endif()

# Character table generation

option(PCRE2_REBUILD_CHARTABLES "Rebuild char tables" OFF)
if(PCRE2_REBUILD_CHARTABLES)
if(REBUILD_CHARTABLES)
add_executable(pcre2_dftables src/pcre2_dftables.c)
add_custom_command(
OUTPUT ${PROJECT_BINARY_DIR}/pcre2_chartables.c
Expand All @@ -659,8 +691,12 @@ if(PCRE2_REBUILD_CHARTABLES)
COMMENT "Generating character tables (pcre2_chartables.c) for current locale"
VERBATIM
)
elseif(NOT EBCDIC)
  # ASCII build: use the default tables shipped with the distribution.
  #
  # The tests here use the derived EBCDIC / EBCDIC_NL25 switches rather than
  # the raw PCRE2_EBCDIC / PCRE2_EBCDIC_NL25 cache options. PCRE2_EBCDIC_NL25
  # and PCRE2_EBCDIC_IGNORING_COMPILER both imply EBCDIC, so an EBCDIC build can
  # reach this point with PCRE2_EBCDIC itself still OFF; testing the raw option
  # would then wrongly install the ASCII tables.
  configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.dist ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
elseif(EBCDIC_NL25)
  # EBCDIC 1047 default tables with NL = 0x25.
  configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl25 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
else()
  # EBCDIC 1047 default tables with NL = 0x15.
  configure_file(${PROJECT_SOURCE_DIR}/src/pcre2_chartables.c.ebcdic-1047-nl15 ${PROJECT_BINARY_DIR}/pcre2_chartables.c COPYONLY)
endif()

# Source code
Expand Down Expand Up @@ -1334,9 +1370,19 @@ if(PCRE2_SHOW_REPORT)
message(STATUS " Newline char/sequence ............. : ${PCRE2_NEWLINE}")
message(STATUS " \\R matches only ANYCRLF ........... : ${PCRE2_SUPPORT_BSR_ANYCRLF}")
message(STATUS " \\C is disabled .................... : ${PCRE2_NEVER_BACKSLASH_C}")
message(STATUS " EBCDIC coding ..................... : ${PCRE2_EBCDIC}")
message(STATUS " EBCDIC coding with NL=0x25 ........ : ${PCRE2_EBCDIC_NL25}")
message(STATUS " Rebuild char tables ............... : ${PCRE2_REBUILD_CHARTABLES}")

if(NOT EBCDIC)
set(EBCDIC_NL_CODE "n/a")
elseif(EBCDIC_NL25)
set(EBCDIC_NL_CODE "0x25")
else()
set(EBCDIC_NL_CODE "0x15")
endif()
message(STATUS " EBCDIC coding ..................... : ${EBCDIC}")
message(STATUS " EBCDIC code for NL ................ : ${EBCDIC_NL_CODE}")
message(STATUS " EBCDIC coding ignoring compiler ... : ${PCRE2_EBCDIC_IGNORING_COMPILER}")
message(STATUS " Rebuild char tables ............... : ${REBUILD_CHARTABLES}")

message(STATUS " Internal link size ................ : ${PCRE2_LINK_SIZE}")
message(STATUS " Maximum variable lookbehind ....... : ${PCRE2_MAX_VARLOOKBEHIND}")
message(STATUS " Parentheses nest limit ............ : ${PCRE2_PARENS_NEST_LIMIT}")
Expand Down
17 changes: 16 additions & 1 deletion Makefile.am
Original file line number Diff line number Diff line change
Expand Up @@ -362,9 +362,21 @@ src/pcre2_chartables.c: pcre2_dftables$(EXEEXT)
rm -f $@
./pcre2_dftables$(EXEEXT) $@
else
# The character tables are not being rebuilt here, so link in one of the
# pre-generated default tables instead. EBCDIC builds take the EBCDIC 1047
# tables (the -nl25 or the -nl15 variant, depending on WITH_EBCDIC_NL25); all
# other builds take the ASCII pcre2_chartables.c.dist tables.
if WITH_EBCDIC
if WITH_EBCDIC_NL25
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl25 $(abs_builddir)/src/pcre2_chartables.c
else # WITH_EBCDIC_NL25
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.ebcdic-1047-nl15 $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_EBCDIC_NL25
else # WITH_EBCDIC
src/pcre2_chartables.c: $(srcdir)/src/pcre2_chartables.c.dist
rm -f $@
$(LN_S) $(abs_srcdir)/src/pcre2_chartables.c.dist $(abs_builddir)/src/pcre2_chartables.c
endif # WITH_EBCDIC
endif # WITH_REBUILD_CHARTABLES

BUILT_SOURCES = src/pcre2_chartables.c
Expand Down Expand Up @@ -460,7 +472,10 @@ endif # WITH_PCRE2_32
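# The pcre2_chartables.c.dist file is the default version of
# pcre2_chartables.c, used unless --enable-rebuild-chartables or EBCDIC support
# is specified. The pcre2_chartables.c.ebcdic-1047-* files are the defaults
# used by EBCDIC builds that do not rebuild the tables.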

EXTRA_DIST += src/pcre2_chartables.c.dist
EXTRA_DIST += \
src/pcre2_chartables.c.dist \
src/pcre2_chartables.c.ebcdic-1047-nl15 \
src/pcre2_chartables.c.ebcdic-1047-nl25
CLEANFILES += src/pcre2_chartables.c

# The JIT compiler lives in a separate directory, but its files are #included
Expand Down
25 changes: 20 additions & 5 deletions README
Original file line number Diff line number Diff line change
Expand Up @@ -309,11 +309,22 @@ library. They are also documented in the pcre2build man page.

--enable-ebcdic --disable-unicode

This automatically implies --enable-rebuild-chartables (see above). However,
when PCRE2 is built this way, it always operates in EBCDIC. It cannot support
both EBCDIC and UTF-8/16/32. There is a second option, --enable-ebcdic-nl25,
which specifies that the code value for the EBCDIC NL character is 0x25
instead of the default 0x15.
This automatically implies --enable-rebuild-chartables (see above), in order
to ensure that you have the correct default character tables for your system's
codepage. There is an exception when you set --enable-ebcdic-ignoring-compiler
(see below), which allows using a default set of EBCDIC 1047 character tables
rather than forcing use of --enable-rebuild-chartables.

When PCRE2 is built with EBCDIC support, it always operates in EBCDIC. It
cannot support both EBCDIC and ASCII or UTF-8/16/32.

There is a second option, --enable-ebcdic-nl25, which specifies that the code
value for the EBCDIC NL character is 0x25 instead of the default 0x15.

There is a third option, --enable-ebcdic-ignoring-compiler, which disregards
the compiler's codepage for determining the numeric value of C character
constants such as 'z', and instead forces PCRE2 to use numeric constants for
the EBCDIC 1047 codepage.

. If you specify --enable-debug, additional debugging code is included in the
build. This option is intended for use by the PCRE2 maintainers.
Expand Down Expand Up @@ -822,6 +833,10 @@ The distribution should contain the files listed below.
src/pcre2_chartables.c.dist a default set of character tables that assume
ASCII coding; unless --enable-rebuild-chartables is
specified, used by copying to pcre2_chartables.c
src/pcre2_chartables.c.ebcdic-1047-{nl15,nl25} a default set of character
tables for EBCDIC 1047; used if
--enable-ebcdic-ignoring-compiler is specified
without --enable-rebuild-chartables

src/pcre2posix.c )
src/pcre2_auto_possess.c )
Expand Down
92 changes: 63 additions & 29 deletions RunTest
Original file line number Diff line number Diff line change
Expand Up @@ -45,15 +45,9 @@
# very much more stack than normal. In environments where the stack can be
# set at runtime, -bigstack sets a gigantic stack.
#
# There are two special cases where only one argument is allowed:
#
# If the first and only argument is "ebcdic", the script runs the special
# EBCDIC test that can be useful for checking certain EBCDIC features, even
# when run in an ASCII environment. PCRE2 must be built with EBCDIC support for
# this test to be run.
#
# If the script is obeyed as "RunTest list", a list of available tests is
# output, but none of them are run.
# Special cases where only one argument is allowed:
# - If the script is invoked as "RunTest list", a list of available tests is
# output, but none of them are run.
###############################################################################

# Define test titles in variables so that they can be output as a list. Some
Expand Down Expand Up @@ -90,7 +84,8 @@ title24="Test 24: Non-UTF pattern conversion tests"
title25="Test 25: UTF pattern conversion tests"
title26="Test 26: Unicode property tests (compatible with Perl >= 5.38)"
title27="Test 27: Auto-generated unicode property tests"
maxtest=27
title28="Test 28: EBCDIC-specific tests"
maxtest=28
titleheap="Test 'heap': Environment-specific heap tests"

if [ $# -eq 1 -a "$1" = "list" ]; then
Expand Down Expand Up @@ -122,6 +117,7 @@ if [ $# -eq 1 -a "$1" = "list" ]; then
echo $title25
echo $title26
echo $title27
echo $title28
echo ""
echo $titleheap
echo ""
Expand Down Expand Up @@ -247,8 +243,8 @@ do24=no
do25=no
do26=no
do27=no
do28=no
doheap=no
doebcdic=no

while [ $# -gt 0 ] ; do
case $1 in
Expand Down Expand Up @@ -280,8 +276,8 @@ while [ $# -gt 0 ] ; do
25) do25=yes;;
26) do26=yes;;
27) do27=yes;;
28) do28=yes;;
heap) doheap=yes;;
ebcdic) doebcdic=yes;;
-8) arg8=yes;;
-16) arg16=yes;;
-32) arg32=yes;;
Expand Down Expand Up @@ -357,6 +353,22 @@ support32=$?
$sim $pcre2test -C backslash-C >/dev/null
supportBSC=$?

# Probe pcre2test for EBCDIC mode and for EBCDIC I/O. Only the exit status of
# each probe is kept; anything it prints is discarded.
$sim $pcre2test -C ebcdic >/dev/null
ebcdic=$?
$sim $pcre2test -C ebcdic-io >/dev/null
ebcdic_io=$?

# Translate the newline code reported by pcre2test into the tag stored in
# ebcdic_nl: 21 (0x15) becomes 15, 37 (0x25) becomes 25, and any other output
# becomes n/a.
case "x`$sim $pcre2test -C ebcdic-nl`" in
  x21) ebcdic_nl=15 ;;
  x37) ebcdic_nl=25 ;;
  *)   ebcdic_nl='n/a' ;;
esac

# Initialize all bitsizes skipped

test8=skip
Expand Down Expand Up @@ -433,7 +445,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
$do16 = no -a $do17 = no -a $do18 = no -a $do19 = no -a \
$do20 = no -a $do21 = no -a $do22 = no -a $do23 = no -a \
$do24 = no -a $do25 = no -a $do26 = no -a $do27 = no -a \
$doheap = no -a $doebcdic = no \
$do28 = no -a $doheap = no \
]; then
do0=yes
do1=yes
Expand Down Expand Up @@ -463,6 +475,7 @@ if [ $do0 = no -a $do1 = no -a $do2 = no -a $do3 = no -a \
do25=yes
do26=yes
do27=yes
do28=yes
fi

# Handle any explicit skips at this stage, so that an argument list may consist
Expand Down Expand Up @@ -922,6 +935,43 @@ for bmode in "$test8" "$test16" "$test32"; do
fi
fi

# EBCDIC tests
#
# Test 28 is skipped when pcre2test has no EBCDIC support ($ebcdic is 0). It is
# run automatically only when pcre2test does its I/O in ASCII ($ebcdic_io is
# 0); otherwise manual instructions are printed and the script exits with
# status 1.

if [ $do28 = yes ] ; then
  echo $title28
  if [ $ebcdic -eq 0 ] ; then
    echo " Skipped because EBCDIC support is not available"
  else
    if [ $ebcdic_io -eq 0 ] ; then
      # Our testdata files are in ASCII, and the pcre2test program is using
      # ASCII input: all easy.
      for opt in "" "-dfa"; do
        $sim $valgrind $pcre2test -q $setstack $bmode $opt $testdata/testinput28 testtry
        checkresult $? "28-nl$ebcdic_nl" "$opt"
      done
    else
      echo "Cannot run EBCDIC tests:"
      echo " Ironically this particular test script does not automatically"
      echo " run the tests on an actual EBCDIC system. The testdata files"
      echo " shipped with PCRE2 are in ASCII."
      echo " You may be able to run the tests manually if you know which"
      echo " EBCDIC codepage you used when compiling PCRE2, and then convert"
      echo " the testdata to match. For example, if the C compiler used to build"
      echo " PCRE2 was using IBM-1047:"
      echo ""
      # NOTE(review): the automatic run above reads testinput28, while these
      # instructions still name testinputEBC/testoutputEBC - confirm which
      # file names are current.
      echo " iconv -f ISO8859-1 -t IBM-1047 <testdata/testinputEBC >testinputEBC-native"
      # $bmode is already the complete option (it is passed bare to pcre2test
      # above), so it must not get a second leading dash here.
      echo " pcre2test -q $bmode testinputEBC-native >testoutputEBC-native"
      # The backslash keeps $? literal: the user must see the command to type,
      # not the exit status of the previous echo.
      echo " [ \$? -eq 0 ] || echo 'pcre2test failed'"
      echo " iconv -f IBM-1047 -t ISO8859-1 <testoutputEBC-native >testoutputEBC-ascii"
      echo " $cf testdata/testoutputEBC testoutputEBC-ascii"
      echo ""
      echo "This is speculative. The PCRE2 maintainers do not have access to an"
      echo "EBCDIC system to test this. Please report back if you try it."
      exit 1
    fi
  fi
fi

# Manually selected heap tests - output may vary in different environments,
# which is why they are not automatically run.

Expand All @@ -935,22 +985,6 @@ for bmode in "$test8" "$test16" "$test32"; do
done


# ------ Special EBCDIC Test -------

if [ $doebcdic = yes ] ; then
$sim $valgrind $pcre2test -C ebcdic >/dev/null
ebcdic=$?
if [ $ebcdic -ne 1 ] ; then
echo "Cannot run EBCDIC tests: EBCDIC support not compiled"
exit 1
fi
for opt in "" "-dfa"; do
$sim $valgrind $pcre2test -q $opt $testdata/testinputEBC >testtry
checkresult $? EBC "$opt"
done
fi


# Clean up local working files
rm -f testbtables testSinput test3input testsaved1 testsaved2 test3output test3outputA test3outputB teststdout teststderr testtry

Expand Down
Loading

0 comments on commit fdeb41f

Please sign in to comment.