-
Notifications
You must be signed in to change notification settings - Fork 1
Wiki to Text
Adrian Wilke edited this page Feb 9, 2021 · 5 revisions
#!/bin/bash
# Convert every MediaWiki-markup file in a directory to plain text.
#
# Usage: <this script> <input directory> <output directory>
#
# For each regular file directly inside the input directory, pandoc writes a
# plain-text rendering to a file of the same name in the output directory
# (created if it does not exist). Bracketed markers such as [1] and blank
# lines are then removed from that output file in place.
#
# Requires pandoc with the pandoc-citeproc filter, and a sed whose -i option
# takes no backup-suffix argument (GNU sed).
# NOTE(review): newer pandoc releases ship citation processing built in
# (--citeproc); confirm the installed version still provides pandoc-citeproc.
#
# Exit status: 0 if every file was converted; 1 on wrong argument count,
# missing input directory, or if any file failed to convert.
if [[ $# -ne 2 ]]; then
  echo 'Please provide: <input directory> <output directory>' >&2
  exit 1
fi

# Remove slash at end
INDIR=${1%/}
OUTDIR=${2%/}

# A bare "/" argument becomes empty after stripping; fall back to "/" for
# the directory checks so the root directory is still handled correctly.
if [[ ! -d "${INDIR:-/}" ]]; then
  printf 'Input directory not found: %s\n' "$1" >&2
  exit 1
fi
if ! mkdir -p -- "${OUTDIR:-/}"; then
  printf 'Cannot create output directory: %s\n' "$2" >&2
  exit 1
fi

STATUS=0
for FILEPATH in "$INDIR"/*; do
  # Skip subdirectories, and the literal pattern that is left over when the
  # input directory is empty. Without this guard the unmatched "*" would be
  # handed on to sed and expand to every existing file in the output directory.
  [[ -f "$FILEPATH" ]] || continue

  # Only file name
  FILE=${FILEPATH##*/}
  OUTFILE="$OUTDIR/$FILE"

  # Convert from wiki-markup to plain text
  if ! pandoc --filter pandoc-citeproc -f mediawiki -t plain \
    -o "$OUTFILE" "$FILEPATH"; then
    printf 'Conversion failed, skipping: %s\n' "$FILEPATH" >&2
    STATUS=1
    continue
  fi

  # Remove markers [1], then remove empty lines. Both expressions run in a
  # single pass; the blank-line test sees the line after marker removal.
  if ! sed -i -e 's/\[[^]]*\]//g' -e '/^[[:space:]]*$/d' -- "$OUTFILE"; then
    printf 'Post-processing failed: %s\n' "$OUTFILE" >&2
    STATUS=1
  fi
done

exit "$STATUS"
# Data Science Group (DICE) at Paderborn University
# This work has been supported by the German Federal Ministry of Education and Research (BMBF) within the project EML4U under grant no. 01IS19080B.