#!/bin/sh
# $Id: tesseract.SlackBuild,v 1.7 2023/09/26 19:06:23 root Exp root $
# Copyright 2011, 2023  Eric Hameleers, Eindhoven, NL
# All rights reserved.
#
#   Permission to use, copy, modify, and distribute this software for
#   any purpose with or without fee is hereby granted, provided that
#   the above copyright notice and this permission notice appear in all
#   copies.
#
#   THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
#   WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
#   MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
#   IN NO EVENT SHALL THE AUTHORS AND COPYRIGHT HOLDERS AND THEIR
#   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
#   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
#   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
#   USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
#   ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
#   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
#   OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
#   SUCH DAMAGE.
# -----------------------------------------------------------------------------
#
# Slackware SlackBuild script 
# ===========================
# By:          Eric Hameleers <alien@slackware.com>
# For:         tesseract
# Descr:       an OCR system
# URL:         https://github.com/tesseract-ocr
# Needs:       leptonica
# Changelog:   
# 3.00-1:      25/sep/2011 by Eric Hameleers <alien@slackware.com>
#              * Initial build. One language is contained in the main package
#                ('eng' by default), the other languages are split off into
#                data packages.
# 5.3.2-1:     26/sep/2023 by Eric Hameleers <alien@slackware.com>
#              * Update.
# 
# Run 'sh tesseract.SlackBuild' to build a Slackware package.
# The package (.txz) and .txt file as well as build logs are created in /tmp .
# Install it using 'installpkg'. 
#
# -----------------------------------------------------------------------------

# Set initial variables.
# Everything except PRGNAM and DOCS keeps a non-empty value that is already
# present in the environment; the defaults below only fill the gaps.

PRGNAM=tesseract
: "${VERSION:=5.3.2}"          # version of the program sources
: "${DATAVER:=4.1.0}"          # version of the language data files
: "${BUILD:=1}"
: "${NUMJOBS:= -j$(nproc) }"   # make options; nproc only runs when NUMJOBS is empty
: "${TAG:=alien}"

# Files copied into the package's documentation directory (INSTALL* is a glob):
DOCS="AUTHORS CONTRIBUTING.md ChangeLog INSTALL* LICENSE README.md VERSION"

# Where do we look for sources?
# (The absolute path of the directory holding this script. The expansions are
# quoted so that a path containing whitespace is not split into several words.)
SRCDIR=$(cd "$(dirname "$0")"; pwd)

# Default language which is going to be built into the main tesseract package.
DEFLANG=${DEFLANG:-"eng"}

# If you want only a few language packs, define them like this instead:
# LANGPACKS="eng nld fra"
# When LANGPACKS is empty, the list is built from $SRCDIR/languages: the part
# before the first '=' of every line not starting with '#', joined into one
# space-separated string. A missing file just leaves the list empty here.
if [ -z "${LANGPACKS:-}" ]; then
  LANGPACKS="$(grep -v "^#" "$SRCDIR/languages" 2>/dev/null \
    | cut -f1 -d= | tr '\n' ' ' | tr -s ' ' | sed -e 's/ *$//')"
fi

# The list of languages is found at: https://tesseract-ocr.github.io/tessdoc/Data-Files#data-files-for-version-400-november-29-2016
# We need to have at least one language pack or tesseract will not be useable:
if [ -z "$LANGPACKS" ]; then
  LANGPACKS="${DEFLANG}"
elif ! printf '%s\n' "$LANGPACKS" | grep -qw -- "$DEFLANG" ; then
  # The default language must also be listed in LANGPACKS:
  LANGPACKS="$DEFLANG $LANGPACKS"
fi

# Place to build (TMP) package (PKG) and output (OUTPUT) the program:
: "${TMP:=/tmp/build}"
PKG=$TMP/package-$PRGNAM
: "${OUTPUT:=/tmp}"

# SOURCE[n] is the local file name, SRCURL[n] the URL it is downloaded from.
# Slot 0 is the program tarball:
SOURCE[0]="$SRCDIR/${PRGNAM}-${VERSION}.tar.gz"
SRCURL[0]="https://github.com/tesseract-ocr/${PRGNAM}/archive/${VERSION}/${PRGNAM}-${VERSION}.tar.gz"

# We have an arbitrary amount of language packs to download;
# they occupy slots 1 and up, one per entry in LANGPACKS:
III=1
for TB in $LANGPACKS ; do
  SOURCE[III]="$SRCDIR/${TB}.traineddata"
  SRCURL[III]="https://github.com/tesseract-ocr/tessdata/raw/${DATAVER}/${TB}.traineddata"
  III=$((III + 1))
done

##
## --- with a little luck, you won't have to edit below this point --- ##
##

# Automatically determine the architecture we're building on
# (an ARCH value that is already set is left untouched):
if [ -z "$ARCH" ]; then
  case "$(uname -m)" in
    i?86) ARCH=i586 ;;
    arm*)
      # Inspect the ELF attributes of an installed binary to tell the ARM
      # variants apart. 'grep -E' replaces the obsolescent 'egrep', which
      # prints a warning with recent GNU grep releases.
      if readelf /usr/bin/file -A | grep -E -q "Tag_CPU.*[4,5]" ; then
        ARCH=arm
      else
        ARCH=armv7hl
      fi
      ;;
    # Unless $ARCH is already set, use uname -m for all other archs:
    *) ARCH=$(uname -m) ;;
  esac
  export ARCH
fi

# Set CFLAGS/CXXFLAGS and LIBDIRSUFFIX:
case "$ARCH" in
  i?86)
    SLKCFLAGS="-O2 -march=${ARCH} -mtune=i686"
    SLKLDFLAGS=""
    LIBDIRSUFFIX=""
    ;;
  x86_64)
    SLKCFLAGS="-O2 -fPIC"
    SLKLDFLAGS="-L/usr/lib64"
    LIBDIRSUFFIX="64"
    ;;
  armv7hl)
    SLKCFLAGS="-O2 -march=armv7-a -mfpu=vfpv3-d16"
    SLKLDFLAGS=""
    LIBDIRSUFFIX=""
    ;;
  *)
    # Any other architecture: keep values that are already set and only
    # fill in defaults for the ones that are empty or unset.
    : "${SLKCFLAGS:=-O2}"
    SLKLDFLAGS=${SLKLDFLAGS:-}
    LIBDIRSUFFIX=${LIBDIRSUFFIX:-}
    ;;
esac

# Build triplet passed to configure; every arm* variant gets the EABI suffix:
TARGET=$ARCH-slackware-linux
case "$ARCH" in
  arm*) TARGET=$TARGET-gnueabi ;;
esac

# Exit the script on errors:
# (for a pipeline, only the exit status of its last command is considered
# unless the 'pipefail' option is turned on)
set -e
# When a command fails, report the line number on the terminal and also store
# the message in $OUTPUT/error-$PRGNAM.log. The ERR trap is a bash feature.
trap 'echo "$0 FAILED at line ${LINENO}" | tee $OUTPUT/error-${PRGNAM}.log' ERR
# Catch uninitialized variables:
set -u
# First command-line argument; falls back to "1" when none was given, so that
# reading it does not trip 'set -u':
P1=${1:-1}

# Save old umask and set to 0022:
_UMASK_=$(umask)
umask 0022

# Create working directories:
mkdir -p "$OUTPUT"            # place for the package to be saved
mkdir -p "$TMP/tmp-$PRGNAM"   # location to build the source
mkdir -p "$PKG"               # place for the package to be built

# Clean up what a previous run left behind. The ':?' guards abort the script
# rather than letting an empty variable turn a path into something like '/*'.
rm -rf "${PKG:?}"/*                   # always erase old package's contents
rm -rf "${TMP:?}/tmp-${PRGNAM:?}"/*   # remove the remnants of previous build
rm -rf "$OUTPUT"/{autogen,configure,make,install,error,makepkg}-"$PRGNAM".log
                              # remove old log files

# Source file availability:
# Every SOURCE[n] has to exist as a local file. A missing one is downloaded
# from the matching SRCURL[n]; the build stops if that is not possible.
for (( i = 0; i < ${#SOURCE[*]}; i++ )) ; do
  if ! [ -f "${SOURCE[$i]}" ]; then
    echo "Source '$(basename "${SOURCE[$i]}")' not available yet..."
    # Check if the $SRCDIR is writable at all - if not, download to $OUTPUT
    [ -w "$SRCDIR" ] || SOURCE[$i]="$OUTPUT/$(basename "${SOURCE[$i]}")"
    if [ -f "${SOURCE[$i]}" ]; then echo "Ah, found it!"; continue; fi
    if [ -n "${SRCURL[$i]:-}" ]; then
      echo "Will download file to $(dirname "${SOURCE[$i]}")"
      # Remember wget's exit status without tripping 'set -e', so that a failed
      # transfer is noticed even if it left a non-empty partial file behind.
      # NOTE(review): --no-check-certificate disables TLS verification and the
      # download is not checked against a checksum - confirm this is intended.
      WGETRC=0
      wget --no-check-certificate -nv -T 20 -O "${SOURCE[$i]}" "${SRCURL[$i]}" || WGETRC=$?
      if [ "$WGETRC" -ne 0 ] || [ ! -s "${SOURCE[$i]}" ]; then
        echo "Fail to download '$(basename "${SOURCE[$i]}")'. Aborting the build."
        # Move a broken download out of the way so that the next run does not
        # mistake it for a valid source file:
        if [ -e "${SOURCE[$i]}" ]; then
          mv -f "${SOURCE[$i]}" "${SOURCE[$i]}".FAIL
        fi
        exit 1
      fi
    else
      echo "File '$(basename "${SOURCE[$i]}")' not available. Aborting the build."
      exit 1
    fi
  fi
done

# Stop here if only the downloads were requested:
if [ "$P1" == "--download" ]; then
  echo "Download complete."
  exit 0
fi

# --- PACKAGE BUILDING ---

echo "++"
echo "|| $PRGNAM-$VERSION"
echo "++"

# Unpack the program tarball in the build directory, then normalize ownership
# and permissions of the extracted tree. Paths are quoted so that a TMP or
# SRCDIR containing whitespace still works.
cd "$TMP/tmp-$PRGNAM"
echo "Extracting the source archive(s) for $PRGNAM..."
tar -xvf "${SOURCE[0]}"
cd "${PRGNAM}-${VERSION}"
chown -R root:root .
chmod -R u+w,go+r-w,a+rX-st .

echo Building ...
# Every step below is piped through 'tee' to keep a log. Without 'pipefail'
# the status of such a pipeline is the status of 'tee', so 'set -e' would not
# notice a failed autogen/configure/make and a broken tree would get packaged.
set -o pipefail
./autogen.sh \
  2>&1 | tee "$OUTPUT/autogen-${PRGNAM}.log"
LDFLAGS="$SLKLDFLAGS" \
CXXFLAGS="$SLKCFLAGS" \
CFLAGS="$SLKCFLAGS" \
./configure \
  --prefix=/usr \
  --libdir="/usr/lib${LIBDIRSUFFIX}" \
  --mandir=/usr/man \
  --docdir="/usr/doc/$PRGNAM-$VERSION" \
  --localstatedir=/var \
  --sysconfdir=/etc \
  --disable-static \
  --program-prefix= \
  --program-suffix= \
  --build="$TARGET" \
  2>&1 | tee "$OUTPUT/configure-${PRGNAM}.log"
# NUMJOBS stays unquoted on purpose: it may hold more than one make option.
make $NUMJOBS 2>&1 | tee "$OUTPUT/make-${PRGNAM}.log"
make training 2>&1 | tee -a "$OUTPUT/make-${PRGNAM}.log"

# Now we can install the lot (and then split off the language packs):
make DESTDIR="$PKG" install 2>&1 | tee "$OUTPUT/install-${PRGNAM}.log"
make training-install DESTDIR="$PKG" 2>&1 | tee -a "$OUTPUT/install-${PRGNAM}.log"
# Back to the default pipeline semantics for the rest of the script:
set +o pipefail

# Abort instead of operating on the real root directory if PKG is ever empty:
: "${PKG:?}"

# The language files are not automatically installed:
# (slot 0 of SOURCE is the program tarball, the data files start at slot 1)
for (( i = 1; i < ${#SOURCE[*]}; i++ )) ; do
  install -D -m0644 "${SOURCE[$i]}" -t "$PKG/usr/share/tessdata/"
done

# Don't ship .la files:
rm -f "$PKG"/{,usr/}lib"$LIBDIRSUFFIX"/*.la

# Add documentation:
mkdir -p "$PKG/usr/doc/$PRGNAM-$VERSION"
# DOCS stays unquoted on purpose: it is a whitespace-separated list that
# contains a glob. Files from the list that do not exist are tolerated.
cp -a $DOCS "$PKG/usr/doc/$PRGNAM-$VERSION" || true
# Ship a copy of this build script along with the documentation:
cat "$SRCDIR/$(basename "$0")" > "$PKG/usr/doc/$PRGNAM-$VERSION/$PRGNAM.SlackBuild"
chown -R root:root "$PKG/usr/doc/$PRGNAM-$VERSION"
find "$PKG/usr/doc" -type f -exec chmod 644 {} \;

# Compress the man page(s):
if [ -d "${PKG:?}/usr/man" ]; then
  find "$PKG/usr/man" -type f -name "*.?" -exec gzip -9f {} \;
  # Replace every symlinked page by a link to the compressed target. The names
  # are read NUL-delimited so that a path containing whitespace stays in one
  # piece instead of being word-split.
  find "$PKG/usr/man" -type l -name "*.?" -print0 \
    | while IFS= read -r -d '' MANLINK ; do
        ln -s "$(readlink "$MANLINK").gz" "$MANLINK.gz"
        rm "$MANLINK"
      done
fi

# Strip binaries (if any):
# File names travel NUL-delimited into 'file' and newline-delimited into
# 'strip', so names containing blanks are not split; '-r' skips the command
# when there is no input. Errors from strip are ignored on purpose.
find "$PKG" -print0 | xargs -0 -r file | grep -e "executable" -e "shared object" | grep ELF \
  | cut -f 1 -d : | xargs -r -d '\n' strip --strip-unneeded 2> /dev/null || true

# Add a package description:
mkdir -p "${PKG:?}/install"
# Description of the default language: the text after '=' on its line in the
# 'languages' file.
DEFDESC="$(grep "^${DEFLANG}=" "$SRCDIR/languages" |cut -f2 -d=)"
# Backslash-escape '\', '/' and '&' so that the description is inserted
# literally; unescaped, a '/' would terminate the sed expression early and a
# '&' would be replaced by the matched placeholder.
DEFDESC_SED="$(printf '%s' "$DEFDESC" | sed -e 's/[\\\/&]/\\&/g')"
# Fill the @LANG@ and @LANGDESC@ placeholders of the slack-desc template:
sed -e "s/@LANG@/$DEFLANG/g" -e "s/@LANGDESC@/$DEFDESC_SED/g" \
  "$SRCDIR/slack-desc" > "${PKG}/install/slack-desc"
cat "$SRCDIR/slack-required" > "$PKG/install/slack-required"

# Split out language packs if we have built additional languages:
# each non-default language gets its data file moved into a package tree of
# its own, which is then packaged separately.
cd "$PKG"
for lang in ${LANGPACKS} ; do
  # Do not split off the "default" language data:
  [ "$lang" = "$DEFLANG" ] && continue

  langdesc="$(grep "^${lang}=" "$SRCDIR/languages" |cut -f2 -d=)"
  echo "Splitting out '$lang' ($langdesc) datafile to separate package..."
  DATAPKG="${PKG}-data-${lang}"
  DATANAME="${PRGNAM}-data-${lang}-${VERSION}-noarch-${BUILD}${TAG}"
  rm -rf "$DATAPKG"
  mkdir -p "$DATAPKG/usr/share/tessdata"
  mv "${PKG}/usr/share/tessdata/${lang}.traineddata" \
    "$DATAPKG/usr/share/tessdata/"

  # Create slack-desc :
  mkdir -p "$DATAPKG/install"
  # Backslash-escape '\', '/' and '&' so that the description is inserted
  # literally instead of being interpreted by sed:
  langdesc_sed="$(printf '%s' "$langdesc" | sed -e 's/[\\\/&]/\\&/g')"
  sed -e "s/@LANG@/$lang/g" -e "s/@LANGDESC@/$langdesc_sed/g" \
    "$SRCDIR/slack-desc.data" > "$DATAPKG/install/slack-desc"

  # Create the package:
  # (the subshells keep the working directory of the main script unchanged)
  ( cd "$DATAPKG" \
    && /sbin/makepkg -p -l y -c n "$OUTPUT/${DATANAME}.${PKGTYPE:-txz}" )
  ( cd "$OUTPUT" \
    && md5sum "${DATANAME}.${PKGTYPE:-txz}" > "${DATANAME}.${PKGTYPE:-txz}.md5" )
  grep "^${PRGNAM}-data-$lang" "$DATAPKG/install/slack-desc" > "$OUTPUT/${DATANAME}.txt"
done # End of splitting out language packs

# Build the package:
cd "$PKG"
PKGFILE="${PRGNAM}-${VERSION}-${ARCH}-${BUILD}${TAG}"
makepkg --linkadd y --chown n "$OUTPUT/${PKGFILE}.${PKGTYPE:-txz}" 2>&1 | tee "$OUTPUT/makepkg-${PRGNAM}.log"
# Write the checksum from inside $OUTPUT so that the .md5 file holds a bare
# file name; the subshell leaves the working directory of the script alone.
( cd "$OUTPUT" \
  && md5sum "${PKGFILE}.${PKGTYPE:-txz}" > "${PKGFILE}.${PKGTYPE:-txz}.md5" )
# Description (.txt) and dependency (.dep) files go next to the package:
grep "^${PRGNAM}" "$PKG/install/slack-desc" > "$OUTPUT/${PKGFILE}.txt"
cat "$PKG/install/slack-required" > "$OUTPUT/${PKGFILE}.dep"

# Restore the original umask:
umask "${_UMASK_}"

