#!/usr/bin/env bash
# Copyright (C) 2013-2017 Luke Shumaker <lukeshu@parabola.nu>
#
# License: GNU GPLv2+
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# NB: During normal operation (i.e. when not printing `usage()` text),
# we don't use librelib.  This is important as we need to be able to
# run this program statically in-place when building libretools.

# Message catalog used by gettext(1) for this program's own messages.
TEXTDOMAIN='librelib'
TEXTDOMAINDIR='/usr/share/locale'
export TEXTDOMAIN TEXTDOMAINDIR

# Keywords looked for when `-k` is not given.  Each element is a
# complete `--keyword=SPEC` argument for xgettext.  The order is
# significant only in that `usage` prints the lists verbatim.
default_simple=(
	# xgettext
	--keyword=eval_gettext
	--keyword='eval_ngettext:1,2'
	--keyword=gettext
	--keyword='ngettext:1,2'
	# libmakepkg/util/message.sh
	--keyword=plain
	--keyword=msg
	--keyword=msg2
	--keyword=warning
	--keyword=error
	# devtools/lib/common.sh
	--keyword=stat_busy
	--keyword=die
	--keyword=lock:3
	--keyword=slock:3
	# devtools-par/lib/common.sh
	--keyword=_
	# libretools/src/lib/messages.sh
	--keyword=print
	--keyword=term_title
)
default_prose=(
	--keyword=prose
	--keyword=bullet
)

readonly default_simple default_prose

# Fallback for systems without gettext(1): just print the message
# untranslated.
#
# printf is used rather than echo so that a message beginning with
# `-n`, `-e` or `-E` is printed literally instead of being taken as an
# option.  IFS is pinned locally so that multiple arguments are always
# joined with a single space, regardless of the caller's IFS.  The
# trailing newline is harmless: every caller in this file captures the
# output with $(...).
if ! command -v gettext &>/dev/null; then
	gettext() {
		local IFS=' '
		printf '%s\n' "$*"
	}
fi

# Usage: errusage [FMT [ARGS...]]
#
# Report a command-line usage error.  If FMT is given, it is passed
# through gettext and then used as a printf format for ARGS; the
# resulting message is written to stderr, prefixed with the program
# name.  In every case the `usage` text is then written to stderr.
errusage() {
	if [[ $# -gt 0 ]]; then
		# Keep the translated format out of the global scope.
		local fmt
		fmt="$(gettext "$1")"; shift
		# The program name is passed as an argument, not spliced
		# into the format, so a `%` or `\` in $0 can't corrupt
		# the message.
		printf "%s: ${fmt}\n" "${0##*/}" "$@" >&2
	fi
	usage >&2
}

# Usage: usage
#
# Print the help text to stdout.
#
# This is the only place where librelib is used: the messages library
# is located by running `librelib messages` and sourced here, inside
# the function, so that normal operation doesn't depend on it.  The
# print/prose/bullet/flag commands used below are presumably provided
# by that library (they are not defined in this file).
usage() {
	. "$(librelib messages)"
	print 'Usage: %s [OPTIONS] FILES...' "${0##*/}"
	print 'Generates .pot files for programs using libremessages'
	echo
	prose 'In librexgettext, there are 2 types of keywords:'
	bullet 'simple: Simple keywords are just like normal xgettext'
	bullet 'prose: Prose keywords are similar, but the text is
	        word-wrapped'
	prose 'The keyword format is the same as in GNU xgettext.'
	echo
	prose 'The libremessages `flag` command is also handled
	       specially, and is not configurable as a keyword.'
	echo
	# "${array[*]#--keyword=}" strips the `--keyword=` prefix from
	# each element and joins the results into a single word.
	prose 'The default simple keywords are: %s' "${default_simple[*]#--keyword=}"
	echo
	prose 'The default prose keywords are: %s' "${default_prose[*]#--keyword=}"
	echo
	print 'Options:'
	flag \
	    '--simple=KEYWORD' 'Look for KEYWORD as an additional simple keyword' \
	    '--prose=KEYWORD' 'Look for KEYWORD as an additional prose keyword' \
	    '-k' 'Disable using the default keywords' \
	    '-h, --help' 'Show this text'
}

# Usage: xgettext-sh [XGETTEXT_ARGS...]
#
# Run xgettext with the fixed set of options this program always
# wants, followed by the caller's arguments: no header, UTF-8 input,
# shell syntax, built-in default keywords disabled (`-k`), and output
# written to stdout (`-o -`).
xgettext-sh() {
	local base_opts=(
		--omit-header
		--from-code=UTF-8
		-L shell
		-k
		-o -
	)
	xgettext "${base_opts[@]}" "$@"
}

# Usage: xgettext-flag FILE
#
# Extract the translatable arguments of `flag` invocations in FILE and
# print them on stdout as message entries.
#
# The work is done by a single pipeline: stage 1 runs xgettext once
# per argument position, and stages 2-4 (the last pipeline element)
# regroup the results per invocation and print only the arguments
# that should be translated (headings and flag meanings, not the flag
# names themselves).
#
# The `exit 1` calls in the last pipeline element run in that
# element's subshell, so they end the pipeline with a failing status
# (after printing a diagnostic to stderr) rather than exiting the
# whole script directly.
xgettext-flag() {
	local file="$1"
	{
		# Stage 1: Generate
		#
		# Get all of the arguments to `flag`.  Because `flag`
		# takes an arbitrary number of arguments, just iterate
		# through arg1, arg2, ... argN; until we've come up
		# empty 3 times.  Why 3?  Because each flag takes 2
		# arguments, and because we don't keep track of which
		# one of those we're on, waiting for 3 empties ensures
		# us that we've had a complete "round" with nothing.
		#
		# Why can't I just do i+=2, and not have to keep track
		# of empties?  Because, we also allow for arguments
		# ending in a colon to be headings, which changes the
		# offsets.
		#
		# The `"$i"` part of the keyword spec is what stages 2
		# and 3 rely on to recover the argument number: they
		# look for it as a "#. N" comment line at the start of
		# each entry (see the xgettext keyword-spec syntax).
		declare -i empties=0
		declare -i i
		for (( i=1; empties < 3; i++ )); do
			local out
			out="$(xgettext-sh --keyword="flag:$i,\"$i\"" -- "$file")"
			if [[ -n $out ]]; then
				printf -- '%s\n' "$out"
				empties=0
			else
				empties+=1
			fi
		done
	} | whitespace-collapse | sed '/^\#, sh-format/d' | {
		# Stage 2: Parse
		#
		# Read in the lines, and group them into an array of
		# (multi-line) msgs.  This just makes working with
		# them easier.
		#
		# A new msg starts at every "#. N" line; all other
		# lines are appended to the current msg.
		local msgs=()
		declare -i i=-1
		local re='^#\. ([0-9]+)$'
		# Empty IFS, so that `read` doesn't trim whitespace
		# from the lines.
		IFS=''
		local line
		while read -r line; do
			if [[ $line =~ $re ]]; then
				i+=1
			fi
			msgs[$i]+="$line"$'\n'
		done
		# Stage 3: Sort
		#
		# Now, we have the `msgs` array, and it is
		# sorted such that it is all of the arg1's to `flag`,
		# then all of the arg2's, then all of the arg3's, and
		# so on.  We want to re-order them such that it's all
		# of the args for the first invocation then all of the
		# args for the second; and so on.
		#
		# We do this by simply sorting them by the location
		# that they appear in the file.  Then, when we see the
		# argument number go back down, we know that a new
		# invocation has started!
		#
		# Each element of `locations` is "ROW ARG INDEX",
		# where INDEX is the msg's index in `msgs`.
		local locations=()
		readarray -t locations < <(
			local i
			for i in "${!msgs[@]}"; do
				local lines=()
				readarray -t lines < <(printf '%s' "${msgs[$i]}")

				# ARG comes from the "#. N" line; ROW is
				# whatever follows the last colon on the
				# second line (presumably xgettext's
				# "#: FILE:LINE" reference).
				declare -i arg row
				arg=${lines[0]#'#. '}
				row=${lines[1]##*:}

				printf '%d %d %d\n' "$row" "$arg" "$i"
			done | sort -k 1n -k 2n
		)
		# Stage 4: Output
		#
		# Now, we prune out the arguments that aren't
		# localizable.  Also, remove the "#." comment lines.
		# As explained above (in stage 3), when we see $arg go
		# down, that's the beginning of a new invocation.
		local expectflag=true
		local prev_arg=0
		local prev_row=0 # for better error messages only; no real logic
		local location
		for location in "${locations[@]}"; do
			# Split on spaces for the `read` below; IFS may
			# still be '' (from stage 2) or a newline (from
			# the previous iteration).
			IFS=' '
			local row arg i
			read -r row arg i <<<"$location"
			# Drop the first line (the "#. N" comment).
			local msg="${msgs[$i]#*$'\n'}"

			# See if we need to fiddle with $expectflag
			# (and do some sanity checking).
			if [[ $arg != "$((prev_arg+1))" ]]; then
				if ! $expectflag; then
					local pos
					if [[ $row != "$prev_row" ]]; then
						printf -v pos "%s:%d-%d" "$file" "$prev_row" "$row"
					else
						printf -v pos "%s:%d" "$file" "$prev_row"
					fi
					>&2 printf "%s: $(gettext "flag error: Missing expected flag meaning at argument %d")\n" \
					    "$pos" "$((prev_arg+1))"
					exit 1 # $EXIT_FAILURE
				elif [[ $arg == "$((prev_arg+2))" ]]; then
					# skipped flag argument
					expectflag=false
				elif [[ "$arg" == 1 ]]; then
					# started new invocation
					expectflag=true
				elif [[ $arg == 2 ]]; then
					# started new invocation and skipped flag argument
					expectflag=false
				else
					local pos
					if [[ $row != "$prev_row" ]]; then
						printf -v pos "%s:%d-%d" "$file" "$prev_row" "$row"
					else
						printf -v pos "%s:%d" "$file" "$prev_row"
					fi
					>&2 printf "%s: $(gettext "flag error: Jumped from argument %d to %d")\n" \
					    "$pos" "$prev_arg" "$arg"
					exit 1 # $EXIT_FAILURE
				fi
			fi
			prev_arg=$arg
			prev_row=$row

			# Now we operate based on $row, $arg, $msg,
			# and $expectflag.
			if $expectflag; then
				# Split $msg into lines; lines[1] is the
				# second line of the entry, which is
				# tested for a trailing `:"` (a heading).
				IFS=$'\n'
				local lines=(${msg})
				if [[ ${lines[1]} == *':"' ]]; then
					# We expected a flag, but got
					# a heading
					printf -- '%s\n' "$msg"
				else
					# We expected a flag, and got
					# one!
					expectflag=false
				fi
			else
				printf -- '%s\n' "$msg"
				expectflag=true
			fi
		done
		# Running out of arguments while still waiting for a
		# flag's meaning is an error too.
		if ! $expectflag; then
			>&2 printf "%s:%d: $(gettext "flag error: Missing expected flag meaning at argument %d")\n" \
			    "$file" "$prev_row" \
			    "$((prev_arg+1))"
			exit 1 # $EXIT_FAILURE
		fi
	}
}

# Usage: ... | whitespace-collapse
#
# Filter xgettext output on stdin so that word-wrapped ("prose")
# messages come out with normalized whitespace on stdout:
#
#  1. Undo the awkward word-wrapping done by xgettext: a closing
#     quote, a line break, optional whitespace, and an opening quote
#     are removed, joining the pieces into one string.  Newlines are
#     temporarily turned into carriage returns so that sed sees the
#     whole input as a single line.
#  2. Collapse whitespace: `\n` and `\t` escapes and literal tabs
#     become spaces; runs of spaces shrink to one space, except after
#     `.`, `!` or `?`, where runs of two or more shrink to exactly two.
whitespace-collapse() {
	local unwrap='s/"\r\s*"//g'
	local collapse=(
		-e 's/(\\n|\\t|\t)/ /g'
		-e 's/(^|[^.!? ]) +/\1 /g'
		-e 's/([.!?])  +/\1  /g'
	)

	tr '\n' '\r' |
	    sed "$unwrap" |
	    tr '\r' '\n' |
	    sed -r "${collapse[@]}"
}

# Usage: main [OPTIONS] FILES...
#
# Parse the command line (see `usage` for the options), then write
# the combined message catalog for FILES to stdout.
#
# Returns 0 on success or after printing help, and 1 after a
# command-line error (which is reported through `errusage`).  Note
# that this turns on `set -euE -o pipefail` for the calling shell.
main() {
	set -euE -o pipefail
	local simple=()
	local prose=()
	local files=()
	local use_defaults=true
	local error=false
	local opt file

	declare -i i
	for (( i=1; i <= $#; i++ )); do
		opt="${!i}"
		case "$opt" in
			--simple|--prose)
				# The keyword is the following argument.
				# Check that there is one, rather than
				# letting `set -u` kill us with an
				# "unbound variable" message.
				if (( i >= $# )); then
					errusage "option requires an argument: %s" "$opt"
					error=true
				else
					i+=1
					if [[ $opt == --simple ]]; then
						simple+=(--keyword="${!i}")
					else
						prose+=(--keyword="${!i}")
					fi
				fi
				;;
			--simple=*) simple+=(--keyword="${opt#*=}");;
			--prose=*)  prose+=(--keyword="${opt#*=}");;
			-k) use_defaults=false;;
			--help|-h) usage; return 0;; # $EXIT_SUCCESS
			# `break` skips the loop's own i++, so step past
			# the `--` by hand.
			--) i+=1; break;;
			-*) errusage "unrecognized option: %s" "$opt"; error=true;;
			*) files+=("$opt");;
		esac
	done
	# Everything after `--` (if anything) is a file name.
	files+=("${@:$i}")
	if [[ ${#files[@]} -lt 1 ]]; then
		errusage "no input file given"
		error=true
	fi
	if "$error"; then
		return 1 # $EXIT_FAILURE
	fi
	if "$use_defaults"; then
		simple+=("${default_simple[@]}")
		prose+=("${default_prose[@]}")
	fi

	# Main code
	#
	# Three extraction passes (simple keywords, prose keywords,
	# and `flag` invocations) are concatenated; then the
	# "#, sh-format" lines are dropped and the result is passed
	# through msguniq.
	{
		xgettext-sh "${simple[@]}" -- "${files[@]}"
		xgettext-sh "${prose[@]}" -- "${files[@]}" | whitespace-collapse
		for file in "${files[@]}"; do
			xgettext-flag "$file"
		done
	} | sed '/^\#, sh-format/d' | msguniq -Fi --to-code=UTF-8
}

# Entry point: hand the command-line arguments to main.
main "$@"
