#!/bin/sh -ef
#
# Copyright (C) 2003-2008  Dmitry V. Levin <ldv@altlinux.org>
# Copyright (C) 2007,2009  Alexey Tourbin <at@altlinux.org>
# 
# Contents index routines for hsh-initroot
#
# This file is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
# 
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
# 
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA.
#

# The contents index is built in two stages:
# 1) a contents part is generated for every pkglist;
# 2) all contents parts are merged into a single index.
# Several shortcuts are taken to skip work that is not needed.
# This flag is non-empty when the merge stage has to be run.
regen_contents=''

# Write the sorted, de-duplicated list of pkglist file names (as reported
# by "apt-cache dump") to $tmpdir/parts_list.
# Reads: aptbox, tmpdir, contents_dir.  May set: regen_contents.
# Calls fatal if apt-cache fails or if no pkglist is found.
init_contents_state()
{
	local apt_opts='-q --no-generate -o APT::Cache::DumpPackages=false'
	local fail_flag="$tmpdir"/FAIL
	local new_list="$tmpdir"/parts_list

	# The exit status of the first pipeline stage is not visible to the
	# shell, so a failure is recorded in a flag file instead.
	: >"$fail_flag"
	{
		"$aptbox"/apt-cache $apt_opts dump ||
			echo 1 >>"$fail_flag"
	} |
		LC_ALL=C sed -n '/^File: \//{s/.*\///;/_pkglist/p}' |
		LC_ALL=C sort -u >"$new_list"

	if [ -s "$fail_flag" ] || [ ! -s "$new_list" ]; then
		fatal 'Failed to initialize contents state.'
	fi

	# A changed parts_list means the contents index has to be remerged.
	if [ -z "$regen_contents" ] &&
	   ! cmp -s "$contents_dir"/parts_list "$new_list"; then
		regen_contents=1
	fi

	verbose 'Initialized contents state.'
}

# Colon-separated list of directories covered by the contents index,
# in the order in which they appear in the index.
contents_index_bin_dirs=/bin:/sbin:/usr/bin:/usr/sbin
contents_index_bin_dirs=$contents_index_bin_dirs:/usr/X11R6/bin:/usr/games
contents_index_bin_dirs=$contents_index_bin_dirs:/usr/lib/kde4/bin:/usr/lib/kde4bin

# Create contents part for the pkglist named by $1.
#
# Arguments: $1 - pkglist file name, relative to $pkglists_dir.
# Reads:     tmpdir, pkglists_dir, parts_dir, contents_index_bin_dirs.
# May set:   regen_contents (to 1 when the new part differs from the
#            part currently installed in $parts_dir).
# Result:    $parts_dir/$1 holds sorted "path<TAB>name" records and
#            carries the timestamp of $pkglists_dir/$1.
# Calls fatal if pkglist-query fails.
create_contents_part()
{
	local pkglist="$1"; shift
	local tab; tab=$(printf '\t')

	# Since pkglist files do not provide SHA1HEADER,
	# we use NVRA to identify a package.
	local NVRA='%{NAME}-%{VERSION}-%{RELEASE}.%{ARCH}'
	# Packaged files are usually mapped to package names.
	local Q1="[%{FILENAMES}\t%{NAME}\t$NVRA\n]"
	# Alternatives-like virtual paths should map to themselves.
	local Q2="[%{PROVIDENAME}\t%{PROVIDENAME}\t$NVRA\n]"

	# Prepare the extended regular expression: the colon-separated
	# directory list becomes an alternation, and only entries located
	# directly in one of those directories are accepted.
	local dirs_or; dirs_or=$(printf %s "$contents_index_bin_dirs" |sed 's@^/@@;s@:/@|@g')
	local RE="^/($dirs_or)/[^/]+$tab"

	# The exit status of pkglist-query is hidden by the pipe, so a
	# failure is recorded in a flag file.  'grep -E' is used instead of
	# the obsolescent 'egrep' wrapper, which warns on stderr with recent
	# GNU grep.  Exit status 1 (no match) is not an error.
	>"$tmpdir"/FAIL
	{ pkglist-query "$Q1$Q2" "$pkglists_dir/$pkglist" || echo 1 >>"$tmpdir"/FAIL; } |
		LC_ALL=C grep -E "$RE" >"$tmpdir"/part || [ $? -eq 1 ]

	if [ -s "$tmpdir"/FAIL ]; then
		fatal "Failed to create contents part for $pkglist."
	fi

	# Files kill provides, except for virtual paths: for each
	# (path, NVRA) pair only one record is kept.
	LC_ALL=C sort -t"$tab" -u -k1,1 -k3,3 -o "$tmpdir"/part "$tmpdir"/part

	# Discard NVRA.
	cut -f1,2 <"$tmpdir"/part >"$tmpdir"/part+
	mv -f "$tmpdir"/part+ "$tmpdir"/part

	# Fold identical records and prepare for later 'sort -m'.
	LC_ALL=C sort -u -o "$tmpdir"/part "$tmpdir"/part

	# The part has been created from scratch, but it is still rather
	# possible that there is no actual change, which can save us a merge.
	[ -n "$regen_contents" ] || cmp -s "$parts_dir/$pkglist" "$tmpdir"/part || regen_contents=1

	# Bless and install the part: the matching timestamp is what marks
	# the part as up to date with respect to its pkglist.
	touch -r "$pkglists_dir/$pkglist" "$tmpdir"/part
	mv -f "$tmpdir"/part "$parts_dir/$pkglist"

	verbose "Created contents part for $pkglist."
}

# Succeed only when both $1 and $2 are regular files and neither
# of them is newer than the other.
contents_match_timestamp()
{
	local one="$1" other="$2"

	if [ ! -f "$one" ] || [ ! -f "$other" ]; then
		return 1
	fi
	if [ "$one" -nt "$other" ]; then
		return 1
	fi
	if [ "$one" -ot "$other" ]; then
		return 1
	fi
	return 0
}

# Check whether the contents part for pkglist $1 can be reused, i.e.
# whether $parts_dir/$1 matches the timestamp of $pkglists_dir/$1.
# Arguments: $1 - pkglist file name.
# Returns:   1 when the part is missing or its timestamp differs.
valid_contents_part()
{
	# Keep the name in a local variable: the message below must not
	# depend on a variable that happens to be set in the caller.
	local pkglist="$1"

	if contents_match_timestamp "$pkglists_dir/$pkglist" "$parts_dir/$pkglist"; then
		verbose "Contents part for $pkglist is up to date."
	else
		return 1
	fi
}

# Merge contents parts into tmpdir/contents_index_bin.
#
# Reads: tmpdir (parts_list), parts_dir, contents_index_bin_dirs.
# Result: records are appended to $tmpdir/contents_index_bin, grouped by
#         directory in the order given by contents_index_bin_dirs; the
#         file then receives the timestamp of $tmpdir/parts_list.
# Calls fatal if there is nothing to merge, if the merge fails, or if
# the resulting index is empty.
merge_contents_parts()
{
	# Prepare argv for 'sort -m'.
	local pkglist
	set --
	while read -r pkglist; do
		set -- "$@" "$parts_dir/$pkglist"
	done <"$tmpdir"/parts_list
	[ $# -gt 0 ] ||
		fatal 'No contents parts to merge.'

	# Note that parts are sorted in ascii order, so that 'sort -m' is possible.
	# The merge does not depend on the directory being processed, so it
	# is done once, and its exit status is checked explicitly.
	LC_ALL=C sort -m -u "$@" >"$tmpdir"/merged ||
		fatal 'Failed to merge contents parts.'

	# However, for contents index, directories should be reordered.
	local dir
	for dir in $(IFS=:; echo $contents_index_bin_dirs); do
		# grep exit status 1 only means "no match", which is fine.
		LC_ALL=C grep "^$dir/" <"$tmpdir"/merged >"$tmpdir"/dir || [ $? -eq 1 ]
		if [ ! -s "$tmpdir"/dir ]; then
			verbose "No contents index entries for $dir."
			continue
		fi
		# Fold dups: change adjacent lines
		#	/usr/bin/r	pkgA
		#	/usr/bin/r	pkgB
		# into
		#	/usr/bin/r	/usr/bin/r
		# This will also exclude filenames with spaces.
		LC_ALL=C awk <"$tmpdir"/dir >>"$tmpdir"/contents_index_bin '
			NF==2 {
				if ($1==f)
					p = f
				else {
					if (f)
						print f "\t" p
					f = $1
					p = $2
				}
			}
			END {
				if (f)
					print f "\t" p
			}'
		verbose "Added contents index entries for $dir."
	done
	rm -f "$tmpdir"/merged

	[ -s "$tmpdir"/contents_index_bin ] ||
		fatal 'Created empty contents index.'

	touch -r "$tmpdir"/parts_list "$tmpdir"/contents_index_bin
	verbose 'Created contents index.'
}

# Copy file, use cp -l if possible.
#
# Arguments: $1 - source file; $2 - destination file path.
# Reads:     verbose (expanded unquoted as extra cp option(s), may be empty).
# A hard link is made when the source and the destination directory
# report the same device number; otherwise a regular copy is made.
cp_l_file_from_to()
{
	local from="$1"; shift
	local to="$1"; shift
	local cp_args dev_from dev_to to_dir

	# Directory that will hold the destination.  A destination without
	# any slash lives in the current directory.
	case "$to" in
		*/*) to_dir="${to%/*}/" ;;
		*) to_dir=./ ;;
	esac

	dev_from="$(stat -c '%d' -- "$from")"
	dev_to="$(stat -c '%d' -- "$to_dir")"
	if [ "$dev_from" = "$dev_to" ]; then
		cp_args=-l
	else
		cp_args=
	fi
	# $cp_args and $verbose are intentionally unquoted: each is either
	# empty or an option for cp.
	cp -f $cp_args $verbose -- "$from" "$to" >&2
}

# Entry point: build contents_index_bin from scratch and install it.
# Cached data is not consulted: every contents part is regenerated and
# the merge is always performed.
# Reads: cache_dir (must be set), verbose.  Sets: contents_index_bin.
create_contents()
{
	local contents_dir tmpdir parts_dir
	contents_dir="${cache_dir:?}"/contents
	tmpdir="$contents_dir"/tmp
	parts_dir="$contents_dir"/parts

	rm -rf "$tmpdir"
	# Plain mkdir (no -p) doubles as a check that $contents_dir exists.
	mkdir "$tmpdir"
	mkdir -p $verbose "$parts_dir" >&2

	local pkglists_dir=
	get_apt_config Dir::State::lists/d pkglists_dir
	# The value has to be an absolute path of an existing directory.
	if [ -n "${pkglists_dir##/*}" ] || [ ! -d "$pkglists_dir" ]; then
		fatal "apt-config: broken Dir::State::lists: $pkglists_dir."
	fi

	# Will merge anyway, disable optimizations.
	regen_contents=1

	local pkglist
	init_contents_state
	while read -r pkglist; do
		# Keep the callee away from the list being read by the loop.
		create_contents_part "$pkglist" </dev/null
	done <"$tmpdir"/parts_list

	merge_contents_parts
	mv -f "$tmpdir"/parts_list "$contents_dir"/parts_list
	mv -f "$tmpdir"/contents_index_bin "$contents_dir"/contents_index_bin

	rm -rf "$tmpdir"

	local installed=chroot/.host/contents_index_bin
	cp_l_file_from_to "$contents_dir"/contents_index_bin "$installed"
	contents_index_bin=$installed
}

# Entry point: bring contents_index_bin up to date and install it.
# Cached parts and the cached index are validated by timestamp and are
# rebuilt only when they do not match.
# Reads: cache_dir (must be set), verbose.  Sets: contents_index_bin.
update_contents()
{
	local contents_dir tmpdir parts_dir
	contents_dir="${cache_dir:?}"/contents
	tmpdir="$contents_dir"/tmp
	parts_dir="$contents_dir"/parts

	rm -rf "$tmpdir"
	# Plain mkdir (no -p) doubles as a check that $contents_dir exists.
	mkdir "$tmpdir"
	mkdir -p $verbose "$parts_dir" >&2

	local pkglists_dir=
	get_apt_config Dir::State::lists/d pkglists_dir
	# The value has to be an absolute path of an existing directory.
	if [ -n "${pkglists_dir##/*}" ] || [ ! -d "$pkglists_dir" ]; then
		fatal "apt-config: broken Dir::State::lists: $pkglists_dir."
	fi

	# The cached index is reusable as is only when it is non-empty and
	# carries the same timestamp as the cached parts_list.
	if [ ! -s "$contents_dir"/contents_index_bin ] ||
	   ! contents_match_timestamp "$contents_dir"/contents_index_bin "$contents_dir"/parts_list; then
		regen_contents=1
	fi

	local pkglist
	init_contents_state
	while read -r pkglist; do
		if ! valid_contents_part "$pkglist"; then
			# Keep the callee away from the list being read by the loop.
			create_contents_part "$pkglist" </dev/null
		fi
	done <"$tmpdir"/parts_list

	if [ -n "$regen_contents" ]; then
		merge_contents_parts
		mv -f "$tmpdir"/parts_list "$contents_dir"/parts_list
		mv -f "$tmpdir"/contents_index_bin "$contents_dir"/contents_index_bin
	else
		verbose 'Contents index is up to date.'
	fi

	rm -rf "$tmpdir"

	local installed=chroot/.host/contents_index_bin
	cp_l_file_from_to "$contents_dir"/contents_index_bin "$installed"
	contents_index_bin=$installed
}
