/*
 * $PostgreSQL: pgsql/contrib/hstore/hstore_compat.c,v 1.2.4.1 2010/09/16 02:54:07 tgl Exp $
 *
 * Notes on old/new hstore format disambiguation.
 *
 * There are three formats to consider:
 * 1) old contrib/hstore (referred to as hstore-old)
 * 2) prerelease pgfoundry hstore
 * 3) new contrib/hstore
 *
 * (2) and (3) are identical except for the HS_FLAG_NEWVERSION
 * bit, which is set in (3) but not (2).
 *
 * Values that are already in format (3), or which are
 * unambiguously in format (2), are handled by the first
 * "return immediately" test in hstoreUpgrade().
 *
 * To stress a point: we ONLY get here with possibly-ambiguous
 * values if we're doing some sort of in-place migration from an
 * old prerelease pgfoundry hstore-new; and we explicitly don't
 * support that without fixing up any potentially padded values
 * first. Most of the code here is serious overkill, but the
 * performance penalty isn't serious (especially compared to the
 * palloc() that we have to do anyway) and the belt-and-braces
 * validity checks provide some reassurance. (If for some reason
 * we get a value that would have worked on the old code, but
 * which would be botched by the conversion code, the validity
 * checks will fail it first so we get an error rather than bad
 * data.)
 *
 * Note also that empty hstores are the same in (1) and (2), so
 * there are some special-case paths for them.
 *
 * We tell the difference between formats (1) and (2) as follows (but
 * note that there are some edge cases where we can't tell; see
 * comments in hstoreUpgrade):
 *
 * First, since there must be at least one entry, we look at
 * how the bits line up. The new format looks like:
 *
 * 10kkkkkkkkkkkkkkkkkkkkkkkkkkkkkk  (k..k = keylen)
 * 0nvvvvvvvvvvvvvvvvvvvvvvvvvvvvvv  (v..v = keylen+vallen)
 *
 * The old format looks like one of these, depending on endianness
 * and bitfield layout: (k..k = keylen, v..v = vallen, p..p = pos,
 * n = isnull)
 *
 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
 * nppppppppppppppppppppppppppppppp
 *
 * kkkkkkkkkkkkkkkkvvvvvvvvvvvvvvvv
 * pppppppppppppppppppppppppppppppn
 *
 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
 * nppppppppppppppppppppppppppppppp
 *
 * vvvvvvvvvvvvvvvvkkkkkkkkkkkkkkkk
 * pppppppppppppppppppppppppppppppn   (usual i386 format)
 *
 * If the entry is in old format, for the first entry "pos" must be 0.
 * We can obviously see that either keylen or vallen must be >32768
 * for there to be any ambiguity (which is why lengths less than that
 * are fasttracked in hstore.h). Since "pos"==0, the "v" field in the
 * new-format interpretation can only be 0 or 1, which constrains all
 * but three bits of the old-format's k and v fields. But in addition
 * to all of this, the data length implied by the keylen and vallen
 * must fit in the varlena size. So the only ambiguous edge case for
 * hstores with only one entry occurs between a new-format entry with
 * an excess (~32k) of padding, and an old-format entry. But we know
 * which format to use in that case based on how we were compiled, so
 * no actual data corruption can occur.
 *
 * If there is more than one entry, the requirement that keys do not
 * decrease in length, and that positions increase contiguously, and
 * that the end of the data not be beyond the end of the varlena
 * itself, disambiguates in almost all other cases. There is a small
 * set of ambiguous cases which could occur if the old-format value
 * has a large excess of padding and just the right pattern of key
 * sizes, but these are also handled based on how we were compiled.
 *
 * The otherwise undocumented function hstore_version_diag is provided
 * for testing purposes.
 */
#include "postgres.h"

#include "funcapi.h"

#include "hstore.h"

/*
 * This is the structure used for entries in the old contrib/hstore
 * implementation. Notice that this is the same size as the new entry
 * (two 32-bit words per key/value pair) and that the header is the
 * same, so the old and new versions of ARRPTR, STRPTR, CALCDATASIZE
 * etc. are compatible.
 *
 * If the above statement isn't true on some bizarre platform, we're
 * a bit hosed (see Assert in hstoreValidOldFormat).
 */
typedef struct
{
	uint16		keylen;			/* length of the key, in bytes */
	uint16		vallen;			/* length of the value, in bytes; not counted
								 * when valisnull is set */
	uint32
				valisnull:1,	/* nonzero if the value is NULL */
				pos:31;			/* offset of this pair's data in the string
								 * area; 0 for the first entry, and each later
								 * entry starts where the previous one ended */
} HOldEntry;

/* Forward declarations of the file-local format validators defined below */
static int	hstoreValidNewFormat(HStore *hs);
static int	hstoreValidOldFormat(HStore *hs);


/*
 * Validity test for a new-format hstore.
 *	0 = not valid
 *	1 = valid but with "slop" in the length
 *	2 = exactly valid
 *
 * A value that carries HS_FLAG_NEWVERSION, or that holds no pairs, is
 * reported as exactly valid without looking at any entry.  Otherwise the
 * entry array is checked for: the "first" bit being set on entry 0 only,
 * nondecreasing end positions, nondecreasing key lengths, no null flag on
 * keys, and a total size that does not exceed the varlena size.
 *
 * No entry is read until the entry array itself is known to fit inside the
 * varlena, since "count" comes from data that has not been validated yet.
 */
static int
hstoreValidNewFormat(HStore *hs)
{
	int			count = HS_COUNT(hs);
	HEntry	   *entries = ARRPTR(hs);
	int			buflen;
	int			vsize;
	int			i;

	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 2;

	if (count == 0)
		return 2;

	/*
	 * The entry array alone must fit in the value before we dereference any
	 * of it; the last entry in particular is needed to learn the length of
	 * the string area.  (Same guard as in hstoreValidOldFormat.)
	 */
	if (CALCDATASIZE(count, 0) > VARSIZE(hs))
		return 0;

	if (!HSE_ISFIRST(entries[0]))
		return 0;

	/* end position of the last value gives the string area length */
	buflen = HSE_ENDPOS(entries[2 * count - 1]);
	vsize = CALCDATASIZE(count, buflen);

	if (vsize > VARSIZE(hs))
		return 0;

	/* entry position must be nondecreasing */

	for (i = 1; i < 2 * count; ++i)
	{
		if (HSE_ISFIRST(entries[i])
			|| (HSE_ENDPOS(entries[i]) < HSE_ENDPOS(entries[i - 1])))
			return 0;
	}

	/*
	 * key length must be nondecreasing and keys must not be null
	 *
	 * NOTE(review): the null flag of key 0 is not examined by this loop;
	 * confirm whether that is intentional.
	 */

	for (i = 1; i < count; ++i)
	{
		if (HS_KEYLEN(entries, i) < HS_KEYLEN(entries, i - 1))
			return 0;
		if (HSE_ISNULL(entries[2 * i]))
			return 0;
	}

	/* anything left over beyond the computed size is "slop" */
	if (vsize != VARSIZE(hs))
		return 1;

	return 2;
}

/*
 * Validity test for an old-format hstore.
 *	0 = not valid
 *	1 = valid but with "slop" in the length
 *	2 = exactly valid
 *
 * The value is treated as an array of HOldEntry.  It is rejected if it
 * carries HS_FLAG_NEWVERSION, if the pair count is out of range or the
 * entry array does not fit in the varlena, if key lengths ever decrease,
 * or if the pairs are not laid out contiguously starting at position 0.
 */
static int
hstoreValidOldFormat(HStore *hs)
{
	HOldEntry  *old = (HOldEntry *) ARRPTR(hs);
	int			npairs = hs->size_;
	int			nextpos = 0;
	int			needed;
	int			n;

	if (hs->size_ & HS_FLAG_NEWVERSION)
		return 0;

	/* New format uses an HEntry for key and another for value */
	Assert(sizeof(HOldEntry) == (2 * sizeof(HEntry)));

	if (npairs == 0)
		return 2;

	/* implausible count, or entry array alone overruns the value */
	if (npairs > 0xFFFFFFF || CALCDATASIZE(npairs, 0) > VARSIZE(hs))
		return 0;

	/* key length must be nondecreasing */
	for (n = 1; n < npairs; ++n)
	{
		if (old[n - 1].keylen > old[n].keylen)
			return 0;
	}

	/*
	 * Every pair must begin exactly where the previous one ended, with the
	 * first pair at position 0 (nextpos starts at 0, so the first iteration
	 * enforces that).  A pair may be zero-length (""=>""), so positions need
	 * not strictly increase.
	 */
	for (n = 0; n < npairs; ++n)
	{
		if (old[n].pos != nextpos)
			return 0;

		nextpos += (old[n].keylen
					+ ((old[n].valisnull) ? 0 : old[n].vallen));
	}

	needed = CALCDATASIZE(npairs, nextpos);

	if (needed > VARSIZE(hs))
		return 0;

	/* exact fit is 2; leftover bytes at the end are "slop" */
	return (needed != VARSIZE(hs)) ? 1 : 2;
}


/*
 * hstoreUpgrade: PG_DETOAST_DATUM plus support for conversion of old hstores
 *
 * Detoasts "orig" and returns the resulting hstore, converted to the new
 * entry layout when it is found to be in the old contrib/hstore layout.
 * The result is either the detoasted value itself (updated in place only
 * when detoasting already produced a separate copy) or a fresh copy obtained
 * with PG_DETOAST_DATUM_COPY; the memory "orig" points at is never written.
 *
 * Raises an ERROR if the value passes neither the old-format nor the
 * new-format validity test.
 */
HStore *
hstoreUpgrade(Datum orig)
{
	HStore	   *hs = (HStore *) PG_DETOAST_DATUM(orig);
	int			valid_new;
	int			valid_old;
	bool		writable;

	/*
	 * Return immediately if no conversion needed: the value is flagged as
	 * new-version, is empty, or is smaller than 32768 bytes and has the
	 * "first" bit set on its first entry.
	 */
	if ((hs->size_ & HS_FLAG_NEWVERSION) ||
		hs->size_ == 0 ||
		(VARSIZE(hs) < 32768 && HSE_ISFIRST((ARRPTR(hs)[0]))))
		return hs;

	valid_new = hstoreValidNewFormat(hs);
	valid_old = hstoreValidOldFormat(hs);
	/* Do we have a writable copy? (true if detoasting returned a new pointer) */
	writable = ((void *) hs != (void *) DatumGetPointer(orig));

	/*
	 * Not a valid old-format value: it must be new-format or it is garbage.
	 * (The size_ == 0 test cannot succeed here, since empty values were
	 * already returned above.)
	 */
	if (!valid_old || hs->size_ == 0)
	{
		if (valid_new)
		{
			/*
			 * force the "new version" flag and the correct varlena length,
			 * but only if we have a writable copy already (which we almost
			 * always will, since short new-format values won't come through
			 * here)
			 */
			if (writable)
			{
				HS_SETCOUNT(hs, HS_COUNT(hs));
				HS_FIXSIZE(hs, HS_COUNT(hs));
			}
			return hs;
		}
		else
		{
			elog(ERROR, "invalid hstore value found");
		}
	}

	/*----------
	 * This is the tricky edge case. It is only possible in some quite extreme
	 * cases (the hstore must have had a lot of wasted padding space at the
	 * end). But the only way a "new" hstore value could get here is if we're
	 * upgrading in place from a pre-release version of hstore-new (NOT
	 * contrib/hstore), so we work off the following assumptions:
	 *
	 * 1. If you're moving from old contrib/hstore to hstore-new, you're
	 *    required to fix up any potential conflicts first, e.g. by running
	 *    ALTER TABLE ... USING col::text::hstore; on all hstore columns
	 *    before upgrading.
	 * 2. If you're moving from old contrib/hstore to new contrib/hstore,
	 *    then "new" values are impossible here.
	 * 3. If you're moving from pre-release hstore-new to hstore-new, then
	 *    "old" values are impossible here.
	 * 4. If you're moving from pre-release hstore-new to new contrib/hstore,
	 *    you're not doing so as an in-place upgrade, so there is no issue.
	 *
	 * So the upshot of all this is that we can treat all the edge cases as
	 * "new" if we're being built as hstore-new, and "old" if we're being
	 * built as contrib/hstore.
	 *
	 * XXX the WARNING can probably be downgraded to DEBUG1 once this has been
	 * beta-tested. But for now, it would be very useful to know if anyone can
	 * actually reach this case in a non-contrived setting.
	 *----------
	 */

	/* Reaching here means valid_old is nonzero; valid_new decides ambiguity */
	if (valid_new)
	{
#if HSTORE_IS_HSTORE_NEW
		elog(WARNING, "ambiguous hstore value resolved as hstore-new");

		/*
		 * force the "new version" flag and the correct varlena length, but
		 * only if we have a writable copy already (which we almost always
		 * will, since short new-format values won't come through here)
		 */
		if (writable)
		{
			HS_SETCOUNT(hs, HS_COUNT(hs));
			HS_FIXSIZE(hs, HS_COUNT(hs));
		}
		return hs;
#else
		/* warn, then fall through to the old-format conversion below */
		elog(WARNING, "ambiguous hstore value resolved as hstore-old");
#endif
	}

	/*
	 * must have an old-style value. Overwrite it in place as a new-style one,
	 * making sure we have a writable copy first.
	 */

	if (!writable)
		hs = (HStore *) PG_DETOAST_DATUM_COPY(orig);

	{
		int			count = hs->size_;
		HEntry	   *new_entries = ARRPTR(hs);
		HOldEntry  *old_entries = (HOldEntry *) ARRPTR(hs);
		int			i;

		/*
		 * Both arrays alias the same memory: old entry i occupies the same
		 * bytes as new entries 2*i and 2*i+1 (one HOldEntry is the size of
		 * two HEntry, per the Assert in hstoreValidOldFormat). Hence every
		 * old field is copied into a local before either new entry is
		 * stored.
		 */
		for (i = 0; i < count; ++i)
		{
			uint32		pos = old_entries[i].pos;
			uint32		keylen = old_entries[i].keylen;
			uint32		vallen = old_entries[i].vallen;
			bool		isnull = old_entries[i].valisnull;

			/* a null value contributes no bytes, whatever vallen says */
			if (isnull)
				vallen = 0;

			/* new entries store end positions: key end, then value end */
			new_entries[2 * i].entry = (pos + keylen) & HENTRY_POSMASK;
			new_entries[2 * i + 1].entry = (((pos + keylen + vallen) & HENTRY_POSMASK)
											| ((isnull) ? HENTRY_ISNULL : 0));
		}

		/* mark the first entry, then set the version flag and final size */
		if (count)
			new_entries[0].entry |= HENTRY_ISFIRST;
		HS_SETCOUNT(hs, count);
		HS_FIXSIZE(hs, count);
	}

	return hs;
}


/*
 * hstore_version_diag: testing aid that reports how an hstore value fares
 * against both format validators.
 *
 * Takes the hstore as argument 0 (detoasted, but NOT upgraded) and returns
 * an int32 equal to 10 * (old-format result) + (new-format result), where
 * each result is the 0/1/2 code returned by the corresponding validator.
 */
PG_FUNCTION_INFO_V1(hstore_version_diag);
Datum		hstore_version_diag(PG_FUNCTION_ARGS);
Datum
hstore_version_diag(PG_FUNCTION_ARGS)
{
	HStore	   *value = (HStore *) PG_DETOAST_DATUM(PG_GETARG_DATUM(0));
	int			new_score = hstoreValidNewFormat(value);
	int			old_score = hstoreValidOldFormat(value);
	int			combined;

	/* old-format code in the tens digit, new-format code in the units */
	combined = 10 * old_score + new_score;

	PG_RETURN_INT32(combined);
}
