/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident	"@(#)mlib_v_ImageLookUpS32S16Func.c	9.2	07/11/05 SMI"

#include <vis_proto.h>
#include <mlib_image.h>
#include <mlib_v_ImageLookUpFunc.h>

/* *********************************************************** */

/*
 * Forward declaration of the single-row kernel that the 1-, 2- and
 * 4-channel entry points in this file call.  Element k of the row is
 * looked up in table0..table3 according to k mod 4.
 */
static void mlib_v_ImageLookUp_S32_S16_124_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 *table0,
    const mlib_s16 *table1,
    const mlib_s16 *table2,
    const mlib_s16 *table3);

/*
 * Forward declaration of the single-row kernel that the 3-channel entry
 * point in this file calls.  Element k of the row is looked up in
 * table0..table2 according to k mod 3.
 */
static void mlib_v_ImageLookUp_S32_S16_3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 *table0,
    const mlib_s16 *table1,
    const mlib_s16 *table2);

/* *********************************************************** */

/* vis_ld_u16_i() with the base pointer X cast to (void *). */
#define	VIS_LD_U16_I(X, Y)	vis_ld_u16_i((void *)(X), (Y))
/*
 * Size in bytes of 2147483648 (2^31) table elements.  The expansion names
 * an identifier `table', so the macro is only usable where a suitable
 * `table' is in scope.  The entry points below add this byte offset to each
 * table base pointer before indexing it with source values; presumably this
 * lets a signed 32-bit source value be used directly as the index -- TODO
 * confirm against the table layout expected from the caller.
 */
#define	HALF_U64	(MLIB_U64_CONST(2147483648) * sizeof (table[0][0]))

/* *********************************************************** */

/*
 * Single-row lookup kernel used by the 1-, 2- and 4-channel entry points.
 *
 * For every k in [0, xsize) the 16-bit value found at byte offset
 * 2 * src[k] from one of the table pointers is stored to dst[k]; the
 * table pointer is table0..table3 according to k mod 4.
 *
 *   src             source elements, used as table indices
 *   dst             destination row; it is written through an mlib_d64
 *                   pointer, and the callers in this file first advance
 *                   it to an 8-byte boundary
 *   xsize           number of elements to produce
 *   table0..table3  table for the elements with k mod 4 == 0..3
 *
 * NOTE(review): the remarks about what the vis_* intrinsics do follow the
 * published VIS instruction semantics; the intrinsics themselves are not
 * defined in this file.
 */
void
mlib_v_ImageLookUp_S32_S16_124_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 *table0,
    const mlib_s16 *table1,
    const mlib_s16 *table2,
    const mlib_s16 *table3)
{
/* pointer to source data */
	mlib_s32 *sp;

/* source data: the four indices of the group being looked up */
	mlib_s32 s0, s1, s2, s3;

/* pointer to start of destination */
	mlib_s16 *dl;

/* pointer to end of destination (last element, inclusive) */
	mlib_s16 *dend;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* table entries loaded for elements 0..2 of the current group */
	mlib_d64 t0, t1, t2;

/*
 * t3: table entry for element 3 of the group.
 * acc0: accumulator receiving elements 0 and 1 (and the row tail).
 * NOTE(review): acc0 and acc1 have no initializer and are passed to
 * vis_faligndata() before their first assignment; it looks like only
 * the lanes shifted in afterwards are ever stored -- TODO confirm.
 */
	mlib_d64 t3, acc0;

/* edge mask */
	mlib_s32 emask;

/* loop variable, and number of elements in the row tail */
	mlib_s32 i, num;

/* accumulator receiving elements 2 and 3 of the group */
	mlib_d64 acc1;

	dl = dst;
	sp = (void *)src;
	dp = (mlib_d64 *)dl;
	dend = dl + xsize - 1;

	/*
	 * Align offset 6: each vis_faligndata(t, acc) below then yields the
	 * low 16 bits of t followed by the upper 48 bits of acc, i.e. it
	 * pushes one table entry in at the front of the accumulator.
	 */
	vis_alignaddr((void *)0, 6);

	if (xsize >= 4) {

		/* preload the indices of the first group of four */
		s0 = sp[0];
		s1 = sp[1];
		s2 = sp[2];
		s3 = sp[3];
		sp += 4;

		/*
		 * Shuffle mask selecting bytes 0-3 of the first operand and
		 * bytes 0-3 of the second (bytes 8-b of the pair).
		 */
		vis_write_bmask(0x012389ab, 0);

		/*
		 * Pipelined main loop: look up the group whose indices are
		 * already in s0..s3 while fetching the indices of the next
		 * group.  The bound i <= xsize - 8 keeps that fetch of
		 * sp[0..3] inside the row.
		 */
#pragma pipeloop(0)
		for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
			t3 = VIS_LD_U16_I(table3, ((mlib_addr)2 * s3));
			t2 = VIS_LD_U16_I(table2, ((mlib_addr)2 * s2));
			t1 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s1));
			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			/* later element first, so the earlier one ends up in front */
			acc1 = vis_faligndata(t3, acc1);
			acc1 = vis_faligndata(t2, acc1);
			acc0 = vis_faligndata(t1, acc0);
			acc0 = vis_faligndata(t0, acc0);
			s0 = sp[0];
			s1 = sp[1];
			s2 = sp[2];
			s3 = sp[3];
			/* combine the two halves and store four results at once */
			(*dp++) = vis_bshuffle(acc0, acc1);
		}

		/* epilogue: the last full group, whose indices are already loaded */
		t3 = VIS_LD_U16_I(table3, ((mlib_addr)2 * s3));
		t2 = VIS_LD_U16_I(table2, ((mlib_addr)2 * s2));
		t1 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s1));
		t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
		acc1 = vis_faligndata(t3, acc1);
		acc1 = vis_faligndata(t2, acc1);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		(*dp++) = vis_bshuffle(acc0, acc1);
	}

	/* row tail: one to three elements left after the full groups */
	if ((mlib_addr)dp <= (mlib_addr)dend) {

		/*
		 * Move sp to the last remaining element; the tail is walked
		 * backwards so that the first remaining element is pushed in
		 * last and lands in the leading lane of acc0.
		 */
		num = (mlib_s32)((mlib_s16 *)dend - (mlib_s16 *)dp);
		sp += num;
		num++;

		if (num == 1) {
			s0 = *sp;

			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);
		} else if (num == 2) {
			s0 = *sp;
			sp--;

			t0 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);

			s0 = *sp;

			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);
		} else if (num == 3) {
			s0 = *sp;
			sp--;

			t0 = VIS_LD_U16_I(table2, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);

			s0 = *sp;
			sp--;

			t0 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);

			s0 = *sp;

			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);
		}

		/* partial store: only the 16-bit lanes from dp through dend */
		emask = vis_edge16(dp, dend);
		vis_pst_16(acc0, dp, emask);
	}
}

/* *********************************************************** */

/*
 * One-channel S32 -> S16 table lookup over a two-dimensional image.
 *
 *   src, slb   first source row and source row stride in bytes
 *   dst, dlb   first destination row and destination row stride in bytes
 *   xsize      number of elements per row
 *   ysize      number of rows
 *   table      table[0] is the lookup table; it is advanced by HALF_U64
 *              bytes before being indexed with the source values
 *
 * Every row is handled in two parts: a scalar prologue that runs until
 * the destination pointer reaches an 8-byte boundary (or the row ends),
 * and the remainder, which is passed to
 * mlib_v_ImageLookUp_S32_S16_124_D1() with the same table in all four
 * slots.
 */
void
mlib_v_ImageLookUp_S32_S16_1(
    const mlib_s32 *src,
    mlib_s32 slb,
    mlib_s16 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s16 **table)
{
	const mlib_s16 *lut = (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
	mlib_s32 *src_row = (void *)src;
	mlib_s16 *dst_row = dst;
	mlib_s32 row;

	for (row = 0; row < ysize; row++) {
		mlib_s32 *sp = src_row;
		mlib_s16 *dp = dst_row;
		mlib_s32 left = xsize;
		/* 16-bit elements in front of the next 8-byte boundary */
		mlib_s32 head =
		    (mlib_s32)(((8 - ((mlib_addr)dp & 7)) & 7) >> 1);

		/* never peel more than the row holds */
		if (head > left) {
			head = left;
		}

		/* scalar prologue */
		while (head > 0) {
			*dp++ = lut[*sp++];
			head--;
			left--;
		}

		/* aligned remainder */
		if (left > 0) {
			mlib_v_ImageLookUp_S32_S16_124_D1(sp, dp, left,
			    lut, lut, lut, lut);
		}

		src_row = (mlib_s32 *)((mlib_u8 *)src_row + slb);
		dst_row = (mlib_s16 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */

/*
 * Two-channel S32 -> S16 table lookup over a two-dimensional image.
 *
 *   src, slb   first source row and source row stride in bytes
 *   dst, dlb   first destination row and destination row stride in bytes
 *   xsize      number of pixels per row (two elements each)
 *   ysize      number of rows
 *   table      table[0] and table[1] are the per-channel tables; each is
 *              advanced by HALF_U64 bytes before being indexed
 *
 * Every row is handled in two parts: a scalar prologue that runs until
 * the destination pointer reaches an 8-byte boundary (or the row ends),
 * and the remainder, which is passed to
 * mlib_v_ImageLookUp_S32_S16_124_D1().  When the prologue consumes an odd
 * number of elements the two tables are exchanged, so that the kernel
 * starts with the table belonging to its first element.
 */
void
mlib_v_ImageLookUp_S32_S16_2(
    const mlib_s32 *src,
    mlib_s32 slb,
    mlib_s16 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s16 **table)
{
	mlib_s32 *src_row = (void *)src;
	mlib_s16 *dst_row = dst;
	mlib_s32 row;

	for (row = 0; row < ysize; row++) {
		mlib_s32 *sp = src_row;
		mlib_s16 *dp = dst_row;
		const mlib_s16 *lut_a =
		    (void *)&(((mlib_u8 **)table)[0][HALF_U64]);
		const mlib_s16 *lut_b =
		    (void *)&(((mlib_u8 **)table)[1][HALF_U64]);
		mlib_s32 left = xsize * 2;
		/* 16-bit elements in front of the next 8-byte boundary */
		mlib_s32 head =
		    (mlib_s32)(((8 - ((mlib_addr)dp & 7)) & 7) >> 1);
		mlib_s32 k;

		/* never peel more than the row holds */
		if (head > left) {
			head = left;
		}

		/* complete channel pairs of the prologue */
		for (k = 1; k < head; k += 2) {
			dp[0] = lut_a[sp[0]];
			dp[1] = lut_b[sp[1]];
			dp += 2;
			sp += 2;
			left -= 2;
		}

		/* odd leftover element: emit it and exchange the tables */
		if (head & 1) {
			const mlib_s16 *swap = lut_a;

			*dp++ = lut_a[*sp++];
			left--;
			lut_a = lut_b;
			lut_b = swap;
		}

		/* aligned remainder */
		if (left > 0) {
			mlib_v_ImageLookUp_S32_S16_124_D1(sp, dp, left,
			    lut_a, lut_b, lut_a, lut_b);
		}

		src_row = (mlib_s32 *)((mlib_u8 *)src_row + slb);
		dst_row = (mlib_s16 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */

/*
 * Four-channel S32 -> S16 table lookup over a two-dimensional image.
 *
 *   src, slb   first source row and source row stride in bytes
 *   dst, dlb   first destination row and destination row stride in bytes
 *   xsize      number of pixels per row (four elements each)
 *   ysize      number of rows
 *   table      table[0..3] are the per-channel tables; each is advanced
 *              by HALF_U64 bytes before being indexed
 *
 * Every row is handled in two parts: a scalar prologue of at most three
 * elements that runs until the destination pointer reaches an 8-byte
 * boundary, and the remainder, which is passed to
 * mlib_v_ImageLookUp_S32_S16_124_D1().  The tables handed to the kernel
 * are rotated by the number of elements the prologue consumed, so that
 * the kernel starts with the table belonging to its first element.
 */
void
mlib_v_ImageLookUp_S32_S16_4(
    const mlib_s32 *src,
    mlib_s32 slb,
    mlib_s16 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s16 **table)
{
	mlib_s32 *src_row = (void *)src;
	mlib_s16 *dst_row = dst;
	mlib_s32 row;

	for (row = 0; row < ysize; row++) {
		mlib_s32 *sp = src_row;
		mlib_s16 *dp = dst_row;
		const mlib_s16 *lut[4];
		mlib_s32 left = xsize * 4;
		/* 16-bit elements in front of the next 8-byte boundary (0..3) */
		mlib_s32 head =
		    (mlib_s32)(((8 - ((mlib_addr)dp & 7)) & 7) >> 1);
		mlib_s32 k;

		for (k = 0; k < 4; k++) {
			lut[k] = (void *)&(((mlib_u8 **)table)[k][HALF_U64]);
		}

		/* never peel more than the row holds */
		if (head > left) {
			head = left;
		}

		/* scalar prologue: element k belongs to channel k */
		for (k = 0; k < head; k++) {
			*dp++ = lut[k][*sp++];
			left--;
		}

		/*
		 * Aligned remainder.  Its first element belongs to channel
		 * `head', hence the rotation of the table arguments.
		 */
		if (left > 0) {
			mlib_v_ImageLookUp_S32_S16_124_D1(sp, dp, left,
			    lut[head & 3], lut[(head + 1) & 3],
			    lut[(head + 2) & 3], lut[(head + 3) & 3]);
		}

		src_row = (mlib_s32 *)((mlib_u8 *)src_row + slb);
		dst_row = (mlib_s16 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */

/*
 * Single-row lookup kernel used by the 3-channel entry point.
 *
 * For every k in [0, xsize) the 16-bit value found at byte offset
 * 2 * src[k] from one of the table pointers is stored to dst[k]; the
 * table pointer is table0..table2 according to k mod 3.  Results are
 * produced four at a time, so after each group of four the three table
 * pointers are rotated by one to keep them in step with the channels.
 *
 *   src             source elements, used as table indices
 *   dst             destination row; it is written through an mlib_d64
 *                   pointer, and the caller in this file first advances
 *                   it to an 8-byte boundary
 *   xsize           number of elements to produce
 *   table0..table2  table for the elements with k mod 3 == 0..2
 *
 * NOTE(review): the remarks about what the vis_* intrinsics do follow the
 * published VIS instruction semantics; the intrinsics themselves are not
 * defined in this file.
 */
void
mlib_v_ImageLookUp_S32_S16_3_D1(
    const mlib_s32 *src,
    mlib_s16 *dst,
    mlib_s32 xsize,
    const mlib_s16 *table0,
    const mlib_s16 *table1,
    const mlib_s16 *table2)
{
/* pointer to source data */
	mlib_s32 *sp;

/* source data: the four indices of the group being looked up */
	mlib_s32 s0, s1, s2, s3;

/* pointer to start of destination */
	mlib_s16 *dl;

/* pointer to end of destination (last element, inclusive) */
	mlib_s16 *dend;

/* aligned pointer to destination */
	mlib_d64 *dp;

/* table entries loaded for the four elements of the current group */
	mlib_d64 t0, t1, t2, t3;

/*
 * accumulator receiving elements 0 and 1 (and the row tail).
 * NOTE(review): acc0 and acc1 have no initializer and are passed to
 * vis_faligndata() before their first assignment; it looks like only
 * the lanes shifted in afterwards are ever stored -- TODO confirm.
 */
	mlib_d64 acc0;

/* edge mask */
	mlib_s32 emask;

/* loop variable, and number of elements in the row tail */
	mlib_s32 i, num;
/* scratch pointer used while rotating table0..table2 */
	const mlib_s16 *table;

/* accumulator receiving elements 2 and 3 of the group */
	mlib_d64 acc1;

	dl = dst;
	sp = (void *)src;
	dp = (mlib_d64 *)dl;
	dend = dl + xsize - 1;

	/*
	 * Align offset 6: each vis_faligndata(t, acc) below then yields the
	 * low 16 bits of t followed by the upper 48 bits of acc, i.e. it
	 * pushes one table entry in at the front of the accumulator.
	 */
	vis_alignaddr((void *)0, 6);

	if (xsize >= 4) {

		/* preload the indices of the first group of four */
		s0 = sp[0];
		s1 = sp[1];
		s2 = sp[2];
		s3 = sp[3];
		sp += 4;

		/*
		 * Shuffle mask selecting bytes 0-3 of the first operand and
		 * bytes 0-3 of the second (bytes 8-b of the pair).
		 */
		vis_write_bmask(0x012389ab, 0);

		/*
		 * Pipelined main loop: look up the group whose indices are
		 * already in s0..s3 while fetching the indices of the next
		 * group.  The bound i <= xsize - 8 keeps that fetch of
		 * sp[0..3] inside the row.
		 */
#pragma pipeloop(0)
		for (i = 0; i <= xsize - 8; i += 4, sp += 4) {
			/* the fourth element wraps around to the first channel */
			t3 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s3));
			t2 = VIS_LD_U16_I(table2, ((mlib_addr)2 * s2));
			t1 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s1));
			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			/* later element first, so the earlier one ends up in front */
			acc1 = vis_faligndata(t3, acc1);
			acc1 = vis_faligndata(t2, acc1);
			acc0 = vis_faligndata(t1, acc0);
			acc0 = vis_faligndata(t0, acc0);
			s0 = sp[0];
			s1 = sp[1];
			s2 = sp[2];
			s3 = sp[3];
			/* combine the two halves and store four results at once */
			(*dp++) = vis_bshuffle(acc0, acc1);
			/*
			 * Four elements consumed with three channels: the next
			 * group starts one channel later, so rotate the tables.
			 */
			table = table0;
			table0 = table1;
			table1 = table2;
			table2 = table;
		}

		/* epilogue: the last full group, whose indices are already loaded */
		t3 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s3));
		t2 = VIS_LD_U16_I(table2, ((mlib_addr)2 * s2));
		t1 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s1));
		t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
		acc1 = vis_faligndata(t3, acc1);
		acc1 = vis_faligndata(t2, acc1);
		acc0 = vis_faligndata(t1, acc0);
		acc0 = vis_faligndata(t0, acc0);
		(*dp++) = vis_bshuffle(acc0, acc1);
		/* keep the tables in step for the row tail below */
		table = table0;
		table0 = table1;
		table1 = table2;
		table2 = table;
	}

	/* row tail: one to three elements left after the full groups */
	if ((mlib_addr)dp <= (mlib_addr)dend) {

		/*
		 * Move sp to the last remaining element; the tail is walked
		 * backwards so that the first remaining element is pushed in
		 * last and lands in the leading lane of acc0.
		 */
		num = (mlib_s32)((mlib_s16 *)dend - (mlib_s16 *)dp);
		sp += num;
		num++;

		if (num == 1) {
			s0 = *sp;

			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);
		} else if (num == 2) {
			s0 = *sp;
			sp--;

			t0 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);

			s0 = *sp;

			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);
		} else if (num == 3) {
			s0 = *sp;
			sp--;

			t0 = VIS_LD_U16_I(table2, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);

			s0 = *sp;
			sp--;

			t0 = VIS_LD_U16_I(table1, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);

			s0 = *sp;

			t0 = VIS_LD_U16_I(table0, ((mlib_addr)2 * s0));
			acc0 = vis_faligndata(t0, acc0);
		}

		/* partial store: only the 16-bit lanes from dp through dend */
		emask = vis_edge16(dp, dend);
		vis_pst_16(acc0, dp, emask);
	}
}

/* *********************************************************** */

/*
 * Three-channel S32 -> S16 table lookup over a two-dimensional image.
 *
 *   src, slb   first source row and source row stride in bytes
 *   dst, dlb   first destination row and destination row stride in bytes
 *   xsize      number of pixels per row (three elements each)
 *   ysize      number of rows
 *   table      table[0..2] are the per-channel tables; each is advanced
 *              by HALF_U64 bytes before being indexed
 *
 * Every row is handled in two parts: a scalar prologue of at most three
 * elements that runs until the destination pointer reaches an 8-byte
 * boundary, and the remainder, which is passed to
 * mlib_v_ImageLookUp_S32_S16_3_D1().  The tables handed to the kernel are
 * rotated by the number of elements the prologue consumed (modulo three),
 * so that the kernel starts with the table belonging to its first element.
 */
void
mlib_v_ImageLookUp_S32_S16_3(
    const mlib_s32 *src,
    mlib_s32 slb,
    mlib_s16 *dst,
    mlib_s32 dlb,
    mlib_s32 xsize,
    mlib_s32 ysize,
    const mlib_s16 **table)
{
	mlib_s32 *src_row = (void *)src;
	mlib_s16 *dst_row = dst;
	mlib_s32 row;

	for (row = 0; row < ysize; row++) {
		mlib_s32 *sp = src_row;
		mlib_s16 *dp = dst_row;
		const mlib_s16 *lut[3];
		mlib_s32 left = xsize * 3;
		/* 16-bit elements in front of the next 8-byte boundary (0..3) */
		mlib_s32 head =
		    (mlib_s32)(((8 - ((mlib_addr)dp & 7)) & 7) >> 1);
		mlib_s32 k;

		for (k = 0; k < 3; k++) {
			lut[k] = (void *)&(((mlib_u8 **)table)[k][HALF_U64]);
		}

		/* never peel more than the row holds */
		if (head > left) {
			head = left;
		}

		/*
		 * Scalar prologue.  head is at most 3, so k stays within
		 * 0..2 and element k belongs to channel k.
		 */
		for (k = 0; k < head; k++) {
			*dp++ = lut[k][*sp++];
			left--;
		}

		/*
		 * Aligned remainder.  Its first element belongs to channel
		 * head mod 3, hence the rotation of the table arguments.
		 */
		if (left > 0) {
			mlib_v_ImageLookUp_S32_S16_3_D1(sp, dp, left,
			    lut[head % 3], lut[(head + 1) % 3],
			    lut[(head + 2) % 3]);
		}

		src_row = (mlib_s32 *)((mlib_u8 *)src_row + slb);
		dst_row = (mlib_s16 *)((mlib_u8 *)dst_row + dlb);
	}
}

/* *********************************************************** */
