00001 /* 00002 * 00003 * mblock_sub44_sads_x86_h.c 00004 * Copyright (C) 2000 Andrew Stevens <as@comlab.ox.ac.uk> 00005 * 00006 * Fast block sum-absolute difference computation for a rectangular area 4*x 00007 * by y where y > h against a 4 by h block. 00008 * 00009 * Used for 4*4 sub-sampled motion compensation calculations. 00010 * 00011 * 00012 * This file is part of mpeg2enc, a free MPEG-2 video stream encoder 00013 * based on the original MSSG reference design 00014 * 00015 * mpeg2enc is free software; you can redistribute new parts 00016 * and/or modify under the terms of the GNU General Public License 00017 * as published by 00018 * the Free Software Foundation; either version 2 of the License, or 00019 * (at your option) any later version. 00020 * 00021 * mpeg2enc is distributed in the hope that it will be useful, 00022 * but WITHOUT ANY WARRANTY; without even the implied warranty of 00023 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00024 * GNU General Public License for more details. 00025 * 00026 * See the files for those sections (c) MSSG 00027 * 00028 * You should have received a copy of the GNU General Public License 00029 * along with this program; if not, write to the Free Software 00030 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 00031 */ 00032 00033 /* 00034 * 00035 * Generates a vector sad's for 4*4 sub-sampled pel (qpel) data (with 00036 * co-ordinates and top-left qpel address) from specified rectangle 00037 * against a specified 16*h pel (4*4 qpel) reference block. The 00038 * generated vector contains results only for those sad's that fall 00039 * below twice the running best sad and are aligned on 8-pel 00040 * boundaries 00041 * 00042 * Invariant: blk points to top-left sub-sampled pel for macroblock 00043 * at (ilow,ihigh) 00044 * i{low,high) j(low,high) must be multiples of 4. 00045 * 00046 * sad = Sum Absolute Differences 00047 * 00048 * NOTES: for best efficiency i{low,high) should be multiples of 16. 00049 * 00050 * */ 00051 00052 int SIMD_SUFFIX(mblock_sub44_dists)( uint8_t *blk, uint8_t *ref, 00053 int ilow,int jlow, 00054 int ihigh, int jhigh, 00055 int h, int rowstride, 00056 int threshold, 00057 mc_result_s *resvec) 00058 { 00059 int32_t x,y; 00060 uint8_t *currowblk = blk; 00061 uint8_t *curblk; 00062 mc_result_s *cres = resvec; 00063 int gridrowstride = (rowstride); 00064 00065 for( y=jlow; y <= jhigh ; y+=4) 00066 { 00067 curblk = currowblk; 00068 for( x = ilow; x <= ihigh; x += 4) 00069 { 00070 int weight; 00071 if( (x & 15) == (ilow & 15) ) 00072 { 00073 load_blk( curblk, rowstride, h ); 00074 } 00075 weight = SIMD_SUFFIX(qblock_sad)(ref, h, rowstride); 00076 if( weight <= threshold ) 00077 { 00078 threshold = intmin(weight<<2,threshold); 00079 /* Rough and-ready absolute distance penalty */ 00080 /* NOTE: This penalty is *vital* to correct operation 00081 as otherwise the sub-mean filtering won't work on very 00082 uniform images. 00083 */ 00084 cres->weight = (uint16_t)weight+((intabs(x)+intabs(y))>>3); 00085 cres->x = (uint8_t)x; 00086 cres->y = (uint8_t)y; 00087 ++cres; 00088 } 00089 curblk += 1; 00090 shift_blk(8); 00091 } 00092 currowblk += gridrowstride; 00093 } 00094 emms(); 00095 return cres - resvec; 00096 } 00097 00098 #undef concat
1.5.5