00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019 #include "../dsputil.h"
00020
00021 #include "gcc_fixes.h"
00022
00023 #include "dsputil_altivec.h"
00024
/*
 * Store operators plugged into the template through OP_U8_ALTIVEC.
 *
 *   d   - lvalue receiving the result
 *   s   - freshly computed value
 *   dst - value currently held at the destination (ignored by PUT)
 *
 * PUT simply assigns s to d; AVG assigns vec_avg(dst, s) to d.
 * Arguments and the whole expansion are parenthesized so the macros behave
 * like ordinary expressions wherever they are used.
 */
#define PUT_OP_U8_ALTIVEC(d, s, dst) ((d) = (s))
#define AVG_OP_U8_ALTIVEC(d, s, dst) ((d) = vec_avg((dst), (s)))
00027
00028 #define OP_U8_ALTIVEC PUT_OP_U8_ALTIVEC
00029 #define PREFIX_h264_chroma_mc8_altivec put_h264_chroma_mc8_altivec
00030 #define PREFIX_h264_chroma_mc8_num altivec_put_h264_chroma_mc8_num
00031 #define PREFIX_h264_qpel16_h_lowpass_altivec put_h264_qpel16_h_lowpass_altivec
00032 #define PREFIX_h264_qpel16_h_lowpass_num altivec_put_h264_qpel16_h_lowpass_num
00033 #define PREFIX_h264_qpel16_v_lowpass_altivec put_h264_qpel16_v_lowpass_altivec
00034 #define PREFIX_h264_qpel16_v_lowpass_num altivec_put_h264_qpel16_v_lowpass_num
00035 #define PREFIX_h264_qpel16_hv_lowpass_altivec put_h264_qpel16_hv_lowpass_altivec
00036 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_put_h264_qpel16_hv_lowpass_num
00037 #include "dsputil_h264_template_altivec.c"
00038 #undef OP_U8_ALTIVEC
00039 #undef PREFIX_h264_chroma_mc8_altivec
00040 #undef PREFIX_h264_chroma_mc8_num
00041 #undef PREFIX_h264_qpel16_h_lowpass_altivec
00042 #undef PREFIX_h264_qpel16_h_lowpass_num
00043 #undef PREFIX_h264_qpel16_v_lowpass_altivec
00044 #undef PREFIX_h264_qpel16_v_lowpass_num
00045 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
00046 #undef PREFIX_h264_qpel16_hv_lowpass_num
00047
00048 #define OP_U8_ALTIVEC AVG_OP_U8_ALTIVEC
00049 #define PREFIX_h264_chroma_mc8_altivec avg_h264_chroma_mc8_altivec
00050 #define PREFIX_h264_chroma_mc8_num altivec_avg_h264_chroma_mc8_num
00051 #define PREFIX_h264_qpel16_h_lowpass_altivec avg_h264_qpel16_h_lowpass_altivec
00052 #define PREFIX_h264_qpel16_h_lowpass_num altivec_avg_h264_qpel16_h_lowpass_num
00053 #define PREFIX_h264_qpel16_v_lowpass_altivec avg_h264_qpel16_v_lowpass_altivec
00054 #define PREFIX_h264_qpel16_v_lowpass_num altivec_avg_h264_qpel16_v_lowpass_num
00055 #define PREFIX_h264_qpel16_hv_lowpass_altivec avg_h264_qpel16_hv_lowpass_altivec
00056 #define PREFIX_h264_qpel16_hv_lowpass_num altivec_avg_h264_qpel16_hv_lowpass_num
00057 #include "dsputil_h264_template_altivec.c"
00058 #undef OP_U8_ALTIVEC
00059 #undef PREFIX_h264_chroma_mc8_altivec
00060 #undef PREFIX_h264_chroma_mc8_num
00061 #undef PREFIX_h264_qpel16_h_lowpass_altivec
00062 #undef PREFIX_h264_qpel16_h_lowpass_num
00063 #undef PREFIX_h264_qpel16_v_lowpass_altivec
00064 #undef PREFIX_h264_qpel16_v_lowpass_num
00065 #undef PREFIX_h264_qpel16_hv_lowpass_altivec
00066 #undef PREFIX_h264_qpel16_hv_lowpass_num
00067
/*
 * H264_MC(OPNAME, SIZE, CODETYPE)
 *
 * Expands to sixteen static functions named
 *     OPNAME h264_qpel SIZE _mcXY_ CODETYPE (uint8_t *dst, uint8_t *src, int stride)
 * for XY in {00,10,20,30, 01,11,21,31, 02,12,22,32, 03,13,23,33}.
 *
 *   OPNAME   - prefix selecting the final store flavour (e.g. put_ or avg_)
 *   SIZE     - block edge length, pasted into the helper names and used to
 *              size the scratch buffers
 *   CODETYPE - suffix of the helpers being called (e.g. altivec)
 *
 * How each variant is built:
 *   mc00             forwards to OPNAME pixels SIZE _CODETYPE.
 *   mc20, mc02, mc22 run the h / v / hv lowpass filter straight into dst with
 *                    the OPNAME flavour.
 *   all others       produce one or two intermediate SIZE x SIZE 8-bit planes
 *                    with the put_ flavour of the filters (row stride SIZE),
 *                    then merge two sources into dst through
 *                    OPNAME pixels SIZE _l2_ CODETYPE.
 *
 * Scratch layout (16-byte aligned via __align16):
 *   bytes [0, SIZE*SIZE)            first 8-bit plane
 *   bytes [SIZE*SIZE, 2*SIZE*SIZE)  second 8-bit plane
 *   from byte 2*SIZE*SIZE onwards   16-bit work area handed to the hv filter
 *                                   (variants that combine a plane with hv)
 */
#define H264_MC(OPNAME, SIZE, CODETYPE) \
static void OPNAME ## h264_qpel ## SIZE ## _mc00_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    OPNAME ## pixels ## SIZE ## _ ## CODETYPE(dst, src, stride, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc10_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/8] __align16; \
    uint8_t * const interp = (uint8_t *)scratch; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, interp, stride, stride, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc20_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    OPNAME ## h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(dst, src, stride, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc30_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/8] __align16; \
    uint8_t * const interp = (uint8_t *)scratch; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+1, interp, stride, stride, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc01_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/8] __align16; \
    uint8_t * const interp = (uint8_t *)scratch; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src, interp, stride, stride, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc02_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    OPNAME ## h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(dst, src, stride, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc03_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/8] __align16; \
    uint8_t * const interp = (uint8_t *)scratch; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, src+stride, interp, stride, stride, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc11_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/4] __align16; \
    uint8_t * const interp_h = (uint8_t *)scratch; \
    uint8_t * const interp_v = ((uint8_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp_h, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp_v, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_h, interp_v, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc31_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/4] __align16; \
    uint8_t * const interp_h = (uint8_t *)scratch; \
    uint8_t * const interp_v = ((uint8_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp_h, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp_v, src+1, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_h, interp_v, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc13_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/4] __align16; \
    uint8_t * const interp_h = (uint8_t *)scratch; \
    uint8_t * const interp_v = ((uint8_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp_h, src + stride, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp_v, src, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_h, interp_v, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc33_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*SIZE/4] __align16; \
    uint8_t * const interp_h = (uint8_t *)scratch; \
    uint8_t * const interp_v = ((uint8_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp_h, src + stride, SIZE, stride); \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp_v, src+1, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_h, interp_v, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc22_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*(SIZE+8)/4] __align16; \
    int16_t * const work16 = (int16_t *)scratch; \
    OPNAME ## h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(dst, work16, src, stride, SIZE, stride); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc21_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16; \
    uint8_t * const interp_h  = (uint8_t *)scratch; \
    uint8_t * const interp_hv = ((uint8_t *)scratch) + SIZE*SIZE; \
    int16_t * const work16    = ((int16_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp_h, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(interp_hv, work16, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_h, interp_hv, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc23_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16; \
    uint8_t * const interp_h  = (uint8_t *)scratch; \
    uint8_t * const interp_hv = ((uint8_t *)scratch) + SIZE*SIZE; \
    int16_t * const work16    = ((int16_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _h_lowpass_ ## CODETYPE(interp_h, src + stride, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(interp_hv, work16, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_h, interp_hv, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc12_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16; \
    uint8_t * const interp_v  = (uint8_t *)scratch; \
    uint8_t * const interp_hv = ((uint8_t *)scratch) + SIZE*SIZE; \
    int16_t * const work16    = ((int16_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp_v, src, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(interp_hv, work16, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_v, interp_hv, stride, SIZE, SIZE); \
} \
\
static void OPNAME ## h264_qpel ## SIZE ## _mc32_ ## CODETYPE(uint8_t *dst, uint8_t *src, int stride) \
{ \
    uint64_t scratch[SIZE*(SIZE+8)/4 + SIZE*SIZE/4] __align16; \
    uint8_t * const interp_v  = (uint8_t *)scratch; \
    uint8_t * const interp_hv = ((uint8_t *)scratch) + SIZE*SIZE; \
    int16_t * const work16    = ((int16_t *)scratch) + SIZE*SIZE; \
    put_h264_qpel ## SIZE ## _v_lowpass_ ## CODETYPE(interp_v, src+1, SIZE, stride); \
    put_h264_qpel ## SIZE ## _hv_lowpass_ ## CODETYPE(interp_hv, work16, src, SIZE, SIZE, stride); \
    OPNAME ## pixels ## SIZE ## _l2_ ## CODETYPE(dst, interp_v, interp_hv, stride, SIZE, SIZE); \
}
00190
00191
00192
00193 static inline void put_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
00194 int i;
00195 for (i = 0; i < h; i++) {
00196 uint32_t a, b;
00197 a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
00198 b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
00199 *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(a, b);
00200 a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
00201 b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
00202 *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(a, b);
00203 }
00204 } static inline void avg_pixels8_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
00205 int i;
00206 for (i = 0; i < h; i++) {
00207 uint32_t a, b;
00208 a = (((const struct unaligned_32 *) (&src1[i * src_stride1]))->l);
00209 b = (((const struct unaligned_32 *) (&src2[i * src_stride2]))->l);
00210 *((uint32_t *) & dst[i * dst_stride]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride]), rnd_avg32(a, b));
00211 a = (((const struct unaligned_32 *) (&src1[i * src_stride1 + 4]))->l);
00212 b = (((const struct unaligned_32 *) (&src2[i * src_stride2 + 4]))->l);
00213 *((uint32_t *) & dst[i * dst_stride + 4]) = rnd_avg32(*((uint32_t *) & dst[i * dst_stride + 4]), rnd_avg32(a, b));
00214 }
00215 } static inline void put_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
00216 put_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
00217 put_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
00218 } static inline void avg_pixels16_l2(uint8_t * dst, const uint8_t * src1, const uint8_t * src2, int dst_stride, int src_stride1, int src_stride2, int h) {
00219 avg_pixels8_l2(dst, src1, src2, dst_stride, src_stride1, src_stride2, h);
00220 avg_pixels8_l2(dst + 8, src1 + 8, src2 + 8, dst_stride, src_stride1, src_stride2, h);
00221 }
00222
00223
00224 #define put_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) put_pixels16_l2(d,s1,s2,ds,s1s,16,h)
00225 #define avg_pixels16_l2_altivec(d,s1,s2,ds,s1s,h) avg_pixels16_l2(d,s1,s2,ds,s1s,16,h)
00226
00227 H264_MC(put_, 16, altivec)
00228 H264_MC(avg_, 16, altivec)
00229
00230 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx) {
00231
00232 #ifdef HAVE_ALTIVEC
00233 if (has_altivec()) {
00234 c->put_h264_chroma_pixels_tab[0] = put_h264_chroma_mc8_altivec;
00235 c->avg_h264_chroma_pixels_tab[0] = avg_h264_chroma_mc8_altivec;
00236
00237 #define dspfunc(PFX, IDX, NUM) \
00238 c->PFX ## _pixels_tab[IDX][ 0] = PFX ## NUM ## _mc00_altivec; \
00239 c->PFX ## _pixels_tab[IDX][ 1] = PFX ## NUM ## _mc10_altivec; \
00240 c->PFX ## _pixels_tab[IDX][ 2] = PFX ## NUM ## _mc20_altivec; \
00241 c->PFX ## _pixels_tab[IDX][ 3] = PFX ## NUM ## _mc30_altivec; \
00242 c->PFX ## _pixels_tab[IDX][ 4] = PFX ## NUM ## _mc01_altivec; \
00243 c->PFX ## _pixels_tab[IDX][ 5] = PFX ## NUM ## _mc11_altivec; \
00244 c->PFX ## _pixels_tab[IDX][ 6] = PFX ## NUM ## _mc21_altivec; \
00245 c->PFX ## _pixels_tab[IDX][ 7] = PFX ## NUM ## _mc31_altivec; \
00246 c->PFX ## _pixels_tab[IDX][ 8] = PFX ## NUM ## _mc02_altivec; \
00247 c->PFX ## _pixels_tab[IDX][ 9] = PFX ## NUM ## _mc12_altivec; \
00248 c->PFX ## _pixels_tab[IDX][10] = PFX ## NUM ## _mc22_altivec; \
00249 c->PFX ## _pixels_tab[IDX][11] = PFX ## NUM ## _mc32_altivec; \
00250 c->PFX ## _pixels_tab[IDX][12] = PFX ## NUM ## _mc03_altivec; \
00251 c->PFX ## _pixels_tab[IDX][13] = PFX ## NUM ## _mc13_altivec; \
00252 c->PFX ## _pixels_tab[IDX][14] = PFX ## NUM ## _mc23_altivec; \
00253 c->PFX ## _pixels_tab[IDX][15] = PFX ## NUM ## _mc33_altivec
00254
00255 dspfunc(put_h264_qpel, 0, 16);
00256 dspfunc(avg_h264_qpel, 0, 16);
00257 #undef dspfunc
00258
00259 } else
00260 #endif
00261 {
00262
00263
00264
00265 }
00266 }