00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021 #include "../dsputil.h"
00022
00023 #include "dsputil_ppc.h"
00024
00025 #ifdef HAVE_ALTIVEC
00026 #include "dsputil_altivec.h"
00027 #endif
00028
00029 extern void fdct_altivec(int16_t *block);
00030 extern void idct_put_altivec(uint8_t *dest, int line_size, int16_t *block);
00031 extern void idct_add_altivec(uint8_t *dest, int line_size, int16_t *block);
00032
/* CPU capability flags; dsputil_init_ppc() ORs MM_ALTIVEC into this. */
int mm_flags = 0;

/**
 * Report the SIMD capabilities available at run time.
 *
 * @return MM_ALTIVEC when the build has HAVE_ALTIVEC and has_altivec()
 *         reports true, otherwise 0.
 */
int mm_support(void)
{
#ifdef HAVE_ALTIVEC
    return has_altivec() ? MM_ALTIVEC : 0;
#else
    return 0;
#endif
}
00045
#ifdef POWERPC_PERFORMANCE_REPORT
/*
 * Collected performance-counter statistics, indexed as
 * perfdata[pmc][function][statistic].
 */
unsigned long long perfdata[POWERPC_NUM_PMC_ENABLED][powerpc_perf_total][powerpc_data_total];

/*
 * Printable name of each profiled function, indexed by the same function
 * index used for perfdata (see powerpc_display_perf_report()).
 * NOTE(review): the order presumably has to match the powerpc_perf_* enum
 * declared in dsputil_ppc.h -- confirm when adding entries.
 *
 * The entries are string literals that are only ever read and printed with
 * "%s", so the table is declared as const char pointers rather than
 * unsigned char pointers.
 */
static const char *const perfname[] = {
    "ff_fft_calc_altivec",
    "gmc1_altivec",
    "dct_unquantize_h263_altivec",
    "fdct_altivec",
    "idct_add_altivec",
    "idct_put_altivec",
    "put_pixels16_altivec",
    "avg_pixels16_altivec",
    "avg_pixels8_altivec",
    "put_pixels8_xy2_altivec",
    "put_no_rnd_pixels8_xy2_altivec",
    "put_pixels16_xy2_altivec",
    "put_no_rnd_pixels16_xy2_altivec",
    "hadamard8_diff8x8_altivec",
    "hadamard8_diff16_altivec",
    "avg_pixels8_xy2_altivec",
    "clear_blocks_dcbz32_ppc",
    "clear_blocks_dcbz128_ppc",
    "put_h264_chroma_mc8_altivec",
    "avg_h264_chroma_mc8_altivec",
    "put_h264_qpel16_h_lowpass_altivec",
    "avg_h264_qpel16_h_lowpass_altivec",
    "put_h264_qpel16_v_lowpass_altivec",
    "avg_h264_qpel16_v_lowpass_altivec",
    "put_h264_qpel16_hv_lowpass_altivec",
    "avg_h264_qpel16_hv_lowpass_altivec",
    ""
};
#include <stdio.h>
#endif
00080
#ifdef POWERPC_PERFORMANCE_REPORT
/**
 * Log the collected performance-counter statistics through av_log().
 *
 * One entry is printed per (function, counter) pair whose sample count is
 * non-zero: minimum, maximum, and average (sum / count) followed by the
 * sample count in parentheses.
 */
void powerpc_display_perf_report(void)
{
    int func, pmc;

    av_log(NULL, AV_LOG_INFO, "PowerPC performance report\n Values are from the PMC registers, and represent whatever the registers are set to record.\n");

    for (func = 0; func < powerpc_perf_total; func++) {
        for (pmc = 0; pmc < POWERPC_NUM_PMC_ENABLED; pmc++) {
            const unsigned long long *stats = perfdata[pmc][func];
            double average;

            /* Nothing was recorded for this pair; also avoids dividing by zero. */
            if (stats[powerpc_data_num] == 0ULL)
                continue;

            average = (double)stats[powerpc_data_sum] /
                      (double)stats[powerpc_data_num];

            av_log(NULL, AV_LOG_INFO,
                   " Function \"%s\" (pmc%d):\n\tmin: %llu\n\tmax: %llu\n\tavg: %1.2lf (%llu)\n",
                   perfname[func],
                   pmc + 1,
                   stats[powerpc_data_min],
                   stats[powerpc_data_max],
                   average,
                   stats[powerpc_data_num]);
        }
    }
}
#endif
00104
00105
00106
00107
00108
00109
00110
00111
00112
00113
00114
00115
00116
00117
00118
00119
00120
00121
00122
00123
00124
00125
00126
00127
00128
00129
00130 void clear_blocks_dcbz32_ppc(DCTELEM *blocks)
00131 {
00132 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz32, 1);
00133 register int misal = ((unsigned long)blocks & 0x00000010);
00134 register int i = 0;
00135 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz32, 1);
00136 #if 1
00137 if (misal) {
00138 ((unsigned long*)blocks)[0] = 0L;
00139 ((unsigned long*)blocks)[1] = 0L;
00140 ((unsigned long*)blocks)[2] = 0L;
00141 ((unsigned long*)blocks)[3] = 0L;
00142 i += 16;
00143 }
00144 for ( ; i < sizeof(DCTELEM)*6*64-31 ; i += 32) {
00145 #ifndef __MWERKS__
00146 asm volatile("dcbz %0,%1" : : "b" (blocks), "r" (i) : "memory");
00147 #else
00148 __dcbz( blocks, i );
00149 #endif
00150 }
00151 if (misal) {
00152 ((unsigned long*)blocks)[188] = 0L;
00153 ((unsigned long*)blocks)[189] = 0L;
00154 ((unsigned long*)blocks)[190] = 0L;
00155 ((unsigned long*)blocks)[191] = 0L;
00156 i += 16;
00157 }
00158 #else
00159 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00160 #endif
00161 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz32, 1);
00162 }
00163
00164
00165
00166 #ifndef NO_DCBZL
00167 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00168 {
00169 POWERPC_PERF_DECLARE(powerpc_clear_blocks_dcbz128, 1);
00170 register int misal = ((unsigned long)blocks & 0x0000007f);
00171 register int i = 0;
00172 POWERPC_PERF_START_COUNT(powerpc_clear_blocks_dcbz128, 1);
00173 #if 1
00174 if (misal) {
00175
00176
00177
00178 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00179 }
00180 else
00181 for ( ; i < sizeof(DCTELEM)*6*64 ; i += 128) {
00182 asm volatile("dcbzl %0,%1" : : "b" (blocks), "r" (i) : "memory");
00183 }
00184 #else
00185 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00186 #endif
00187 POWERPC_PERF_STOP_COUNT(powerpc_clear_blocks_dcbz128, 1);
00188 }
00189 #else
00190 void clear_blocks_dcbz128_ppc(DCTELEM *blocks)
00191 {
00192 memset(blocks, 0, sizeof(DCTELEM)*6*64);
00193 }
00194 #endif
00195
00196 #ifndef NO_DCBZL
00197
00198
00199
00200
00201
00202 long check_dcbzl_effect(void)
00203 {
00204 register char *fakedata = (char*)av_malloc(1024);
00205 register char *fakedata_middle;
00206 register long zero = 0;
00207 register long i = 0;
00208 long count = 0;
00209
00210 if (!fakedata)
00211 {
00212 return 0L;
00213 }
00214
00215 fakedata_middle = (fakedata + 512);
00216
00217 memset(fakedata, 0xFF, 1024);
00218
00219
00220
00221 asm volatile("dcbzl %0, %1" : : "b" (fakedata_middle), "r" (zero));
00222
00223 for (i = 0; i < 1024 ; i ++)
00224 {
00225 if (fakedata[i] == (char)0)
00226 count++;
00227 }
00228
00229 av_free(fakedata);
00230
00231 return count;
00232 }
00233 #else
00234 long check_dcbzl_effect(void)
00235 {
00236 return 0;
00237 }
00238 #endif
00239
00240
00241 void dsputil_h264_init_ppc(DSPContext* c, AVCodecContext *avctx);
00242
00243 void dsputil_init_ppc(DSPContext* c, AVCodecContext *avctx)
00244 {
00245
00246
00247 switch (check_dcbzl_effect()) {
00248 case 32:
00249 c->clear_blocks = clear_blocks_dcbz32_ppc;
00250 break;
00251 case 128:
00252 c->clear_blocks = clear_blocks_dcbz128_ppc;
00253 break;
00254 default:
00255 break;
00256 }
00257
00258 #ifdef HAVE_ALTIVEC
00259 dsputil_h264_init_ppc(c, avctx);
00260
00261 if (has_altivec()) {
00262 mm_flags |= MM_ALTIVEC;
00263
00264
00265 c->pix_abs[0][1] = sad16_x2_altivec;
00266 c->pix_abs[0][2] = sad16_y2_altivec;
00267 c->pix_abs[0][3] = sad16_xy2_altivec;
00268 c->pix_abs[0][0] = sad16_altivec;
00269 c->pix_abs[1][0] = sad8_altivec;
00270 c->sad[0]= sad16_altivec;
00271 c->sad[1]= sad8_altivec;
00272 c->pix_norm1 = pix_norm1_altivec;
00273 c->sse[1]= sse8_altivec;
00274 c->sse[0]= sse16_altivec;
00275 c->pix_sum = pix_sum_altivec;
00276 c->diff_pixels = diff_pixels_altivec;
00277 c->get_pixels = get_pixels_altivec;
00278
00279 #if 0
00280 c->add_bytes= add_bytes_altivec;
00281 #endif
00282 c->put_pixels_tab[0][0] = put_pixels16_altivec;
00283
00284 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_altivec;
00285 c->avg_pixels_tab[0][0] = avg_pixels16_altivec;
00286 c->avg_pixels_tab[1][0] = avg_pixels8_altivec;
00287 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_altivec;
00288 c->put_pixels_tab[1][3] = put_pixels8_xy2_altivec;
00289 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_altivec;
00290 c->put_pixels_tab[0][3] = put_pixels16_xy2_altivec;
00291 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_altivec;
00292
00293 c->gmc1 = gmc1_altivec;
00294
00295 #ifdef CONFIG_DARWIN // ATM gcc-3.3 and gcc-3.4 fail to compile these in linux...
00296 c->hadamard8_diff[0] = hadamard8_diff16_altivec;
00297 c->hadamard8_diff[1] = hadamard8_diff8x8_altivec;
00298 #endif
00299
00300 #ifdef CONFIG_ENCODERS
00301 if (avctx->dct_algo == FF_DCT_AUTO ||
00302 avctx->dct_algo == FF_DCT_ALTIVEC)
00303 {
00304 c->fdct = fdct_altivec;
00305 }
00306 #endif //CONFIG_ENCODERS
00307
00308 if (avctx->lowres==0)
00309 {
00310 if ((avctx->idct_algo == FF_IDCT_AUTO) ||
00311 (avctx->idct_algo == FF_IDCT_ALTIVEC))
00312 {
00313 c->idct_put = idct_put_altivec;
00314 c->idct_add = idct_add_altivec;
00315 #ifndef ALTIVEC_USE_REFERENCE_C_CODE
00316 c->idct_permutation_type = FF_TRANSPOSE_IDCT_PERM;
00317 #else
00318 c->idct_permutation_type = FF_NO_IDCT_PERM;
00319 #endif
00320 }
00321 }
00322
00323 #ifdef POWERPC_PERFORMANCE_REPORT
00324 {
00325 int i, j;
00326 for (i = 0 ; i < powerpc_perf_total ; i++)
00327 {
00328 for (j = 0; j < POWERPC_NUM_PMC_ENABLED ; j++)
00329 {
00330 perfdata[j][i][powerpc_data_min] = 0xFFFFFFFFFFFFFFFFULL;
00331 perfdata[j][i][powerpc_data_max] = 0x0000000000000000ULL;
00332 perfdata[j][i][powerpc_data_sum] = 0x0000000000000000ULL;
00333 perfdata[j][i][powerpc_data_num] = 0x0000000000000000ULL;
00334 }
00335 }
00336 }
00337 #endif
00338 } else
00339 #endif
00340 {
00341
00342
00343
00344 }
00345 }