00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include "../dsputil.h"
00021
00022 #define DEF(x, y) x ## _no_rnd_ ## y ##_iwmmxt
00023 #define SET_RND(regd) __asm__ __volatile__ ("mov r12, #1 \n\t tbcsth " #regd ", r12":::"r12");
00024 #define WAVG2B "wavg2b"
00025 #include "dsputil_iwmmxt_rnd.h"
00026 #undef DEF
00027 #undef SET_RND
00028 #undef WAVG2B
00029
00030 #define DEF(x, y) x ## _ ## y ##_iwmmxt
00031 #define SET_RND(regd) __asm__ __volatile__ ("mov r12, #2 \n\t tbcsth " #regd ", r12":::"r12");
00032 #define WAVG2B "wavg2br"
00033 #include "dsputil_iwmmxt_rnd.h"
00034 #undef DEF
00035 #undef SET_RND
00036 #undef WAVG2BR
00037
00038
00039 #define OP(AVG) \
00040 asm volatile ( \
00041 \
00042 "and r12, %[pixels], #7 \n\t" \
00043 "bic %[pixels], %[pixels], #7 \n\t" \
00044 "tmcr wcgr1, r12 \n\t" \
00045 \
00046 "wldrd wr0, [%[pixels]] \n\t" \
00047 "wldrd wr1, [%[pixels], #8] \n\t" \
00048 "add %[pixels], %[pixels], %[line_size] \n\t" \
00049 "walignr1 wr4, wr0, wr1 \n\t" \
00050 \
00051 "1: \n\t" \
00052 \
00053 "wldrd wr2, [%[pixels]] \n\t" \
00054 "wldrd wr3, [%[pixels], #8] \n\t" \
00055 "add %[pixels], %[pixels], %[line_size] \n\t" \
00056 "pld [%[pixels]] \n\t" \
00057 "walignr1 wr5, wr2, wr3 \n\t" \
00058 AVG " wr6, wr4, wr5 \n\t" \
00059 "wstrd wr6, [%[block]] \n\t" \
00060 "add %[block], %[block], %[line_size] \n\t" \
00061 \
00062 "wldrd wr0, [%[pixels]] \n\t" \
00063 "wldrd wr1, [%[pixels], #8] \n\t" \
00064 "add %[pixels], %[pixels], %[line_size] \n\t" \
00065 "walignr1 wr4, wr0, wr1 \n\t" \
00066 "pld [%[pixels]] \n\t" \
00067 AVG " wr6, wr4, wr5 \n\t" \
00068 "wstrd wr6, [%[block]] \n\t" \
00069 "add %[block], %[block], %[line_size] \n\t" \
00070 \
00071 "subs %[h], %[h], #2 \n\t" \
00072 "bne 1b \n\t" \
00073 : [block]"+r"(block), [pixels]"+r"(pixels), [h]"+r"(h) \
00074 : [line_size]"r"(line_size) \
00075 : "memory", "r12");
00076 void put_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
00077 {
00078 OP("wavg2br");
00079 }
00080 void put_no_rnd_pixels8_y2_iwmmxt(uint8_t *block, const uint8_t *pixels, const int line_size, int h)
00081 {
00082 OP("wavg2b");
00083 }
00084 #undef OP
00085
00086 void add_pixels_clamped_iwmmxt(const DCTELEM *block, uint8_t *pixels, int line_size)
00087 {
00088 uint8_t *pixels2 = pixels + line_size;
00089
00090 __asm__ __volatile__ (
00091 "mov r12, #4 \n\t"
00092 "1: \n\t"
00093 "pld [%[pixels], %[line_size2]] \n\t"
00094 "pld [%[pixels2], %[line_size2]] \n\t"
00095 "wldrd wr4, [%[pixels]] \n\t"
00096 "wldrd wr5, [%[pixels2]] \n\t"
00097 "pld [%[block], #32] \n\t"
00098 "wunpckelub wr6, wr4 \n\t"
00099 "wldrd wr0, [%[block]] \n\t"
00100 "wunpckehub wr7, wr4 \n\t"
00101 "wldrd wr1, [%[block], #8] \n\t"
00102 "wunpckelub wr8, wr5 \n\t"
00103 "wldrd wr2, [%[block], #16] \n\t"
00104 "wunpckehub wr9, wr5 \n\t"
00105 "wldrd wr3, [%[block], #24] \n\t"
00106 "add %[block], %[block], #32 \n\t"
00107 "waddhss wr10, wr0, wr6 \n\t"
00108 "waddhss wr11, wr1, wr7 \n\t"
00109 "waddhss wr12, wr2, wr8 \n\t"
00110 "waddhss wr13, wr3, wr9 \n\t"
00111 "wpackhus wr14, wr10, wr11 \n\t"
00112 "wpackhus wr15, wr12, wr13 \n\t"
00113 "wstrd wr14, [%[pixels]] \n\t"
00114 "add %[pixels], %[pixels], %[line_size2] \n\t"
00115 "subs r12, r12, #1 \n\t"
00116 "wstrd wr15, [%[pixels2]] \n\t"
00117 "add %[pixels2], %[pixels2], %[line_size2] \n\t"
00118 "bne 1b \n\t"
00119 : [block]"+r"(block), [pixels]"+r"(pixels), [pixels2]"+r"(pixels2)
00120 : [line_size2]"r"(line_size << 1)
00121 : "cc", "memory", "r12");
00122 }
00123
/*
 * Do-nothing routine with the same parameter list as the pixel functions in
 * this file. None of the arguments is read or written.
 */
static void nop(uint8_t *block, const uint8_t *pixels, int line_size, int h)
{
    /* Intentionally empty; the casts only mark the parameters as unused. */
    (void)block;
    (void)pixels;
    (void)line_size;
    (void)h;
}
00128
/* Multimedia-extension flag word; assigned by dsputil_init_iwmmxt(). */
int mm_flags;

/*
 * Report the multimedia-extension flags of the running CPU.
 * No detection is performed: the result is always 0, i.e. no flag set.
 */
int mm_support(void)
{
    const int detected_flags = 0;

    return detected_flags;
}
00135
00136 void dsputil_init_iwmmxt(DSPContext* c, AVCodecContext *avctx)
00137 {
00138 mm_flags = mm_support();
00139
00140 if (avctx->dsp_mask) {
00141 if (avctx->dsp_mask & FF_MM_FORCE)
00142 mm_flags |= (avctx->dsp_mask & 0xffff);
00143 else
00144 mm_flags &= ~(avctx->dsp_mask & 0xffff);
00145 }
00146
00147 if (!(mm_flags & MM_IWMMXT)) return;
00148
00149 c->add_pixels_clamped = add_pixels_clamped_iwmmxt;
00150
00151 c->put_pixels_tab[0][0] = put_pixels16_iwmmxt;
00152 c->put_pixels_tab[0][1] = put_pixels16_x2_iwmmxt;
00153 c->put_pixels_tab[0][2] = put_pixels16_y2_iwmmxt;
00154 c->put_pixels_tab[0][3] = put_pixels16_xy2_iwmmxt;
00155 c->put_no_rnd_pixels_tab[0][0] = put_pixels16_iwmmxt;
00156 c->put_no_rnd_pixels_tab[0][1] = put_no_rnd_pixels16_x2_iwmmxt;
00157 c->put_no_rnd_pixels_tab[0][2] = put_no_rnd_pixels16_y2_iwmmxt;
00158 c->put_no_rnd_pixels_tab[0][3] = put_no_rnd_pixels16_xy2_iwmmxt;
00159
00160 c->put_pixels_tab[1][0] = put_pixels8_iwmmxt;
00161 c->put_pixels_tab[1][1] = put_pixels8_x2_iwmmxt;
00162 c->put_pixels_tab[1][2] = put_pixels8_y2_iwmmxt;
00163 c->put_pixels_tab[1][3] = put_pixels8_xy2_iwmmxt;
00164 c->put_no_rnd_pixels_tab[1][0] = put_pixels8_iwmmxt;
00165 c->put_no_rnd_pixels_tab[1][1] = put_no_rnd_pixels8_x2_iwmmxt;
00166 c->put_no_rnd_pixels_tab[1][2] = put_no_rnd_pixels8_y2_iwmmxt;
00167 c->put_no_rnd_pixels_tab[1][3] = put_no_rnd_pixels8_xy2_iwmmxt;
00168
00169 c->avg_pixels_tab[0][0] = avg_pixels16_iwmmxt;
00170 c->avg_pixels_tab[0][1] = avg_pixels16_x2_iwmmxt;
00171 c->avg_pixels_tab[0][2] = avg_pixels16_y2_iwmmxt;
00172 c->avg_pixels_tab[0][3] = avg_pixels16_xy2_iwmmxt;
00173 c->avg_no_rnd_pixels_tab[0][0] = avg_pixels16_iwmmxt;
00174 c->avg_no_rnd_pixels_tab[0][1] = avg_no_rnd_pixels16_x2_iwmmxt;
00175 c->avg_no_rnd_pixels_tab[0][2] = avg_no_rnd_pixels16_y2_iwmmxt;
00176 c->avg_no_rnd_pixels_tab[0][3] = avg_no_rnd_pixels16_xy2_iwmmxt;
00177
00178 c->avg_pixels_tab[1][0] = avg_pixels8_iwmmxt;
00179 c->avg_pixels_tab[1][1] = avg_pixels8_x2_iwmmxt;
00180 c->avg_pixels_tab[1][2] = avg_pixels8_y2_iwmmxt;
00181 c->avg_pixels_tab[1][3] = avg_pixels8_xy2_iwmmxt;
00182 c->avg_no_rnd_pixels_tab[1][0] = avg_no_rnd_pixels8_iwmmxt;
00183 c->avg_no_rnd_pixels_tab[1][1] = avg_no_rnd_pixels8_x2_iwmmxt;
00184 c->avg_no_rnd_pixels_tab[1][2] = avg_no_rnd_pixels8_y2_iwmmxt;
00185 c->avg_no_rnd_pixels_tab[1][3] = avg_no_rnd_pixels8_xy2_iwmmxt;
00186 }