00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
/*
 * 8-byte-wide "x2" store: for every row, the 8 source bytes at pixels and the
 * 8 bytes starting one byte further right (pixels + 1) are combined by the
 * PAVGBP macro and the result is written to block.
 *
 * block     - destination, advanced by line_size per row
 * pixels    - source, advanced by line_size per row; 9 bytes per row are read
 * line_size - byte stride shared by source and destination
 * h         - row count; four rows are produced per loop iteration and the
 *             loop ends only when the counter reaches exactly zero, so h has
 *             to be a positive multiple of 4
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this chunk; PAVGBP
 * presumably averages two register pairs at once - confirm against the macro.
 */
00026 static void DEF(put, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00027 {
/* Presumably loads the byte mask needed by the averaging macros into mm6. */
00028 MOVQ_BFE(mm6);
00029 __asm __volatile(
/* REG_a = 2 * line_size: the pointer step for two rows. */
00030 "lea (%3, %3), %%"REG_a" \n\t"
00031 ".balign 8 \n\t"
00032 "1: \n\t"
/* Rows 0 and 1: load each row and its one-byte-shifted copy. */
00033 "movq (%1), %%mm0 \n\t"
00034 "movq 1(%1), %%mm1 \n\t"
00035 "movq (%1, %3), %%mm2 \n\t"
00036 "movq 1(%1, %3), %%mm3 \n\t"
00037 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00038 "movq %%mm4, (%2) \n\t"
00039 "movq %%mm5, (%2, %3) \n\t"
00040 "add %%"REG_a", %1 \n\t"
00041 "add %%"REG_a", %2 \n\t"
/* Rows 2 and 3: same sequence again (manual unroll). */
00042 "movq (%1), %%mm0 \n\t"
00043 "movq 1(%1), %%mm1 \n\t"
00044 "movq (%1, %3), %%mm2 \n\t"
00045 "movq 1(%1, %3), %%mm3 \n\t"
00046 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00047 "movq %%mm4, (%2) \n\t"
00048 "movq %%mm5, (%2, %3) \n\t"
00049 "add %%"REG_a", %1 \n\t"
00050 "add %%"REG_a", %2 \n\t"
/* Four rows done; loop until the counter is exactly zero. */
00051 "subl $4, %0 \n\t"
00052 "jnz 1b \n\t"
00053 :"+g"(h), "+S"(pixels), "+D"(block)
00054 :"r"((long)line_size)
00055 :REG_a, "memory");
00056 }
00057
/*
 * 8-byte-wide "l2" store: each destination row is produced by the averaging
 * macros from one row of src1 and one row of src2.
 *
 * dst        - destination, advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, read as tightly packed rows of 8 bytes
 *              (it advances by 8 bytes per row)
 * h          - row count. If h is odd, a single row is processed first and h
 *              is decremented; the main loop is then entered unconditionally,
 *              handles four rows per iteration and ends only when the counter
 *              reaches exactly zero, so the remaining count has to be a
 *              positive multiple of 4.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * NOTE(review): PAVGB/PAVGBP/MOVQ_BFE are defined outside this chunk.
 */
00058 static void DEF(put, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00059 {
00060 MOVQ_BFE(mm6);
00061 __asm __volatile(
/* Odd h: peel off one row so the unrolled loop sees an even count. */
00062 "testl $1, %0 \n\t"
00063 " jz 1f \n\t"
00064 "movq (%1), %%mm0 \n\t"
00065 "movq (%2), %%mm1 \n\t"
00066 "add %4, %1 \n\t"
00067 "add $8, %2 \n\t"
00068 PAVGB(%%mm0, %%mm1, %%mm4, %%mm6)
00069 "movq %%mm4, (%3) \n\t"
00070 "add %5, %3 \n\t"
00071 "decl %0 \n\t"
00072 ".balign 8 \n\t"
00073 "1: \n\t"
/* Rows 0 and 1: src2 rows sit at offsets 0 and 8. */
00074 "movq (%1), %%mm0 \n\t"
00075 "movq (%2), %%mm1 \n\t"
00076 "add %4, %1 \n\t"
00077 "movq (%1), %%mm2 \n\t"
00078 "movq 8(%2), %%mm3 \n\t"
00079 "add %4, %1 \n\t"
00080 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00081 "movq %%mm4, (%3) \n\t"
00082 "add %5, %3 \n\t"
00083 "movq %%mm5, (%3) \n\t"
00084 "add %5, %3 \n\t"
/* Rows 2 and 3: src2 rows sit at offsets 16 and 24, then src2 moves on by 32. */
00085 "movq (%1), %%mm0 \n\t"
00086 "movq 16(%2), %%mm1 \n\t"
00087 "add %4, %1 \n\t"
00088 "movq (%1), %%mm2 \n\t"
00089 "movq 24(%2), %%mm3 \n\t"
00090 "add %4, %1 \n\t"
00091 "add $32, %2 \n\t"
00092 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00093 "movq %%mm4, (%3) \n\t"
00094 "add %5, %3 \n\t"
00095 "movq %%mm5, (%3) \n\t"
00096 "add %5, %3 \n\t"
00097 "subl $4, %0 \n\t"
00098 "jnz 1b \n\t"
/*
 * With PIC the row counter is kept in memory instead of the "b" register.
 * NOTE(review): presumably because the "b" register is reserved for the GOT
 * pointer in 32-bit PIC builds - confirm.
 */
00099 #ifdef PIC
00100 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00101 #else
00102 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00103 #endif
00104 :"S"((long)src1Stride), "D"((long)dstStride)
00105 :"memory");
00106 }
00107
/*
 * 16-byte-wide "x2" store: for every row, bytes 0..15 of the source are
 * combined with the bytes one position further right (1..16) by the PAVGBP
 * macro and written to block. The row is handled as two 8-byte halves.
 *
 * block     - destination, advanced by line_size per row
 * pixels    - source, advanced by line_size per row; 17 bytes per row are read
 * line_size - byte stride shared by source and destination
 * h         - row count; four rows are produced per loop iteration and the
 *             loop ends only when the counter reaches exactly zero, so h has
 *             to be a positive multiple of 4
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this chunk.
 */
00108 static void DEF(put, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00109 {
00110 MOVQ_BFE(mm6);
00111 __asm __volatile(
/* REG_a = 2 * line_size: the pointer step for two rows. */
00112 "lea (%3, %3), %%"REG_a" \n\t"
00113 ".balign 8 \n\t"
00114 "1: \n\t"
/* Rows 0 and 1, left 8 bytes. */
00115 "movq (%1), %%mm0 \n\t"
00116 "movq 1(%1), %%mm1 \n\t"
00117 "movq (%1, %3), %%mm2 \n\t"
00118 "movq 1(%1, %3), %%mm3 \n\t"
00119 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00120 "movq %%mm4, (%2) \n\t"
00121 "movq %%mm5, (%2, %3) \n\t"
/* Rows 0 and 1, right 8 bytes. */
00122 "movq 8(%1), %%mm0 \n\t"
00123 "movq 9(%1), %%mm1 \n\t"
00124 "movq 8(%1, %3), %%mm2 \n\t"
00125 "movq 9(%1, %3), %%mm3 \n\t"
00126 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00127 "movq %%mm4, 8(%2) \n\t"
00128 "movq %%mm5, 8(%2, %3) \n\t"
00129 "add %%"REG_a", %1 \n\t"
00130 "add %%"REG_a", %2 \n\t"
/* Rows 2 and 3, left 8 bytes. */
00131 "movq (%1), %%mm0 \n\t"
00132 "movq 1(%1), %%mm1 \n\t"
00133 "movq (%1, %3), %%mm2 \n\t"
00134 "movq 1(%1, %3), %%mm3 \n\t"
00135 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00136 "movq %%mm4, (%2) \n\t"
00137 "movq %%mm5, (%2, %3) \n\t"
/* Rows 2 and 3, right 8 bytes. */
00138 "movq 8(%1), %%mm0 \n\t"
00139 "movq 9(%1), %%mm1 \n\t"
00140 "movq 8(%1, %3), %%mm2 \n\t"
00141 "movq 9(%1, %3), %%mm3 \n\t"
00142 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00143 "movq %%mm4, 8(%2) \n\t"
00144 "movq %%mm5, 8(%2, %3) \n\t"
00145 "add %%"REG_a", %1 \n\t"
00146 "add %%"REG_a", %2 \n\t"
/* Four rows done; loop until the counter is exactly zero. */
00147 "subl $4, %0 \n\t"
00148 "jnz 1b \n\t"
00149 :"+g"(h), "+S"(pixels), "+D"(block)
00150 :"r"((long)line_size)
00151 :REG_a, "memory");
00152 }
00153
/*
 * 16-byte-wide "l2" store: each destination row is produced by the PAVGBP
 * macro from one row of src1 and one row of src2, as two 8-byte halves.
 *
 * dst        - destination, advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, read as tightly packed rows of 16 bytes
 *              (it advances by 16 bytes per row)
 * h          - row count. If h is odd, a single row is processed first and h
 *              is decremented; the main loop is then entered unconditionally,
 *              handles two rows per iteration and ends only when the counter
 *              reaches exactly zero, so the remaining count has to be a
 *              positive multiple of 2.
 *
 * Asm operands: %0 = h, %1 = src1, %2 = src2, %3 = dst,
 *               %4 = src1Stride, %5 = dstStride.
 * NOTE(review): PAVGBP/MOVQ_BFE are defined outside this chunk.
 */
00154 static void DEF(put, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00155 {
00156 MOVQ_BFE(mm6);
00157 __asm __volatile(
/* Odd h: peel off one full 16-byte row first. */
00158 "testl $1, %0 \n\t"
00159 " jz 1f \n\t"
00160 "movq (%1), %%mm0 \n\t"
00161 "movq (%2), %%mm1 \n\t"
00162 "movq 8(%1), %%mm2 \n\t"
00163 "movq 8(%2), %%mm3 \n\t"
00164 "add %4, %1 \n\t"
00165 "add $16, %2 \n\t"
00166 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00167 "movq %%mm4, (%3) \n\t"
00168 "movq %%mm5, 8(%3) \n\t"
00169 "add %5, %3 \n\t"
00170 "decl %0 \n\t"
00171 ".balign 8 \n\t"
00172 "1: \n\t"
/* Row 0: src2 bytes 0..15. */
00173 "movq (%1), %%mm0 \n\t"
00174 "movq (%2), %%mm1 \n\t"
00175 "movq 8(%1), %%mm2 \n\t"
00176 "movq 8(%2), %%mm3 \n\t"
00177 "add %4, %1 \n\t"
00178 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00179 "movq %%mm4, (%3) \n\t"
00180 "movq %%mm5, 8(%3) \n\t"
00181 "add %5, %3 \n\t"
/* Row 1: src2 bytes 16..31, then src2 moves on by 32. */
00182 "movq (%1), %%mm0 \n\t"
00183 "movq 16(%2), %%mm1 \n\t"
00184 "movq 8(%1), %%mm2 \n\t"
00185 "movq 24(%2), %%mm3 \n\t"
00186 "add %4, %1 \n\t"
00187 PAVGBP(%%mm0, %%mm1, %%mm4, %%mm2, %%mm3, %%mm5)
00188 "movq %%mm4, (%3) \n\t"
00189 "movq %%mm5, 8(%3) \n\t"
00190 "add %5, %3 \n\t"
00191 "add $32, %2 \n\t"
00192 "subl $2, %0 \n\t"
00193 "jnz 1b \n\t"
/*
 * With PIC the row counter is kept in memory instead of the "b" register.
 * NOTE(review): presumably because the "b" register is reserved for the GOT
 * pointer in 32-bit PIC builds - confirm.
 */
00194 #ifdef PIC
00195 :"+m"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00196 #else
00197 :"+b"(h), "+a"(src1), "+c"(src2), "+d"(dst)
00198 #endif
00199 :"S"((long)src1Stride), "D"((long)dstStride)
00200 :"memory");
00201 }
00202
/*
 * 8-byte-wide "y2" store: every output row is produced by the PAVGBP macro
 * from a source row and the source row directly below it.
 *
 * block     - destination, advanced by line_size per row
 * pixels    - source; rows 0..h are read (one more row than is written)
 * line_size - byte stride shared by source and destination
 * h         - row count; four rows are produced per loop iteration and the
 *             loop ends only when the counter reaches exactly zero, so h has
 *             to be a positive multiple of 4
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * The most recently loaded row is carried in a register (alternately mm0 and
 * mm2) so each source row is loaded only once.
 * NOTE(review): PAVGBP and MOVQ_BFE are defined outside this chunk.
 */
00203 static void DEF(put, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00204 {
00205 MOVQ_BFE(mm6);
00206 __asm __volatile(
/* REG_a = 2 * line_size; mm0 = first source row. */
00207 "lea (%3, %3), %%"REG_a" \n\t"
00208 "movq (%1), %%mm0 \n\t"
00209 ".balign 8 \n\t"
00210 "1: \n\t"
/* mm1 = row+1, mm2 = row+2; outputs use (row, row+1) and (row+1, row+2). */
00211 "movq (%1, %3), %%mm1 \n\t"
00212 "movq (%1, %%"REG_a"),%%mm2 \n\t"
00213 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
00214 "movq %%mm4, (%2) \n\t"
00215 "movq %%mm5, (%2, %3) \n\t"
00216 "add %%"REG_a", %1 \n\t"
00217 "add %%"REG_a", %2 \n\t"
/* Second pair: mm2 is the carried row, mm0 receives the new bottom row. */
00218 "movq (%1, %3), %%mm1 \n\t"
00219 "movq (%1, %%"REG_a"),%%mm0 \n\t"
00220 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
00221 "movq %%mm4, (%2) \n\t"
00222 "movq %%mm5, (%2, %3) \n\t"
00223 "add %%"REG_a", %1 \n\t"
00224 "add %%"REG_a", %2 \n\t"
00225 "subl $4, %0 \n\t"
00226 "jnz 1b \n\t"
00227 :"+g"(h), "+S"(pixels), "+D"(block)
00228 :"r"((long)line_size)
00229 :REG_a, "memory");
00230 }
00231
/*
 * 8-byte-wide "xy2" store: every output byte is derived from a 2x2 group of
 * source bytes (a byte, its right neighbour, and the two bytes below them).
 * Bytes are widened to 16-bit words, the four values plus the constant held
 * in mm6 are summed, the sum is shifted right by 2 and packed back to bytes.
 *
 * block     - destination; not modified by the asm, rows are addressed
 *             through the running offset in REG_a
 * pixels    - source; rows 0..h are read, 9 bytes per row
 * line_size - byte stride shared by source and destination
 * h         - row count; two rows are produced per loop iteration and the
 *             loop ends only when the counter reaches exactly zero, so h has
 *             to be a positive multiple of 2
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * NOTE(review): MOVQ_ZERO and SET_RND are defined outside this chunk;
 * presumably mm7 is all zero (it is used as the unpack partner) and mm6 holds
 * a per-word rounding constant - confirm.
 */
00232 static void DEF(put, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00233 {
00234 MOVQ_ZERO(mm7);
00235 SET_RND(mm6);
00236 __asm __volatile(
/* Prologue: horizontal pair sums of row 0 as words in mm4 (low) / mm5 (high). */
00237 "movq (%1), %%mm0 \n\t"
00238 "movq 1(%1), %%mm4 \n\t"
00239 "movq %%mm0, %%mm1 \n\t"
00240 "movq %%mm4, %%mm5 \n\t"
00241 "punpcklbw %%mm7, %%mm0 \n\t"
00242 "punpcklbw %%mm7, %%mm4 \n\t"
00243 "punpckhbw %%mm7, %%mm1 \n\t"
00244 "punpckhbw %%mm7, %%mm5 \n\t"
00245 "paddusw %%mm0, %%mm4 \n\t"
00246 "paddusw %%mm1, %%mm5 \n\t"
/* REG_a = running byte offset (starts at 0); pixels now points at row 1. */
00247 "xor %%"REG_a", %%"REG_a" \n\t"
00248 "add %3, %1 \n\t"
00249 ".balign 8 \n\t"
00250 "1: \n\t"
/* First half: pair sums of the next row go to mm0/mm1. */
00251 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00252 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
00253 "movq %%mm0, %%mm1 \n\t"
00254 "movq %%mm2, %%mm3 \n\t"
00255 "punpcklbw %%mm7, %%mm0 \n\t"
00256 "punpcklbw %%mm7, %%mm2 \n\t"
00257 "punpckhbw %%mm7, %%mm1 \n\t"
00258 "punpckhbw %%mm7, %%mm3 \n\t"
00259 "paddusw %%mm2, %%mm0 \n\t"
00260 "paddusw %%mm3, %%mm1 \n\t"
/* Carried sums (mm4/mm5) + mm6 constant + new sums, >> 2, pack and store. */
00261 "paddusw %%mm6, %%mm4 \n\t"
00262 "paddusw %%mm6, %%mm5 \n\t"
00263 "paddusw %%mm0, %%mm4 \n\t"
00264 "paddusw %%mm1, %%mm5 \n\t"
00265 "psrlw $2, %%mm4 \n\t"
00266 "psrlw $2, %%mm5 \n\t"
00267 "packuswb %%mm5, %%mm4 \n\t"
00268 "movq %%mm4, (%2, %%"REG_a") \n\t"
00269 "add %3, %%"REG_a" \n\t"
00270
/* Second half: roles swap - mm0/mm1 are carried, new sums go to mm4/mm5. */
00271 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00272 "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
00273 "movq %%mm2, %%mm3 \n\t"
00274 "movq %%mm4, %%mm5 \n\t"
00275 "punpcklbw %%mm7, %%mm2 \n\t"
00276 "punpcklbw %%mm7, %%mm4 \n\t"
00277 "punpckhbw %%mm7, %%mm3 \n\t"
00278 "punpckhbw %%mm7, %%mm5 \n\t"
00279 "paddusw %%mm2, %%mm4 \n\t"
00280 "paddusw %%mm3, %%mm5 \n\t"
00281 "paddusw %%mm6, %%mm0 \n\t"
00282 "paddusw %%mm6, %%mm1 \n\t"
00283 "paddusw %%mm4, %%mm0 \n\t"
00284 "paddusw %%mm5, %%mm1 \n\t"
00285 "psrlw $2, %%mm0 \n\t"
00286 "psrlw $2, %%mm1 \n\t"
00287 "packuswb %%mm1, %%mm0 \n\t"
00288 "movq %%mm0, (%2, %%"REG_a") \n\t"
00289 "add %3, %%"REG_a" \n\t"
00290
/* mm4/mm5 again hold the sums of the last row read, ready for the next pass. */
00291 "subl $2, %0 \n\t"
00292 "jnz 1b \n\t"
00293 :"+g"(h), "+S"(pixels)
00294 :"D"(block), "r"((long)line_size)
00295 :REG_a, "memory");
00296 }
00297
00298
/*
 * 4-byte-wide "avg": for every row, 4 bytes of block and 4 bytes of pixels
 * are combined by the PAVGB macro and the result is written back to block.
 *
 * block     - destination, read and written; advanced by line_size per row
 * pixels    - source, advanced by line_size per row
 * line_size - byte stride shared by source and destination
 * h         - row count; the do/while body runs before h is tested, so h must
 *             be at least 1
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 */
00299 static void DEF(avg, pixels4)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00300 {
00301 MOVQ_BFE(mm6);
00302 JUMPALIGN();
00303 do {
00304 __asm __volatile(
/* %0 = *block, %1 = *pixels; movd moves 32 bits although the operands are typed as single bytes. */
00305 "movd %0, %%mm0 \n\t"
00306 "movd %1, %%mm1 \n\t"
00307 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00308 "movd %%mm2, %0 \n\t"
00309 :"+m"(*block)
00310 :"m"(*pixels)
/* The "memory" clobber covers the bytes beyond the first one named by the operands. */
00311 :"memory");
00312 pixels += line_size;
00313 block += line_size;
00314 }
00315 while (--h);
00316 }
00317
00318
/*
 * 8-byte-wide "avg": for every row, 8 bytes of block and 8 bytes of pixels
 * are combined by the PAVGB macro and the result is written back to block.
 *
 * block     - destination, read and written; advanced by line_size per row
 * pixels    - source, advanced by line_size per row
 * line_size - byte stride shared by source and destination
 * h         - row count; the do/while body runs before h is tested, so h must
 *             be at least 1
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 */
00319 static void DEF(avg, pixels8)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00320 {
00321 MOVQ_BFE(mm6);
00322 JUMPALIGN();
00323 do {
00324 __asm __volatile(
/* %0 = *block, %1 = *pixels; movq moves 64 bits although the operands are typed as single bytes. */
00325 "movq %0, %%mm0 \n\t"
00326 "movq %1, %%mm1 \n\t"
00327 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00328 "movq %%mm2, %0 \n\t"
00329 :"+m"(*block)
00330 :"m"(*pixels)
/* The "memory" clobber covers the bytes beyond the first one named by the operands. */
00331 :"memory");
00332 pixels += line_size;
00333 block += line_size;
00334 }
00335 while (--h);
00336 }
00337
/*
 * 16-byte-wide "avg": for every row, 16 bytes of block and 16 bytes of pixels
 * are combined by the PAVGB macro (as two 8-byte halves) and the result is
 * written back to block.
 *
 * block     - destination, read and written; advanced by line_size per row
 * pixels    - source, advanced by line_size per row
 * line_size - byte stride shared by source and destination
 * h         - row count; the do/while body runs before h is tested, so h must
 *             be at least 1
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 */
00338 static void DEF(avg, pixels16)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00339 {
00340 MOVQ_BFE(mm6);
00341 JUMPALIGN();
00342 do {
00343 __asm __volatile(
/* Left half: bytes 0..7. */
00344 "movq %0, %%mm0 \n\t"
00345 "movq %1, %%mm1 \n\t"
00346 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00347 "movq %%mm2, %0 \n\t"
/*
 * Right half: the literal "8" is pasted in front of the memory operand text
 * to form a displacement.
 * NOTE(review): this only yields "+8" when the compiler prints the operand
 * without a displacement of its own, e.g. "(%reg)" - confirm this holds for
 * all build configurations.
 */
00348 "movq 8%0, %%mm0 \n\t"
00349 "movq 8%1, %%mm1 \n\t"
00350 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00351 "movq %%mm2, 8%0 \n\t"
00352 :"+m"(*block)
00353 :"m"(*pixels)
/* The "memory" clobber covers the bytes beyond the first one named by the operands. */
00354 :"memory");
00355 pixels += line_size;
00356 block += line_size;
00357 }
00358 while (--h);
00359 }
00360
/*
 * 8-byte-wide "avg x2": per row, the 8 source bytes and the 8 bytes starting
 * one byte further right are combined by PAVGB; that intermediate result is
 * then combined by PAVGB with the existing 8 bytes of block and written back.
 *
 * block     - destination, read and written; advanced by line_size per row
 * pixels    - source, advanced by line_size per row; 9 bytes per row are read
 * line_size - byte stride shared by source and destination
 * h         - row count; the do/while body runs before h is tested, so h must
 *             be at least 1
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 */
00361 static void DEF(avg, pixels8_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00362 {
00363 MOVQ_BFE(mm6);
00364 JUMPALIGN();
00365 do {
00366 __asm __volatile(
/*
 * "1%1" pastes a literal 1 in front of the operand text to address pixels + 1.
 * NOTE(review): relies on the operand being printed without a displacement.
 */
00367 "movq %1, %%mm0 \n\t"
00368 "movq 1%1, %%mm1 \n\t"
00369 "movq %0, %%mm3 \n\t"
/* mm2 = source pair result; mm0 = that result combined with the old block bytes. */
00370 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00371 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00372 "movq %%mm0, %0 \n\t"
00373 :"+m"(*block)
00374 :"m"(*pixels)
00375 :"memory");
00376 pixels += line_size;
00377 block += line_size;
00378 } while (--h);
00379 }
00380
/*
 * 8-byte-wide "avg l2": per row, 8 bytes of src1 and 8 bytes of src2 are
 * combined by PAVGB; that intermediate result is then combined by PAVGB with
 * the existing 8 bytes of dst and written back.
 *
 * dst        - destination, read and written; advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, advanced by 8 bytes per row (packed rows)
 * h          - row count; the do/while body runs before h is tested, so h
 *              must be at least 1
 *
 * Marked unused so that builds where nothing calls it do not warn.
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 */
00381 static __attribute__((unused)) void DEF(avg, pixels8_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00382 {
00383 MOVQ_BFE(mm6);
00384 JUMPALIGN();
00385 do {
00386 __asm __volatile(
/* %0 = *dst, %1 = *src1, %2 = *src2. */
00387 "movq %1, %%mm0 \n\t"
00388 "movq %2, %%mm1 \n\t"
00389 "movq %0, %%mm3 \n\t"
/* mm2 = src1/src2 result; mm0 = that result combined with the old dst bytes. */
00390 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00391 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00392 "movq %%mm0, %0 \n\t"
00393 :"+m"(*dst)
00394 :"m"(*src1), "m"(*src2)
00395 :"memory");
00396 dst += dstStride;
00397 src1 += src1Stride;
00398 src2 += 8;
00399 } while (--h);
00400 }
00401
/*
 * 16-byte-wide "avg x2": per row and per 8-byte half, the source bytes and
 * the bytes starting one position further right are combined by PAVGB; that
 * intermediate result is then combined by PAVGB with the existing bytes of
 * block and written back.
 *
 * block     - destination, read and written; advanced by line_size per row
 * pixels    - source, advanced by line_size per row; 17 bytes per row are read
 * line_size - byte stride shared by source and destination
 * h         - row count; the do/while body runs before h is tested, so h must
 *             be at least 1
 *
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 * NOTE(review): "1%1", "8%1", "9%1" and "8%0" paste a literal number in front
 * of the operand text; this forms the intended displacement only when the
 * operand is printed without a displacement of its own - confirm.
 */
00402 static void DEF(avg, pixels16_x2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00403 {
00404 MOVQ_BFE(mm6);
00405 JUMPALIGN();
00406 do {
00407 __asm __volatile(
/* Left half: source bytes 0..7 and 1..8, destination bytes 0..7. */
00408 "movq %1, %%mm0 \n\t"
00409 "movq 1%1, %%mm1 \n\t"
00410 "movq %0, %%mm3 \n\t"
00411 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00412 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00413 "movq %%mm0, %0 \n\t"
/* Right half: source bytes 8..15 and 9..16, destination bytes 8..15. */
00414 "movq 8%1, %%mm0 \n\t"
00415 "movq 9%1, %%mm1 \n\t"
00416 "movq 8%0, %%mm3 \n\t"
00417 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00418 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00419 "movq %%mm0, 8%0 \n\t"
00420 :"+m"(*block)
00421 :"m"(*pixels)
00422 :"memory");
00423 pixels += line_size;
00424 block += line_size;
00425 } while (--h);
00426 }
00427
/*
 * 16-byte-wide "avg l2": per row and per 8-byte half, src1 and src2 are
 * combined by PAVGB; that intermediate result is then combined by PAVGB with
 * the existing bytes of dst and written back.
 *
 * dst        - destination, read and written; advanced by dstStride per row
 * src1       - first source, advanced by src1Stride per row
 * src2       - second source, advanced by 16 bytes per row (packed rows)
 * h          - row count; the do/while body runs before h is tested, so h
 *              must be at least 1
 *
 * Marked unused so that builds where nothing calls it do not warn.
 * NOTE(review): PAVGB, MOVQ_BFE and JUMPALIGN are defined outside this chunk.
 * NOTE(review): "8%0", "8%1" and "8%2" paste a literal 8 in front of the
 * operand text; this forms a +8 displacement only when the operand is printed
 * without a displacement of its own - confirm.
 */
00428 static __attribute__((unused)) void DEF(avg, pixels16_l2)(uint8_t *dst, uint8_t *src1, uint8_t *src2, int dstStride, int src1Stride, int h)
00429 {
00430 MOVQ_BFE(mm6);
00431 JUMPALIGN();
00432 do {
00433 __asm __volatile(
/* Left half: bytes 0..7 of src1, src2 and dst. */
00434 "movq %1, %%mm0 \n\t"
00435 "movq %2, %%mm1 \n\t"
00436 "movq %0, %%mm3 \n\t"
00437 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00438 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00439 "movq %%mm0, %0 \n\t"
/* Right half: bytes 8..15 of src1, src2 and dst. */
00440 "movq 8%1, %%mm0 \n\t"
00441 "movq 8%2, %%mm1 \n\t"
00442 "movq 8%0, %%mm3 \n\t"
00443 PAVGB(%%mm0, %%mm1, %%mm2, %%mm6)
00444 PAVGB(%%mm3, %%mm2, %%mm0, %%mm6)
00445 "movq %%mm0, 8%0 \n\t"
00446 :"+m"(*dst)
00447 :"m"(*src1), "m"(*src2)
00448 :"memory");
00449 dst += dstStride;
00450 src1 += src1Stride;
00451 src2 += 16;
00452 } while (--h);
00453 }
00454
/*
 * 8-byte-wide "avg y2": every source row is combined with the row directly
 * below it by PAVGBP; that intermediate result is then combined by PAVGB with
 * the existing bytes of block and written back.
 *
 * block     - destination, read and written; advanced by line_size per row
 * pixels    - source; rows 0..h are read (one more row than is written)
 * line_size - byte stride shared by source and destination
 * h         - row count; four rows are produced per loop iteration and the
 *             loop ends only when the counter reaches exactly zero, so h has
 *             to be a positive multiple of 4
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * The most recently loaded source row is carried across halves (mm2 after the
 * first half, mm0 after the second), so the PAVGB destinations below are
 * chosen among registers whose contents are no longer needed.
 * NOTE(review): PAVGB, PAVGBP and MOVQ_BFE are defined outside this chunk.
 */
00455 static void DEF(avg, pixels8_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00456 {
00457 MOVQ_BFE(mm6);
00458 __asm __volatile(
/* REG_a = 2 * line_size; mm0 = first source row. */
00459 "lea (%3, %3), %%"REG_a" \n\t"
00460 "movq (%1), %%mm0 \n\t"
00461 ".balign 8 \n\t"
00462 "1: \n\t"
/* mm1 = row+1, mm2 = row+2; mm4/mm5 = vertical results for two rows. */
00463 "movq (%1, %3), %%mm1 \n\t"
00464 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00465 PAVGBP(%%mm1, %%mm0, %%mm4, %%mm2, %%mm1, %%mm5)
/* Blend each result with the bytes already in block (mm0/mm1 are free now). */
00466 "movq (%2), %%mm3 \n\t"
00467 PAVGB(%%mm3, %%mm4, %%mm0, %%mm6)
00468 "movq (%2, %3), %%mm3 \n\t"
00469 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
00470 "movq %%mm0, (%2) \n\t"
00471 "movq %%mm1, (%2, %3) \n\t"
00472 "add %%"REG_a", %1 \n\t"
00473 "add %%"REG_a", %2 \n\t"
00474
/* Second pair: mm2 is the carried row, mm0 receives the new bottom row. */
00475 "movq (%1, %3), %%mm1 \n\t"
00476 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00477 PAVGBP(%%mm1, %%mm2, %%mm4, %%mm0, %%mm1, %%mm5)
/* Blend with block; mm2/mm1 are free now, mm0 must survive to the next pass. */
00478 "movq (%2), %%mm3 \n\t"
00479 PAVGB(%%mm3, %%mm4, %%mm2, %%mm6)
00480 "movq (%2, %3), %%mm3 \n\t"
00481 PAVGB(%%mm3, %%mm5, %%mm1, %%mm6)
00482 "movq %%mm2, (%2) \n\t"
00483 "movq %%mm1, (%2, %3) \n\t"
00484 "add %%"REG_a", %1 \n\t"
00485 "add %%"REG_a", %2 \n\t"
00486
00487 "subl $4, %0 \n\t"
00488 "jnz 1b \n\t"
00489 :"+g"(h), "+S"(pixels), "+D"(block)
00490 :"r"((long)line_size)
00491 :REG_a, "memory");
00492 }
00493
00494
/*
 * 8-byte-wide "avg xy2": every intermediate byte is derived from a 2x2 group
 * of source bytes (widened to words, summed together with the constant in
 * mm6, shifted right by 2 and packed); it is then combined by PAVGB with the
 * existing byte of block and written back.
 *
 * block     - destination, read and written; not modified by the asm, rows
 *             are addressed through the running offset in REG_a
 * pixels    - source; rows 0..h are read, 9 bytes per row
 * line_size - byte stride shared by source and destination
 * h         - row count; two rows are produced per loop iteration and the
 *             loop ends only when the counter reaches exactly zero, so h has
 *             to be a positive multiple of 2
 *
 * Asm operands: %0 = h, %1 = pixels, %2 = block, %3 = line_size.
 * NOTE(review): MOVQ_ZERO, SET_RND and PAVGB are defined outside this chunk;
 * presumably mm7 is all zero (it is used as the unpack partner) and mm6 holds
 * a per-word rounding constant - confirm.
 */
00495 static void DEF(avg, pixels8_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h)
00496 {
00497 MOVQ_ZERO(mm7);
00498 SET_RND(mm6);
00499 __asm __volatile(
/* Prologue: horizontal pair sums of row 0 as words in mm4 (low) / mm5 (high). */
00500 "movq (%1), %%mm0 \n\t"
00501 "movq 1(%1), %%mm4 \n\t"
00502 "movq %%mm0, %%mm1 \n\t"
00503 "movq %%mm4, %%mm5 \n\t"
00504 "punpcklbw %%mm7, %%mm0 \n\t"
00505 "punpcklbw %%mm7, %%mm4 \n\t"
00506 "punpckhbw %%mm7, %%mm1 \n\t"
00507 "punpckhbw %%mm7, %%mm5 \n\t"
00508 "paddusw %%mm0, %%mm4 \n\t"
00509 "paddusw %%mm1, %%mm5 \n\t"
/* REG_a = running byte offset (starts at 0); pixels now points at row 1. */
00510 "xor %%"REG_a", %%"REG_a" \n\t"
00511 "add %3, %1 \n\t"
00512 ".balign 8 \n\t"
00513 "1: \n\t"
/* First half: pair sums of the next row go to mm0/mm1. */
00514 "movq (%1, %%"REG_a"), %%mm0 \n\t"
00515 "movq 1(%1, %%"REG_a"), %%mm2 \n\t"
00516 "movq %%mm0, %%mm1 \n\t"
00517 "movq %%mm2, %%mm3 \n\t"
00518 "punpcklbw %%mm7, %%mm0 \n\t"
00519 "punpcklbw %%mm7, %%mm2 \n\t"
00520 "punpckhbw %%mm7, %%mm1 \n\t"
00521 "punpckhbw %%mm7, %%mm3 \n\t"
00522 "paddusw %%mm2, %%mm0 \n\t"
00523 "paddusw %%mm3, %%mm1 \n\t"
/* Carried sums (mm4/mm5) + mm6 constant + new sums, >> 2. */
00524 "paddusw %%mm6, %%mm4 \n\t"
00525 "paddusw %%mm6, %%mm5 \n\t"
00526 "paddusw %%mm0, %%mm4 \n\t"
00527 "paddusw %%mm1, %%mm5 \n\t"
00528 "psrlw $2, %%mm4 \n\t"
00529 "psrlw $2, %%mm5 \n\t"
/* mm3 = existing destination bytes (mm2/mm3 are free after the sums above). */
00530 "movq (%2, %%"REG_a"), %%mm3 \n\t"
00531 "packuswb %%mm5, %%mm4 \n\t"
/*
 * Build the PAVGB mask in mm2 because mm6 is occupied by the SET_RND value:
 * pcmpeqd sets every bit, paddb then doubles each byte (0xFF + 0xFF wraps to
 * 0xFE), giving 0xFE in every byte.
 */
00532 "pcmpeqd %%mm2, %%mm2 \n\t"
00533 "paddb %%mm2, %%mm2 \n\t"
00534 PAVGB(%%mm3, %%mm4, %%mm5, %%mm2)
00535 "movq %%mm5, (%2, %%"REG_a") \n\t"
00536 "add %3, %%"REG_a" \n\t"
00537
/* Second half: roles swap - mm0/mm1 are carried, new sums go to mm4/mm5. */
00538 "movq (%1, %%"REG_a"), %%mm2 \n\t"
00539 "movq 1(%1, %%"REG_a"), %%mm4 \n\t"
00540 "movq %%mm2, %%mm3 \n\t"
00541 "movq %%mm4, %%mm5 \n\t"
00542 "punpcklbw %%mm7, %%mm2 \n\t"
00543 "punpcklbw %%mm7, %%mm4 \n\t"
00544 "punpckhbw %%mm7, %%mm3 \n\t"
00545 "punpckhbw %%mm7, %%mm5 \n\t"
00546 "paddusw %%mm2, %%mm4 \n\t"
00547 "paddusw %%mm3, %%mm5 \n\t"
00548 "paddusw %%mm6, %%mm0 \n\t"
00549 "paddusw %%mm6, %%mm1 \n\t"
00550 "paddusw %%mm4, %%mm0 \n\t"
00551 "paddusw %%mm5, %%mm1 \n\t"
00552 "psrlw $2, %%mm0 \n\t"
00553 "psrlw $2, %%mm1 \n\t"
00554 "movq (%2, %%"REG_a"), %%mm3 \n\t"
00555 "packuswb %%mm1, %%mm0 \n\t"
/* Rebuild the 0xFE byte mask in mm2 (it was overwritten by the loads above). */
00556 "pcmpeqd %%mm2, %%mm2 \n\t"
00557 "paddb %%mm2, %%mm2 \n\t"
/* Result goes to mm1 so that mm4/mm5 keep the sums needed by the next pass. */
00558 PAVGB(%%mm3, %%mm0, %%mm1, %%mm2)
00559 "movq %%mm1, (%2, %%"REG_a") \n\t"
00560 "add %3, %%"REG_a" \n\t"
00561
00562 "subl $2, %0 \n\t"
00563 "jnz 1b \n\t"
00564 :"+g"(h), "+S"(pixels)
00565 :"D"(block), "r"((long)line_size)
00566 :REG_a, "memory");
00567 }
00568
00569
/*
 * 16-byte-wide "put y2": runs the 8-byte-wide put y2 routine twice, once on
 * columns 0..7 and once on columns 8..15, with the same stride and row count.
 */
00570 static void DEF(put, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00571 DEF(put, pixels8_y2)(block , pixels , line_size, h);
00572 DEF(put, pixels8_y2)(block+8, pixels+8, line_size, h);
00573 }
00574
/*
 * 16-byte-wide "put xy2": runs the 8-byte-wide put xy2 routine twice, once on
 * columns 0..7 and once on columns 8..15, with the same stride and row count.
 */
00575 static void DEF(put, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00576 DEF(put, pixels8_xy2)(block , pixels , line_size, h);
00577 DEF(put, pixels8_xy2)(block+8, pixels+8, line_size, h);
00578 }
00579
/*
 * 16-byte-wide "avg y2": runs the 8-byte-wide avg y2 routine twice, once on
 * columns 0..7 and once on columns 8..15, with the same stride and row count.
 */
00580 static void DEF(avg, pixels16_y2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00581 DEF(avg, pixels8_y2)(block , pixels , line_size, h);
00582 DEF(avg, pixels8_y2)(block+8, pixels+8, line_size, h);
00583 }
00584
/*
 * 16-byte-wide "avg xy2": runs the 8-byte-wide avg xy2 routine twice, once on
 * columns 0..7 and once on columns 8..15, with the same stride and row count.
 */
00585 static void DEF(avg, pixels16_xy2)(uint8_t *block, const uint8_t *pixels, int line_size, int h){
00586 DEF(avg, pixels8_xy2)(block , pixels , line_size, h);
00587 DEF(avg, pixels8_xy2)(block+8, pixels+8, line_size, h);
00588 }
00589
00590