/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "asm_support_x86.S"

#define MEMCMP  __memcmp16

/* int32_t memcmp16_compare(const uint16_t* s0, const uint16_t* s1, size_t count); */

/*
 * Notes on the routine below (syntax: GNU as, AT&T operand order, 32-bit x86).
 *
 * The prototype comment above is the contract stated by the original author;
 * the symbol actually defined by this file is MEMCMP, i.e. __memcmp16.
 *
 * Inputs (all read from the stack, see BLK1/BLK2/LEN):
 *   4(%esp)   s0     first buffer of 16-bit units
 *   8(%esp)   s1     second buffer of 16-bit units
 *   12(%esp)  count  number of 16-bit units (doubled on entry to get bytes)
 *
 * Result (%eax):
 *   0 when count is 0 or no differing unit is found, otherwise
 *   s0[i] - s1[i] for the lowest-addressed differing unit, both operands
 *   zero-extended from 16 bits before the subtraction.
 *
 * Registers:
 *   %eax, %ecx, %edx, %xmm0-%xmm3 and the flags are overwritten.
 *   %ebx is pushed and popped on every non-trivial path; %esi and %edi are
 *   additionally pushed and popped on the path for 48 bytes or more.
 *
 * Instruction set: the shr_2 .. shr_14 paths use palignr, which is an SSSE3
 * instruction.
 *
 * NOTE(review): the misalignment dispatch only distinguishes even values
 * (0, 2, .. 12) and sends everything else to shr_14, so it presumably relies
 * on both pointers being 2-byte aligned -- confirm against callers.
 */

#ifndef L
# define L(label)    .L##label
#endif

/* Unwind bookkeeping that accompanies a 4-byte push / pop of REG. */
#define CFI_PUSH(REG) \
    CFI_ADJUST_CFA_OFFSET(4); \
    CFI_REL_OFFSET(REG, 0)

#define CFI_POP(REG) \
    CFI_ADJUST_CFA_OFFSET(-4); \
    CFI_RESTORE(REG)

#define PUSH(REG)    pushl REG; CFI_PUSH (REG)
#define POP(REG)     popl REG; CFI_POP (REG)

/* Stack offsets of the three arguments at function entry. */
#define PARMS        4
#define BLK1         PARMS
#define BLK2         BLK1+4
#define LEN          BLK2+4
/* Epilogue for the paths that have %ebx, %esi and %edi on the stack. */
#define RETURN_END   POP (%edi); POP (%esi); POP (%ebx); ret
#define RETURN       RETURN_END; CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16); CFI_REMEMBER_STATE

DEFINE_FUNCTION MEMCMP
    movl       LEN(%esp), %ecx

    /* %ecx = length in bytes; a zero count returns 0 immediately. */
    shl        $1, %ecx
    jz         L(zero)

    movl       BLK1(%esp), %eax
    cmp        $48, %ecx
    movl       BLK2(%esp), %edx
    jae        L(48bytesormore)

    /*
     * Fewer than 48 bytes: point %eax / %edx at the END of each buffer and let
     * L(less48bytes) compare units at negative offsets from there.
     */
    PUSH       (%ebx)
    add        %ecx, %edx
    add        %ecx, %eax
    jmp        L(less48bytes)

    CFI_POP    (%ebx)

    .p2align 4
L(zero):
    xor        %eax, %eax
    ret

    .p2align 4
L(48bytesormore):
    PUSH       (%ebx)
    PUSH       (%esi)
    PUSH       (%edi)
    CFI_REMEMBER_STATE
    /*
     * Compare the first 16 bytes with unaligned loads.
     * From here on %edi walks s0 and %esi walks s1.
     * pmovmskb yields 0xffff when all 16 bytes matched, so subtracting
     * 0xffff leaves zero (ZF set) exactly in the all-equal case; the two
     * lea instructions do not touch the flags.
     */
    movdqu     (%eax), %xmm3
    movdqu     (%edx), %xmm0
    movl       %eax, %edi
    movl       %edx, %esi
    pcmpeqb    %xmm0, %xmm3
    pmovmskb   %xmm3, %edx
    lea        16(%edi), %edi

    sub        $0xffff, %edx
    lea        16(%esi), %esi
    jnz        L(less16bytes)
    /*
     * Round %edi down to a 16-byte boundary, move %esi back by the same
     * amount and add that amount to the byte count in %ecx.
     */
    mov        %edi, %edx
    and        $0xf, %edx
    xor        %edx, %edi
    sub        %edx, %esi
    add        %edx, %ecx
    /*
     * %edx = low four bits of %esi. Zero means both cursors are 16-byte
     * aligned (shr_0). Otherwise %esi is rounded down as well and the
     * shr_N variant matching %edx is selected.
     */
    mov        %esi, %edx
    and        $0xf, %edx
    jz         L(shr_0)
    xor        %edx, %esi

    /*
     * %edx is non-zero here: the jz above already handled zero and the xor
     * does not modify %edx, so no further test against 0 is needed.
     */
    cmp        $2, %edx
    je         L(shr_2)
    cmp        $4, %edx
    je         L(shr_4)
    cmp        $6, %edx
    je         L(shr_6)
    cmp        $8, %edx
    je         L(shr_8)
    cmp        $10, %edx
    je         L(shr_10)
    cmp        $12, %edx
    je         L(shr_12)
    jmp        L(shr_14)

    /*
     * shr_0: both cursors 16-byte aligned. %eax is cleared because L(exit)
     * adds %eax to %esi. With fewer than 80 bytes in %ecx, one 32-byte block
     * is compared here and the remainder is handed to L(less48bytes).
     */
    .p2align 4
L(shr_0):
    cmp        $80, %ecx
    jae        L(shr_0_gobble)
    lea        -48(%ecx), %ecx
    xor        %eax, %eax
    movaps     (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1
    movaps     16(%esi), %xmm2
    pcmpeqb    16(%edi), %xmm2
    pand       %xmm1, %xmm2
    pmovmskb   %xmm2, %edx
    add        $32, %edi
    add        $32, %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    /* Tail: %eax / %edx = end pointers, drop %edi / %esi, keep %ebx pushed. */
    lea        (%ecx, %edi,1), %eax
    lea        (%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_0_gobble):
    lea        -48(%ecx), %ecx
    movdqa     (%esi), %xmm0
    xor        %eax, %eax
    pcmpeqb    (%edi), %xmm0
    sub        $32, %ecx
    movdqa     16(%esi), %xmm2
    pcmpeqb    16(%edi), %xmm2
    /*
     * 32 bytes per iteration. "sub $32, %ecx" produces a borrow once the
     * count runs out; "sbb $0xffff, %edx" folds that borrow into the mask
     * test. The SSE moves/compares and the lea instructions in between leave
     * the flags alone, so the jz at the bottom loops only when all 32 bytes
     * matched and there was no borrow. The next 32 bytes are loaded and
     * compared ahead of time; %xmm1 keeps the first-half result for L(exit).
     */
L(shr_0_gobble_loop):
    pand       %xmm0, %xmm2
    sub        $32, %ecx
    pmovmskb   %xmm2, %edx
    movdqa     %xmm0, %xmm1
    movdqa     32(%esi), %xmm0
    movdqa     48(%esi), %xmm2
    sbb        $0xffff, %edx
    pcmpeqb    32(%edi), %xmm0
    pcmpeqb    48(%edi), %xmm2
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    jz         L(shr_0_gobble_loop)

    pand       %xmm0, %xmm2
    /* If the count went negative, undo the borrow in %edx and the last sub. */
    cmp        $0, %ecx
    jge        L(shr_0_gobble_loop_next)
    inc        %edx
    add        $32, %ecx
L(shr_0_gobble_loop_next):
    test       %edx, %edx
    jnz        L(exit)

    /* Check the 32 bytes that were compared ahead of time. */
    pmovmskb   %xmm2, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        (%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /*
     * shr_N (N = 2, 4, .. 14): %esi was rounded down by N bytes. Every s1
     * block is rebuilt from two neighbouring aligned 16-byte loads with
     * "palignr $N" and then compared against the aligned s0 block at %edi.
     * %eax keeps N so that L(exit) can add it back to %esi, and the tail
     * code adds N again when it forms the s1 end pointer.
     * The gobble loops use the same borrow scheme as L(shr_0_gobble_loop).
     */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_2):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_2_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $2, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $2, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        2(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_2_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $2, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $2, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_2_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $2, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $2, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_2_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_2_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_2_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        2(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /* Same scheme as shr_2 with a shift of 4 bytes. */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_4):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_4_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $4, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $4, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        4(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_4_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $4, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $4, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_4_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $4, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $4, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_4_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_4_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_4_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        4(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /* Same scheme as shr_2 with a shift of 6 bytes. */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_6):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_6_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $6, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $6, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        6(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_6_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $6, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $6, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_6_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $6, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $6, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_6_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_6_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_6_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        6(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /* Same scheme as shr_2 with a shift of 8 bytes. */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_8):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_8_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $8, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $8, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        8(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_8_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $8, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $8, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_8_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $8, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $8, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_8_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_8_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_8_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        8(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /* Same scheme as shr_2 with a shift of 10 bytes. */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_10):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_10_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $10, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $10, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        10(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_10_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $10, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $10, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_10_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $10, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $10, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_10_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_10_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_10_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        10(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /* Same scheme as shr_2 with a shift of 12 bytes. */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_12):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_12_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $12, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $12, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        12(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_12_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $12, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $12, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_12_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $12, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $12, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_12_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_12_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_12_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        12(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /*
     * Same scheme as shr_2 with a shift of 14 bytes. This is also the
     * fall-through target of the dispatch for any value it did not match.
     */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_14):
    cmp        $80, %ecx
    lea        -48(%ecx), %ecx
    mov        %edx, %eax
    jae        L(shr_14_gobble)

    movdqa     16(%esi), %xmm1
    movdqa     %xmm1, %xmm2
    palignr    $14, (%esi), %xmm1
    pcmpeqb    (%edi), %xmm1

    movdqa     32(%esi), %xmm3
    palignr    $14, %xmm2, %xmm3
    pcmpeqb    16(%edi), %xmm3

    pand       %xmm1, %xmm3
    pmovmskb   %xmm3, %edx
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)
    lea        (%ecx, %edi,1), %eax
    lea        14(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(shr_14_gobble):
    sub        $32, %ecx
    movdqa     16(%esi), %xmm0
    palignr    $14, (%esi), %xmm0
    pcmpeqb    (%edi), %xmm0

    movdqa     32(%esi), %xmm3
    palignr    $14, 16(%esi), %xmm3
    pcmpeqb    16(%edi), %xmm3

L(shr_14_gobble_loop):
    pand       %xmm0, %xmm3
    sub        $32, %ecx
    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1

    movdqa     64(%esi), %xmm3
    palignr    $14, 48(%esi), %xmm3
    sbb        $0xffff, %edx
    movdqa     48(%esi), %xmm0
    palignr    $14, 32(%esi), %xmm0
    pcmpeqb    32(%edi), %xmm0
    lea        32(%esi), %esi
    pcmpeqb    48(%edi), %xmm3

    lea        32(%edi), %edi
    jz         L(shr_14_gobble_loop)
    pand       %xmm0, %xmm3

    cmp        $0, %ecx
    jge        L(shr_14_gobble_next)
    inc        %edx
    add        $32, %ecx
L(shr_14_gobble_next):
    test       %edx, %edx
    jnz        L(exit)

    pmovmskb   %xmm3, %edx
    movdqa     %xmm0, %xmm1
    lea        32(%edi), %edi
    lea        32(%esi), %esi
    sub        $0xffff, %edx
    jnz        L(exit)

    lea        (%ecx, %edi,1), %eax
    lea        14(%ecx, %esi,1), %edx
    POP        (%edi)
    POP        (%esi)
    jmp        L(less48bytes)

    /*
     * A 32-byte block contained a difference. %xmm1 holds the compare result
     * of its first 16 bytes. If that half differs, step both cursors back by
     * 16 and use its mask; otherwise the mask already in %edx is used.
     * %eax (0 or the shr_N shift) is added back to %esi before the loads.
     */
    CFI_RESTORE_STATE_AND_DEF_CFA(esp, 16)
    CFI_REMEMBER_STATE
    .p2align 4
L(exit):
    pmovmskb   %xmm1, %ebx
    sub        $0xffff, %ebx
    jz         L(first16bytes)
    lea        -16(%esi), %esi
    lea        -16(%edi), %edi
    mov        %ebx, %edx

L(first16bytes):
    add        %eax, %esi
    /*
     * %edx = (byte-equality mask) - 0xffff, non-zero. The bit tests below
     * narrow down which of the eight 16-bit units in the 16 bytes ending at
     * %edi / %esi is loaded and subtracted: %dl covers the units at offsets
     * -16 .. -10, %dh those at -8 .. -2.
     */
L(less16bytes):
    test       %dl, %dl
    jz         L(next_four_words)
    test       $15, %dl
    jz         L(second_two_words)
    test       $3, %dl
    jz         L(second_word)
    movzwl     -16(%edi), %eax
    movzwl     -16(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(second_word):
    movzwl     -14(%edi), %eax
    movzwl     -14(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(second_two_words):
    test       $63, %dl
    jz         L(fourth_word)
    movzwl     -12(%edi), %eax
    movzwl     -12(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(fourth_word):
    movzwl     -10(%edi), %eax
    movzwl     -10(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(next_four_words):
    test       $15, %dh
    jz         L(fourth_two_words)
    test       $3, %dh
    jz         L(sixth_word)
    movzwl     -8(%edi), %eax
    movzwl     -8(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(sixth_word):
    movzwl     -6(%edi), %eax
    movzwl     -6(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(fourth_two_words):
    test       $63, %dh
    jz         L(eighth_word)
    movzwl     -4(%edi), %eax
    movzwl     -4(%esi), %ebx
    subl       %ebx, %eax
    RETURN

    .p2align 4
L(eighth_word):
    movzwl     -2(%edi), %eax
    movzwl     -2(%esi), %ebx
    subl       %ebx, %eax
    RETURN


    CFI_PUSH   (%ebx)

    /*
     * Tail dispatch. Entry point is L(less48bytes) with
     *   %eax = end of the s0 range, %edx = end of the s1 range,
     *   %ecx = number of bytes to compare, only %ebx on the stack.
     * Control jumps into the chain of unit compares at L(<n>bytes); any value
     * of %ecx that is not matched explicitly lands on the last label of its
     * group (6, 14, 22, 30, 38 or 46 bytes).
     */
    .p2align 4
L(more8bytes):
    cmp        $16, %ecx
    jae        L(more16bytes)
    cmp        $8, %ecx
    je         L(8bytes)
    cmp        $10, %ecx
    je         L(10bytes)
    cmp        $12, %ecx
    je         L(12bytes)
    jmp        L(14bytes)

    .p2align 4
L(more16bytes):
    cmp        $24, %ecx
    jae        L(more24bytes)
    cmp        $16, %ecx
    je         L(16bytes)
    cmp        $18, %ecx
    je         L(18bytes)
    cmp        $20, %ecx
    je         L(20bytes)
    jmp        L(22bytes)

    .p2align 4
L(more24bytes):
    cmp        $32, %ecx
    jae        L(more32bytes)
    cmp        $24, %ecx
    je         L(24bytes)
    cmp        $26, %ecx
    je         L(26bytes)
    cmp        $28, %ecx
    je         L(28bytes)
    jmp        L(30bytes)

    .p2align 4
L(more32bytes):
    cmp        $40, %ecx
    jae        L(more40bytes)
    cmp        $32, %ecx
    je         L(32bytes)
    cmp        $34, %ecx
    je         L(34bytes)
    cmp        $36, %ecx
    je         L(36bytes)
    jmp        L(38bytes)

    .p2align 4
L(less48bytes):
    cmp        $8, %ecx
    jae        L(more8bytes)
    cmp        $2, %ecx
    je         L(2bytes)
    cmp        $4, %ecx
    je         L(4bytes)
    jmp        L(6bytes)

    .p2align 4
L(more40bytes):
    cmp        $40, %ecx
    je         L(40bytes)
    cmp        $42, %ecx
    je         L(42bytes)
    cmp        $44, %ecx
    je         L(44bytes)
    jmp        L(46bytes)

    /*
     * Fall-through chain: each step loads one 16-bit unit from both buffers
     * at the same negative offset from the end pointers, leaves s0 - s1 in
     * %ecx and leaves through L(memcmp16_exit) if that difference is
     * non-zero. The final step writes its difference straight into %eax.
     */
    .p2align 4
L(46bytes):
    movzwl     -46(%eax), %ecx
    movzwl     -46(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(44bytes):
    movzwl     -44(%eax), %ecx
    movzwl     -44(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(42bytes):
    movzwl     -42(%eax), %ecx
    movzwl     -42(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(40bytes):
    movzwl     -40(%eax), %ecx
    movzwl     -40(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(38bytes):
    movzwl     -38(%eax), %ecx
    movzwl     -38(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(36bytes):
    movzwl     -36(%eax), %ecx
    movzwl     -36(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(34bytes):
    movzwl     -34(%eax), %ecx
    movzwl     -34(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(32bytes):
    movzwl     -32(%eax), %ecx
    movzwl     -32(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(30bytes):
    movzwl     -30(%eax), %ecx
    movzwl     -30(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(28bytes):
    movzwl     -28(%eax), %ecx
    movzwl     -28(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(26bytes):
    movzwl     -26(%eax), %ecx
    movzwl     -26(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(24bytes):
    movzwl     -24(%eax), %ecx
    movzwl     -24(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(22bytes):
    movzwl     -22(%eax), %ecx
    movzwl     -22(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(20bytes):
    movzwl     -20(%eax), %ecx
    movzwl     -20(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(18bytes):
    movzwl     -18(%eax), %ecx
    movzwl     -18(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(16bytes):
    movzwl     -16(%eax), %ecx
    movzwl     -16(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(14bytes):
    movzwl     -14(%eax), %ecx
    movzwl     -14(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(12bytes):
    movzwl     -12(%eax), %ecx
    movzwl     -12(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(10bytes):
    movzwl     -10(%eax), %ecx
    movzwl     -10(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(8bytes):
    movzwl     -8(%eax), %ecx
    movzwl     -8(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(6bytes):
    movzwl     -6(%eax), %ecx
    movzwl     -6(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(4bytes):
    movzwl     -4(%eax), %ecx
    movzwl     -4(%edx), %ebx
    subl       %ebx, %ecx
    jne        L(memcmp16_exit)
L(2bytes):
    movzwl     -2(%eax), %eax
    movzwl     -2(%edx), %ebx
    subl       %ebx, %eax
    POP        (%ebx)
    ret
    CFI_PUSH   (%ebx)

    /* Early exit of the chain above: the non-zero difference is in %ecx. */
    .p2align 4
L(memcmp16_exit):
    POP        (%ebx)
    mov        %ecx, %eax
    ret
END_FUNCTION MEMCMP