/*
 * Copyright (C) 2002 Paul Mackerras, IBM Corp.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <asm/processor.h>
#include <asm/ppc_asm.h>
#include <asm/export.h>

#ifdef __BIG_ENDIAN__
#define sLd sld		/* Shift towards low-numbered address. */
#define sHd srd		/* Shift towards high-numbered address. */
#else
#define sLd srd		/* Shift towards low-numbered address. */
#define sHd sld		/* Shift towards high-numbered address. */
#endif

/*
 * __copy_tofrom_user(to, from, n): r3 = destination, r4 = source,
 * r5 = byte count.  Returns the number of bytes NOT copied in r3
 * (0 on complete success).
 */
	.align	7
_GLOBAL_TOC(__copy_tofrom_user)
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	b	__copy_tofrom_user_power7
ALT_FTR_SECTION_END_IFCLR(CPU_FTR_VMX_COPY)
_GLOBAL(__copy_tofrom_user_base)
	/* first check for a whole page copy on a page boundary */
	cmpldi	cr1,r5,16
	cmpdi	cr6,r5,4096
	or	r0,r3,r4
	neg	r6,r3		/* LS 3 bits = # bytes to 8-byte dest bdry */
	andi.	r0,r0,4095
	std	r3,-24(r1)
	crand	cr0*4+2,cr0*4+2,cr6*4+2	/* both page-aligned and len == 4096? */
	std	r4,-16(r1)
	std	r5,-8(r1)
	dcbt	0,r4
	beq	.Lcopy_page_4K
	andi.	r6,r6,7
	PPC_MTOCRF(0x01,r5)
	blt	cr1,.Lshort_copy
/* Below we want to nop out the bne if we're on a CPU that has the
 * CPU_FTR_UNALIGNED_LD_STD bit set and the CPU_FTR_CP_USE_DCBTZ bit
 * cleared.
 * At the time of writing the only CPU that has this combination of bits
 * set is Power6.
 */
BEGIN_FTR_SECTION
	nop
FTR_SECTION_ELSE
	bne	.Ldst_unaligned
ALT_FTR_SECTION_END(CPU_FTR_UNALIGNED_LD_STD | CPU_FTR_CP_USE_DCBTZ, \
		    CPU_FTR_UNALIGNED_LD_STD)
.Ldst_aligned:
	addi	r3,r3,-16
BEGIN_FTR_SECTION
	andi.	r0,r4,7
	bne	.Lsrc_unaligned
END_FTR_SECTION_IFCLR(CPU_FTR_UNALIGNED_LD_STD)
	blt	cr1,.Ldo_tail		/* if < 16 bytes to copy */
	srdi	r0,r5,5
	cmpdi	cr1,r0,0
20:	ld	r7,0(r4)
220:	ld	r6,8(r4)
	addi	r4,r4,16
	mtctr	r0
	andi.	r0,r5,0x10
	beq	22f
	addi	r3,r3,16
	addi	r4,r4,-16
	mr	r9,r7
	mr	r8,r6
	beq	cr1,72f
21:	ld	r7,16(r4)
221:	ld	r6,24(r4)
	addi	r4,r4,32
70:	std	r9,0(r3)
270:	std	r8,8(r3)
22:	ld	r9,0(r4)
222:	ld	r8,8(r4)
71:	std	r7,16(r3)
271:	std	r6,24(r3)
	addi	r3,r3,32
	bdnz	21b
72:	std	r9,0(r3)
272:	std	r8,8(r3)
	andi.	r5,r5,0xf
	beq+	3f
	addi	r4,r4,16
.Ldo_tail:
	addi	r3,r3,16
	bf	cr7*4+0,246f
244:	ld	r9,0(r4)
	addi	r4,r4,8
245:	std	r9,0(r3)
	addi	r3,r3,8
246:	bf	cr7*4+1,1f
23:	lwz	r9,0(r4)
	addi	r4,r4,4
73:	stw	r9,0(r3)
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
44:	lhz	r9,0(r4)
	addi	r4,r4,2
74:	sth	r9,0(r3)
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
45:	lbz	r9,0(r4)
75:	stb	r9,0(r3)
3:	li	r3,0
	blr
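
/*
 * Unaligned-source path: the source pointer is aligned down to a
 * doubleword boundary and read with aligned loads.  r0 holds (src & 7),
 * r10 = r0 * 8 is the shift in bits and r11 = 64 - r10.  Each
 * destination doubleword is assembled from two consecutive source
 * doublewords as (earlier sLd r10) | (later sHd r11); sLd/sHd shift
 * towards the low/high-numbered address, so the same code serves both
 * endiannesses.
 */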
.Lsrc_unaligned:
	srdi	r6,r5,3
	addi	r5,r5,-16
	subf	r4,r0,r4
	srdi	r7,r5,4
	sldi	r10,r0,3
	cmpldi	cr6,r6,3
	andi.	r5,r5,7
	mtctr	r7
	subfic	r11,r10,64
	add	r5,r5,r0
	bt	cr7*4+0,28f

24:	ld	r9,0(r4)	/* 3+2n loads, 2+2n stores */
25:	ld	r0,8(r4)
	sLd	r6,r9,r10
26:	ldu	r9,16(r4)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	or	r7,r7,r6
	blt	cr6,79f
27:	ld	r0,8(r4)
	b	2f

28:	ld	r0,0(r4)	/* 4+2n loads, 3+2n stores */
29:	ldu	r9,8(r4)
	sLd	r8,r0,r10
	addi	r3,r3,-8
	blt	cr6,5f
30:	ld	r0,8(r4)
	sHd	r12,r9,r11
	sLd	r6,r9,r10
31:	ldu	r9,16(r4)
	or	r12,r8,r12
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	addi	r3,r3,16
	beq	cr6,78f

1:	or	r7,r7,r6
32:	ld	r0,8(r4)
76:	std	r12,8(r3)
2:	sHd	r12,r9,r11
	sLd	r6,r9,r10
33:	ldu	r9,16(r4)
	or	r12,r8,r12
77:	stdu	r7,16(r3)
	sHd	r7,r0,r11
	sLd	r8,r0,r10
	bdnz	1b

78:	std	r12,8(r3)
	or	r7,r7,r6
79:	std	r7,16(r3)
5:	sHd	r12,r9,r11
	or	r12,r8,r12
80:	std	r12,24(r3)
	bne	6f
	li	r3,0
	blr
6:	cmpwi	cr1,r5,8
	addi	r3,r3,32
	sLd	r9,r9,r10
	ble	cr1,7f
34:	ld	r0,8(r4)
	sHd	r7,r0,r11
	or	r9,r7,r9
7:	bf	cr7*4+1,1f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,32
#endif
94:	stw	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,32
#endif
	addi	r3,r3,4
1:	bf	cr7*4+2,2f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,16
#endif
95:	sth	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,16
#endif
	addi	r3,r3,2
2:	bf	cr7*4+3,3f
#ifdef __BIG_ENDIAN__
	rotldi	r9,r9,8
#endif
96:	stb	r9,0(r3)
#ifdef __LITTLE_ENDIAN__
	rotrdi	r9,r9,8
#endif
3:	li	r3,0
	blr

.Ldst_unaligned:
	PPC_MTOCRF(0x01,r6)		/* put #bytes to 8B bdry into cr7 */
	subf	r5,r6,r5
	li	r7,0
	cmpldi	cr1,r5,16
	bf	cr7*4+3,1f
35:	lbz	r0,0(r4)
81:	stb	r0,0(r3)
	addi	r7,r7,1
1:	bf	cr7*4+2,2f
36:	lhzx	r0,r7,r4
82:	sthx	r0,r7,r3
	addi	r7,r7,2
2:	bf	cr7*4+1,3f
37:	lwzx	r0,r7,r4
83:	stwx	r0,r7,r3
3:	PPC_MTOCRF(0x01,r5)
	add	r4,r6,r4
	add	r3,r6,r3
	b	.Ldst_aligned

.Lshort_copy:
	bf	cr7*4+0,1f
38:	lwz	r0,0(r4)
39:	lwz	r9,4(r4)
	addi	r4,r4,8
84:	stw	r0,0(r3)
85:	stw	r9,4(r3)
	addi	r3,r3,8
1:	bf	cr7*4+1,2f
40:	lwz	r0,0(r4)
	addi	r4,r4,4
86:	stw	r0,0(r3)
	addi	r3,r3,4
2:	bf	cr7*4+2,3f
41:	lhz	r0,0(r4)
	addi	r4,r4,2
87:	sth	r0,0(r3)
	addi	r3,r3,2
3:	bf	cr7*4+3,4f
42:	lbz	r0,0(r4)
88:	stb	r0,0(r3)
4:	li	r3,0
	blr

/*
 * exception handlers follow
 * we have to return the number of bytes not copied
 *	for an exception on a load, we set the rest of the destination to 0
 */
136:
137:
	add	r3,r3,r7
	b	1f
130:
131:
	addi	r3,r3,8
120:
320:
122:
322:
124:
125:
126:
127:
128:
129:
133:
	addi	r3,r3,8
132:
	addi	r3,r3,8
121:
321:
344:
134:
135:
138:
139:
140:
141:
142:
123:
144:
145:

/*
 * here we have had a fault on a load and r3 points to the first
 * unmodified byte of the destination
 */
1:	ld	r6,-24(r1)
	ld	r4,-16(r1)
	ld	r5,-8(r1)
	subf	r6,r6,r3
	add	r4,r4,r6
	subf	r5,r6,r5	/* #bytes left to go */

/*
 * first see if we can copy any more bytes before hitting another exception
 */
	mtctr	r5
43:	lbz	r0,0(r4)
	addi	r4,r4,1
89:	stb	r0,0(r3)
	addi	r3,r3,1
	bdnz	43b
	li	r3,0		/* huh? all copied successfully this time? */
	blr

/*
 * here we have trapped again, need to clear ctr bytes starting at r3
 */
143:	mfctr	r5
	li	r0,0
	mr	r4,r3
	mr	r3,r5		/* return the number of bytes not copied */
1:	andi.	r9,r4,7
	beq	3f
90:	stb	r0,0(r4)
	addic.	r5,r5,-1
	addi	r4,r4,1
	bne	1b
	blr
3:	cmpldi	cr1,r5,8
	srdi	r9,r5,3
	andi.	r5,r5,7
	blt	cr1,93f
	mtctr	r9
91:	std	r0,0(r4)
	addi	r4,r4,8
	bdnz	91b
93:	beqlr
	mtctr	r5
92:	stb	r0,0(r4)
	addi	r4,r4,1
	bdnz	92b
	blr
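
/*
 * For context: the count of uncopied bytes returned in r3 is what the
 * generic copy_to_user()/copy_from_user() wrappers ultimately hand back
 * to their callers.  Typical caller-side C usage is roughly the
 * following sketch (kbuf, ubuf and len are placeholder names):
 *
 *	if (copy_from_user(kbuf, ubuf, len))
 *		return -EFAULT;		-- non-zero means a partial copy
 */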
/*
 * exception handlers for stores: we just need to work
 * out how many bytes weren't copied
 */
182:
183:
	add	r3,r3,r7
	b	1f
371:
180:
	addi	r3,r3,8
171:
177:
179:
	addi	r3,r3,8
370:
372:
176:
178:
	addi	r3,r3,4
185:
	addi	r3,r3,4
170:
172:
345:
173:
174:
175:
181:
184:
186:
187:
188:
189:
194:
195:
196:
1:	ld	r6,-24(r1)
	ld	r5,-8(r1)
	add	r6,r6,r5
	subf	r3,r3,r6	/* #bytes not copied */
190:
191:
192:
	blr			/* #bytes not copied in r3 */

	EX_TABLE(20b,120b)
	EX_TABLE(220b,320b)
	EX_TABLE(21b,121b)
	EX_TABLE(221b,321b)
	EX_TABLE(70b,170b)
	EX_TABLE(270b,370b)
	EX_TABLE(22b,122b)
	EX_TABLE(222b,322b)
	EX_TABLE(71b,171b)
	EX_TABLE(271b,371b)
	EX_TABLE(72b,172b)
	EX_TABLE(272b,372b)
	EX_TABLE(244b,344b)
	EX_TABLE(245b,345b)
	EX_TABLE(23b,123b)
	EX_TABLE(73b,173b)
	EX_TABLE(44b,144b)
	EX_TABLE(74b,174b)
	EX_TABLE(45b,145b)
	EX_TABLE(75b,175b)
	EX_TABLE(24b,124b)
	EX_TABLE(25b,125b)
	EX_TABLE(26b,126b)
	EX_TABLE(27b,127b)
	EX_TABLE(28b,128b)
	EX_TABLE(29b,129b)
	EX_TABLE(30b,130b)
	EX_TABLE(31b,131b)
	EX_TABLE(32b,132b)
	EX_TABLE(76b,176b)
	EX_TABLE(33b,133b)
	EX_TABLE(77b,177b)
	EX_TABLE(78b,178b)
	EX_TABLE(79b,179b)
	EX_TABLE(80b,180b)
	EX_TABLE(34b,134b)
	EX_TABLE(94b,194b)
	EX_TABLE(95b,195b)
	EX_TABLE(96b,196b)
	EX_TABLE(35b,135b)
	EX_TABLE(81b,181b)
	EX_TABLE(36b,136b)
	EX_TABLE(82b,182b)
	EX_TABLE(37b,137b)
	EX_TABLE(83b,183b)
	EX_TABLE(38b,138b)
	EX_TABLE(39b,139b)
	EX_TABLE(84b,184b)
	EX_TABLE(85b,185b)
	EX_TABLE(40b,140b)
	EX_TABLE(86b,186b)
	EX_TABLE(41b,141b)
	EX_TABLE(87b,187b)
	EX_TABLE(42b,142b)
	EX_TABLE(88b,188b)
	EX_TABLE(43b,143b)
	EX_TABLE(89b,189b)
	EX_TABLE(90b,190b)
	EX_TABLE(91b,191b)
	EX_TABLE(92b,192b)

/*
 * Routine to copy a whole page of data, optimized for POWER4.
 * On POWER4 it is more than 50% faster than the simple loop
 * above (following the .Ldst_aligned label).
 */
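/*
 * No stack frame is created here: the non-volatile registers r20-r31
 * are saved in the ABI-defined save area (red zone) just below the
 * stack pointer, at -32(r1) down to -120(r1), and are restored before
 * returning or before falling back to the generic path on a fault.
 */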
.Lcopy_page_4K:
	std	r31,-32(1)
	std	r30,-40(1)
	std	r29,-48(1)
	std	r28,-56(1)
	std	r27,-64(1)
	std	r26,-72(1)
	std	r25,-80(1)
	std	r24,-88(1)
	std	r23,-96(1)
	std	r22,-104(1)
	std	r21,-112(1)
	std	r20,-120(1)
	li	r5,4096/32 - 1
	addi	r3,r3,-8
	li	r0,5
0:	addi	r5,r5,-24
	mtctr	r0
20:	ld	r22,640(4)
21:	ld	r21,512(4)
22:	ld	r20,384(4)
23:	ld	r11,256(4)
24:	ld	r9,128(4)
25:	ld	r7,0(4)
26:	ld	r25,648(4)
27:	ld	r24,520(4)
28:	ld	r23,392(4)
29:	ld	r10,264(4)
30:	ld	r8,136(4)
31:	ldu	r6,8(4)
	cmpwi	r5,24
1:
32:	std	r22,648(3)
33:	std	r21,520(3)
34:	std	r20,392(3)
35:	std	r11,264(3)
36:	std	r9,136(3)
37:	std	r7,8(3)
38:	ld	r28,648(4)
39:	ld	r27,520(4)
40:	ld	r26,392(4)
41:	ld	r31,264(4)
42:	ld	r30,136(4)
43:	ld	r29,8(4)
44:	std	r25,656(3)
45:	std	r24,528(3)
46:	std	r23,400(3)
47:	std	r10,272(3)
48:	std	r8,144(3)
49:	std	r6,16(3)
50:	ld	r22,656(4)
51:	ld	r21,528(4)
52:	ld	r20,400(4)
53:	ld	r11,272(4)
54:	ld	r9,144(4)
55:	ld	r7,16(4)
56:	std	r28,664(3)
57:	std	r27,536(3)
58:	std	r26,408(3)
59:	std	r31,280(3)
60:	std	r30,152(3)
61:	stdu	r29,24(3)
62:	ld	r25,664(4)
63:	ld	r24,536(4)
64:	ld	r23,408(4)
65:	ld	r10,280(4)
66:	ld	r8,152(4)
67:	ldu	r6,24(4)
	bdnz	1b
68:	std	r22,648(3)
69:	std	r21,520(3)
70:	std	r20,392(3)
71:	std	r11,264(3)
72:	std	r9,136(3)
73:	std	r7,8(3)
74:	addi	r4,r4,640
75:	addi	r3,r3,648
	bge	0b
	mtctr	r5
76:	ld	r7,0(4)
77:	ld	r8,8(4)
78:	ldu	r9,16(4)
3:
79:	ld	r10,8(4)
80:	std	r7,8(3)
81:	ld	r7,16(4)
82:	std	r8,16(3)
83:	ld	r8,24(4)
84:	std	r9,24(3)
85:	ldu	r9,32(4)
86:	stdu	r10,32(3)
	bdnz	3b
4:
87:	ld	r10,8(4)
88:	std	r7,8(3)
89:	std	r8,16(3)
90:	std	r9,24(3)
91:	std	r10,32(3)
9:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	li	r3,0
	blr

/*
 * on an exception, reset to the beginning and jump back into the
 * standard __copy_tofrom_user
 */
100:	ld	r20,-120(1)
	ld	r21,-112(1)
	ld	r22,-104(1)
	ld	r23,-96(1)
	ld	r24,-88(1)
	ld	r25,-80(1)
	ld	r26,-72(1)
	ld	r27,-64(1)
	ld	r28,-56(1)
	ld	r29,-48(1)
	ld	r30,-40(1)
	ld	r31,-32(1)
	ld	r3,-24(r1)
	ld	r4,-16(r1)
	li	r5,4096
	b	.Ldst_aligned

	EX_TABLE(20b,100b)
	EX_TABLE(21b,100b)
	EX_TABLE(22b,100b)
	EX_TABLE(23b,100b)
	EX_TABLE(24b,100b)
	EX_TABLE(25b,100b)
	EX_TABLE(26b,100b)
	EX_TABLE(27b,100b)
	EX_TABLE(28b,100b)
	EX_TABLE(29b,100b)
	EX_TABLE(30b,100b)
	EX_TABLE(31b,100b)
	EX_TABLE(32b,100b)
	EX_TABLE(33b,100b)
	EX_TABLE(34b,100b)
	EX_TABLE(35b,100b)
	EX_TABLE(36b,100b)
	EX_TABLE(37b,100b)
	EX_TABLE(38b,100b)
	EX_TABLE(39b,100b)
	EX_TABLE(40b,100b)
	EX_TABLE(41b,100b)
	EX_TABLE(42b,100b)
	EX_TABLE(43b,100b)
	EX_TABLE(44b,100b)
	EX_TABLE(45b,100b)
	EX_TABLE(46b,100b)
	EX_TABLE(47b,100b)
	EX_TABLE(48b,100b)
	EX_TABLE(49b,100b)
	EX_TABLE(50b,100b)
	EX_TABLE(51b,100b)
	EX_TABLE(52b,100b)
	EX_TABLE(53b,100b)
	EX_TABLE(54b,100b)
	EX_TABLE(55b,100b)
	EX_TABLE(56b,100b)
	EX_TABLE(57b,100b)
	EX_TABLE(58b,100b)
	EX_TABLE(59b,100b)
	EX_TABLE(60b,100b)
	EX_TABLE(61b,100b)
	EX_TABLE(62b,100b)
	EX_TABLE(63b,100b)
	EX_TABLE(64b,100b)
	EX_TABLE(65b,100b)
	EX_TABLE(66b,100b)
	EX_TABLE(67b,100b)
	EX_TABLE(68b,100b)
	EX_TABLE(69b,100b)
	EX_TABLE(70b,100b)
	EX_TABLE(71b,100b)
	EX_TABLE(72b,100b)
	EX_TABLE(73b,100b)
	EX_TABLE(74b,100b)
	EX_TABLE(75b,100b)
	EX_TABLE(76b,100b)
	EX_TABLE(77b,100b)
	EX_TABLE(78b,100b)
	EX_TABLE(79b,100b)
	EX_TABLE(80b,100b)
	EX_TABLE(81b,100b)
	EX_TABLE(82b,100b)
	EX_TABLE(83b,100b)
	EX_TABLE(84b,100b)
	EX_TABLE(85b,100b)
	EX_TABLE(86b,100b)
	EX_TABLE(87b,100b)
	EX_TABLE(88b,100b)
	EX_TABLE(89b,100b)
	EX_TABLE(90b,100b)
	EX_TABLE(91b,100b)

EXPORT_SYMBOL(__copy_tofrom_user)