summaryrefslogtreecommitdiffstats
path: root/arch/arm/include/asm/assembler.h
blob: 5db0f692eec632961ddbe6e0d35d2147a69d4284 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
/* SPDX-License-Identifier: GPL-2.0-only */
/* SPDX-FileCopyrightText: 1996-2000 Russell King */

/*
 *  arch/arm/include/asm/assembler.h
 *
 *  This file contains arm architecture specific defines
 *  for the different processors.
 *
 *  Do not include any C declarations in this file - it is included by
 *  assembler source.
 */
#ifndef __ASSEMBLY__
#error "Only include this from assembly code"
#endif

#include <asm/ptrace.h>

/*
 * Shifter operands for picking bytes out of, or placing bytes into, a
 * 32-bit register without caring about the CPU byte order.
 *
 * get_byte_N shifts byte N down into bits 7:0, put_byte_N shifts bits 7:0
 * up into the position of byte N.  Byte 0 sits in the least significant
 * bits on little-endian builds and in the most significant bits when
 * __ARMEB__ is defined.  pull and push expand to opposite shift types
 * (lsr/lsl) and trade places on big-endian.  Nothing here masks: every
 * macro expands to a plain shift only.
 */
#ifdef __ARMEB__
/* big-endian */
#define pull		lsl
#define push		lsr
#define get_byte_0	lsr #24
#define get_byte_1	lsr #16
#define get_byte_2	lsr #8
#define get_byte_3	lsl #0
#define put_byte_0	lsl #24
#define put_byte_1	lsl #16
#define put_byte_2	lsl #8
#define put_byte_3	lsl #0
#else
/* little-endian */
#define pull		lsr
#define push		lsl
#define get_byte_0	lsl #0
#define get_byte_1	lsr #8
#define get_byte_2	lsr #16
#define get_byte_3	lsr #24
#define put_byte_0	lsl #0
#define put_byte_1	lsl #8
#define put_byte_2	lsl #16
#define put_byte_3	lsl #24
#endif

/*
 * PLD() wraps data preload code: its arguments are emitted unchanged when
 * building for ARMv5 or later and are dropped on older architectures.
 */
#if __LINUX_ARM_ARCH__ < 5
#define PLD(code...)
#else
#define PLD(code...)	code
#endif

/*
 * CALGN() wraps code that cacheline-aligns the destination pointer before
 * a bulk write to memory.  The wrapped code is only emitted for Feroceon
 * CPUs, where it pays off regardless of the cache mode.
 *
 * Experiments on StrongARM and XScale did not show the alignment to be
 * worthwhile when the cache is not set to write-allocate (XScale with
 * write-allocate would need further testing), so the macro expands to
 * nothing everywhere else.
 */
#ifndef CONFIG_CPU_FEROCEON
#define CALGN(code...)
#else
#define CALGN(code...) code
#endif

/*
 * Mask and unmask IRQs.
 *
 * ARMv6 and later use the dedicated cpsid/cpsie instructions.  Older
 * architectures overwrite the CPSR control field with a constant built
 * from SVC_MODE (plus PSR_I_BIT when masking).
 */
	.macro	disable_irq
#if __LINUX_ARM_ARCH__ >= 6
	cpsid	i
#else
	msr	cpsr_c, #PSR_I_BIT | SVC_MODE
#endif
	.endm

	.macro	enable_irq
#if __LINUX_ARM_ARCH__ >= 6
	cpsie	i
#else
	msr	cpsr_c, #SVC_MODE
#endif
	.endm

/*
 * Save the current IRQ state and disable IRQs.  Note that this macro
 * assumes FIQs are enabled, and that the processor is in SVC mode.
 *
 * @oldcpsr: register that receives the full CPSR value, read before
 *           disable_irq is executed
 */
	.macro	save_and_disable_irqs, oldcpsr
	mrs	\oldcpsr, cpsr
	disable_irq
	.endm

/*
 * Restore interrupt state previously stored in a register.  We don't
 * guarantee that this will preserve the flags.
 *
 * @oldcpsr: register holding the CPSR value to restore; only the control
 *           field (cpsr_c) is written back
 */
	.macro	restore_irqs, oldcpsr
	msr	cpsr_c, \oldcpsr
	.endm

/*
 * USER(insn) - emit insn and record it in the __ex_table section.
 *
 * The instruction is tagged with local label 9999, and a pair of words
 * (address of that instruction, address of the next local label 9001) is
 * appended to __ex_table.  The macro does not define 9001 itself, so the
 * code using USER() has to provide a "9001:" label after the use.
 * NOTE(review): presumably 9001 marks the fault fixup code - confirm
 * against the consumer of __ex_table.
 */
#define USER(x...)				\
9999:	x;					\
	.section __ex_table,"a";		\
	.align	3;				\
	.long	9999b,9001f;			\
	.previous


/*
 * Endianness-selected code: CPU_BE() keeps its argument only when
 * CONFIG_CPU_BIG_ENDIAN is set, CPU_LE() only when it is not.  Whichever
 * one does not apply expands to nothing.
 */
#ifndef CONFIG_CPU_BIG_ENDIAN
#define CPU_BE(code...)
#define CPU_LE(code...) code
#else
#define CPU_BE(code...) code
#define CPU_LE(code...)
#endif

#ifdef CONFIG_CPU_64
/*
 * Pseudo-ops for PC-relative adr/ldr/str <reg>, <symbol> where
 * <symbol> is within the range +/- 4 GB of the PC.
 */
	/*
	 * adr_l - compute the address of a symbol with a PC-relative
	 *         adrp/add pair
	 *
	 * @dst: destination register (64 bit wide)
	 * @sym: name of the symbol
	 *
	 * Both arguments are mandatory.  They are marked :req, as in the
	 * 32-bit variant of this macro, so that a forgotten operand is
	 * reported at the macro invocation instead of as a malformed adrp.
	 */
	.macro	adr_l, dst:req, sym:req
	adrp	\dst, \sym
	add	\dst, \dst, :lo12:\sym
	.endm

	/*
	 * ldr_l - load from a symbol using a PC-relative adrp/ldr pair
	 *
	 * @dst: destination register (32 or 64 bit wide)
	 * @sym: name of the symbol
	 * @tmp: optional 64-bit scratch register to be used if <dst> is a
	 *       32-bit wide register, in which case it cannot be used to hold
	 *       the address
	 *
	 * <dst> and <sym> are mandatory and marked :req, as in the 32-bit
	 * variant of this macro, so that a forgotten operand is reported at
	 * the macro invocation.  Without <tmp>, <dst> itself holds the page
	 * address between the two instructions.
	 */
	.macro	ldr_l, dst:req, sym:req, tmp=
	.ifb	\tmp
	adrp	\dst, \sym
	ldr	\dst, [\dst, :lo12:\sym]
	.else
	adrp	\tmp, \sym
	ldr	\dst, [\tmp, :lo12:\sym]
	.endif
	.endm

	/*
	 * str_l - store to a symbol using a PC-relative adrp/str pair
	 *
	 * @src: source register (32 or 64 bit wide)
	 * @sym: name of the symbol
	 * @tmp: mandatory 64-bit scratch register to calculate the address
	 *       while <src> needs to be preserved.
	 *
	 * All three arguments are marked :req, as in the 32-bit variant of
	 * this macro, so that the "mandatory" contract of <tmp> is enforced
	 * by the assembler at the macro invocation.
	 */
	.macro	str_l, src:req, sym:req, tmp:req
	adrp	\tmp, \sym
	str	\src, [\tmp, :lo12:\sym]
	.endm

#else

	/*
	 * __adldst_l - common backend of adr_l, ldr_l and str_l
	 *
	 * @op:  'add' to produce the address of \sym, otherwise the load or
	 *       store mnemonic to apply to \sym (the wrappers in this file
	 *       pass ldr and str)
	 * @reg: register operand of \op
	 * @sym: name of the symbol
	 * @tmp: register that receives the PC-relative offset of \sym; the
	 *       adr_l and ldr_l wrappers pass the same register as \reg
	 * @c:   conditional opcode suffix (may be blank)
	 *
	 * The offset "\sym - .Lpc" is first materialised in \tmp and then
	 * combined with pc.  .Lpc is a per-expansion symbol set to the value
	 * pc is expected to read as in the instruction that consumes it.
	 */
	.macro		__adldst_l, op, reg, sym, tmp, c
	.if		__LINUX_ARM_ARCH__ < 7
	/* pre-v7: fetch the offset from a literal word kept in subsection 1 */
	ldr\c		\tmp, .La\@
	.subsection	1
	.align		2
.La\@:	.long		\sym - .Lpc\@
	.previous
	.else
	/*
	 * v7 and later: build the offset with a movw/movt pair.  With a
	 * condition suffix, an ittt is passed to THUMB() (defined elsewhere;
	 * presumably only emitted in Thumb-2 builds - confirm).
	 */
	.ifnb		\c
 THUMB(	ittt		\c			)
	.endif
	movw\c		\tmp, #:lower16:\sym - .Lpc\@
	movt\c		\tmp, #:upper16:\sym - .Lpc\@
	.endif

#ifndef CONFIG_THUMB2_BAREBOX
	/* pc is consumed directly by the final add or load/store */
	.set		.Lpc\@, . + 8			// PC bias
	.ifc		\op, add
	add\c		\reg, \tmp, pc
	.else
	\op\c		\reg, [pc, \tmp]
	.endif
#else
	/*
	 * Here the address is always formed in \tmp first.  For op == add
	 * nothing else is emitted, so the result is left in \tmp and \reg is
	 * not written; adr_l passes the same register for both.
	 */
.Lb\@:	add\c		\tmp, \tmp, pc
	/*
	 * In Thumb-2 builds, the PC bias depends on whether we are currently
	 * emitting into a .arm or a .thumb section. The size of the add opcode
	 * above will be 2 bytes when emitting in Thumb mode and 4 bytes when
	 * emitting in ARM mode, so let's use this to account for the bias.
	 */
	.set		.Lpc\@, . + (. - .Lb\@)

	.ifnc		\op, add
	\op\c		\reg, [\tmp]
	.endif
#endif
	.endm

	/*
	 * mov_l - load a constant value or a [relocated] address into a
	 *         register
	 *
	 * @dst:  destination register
	 * @imm:  constant expression or symbol
	 * @cond: conditional opcode suffix
	 *
	 * ARMv7 and later assemble a movw/movt pair; older architectures
	 * fall back to a literal load (ldr =).
	 */
	.macro		mov_l, dst:req, imm:req, cond
	.if		__LINUX_ARM_ARCH__ >= 7
	movw\cond	\dst, #:lower16:\imm
	movt\cond	\dst, #:upper16:\imm
	.else
	ldr\cond	\dst, =\imm
	.endif
	.endm

	/*
	 * adr_l - adr pseudo-op with unlimited range
	 *
	 * @dst: destination register
	 * @sym: name of the symbol
	 * @cond: conditional opcode suffix
	 *
	 * Expands to __adldst_l with op 'add'; \dst is also handed in as the
	 * scratch register, so no other register is touched.
	 */
	.macro		adr_l, dst:req, sym:req, cond
	__adldst_l	add, \dst, \sym, \dst, \cond
	.endm

	/*
	 * ldr_l - ldr <literal> pseudo-op with unlimited range
	 *
	 * @dst: destination register
	 * @sym: name of the symbol
	 * @cond: conditional opcode suffix
	 *
	 * Expands to __adldst_l with op 'ldr'; \dst is also handed in as the
	 * scratch register, so it holds the offset of \sym before the load
	 * overwrites it with the loaded value.
	 */
	.macro		ldr_l, dst:req, sym:req, cond
	__adldst_l	ldr, \dst, \sym, \dst, \cond
	.endm

	/*
	 * str_l - str <literal> pseudo-op with unlimited range
	 *
	 * @src: source register
	 * @sym: name of the symbol
	 * @tmp: mandatory scratch register
	 * @cond: conditional opcode suffix
	 *
	 * Expands to __adldst_l with op 'str'.  \tmp is overwritten with the
	 * offset of \sym; \src is only used as the operand of the store.
	 */
	.macro		str_l, src:req, sym:req, tmp:req, cond
	__adldst_l	str, \src, \sym, \tmp, \cond
	.endm

	/*
	 * __ldst_va - common backend of ldr_va and str_va
	 *
	 * @op:     load or store mnemonic used for the final access
	 * @reg:    register operand of \op
	 * @tmp:    register used as the base address of the final access
	 * @sym:    name of the symbol
	 * @cond:   conditional opcode suffix (may be blank)
	 * @offset: immediate offset used in the final access
	 */
	.macro		__ldst_va, op, reg, tmp, sym, cond, offset
#if __LINUX_ARM_ARCH__ >= 7 || \
    (defined(MODULE) && defined(CONFIG_ARM_MODULE_PLTS))
	/* load the address of \sym into \tmp via mov_l */
	mov_l		\tmp, \sym, \cond
#else
	/*
	 * Avoid a literal load, by emitting a sequence of ADD/LDR instructions
	 * with the appropriate relocations. The combined sequence has a range
	 * of -/+ 256 MiB, which should be sufficient for the core kernel and
	 * for modules loaded into the module region.
	 */
	.globl		\sym
	/*
	 * One relocation against \sym per instruction: .L0 and .L1 label the
	 * two sub instructions below, .L2 labels the final \op emitted after
	 * the #endif.
	 */
	.reloc		.L0_\@, R_ARM_ALU_PC_G0_NC, \sym
	.reloc		.L1_\@, R_ARM_ALU_PC_G1_NC, \sym
	.reloc		.L2_\@, R_ARM_LDR_PC_G2, \sym
.L0_\@: sub\cond	\tmp, pc, #8 - \offset
.L1_\@: sub\cond	\tmp, \tmp, #4 - \offset
.L2_\@:
#endif
	/* final access, shared by both variants */
	\op\cond	\reg, [\tmp, #\offset]
	.endm

	/*
	 * ldr_va - load a 32-bit word from the virtual address of \sym
	 *
	 * @rd:     destination register
	 * @sym:    name of the symbol
	 * @cond:   conditional opcode suffix
	 * @tmp:    optional register to hold the address; when left blank,
	 *          \rd itself is used for that purpose
	 * @offset: immediate offset passed on to __ldst_va (defaults to 0)
	 */
	.macro		ldr_va, rd:req, sym:req, cond, tmp, offset=0
	.ifb		\tmp
	__ldst_va	ldr, \rd, \rd, \sym, \cond, \offset
	.else
	__ldst_va	ldr, \rd, \tmp, \sym, \cond, \offset
	.endif
	.endm

	/*
	 * str_va - store a 32-bit word to the virtual address of \sym
	 *
	 * @rn:   register holding the value to store
	 * @sym:  name of the symbol
	 * @tmp:  mandatory register used to hold the address
	 * @cond: conditional opcode suffix
	 *
	 * Expands to __ldst_va with op 'str' and an offset of 0.
	 */
	.macro		str_va, rn:req, sym:req, tmp:req, cond
	__ldst_va	str, \rn, \tmp, \sym, \cond, 0
	.endm

	/*
	 * ldr_this_cpu - Load a 32-bit word from the per-CPU variable 'sym'
	 *		  into register 'rd', which may be the stack pointer,
	 *		  using 't1' and 't2' as general temp registers. These
	 *		  are permitted to overlap with 'rd' if != sp
	 *
	 * This implementation is a plain ldr_va of 'sym' with 't1' as the
	 * address register. 't2' is a required argument but is not
	 * referenced by the expansion.
	 */
	.macro		ldr_this_cpu, rd:req, sym:req, t1:req, t2:req
	ldr_va		\rd, \sym, tmp=\t1
	.endm

	/*
	 * rev_l - reverse the byte order of a 32-bit value in place
	 *
	 * @val: source/destination register
	 * @tmp: scratch register (only written on pre-ARMv6 builds)
	 *
	 * ARMv6 and later have a rev instruction.  Older architectures use
	 * a four instruction eor/bic/ror sequence that needs \tmp.
	 */
	.macro		rev_l, val:req, tmp:req
	.if		__LINUX_ARM_ARCH__ >= 6
	rev		\val, \val
	.else
	eor		\tmp, \val, \val, ror #16
	bic		\tmp, \tmp, #0x00ff0000
	mov		\val, \val, ror #8
	eor		\val, \val, \tmp, lsr #8
	.endif
	.endm

	/*
	 * bl_r - call the routine whose address is held in a register
	 *
	 * @dst: register holding the branch target
	 * @c: conditional opcode suffix
	 *
	 * ARMv6 and later use blx.  Older architectures copy pc into lr and
	 * then move \dst into pc.
	 */
	.macro		bl_r, dst:req, c
	.if		__LINUX_ARM_ARCH__ >= 6
	blx\c		\dst
	.else
	mov\c		lr, pc
	mov\c		pc, \dst
	.endif
	.endm
#endif