mirror of
				https://github.com/ossrs/srs.git
				synced 2025-03-09 15:49:59 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			578 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			578 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
| ! Copyright 2005-2016 The OpenSSL Project Authors. All Rights Reserved.
 | ||
| !
 | ||
| ! Licensed under the OpenSSL license (the "License").  You may not use
 | ||
| ! this file except in compliance with the License.  You can obtain a copy
 | ||
| ! in the file LICENSE in the source distribution or at
 | ||
| ! https://www.openssl.org/source/license.html
 | ||
| 
 | ||
| ! Detect a 64-bit SPARC V9 target from the compiler predefined macros.
 | ||
| #if defined(__SUNPRO_C) && defined(__sparcv9)
 | ||
| # define ABI64  /* They've said -xarch=v9 at command line */
 | ||
| #elif defined(__GNUC__) && defined(__arch64__)
 | ||
| # define ABI64  /* They've said -m64 at command line */
 | ||
| #endif
 | ||
| 
 | ||
| ! The two macros defined below are used by the routines in this file:
 | ||
| ! one is the (negative) stack adjustment handed to every save, the other
 | ||
| ! is the offset added to %sp or %fp when a stack address is formed
 | ||
| ! (2047 for the 64-bit ABI, 0 otherwise). The 64-bit branch also declares
 | ||
| ! %g2 and %g3 as scratch registers.
 | ||
| #ifdef ABI64
 | ||
|   .register	%g2,#scratch
 | ||
|   .register	%g3,#scratch
 | ||
| # define	FRAME	-192
 | ||
| # define	BIAS	2047
 | ||
| #else
 | ||
| # define	FRAME	-96
 | ||
| # define	BIAS	0
 | ||
| #endif
 | ||
| 
 | ||
| .text
 | ||
| .align	32
 | ||
| .global	OPENSSL_wipe_cpu
 | ||
| .type	OPENSSL_wipe_cpu,#function
 | ||
| ! Keep in mind that this does not excuse us from wiping the stack!
 | ||
| ! This routine wipes registers, but not the backing store [which
 | ||
| ! resides on the stack, toward lower addresses]. To facilitate for
 | ||
| ! stack wiping I return pointer to the top of stack of the *caller*.
 | ||
| !
 | ||
| ! Outline: open a register window, scrub the other register windows
 | ||
| ! (clean-windows trap on Solaris, recursive walk elsewhere), load two
 | ||
| ! zero words into %f0/%f1, copy them over the remaining FP registers
 | ||
| ! while clearing integer registers, and return %fp+BIAS in %i0.
 | ||
| ! Registers never written here: %sp, %fp, %i7, %g6, %g7.
 | ||
| OPENSSL_wipe_cpu:
 | ||
| 	save	%sp,FRAME,%sp
 | ||
| 	nop
 | ||
| #ifdef __sun
 | ||
| #include <sys/trap.h>
 | ||
| 	ta	ST_CLEAN_WINDOWS
 | ||
| #else
 | ||
| 	call	.walk.reg.wins
 | ||
| #endif
 | ||
| 	nop
 | ||
| 	! Position-independent address of .zero: the delay slot loads the
 | ||
| 	! distance from the call instruction to .zero into %o0, and the
 | ||
| 	! helper adds %o7 (the address of the call) to it.
 | ||
| 	call	.PIC.zero.up
 | ||
| 	mov	.zero-(.-4),%o0
 | ||
| 	! Both loads read the same zero word.
 | ||
| 	ld	[%o0],%f0
 | ||
| 	ld	[%o0],%f1
 | ||
| 
 | ||
| 	subcc	%g0,1,%o0
 | ||
| 	! Following is V9 "rd %ccr,%o0" instruction. However! V8
 | ||
| 	! specification says that it ("rd %asr2,%o0" in V8 terms) does
 | ||
| 	! not cause illegal_instruction trap. It therefore can be used
 | ||
| 	! to determine if the CPU the code is executing on is V8- or
 | ||
| 	! V9-compliant, as V9 returns a distinct value of 0x99,
 | ||
| 	! "negative" and "borrow" bits set in both %icc and %xcc.
 | ||
| 	.word	0x91408000	!rd	%ccr,%o0
 | ||
| 	cmp	%o0,0x99
 | ||
| 	bne	.v8
 | ||
| 	nop
 | ||
| 			! Even though we do not use %fp register bank,
 | ||
| 			! we wipe it as memcpy might have used it...
 | ||
| 			! (V9 only: the words below are hand-encoded double
 | ||
| 			! moves covering %f62 down to %f32.)
 | ||
| 			.word	0xbfa00040	!fmovd	%f0,%f62
 | ||
| 			.word	0xbba00040	!...
 | ||
| 			.word	0xb7a00040
 | ||
| 			.word	0xb3a00040
 | ||
| 			.word	0xafa00040
 | ||
| 			.word	0xaba00040
 | ||
| 			.word	0xa7a00040
 | ||
| 			.word	0xa3a00040
 | ||
| 			.word	0x9fa00040
 | ||
| 			.word	0x9ba00040
 | ||
| 			.word	0x97a00040
 | ||
| 			.word	0x93a00040
 | ||
| 			.word	0x8fa00040
 | ||
| 			.word	0x8ba00040
 | ||
| 			.word	0x87a00040
 | ||
| 			.word	0x83a00040	!fmovd	%f0,%f32
 | ||
| ! Common tail for V8 and V9: single-precision moves copy the zero in
 | ||
| ! %f0/%f1 to %f31 down to %f2, interleaved with integer register clears.
 | ||
| .v8:			fmovs	%f1,%f31
 | ||
| 	clr	%o0
 | ||
| 			fmovs	%f0,%f30
 | ||
| 	clr	%o1
 | ||
| 			fmovs	%f1,%f29
 | ||
| 	clr	%o2
 | ||
| 			fmovs	%f0,%f28
 | ||
| 	clr	%o3
 | ||
| 			fmovs	%f1,%f27
 | ||
| 	clr	%o4
 | ||
| 			fmovs	%f0,%f26
 | ||
| 	clr	%o5
 | ||
| 			fmovs	%f1,%f25
 | ||
| 	clr	%o7
 | ||
| 			fmovs	%f0,%f24
 | ||
| 	clr	%l0
 | ||
| 			fmovs	%f1,%f23
 | ||
| 	clr	%l1
 | ||
| 			fmovs	%f0,%f22
 | ||
| 	clr	%l2
 | ||
| 			fmovs	%f1,%f21
 | ||
| 	clr	%l3
 | ||
| 			fmovs	%f0,%f20
 | ||
| 	clr	%l4
 | ||
| 			fmovs	%f1,%f19
 | ||
| 	clr	%l5
 | ||
| 			fmovs	%f0,%f18
 | ||
| 	clr	%l6
 | ||
| 			fmovs	%f1,%f17
 | ||
| 	clr	%l7
 | ||
| 			fmovs	%f0,%f16
 | ||
| 	clr	%i0
 | ||
| 			fmovs	%f1,%f15
 | ||
| 	clr	%i1
 | ||
| 			fmovs	%f0,%f14
 | ||
| 	clr	%i2
 | ||
| 			fmovs	%f1,%f13
 | ||
| 	clr	%i3
 | ||
| 			fmovs	%f0,%f12
 | ||
| 	clr	%i4
 | ||
| 			fmovs	%f1,%f11
 | ||
| 	clr	%i5
 | ||
| 			fmovs	%f0,%f10
 | ||
| 	clr	%g1
 | ||
| 			fmovs	%f1,%f9
 | ||
| 	clr	%g2
 | ||
| 			fmovs	%f0,%f8
 | ||
| 	clr	%g3
 | ||
| 			fmovs	%f1,%f7
 | ||
| 	clr	%g4
 | ||
| 			fmovs	%f0,%f6
 | ||
| 	clr	%g5
 | ||
| 			fmovs	%f1,%f5
 | ||
| 			fmovs	%f0,%f4
 | ||
| 			fmovs	%f1,%f3
 | ||
| 			fmovs	%f0,%f2
 | ||
| 
 | ||
| 	add	%fp,BIAS,%i0	! return pointer to caller´s top of stack
 | ||
| 
 | ||
| 	ret
 | ||
| 	restore
 | ||
| 
 | ||
| ! Two zero words used as the source for the FP register wipe above.
 | ||
| .zero:	.long	0x0,0x0
 | ||
| ! Leaf helper: returns %o0 + %o7 (the add runs in the retl delay slot).
 | ||
| .PIC.zero.up:
 | ||
| 	retl
 | ||
| 	add	%o0,%o7,%o0
 | ||
| #ifdef DEBUG
 | ||
| .global	walk_reg_wins
 | ||
| .type	walk_reg_wins,#function
 | ||
| walk_reg_wins:
 | ||
| #endif
 | ||
| ! Recursive register-window walker. Each level opens a new window with
 | ||
| ! save. If the %o7 found in that window equals %i7 the level returns at
 | ||
| ! once (only %o0 is cleared); if %o7 is zero the level does not recurse;
 | ||
| ! otherwise it calls itself again. On the way back a level clears
 | ||
| ! %o2-%o5, %o7 and %l0-%l7 (%o0 and %o1 were cleared in delay slots) and
 | ||
| ! returns %o0+1 in %i0.
 | ||
| .walk.reg.wins:
 | ||
| 	save	%sp,FRAME,%sp
 | ||
| 	cmp	%i7,%o7
 | ||
| 	be	2f
 | ||
| 	clr	%o0
 | ||
| 	cmp	%o7,0	! compiler never cleans %o7...
 | ||
| 	be	1f	! could have been a leaf function...
 | ||
| 	clr	%o1
 | ||
| 	call	.walk.reg.wins
 | ||
| 	nop
 | ||
| 1:	clr	%o2
 | ||
| 	clr	%o3
 | ||
| 	clr	%o4
 | ||
| 	clr	%o5
 | ||
| 	clr	%o7
 | ||
| 	clr	%l0
 | ||
| 	clr	%l1
 | ||
| 	clr	%l2
 | ||
| 	clr	%l3
 | ||
| 	clr	%l4
 | ||
| 	clr	%l5
 | ||
| 	clr	%l6
 | ||
| 	clr	%l7
 | ||
| 	add	%o0,1,%i0	! used for debugging
 | ||
| 2:	ret
 | ||
| 	restore
 | ||
| ! The size below also spans the .zero data and both helpers above.
 | ||
| .size	OPENSSL_wipe_cpu,.-OPENSSL_wipe_cpu
 | ||
| 
 | ||
| ! OPENSSL_atomic_add: %o0 = address of a 32-bit word, %o1 = amount.
 | ||
| ! Adds the amount to the word and returns the updated value in %o0.
 | ||
| ! The 64-bit build uses the cas loop at the end only. The 32-bit build
 | ||
| ! first probes for V9 with rd %ccr and, when the probe fails, falls back
 | ||
| ! to a swap-based lock on the word itself.
 | ||
| .global	OPENSSL_atomic_add
 | ||
| .type	OPENSSL_atomic_add,#function
 | ||
| .align	32
 | ||
| OPENSSL_atomic_add:
 | ||
| #ifndef ABI64
 | ||
| 	subcc	%g0,1,%o2
 | ||
| 	.word	0x95408000	!rd	%ccr,%o2, see comment above
 | ||
| 	cmp	%o2,0x99
 | ||
| 	be	.v9
 | ||
| 	nop
 | ||
| 	! V8 path: needs a register window because it may call out to yield.
 | ||
| 	save	%sp,FRAME,%sp
 | ||
| 	ba	.enter
 | ||
| 	nop
 | ||
| #ifdef __sun
 | ||
| ! Note that you do not have to link with libthread to call thr_yield,
 | ||
| ! as libc provides a stub, which is overloaded the moment you link
 | ||
| ! with *either* libpthread or libthread...
 | ||
| #define	YIELD_CPU	thr_yield
 | ||
| #else
 | ||
| ! applies at least to Linux and FreeBSD... Feedback expected...
 | ||
| #define	YIELD_CPU	sched_yield
 | ||
| #endif
 | ||
| .spin:	call	YIELD_CPU
 | ||
| 	nop
 | ||
| ! Lock protocol: swap stores -1 into the word as a busy marker and
 | ||
| ! fetches the previous contents; fetching -1 means the word is already
 | ||
| ! held, so yield and retry. Otherwise add the amount (delay slot of the
 | ||
| ! branch) and store the sum back, which also releases the word.
 | ||
| ! NOTE(review): the pre-check below compares against -4096 while the
 | ||
| ! marker written by swap is -1; confirm this is intended.
 | ||
| .enter:	ld	[%i0],%i2
 | ||
| 	cmp	%i2,-4096
 | ||
| 	be	.spin
 | ||
| 	mov	-1,%i2
 | ||
| 	swap	[%i0],%i2
 | ||
| 	cmp	%i2,-1
 | ||
| 	be	.spin
 | ||
| 	add	%i2,%i1,%i2
 | ||
| 	stbar
 | ||
| 	st	%i2,[%i0]
 | ||
| 	sra	%i2,%g0,%i0
 | ||
| 	ret
 | ||
| 	restore
 | ||
| .v9:
 | ||
| #endif
 | ||
| 	! V9 path: compare-and-swap loop. %o2 = expected old value,
 | ||
| 	! %o3 = proposed new value; cas leaves the value it found in %o3.
 | ||
| 	ld	[%o0],%o2
 | ||
| 1:	add	%o1,%o2,%o3
 | ||
| 	.word	0xd7e2100a	!cas [%o0],%o2,%o3, compare [%o0] with %o2 and swap %o3
 | ||
| 	cmp	%o2,%o3
 | ||
| 	bne	1b
 | ||
| 	mov	%o3,%o2		! cas is always fetching to dest. register
 | ||
| 	add	%o1,%o2,%o0	! OpenSSL expects the new value
 | ||
| 	retl
 | ||
| 	sra	%o0,%g0,%o0	! we return signed int, remember?
 | ||
| .size	OPENSSL_atomic_add,.-OPENSSL_atomic_add
 | ||
| 
 | ||
| ! _sparcv9_rdtick: returns the %tick register in %o0, with its upper
 | ||
| ! 32 bits also placed in %o1. When the rd %ccr probe does not return
 | ||
| ! 0x99 (not a V9 CPU) both %o0 and %o1 are returned as zero.
 | ||
| .global	_sparcv9_rdtick
 | ||
| .align	32
 | ||
| _sparcv9_rdtick:
 | ||
| 	subcc	%g0,1,%o0
 | ||
| 	.word	0x91408000	!rd	%ccr,%o0
 | ||
| 	cmp	%o0,0x99
 | ||
| 	bne	.notick
 | ||
| 	! delay slot, executed on both paths: %o0 = 0
 | ||
| 	xor	%o0,%o0,%o0
 | ||
| 	.word	0x91410000	!rd	%tick,%o0
 | ||
| 	retl
 | ||
| 	.word	0x93323020	!srlx	%o0,32,%o1
 | ||
| .notick:
 | ||
| 	retl
 | ||
| 	xor	%o1,%o1,%o1
 | ||
| .type	_sparcv9_rdtick,#function
 | ||
| .size	_sparcv9_rdtick,.-_sparcv9_rdtick
 | ||
| 
 | ||
| ! _sparcv9_vis1_probe: executes two hand-encoded VIS1 instructions, a
 | ||
| ! short FP load from the stack (address %sp+BIAS+2) into %f0 and an fxor
 | ||
| ! that zeroes %f0 in the retl delay slot. No value is computed for the
 | ||
| ! caller; presumably the caller detects support by trapping an illegal
 | ||
| ! instruction (confirm against the C caller).
 | ||
| .global	_sparcv9_vis1_probe
 | ||
| .align	8
 | ||
| _sparcv9_vis1_probe:
 | ||
| 	add	%sp,BIAS+2,%o1
 | ||
| 	.word	0xc19a5a40	!ldda	[%o1]ASI_FP16_P,%f0
 | ||
| 	retl
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| .type	_sparcv9_vis1_probe,#function
 | ||
| .size	_sparcv9_vis1_probe,.-_sparcv9_vis1_probe
 | ||
| 
 | ||
| ! Probe and instrument VIS1 instruction. Output is number of cycles it
 | ||
| ! takes to execute rdtick and pair of VIS1 instructions. US-Tx VIS unit
 | ||
| ! is slow (documented to be 6 cycles on T2) and the core is in-order
 | ||
| ! single-issue, it should be possible to distinguish Tx reliably...
 | ||
| ! Observed return values are:
 | ||
| !
 | ||
| !	UltraSPARC IIe		7
 | ||
| !	UltraSPARC III		7
 | ||
| !	UltraSPARC T1		24
 | ||
| !	SPARC T4		65(*)
 | ||
| !
 | ||
| ! (*)	result has lesser to do with VIS instruction latencies, rdtick
 | ||
| !	appears that slow, but it does the trick in sense that FP and
 | ||
| !	VIS code paths are still slower than integer-only ones.
 | ||
| !
 | ||
| ! Numbers for T2 and SPARC64 V-VII are more than welcomed.
 | ||
| !
 | ||
| ! It would be possible to detect specifically US-T1 by instrumenting
 | ||
| ! fmul8ulx16, which is emulated on T1 and as such accounts for quite
 | ||
| ! a lot of %tick-s, couple of thousand on Linux...
 | ||
| ! _sparcv9_vis1_instrument: takes five %tick samples, each separated by
 | ||
| ! a pair of fxor instructions, and returns in %o0 the smallest of the
 | ||
| ! four intervals between consecutive samples.
 | ||
| .global	_sparcv9_vis1_instrument
 | ||
| .align	8
 | ||
| _sparcv9_vis1_instrument:
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
 | ||
| 	.word	0x91410000	!rd	%tick,%o0
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
 | ||
| 	.word	0x93410000	!rd	%tick,%o1
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
 | ||
| 	.word	0x95410000	!rd	%tick,%o2
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
 | ||
| 	.word	0x97410000	!rd	%tick,%o3
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
 | ||
| 	.word	0x99410000	!rd	%tick,%o4
 | ||
| 
 | ||
| 	! calculate intervals
 | ||
| 	sub	%o1,%o0,%o0
 | ||
| 	sub	%o2,%o1,%o1
 | ||
| 	sub	%o3,%o2,%o2
 | ||
| 	sub	%o4,%o3,%o3
 | ||
| 
 | ||
| 	! find minimum value
 | ||
| 	! Each step is an annulled branch to the instruction after its
 | ||
| 	! delay slot, so the mov runs only when %o0 is the larger value
 | ||
| 	! (unsigned). The displacement is hard-coded in the .word; do not
 | ||
| 	! insert instructions between a branch and the following cmp.
 | ||
| 	cmp	%o0,%o1
 | ||
| 	.word	0x38680002	!bgu,a	%xcc,.+8
 | ||
| 	mov	%o1,%o0
 | ||
| 	cmp	%o0,%o2
 | ||
| 	.word	0x38680002	!bgu,a	%xcc,.+8
 | ||
| 	mov	%o2,%o0
 | ||
| 	cmp	%o0,%o3
 | ||
| 	.word	0x38680002	!bgu,a	%xcc,.+8
 | ||
| 	mov	%o3,%o0
 | ||
| 
 | ||
| 	retl
 | ||
| 	nop
 | ||
| .type	_sparcv9_vis1_instrument,#function
 | ||
| .size	_sparcv9_vis1_instrument,.-_sparcv9_vis1_instrument
 | ||
| 
 | ||
| ! _sparcv9_vis2_probe: executes one hand-encoded VIS2 instruction
 | ||
| ! (bshuffle on %f0) in the retl delay slot and returns.
 | ||
| .global	_sparcv9_vis2_probe
 | ||
| .align	8
 | ||
| _sparcv9_vis2_probe:
 | ||
| 	retl
 | ||
| 	.word	0x81b00980	!bshuffle	%f0,%f0,%f0
 | ||
| .type	_sparcv9_vis2_probe,#function
 | ||
| .size	_sparcv9_vis2_probe,.-_sparcv9_vis2_probe
 | ||
| 
 | ||
| ! _sparcv9_fmadd_probe: zeroes %f0 and %f2 with fxor, then executes one
 | ||
| ! hand-encoded fused multiply-add on them in the retl delay slot.
 | ||
| .global	_sparcv9_fmadd_probe
 | ||
| .align	8
 | ||
| _sparcv9_fmadd_probe:
 | ||
| 	.word	0x81b00d80	!fxor	%f0,%f0,%f0
 | ||
| 	.word	0x85b08d82	!fxor	%f2,%f2,%f2
 | ||
| 	retl
 | ||
| 	.word	0x81b80440	!fmaddd	%f0,%f0,%f2,%f0
 | ||
| .type	_sparcv9_fmadd_probe,#function
 | ||
| .size	_sparcv9_fmadd_probe,.-_sparcv9_fmadd_probe
 | ||
| 
 | ||
| ! _sparcv9_rdcfr: reads ancillary state register 26 into %o0 (the read
 | ||
| ! is hand-encoded and sits in the retl delay slot) and returns it.
 | ||
| .global	_sparcv9_rdcfr
 | ||
| .align	8
 | ||
| _sparcv9_rdcfr:
 | ||
| 	retl
 | ||
| 	.word	0x91468000	!rd	%asr26,%o0
 | ||
| .type	_sparcv9_rdcfr,#function
 | ||
| .size	_sparcv9_rdcfr,.-_sparcv9_rdcfr
 | ||
| 
 | ||
| ! _sparcv9_vis3_probe: executes one hand-encoded xmulx in the retl delay
 | ||
| ! slot. All operands are %g0, so no register is modified.
 | ||
| .global	_sparcv9_vis3_probe
 | ||
| .align	8
 | ||
| _sparcv9_vis3_probe:
 | ||
| 	retl
 | ||
| 	.word	0x81b022a0	!xmulx	%g0,%g0,%g0
 | ||
| .type	_sparcv9_vis3_probe,#function
 | ||
| .size	_sparcv9_vis3_probe,.-_sparcv9_vis3_probe
 | ||
| 
 | ||
| ! _sparcv9_random: executes the hand-encoded "random %o0" instruction in
 | ||
| ! the retl delay slot; whatever it writes to %o0 is the return value.
 | ||
| .global	_sparcv9_random
 | ||
| .align	8
 | ||
| _sparcv9_random:
 | ||
| 	retl
 | ||
| 	.word	0x91b002a0	!random	%o0
 | ||
| .type	_sparcv9_random,#function
 | ||
| ! Size is measured from this routine's own label (it used to be measured
 | ||
| ! from the preceding probe, which overstated the symbol size).
 | ||
| .size	_sparcv9_random,.-_sparcv9_random
 | ||
| 
 | ||
| ! _sparcv9_fjaesx_probe: executes one hand-encoded faesencx instruction
 | ||
| ! and returns. No result is produced for the caller.
 | ||
| .global	_sparcv9_fjaesx_probe
 | ||
| .align	8
 | ||
| _sparcv9_fjaesx_probe:
 | ||
| 	.word	0x81b09206	!faesencx %f2,%f6,%f0
 | ||
| 	retl
 | ||
| 	nop
 | ||
| ! Mark the symbol as a function, like every other exported routine here.
 | ||
| .type	_sparcv9_fjaesx_probe,#function
 | ||
| .size	_sparcv9_fjaesx_probe,.-_sparcv9_fjaesx_probe
 | ||
| 
 | ||
| ! OPENSSL_cleanse: %o0 = buffer address, %o1 = length in bytes.
 | ||
| ! Writes zero bytes over the buffer. Lengths up to 14 use the byte loop
 | ||
| ! at .Little (a zero length returns at once). Longer buffers go to .Lot,
 | ||
| ! which stores single bytes until the address is aligned, then stores
 | ||
| ! whole words (8 bytes on V9, 4 bytes on V8), and finishes any remainder
 | ||
| ! through .Little.
 | ||
| .global	OPENSSL_cleanse
 | ||
| .align	32
 | ||
| OPENSSL_cleanse:
 | ||
| 	cmp	%o1,14
 | ||
| 	nop
 | ||
| #ifdef ABI64
 | ||
| 	bgu	%xcc,.Lot
 | ||
| #else
 | ||
| 	bgu	.Lot
 | ||
| #endif
 | ||
| 	! delay slot: this compare feeds the bne below on the short path
 | ||
| 	cmp	%o1,0
 | ||
| 	bne	.Little
 | ||
| 	nop
 | ||
| 	retl
 | ||
| 	nop
 | ||
| 
 | ||
| ! Byte loop: store one zero byte per iteration until %o1 reaches zero.
 | ||
| ! Must be entered with %o1 != 0.
 | ||
| .Little:
 | ||
| 	stb	%g0,[%o0]
 | ||
| 	subcc	%o1,1,%o1
 | ||
| 	bnz	.Little
 | ||
| 	add	%o0,1,%o0
 | ||
| 	retl
 | ||
| 	nop
 | ||
| .align	32
 | ||
| .Lot:
 | ||
| #ifndef ABI64
 | ||
| 	subcc	%g0,1,%g1
 | ||
| 	! see above for explanation
 | ||
| 	.word	0x83408000	!rd	%ccr,%g1
 | ||
| 	cmp	%g1,0x99
 | ||
| 	bne	.v8lot
 | ||
| 	nop
 | ||
| #endif
 | ||
| 
 | ||
| ! V9: byte stores until %o0 is a multiple of 8.
 | ||
| .v9lot:	andcc	%o0,7,%g0
 | ||
| 	bz	.v9aligned
 | ||
| 	nop
 | ||
| 	stb	%g0,[%o0]
 | ||
| 	sub	%o1,1,%o1
 | ||
| 	ba	.v9lot
 | ||
| 	add	%o0,1,%o0
 | ||
| ! Padding is filled with 0x01000000, the nop encoding.
 | ||
| .align	16,0x01000000
 | ||
| ! 8-byte stores while at least 8 bytes remain. The loop branch is a raw
 | ||
| ! word with a fixed displacement of -3 instructions, so nothing may be
 | ||
| ! inserted between this label and the branch.
 | ||
| .v9aligned:
 | ||
| 	.word	0xc0720000	!stx	%g0,[%o0]
 | ||
| 	sub	%o1,8,%o1
 | ||
| 	andcc	%o1,-8,%g0
 | ||
| #ifdef ABI64
 | ||
| 	.word	0x126ffffd	!bnz	%xcc,.v9aligned
 | ||
| #else
 | ||
| 	.word	0x124ffffd	!bnz	%icc,.v9aligned
 | ||
| #endif
 | ||
| 	add	%o0,8,%o0
 | ||
| 
 | ||
| 	cmp	%o1,0
 | ||
| 	bne	.Little
 | ||
| 	nop
 | ||
| 	retl
 | ||
| 	nop
 | ||
| #ifndef ABI64
 | ||
| ! V8: same scheme with 4-byte alignment and 4-byte stores.
 | ||
| .v8lot:	andcc	%o0,3,%g0
 | ||
| 	bz	.v8aligned
 | ||
| 	nop
 | ||
| 	stb	%g0,[%o0]
 | ||
| 	sub	%o1,1,%o1
 | ||
| 	ba	.v8lot
 | ||
| 	add	%o0,1,%o0
 | ||
| 	nop
 | ||
| .v8aligned:
 | ||
| 	st	%g0,[%o0]
 | ||
| 	sub	%o1,4,%o1
 | ||
| 	andcc	%o1,-4,%g0
 | ||
| 	bnz	.v8aligned
 | ||
| 	add	%o0,4,%o0
 | ||
| 
 | ||
| 	cmp	%o1,0
 | ||
| 	bne	.Little
 | ||
| 	nop
 | ||
| 	retl
 | ||
| 	nop
 | ||
| #endif
 | ||
| .type	OPENSSL_cleanse,#function
 | ||
| .size	OPENSSL_cleanse,.-OPENSSL_cleanse
 | ||
| 
 | ||
| ! CRYPTO_memcmp: %o0 and %o1 = buffer addresses, %o2 = length in bytes.
 | ||
| ! Returns 0 in %o0 when all byte pairs are equal (or the length is
 | ||
| ! zero) and 1 otherwise. The loop has no early exit: every byte pair is
 | ||
| ! read and the XOR of each pair is ORed into the accumulator %g1.
 | ||
| .global	CRYPTO_memcmp
 | ||
| .align	16
 | ||
| CRYPTO_memcmp:
 | ||
| 	cmp	%o2,0
 | ||
| #ifdef ABI64
 | ||
| 	beq,pn	%xcc,.Lno_data
 | ||
| #else
 | ||
| 	beq	.Lno_data
 | ||
| #endif
 | ||
| 	! delay slot: accumulator = 0 (also the result for zero length)
 | ||
| 	xor	%g1,%g1,%g1
 | ||
| 	nop
 | ||
| 
 | ||
| .Loop_cmp:
 | ||
| 	ldub	[%o0],%o3
 | ||
| 	add	%o0,1,%o0
 | ||
| 	ldub	[%o1],%o4
 | ||
| 	add	%o1,1,%o1
 | ||
| 	subcc	%o2,1,%o2
 | ||
| 	xor	%o3,%o4,%o4
 | ||
| #ifdef ABI64
 | ||
| 	bnz	%xcc,.Loop_cmp
 | ||
| #else
 | ||
| 	bnz	.Loop_cmp
 | ||
| #endif
 | ||
| 	or	%o4,%g1,%g1
 | ||
| 
 | ||
| 	! Collapse the accumulator (0..255) to 0 or 1: negate it and keep
 | ||
| 	! bit 31 of the result.
 | ||
| 	sub	%g0,%g1,%g1
 | ||
| 	srl	%g1,31,%g1
 | ||
| .Lno_data:
 | ||
| 	retl
 | ||
| 	mov	%g1,%o0
 | ||
| .type	CRYPTO_memcmp,#function
 | ||
| .size	CRYPTO_memcmp,.-CRYPTO_memcmp
 | ||
| 
 | ||
| ! _sparcv9_vis1_instrument_bus: %o0 = output array of 32-bit words,
 | ||
| ! %o1 = number of entries. Returns the original count in %o0.
 | ||
| ! For each entry: read %tick, compute the difference from the previous
 | ||
| ! reading, block-load and block-store the 64-byte line that contains the
 | ||
| ! entry, then add the difference to the entry with cas. The same
 | ||
| ! sequence is run once before the loop with a zero difference.
 | ||
| .global	_sparcv9_vis1_instrument_bus
 | ||
| .align	8
 | ||
| _sparcv9_vis1_instrument_bus:
 | ||
| 	mov	%o1,%o3					! save cnt
 | ||
| 	.word	0x99410000	!rd	%tick,%o4	! tick
 | ||
| 	mov	%o4,%o5					! lasttick = tick
 | ||
| 	set	0,%g4					! diff
 | ||
| 
 | ||
| 	! %g1 = %o0 rounded down to a 64-byte boundary
 | ||
| 	andn	%o0,63,%g1
 | ||
| 	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	ld	[%o0],%o4
 | ||
| 	add	%o4,%g4,%g4
 | ||
| 	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
 | ||
| 
 | ||
| .Loop:	.word	0x99410000	!rd	%tick,%o4
 | ||
| 	sub	%o4,%o5,%g4				! diff=tick-lasttick
 | ||
| 	mov	%o4,%o5					! lasttick=tick
 | ||
| 
 | ||
| 	andn	%o0,63,%g1
 | ||
| 	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	ld	[%o0],%o4
 | ||
| 	add	%o4,%g4,%g4
 | ||
| 	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
 | ||
| 	subcc	%o1,1,%o1				! --$cnt
 | ||
| 	bnz	.Loop
 | ||
| 	add	%o0,4,%o0				! ++$out
 | ||
| 
 | ||
| 	retl
 | ||
| 	mov	%o3,%o0
 | ||
| .type	_sparcv9_vis1_instrument_bus,#function
 | ||
| .size	_sparcv9_vis1_instrument_bus,.-_sparcv9_vis1_instrument_bus
 | ||
| 
 | ||
| ! _sparcv9_vis1_instrument_bus2: %o0 = output array of 32-bit words,
 | ||
| ! %o1 = number of entries, %o2 = maximum number of probe iterations.
 | ||
| ! Same probe sequence as _sparcv9_vis1_instrument_bus, but the output
 | ||
| ! pointer only advances when the measured difference is not equal to
 | ||
| ! the previous one. The loop ends when all entries are used or the
 | ||
| ! iteration budget in %o2 runs out. Returns in %o0 the original count
 | ||
| ! minus the entries still remaining.
 | ||
| .global	_sparcv9_vis1_instrument_bus2
 | ||
| .align	8
 | ||
| _sparcv9_vis1_instrument_bus2:
 | ||
| 	mov	%o1,%o3					! save cnt
 | ||
| 	sll	%o1,2,%o1				! cnt*=4
 | ||
| 
 | ||
| 	.word	0x99410000	!rd	%tick,%o4	! tick
 | ||
| 	mov	%o4,%o5					! lasttick = tick
 | ||
| 	set	0,%g4					! diff
 | ||
| 
 | ||
| 	andn	%o0,63,%g1
 | ||
| 	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	ld	[%o0],%o4
 | ||
| 	add	%o4,%g4,%g4
 | ||
| 	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
 | ||
| 
 | ||
| 	.word	0x99410000	!rd	%tick,%o4	! tick
 | ||
| 	sub	%o4,%o5,%g4				! diff=tick-lasttick
 | ||
| 	mov	%o4,%o5					! lasttick=tick
 | ||
| 	mov	%g4,%g5					! lastdiff=diff
 | ||
| .Loop2:
 | ||
| 	andn	%o0,63,%g1
 | ||
| 	.word	0xc1985e00	!ldda	[%g1]0xf0,%f0	! block load
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	.word	0xc1b85c00	!stda	%f0,[%g1]0xe0	! block store and commit
 | ||
| 	.word	0x8143e040	!membar	#Sync
 | ||
| 	ld	[%o0],%o4
 | ||
| 	add	%o4,%g4,%g4
 | ||
| 	.word	0xc9e2100c	!cas	[%o0],%o4,%g4
 | ||
| 
 | ||
| 	subcc	%o2,1,%o2				! --max
 | ||
| 	bz	.Ldone2
 | ||
| 	nop
 | ||
| 
 | ||
| 	.word	0x99410000	!rd	%tick,%o4	! tick
 | ||
| 	sub	%o4,%o5,%g4				! diff=tick-lasttick
 | ||
| 	mov	%o4,%o5					! lasttick=tick
 | ||
| 	cmp	%g4,%g5
 | ||
| 	mov	%g4,%g5					! lastdiff=diff
 | ||
| 
 | ||
| 	! Branch-free step: read the condition codes left by the cmp and
 | ||
| 	! turn the zero flag into a byte step, 0 when diff equals lastdiff
 | ||
| 	! and 4 otherwise.
 | ||
| 	.word	0x83408000	!rd	%ccr,%g1
 | ||
| 	and	%g1,4,%g1				! isolate zero flag
 | ||
| 	xor	%g1,4,%g1				! flip zero flag
 | ||
| 
 | ||
| 	subcc	%o1,%g1,%o1				! conditional --$cnt
 | ||
| 	bnz	.Loop2
 | ||
| 	add	%o0,%g1,%o0				! conditional ++$out
 | ||
| 
 | ||
| .Ldone2:
 | ||
| 	! %o1 holds remaining bytes; convert back to entries
 | ||
| 	srl	%o1,2,%o1
 | ||
| 	retl
 | ||
| 	sub	%o3,%o1,%o0
 | ||
| .type	_sparcv9_vis1_instrument_bus2,#function
 | ||
| .size	_sparcv9_vis1_instrument_bus2,.-_sparcv9_vis1_instrument_bus2
 | ||
| 
 | ||
| ! Emit a call to OPENSSL_cpuid_setup (defined outside this file) into
 | ||
| ! the .init section; the nop fills the delay slot of the call.
 | ||
| .section	".init",#alloc,#execinstr
 | ||
| 	call	OPENSSL_cpuid_setup
 | ||
| 	nop
 |