Introduction to PPC Assembly
 
  
   
   
   
  
#include <stdio.h> 
int dosomething(int,int);
/*
 * Entry point of the first example.
 * Adds two fixed integers through dosomething() and prints both operands
 * together with the result. argc and argv are accepted but not used.
 * Always returns 0.
 */
int main(int argc, char ** argv){
 const int first  = 1;
 const int second = 4;
 const int sum    = dosomething(first, second);

 printf("\ndosomething(%d,%d)=%d\n", first, second, sum);
 return 0;
}
/*
 * Returns the sum of its two integer arguments.
 */
int dosomething(int x, int y){
 return x + y;
}
${CC} test.c -o test01.ppc
I used ${CC} so that everyone can substitute their own compiler command.
dosomething(1,4)=5   
#include <stdio.h>
extern int dosomething(int,int);
/*
 * Entry point of the second example.
 * Same driver as before, but dosomething() is only declared here (extern)
 * and has to be supplied by another object file at link time.
 * Prints the two operands and the value returned, then returns 0.
 */
int main(int argc, char ** argv){
 const int lhs = 1;
 const int rhs = 4;
 const int result = dosomething(lhs, rhs);

 printf("\ndosomething(%d,%d)=%d\n", lhs, rhs, result);
 return 0;
}
/*
 * dosomething: integer addition.
 * x, y  - the two operands
 * return - x + y
 */
int dosomething(int x, int y){
 return x + y;
}
${CC} dosomething.c test.c -o test02.ppc
#-----------------------------------------------------------------------
# int dosomething(int x, int y)
#
# In:    r3 = x, r4 = y
# Out:   r3 = x + y
# Uses:  r7, r8, r9 (volatile registers, nothing has to be saved)
# Stack: not touched (leaf routine, no frame)
#
# All comments are full-line '#' comments: GNU as for PowerPC treats ';'
# as a statement separator, so text after ';' would be assembled.
#-----------------------------------------------------------------------
.text

.global dosomething
dosomething:
# Copy the incoming arguments into scratch registers: r7 = x, r8 = y.
 or   r7,  r3,  r3
 or   r8,  r4,  r4
# r9 = x + y
 add  r9,  r7,  r8
# Move the sum into r3, the register that carries the return value.
 mr   r3,  r9
# Return to the caller: branch to the link register, BO=20 (always).
 bclr 20,   0
${CC} dosomething.s test.c -o test03.ppc
                    
${GDB} test02.ppc
disas dosomething
Dump of assembler code for function stext:
0x40000000 <stext+ 0>:   or   r11, r3, r3
0x40000004 <stext+ 4>:   or   r12, r4, r4
0x40000008 <stext+ 8>:  add   r12,r11,r12
0x4000000c <stext+12>:   or    r3,r12,r12
0x40000010 <stext+16>: bclr    20, 0
0x40000014 <stext+20>:  .long 0
0x40000018 <stext+24>:  .long 0
0x4000001c <stext+28>:  .long 0
End of assembler dump.
            
${GDB} test03.ppc
...
disas dosomething
Dump of assembler code for function stext:
0x40000000 <stext+ 0>:   or    r7, r3, r3
0x40000004 <stext+ 4>:   or    r8, r4, r4
0x40000008 <stext+ 8>:  add    r9, r7, r8
0x4000000c <stext+12>:   or    r3, r9, r9
0x40000010 <stext+16>: bclr    20, 0
0x40000014 <stext+20>:  .long 0
0x40000018 <stext+24>:  .long 0
0x4000001c <stext+28>:  .long 0
End of assembler dump.
 Byte       1 byte
 HalfWord   2 bytes
 Word       4 bytes
 DoubleWord 8 bytes
 QuadWord  16 bytes
GPR0      Volatile            Depends on the context
GPR1      Volatile Dedicated  Stack pointer (SP)
GPR2      Volatile Dedicated  Read-only small data area anchor
GPR3      Volatile            Argument passed and/or returned value
GPR4      Volatile            Argument passed and/or returned value
GPR5      Volatile            Argument passed 
...
GPR10     Volatile            Argument passed 
GPR11     Volatile            
GPR12     Volatile            
GPR13  Nonvolatile Dedicated  Read-write small data area anchor
GPR14  Nonvolatile         
...
GPR31  Nonvolatile         
FPR0      Volatile            Depends on the context
FPR1      Volatile            Argument passed and/or returned value
FPR2      Volatile            Argument passed
...
FPR8      Volatile            Argument passed
FPR9      Volatile            
...
FPR13     Volatile            
FPR14  Nonvolatile         
...
FPR31  Nonvolatile         
CR0       Volatile            
CR1       Volatile            
CR2    Nonvolatile         
CR3    Nonvolatile         
CR4    Nonvolatile         
CR5       Volatile            
...
CR7       Volatile             
            
; Prologue / epilogue skeleton for a routine that needs a stack frame.
;
; Frame built by the prologue (offsets relative to the new r1):
;    0      back chain: the caller's r1, written by stwu
;   72..87  saved r28, r29, r30, r31 (4 words written by stmw)
;   88      first byte of the caller's frame (old r1)
;   92      old r1 + 4: the slot where the link register is saved
; The epilogue reads back from the same offsets and then releases
; exactly the 88 bytes the prologue reserved.
dosomething: mflr r0 ; Get Link register
stwu  r1,-88(r1) ; Save Back chain and move SP (r1 = r1 - 88)
stw   r0,+92(r1) ; Save Link register (92 = 88 + 4, i.e. old r1 + 4)
stmw r28,+72(r1) ; Save 4 non-volatiles r28-r31
; The two lines below are placeholders for the routine's own body.
...
...
lwz   r0,+92(r1) ; Get saved Link register
mtlr  r0         ; Restore Link register
lmw  r28,+72(r1) ; Restore non-volatiles
addi  r1,  r1,88 ; Remove frame from stack
bclr  20,0 ; Return to the caller through the restored Link register
/*
 * Exchanges the two float values that f1 and f2 point to.
 * Both pointers are dereferenced unconditionally.
 */
void floatSwap(float* f1, float* f2){
 const float first  = *f1;
 const float second = *f2;

 *f1 = second;
 *f2 = first;
}
0x00000140 <floatSwap+0>:    or  r11, r3, r3
0x00000144 <floatSwap+4>:    or  r12, r4, r4
0x00000148 <floatSwap+8>:   lfs  fr0, 0(r11)
0x0000014c <floatSwap+12>:   lfs fr13, 0(r12)
0x00000150 <floatSwap+16>:  stfs fr13, 0(r11)
0x00000154 <floatSwap+20>:  stfs  fr0, 0(r12)
0x00000158 <floatSwap+24>:  bclr   20, 0
0x0000015c <floatSwap+28>: .long 0
 
            
...
/* Swap routine supplied by another object file; it returns nothing. */
extern void floatSwap(float *, float *);
/* Timing report helper; its definition is not part of this excerpt. */
static void show_times(int *,int *,char *,int);
            
...
/*
 * Benchmark driver for floatSwap().
 * Fills two 1024-element float arrays, prints the first ten pairs, swaps
 * every pair with floatSwap() between two GET_TIME() samples, prints the
 * first ten pairs again and hands both time samples to show_times().
 * Always returns 0.
 */
int main(int argc, char ** argv){
    
 float a[1024], b[1024];
...
 /* NOTE: the "..." above is part of the listing; the declarations of i,
    time_start and time_end are not shown in this excerpt. */
 /* a[i] = (i+1)/1000 as a float, b[i] = the negated value */
 for(i=0;i<1024;i++){
    a[i]=(i+1)/(float)1000;
    b[i]=-a[i];
 }
 /* Show the first ten pairs before swapping. */
 for(i=0;i<10;i++)
    printf("\n a[%d]=%f b[%d]=%f",i,a[i],i,b[i]);
 /* GET_TIME is defined elsewhere; presumably it records a timestamp into
    its argument - TODO confirm against its definition. */
 GET_TIME(time_start[0]);
 /* Timed section: one floatSwap() call per element pair. */
 for(i=0;i<1024;i++)
    floatSwap(&a[i],&b[i]);
 GET_TIME(time_end[0]);
 printf("\n ");
 /* Show the first ten pairs again, now exchanged. */
 for(i=0;i<10;i++)
    printf("\n a[%d]=%f b[%d]=%f",i,a[i],i,b[i]);
 /* Last argument is presumably the number of timed passes - verify
    against show_times(). */
 show_times(time_start, time_end," ",1);
 return 0;
}
a[0]=0.001000 b[0]=-0.001000
a[1]=0.002000 b[1]=-0.002000
a[2]=0.003000 b[2]=-0.003000
a[3]=0.004000 b[3]=-0.004000
a[4]=0.005000 b[4]=-0.005000
a[5]=0.006000 b[5]=-0.006000
a[6]=0.007000 b[6]=-0.007000
a[7]=0.008000 b[7]=-0.008000
a[8]=0.009000 b[8]=-0.009000
a[9]=0.010000 b[9]=-0.010000
a[0]=-0.001000 b[0]=0.001000
a[1]=-0.002000 b[1]=0.002000
a[2]=-0.003000 b[2]=0.003000
a[3]=-0.004000 b[3]=0.004000
a[4]=-0.005000 b[4]=0.005000
a[5]=-0.006000 b[5]=0.006000
a[6]=-0.007000 b[6]=0.007000
a[7]=-0.008000 b[7]=0.008000
a[8]=-0.009000 b[8]=0.009000
a[9]=-0.010000 b[9]=0.010000  in 1 st pass : 25714 nanoseconds
#-----------------------------------------------------------------------
# void floatSwap(float *f1, float *f2)
#
# In:    r3 = f1, r4 = f2
# Out:   nothing in registers; *f1 and *f2 are exchanged in memory
# Uses:  r8, r9, fr0, fr13
# Stack: not touched (leaf routine, no frame)
#
# This variant first copies both pointers into r8/r9 and addresses
# memory through the copies.
#-----------------------------------------------------------------------
.text

.global floatSwap
floatSwap:
# r8 = f1, r9 = f2 (mr rD,rS is the simplified mnemonic of or rD,rS,rS)
 mr    r8, r3
 mr    r9, r4
# fr0 = *f1, fr13 = *f2
 lfs   fr0,0(r8)
 lfs  fr13,0(r9)
# *f1 = former *f2, *f2 = former *f1
 stfs fr13,0(r8)
 stfs  fr0,0(r9)
# Return to the caller: branch to the link register, BO=20 (always).
 bclr  20,0
a[0]=0.001000 b[0]=-0.001000
a[1]=0.002000 b[1]=-0.002000
a[2]=0.003000 b[2]=-0.003000
a[3]=0.004000 b[3]=-0.004000
a[4]=0.005000 b[4]=-0.005000
a[5]=0.006000 b[5]=-0.006000
a[6]=0.007000 b[6]=-0.007000
a[7]=0.008000 b[7]=-0.008000
a[8]=0.009000 b[8]=-0.009000
a[9]=0.010000 b[9]=-0.010000
a[0]=-0.001000 b[0]=0.001000
a[1]=-0.002000 b[1]=0.002000
a[2]=-0.003000 b[2]=0.003000
a[3]=-0.004000 b[3]=0.004000
a[4]=-0.005000 b[4]=0.005000
a[5]=-0.006000 b[5]=0.006000
a[6]=-0.007000 b[6]=0.007000
a[7]=-0.008000 b[7]=0.008000
a[8]=-0.009000 b[8]=0.009000
a[9]=-0.010000 b[9]=0.010000  in 1 th pass : 25714 nanoseconds
             
#-----------------------------------------------------------------------
# void floatSwap(float *f1, float *f2)
#
# In:    r3 = f1, r4 = f2
# Out:   nothing in registers; *f1 and *f2 are exchanged in memory
# Uses:  fr0, fr13
#
# Shortest variant: the pointers are used directly from the argument
# registers, with no copies into other registers.
#-----------------------------------------------------------------------
.text
    .global floatSwap
floatSwap:
# fr0 = *f1, fr13 = *f2
 lfs   fr0,0(r3)
 lfs  fr13,0(r4)
# *f1 = former *f2, *f2 = former *f1
 stfs fr13,0(r3)
 stfs  fr0,0(r4)
# Return to the caller: branch to the link register, BO=20 (always).
 bclr  20,0
a[0]=0.001000 b[0]=-0.001000
a[1]=0.002000 b[1]=-0.002000
a[2]=0.003000 b[2]=-0.003000
a[3]=0.004000 b[3]=-0.004000
a[4]=0.005000 b[4]=-0.005000
a[5]=0.006000 b[5]=-0.006000
a[6]=0.007000 b[6]=-0.007000
a[7]=0.008000 b[7]=-0.008000
a[8]=0.009000 b[8]=-0.009000
a[9]=0.010000 b[9]=-0.010000
a[0]=-0.001000 b[0]=0.001000
a[1]=-0.002000 b[1]=0.002000
a[2]=-0.003000 b[2]=0.003000
a[3]=-0.004000 b[3]=0.004000
a[4]=-0.005000 b[4]=0.005000
a[5]=-0.006000 b[5]=0.006000
a[6]=-0.007000 b[6]=0.007000
a[7]=-0.008000 b[7]=0.008000
a[8]=-0.009000 b[8]=0.009000
a[9]=-0.010000 b[9]=0.010000  in 1 th pass : 23308 nanoseconds
; Straight-line version: a float swap followed by integer arithmetic.
; The swap addresses memory as base r7 plus index r20 / r21.
; The arithmetic only touches r3, r4, r9, r15 and r16, so it shares no
; register with the swap.
lfsx   fr0, r7,r20 ; load 1st FP32 value 
lfsx  fr12, r7,r21 ; load 2nd one 
stfsx fr12, r7,r20 ; store latter value on 1st addr
stfsx  fr0, r7,r21 ; store former value on 2nd addr
mullw r15, r9, r3 ; r15 = r9 * r3
divw  r16,r15, r4 ; r16 = r15 / r4
mullw r16,r16, r4 ; r16 = r16 * r4
; Interleaved version: a float swap (lfsx/stfsx through r7 + r20 / r21)
; alternated with integer arithmetic on r3, r4, r9, r15 and r16.
; The two instruction groups use disjoint registers and each group keeps
; its own internal order, so alternating them does not change any result.
; NOTE(review): the alternation is presumably meant to let the load/store
; and integer work overlap; the actual gain depends on the CPU.
lfsx   fr0, r7,r20 ; load 1st FP32 value 
mullw r15, r9, r3 ; r15 = r9 * r3
lfsx  fr12, r7,r21 ; load 2nd one 
divw  r16,r15, r4 ; r16 = r15 / r4
stfsx fr12, r7,r20 ; store latter value on 1st addr
mullw r16,r16, r4 ; r16 = r16 * r4
stfsx  fr0, r7,r21 ; store former value on 2nd addr