Currently recalculate_boundary() is implemented with a loop which
shows up as costly in a perf profile, as shown by the perf annotate
output below:
0.00 :
c057e934: 3d 40 7f ff lis r10,32767
0.03 :
c057e938: 61 4a ff ff ori r10,r10,65535
0.21 :
c057e93c: 7d 49 50 50 subf r10,r9,r10
5.39 :
c057e940: 7d 3c 4b 78 mr r28,r9
2.11 :
c057e944: 55 29 08 3c slwi r9,r9,1
3.04 :
c057e948: 7c 09 50 40 cmplw r9,r10
2.47 :
c057e94c: 40 81 ff f4 ble
c057e940 <snd_pcm_ioctl+0xee0>
Total: 13.2% on that simple loop.
But all the loop does is double the boundary for as long as the result
stays within the wanted border. The loop can be avoided by using __fls()
to get the order of the border and of the buffer size, and shifting the
boundary by the appropriate number of bits at once.
This change provides the following profile:
0.04 :
c057f6e8: 3d 20 7f ff lis r9,32767
0.02 :
c057f6ec: 61 29 ff ff ori r9,r9,65535
0.34 :
c057f6f0: 7d 5a 48 50 subf r10,r26,r9
0.23 :
c057f6f4: 7c 1a 50 40 cmplw r26,r10
0.02 :
c057f6f8: 41 81 00 20 bgt
c057f718 <snd_pcm_ioctl+0xf08>
0.26 :
c057f6fc: 7f 47 00 34 cntlzw r7,r26
0.09 :
c057f700: 7d 48 00 34 cntlzw r8,r10
0.22 :
c057f704: 7d 08 38 50 subf r8,r8,r7
0.04 :
c057f708: 7f 5a 40 30 slw r26,r26,r8
0.35 :
c057f70c: 7c 0a d0 40 cmplw r10,r26
0.13 :
c057f710: 40 80 05 f8 bge
c057fd08 <snd_pcm_ioctl+0x14f8>
0.00 :
c057f714: 57 5a f8 7e srwi r26,r26,1
Total: 1.7% with that loopless alternative.
Signed-off-by: Christophe Leroy <christophe.leroy@csgroup.eu>
Link: https://patch.msgid.link/4836e2cde653eebaf2709ebe30eec736bb8c67fd.1749202237.git.christophe.leroy@csgroup.eu
Signed-off-by: Takashi Iwai <tiwai@suse.de>
#include <sound/minors.h>
#include <linux/uio.h>
#include <linux/delay.h>
+#include <linux/bitops.h>
#include "pcm_local.h"
static snd_pcm_uframes_t recalculate_boundary(struct snd_pcm_runtime *runtime)
{
snd_pcm_uframes_t boundary;
+ snd_pcm_uframes_t border; /* largest value the returned boundary may take */
+ int order; /* number of doublings to apply to buffer_size */
if (! runtime->buffer_size)
return 0;
- boundary = runtime->buffer_size;
- while (boundary * 2 <= 0x7fffffffUL - runtime->buffer_size)
- boundary *= 2;
- return boundary;
+
+ border = 0x7fffffffUL - runtime->buffer_size;
+ if (runtime->buffer_size > border) /* no doubling can fit under the border */
+ return runtime->buffer_size;
+
+ order = __fls(border) - __fls(runtime->buffer_size); /* gap between the two values' orders */
+ boundary = runtime->buffer_size << order; /* apply all doublings in a single shift */
+
+ if (boundary <= border)
+ return boundary;
+ else
+ return boundary / 2; /* the shift went one doubling past the border: undo it */
}
static int snd_pcm_ioctl_sync_ptr_compat(struct snd_pcm_substream *substream,