[PATCH V2 01/12] nvme-core: annotate nvme_alloc_request()
Chaitanya Kulkarni
Chaitanya.Kulkarni at wdc.com
Tue Sep 1 23:28:20 EDT 2020
Sagi,
On 9/1/20 10:21, Sagi Grimberg wrote:
>>> diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
>>> index 5702a3843746..a62fdcbfd1cc 100644
>>> --- a/drivers/nvme/host/core.c
>>> +++ b/drivers/nvme/host/core.c
>>> @@ -508,7 +508,7 @@ struct request *nvme_alloc_request(struct request_queue *q,
>>> unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT : REQ_OP_DRV_IN;
>>> struct request *req;
>>>
>>> - if (qid == NVME_QID_ANY) {
>>> + if (unlikely(qid == NVME_QID_ANY)) {
>> From your commit message, this should be likely(), right?
>>
>>> req = blk_mq_alloc_request(q, op, flags);
>>> } else {
>>> req = blk_mq_alloc_request_hctx(q, op, flags,
> Chaitanya, can you check the objdump that the change actually
> resulted in a different machine code, I've seen patches of this
> sort lately where this annotation didn't change anything at all.
>
Agree, that is why I've added performance numbers.
Here is objdump for likely [1] vs default [2]. The generated assembly
is different with likely if someone reports performance regression
we can always revert this patch as it is very independent and
trivial.
With likely [1] :-
00000000000008b0 <nvme_alloc_request>:
904 {
905 8b0: e8 00 00 00 00 callq 8b5
<nvme_alloc_request+0x5>
906 8b5: 53 push %rbx
907 /*
908 * What a mess...
909 *
910 * Why can't we simply have a Fabrics In and Fabrics out
command?
911 */
912 if (unlikely(nvme_is_fabrics(cmd)))
913 8b6: 0f b6 06 movzbl (%rsi),%eax
914 8b9: 48 89 f3 mov %rsi,%rbx
915 return cmd->fabrics.fctype & 1;
916 return cmd->common.opcode & 1;
917 8bc: 89 c6 mov %eax,%esi
918 8be: 83 e6 01 and $0x1,%esi
919 if (unlikely(nvme_is_fabrics(cmd)))
920 8c1: 3c 7f cmp $0x7f,%al
921 8c3: 74 53 je 918
<nvme_alloc_request+0x68>
922 unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT :
REQ_OP_DRV_IN;
923 8c5: 83 e6 01 and $0x1,%esi
924 8c8: 83 c6 22 add $0x22,%esi
925 if (likely(qid == NVME_QID_ANY)) {
926 8cb: 83 f9 ff cmp $0xffffffff,%ecx
927 8ce: 75 34 jne 904
<nvme_alloc_request+0x54>
928 req = blk_mq_alloc_request(q, op, flags);
929 8d0: e8 00 00 00 00 callq 8d5
<nvme_alloc_request+0x25>
930 if (IS_ERR(req))
931 8d5: 48 3d 00 f0 ff ff cmp
$0xfffffffffffff000,%rax
932 8db: 77 25 ja 902
<nvme_alloc_request+0x52>
933 if (!(req->rq_flags & RQF_DONTPREP)) {
934 8dd: 8b 50 1c mov 0x1c(%rax),%edx
935 req->cmd_flags |= REQ_FAILFAST_DRIVER;
936 8e0: 81 48 18 00 04 00 00 orl $0x400,0x18(%rax)
937 if (!(req->rq_flags & RQF_DONTPREP)) {
938 8e7: f6 c2 80 test $0x80,%dl
939 8ea: 75 0f jne 8fb
<nvme_alloc_request+0x4b>
940 nvme_req(req)->retries = 0;
941 8ec: 31 c9 xor %ecx,%ecx
942 req->rq_flags |= RQF_DONTPREP;
943 8ee: 80 ca 80 or $0x80,%dl
944 nvme_req(req)->retries = 0;
945 8f1: 66 89 88 28 01 00 00 mov %cx,0x128(%rax)
946 req->rq_flags |= RQF_DONTPREP;
947 8f8: 89 50 1c mov %edx,0x1c(%rax)
948 nvme_req(req)->cmd = cmd;
949 8fb: 48 89 98 18 01 00 00 mov %rbx,0x118(%rax)
950 }
951 902: 5b pop %rbx
952 903: c3 retq
953 qid ? qid - 1 : 0);
954 904: 85 c9 test %ecx,%ecx
955 906: 8d 41 ff lea -0x1(%rcx),%eax
956 909: b9 00 00 00 00 mov $0x0,%ecx
957 90e: 0f 45 c8 cmovne %eax,%ecx
958 req = blk_mq_alloc_request_hctx(q, op, flags,
959 911: e8 00 00 00 00 callq 916
<nvme_alloc_request+0x66>
960 916: eb bd jmp 8d5
<nvme_alloc_request+0x25>
961 return cmd->fabrics.fctype & 1;
962 918: 0f b6 73 04 movzbl 0x4(%rbx),%esi
963 91c: 83 e6 01 and $0x1,%esi
964 91f: eb a4 jmp 8c5
<nvme_alloc_request+0x15>
965 921: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
966 926: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
967 92d: 00 00 00
Default :-
00000000000008b0 <nvme_alloc_request>:
904 {
905 8b0: e8 00 00 00 00 callq 8b5
<nvme_alloc_request+0x5>
906 8b5: 53 push %rbx
907 /*
908 * What a mess...
909 *
910 * Why can't we simply have a Fabrics In and Fabrics out
command?
911 */
912 if (unlikely(nvme_is_fabrics(cmd)))
913 8b6: 0f b6 06 movzbl (%rsi),%eax
914 8b9: 48 89 f3 mov %rsi,%rbx
915 return cmd->fabrics.fctype & 1;
916 return cmd->common.opcode & 1;
917 8bc: 89 c6 mov %eax,%esi
918 8be: 83 e6 01 and $0x1,%esi
919 if (unlikely(nvme_is_fabrics(cmd)))
920 8c1: 3c 7f cmp $0x7f,%al
921 8c3: 74 53 je 918
<nvme_alloc_request+0x68>
922 unsigned op = nvme_is_write(cmd) ? REQ_OP_DRV_OUT :
REQ_OP_DRV_IN;
923 8c5: 83 e6 01 and $0x1,%esi
924 8c8: 83 c6 22 add $0x22,%esi
925 if (qid == NVME_QID_ANY) {
926 8cb: 83 f9 ff cmp $0xffffffff,%ecx
927 8ce: 74 41 je 911
<nvme_alloc_request+0x61>
928 qid ? qid - 1 : 0);
929 8d0: 8d 41 ff lea -0x1(%rcx),%eax
930 8d3: 85 c9 test %ecx,%ecx
931 8d5: b9 00 00 00 00 mov $0x0,%ecx
932 8da: 0f 45 c8 cmovne %eax,%ecx
933 req = blk_mq_alloc_request_hctx(q, op, flags,
934 8dd: e8 00 00 00 00 callq 8e2
<nvme_alloc_request+0x32>
935 if (IS_ERR(req))
936 8e2: 48 3d 00 f0 ff ff cmp
$0xfffffffffffff000,%rax
937 8e8: 77 25 ja 90f
<nvme_alloc_request+0x5f>
938 if (!(req->rq_flags & RQF_DONTPREP)) {
939 8ea: 8b 50 1c mov 0x1c(%rax),%edx
940 req->cmd_flags |= REQ_FAILFAST_DRIVER;
941 8ed: 81 48 18 00 04 00 00 orl $0x400,0x18(%rax)
942 if (!(req->rq_flags & RQF_DONTPREP)) {
943 8f4: f6 c2 80 test $0x80,%dl
944 8f7: 75 0f jne 908
<nvme_alloc_request+0x58>
945 nvme_req(req)->retries = 0;
946 8f9: 31 c9 xor %ecx,%ecx
947 req->rq_flags |= RQF_DONTPREP;
948 8fb: 80 ca 80 or $0x80,%dl
949 nvme_req(req)->retries = 0;
950 8fe: 66 89 88 28 01 00 00 mov %cx,0x128(%rax)
951 req->rq_flags |= RQF_DONTPREP;
952 905: 89 50 1c mov %edx,0x1c(%rax)
953 nvme_req(req)->cmd = cmd;
954 908: 48 89 98 18 01 00 00 mov %rbx,0x118(%rax)
955 }
956 90f: 5b pop %rbx
957 910: c3 retq
958 req = blk_mq_alloc_request(q, op, flags);
959 911: e8 00 00 00 00 callq 916
<nvme_alloc_request+0x66>
960 916: eb ca jmp 8e2
<nvme_alloc_request+0x32>
961 return cmd->fabrics.fctype & 1;
962 918: 0f b6 73 04 movzbl 0x4(%rbx),%esi
963 91c: 83 e6 01 and $0x1,%esi
964 91f: eb a4 jmp 8c5
<nvme_alloc_request+0x15>
965 921: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
966 926: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
967 92d: 00 00 00
968
969 0000000000000930 <nvme_end_sync_rq>:
970 {
971 930: e8 00 00 00 00 callq 935
<nvme_end_sync_rq+0x5>
972 935: 48 89 f8 mov %rdi,%rax
973 struct completion *waiting = rq->end_io_data;
974 938: 48 8b bf 10 01 00 00 mov 0x110(%rdi),%rdi
975 rq->end_io_data = NULL;
976 93f: 48 c7 80 10 01 00 00 movq $0x0,0x110(%rax)
977 946: 00 00 00 00
978 complete(waiting);
979 94a: e9 00 00 00 00 jmpq 94f
<nvme_end_sync_rq+0x1f>
980 94f: 90 nop
981
More information about the Linux-nvme
mailing list