Diffstat (limited to 'net')
-rw-r--r-- net/9p/trans_xen.c | 3
-rw-r--r-- net/Makefile | 1
-rw-r--r-- net/batman-adv/bat_iv_ogm.c | 1
-rw-r--r-- net/batman-adv/bat_v_elp.c | 1
-rw-r--r-- net/batman-adv/bat_v_ogm.c | 5
-rw-r--r-- net/batman-adv/distributed-arp-table.c | 2
-rw-r--r-- net/batman-adv/gateway_common.c | 2
-rw-r--r-- net/batman-adv/main.h | 2
-rw-r--r-- net/batman-adv/multicast.c | 251
-rw-r--r-- net/batman-adv/multicast.h | 38
-rw-r--r-- net/batman-adv/network-coding.c | 4
-rw-r--r-- net/batman-adv/routing.c | 7
-rw-r--r-- net/batman-adv/soft-interface.c | 26
-rw-r--r-- net/batman-adv/translation-table.c | 4
-rw-r--r-- net/batman-adv/tvlv.c | 71
-rw-r--r-- net/batman-adv/tvlv.h | 9
-rw-r--r-- net/batman-adv/types.h | 6
-rw-r--r-- net/bluetooth/ecdh_helper.c | 37
-rw-r--r-- net/bluetooth/hci_conn.c | 41
-rw-r--r-- net/bluetooth/hci_event.c | 5
-rw-r--r-- net/bluetooth/hci_sync.c | 19
-rw-r--r-- net/bluetooth/hidp/Kconfig | 2
-rw-r--r-- net/bluetooth/hidp/core.c | 3
-rw-r--r-- net/bluetooth/iso.c | 64
-rw-r--r-- net/bluetooth/l2cap_core.c | 24
-rw-r--r-- net/bluetooth/l2cap_sock.c | 8
-rw-r--r-- net/bluetooth/mgmt.c | 12
-rw-r--r-- net/bluetooth/mgmt_util.h | 2
-rw-r--r-- net/bluetooth/rfcomm/core.c | 4
-rw-r--r-- net/bluetooth/rfcomm/sock.c | 7
-rw-r--r-- net/bluetooth/rfcomm/tty.c | 2
-rw-r--r-- net/bpf/bpf_dummy_struct_ops.c | 18
-rw-r--r-- net/bpf/test_run.c | 77
-rw-r--r-- net/bridge/br_if.c | 2
-rw-r--r-- net/bridge/br_mdb.c | 66
-rw-r--r-- net/bridge/br_multicast.c | 179
-rw-r--r-- net/bridge/br_netfilter_hooks.c | 3
-rw-r--r-- net/bridge/br_netlink.c | 19
-rw-r--r-- net/bridge/br_netlink_tunnel.c | 3
-rw-r--r-- net/bridge/br_private.h | 12
-rw-r--r-- net/bridge/br_switchdev.c | 10
-rw-r--r-- net/bridge/br_vlan.c | 11
-rw-r--r-- net/bridge/br_vlan_options.c | 27
-rw-r--r-- net/bridge/netfilter/ebtables.c | 2
-rw-r--r-- net/bridge/netfilter/nf_conntrack_bridge.c | 4
-rw-r--r-- net/caif/caif_socket.c | 5
-rw-r--r-- net/caif/cfctrl.c | 6
-rw-r--r-- net/can/gw.c | 7
-rw-r--r-- net/can/isotp.c | 72
-rw-r--r-- net/can/j1939/address-claim.c | 40
-rw-r--r-- net/can/j1939/transport.c | 4
-rw-r--r-- net/can/raw.c | 52
-rw-r--r-- net/ceph/messenger.c | 4
-rw-r--r-- net/ceph/messenger_v1.c | 7
-rw-r--r-- net/ceph/messenger_v2.c | 28
-rw-r--r-- net/core/Makefile | 4
-rw-r--r-- net/core/dev.c | 44
-rw-r--r-- net/core/dev.h | 20
-rw-r--r-- net/core/dst.c | 8
-rw-r--r-- net/core/filter.c | 123
-rw-r--r-- net/core/gro.c | 26
-rw-r--r-- net/core/neighbour.c | 32
-rw-r--r-- net/core/net-sysfs.c | 92
-rw-r--r-- net/core/net-traces.c | 3
-rw-r--r-- net/core/net_namespace.c | 12
-rw-r--r-- net/core/netdev-genl-gen.c | 48
-rw-r--r-- net/core/netdev-genl-gen.h | 23
-rw-r--r-- net/core/netdev-genl.c | 179
-rw-r--r-- net/core/netpoll.c | 12
-rw-r--r-- net/core/page_pool.c | 6
-rw-r--r-- net/core/rtnetlink.c | 35
-rw-r--r-- net/core/scm.c | 2
-rw-r--r-- net/core/skbuff.c | 242
-rw-r--r-- net/core/skmsg.c | 5
-rw-r--r-- net/core/sock.c | 59
-rw-r--r-- net/core/sock_map.c | 61
-rw-r--r-- net/core/stream.c | 1
-rw-r--r-- net/core/sysctl_net_core.c | 111
-rw-r--r-- net/core/xdp.c | 88
-rw-r--r-- net/dcb/dcbnl.c | 272
-rw-r--r-- net/dccp/ipv6.c | 7
-rw-r--r-- net/devlink/Makefile | 3
-rw-r--r-- net/devlink/core.c | 320
-rw-r--r-- net/devlink/dev.c | 1346
-rw-r--r-- net/devlink/devl_internal.h | 239
-rw-r--r-- net/devlink/health.c | 1333
-rw-r--r-- net/devlink/leftover.c (renamed from net/core/devlink.c) | 4629
-rw-r--r-- net/devlink/netlink.c | 251
-rw-r--r-- net/dsa/master.c | 6
-rw-r--r-- net/dsa/slave.c | 50
-rw-r--r-- net/dsa/tag_ksz.c | 216
-rw-r--r-- net/ethtool/Makefile | 4
-rw-r--r-- net/ethtool/channels.c | 92
-rw-r--r-- net/ethtool/coalesce.c | 114
-rw-r--r-- net/ethtool/common.c | 8
-rw-r--r-- net/ethtool/common.h | 2
-rw-r--r-- net/ethtool/debug.c | 71
-rw-r--r-- net/ethtool/eee.c | 78
-rw-r--r-- net/ethtool/fec.c | 83
-rw-r--r-- net/ethtool/ioctl.c | 107
-rw-r--r-- net/ethtool/linkinfo.c | 81
-rw-r--r-- net/ethtool/linkmodes.c | 91
-rw-r--r-- net/ethtool/mm.c | 251
-rw-r--r-- net/ethtool/module.c | 89
-rw-r--r-- net/ethtool/netlink.c | 135
-rw-r--r-- net/ethtool/netlink.h | 74
-rw-r--r-- net/ethtool/pause.c | 125
-rw-r--r-- net/ethtool/plca.c | 248
-rw-r--r-- net/ethtool/privflags.c | 84
-rw-r--r-- net/ethtool/pse-pd.c | 81
-rw-r--r-- net/ethtool/rings.c | 118
-rw-r--r-- net/ethtool/rss.c | 11
-rw-r--r-- net/ethtool/stats.c | 159
-rw-r--r-- net/ethtool/wol.c | 79
-rw-r--r-- net/ieee802154/header_ops.c | 24
-rw-r--r-- net/ieee802154/nl802154.c | 283
-rw-r--r-- net/ieee802154/nl802154.h | 4
-rw-r--r-- net/ieee802154/rdev-ops.h | 56
-rw-r--r-- net/ieee802154/trace.h | 61
-rw-r--r-- net/ipv4/Makefile | 1
-rw-r--r-- net/ipv4/af_inet.c | 12
-rw-r--r-- net/ipv4/ah4.c | 8
-rw-r--r-- net/ipv4/bpf_tcp_ca.c | 3
-rw-r--r-- net/ipv4/cipso_ipv4.c | 2
-rw-r--r-- net/ipv4/esp4.c | 20
-rw-r--r-- net/ipv4/fib_semantics.c | 2
-rw-r--r-- net/ipv4/fou_core.c (renamed from net/ipv4/fou.c) | 47
-rw-r--r-- net/ipv4/fou_nl.c | 48
-rw-r--r-- net/ipv4/fou_nl.h | 25
-rw-r--r-- net/ipv4/icmp.c | 3
-rw-r--r-- net/ipv4/inet_connection_sock.c | 73
-rw-r--r-- net/ipv4/inet_hashtables.c | 39
-rw-r--r-- net/ipv4/inet_timewait_sock.c | 42
-rw-r--r-- net/ipv4/ip_input.c | 2
-rw-r--r-- net/ipv4/ip_output.c | 2
-rw-r--r-- net/ipv4/ip_sockglue.c | 18
-rw-r--r-- net/ipv4/metrics.c | 2
-rw-r--r-- net/ipv4/netfilter/Kconfig | 14
-rw-r--r-- net/ipv4/netfilter/Makefile | 1
-rw-r--r-- net/ipv4/netfilter/arp_tables.c | 4
-rw-r--r-- net/ipv4/netfilter/ip_tables.c | 7
-rw-r--r-- net/ipv4/netfilter/ipt_CLUSTERIP.c | 929
-rw-r--r-- net/ipv4/netfilter/nf_reject_ipv4.c | 1
-rw-r--r-- net/ipv4/proc.c | 8
-rw-r--r-- net/ipv4/raw.c | 21
-rw-r--r-- net/ipv4/tcp.c | 13
-rw-r--r-- net/ipv4/tcp_bbr.c | 16
-rw-r--r-- net/ipv4/tcp_bpf.c | 4
-rw-r--r-- net/ipv4/tcp_cong.c | 10
-rw-r--r-- net/ipv4/tcp_cubic.c | 12
-rw-r--r-- net/ipv4/tcp_dctcp.c | 12
-rw-r--r-- net/ipv4/tcp_ipv4.c | 1
-rw-r--r-- net/ipv4/tcp_minisocks.c | 7
-rw-r--r-- net/ipv4/tcp_ulp.c | 4
-rw-r--r-- net/ipv4/udp.c | 2
-rw-r--r-- net/ipv6/addrconf.c | 59
-rw-r--r-- net/ipv6/af_inet6.c | 11
-rw-r--r-- net/ipv6/ah6.c | 8
-rw-r--r-- net/ipv6/datagram.c | 2
-rw-r--r-- net/ipv6/esp6.c | 20
-rw-r--r-- net/ipv6/icmp.c | 49
-rw-r--r-- net/ipv6/ip6_output.c | 15
-rw-r--r-- net/ipv6/ipv6_sockglue.c | 12
-rw-r--r-- net/ipv6/ndisc.c | 168
-rw-r--r-- net/ipv6/netfilter/ip6_tables.c | 7
-rw-r--r-- net/ipv6/netfilter/ip6t_rpfilter.c | 4
-rw-r--r-- net/ipv6/netfilter/nf_reject_ipv6.c | 1
-rw-r--r-- net/ipv6/proc.c | 1
-rw-r--r-- net/ipv6/raw.c | 20
-rw-r--r-- net/ipv6/route.c | 34
-rw-r--r-- net/ipv6/rpl_iptunnel.c | 2
-rw-r--r-- net/ipv6/seg6_local.c | 352
-rw-r--r-- net/ipv6/tcp_ipv6.c | 14
-rw-r--r-- net/kcm/kcmsock.c | 3
-rw-r--r-- net/key/af_key.c | 2
-rw-r--r-- net/l2tp/l2tp_core.c | 102
-rw-r--r-- net/l2tp/l2tp_ppp.c | 125
-rw-r--r-- net/mac80211/agg-tx.c | 8
-rw-r--r-- net/mac80211/cfg.c | 93
-rw-r--r-- net/mac80211/chan.c | 2
-rw-r--r-- net/mac80211/debugfs_netdev.c | 3
-rw-r--r-- net/mac80211/debugfs_sta.c | 5
-rw-r--r-- net/mac80211/driver-ops.c | 3
-rw-r--r-- net/mac80211/driver-ops.h | 2
-rw-r--r-- net/mac80211/ht.c | 31
-rw-r--r-- net/mac80211/ieee80211_i.h | 8
-rw-r--r-- net/mac80211/iface.c | 5
-rw-r--r-- net/mac80211/link.c | 3
-rw-r--r-- net/mac80211/mlme.c | 167
-rw-r--r-- net/mac80211/rx.c | 603
-rw-r--r-- net/mac80211/sta_info.c | 14
-rw-r--r-- net/mac80211/sta_info.h | 27
-rw-r--r-- net/mac80211/tx.c | 36
-rw-r--r-- net/mac80211/util.c | 68
-rw-r--r-- net/mac80211/vht.c | 25
-rw-r--r-- net/mac802154/Makefile | 2
-rw-r--r-- net/mac802154/cfg.c | 60
-rw-r--r-- net/mac802154/ieee802154_i.h | 61
-rw-r--r-- net/mac802154/iface.c | 6
-rw-r--r-- net/mac802154/llsec.c | 5
-rw-r--r-- net/mac802154/main.c | 37
-rw-r--r-- net/mac802154/rx.c | 37
-rw-r--r-- net/mac802154/scan.c | 456
-rw-r--r-- net/mac802154/tx.c | 42
-rw-r--r-- net/mctp/af_mctp.c | 16
-rw-r--r-- net/mctp/route.c | 34
-rw-r--r-- net/mpls/af_mpls.c | 4
-rw-r--r-- net/mptcp/options.c | 3
-rw-r--r-- net/mptcp/pm.c | 25
-rw-r--r-- net/mptcp/pm_netlink.c | 73
-rw-r--r-- net/mptcp/pm_userspace.c | 12
-rw-r--r-- net/mptcp/protocol.c | 70
-rw-r--r-- net/mptcp/protocol.h | 12
-rw-r--r-- net/mptcp/sockopt.c | 14
-rw-r--r-- net/mptcp/subflow.c | 43
-rw-r--r-- net/mptcp/token.c | 14
-rw-r--r-- net/mptcp/token_test.c | 3
-rw-r--r-- net/netfilter/Kconfig | 3
-rw-r--r-- net/netfilter/Makefile | 7
-rw-r--r-- net/netfilter/core.c | 19
-rw-r--r-- net/netfilter/ipset/Kconfig | 2
-rw-r--r-- net/netfilter/ipset/ip_set_bitmap_ip.c | 4
-rw-r--r-- net/netfilter/ipset/ip_set_core.c | 7
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ip.c | 14
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipmark.c | 13
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipport.c | 13
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportip.c | 13
-rw-r--r-- net/netfilter/ipset/ip_set_hash_ipportnet.c | 13
-rw-r--r-- net/netfilter/ipset/ip_set_hash_net.c | 17
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netiface.c | 15
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netnet.c | 23
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netport.c | 19
-rw-r--r-- net/netfilter/ipset/ip_set_hash_netportnet.c | 40
-rw-r--r-- net/netfilter/ipvs/ip_vs_est.c | 2
-rw-r--r-- net/netfilter/ipvs/ip_vs_xmit.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_bpf.c | 21
-rw-r--r-- net/netfilter/nf_conntrack_core.c | 94
-rw-r--r-- net/netfilter/nf_conntrack_ecache.c | 2
-rw-r--r-- net/netfilter/nf_conntrack_helper.c | 98
-rw-r--r-- net/netfilter/nf_conntrack_netlink.c | 10
-rw-r--r-- net/netfilter/nf_conntrack_ovs.c | 178
-rw-r--r-- net/netfilter/nf_conntrack_proto.c | 27
-rw-r--r-- net/netfilter/nf_conntrack_proto_sctp.c | 209
-rw-r--r-- net/netfilter/nf_conntrack_proto_tcp.c | 59
-rw-r--r-- net/netfilter/nf_conntrack_proto_udp.c | 10
-rw-r--r-- net/netfilter/nf_conntrack_standalone.c | 28
-rw-r--r-- net/netfilter/nf_flow_table_core.c | 5
-rw-r--r-- net/netfilter/nf_flow_table_inet.c | 2
-rw-r--r-- net/netfilter/nf_flow_table_offload.c | 18
-rw-r--r-- net/netfilter/nf_log_syslog.c | 2
-rw-r--r-- net/netfilter/nf_nat_bpf.c | 6
-rw-r--r-- net/netfilter/nf_tables_api.c | 377
-rw-r--r-- net/netfilter/nf_tables_core.c | 35
-rw-r--r-- net/netfilter/nfnetlink.c | 9
-rw-r--r-- net/netfilter/nft_ct.c | 39
-rw-r--r-- net/netfilter/nft_ct_fast.c | 56
-rw-r--r-- net/netfilter/nft_objref.c | 12
-rw-r--r-- net/netfilter/nft_payload.c | 2
-rw-r--r-- net/netfilter/nft_set_rbtree.c | 332
-rw-r--r-- net/netfilter/xt_length.c | 5
-rw-r--r-- net/netlink/af_netlink.c | 38
-rw-r--r-- net/netlink/genetlink.c | 4
-rw-r--r-- net/netrom/af_netrom.c | 5
-rw-r--r-- net/netrom/nr_timer.c | 1
-rw-r--r-- net/nfc/llcp_core.c | 1
-rw-r--r-- net/nfc/netlink.c | 56
-rw-r--r-- net/openvswitch/Kconfig | 1
-rw-r--r-- net/openvswitch/conntrack.c | 85
-rw-r--r-- net/openvswitch/datapath.c | 20
-rw-r--r-- net/openvswitch/flow.c | 12
-rw-r--r-- net/openvswitch/flow.h | 2
-rw-r--r-- net/openvswitch/flow_table.c | 10
-rw-r--r-- net/openvswitch/meter.c | 4
-rw-r--r-- net/packet/af_packet.c | 8
-rw-r--r-- net/phonet/pep-gprs.c | 4
-rw-r--r-- net/qrtr/ns.c | 8
-rw-r--r-- net/rds/ib_recv.c | 1
-rw-r--r-- net/rds/message.c | 8
-rw-r--r-- net/rds/recv.c | 1
-rw-r--r-- net/rds/tcp_listen.c | 2
-rw-r--r-- net/rds/tcp_recv.c | 2
-rw-r--r-- net/rfkill/core.c | 16
-rw-r--r-- net/rfkill/rfkill-gpio.c | 20
-rw-r--r-- net/rose/af_rose.c | 8
-rw-r--r-- net/rxrpc/Kconfig | 9
-rw-r--r-- net/rxrpc/Makefile | 1
-rw-r--r-- net/rxrpc/af_rxrpc.c | 29
-rw-r--r-- net/rxrpc/ar-internal.h | 229
-rw-r--r-- net/rxrpc/call_accept.c | 59
-rw-r--r-- net/rxrpc/call_event.c | 101
-rw-r--r-- net/rxrpc/call_object.c | 129
-rw-r--r-- net/rxrpc/call_state.c | 69
-rw-r--r-- net/rxrpc/conn_client.c | 709
-rw-r--r-- net/rxrpc/conn_event.c | 384
-rw-r--r-- net/rxrpc/conn_object.c | 67
-rw-r--r-- net/rxrpc/conn_service.c | 8
-rw-r--r-- net/rxrpc/input.c | 237
-rw-r--r-- net/rxrpc/insecure.c | 20
-rw-r--r-- net/rxrpc/io_thread.c | 252
-rw-r--r-- net/rxrpc/local_object.c | 42
-rw-r--r-- net/rxrpc/misc.c | 7
-rw-r--r-- net/rxrpc/net_ns.c | 17
-rw-r--r-- net/rxrpc/output.c | 139
-rw-r--r-- net/rxrpc/peer_object.c | 23
-rw-r--r-- net/rxrpc/proc.c | 21
-rw-r--r-- net/rxrpc/recvmsg.c | 302
-rw-r--r-- net/rxrpc/rxkad.c | 356
-rw-r--r-- net/rxrpc/rxperf.c | 25
-rw-r--r-- net/rxrpc/security.c | 53
-rw-r--r-- net/rxrpc/sendmsg.c | 195
-rw-r--r-- net/rxrpc/skbuff.c | 4
-rw-r--r-- net/rxrpc/sysctl.c | 17
-rw-r--r-- net/rxrpc/txbuf.c | 12
-rw-r--r-- net/sched/Kconfig | 91
-rw-r--r-- net/sched/Makefile | 7
-rw-r--r-- net/sched/act_api.c | 57
-rw-r--r-- net/sched/act_connmark.c | 107
-rw-r--r-- net/sched/act_ct.c | 141
-rw-r--r-- net/sched/act_ctinfo.c | 6
-rw-r--r-- net/sched/act_gate.c | 30
-rw-r--r-- net/sched/act_mirred.c | 23
-rw-r--r-- net/sched/act_mpls.c | 74
-rw-r--r-- net/sched/act_nat.c | 72
-rw-r--r-- net/sched/act_pedit.c | 330
-rw-r--r-- net/sched/act_sample.c | 11
-rw-r--r-- net/sched/cls_api.c | 304
-rw-r--r-- net/sched/cls_flower.c | 80
-rw-r--r-- net/sched/cls_matchall.c | 6
-rw-r--r-- net/sched/cls_rsvp.c | 26
-rw-r--r-- net/sched/cls_rsvp.h | 764
-rw-r--r-- net/sched/cls_rsvp6.c | 26
-rw-r--r-- net/sched/cls_tcindex.c | 724
-rw-r--r-- net/sched/sch_api.c | 92
-rw-r--r-- net/sched/sch_atm.c | 703
-rw-r--r-- net/sched/sch_cake.c | 2
-rw-r--r-- net/sched/sch_cbq.c | 1727
-rw-r--r-- net/sched/sch_dsmark.c | 518
-rw-r--r-- net/sched/sch_gred.c | 2
-rw-r--r-- net/sched/sch_htb.c | 40
-rw-r--r-- net/sched/sch_mqprio.c | 291
-rw-r--r-- net/sched/sch_mqprio_lib.c | 117
-rw-r--r-- net/sched/sch_mqprio_lib.h | 18
-rw-r--r-- net/sched/sch_taprio.c | 747
-rw-r--r-- net/sctp/bind_addr.c | 6
-rw-r--r-- net/sctp/diag.c | 4
-rw-r--r-- net/sctp/ipv6.c | 2
-rw-r--r-- net/sctp/protocol.c | 2
-rw-r--r-- net/sctp/socket.c | 5
-rw-r--r-- net/sctp/stream_sched_prio.c | 52
-rw-r--r-- net/sctp/transport.c | 4
-rw-r--r-- net/smc/af_smc.c | 40
-rw-r--r-- net/smc/smc_clc.c | 11
-rw-r--r-- net/smc/smc_core.c | 105
-rw-r--r-- net/smc/smc_core.h | 6
-rw-r--r-- net/smc/smc_diag.c | 3
-rw-r--r-- net/smc/smc_ism.c | 180
-rw-r--r-- net/smc/smc_ism.h | 3
-rw-r--r-- net/smc/smc_llc.c | 34
-rw-r--r-- net/smc/smc_pnet.c | 40
-rw-r--r-- net/smc/smc_rx.c | 4
-rw-r--r-- net/socket.c | 48
-rw-r--r-- net/sunrpc/.kunitconfig | 30
-rw-r--r-- net/sunrpc/Kconfig | 100
-rw-r--r-- net/sunrpc/auth_gss/Makefile | 2
-rw-r--r-- net/sunrpc/auth_gss/auth_gss.c | 36
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_crypto.c | 656
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_internal.h | 232
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_keys.c | 416
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_mech.c | 730
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_seal.c | 122
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_test.c | 2040
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_unseal.c | 63
-rw-r--r-- net/sunrpc/auth_gss/gss_krb5_wrap.c | 124
-rw-r--r-- net/sunrpc/auth_gss/svcauth_gss.c | 1085
-rw-r--r-- net/sunrpc/clnt.c | 2
-rw-r--r-- net/sunrpc/netns.h | 1
-rw-r--r-- net/sunrpc/stats.c | 11
-rw-r--r-- net/sunrpc/svc.c | 164
-rw-r--r-- net/sunrpc/svc_xprt.c | 22
-rw-r--r-- net/sunrpc/svcauth.c | 13
-rw-r--r-- net/sunrpc/svcauth_unix.c | 178
-rw-r--r-- net/sunrpc/svcsock.c | 24
-rw-r--r-- net/sunrpc/sysfs.c | 8
-rw-r--r-- net/sunrpc/xdr.c | 84
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_recvfrom.c | 1
-rw-r--r-- net/sunrpc/xprtrdma/svc_rdma_transport.c | 7
-rw-r--r-- net/sunrpc/xprtsock.c | 3
-rw-r--r-- net/tipc/crypto.c | 12
-rw-r--r-- net/tipc/netlink_compat.c | 16
-rw-r--r-- net/tipc/node.c | 12
-rw-r--r-- net/tipc/socket.c | 5
-rw-r--r-- net/tipc/topsrv.c | 5
-rw-r--r-- net/tls/tls.h | 2
-rw-r--r-- net/tls/tls_sw.c | 47
-rw-r--r-- net/unix/af_unix.c | 29
-rw-r--r-- net/vmw_vsock/af_vsock.c | 3
-rw-r--r-- net/vmw_vsock/hyperv_transport.c | 4
-rw-r--r-- net/vmw_vsock/virtio_transport.c | 149
-rw-r--r-- net/vmw_vsock/virtio_transport_common.c | 422
-rw-r--r-- net/vmw_vsock/vsock_loopback.c | 51
-rw-r--r-- net/wireless/ap.c | 2
-rw-r--r-- net/wireless/chan.c | 69
-rw-r--r-- net/wireless/core.h | 4
-rw-r--r-- net/wireless/ibss.c | 5
-rw-r--r-- net/wireless/mlme.c | 5
-rw-r--r-- net/wireless/nl80211.c | 162
-rw-r--r-- net/wireless/nl80211.h | 2
-rw-r--r-- net/wireless/reg.c | 57
-rw-r--r-- net/wireless/sme.c | 54
-rw-r--r-- net/wireless/trace.h | 309
-rw-r--r-- net/wireless/util.c | 185
-rw-r--r-- net/wireless/wext-compat.c | 2
-rw-r--r-- net/wireless/wext-core.c | 20
-rw-r--r-- net/wireless/wext-sme.c | 2
-rw-r--r-- net/x25/af_x25.c | 6
-rw-r--r-- net/xdp/xsk.c | 73
-rw-r--r-- net/xdp/xsk_buff_pool.c | 7
-rw-r--r-- net/xdp/xsk_queue.c | 11
-rw-r--r-- net/xdp/xsk_queue.h | 1
-rw-r--r-- net/xfrm/espintcp.c | 3
-rw-r--r-- net/xfrm/xfrm_compat.c | 4
-rw-r--r-- net/xfrm/xfrm_device.c | 8
-rw-r--r-- net/xfrm/xfrm_input.c | 3
-rw-r--r-- net/xfrm/xfrm_interface_bpf.c | 7
-rw-r--r-- net/xfrm/xfrm_interface_core.c | 54
-rw-r--r-- net/xfrm/xfrm_policy.c | 14
-rw-r--r-- net/xfrm/xfrm_state.c | 20
428 files changed, 21561 insertions(+), 18221 deletions(-)
diff --git a/net/9p/trans_xen.c b/net/9p/trans_xen.c
index 70aa613c4cfe..c64050e839ac 100644
--- a/net/9p/trans_xen.c
+++ b/net/9p/trans_xen.c
@@ -305,13 +305,12 @@ static void xen_9pfs_front_free(struct xen_9pfs_front_priv *priv)
kfree(priv);
}
-static int xen_9pfs_front_remove(struct xenbus_device *dev)
+static void xen_9pfs_front_remove(struct xenbus_device *dev)
{
struct xen_9pfs_front_priv *priv = dev_get_drvdata(&dev->dev);
dev_set_drvdata(&dev->dev, NULL);
xen_9pfs_front_free(priv);
- return 0;
}
static int xen_9pfs_front_alloc_dataring(struct xenbus_device *dev,
diff --git a/net/Makefile b/net/Makefile
index 6a62e5b27378..0914bea9c335 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -23,6 +23,7 @@ obj-$(CONFIG_BPFILTER) += bpfilter/
obj-$(CONFIG_PACKET) += packet/
obj-$(CONFIG_NET_KEY) += key/
obj-$(CONFIG_BRIDGE) += bridge/
+obj-$(CONFIG_NET_DEVLINK) += devlink/
obj-$(CONFIG_NET_DSA) += dsa/
obj-$(CONFIG_ATALK) += appletalk/
obj-$(CONFIG_X25) += x25/
diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index 114ee5da261f..828fb393ee94 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -27,7 +27,6 @@
#include <linux/netdevice.h>
#include <linux/netlink.h>
#include <linux/pkt_sched.h>
-#include <linux/prandom.h>
#include <linux/printk.h>
#include <linux/random.h>
#include <linux/rculist.h>
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index f9a58fb5442e..acff565849ae 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -21,7 +21,6 @@
#include <linux/minmax.h>
#include <linux/netdevice.h>
#include <linux/nl80211.h>
-#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index addfd8c4fe95..e710e9afe78f 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -21,7 +21,6 @@
#include <linux/minmax.h>
#include <linux/mutex.h>
#include <linux/netdevice.h>
-#include <linux/prandom.h>
#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
@@ -800,8 +799,8 @@ batadv_v_ogm_process_per_outif(struct batadv_priv *bat_priv,
/* only unknown & newer OGMs contain TVLVs we are interested in */
if (seqno_age > 0 && if_outgoing == BATADV_IF_DEFAULT)
- batadv_tvlv_containers_process(bat_priv, true, orig_node,
- NULL, NULL,
+ batadv_tvlv_containers_process(bat_priv, BATADV_OGM2, orig_node,
+ NULL,
(unsigned char *)(ogm2 + 1),
ntohs(ogm2->tvlv_len));
diff --git a/net/batman-adv/distributed-arp-table.c b/net/batman-adv/distributed-arp-table.c
index fefb51a5f606..6968e55eb971 100644
--- a/net/batman-adv/distributed-arp-table.c
+++ b/net/batman-adv/distributed-arp-table.c
@@ -822,7 +822,7 @@ int batadv_dat_init(struct batadv_priv *bat_priv)
batadv_dat_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_dat_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_DAT, 1,
+ NULL, NULL, BATADV_TVLV_DAT, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
batadv_dat_tvlv_container_update(bat_priv);
return 0;
diff --git a/net/batman-adv/gateway_common.c b/net/batman-adv/gateway_common.c
index 9349c76f30c5..6a964a773f57 100644
--- a/net/batman-adv/gateway_common.c
+++ b/net/batman-adv/gateway_common.c
@@ -259,7 +259,7 @@ void batadv_gw_init(struct batadv_priv *bat_priv)
atomic_set(&bat_priv->gw.sel_class, 1);
batadv_tvlv_handler_register(bat_priv, batadv_gw_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_GW, 1,
+ NULL, NULL, BATADV_TVLV_GW, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
}
diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index c48803b32bb0..156ed39eded1 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
#define BATADV_DRIVER_DEVICE "batman-adv"
#ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2022.3"
+#define BATADV_SOURCE_VERSION "2023.1"
#endif
/* B.A.T.M.A.N. parameters */
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index b238455913df..315394f12c55 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -26,7 +26,6 @@
#include <linux/ipv6.h>
#include <linux/jiffies.h>
#include <linux/kernel.h>
-#include <linux/kref.h>
#include <linux/list.h>
#include <linux/lockdep.h>
#include <linux/netdevice.h>
@@ -1137,222 +1136,19 @@ static int batadv_mcast_forw_rtr_count(struct batadv_priv *bat_priv,
}
/**
- * batadv_mcast_forw_tt_node_get() - get a multicast tt node
- * @bat_priv: the bat priv with all the soft interface information
- * @ethhdr: the ether header containing the multicast destination
- *
- * Return: an orig_node matching the multicast address provided by ethhdr
- * via a translation table lookup. This increases the returned nodes refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_tt_node_get(struct batadv_priv *bat_priv,
- struct ethhdr *ethhdr)
-{
- return batadv_transtable_search(bat_priv, NULL, ethhdr->h_dest,
- BATADV_NO_FLAGS);
-}
-
-/**
- * batadv_mcast_forw_ipv4_node_get() - get a node with an ipv4 flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 flag set and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_ipv4_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_ipv4_list,
- mcast_want_all_ipv4_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_ipv6_node_get() - get a node with an ipv6 flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV6 flag set
- * and increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_ipv6_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_ipv6_list,
- mcast_want_all_ipv6_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_ip_node_get() - get a node with an ipv4/ipv6 flag
- * @bat_priv: the bat priv with all the soft interface information
- * @ethhdr: an ethernet header to determine the protocol family from
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_IPV4 or
- * BATADV_MCAST_WANT_ALL_IPV6 flag, depending on the provided ethhdr, sets and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_ip_node_get(struct batadv_priv *bat_priv,
- struct ethhdr *ethhdr)
-{
- switch (ntohs(ethhdr->h_proto)) {
- case ETH_P_IP:
- return batadv_mcast_forw_ipv4_node_get(bat_priv);
- case ETH_P_IPV6:
- return batadv_mcast_forw_ipv6_node_get(bat_priv);
- default:
- /* we shouldn't be here... */
- return NULL;
- }
-}
-
-/**
- * batadv_mcast_forw_unsnoop_node_get() - get a node with an unsnoopable flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_ALL_UNSNOOPABLES flag
- * set and increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_unsnoop_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_unsnoopables_list,
- mcast_want_all_unsnoopables_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_rtr4_node_get() - get a node with an ipv4 mcast router flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR4 flag unset and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_rtr4_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_rtr4_list,
- mcast_want_all_rtr4_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_rtr6_node_get() - get a node with an ipv6 mcast router flag
- * @bat_priv: the bat priv with all the soft interface information
- *
- * Return: an orig_node which has the BATADV_MCAST_WANT_NO_RTR6 flag unset
- * and increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_rtr6_node_get(struct batadv_priv *bat_priv)
-{
- struct batadv_orig_node *tmp_orig_node, *orig_node = NULL;
-
- rcu_read_lock();
- hlist_for_each_entry_rcu(tmp_orig_node,
- &bat_priv->mcast.want_all_rtr6_list,
- mcast_want_all_rtr6_node) {
- if (!kref_get_unless_zero(&tmp_orig_node->refcount))
- continue;
-
- orig_node = tmp_orig_node;
- break;
- }
- rcu_read_unlock();
-
- return orig_node;
-}
-
-/**
- * batadv_mcast_forw_rtr_node_get() - get a node with an ipv4/ipv6 router flag
- * @bat_priv: the bat priv with all the soft interface information
- * @ethhdr: an ethernet header to determine the protocol family from
- *
- * Return: an orig_node which has no BATADV_MCAST_WANT_NO_RTR4 or
- * BATADV_MCAST_WANT_NO_RTR6 flag, depending on the provided ethhdr, set and
- * increases its refcount.
- */
-static struct batadv_orig_node *
-batadv_mcast_forw_rtr_node_get(struct batadv_priv *bat_priv,
- struct ethhdr *ethhdr)
-{
- switch (ntohs(ethhdr->h_proto)) {
- case ETH_P_IP:
- return batadv_mcast_forw_rtr4_node_get(bat_priv);
- case ETH_P_IPV6:
- return batadv_mcast_forw_rtr6_node_get(bat_priv);
- default:
- /* we shouldn't be here... */
- return NULL;
- }
-}
-
-/**
* batadv_mcast_forw_mode() - check on how to forward a multicast packet
* @bat_priv: the bat priv with all the soft interface information
- * @skb: The multicast packet to check
- * @orig: an originator to be set to forward the skb to
+ * @skb: the multicast packet to check
* @is_routable: stores whether the destination is routable
*
- * Return: the forwarding mode as enum batadv_forw_mode and in case of
- * BATADV_FORW_SINGLE set the orig to the single originator the skb
- * should be forwarded to.
+ * Return: The forwarding mode as enum batadv_forw_mode.
*/
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- struct batadv_orig_node **orig, int *is_routable)
+ int *is_routable)
{
int ret, tt_count, ip_count, unsnoop_count, total_count;
bool is_unsnoopable = false;
- unsigned int mcast_fanout;
struct ethhdr *ethhdr;
int rtr_count = 0;
@@ -1361,7 +1157,7 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
if (ret == -ENOMEM)
return BATADV_FORW_NONE;
else if (ret < 0)
- return BATADV_FORW_ALL;
+ return BATADV_FORW_BCAST;
ethhdr = eth_hdr(skb);
@@ -1374,32 +1170,15 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
total_count = tt_count + ip_count + unsnoop_count + rtr_count;
- switch (total_count) {
- case 1:
- if (tt_count)
- *orig = batadv_mcast_forw_tt_node_get(bat_priv, ethhdr);
- else if (ip_count)
- *orig = batadv_mcast_forw_ip_node_get(bat_priv, ethhdr);
- else if (unsnoop_count)
- *orig = batadv_mcast_forw_unsnoop_node_get(bat_priv);
- else if (rtr_count)
- *orig = batadv_mcast_forw_rtr_node_get(bat_priv,
- ethhdr);
-
- if (*orig)
- return BATADV_FORW_SINGLE;
-
- fallthrough;
- case 0:
+ if (!total_count)
return BATADV_FORW_NONE;
- default:
- mcast_fanout = atomic_read(&bat_priv->multicast_fanout);
+ else if (unsnoop_count)
+ return BATADV_FORW_BCAST;
- if (!unsnoop_count && total_count <= mcast_fanout)
- return BATADV_FORW_SOME;
- }
+ if (total_count <= atomic_read(&bat_priv->multicast_fanout))
+ return BATADV_FORW_UCASTS;
- return BATADV_FORW_ALL;
+ return BATADV_FORW_BCAST;
}
/**
@@ -1411,10 +1190,10 @@ batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
*
* Return: NET_XMIT_DROP in case of error or NET_XMIT_SUCCESS otherwise.
*/
-int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- unsigned short vid,
- struct batadv_orig_node *orig_node)
+static int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
+ struct sk_buff *skb,
+ unsigned short vid,
+ struct batadv_orig_node *orig_node)
{
/* Avoid sending multicast-in-unicast packets to other BLA
* gateways - they already got the frame from the LAN side
@@ -2039,7 +1818,7 @@ static void batadv_mcast_tvlv_ogm_handler(struct batadv_priv *bat_priv,
void batadv_mcast_init(struct batadv_priv *bat_priv)
{
batadv_tvlv_handler_register(bat_priv, batadv_mcast_tvlv_ogm_handler,
- NULL, BATADV_TVLV_MCAST, 2,
+ NULL, NULL, BATADV_TVLV_MCAST, 2,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
INIT_DELAYED_WORK(&bat_priv->mcast.work, batadv_mcast_mla_update);
diff --git a/net/batman-adv/multicast.h b/net/batman-adv/multicast.h
index 8aec818d0bf6..a9770d8d6d36 100644
--- a/net/batman-adv/multicast.h
+++ b/net/batman-adv/multicast.h
@@ -17,23 +17,16 @@
*/
enum batadv_forw_mode {
/**
- * @BATADV_FORW_ALL: forward the packet to all nodes (currently via
- * classic flooding)
+ * @BATADV_FORW_BCAST: forward the packet to all nodes via a batman-adv
+ * broadcast packet
*/
- BATADV_FORW_ALL,
+ BATADV_FORW_BCAST,
/**
- * @BATADV_FORW_SOME: forward the packet to some nodes (currently via
- * a multicast-to-unicast conversion and the BATMAN unicast routing
- * protocol)
+ * @BATADV_FORW_UCASTS: forward the packet to some nodes via one
+ * or more batman-adv unicast packets
*/
- BATADV_FORW_SOME,
-
- /**
- * @BATADV_FORW_SINGLE: forward the packet to a single node (currently
- * via the BATMAN unicast routing protocol)
- */
- BATADV_FORW_SINGLE,
+ BATADV_FORW_UCASTS,
/** @BATADV_FORW_NONE: don't forward, drop it */
BATADV_FORW_NONE,
@@ -43,14 +36,8 @@ enum batadv_forw_mode {
enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- struct batadv_orig_node **mcast_single_orig,
int *is_routable);
-int batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- unsigned short vid,
- struct batadv_orig_node *orig_node);
-
int batadv_mcast_forw_send(struct batadv_priv *bat_priv, struct sk_buff *skb,
unsigned short vid, int is_routable);
@@ -69,20 +56,9 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig_node);
static inline enum batadv_forw_mode
batadv_mcast_forw_mode(struct batadv_priv *bat_priv, struct sk_buff *skb,
- struct batadv_orig_node **mcast_single_orig,
int *is_routable)
{
- return BATADV_FORW_ALL;
-}
-
-static inline int
-batadv_mcast_forw_send_orig(struct batadv_priv *bat_priv,
- struct sk_buff *skb,
- unsigned short vid,
- struct batadv_orig_node *orig_node)
-{
- kfree_skb(skb);
- return NET_XMIT_DROP;
+ return BATADV_FORW_BCAST;
}
static inline int
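
Callers of batadv_mcast_forw_mode() now handle three outcomes instead of four. A hedged sketch of the caller-side switch, mirroring the batadv_interface_tx() hunk in soft-interface.c further down this diff (do_bcast and the dropped label come from that context):

/* Sketch: consuming the reduced forwarding-mode enum. */
forw_mode = batadv_mcast_forw_mode(bat_priv, skb, &mcast_is_routable);

switch (forw_mode) {
case BATADV_FORW_BCAST:
	break;			/* keep classic broadcast flooding */
case BATADV_FORW_UCASTS:
	do_bcast = false;	/* send one or more unicast copies instead */
	break;
case BATADV_FORW_NONE:
default:
	goto dropped;		/* no interested node: drop the frame */
}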
diff --git a/net/batman-adv/network-coding.c b/net/batman-adv/network-coding.c
index bf29fba4dde5..71ebd0284f95 100644
--- a/net/batman-adv/network-coding.c
+++ b/net/batman-adv/network-coding.c
@@ -25,8 +25,8 @@
#include <linux/lockdep.h>
#include <linux/net.h>
#include <linux/netdevice.h>
-#include <linux/prandom.h>
#include <linux/printk.h>
+#include <linux/random.h>
#include <linux/rculist.h>
#include <linux/rcupdate.h>
#include <linux/skbuff.h>
@@ -160,7 +160,7 @@ int batadv_nc_mesh_init(struct batadv_priv *bat_priv)
batadv_nc_start_timer(bat_priv);
batadv_tvlv_handler_register(bat_priv, batadv_nc_tvlv_ogm_handler_v1,
- NULL, BATADV_TVLV_NC, 1,
+ NULL, NULL, BATADV_TVLV_NC, 1,
BATADV_TVLV_HANDLER_OGM_CIFNOTFND);
batadv_nc_tvlv_container_update(bat_priv);
return 0;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 83f31494ea4d..163cd43c4821 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -1073,10 +1073,9 @@ int batadv_recv_unicast_tvlv(struct sk_buff *skb,
if (tvlv_buff_len > skb->len - hdr_size)
goto free_skb;
- ret = batadv_tvlv_containers_process(bat_priv, false, NULL,
- unicast_tvlv_packet->src,
- unicast_tvlv_packet->dst,
- tvlv_buff, tvlv_buff_len);
+ ret = batadv_tvlv_containers_process(bat_priv, BATADV_UNICAST_TVLV,
+ NULL, skb, tvlv_buff,
+ tvlv_buff_len);
if (ret != NET_RX_SUCCESS) {
ret = batadv_route_unicast_packet(skb, recv_if);
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 0f5c0679b55a..125f4628687c 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -48,7 +48,6 @@
#include "hard-interface.h"
#include "multicast.h"
#include "network-coding.h"
-#include "originator.h"
#include "send.h"
#include "translation-table.h"
@@ -196,8 +195,7 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
unsigned short vid;
u32 seqno;
int gw_mode;
- enum batadv_forw_mode forw_mode = BATADV_FORW_SINGLE;
- struct batadv_orig_node *mcast_single_orig = NULL;
+ enum batadv_forw_mode forw_mode = BATADV_FORW_BCAST;
int mcast_is_routable = 0;
int network_offset = ETH_HLEN;
__be16 proto;
@@ -301,14 +299,18 @@ static netdev_tx_t batadv_interface_tx(struct sk_buff *skb,
send:
if (do_bcast && !is_broadcast_ether_addr(ethhdr->h_dest)) {
forw_mode = batadv_mcast_forw_mode(bat_priv, skb,
- &mcast_single_orig,
&mcast_is_routable);
- if (forw_mode == BATADV_FORW_NONE)
- goto dropped;
-
- if (forw_mode == BATADV_FORW_SINGLE ||
- forw_mode == BATADV_FORW_SOME)
+ switch (forw_mode) {
+ case BATADV_FORW_BCAST:
+ break;
+ case BATADV_FORW_UCASTS:
do_bcast = false;
+ break;
+ case BATADV_FORW_NONE:
+ fallthrough;
+ default:
+ goto dropped;
+ }
}
}
@@ -357,10 +359,7 @@ send:
if (ret)
goto dropped;
ret = batadv_send_skb_via_gw(bat_priv, skb, vid);
- } else if (mcast_single_orig) {
- ret = batadv_mcast_forw_send_orig(bat_priv, skb, vid,
- mcast_single_orig);
- } else if (forw_mode == BATADV_FORW_SOME) {
+ } else if (forw_mode == BATADV_FORW_UCASTS) {
ret = batadv_mcast_forw_send(bat_priv, skb, vid,
mcast_is_routable);
} else {
@@ -386,7 +385,6 @@ dropped:
dropped_freed:
batadv_inc_counter(bat_priv, BATADV_CNT_TX_DROPPED);
end:
- batadv_orig_node_put(mcast_single_orig);
batadv_hardif_put(primary_if);
return NETDEV_TX_OK;
}
diff --git a/net/batman-adv/translation-table.c b/net/batman-adv/translation-table.c
index 01d30c1e412c..36ca31252a73 100644
--- a/net/batman-adv/translation-table.c
+++ b/net/batman-adv/translation-table.c
@@ -4168,11 +4168,11 @@ int batadv_tt_init(struct batadv_priv *bat_priv)
}
batadv_tvlv_handler_register(bat_priv, batadv_tt_tvlv_ogm_handler_v1,
- batadv_tt_tvlv_unicast_handler_v1,
+ batadv_tt_tvlv_unicast_handler_v1, NULL,
BATADV_TVLV_TT, 1, BATADV_NO_FLAGS);
batadv_tvlv_handler_register(bat_priv, NULL,
- batadv_roam_tvlv_unicast_handler_v1,
+ batadv_roam_tvlv_unicast_handler_v1, NULL,
BATADV_TVLV_ROAM, 1, BATADV_NO_FLAGS);
INIT_DELAYED_WORK(&bat_priv->tt.work, batadv_tt_purge);
diff --git a/net/batman-adv/tvlv.c b/net/batman-adv/tvlv.c
index 7ec2e2343884..2a583215d439 100644
--- a/net/batman-adv/tvlv.c
+++ b/net/batman-adv/tvlv.c
@@ -352,10 +352,9 @@ end:
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
* @tvlv_handler: tvlv callback function handling the tvlv content
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @packet_type: indicates for which packet type the TVLV handler is called
* @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
+ * @skb: the skb the TVLV handler is called for
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
@@ -364,15 +363,20 @@ end:
*/
static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
struct batadv_tvlv_handler *tvlv_handler,
- bool ogm_source,
+ u8 packet_type,
struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
+ struct sk_buff *skb, void *tvlv_value,
+ u16 tvlv_value_len)
{
+ unsigned int tvlv_offset;
+ u8 *src, *dst;
+
if (!tvlv_handler)
return NET_RX_SUCCESS;
- if (ogm_source) {
+ switch (packet_type) {
+ case BATADV_IV_OGM:
+ case BATADV_OGM2:
if (!tvlv_handler->ogm_handler)
return NET_RX_SUCCESS;
@@ -383,19 +387,32 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
BATADV_NO_FLAGS,
tvlv_value, tvlv_value_len);
tvlv_handler->flags |= BATADV_TVLV_HANDLER_OGM_CALLED;
- } else {
- if (!src)
- return NET_RX_SUCCESS;
-
- if (!dst)
+ break;
+ case BATADV_UNICAST_TVLV:
+ if (!skb)
return NET_RX_SUCCESS;
if (!tvlv_handler->unicast_handler)
return NET_RX_SUCCESS;
+ src = ((struct batadv_unicast_tvlv_packet *)skb->data)->src;
+ dst = ((struct batadv_unicast_tvlv_packet *)skb->data)->dst;
+
return tvlv_handler->unicast_handler(bat_priv, src,
dst, tvlv_value,
tvlv_value_len);
+ case BATADV_MCAST:
+ if (!skb)
+ return NET_RX_SUCCESS;
+
+ if (!tvlv_handler->mcast_handler)
+ return NET_RX_SUCCESS;
+
+ tvlv_offset = (unsigned char *)tvlv_value - skb->data;
+ skb_set_network_header(skb, tvlv_offset);
+ skb_set_transport_header(skb, tvlv_offset + tvlv_value_len);
+
+ return tvlv_handler->mcast_handler(bat_priv, skb);
}
return NET_RX_SUCCESS;
@@ -405,10 +422,9 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
* batadv_tvlv_containers_process() - parse the given tvlv buffer to call the
* appropriate handlers
* @bat_priv: the bat priv with all the soft interface information
- * @ogm_source: flag indicating whether the tvlv is an ogm or a unicast packet
+ * @packet_type: indicates for which packet type the TVLV handler is called
* @orig_node: orig node emitting the ogm packet
- * @src: source mac address of the unicast packet
- * @dst: destination mac address of the unicast packet
+ * @skb: the skb the TVLV handler is called for
* @tvlv_value: tvlv content
* @tvlv_value_len: tvlv content length
*
@@ -416,10 +432,10 @@ static int batadv_tvlv_call_handler(struct batadv_priv *bat_priv,
* handler callbacks.
*/
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
+ u8 packet_type,
struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_value, u16 tvlv_value_len)
+ struct sk_buff *skb, void *tvlv_value,
+ u16 tvlv_value_len)
{
struct batadv_tvlv_handler *tvlv_handler;
struct batadv_tvlv_hdr *tvlv_hdr;
@@ -441,20 +457,24 @@ int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
tvlv_hdr->version);
ret |= batadv_tvlv_call_handler(bat_priv, tvlv_handler,
- ogm_source, orig_node,
- src, dst, tvlv_value,
+ packet_type, orig_node, skb,
+ tvlv_value,
tvlv_value_cont_len);
batadv_tvlv_handler_put(tvlv_handler);
tvlv_value = (u8 *)tvlv_value + tvlv_value_cont_len;
tvlv_value_len -= tvlv_value_cont_len;
}
- if (!ogm_source)
+ if (packet_type != BATADV_IV_OGM &&
+ packet_type != BATADV_OGM2)
return ret;
rcu_read_lock();
hlist_for_each_entry_rcu(tvlv_handler,
&bat_priv->tvlv.handler_list, list) {
+ if (!tvlv_handler->ogm_handler)
+ continue;
+
if ((tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CIFNOTFND) &&
!(tvlv_handler->flags & BATADV_TVLV_HANDLER_OGM_CALLED))
tvlv_handler->ogm_handler(bat_priv, orig_node,
@@ -490,7 +510,7 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
tvlv_value = batadv_ogm_packet + 1;
- batadv_tvlv_containers_process(bat_priv, true, orig_node, NULL, NULL,
+ batadv_tvlv_containers_process(bat_priv, BATADV_IV_OGM, orig_node, NULL,
tvlv_value, tvlv_value_len);
}
@@ -504,6 +524,10 @@ void batadv_tvlv_ogm_receive(struct batadv_priv *bat_priv,
* @uptr: unicast tvlv handler callback function. This function receives the
* source & destination of the unicast packet as well as the tvlv content
* to process.
+ * @mptr: multicast packet tvlv handler callback function. This function
+ * receives the full skb to process, with the skb network header pointing
+ * to the current tvlv and the skb transport header pointing to the first
+ * byte after the current tvlv.
* @type: tvlv handler type to be registered
* @version: tvlv handler version to be registered
* @flags: flags to enable or disable TVLV API behavior
@@ -518,6 +542,8 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
u8 *src, u8 *dst,
void *tvlv_value,
u16 tvlv_value_len),
+ int (*mptr)(struct batadv_priv *bat_priv,
+ struct sk_buff *skb),
u8 type, u8 version, u8 flags)
{
struct batadv_tvlv_handler *tvlv_handler;
@@ -539,6 +565,7 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
tvlv_handler->ogm_handler = optr;
tvlv_handler->unicast_handler = uptr;
+ tvlv_handler->mcast_handler = mptr;
tvlv_handler->type = type;
tvlv_handler->version = version;
tvlv_handler->flags = flags;
diff --git a/net/batman-adv/tvlv.h b/net/batman-adv/tvlv.h
index 4cf8af00fc11..e5697230d991 100644
--- a/net/batman-adv/tvlv.h
+++ b/net/batman-adv/tvlv.h
@@ -9,6 +9,7 @@
#include "main.h"
+#include <linux/skbuff.h>
#include <linux/types.h>
#include <uapi/linux/batadv_packet.h>
@@ -34,14 +35,16 @@ void batadv_tvlv_handler_register(struct batadv_priv *bat_priv,
u8 *src, u8 *dst,
void *tvlv_value,
u16 tvlv_value_len),
+ int (*mptr)(struct batadv_priv *bat_priv,
+ struct sk_buff *skb),
u8 type, u8 version, u8 flags);
void batadv_tvlv_handler_unregister(struct batadv_priv *bat_priv,
u8 type, u8 version);
int batadv_tvlv_containers_process(struct batadv_priv *bat_priv,
- bool ogm_source,
+ u8 packet_type,
struct batadv_orig_node *orig_node,
- u8 *src, u8 *dst,
- void *tvlv_buff, u16 tvlv_buff_len);
+ struct sk_buff *skb, void *tvlv_buff,
+ u16 tvlv_buff_len);
void batadv_tvlv_unicast_send(struct batadv_priv *bat_priv, const u8 *src,
const u8 *dst, u8 type, u8 version,
void *tvlv_value, u16 tvlv_value_len);
diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 758cd797a063..ca9449ec9836 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -2335,6 +2335,12 @@ struct batadv_tvlv_handler {
u8 *src, u8 *dst,
void *tvlv_value, u16 tvlv_value_len);
+ /**
+ * @mcast_handler: handler callback which is given the tvlv payload to
+ * process on incoming mcast packet
+ */
+ int (*mcast_handler)(struct batadv_priv *bat_priv, struct sk_buff *skb);
+
/** @type: tvlv type this handler feels responsible for */
u8 type;
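
Taken together, the batman-adv hunks above rework TVLV dispatch: handlers are keyed by packet type (BATADV_IV_OGM, BATADV_OGM2, BATADV_UNICAST_TVLV, BATADV_MCAST) instead of an ogm_source flag, and a third, multicast callback slot is added. A minimal registration sketch against the new batadv_tvlv_handler_register() signature; the handler bodies and BATADV_TVLV_EXAMPLE are hypothetical, only the call shape comes from this diff:

static void my_ogm_handler(struct batadv_priv *bat_priv,
			   struct batadv_orig_node *orig, u8 flags,
			   void *tvlv_value, u16 tvlv_value_len)
{
	/* per-originator TVLV content carried in OGM/OGM2 packets */
}

static int my_mcast_handler(struct batadv_priv *bat_priv, struct sk_buff *skb)
{
	/* network header points at this TVLV, transport header just past
	 * it, as arranged by batadv_tvlv_call_handler() above
	 */
	return NET_RX_SUCCESS;
}

static void my_init(struct batadv_priv *bat_priv)
{
	batadv_tvlv_handler_register(bat_priv, my_ogm_handler, NULL,
				     my_mcast_handler, BATADV_TVLV_EXAMPLE,
				     1, BATADV_NO_FLAGS);
}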
diff --git a/net/bluetooth/ecdh_helper.c b/net/bluetooth/ecdh_helper.c
index 989401f116e9..0efc93fdae8a 100644
--- a/net/bluetooth/ecdh_helper.c
+++ b/net/bluetooth/ecdh_helper.c
@@ -25,22 +25,6 @@
#include <linux/scatterlist.h>
#include <crypto/ecdh.h>
-struct ecdh_completion {
- struct completion completion;
- int err;
-};
-
-static void ecdh_complete(struct crypto_async_request *req, int err)
-{
- struct ecdh_completion *res = req->data;
-
- if (err == -EINPROGRESS)
- return;
-
- res->err = err;
- complete(&res->completion);
-}
-
static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
{
int i;
@@ -60,9 +44,9 @@ static inline void swap_digits(u64 *in, u64 *out, unsigned int ndigits)
int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
u8 secret[32])
{
+ DECLARE_CRYPTO_WAIT(result);
struct kpp_request *req;
u8 *tmp;
- struct ecdh_completion result;
struct scatterlist src, dst;
int err;
@@ -76,8 +60,6 @@ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
goto free_tmp;
}
- init_completion(&result.completion);
-
swap_digits((u64 *)public_key, (u64 *)tmp, 4); /* x */
swap_digits((u64 *)&public_key[32], (u64 *)&tmp[32], 4); /* y */
@@ -86,12 +68,9 @@ int compute_ecdh_secret(struct crypto_kpp *tfm, const u8 public_key[64],
kpp_request_set_input(req, &src, 64);
kpp_request_set_output(req, &dst, 32);
kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- ecdh_complete, &result);
+ crypto_req_done, &result);
err = crypto_kpp_compute_shared_secret(req);
- if (err == -EINPROGRESS) {
- wait_for_completion(&result.completion);
- err = result.err;
- }
+ err = crypto_wait_req(err, &result);
if (err < 0) {
pr_err("alg: ecdh: compute shared secret failed. err %d\n",
err);
@@ -165,9 +144,9 @@ free_tmp:
*/
int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
{
+ DECLARE_CRYPTO_WAIT(result);
struct kpp_request *req;
u8 *tmp;
- struct ecdh_completion result;
struct scatterlist dst;
int err;
@@ -181,18 +160,14 @@ int generate_ecdh_public_key(struct crypto_kpp *tfm, u8 public_key[64])
goto free_tmp;
}
- init_completion(&result.completion);
sg_init_one(&dst, tmp, 64);
kpp_request_set_input(req, NULL, 0);
kpp_request_set_output(req, &dst, 64);
kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- ecdh_complete, &result);
+ crypto_req_done, &result);
err = crypto_kpp_generate_public_key(req);
- if (err == -EINPROGRESS) {
- wait_for_completion(&result.completion);
- err = result.err;
- }
+ err = crypto_wait_req(err, &result);
if (err < 0)
goto free_all;
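
The two conversions above swap a hand-rolled completion struct for the crypto API's stock wait helpers. A hedged sketch of the general pattern for any async crypto request; the request setup is illustrative, not lifted from ecdh_helper.c:

#include <crypto/kpp.h>
#include <linux/crypto.h>

/* crypto_wait_req() folds the -EINPROGRESS/-EBUSY "request pending" codes
 * into a sleep on the completion that crypto_req_done() fires from the
 * request callback, and returns the final status either way.
 */
static int run_kpp_request(struct kpp_request *req)
{
	DECLARE_CRYPTO_WAIT(wait);	/* replaces struct ecdh_completion */
	int err;

	kpp_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				 crypto_req_done, &wait);
	err = crypto_kpp_compute_shared_secret(req);
	return crypto_wait_req(err, &wait);
}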
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index d3e542c2fc3e..17b946f9ba31 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -821,6 +821,7 @@ static void terminate_big_destroy(struct hci_dev *hdev, void *data, int err)
static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
{
struct iso_list_data *d;
+ int ret;
bt_dev_dbg(hdev, "big 0x%2.2x bis 0x%2.2x", big, bis);
@@ -831,8 +832,12 @@ static int hci_le_terminate_big(struct hci_dev *hdev, u8 big, u8 bis)
d->big = big;
d->bis = bis;
- return hci_cmd_sync_queue(hdev, terminate_big_sync, d,
- terminate_big_destroy);
+ ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d,
+ terminate_big_destroy);
+ if (ret)
+ kfree(d);
+
+ return ret;
}
static int big_terminate_sync(struct hci_dev *hdev, void *data)
@@ -857,6 +862,7 @@ static int big_terminate_sync(struct hci_dev *hdev, void *data)
static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
{
struct iso_list_data *d;
+ int ret;
bt_dev_dbg(hdev, "big 0x%2.2x sync_handle 0x%4.4x", big, sync_handle);
@@ -867,8 +873,12 @@ static int hci_le_big_terminate(struct hci_dev *hdev, u8 big, u16 sync_handle)
d->big = big;
d->sync_handle = sync_handle;
- return hci_cmd_sync_queue(hdev, big_terminate_sync, d,
- terminate_big_destroy);
+ ret = hci_cmd_sync_queue(hdev, big_terminate_sync, d,
+ terminate_big_destroy);
+ if (ret)
+ kfree(d);
+
+ return ret;
}
/* Cleanup BIS connection
@@ -1051,8 +1061,15 @@ int hci_conn_del(struct hci_conn *conn)
if (conn->type == ACL_LINK) {
struct hci_conn *sco = conn->link;
- if (sco)
+ if (sco) {
sco->link = NULL;
+ /* Due to race, SCO connection might be not established
+ * yet at this point. Delete it now, otherwise it is
+ * possible for it to be stuck and can't be deleted.
+ */
+ if (sco->handle == HCI_CONN_HANDLE_UNSET)
+ hci_conn_del(sco);
+ }
/* Unacked frames */
hdev->acl_cnt += conn->sent;
@@ -1233,6 +1250,8 @@ static void create_le_conn_complete(struct hci_dev *hdev, void *data, int err)
if (conn != hci_lookup_le_connect(hdev))
goto done;
+ /* Flush to make sure we send create conn cancel command if needed */
+ flush_delayed_work(&conn->le_conn_timeout);
hci_conn_failed(conn, bt_status(err));
done:
@@ -1971,16 +1990,14 @@ static void hci_iso_qos_setup(struct hci_dev *hdev, struct hci_conn *conn,
qos->latency = conn->le_conn_latency;
}
-static struct hci_conn *hci_bind_bis(struct hci_conn *conn,
- struct bt_iso_qos *qos)
+static void hci_bind_bis(struct hci_conn *conn,
+ struct bt_iso_qos *qos)
{
/* Update LINK PHYs according to QoS preference */
conn->le_tx_phy = qos->out.phy;
conn->le_tx_phy = qos->out.phy;
conn->iso_qos = *qos;
conn->state = BT_BOUND;
-
- return conn;
}
static int create_big_sync(struct hci_dev *hdev, void *data)
@@ -2109,11 +2126,7 @@ struct hci_conn *hci_connect_bis(struct hci_dev *hdev, bdaddr_t *dst,
if (IS_ERR(conn))
return conn;
- conn = hci_bind_bis(conn, qos);
- if (!conn) {
- hci_conn_drop(conn);
- return ERR_PTR(-ENOMEM);
- }
+ hci_bind_bis(conn, qos);
/* Add Basic Announcement into Peridic Adv Data if BASE is set */
if (base_len && base) {
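
The hci_le_terminate_big()/hci_le_big_terminate() hunks above close a leak by honoring hci_cmd_sync_queue()'s ownership rule: the destroy callback only runs for entries that were actually queued. A hedged sketch of the rule in isolation, with the allocation shown for completeness:

/* On hci_cmd_sync_queue() failure the callee never saw the cookie, so the
 * destroy callback will not run and the caller must free it itself.
 */
d = kzalloc(sizeof(*d), GFP_KERNEL);
if (!d)
	return -ENOMEM;

ret = hci_cmd_sync_queue(hdev, terminate_big_sync, d, terminate_big_destroy);
if (ret)
	kfree(d);	/* never queued: nobody else will free it */

return ret;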
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0594af4e37ca..ad92a4be5851 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -3848,8 +3848,11 @@ static u8 hci_cc_le_set_cig_params(struct hci_dev *hdev, void *data,
conn->handle, conn->link);
/* Create CIS if LE is already connected */
- if (conn->link && conn->link->state == BT_CONNECTED)
+ if (conn->link && conn->link->state == BT_CONNECTED) {
+ rcu_read_unlock();
hci_le_create_cis(conn->link);
+ rcu_read_lock();
+ }
if (i == rp->num_handles)
break;
diff --git a/net/bluetooth/hci_sync.c b/net/bluetooth/hci_sync.c
index 9e2d7e4b850c..117eedb6f709 100644
--- a/net/bluetooth/hci_sync.c
+++ b/net/bluetooth/hci_sync.c
@@ -3572,7 +3572,7 @@ static const struct hci_init_stage hci_init2[] = {
static int hci_le_read_buffer_size_sync(struct hci_dev *hdev)
{
/* Use Read LE Buffer Size V2 if supported */
- if (hdev->commands[41] & 0x20)
+ if (iso_capable(hdev) && hdev->commands[41] & 0x20)
return __hci_cmd_sync_status(hdev,
HCI_OP_LE_READ_BUFFER_SIZE_V2,
0, NULL, HCI_CMD_TIMEOUT);
@@ -3597,10 +3597,10 @@ static int hci_le_read_supported_states_sync(struct hci_dev *hdev)
/* LE Controller init stage 2 command sequence */
static const struct hci_init_stage le_init2[] = {
- /* HCI_OP_LE_READ_BUFFER_SIZE */
- HCI_INIT(hci_le_read_buffer_size_sync),
/* HCI_OP_LE_READ_LOCAL_FEATURES */
HCI_INIT(hci_le_read_local_features_sync),
+ /* HCI_OP_LE_READ_BUFFER_SIZE */
+ HCI_INIT(hci_le_read_buffer_size_sync),
/* HCI_OP_LE_READ_SUPPORTED_STATES */
HCI_INIT(hci_le_read_supported_states_sync),
{}
@@ -6187,20 +6187,13 @@ int hci_get_random_address(struct hci_dev *hdev, bool require_privacy,
static int _update_adv_data_sync(struct hci_dev *hdev, void *data)
{
- u8 instance = *(u8 *)data;
-
- kfree(data);
+ u8 instance = PTR_ERR(data);
return hci_update_adv_data_sync(hdev, instance);
}
int hci_update_adv_data(struct hci_dev *hdev, u8 instance)
{
- u8 *inst_ptr = kmalloc(1, GFP_KERNEL);
-
- if (!inst_ptr)
- return -ENOMEM;
-
- *inst_ptr = instance;
- return hci_cmd_sync_queue(hdev, _update_adv_data_sync, inst_ptr, NULL);
+ return hci_cmd_sync_queue(hdev, _update_adv_data_sync,
+ ERR_PTR(instance), NULL);
}
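
The _update_adv_data_sync() rewrite above uses a common kernel idiom: a small integer travels through the void * cookie of a callback API via ERR_PTR()/PTR_ERR(), removing a one-byte kmalloc() that could fail. A hedged, self-contained sketch; queue_cb() and do_update() are hypothetical stand-ins:

#include <linux/err.h>

/* Safe as long as the callee treats the cookie as opaque and never
 * dereferences it.
 */
static int work_cb(void *data)
{
	u8 instance = PTR_ERR(data);	/* unpack the integer */

	return do_update(instance);
}

static int queue_update(u8 instance)
{
	return queue_cb(work_cb, ERR_PTR(instance));	/* no allocation */
}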
diff --git a/net/bluetooth/hidp/Kconfig b/net/bluetooth/hidp/Kconfig
index 14100f341f33..6746be07e222 100644
--- a/net/bluetooth/hidp/Kconfig
+++ b/net/bluetooth/hidp/Kconfig
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
config BT_HIDP
tristate "HIDP protocol support"
- depends on BT_BREDR && INPUT
+ depends on BT_BREDR && INPUT && HID_SUPPORT
select HID
help
HIDP (Human Interface Device Protocol) is a transport layer
diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c
index cc20e706c639..bed1a7b9205c 100644
--- a/net/bluetooth/hidp/core.c
+++ b/net/bluetooth/hidp/core.c
@@ -739,7 +739,7 @@ static void hidp_stop(struct hid_device *hid)
hid->claimed = 0;
}
-struct hid_ll_driver hidp_hid_driver = {
+static const struct hid_ll_driver hidp_hid_driver = {
.parse = hidp_parse,
.start = hidp_start,
.stop = hidp_stop,
@@ -748,7 +748,6 @@ struct hid_ll_driver hidp_hid_driver = {
.raw_request = hidp_raw_request,
.output_report = hidp_output_report,
};
-EXPORT_SYMBOL_GPL(hidp_hid_driver);
/* This function sets up the hid device. It does not add it
to the HID system. That is done in hidp_add_connection(). */
diff --git a/net/bluetooth/iso.c b/net/bluetooth/iso.c
index 035bb5d25f85..24444b502e58 100644
--- a/net/bluetooth/iso.c
+++ b/net/bluetooth/iso.c
@@ -289,15 +289,15 @@ static int iso_connect_bis(struct sock *sk)
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ return err;
+
lock_sock(sk);
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
- err = iso_chan_add(conn, sk, NULL);
- if (err)
- goto release;
-
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -306,7 +306,6 @@ static int iso_connect_bis(struct sock *sk)
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
-release:
release_sock(sk);
return err;
@@ -372,15 +371,15 @@ static int iso_connect_cis(struct sock *sk)
hci_dev_unlock(hdev);
hci_dev_put(hdev);
+ err = iso_chan_add(conn, sk, NULL);
+ if (err)
+ return err;
+
lock_sock(sk);
/* Update source addr of the socket */
bacpy(&iso_pi(sk)->src, &hcon->src);
- err = iso_chan_add(conn, sk, NULL);
- if (err)
- goto release;
-
if (hcon->state == BT_CONNECTED) {
iso_sock_clear_timer(sk);
sk->sk_state = BT_CONNECTED;
@@ -392,7 +391,6 @@ static int iso_connect_cis(struct sock *sk)
iso_sock_set_timer(sk, sk->sk_sndtimeo);
}
-release:
release_sock(sk);
return err;
@@ -895,13 +893,10 @@ static int iso_listen_bis(struct sock *sk)
if (!hdev)
return -EHOSTUNREACH;
- hci_dev_lock(hdev);
-
err = hci_pa_create_sync(hdev, &iso_pi(sk)->dst,
le_addr_type(iso_pi(sk)->dst_type),
iso_pi(sk)->bc_sid);
- hci_dev_unlock(hdev);
hci_dev_put(hdev);
return err;
@@ -1432,33 +1427,29 @@ static void iso_conn_ready(struct iso_conn *conn)
struct sock *parent;
struct sock *sk = conn->sk;
struct hci_ev_le_big_sync_estabilished *ev;
+ struct hci_conn *hcon;
BT_DBG("conn %p", conn);
if (sk) {
iso_sock_ready(conn->sk);
} else {
- iso_conn_lock(conn);
-
- if (!conn->hcon) {
- iso_conn_unlock(conn);
+ hcon = conn->hcon;
+ if (!hcon)
return;
- }
- ev = hci_recv_event_data(conn->hcon->hdev,
+ ev = hci_recv_event_data(hcon->hdev,
HCI_EVT_LE_BIG_SYNC_ESTABILISHED);
if (ev)
- parent = iso_get_sock_listen(&conn->hcon->src,
- &conn->hcon->dst,
+ parent = iso_get_sock_listen(&hcon->src,
+ &hcon->dst,
iso_match_big, ev);
else
- parent = iso_get_sock_listen(&conn->hcon->src,
+ parent = iso_get_sock_listen(&hcon->src,
BDADDR_ANY, NULL, NULL);
- if (!parent) {
- iso_conn_unlock(conn);
+ if (!parent)
return;
- }
lock_sock(parent);
@@ -1466,30 +1457,29 @@ static void iso_conn_ready(struct iso_conn *conn)
BTPROTO_ISO, GFP_ATOMIC, 0);
if (!sk) {
release_sock(parent);
- iso_conn_unlock(conn);
return;
}
iso_sock_init(sk, parent);
- bacpy(&iso_pi(sk)->src, &conn->hcon->src);
- iso_pi(sk)->src_type = conn->hcon->src_type;
+ bacpy(&iso_pi(sk)->src, &hcon->src);
+ iso_pi(sk)->src_type = hcon->src_type;
/* If hcon has no destination address (BDADDR_ANY) it means it
* was created by HCI_EV_LE_BIG_SYNC_ESTABILISHED so we need to
* initialize using the parent socket destination address.
*/
- if (!bacmp(&conn->hcon->dst, BDADDR_ANY)) {
- bacpy(&conn->hcon->dst, &iso_pi(parent)->dst);
- conn->hcon->dst_type = iso_pi(parent)->dst_type;
- conn->hcon->sync_handle = iso_pi(parent)->sync_handle;
+ if (!bacmp(&hcon->dst, BDADDR_ANY)) {
+ bacpy(&hcon->dst, &iso_pi(parent)->dst);
+ hcon->dst_type = iso_pi(parent)->dst_type;
+ hcon->sync_handle = iso_pi(parent)->sync_handle;
}
- bacpy(&iso_pi(sk)->dst, &conn->hcon->dst);
- iso_pi(sk)->dst_type = conn->hcon->dst_type;
+ bacpy(&iso_pi(sk)->dst, &hcon->dst);
+ iso_pi(sk)->dst_type = hcon->dst_type;
- hci_conn_hold(conn->hcon);
- __iso_chan_add(conn, sk, parent);
+ hci_conn_hold(hcon);
+ iso_chan_add(conn, sk, parent);
if (test_bit(BT_SK_DEFER_SETUP, &bt_sk(parent)->flags))
sk->sk_state = BT_CONNECT2;
@@ -1500,8 +1490,6 @@ static void iso_conn_ready(struct iso_conn *conn)
parent->sk_data_ready(parent);
release_sock(parent);
-
- iso_conn_unlock(conn);
}
}
diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index a3e0dc6a6e73..adfc3ea06d08 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -2683,14 +2683,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
if (IS_ERR(skb))
return PTR_ERR(skb);
- /* Channel lock is released before requesting new skb and then
- * reacquired thus we need to recheck channel state.
- */
- if (chan->state != BT_CONNECTED) {
- kfree_skb(skb);
- return -ENOTCONN;
- }
-
l2cap_do_send(chan, skb);
return len;
}
@@ -2735,14 +2727,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
if (IS_ERR(skb))
return PTR_ERR(skb);
- /* Channel lock is released before requesting new skb and then
- * reacquired thus we need to recheck channel state.
- */
- if (chan->state != BT_CONNECTED) {
- kfree_skb(skb);
- return -ENOTCONN;
- }
-
l2cap_do_send(chan, skb);
err = len;
break;
@@ -2763,14 +2747,6 @@ int l2cap_chan_send(struct l2cap_chan *chan, struct msghdr *msg, size_t len)
*/
err = l2cap_segment_sdu(chan, &seg_queue, msg, len);
- /* The channel could have been closed while segmenting,
- * check that it is still connected.
- */
- if (chan->state != BT_CONNECTED) {
- __skb_queue_purge(&seg_queue);
- err = -ENOTCONN;
- }
-
if (err)
break;
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index ca8f07f3542b..eebe256104bc 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1624,6 +1624,14 @@ static struct sk_buff *l2cap_sock_alloc_skb_cb(struct l2cap_chan *chan,
if (!skb)
return ERR_PTR(err);
+ /* Channel lock is released before requesting new skb and then
+ * reacquired thus we need to recheck channel state.
+ */
+ if (chan->state != BT_CONNECTED) {
+ kfree_skb(skb);
+ return ERR_PTR(-ENOTCONN);
+ }
+
skb->priority = sk->sk_priority;
bt_cb(skb)->l2cap.chan = chan;
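
The recheck deleted from l2cap_core.c above moves here because this is where the lock actually goes away: bt_skb_send_alloc() can release the channel lock while blocking for memory. A hedged sketch of the check-after-blocking-alloc shape, with surrounding details simplified:

skb = bt_skb_send_alloc(sk, hdr_len + len,
			msg->msg_flags & MSG_DONTWAIT, &err);
if (!skb)
	return ERR_PTR(err);

/* the lock was dropped and reacquired during allocation */
if (chan->state != BT_CONNECTED) {
	kfree_skb(skb);
	return ERR_PTR(-ENOTCONN);
}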
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index d2ea8e19aa1b..7add66f30e4d 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -859,6 +859,12 @@ static u32 get_supported_settings(struct hci_dev *hdev)
hdev->set_bdaddr)
settings |= MGMT_SETTING_CONFIGURATION;
+ if (cis_central_capable(hdev))
+ settings |= MGMT_SETTING_CIS_CENTRAL;
+
+ if (cis_peripheral_capable(hdev))
+ settings |= MGMT_SETTING_CIS_PERIPHERAL;
+
settings |= MGMT_SETTING_PHY_CONFIGURATION;
return settings;
@@ -932,6 +938,12 @@ static u32 get_current_settings(struct hci_dev *hdev)
if (hci_dev_test_flag(hdev, HCI_WIDEBAND_SPEECH_ENABLED))
settings |= MGMT_SETTING_WIDEBAND_SPEECH;
+ if (cis_central_capable(hdev))
+ settings |= MGMT_SETTING_CIS_CENTRAL;
+
+ if (cis_peripheral_capable(hdev))
+ settings |= MGMT_SETTING_CIS_PERIPHERAL;
+
return settings;
}
diff --git a/net/bluetooth/mgmt_util.h b/net/bluetooth/mgmt_util.h
index 6a8b7e84293d..bdf978605d5a 100644
--- a/net/bluetooth/mgmt_util.h
+++ b/net/bluetooth/mgmt_util.h
@@ -27,7 +27,7 @@ struct mgmt_mesh_tx {
struct sock *sk;
u8 handle;
u8 instance;
- u8 param[sizeof(struct mgmt_cp_mesh_send) + 29];
+ u8 param[sizeof(struct mgmt_cp_mesh_send) + 31];
};
struct mgmt_pending_cmd {
diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c
index 8d6fce9005bd..053ef8f25fae 100644
--- a/net/bluetooth/rfcomm/core.c
+++ b/net/bluetooth/rfcomm/core.c
@@ -35,6 +35,8 @@
#include <net/bluetooth/l2cap.h>
#include <net/bluetooth/rfcomm.h>
+#include <trace/events/sock.h>
+
#define VERSION "1.11"
static bool disable_cfc;
@@ -186,6 +188,8 @@ static void rfcomm_l2state_change(struct sock *sk)
static void rfcomm_l2data_ready(struct sock *sk)
{
+ trace_sk_data_ready(sk);
+
BT_DBG("%p", sk);
rfcomm_schedule();
}
diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index 21e24da4847f..4397e14ff560 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -391,6 +391,7 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
addr->sa_family != AF_BLUETOOTH)
return -EINVAL;
+ sock_hold(sk);
lock_sock(sk);
if (sk->sk_state != BT_OPEN && sk->sk_state != BT_BOUND) {
@@ -410,14 +411,18 @@ static int rfcomm_sock_connect(struct socket *sock, struct sockaddr *addr, int a
d->sec_level = rfcomm_pi(sk)->sec_level;
d->role_switch = rfcomm_pi(sk)->role_switch;
+ /* Drop sock lock to avoid potential deadlock with the RFCOMM lock */
+ release_sock(sk);
err = rfcomm_dlc_open(d, &rfcomm_pi(sk)->src, &sa->rc_bdaddr,
sa->rc_channel);
- if (!err)
+ lock_sock(sk);
+ if (!err && !sock_flag(sk, SOCK_ZAPPED))
err = bt_sock_wait_state(sk, BT_CONNECTED,
sock_sndtimeo(sk, flags & O_NONBLOCK));
done:
release_sock(sk);
+ sock_put(sk);
return err;
}
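The shape of this fix is generic: pin the socket with a reference, drop the socket lock across the call that takes the RFCOMM lock, then re-validate under the reacquired lock, since the socket may have been zapped in the window. A hedged sketch (helper names illustrative):

	sock_hold(sk);               /* keep sk alive across the unlock */
	lock_sock(sk);
	/* ... */
	release_sock(sk);            /* avoid sock-lock -> subsystem-lock inversion */
	err = subsystem_open(d);     /* may take the subsystem lock and sleep */
	lock_sock(sk);
	if (!err && !sock_flag(sk, SOCK_ZAPPED))   /* sk may be dead by now */
		err = bt_sock_wait_state(sk, BT_CONNECTED, timeo);
	release_sock(sk);
	sock_put(sk);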
diff --git a/net/bluetooth/rfcomm/tty.c b/net/bluetooth/rfcomm/tty.c
index 8009e0e93216..5697df9d4394 100644
--- a/net/bluetooth/rfcomm/tty.c
+++ b/net/bluetooth/rfcomm/tty.c
@@ -119,7 +119,7 @@ static int rfcomm_dev_activate(struct tty_port *port, struct tty_struct *tty)
}
/* we block the open until the dlc->state becomes BT_CONNECTED */
-static int rfcomm_dev_carrier_raised(struct tty_port *port)
+static bool rfcomm_dev_carrier_raised(struct tty_port *port)
{
struct rfcomm_dev *dev = container_of(port, struct rfcomm_dev, port);
diff --git a/net/bpf/bpf_dummy_struct_ops.c b/net/bpf/bpf_dummy_struct_ops.c
index 1ac4467928a9..ff4f89a2b02a 100644
--- a/net/bpf/bpf_dummy_struct_ops.c
+++ b/net/bpf/bpf_dummy_struct_ops.c
@@ -154,6 +154,23 @@ static bool bpf_dummy_ops_is_valid_access(int off, int size,
return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
}
+static int bpf_dummy_ops_check_member(const struct btf_type *t,
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
+{
+ u32 moff = __btf_member_bit_offset(t, member) / 8;
+
+ switch (moff) {
+ case offsetof(struct bpf_dummy_ops, test_sleepable):
+ break;
+ default:
+ if (prog->aux->sleepable)
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
static int bpf_dummy_ops_btf_struct_access(struct bpf_verifier_log *log,
const struct bpf_reg_state *reg,
int off, int size, enum bpf_access_type atype,
@@ -208,6 +225,7 @@ static void bpf_dummy_unreg(void *kdata)
struct bpf_struct_ops bpf_bpf_dummy_ops = {
.verifier_ops = &bpf_dummy_verifier_ops,
.init = bpf_dummy_init,
+ .check_member = bpf_dummy_ops_check_member,
.init_member = bpf_dummy_init_member,
.reg = bpf_dummy_reg,
.unreg = bpf_dummy_unreg,
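With ->check_member() in place, the verifier rejects a sleepable program attached to any member other than test_sleepable. On the BPF side, a sleepable struct_ops implementation is marked with the "struct_ops.s" section prefix; a sketch of what a matching selftest program presumably looks like (exact shape is an assumption, not taken from this patch):

	#include <vmlinux.h>
	#include <bpf/bpf_helpers.h>
	#include <bpf/bpf_tracing.h>

	char _license[] SEC("license") = "GPL";

	SEC("struct_ops.s/test_sleepable")      /* ".s" marks it sleepable */
	int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
	{
		return 0;
	}

	SEC(".struct_ops")
	struct bpf_dummy_ops dummy = {
		.test_sleepable = (void *)test_sleepable,
	};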
diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 2723623429ac..6f3d654b3339 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -234,7 +234,7 @@ static int xdp_recv_frames(struct xdp_frame **frames, int nframes,
int i, n;
LIST_HEAD(list);
- n = kmem_cache_alloc_bulk(skbuff_head_cache, gfp, nframes, (void **)skbs);
+ n = kmem_cache_alloc_bulk(skbuff_cache, gfp, nframes, (void **)skbs);
if (unlikely(n == 0)) {
for (i = 0; i < nframes; i++)
xdp_return_frame(frames[i]);
@@ -396,10 +396,12 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat,
old_ctx = bpf_set_run_ctx(&run_ctx.run_ctx);
do {
run_ctx.prog_item = &item;
+ local_bh_disable();
if (xdp)
*retval = bpf_prog_run_xdp(prog, ctx);
else
*retval = bpf_prog_run(prog, ctx);
+ local_bh_enable();
} while (bpf_test_timer_continue(&t, 1, repeat, &ret, time));
bpf_reset_run_ctx(old_ctx);
bpf_test_timer_leave(&t);
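Running the program inside a local_bh_disable()/local_bh_enable() pair mirrors the context the real datapath provides (NAPI softirq), so helpers that assume BH-disabled execution, per-CPU scratch state for instance, behave identically under test_run:

	local_bh_disable();     /* mimic the softirq context of the datapath */
	*retval = bpf_prog_run(prog, ctx);
	local_bh_enable();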
@@ -484,7 +486,7 @@ out:
__diag_push();
__diag_ignore_all("-Wmissing-prototypes",
"Global functions as their definitions will be in vmlinux BTF");
-int noinline bpf_fentry_test1(int a)
+__bpf_kfunc int bpf_fentry_test1(int a)
{
return a + 1;
}
@@ -529,27 +531,35 @@ int noinline bpf_fentry_test8(struct bpf_fentry_test_t *arg)
return (long)arg->a;
}
-int noinline bpf_modify_return_test(int a, int *b)
+__bpf_kfunc int bpf_modify_return_test(int a, int *b)
{
*b += 1;
return a + *b;
}
-u64 noinline bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
{
return a + b + c + d;
}
-int noinline bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
{
return a + b;
}
-struct sock * noinline bpf_kfunc_call_test3(struct sock *sk)
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
{
return sk;
}
+long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+ /* Provoke the compiler to assume that the caller has sign-extended a,
+ * b and c on platforms where this is required (e.g. s390x).
+ */
+ return (long)a + (long)b + (long)c + d;
+}
+
struct prog_test_member1 {
int a;
};
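bpf_kfunc_call_test4() exists to exercise argument sign extension: on ABIs like s390x the caller must widen sub-register arguments, so the BPF JIT has to emit those extensions when calling into the kernel. A small, runnable illustration of the property the casts rely on:

	#include <stdio.h>

	int main(void)
	{
		signed char a = (signed char)0xff;   /* bit pattern 0xff */

		/* Sign extension makes the widened value -1; an unsigned
		 * char with the same bit pattern would widen to 255.
		 */
		printf("%ld\n", (long)a);
		return 0;
	}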
@@ -574,21 +584,21 @@ static struct prog_test_ref_kfunc prog_test_struct = {
.cnt = REFCOUNT_INIT(1),
};
-noinline struct prog_test_ref_kfunc *
+__bpf_kfunc struct prog_test_ref_kfunc *
bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
{
refcount_inc(&prog_test_struct.cnt);
return &prog_test_struct;
}
-noinline struct prog_test_member *
+__bpf_kfunc struct prog_test_member *
bpf_kfunc_call_memb_acquire(void)
{
WARN_ON_ONCE(1);
return NULL;
}
-noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
+__bpf_kfunc void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
{
if (!p)
return;
@@ -596,11 +606,11 @@ noinline void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p)
refcount_dec(&p->cnt);
}
-noinline void bpf_kfunc_call_memb_release(struct prog_test_member *p)
+__bpf_kfunc void bpf_kfunc_call_memb_release(struct prog_test_member *p)
{
}
-noinline void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
{
WARN_ON_ONCE(1);
}
@@ -613,12 +623,14 @@ static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const i
return (int *)p;
}
-noinline int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+ const int rdwr_buf_size)
{
return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
}
-noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
{
return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
}
@@ -628,16 +640,17 @@ noinline int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
* Acquire functions must return struct pointers, so these ones are
* failing.
*/
-noinline int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size)
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
{
return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
}
-noinline void bpf_kfunc_call_int_mem_release(int *p)
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
{
}
-noinline struct prog_test_ref_kfunc *
+__bpf_kfunc struct prog_test_ref_kfunc *
bpf_kfunc_call_test_kptr_get(struct prog_test_ref_kfunc **pp, int a, int b)
{
struct prog_test_ref_kfunc *p = READ_ONCE(*pp);
@@ -686,50 +699,55 @@ struct prog_test_fail3 {
char arr2[];
};
-noinline void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
{
}
-noinline void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
{
}
-noinline void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
{
}
-noinline void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
{
}
-noinline void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
{
}
-noinline void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
{
}
-noinline void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
{
}
-noinline void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
{
}
-noinline void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
{
}
-noinline void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
{
}
-noinline void bpf_kfunc_call_test_destructive(void)
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
{
}
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
+{
+ return arg;
+}
+
__diag_pop();
BTF_SET8_START(bpf_test_modify_return_ids)
@@ -746,6 +764,7 @@ BTF_SET8_START(test_sk_check_kfunc_ids)
BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_release, KF_RELEASE)
@@ -767,6 +786,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS)
BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
BTF_SET8_END(test_sk_check_kfunc_ids)
static void *bpf_test_init(const union bpf_attr *kattr, u32 user_size,
@@ -1300,6 +1320,9 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
if (kattr->test.flags & ~BPF_F_TEST_XDP_LIVE_FRAMES)
return -EINVAL;
+ if (bpf_prog_is_dev_bound(prog->aux))
+ return -EINVAL;
+
if (do_live) {
if (!batch_size)
batch_size = NAPI_POLL_WEIGHT;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ad13b48e3e08..24f01ff113f0 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -269,7 +269,7 @@ static void brport_get_ownership(const struct kobject *kobj, kuid_t *uid, kgid_t
net_ns_get_ownership(dev_net(p->dev), uid, gid);
}
-static struct kobj_type brport_ktype = {
+static const struct kobj_type brport_ktype = {
#ifdef CONFIG_SYSFS
.sysfs_ops = &brport_sysfs_ops,
#endif
diff --git a/net/bridge/br_mdb.c b/net/bridge/br_mdb.c
index 00e5743647b0..25c48d81a597 100644
--- a/net/bridge/br_mdb.c
+++ b/net/bridge/br_mdb.c
@@ -259,7 +259,7 @@ static int __mdb_fill_info(struct sk_buff *skb,
#endif
} else {
ether_addr_copy(e.addr.u.mac_addr, mp->addr.dst.mac_addr);
- e.state = MDB_PG_FLAGS_PERMANENT;
+ e.state = MDB_PERMANENT;
}
e.addr.proto = mp->addr.proto;
nest_ent = nla_nest_start_noflag(skb,
@@ -421,8 +421,6 @@ static int br_mdb_dump(struct sk_buff *skb, struct netlink_callback *cb)
rcu_read_lock();
- cb->seq = net->dev_base_seq;
-
for_each_netdev_rcu(net, dev) {
if (netif_is_bridge_master(dev)) {
struct net_bridge *br = netdev_priv(dev);
@@ -685,51 +683,58 @@ static const struct nla_policy br_mdbe_attrs_pol[MDBE_ATTR_MAX + 1] = {
[MDBE_ATTR_RTPROT] = NLA_POLICY_MIN(NLA_U8, RTPROT_STATIC),
};
-static bool is_valid_mdb_entry(struct br_mdb_entry *entry,
- struct netlink_ext_ack *extack)
+static int validate_mdb_entry(const struct nlattr *attr,
+ struct netlink_ext_ack *extack)
{
+ struct br_mdb_entry *entry = nla_data(attr);
+
+ if (nla_len(attr) != sizeof(struct br_mdb_entry)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
+ return -EINVAL;
+ }
+
if (entry->ifindex == 0) {
NL_SET_ERR_MSG_MOD(extack, "Zero entry ifindex is not allowed");
- return false;
+ return -EINVAL;
}
if (entry->addr.proto == htons(ETH_P_IP)) {
if (!ipv4_is_multicast(entry->addr.u.ip4)) {
NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is not multicast");
- return false;
+ return -EINVAL;
}
if (ipv4_is_local_multicast(entry->addr.u.ip4)) {
NL_SET_ERR_MSG_MOD(extack, "IPv4 entry group address is local multicast");
- return false;
+ return -EINVAL;
}
#if IS_ENABLED(CONFIG_IPV6)
} else if (entry->addr.proto == htons(ETH_P_IPV6)) {
if (ipv6_addr_is_ll_all_nodes(&entry->addr.u.ip6)) {
NL_SET_ERR_MSG_MOD(extack, "IPv6 entry group address is link-local all nodes");
- return false;
+ return -EINVAL;
}
#endif
} else if (entry->addr.proto == 0) {
/* L2 mdb */
if (!is_multicast_ether_addr(entry->addr.u.mac_addr)) {
NL_SET_ERR_MSG_MOD(extack, "L2 entry group is not multicast");
- return false;
+ return -EINVAL;
}
} else {
NL_SET_ERR_MSG_MOD(extack, "Unknown entry protocol");
- return false;
+ return -EINVAL;
}
if (entry->state != MDB_PERMANENT && entry->state != MDB_TEMPORARY) {
NL_SET_ERR_MSG_MOD(extack, "Unknown entry state");
- return false;
+ return -EINVAL;
}
if (entry->vid >= VLAN_VID_MASK) {
NL_SET_ERR_MSG_MOD(extack, "Invalid entry VLAN id");
- return false;
+ return -EINVAL;
}
- return true;
+ return 0;
}
static bool is_valid_mdb_source(struct nlattr *attr, __be16 proto,
@@ -849,11 +854,10 @@ static int br_mdb_add_group_sg(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- MCAST_INCLUDE, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (S, G) port group");
+ MCAST_INCLUDE, cfg->rt_protocol, extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
+
rcu_assign_pointer(*pp, p);
if (!(flags & MDB_PG_FLAGS_PERMANENT) && !cfg->src_entry)
mod_timer(&p->timer,
@@ -1075,11 +1079,10 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
}
p = br_multicast_new_port_group(cfg->p, &cfg->group, *pp, flags, NULL,
- cfg->filter_mode, cfg->rt_protocol);
- if (unlikely(!p)) {
- NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new (*, G) port group");
+ cfg->filter_mode, cfg->rt_protocol,
+ extack);
+ if (unlikely(!p))
return -ENOMEM;
- }
err = br_mdb_add_group_srcs(cfg, p, brmctx, extack);
if (err)
@@ -1101,8 +1104,7 @@ static int br_mdb_add_group_star_g(const struct br_mdb_config *cfg,
return 0;
err_del_port_group:
- hlist_del_init(&p->mglist);
- kfree(p);
+ br_multicast_del_port_group(p);
return err;
}
@@ -1297,6 +1299,14 @@ static int br_mdb_config_attrs_init(struct nlattr *set_attrs,
return 0;
}
+static const struct nla_policy mdba_policy[MDBA_SET_ENTRY_MAX + 1] = {
+ [MDBA_SET_ENTRY_UNSPEC] = { .strict_start_type = MDBA_SET_ENTRY_ATTRS + 1 },
+ [MDBA_SET_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ validate_mdb_entry,
+ sizeof(struct br_mdb_entry)),
+ [MDBA_SET_ENTRY_ATTRS] = { .type = NLA_NESTED },
+};
+
static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
struct br_mdb_config *cfg,
struct netlink_ext_ack *extack)
@@ -1307,7 +1317,7 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
int err;
err = nlmsg_parse_deprecated(nlh, sizeof(*bpm), tb,
- MDBA_SET_ENTRY_MAX, NULL, extack);
+ MDBA_SET_ENTRY_MAX, mdba_policy, extack);
if (err)
return err;
@@ -1349,14 +1359,8 @@ static int br_mdb_config_init(struct net *net, const struct nlmsghdr *nlh,
NL_SET_ERR_MSG_MOD(extack, "Missing MDBA_SET_ENTRY attribute");
return -EINVAL;
}
- if (nla_len(tb[MDBA_SET_ENTRY]) != sizeof(struct br_mdb_entry)) {
- NL_SET_ERR_MSG_MOD(extack, "Invalid MDBA_SET_ENTRY attribute length");
- return -EINVAL;
- }
cfg->entry = nla_data(tb[MDBA_SET_ENTRY]);
- if (!is_valid_mdb_entry(cfg->entry, extack))
- return -EINVAL;
if (cfg->entry->ifindex != cfg->br->dev->ifindex) {
struct net_device *pdev;
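Folding the checks into validate_mdb_entry() and attaching it with NLA_POLICY_VALIDATE_FN() means malformed entries are rejected during attribute parsing, so br_mdb_config_init() no longer open-codes the length and content checks. The idiom in isolation, with hypothetical attribute and struct names:

	static int my_validate(const struct nlattr *attr,
			       struct netlink_ext_ack *extack)
	{
		const struct my_entry *e = nla_data(attr);

		if (nla_len(attr) != sizeof(*e)) {
			NL_SET_ERR_MSG_MOD(extack, "Invalid entry length");
			return -EINVAL;
		}
		/* ... semantic checks on *e ... */
		return 0;
	}

	static const struct nla_policy my_policy[MY_ATTR_MAX + 1] = {
		[MY_ATTR_ENTRY] = NLA_POLICY_VALIDATE_FN(NLA_BINARY, my_validate,
							 sizeof(struct my_entry)),
	};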
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index dea1ee1bd095..96d1fc78dd39 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -31,6 +31,7 @@
#include <net/ip6_checksum.h>
#include <net/addrconf.h>
#endif
+#include <trace/events/bridge.h>
#include "br_private.h"
#include "br_private_mcast_eht.h"
@@ -234,6 +235,29 @@ out:
return pmctx;
}
+static struct net_bridge_mcast_port *
+br_multicast_port_vid_to_port_ctx(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx = NULL;
+ struct net_bridge_vlan *vlan;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (!br_opt_get(port->br, BROPT_MCAST_VLAN_SNOOPING_ENABLED))
+ return NULL;
+
+ /* Take RCU to access the vlan. */
+ rcu_read_lock();
+
+ vlan = br_vlan_find(nbp_vlan_group_rcu(port), vid);
+ if (vlan && !br_multicast_port_ctx_vlan_disabled(&vlan->port_mcast_ctx))
+ pmctx = &vlan->port_mcast_ctx;
+
+ rcu_read_unlock();
+
+ return pmctx;
+}
+
/* when snooping we need to check if the contexts should be used
* in the following order:
* - if pmctx is non-NULL (port), check if it should be used
@@ -668,6 +692,101 @@ void br_multicast_del_group_src(struct net_bridge_group_src *src,
__br_multicast_del_group_src(src);
}
+static int
+br_multicast_port_ngroups_inc_one(struct net_bridge_mcast_port *pmctx,
+ struct netlink_ext_ack *extack,
+ const char *what)
+{
+ u32 max = READ_ONCE(pmctx->mdb_max_entries);
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ if (max && n >= max) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "%s is already in %u groups, and mcast_max_groups=%u",
+ what, n, max);
+ return -E2BIG;
+ }
+
+ WRITE_ONCE(pmctx->mdb_n_entries, n + 1);
+ return 0;
+}
+
+static void br_multicast_port_ngroups_dec_one(struct net_bridge_mcast_port *pmctx)
+{
+ u32 n = READ_ONCE(pmctx->mdb_n_entries);
+
+ WARN_ON_ONCE(n == 0);
+ WRITE_ONCE(pmctx->mdb_n_entries, n - 1);
+}
+
+static int br_multicast_port_ngroups_inc(struct net_bridge_port *port,
+ const struct br_ip *group,
+ struct netlink_ext_ack *extack)
+{
+ struct net_bridge_mcast_port *pmctx;
+ int err;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ /* Always count on the port context. */
+ err = br_multicast_port_ngroups_inc_one(&port->multicast_ctx, extack,
+ "Port");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ return err;
+ }
+
+ /* Only count on the VLAN context if VID is given, and if snooping on
+ * that VLAN is enabled.
+ */
+ if (!group->vid)
+ return 0;
+
+ pmctx = br_multicast_port_vid_to_port_ctx(port, group->vid);
+ if (!pmctx)
+ return 0;
+
+ err = br_multicast_port_ngroups_inc_one(pmctx, extack, "Port-VLAN");
+ if (err) {
+ trace_br_mdb_full(port->dev, group);
+ goto dec_one_out;
+ }
+
+ return 0;
+
+dec_one_out:
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+ return err;
+}
+
+static void br_multicast_port_ngroups_dec(struct net_bridge_port *port, u16 vid)
+{
+ struct net_bridge_mcast_port *pmctx;
+
+ lockdep_assert_held_once(&port->br->multicast_lock);
+
+ if (vid) {
+ pmctx = br_multicast_port_vid_to_port_ctx(port, vid);
+ if (pmctx)
+ br_multicast_port_ngroups_dec_one(pmctx);
+ }
+ br_multicast_port_ngroups_dec_one(&port->multicast_ctx);
+}
+
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_n_entries);
+}
+
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max)
+{
+ WRITE_ONCE(pmctx->mdb_max_entries, max);
+}
+
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx)
+{
+ return READ_ONCE(pmctx->mdb_max_entries);
+}
+
static void br_multicast_destroy_port_group(struct net_bridge_mcast_gc *gc)
{
struct net_bridge_port_group *pg;
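The counters are written only under the bridge's multicast_lock (hence the lockdep assertions) but read locklessly from the netlink fill paths, which is why every access goes through READ_ONCE()/WRITE_ONCE(). Reduced to its core, the single-writer/lockless-reader shape looks like this (hypothetical names):

	static int counted_inc(struct counted *c)	/* writer, lock held */
	{
		u32 max = READ_ONCE(c->max_entries);	/* settable via netlink */
		u32 n = READ_ONCE(c->n_entries);

		lockdep_assert_held_once(&c->lock);
		if (max && n >= max)
			return -E2BIG;
		WRITE_ONCE(c->n_entries, n + 1);	/* readers may see either value */
		return 0;
	}

	u32 counted_get(const struct counted *c)	/* reader, no lock */
	{
		return READ_ONCE(c->n_entries);
	}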
@@ -702,6 +821,7 @@ void br_multicast_del_pg(struct net_bridge_mdb_entry *mp,
} else {
br_multicast_star_g_handle_mode(pg, MCAST_INCLUDE);
}
+ br_multicast_port_ngroups_dec(pg->key.port, pg->key.addr.vid);
hlist_add_head(&pg->mcast_gc.gc_node, &br->mcast_gc_list);
queue_work(system_long_wq, &br->mcast_gc_work);
@@ -1165,6 +1285,7 @@ struct net_bridge_mdb_entry *br_multicast_new_group(struct net_bridge *br,
return mp;
if (atomic_read(&br->mdb_hash_tbl.nelems) >= br->hash_max) {
+ trace_br_mdb_full(br->dev, group);
br_mc_disabled_update(br->dev, false, NULL);
br_opt_toggle(br, BROPT_MULTICAST_ENABLED, false);
return ERR_PTR(-E2BIG);
@@ -1284,14 +1405,22 @@ struct net_bridge_port_group *br_multicast_new_port_group(
unsigned char flags,
const unsigned char *src,
u8 filter_mode,
- u8 rt_protocol)
+ u8 rt_protocol,
+ struct netlink_ext_ack *extack)
{
struct net_bridge_port_group *p;
+ int err;
- p = kzalloc(sizeof(*p), GFP_ATOMIC);
- if (unlikely(!p))
+ err = br_multicast_port_ngroups_inc(port, group, extack);
+ if (err)
return NULL;
+ p = kzalloc(sizeof(*p), GFP_ATOMIC);
+ if (unlikely(!p)) {
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't allocate new port group");
+ goto dec_out;
+ }
+
p->key.addr = *group;
p->key.port = port;
p->flags = flags;
@@ -1305,8 +1434,8 @@ struct net_bridge_port_group *br_multicast_new_port_group(
if (!br_multicast_is_star_g(group) &&
rhashtable_lookup_insert_fast(&port->br->sg_port_tbl, &p->rhnode,
br_sg_port_rht_params)) {
- kfree(p);
- return NULL;
+ NL_SET_ERR_MSG_MOD(extack, "Couldn't insert new port group");
+ goto free_out;
}
rcu_assign_pointer(p->next, next);
@@ -1320,6 +1449,25 @@ struct net_bridge_port_group *br_multicast_new_port_group(
eth_broadcast_addr(p->eth_addr);
return p;
+
+free_out:
+ kfree(p);
+dec_out:
+ br_multicast_port_ngroups_dec(port, group->vid);
+ return NULL;
+}
+
+void br_multicast_del_port_group(struct net_bridge_port_group *p)
+{
+ struct net_bridge_port *port = p->key.port;
+ __u16 vid = p->key.addr.vid;
+
+ hlist_del_init(&p->mglist);
+ if (!br_multicast_is_star_g(&p->key.addr))
+ rhashtable_remove_fast(&port->br->sg_port_tbl, &p->rhnode,
+ br_sg_port_rht_params);
+ kfree(p);
+ br_multicast_port_ngroups_dec(port, vid);
}
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1387,7 +1535,7 @@ __br_multicast_add_group(struct net_bridge_mcast *brmctx,
}
p = br_multicast_new_port_group(pmctx->port, group, *pp, 0, src,
- filter_mode, RTPROT_KERNEL);
+ filter_mode, RTPROT_KERNEL, NULL);
if (unlikely(!p)) {
p = ERR_PTR(-ENOMEM);
goto out;
@@ -1933,6 +2081,25 @@ static void __br_multicast_enable_port_ctx(struct net_bridge_mcast_port *pmctx)
br_ip4_multicast_add_router(brmctx, pmctx);
br_ip6_multicast_add_router(brmctx, pmctx);
}
+
+ if (br_multicast_port_ctx_is_vlan(pmctx)) {
+ struct net_bridge_port_group *pg;
+ u32 n = 0;
+
+ /* The mcast_n_groups counter might be wrong. First,
+ * BR_VLFLAG_MCAST_ENABLED is toggled before temporary entries
+ * are flushed, so right after the toggle mcast_n_groups does
+ * not reflect the true value. Second, permanent entries added
+ * while BR_VLFLAG_MCAST_ENABLED was disabled are not reflected
+ * either. Thus we have to refresh the counter.
+ */
+
+ hlist_for_each_entry(pg, &pmctx->port->mglist, mglist) {
+ if (pg->key.addr.vid == pmctx->vlan->vid)
+ n++;
+ }
+ WRITE_ONCE(pmctx->mdb_n_entries, n);
+ }
}
void br_multicast_enable_port(struct net_bridge_port *port)
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index f20f4373ff40..638a4d5359db 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -214,7 +214,7 @@ static int br_validate_ipv4(struct net *net, struct sk_buff *skb)
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto csum_error;
- len = ntohs(iph->tot_len);
+ len = skb_ip_totlen(skb);
if (skb->len < len) {
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
goto drop;
@@ -871,6 +871,7 @@ static unsigned int ip_sabotage_in(void *priv,
if (nf_bridge && !nf_bridge->in_prerouting &&
!netif_is_l3_master(skb->dev) &&
!netif_is_l3_slave(skb->dev)) {
+ nf_bridge_info_free(skb);
state->okfn(state->net, state->sk, skb);
return NF_STOLEN;
}
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4316cc82ae17..9173e52b89e2 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -202,6 +202,8 @@ static inline size_t br_port_info_size(void)
+ nla_total_size_64bit(sizeof(u64)) /* IFLA_BRPORT_HOLD_TIMER */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MULTICAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* IFLA_BRPORT_MCAST_MAX_GROUPS */
#endif
+ nla_total_size(sizeof(u16)) /* IFLA_BRPORT_GROUP_FWD_MASK */
+ nla_total_size(sizeof(u8)) /* IFLA_BRPORT_MRP_RING_OPEN */
@@ -298,7 +300,11 @@ static int br_port_fill_attrs(struct sk_buff *skb,
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT,
p->multicast_eht_hosts_limit) ||
nla_put_u32(skb, IFLA_BRPORT_MCAST_EHT_HOSTS_CNT,
- p->multicast_eht_hosts_cnt))
+ p->multicast_eht_hosts_cnt) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&p->multicast_ctx)) ||
+ nla_put_u32(skb, IFLA_BRPORT_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&p->multicast_ctx)))
return -EMSGSIZE;
#endif
@@ -858,6 +864,8 @@ static int br_afspec(struct net_bridge *br,
}
static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
+ [IFLA_BRPORT_UNSPEC] = { .strict_start_type =
+ IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT + 1 },
[IFLA_BRPORT_STATE] = { .type = NLA_U8 },
[IFLA_BRPORT_COST] = { .type = NLA_U32 },
[IFLA_BRPORT_PRIORITY] = { .type = NLA_U16 },
@@ -881,6 +889,8 @@ static const struct nla_policy br_port_policy[IFLA_BRPORT_MAX + 1] = {
[IFLA_BRPORT_MAB] = { .type = NLA_U8 },
[IFLA_BRPORT_BACKUP_PORT] = { .type = NLA_U32 },
[IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT] = { .type = NLA_U32 },
+ [IFLA_BRPORT_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [IFLA_BRPORT_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
/* Change the state of the port and notify spanning tree */
@@ -1015,6 +1025,13 @@ static int br_setport(struct net_bridge_port *p, struct nlattr *tb[],
if (err)
return err;
}
+
+ if (tb[IFLA_BRPORT_MCAST_MAX_GROUPS]) {
+ u32 max_groups;
+
+ max_groups = nla_get_u32(tb[IFLA_BRPORT_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&p->multicast_ctx, max_groups);
+ }
#endif
if (tb[IFLA_BRPORT_GROUP_FWD_MASK]) {
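The new .strict_start_type entry makes every attribute above IFLA_BRPORT_MCAST_EHT_HOSTS_LIMIT parse strictly even through nlmsg_parse_deprecated(), which is what gives the NLA_REJECT on IFLA_BRPORT_MCAST_N_GROUPS teeth: the counter is kernel-to-userspace only, and attempts to set it now fail instead of being silently ignored. The idiom, sketched with placeholder names:

	static const struct nla_policy pol[ATTR_MAX + 1] = {
		[ATTR_UNSPEC]  = { .strict_start_type = ATTR_LAST_OLD + 1 },
		[ATTR_RO_STAT] = { .type = NLA_REJECT },  /* read-only to userspace */
		[ATTR_LIMIT]   = { .type = NLA_U32 },
	};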
diff --git a/net/bridge/br_netlink_tunnel.c b/net/bridge/br_netlink_tunnel.c
index 8914290c75d4..17abf092f7ca 100644
--- a/net/bridge/br_netlink_tunnel.c
+++ b/net/bridge/br_netlink_tunnel.c
@@ -188,6 +188,9 @@ initvars:
}
static const struct nla_policy vlan_tunnel_policy[IFLA_BRIDGE_VLAN_TUNNEL_MAX + 1] = {
+ [IFLA_BRIDGE_VLAN_TUNNEL_UNSPEC] = {
+ .strict_start_type = IFLA_BRIDGE_VLAN_TUNNEL_FLAGS + 1
+ },
[IFLA_BRIDGE_VLAN_TUNNEL_ID] = { .type = NLA_U32 },
[IFLA_BRIDGE_VLAN_TUNNEL_VID] = { .type = NLA_U16 },
[IFLA_BRIDGE_VLAN_TUNNEL_FLAGS] = { .type = NLA_U16 },
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 15ef7fd508ee..cef5f6ea850c 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -126,6 +126,8 @@ struct net_bridge_mcast_port {
struct hlist_node ip6_rlist;
#endif /* IS_ENABLED(CONFIG_IPV6) */
unsigned char multicast_router;
+ u32 mdb_n_entries;
+ u32 mdb_max_entries;
#endif /* CONFIG_BRIDGE_IGMP_SNOOPING */
};
@@ -956,7 +958,9 @@ br_multicast_new_port_group(struct net_bridge_port *port,
const struct br_ip *group,
struct net_bridge_port_group __rcu *next,
unsigned char flags, const unsigned char *src,
- u8 filter_mode, u8 rt_protocol);
+ u8 filter_mode, u8 rt_protocol,
+ struct netlink_ext_ack *extack);
+void br_multicast_del_port_group(struct net_bridge_port_group *p);
int br_mdb_hash_init(struct net_bridge *br);
void br_mdb_hash_fini(struct net_bridge *br);
void br_mdb_notify(struct net_device *dev, struct net_bridge_mdb_entry *mp,
@@ -974,6 +978,9 @@ void br_multicast_uninit_stats(struct net_bridge *br);
void br_multicast_get_stats(const struct net_bridge *br,
const struct net_bridge_port *p,
struct br_mcast_stats *dest);
+u32 br_multicast_ngroups_get(const struct net_bridge_mcast_port *pmctx);
+void br_multicast_ngroups_set_max(struct net_bridge_mcast_port *pmctx, u32 max);
+u32 br_multicast_ngroups_get_max(const struct net_bridge_mcast_port *pmctx);
void br_mdb_init(void);
void br_mdb_uninit(void);
void br_multicast_host_join(const struct net_bridge_mcast *brmctx,
@@ -1757,7 +1764,8 @@ static inline u16 br_vlan_flags(const struct net_bridge_vlan *v, u16 pvid)
#ifdef CONFIG_BRIDGE_VLAN_FILTERING
bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
const struct net_bridge_vlan *range_end);
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v);
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p);
size_t br_vlan_opts_nl_size(void);
int br_vlan_process_options(const struct net_bridge *br,
const struct net_bridge_port *p,
diff --git a/net/bridge/br_switchdev.c b/net/bridge/br_switchdev.c
index 7eb6fd5bb917..de18e9c1d7a7 100644
--- a/net/bridge/br_switchdev.c
+++ b/net/bridge/br_switchdev.c
@@ -104,9 +104,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
return 0;
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "bridge flag offload is not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "bridge flag offload is not supported");
return -EOPNOTSUPP;
}
@@ -115,9 +114,8 @@ int br_switchdev_set_port_flag(struct net_bridge_port *p,
err = switchdev_port_attr_set(p->dev, &attr, extack);
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "error setting offload flag on port");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "error setting offload flag on port");
return err;
}
diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c
index bc75fa1e4666..8a3dbc09ba38 100644
--- a/net/bridge/br_vlan.c
+++ b/net/bridge/br_vlan.c
@@ -1816,6 +1816,7 @@ out_err:
/* v_opts is used to dump the options which must be equal in the whole range */
static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
const struct net_bridge_vlan *v_opts,
+ const struct net_bridge_port *p,
u16 flags,
bool dump_stats)
{
@@ -1842,7 +1843,7 @@ static bool br_vlan_fill_vids(struct sk_buff *skb, u16 vid, u16 vid_range,
goto out_err;
if (v_opts) {
- if (!br_vlan_opts_fill(skb, v_opts))
+ if (!br_vlan_opts_fill(skb, v_opts, p))
goto out_err;
if (dump_stats && !br_vlan_stats_fill(skb, v_opts))
@@ -1925,7 +1926,7 @@ void br_vlan_notify(const struct net_bridge *br,
goto out_kfree;
}
- if (!br_vlan_fill_vids(skb, vid, vid_range, v, flags, false))
+ if (!br_vlan_fill_vids(skb, vid, vid_range, v, p, flags, false))
goto out_err;
nlmsg_end(skb, nlh);
@@ -2030,7 +2031,7 @@ static int br_vlan_dump_dev(const struct net_device *dev,
if (!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- vlan_flags, dump_stats)) {
+ p, vlan_flags, dump_stats)) {
err = -EMSGSIZE;
break;
}
@@ -2056,7 +2057,7 @@ update_end:
else if (!dump_global &&
!br_vlan_fill_vids(skb, range_start->vid,
range_end->vid, range_start,
- br_vlan_flags(range_start, pvid),
+ p, br_vlan_flags(range_start, pvid),
dump_stats))
err = -EMSGSIZE;
}
@@ -2131,6 +2132,8 @@ static const struct nla_policy br_vlan_db_policy[BRIDGE_VLANDB_ENTRY_MAX + 1] =
[BRIDGE_VLANDB_ENTRY_STATE] = { .type = NLA_U8 },
[BRIDGE_VLANDB_ENTRY_TUNNEL_INFO] = { .type = NLA_NESTED },
[BRIDGE_VLANDB_ENTRY_MCAST_ROUTER] = { .type = NLA_U8 },
+ [BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS] = { .type = NLA_REJECT },
+ [BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS] = { .type = NLA_U32 },
};
static int br_vlan_rtm_process_one(struct net_device *dev,
diff --git a/net/bridge/br_vlan_options.c b/net/bridge/br_vlan_options.c
index a2724d03278c..e378c2f3a9e2 100644
--- a/net/bridge/br_vlan_options.c
+++ b/net/bridge/br_vlan_options.c
@@ -48,7 +48,8 @@ bool br_vlan_opts_eq_range(const struct net_bridge_vlan *v_curr,
curr_mc_rtr == range_mc_rtr;
}
-bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
+bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v,
+ const struct net_bridge_port *p)
{
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_STATE, br_vlan_get_state(v)) ||
!__vlan_tun_put(skb, v))
@@ -58,6 +59,12 @@ bool br_vlan_opts_fill(struct sk_buff *skb, const struct net_bridge_vlan *v)
if (nla_put_u8(skb, BRIDGE_VLANDB_ENTRY_MCAST_ROUTER,
br_vlan_multicast_router(v)))
return false;
+ if (p && !br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx) &&
+ (nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS,
+ br_multicast_ngroups_get(&v->port_mcast_ctx)) ||
+ nla_put_u32(skb, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS,
+ br_multicast_ngroups_get_max(&v->port_mcast_ctx))))
+ return false;
#endif
return true;
@@ -70,6 +77,8 @@ size_t br_vlan_opts_nl_size(void)
+ nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_TINFO_ID */
#ifdef CONFIG_BRIDGE_IGMP_SNOOPING
+ nla_total_size(sizeof(u8)) /* BRIDGE_VLANDB_ENTRY_MCAST_ROUTER */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS */
+ + nla_total_size(sizeof(u32)) /* BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS */
#endif
+ 0;
}
@@ -212,6 +221,22 @@ static int br_vlan_process_one_opts(const struct net_bridge *br,
return err;
*changed = true;
}
+ if (tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]) {
+ u32 val;
+
+ if (!p) {
+ NL_SET_ERR_MSG_MOD(extack, "Can't set mcast_max_groups for non-port vlans");
+ return -EINVAL;
+ }
+ if (br_multicast_port_ctx_vlan_disabled(&v->port_mcast_ctx)) {
+ NL_SET_ERR_MSG_MOD(extack, "Multicast snooping disabled on this VLAN");
+ return -EINVAL;
+ }
+
+ val = nla_get_u32(tb[BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS]);
+ br_multicast_ngroups_set_max(&v->port_mcast_ctx, val);
+ *changed = true;
+ }
#endif
return 0;
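From userspace, the per-port-VLAN limit would be configured through the new writable attribute while the count stays read-only; a hypothetical libnl-style sketch (vid selection and error handling omitted):

	struct nlattr *entry;

	entry = nla_nest_start(msg, BRIDGE_VLANDB_ENTRY);
	/* ... BRIDGE_VLANDB_ENTRY_INFO with the vid or vid range ... */
	nla_put_u32(msg, BRIDGE_VLANDB_ENTRY_MCAST_MAX_GROUPS, 10);
	nla_nest_end(msg, entry);
	/* BRIDGE_VLANDB_ENTRY_MCAST_N_GROUPS is NLA_REJECT: read-only. */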
diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c
index ce5dfa3babd2..757ec46fc45a 100644
--- a/net/bridge/netfilter/ebtables.c
+++ b/net/bridge/netfilter/ebtables.c
@@ -1090,7 +1090,7 @@ static int do_replace_finish(struct net *net, struct ebt_replace *repl,
audit_log_nfcfg(repl->name, AF_BRIDGE, repl->nentries,
AUDIT_XT_OP_REPLACE, GFP_KERNEL);
- return ret;
+ return 0;
free_unlock:
mutex_unlock(&ebt_mutex);
diff --git a/net/bridge/netfilter/nf_conntrack_bridge.c b/net/bridge/netfilter/nf_conntrack_bridge.c
index 5c5dd437f1c2..71056ee84773 100644
--- a/net/bridge/netfilter/nf_conntrack_bridge.c
+++ b/net/bridge/netfilter/nf_conntrack_bridge.c
@@ -212,7 +212,7 @@ static int nf_ct_br_ip_check(const struct sk_buff *skb)
iph->version != 4)
return -1;
- len = ntohs(iph->tot_len);
+ len = skb_ip_totlen(skb);
if (skb->len < nhoff + len ||
len < (iph->ihl * 4))
return -1;
@@ -256,7 +256,7 @@ static unsigned int nf_ct_bridge_pre(void *priv, struct sk_buff *skb,
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
return NF_ACCEPT;
- len = ntohs(ip_hdr(skb)->tot_len);
+ len = skb_ip_totlen(skb);
if (pskb_trim_rcsum(skb, len))
return NF_ACCEPT;
diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c
index 748be7253248..4eebcc66c19a 100644
--- a/net/caif/caif_socket.c
+++ b/net/caif/caif_socket.c
@@ -533,10 +533,6 @@ static int caif_seqpkt_sendmsg(struct socket *sock, struct msghdr *msg,
if (msg->msg_namelen)
goto err;
- ret = -EINVAL;
- if (unlikely(msg->msg_iter.nr_segs == 0) ||
- unlikely(msg->msg_iter.iov->iov_base == NULL))
- goto err;
noblock = msg->msg_flags & MSG_DONTWAIT;
timeo = sock_sndtimeo(sk, noblock);
@@ -1015,6 +1011,7 @@ static void caif_sock_destructor(struct sock *sk)
return;
}
sk_stream_kill_queues(&cf_sk->sk);
+ WARN_ON_ONCE(sk->sk_forward_alloc);
caif_free_client(&cf_sk->layer);
}
diff --git a/net/caif/cfctrl.c b/net/caif/cfctrl.c
index cc405d8c7c30..8480684f2762 100644
--- a/net/caif/cfctrl.c
+++ b/net/caif/cfctrl.c
@@ -269,11 +269,15 @@ int cfctrl_linkup_request(struct cflayer *layer,
default:
pr_warn("Request setup of bad link type = %d\n",
param->linktype);
+ cfpkt_destroy(pkt);
return -EINVAL;
}
req = kzalloc(sizeof(*req), GFP_KERNEL);
- if (!req)
+ if (!req) {
+ cfpkt_destroy(pkt);
return -ENOMEM;
+ }
+
req->client_layer = user_layer;
req->cmd = CFCTRL_CMD_LINK_SETUP;
req->param = *param;
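Both early returns used to leak the already-built packet; the fix frees it on each failure path. The conventional way to keep such cleanup in one place is a goto ladder, sketched here with hypothetical names:

	static int setup_link(struct layer *l, struct param *p)
	{
		struct pkt *pkt;
		struct req *req;
		int err;

		pkt = pkt_create();
		if (!pkt)
			return -ENOMEM;

		err = -EINVAL;
		if (bad_linktype(p))
			goto err_free_pkt;

		err = -ENOMEM;
		req = kzalloc(sizeof(*req), GFP_KERNEL);
		if (!req)
			goto err_free_pkt;

		/* ... */
		return 0;

	err_free_pkt:
		pkt_destroy(pkt);
		return err;
	}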
diff --git a/net/can/gw.c b/net/can/gw.c
index 23a3d89cad81..37528826935e 100644
--- a/net/can/gw.c
+++ b/net/can/gw.c
@@ -1139,6 +1139,13 @@ static int cgw_create_job(struct sk_buff *skb, struct nlmsghdr *nlh,
if (gwj->dst.dev->type != ARPHRD_CAN)
goto out;
+ /* is sending the skb back to the incoming interface intended? */
+ if (gwj->src.dev == gwj->dst.dev &&
+ !(gwj->flags & CGW_FLAGS_CAN_IIF_TX_OK)) {
+ err = -EINVAL;
+ goto out;
+ }
+
ASSERT_RTNL();
err = cgw_register_filter(net, gwj);
diff --git a/net/can/isotp.c b/net/can/isotp.c
index 608f8c24ae46..9bc344851704 100644
--- a/net/can/isotp.c
+++ b/net/can/isotp.c
@@ -140,7 +140,7 @@ struct isotp_sock {
canid_t rxid;
ktime_t tx_gap;
ktime_t lastrxcf_tstamp;
- struct hrtimer rxtimer, txtimer;
+ struct hrtimer rxtimer, txtimer, txfrtimer;
struct can_isotp_options opt;
struct can_isotp_fc_options rxfc, txfc;
struct can_isotp_ll_options ll;
@@ -871,7 +871,7 @@ static void isotp_rcv_echo(struct sk_buff *skb, void *data)
}
/* start timer to send next consecutive frame with correct delay */
- hrtimer_start(&so->txtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
+ hrtimer_start(&so->txfrtimer, so->tx_gap, HRTIMER_MODE_REL_SOFT);
}
static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
@@ -879,49 +879,39 @@ static enum hrtimer_restart isotp_tx_timer_handler(struct hrtimer *hrtimer)
struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
txtimer);
struct sock *sk = &so->sk;
- enum hrtimer_restart restart = HRTIMER_NORESTART;
- switch (so->tx.state) {
- case ISOTP_SENDING:
+ /* don't handle timeouts in IDLE state */
+ if (so->tx.state == ISOTP_IDLE)
+ return HRTIMER_NORESTART;
- /* cfecho should be consumed by isotp_rcv_echo() here */
- if (!so->cfecho) {
- /* start timeout for unlikely lost echo skb */
- hrtimer_set_expires(&so->txtimer,
- ktime_add(ktime_get(),
- ktime_set(ISOTP_ECHO_TIMEOUT, 0)));
- restart = HRTIMER_RESTART;
+ /* we did not get any flow control or echo frame in time */
- /* push out the next consecutive frame */
- isotp_send_cframe(so);
- break;
- }
+ /* report 'communication error on send' */
+ sk->sk_err = ECOMM;
+ if (!sock_flag(sk, SOCK_DEAD))
+ sk_error_report(sk);
- /* cfecho has not been cleared in isotp_rcv_echo() */
- pr_notice_once("can-isotp: cfecho %08X timeout\n", so->cfecho);
- fallthrough;
-
- case ISOTP_WAIT_FC:
- case ISOTP_WAIT_FIRST_FC:
+ /* reset tx state */
+ so->tx.state = ISOTP_IDLE;
+ wake_up_interruptible(&so->wait);
- /* we did not get any flow control frame in time */
+ return HRTIMER_NORESTART;
+}
- /* report 'communication error on send' */
- sk->sk_err = ECOMM;
- if (!sock_flag(sk, SOCK_DEAD))
- sk_error_report(sk);
+static enum hrtimer_restart isotp_txfr_timer_handler(struct hrtimer *hrtimer)
+{
+ struct isotp_sock *so = container_of(hrtimer, struct isotp_sock,
+ txfrtimer);
- /* reset tx state */
- so->tx.state = ISOTP_IDLE;
- wake_up_interruptible(&so->wait);
- break;
+ /* start echo timeout handling and cover the protocol error case below */
+ hrtimer_start(&so->txtimer, ktime_set(ISOTP_ECHO_TIMEOUT, 0),
+ HRTIMER_MODE_REL_SOFT);
- default:
- WARN_ONCE(1, "can-isotp: tx timer state %08X cfecho %08X\n",
- so->tx.state, so->cfecho);
- }
+ /* cfecho should be consumed by isotp_rcv_echo() here */
+ if (so->tx.state == ISOTP_SENDING && !so->cfecho)
+ isotp_send_cframe(so);
- return restart;
+ return HRTIMER_NORESTART;
}
static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -1162,6 +1152,10 @@ static int isotp_release(struct socket *sock)
/* wait for complete transmission of current pdu */
wait_event_interruptible(so->wait, so->tx.state == ISOTP_IDLE);
+ /* force state machines to be idle also when a signal occurred */
+ so->tx.state = ISOTP_IDLE;
+ so->rx.state = ISOTP_IDLE;
+
spin_lock(&isotp_notifier_lock);
while (isotp_busy_notifier == so) {
spin_unlock(&isotp_notifier_lock);
@@ -1194,6 +1188,7 @@ static int isotp_release(struct socket *sock)
}
}
+ hrtimer_cancel(&so->txfrtimer);
hrtimer_cancel(&so->txtimer);
hrtimer_cancel(&so->rxtimer);
@@ -1225,6 +1220,9 @@ static int isotp_bind(struct socket *sock, struct sockaddr *uaddr, int len)
if (len < ISOTP_MIN_NAMELEN)
return -EINVAL;
+ if (addr->can_family != AF_CAN)
+ return -EINVAL;
+
/* sanitize tx CAN identifier */
if (tx_id & CAN_EFF_FLAG)
tx_id &= (CAN_EFF_FLAG | CAN_EFF_MASK);
@@ -1597,6 +1595,8 @@ static int isotp_init(struct sock *sk)
so->rxtimer.function = isotp_rx_timer_handler;
hrtimer_init(&so->txtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
so->txtimer.function = isotp_tx_timer_handler;
+ hrtimer_init(&so->txfrtimer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
+ so->txfrtimer.function = isotp_txfr_timer_handler;
init_waitqueue_head(&so->wait);
spin_lock_init(&so->rx_lock);
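After the split, txfrtimer only paces consecutive frames while txtimer only ever fires for a genuine timeout (no flow control or echo frame arrived in time), so neither handler needs the old state switch. Both use the same soft-hrtimer plumbing; the minimal shape, with illustrative names:

	static enum hrtimer_restart pace_expired(struct hrtimer *t)
	{
		struct my_sock *ms = container_of(t, struct my_sock, pace_timer);

		/* runs in softirq context; push out the next frame here */
		return HRTIMER_NORESTART;
	}

	hrtimer_init(&ms->pace_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL_SOFT);
	ms->pace_timer.function = pace_expired;
	hrtimer_start(&ms->pace_timer, ns_to_ktime(gap_ns), HRTIMER_MODE_REL_SOFT);
	/* ... */
	hrtimer_cancel(&ms->pace_timer);	/* teardown must cover every timer */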
diff --git a/net/can/j1939/address-claim.c b/net/can/j1939/address-claim.c
index f33c47327927..ca4ad6cdd5cb 100644
--- a/net/can/j1939/address-claim.c
+++ b/net/can/j1939/address-claim.c
@@ -165,6 +165,46 @@ static void j1939_ac_process(struct j1939_priv *priv, struct sk_buff *skb)
* leaving this function.
*/
ecu = j1939_ecu_get_by_name_locked(priv, name);
+
+ if (ecu && ecu->addr == skcb->addr.sa) {
+ /* The ISO 11783-5 standard, in "4.5.2 - Address claim
+ * requirements", states:
+ * d) No CF shall begin, or resume, transmission on the
+ * network until 250 ms after it has successfully claimed
+ * an address except when responding to a request for
+ * address-claimed.
+ *
+ * But "Figure 6" and "Figure 7" in "4.5.4.2 - Address-claim
+ * prioritization" show that the CF begins the transmission
+ * after 250 ms from the first AC (address-claimed) message
+ * even if it sends another AC message during that time window
+ * to resolve the address contention with another CF.
+ *
+ * As stated in "4.4.2.3 - Address-claimed message":
+ * In order to successfully claim an address, the CF sending
+ * an address claimed message shall not receive a contending
+ * claim from another CF for at least 250 ms.
+ *
+ * As stated in "4.4.3.2 - NAME management (NM) message":
+ * 1) A commanding CF can
+ * d) request that a CF with a specified NAME transmit
+ * the address-claimed message with its current NAME.
+ * 2) A target CF shall
+ * d) send an address-claimed message in response to a
+ * request for a matching NAME
+ *
+ * Taking the above arguments into account, the 250 ms wait is
+ * requested only during network initialization.
+ *
+ * Do not restart the timer on an AC message if both the NAME
+ * and the address match, i.e. if the address has already been
+ * claimed (timer has expired) or the AC message has been sent
+ * to resolve the contention with another CF (timer is still
+ * running).
+ */
+ goto out_ecu_put;
+ }
+
if (!ecu && j1939_address_is_unicast(skcb->addr.sa))
ecu = j1939_ecu_create_locked(priv, name);
diff --git a/net/can/j1939/transport.c b/net/can/j1939/transport.c
index 5c722b55fe23..fce9b9ebf13f 100644
--- a/net/can/j1939/transport.c
+++ b/net/can/j1939/transport.c
@@ -1092,10 +1092,6 @@ static bool j1939_session_deactivate(struct j1939_session *session)
bool active;
j1939_session_list_lock(priv);
- /* This function should be called with a session ref-count of at
- * least 2.
- */
- WARN_ON_ONCE(kref_read(&session->kref) < 2);
active = j1939_session_deactivate_locked(session);
j1939_session_list_unlock(priv);
diff --git a/net/can/raw.c b/net/can/raw.c
index 81071cdb0301..f64469b98260 100644
--- a/net/can/raw.c
+++ b/net/can/raw.c
@@ -132,8 +132,8 @@ static void raw_rcv(struct sk_buff *oskb, void *data)
return;
/* make sure to not pass oversized frames to the socket */
- if ((can_is_canfd_skb(oskb) && !ro->fd_frames && !ro->xl_frames) ||
- (can_is_canxl_skb(oskb) && !ro->xl_frames))
+ if ((!ro->fd_frames && can_is_canfd_skb(oskb)) ||
+ (!ro->xl_frames && can_is_canxl_skb(oskb)))
return;
/* eliminate multiple filter matches for the same skb */
@@ -523,6 +523,7 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
struct can_filter sfilter; /* single filter */
struct net_device *dev = NULL;
can_err_mask_t err_mask = 0;
+ int fd_frames;
int count = 0;
int err = 0;
@@ -664,12 +665,17 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
break;
case CAN_RAW_FD_FRAMES:
- if (optlen != sizeof(ro->fd_frames))
+ if (optlen != sizeof(fd_frames))
return -EINVAL;
- if (copy_from_sockptr(&ro->fd_frames, optval, optlen))
+ if (copy_from_sockptr(&fd_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames && !fd_frames)
+ return -EINVAL;
+
+ ro->fd_frames = fd_frames;
break;
case CAN_RAW_XL_FRAMES:
@@ -679,6 +685,9 @@ static int raw_setsockopt(struct socket *sock, int level, int optname,
if (copy_from_sockptr(&ro->xl_frames, optval, optlen))
return -EFAULT;
+ /* Enabling CAN XL includes CAN FD */
+ if (ro->xl_frames)
+ ro->fd_frames = ro->xl_frames;
break;
case CAN_RAW_JOIN_FILTERS:
@@ -786,6 +795,25 @@ static int raw_getsockopt(struct socket *sock, int level, int optname,
return 0;
}
+static bool raw_bad_txframe(struct raw_sock *ro, struct sk_buff *skb, int mtu)
+{
+ /* Classical CAN -> no checks for flags and device capabilities */
+ if (can_is_can_skb(skb))
+ return false;
+
+ /* CAN FD -> needs to be enabled and a CAN FD or CAN XL device */
+ if (ro->fd_frames && can_is_canfd_skb(skb) &&
+ (mtu == CANFD_MTU || can_is_canxl_dev_mtu(mtu)))
+ return false;
+
+ /* CAN XL -> needs to be enabled and a CAN XL device */
+ if (ro->xl_frames && can_is_canxl_skb(skb) &&
+ can_is_canxl_dev_mtu(mtu))
+ return false;
+
+ return true;
+}
+
static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
{
struct sock *sk = sock->sk;
@@ -833,20 +861,8 @@ static int raw_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
goto free_skb;
err = -EINVAL;
- if (ro->xl_frames && can_is_canxl_dev_mtu(dev->mtu)) {
- /* CAN XL, CAN FD and Classical CAN */
- if (!can_is_canxl_skb(skb) && !can_is_canfd_skb(skb) &&
- !can_is_can_skb(skb))
- goto free_skb;
- } else if (ro->fd_frames && dev->mtu == CANFD_MTU) {
- /* CAN FD and Classical CAN */
- if (!can_is_canfd_skb(skb) && !can_is_can_skb(skb))
- goto free_skb;
- } else {
- /* Classical CAN */
- if (!can_is_can_skb(skb))
- goto free_skb;
- }
+ if (raw_bad_txframe(ro, skb, dev->mtu))
+ goto free_skb;
sockcm_init(&sockc, sk);
if (msg->msg_controllen) {
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index 1d06e114ba3f..cd7b0bf5369e 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -17,6 +17,7 @@
#endif /* CONFIG_BLOCK */
#include <linux/dns_resolver.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include <linux/ceph/ceph_features.h>
#include <linux/ceph/libceph.h>
@@ -344,6 +345,9 @@ static void con_sock_state_closed(struct ceph_connection *con)
static void ceph_sock_data_ready(struct sock *sk)
{
struct ceph_connection *con = sk->sk_user_data;
+
+ trace_sk_data_ready(sk);
+
if (atomic_read(&con->msgr->stopping)) {
return;
}
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index d1787d7d33ef..d664cb1593a7 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -40,15 +40,12 @@ static int ceph_tcp_recvmsg(struct socket *sock, void *buf, size_t len)
static int ceph_tcp_recvpage(struct socket *sock, struct page *page,
int page_offset, size_t length)
{
- struct bio_vec bvec = {
- .bv_page = page,
- .bv_offset = page_offset,
- .bv_len = length
- };
+ struct bio_vec bvec;
struct msghdr msg = { .msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL };
int r;
BUG_ON(page_offset + length > PAGE_SIZE);
+ bvec_set_page(&bvec, page, length, page_offset);
iov_iter_bvec(&msg.msg_iter, ITER_DEST, &bvec, 1, length);
r = sock_recvmsg(sock, &msg, msg.msg_flags);
if (r == -EAGAIN)
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index 3009028c4fa2..301a991dc6a6 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -149,10 +149,10 @@ static int do_try_sendpage(struct socket *sock, struct iov_iter *it)
while (iov_iter_count(it)) {
/* iov_iter_iovec() for ITER_BVEC */
- bv.bv_page = it->bvec->bv_page;
- bv.bv_offset = it->bvec->bv_offset + it->iov_offset;
- bv.bv_len = min(iov_iter_count(it),
- it->bvec->bv_len - it->iov_offset);
+ bvec_set_page(&bv, it->bvec->bv_page,
+ min(iov_iter_count(it),
+ it->bvec->bv_len - it->iov_offset),
+ it->bvec->bv_offset + it->iov_offset);
/*
* sendpage cannot properly handle pages with
@@ -286,9 +286,8 @@ static void set_out_bvec_zero(struct ceph_connection *con)
WARN_ON(iov_iter_count(&con->v2.out_iter));
WARN_ON(!con->v2.out_zero);
- con->v2.out_bvec.bv_page = ceph_zero_page;
- con->v2.out_bvec.bv_offset = 0;
- con->v2.out_bvec.bv_len = min(con->v2.out_zero, (int)PAGE_SIZE);
+ bvec_set_page(&con->v2.out_bvec, ceph_zero_page,
+ min(con->v2.out_zero, (int)PAGE_SIZE), 0);
con->v2.out_iter_sendpage = true;
iov_iter_bvec(&con->v2.out_iter, ITER_SOURCE, &con->v2.out_bvec, 1,
con->v2.out_bvec.bv_len);
@@ -863,10 +862,7 @@ static void get_bvec_at(struct ceph_msg_data_cursor *cursor,
/* get a piece of data, cursor isn't advanced */
page = ceph_msg_data_next(cursor, &off, &len);
-
- bv->bv_page = page;
- bv->bv_offset = off;
- bv->bv_len = len;
+ bvec_set_page(bv, page, len, off);
}
static int calc_sg_cnt(void *buf, int buf_len)
@@ -1855,9 +1851,8 @@ static void prepare_read_enc_page(struct ceph_connection *con)
con->v2.in_enc_resid);
WARN_ON(!con->v2.in_enc_resid);
- bv.bv_page = con->v2.in_enc_pages[con->v2.in_enc_i];
- bv.bv_offset = 0;
- bv.bv_len = min(con->v2.in_enc_resid, (int)PAGE_SIZE);
+ bvec_set_page(&bv, con->v2.in_enc_pages[con->v2.in_enc_i],
+ min(con->v2.in_enc_resid, (int)PAGE_SIZE), 0);
set_in_bvec(con, &bv);
con->v2.in_enc_i++;
@@ -2998,9 +2993,8 @@ static void queue_enc_page(struct ceph_connection *con)
con->v2.out_enc_resid);
WARN_ON(!con->v2.out_enc_resid);
- bv.bv_page = con->v2.out_enc_pages[con->v2.out_enc_i];
- bv.bv_offset = 0;
- bv.bv_len = min(con->v2.out_enc_resid, (int)PAGE_SIZE);
+ bvec_set_page(&bv, con->v2.out_enc_pages[con->v2.out_enc_i],
+ min(con->v2.out_enc_resid, (int)PAGE_SIZE), 0);
set_out_bvec(con, &bv, false);
con->v2.out_enc_i++;
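Every hunk in this file (and in messenger_v1.c above) is the same mechanical conversion: open-coded bio_vec field assignment replaced by the bvec_set_page() helper. Note that the helper's argument order is (bvec, page, len, offset), which differs from the struct's field order:

	/* before */
	bv.bv_page   = page;
	bv.bv_offset = off;
	bv.bv_len    = len;

	/* after */
	bvec_set_page(&bv, page, len, off);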
diff --git a/net/core/Makefile b/net/core/Makefile
index 5857cec87b83..8f367813bc68 100644
--- a/net/core/Makefile
+++ b/net/core/Makefile
@@ -12,7 +12,8 @@ obj-$(CONFIG_SYSCTL) += sysctl_net_core.o
obj-y += dev.o dev_addr_lists.o dst.o netevent.o \
neighbour.o rtnetlink.o utils.o link_watch.o filter.o \
sock_diag.o dev_ioctl.o tso.o sock_reuseport.o \
- fib_notifier.o xdp.o flow_offload.o gro.o
+ fib_notifier.o xdp.o flow_offload.o gro.o \
+ netdev-genl.o netdev-genl-gen.o
obj-$(CONFIG_NETDEV_ADDR_LIST_TEST) += dev_addr_lists_test.o
@@ -33,7 +34,6 @@ obj-$(CONFIG_LWTUNNEL) += lwtunnel.o
obj-$(CONFIG_LWTUNNEL_BPF) += lwt_bpf.o
obj-$(CONFIG_DST_CACHE) += dst_cache.o
obj-$(CONFIG_HWBM) += hwbm.o
-obj-$(CONFIG_NET_DEVLINK) += devlink.o
obj-$(CONFIG_GRO_CELLS) += gro_cells.o
obj-$(CONFIG_FAILOVER) += failover.o
obj-$(CONFIG_NET_SOCK_MSG) += skmsg.o
diff --git a/net/core/dev.c b/net/core/dev.c
index b76fb37b381e..253584777101 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -1614,6 +1614,7 @@ const char *netdev_cmd_to_name(enum netdev_cmd cmd)
N(SVLAN_FILTER_PUSH_INFO) N(SVLAN_FILTER_DROP_INFO)
N(PRE_CHANGEADDR) N(OFFLOAD_XSTATS_ENABLE) N(OFFLOAD_XSTATS_DISABLE)
N(OFFLOAD_XSTATS_REPORT_USED) N(OFFLOAD_XSTATS_REPORT_DELTA)
+ N(XDP_FEAT_CHANGE)
}
#undef N
return "UNKNOWN_NETDEV_EVENT";
@@ -1840,7 +1841,7 @@ EXPORT_SYMBOL(register_netdevice_notifier_net);
* @nb: notifier
*
* Unregister a notifier previously registered by
- * register_netdevice_notifier(). The notifier is unlinked into the
+ * register_netdevice_notifier_net(). The notifier is unlinked from the
* kernel structures and may then be reused. A negative errno code
* is returned on a failure.
*
@@ -1869,14 +1870,6 @@ static void __move_netdevice_notifier_net(struct net *src_net,
__register_netdevice_notifier_net(dst_net, nb, true);
}
-void move_netdevice_notifier_net(struct net *src_net, struct net *dst_net,
- struct notifier_block *nb)
-{
- rtnl_lock();
- __move_netdevice_notifier_net(src_net, dst_net, nb);
- rtnl_unlock();
-}
-
int register_netdevice_notifier_dev_net(struct net_device *dev,
struct notifier_block *nb,
struct netdev_net_notifier *nn)
@@ -3001,6 +2994,8 @@ void netif_set_tso_max_size(struct net_device *dev, unsigned int size)
dev->tso_max_size = min(GSO_MAX_SIZE, size);
if (size < READ_ONCE(dev->gso_max_size))
netif_set_gso_max_size(dev, size);
+ if (size < READ_ONCE(dev->gso_ipv4_max_size))
+ netif_set_gso_ipv4_max_size(dev, size);
}
EXPORT_SYMBOL(netif_set_tso_max_size);
@@ -3139,8 +3134,10 @@ void __dev_kfree_skb_any(struct sk_buff *skb, enum skb_free_reason reason)
{
if (in_hardirq() || irqs_disabled())
__dev_kfree_skb_irq(skb, reason);
+ else if (unlikely(reason == SKB_REASON_DROPPED))
+ kfree_skb(skb);
else
- dev_kfree_skb(skb);
+ consume_skb(skb);
}
EXPORT_SYMBOL(__dev_kfree_skb_any);
@@ -5024,7 +5021,7 @@ static __latent_entropy void net_tx_action(struct softirq_action *h)
WARN_ON(refcount_read(&skb->users));
if (likely(get_kfree_skb_cb(skb)->reason == SKB_REASON_CONSUMED))
- trace_consume_skb(skb);
+ trace_consume_skb(skb, net_tx_action);
else
trace_kfree_skb(skb, net_tx_action,
SKB_DROP_REASON_NOT_SPECIFIED);
@@ -6616,17 +6613,16 @@ static int napi_threaded_poll(void *data)
static void skb_defer_free_flush(struct softnet_data *sd)
{
struct sk_buff *skb, *next;
- unsigned long flags;
/* Paired with WRITE_ONCE() in skb_attempt_defer_free() */
if (!READ_ONCE(sd->defer_list))
return;
- spin_lock_irqsave(&sd->defer_lock, flags);
+ spin_lock_irq(&sd->defer_lock);
skb = sd->defer_list;
sd->defer_list = NULL;
sd->defer_count = 0;
- spin_unlock_irqrestore(&sd->defer_lock, flags);
+ spin_unlock_irq(&sd->defer_lock);
while (skb != NULL) {
next = skb->next;
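skb_defer_free_flush() is only reached from softirq/NAPI paths where hard IRQs are known to be enabled, so the unconditional spin_lock_irq() variant can replace the irqsave one. The general trade-off, as a sketch:

	/* caller's IRQ state unknown: must save and restore */
	spin_lock_irqsave(&lock, flags);
	/* ... */
	spin_unlock_irqrestore(&lock, flags);

	/* caller known to run with IRQs on: cheaper */
	lockdep_assert_irqs_enabled();	/* optional self-check */
	spin_lock_irq(&lock);
	/* ... */
	spin_unlock_irq(&lock);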
@@ -8319,9 +8315,8 @@ static int __dev_set_promiscuity(struct net_device *dev, int inc, bool notify)
}
}
if (dev->flags != old_flags) {
- pr_info("device %s %s promiscuous mode\n",
- dev->name,
- dev->flags & IFF_PROMISC ? "entered" : "left");
+ netdev_info(dev, "%s promiscuous mode\n",
+ dev->flags & IFF_PROMISC ? "entered" : "left");
if (audit_enabled) {
current_uid_gid(&uid, &gid);
audit_log(audit_context(), GFP_ATOMIC,
@@ -8389,6 +8384,8 @@ static int __dev_set_allmulti(struct net_device *dev, int inc, bool notify)
}
}
if (dev->flags ^ old_flags) {
+ netdev_info(dev, "%s allmulticast mode\n",
+ dev->flags & IFF_ALLMULTI ? "entered" : "left");
dev_change_rx_flags(dev, IFF_ALLMULTI);
dev_set_rx_mode(dev);
if (notify)
@@ -9224,8 +9221,12 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack
NL_SET_ERR_MSG(extack, "Native and generic XDP can't be active at the same time");
return -EEXIST;
}
- if (!offload && bpf_prog_is_dev_bound(new_prog->aux)) {
- NL_SET_ERR_MSG(extack, "Using device-bound program without HW_MODE flag is not supported");
+ if (!offload && bpf_prog_is_offloaded(new_prog->aux)) {
+ NL_SET_ERR_MSG(extack, "Using offloaded program without HW_MODE flag is not supported");
+ return -EINVAL;
+ }
+ if (bpf_prog_is_dev_bound(new_prog->aux) && !bpf_offload_dev_match(new_prog, dev)) {
+ NL_SET_ERR_MSG(extack, "Program bound to different device");
return -EINVAL;
}
if (new_prog->expected_attach_type == BPF_XDP_DEVMAP) {
@@ -10375,7 +10376,7 @@ void netdev_stats_to_stats64(struct rtnl_link_stats64 *stats64,
BUILD_BUG_ON(n > sizeof(*stats64) / sizeof(u64));
for (i = 0; i < n; i++)
- dst[i] = atomic_long_read(&src[i]);
+ dst[i] = (unsigned long)atomic_long_read(&src[i]);
/* zero out counters that only exist in rtnl_link_stats64 */
memset((char *)stats64 + n * sizeof(u64), 0,
sizeof(*stats64) - n * sizeof(u64));
@@ -10611,6 +10612,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
dev->gso_max_size = GSO_LEGACY_MAX_SIZE;
dev->gso_max_segs = GSO_MAX_SEGS;
dev->gro_max_size = GRO_LEGACY_MAX_SIZE;
+ dev->gso_ipv4_max_size = GSO_LEGACY_MAX_SIZE;
+ dev->gro_ipv4_max_size = GRO_LEGACY_MAX_SIZE;
dev->tso_max_size = TSO_LEGACY_MAX_SIZE;
dev->tso_max_segs = TSO_MAX_SEGS;
dev->upper_level = 1;
@@ -10830,6 +10833,7 @@ void unregister_netdevice_many_notify(struct list_head *head,
dev_shutdown(dev);
dev_xdp_uninstall(dev);
+ bpf_dev_bound_netdev_unregister(dev);
netdev_offload_xstats_disable_all(dev);
diff --git a/net/core/dev.h b/net/core/dev.h
index 814ed5b7b960..e075e198092c 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -9,6 +9,7 @@ struct net_device;
struct netdev_bpf;
struct netdev_phys_item_id;
struct netlink_ext_ack;
+struct cpumask;
/* Random bits of netdevice that don't need to be exposed */
#define FLOW_LIMIT_HISTORY (1 << 7) /* must be ^2 and !overflow buckets */
@@ -100,6 +101,8 @@ static inline void netif_set_gso_max_size(struct net_device *dev,
{
/* dev->gso_max_size is read locklessly from sk_setup_caps() */
WRITE_ONCE(dev->gso_max_size, size);
+ if (size <= GSO_LEGACY_MAX_SIZE)
+ WRITE_ONCE(dev->gso_ipv4_max_size, size);
}
static inline void netif_set_gso_max_segs(struct net_device *dev,
@@ -114,6 +117,23 @@ static inline void netif_set_gro_max_size(struct net_device *dev,
{
/* This pairs with the READ_ONCE() in skb_gro_receive() */
WRITE_ONCE(dev->gro_max_size, size);
+ if (size <= GRO_LEGACY_MAX_SIZE)
+ WRITE_ONCE(dev->gro_ipv4_max_size, size);
}
+static inline void netif_set_gso_ipv4_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* dev->gso_ipv4_max_size is read locklessly from sk_setup_caps() */
+ WRITE_ONCE(dev->gso_ipv4_max_size, size);
+}
+
+static inline void netif_set_gro_ipv4_max_size(struct net_device *dev,
+ unsigned int size)
+{
+ /* This pairs with the READ_ONCE() in skb_gro_receive() */
+ WRITE_ONCE(dev->gro_ipv4_max_size, size);
+}
+
+int rps_cpumask_housekeeping(struct cpumask *mask);
#endif
diff --git a/net/core/dst.c b/net/core/dst.c
index 6d2dd03dafa8..31c08a3386d3 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -82,12 +82,8 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
if (ops->gc &&
!(flags & DST_NOCOUNT) &&
- dst_entries_get_fast(ops) > ops->gc_thresh) {
- if (ops->gc(ops)) {
- pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n");
- return NULL;
- }
- }
+ dst_entries_get_fast(ops) > ops->gc_thresh)
+ ops->gc(ops);
dst = kmem_cache_alloc(ops->kmem_cachep, GFP_ATOMIC);
if (!dst)
diff --git a/net/core/filter.c b/net/core/filter.c
index 929358677183..1d6f165923bf 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3180,15 +3180,18 @@ static int bpf_skb_generic_push(struct sk_buff *skb, u32 off, u32 len)
static int bpf_skb_generic_pop(struct sk_buff *skb, u32 off, u32 len)
{
+ void *old_data;
+
/* skb_ensure_writable() is not needed here, as we're
* already working on an uncloned skb.
*/
if (unlikely(!pskb_may_pull(skb, off + len)))
return -ENOMEM;
- skb_postpull_rcsum(skb, skb->data + off, len);
- memmove(skb->data + len, skb->data, off);
+ old_data = skb->data;
__skb_pull(skb, len);
+ skb_postpull_rcsum(skb, old_data + off, len);
+ memmove(skb->data, old_data, off);
return 0;
}
@@ -3378,13 +3381,17 @@ static u32 bpf_skb_net_base_len(const struct sk_buff *skb)
#define BPF_F_ADJ_ROOM_ENCAP_L3_MASK (BPF_F_ADJ_ROOM_ENCAP_L3_IPV4 | \
BPF_F_ADJ_ROOM_ENCAP_L3_IPV6)
+#define BPF_F_ADJ_ROOM_DECAP_L3_MASK (BPF_F_ADJ_ROOM_DECAP_L3_IPV4 | \
+ BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+
#define BPF_F_ADJ_ROOM_MASK (BPF_F_ADJ_ROOM_FIXED_GSO | \
BPF_F_ADJ_ROOM_ENCAP_L3_MASK | \
BPF_F_ADJ_ROOM_ENCAP_L4_GRE | \
BPF_F_ADJ_ROOM_ENCAP_L4_UDP | \
BPF_F_ADJ_ROOM_ENCAP_L2_ETH | \
BPF_F_ADJ_ROOM_ENCAP_L2( \
- BPF_ADJ_ROOM_ENCAP_L2_MASK))
+ BPF_ADJ_ROOM_ENCAP_L2_MASK) | \
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK)
static int bpf_skb_net_grow(struct sk_buff *skb, u32 off, u32 len_diff,
u64 flags)
@@ -3498,6 +3505,7 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
int ret;
if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_DECAP_L3_MASK |
BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
return -EINVAL;
@@ -3516,6 +3524,14 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
if (unlikely(ret < 0))
return ret;
+ /* Match skb->protocol to new outer l3 protocol */
+ if (skb->protocol == htons(ETH_P_IP) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV6)
+ skb->protocol = htons(ETH_P_IPV6);
+ else if (skb->protocol == htons(ETH_P_IPV6) &&
+ flags & BPF_F_ADJ_ROOM_DECAP_L3_IPV4)
+ skb->protocol = htons(ETH_P_IP);
+
if (skb_is_gso(skb)) {
struct skb_shared_info *shinfo = skb_shinfo(skb);
@@ -3605,6 +3621,22 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
return -ENOTSUPP;
}
+ if (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ if (!shrink)
+ return -EINVAL;
+
+ switch (flags & BPF_F_ADJ_ROOM_DECAP_L3_MASK) {
+ case BPF_F_ADJ_ROOM_DECAP_L3_IPV4:
+ len_min = sizeof(struct iphdr);
+ break;
+ case BPF_F_ADJ_ROOM_DECAP_L3_IPV6:
+ len_min = sizeof(struct ipv6hdr);
+ break;
+ default:
+ return -EINVAL;
+ }
+ }
+
len_cur = skb->len - skb_network_offset(skb);
if ((shrink && (len_diff_abs >= len_cur ||
len_cur - len_diff_abs < len_min)) ||
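
Example: a tc-BPF caller of the new decapsulation flags. The sketch strips an outer IPv6 header in front of an inner IPv4 packet; the flag tells the helper to relax the minimum-length check and flip skb->protocol, as the hunks above implement. The section name and program layout are illustrative, not taken from this diff.

    #include <linux/bpf.h>
    #include <linux/ipv6.h>
    #include <linux/pkt_cls.h>
    #include <bpf/bpf_helpers.h>

    SEC("tc")
    int decap6to4(struct __sk_buff *skb)
    {
        if (bpf_skb_adjust_room(skb, -(__s32)sizeof(struct ipv6hdr),
                                BPF_ADJ_ROOM_MAC,
                                BPF_F_ADJ_ROOM_DECAP_L3_IPV4))
            return TC_ACT_SHOT;
        return TC_ACT_OK;
    }

    char _license[] SEC("license") = "GPL";
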
@@ -4125,9 +4157,13 @@ static const struct bpf_func_proto bpf_xdp_adjust_meta_proto = {
* bpf_redirect_info to actually enqueue the frame into a map type-specific
* bulk queue structure.
*
- * 3. Before exiting its NAPI poll loop, the driver will call xdp_do_flush(),
- * which will flush all the different bulk queues, thus completing the
- * redirect.
+ * 3. Before exiting its NAPI poll loop, the driver will call
+ * xdp_do_flush(), which will flush all the different bulk queues,
+ * thus completing the redirect. Note that xdp_do_flush() must be
+ * called before napi_complete_done() in the driver, as the
+ * XDP_REDIRECT logic relies on being inside a single NAPI instance
+ * through to the xdp_do_flush() call for RCU protection of all
+ * in-kernel data structures.
*/
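
Example: the ordering rule spelled out in the comment above, as it would look in a driver's NAPI poll callback. "my_clean_rx" is a placeholder for the driver's RX processing that may trigger XDP_REDIRECT.

    static int my_napi_poll(struct napi_struct *napi, int budget)
    {
        int work = my_clean_rx(napi, budget); /* may run XDP_REDIRECT */

        xdp_do_flush();   /* must come before napi_complete_done() */

        if (work < budget)
            napi_complete_done(napi, work);
        return work;
    }
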
/*
* Pointers to the map entries will be kept around for this whole sequence of
@@ -4282,16 +4318,13 @@ int xdp_do_redirect(struct net_device *dev, struct xdp_buff *xdp,
struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info);
enum bpf_map_type map_type = ri->map_type;
- /* XDP_REDIRECT is not fully supported yet for xdp frags since
- * not all XDP capable drivers can map non-linear xdp_frame in
- * ndo_xdp_xmit.
- */
- if (unlikely(xdp_buff_has_frags(xdp) &&
- map_type != BPF_MAP_TYPE_CPUMAP))
- return -EOPNOTSUPP;
+ if (map_type == BPF_MAP_TYPE_XSKMAP) {
+ /* XDP_REDIRECT with frags is not supported for AF_XDP yet. */
+ if (unlikely(xdp_buff_has_frags(xdp)))
+ return -EOPNOTSUPP;
- if (map_type == BPF_MAP_TYPE_XSKMAP)
return __xdp_do_redirect_xsk(ri, dev, xdp, xdp_prog);
+ }
return __xdp_do_redirect_frame(ri, dev, xdp_convert_buff_to_frame(xdp),
xdp_prog);
@@ -4615,7 +4648,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
struct ip_tunnel_info *info;
if (unlikely(flags & ~(BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
- BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER)))
+ BPF_F_DONT_FRAGMENT | BPF_F_SEQ_NUMBER |
+ BPF_F_NO_TUNNEL_KEY)))
return -EINVAL;
if (unlikely(size != sizeof(struct bpf_tunnel_key))) {
switch (size) {
@@ -4653,6 +4687,8 @@ BPF_CALL_4(bpf_skb_set_tunnel_key, struct sk_buff *, skb,
info->key.tun_flags &= ~TUNNEL_CSUM;
if (flags & BPF_F_SEQ_NUMBER)
info->key.tun_flags |= TUNNEL_SEQ;
+ if (flags & BPF_F_NO_TUNNEL_KEY)
+ info->key.tun_flags &= ~TUNNEL_KEY;
info->key.tun_id = cpu_to_be64(from->tunnel_id);
info->key.tos = from->tunnel_tos;
@@ -5169,7 +5205,7 @@ static int sol_tcp_sockopt(struct sock *sk, int optname,
char *optval, int *optlen,
bool getopt)
{
- if (sk->sk_prot->setsockopt != tcp_setsockopt)
+ if (sk->sk_protocol != IPPROTO_TCP)
return -EINVAL;
switch (optname) {
@@ -5686,12 +5722,8 @@ static const struct bpf_func_proto bpf_skb_get_xfrm_state_proto = {
#endif
#if IS_ENABLED(CONFIG_INET) || IS_ENABLED(CONFIG_IPV6)
-static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params,
- const struct neighbour *neigh,
- const struct net_device *dev, u32 mtu)
+static int bpf_fib_set_fwd_params(struct bpf_fib_lookup *params, u32 mtu)
{
- memcpy(params->dmac, neigh->ha, ETH_ALEN);
- memcpy(params->smac, dev->dev_addr, ETH_ALEN);
params->h_vlan_TCI = 0;
params->h_vlan_proto = 0;
if (mtu)
@@ -5802,21 +5834,29 @@ static int bpf_ipv4_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
if (likely(nhc->nhc_gw_family != AF_INET6)) {
if (nhc->nhc_gw_family)
params->ipv4_dst = nhc->nhc_gw.ipv4;
-
- neigh = __ipv4_neigh_lookup_noref(dev,
- (__force u32)params->ipv4_dst);
} else {
struct in6_addr *dst = (struct in6_addr *)params->ipv6_dst;
params->family = AF_INET6;
*dst = nhc->nhc_gw.ipv6;
- neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
}
- if (!neigh)
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
+ if (likely(nhc->nhc_gw_family != AF_INET6))
+ neigh = __ipv4_neigh_lookup_noref(dev,
+ (__force u32)params->ipv4_dst);
+ else
+ neigh = __ipv6_neigh_lookup_noref_stub(dev, params->ipv6_dst);
+
+ if (!neigh || !(neigh->nud_state & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);
- return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+set_fwd_params:
+ return bpf_fib_set_fwd_params(params, mtu);
}
#endif
@@ -5924,24 +5964,33 @@ static int bpf_ipv6_fib_lookup(struct net *net, struct bpf_fib_lookup *params,
params->rt_metric = res.f6i->fib6_metric;
params->ifindex = dev->ifindex;
+ if (flags & BPF_FIB_LOOKUP_SKIP_NEIGH)
+ goto set_fwd_params;
+
/* xdp and cls_bpf programs are run in RCU-bh so rcu_read_lock_bh is
* not needed here.
*/
neigh = __ipv6_neigh_lookup_noref_stub(dev, dst);
- if (!neigh)
+ if (!neigh || !(neigh->nud_state & NUD_VALID))
return BPF_FIB_LKUP_RET_NO_NEIGH;
+ memcpy(params->dmac, neigh->ha, ETH_ALEN);
+ memcpy(params->smac, dev->dev_addr, ETH_ALEN);
- return bpf_fib_set_fwd_params(params, neigh, dev, mtu);
+set_fwd_params:
+ return bpf_fib_set_fwd_params(params, mtu);
}
#endif
+#define BPF_FIB_LOOKUP_MASK (BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT | \
+ BPF_FIB_LOOKUP_SKIP_NEIGH)
+
BPF_CALL_4(bpf_xdp_fib_lookup, struct xdp_buff *, ctx,
struct bpf_fib_lookup *, params, int, plen, u32, flags)
{
if (plen < sizeof(*params))
return -EINVAL;
- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL;
switch (params->family) {
@@ -5979,7 +6028,7 @@ BPF_CALL_4(bpf_skb_fib_lookup, struct sk_buff *, skb,
if (plen < sizeof(*params))
return -EINVAL;
- if (flags & ~(BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_OUTPUT))
+ if (flags & ~BPF_FIB_LOOKUP_MASK)
return -EINVAL;
if (params->tot_len)
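
Example: an XDP program using the new BPF_FIB_LOOKUP_SKIP_NEIGH flag to take only the routing decision and handle neighbour resolution itself. Packet parsing and the fib key setup are elided; <linux/bpf.h> and <bpf/bpf_helpers.h> are assumed.

    SEC("xdp")
    int route_only(struct xdp_md *ctx)
    {
        struct bpf_fib_lookup fib = {};
        long rc;

        /* ... fill fib.family, addresses and fib.ifindex from the packet ... */
        rc = bpf_fib_lookup(ctx, &fib, sizeof(fib),
                            BPF_FIB_LOOKUP_SKIP_NEIGH);
        if (rc == BPF_FIB_LKUP_RET_SUCCESS)
            return bpf_redirect(fib.ifindex, 0); /* dmac/smac left to us */
        return XDP_PASS;
    }
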
@@ -6841,9 +6890,6 @@ u32 bpf_tcp_sock_convert_ctx_access(enum bpf_access_type type,
FIELD)); \
} while (0)
- if (insn > insn_buf)
- return insn - insn_buf;
-
switch (si->off) {
case offsetof(struct bpf_tcp_sock, rtt_min):
BUILD_BUG_ON(sizeof_field(struct tcp_sock, rtt_min) !=
@@ -7500,7 +7546,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv4_proto = {
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct iphdr),
.arg2_type = ARG_PTR_TO_MEM,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_3(bpf_tcp_raw_gen_syncookie_ipv6, struct ipv6hdr *, iph,
@@ -7532,7 +7578,7 @@ static const struct bpf_func_proto bpf_tcp_raw_gen_syncookie_ipv6_proto = {
.arg1_type = ARG_PTR_TO_FIXED_SIZE_MEM,
.arg1_size = sizeof(struct ipv6hdr),
.arg2_type = ARG_PTR_TO_MEM,
- .arg3_type = ARG_CONST_SIZE,
+ .arg3_type = ARG_CONST_SIZE_OR_ZERO,
};
BPF_CALL_2(bpf_tcp_raw_check_syncookie_ipv4, struct iphdr *, iph,
@@ -8728,7 +8774,7 @@ static bool xdp_is_valid_access(int off, int size,
}
if (type == BPF_WRITE) {
- if (bpf_prog_is_dev_bound(prog->aux)) {
+ if (bpf_prog_is_offloaded(prog->aux)) {
switch (off) {
case offsetof(struct xdp_md, rx_queue_index):
return __is_valid_xdp_access(off, size);
@@ -10141,9 +10187,6 @@ static u32 sock_ops_convert_ctx_access(enum bpf_access_type type,
SOCK_OPS_GET_FIELD(BPF_FIELD, OBJ_FIELD, OBJ); \
} while (0)
- if (insn > insn_buf)
- return insn - insn_buf;
-
switch (si->off) {
case offsetof(struct bpf_sock_ops, op):
*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct bpf_sock_ops_kern,
diff --git a/net/core/gro.c b/net/core/gro.c
index fd8c6a7e8d3e..a606705a0859 100644
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -162,16 +162,27 @@ int skb_gro_receive(struct sk_buff *p, struct sk_buff *skb)
struct sk_buff *lp;
int segs;
- /* pairs with WRITE_ONCE() in netif_set_gro_max_size() */
- gro_max_size = READ_ONCE(p->dev->gro_max_size);
+ /* Do not splice page-pool-based packets with non-page-pool
+ * packets. Doing so can cause reference-count issues, as page
+ * pool pages will not decrement the reference count and will
+ * instead be immediately returned to the pool or have their
+ * frag count decremented.
+ */
+ if (p->pp_recycle != skb->pp_recycle)
+ return -ETOOMANYREFS;
+
+ /* pairs with WRITE_ONCE() in netif_set_gro(_ipv4)_max_size() */
+ gro_max_size = p->protocol == htons(ETH_P_IPV6) ?
+ READ_ONCE(p->dev->gro_max_size) :
+ READ_ONCE(p->dev->gro_ipv4_max_size);
if (unlikely(p->len + len >= gro_max_size || NAPI_GRO_CB(skb)->flush))
return -E2BIG;
if (unlikely(p->len + len >= GRO_LEGACY_MAX_SIZE)) {
- if (p->protocol != htons(ETH_P_IPV6) ||
- skb_headroom(p) < sizeof(struct hop_jumbo_hdr) ||
- ipv6_hdr(p)->nexthdr != IPPROTO_TCP ||
+ if (NAPI_GRO_CB(skb)->proto != IPPROTO_TCP ||
+ (p->protocol == htons(ETH_P_IPV6) &&
+ skb_headroom(p) < sizeof(struct hop_jumbo_hdr)) ||
p->encapsulation)
return -E2BIG;
}
@@ -505,8 +516,9 @@ found_ptype:
NAPI_GRO_CB(skb)->count = 1;
if (unlikely(skb_is_gso(skb))) {
NAPI_GRO_CB(skb)->count = skb_shinfo(skb)->gso_segs;
- /* Only support TCP at the moment. */
- if (!skb_is_gso_tcp(skb))
+ /* Only support TCP and non-DODGY users. */
+ if (!skb_is_gso_tcp(skb) ||
+ (skb_shinfo(skb)->gso_type & SKB_GSO_DODGY))
NAPI_GRO_CB(skb)->flush = 1;
}
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index f00a79fc301b..6798f6d2423b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -269,7 +269,7 @@ static int neigh_forced_gc(struct neigh_table *tbl)
(n->nud_state == NUD_NOARP) ||
(tbl->is_multicast &&
tbl->is_multicast(n->primary_key)) ||
- time_after(tref, n->updated))
+ !time_in_range(n->updated, tref, jiffies))
remove = true;
write_unlock(&n->lock);
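
Example: a userspace toy showing why the range check above is the safer test. With a bogus timestamp in the future, the old "older than tref" comparison keeps the entry forever, while the new check evicts anything outside [tref, jiffies]. The macros restate <linux/jiffies.h> semantics.

    #include <stdio.h>

    #define time_after(a, b)     ((long)((b) - (a)) < 0)
    #define time_after_eq(a, b)  ((long)((a) - (b)) >= 0)
    #define time_before_eq(a, b) time_after_eq(b, a)
    #define time_in_range(a, b, c) \
        (time_after_eq(a, b) && time_before_eq(a, c))

    int main(void)
    {
        unsigned long jiffies = 1000, tref = 500;
        unsigned long updated = jiffies + 1000; /* timestamp "in the future" */

        printf("old check evicts: %d\n", time_after(tref, updated));  /* 0 */
        printf("new check evicts: %d\n",
               !time_in_range(updated, tref, jiffies));               /* 1 */
        return 0;
    }
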
@@ -289,7 +289,17 @@ static int neigh_forced_gc(struct neigh_table *tbl)
static void neigh_add_timer(struct neighbour *n, unsigned long when)
{
+ /* Keep a safe distance from the jiffies - LONG_MAX wrap point
+ * while the timer is running in DELAY/PROBE state, but still
+ * report suitably large times in the past to user space.
+ */
+ unsigned long mint = jiffies - (LONG_MAX - 86400 * HZ);
+
neigh_hold(n);
+ if (!time_in_range(n->confirmed, mint, jiffies))
+ n->confirmed = mint;
+ if (time_before(n->used, n->confirmed))
+ n->used = n->confirmed;
if (unlikely(mod_timer(&n->timer, when))) {
printk("NEIGH: BUG, double timer add, state is %x\n",
n->nud_state);
@@ -1001,12 +1011,14 @@ static void neigh_periodic_work(struct work_struct *work)
goto next_elt;
}
- if (time_before(n->used, n->confirmed))
+ if (time_before(n->used, n->confirmed) &&
+ time_is_before_eq_jiffies(n->confirmed))
n->used = n->confirmed;
if (refcount_read(&n->refcnt) == 1 &&
(state == NUD_FAILED ||
- time_after(jiffies, n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
+ !time_in_range_open(jiffies, n->used,
+ n->used + NEIGH_VAR(n->parms, GC_STALETIME)))) {
*np = n->next;
neigh_mark_dead(n);
write_unlock(&n->lock);
@@ -1662,11 +1674,21 @@ static void neigh_proxy_process(struct timer_list *t)
spin_unlock(&tbl->proxy_queue.lock);
}
+static unsigned long neigh_proxy_delay(struct neigh_parms *p)
+{
+ /* If proxy_delay is zero, do not call get_random_u32_below()
+ * as it is undefined behavior.
+ */
+ unsigned long proxy_delay = NEIGH_VAR(p, PROXY_DELAY);
+
+ return proxy_delay ?
+ jiffies + get_random_u32_below(proxy_delay) : jiffies;
+}
+
void pneigh_enqueue(struct neigh_table *tbl, struct neigh_parms *p,
struct sk_buff *skb)
{
- unsigned long sched_next = jiffies +
- get_random_u32_below(NEIGH_VAR(p, PROXY_DELAY));
+ unsigned long sched_next = neigh_proxy_delay(p);
if (p->qlen > NEIGH_VAR(p, PROXY_QLEN)) {
kfree_skb(skb);
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index ca55dd747d6c..15e3f4606b5f 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -831,42 +831,18 @@ static ssize_t show_rps_map(struct netdev_rx_queue *queue, char *buf)
return len < PAGE_SIZE ? len : -EINVAL;
}
-static ssize_t store_rps_map(struct netdev_rx_queue *queue,
- const char *buf, size_t len)
+static int netdev_rx_queue_set_rps_mask(struct netdev_rx_queue *queue,
+ cpumask_var_t mask)
{
- struct rps_map *old_map, *map;
- cpumask_var_t mask;
- int err, cpu, i;
static DEFINE_MUTEX(rps_map_mutex);
-
- if (!capable(CAP_NET_ADMIN))
- return -EPERM;
-
- if (!alloc_cpumask_var(&mask, GFP_KERNEL))
- return -ENOMEM;
-
- err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
- if (err) {
- free_cpumask_var(mask);
- return err;
- }
-
- if (!cpumask_empty(mask)) {
- cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
- cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
- if (cpumask_empty(mask)) {
- free_cpumask_var(mask);
- return -EINVAL;
- }
- }
+ struct rps_map *old_map, *map;
+ int cpu, i;
map = kzalloc(max_t(unsigned int,
RPS_MAP_SIZE(cpumask_weight(mask)), L1_CACHE_BYTES),
GFP_KERNEL);
- if (!map) {
- free_cpumask_var(mask);
+ if (!map)
return -ENOMEM;
- }
i = 0;
for_each_cpu_and(cpu, mask, cpu_online_mask)
@@ -893,9 +869,45 @@ static ssize_t store_rps_map(struct netdev_rx_queue *queue,
if (old_map)
kfree_rcu(old_map, rcu);
+ return 0;
+}
+
+int rps_cpumask_housekeeping(struct cpumask *mask)
+{
+ if (!cpumask_empty(mask)) {
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_DOMAIN));
+ cpumask_and(mask, mask, housekeeping_cpumask(HK_TYPE_WQ));
+ if (cpumask_empty(mask))
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static ssize_t store_rps_map(struct netdev_rx_queue *queue,
+ const char *buf, size_t len)
+{
+ cpumask_var_t mask;
+ int err;
+
+ if (!capable(CAP_NET_ADMIN))
+ return -EPERM;
+
+ if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+ return -ENOMEM;
+
+ err = bitmap_parse(buf, len, cpumask_bits(mask), nr_cpumask_bits);
+ if (err)
+ goto out;
+
+ err = rps_cpumask_housekeeping(mask);
+ if (err)
+ goto out;
+ err = netdev_rx_queue_set_rps_mask(queue, mask);
+
+out:
free_cpumask_var(mask);
- return len;
+ return err ? : len;
}
static ssize_t show_rps_dev_flow_table_cnt(struct netdev_rx_queue *queue,
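
Example: exercising the refactored path from userspace by writing an RPS mask through sysfs. With the housekeeping filtering factored out above, CPUs excluded via isolcpus/nohz_full are masked off, and a mask that becomes empty is rejected with EINVAL. The interface name and mask value are illustrative.

    #include <fcntl.h>
    #include <stdio.h>
    #include <string.h>
    #include <unistd.h>

    int main(void)
    {
        const char *path = "/sys/class/net/eth0/queues/rx-0/rps_cpus";
        const char *mask = "f\n";           /* CPUs 0-3 */
        int fd = open(path, O_WRONLY);

        if (fd < 0 || write(fd, mask, strlen(mask)) < 0)
            perror(path);
        if (fd >= 0)
            close(fd);
        return 0;
    }
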
@@ -1040,7 +1052,7 @@ static void rx_queue_get_ownership(const struct kobject *kobj,
net_ns_get_ownership(net, uid, gid);
}
-static struct kobj_type rx_queue_ktype __ro_after_init = {
+static const struct kobj_type rx_queue_ktype = {
.sysfs_ops = &rx_queue_sysfs_ops,
.release = rx_queue_release,
.default_groups = rx_queue_default_groups,
@@ -1048,6 +1060,18 @@ static struct kobj_type rx_queue_ktype __ro_after_init = {
.get_ownership = rx_queue_get_ownership,
};
+static int rx_queue_default_mask(struct net_device *dev,
+ struct netdev_rx_queue *queue)
+{
+#if IS_ENABLED(CONFIG_RPS) && IS_ENABLED(CONFIG_SYSCTL)
+ struct cpumask *rps_default_mask = READ_ONCE(dev_net(dev)->core.rps_default_mask);
+
+ if (rps_default_mask && !cpumask_empty(rps_default_mask))
+ return netdev_rx_queue_set_rps_mask(queue, rps_default_mask);
+#endif
+ return 0;
+}
+
static int rx_queue_add_kobject(struct net_device *dev, int index)
{
struct netdev_rx_queue *queue = dev->_rx + index;
@@ -1071,6 +1095,10 @@ static int rx_queue_add_kobject(struct net_device *dev, int index)
goto err;
}
+ error = rx_queue_default_mask(dev, queue);
+ if (error)
+ goto err;
+
kobject_uevent(kobj, KOBJ_ADD);
return error;
@@ -1643,7 +1671,7 @@ static void netdev_queue_get_ownership(const struct kobject *kobj,
net_ns_get_ownership(net, uid, gid);
}
-static struct kobj_type netdev_queue_ktype __ro_after_init = {
+static const struct kobj_type netdev_queue_ktype = {
.sysfs_ops = &netdev_queue_sysfs_ops,
.release = netdev_queue_release,
.default_groups = netdev_queue_default_groups,
diff --git a/net/core/net-traces.c b/net/core/net-traces.c
index c40cd8dd75c7..805b7385dd8d 100644
--- a/net/core/net-traces.c
+++ b/net/core/net-traces.c
@@ -41,6 +41,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_external_learn_add);
EXPORT_TRACEPOINT_SYMBOL_GPL(fdb_delete);
EXPORT_TRACEPOINT_SYMBOL_GPL(br_fdb_update);
+EXPORT_TRACEPOINT_SYMBOL_GPL(br_mdb_full);
#endif
#if IS_ENABLED(CONFIG_PAGE_POOL)
@@ -61,3 +62,5 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(napi_poll);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_send_reset);
EXPORT_TRACEPOINT_SYMBOL_GPL(tcp_bad_csum);
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(sk_data_ready);
diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c
index 5581d22cc191..7b69cf882b8e 100644
--- a/net/core/net_namespace.c
+++ b/net/core/net_namespace.c
@@ -137,12 +137,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net)
return 0;
if (ops->id && ops->size) {
-cleanup:
ng = rcu_dereference_protected(net->gen,
lockdep_is_held(&pernet_ops_rwsem));
ng->ptr[*ops->id] = NULL;
}
+cleanup:
kfree(data);
out:
@@ -304,6 +304,12 @@ struct net *get_net_ns_by_id(const struct net *net, int id)
}
EXPORT_SYMBOL_GPL(get_net_ns_by_id);
+/* init code that must occur even if setup_net() is not called. */
+static __net_init void preinit_net(struct net *net)
+{
+ ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
+}
+
/*
* setup_net runs the initializers for the network namespace object.
*/
@@ -316,7 +322,6 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns)
refcount_set(&net->ns.count, 1);
ref_tracker_dir_init(&net->refcnt_tracker, 128);
- ref_tracker_dir_init(&net->notrefcnt_tracker, 128);
refcount_set(&net->passive, 1);
get_random_bytes(&net->hash_mix, sizeof(u32));
@@ -472,6 +477,8 @@ struct net *copy_net_ns(unsigned long flags,
rv = -ENOMEM;
goto dec_ucounts;
}
+
+ preinit_net(net);
refcount_set(&net->passive, 1);
net->ucounts = ucounts;
get_user_ns(user_ns);
@@ -1118,6 +1125,7 @@ void __init net_ns_init(void)
init_net.key_domain = &init_net_key_domain;
#endif
down_write(&pernet_ops_rwsem);
+ preinit_net(&init_net);
if (setup_net(&init_net, &init_user_ns))
panic("Could not setup the initial network namespace");
diff --git a/net/core/netdev-genl-gen.c b/net/core/netdev-genl-gen.c
new file mode 100644
index 000000000000..48812ec843f5
--- /dev/null
+++ b/net/core/netdev-genl-gen.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "netdev-genl-gen.h"
+
+#include <linux/netdev.h>
+
+/* NETDEV_CMD_DEV_GET - do */
+static const struct nla_policy netdev_dev_get_nl_policy[NETDEV_A_DEV_IFINDEX + 1] = {
+ [NETDEV_A_DEV_IFINDEX] = NLA_POLICY_MIN(NLA_U32, 1),
+};
+
+/* Ops table for netdev */
+static const struct genl_split_ops netdev_nl_ops[2] = {
+ {
+ .cmd = NETDEV_CMD_DEV_GET,
+ .doit = netdev_nl_dev_get_doit,
+ .policy = netdev_dev_get_nl_policy,
+ .maxattr = NETDEV_A_DEV_IFINDEX,
+ .flags = GENL_CMD_CAP_DO,
+ },
+ {
+ .cmd = NETDEV_CMD_DEV_GET,
+ .dumpit = netdev_nl_dev_get_dumpit,
+ .flags = GENL_CMD_CAP_DUMP,
+ },
+};
+
+static const struct genl_multicast_group netdev_nl_mcgrps[] = {
+ [NETDEV_NLGRP_MGMT] = { "mgmt", },
+};
+
+struct genl_family netdev_nl_family __ro_after_init = {
+ .name = NETDEV_FAMILY_NAME,
+ .version = NETDEV_FAMILY_VERSION,
+ .netnsok = true,
+ .parallel_ops = true,
+ .module = THIS_MODULE,
+ .split_ops = netdev_nl_ops,
+ .n_split_ops = ARRAY_SIZE(netdev_nl_ops),
+ .mcgrps = netdev_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(netdev_nl_mcgrps),
+};
diff --git a/net/core/netdev-genl-gen.h b/net/core/netdev-genl-gen.h
new file mode 100644
index 000000000000..b16dc7e026bb
--- /dev/null
+++ b/net/core/netdev-genl-gen.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/netdev.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_NETDEV_GEN_H
+#define _LINUX_NETDEV_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <linux/netdev.h>
+
+int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info);
+int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+
+enum {
+ NETDEV_NLGRP_MGMT,
+};
+
+extern struct genl_family netdev_nl_family;
+
+#endif /* _LINUX_NETDEV_GEN_H */
diff --git a/net/core/netdev-genl.c b/net/core/netdev-genl.c
new file mode 100644
index 000000000000..a4270fafdf11
--- /dev/null
+++ b/net/core/netdev-genl.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/rtnetlink.h>
+#include <net/net_namespace.h>
+#include <net/sock.h>
+
+#include "netdev-genl-gen.h"
+
+static int
+netdev_nl_dev_fill(struct net_device *netdev, struct sk_buff *rsp,
+ u32 portid, u32 seq, int flags, u32 cmd)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(rsp, portid, seq, &netdev_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(rsp, NETDEV_A_DEV_IFINDEX, netdev->ifindex) ||
+ nla_put_u64_64bit(rsp, NETDEV_A_DEV_XDP_FEATURES,
+ netdev->xdp_features, NETDEV_A_DEV_PAD)) {
+ genlmsg_cancel(rsp, hdr);
+ return -EINVAL;
+ }
+
+ genlmsg_end(rsp, hdr);
+
+ return 0;
+}
+
+static void
+netdev_genl_dev_notify(struct net_device *netdev, int cmd)
+{
+ struct sk_buff *ntf;
+
+ if (!genl_has_listeners(&netdev_nl_family, dev_net(netdev),
+ NETDEV_NLGRP_MGMT))
+ return;
+
+ ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!ntf)
+ return;
+
+ if (netdev_nl_dev_fill(netdev, ntf, 0, 0, 0, cmd)) {
+ nlmsg_free(ntf);
+ return;
+ }
+
+ genlmsg_multicast_netns(&netdev_nl_family, dev_net(netdev), ntf,
+ 0, NETDEV_NLGRP_MGMT, GFP_KERNEL);
+}
+
+int netdev_nl_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct net_device *netdev;
+ struct sk_buff *rsp;
+ u32 ifindex;
+ int err;
+
+ if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_DEV_IFINDEX))
+ return -EINVAL;
+
+ ifindex = nla_get_u32(info->attrs[NETDEV_A_DEV_IFINDEX]);
+
+ rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!rsp)
+ return -ENOMEM;
+
+ rtnl_lock();
+
+ netdev = __dev_get_by_index(genl_info_net(info), ifindex);
+ if (netdev)
+ err = netdev_nl_dev_fill(netdev, rsp, info->snd_portid,
+ info->snd_seq, 0, info->genlhdr->cmd);
+ else
+ err = -ENODEV;
+
+ rtnl_unlock();
+
+ if (err)
+ goto err_free_msg;
+
+ return genlmsg_reply(rsp, info);
+
+err_free_msg:
+ nlmsg_free(rsp);
+ return err;
+}
+
+int netdev_nl_dev_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
+{
+ struct net *net = sock_net(skb->sk);
+ struct net_device *netdev;
+ int idx = 0, s_idx;
+ int h, s_h;
+ int err = 0;
+
+ s_h = cb->args[0];
+ s_idx = cb->args[1];
+
+ rtnl_lock();
+
+ for (h = s_h; h < NETDEV_HASHENTRIES; h++, s_idx = 0) {
+ struct hlist_head *head;
+
+ idx = 0;
+ head = &net->dev_index_head[h];
+ hlist_for_each_entry(netdev, head, index_hlist) {
+ if (idx < s_idx)
+ goto cont;
+ err = netdev_nl_dev_fill(netdev, skb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, 0,
+ NETDEV_CMD_DEV_GET);
+ if (err < 0)
+ break;
+cont:
+ idx++;
+ }
+ }
+
+ rtnl_unlock();
+
+ if (err != -EMSGSIZE)
+ return err;
+
+ cb->args[1] = idx;
+ cb->args[0] = h;
+ cb->seq = net->dev_base_seq;
+
+ return skb->len;
+}
+
+static int netdev_genl_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
+{
+ struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+ switch (event) {
+ case NETDEV_REGISTER:
+ netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_ADD_NTF);
+ break;
+ case NETDEV_UNREGISTER:
+ netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_DEL_NTF);
+ break;
+ case NETDEV_XDP_FEAT_CHANGE:
+ netdev_genl_dev_notify(netdev, NETDEV_CMD_DEV_CHANGE_NTF);
+ break;
+ }
+
+ return NOTIFY_OK;
+}
+
+static struct notifier_block netdev_genl_nb = {
+ .notifier_call = netdev_genl_netdevice_event,
+};
+
+static int __init netdev_genl_init(void)
+{
+ int err;
+
+ err = register_netdevice_notifier(&netdev_genl_nb);
+ if (err)
+ return err;
+
+ err = genl_register_family(&netdev_nl_family);
+ if (err)
+ goto err_unreg_ntf;
+
+ return 0;
+
+err_unreg_ntf:
+ unregister_netdevice_notifier(&netdev_genl_nb);
+ return err;
+}
+
+subsys_initcall(netdev_genl_init);
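
Example: a minimal userspace query of the new family, assuming libnl-genl-3 is available. It resolves the "netdev" family, sends NETDEV_CMD_DEV_GET for one ifindex, and leaves reply parsing elided.

    #include <netlink/netlink.h>
    #include <netlink/genl/genl.h>
    #include <netlink/genl/ctrl.h>
    #include <linux/netdev.h>

    int main(void)
    {
        struct nl_sock *sk = nl_socket_alloc();
        struct nl_msg *msg;
        int family;

        genl_connect(sk);
        family = genl_ctrl_resolve(sk, NETDEV_FAMILY_NAME);

        msg = nlmsg_alloc();
        genlmsg_put(msg, NL_AUTO_PORT, NL_AUTO_SEQ, family, 0, 0,
                    NETDEV_CMD_DEV_GET, NETDEV_FAMILY_VERSION);
        nla_put_u32(msg, NETDEV_A_DEV_IFINDEX, 1); /* 1 is usually lo */

        nl_send_auto(sk, msg);
        nl_recvmsgs_default(sk);                   /* reply parsing elided */

        nlmsg_free(msg);
        nl_socket_free(sk);
        return 0;
    }
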
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 9be762e1d042..a089b704b986 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -682,7 +682,7 @@ int netpoll_setup(struct netpoll *np)
}
if (!netif_running(ndev)) {
- unsigned long atmost, atleast;
+ unsigned long atmost;
np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
@@ -694,7 +694,6 @@ int netpoll_setup(struct netpoll *np)
}
rtnl_unlock();
- atleast = jiffies + HZ/10;
atmost = jiffies + carrier_timeout * HZ;
while (!netif_carrier_ok(ndev)) {
if (time_after(jiffies, atmost)) {
@@ -704,15 +703,6 @@ int netpoll_setup(struct netpoll *np)
msleep(1);
}
- /* If carrier appears to come up instantly, we don't
- * trust it and pause so that we don't pump all our
- * queued console messages into the bitbucket.
- */
-
- if (time_before(jiffies, atleast)) {
- np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
- msleep(4000);
- }
rtnl_lock();
}
diff --git a/net/core/page_pool.c b/net/core/page_pool.c
index 9b203d8660e4..193c18799865 100644
--- a/net/core/page_pool.c
+++ b/net/core/page_pool.c
@@ -511,8 +511,8 @@ static void page_pool_return_page(struct page_pool *pool, struct page *page)
static bool page_pool_recycle_in_ring(struct page_pool *pool, struct page *page)
{
int ret;
- /* BH protection not needed if current is serving softirq */
- if (in_serving_softirq())
+ /* BH protection not needed if current is softirq */
+ if (in_softirq())
ret = ptr_ring_produce(&pool->ring, page);
else
ret = ptr_ring_produce_bh(&pool->ring, page);
@@ -570,7 +570,7 @@ __page_pool_put_page(struct page_pool *pool, struct page *page,
page_pool_dma_sync_for_device(pool, page,
dma_sync_size);
- if (allow_direct && in_serving_softirq() &&
+ if (allow_direct && in_softirq() &&
page_pool_recycle_in_cache(page, pool))
return NULL;
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 64289bc98887..5d8eb57867a9 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -58,7 +58,7 @@
#include "dev.h"
#define RTNL_MAX_TYPE 50
-#define RTNL_SLAVE_MAX_TYPE 40
+#define RTNL_SLAVE_MAX_TYPE 42
struct rtnl_link {
rtnl_doit_func doit;
@@ -1074,6 +1074,8 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev,
+ nla_total_size(4) /* IFLA_GSO_MAX_SEGS */
+ nla_total_size(4) /* IFLA_GSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_GRO_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_GSO_IPV4_MAX_SIZE */
+ + nla_total_size(4) /* IFLA_GRO_IPV4_MAX_SIZE */
+ nla_total_size(4) /* IFLA_TSO_MAX_SIZE */
+ nla_total_size(4) /* IFLA_TSO_MAX_SEGS */
+ nla_total_size(1) /* IFLA_OPERSTATE */
@@ -1807,6 +1809,8 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb,
nla_put_u32(skb, IFLA_GSO_MAX_SEGS, dev->gso_max_segs) ||
nla_put_u32(skb, IFLA_GSO_MAX_SIZE, dev->gso_max_size) ||
nla_put_u32(skb, IFLA_GRO_MAX_SIZE, dev->gro_max_size) ||
+ nla_put_u32(skb, IFLA_GSO_IPV4_MAX_SIZE, dev->gso_ipv4_max_size) ||
+ nla_put_u32(skb, IFLA_GRO_IPV4_MAX_SIZE, dev->gro_ipv4_max_size) ||
nla_put_u32(skb, IFLA_TSO_MAX_SIZE, dev->tso_max_size) ||
nla_put_u32(skb, IFLA_TSO_MAX_SEGS, dev->tso_max_segs) ||
#ifdef CONFIG_RPS
@@ -1968,6 +1972,8 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = {
[IFLA_TSO_MAX_SIZE] = { .type = NLA_REJECT },
[IFLA_TSO_MAX_SEGS] = { .type = NLA_REJECT },
[IFLA_ALLMULTI] = { .type = NLA_REJECT },
+ [IFLA_GSO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
+ [IFLA_GRO_IPV4_MAX_SIZE] = { .type = NLA_U32 },
};
static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = {
@@ -2883,6 +2889,29 @@ static int do_setlink(const struct sk_buff *skb,
}
}
+ if (tb[IFLA_GSO_IPV4_MAX_SIZE]) {
+ u32 max_size = nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]);
+
+ if (max_size > dev->tso_max_size) {
+ err = -EINVAL;
+ goto errout;
+ }
+
+ if (dev->gso_ipv4_max_size ^ max_size) {
+ netif_set_gso_ipv4_max_size(dev, max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
+ if (tb[IFLA_GRO_IPV4_MAX_SIZE]) {
+ u32 gro_max_size = nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]);
+
+ if (dev->gro_ipv4_max_size ^ gro_max_size) {
+ netif_set_gro_ipv4_max_size(dev, gro_max_size);
+ status |= DO_SETLINK_MODIFIED;
+ }
+ }
+
if (tb[IFLA_OPERSTATE])
set_operstate(dev, nla_get_u8(tb[IFLA_OPERSTATE]));
@@ -3325,6 +3354,10 @@ struct net_device *rtnl_create_link(struct net *net, const char *ifname,
netif_set_gso_max_segs(dev, nla_get_u32(tb[IFLA_GSO_MAX_SEGS]));
if (tb[IFLA_GRO_MAX_SIZE])
netif_set_gro_max_size(dev, nla_get_u32(tb[IFLA_GRO_MAX_SIZE]));
+ if (tb[IFLA_GSO_IPV4_MAX_SIZE])
+ netif_set_gso_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GSO_IPV4_MAX_SIZE]));
+ if (tb[IFLA_GRO_IPV4_MAX_SIZE])
+ netif_set_gro_ipv4_max_size(dev, nla_get_u32(tb[IFLA_GRO_IPV4_MAX_SIZE]));
return dev;
}
diff --git a/net/core/scm.c b/net/core/scm.c
index 5c356f0dee30..acb7d776fa6e 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -229,6 +229,8 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
if (msg->msg_control_is_user) {
struct cmsghdr __user *cm = msg->msg_control_user;
+ check_object_size(data, cmlen - sizeof(*cm), true);
+
if (!user_write_access_begin(cm, cmlen))
goto efault;
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 4a0eb5593275..eb7d33b41e71 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -79,15 +79,44 @@
#include <linux/capability.h>
#include <linux/user_namespace.h>
#include <linux/indirect_call_wrapper.h>
+#include <linux/textsearch.h>
#include "dev.h"
#include "sock_destructor.h"
-struct kmem_cache *skbuff_head_cache __ro_after_init;
+struct kmem_cache *skbuff_cache __ro_after_init;
static struct kmem_cache *skbuff_fclone_cache __ro_after_init;
#ifdef CONFIG_SKB_EXTENSIONS
static struct kmem_cache *skbuff_ext_cache __ro_after_init;
#endif
+
+/* skb_small_head_cache and related code are only supported
+ * for CONFIG_SLAB and CONFIG_SLUB.
+ * As soon as SLOB is removed from the kernel, we can clean this up.
+ */
+#if !defined(CONFIG_SLOB)
+# define HAVE_SKB_SMALL_HEAD_CACHE 1
+#endif
+
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+static struct kmem_cache *skb_small_head_cache __ro_after_init;
+
+#define SKB_SMALL_HEAD_SIZE SKB_HEAD_ALIGN(MAX_TCP_HEADER)
+
+/* We want SKB_SMALL_HEAD_CACHE_SIZE to not be a power of two.
+ * This should ensure that SKB_SMALL_HEAD_HEADROOM is a unique
+ * size, and we can differentiate heads from skb_small_head_cache
+ * vs system slabs by looking at their size (skb_end_offset()).
+ */
+#define SKB_SMALL_HEAD_CACHE_SIZE \
+ (is_power_of_2(SKB_SMALL_HEAD_SIZE) ? \
+ (SKB_SMALL_HEAD_SIZE + L1_CACHE_BYTES) : \
+ SKB_SMALL_HEAD_SIZE)
+
+#define SKB_SMALL_HEAD_HEADROOM \
+ SKB_WITH_OVERHEAD(SKB_SMALL_HEAD_CACHE_SIZE)
+#endif /* HAVE_SKB_SMALL_HEAD_CACHE */
+
int sysctl_max_skb_frags __read_mostly = MAX_SKB_FRAGS;
EXPORT_SYMBOL(sysctl_max_skb_frags);
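
Example: toy arithmetic for the sizing trick described above, with stand-in values (the real MAX_TCP_HEADER and alignment are config-dependent). Padding a power-of-two size by one cache line keeps skb_end_offset() unique, so the free path can tell this cache's heads apart from kmalloc() ones.

    #include <stdio.h>

    #define L1               64u
    #define ALIGN_UP(x, a)   (((x) + (a) - 1) & ~((a) - 1))

    static int is_pow2(unsigned int v)
    {
        return v && !(v & (v - 1));
    }

    int main(void)
    {
        unsigned int head = ALIGN_UP(512, L1); /* stand-in for the head size */
        unsigned int sz = is_pow2(head) ? head + L1 : head;

        printf("small head cache size: %u (power of two: %d)\n",
               sz, is_pow2(sz));
        return 0;
    }
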
@@ -256,7 +285,7 @@ static struct sk_buff *napi_skb_cache_get(void)
struct sk_buff *skb;
if (unlikely(!nc->skb_count)) {
- nc->skb_count = kmem_cache_alloc_bulk(skbuff_head_cache,
+ nc->skb_count = kmem_cache_alloc_bulk(skbuff_cache,
GFP_ATOMIC,
NAPI_SKB_CACHE_BULK,
nc->skb_cache);
@@ -265,7 +294,7 @@ static struct sk_buff *napi_skb_cache_get(void)
}
skb = nc->skb_cache[--nc->skb_count];
- kasan_unpoison_object_data(skbuff_head_cache, skb);
+ kasan_unpoison_object_data(skbuff_cache, skb);
return skb;
}
@@ -323,7 +352,7 @@ struct sk_buff *slab_build_skb(void *data)
struct sk_buff *skb;
unsigned int size;
- skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -374,7 +403,7 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -386,8 +415,6 @@ struct sk_buff *__build_skb(void *data, unsigned int frag_size)
/* build_skb() is wrapper over __build_skb(), that specifically
* takes care of skb->head and skb->pfmemalloc
- * This means that if @frag_size is not zero, then @data must be backed
- * by a page fragment, not kmalloc() or vmalloc()
*/
struct sk_buff *build_skb(void *data, unsigned int frag_size)
{
@@ -406,7 +433,7 @@ EXPORT_SYMBOL(build_skb);
* build_skb_around - build a network buffer around provided skb
* @skb: sk_buff provide by caller, must be memset cleared
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data
*/
struct sk_buff *build_skb_around(struct sk_buff *skb,
void *data, unsigned int frag_size)
@@ -428,7 +455,7 @@ EXPORT_SYMBOL(build_skb_around);
/**
* __napi_build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data
*
* Version of __build_skb() that uses NAPI percpu caches to obtain
* skbuff_head instead of inplace allocation.
@@ -452,7 +479,7 @@ static struct sk_buff *__napi_build_skb(void *data, unsigned int frag_size)
/**
* napi_build_skb - build a network buffer
* @data: data buffer provided by caller
- * @frag_size: size of data, or 0 if head was kmalloced
+ * @frag_size: size of data
*
* Version of __napi_build_skb() that takes care of skb->head_frag
* and skb->pfmemalloc when the data is a page or page fragment.
@@ -479,17 +506,37 @@ EXPORT_SYMBOL(napi_build_skb);
* may be used. Otherwise, the packet data may be discarded until enough
* memory is free
*/
-static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
+static void *kmalloc_reserve(unsigned int *size, gfp_t flags, int node,
bool *pfmemalloc)
{
- void *obj;
bool ret_pfmemalloc = false;
+ unsigned int obj_size;
+ void *obj;
+
+ obj_size = SKB_HEAD_ALIGN(*size);
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ if (obj_size <= SKB_SMALL_HEAD_CACHE_SIZE &&
+ !(flags & KMALLOC_NOT_NORMAL_BITS)) {
+ /* skb_small_head_cache has a non-power-of-two size,
+ * likely forcing SLUB to use order-3 pages.
+ * We deliberately attempt a NOMEMALLOC allocation only.
+ */
+ obj = kmem_cache_alloc_node(skb_small_head_cache,
+ flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
+ node);
+ if (obj) {
+ *size = SKB_SMALL_HEAD_CACHE_SIZE;
+ goto out;
+ }
+ }
+#endif
+ *size = obj_size = kmalloc_size_roundup(obj_size);
/*
* Try a regular allocation, when that fails and we're not entitled
* to the reserves, fail.
*/
- obj = kmalloc_node_track_caller(size,
+ obj = kmalloc_node_track_caller(obj_size,
flags | __GFP_NOMEMALLOC | __GFP_NOWARN,
node);
if (obj || !(gfp_pfmemalloc_allowed(flags)))
@@ -497,7 +544,7 @@ static void *kmalloc_reserve(size_t size, gfp_t flags, int node,
/* Try again but now we are using pfmemalloc reserves */
ret_pfmemalloc = true;
- obj = kmalloc_node_track_caller(size, flags, node);
+ obj = kmalloc_node_track_caller(obj_size, flags, node);
out:
if (pfmemalloc)
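
Example: the reworked in/out contract of kmalloc_reserve() from a caller's point of view, as the later hunks in this file use it. The wrapper below is a placeholder, not kernel code; only kmalloc_reserve() itself is from this patch.

    static void *alloc_head(unsigned int requested, gfp_t gfp)
    {
        unsigned int size = requested; /* in: payload; out: usable size */
        void *data = kmalloc_reserve(&size, gfp, NUMA_NO_NODE, NULL);

        if (!data)
            return NULL;
        /* skb_shared_info belongs at data + SKB_WITH_OVERHEAD(size) */
        return data;
    }
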
@@ -534,12 +581,11 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
{
struct kmem_cache *cache;
struct sk_buff *skb;
- unsigned int osize;
bool pfmemalloc;
u8 *data;
cache = (flags & SKB_ALLOC_FCLONE)
- ? skbuff_fclone_cache : skbuff_head_cache;
+ ? skbuff_fclone_cache : skbuff_cache;
if (sk_memalloc_socks() && (flags & SKB_ALLOC_RX))
gfp_mask |= __GFP_MEMALLOC;
@@ -559,18 +605,14 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* aligned memory blocks, unless SLUB/SLAB debug is enabled.
* Both skb->head and skb_shared_info are cache line aligned.
*/
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- osize = kmalloc_size_roundup(size);
- data = kmalloc_reserve(osize, gfp_mask, node, &pfmemalloc);
+ data = kmalloc_reserve(&size, gfp_mask, node, &pfmemalloc);
if (unlikely(!data))
goto nodata;
/* kmalloc_size_roundup() might give us more room than requested.
* Put skb_shared_info exactly at the end of allocated zone,
* to allow max possible filling before reallocation.
*/
- size = SKB_WITH_OVERHEAD(osize);
- prefetchw(data + size);
+ prefetchw(data + SKB_WITH_OVERHEAD(size));
/*
* Only clear those fields we need to clear, not those that we will
@@ -578,7 +620,7 @@ struct sk_buff *__alloc_skb(unsigned int size, gfp_t gfp_mask,
* the tail pointer in struct sk_buff!
*/
memset(skb, 0, offsetof(struct sk_buff, tail));
- __build_skb_around(skb, data, osize);
+ __build_skb_around(skb, data, size);
skb->pfmemalloc = pfmemalloc;
if (flags & SKB_ALLOC_FCLONE) {
@@ -633,8 +675,7 @@ struct sk_buff *__netdev_alloc_skb(struct net_device *dev, unsigned int len,
goto skb_success;
}
- len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- len = SKB_DATA_ALIGN(len);
+ len = SKB_HEAD_ALIGN(len);
if (sk_memalloc_socks())
gfp_mask |= __GFP_MEMALLOC;
@@ -733,8 +774,7 @@ struct sk_buff *__napi_alloc_skb(struct napi_struct *napi, unsigned int len,
data = page_frag_alloc_1k(&nc->page_small, gfp_mask);
pfmemalloc = NAPI_SMALL_PAGE_PFMEMALLOC(nc->page_small);
} else {
- len += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- len = SKB_DATA_ALIGN(len);
+ len = SKB_HEAD_ALIGN(len);
data = page_frag_alloc(&nc->page, len, gfp_mask);
pfmemalloc = nc->page.pfmemalloc;
@@ -810,6 +850,16 @@ static bool skb_pp_recycle(struct sk_buff *skb, void *data)
return page_pool_return_skb_page(virt_to_page(data));
}
+static void skb_kfree_head(void *head, unsigned int end_offset)
+{
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ if (end_offset == SKB_SMALL_HEAD_HEADROOM)
+ kmem_cache_free(skb_small_head_cache, head);
+ else
+#endif
+ kfree(head);
+}
+
static void skb_free_head(struct sk_buff *skb)
{
unsigned char *head = skb->head;
@@ -819,7 +869,7 @@ static void skb_free_head(struct sk_buff *skb)
return;
skb_free_frag(head);
} else {
- kfree(head);
+ skb_kfree_head(head, skb_end_offset(skb));
}
}
@@ -871,7 +921,7 @@ static void kfree_skbmem(struct sk_buff *skb)
switch (skb->fclone) {
case SKB_FCLONE_UNAVAILABLE:
- kmem_cache_free(skbuff_head_cache, skb);
+ kmem_cache_free(skbuff_cache, skb);
return;
case SKB_FCLONE_ORIG:
@@ -932,6 +982,21 @@ void __kfree_skb(struct sk_buff *skb)
}
EXPORT_SYMBOL(__kfree_skb);
+static __always_inline
+bool __kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
+{
+ if (unlikely(!skb_unref(skb)))
+ return false;
+
+ DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+
+ if (reason == SKB_CONSUMED)
+ trace_consume_skb(skb, __builtin_return_address(0));
+ else
+ trace_kfree_skb(skb, __builtin_return_address(0), reason);
+ return true;
+}
+
/**
* kfree_skb_reason - free an sk_buff with special reason
* @skb: buffer to free
@@ -944,28 +1009,58 @@ EXPORT_SYMBOL(__kfree_skb);
void __fix_address
kfree_skb_reason(struct sk_buff *skb, enum skb_drop_reason reason)
{
- if (unlikely(!skb_unref(skb)))
+ if (__kfree_skb_reason(skb, reason))
+ __kfree_skb(skb);
+}
+EXPORT_SYMBOL(kfree_skb_reason);
+
+#define KFREE_SKB_BULK_SIZE 16
+
+struct skb_free_array {
+ unsigned int skb_count;
+ void *skb_array[KFREE_SKB_BULK_SIZE];
+};
+
+static void kfree_skb_add_bulk(struct sk_buff *skb,
+ struct skb_free_array *sa,
+ enum skb_drop_reason reason)
+{
+ /* if SKB is a clone, don't handle this case */
+ if (unlikely(skb->fclone != SKB_FCLONE_UNAVAILABLE)) {
+ __kfree_skb(skb);
return;
+ }
- DEBUG_NET_WARN_ON_ONCE(reason <= 0 || reason >= SKB_DROP_REASON_MAX);
+ skb_release_all(skb, reason);
+ sa->skb_array[sa->skb_count++] = skb;
- if (reason == SKB_CONSUMED)
- trace_consume_skb(skb);
- else
- trace_kfree_skb(skb, __builtin_return_address(0), reason);
- __kfree_skb(skb);
+ if (unlikely(sa->skb_count == KFREE_SKB_BULK_SIZE)) {
+ kmem_cache_free_bulk(skbuff_cache, KFREE_SKB_BULK_SIZE,
+ sa->skb_array);
+ sa->skb_count = 0;
+ }
}
-EXPORT_SYMBOL(kfree_skb_reason);
-void kfree_skb_list_reason(struct sk_buff *segs,
- enum skb_drop_reason reason)
+void __fix_address
+kfree_skb_list_reason(struct sk_buff *segs, enum skb_drop_reason reason)
{
+ struct skb_free_array sa;
+
+ sa.skb_count = 0;
+
while (segs) {
struct sk_buff *next = segs->next;
- kfree_skb_reason(segs, reason);
+ if (__kfree_skb_reason(segs, reason)) {
+ skb_poison_list(segs);
+ kfree_skb_add_bulk(segs, &sa, reason);
+ }
+
segs = next;
}
+
+ if (sa.skb_count)
+ kmem_cache_free_bulk(skbuff_cache, sa.skb_count, sa.skb_array);
}
EXPORT_SYMBOL(kfree_skb_list_reason);
@@ -1094,7 +1189,7 @@ void consume_skb(struct sk_buff *skb)
if (!skb_unref(skb))
return;
- trace_consume_skb(skb);
+ trace_consume_skb(skb, __builtin_return_address(0));
__kfree_skb(skb);
}
EXPORT_SYMBOL(consume_skb);
@@ -1109,7 +1204,7 @@ EXPORT_SYMBOL(consume_skb);
*/
void __consume_stateless_skb(struct sk_buff *skb)
{
- trace_consume_skb(skb);
+ trace_consume_skb(skb, __builtin_return_address(0));
skb_release_data(skb, SKB_CONSUMED);
kfree_skbmem(skb);
}
@@ -1119,15 +1214,15 @@ static void napi_skb_cache_put(struct sk_buff *skb)
struct napi_alloc_cache *nc = this_cpu_ptr(&napi_alloc_cache);
u32 i;
- kasan_poison_object_data(skbuff_head_cache, skb);
+ kasan_poison_object_data(skbuff_cache, skb);
nc->skb_cache[nc->skb_count++] = skb;
if (unlikely(nc->skb_count == NAPI_SKB_CACHE_SIZE)) {
for (i = NAPI_SKB_CACHE_HALF; i < NAPI_SKB_CACHE_SIZE; i++)
- kasan_unpoison_object_data(skbuff_head_cache,
+ kasan_unpoison_object_data(skbuff_cache,
nc->skb_cache[i]);
- kmem_cache_free_bulk(skbuff_head_cache, NAPI_SKB_CACHE_HALF,
+ kmem_cache_free_bulk(skbuff_cache, NAPI_SKB_CACHE_HALF,
nc->skb_cache + NAPI_SKB_CACHE_HALF);
nc->skb_count = NAPI_SKB_CACHE_HALF;
}
@@ -1165,7 +1260,7 @@ void napi_consume_skb(struct sk_buff *skb, int budget)
return;
/* if reaching here SKB is ready to free */
- trace_consume_skb(skb);
+ trace_consume_skb(skb, __builtin_return_address(0));
/* if SKB is a clone, don't handle this case */
if (skb->fclone != SKB_FCLONE_UNAVAILABLE) {
@@ -1311,14 +1406,18 @@ EXPORT_SYMBOL_GPL(skb_morph);
int mm_account_pinned_pages(struct mmpin *mmp, size_t size)
{
- unsigned long max_pg, num_pg, new_pg, old_pg;
+ unsigned long max_pg, num_pg, new_pg, old_pg, rlim;
struct user_struct *user;
if (capable(CAP_IPC_LOCK) || !size)
return 0;
+ rlim = rlimit(RLIMIT_MEMLOCK);
+ if (rlim == RLIM_INFINITY)
+ return 0;
+
num_pg = (size >> PAGE_SHIFT) + 2; /* worst case */
- max_pg = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
+ max_pg = rlim >> PAGE_SHIFT;
user = mmp->user ? : current_user();
old_pg = atomic_long_read(&user->locked_vm);
@@ -1711,7 +1810,7 @@ struct sk_buff *skb_clone(struct sk_buff *skb, gfp_t gfp_mask)
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- n = kmem_cache_alloc(skbuff_head_cache, gfp_mask);
+ n = kmem_cache_alloc(skbuff_cache, gfp_mask);
if (!n)
return NULL;
@@ -1893,10 +1992,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size = kmalloc_size_roundup(size);
- data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+ data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
goto nodata;
size = SKB_WITH_OVERHEAD(size);
@@ -1959,7 +2055,7 @@ int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail,
return 0;
nofrags:
- kfree(data);
+ skb_kfree_head(data, size);
nodata:
return -ENOMEM;
}
@@ -4100,7 +4196,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
skb_shinfo(skb)->frag_list = NULL;
- do {
+ while (list_skb) {
nskb = list_skb;
list_skb = list_skb->next;
@@ -4146,8 +4242,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
if (skb_needs_linearize(nskb, features) &&
__skb_linearize(nskb))
goto err_linearize;
-
- } while (list_skb);
+ }
skb->truesize = skb->truesize - delta_truesize;
skb->data_len = skb->data_len - delta_len;
@@ -4585,7 +4680,7 @@ static void skb_extensions_init(void) {}
void __init skb_init(void)
{
- skbuff_head_cache = kmem_cache_create_usercopy("skbuff_head_cache",
+ skbuff_cache = kmem_cache_create_usercopy("skbuff_head_cache",
sizeof(struct sk_buff),
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
@@ -4597,6 +4692,19 @@ void __init skb_init(void)
0,
SLAB_HWCACHE_ALIGN|SLAB_PANIC,
NULL);
+#ifdef HAVE_SKB_SMALL_HEAD_CACHE
+ /* usercopy should only access the first SKB_SMALL_HEAD_HEADROOM bytes.
+ * struct skb_shared_info is located at the end of skb->head,
+ * and should not be copied to/from user.
+ */
+ skb_small_head_cache = kmem_cache_create_usercopy("skbuff_small_head",
+ SKB_SMALL_HEAD_CACHE_SIZE,
+ 0,
+ SLAB_HWCACHE_ALIGN | SLAB_PANIC,
+ 0,
+ SKB_SMALL_HEAD_HEADROOM,
+ NULL);
+#endif
skb_extensions_init();
}
@@ -5451,7 +5559,7 @@ void kfree_skb_partial(struct sk_buff *skb, bool head_stolen)
{
if (head_stolen) {
skb_release_head_state(skb);
- kmem_cache_free(skbuff_head_cache, skb);
+ kmem_cache_free(skbuff_cache, skb);
} else {
__kfree_skb(skb);
}
@@ -6245,10 +6353,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size = kmalloc_size_roundup(size);
- data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+ data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;
size = SKB_WITH_OVERHEAD(size);
@@ -6264,7 +6369,7 @@ static int pskb_carve_inside_header(struct sk_buff *skb, const u32 off,
if (skb_cloned(skb)) {
/* drop the old head gracefully */
if (skb_orphan_frags(skb, gfp_mask)) {
- kfree(data);
+ skb_kfree_head(data, size);
return -ENOMEM;
}
for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
@@ -6364,10 +6469,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
if (skb_pfmemalloc(skb))
gfp_mask |= __GFP_MEMALLOC;
- size = SKB_DATA_ALIGN(size);
- size += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
- size = kmalloc_size_roundup(size);
- data = kmalloc_reserve(size, gfp_mask, NUMA_NO_NODE, NULL);
+ data = kmalloc_reserve(&size, gfp_mask, NUMA_NO_NODE, NULL);
if (!data)
return -ENOMEM;
size = SKB_WITH_OVERHEAD(size);
@@ -6375,7 +6477,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
memcpy((struct skb_shared_info *)(data + size),
skb_shinfo(skb), offsetof(struct skb_shared_info, frags[0]));
if (skb_orphan_frags(skb, gfp_mask)) {
- kfree(data);
+ skb_kfree_head(data, size);
return -ENOMEM;
}
shinfo = (struct skb_shared_info *)(data + size);
@@ -6411,7 +6513,7 @@ static int pskb_carve_inside_nonlinear(struct sk_buff *skb, const u32 off,
/* skb_frag_unref() is not needed here as shinfo->nr_frags = 0. */
if (skb_has_frag_list(skb))
kfree_skb_list(skb_shinfo(skb)->frag_list);
- kfree(data);
+ skb_kfree_head(data, size);
return -ENOMEM;
}
skb_release_data(skb, SKB_CONSUMED);
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 53d0251788aa..f81883759d38 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -8,6 +8,7 @@
#include <net/sock.h>
#include <net/tcp.h>
#include <net/tls.h>
+#include <trace/events/sock.h>
static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
{
@@ -1114,6 +1115,8 @@ static void sk_psock_strp_data_ready(struct sock *sk)
{
struct sk_psock *psock;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
psock = sk_psock(sk);
if (likely(psock)) {
@@ -1210,6 +1213,8 @@ static void sk_psock_verdict_data_ready(struct sock *sk)
{
struct socket *sock = sk->sk_socket;
+ trace_sk_data_ready(sk);
+
if (unlikely(!sock || !sock->ops || !sock->ops->read_skb))
return;
sock->ops->read_skb(sk, sk_psock_verdict_recv);
diff --git a/net/core/sock.c b/net/core/sock.c
index f954d5893e79..341c565dbc26 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1531,6 +1531,8 @@ set_sndbuf:
ret = -EINVAL;
break;
}
+ if ((u8)val == SOCK_TXREHASH_DEFAULT)
+ val = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
/* Paired with READ_ONCE() in tcp_rtx_synack() */
WRITE_ONCE(sk->sk_txrehash, (u8)val);
break;
@@ -2338,17 +2340,6 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
smp_wmb();
refcount_set(&newsk->sk_refcnt, 2);
- /* Increment the counter in the same struct proto as the master
- * sock (sk_refcnt_debug_inc uses newsk->sk_prot->socks, that
- * is the same as sk->sk_prot->socks, as this field was copied
- * with memcpy).
- *
- * This _changes_ the previous behaviour, where
- * tcp_create_openreq_child always was incrementing the
- * equivalent to tcp_prot->socks (inet_sock_nr), so this have
- * to be taken into account in all callers. -acme
- */
- sk_refcnt_debug_inc(newsk);
sk_set_socket(newsk, NULL);
sk_tx_queue_clear(newsk);
RCU_INIT_POINTER(newsk->sk_wq, NULL);
@@ -2373,17 +2364,22 @@ void sk_free_unlock_clone(struct sock *sk)
}
EXPORT_SYMBOL_GPL(sk_free_unlock_clone);
-static void sk_trim_gso_size(struct sock *sk)
+static u32 sk_dst_gso_max_size(struct sock *sk, struct dst_entry *dst)
{
- if (sk->sk_gso_max_size <= GSO_LEGACY_MAX_SIZE)
- return;
+ bool is_ipv6 = false;
+ u32 max_size;
+
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6 &&
- sk_is_tcp(sk) &&
- !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
- return;
+ is_ipv6 = (sk->sk_family == AF_INET6 &&
+ !ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr));
#endif
- sk->sk_gso_max_size = GSO_LEGACY_MAX_SIZE;
+ /* pairs with the WRITE_ONCE() in netif_set_gso(_ipv4)_max_size() */
+ max_size = is_ipv6 ? READ_ONCE(dst->dev->gso_max_size) :
+ READ_ONCE(dst->dev->gso_ipv4_max_size);
+ if (max_size > GSO_LEGACY_MAX_SIZE && !sk_is_tcp(sk))
+ max_size = GSO_LEGACY_MAX_SIZE;
+
+ return max_size - (MAX_TCP_HEADER + 1);
}
void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
@@ -2403,10 +2399,7 @@ void sk_setup_caps(struct sock *sk, struct dst_entry *dst)
sk->sk_route_caps &= ~NETIF_F_GSO_MASK;
} else {
sk->sk_route_caps |= NETIF_F_SG | NETIF_F_HW_CSUM;
- /* pairs with the WRITE_ONCE() in netif_set_gso_max_size() */
- sk->sk_gso_max_size = READ_ONCE(dst->dev->gso_max_size);
- sk_trim_gso_size(sk);
- sk->sk_gso_max_size -= (MAX_TCP_HEADER + 1);
+ sk->sk_gso_max_size = sk_dst_gso_max_size(sk, dst);
/* pairs with the WRITE_ONCE() in netif_set_gso_max_segs() */
max_segs = max_t(u32, READ_ONCE(dst->dev->gso_max_segs), 1);
}
@@ -3291,6 +3284,8 @@ void sock_def_readable(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
@@ -3379,7 +3374,7 @@ void sk_stop_timer_sync(struct sock *sk, struct timer_list *timer)
}
EXPORT_SYMBOL(sk_stop_timer_sync);
-void sock_init_data(struct socket *sock, struct sock *sk)
+void sock_init_data_uid(struct socket *sock, struct sock *sk, kuid_t uid)
{
sk_init_common(sk);
sk->sk_send_head = NULL;
@@ -3399,11 +3394,10 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_type = sock->type;
RCU_INIT_POINTER(sk->sk_wq, &sock->wq);
sock->sk = sk;
- sk->sk_uid = SOCK_INODE(sock)->i_uid;
} else {
RCU_INIT_POINTER(sk->sk_wq, NULL);
- sk->sk_uid = make_kuid(sock_net(sk)->user_ns, 0);
}
+ sk->sk_uid = uid;
rwlock_init(&sk->sk_callback_lock);
if (sk->sk_kern_sock)
@@ -3451,7 +3445,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
sk->sk_pacing_rate = ~0UL;
WRITE_ONCE(sk->sk_pacing_shift, 10);
sk->sk_incoming_cpu = -1;
- sk->sk_txrehash = SOCK_TXREHASH_DEFAULT;
sk_rx_queue_clear(sk);
/*
@@ -3462,6 +3455,16 @@ void sock_init_data(struct socket *sock, struct sock *sk)
refcount_set(&sk->sk_refcnt, 1);
atomic_set(&sk->sk_drops, 0);
}
+EXPORT_SYMBOL(sock_init_data_uid);
+
+void sock_init_data(struct socket *sock, struct sock *sk)
+{
+ kuid_t uid = sock ?
+ SOCK_INODE(sock)->i_uid :
+ make_kuid(sock_net(sk)->user_ns, 0);
+
+ sock_init_data_uid(sock, sk, uid);
+}
EXPORT_SYMBOL(sock_init_data);
void lock_sock_nested(struct sock *sk, int subclass)
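
Example: the kind of caller sock_init_data_uid() enables. A driver that creates a sock without a backing socket inode (so sock_init_data() would fall back to root's kuid) can now attribute it to the task that opened the device. "struct my_priv" and its owner_uid field are placeholders.

    static void my_sock_setup(struct sock *sk, struct my_priv *priv)
    {
        /* priv->owner_uid was captured with current_uid() at open() time */
        sock_init_data_uid(NULL, sk, priv->owner_uid);
    }
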
@@ -3696,8 +3699,6 @@ void sk_common_release(struct sock *sk)
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
-
sock_put(sk);
}
EXPORT_SYMBOL(sk_common_release);
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index 22fa2c5bc6ec..a68a7290a3b2 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -1569,15 +1569,16 @@ void sock_map_unhash(struct sock *sk)
psock = sk_psock(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->unhash)
- sk->sk_prot->unhash(sk);
- return;
+ saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+ } else {
+ saved_unhash = psock->saved_unhash;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
}
-
- saved_unhash = psock->saved_unhash;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- saved_unhash(sk);
+ if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+ return;
+ if (saved_unhash)
+ saved_unhash(sk);
}
EXPORT_SYMBOL_GPL(sock_map_unhash);
@@ -1590,17 +1591,18 @@ void sock_map_destroy(struct sock *sk)
psock = sk_psock_get(sk);
if (unlikely(!psock)) {
rcu_read_unlock();
- if (sk->sk_prot->destroy)
- sk->sk_prot->destroy(sk);
- return;
+ saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+ } else {
+ saved_destroy = psock->saved_destroy;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ sk_psock_put(sk, psock);
}
-
- saved_destroy = psock->saved_destroy;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- sk_psock_put(sk, psock);
- saved_destroy(sk);
+ if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+ return;
+ if (saved_destroy)
+ saved_destroy(sk);
}
EXPORT_SYMBOL_GPL(sock_map_destroy);
@@ -1615,16 +1617,21 @@ void sock_map_close(struct sock *sk, long timeout)
if (unlikely(!psock)) {
rcu_read_unlock();
release_sock(sk);
- return sk->sk_prot->close(sk, timeout);
+ saved_close = READ_ONCE(sk->sk_prot)->close;
+ } else {
+ saved_close = psock->saved_close;
+ sock_map_remove_links(sk, psock);
+ rcu_read_unlock();
+ sk_psock_stop(psock);
+ release_sock(sk);
+ cancel_work_sync(&psock->work);
+ sk_psock_put(sk, psock);
}
-
- saved_close = psock->saved_close;
- sock_map_remove_links(sk, psock);
- rcu_read_unlock();
- sk_psock_stop(psock);
- release_sock(sk);
- cancel_work_sync(&psock->work);
- sk_psock_put(sk, psock);
+	/* Make sure we do not recurse: saved_close pointing back at
+	 * sock_map_close is a bug. Leak the socket instead of crashing
+	 * on a stack overflow.
+	 */
+ if (WARN_ON_ONCE(saved_close == sock_map_close))
+ return;
saved_close(sk, timeout);
}
EXPORT_SYMBOL_GPL(sock_map_close);
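
All three sock_map handlers above now share one shape: resolve the saved callback first (from the psock if one is attached, otherwise from sk_prot), drop the locks, then refuse to call it if it points back at the handler itself. A userspace sketch of that recursion guard; the types are stand-ins:

#include <stdio.h>

struct conn;
typedef void (*close_fn)(struct conn *);

struct conn {
	close_fn saved_close;	/* callback captured before the map took over */
};

static void map_close(struct conn *c);

/* The original protocol close; what saved_close normally points at. */
static void proto_close(struct conn *c)
{
	(void)c;
	printf("proto close ran\n");
}

static void map_close(struct conn *c)
{
	close_fn saved_close = c->saved_close;

	/* If the saved pointer loops back here, calling it would recurse
	 * until the stack overflows; leak the object instead of crashing. */
	if (saved_close == map_close) {
		fprintf(stderr, "bug: recursive close, leaking\n");
		return;
	}
	if (saved_close)
		saved_close(c);
}

int main(void)
{
	struct conn ok = { .saved_close = proto_close };
	struct conn bad = { .saved_close = map_close };

	map_close(&ok);		/* runs proto_close */
	map_close(&bad);	/* detected, does not recurse */
	return 0;
}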
diff --git a/net/core/stream.c b/net/core/stream.c
index cd06750dd329..434446ab14c5 100644
--- a/net/core/stream.c
+++ b/net/core/stream.c
@@ -209,7 +209,6 @@ void sk_stream_kill_queues(struct sock *sk)
sk_mem_reclaim_final(sk);
WARN_ON_ONCE(sk->sk_wmem_queued);
- WARN_ON_ONCE(sk->sk_forward_alloc);
/* It is _impossible_ for the backlog to contain anything
* when we get here. All user references to this socket
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 5b1ce656baa1..74842b453407 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -16,6 +16,7 @@
#include <linux/vmalloc.h>
#include <linux/init.h>
#include <linux/slab.h>
+#include <linux/sched/isolation.h>
#include <net/ip.h>
#include <net/sock.h>
@@ -45,7 +46,82 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
int sysctl_devconf_inherit_init_net __read_mostly;
EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
+#if IS_ENABLED(CONFIG_NET_FLOW_LIMIT) || IS_ENABLED(CONFIG_RPS)
+static void dump_cpumask(void *buffer, size_t *lenp, loff_t *ppos,
+ struct cpumask *mask)
+{
+ char kbuf[128];
+ int len;
+
+ if (*ppos || !*lenp) {
+ *lenp = 0;
+ return;
+ }
+
+ len = min(sizeof(kbuf) - 1, *lenp);
+ len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
+ if (!len) {
+ *lenp = 0;
+ return;
+ }
+
+ if (len < *lenp)
+ kbuf[len++] = '\n';
+ memcpy(buffer, kbuf, len);
+ *lenp = len;
+ *ppos += len;
+}
+#endif
+
#ifdef CONFIG_RPS
+
+static struct cpumask *rps_default_mask_cow_alloc(struct net *net)
+{
+ struct cpumask *rps_default_mask;
+
+ if (net->core.rps_default_mask)
+ return net->core.rps_default_mask;
+
+ rps_default_mask = kzalloc(cpumask_size(), GFP_KERNEL);
+ if (!rps_default_mask)
+ return NULL;
+
+ /* pairs with READ_ONCE in rx_queue_default_mask() */
+ WRITE_ONCE(net->core.rps_default_mask, rps_default_mask);
+ return rps_default_mask;
+}
+
+static int rps_default_mask_sysctl(struct ctl_table *table, int write,
+ void *buffer, size_t *lenp, loff_t *ppos)
+{
+ struct net *net = (struct net *)table->data;
+ int err = 0;
+
+ rtnl_lock();
+ if (write) {
+ struct cpumask *rps_default_mask = rps_default_mask_cow_alloc(net);
+
+ err = -ENOMEM;
+ if (!rps_default_mask)
+ goto done;
+
+ err = cpumask_parse(buffer, rps_default_mask);
+ if (err)
+ goto done;
+
+ err = rps_cpumask_housekeeping(rps_default_mask);
+ if (err)
+ goto done;
+ } else {
+ dump_cpumask(buffer, lenp, ppos,
+ net->core.rps_default_mask ? : cpu_none_mask);
+ }
+
+done:
+ rtnl_unlock();
+ return err;
+}
+
static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
void *buffer, size_t *lenp, loff_t *ppos)
{
@@ -155,13 +231,6 @@ static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
write_unlock:
mutex_unlock(&flow_limit_update_mutex);
} else {
- char kbuf[128];
-
- if (*ppos || !*lenp) {
- *lenp = 0;
- goto done;
- }
-
cpumask_clear(mask);
rcu_read_lock();
for_each_possible_cpu(i) {
@@ -171,17 +240,7 @@ write_unlock:
}
rcu_read_unlock();
- len = min(sizeof(kbuf) - 1, *lenp);
- len = scnprintf(kbuf, len, "%*pb", cpumask_pr_args(mask));
- if (!len) {
- *lenp = 0;
- goto done;
- }
- if (len < *lenp)
- kbuf[len++] = '\n';
- memcpy(buffer, kbuf, len);
- *lenp = len;
- *ppos += len;
+ dump_cpumask(buffer, lenp, ppos, mask);
}
done:
@@ -598,6 +657,14 @@ static struct ctl_table net_core_table[] = {
};
static struct ctl_table netns_core_table[] = {
+#if IS_ENABLED(CONFIG_RPS)
+ {
+ .procname = "rps_default_mask",
+ .data = &init_net,
+ .mode = 0644,
+ .proc_handler = rps_default_mask_sysctl
+ },
+#endif
{
.procname = "somaxconn",
.data = &init_net.core.sysctl_somaxconn,
@@ -643,11 +710,6 @@ static __net_init int sysctl_core_net_init(struct net *net)
for (tmp = tbl; tmp->procname; tmp++)
tmp->data += (char *)net - (char *)&init_net;
-
- /* Don't export any sysctls to unprivileged users */
- if (net->user_ns != &init_user_ns) {
- tbl[0].procname = NULL;
- }
}
net->core.sysctl_hdr = register_net_sysctl(net, "net/core", tbl);
@@ -670,6 +732,9 @@ static __net_exit void sysctl_core_net_exit(struct net *net)
tbl = net->core.sysctl_hdr->ctl_table_arg;
unregister_net_sysctl_table(net->core.sysctl_hdr);
BUG_ON(tbl == netns_core_table);
+#if IS_ENABLED(CONFIG_RPS)
+ kfree(net->core.rps_default_mask);
+#endif
kfree(tbl);
}
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 844c9d99dc0e..8c92fc553317 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -4,6 +4,8 @@
* Copyright (c) 2017 Jesper Dangaard Brouer, Red Hat Inc.
*/
#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
#include <linux/filter.h>
#include <linux/types.h>
#include <linux/mm.h>
@@ -602,8 +604,7 @@ EXPORT_SYMBOL_GPL(xdp_warn);
int xdp_alloc_skb_bulk(void **skbs, int n_skb, gfp_t gfp)
{
- n_skb = kmem_cache_alloc_bulk(skbuff_head_cache, gfp,
- n_skb, skbs);
+ n_skb = kmem_cache_alloc_bulk(skbuff_cache, gfp, n_skb, skbs);
if (unlikely(!n_skb))
return -ENOMEM;
@@ -672,7 +673,7 @@ struct sk_buff *xdp_build_skb_from_frame(struct xdp_frame *xdpf,
{
struct sk_buff *skb;
- skb = kmem_cache_alloc(skbuff_head_cache, GFP_ATOMIC);
+ skb = kmem_cache_alloc(skbuff_cache, GFP_ATOMIC);
if (unlikely(!skb))
return NULL;
@@ -709,3 +710,84 @@ struct xdp_frame *xdpf_clone(struct xdp_frame *xdpf)
return nxdpf;
}
+
+__diag_push();
+__diag_ignore_all("-Wmissing-prototypes",
+ "Global functions as their definitions will be in vmlinux BTF");
+
+/**
+ * bpf_xdp_metadata_rx_timestamp - Read XDP frame RX timestamp.
+ * @ctx: XDP context pointer.
+ * @timestamp: Return value pointer.
+ *
+ * Returns 0 on success or ``-errno`` on error.
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, u64 *timestamp)
+{
+ return -EOPNOTSUPP;
+}
+
+/**
+ * bpf_xdp_metadata_rx_hash - Read XDP frame RX hash.
+ * @ctx: XDP context pointer.
+ * @hash: Return value pointer.
+ *
+ * Returns 0 on success or ``-errno`` on error.
+ */
+__bpf_kfunc int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, u32 *hash)
+{
+ return -EOPNOTSUPP;
+}
+
+__diag_pop();
+
+BTF_SET8_START(xdp_metadata_kfunc_ids)
+#define XDP_METADATA_KFUNC(_, name) BTF_ID_FLAGS(func, name, 0)
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+BTF_SET8_END(xdp_metadata_kfunc_ids)
+
+static const struct btf_kfunc_id_set xdp_metadata_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &xdp_metadata_kfunc_ids,
+};
+
+BTF_ID_LIST(xdp_metadata_kfunc_ids_unsorted)
+#define XDP_METADATA_KFUNC(name, str) BTF_ID(func, str)
+XDP_METADATA_KFUNC_xxx
+#undef XDP_METADATA_KFUNC
+
+u32 bpf_xdp_metadata_kfunc_id(int id)
+{
+	/* xdp_metadata_kfunc_ids is sorted by BTF id during registration,
+	 * so it no longer follows the XDP_METADATA_KFUNC_xxx order; use
+	 * the unsorted list for index-based lookup instead.
+	 */
+ return xdp_metadata_kfunc_ids_unsorted[id];
+}
+
+bool bpf_dev_bound_kfunc_id(u32 btf_id)
+{
+ return btf_id_set8_contains(&xdp_metadata_kfunc_ids, btf_id);
+}
+
+static int __init xdp_metadata_init(void)
+{
+ return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &xdp_metadata_kfunc_set);
+}
+late_initcall(xdp_metadata_init);
+
+void xdp_features_set_redirect_target(struct net_device *dev, bool support_sg)
+{
+ dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT;
+ if (support_sg)
+ dev->xdp_features |= NETDEV_XDP_ACT_NDO_XMIT_SG;
+
+ call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL_GPL(xdp_features_set_redirect_target);
+
+void xdp_features_clear_redirect_target(struct net_device *dev)
+{
+ dev->xdp_features &= ~(NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG);
+ call_netdevice_notifiers(NETDEV_XDP_FEAT_CHANGE, dev);
+}
+EXPORT_SYMBOL_GPL(xdp_features_clear_redirect_target);
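
The two feature helpers above just set or clear a pair of capability bits and fire a notifier so listeners can resynchronize. A standalone sketch of that flag discipline; the bit values and the notify hook are illustrative:

#include <stdio.h>

#define ACT_NDO_XMIT	(1u << 0)	/* illustrative bit values */
#define ACT_NDO_XMIT_SG	(1u << 1)

struct dev {
	unsigned int xdp_features;
};

static void notify_feat_change(struct dev *d)
{
	printf("features now 0x%x\n", d->xdp_features);
}

static void set_redirect_target(struct dev *d, int support_sg)
{
	d->xdp_features |= ACT_NDO_XMIT;
	if (support_sg)
		d->xdp_features |= ACT_NDO_XMIT_SG;
	notify_feat_change(d);
}

static void clear_redirect_target(struct dev *d)
{
	d->xdp_features &= ~(ACT_NDO_XMIT | ACT_NDO_XMIT_SG);
	notify_feat_change(d);
}

int main(void)
{
	struct dev d = { 0 };

	set_redirect_target(&d, 1);
	clear_redirect_target(&d);
	return 0;
}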
diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c
index f9949e051f49..c0c438128575 100644
--- a/net/dcb/dcbnl.c
+++ b/net/dcb/dcbnl.c
@@ -178,6 +178,7 @@ static const struct nla_policy dcbnl_featcfg_nest[DCB_FEATCFG_ATTR_MAX + 1] = {
};
static LIST_HEAD(dcb_app_list);
+static LIST_HEAD(dcb_rewr_list);
static DEFINE_SPINLOCK(dcb_lock);
static enum ieee_attrs_app dcbnl_app_attr_type_get(u8 selector)
@@ -1099,11 +1100,46 @@ out:
return err;
}
+/* Set or delete APP table or rewrite table entries. The APP struct is validated
+ * and the appropriate callback function is called.
+ */
+static int dcbnl_app_table_setdel(struct nlattr *attr,
+ struct net_device *netdev,
+ int (*setdel)(struct net_device *dev,
+ struct dcb_app *app))
+{
+ struct dcb_app *app_data;
+ enum ieee_attrs_app type;
+ struct nlattr *attr_itr;
+ int rem, err;
+
+ nla_for_each_nested(attr_itr, attr, rem) {
+ type = nla_type(attr_itr);
+
+ if (!dcbnl_app_attr_type_validate(type))
+ continue;
+
+ if (nla_len(attr_itr) < sizeof(struct dcb_app))
+ return -ERANGE;
+
+ app_data = nla_data(attr_itr);
+
+ if (!dcbnl_app_selector_validate(type, app_data->selector))
+ return -EINVAL;
+
+ err = setdel(netdev, app_data);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
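
dcbnl_app_table_setdel() folds the two former copies of this loop into one walker that validates each nested entry and hands it to whichever callback the caller chose; the call sites below select the driver op or the generic fallback with ops->x ?: dcb_x. A userspace sketch of that dispatch shape, with made-up names and a trivial stand-in for the attribute validation:

#include <stdio.h>

struct app {
	int selector;
	int priority;
};

static int generic_set(struct app *a)
{
	printf("generic set: sel=%d prio=%d\n", a->selector, a->priority);
	return 0;
}

/* Walk the table and apply `setdel` to every valid entry, stopping on error. */
static int table_apply(struct app *tbl, int n, int (*setdel)(struct app *))
{
	int i, err;

	for (i = 0; i < n; i++) {
		if (tbl[i].selector < 0)	/* stand-in for attribute validation */
			continue;
		err = setdel(&tbl[i]);
		if (err)
			return err;
	}
	return 0;
}

int main(void)
{
	struct app tbl[] = { { 1, 3 }, { -1, 0 }, { 2, 5 } };
	int (*driver_set)(struct app *) = NULL;	/* no driver op registered */

	/* GNU-C `?:` default, as at the dcbnl call sites (needs gcc/clang). */
	return table_apply(tbl, 3, driver_set ?: generic_set);
}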
/* Handle IEEE 802.1Qaz/802.1Qau/802.1Qbb GET commands. */
static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
{
const struct dcbnl_rtnl_ops *ops = netdev->dcbnl_ops;
- struct nlattr *ieee, *app;
+ struct nlattr *ieee, *app, *rewr;
struct dcb_app_type *itr;
int dcbx;
int err;
@@ -1206,6 +1242,27 @@ static int dcbnl_ieee_fill(struct sk_buff *skb, struct net_device *netdev)
spin_unlock_bh(&dcb_lock);
nla_nest_end(skb, app);
+ rewr = nla_nest_start(skb, DCB_ATTR_DCB_REWR_TABLE);
+ if (!rewr)
+ return -EMSGSIZE;
+
+ spin_lock_bh(&dcb_lock);
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->ifindex == netdev->ifindex) {
+ enum ieee_attrs_app type =
+ dcbnl_app_attr_type_get(itr->app.selector);
+ err = nla_put(skb, type, sizeof(itr->app), &itr->app);
+ if (err) {
+ spin_unlock_bh(&dcb_lock);
+ nla_nest_cancel(skb, rewr);
+ return -EMSGSIZE;
+ }
+ }
+ }
+
+ spin_unlock_bh(&dcb_lock);
+ nla_nest_end(skb, rewr);
+
if (ops->dcbnl_getapptrust) {
err = dcbnl_getapptrust(netdev, skb);
if (err)
@@ -1567,37 +1624,20 @@ static int dcbnl_ieee_set(struct net_device *netdev, struct nlmsghdr *nlh,
goto err;
}
- if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
- enum ieee_attrs_app type = nla_type(attr);
- struct dcb_app *app_data;
-
- if (!dcbnl_app_attr_type_validate(type))
- continue;
-
- if (nla_len(attr) < sizeof(struct dcb_app)) {
- err = -ERANGE;
- goto err;
- }
-
- app_data = nla_data(attr);
-
- if (!dcbnl_app_selector_validate(type,
- app_data->selector)) {
- err = -EINVAL;
- goto err;
- }
+ if (ieee[DCB_ATTR_DCB_REWR_TABLE]) {
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_DCB_REWR_TABLE],
+ netdev,
+ ops->dcbnl_setrewr ?: dcb_setrewr);
+ if (err)
+ goto err;
+ }
- if (ops->ieee_setapp)
- err = ops->ieee_setapp(netdev, app_data);
- else
- err = dcb_ieee_setapp(netdev, app_data);
- if (err)
- goto err;
- }
+ if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_IEEE_APP_TABLE],
+ netdev, ops->ieee_setapp ?:
+ dcb_ieee_setapp);
+ if (err)
+ goto err;
}
if (ieee[DCB_ATTR_DCB_APP_TRUST_TABLE]) {
@@ -1684,31 +1724,19 @@ static int dcbnl_ieee_del(struct net_device *netdev, struct nlmsghdr *nlh,
return err;
if (ieee[DCB_ATTR_IEEE_APP_TABLE]) {
- struct nlattr *attr;
- int rem;
-
- nla_for_each_nested(attr, ieee[DCB_ATTR_IEEE_APP_TABLE], rem) {
- enum ieee_attrs_app type = nla_type(attr);
- struct dcb_app *app_data;
-
- if (!dcbnl_app_attr_type_validate(type))
- continue;
-
- app_data = nla_data(attr);
-
- if (!dcbnl_app_selector_validate(type,
- app_data->selector)) {
- err = -EINVAL;
- goto err;
- }
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_IEEE_APP_TABLE],
+ netdev, ops->ieee_delapp ?:
+ dcb_ieee_delapp);
+ if (err)
+ goto err;
+ }
- if (ops->ieee_delapp)
- err = ops->ieee_delapp(netdev, app_data);
- else
- err = dcb_ieee_delapp(netdev, app_data);
- if (err)
- goto err;
- }
+ if (ieee[DCB_ATTR_DCB_REWR_TABLE]) {
+ err = dcbnl_app_table_setdel(ieee[DCB_ATTR_DCB_REWR_TABLE],
+ netdev,
+ ops->dcbnl_delrewr ?: dcb_delrewr);
+ if (err)
+ goto err;
}
err:
@@ -1939,6 +1967,22 @@ out:
return ret;
}
+static struct dcb_app_type *dcb_rewr_lookup(const struct dcb_app *app,
+ int ifindex, int proto)
+{
+ struct dcb_app_type *itr;
+
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->app.selector == app->selector &&
+ itr->app.priority == app->priority &&
+ itr->ifindex == ifindex &&
+ ((proto == -1) || itr->app.protocol == proto))
+ return itr;
+ }
+
+ return NULL;
+}
+
static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app,
int ifindex, int prio)
{
@@ -1955,7 +1999,8 @@ static struct dcb_app_type *dcb_app_lookup(const struct dcb_app *app,
return NULL;
}
-static int dcb_app_add(const struct dcb_app *app, int ifindex)
+static int dcb_app_add(struct list_head *list, const struct dcb_app *app,
+ int ifindex)
{
struct dcb_app_type *entry;
@@ -1965,7 +2010,7 @@ static int dcb_app_add(const struct dcb_app *app, int ifindex)
memcpy(&entry->app, app, sizeof(*app));
entry->ifindex = ifindex;
- list_add(&entry->list, &dcb_app_list);
+ list_add(&entry->list, list);
return 0;
}
@@ -2028,7 +2073,7 @@ int dcb_setapp(struct net_device *dev, struct dcb_app *new)
}
/* App type does not exist add new application type */
if (new->priority)
- err = dcb_app_add(new, dev->ifindex);
+ err = dcb_app_add(&dcb_app_list, new, dev->ifindex);
out:
spin_unlock_bh(&dcb_lock);
if (!err)
@@ -2061,6 +2106,63 @@ u8 dcb_ieee_getapp_mask(struct net_device *dev, struct dcb_app *app)
}
EXPORT_SYMBOL(dcb_ieee_getapp_mask);
+/* Get protocol value from rewrite entry. */
+u16 dcb_getrewr(struct net_device *dev, struct dcb_app *app)
+{
+ struct dcb_app_type *itr;
+ u16 proto = 0;
+
+ spin_lock_bh(&dcb_lock);
+ itr = dcb_rewr_lookup(app, dev->ifindex, -1);
+ if (itr)
+ proto = itr->app.protocol;
+ spin_unlock_bh(&dcb_lock);
+
+ return proto;
+}
+EXPORT_SYMBOL(dcb_getrewr);
+
+/* Add rewrite entry to the rewrite list. */
+int dcb_setrewr(struct net_device *dev, struct dcb_app *new)
+{
+ int err;
+
+ spin_lock_bh(&dcb_lock);
+ /* Search for existing match and abort if found. */
+ if (dcb_rewr_lookup(new, dev->ifindex, new->protocol)) {
+ err = -EEXIST;
+ goto out;
+ }
+
+ err = dcb_app_add(&dcb_rewr_list, new, dev->ifindex);
+out:
+ spin_unlock_bh(&dcb_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(dcb_setrewr);
+
+/* Delete rewrite entry from the rewrite list. */
+int dcb_delrewr(struct net_device *dev, struct dcb_app *del)
+{
+ struct dcb_app_type *itr;
+ int err = -ENOENT;
+
+ spin_lock_bh(&dcb_lock);
+ /* Search for existing match and remove it. */
+ itr = dcb_rewr_lookup(del, dev->ifindex, del->protocol);
+ if (itr) {
+ list_del(&itr->list);
+ kfree(itr);
+ err = 0;
+ }
+
+ spin_unlock_bh(&dcb_lock);
+
+ return err;
+}
+EXPORT_SYMBOL(dcb_delrewr);
+
/**
* dcb_ieee_setapp - add IEEE dcb application data to app list
* @dev: network interface
@@ -2088,7 +2190,7 @@ int dcb_ieee_setapp(struct net_device *dev, struct dcb_app *new)
goto out;
}
- err = dcb_app_add(new, dev->ifindex);
+ err = dcb_app_add(&dcb_app_list, new, dev->ifindex);
out:
spin_unlock_bh(&dcb_lock);
if (!err)
@@ -2130,6 +2232,58 @@ int dcb_ieee_delapp(struct net_device *dev, struct dcb_app *del)
}
EXPORT_SYMBOL(dcb_ieee_delapp);
+/* dcb_getrewr_prio_pcp_mask_map - For a given device, find mapping from
+ * priorities to the PCP and DEI values assigned to that priority.
+ */
+void dcb_getrewr_prio_pcp_mask_map(const struct net_device *dev,
+ struct dcb_rewr_prio_pcp_map *p_map)
+{
+ int ifindex = dev->ifindex;
+ struct dcb_app_type *itr;
+ u8 prio;
+
+ memset(p_map->map, 0, sizeof(p_map->map));
+
+ spin_lock_bh(&dcb_lock);
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->ifindex == ifindex &&
+ itr->app.selector == DCB_APP_SEL_PCP &&
+ itr->app.protocol < 16 &&
+ itr->app.priority < IEEE_8021QAZ_MAX_TCS) {
+ prio = itr->app.priority;
+ p_map->map[prio] |= 1 << itr->app.protocol;
+ }
+ }
+ spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_getrewr_prio_pcp_mask_map);
+
+/* dcb_getrewr_prio_dscp_mask_map - For a given device, find mapping from
+ * priorities to the DSCP values assigned to that priority.
+ */
+void dcb_getrewr_prio_dscp_mask_map(const struct net_device *dev,
+ struct dcb_ieee_app_prio_map *p_map)
+{
+ int ifindex = dev->ifindex;
+ struct dcb_app_type *itr;
+ u8 prio;
+
+ memset(p_map->map, 0, sizeof(p_map->map));
+
+ spin_lock_bh(&dcb_lock);
+ list_for_each_entry(itr, &dcb_rewr_list, list) {
+ if (itr->ifindex == ifindex &&
+ itr->app.selector == IEEE_8021QAZ_APP_SEL_DSCP &&
+ itr->app.protocol < 64 &&
+ itr->app.priority < IEEE_8021QAZ_MAX_TCS) {
+ prio = itr->app.priority;
+ p_map->map[prio] |= 1ULL << itr->app.protocol;
+ }
+ }
+ spin_unlock_bh(&dcb_lock);
+}
+EXPORT_SYMBOL(dcb_getrewr_prio_dscp_mask_map);
+
/*
* dcb_ieee_getapp_prio_dscp_mask_map - For a given device, find mapping from
* priorities to the DSCP values assigned to that priority. Initialize p_map
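
Both new map helpers above reduce the rewrite list to a per-priority bitmask: for every in-range entry, set bit `protocol` in map[priority], using the same 16-value PCP and 64-value DSCP bounds checked in the loops. A standalone model of that fold with illustrative sizes:

#include <stdint.h>
#include <stdio.h>

#define MAX_TCS 8

struct entry {
	uint8_t priority;
	uint8_t protocol;
};

/* Fold entries into map[prio] |= 1 << protocol, skipping out-of-range ones. */
static void build_pcp_map(const struct entry *e, int n, uint16_t map[MAX_TCS])
{
	int i;

	for (i = 0; i < MAX_TCS; i++)
		map[i] = 0;
	for (i = 0; i < n; i++) {
		if (e[i].protocol < 16 && e[i].priority < MAX_TCS)
			map[e[i].priority] |= 1u << e[i].protocol;
	}
}

int main(void)
{
	const struct entry list[] = { { 2, 5 }, { 2, 7 }, { 7, 15 }, { 3, 99 } };
	uint16_t map[MAX_TCS];
	int i;

	build_pcp_map(list, 4, map);	/* the {3, 99} entry is skipped */
	for (i = 0; i < MAX_TCS; i++)
		printf("prio %d -> 0x%04x\n", i, (unsigned)map[i]);
	return 0;
}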
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 4260fe466993..b9d7c3dd1cb3 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -551,11 +551,9 @@ static struct sock *dccp_v6_request_recv_sock(const struct sock *sk,
*own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash), NULL);
/* Clone pktoptions received with SYN, if we own the req */
if (*own_req && ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts, GFP_ATOMIC);
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions)
- skb_set_owner_r(newnp->pktoptions, newsk);
}
return newsk;
@@ -615,7 +613,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, GFP_ATOMIC);
+ opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
if (dccp_rcv_established(sk, skb, dccp_hdr(skb), skb->len))
@@ -679,7 +677,6 @@ ipv6_pktoptions:
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb,
&DCCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
memmove(IP6CB(opt_skb),
&DCCP_SKB_CB(opt_skb)->header.h6,
sizeof(struct inet6_skb_parm));
diff --git a/net/devlink/Makefile b/net/devlink/Makefile
new file mode 100644
index 000000000000..ef91a76646a3
--- /dev/null
+++ b/net/devlink/Makefile
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-y := leftover.o core.o netlink.o dev.o health.o
diff --git a/net/devlink/core.c b/net/devlink/core.c
new file mode 100644
index 000000000000..777b091ef74d
--- /dev/null
+++ b/net/devlink/core.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+
+#include "devl_internal.h"
+
+DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
+
+void *devlink_priv(struct devlink *devlink)
+{
+ return &devlink->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_priv);
+
+struct devlink *priv_to_devlink(void *priv)
+{
+ return container_of(priv, struct devlink, priv);
+}
+EXPORT_SYMBOL_GPL(priv_to_devlink);
+
+struct device *devlink_to_dev(const struct devlink *devlink)
+{
+ return devlink->dev;
+}
+EXPORT_SYMBOL_GPL(devlink_to_dev);
+
+struct net *devlink_net(const struct devlink *devlink)
+{
+ return read_pnet(&devlink->_net);
+}
+EXPORT_SYMBOL_GPL(devlink_net);
+
+void devl_assert_locked(struct devlink *devlink)
+{
+ lockdep_assert_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_assert_locked);
+
+#ifdef CONFIG_LOCKDEP
+/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */
+bool devl_lock_is_held(struct devlink *devlink)
+{
+ return lockdep_is_held(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock_is_held);
+#endif
+
+void devl_lock(struct devlink *devlink)
+{
+ mutex_lock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_lock);
+
+int devl_trylock(struct devlink *devlink)
+{
+ return mutex_trylock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_trylock);
+
+void devl_unlock(struct devlink *devlink)
+{
+ mutex_unlock(&devlink->lock);
+}
+EXPORT_SYMBOL_GPL(devl_unlock);
+
+/**
+ * devlink_try_get() - try to obtain a reference on a devlink instance
+ * @devlink: instance to reference
+ *
+ * Obtain a reference on a devlink instance. A reference on a devlink instance
+ * only implies that it's safe to take the instance lock. It does not imply
+ * that the instance is registered, use devl_is_registered() after taking
+ * the instance lock to check registration status.
+ */
+struct devlink *__must_check devlink_try_get(struct devlink *devlink)
+{
+ if (refcount_inc_not_zero(&devlink->refcount))
+ return devlink;
+ return NULL;
+}
+
+static void devlink_release(struct work_struct *work)
+{
+ struct devlink *devlink;
+
+ devlink = container_of(to_rcu_work(work), struct devlink, rwork);
+
+ mutex_destroy(&devlink->lock);
+ lockdep_unregister_key(&devlink->lock_key);
+ kfree(devlink);
+}
+
+void devlink_put(struct devlink *devlink)
+{
+ if (refcount_dec_and_test(&devlink->refcount))
+ queue_rcu_work(system_wq, &devlink->rwork);
+}
+
+struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp)
+{
+ struct devlink *devlink = NULL;
+
+ rcu_read_lock();
+retry:
+ devlink = xa_find(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED);
+ if (!devlink)
+ goto unlock;
+
+ if (!devlink_try_get(devlink))
+ goto next;
+ if (!net_eq(devlink_net(devlink), net)) {
+ devlink_put(devlink);
+ goto next;
+ }
+unlock:
+ rcu_read_unlock();
+ return devlink;
+
+next:
+ (*indexp)++;
+ goto retry;
+}
+
+/**
+ * devl_register - Register devlink instance
+ * @devlink: devlink
+ */
+int devl_register(struct devlink *devlink)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ devl_assert_locked(devlink);
+
+ xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+ devlink_notify_register(devlink);
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devl_register);
+
+void devlink_register(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_register(devlink);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_register);
+
+/**
+ * devl_unregister - Unregister devlink instance
+ * @devlink: devlink
+ */
+void devl_unregister(struct devlink *devlink)
+{
+ ASSERT_DEVLINK_REGISTERED(devlink);
+ devl_assert_locked(devlink);
+
+ devlink_notify_unregister(devlink);
+ xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+}
+EXPORT_SYMBOL_GPL(devl_unregister);
+
+void devlink_unregister(struct devlink *devlink)
+{
+ devl_lock(devlink);
+ devl_unregister(devlink);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_unregister);
+
+/**
+ * devlink_alloc_ns - Allocate new devlink instance resources
+ * in specific namespace
+ *
+ * @ops: ops
+ * @priv_size: size of user private data
+ * @net: net namespace
+ * @dev: parent device
+ *
+ * Allocate new devlink instance resources, including devlink index
+ * and name.
+ */
+struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
+ size_t priv_size, struct net *net,
+ struct device *dev)
+{
+ struct devlink *devlink;
+ static u32 last_id;
+ int ret;
+
+ WARN_ON(!ops || !dev);
+ if (!devlink_reload_actions_valid(ops))
+ return NULL;
+
+ devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
+ if (!devlink)
+ return NULL;
+
+ ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
+ &last_id, GFP_KERNEL);
+ if (ret < 0)
+ goto err_xa_alloc;
+
+ devlink->netdevice_nb.notifier_call = devlink_port_netdevice_event;
+ ret = register_netdevice_notifier(&devlink->netdevice_nb);
+ if (ret)
+ goto err_register_netdevice_notifier;
+
+ devlink->dev = dev;
+ devlink->ops = ops;
+ xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
+ xa_init_flags(&devlink->params, XA_FLAGS_ALLOC);
+ xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
+ write_pnet(&devlink->_net, net);
+ INIT_LIST_HEAD(&devlink->rate_list);
+ INIT_LIST_HEAD(&devlink->linecard_list);
+ INIT_LIST_HEAD(&devlink->sb_list);
+ INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
+ INIT_LIST_HEAD(&devlink->resource_list);
+ INIT_LIST_HEAD(&devlink->region_list);
+ INIT_LIST_HEAD(&devlink->reporter_list);
+ INIT_LIST_HEAD(&devlink->trap_list);
+ INIT_LIST_HEAD(&devlink->trap_group_list);
+ INIT_LIST_HEAD(&devlink->trap_policer_list);
+ INIT_RCU_WORK(&devlink->rwork, devlink_release);
+ lockdep_register_key(&devlink->lock_key);
+ mutex_init(&devlink->lock);
+ lockdep_set_class(&devlink->lock, &devlink->lock_key);
+ refcount_set(&devlink->refcount, 1);
+
+ return devlink;
+
+err_register_netdevice_notifier:
+ xa_erase(&devlinks, devlink->index);
+err_xa_alloc:
+ kfree(devlink);
+ return NULL;
+}
+EXPORT_SYMBOL_GPL(devlink_alloc_ns);
+
+/**
+ * devlink_free - Free devlink instance resources
+ *
+ * @devlink: devlink
+ */
+void devlink_free(struct devlink *devlink)
+{
+ ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+
+ WARN_ON(!list_empty(&devlink->trap_policer_list));
+ WARN_ON(!list_empty(&devlink->trap_group_list));
+ WARN_ON(!list_empty(&devlink->trap_list));
+ WARN_ON(!list_empty(&devlink->reporter_list));
+ WARN_ON(!list_empty(&devlink->region_list));
+ WARN_ON(!list_empty(&devlink->resource_list));
+ WARN_ON(!list_empty(&devlink->dpipe_table_list));
+ WARN_ON(!list_empty(&devlink->sb_list));
+ WARN_ON(!list_empty(&devlink->rate_list));
+ WARN_ON(!list_empty(&devlink->linecard_list));
+ WARN_ON(!xa_empty(&devlink->ports));
+
+ xa_destroy(&devlink->snapshot_ids);
+ xa_destroy(&devlink->params);
+ xa_destroy(&devlink->ports);
+
+ WARN_ON_ONCE(unregister_netdevice_notifier(&devlink->netdevice_nb));
+
+ xa_erase(&devlinks, devlink->index);
+
+ devlink_put(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_free);
+
+static void __net_exit devlink_pernet_pre_exit(struct net *net)
+{
+ struct devlink *devlink;
+ u32 actions_performed;
+ unsigned long index;
+ int err;
+
+ /* In case network namespace is getting destroyed, reload
+ * all devlink instances from this namespace into init_net.
+ */
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
+ devl_lock(devlink);
+ err = 0;
+ if (devl_is_registered(devlink))
+ err = devlink_reload(devlink, &init_net,
+ DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_LIMIT_UNSPEC,
+ &actions_performed, NULL);
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ if (err && err != -EOPNOTSUPP)
+ pr_warn("Failed to reload devlink instance into init_net\n");
+ }
+}
+
+static struct pernet_operations devlink_pernet_ops __net_initdata = {
+ .pre_exit = devlink_pernet_pre_exit,
+};
+
+static int __init devlink_init(void)
+{
+ int err;
+
+ err = genl_register_family(&devlink_nl_family);
+ if (err)
+ goto out;
+ err = register_pernet_subsys(&devlink_pernet_ops);
+
+out:
+ WARN_ON(err);
+ return err;
+}
+
+subsys_initcall(devlink_init);
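
devlink_try_get() and devlink_put() implement the usual conditional-reference pattern: a get succeeds only while the count is still nonzero, and the final put schedules release. A minimal userspace sketch with C11 atomics; the RCU-deferred devlink_release() is replaced here by a direct free:

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct inst {
	atomic_int refcount;
};

/* Succeeds only while someone else still holds a reference. */
static struct inst *inst_try_get(struct inst *p)
{
	int old = atomic_load(&p->refcount);

	while (old != 0) {
		if (atomic_compare_exchange_weak(&p->refcount, &old, old + 1))
			return p;
	}
	return NULL;
}

static void inst_put(struct inst *p)
{
	if (atomic_fetch_sub(&p->refcount, 1) == 1) {
		/* last reference; the kernel defers this via queue_rcu_work() */
		printf("released\n");
		free(p);
	}
}

int main(void)
{
	struct inst *p = malloc(sizeof(*p));

	if (!p)
		return 1;
	atomic_init(&p->refcount, 1);

	if (inst_try_get(p))	/* temporary user */
		inst_put(p);
	inst_put(p);		/* drops the initial reference, frees */
	return 0;
}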
diff --git a/net/devlink/dev.c b/net/devlink/dev.c
new file mode 100644
index 000000000000..bf1d6f1bcfc7
--- /dev/null
+++ b/net/devlink/dev.c
@@ -0,0 +1,1346 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include "devl_internal.h"
+
+struct devlink_info_req {
+ struct sk_buff *msg;
+ void (*version_cb)(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv);
+ void *version_cb_priv;
+};
+
+struct devlink_reload_combination {
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+};
+
+static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
+ {
+ /* can't reinitialize driver with no down time */
+ .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
+ },
+};
+
+static bool
+devlink_reload_combination_is_invalid(enum devlink_reload_action action,
+ enum devlink_reload_limit limit)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
+ if (devlink_reload_invalid_combinations[i].action == action &&
+ devlink_reload_invalid_combinations[i].limit == limit)
+ return true;
+ return false;
+}
+
+static bool
+devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
+{
+ return test_bit(action, &devlink->ops->reload_actions);
+}
+
+static bool
+devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
+{
+ return test_bit(limit, &devlink->ops->reload_limits);
+}
+
+static int devlink_reload_stat_put(struct sk_buff *msg,
+ enum devlink_reload_limit limit, u32 value)
+{
+ struct nlattr *reload_stats_entry;
+
+ reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
+ if (!reload_stats_entry)
+ return -EMSGSIZE;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
+ nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
+ goto nla_put_failure;
+ nla_nest_end(msg, reload_stats_entry);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_entry);
+ return -EMSGSIZE;
+}
+
+static int
+devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
+{
+ struct nlattr *reload_stats_attr, *act_info, *act_stats;
+ int i, j, stat_idx;
+ u32 value;
+
+ if (!is_remote)
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
+ else
+ reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
+
+ if (!reload_stats_attr)
+ return -EMSGSIZE;
+
+ for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
+ if ((!is_remote &&
+ !devlink_reload_action_is_supported(devlink, i)) ||
+ i == DEVLINK_RELOAD_ACTION_UNSPEC)
+ continue;
+ act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
+ if (!act_info)
+ goto nla_put_failure;
+
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
+ goto action_info_nest_cancel;
+ act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
+ if (!act_stats)
+ goto action_info_nest_cancel;
+
+ for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
+			/* Remote stats are shown even if not locally
+			 * supported. Stats of actions with an unspecified
+			 * limit are shown even though drivers do not need
+			 * to register the unspecified limit.
+			 */
+ if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
+ !devlink_reload_limit_is_supported(devlink, j)) ||
+ devlink_reload_combination_is_invalid(i, j))
+ continue;
+
+ stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
+ if (!is_remote)
+ value = devlink->stats.reload_stats[stat_idx];
+ else
+ value = devlink->stats.remote_reload_stats[stat_idx];
+ if (devlink_reload_stat_put(msg, j, value))
+ goto action_stats_nest_cancel;
+ }
+ nla_nest_end(msg, act_stats);
+ nla_nest_end(msg, act_info);
+ }
+ nla_nest_end(msg, reload_stats_attr);
+ return 0;
+
+action_stats_nest_cancel:
+ nla_nest_cancel(msg, act_stats);
+action_info_nest_cancel:
+ nla_nest_cancel(msg, act_info);
+nla_put_failure:
+ nla_nest_cancel(msg, reload_stats_attr);
+ return -EMSGSIZE;
+}
+
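
The reload statistics live in a flat array indexed by limit * __DEVLINK_RELOAD_ACTION_MAX + action, i.e. a row-major [limit][action] matrix; the same expression appears again in __devlink_reload_stats_update() below. A tiny model of that indexing with illustrative sizes:

#include <stdio.h>

#define ACTION_MAX 3	/* illustrative; the kernel uses __DEVLINK_RELOAD_ACTION_MAX */
#define LIMIT_MAX  2

static unsigned int stats[LIMIT_MAX * ACTION_MAX];

/* Row-major flattening: one row per limit, one column per action. */
static unsigned int *stat_slot(int limit, int action)
{
	return &stats[limit * ACTION_MAX + action];
}

int main(void)
{
	int limit, action;

	(*stat_slot(1, 2))++;
	(*stat_slot(1, 2))++;
	(*stat_slot(0, 1))++;

	for (limit = 0; limit < LIMIT_MAX; limit++)
		for (action = 0; action < ACTION_MAX; action++)
			printf("limit=%d action=%d count=%u\n",
			       limit, action, *stat_slot(limit, action));
	return 0;
}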
+static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct nlattr *dev_stats;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+ if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
+ goto nla_put_failure;
+
+ dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
+ if (!dev_stats)
+ goto nla_put_failure;
+
+ if (devlink_reload_stats_put(msg, devlink, false))
+ goto dev_stats_nest_cancel;
+ if (devlink_reload_stats_put(msg, devlink, true))
+ goto dev_stats_nest_cancel;
+
+ nla_nest_end(msg, dev_stats);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+dev_stats_nest_cancel:
+ nla_nest_cancel(msg, dev_stats);
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ info->snd_portid, info->snd_seq, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ return devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI);
+}
+
+const struct devlink_cmd devl_cmd_get = {
+ .dump_one = devlink_nl_cmd_get_dump_one,
+};
+
+static void devlink_reload_failed_set(struct devlink *devlink,
+ bool reload_failed)
+{
+ if (devlink->reload_failed == reload_failed)
+ return;
+ devlink->reload_failed = reload_failed;
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+bool devlink_is_reload_failed(const struct devlink *devlink)
+{
+ return devlink->reload_failed;
+}
+EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
+
+static void
+__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
+ enum devlink_reload_limit limit, u32 actions_performed)
+{
+ unsigned long actions = actions_performed;
+ int stat_idx;
+ int action;
+
+ for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
+ stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
+ reload_stats[stat_idx]++;
+ }
+ devlink_notify(devlink, DEVLINK_CMD_NEW);
+}
+
+static void
+devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
+ actions_performed);
+}
+
+/**
+ * devlink_remote_reload_actions_performed - Update devlink on reload actions
+ *	performed that are not a direct result of a devlink reload call.
+ *
+ * This should be called by a driver after performing reload actions that were
+ * not the result of a devlink reload call. For example, fw_activate may have
+ * been performed because a devlink reload on another host triggered it.
+ * The motivation is to keep data on reload actions performed on this device,
+ * whether or not they came from a direct devlink reload call.
+ *
+ * @devlink: devlink
+ * @limit: reload limit
+ * @actions_performed: bitmask of actions performed
+ */
+void devlink_remote_reload_actions_performed(struct devlink *devlink,
+ enum devlink_reload_limit limit,
+ u32 actions_performed)
+{
+ if (WARN_ON(!actions_performed ||
+ actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
+ limit > DEVLINK_RELOAD_LIMIT_MAX))
+ return;
+
+ __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
+ actions_performed);
+}
+EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
+
+static struct net *devlink_netns_get(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
+ struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
+ struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
+ struct net *net;
+
+ if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
+ NL_SET_ERR_MSG(info->extack, "multiple netns identifying attributes specified");
+ return ERR_PTR(-EINVAL);
+ }
+
+ if (netns_pid_attr) {
+ net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
+ } else if (netns_fd_attr) {
+ net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
+ } else if (netns_id_attr) {
+ net = get_net_ns_by_id(sock_net(skb->sk),
+ nla_get_u32(netns_id_attr));
+ if (!net)
+ net = ERR_PTR(-EINVAL);
+ } else {
+ WARN_ON(1);
+ net = ERR_PTR(-EINVAL);
+ }
+ if (IS_ERR(net)) {
+ NL_SET_ERR_MSG(info->extack, "Unknown network namespace");
+ return ERR_PTR(-EINVAL);
+ }
+ if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
+ put_net(net);
+ return ERR_PTR(-EPERM);
+ }
+ return net;
+}
+
+static void devlink_reload_netns_change(struct devlink *devlink,
+ struct net *curr_net,
+ struct net *dest_net)
+{
+	/* Userspace needs to be notified about devlink objects
+	 * removed from the original and entering the new network namespace.
+	 * The rest of the devlink objects are re-created during the reload
+	 * process, so their notifications are generated separately.
+	 */
+ devlink_notify_unregister(devlink);
+ write_pnet(&devlink->_net, dest_net);
+ devlink_notify_register(devlink);
+}
+
+int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack)
+{
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ struct net *curr_net;
+ int err;
+
+ memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats));
+
+ err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
+ if (err)
+ return err;
+
+ curr_net = devlink_net(devlink);
+ if (dest_net && !net_eq(dest_net, curr_net))
+ devlink_reload_netns_change(devlink, curr_net, dest_net);
+
+ if (action == DEVLINK_RELOAD_ACTION_DRIVER_REINIT)
+ devlink_params_driverinit_load_new(devlink);
+
+ err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
+ devlink_reload_failed_set(devlink, !!err);
+ if (err)
+ return err;
+
+ WARN_ON(!(*actions_performed & BIT(action)));
+	/* Catch drivers that update the remote reload stats from within a devlink reload */
+ WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
+ sizeof(remote_reload_stats)));
+ devlink_reload_stats_update(devlink, limit, *actions_performed);
+ return 0;
+}
+
+static int
+devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
+ enum devlink_command cmd, struct genl_info *info)
+{
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
+ actions_performed))
+ goto nla_put_failure;
+ genlmsg_end(msg, hdr);
+
+ return genlmsg_reply(msg, info);
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return -EMSGSIZE;
+}
+
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ enum devlink_reload_action action;
+ enum devlink_reload_limit limit;
+ struct net *dest_net = NULL;
+ u32 actions_performed;
+ int err;
+
+ err = devlink_resources_validate(devlink, NULL, info);
+ if (err) {
+ NL_SET_ERR_MSG(info->extack, "resources size validation failed");
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
+ action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
+ else
+ action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
+
+ if (!devlink_reload_action_is_supported(devlink, action)) {
+ NL_SET_ERR_MSG(info->extack, "Requested reload action is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+
+ limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
+ if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
+ struct nla_bitfield32 limits;
+ u32 limits_selected;
+
+ limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
+ limits_selected = limits.value & limits.selector;
+ if (!limits_selected) {
+ NL_SET_ERR_MSG(info->extack, "Invalid limit selected");
+ return -EINVAL;
+ }
+ for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
+ if (limits_selected & BIT(limit))
+ break;
+ /* UAPI enables multiselection, but currently it is not used */
+ if (limits_selected != BIT(limit)) {
+ NL_SET_ERR_MSG(info->extack, "Multiselection of limit is not supported");
+ return -EOPNOTSUPP;
+ }
+ if (!devlink_reload_limit_is_supported(devlink, limit)) {
+ NL_SET_ERR_MSG(info->extack, "Requested limit is not supported by the driver");
+ return -EOPNOTSUPP;
+ }
+ if (devlink_reload_combination_is_invalid(action, limit)) {
+ NL_SET_ERR_MSG(info->extack, "Requested limit is invalid for this action");
+ return -EINVAL;
+ }
+ }
+ if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
+ info->attrs[DEVLINK_ATTR_NETNS_FD] ||
+ info->attrs[DEVLINK_ATTR_NETNS_ID]) {
+ dest_net = devlink_netns_get(skb, info);
+ if (IS_ERR(dest_net))
+ return PTR_ERR(dest_net);
+ if (!net_eq(dest_net, devlink_net(devlink)) &&
+ action != DEVLINK_RELOAD_ACTION_DRIVER_REINIT) {
+ NL_SET_ERR_MSG_MOD(info->extack,
+ "Changing namespace is only supported for reinit action");
+ return -EOPNOTSUPP;
+ }
+ }
+
+ err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
+
+ if (dest_net)
+ put_net(dest_net);
+
+ if (err)
+ return err;
+	/* For backward compatibility, generate a reply only if the user passed attributes */
+ if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
+ return 0;
+
+ return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
+ DEVLINK_CMD_RELOAD, info);
+}
+
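
The limit parsing above accepts a bitfield32 but, for now, exactly one selected bit: mask value with selector, take the lowest set bit, and reject anything beyond it. A userspace model of that check; the struct here only mimics the netlink type:

#include <stdio.h>

struct bitfield32 {
	unsigned int value;
	unsigned int selector;
};

/* Return the index of the single selected bit, -1 if none, -2 if several. */
static int pick_single_bit(struct bitfield32 bf)
{
	unsigned int selected = bf.value & bf.selector;
	int bit;

	if (!selected)
		return -1;
	for (bit = 0; bit < 32; bit++)
		if (selected & (1u << bit))
			break;
	if (selected != (1u << bit))
		return -2;	/* multiselection not supported */
	return bit;
}

int main(void)
{
	struct bitfield32 one = { .value = 0x4, .selector = 0x4 };
	struct bitfield32 many = { .value = 0x6, .selector = 0x6 };

	printf("one:  %d\n", pick_single_bit(one));	/* 2 */
	printf("many: %d\n", pick_single_bit(many));	/* -2 */
	return 0;
}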
+bool devlink_reload_actions_valid(const struct devlink_ops *ops)
+{
+ const struct devlink_reload_combination *comb;
+ int i;
+
+ if (!devlink_reload_supported(ops)) {
+ if (WARN_ON(ops->reload_actions))
+ return false;
+ return true;
+ }
+
+ if (WARN_ON(!ops->reload_actions ||
+ ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
+ ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
+ return false;
+
+ if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
+ ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
+ return false;
+
+ for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
+ comb = &devlink_reload_invalid_combinations[i];
+ if (ops->reload_actions == BIT(comb->action) &&
+ ops->reload_limits == BIT(comb->limit))
+ return false;
+ }
+ return true;
+}
+
+static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ void *hdr;
+ int err = 0;
+ u16 mode;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = devlink_nl_put_handle(msg, devlink);
+ if (err)
+ goto nla_put_failure;
+
+ if (ops->eswitch_mode_get) {
+ err = ops->eswitch_mode_get(devlink, &mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_inline_mode_get) {
+ err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
+ inline_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ if (ops->eswitch_encap_mode_get) {
+ err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
+ if (err)
+ goto nla_put_failure;
+ err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
+ if (err)
+ goto nla_put_failure;
+ }
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
+ info->snd_portid, info->snd_seq, 0);
+
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ const struct devlink_ops *ops = devlink->ops;
+ enum devlink_eswitch_encap_mode encap_mode;
+ u8 inline_mode;
+ int err = 0;
+ u16 mode;
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
+ if (!ops->eswitch_mode_set)
+ return -EOPNOTSUPP;
+ mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
+ err = devlink_rate_nodes_check(devlink, mode, info->extack);
+ if (err)
+ return err;
+ err = ops->eswitch_mode_set(devlink, mode, info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
+ if (!ops->eswitch_inline_mode_set)
+ return -EOPNOTSUPP;
+ inline_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
+ err = ops->eswitch_inline_mode_set(devlink, inline_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
+ if (!ops->eswitch_encap_mode_set)
+ return -EOPNOTSUPP;
+ encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
+ err = ops->eswitch_encap_mode_set(devlink, encap_mode,
+ info->extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
+
+int devlink_info_board_serial_number_put(struct devlink_info_req *req,
+ const char *bsn)
+{
+ if (!req->msg)
+ return 0;
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
+ bsn);
+}
+EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
+
+static int devlink_info_version_put(struct devlink_info_req *req, int attr,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ struct nlattr *nest;
+ int err;
+
+ if (req->version_cb)
+ req->version_cb(version_name, version_type,
+ req->version_cb_priv);
+
+ if (!req->msg)
+ return 0;
+
+ nest = nla_nest_start_noflag(req->msg, attr);
+ if (!nest)
+ return -EMSGSIZE;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
+ version_name);
+ if (err)
+ goto nla_put_failure;
+
+ err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
+ version_value);
+ if (err)
+ goto nla_put_failure;
+
+ nla_nest_end(req->msg, nest);
+
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(req->msg, nest);
+ return err;
+}
+
+int devlink_info_version_fixed_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
+
+int devlink_info_version_stored_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
+
+int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
+
+int devlink_info_version_running_put(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ DEVLINK_INFO_VERSION_TYPE_NONE);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
+
+int devlink_info_version_running_put_ext(struct devlink_info_req *req,
+ const char *version_name,
+ const char *version_value,
+ enum devlink_info_version_type version_type)
+{
+ return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
+ version_name, version_value,
+ version_type);
+}
+EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
+
+static int devlink_nl_driver_info_get(struct device_driver *drv,
+ struct devlink_info_req *req)
+{
+ if (!drv)
+ return 0;
+
+ if (drv->name[0])
+ return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
+ drv->name);
+
+ return 0;
+}
+
+static int
+devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags, struct netlink_ext_ack *extack)
+{
+ struct device *dev = devlink_to_dev(devlink);
+ struct devlink_info_req req = {};
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ req.msg = msg;
+ if (devlink->ops->info_get) {
+ err = devlink->ops->info_get(devlink, &req, extack);
+ if (err)
+ goto err_cancel_msg;
+ }
+
+ err = devlink_nl_driver_info_get(dev->driver, &req);
+ if (err)
+ goto err_cancel_msg;
+
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ info->snd_portid, info->snd_seq, 0,
+ info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_info_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ int err;
+
+ err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+ if (err == -EOPNOTSUPP)
+ err = 0;
+ return err;
+}
+
+const struct devlink_cmd devl_cmd_info_get = {
+ .dump_one = devlink_nl_cmd_info_get_dump_one,
+};
+
+static int devlink_nl_flash_update_fill(struct sk_buff *msg,
+ struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ void *hdr;
+
+ hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto nla_put_failure;
+
+ if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
+ goto out;
+
+ if (params->status_msg &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
+ params->status_msg))
+ goto nla_put_failure;
+ if (params->component &&
+ nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
+ params->component))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
+ params->done, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
+ params->total, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
+ params->timeout, DEVLINK_ATTR_PAD))
+ goto nla_put_failure;
+
+out:
+ genlmsg_end(msg, hdr);
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static void __devlink_flash_update_notify(struct devlink *devlink,
+ enum devlink_command cmd,
+ struct devlink_flash_notify *params)
+{
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
+ cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
+
+ if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
+ return;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
+ if (err)
+ goto out_free_msg;
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+ msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+ return;
+
+out_free_msg:
+ nlmsg_free(msg);
+}
+
+static void devlink_flash_update_begin_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE,
+ &params);
+}
+
+static void devlink_flash_update_end_notify(struct devlink *devlink)
+{
+ struct devlink_flash_notify params = {};
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_END,
+ &params);
+}
+
+void devlink_flash_update_status_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long done,
+ unsigned long total)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .done = done,
+ .total = total,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
+
+void devlink_flash_update_timeout_notify(struct devlink *devlink,
+ const char *status_msg,
+ const char *component,
+ unsigned long timeout)
+{
+ struct devlink_flash_notify params = {
+ .status_msg = status_msg,
+ .component = component,
+ .timeout = timeout,
+ };
+
+ __devlink_flash_update_notify(devlink,
+ DEVLINK_CMD_FLASH_UPDATE_STATUS,
+ &params);
+}
+EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
+
+struct devlink_flash_component_lookup_ctx {
+ const char *lookup_name;
+ bool lookup_name_found;
+};
+
+static void
+devlink_flash_component_lookup_cb(const char *version_name,
+ enum devlink_info_version_type version_type,
+ void *version_cb_priv)
+{
+ struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
+
+ if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
+ lookup_ctx->lookup_name_found)
+ return;
+
+ lookup_ctx->lookup_name_found =
+ !strcmp(lookup_ctx->lookup_name, version_name);
+}
+
+static int devlink_flash_component_get(struct devlink *devlink,
+ struct nlattr *nla_component,
+ const char **p_component,
+ struct netlink_ext_ack *extack)
+{
+ struct devlink_flash_component_lookup_ctx lookup_ctx = {};
+ struct devlink_info_req req = {};
+ const char *component;
+ int ret;
+
+ if (!nla_component)
+ return 0;
+
+ component = nla_data(nla_component);
+
+ if (!devlink->ops->info_get) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "component update is not supported by this device");
+ return -EOPNOTSUPP;
+ }
+
+ lookup_ctx.lookup_name = component;
+ req.version_cb = devlink_flash_component_lookup_cb;
+ req.version_cb_priv = &lookup_ctx;
+
+ ret = devlink->ops->info_get(devlink, &req, NULL);
+ if (ret)
+ return ret;
+
+ if (!lookup_ctx.lookup_name_found) {
+ NL_SET_ERR_MSG_ATTR(extack, nla_component,
+ "selected component is not supported by this device");
+ return -EINVAL;
+ }
+ *p_component = component;
+ return 0;
+}
+
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *nla_overwrite_mask, *nla_file_name;
+ struct devlink_flash_update_params params = {};
+ struct devlink *devlink = info->user_ptr[0];
+ const char *file_name;
+ u32 supported_params;
+ int ret;
+
+ if (!devlink->ops->flash_update)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
+ return -EINVAL;
+
+ ret = devlink_flash_component_get(devlink,
+ info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
+ &params.component, info->extack);
+ if (ret)
+ return ret;
+
+ supported_params = devlink->ops->supported_flash_update_params;
+
+ nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
+ if (nla_overwrite_mask) {
+ struct nla_bitfield32 sections;
+
+ if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
+ "overwrite settings are not supported by this device");
+ return -EOPNOTSUPP;
+ }
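+		/* Only sections which user space both selected and set
+		 * end up in the overwrite mask.
+		 */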
+ sections = nla_get_bitfield32(nla_overwrite_mask);
+ params.overwrite_mask = sections.value & sections.selector;
+ }
+
+ nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
+ file_name = nla_data(nla_file_name);
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret) {
+ NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name,
+ "failed to locate the requested firmware file");
+ return ret;
+ }
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, info->extack);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+
+ return ret;
+}
+
+static void __devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ struct devlink_info_req req = {};
+ const struct nlattr *nlattr;
+ struct sk_buff *msg;
+ int rem, err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ req.msg = msg;
+ err = devlink->ops->info_get(devlink, &req, NULL);
+ if (err)
+ goto free_msg;
+
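+	/* Parse the freshly filled reply skb back and concatenate all
+	 * running version values into the caller's buffer.
+	 */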
+ nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
+ const struct nlattr *kv;
+ int rem_kv;
+
+ if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
+ continue;
+
+ nla_for_each_nested(kv, nlattr, rem_kv) {
+ if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
+ continue;
+
+ strlcat(buf, nla_data(kv), len);
+ strlcat(buf, " ", len);
+ }
+ }
+free_msg:
+ nlmsg_free(msg);
+}
+
+void devlink_compat_running_version(struct devlink *devlink,
+ char *buf, size_t len)
+{
+ if (!devlink->ops->info_get)
+ return;
+
+ devl_lock(devlink);
+ if (devl_is_registered(devlink))
+ __devlink_compat_running_version(devlink, buf, len);
+ devl_unlock(devlink);
+}
+
+int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
+{
+ struct devlink_flash_update_params params = {};
+ int ret;
+
+ devl_lock(devlink);
+ if (!devl_is_registered(devlink)) {
+ ret = -ENODEV;
+ goto out_unlock;
+ }
+
+ if (!devlink->ops->flash_update) {
+ ret = -EOPNOTSUPP;
+ goto out_unlock;
+ }
+
+ ret = request_firmware(&params.fw, file_name, devlink->dev);
+ if (ret)
+ goto out_unlock;
+
+ devlink_flash_update_begin_notify(devlink);
+ ret = devlink->ops->flash_update(devlink, &params, NULL);
+ devlink_flash_update_end_notify(devlink);
+
+ release_firmware(params.fw);
+out_unlock:
+ devl_unlock(devlink);
+
+ return ret;
+}
+
+static int
+devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
+ u32 portid, u32 seq, int flags,
+ struct netlink_ext_ack *extack)
+{
+ struct nlattr *selftests;
+ void *hdr;
+ int err;
+ int i;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
+ DEVLINK_CMD_SELFTESTS_GET);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ err = -EMSGSIZE;
+ if (devlink_nl_put_handle(msg, devlink))
+ goto err_cancel_msg;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto err_cancel_msg;
+
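+	/* Put a flag attribute for every selftest the driver reports
+	 * as supported.
+	 */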
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ if (devlink->ops->selftest_check(devlink, i, extack)) {
+ err = nla_put_flag(msg, i);
+ if (err)
+ goto err_cancel_msg;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+err_cancel_msg:
+ genlmsg_cancel(msg, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct sk_buff *msg;
+ int err;
+
+ if (!devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
+ info->snd_seq, 0, info->extack);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_selftests_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ if (!devlink->ops->selftest_check)
+ return 0;
+
+ return devlink_nl_selftests_fill(msg, devlink,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq, NLM_F_MULTI,
+ cb->extack);
+}
+
+const struct devlink_cmd devl_cmd_selftests_get = {
+ .dump_one = devlink_nl_cmd_selftests_get_dump_one,
+};
+
+static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
+ enum devlink_selftest_status test_status)
+{
+ struct nlattr *result_attr;
+
+ result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
+ if (!result_attr)
+ return -EMSGSIZE;
+
+ if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
+ nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
+ test_status))
+ goto nla_put_failure;
+
+ nla_nest_end(skb, result_attr);
+ return 0;
+
+nla_put_failure:
+ nla_nest_cancel(skb, result_attr);
+ return -EMSGSIZE;
+}
+
+static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
+ [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
+};
+
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info)
+{
+ struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
+ struct devlink *devlink = info->user_ptr[0];
+ struct nlattr *attrs, *selftests;
+ struct sk_buff *msg;
+ void *hdr;
+ int err;
+ int i;
+
+ if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
+ return -EOPNOTSUPP;
+
+ if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
+ return -EINVAL;
+
+ attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
+
+ err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
+ devlink_selftest_nl_policy, info->extack);
+ if (err < 0)
+ return err;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = -EMSGSIZE;
+ hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
+ if (!hdr)
+ goto free_msg;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
+ if (!selftests)
+ goto genlmsg_cancel;
+
+ for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
+ i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
+ enum devlink_selftest_status test_status;
+
+ if (nla_get_flag(tb[i])) {
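+			/* Tests which the driver does not support are
+			 * reported as skipped rather than failing the
+			 * whole request.
+			 */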
+ if (!devlink->ops->selftest_check(devlink, i,
+ info->extack)) {
+ if (devlink_selftest_result_put(msg, i,
+ DEVLINK_SELFTEST_STATUS_SKIP))
+ goto selftests_nest_cancel;
+ continue;
+ }
+
+ test_status = devlink->ops->selftest_run(devlink, i,
+ info->extack);
+ if (devlink_selftest_result_put(msg, i, test_status))
+ goto selftests_nest_cancel;
+ }
+ }
+
+ nla_nest_end(msg, selftests);
+ genlmsg_end(msg, hdr);
+ return genlmsg_reply(msg, info);
+
+selftests_nest_cancel:
+ nla_nest_cancel(msg, selftests);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+free_msg:
+ nlmsg_free(msg);
+ return err;
+}
diff --git a/net/devlink/devl_internal.h b/net/devlink/devl_internal.h
new file mode 100644
index 000000000000..e133f423294a
--- /dev/null
+++ b/net/devlink/devl_internal.h
@@ -0,0 +1,239 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <linux/mutex.h>
+#include <linux/netdevice.h>
+#include <linux/notifier.h>
+#include <linux/types.h>
+#include <linux/workqueue.h>
+#include <linux/xarray.h>
+#include <net/devlink.h>
+#include <net/net_namespace.h>
+
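+/* xarray mark set on a devlink instance between devlink_register() and
+ * devlink_unregister(), i.e. while it is visible to user space.
+ */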
+#define DEVLINK_REGISTERED XA_MARK_1
+
+#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
+ (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
+
+struct devlink_dev_stats {
+ u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+ u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
+};
+
+struct devlink {
+ u32 index;
+ struct xarray ports;
+ struct list_head rate_list;
+ struct list_head sb_list;
+ struct list_head dpipe_table_list;
+ struct list_head resource_list;
+ struct xarray params;
+ struct list_head region_list;
+ struct list_head reporter_list;
+ struct devlink_dpipe_headers *dpipe_headers;
+ struct list_head trap_list;
+ struct list_head trap_group_list;
+ struct list_head trap_policer_list;
+ struct list_head linecard_list;
+ const struct devlink_ops *ops;
+ struct xarray snapshot_ids;
+ struct devlink_dev_stats stats;
+ struct device *dev;
+ possible_net_t _net;
+ /* Serializes access to devlink instance specific objects such as
+ * port, sb, dpipe, resource, params, region, traps and more.
+ */
+ struct mutex lock;
+ struct lock_class_key lock_key;
+ u8 reload_failed:1;
+ refcount_t refcount;
+ struct rcu_work rwork;
+ struct notifier_block netdevice_nb;
+ char priv[] __aligned(NETDEV_ALIGN);
+};
+
+extern struct xarray devlinks;
+extern struct genl_family devlink_nl_family;
+
+/* devlink instances are open to access from user space only after the
+ * devlink_register() call. Such a logical barrier allows us to have certain
+ * expectations related to locking.
+ *
+ * Before *_register() - we are in the initialization stage and no parallel
+ * access to the devlink instance is possible. All drivers perform that phase
+ * while implicitly holding device_lock.
+ *
+ * After *_register() - users and the driver can access the devlink instance
+ * at the same time.
+ */
+#define ASSERT_DEVLINK_REGISTERED(d) \
+ WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
+ WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
+
+/* Iterate over devlink pointers for which a reference could be taken.
+ * devlink_put() needs to be called for each iterated devlink pointer
+ * in the loop body in order to release the reference.
+ */
+#define devlinks_xa_for_each_registered_get(net, index, devlink) \
+ for (index = 0; (devlink = devlinks_xa_find_get(net, &index)); index++)
+
+struct devlink *devlinks_xa_find_get(struct net *net, unsigned long *indexp);
+
+static inline bool devl_is_registered(struct devlink *devlink)
+{
+ devl_assert_locked(devlink);
+ return xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
+}
+
+/* Netlink */
+#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
+#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
+#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
+#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
+#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
+
+enum devlink_multicast_groups {
+ DEVLINK_MCGRP_CONFIG,
+};
+
+/* state held across netlink dumps */
+struct devlink_nl_dump_state {
+ unsigned long instance;
+ int idx;
+ union {
+ /* DEVLINK_CMD_REGION_READ */
+ struct {
+ u64 start_offset;
+ };
+ /* DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET */
+ struct {
+ u64 dump_ts;
+ };
+ };
+};
+
+struct devlink_cmd {
+ int (*dump_one)(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb);
+};
+
+extern const struct genl_small_ops devlink_nl_ops[56];
+
+struct devlink *
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs);
+
+void devlink_notify_unregister(struct devlink *devlink);
+void devlink_notify_register(struct devlink *devlink);
+
+int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb);
+
+static inline struct devlink_nl_dump_state *
+devlink_dump_state(struct netlink_callback *cb)
+{
+ NL_ASSERT_DUMP_CTX_FITS(struct devlink_nl_dump_state);
+
+ return (struct devlink_nl_dump_state *)cb->ctx;
+}
+
+static inline int
+devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
+{
+ if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
+ return -EMSGSIZE;
+ if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
+ return -EMSGSIZE;
+ return 0;
+}
+
+/* Commands */
+extern const struct devlink_cmd devl_cmd_get;
+extern const struct devlink_cmd devl_cmd_port_get;
+extern const struct devlink_cmd devl_cmd_sb_get;
+extern const struct devlink_cmd devl_cmd_sb_pool_get;
+extern const struct devlink_cmd devl_cmd_sb_port_pool_get;
+extern const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get;
+extern const struct devlink_cmd devl_cmd_param_get;
+extern const struct devlink_cmd devl_cmd_region_get;
+extern const struct devlink_cmd devl_cmd_info_get;
+extern const struct devlink_cmd devl_cmd_health_reporter_get;
+extern const struct devlink_cmd devl_cmd_trap_get;
+extern const struct devlink_cmd devl_cmd_trap_group_get;
+extern const struct devlink_cmd devl_cmd_trap_policer_get;
+extern const struct devlink_cmd devl_cmd_rate_get;
+extern const struct devlink_cmd devl_cmd_linecard_get;
+extern const struct devlink_cmd devl_cmd_selftests_get;
+
+/* Notify */
+void devlink_notify(struct devlink *devlink, enum devlink_command cmd);
+
+/* Ports */
+int devlink_port_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr);
+
+struct devlink_port *
+devlink_port_get_from_info(struct devlink *devlink, struct genl_info *info);
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs);
+
+/* Reload */
+bool devlink_reload_actions_valid(const struct devlink_ops *ops);
+int devlink_reload(struct devlink *devlink, struct net *dest_net,
+ enum devlink_reload_action action,
+ enum devlink_reload_limit limit,
+ u32 *actions_performed, struct netlink_ext_ack *extack);
+
+static inline bool devlink_reload_supported(const struct devlink_ops *ops)
+{
+ return ops->reload_down && ops->reload_up;
+}
+
+/* Params */
+void devlink_params_driverinit_load_new(struct devlink *devlink);
+
+/* Resources */
+struct devlink_resource;
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info);
+
+/* Line cards */
+struct devlink_linecard;
+
+struct devlink_linecard *
+devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info);
+
+/* Rates */
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack);
+struct devlink_rate *
+devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info);
+struct devlink_rate *
+devlink_rate_node_get_from_info(struct devlink *devlink,
+ struct genl_info *info);
+/* Devlink nl cmds */
+int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_info_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_flash_update(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_selftests_run(struct sk_buff *skb, struct genl_info *info);
+int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb);
+int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info);
+int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info);
diff --git a/net/devlink/health.c b/net/devlink/health.c
new file mode 100644
index 000000000000..0839706d5741
--- /dev/null
+++ b/net/devlink/health.c
@@ -0,0 +1,1333 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+#include <trace/events/devlink.h>
+#include "devl_internal.h"
+
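+/* A formatted message (fmsg) is accumulated as a list of typed items and
+ * only serialized into netlink attributes when the message is sent.
+ */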
+struct devlink_fmsg_item {
+ struct list_head list;
+ int attrtype;
+ u8 nla_type;
+ u16 len;
+ int value[];
+};
+
+struct devlink_fmsg {
+ struct list_head item_list;
+	bool putting_binary; /* This flag forces enclosing of binary data
+			      * in array brackets. It forces use of the
+			      * designated API:
+ * devlink_fmsg_binary_pair_nest_start()
+ * devlink_fmsg_binary_pair_nest_end()
+ */
+};
+
+static struct devlink_fmsg *devlink_fmsg_alloc(void)
+{
+ struct devlink_fmsg *fmsg;
+
+ fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
+ if (!fmsg)
+ return NULL;
+
+ INIT_LIST_HEAD(&fmsg->item_list);
+
+ return fmsg;
+}
+
+static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
+{
+ struct devlink_fmsg_item *item, *tmp;
+
+ list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
+ list_del(&item->list);
+ kfree(item);
+ }
+ kfree(fmsg);
+}
+
+struct devlink_health_reporter {
+ struct list_head list;
+ void *priv;
+ const struct devlink_health_reporter_ops *ops;
+ struct devlink *devlink;
+ struct devlink_port *devlink_port;
+ struct devlink_fmsg *dump_fmsg;
+	struct mutex dump_lock; /* serializes parallel read/write access to dump buffers */
+ u64 graceful_period;
+ bool auto_recover;
+ bool auto_dump;
+ u8 health_state;
+ u64 dump_ts;
+ u64 dump_real_ts;
+ u64 error_count;
+ u64 recovery_count;
+ u64 last_recovery_ts;
+};
+
+void *
+devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
+{
+ return reporter->priv;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
+
+static struct devlink_health_reporter *
+__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
+ const char *reporter_name)
+{
+ struct devlink_health_reporter *reporter;
+
+ list_for_each_entry(reporter, reporter_list, list)
+ if (!strcmp(reporter->ops->name, reporter_name))
+ return reporter;
+ return NULL;
+}
+
+static struct devlink_health_reporter *
+devlink_health_reporter_find_by_name(struct devlink *devlink,
+ const char *reporter_name)
+{
+ return __devlink_health_reporter_find_by_name(&devlink->reporter_list,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
+ const char *reporter_name)
+{
+ return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+__devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ if (WARN_ON(graceful_period && !ops->recover))
+ return ERR_PTR(-EINVAL);
+
+ reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
+ if (!reporter)
+ return ERR_PTR(-ENOMEM);
+
+ reporter->priv = priv;
+ reporter->ops = ops;
+ reporter->devlink = devlink;
+ reporter->graceful_period = graceful_period;
+ reporter->auto_recover = !!ops->recover;
+ reporter->auto_dump = !!ops->dump;
+ mutex_init(&reporter->dump_lock);
+ return reporter;
+}
+
+/**
+ * devl_port_health_reporter_create() - create devlink health reporter for
+ * specified port instance
+ *
+ * @port: devlink_port to which health reports will relate
+ * @ops: devlink health reporter ops
+ * @graceful_period: min time (in msec) between recovery attempts
+ * @priv: driver priv pointer
+ */
+struct devlink_health_reporter *
+devl_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ devl_assert_locked(port->devlink);
+
+ if (__devlink_health_reporter_find_by_name(&port->reporter_list,
+ ops->name))
+ return ERR_PTR(-EEXIST);
+
+ reporter = __devlink_health_reporter_create(port->devlink, ops,
+ graceful_period, priv);
+ if (IS_ERR(reporter))
+ return reporter;
+
+ reporter->devlink_port = port;
+ list_add_tail(&reporter->list, &port->reporter_list);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devl_port_health_reporter_create);
+
+struct devlink_health_reporter *
+devlink_port_health_reporter_create(struct devlink_port *port,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+ struct devlink *devlink = port->devlink;
+
+ devl_lock(devlink);
+ reporter = devl_port_health_reporter_create(port, ops,
+ graceful_period, priv);
+ devl_unlock(devlink);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
+
+/**
+ * devl_health_reporter_create() - create devlink health reporter
+ *
+ * @devlink: devlink instance to which the health reports will relate
+ * @ops: devlink health reporter ops
+ * @graceful_period: min time (in msec) between recovery attempts
+ * @priv: driver priv pointer
+ */
+struct devlink_health_reporter *
+devl_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ devl_assert_locked(devlink);
+
+ if (devlink_health_reporter_find_by_name(devlink, ops->name))
+ return ERR_PTR(-EEXIST);
+
+ reporter = __devlink_health_reporter_create(devlink, ops,
+ graceful_period, priv);
+ if (IS_ERR(reporter))
+ return reporter;
+
+ list_add_tail(&reporter->list, &devlink->reporter_list);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devl_health_reporter_create);
+
+struct devlink_health_reporter *
+devlink_health_reporter_create(struct devlink *devlink,
+ const struct devlink_health_reporter_ops *ops,
+ u64 graceful_period, void *priv)
+{
+ struct devlink_health_reporter *reporter;
+
+ devl_lock(devlink);
+ reporter = devl_health_reporter_create(devlink, ops,
+ graceful_period, priv);
+ devl_unlock(devlink);
+ return reporter;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
+
+static void
+devlink_health_reporter_free(struct devlink_health_reporter *reporter)
+{
+ mutex_destroy(&reporter->dump_lock);
+ if (reporter->dump_fmsg)
+ devlink_fmsg_free(reporter->dump_fmsg);
+ kfree(reporter);
+}
+
+/**
+ * devl_health_reporter_destroy() - destroy devlink health reporter
+ *
+ * @reporter: devlink health reporter to destroy
+ */
+void
+devl_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ devl_assert_locked(reporter->devlink);
+
+ list_del(&reporter->list);
+ devlink_health_reporter_free(reporter);
+}
+EXPORT_SYMBOL_GPL(devl_health_reporter_destroy);
+
+void
+devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
+{
+ struct devlink *devlink = reporter->devlink;
+
+ devl_lock(devlink);
+ devl_health_reporter_destroy(reporter);
+ devl_unlock(devlink);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
+
+static int
+devlink_nl_health_reporter_fill(struct sk_buff *msg,
+ struct devlink_health_reporter *reporter,
+ enum devlink_command cmd, u32 portid,
+ u32 seq, int flags)
+{
+ struct devlink *devlink = reporter->devlink;
+ struct nlattr *reporter_attr;
+ void *hdr;
+
+ hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
+ if (!hdr)
+ return -EMSGSIZE;
+
+ if (devlink_nl_put_handle(msg, devlink))
+ goto genlmsg_cancel;
+
+ if (reporter->devlink_port) {
+ if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index))
+ goto genlmsg_cancel;
+ }
+ reporter_attr = nla_nest_start_noflag(msg,
+ DEVLINK_ATTR_HEALTH_REPORTER);
+ if (!reporter_attr)
+ goto genlmsg_cancel;
+ if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
+ reporter->ops->name))
+ goto reporter_nest_cancel;
+ if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
+ reporter->health_state))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
+ reporter->error_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
+ reporter->recovery_count, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
+ reporter->graceful_period,
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->recover &&
+ nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
+ reporter->auto_recover))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
+ jiffies_to_msecs(reporter->dump_ts),
+ DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->dump_fmsg &&
+ nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
+ reporter->dump_real_ts, DEVLINK_ATTR_PAD))
+ goto reporter_nest_cancel;
+ if (reporter->ops->dump &&
+ nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
+ reporter->auto_dump))
+ goto reporter_nest_cancel;
+
+ nla_nest_end(msg, reporter_attr);
+ genlmsg_end(msg, hdr);
+ return 0;
+
+reporter_nest_cancel:
+ nla_nest_cancel(msg, reporter_attr);
+genlmsg_cancel:
+ genlmsg_cancel(msg, hdr);
+ return -EMSGSIZE;
+}
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
+{
+ struct devlink_port *devlink_port;
+ char *reporter_name;
+
+ if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
+ return NULL;
+
+ reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
+ devlink_port = devlink_port_get_from_attrs(devlink, attrs);
+ if (IS_ERR(devlink_port))
+ return devlink_health_reporter_find_by_name(devlink,
+ reporter_name);
+ else
+ return devlink_port_health_reporter_find_by_name(devlink_port,
+ reporter_name);
+}
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
+{
+ return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
+}
+
+int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ struct sk_buff *msg;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ err = devlink_nl_health_reporter_fill(msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ info->snd_portid, info->snd_seq,
+ 0);
+ if (err) {
+ nlmsg_free(msg);
+ return err;
+ }
+
+ return genlmsg_reply(msg, info);
+}
+
+static int
+devlink_nl_cmd_health_reporter_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_health_reporter *reporter;
+ struct devlink_port *port;
+ unsigned long port_index;
+ int idx = 0;
+ int err;
+
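+	/* Resume the dump from the reporter index where the previous
+	 * message left off.
+	 */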
+ list_for_each_entry(reporter, &devlink->reporter_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
+ }
+ xa_for_each(&devlink->ports, port_index, port) {
+ list_for_each_entry(reporter, &port->reporter_list, list) {
+ if (idx < state->idx) {
+ idx++;
+ continue;
+ }
+ err = devlink_nl_health_reporter_fill(msg, reporter,
+ DEVLINK_CMD_HEALTH_REPORTER_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
+ }
+ }
+
+ return 0;
+}
+
+const struct devlink_cmd devl_cmd_health_reporter_get = {
+ .dump_one = devlink_nl_cmd_health_reporter_get_dump_one,
+};
+
+int devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->recover &&
+ (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]))
+ return -EOPNOTSUPP;
+
+ if (!reporter->ops->dump &&
+ info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
+ return -EOPNOTSUPP;
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
+ reporter->graceful_period =
+ nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
+ reporter->auto_recover =
+ nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
+
+ if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
+ reporter->auto_dump =
+ nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
+
+ return 0;
+}
+
+static void devlink_recover_notify(struct devlink_health_reporter *reporter,
+ enum devlink_command cmd)
+{
+ struct devlink *devlink = reporter->devlink;
+ struct sk_buff *msg;
+ int err;
+
+ WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+ WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0);
+ if (err) {
+ nlmsg_free(msg);
+ return;
+ }
+
+ genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
+ 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
+}
+
+void
+devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
+{
+ reporter->recovery_count++;
+ reporter->last_recovery_ts = jiffies;
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
+
+static int
+devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
+ void *priv_ctx, struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
+ return 0;
+
+ if (!reporter->ops->recover)
+ return -EOPNOTSUPP;
+
+ err = reporter->ops->recover(reporter, priv_ctx, extack);
+ if (err)
+ return err;
+
+ devlink_health_reporter_recovery_done(reporter);
+ reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+
+ return 0;
+}
+
+static void
+devlink_health_dump_clear(struct devlink_health_reporter *reporter)
+{
+ if (!reporter->dump_fmsg)
+ return;
+ devlink_fmsg_free(reporter->dump_fmsg);
+ reporter->dump_fmsg = NULL;
+}
+
+static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
+ void *priv_ctx,
+ struct netlink_ext_ack *extack)
+{
+ int err;
+
+ if (!reporter->ops->dump)
+ return 0;
+
+ if (reporter->dump_fmsg)
+ return 0;
+
+ reporter->dump_fmsg = devlink_fmsg_alloc();
+	if (!reporter->dump_fmsg)
+		return -ENOMEM;
+
+ err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
+ if (err)
+ goto dump_err;
+
+ err = reporter->ops->dump(reporter, reporter->dump_fmsg,
+ priv_ctx, extack);
+ if (err)
+ goto dump_err;
+
+ err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
+ if (err)
+ goto dump_err;
+
+ reporter->dump_ts = jiffies;
+ reporter->dump_real_ts = ktime_get_real_ns();
+
+ return 0;
+
+dump_err:
+ devlink_health_dump_clear(reporter);
+ return err;
+}
+
+int devlink_health_report(struct devlink_health_reporter *reporter,
+ const char *msg, void *priv_ctx)
+{
+ enum devlink_health_reporter_state prev_health_state;
+ struct devlink *devlink = reporter->devlink;
+ unsigned long recover_ts_threshold;
+ int ret;
+
+ /* write a log message of the current error */
+ WARN_ON(!msg);
+ trace_devlink_health_report(devlink, reporter->ops->name, msg);
+ reporter->error_count++;
+ prev_health_state = reporter->health_state;
+ reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+
+ /* abort if the previous error wasn't recovered */
+ recover_ts_threshold = reporter->last_recovery_ts +
+ msecs_to_jiffies(reporter->graceful_period);
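+	/* ...also abort when the last recovery happened within the
+	 * configured graceful period, to avoid recovery storms.
+	 */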
+ if (reporter->auto_recover &&
+ (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
+ (reporter->last_recovery_ts && reporter->recovery_count &&
+ time_is_after_jiffies(recover_ts_threshold)))) {
+ trace_devlink_health_recover_aborted(devlink,
+ reporter->ops->name,
+ reporter->health_state,
+ jiffies -
+ reporter->last_recovery_ts);
+ return -ECANCELED;
+ }
+
+ if (reporter->auto_dump) {
+ mutex_lock(&reporter->dump_lock);
+		/* store a dump of the current error for later analysis */
+ devlink_health_do_dump(reporter, priv_ctx, NULL);
+ mutex_unlock(&reporter->dump_lock);
+ }
+
+ if (!reporter->auto_recover)
+ return 0;
+
+ devl_lock(devlink);
+ ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL);
+ devl_unlock(devlink);
+
+ return ret;
+}
+EXPORT_SYMBOL_GPL(devlink_health_report);
+
+void
+devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
+ enum devlink_health_reporter_state state)
+{
+ if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
+ state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
+ return;
+
+ if (reporter->health_state == state)
+ return;
+
+ reporter->health_state = state;
+ trace_devlink_health_reporter_state_update(reporter->devlink,
+ reporter->ops->name, state);
+ devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
+}
+EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
+
+int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ return devlink_health_reporter_recover(reporter, NULL, info->extack);
+}
+
+static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
+ int attrtype)
+{
+ struct devlink_fmsg_item *item;
+
+ item = kzalloc(sizeof(*item), GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->attrtype = attrtype;
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
+
+static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
+}
+
+int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
+
+#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
+
+static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
+{
+ struct devlink_fmsg_item *item;
+
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
+ return -EMSGSIZE;
+
+ item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->nla_type = NLA_NUL_STRING;
+ item->len = strlen(name) + 1;
+ item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
+ memcpy(&item->value, name, item->len);
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
+{
+ int err;
+
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_put_name(fmsg, name);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
+
+int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
+
+int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
+{
+ int err;
+
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
+
+int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ int err;
+
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ err = devlink_fmsg_nest_end(fmsg);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
+
+int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
+ const char *name)
+{
+ int err;
+
+ err = devlink_fmsg_arr_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ fmsg->putting_binary = true;
+	return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start);
+
+int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg)
+{
+ if (!fmsg->putting_binary)
+ return -EINVAL;
+
+ fmsg->putting_binary = false;
+ return devlink_fmsg_arr_pair_nest_end(fmsg);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end);
+
+static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
+ const void *value, u16 value_len,
+ u8 value_nla_type)
+{
+ struct devlink_fmsg_item *item;
+
+ if (value_len > DEVLINK_FMSG_MAX_SIZE)
+ return -EMSGSIZE;
+
+ item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
+ if (!item)
+ return -ENOMEM;
+
+ item->nla_type = value_nla_type;
+ item->len = value_len;
+ item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+ memcpy(&item->value, value, item->len);
+ list_add_tail(&item->list, &fmsg->item_list);
+
+ return 0;
+}
+
+static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
+}
+
+static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
+}
+
+int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
+
+static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
+}
+
+int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
+{
+ if (fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
+ NLA_NUL_STRING);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
+
+int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
+ u16 value_len)
+{
+ if (!fmsg->putting_binary)
+ return -EINVAL;
+
+ return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
+
+int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ bool value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_bool_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
+
+int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u8 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u8_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
+
+int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u32 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u32_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
+
+int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ u64 value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_u64_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
+
+int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const char *value)
+{
+ int err;
+
+ err = devlink_fmsg_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_string_put(fmsg, value);
+ if (err)
+ return err;
+
+ err = devlink_fmsg_pair_nest_end(fmsg);
+ if (err)
+ return err;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
+
+int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
+ const void *value, u32 value_len)
+{
+ u32 data_size;
+ int end_err;
+ u32 offset;
+ int err;
+
+ err = devlink_fmsg_binary_pair_nest_start(fmsg, name);
+ if (err)
+ return err;
+
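+	/* Split the payload into chunks small enough to fit a single
+	 * netlink attribute each.
+	 */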
+ for (offset = 0; offset < value_len; offset += data_size) {
+ data_size = value_len - offset;
+ if (data_size > DEVLINK_FMSG_MAX_SIZE)
+ data_size = DEVLINK_FMSG_MAX_SIZE;
+ err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
+ if (err)
+ break;
+		/* Exit from the loop with a break (instead of
+		 * return) to make sure putting_binary is turned off
+		 * in devlink_fmsg_binary_pair_nest_end()
+		 */
+ }
+
+ end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
+ if (end_err)
+ err = end_err;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
+
+static int
+devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+ switch (msg->nla_type) {
+ case NLA_FLAG:
+ case NLA_U8:
+ case NLA_U32:
+ case NLA_U64:
+ case NLA_NUL_STRING:
+ case NLA_BINARY:
+ return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
+ msg->nla_type);
+ default:
+ return -EINVAL;
+ }
+}
+
+static int
+devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
+{
+ int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
+ u8 tmp;
+
+ switch (msg->nla_type) {
+ case NLA_FLAG:
+ /* Always provide flag data, regardless of its value */
+ tmp = *(bool *)msg->value;
+
+ return nla_put_u8(skb, attrtype, tmp);
+ case NLA_U8:
+ return nla_put_u8(skb, attrtype, *(u8 *)msg->value);
+ case NLA_U32:
+ return nla_put_u32(skb, attrtype, *(u32 *)msg->value);
+ case NLA_U64:
+ return nla_put_u64_64bit(skb, attrtype, *(u64 *)msg->value,
+ DEVLINK_ATTR_PAD);
+ case NLA_NUL_STRING:
+ return nla_put_string(skb, attrtype, (char *)&msg->value);
+ case NLA_BINARY:
+ return nla_put(skb, attrtype, msg->len, (void *)&msg->value);
+ default:
+ return -EINVAL;
+ }
+}
+
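+/* Fill one skb with fmsg items, starting from item index *start. On
+ * -EMSGSIZE the caller may retry with a fresh skb, resuming from the
+ * updated index.
+ */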
+static int
+devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ int *start)
+{
+ struct devlink_fmsg_item *item;
+ struct nlattr *fmsg_nlattr;
+ int err = 0;
+ int i = 0;
+
+ fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG);
+ if (!fmsg_nlattr)
+ return -EMSGSIZE;
+
+ list_for_each_entry(item, &fmsg->item_list, list) {
+ if (i < *start) {
+ i++;
+ continue;
+ }
+
+ switch (item->attrtype) {
+ case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
+ case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
+ case DEVLINK_ATTR_FMSG_ARR_NEST_START:
+ case DEVLINK_ATTR_FMSG_NEST_END:
+ err = nla_put_flag(skb, item->attrtype);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
+ err = devlink_fmsg_item_fill_type(item, skb);
+ if (err)
+ break;
+ err = devlink_fmsg_item_fill_data(item, skb);
+ break;
+ case DEVLINK_ATTR_FMSG_OBJ_NAME:
+ err = nla_put_string(skb, item->attrtype,
+ (char *)&item->value);
+ break;
+ default:
+ err = -EINVAL;
+ break;
+ }
+ if (!err)
+ *start = ++i;
+ else
+ break;
+ }
+
+ nla_nest_end(skb, fmsg_nlattr);
+ return err;
+}
+
+static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
+ struct genl_info *info,
+ enum devlink_command cmd, int flags)
+{
+ struct nlmsghdr *nlh;
+ struct sk_buff *skb;
+ bool last = false;
+ int index = 0;
+ void *hdr;
+ int err;
+
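+	/* Send the fmsg as a sequence of NLM_F_MULTI replies, each carrying
+	 * as many items as fit, followed by a final NLMSG_DONE message.
+	 */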
+ while (!last) {
+ int tmp_index = index;
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+
+ hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
+ &devlink_nl_family, flags | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ if (!err)
+ last = true;
+ else if (err != -EMSGSIZE || tmp_index == index)
+ goto nla_put_failure;
+
+ genlmsg_end(skb, hdr);
+ err = genlmsg_reply(skb, info);
+ if (err)
+ return err;
+ }
+
+ skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!skb)
+ return -ENOMEM;
+ nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
+ NLMSG_DONE, 0, flags | NLM_F_MULTI);
+ if (!nlh) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ return genlmsg_reply(skb, info);
+
+nla_put_failure:
+ nlmsg_free(skb);
+ return err;
+}
+
+static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
+ struct netlink_callback *cb,
+ enum devlink_command cmd)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ int index = state->idx;
+ int tmp_index = index;
+ void *hdr;
+ int err;
+
+ hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
+ &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
+ if (!hdr) {
+ err = -EMSGSIZE;
+ goto nla_put_failure;
+ }
+
+ err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
+ if ((err && err != -EMSGSIZE) || tmp_index == index)
+ goto nla_put_failure;
+
+ state->idx = index;
+ genlmsg_end(skb, hdr);
+ return skb->len;
+
+nla_put_failure:
+ genlmsg_cancel(skb, hdr);
+ return err;
+}
+
+int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+ struct devlink_fmsg *fmsg;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->diagnose)
+ return -EOPNOTSUPP;
+
+ fmsg = devlink_fmsg_alloc();
+ if (!fmsg)
+ return -ENOMEM;
+
+ err = devlink_fmsg_obj_nest_start(fmsg);
+ if (err)
+ goto out;
+
+ err = reporter->ops->diagnose(reporter, fmsg, info->extack);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_obj_nest_end(fmsg);
+ if (err)
+ goto out;
+
+ err = devlink_fmsg_snd(fmsg, info,
+ DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
+
+out:
+ devlink_fmsg_free(fmsg);
+ return err;
+}
+
+static struct devlink_health_reporter *
+devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
+{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_health_reporter *reporter;
+ struct nlattr **attrs = info->attrs;
+ struct devlink *devlink;
+
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
+ if (IS_ERR(devlink))
+ return NULL;
+ devl_unlock(devlink);
+
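+	/* The instance is only borrowed for the lookup; the reference is
+	 * dropped before the reporter pointer is returned.
+	 */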
+ reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
+ devlink_put(devlink);
+ return reporter;
+}
+
+int devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
+ struct netlink_callback *cb)
+{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ struct devlink_health_reporter *reporter;
+ int err;
+
+ reporter = devlink_health_reporter_get_from_cb(cb);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ if (!state->idx) {
+ err = devlink_health_do_dump(reporter, NULL, cb->extack);
+ if (err)
+ goto unlock;
+ state->dump_ts = reporter->dump_ts;
+ }
+ if (!reporter->dump_fmsg || state->dump_ts != reporter->dump_ts) {
+ NL_SET_ERR_MSG(cb->extack, "Dump trampled, please retry");
+ err = -EAGAIN;
+ goto unlock;
+ }
+
+ err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
+ DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
+unlock:
+ mutex_unlock(&reporter->dump_lock);
+ return err;
+}
+
+int devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->dump)
+ return -EOPNOTSUPP;
+
+ mutex_lock(&reporter->dump_lock);
+ devlink_health_dump_clear(reporter);
+ mutex_unlock(&reporter->dump_lock);
+ return 0;
+}
+
+int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
+ struct genl_info *info)
+{
+ struct devlink *devlink = info->user_ptr[0];
+ struct devlink_health_reporter *reporter;
+
+ reporter = devlink_health_reporter_get_from_info(devlink, info);
+ if (!reporter)
+ return -EINVAL;
+
+ if (!reporter->ops->test)
+ return -EOPNOTSUPP;
+
+ return reporter->ops->test(reporter, info->extack);
+}
diff --git a/net/core/devlink.c b/net/devlink/leftover.c
index 032d6d0a5ce6..dffca2f9bfa7 100644
--- a/net/core/devlink.c
+++ b/net/devlink/leftover.c
@@ -31,58 +31,12 @@
#define CREATE_TRACE_POINTS
#include <trace/events/devlink.h>
-#define DEVLINK_RELOAD_STATS_ARRAY_SIZE \
- (__DEVLINK_RELOAD_LIMIT_MAX * __DEVLINK_RELOAD_ACTION_MAX)
-
-struct devlink_dev_stats {
- u32 reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
- u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
-};
-
-struct devlink {
- u32 index;
- struct xarray ports;
- struct list_head rate_list;
- struct list_head sb_list;
- struct list_head dpipe_table_list;
- struct list_head resource_list;
- struct list_head param_list;
- struct list_head region_list;
- struct list_head reporter_list;
- struct mutex reporters_lock; /* protects reporter_list */
- struct devlink_dpipe_headers *dpipe_headers;
- struct list_head trap_list;
- struct list_head trap_group_list;
- struct list_head trap_policer_list;
- struct list_head linecard_list;
- struct mutex linecards_lock; /* protects linecard_list */
- const struct devlink_ops *ops;
- u64 features;
- struct xarray snapshot_ids;
- struct devlink_dev_stats stats;
- struct device *dev;
- possible_net_t _net;
- /* Serializes access to devlink instance specific objects such as
- * port, sb, dpipe, resource, params, region, traps and more.
- */
- struct mutex lock;
- struct lock_class_key lock_key;
- u8 reload_failed:1;
- refcount_t refcount;
- struct completion comp;
- struct rcu_head rcu;
- struct notifier_block netdevice_nb;
- char priv[] __aligned(NETDEV_ALIGN);
-};
-
-struct devlink_linecard_ops;
-struct devlink_linecard_type;
+#include "devl_internal.h"
struct devlink_linecard {
struct list_head list;
struct devlink *devlink;
unsigned int index;
- refcount_t refcount;
const struct devlink_linecard_ops *ops;
void *priv;
enum devlink_linecard_state state;
@@ -122,24 +76,6 @@ struct devlink_resource {
void *occ_get_priv;
};
-void *devlink_priv(struct devlink *devlink)
-{
- return &devlink->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_priv);
-
-struct devlink *priv_to_devlink(void *priv)
-{
- return container_of(priv, struct devlink, priv);
-}
-EXPORT_SYMBOL_GPL(priv_to_devlink);
-
-struct device *devlink_to_dev(const struct devlink *devlink)
-{
- return devlink->dev;
-}
-EXPORT_SYMBOL_GPL(devlink_to_dev);
-
static struct devlink_dpipe_field devlink_dpipe_fields_ethernet[] = {
{
.name = "destination mac",
@@ -207,176 +143,6 @@ static const struct nla_policy devlink_function_nl_policy[DEVLINK_PORT_FUNCTION_
NLA_POLICY_BITFIELD32(DEVLINK_PORT_FN_CAPS_VALID_MASK),
};
-static const struct nla_policy devlink_selftest_nl_policy[DEVLINK_ATTR_SELFTEST_ID_MAX + 1] = {
- [DEVLINK_ATTR_SELFTEST_ID_FLASH] = { .type = NLA_FLAG },
-};
-
-static DEFINE_XARRAY_FLAGS(devlinks, XA_FLAGS_ALLOC);
-#define DEVLINK_REGISTERED XA_MARK_1
-#define DEVLINK_UNREGISTERING XA_MARK_2
-
-/* devlink instances are open to the access from the user space after
- * devlink_register() call. Such logical barrier allows us to have certain
- * expectations related to locking.
- *
- * Before *_register() - we are in initialization stage and no parallel
- * access possible to the devlink instance. All drivers perform that phase
- * by implicitly holding device_lock.
- *
- * After *_register() - users and driver can access devlink instance at
- * the same time.
- */
-#define ASSERT_DEVLINK_REGISTERED(d) \
- WARN_ON_ONCE(!xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
-#define ASSERT_DEVLINK_NOT_REGISTERED(d) \
- WARN_ON_ONCE(xa_get_mark(&devlinks, (d)->index, DEVLINK_REGISTERED))
-
-struct net *devlink_net(const struct devlink *devlink)
-{
- return read_pnet(&devlink->_net);
-}
-EXPORT_SYMBOL_GPL(devlink_net);
-
-static void __devlink_put_rcu(struct rcu_head *head)
-{
- struct devlink *devlink = container_of(head, struct devlink, rcu);
-
- complete(&devlink->comp);
-}
-
-void devlink_put(struct devlink *devlink)
-{
- if (refcount_dec_and_test(&devlink->refcount))
- /* Make sure unregister operation that may await the completion
- * is unblocked only after all users are after the end of
- * RCU grace period.
- */
- call_rcu(&devlink->rcu, __devlink_put_rcu);
-}
-
-struct devlink *__must_check devlink_try_get(struct devlink *devlink)
-{
- if (refcount_inc_not_zero(&devlink->refcount))
- return devlink;
- return NULL;
-}
-
-void devl_assert_locked(struct devlink *devlink)
-{
- lockdep_assert_held(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_assert_locked);
-
-#ifdef CONFIG_LOCKDEP
-/* For use in conjunction with LOCKDEP only e.g. rcu_dereference_protected() */
-bool devl_lock_is_held(struct devlink *devlink)
-{
- return lockdep_is_held(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_lock_is_held);
-#endif
-
-void devl_lock(struct devlink *devlink)
-{
- mutex_lock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_lock);
-
-int devl_trylock(struct devlink *devlink)
-{
- return mutex_trylock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_trylock);
-
-void devl_unlock(struct devlink *devlink)
-{
- mutex_unlock(&devlink->lock);
-}
-EXPORT_SYMBOL_GPL(devl_unlock);
-
-static struct devlink *
-devlinks_xa_find_get(struct net *net, unsigned long *indexp, xa_mark_t filter,
- void * (*xa_find_fn)(struct xarray *, unsigned long *,
- unsigned long, xa_mark_t))
-{
- struct devlink *devlink;
-
- rcu_read_lock();
-retry:
- devlink = xa_find_fn(&devlinks, indexp, ULONG_MAX, DEVLINK_REGISTERED);
- if (!devlink)
- goto unlock;
-
- /* In case devlink_unregister() was already called and "unregistering"
- * mark was set, do not allow to get a devlink reference here.
- * This prevents live-lock of devlink_unregister() wait for completion.
- */
- if (xa_get_mark(&devlinks, *indexp, DEVLINK_UNREGISTERING))
- goto retry;
-
- /* For a possible retry, the xa_find_after() should be always used */
- xa_find_fn = xa_find_after;
- if (!devlink_try_get(devlink))
- goto retry;
- if (!net_eq(devlink_net(devlink), net)) {
- devlink_put(devlink);
- goto retry;
- }
-unlock:
- rcu_read_unlock();
- return devlink;
-}
-
-static struct devlink *devlinks_xa_find_get_first(struct net *net,
- unsigned long *indexp,
- xa_mark_t filter)
-{
- return devlinks_xa_find_get(net, indexp, filter, xa_find);
-}
-
-static struct devlink *devlinks_xa_find_get_next(struct net *net,
- unsigned long *indexp,
- xa_mark_t filter)
-{
- return devlinks_xa_find_get(net, indexp, filter, xa_find_after);
-}
-
-/* Iterate over devlink pointers which were possible to get reference to.
- * devlink_put() needs to be called for each iterated devlink pointer
- * in loop body in order to release the reference.
- */
-#define devlinks_xa_for_each_get(net, index, devlink, filter) \
- for (index = 0, \
- devlink = devlinks_xa_find_get_first(net, &index, filter); \
- devlink; devlink = devlinks_xa_find_get_next(net, &index, filter))
-
-#define devlinks_xa_for_each_registered_get(net, index, devlink) \
- devlinks_xa_for_each_get(net, index, devlink, DEVLINK_REGISTERED)
-
-static struct devlink *devlink_get_from_attrs(struct net *net,
- struct nlattr **attrs)
-{
- struct devlink *devlink;
- unsigned long index;
- char *busname;
- char *devname;
-
- if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
- return ERR_PTR(-EINVAL);
-
- busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
- devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
-
- devlinks_xa_for_each_registered_get(net, index, devlink) {
- if (strcmp(devlink->dev->bus->name, busname) == 0 &&
- strcmp(dev_name(devlink->dev), devname) == 0)
- return devlink;
- devlink_put(devlink);
- }
-
- return ERR_PTR(-ENODEV);
-}
-
#define ASSERT_DEVLINK_PORT_REGISTERED(devlink_port) \
WARN_ON_ONCE(!(devlink_port)->registered)
#define ASSERT_DEVLINK_PORT_NOT_REGISTERED(devlink_port) \
@@ -390,8 +156,8 @@ static struct devlink_port *devlink_port_get_by_index(struct devlink *devlink,
return xa_load(&devlink->ports, port_index);
}
-static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
+struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
+ struct nlattr **attrs)
{
if (attrs[DEVLINK_ATTR_PORT_INDEX]) {
u32 port_index = nla_get_u32(attrs[DEVLINK_ATTR_PORT_INDEX]);
@@ -405,8 +171,8 @@ static struct devlink_port *devlink_port_get_from_attrs(struct devlink *devlink,
return ERR_PTR(-EINVAL);
}
-static struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
- struct genl_info *info)
+struct devlink_port *devlink_port_get_from_info(struct devlink *devlink,
+ struct genl_info *info)
{
return devlink_port_get_from_attrs(devlink, info->attrs);
}
@@ -466,13 +232,13 @@ devlink_rate_node_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
return devlink_rate_node_get_by_name(devlink, rate_node_name);
}
-static struct devlink_rate *
+struct devlink_rate *
devlink_rate_node_get_from_info(struct devlink *devlink, struct genl_info *info)
{
return devlink_rate_node_get_from_attrs(devlink, info->attrs);
}
-static struct devlink_rate *
+struct devlink_rate *
devlink_rate_get_from_info(struct devlink *devlink, struct genl_info *info)
{
struct nlattr **attrs = info->attrs;
@@ -511,11 +277,7 @@ devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
u32 linecard_index = nla_get_u32(attrs[DEVLINK_ATTR_LINECARD_INDEX]);
struct devlink_linecard *linecard;
- mutex_lock(&devlink->linecards_lock);
linecard = devlink_linecard_get_by_index(devlink, linecard_index);
- if (linecard)
- refcount_inc(&linecard->refcount);
- mutex_unlock(&devlink->linecards_lock);
if (!linecard)
return ERR_PTR(-ENODEV);
return linecard;
@@ -523,20 +285,12 @@ devlink_linecard_get_from_attrs(struct devlink *devlink, struct nlattr **attrs)
return ERR_PTR(-EINVAL);
}
-static struct devlink_linecard *
+struct devlink_linecard *
devlink_linecard_get_from_info(struct devlink *devlink, struct genl_info *info)
{
return devlink_linecard_get_from_attrs(devlink, info->attrs);
}
-static void devlink_linecard_put(struct devlink_linecard *linecard)
-{
- if (refcount_dec_and_test(&linecard->refcount)) {
- mutex_destroy(&linecard->state_lock);
- kfree(linecard);
- }
-}
-
struct devlink_sb {
struct list_head list;
unsigned int index;
@@ -838,104 +592,6 @@ devlink_region_snapshot_get_by_id(struct devlink_region *region, u32 id)
return NULL;
}
-#define DEVLINK_NL_FLAG_NEED_PORT BIT(0)
-#define DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT BIT(1)
-#define DEVLINK_NL_FLAG_NEED_RATE BIT(2)
-#define DEVLINK_NL_FLAG_NEED_RATE_NODE BIT(3)
-#define DEVLINK_NL_FLAG_NEED_LINECARD BIT(4)
-
-static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink_linecard *linecard;
- struct devlink_port *devlink_port;
- struct devlink *devlink;
- int err;
-
- devlink = devlink_get_from_attrs(genl_info_net(info), info->attrs);
- if (IS_ERR(devlink))
- return PTR_ERR(devlink);
- devl_lock(devlink);
- info->user_ptr[0] = devlink;
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
- devlink_port = devlink_port_get_from_info(devlink, info);
- if (IS_ERR(devlink_port)) {
- err = PTR_ERR(devlink_port);
- goto unlock;
- }
- info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
- devlink_port = devlink_port_get_from_info(devlink, info);
- if (!IS_ERR(devlink_port))
- info->user_ptr[1] = devlink_port;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
- struct devlink_rate *devlink_rate;
-
- devlink_rate = devlink_rate_get_from_info(devlink, info);
- if (IS_ERR(devlink_rate)) {
- err = PTR_ERR(devlink_rate);
- goto unlock;
- }
- info->user_ptr[1] = devlink_rate;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
- struct devlink_rate *rate_node;
-
- rate_node = devlink_rate_node_get_from_info(devlink, info);
- if (IS_ERR(rate_node)) {
- err = PTR_ERR(rate_node);
- goto unlock;
- }
- info->user_ptr[1] = rate_node;
- } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = devlink_linecard_get_from_info(devlink, info);
- if (IS_ERR(linecard)) {
- err = PTR_ERR(linecard);
- goto unlock;
- }
- info->user_ptr[1] = linecard;
- }
- return 0;
-
-unlock:
- devl_unlock(devlink);
- devlink_put(devlink);
- return err;
-}
-
-static void devlink_nl_post_doit(const struct genl_split_ops *ops,
- struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink_linecard *linecard;
- struct devlink *devlink;
-
- devlink = info->user_ptr[0];
- if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
- linecard = info->user_ptr[1];
- devlink_linecard_put(linecard);
- }
- devl_unlock(devlink);
- devlink_put(devlink);
-}
-
-static struct genl_family devlink_nl_family;
-
-enum devlink_multicast_groups {
- DEVLINK_MCGRP_CONFIG,
-};
-
-static const struct genl_multicast_group devlink_nl_mcgrps[] = {
- [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
-};
-
-static int devlink_nl_put_handle(struct sk_buff *msg, struct devlink *devlink)
-{
- if (nla_put_string(msg, DEVLINK_ATTR_BUS_NAME, devlink->dev->bus->name))
- return -EMSGSIZE;
- if (nla_put_string(msg, DEVLINK_ATTR_DEV_NAME, dev_name(devlink->dev)))
- return -EMSGSIZE;
- return 0;
-}
-
static int devlink_nl_put_nested_handle(struct sk_buff *msg, struct devlink *devlink)
{
struct nlattr *nested_attr;
@@ -972,185 +628,6 @@ size_t devlink_nl_port_handle_size(struct devlink_port *devlink_port)
+ nla_total_size(4); /* DEVLINK_ATTR_PORT_INDEX */
}
-struct devlink_reload_combination {
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
-};
-
-static const struct devlink_reload_combination devlink_reload_invalid_combinations[] = {
- {
- /* can't reinitialize driver with no down time */
- .action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- .limit = DEVLINK_RELOAD_LIMIT_NO_RESET,
- },
-};
-
-static bool
-devlink_reload_combination_is_invalid(enum devlink_reload_action action,
- enum devlink_reload_limit limit)
-{
- int i;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++)
- if (devlink_reload_invalid_combinations[i].action == action &&
- devlink_reload_invalid_combinations[i].limit == limit)
- return true;
- return false;
-}
-
-static bool
-devlink_reload_action_is_supported(struct devlink *devlink, enum devlink_reload_action action)
-{
- return test_bit(action, &devlink->ops->reload_actions);
-}
-
-static bool
-devlink_reload_limit_is_supported(struct devlink *devlink, enum devlink_reload_limit limit)
-{
- return test_bit(limit, &devlink->ops->reload_limits);
-}
-
-static int devlink_reload_stat_put(struct sk_buff *msg,
- enum devlink_reload_limit limit, u32 value)
-{
- struct nlattr *reload_stats_entry;
-
- reload_stats_entry = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS_ENTRY);
- if (!reload_stats_entry)
- return -EMSGSIZE;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_STATS_LIMIT, limit) ||
- nla_put_u32(msg, DEVLINK_ATTR_RELOAD_STATS_VALUE, value))
- goto nla_put_failure;
- nla_nest_end(msg, reload_stats_entry);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_entry);
- return -EMSGSIZE;
-}
-
-static int devlink_reload_stats_put(struct sk_buff *msg, struct devlink *devlink, bool is_remote)
-{
- struct nlattr *reload_stats_attr, *act_info, *act_stats;
- int i, j, stat_idx;
- u32 value;
-
- if (!is_remote)
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_STATS);
- else
- reload_stats_attr = nla_nest_start(msg, DEVLINK_ATTR_REMOTE_RELOAD_STATS);
-
- if (!reload_stats_attr)
- return -EMSGSIZE;
-
- for (i = 0; i <= DEVLINK_RELOAD_ACTION_MAX; i++) {
- if ((!is_remote &&
- !devlink_reload_action_is_supported(devlink, i)) ||
- i == DEVLINK_RELOAD_ACTION_UNSPEC)
- continue;
- act_info = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_INFO);
- if (!act_info)
- goto nla_put_failure;
-
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_ACTION, i))
- goto action_info_nest_cancel;
- act_stats = nla_nest_start(msg, DEVLINK_ATTR_RELOAD_ACTION_STATS);
- if (!act_stats)
- goto action_info_nest_cancel;
-
- for (j = 0; j <= DEVLINK_RELOAD_LIMIT_MAX; j++) {
- /* Remote stats are shown even if not locally supported.
- * Stats of actions with the unspecified limit are shown
- * even though drivers don't need to register that
- * limit.
- */
- if ((!is_remote && j != DEVLINK_RELOAD_LIMIT_UNSPEC &&
- !devlink_reload_limit_is_supported(devlink, j)) ||
- devlink_reload_combination_is_invalid(i, j))
- continue;
-
- stat_idx = j * __DEVLINK_RELOAD_ACTION_MAX + i;
- if (!is_remote)
- value = devlink->stats.reload_stats[stat_idx];
- else
- value = devlink->stats.remote_reload_stats[stat_idx];
- if (devlink_reload_stat_put(msg, j, value))
- goto action_stats_nest_cancel;
- }
- nla_nest_end(msg, act_stats);
- nla_nest_end(msg, act_info);
- }
- nla_nest_end(msg, reload_stats_attr);
- return 0;
-
-action_stats_nest_cancel:
- nla_nest_cancel(msg, act_stats);
-action_info_nest_cancel:
- nla_nest_cancel(msg, act_info);
-nla_put_failure:
- nla_nest_cancel(msg, reload_stats_attr);
- return -EMSGSIZE;
-}
-
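A quick worked example of the flat indexing used above, where each limit owns a contiguous run of per-action counters (the concrete enum value below is illustrative):

	/* stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
	 * e.g. with __DEVLINK_RELOAD_ACTION_MAX == 3, limit == 1 and
	 * action == 2 land in slot 1 * 3 + 2 == 5 of reload_stats[].
	 */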
-static int devlink_nl_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- struct nlattr *dev_stats;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
- if (nla_put_u8(msg, DEVLINK_ATTR_RELOAD_FAILED, devlink->reload_failed))
- goto nla_put_failure;
-
- dev_stats = nla_nest_start(msg, DEVLINK_ATTR_DEV_STATS);
- if (!dev_stats)
- goto nla_put_failure;
-
- if (devlink_reload_stats_put(msg, devlink, false))
- goto dev_stats_nest_cancel;
- if (devlink_reload_stats_put(msg, devlink, true))
- goto dev_stats_nest_cancel;
-
- nla_nest_end(msg, dev_stats);
- genlmsg_end(msg, hdr);
- return 0;
-
-dev_stats_nest_cancel:
- nla_nest_cancel(msg, dev_stats);
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_notify(struct devlink *devlink, enum devlink_command cmd)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_NEW && cmd != DEVLINK_CMD_DEL);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_fill(msg, devlink, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
static int devlink_nl_port_attrs_put(struct sk_buff *msg,
struct devlink_port *devlink_port)
{
@@ -1333,13 +810,12 @@ static int devlink_port_fn_state_fill(const struct devlink_ops *ops,
}
if (!devlink_port_fn_state_valid(state)) {
WARN_ON_ONCE(1);
- NL_SET_ERR_MSG_MOD(extack, "Invalid state read from driver");
+ NL_SET_ERR_MSG(extack, "Invalid state read from driver");
return -EINVAL;
}
if (!devlink_port_fn_opstate_valid(opstate)) {
WARN_ON_ONCE(1);
- NL_SET_ERR_MSG_MOD(extack,
- "Invalid operational state read from driver");
+ NL_SET_ERR_MSG(extack, "Invalid operational state read from driver");
return -EINVAL;
}
if (nla_put_u8(msg, DEVLINK_PORT_FN_ATTR_STATE, state) ||
@@ -1537,47 +1013,40 @@ static void devlink_rate_notify(struct devlink_rate *devlink_rate,
0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-static int devlink_nl_cmd_rate_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_rate_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_rate *devlink_rate;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
- enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
- u32 id = NETLINK_CB(cb->skb).portid;
+ list_for_each_entry(devlink_rate, &devlink->rate_list, list) {
+ enum devlink_command cmd = DEVLINK_CMD_RATE_NEW;
+ u32 id = NETLINK_CB(cb->skb).portid;
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, NULL);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_rate_fill(msg, devlink_rate, cmd, id,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, NULL);
+ if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_rate_get = {
+ .dump_one = devlink_nl_cmd_rate_get_dump_one,
+};
+
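The devl_cmd_* registrations introduced by this patch assume a shared dumpit driver elsewhere in net/devlink/ that walks the registered instances and resumes from the per-dump cursor; a rough sketch of that contract follows (the loop below is an assumption, not code from this hunk):

	/* Hypothetical core loop: dump_one() fills what fits and returns
	 * -EMSGSIZE when the skb is full, so the dump can resume later
	 * from the cursor kept in devlink_dump_state(cb).
	 */
	devlinks_xa_for_each_registered_get(net, index, devlink) {
		devl_lock(devlink);
		err = cmd->dump_one(msg, devlink, cb);
		devl_unlock(devlink);
		devlink_put(devlink);
		if (err == -EMSGSIZE)
			break;	/* partial message sent, continue on next call */
		if (err)
			return err;
		state->idx = 0;	/* instance done, reset the cursor */
	}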
static int devlink_nl_cmd_rate_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -1612,58 +1081,6 @@ devlink_rate_is_parent_node(struct devlink_rate *devlink_rate,
return false;
}
-static int devlink_nl_cmd_get_doit(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- info->snd_portid, info->snd_seq, 0);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start) {
- idx++;
- devlink_put(devlink);
- continue;
- }
-
- devl_lock(devlink);
- err = devlink_nl_fill(msg, devlink, DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
- devl_unlock(devlink);
- devlink_put(devlink);
-
- if (err)
- goto out;
- idx++;
- }
-out:
- cb->args[0] = idx;
- return msg->len;
-}
-
static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
@@ -1686,43 +1103,34 @@ static int devlink_nl_cmd_port_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_port_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_port_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_port *devlink_port;
- unsigned long index, port_index;
- int start = cb->args[0];
- int idx = 0;
- int err;
+ unsigned long port_index;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- xa_for_each(&devlink->ports, port_index, devlink_port) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_port_fill(msg, devlink_port,
- DEVLINK_CMD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI, cb->extack);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
- idx++;
+ xa_for_each_start(&devlink->ports, port_index, devlink_port, state->idx) {
+ err = devlink_nl_port_fill(msg, devlink_port,
+ DEVLINK_CMD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI, cb->extack);
+ if (err) {
+ state->idx = port_index;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_port_get = {
+ .dump_one = devlink_nl_cmd_port_get_dump_one,
+};
+
static int devlink_port_type_set(struct devlink_port *devlink_port,
enum devlink_port_type port_type)
@@ -1756,16 +1164,16 @@ static int devlink_port_function_hw_addr_set(struct devlink_port *port,
hw_addr = nla_data(attr);
hw_addr_len = nla_len(attr);
if (hw_addr_len > MAX_ADDR_LEN) {
- NL_SET_ERR_MSG_MOD(extack, "Port function hardware address too long");
+ NL_SET_ERR_MSG(extack, "Port function hardware address too long");
return -EINVAL;
}
if (port->type == DEVLINK_PORT_TYPE_ETH) {
if (hw_addr_len != ETH_ALEN) {
- NL_SET_ERR_MSG_MOD(extack, "Address must be 6 bytes for Ethernet device");
+ NL_SET_ERR_MSG(extack, "Address must be 6 bytes for Ethernet device");
return -EINVAL;
}
if (!is_unicast_ether_addr(hw_addr)) {
- NL_SET_ERR_MSG_MOD(extack, "Non-unicast hardware address unsupported");
+ NL_SET_ERR_MSG(extack, "Non-unicast hardware address unsupported");
return -EINVAL;
}
}
@@ -1841,7 +1249,7 @@ static int devlink_port_function_set(struct devlink_port *port,
err = nla_parse_nested(tb, DEVLINK_PORT_FUNCTION_ATTR_MAX, attr,
devlink_function_nl_policy, extack);
if (err < 0) {
- NL_SET_ERR_MSG_MOD(extack, "Fail to parse port function attributes");
+ NL_SET_ERR_MSG(extack, "Fail to parse port function attributes");
return err;
}
@@ -1920,14 +1328,14 @@ static int devlink_nl_cmd_port_split_doit(struct sk_buff *skb,
if (!devlink_port->attrs.splittable) {
/* Split ports cannot be split. */
if (devlink_port->attrs.split)
- NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split further");
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split further");
else
- NL_SET_ERR_MSG_MOD(info->extack, "Port cannot be split");
+ NL_SET_ERR_MSG(info->extack, "Port cannot be split");
return -EINVAL;
}
if (count < 2 || !is_power_of_2(count) || count > devlink_port->attrs.lanes) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid split count");
+ NL_SET_ERR_MSG(info->extack, "Invalid split count");
return -EINVAL;
}
@@ -1991,7 +1399,7 @@ static int devlink_nl_cmd_port_new_doit(struct sk_buff *skb,
if (!info->attrs[DEVLINK_ATTR_PORT_FLAVOUR] ||
!info->attrs[DEVLINK_ATTR_PORT_PCI_PF_NUMBER]) {
- NL_SET_ERR_MSG_MOD(extack, "Port flavour or PCI PF are not specified");
+ NL_SET_ERR_MSG(extack, "Port flavour or PCI PF are not specified");
return -EINVAL;
}
new_attrs.flavour = nla_get_u16(info->attrs[DEVLINK_ATTR_PORT_FLAVOUR]);
@@ -2039,7 +1447,7 @@ static int devlink_nl_cmd_port_del_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_PORT_INDEX)) {
- NL_SET_ERR_MSG_MOD(extack, "Port index is not specified");
+ NL_SET_ERR_MSG(extack, "Port index is not specified");
return -EINVAL;
}
port_index = nla_get_u32(info->attrs[DEVLINK_ATTR_PORT_INDEX]);
@@ -2081,13 +1489,13 @@ devlink_nl_rate_parent_node_set(struct devlink_rate *devlink_rate,
return -ENODEV;
if (parent == devlink_rate) {
- NL_SET_ERR_MSG_MOD(info->extack, "Parent to self is not allowed");
+ NL_SET_ERR_MSG(info->extack, "Parent to self is not allowed");
return -EINVAL;
}
if (devlink_rate_is_node(devlink_rate) &&
devlink_rate_is_parent_node(devlink_rate, parent->parent)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Node is already a parent of parent node.");
+ NL_SET_ERR_MSG(info->extack, "Node is already a parent of parent node.");
return -EEXIST;
}
@@ -2196,16 +1604,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
if (type == DEVLINK_RATE_TYPE_LEAF) {
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_leaf_tx_share_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the leafs");
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the leafs");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_leaf_tx_max_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the leafs");
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the leafs");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
!ops->rate_leaf_parent_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the leafs");
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the leafs");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_leaf_tx_priority_set) {
@@ -2222,16 +1630,16 @@ static bool devlink_rate_set_ops_supported(const struct devlink_ops *ops,
}
} else if (type == DEVLINK_RATE_TYPE_NODE) {
if (attrs[DEVLINK_ATTR_RATE_TX_SHARE] && !ops->rate_node_tx_share_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX share set isn't supported for the nodes");
+ NL_SET_ERR_MSG(info->extack, "TX share set isn't supported for the nodes");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_MAX] && !ops->rate_node_tx_max_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "TX max set isn't supported for the nodes");
+ NL_SET_ERR_MSG(info->extack, "TX max set isn't supported for the nodes");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_PARENT_NODE_NAME] &&
!ops->rate_node_parent_set) {
- NL_SET_ERR_MSG_MOD(info->extack, "Parent set isn't supported for the nodes");
+ NL_SET_ERR_MSG(info->extack, "Parent set isn't supported for the nodes");
return false;
}
if (attrs[DEVLINK_ATTR_RATE_TX_PRIORITY] && !ops->rate_node_tx_priority_set) {
@@ -2282,7 +1690,7 @@ static int devlink_nl_cmd_rate_new_doit(struct sk_buff *skb,
ops = devlink->ops;
if (!ops || !ops->rate_node_new || !ops->rate_node_del) {
- NL_SET_ERR_MSG_MOD(info->extack, "Rate nodes aren't supported");
+ NL_SET_ERR_MSG(info->extack, "Rate nodes aren't supported");
return -EOPNOTSUPP;
}
@@ -2338,7 +1746,7 @@ static int devlink_nl_cmd_rate_del_doit(struct sk_buff *skb,
int err;
if (refcount_read(&rate_node->refcnt) > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "Node has children. Cannot delete node.");
+ NL_SET_ERR_MSG(info->extack, "Node has children. Cannot delete node.");
return -EBUSY;
}
@@ -2465,46 +1873,42 @@ static int devlink_nl_cmd_linecard_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_linecard_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int devlink_nl_cmd_linecard_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_linecard *linecard;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- mutex_lock(&devlink->linecards_lock);
- list_for_each_entry(linecard, &devlink->linecard_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- mutex_lock(&linecard->state_lock);
- err = devlink_nl_linecard_fill(msg, devlink, linecard,
- DEVLINK_CMD_LINECARD_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI,
- cb->extack);
- mutex_unlock(&linecard->state_lock);
- if (err) {
- mutex_unlock(&devlink->linecards_lock);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(linecard, &devlink->linecard_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
}
- mutex_unlock(&devlink->linecards_lock);
- devlink_put(devlink);
+ mutex_lock(&linecard->state_lock);
+ err = devlink_nl_linecard_fill(msg, devlink, linecard,
+ DEVLINK_CMD_LINECARD_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI,
+ cb->extack);
+ mutex_unlock(&linecard->state_lock);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_linecard_get = {
+ .dump_one = devlink_nl_cmd_linecard_get_dump_one,
+};
+
static struct devlink_linecard_type *
devlink_linecard_type_lookup(struct devlink_linecard *linecard,
const char *type)
@@ -2530,26 +1934,26 @@ static int devlink_linecard_type_set(struct devlink_linecard *linecard,
mutex_lock(&linecard->state_lock);
if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
err = -EBUSY;
goto out;
}
if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
err = -EBUSY;
goto out;
}
linecard_type = devlink_linecard_type_lookup(linecard, type);
if (!linecard_type) {
- NL_SET_ERR_MSG_MOD(extack, "Unsupported line card type provided");
+ NL_SET_ERR_MSG(extack, "Unsupported line card type provided");
err = -EINVAL;
goto out;
}
if (linecard->state != DEVLINK_LINECARD_STATE_UNPROVISIONED &&
linecard->state != DEVLINK_LINECARD_STATE_PROVISIONING_FAILED) {
- NL_SET_ERR_MSG_MOD(extack, "Line card already provisioned");
+ NL_SET_ERR_MSG(extack, "Line card already provisioned");
err = -EBUSY;
/* Check if the line card is provisioned in the same
* way the user asks. In case it is, make the operation
@@ -2593,12 +1997,12 @@ static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
mutex_lock(&linecard->state_lock);
if (linecard->state == DEVLINK_LINECARD_STATE_PROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being provisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being provisioned");
err = -EBUSY;
goto out;
}
if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONING) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is currently being unprovisioned");
+ NL_SET_ERR_MSG(extack, "Line card is currently being unprovisioned");
err = -EBUSY;
goto out;
}
@@ -2611,7 +2015,7 @@ static int devlink_linecard_type_unset(struct devlink_linecard *linecard,
}
if (linecard->state == DEVLINK_LINECARD_STATE_UNPROVISIONED) {
- NL_SET_ERR_MSG_MOD(extack, "Line card is not provisioned");
+ NL_SET_ERR_MSG(extack, "Line card is not provisioned");
err = 0;
goto out;
}
@@ -2727,43 +2131,39 @@ static int devlink_nl_cmd_sb_get_doit(struct sk_buff *skb,
return genlmsg_reply(msg, info);
}
-static int devlink_nl_cmd_sb_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_sb_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
- DEVLINK_CMD_SB_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_sb_fill(msg, devlink, devlink_sb,
+ DEVLINK_CMD_SB_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_get = {
+ .dump_one = devlink_nl_cmd_sb_get_dump_one,
+};
+
static int devlink_nl_sb_pool_fill(struct sk_buff *msg, struct devlink *devlink,
struct devlink_sb *devlink_sb,
u16 pool_index, enum devlink_command cmd,
@@ -2869,46 +2269,39 @@ static int __sb_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
return 0;
}
-static int devlink_nl_cmd_sb_pool_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_sb_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
int err = 0;
+ int idx = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (!devlink->ops->sb_pool_get)
- goto retry;
+ if (!devlink->ops->sb_pool_get)
+ return 0;
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_pool_get_dumpit(msg, start, &idx, devlink,
- devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
-retry:
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_pool_get = {
+ .dump_one = devlink_nl_cmd_sb_pool_get_dump_one,
+};
+
static int devlink_sb_pool_set(struct devlink *devlink, unsigned int sb_index,
u16 pool_index, u32 size,
enum devlink_sb_threshold_type threshold_type,
@@ -3084,46 +2477,39 @@ static int __sb_port_pool_get_dumpit(struct sk_buff *msg, int start, int *p_idx,
return 0;
}
-static int devlink_nl_cmd_sb_port_pool_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_sb_port_pool_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (!devlink->ops->sb_port_pool_get)
- goto retry;
-
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_port_pool_get_dumpit(msg, start, &idx,
- devlink, devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ if (!devlink->ops->sb_port_pool_get)
+ return 0;
+
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_port_pool_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
-retry:
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_port_pool_get = {
+ .dump_one = devlink_nl_cmd_sb_port_pool_get_dump_one,
+};
+
static int devlink_sb_port_pool_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 pool_index,
u32 threshold,
@@ -3327,47 +2713,38 @@ static int __sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
}
static int
-devlink_nl_cmd_sb_tc_pool_bind_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+devlink_nl_cmd_sb_tc_pool_bind_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- struct devlink *devlink;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_sb *devlink_sb;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (!devlink->ops->sb_tc_pool_bind_get)
- goto retry;
+ if (!devlink->ops->sb_tc_pool_bind_get)
+ return 0;
- devl_lock(devlink);
- list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
- err = __sb_tc_pool_bind_get_dumpit(msg, start, &idx,
- devlink,
- devlink_sb,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(devlink_sb, &devlink->sb_list, list) {
+ err = __sb_tc_pool_bind_get_dumpit(msg, state->idx, &idx,
+ devlink, devlink_sb,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
-retry:
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_sb_tc_pool_bind_get = {
+ .dump_one = devlink_nl_cmd_sb_tc_pool_bind_get_dump_one,
+};
+
static int devlink_sb_tc_pool_bind_set(struct devlink_port *devlink_port,
unsigned int sb_index, u16 tc_index,
enum devlink_sb_pool_type pool_type,
@@ -3455,142 +2832,19 @@ static int devlink_nl_cmd_sb_occ_max_clear_doit(struct sk_buff *skb,
return -EOPNOTSUPP;
}
-static int devlink_nl_eswitch_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- void *hdr;
- int err = 0;
- u16 mode;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = devlink_nl_put_handle(msg, devlink);
- if (err)
- goto nla_put_failure;
-
- if (ops->eswitch_mode_get) {
- err = ops->eswitch_mode_get(devlink, &mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u16(msg, DEVLINK_ATTR_ESWITCH_MODE, mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_inline_mode_get) {
- err = ops->eswitch_inline_mode_get(devlink, &inline_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_INLINE_MODE,
- inline_mode);
- if (err)
- goto nla_put_failure;
- }
-
- if (ops->eswitch_encap_mode_get) {
- err = ops->eswitch_encap_mode_get(devlink, &encap_mode);
- if (err)
- goto nla_put_failure;
- err = nla_put_u8(msg, DEVLINK_ATTR_ESWITCH_ENCAP_MODE, encap_mode);
- if (err)
- goto nla_put_failure;
- }
-
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_eswitch_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_eswitch_fill(msg, devlink, DEVLINK_CMD_ESWITCH_GET,
- info->snd_portid, info->snd_seq, 0);
-
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
- struct netlink_ext_ack *extack)
+int devlink_rate_nodes_check(struct devlink *devlink, u16 mode,
+ struct netlink_ext_ack *extack)
{
struct devlink_rate *devlink_rate;
list_for_each_entry(devlink_rate, &devlink->rate_list, list)
if (devlink_rate_is_node(devlink_rate)) {
- NL_SET_ERR_MSG_MOD(extack, "Rate node(s) exists.");
+ NL_SET_ERR_MSG(extack, "Rate node(s) exists.");
return -EBUSY;
}
return 0;
}
-static int devlink_nl_cmd_eswitch_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- const struct devlink_ops *ops = devlink->ops;
- enum devlink_eswitch_encap_mode encap_mode;
- u8 inline_mode;
- int err = 0;
- u16 mode;
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_MODE]) {
- if (!ops->eswitch_mode_set)
- return -EOPNOTSUPP;
- mode = nla_get_u16(info->attrs[DEVLINK_ATTR_ESWITCH_MODE]);
- err = devlink_rate_nodes_check(devlink, mode, info->extack);
- if (err)
- return err;
- err = ops->eswitch_mode_set(devlink, mode, info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]) {
- if (!ops->eswitch_inline_mode_set)
- return -EOPNOTSUPP;
- inline_mode = nla_get_u8(
- info->attrs[DEVLINK_ATTR_ESWITCH_INLINE_MODE]);
- err = ops->eswitch_inline_mode_set(devlink, inline_mode,
- info->extack);
- if (err)
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]) {
- if (!ops->eswitch_encap_mode_set)
- return -EOPNOTSUPP;
- encap_mode = nla_get_u8(info->attrs[DEVLINK_ATTR_ESWITCH_ENCAP_MODE]);
- err = ops->eswitch_encap_mode_set(devlink, encap_mode,
- info->extack);
- if (err)
- return err;
- }
-
- return 0;
-}
-
int devlink_dpipe_match_put(struct sk_buff *skb,
struct devlink_dpipe_match *match)
{
@@ -4351,18 +3605,18 @@ devlink_resource_validate_size(struct devlink_resource *resource, u64 size,
int err = 0;
if (size > resource->size_params.size_max) {
- NL_SET_ERR_MSG_MOD(extack, "Size larger than maximum");
+ NL_SET_ERR_MSG(extack, "Size larger than maximum");
err = -EINVAL;
}
if (size < resource->size_params.size_min) {
- NL_SET_ERR_MSG_MOD(extack, "Size smaller than minimum");
+ NL_SET_ERR_MSG(extack, "Size smaller than minimum");
err = -EINVAL;
}
div64_u64_rem(size, resource->size_params.size_granularity, &reminder);
if (reminder) {
- NL_SET_ERR_MSG_MOD(extack, "Wrong granularity");
+ NL_SET_ERR_MSG(extack, "Wrong granularity");
err = -EINVAL;
}
@@ -4561,10 +3815,9 @@ static int devlink_nl_cmd_resource_dump(struct sk_buff *skb,
return devlink_resource_fill(info, DEVLINK_CMD_RESOURCE_DUMP, 0);
}
-static int
-devlink_resources_validate(struct devlink *devlink,
- struct devlink_resource *resource,
- struct genl_info *info)
+int devlink_resources_validate(struct devlink *devlink,
+ struct devlink_resource *resource,
+ struct genl_info *info)
{
struct list_head *resource_list;
int err = 0;
@@ -4584,743 +3837,6 @@ devlink_resources_validate(struct devlink *devlink,
return err;
}
-static struct net *devlink_netns_get(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *netns_pid_attr = info->attrs[DEVLINK_ATTR_NETNS_PID];
- struct nlattr *netns_fd_attr = info->attrs[DEVLINK_ATTR_NETNS_FD];
- struct nlattr *netns_id_attr = info->attrs[DEVLINK_ATTR_NETNS_ID];
- struct net *net;
-
- if (!!netns_pid_attr + !!netns_fd_attr + !!netns_id_attr > 1) {
- NL_SET_ERR_MSG_MOD(info->extack, "multiple netns identifying attributes specified");
- return ERR_PTR(-EINVAL);
- }
-
- if (netns_pid_attr) {
- net = get_net_ns_by_pid(nla_get_u32(netns_pid_attr));
- } else if (netns_fd_attr) {
- net = get_net_ns_by_fd(nla_get_u32(netns_fd_attr));
- } else if (netns_id_attr) {
- net = get_net_ns_by_id(sock_net(skb->sk),
- nla_get_u32(netns_id_attr));
- if (!net)
- net = ERR_PTR(-EINVAL);
- } else {
- WARN_ON(1);
- net = ERR_PTR(-EINVAL);
- }
- if (IS_ERR(net)) {
- NL_SET_ERR_MSG_MOD(info->extack, "Unknown network namespace");
- return ERR_PTR(-EINVAL);
- }
- if (!netlink_ns_capable(skb, net->user_ns, CAP_NET_ADMIN)) {
- put_net(net);
- return ERR_PTR(-EPERM);
- }
- return net;
-}
-
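For context, the pid/fd/id attributes parsed above are what the iproute2 devlink CLI's netns argument carries; a typical invocation (the handle is illustrative):

	# move pci/0000:01:00.0 into the netns of PID 1234 during reload
	devlink dev reload pci/0000:01:00.0 netns 1234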
-static void devlink_param_notify(struct devlink *devlink,
- unsigned int port_index,
- struct devlink_param_item *param_item,
- enum devlink_command cmd);
-
-static void devlink_ns_change_notify(struct devlink *devlink,
- struct net *dest_net, struct net *curr_net,
- bool new)
-{
- struct devlink_param_item *param_item;
- enum devlink_command cmd;
-
- /* Userspace needs to be notified about devlink objects
- * being removed from the original network namespace and
- * entering the new one. The rest of the devlink objects are
- * re-created during the reload process, so those notifications
- * are generated separately.
- */
-
- if (!dest_net || net_eq(dest_net, curr_net))
- return;
-
- if (new)
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-
- cmd = new ? DEVLINK_CMD_PARAM_NEW : DEVLINK_CMD_PARAM_DEL;
- list_for_each_entry(param_item, &devlink->param_list, list)
- devlink_param_notify(devlink, 0, param_item, cmd);
-
- if (!new)
- devlink_notify(devlink, DEVLINK_CMD_DEL);
-}
-
-static bool devlink_reload_supported(const struct devlink_ops *ops)
-{
- return ops->reload_down && ops->reload_up;
-}
-
-static void devlink_reload_failed_set(struct devlink *devlink,
- bool reload_failed)
-{
- if (devlink->reload_failed == reload_failed)
- return;
- devlink->reload_failed = reload_failed;
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-bool devlink_is_reload_failed(const struct devlink *devlink)
-{
- return devlink->reload_failed;
-}
-EXPORT_SYMBOL_GPL(devlink_is_reload_failed);
-
-static void
-__devlink_reload_stats_update(struct devlink *devlink, u32 *reload_stats,
- enum devlink_reload_limit limit, u32 actions_performed)
-{
- unsigned long actions = actions_performed;
- int stat_idx;
- int action;
-
- for_each_set_bit(action, &actions, __DEVLINK_RELOAD_ACTION_MAX) {
- stat_idx = limit * __DEVLINK_RELOAD_ACTION_MAX + action;
- reload_stats[stat_idx]++;
- }
- devlink_notify(devlink, DEVLINK_CMD_NEW);
-}
-
-static void
-devlink_reload_stats_update(struct devlink *devlink, enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- __devlink_reload_stats_update(devlink, devlink->stats.reload_stats, limit,
- actions_performed);
-}
-
-/**
- * devlink_remote_reload_actions_performed - Update devlink on reload actions
- * performed which are not a direct result of a devlink reload call.
- *
- * This should be called by a driver after performing reload actions that were
- * not the result of a devlink reload call. For example, fw_activate was
- * performed as a result of a devlink reload on another host triggering
- * fw_activate on this one.
- * The motivation for this function is to keep track of the reload actions
- * performed on this device, whether or not they were done due to a direct
- * devlink reload call.
- *
- * @devlink: devlink
- * @limit: reload limit
- * @actions_performed: bitmask of actions performed
- */
-void devlink_remote_reload_actions_performed(struct devlink *devlink,
- enum devlink_reload_limit limit,
- u32 actions_performed)
-{
- if (WARN_ON(!actions_performed ||
- actions_performed & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- actions_performed >= BIT(__DEVLINK_RELOAD_ACTION_MAX) ||
- limit > DEVLINK_RELOAD_LIMIT_MAX))
- return;
-
- __devlink_reload_stats_update(devlink, devlink->stats.remote_reload_stats, limit,
- actions_performed);
-}
-EXPORT_SYMBOL_GPL(devlink_remote_reload_actions_performed);
-
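A minimal driver-side sketch of the intended call, e.g. after the device signals that a reload on another host activated new firmware; the event handler name is hypothetical, while the devlink call and flags are as exported above:

	static void my_drv_fw_activated_event(struct devlink *devlink)
	{
		/* firmware was activated remotely, not by our own reload */
		devlink_remote_reload_actions_performed(devlink,
				DEVLINK_RELOAD_LIMIT_UNSPEC,
				BIT(DEVLINK_RELOAD_ACTION_FW_ACTIVATE));
	}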
-static int devlink_reload(struct devlink *devlink, struct net *dest_net,
- enum devlink_reload_action action, enum devlink_reload_limit limit,
- u32 *actions_performed, struct netlink_ext_ack *extack)
-{
- u32 remote_reload_stats[DEVLINK_RELOAD_STATS_ARRAY_SIZE];
- struct net *curr_net;
- int err;
-
- memcpy(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats));
-
- curr_net = devlink_net(devlink);
- devlink_ns_change_notify(devlink, dest_net, curr_net, false);
- err = devlink->ops->reload_down(devlink, !!dest_net, action, limit, extack);
- if (err)
- return err;
-
- if (dest_net && !net_eq(dest_net, curr_net)) {
- move_netdevice_notifier_net(curr_net, dest_net,
- &devlink->netdevice_nb);
- write_pnet(&devlink->_net, dest_net);
- }
-
- err = devlink->ops->reload_up(devlink, action, limit, actions_performed, extack);
- devlink_reload_failed_set(devlink, !!err);
- if (err)
- return err;
-
- devlink_ns_change_notify(devlink, dest_net, curr_net, true);
- WARN_ON(!(*actions_performed & BIT(action)));
- /* Catch a driver updating the remote reload stats within devlink reload */
- WARN_ON(memcmp(remote_reload_stats, devlink->stats.remote_reload_stats,
- sizeof(remote_reload_stats)));
- devlink_reload_stats_update(devlink, limit, *actions_performed);
- return 0;
-}
-
-static int
-devlink_nl_reload_actions_performed_snd(struct devlink *devlink, u32 actions_performed,
- enum devlink_command cmd, struct genl_info *info)
-{
- struct sk_buff *msg;
- void *hdr;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &devlink_nl_family, 0, cmd);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (nla_put_bitfield32(msg, DEVLINK_ATTR_RELOAD_ACTIONS_PERFORMED, actions_performed,
- actions_performed))
- goto nla_put_failure;
- genlmsg_end(msg, hdr);
-
- return genlmsg_reply(msg, info);
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_reload(struct sk_buff *skb, struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- enum devlink_reload_action action;
- enum devlink_reload_limit limit;
- struct net *dest_net = NULL;
- u32 actions_performed;
- int err;
-
- if (!(devlink->features & DEVLINK_F_RELOAD))
- return -EOPNOTSUPP;
-
- err = devlink_resources_validate(devlink, NULL, info);
- if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "resources size validation failed");
- return err;
- }
-
- if (info->attrs[DEVLINK_ATTR_RELOAD_ACTION])
- action = nla_get_u8(info->attrs[DEVLINK_ATTR_RELOAD_ACTION]);
- else
- action = DEVLINK_RELOAD_ACTION_DRIVER_REINIT;
-
- if (!devlink_reload_action_is_supported(devlink, action)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested reload action is not supported by the driver");
- return -EOPNOTSUPP;
- }
-
- limit = DEVLINK_RELOAD_LIMIT_UNSPEC;
- if (info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]) {
- struct nla_bitfield32 limits;
- u32 limits_selected;
-
- limits = nla_get_bitfield32(info->attrs[DEVLINK_ATTR_RELOAD_LIMITS]);
- limits_selected = limits.value & limits.selector;
- if (!limits_selected) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid limit selected");
- return -EINVAL;
- }
- for (limit = 0 ; limit <= DEVLINK_RELOAD_LIMIT_MAX ; limit++)
- if (limits_selected & BIT(limit))
- break;
- /* UAPI enables multiselection, but currently it is not used */
- if (limits_selected != BIT(limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Multiselection of limit is not supported");
- return -EOPNOTSUPP;
- }
- if (!devlink_reload_limit_is_supported(devlink, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is not supported by the driver");
- return -EOPNOTSUPP;
- }
- if (devlink_reload_combination_is_invalid(action, limit)) {
- NL_SET_ERR_MSG_MOD(info->extack,
- "Requested limit is invalid for this action");
- return -EINVAL;
- }
- }
- if (info->attrs[DEVLINK_ATTR_NETNS_PID] ||
- info->attrs[DEVLINK_ATTR_NETNS_FD] ||
- info->attrs[DEVLINK_ATTR_NETNS_ID]) {
- dest_net = devlink_netns_get(skb, info);
- if (IS_ERR(dest_net))
- return PTR_ERR(dest_net);
- }
-
- err = devlink_reload(devlink, dest_net, action, limit, &actions_performed, info->extack);
-
- if (dest_net)
- put_net(dest_net);
-
- if (err)
- return err;
- /* For backward compatibility, generate a reply only if the user passed attributes */
- if (!info->attrs[DEVLINK_ATTR_RELOAD_ACTION] && !info->attrs[DEVLINK_ATTR_RELOAD_LIMITS])
- return 0;
-
- return devlink_nl_reload_actions_performed_snd(devlink, actions_performed,
- DEVLINK_CMD_RELOAD, info);
-}
-
-static int devlink_nl_flash_update_fill(struct sk_buff *msg,
- struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- void *hdr;
-
- hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto nla_put_failure;
-
- if (cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS)
- goto out;
-
- if (params->status_msg &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_MSG,
- params->status_msg))
- goto nla_put_failure;
- if (params->component &&
- nla_put_string(msg, DEVLINK_ATTR_FLASH_UPDATE_COMPONENT,
- params->component))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_DONE,
- params->done, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TOTAL,
- params->total, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_FLASH_UPDATE_STATUS_TIMEOUT,
- params->timeout, DEVLINK_ATTR_PAD))
- goto nla_put_failure;
-
-out:
- genlmsg_end(msg, hdr);
- return 0;
-
-nla_put_failure:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void __devlink_flash_update_notify(struct devlink *devlink,
- enum devlink_command cmd,
- struct devlink_flash_notify *params)
-{
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_FLASH_UPDATE &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_END &&
- cmd != DEVLINK_CMD_FLASH_UPDATE_STATUS);
-
- if (!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED))
- return;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_flash_update_fill(msg, devlink, cmd, params);
- if (err)
- goto out_free_msg;
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
- msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
- return;
-
-out_free_msg:
- nlmsg_free(msg);
-}
-
-static void devlink_flash_update_begin_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE,
- &params);
-}
-
-static void devlink_flash_update_end_notify(struct devlink *devlink)
-{
- struct devlink_flash_notify params = {};
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_END,
- &params);
-}
-
-void devlink_flash_update_status_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long done,
- unsigned long total)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .done = done,
- .total = total,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_status_notify);
-
-void devlink_flash_update_timeout_notify(struct devlink *devlink,
- const char *status_msg,
- const char *component,
- unsigned long timeout)
-{
- struct devlink_flash_notify params = {
- .status_msg = status_msg,
- .component = component,
- .timeout = timeout,
- };
-
- __devlink_flash_update_notify(devlink,
- DEVLINK_CMD_FLASH_UPDATE_STATUS,
- &params);
-}
-EXPORT_SYMBOL_GPL(devlink_flash_update_timeout_notify);
-
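A sketch of how a driver's ->flash_update() implementation would typically drive the two helpers exported above; the hardware write and chunking are hypothetical, while the notify calls match the signatures in this file:

	/* inside a driver's ->flash_update(); params->fw comes from
	 * request_firmware() in devlink_nl_cmd_flash_update()
	 */
	for (offset = 0; offset < params->fw->size; offset += chunk) {
		my_hw_write_chunk(dev, params->fw->data + offset, chunk);
		devlink_flash_update_status_notify(devlink, "Flashing", NULL,
						   offset, params->fw->size);
	}
	/* device needs up to 10s to commit; let userspace show a countdown */
	devlink_flash_update_timeout_notify(devlink, "Committing", NULL, 10);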
-struct devlink_info_req {
- struct sk_buff *msg;
- void (*version_cb)(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv);
- void *version_cb_priv;
-};
-
-struct devlink_flash_component_lookup_ctx {
- const char *lookup_name;
- bool lookup_name_found;
-};
-
-static void
-devlink_flash_component_lookup_cb(const char *version_name,
- enum devlink_info_version_type version_type,
- void *version_cb_priv)
-{
- struct devlink_flash_component_lookup_ctx *lookup_ctx = version_cb_priv;
-
- if (version_type != DEVLINK_INFO_VERSION_TYPE_COMPONENT ||
- lookup_ctx->lookup_name_found)
- return;
-
- lookup_ctx->lookup_name_found =
- !strcmp(lookup_ctx->lookup_name, version_name);
-}
-
-static int devlink_flash_component_get(struct devlink *devlink,
- struct nlattr *nla_component,
- const char **p_component,
- struct netlink_ext_ack *extack)
-{
- struct devlink_flash_component_lookup_ctx lookup_ctx = {};
- struct devlink_info_req req = {};
- const char *component;
- int ret;
-
- if (!nla_component)
- return 0;
-
- component = nla_data(nla_component);
-
- if (!devlink->ops->info_get) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "component update is not supported by this device");
- return -EOPNOTSUPP;
- }
-
- lookup_ctx.lookup_name = component;
- req.version_cb = devlink_flash_component_lookup_cb;
- req.version_cb_priv = &lookup_ctx;
-
- ret = devlink->ops->info_get(devlink, &req, NULL);
- if (ret)
- return ret;
-
- if (!lookup_ctx.lookup_name_found) {
- NL_SET_ERR_MSG_ATTR(extack, nla_component,
- "selected component is not supported by this device");
- return -EINVAL;
- }
- *p_component = component;
- return 0;
-}
-
-static int devlink_nl_cmd_flash_update(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *nla_overwrite_mask, *nla_file_name;
- struct devlink_flash_update_params params = {};
- struct devlink *devlink = info->user_ptr[0];
- const char *file_name;
- u32 supported_params;
- int ret;
-
- if (!devlink->ops->flash_update)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME))
- return -EINVAL;
-
- ret = devlink_flash_component_get(devlink,
- info->attrs[DEVLINK_ATTR_FLASH_UPDATE_COMPONENT],
- &params.component, info->extack);
- if (ret)
- return ret;
-
- supported_params = devlink->ops->supported_flash_update_params;
-
- nla_overwrite_mask = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK];
- if (nla_overwrite_mask) {
- struct nla_bitfield32 sections;
-
- if (!(supported_params & DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK)) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_overwrite_mask,
- "overwrite settings are not supported by this device");
- return -EOPNOTSUPP;
- }
- sections = nla_get_bitfield32(nla_overwrite_mask);
- params.overwrite_mask = sections.value & sections.selector;
- }
-
- nla_file_name = info->attrs[DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME];
- file_name = nla_data(nla_file_name);
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret) {
- NL_SET_ERR_MSG_ATTR(info->extack, nla_file_name, "failed to locate the requested firmware file");
- return ret;
- }
-
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, info->extack);
- devlink_flash_update_end_notify(devlink);
-
- release_firmware(params.fw);
-
- return ret;
-}
-
-static int
-devlink_nl_selftests_fill(struct sk_buff *msg, struct devlink *devlink,
- u32 portid, u32 seq, int flags,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *selftests;
- void *hdr;
- int err;
- int i;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags,
- DEVLINK_CMD_SELFTESTS_GET);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto err_cancel_msg;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- if (devlink->ops->selftest_check(devlink, i, extack)) {
- err = nla_put_flag(msg, i);
- if (err)
- goto err_cancel_msg;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_selftests_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- if (!devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_selftests_fill(msg, devlink, info->snd_portid,
- info->snd_seq, 0, info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_selftests_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start || !devlink->ops->selftest_check)
- goto inc;
-
- devl_lock(devlink);
- err = devlink_nl_selftests_fill(msg, devlink,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
- devl_unlock(devlink);
- if (err) {
- devlink_put(devlink);
- break;
- }
-inc:
- idx++;
- devlink_put(devlink);
- }
-
- if (err != -EMSGSIZE)
- return err;
-
- cb->args[0] = idx;
- return msg->len;
-}
-
-static int devlink_selftest_result_put(struct sk_buff *skb, unsigned int id,
- enum devlink_selftest_status test_status)
-{
- struct nlattr *result_attr;
-
- result_attr = nla_nest_start(skb, DEVLINK_ATTR_SELFTEST_RESULT);
- if (!result_attr)
- return -EMSGSIZE;
-
- if (nla_put_u32(skb, DEVLINK_ATTR_SELFTEST_RESULT_ID, id) ||
- nla_put_u8(skb, DEVLINK_ATTR_SELFTEST_RESULT_STATUS,
- test_status))
- goto nla_put_failure;
-
- nla_nest_end(skb, result_attr);
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(skb, result_attr);
- return -EMSGSIZE;
-}
-
-static int devlink_nl_cmd_selftests_run(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct nlattr *tb[DEVLINK_ATTR_SELFTEST_ID_MAX + 1];
- struct devlink *devlink = info->user_ptr[0];
- struct nlattr *attrs, *selftests;
- struct sk_buff *msg;
- void *hdr;
- int err;
- int i;
-
- if (!devlink->ops->selftest_run || !devlink->ops->selftest_check)
- return -EOPNOTSUPP;
-
- if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_SELFTESTS))
- return -EINVAL;
-
- attrs = info->attrs[DEVLINK_ATTR_SELFTESTS];
-
- err = nla_parse_nested(tb, DEVLINK_ATTR_SELFTEST_ID_MAX, attrs,
- devlink_selftest_nl_policy, info->extack);
- if (err < 0)
- return err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = -EMSGSIZE;
- hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq,
- &devlink_nl_family, 0, DEVLINK_CMD_SELFTESTS_RUN);
- if (!hdr)
- goto free_msg;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- selftests = nla_nest_start(msg, DEVLINK_ATTR_SELFTESTS);
- if (!selftests)
- goto genlmsg_cancel;
-
- for (i = DEVLINK_ATTR_SELFTEST_ID_UNSPEC + 1;
- i <= DEVLINK_ATTR_SELFTEST_ID_MAX; i++) {
- enum devlink_selftest_status test_status;
-
- if (nla_get_flag(tb[i])) {
- if (!devlink->ops->selftest_check(devlink, i,
- info->extack)) {
- if (devlink_selftest_result_put(msg, i,
- DEVLINK_SELFTEST_STATUS_SKIP))
- goto selftests_nest_cancel;
- continue;
- }
-
- test_status = devlink->ops->selftest_run(devlink, i,
- info->extack);
- if (devlink_selftest_result_put(msg, i, test_status))
- goto selftests_nest_cancel;
- }
- }
-
- nla_nest_end(msg, selftests);
- genlmsg_end(msg, hdr);
- return genlmsg_reply(msg, info);
-
-selftests_nest_cancel:
- nla_nest_cancel(msg, selftests);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
-free_msg:
- nlmsg_free(msg);
- return err;
-}
-
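/*
 * For illustration: the handlers above call two driver-provided ops,
 * ->selftest_check() and ->selftest_run(). A minimal sketch of a driver
 * implementing them follows; the "foo" names and foo_flash_selftest()
 * helper are hypothetical, while the op signatures are inferred from the
 * call sites in devlink_nl_selftests_fill() and
 * devlink_nl_cmd_selftests_run() above.
 */
static bool foo_flash_selftest(struct devlink *devlink); /* hypothetical */

static bool foo_selftest_check(struct devlink *devlink, unsigned int id,
			       struct netlink_ext_ack *extack)
{
	/* Advertise only the flash selftest; other ids are not supported. */
	return id == DEVLINK_ATTR_SELFTEST_ID_FLASH;
}

static enum devlink_selftest_status
foo_selftest_run(struct devlink *devlink, unsigned int id,
		 struct netlink_ext_ack *extack)
{
	if (id != DEVLINK_ATTR_SELFTEST_ID_FLASH)
		return DEVLINK_SELFTEST_STATUS_SKIP;

	/* foo_flash_selftest() stands in for the actual device test. */
	return foo_flash_selftest(devlink) ? DEVLINK_SELFTEST_STATUS_FAIL
					   : DEVLINK_SELFTEST_STATUS_PASS;
}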
static const struct devlink_param devlink_param_generic[] = {
{
.id = DEVLINK_PARAM_GENERIC_ID_INT_ERR_RESET,
@@ -5437,26 +3953,22 @@ static int devlink_param_driver_verify(const struct devlink_param *param)
}
static struct devlink_param_item *
-devlink_param_find_by_name(struct list_head *param_list,
- const char *param_name)
+devlink_param_find_by_name(struct xarray *params, const char *param_name)
{
struct devlink_param_item *param_item;
+ unsigned long param_id;
- list_for_each_entry(param_item, param_list, list)
+ xa_for_each(params, param_id, param_item) {
if (!strcmp(param_item->param->name, param_name))
return param_item;
+ }
return NULL;
}
static struct devlink_param_item *
-devlink_param_find_by_id(struct list_head *param_list, u32 param_id)
+devlink_param_find_by_id(struct xarray *params, u32 param_id)
{
- struct devlink_param_item *param_item;
-
- list_for_each_entry(param_item, param_list, list)
- if (param_item->param->id == param_id)
- return param_item;
- return NULL;
+ return xa_load(params, param_id);
}
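/*
 * The xa_load() lookup above can be O(1) only because registration stores
 * each item at an index equal to its parameter id. A minimal sketch of
 * that insertion side (foo_param_register() is hypothetical; xa_insert()
 * is the regular XArray API):
 */
static int foo_param_register(struct devlink *devlink,
			      struct devlink_param_item *item)
{
	/* Index == param id, so xa_load(&devlink->params, id) finds it. */
	return xa_insert(&devlink->params, item->param->id, item, GFP_KERNEL);
}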
static bool
@@ -5575,9 +4087,12 @@ static int devlink_nl_param_fill(struct sk_buff *msg, struct devlink *devlink,
if (!devlink_param_cmode_is_supported(param, i))
continue;
if (i == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- if (!param_item->driverinit_value_valid)
+ if (param_item->driverinit_value_new_valid)
+ param_value[i] = param_item->driverinit_value_new;
+ else if (param_item->driverinit_value_valid)
+ param_value[i] = param_item->driverinit_value;
+ else
return -EOPNOTSUPP;
- param_value[i] = param_item->driverinit_value;
} else {
ctx.cmode = i;
err = devlink_param_get(devlink, param, &ctx);
@@ -5654,7 +4169,13 @@ static void devlink_param_notify(struct devlink *devlink,
WARN_ON(cmd != DEVLINK_CMD_PARAM_NEW && cmd != DEVLINK_CMD_PARAM_DEL &&
cmd != DEVLINK_CMD_PORT_PARAM_NEW &&
cmd != DEVLINK_CMD_PORT_PARAM_DEL);
- ASSERT_DEVLINK_REGISTERED(devlink);
+
+ /* devlink_notify_register() / devlink_notify_unregister()
+ * will replay the notifications if the params are added/removed
+ * outside of the lifetime of the instance.
+ */
+ if (!devl_is_registered(devlink))
+ return;
msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
@@ -5670,48 +4191,36 @@ static void devlink_param_notify(struct devlink *devlink,
msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
}
-static int devlink_nl_cmd_param_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_param_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_param_item *param_item;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
+ unsigned long param_id;
int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(param_item, &devlink->param_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_param_fill(msg, devlink, 0, param_item,
- DEVLINK_CMD_PARAM_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err == -EOPNOTSUPP) {
- err = 0;
- } else if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
- idx++;
+ xa_for_each_start(&devlink->params, param_id, param_item, state->idx) {
+ err = devlink_nl_param_fill(msg, devlink, 0, param_item,
+ DEVLINK_CMD_PARAM_GET,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err == -EOPNOTSUPP) {
+ err = 0;
+ } else if (err) {
+ state->idx = param_id;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
}
-out:
- if (err != -EMSGSIZE)
- return err;
- cb->args[0] = idx;
- return msg->len;
+ return err;
}
+const struct devlink_cmd devl_cmd_param_get = {
+ .dump_one = devlink_nl_cmd_param_get_dump_one,
+};
+
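/*
 * A simplified sketch of the contract behind ->dump_one(): the shared
 * iterator (the real one lives in net/devlink/netlink.c) walks the
 * registered instances, invokes dump_one under the instance lock, and
 * relies on -EMSGSIZE plus state->idx to pause and resume. Bookkeeping
 * of which instance to resume from is elided here.
 */
static int devlink_instance_iter_dump_sketch(struct sk_buff *msg,
					     struct netlink_callback *cb,
					     const struct devlink_cmd *cmd)
{
	struct devlink_nl_dump_state *state = devlink_dump_state(cb);
	struct devlink *devlink;
	unsigned long index;
	int err = 0;

	devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
		devl_lock(devlink);
		err = cmd->dump_one(msg, devlink, cb);
		devl_unlock(devlink);
		devlink_put(devlink);
		if (err)
			break;
		/* Instance fully dumped; restart the per-instance index. */
		state->idx = 0;
	}
	if (err != -EMSGSIZE)
		return err;
	return msg->len;
}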
static int
devlink_param_type_get_from_info(struct genl_info *info,
enum devlink_param_type *param_type)
@@ -5788,8 +4297,7 @@ devlink_param_value_get_from_info(const struct devlink_param *param,
}
static struct devlink_param_item *
-devlink_param_get_from_info(struct list_head *param_list,
- struct genl_info *info)
+devlink_param_get_from_info(struct xarray *params, struct genl_info *info)
{
char *param_name;
@@ -5797,7 +4305,7 @@ devlink_param_get_from_info(struct list_head *param_list,
return NULL;
param_name = nla_data(info->attrs[DEVLINK_ATTR_PARAM_NAME]);
- return devlink_param_find_by_name(param_list, param_name);
+ return devlink_param_find_by_name(params, param_name);
}
static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
@@ -5808,7 +4316,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
struct sk_buff *msg;
int err;
- param_item = devlink_param_get_from_info(&devlink->param_list, info);
+ param_item = devlink_param_get_from_info(&devlink->params, info);
if (!param_item)
return -EINVAL;
@@ -5829,7 +4337,7 @@ static int devlink_nl_cmd_param_get_doit(struct sk_buff *skb,
static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
unsigned int port_index,
- struct list_head *param_list,
+ struct xarray *params,
struct genl_info *info,
enum devlink_command cmd)
{
@@ -5841,7 +4349,7 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
union devlink_param_value value;
int err = 0;
- param_item = devlink_param_get_from_info(param_list, info);
+ param_item = devlink_param_get_from_info(params, info);
if (!param_item)
return -EINVAL;
param = param_item->param;
@@ -5866,11 +4374,8 @@ static int __devlink_nl_cmd_param_set_doit(struct devlink *devlink,
return -EOPNOTSUPP;
if (cmode == DEVLINK_PARAM_CMODE_DRIVERINIT) {
- if (param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, value.vstr);
- else
- param_item->driverinit_value = value;
- param_item->driverinit_value_valid = true;
+ param_item->driverinit_value_new = value;
+ param_item->driverinit_value_new_valid = true;
} else {
if (!param->set)
return -EOPNOTSUPP;
@@ -5890,28 +4395,28 @@ static int devlink_nl_cmd_param_set_doit(struct sk_buff *skb,
{
struct devlink *devlink = info->user_ptr[0];
- return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->param_list,
+ return __devlink_nl_cmd_param_set_doit(devlink, 0, &devlink->params,
info, DEVLINK_CMD_PARAM_NEW);
}
static int devlink_nl_cmd_port_param_get_dumpit(struct sk_buff *msg,
struct netlink_callback *cb)
{
- NL_SET_ERR_MSG_MOD(cb->extack, "Port params are not supported");
+ NL_SET_ERR_MSG(cb->extack, "Port params are not supported");
return msg->len;
}
static int devlink_nl_cmd_port_param_get_doit(struct sk_buff *skb,
struct genl_info *info)
{
- NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported");
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
return -EINVAL;
}
static int devlink_nl_cmd_port_param_set_doit(struct sk_buff *skb,
struct genl_info *info)
{
- NL_SET_ERR_MSG_MOD(info->extack, "Port params are not supported");
+ NL_SET_ERR_MSG(info->extack, "Port params are not supported");
return -EINVAL;
}
@@ -6375,21 +4880,20 @@ out:
return err;
}
-static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb,
- struct devlink *devlink,
- int *idx,
- int start)
+static int
+devlink_nl_cmd_region_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_region *region;
struct devlink_port *port;
unsigned long port_index;
- int err = 0;
+ int idx = 0;
+ int err;
- devl_lock(devlink);
list_for_each_entry(region, &devlink->region_list, list) {
- if (*idx < start) {
- (*idx)++;
+ if (idx < state->idx) {
+ idx++;
continue;
}
err = devlink_nl_region_fill(msg, devlink,
@@ -6397,43 +4901,28 @@ static int devlink_nl_cmd_region_get_devlink_dumpit(struct sk_buff *msg,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq,
NLM_F_MULTI, region);
- if (err)
- goto out;
- (*idx)++;
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
+ idx++;
}
xa_for_each(&devlink->ports, port_index, port) {
- err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, idx,
- start);
- if (err)
- goto out;
+ err = devlink_nl_cmd_region_get_port_dumpit(msg, cb, port, &idx,
+ state->idx);
+ if (err) {
+ state->idx = idx;
+ return err;
+ }
}
-out:
- devl_unlock(devlink);
- return err;
+ return 0;
}
-static int devlink_nl_cmd_region_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- err = devlink_nl_cmd_region_get_devlink_dumpit(msg, cb, devlink,
- &idx, start);
- devlink_put(devlink);
- if (err)
- goto out;
- }
-out:
- cb->args[0] = idx;
- return msg->len;
-}
+const struct devlink_cmd devl_cmd_region_get = {
+ .dump_one = devlink_nl_cmd_region_get_dump_one,
+};
static int devlink_nl_cmd_region_del(struct sk_buff *skb,
struct genl_info *info)
@@ -6496,7 +4985,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
int err;
if (GENL_REQ_ATTR_CHECK(info, DEVLINK_ATTR_REGION_NAME)) {
- NL_SET_ERR_MSG_MOD(info->extack, "No region name provided");
+ NL_SET_ERR_MSG(info->extack, "No region name provided");
return -EINVAL;
}
@@ -6516,19 +5005,19 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
region = devlink_region_get_by_name(devlink, region_name);
if (!region) {
- NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not exist");
+ NL_SET_ERR_MSG(info->extack, "The requested region does not exist");
return -EINVAL;
}
if (!region->ops->snapshot) {
- NL_SET_ERR_MSG_MOD(info->extack, "The requested region does not support taking an immediate snapshot");
+ NL_SET_ERR_MSG(info->extack, "The requested region does not support taking an immediate snapshot");
return -EOPNOTSUPP;
}
mutex_lock(&region->snapshot_lock);
if (region->cur_snapshots == region->max_snapshots) {
- NL_SET_ERR_MSG_MOD(info->extack, "The region has reached the maximum number of stored snapshots");
+ NL_SET_ERR_MSG(info->extack, "The region has reached the maximum number of stored snapshots");
err = -ENOSPC;
goto unlock;
}
@@ -6538,7 +5027,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
snapshot_id = nla_get_u32(snapshot_id_attr);
if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
- NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use");
+ NL_SET_ERR_MSG(info->extack, "The requested snapshot id is already in use");
err = -EEXIST;
goto unlock;
}
@@ -6549,7 +5038,7 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
} else {
err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id");
+ NL_SET_ERR_MSG(info->extack, "Failed to allocate a new snapshot id");
goto unlock;
}
}
@@ -6716,6 +5205,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
struct netlink_callback *cb)
{
const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct nlattr *chunks_attr, *region_attr, *snapshot_attr;
u64 ret_offset, start_offset, end_offset = U64_MAX;
struct nlattr **attrs = info->attrs;
@@ -6729,14 +5219,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
void *hdr;
int err;
- start_offset = *((u64 *)&cb->args[0]);
+ start_offset = state->start_offset;
- devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
+ devlink = devlink_get_from_attrs_lock(sock_net(cb->skb->sk), attrs);
if (IS_ERR(devlink))
return PTR_ERR(devlink);
- devl_lock(devlink);
-
if (!attrs[DEVLINK_ATTR_REGION_NAME]) {
NL_SET_ERR_MSG(cb->extack, "No region name provided");
err = -EINVAL;
@@ -6868,7 +5356,7 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
goto nla_put_failure;
}
- *((u64 *)&cb->args[0]) = ret_offset;
+ state->start_offset = ret_offset;
nla_nest_end(skb, chunks_attr);
genlmsg_end(skb, hdr);
@@ -6884,1605 +5372,6 @@ out_unlock:
return err;
}
-int devlink_info_serial_number_put(struct devlink_info_req *req, const char *sn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_SERIAL_NUMBER, sn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_serial_number_put);
-
-int devlink_info_board_serial_number_put(struct devlink_info_req *req,
- const char *bsn)
-{
- if (!req->msg)
- return 0;
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_BOARD_SERIAL_NUMBER,
- bsn);
-}
-EXPORT_SYMBOL_GPL(devlink_info_board_serial_number_put);
-
-static int devlink_info_version_put(struct devlink_info_req *req, int attr,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- struct nlattr *nest;
- int err;
-
- if (req->version_cb)
- req->version_cb(version_name, version_type,
- req->version_cb_priv);
-
- if (!req->msg)
- return 0;
-
- nest = nla_nest_start_noflag(req->msg, attr);
- if (!nest)
- return -EMSGSIZE;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_NAME,
- version_name);
- if (err)
- goto nla_put_failure;
-
- err = nla_put_string(req->msg, DEVLINK_ATTR_INFO_VERSION_VALUE,
- version_value);
- if (err)
- goto nla_put_failure;
-
- nla_nest_end(req->msg, nest);
-
- return 0;
-
-nla_put_failure:
- nla_nest_cancel(req->msg, nest);
- return err;
-}
-
-int devlink_info_version_fixed_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_FIXED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_fixed_put);
-
-int devlink_info_version_stored_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put);
-
-int devlink_info_version_stored_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_STORED,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_stored_put_ext);
-
-int devlink_info_version_running_put(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- DEVLINK_INFO_VERSION_TYPE_NONE);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put);
-
-int devlink_info_version_running_put_ext(struct devlink_info_req *req,
- const char *version_name,
- const char *version_value,
- enum devlink_info_version_type version_type)
-{
- return devlink_info_version_put(req, DEVLINK_ATTR_INFO_VERSION_RUNNING,
- version_name, version_value,
- version_type);
-}
-EXPORT_SYMBOL_GPL(devlink_info_version_running_put_ext);
-
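/*
 * A minimal ->info_get() sketch built on the helpers above. The "foo"
 * strings and foo_get_running_fw() are hypothetical; the generic version
 * names are the ones defined in include/net/devlink.h.
 */
static void foo_get_running_fw(char *buf, size_t len); /* hypothetical */

static int foo_info_get(struct devlink *devlink, struct devlink_info_req *req,
			struct netlink_ext_ack *extack)
{
	char fw_ver[32];
	int err;

	err = devlink_info_serial_number_put(req, "FOO123456");
	if (err)
		return err;

	err = devlink_info_version_fixed_put(req,
					     DEVLINK_INFO_VERSION_GENERIC_BOARD_ID,
					     "foo-board-rev2");
	if (err)
		return err;

	foo_get_running_fw(fw_ver, sizeof(fw_ver));
	return devlink_info_version_running_put(req,
						DEVLINK_INFO_VERSION_GENERIC_FW,
						fw_ver);
}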
-static int devlink_nl_driver_info_get(struct device_driver *drv,
- struct devlink_info_req *req)
-{
- if (!drv)
- return 0;
-
- if (drv->name[0])
- return nla_put_string(req->msg, DEVLINK_ATTR_INFO_DRIVER_NAME,
- drv->name);
-
- return 0;
-}
-
-static int
-devlink_nl_info_fill(struct sk_buff *msg, struct devlink *devlink,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags, struct netlink_ext_ack *extack)
-{
- struct device *dev = devlink_to_dev(devlink);
- struct devlink_info_req req = {};
- void *hdr;
- int err;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- err = -EMSGSIZE;
- if (devlink_nl_put_handle(msg, devlink))
- goto err_cancel_msg;
-
- req.msg = msg;
- if (devlink->ops->info_get) {
- err = devlink->ops->info_get(devlink, &req, extack);
- if (err)
- goto err_cancel_msg;
- }
-
- err = devlink_nl_driver_info_get(dev->driver, &req);
- if (err)
- goto err_cancel_msg;
-
- genlmsg_end(msg, hdr);
- return 0;
-
-err_cancel_msg:
- genlmsg_cancel(msg, hdr);
- return err;
-}
-
-static int devlink_nl_cmd_info_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct sk_buff *msg;
- int err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return -ENOMEM;
-
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- info->snd_portid, info->snd_seq, 0,
- info->extack);
- if (err) {
- nlmsg_free(msg);
- return err;
- }
-
- return genlmsg_reply(msg, info);
-}
-
-static int devlink_nl_cmd_info_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
- int idx = 0;
- int err = 0;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- if (idx < start)
- goto inc;
-
- devl_lock(devlink);
- err = devlink_nl_info_fill(msg, devlink, DEVLINK_CMD_INFO_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI,
- cb->extack);
- devl_unlock(devlink);
- if (err == -EOPNOTSUPP)
- err = 0;
- else if (err) {
- devlink_put(devlink);
- break;
- }
-inc:
- idx++;
- devlink_put(devlink);
- }
-
- if (err != -EMSGSIZE)
- return err;
-
- cb->args[0] = idx;
- return msg->len;
-}
-
-struct devlink_fmsg_item {
- struct list_head list;
- int attrtype;
- u8 nla_type;
- u16 len;
- int value[];
-};
-
-struct devlink_fmsg {
- struct list_head item_list;
-	bool putting_binary; /* This flag forces enclosing of binary data
-			      * in array brackets and requires use of the
-			      * designated API:
- * devlink_fmsg_binary_pair_nest_start()
- * devlink_fmsg_binary_pair_nest_end()
- */
-};
-
-static struct devlink_fmsg *devlink_fmsg_alloc(void)
-{
- struct devlink_fmsg *fmsg;
-
- fmsg = kzalloc(sizeof(*fmsg), GFP_KERNEL);
- if (!fmsg)
- return NULL;
-
- INIT_LIST_HEAD(&fmsg->item_list);
-
- return fmsg;
-}
-
-static void devlink_fmsg_free(struct devlink_fmsg *fmsg)
-{
- struct devlink_fmsg_item *item, *tmp;
-
- list_for_each_entry_safe(item, tmp, &fmsg->item_list, list) {
- list_del(&item->list);
- kfree(item);
- }
- kfree(fmsg);
-}
-
-static int devlink_fmsg_nest_common(struct devlink_fmsg *fmsg,
- int attrtype)
-{
- struct devlink_fmsg_item *item;
-
- item = kzalloc(sizeof(*item), GFP_KERNEL);
- if (!item)
- return -ENOMEM;
-
- item->attrtype = attrtype;
- list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
-}
-
-int devlink_fmsg_obj_nest_start(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_OBJ_NEST_START);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_start);
-
-static int devlink_fmsg_nest_end(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_NEST_END);
-}
-
-int devlink_fmsg_obj_nest_end(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_end(fmsg);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_obj_nest_end);
-
-#define DEVLINK_FMSG_MAX_SIZE (GENLMSG_DEFAULT_SIZE - GENL_HDRLEN - NLA_HDRLEN)
-
-static int devlink_fmsg_put_name(struct devlink_fmsg *fmsg, const char *name)
-{
- struct devlink_fmsg_item *item;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- if (strlen(name) + 1 > DEVLINK_FMSG_MAX_SIZE)
- return -EMSGSIZE;
-
- item = kzalloc(sizeof(*item) + strlen(name) + 1, GFP_KERNEL);
- if (!item)
- return -ENOMEM;
-
- item->nla_type = NLA_NUL_STRING;
- item->len = strlen(name) + 1;
- item->attrtype = DEVLINK_ATTR_FMSG_OBJ_NAME;
- memcpy(&item->value, name, item->len);
- list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
-}
-
-int devlink_fmsg_pair_nest_start(struct devlink_fmsg *fmsg, const char *name)
-{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_PAIR_NEST_START);
- if (err)
- return err;
-
- err = devlink_fmsg_put_name(fmsg, name);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_start);
-
-int devlink_fmsg_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_nest_end(fmsg);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_pair_nest_end);
-
-int devlink_fmsg_arr_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
-{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_nest_common(fmsg, DEVLINK_ATTR_FMSG_ARR_NEST_START);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_start);
-
-int devlink_fmsg_arr_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- int err;
-
- if (fmsg->putting_binary)
- return -EINVAL;
-
- err = devlink_fmsg_nest_end(fmsg);
- if (err)
- return err;
-
- err = devlink_fmsg_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_arr_pair_nest_end);
-
-int devlink_fmsg_binary_pair_nest_start(struct devlink_fmsg *fmsg,
- const char *name)
-{
- int err;
-
- err = devlink_fmsg_arr_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- fmsg->putting_binary = true;
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_start);
-
-int devlink_fmsg_binary_pair_nest_end(struct devlink_fmsg *fmsg)
-{
- if (!fmsg->putting_binary)
- return -EINVAL;
-
- fmsg->putting_binary = false;
- return devlink_fmsg_arr_pair_nest_end(fmsg);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_nest_end);
-
-static int devlink_fmsg_put_value(struct devlink_fmsg *fmsg,
- const void *value, u16 value_len,
- u8 value_nla_type)
-{
- struct devlink_fmsg_item *item;
-
- if (value_len > DEVLINK_FMSG_MAX_SIZE)
- return -EMSGSIZE;
-
- item = kzalloc(sizeof(*item) + value_len, GFP_KERNEL);
- if (!item)
- return -ENOMEM;
-
- item->nla_type = value_nla_type;
- item->len = value_len;
- item->attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
- memcpy(&item->value, value, item->len);
- list_add_tail(&item->list, &fmsg->item_list);
-
- return 0;
-}
-
-static int devlink_fmsg_bool_put(struct devlink_fmsg *fmsg, bool value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_FLAG);
-}
-
-static int devlink_fmsg_u8_put(struct devlink_fmsg *fmsg, u8 value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U8);
-}
-
-int devlink_fmsg_u32_put(struct devlink_fmsg *fmsg, u32 value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U32);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u32_put);
-
-static int devlink_fmsg_u64_put(struct devlink_fmsg *fmsg, u64 value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, &value, sizeof(value), NLA_U64);
-}
-
-int devlink_fmsg_string_put(struct devlink_fmsg *fmsg, const char *value)
-{
- if (fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, value, strlen(value) + 1,
- NLA_NUL_STRING);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_string_put);
-
-int devlink_fmsg_binary_put(struct devlink_fmsg *fmsg, const void *value,
- u16 value_len)
-{
- if (!fmsg->putting_binary)
- return -EINVAL;
-
- return devlink_fmsg_put_value(fmsg, value, value_len, NLA_BINARY);
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_put);
-
-int devlink_fmsg_bool_pair_put(struct devlink_fmsg *fmsg, const char *name,
- bool value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_bool_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_bool_pair_put);
-
-int devlink_fmsg_u8_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u8 value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u8_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u8_pair_put);
-
-int devlink_fmsg_u32_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u32 value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u32_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u32_pair_put);
-
-int devlink_fmsg_u64_pair_put(struct devlink_fmsg *fmsg, const char *name,
- u64 value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_u64_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_u64_pair_put);
-
-int devlink_fmsg_string_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const char *value)
-{
- int err;
-
- err = devlink_fmsg_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- err = devlink_fmsg_string_put(fmsg, value);
- if (err)
- return err;
-
- err = devlink_fmsg_pair_nest_end(fmsg);
- if (err)
- return err;
-
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_string_pair_put);
-
-int devlink_fmsg_binary_pair_put(struct devlink_fmsg *fmsg, const char *name,
- const void *value, u32 value_len)
-{
- u32 data_size;
- int end_err;
- u32 offset;
- int err;
-
- err = devlink_fmsg_binary_pair_nest_start(fmsg, name);
- if (err)
- return err;
-
- for (offset = 0; offset < value_len; offset += data_size) {
- data_size = value_len - offset;
- if (data_size > DEVLINK_FMSG_MAX_SIZE)
- data_size = DEVLINK_FMSG_MAX_SIZE;
- err = devlink_fmsg_binary_put(fmsg, value + offset, data_size);
- if (err)
- break;
- /* Exit from loop with a break (instead of
- * return) to make sure putting_binary is turned off in
- * devlink_fmsg_binary_pair_nest_end
- */
- }
-
- end_err = devlink_fmsg_binary_pair_nest_end(fmsg);
- if (end_err)
- err = end_err;
-
- return err;
-}
-EXPORT_SYMBOL_GPL(devlink_fmsg_binary_pair_put);
-
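/*
 * A short sketch of how a health reporter callback typically drives the
 * fmsg helpers above. The "foo" type and fields are hypothetical; the
 * core wraps ->diagnose() output in an object nest, so the callback only
 * emits name/value pairs.
 */
struct foo_priv {				/* hypothetical driver state */
	struct devlink_health_reporter *tx_reporter;
	u32 tx_timeouts;
	bool irq_stalled;
	u8 desc_snapshot[64];
};

static int foo_reporter_diagnose(struct devlink_health_reporter *reporter,
				 struct devlink_fmsg *fmsg,
				 struct netlink_ext_ack *extack)
{
	struct foo_priv *priv = devlink_health_reporter_priv(reporter);
	int err;

	err = devlink_fmsg_u32_pair_put(fmsg, "tx_timeouts", priv->tx_timeouts);
	if (err)
		return err;

	err = devlink_fmsg_bool_pair_put(fmsg, "irq_stalled", priv->irq_stalled);
	if (err)
		return err;

	/* Splitting into DEVLINK_FMSG_MAX_SIZE chunks is handled for us. */
	return devlink_fmsg_binary_pair_put(fmsg, "last_descriptor",
					    priv->desc_snapshot,
					    sizeof(priv->desc_snapshot));
}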
-static int
-devlink_fmsg_item_fill_type(struct devlink_fmsg_item *msg, struct sk_buff *skb)
-{
- switch (msg->nla_type) {
- case NLA_FLAG:
- case NLA_U8:
- case NLA_U32:
- case NLA_U64:
- case NLA_NUL_STRING:
- case NLA_BINARY:
- return nla_put_u8(skb, DEVLINK_ATTR_FMSG_OBJ_VALUE_TYPE,
- msg->nla_type);
- default:
- return -EINVAL;
- }
-}
-
-static int
-devlink_fmsg_item_fill_data(struct devlink_fmsg_item *msg, struct sk_buff *skb)
-{
- int attrtype = DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA;
- u8 tmp;
-
- switch (msg->nla_type) {
- case NLA_FLAG:
- /* Always provide flag data, regardless of its value */
- tmp = *(bool *) msg->value;
-
- return nla_put_u8(skb, attrtype, tmp);
- case NLA_U8:
- return nla_put_u8(skb, attrtype, *(u8 *) msg->value);
- case NLA_U32:
- return nla_put_u32(skb, attrtype, *(u32 *) msg->value);
- case NLA_U64:
- return nla_put_u64_64bit(skb, attrtype, *(u64 *) msg->value,
- DEVLINK_ATTR_PAD);
- case NLA_NUL_STRING:
- return nla_put_string(skb, attrtype, (char *) &msg->value);
- case NLA_BINARY:
- return nla_put(skb, attrtype, msg->len, (void *) &msg->value);
- default:
- return -EINVAL;
- }
-}
-
-static int
-devlink_fmsg_prepare_skb(struct devlink_fmsg *fmsg, struct sk_buff *skb,
- int *start)
-{
- struct devlink_fmsg_item *item;
- struct nlattr *fmsg_nlattr;
- int i = 0;
- int err;
-
- fmsg_nlattr = nla_nest_start_noflag(skb, DEVLINK_ATTR_FMSG);
- if (!fmsg_nlattr)
- return -EMSGSIZE;
-
- list_for_each_entry(item, &fmsg->item_list, list) {
- if (i < *start) {
- i++;
- continue;
- }
-
- switch (item->attrtype) {
- case DEVLINK_ATTR_FMSG_OBJ_NEST_START:
- case DEVLINK_ATTR_FMSG_PAIR_NEST_START:
- case DEVLINK_ATTR_FMSG_ARR_NEST_START:
- case DEVLINK_ATTR_FMSG_NEST_END:
- err = nla_put_flag(skb, item->attrtype);
- break;
- case DEVLINK_ATTR_FMSG_OBJ_VALUE_DATA:
- err = devlink_fmsg_item_fill_type(item, skb);
- if (err)
- break;
- err = devlink_fmsg_item_fill_data(item, skb);
- break;
- case DEVLINK_ATTR_FMSG_OBJ_NAME:
- err = nla_put_string(skb, item->attrtype,
- (char *) &item->value);
- break;
- default:
- err = -EINVAL;
- break;
- }
- if (!err)
- *start = ++i;
- else
- break;
- }
-
- nla_nest_end(skb, fmsg_nlattr);
- return err;
-}
-
-static int devlink_fmsg_snd(struct devlink_fmsg *fmsg,
- struct genl_info *info,
- enum devlink_command cmd, int flags)
-{
- struct nlmsghdr *nlh;
- struct sk_buff *skb;
- bool last = false;
- int index = 0;
- void *hdr;
- int err;
-
- while (!last) {
- int tmp_index = index;
-
- skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
-
- hdr = genlmsg_put(skb, info->snd_portid, info->snd_seq,
- &devlink_nl_family, flags | NLM_F_MULTI, cmd);
- if (!hdr) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
- if (!err)
- last = true;
- else if (err != -EMSGSIZE || tmp_index == index)
- goto nla_put_failure;
-
- genlmsg_end(skb, hdr);
- err = genlmsg_reply(skb, info);
- if (err)
- return err;
- }
-
- skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!skb)
- return -ENOMEM;
- nlh = nlmsg_put(skb, info->snd_portid, info->snd_seq,
- NLMSG_DONE, 0, flags | NLM_F_MULTI);
- if (!nlh) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- return genlmsg_reply(skb, info);
-
-nla_put_failure:
- nlmsg_free(skb);
- return err;
-}
-
-static int devlink_fmsg_dumpit(struct devlink_fmsg *fmsg, struct sk_buff *skb,
- struct netlink_callback *cb,
- enum devlink_command cmd)
-{
- int index = cb->args[0];
- int tmp_index = index;
- void *hdr;
- int err;
-
- hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- &devlink_nl_family, NLM_F_ACK | NLM_F_MULTI, cmd);
- if (!hdr) {
- err = -EMSGSIZE;
- goto nla_put_failure;
- }
-
- err = devlink_fmsg_prepare_skb(fmsg, skb, &index);
- if ((err && err != -EMSGSIZE) || tmp_index == index)
- goto nla_put_failure;
-
- cb->args[0] = index;
- genlmsg_end(skb, hdr);
- return skb->len;
-
-nla_put_failure:
- genlmsg_cancel(skb, hdr);
- return err;
-}
-
-struct devlink_health_reporter {
- struct list_head list;
- void *priv;
- const struct devlink_health_reporter_ops *ops;
- struct devlink *devlink;
- struct devlink_port *devlink_port;
- struct devlink_fmsg *dump_fmsg;
- struct mutex dump_lock; /* lock parallel read/write from dump buffers */
- u64 graceful_period;
- bool auto_recover;
- bool auto_dump;
- u8 health_state;
- u64 dump_ts;
- u64 dump_real_ts;
- u64 error_count;
- u64 recovery_count;
- u64 last_recovery_ts;
- refcount_t refcount;
-};
-
-void *
-devlink_health_reporter_priv(struct devlink_health_reporter *reporter)
-{
- return reporter->priv;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_priv);
-
-static struct devlink_health_reporter *
-__devlink_health_reporter_find_by_name(struct list_head *reporter_list,
- struct mutex *list_lock,
- const char *reporter_name)
-{
- struct devlink_health_reporter *reporter;
-
- lockdep_assert_held(list_lock);
- list_for_each_entry(reporter, reporter_list, list)
- if (!strcmp(reporter->ops->name, reporter_name))
- return reporter;
- return NULL;
-}
-
-static struct devlink_health_reporter *
-devlink_health_reporter_find_by_name(struct devlink *devlink,
- const char *reporter_name)
-{
- return __devlink_health_reporter_find_by_name(&devlink->reporter_list,
- &devlink->reporters_lock,
- reporter_name);
-}
-
-static struct devlink_health_reporter *
-devlink_port_health_reporter_find_by_name(struct devlink_port *devlink_port,
- const char *reporter_name)
-{
- return __devlink_health_reporter_find_by_name(&devlink_port->reporter_list,
- &devlink_port->reporters_lock,
- reporter_name);
-}
-
-static struct devlink_health_reporter *
-__devlink_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- if (WARN_ON(graceful_period && !ops->recover))
- return ERR_PTR(-EINVAL);
-
- reporter = kzalloc(sizeof(*reporter), GFP_KERNEL);
- if (!reporter)
- return ERR_PTR(-ENOMEM);
-
- reporter->priv = priv;
- reporter->ops = ops;
- reporter->devlink = devlink;
- reporter->graceful_period = graceful_period;
- reporter->auto_recover = !!ops->recover;
- reporter->auto_dump = !!ops->dump;
- mutex_init(&reporter->dump_lock);
- refcount_set(&reporter->refcount, 1);
- return reporter;
-}
-
-/**
- * devlink_port_health_reporter_create - create devlink health reporter for
- * specified port instance
- *
- * @port: devlink_port which should contain the new reporter
- * @ops: devlink health reporter ops
- * @graceful_period: minimum time between automatic recoveries, in msecs,
- *                   to avoid recovery loops
- * @priv: driver private data, retrievable via devlink_health_reporter_priv()
- */
-struct devlink_health_reporter *
-devlink_port_health_reporter_create(struct devlink_port *port,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- mutex_lock(&port->reporters_lock);
- if (__devlink_health_reporter_find_by_name(&port->reporter_list,
- &port->reporters_lock, ops->name)) {
- reporter = ERR_PTR(-EEXIST);
- goto unlock;
- }
-
- reporter = __devlink_health_reporter_create(port->devlink, ops,
- graceful_period, priv);
- if (IS_ERR(reporter))
- goto unlock;
-
- reporter->devlink_port = port;
- list_add_tail(&reporter->list, &port->reporter_list);
-unlock:
- mutex_unlock(&port->reporters_lock);
- return reporter;
-}
-EXPORT_SYMBOL_GPL(devlink_port_health_reporter_create);
-
-/**
- * devlink_health_reporter_create - create devlink health reporter
- *
- * @devlink: devlink instance which should contain the new reporter
- * @ops: devlink health reporter ops
- * @graceful_period: minimum time between automatic recoveries, in msecs,
- *                   to avoid recovery loops
- * @priv: driver private data, retrievable via devlink_health_reporter_priv()
- */
-struct devlink_health_reporter *
-devlink_health_reporter_create(struct devlink *devlink,
- const struct devlink_health_reporter_ops *ops,
- u64 graceful_period, void *priv)
-{
- struct devlink_health_reporter *reporter;
-
- mutex_lock(&devlink->reporters_lock);
- if (devlink_health_reporter_find_by_name(devlink, ops->name)) {
- reporter = ERR_PTR(-EEXIST);
- goto unlock;
- }
-
- reporter = __devlink_health_reporter_create(devlink, ops,
- graceful_period, priv);
- if (IS_ERR(reporter))
- goto unlock;
-
- list_add_tail(&reporter->list, &devlink->reporter_list);
-unlock:
- mutex_unlock(&devlink->reporters_lock);
- return reporter;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_create);
-
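/*
 * A minimal sketch of registering a reporter with the function above.
 * "foo" names (including struct foo_priv from the earlier sketch) are
 * hypothetical; the ops members are the ones read by the fill and
 * recover code in this file.
 */
static int foo_reset_tx(struct foo_priv *priv); /* hypothetical */

static int foo_tx_reporter_recover(struct devlink_health_reporter *reporter,
				   void *priv_ctx,
				   struct netlink_ext_ack *extack)
{
	/* foo_reset_tx() stands in for the driver's actual recovery. */
	return foo_reset_tx(devlink_health_reporter_priv(reporter));
}

static const struct devlink_health_reporter_ops foo_tx_reporter_ops = {
	.name = "tx",
	.recover = foo_tx_reporter_recover,
	.diagnose = foo_reporter_diagnose,
};

static int foo_health_init(struct foo_priv *priv, struct devlink *devlink)
{
	/* A 500 msec graceful period guards against recovery loops. */
	priv->tx_reporter =
		devlink_health_reporter_create(devlink, &foo_tx_reporter_ops,
					       500, priv);
	return PTR_ERR_OR_ZERO(priv->tx_reporter);
}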
-static void
-devlink_health_reporter_free(struct devlink_health_reporter *reporter)
-{
- mutex_destroy(&reporter->dump_lock);
- if (reporter->dump_fmsg)
- devlink_fmsg_free(reporter->dump_fmsg);
- kfree(reporter);
-}
-
-static void
-devlink_health_reporter_put(struct devlink_health_reporter *reporter)
-{
- if (refcount_dec_and_test(&reporter->refcount))
- devlink_health_reporter_free(reporter);
-}
-
-static void
-__devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
- list_del(&reporter->list);
- devlink_health_reporter_put(reporter);
-}
-
-/**
- * devlink_health_reporter_destroy - destroy devlink health reporter
- *
- * @reporter: devlink health reporter to destroy
- */
-void
-devlink_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
- struct mutex *lock = &reporter->devlink->reporters_lock;
-
- mutex_lock(lock);
- __devlink_health_reporter_destroy(reporter);
- mutex_unlock(lock);
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_destroy);
-
-/**
- * devlink_port_health_reporter_destroy - destroy devlink port health reporter
- *
- * @reporter: devlink health reporter to destroy
- */
-void
-devlink_port_health_reporter_destroy(struct devlink_health_reporter *reporter)
-{
- struct mutex *lock = &reporter->devlink_port->reporters_lock;
-
- mutex_lock(lock);
- __devlink_health_reporter_destroy(reporter);
- mutex_unlock(lock);
-}
-EXPORT_SYMBOL_GPL(devlink_port_health_reporter_destroy);
-
-static int
-devlink_nl_health_reporter_fill(struct sk_buff *msg,
- struct devlink_health_reporter *reporter,
- enum devlink_command cmd, u32 portid,
- u32 seq, int flags)
-{
- struct devlink *devlink = reporter->devlink;
- struct nlattr *reporter_attr;
- void *hdr;
-
- hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, flags, cmd);
- if (!hdr)
- return -EMSGSIZE;
-
- if (devlink_nl_put_handle(msg, devlink))
- goto genlmsg_cancel;
-
- if (reporter->devlink_port) {
- if (nla_put_u32(msg, DEVLINK_ATTR_PORT_INDEX, reporter->devlink_port->index))
- goto genlmsg_cancel;
- }
- reporter_attr = nla_nest_start_noflag(msg,
- DEVLINK_ATTR_HEALTH_REPORTER);
- if (!reporter_attr)
- goto genlmsg_cancel;
- if (nla_put_string(msg, DEVLINK_ATTR_HEALTH_REPORTER_NAME,
- reporter->ops->name))
- goto reporter_nest_cancel;
- if (nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_STATE,
- reporter->health_state))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_ERR_COUNT,
- reporter->error_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_RECOVER_COUNT,
- reporter->recovery_count, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD,
- reporter->graceful_period,
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->recover &&
- nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER,
- reporter->auto_recover))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS,
- jiffies_to_msecs(reporter->dump_ts),
- DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->dump_fmsg &&
- nla_put_u64_64bit(msg, DEVLINK_ATTR_HEALTH_REPORTER_DUMP_TS_NS,
- reporter->dump_real_ts, DEVLINK_ATTR_PAD))
- goto reporter_nest_cancel;
- if (reporter->ops->dump &&
- nla_put_u8(msg, DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP,
- reporter->auto_dump))
- goto reporter_nest_cancel;
-
- nla_nest_end(msg, reporter_attr);
- genlmsg_end(msg, hdr);
- return 0;
-
-reporter_nest_cancel:
-	nla_nest_cancel(msg, reporter_attr);
-genlmsg_cancel:
- genlmsg_cancel(msg, hdr);
- return -EMSGSIZE;
-}
-
-static void devlink_recover_notify(struct devlink_health_reporter *reporter,
- enum devlink_command cmd)
-{
- struct devlink *devlink = reporter->devlink;
- struct sk_buff *msg;
- int err;
-
- WARN_ON(cmd != DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
- WARN_ON(!xa_get_mark(&devlinks, devlink->index, DEVLINK_REGISTERED));
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- err = devlink_nl_health_reporter_fill(msg, reporter, cmd, 0, 0, 0);
- if (err) {
- nlmsg_free(msg);
- return;
- }
-
- genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink), msg,
- 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-}
-
-void
-devlink_health_reporter_recovery_done(struct devlink_health_reporter *reporter)
-{
- reporter->recovery_count++;
- reporter->last_recovery_ts = jiffies;
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_recovery_done);
-
-static int
-devlink_health_reporter_recover(struct devlink_health_reporter *reporter,
- void *priv_ctx, struct netlink_ext_ack *extack)
-{
- int err;
-
- if (reporter->health_state == DEVLINK_HEALTH_REPORTER_STATE_HEALTHY)
- return 0;
-
- if (!reporter->ops->recover)
- return -EOPNOTSUPP;
-
- err = reporter->ops->recover(reporter, priv_ctx, extack);
- if (err)
- return err;
-
- devlink_health_reporter_recovery_done(reporter);
- reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_HEALTHY;
- devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-
- return 0;
-}
-
-static void
-devlink_health_dump_clear(struct devlink_health_reporter *reporter)
-{
- if (!reporter->dump_fmsg)
- return;
- devlink_fmsg_free(reporter->dump_fmsg);
- reporter->dump_fmsg = NULL;
-}
-
-static int devlink_health_do_dump(struct devlink_health_reporter *reporter,
- void *priv_ctx,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (!reporter->ops->dump)
- return 0;
-
- if (reporter->dump_fmsg)
- return 0;
-
- reporter->dump_fmsg = devlink_fmsg_alloc();
- if (!reporter->dump_fmsg) {
- err = -ENOMEM;
- return err;
- }
-
- err = devlink_fmsg_obj_nest_start(reporter->dump_fmsg);
- if (err)
- goto dump_err;
-
- err = reporter->ops->dump(reporter, reporter->dump_fmsg,
- priv_ctx, extack);
- if (err)
- goto dump_err;
-
- err = devlink_fmsg_obj_nest_end(reporter->dump_fmsg);
- if (err)
- goto dump_err;
-
- reporter->dump_ts = jiffies;
- reporter->dump_real_ts = ktime_get_real_ns();
-
- return 0;
-
-dump_err:
- devlink_health_dump_clear(reporter);
- return err;
-}
-
-int devlink_health_report(struct devlink_health_reporter *reporter,
- const char *msg, void *priv_ctx)
-{
- enum devlink_health_reporter_state prev_health_state;
- struct devlink *devlink = reporter->devlink;
- unsigned long recover_ts_threshold;
- int ret;
-
- /* write a log message of the current error */
- WARN_ON(!msg);
- trace_devlink_health_report(devlink, reporter->ops->name, msg);
- reporter->error_count++;
- prev_health_state = reporter->health_state;
- reporter->health_state = DEVLINK_HEALTH_REPORTER_STATE_ERROR;
- devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-
- /* abort if the previous error wasn't recovered */
- recover_ts_threshold = reporter->last_recovery_ts +
- msecs_to_jiffies(reporter->graceful_period);
- if (reporter->auto_recover &&
- (prev_health_state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY ||
- (reporter->last_recovery_ts && reporter->recovery_count &&
- time_is_after_jiffies(recover_ts_threshold)))) {
- trace_devlink_health_recover_aborted(devlink,
- reporter->ops->name,
- reporter->health_state,
- jiffies -
- reporter->last_recovery_ts);
- return -ECANCELED;
- }
-
- if (reporter->auto_dump) {
- mutex_lock(&reporter->dump_lock);
-		/* store a dump of the current error for later analysis */
- devlink_health_do_dump(reporter, priv_ctx, NULL);
- mutex_unlock(&reporter->dump_lock);
- }
-
- if (!reporter->auto_recover)
- return 0;
-
- devl_lock(devlink);
- ret = devlink_health_reporter_recover(reporter, priv_ctx, NULL);
- devl_unlock(devlink);
-
- return ret;
-}
-EXPORT_SYMBOL_GPL(devlink_health_report);
-
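/*
 * The reporting side, sketched: on a detected fault the driver calls
 * devlink_health_report(), which (per the function above) counts the
 * error, optionally auto-dumps, and auto-recovers subject to the
 * graceful-period rules. "foo" names remain hypothetical.
 */
struct foo_tx_err_ctx {				/* hypothetical */
	int queue;
};

static void foo_handle_tx_timeout(struct foo_priv *priv, int queue)
{
	struct foo_tx_err_ctx ctx = { .queue = queue };

	/* ctx is passed through as priv_ctx to ->dump() and ->recover(). */
	devlink_health_report(priv->tx_reporter, "TX timeout", &ctx);
}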
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_attrs(struct devlink *devlink,
- struct nlattr **attrs)
-{
- struct devlink_health_reporter *reporter;
- struct devlink_port *devlink_port;
- char *reporter_name;
-
- if (!attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME])
- return NULL;
-
- reporter_name = nla_data(attrs[DEVLINK_ATTR_HEALTH_REPORTER_NAME]);
- devlink_port = devlink_port_get_from_attrs(devlink, attrs);
- if (IS_ERR(devlink_port)) {
- mutex_lock(&devlink->reporters_lock);
- reporter = devlink_health_reporter_find_by_name(devlink, reporter_name);
- if (reporter)
- refcount_inc(&reporter->refcount);
- mutex_unlock(&devlink->reporters_lock);
- } else {
- mutex_lock(&devlink_port->reporters_lock);
- reporter = devlink_port_health_reporter_find_by_name(devlink_port, reporter_name);
- if (reporter)
- refcount_inc(&reporter->refcount);
- mutex_unlock(&devlink_port->reporters_lock);
- }
-
- return reporter;
-}
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_info(struct devlink *devlink,
- struct genl_info *info)
-{
- return devlink_health_reporter_get_from_attrs(devlink, info->attrs);
-}
-
-static struct devlink_health_reporter *
-devlink_health_reporter_get_from_cb(struct netlink_callback *cb)
-{
- const struct genl_dumpit_info *info = genl_dumpit_info(cb);
- struct devlink_health_reporter *reporter;
- struct nlattr **attrs = info->attrs;
- struct devlink *devlink;
-
- devlink = devlink_get_from_attrs(sock_net(cb->skb->sk), attrs);
- if (IS_ERR(devlink))
- return NULL;
-
- reporter = devlink_health_reporter_get_from_attrs(devlink, attrs);
- devlink_put(devlink);
- return reporter;
-}
-
-void
-devlink_health_reporter_state_update(struct devlink_health_reporter *reporter,
- enum devlink_health_reporter_state state)
-{
- if (WARN_ON(state != DEVLINK_HEALTH_REPORTER_STATE_HEALTHY &&
- state != DEVLINK_HEALTH_REPORTER_STATE_ERROR))
- return;
-
- if (reporter->health_state == state)
- return;
-
- reporter->health_state = state;
- trace_devlink_health_reporter_state_update(reporter->devlink,
- reporter->ops->name, state);
- devlink_recover_notify(reporter, DEVLINK_CMD_HEALTH_REPORTER_RECOVER);
-}
-EXPORT_SYMBOL_GPL(devlink_health_reporter_state_update);
-
-static int devlink_nl_cmd_health_reporter_get_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- struct sk_buff *msg;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg) {
- err = -ENOMEM;
- goto out;
- }
-
- err = devlink_nl_health_reporter_fill(msg, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- info->snd_portid, info->snd_seq,
- 0);
- if (err) {
- nlmsg_free(msg);
- goto out;
- }
-
- err = genlmsg_reply(msg, info);
-out:
- devlink_health_reporter_put(reporter);
- return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
-{
- struct devlink_health_reporter *reporter;
- unsigned long index, port_index;
- struct devlink_port *port;
- struct devlink *devlink;
- int start = cb->args[0];
- int idx = 0;
- int err;
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- mutex_lock(&devlink->reporters_lock);
- list_for_each_entry(reporter, &devlink->reporter_list,
- list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_health_reporter_fill(
- msg, reporter, DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- mutex_unlock(&devlink->reporters_lock);
- devlink_put(devlink);
- goto out;
- }
- idx++;
- }
- mutex_unlock(&devlink->reporters_lock);
- devlink_put(devlink);
- }
-
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- xa_for_each(&devlink->ports, port_index, port) {
- mutex_lock(&port->reporters_lock);
- list_for_each_entry(reporter, &port->reporter_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_health_reporter_fill(
- msg, reporter,
- DEVLINK_CMD_HEALTH_REPORTER_GET,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq, NLM_F_MULTI);
- if (err) {
- mutex_unlock(&port->reporters_lock);
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
- idx++;
- }
- mutex_unlock(&port->reporters_lock);
- }
- devl_unlock(devlink);
- devlink_put(devlink);
- }
-out:
- cb->args[0] = idx;
- return msg->len;
-}
-
-static int
-devlink_nl_cmd_health_reporter_set_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->recover &&
- (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] ||
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])) {
- err = -EOPNOTSUPP;
- goto out;
- }
- if (!reporter->ops->dump &&
- info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]) {
- err = -EOPNOTSUPP;
- goto out;
- }
-
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD])
- reporter->graceful_period =
- nla_get_u64(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD]);
-
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER])
- reporter->auto_recover =
- nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER]);
-
- if (info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP])
- reporter->auto_dump =
- nla_get_u8(info->attrs[DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP]);
-
- devlink_health_reporter_put(reporter);
- return 0;
-out:
- devlink_health_reporter_put(reporter);
- return err;
-}
-
-static int devlink_nl_cmd_health_reporter_recover_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- err = devlink_health_reporter_recover(reporter, NULL, info->extack);
-
- devlink_health_reporter_put(reporter);
- return err;
-}
-
-static int devlink_nl_cmd_health_reporter_diagnose_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- struct devlink_fmsg *fmsg;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->diagnose) {
- devlink_health_reporter_put(reporter);
- return -EOPNOTSUPP;
- }
-
- fmsg = devlink_fmsg_alloc();
- if (!fmsg) {
- devlink_health_reporter_put(reporter);
- return -ENOMEM;
- }
-
- err = devlink_fmsg_obj_nest_start(fmsg);
- if (err)
- goto out;
-
- err = reporter->ops->diagnose(reporter, fmsg, info->extack);
- if (err)
- goto out;
-
- err = devlink_fmsg_obj_nest_end(fmsg);
- if (err)
- goto out;
-
- err = devlink_fmsg_snd(fmsg, info,
- DEVLINK_CMD_HEALTH_REPORTER_DIAGNOSE, 0);
-
-out:
- devlink_fmsg_free(fmsg);
- devlink_health_reporter_put(reporter);
- return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_dump_get_dumpit(struct sk_buff *skb,
- struct netlink_callback *cb)
-{
- struct devlink_health_reporter *reporter;
- u64 start = cb->args[0];
- int err;
-
- reporter = devlink_health_reporter_get_from_cb(cb);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->dump) {
- err = -EOPNOTSUPP;
- goto out;
- }
- mutex_lock(&reporter->dump_lock);
- if (!start) {
- err = devlink_health_do_dump(reporter, NULL, cb->extack);
- if (err)
- goto unlock;
- cb->args[1] = reporter->dump_ts;
- }
- if (!reporter->dump_fmsg || cb->args[1] != reporter->dump_ts) {
- NL_SET_ERR_MSG_MOD(cb->extack, "Dump trampled, please retry");
- err = -EAGAIN;
- goto unlock;
- }
-
- err = devlink_fmsg_dumpit(reporter->dump_fmsg, skb, cb,
- DEVLINK_CMD_HEALTH_REPORTER_DUMP_GET);
-unlock:
- mutex_unlock(&reporter->dump_lock);
-out:
- devlink_health_reporter_put(reporter);
- return err;
-}
-
-static int
-devlink_nl_cmd_health_reporter_dump_clear_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->dump) {
- devlink_health_reporter_put(reporter);
- return -EOPNOTSUPP;
- }
-
- mutex_lock(&reporter->dump_lock);
- devlink_health_dump_clear(reporter);
- mutex_unlock(&reporter->dump_lock);
- devlink_health_reporter_put(reporter);
- return 0;
-}
-
-static int devlink_nl_cmd_health_reporter_test_doit(struct sk_buff *skb,
- struct genl_info *info)
-{
- struct devlink *devlink = info->user_ptr[0];
- struct devlink_health_reporter *reporter;
- int err;
-
- reporter = devlink_health_reporter_get_from_info(devlink, info);
- if (!reporter)
- return -EINVAL;
-
- if (!reporter->ops->test) {
- devlink_health_reporter_put(reporter);
- return -EOPNOTSUPP;
- }
-
- err = reporter->ops->test(reporter, info->extack);
-
- devlink_health_reporter_put(reporter);
- return err;
-}
-
struct devlink_stats {
u64_stats_t rx_bytes;
u64_stats_t rx_packets;
@@ -8793,7 +5682,7 @@ static int devlink_nl_cmd_trap_get_doit(struct sk_buff *skb,
trap_item = devlink_trap_item_get_from_info(devlink, info);
if (!trap_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
return -ENOENT;
}
@@ -8814,43 +5703,39 @@ err_trap_fill:
return err;
}
-static int devlink_nl_cmd_trap_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_trap_get_dump_one(struct sk_buff *msg, struct devlink *devlink,
+ struct netlink_callback *cb)
{
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_item *trap_item;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(trap_item, &devlink->trap_list, list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_fill(msg, devlink, trap_item,
- DEVLINK_CMD_TRAP_NEW,
- NETLINK_CB(cb->skb).portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(trap_item, &devlink->trap_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ err = devlink_nl_trap_fill(msg, devlink, trap_item,
+ DEVLINK_CMD_TRAP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_trap_get = {
+ .dump_one = devlink_nl_cmd_trap_get_dump_one,
+};
+
static int __devlink_trap_action_set(struct devlink *devlink,
struct devlink_trap_item *trap_item,
enum devlink_trap_action trap_action,
@@ -8860,7 +5745,7 @@ static int __devlink_trap_action_set(struct devlink *devlink,
if (trap_item->action != trap_action &&
trap_item->trap->type != DEVLINK_TRAP_TYPE_DROP) {
- NL_SET_ERR_MSG_MOD(extack, "Cannot change action of non-drop traps. Skipping");
+ NL_SET_ERR_MSG(extack, "Cannot change action of non-drop traps. Skipping");
return 0;
}
@@ -8886,7 +5771,7 @@ static int devlink_trap_action_set(struct devlink *devlink,
err = devlink_trap_action_get_from_info(info, &trap_action);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
return -EINVAL;
}
@@ -8906,7 +5791,7 @@ static int devlink_nl_cmd_trap_set_doit(struct sk_buff *skb,
trap_item = devlink_trap_item_get_from_info(devlink, info);
if (!trap_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap");
return -ENOENT;
}
@@ -9008,7 +5893,7 @@ static int devlink_nl_cmd_trap_group_get_doit(struct sk_buff *skb,
group_item = devlink_trap_group_item_get_from_info(devlink, info);
if (!group_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
return -ENOENT;
}
@@ -9029,46 +5914,41 @@ err_trap_group_fill:
return err;
}
-static int devlink_nl_cmd_trap_group_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_trap_group_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- enum devlink_command cmd = DEVLINK_CMD_TRAP_GROUP_NEW;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_group_item *group_item;
- u32 portid = NETLINK_CB(cb->skb).portid;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(group_item, &devlink->trap_group_list,
- list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_group_fill(msg, devlink,
- group_item, cmd,
- portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+
+ list_for_each_entry(group_item, &devlink->trap_group_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ err = devlink_nl_trap_group_fill(msg, devlink, group_item,
+ DEVLINK_CMD_TRAP_GROUP_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
+ }
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_trap_group_get = {
+ .dump_one = devlink_nl_cmd_trap_group_get_dump_one,
+};
+
static int
__devlink_trap_group_action_set(struct devlink *devlink,
struct devlink_trap_group_item *group_item,
@@ -9122,7 +6002,7 @@ devlink_trap_group_action_set(struct devlink *devlink,
err = devlink_trap_action_get_from_info(info, &trap_action);
if (err) {
- NL_SET_ERR_MSG_MOD(info->extack, "Invalid trap action");
+ NL_SET_ERR_MSG(info->extack, "Invalid trap action");
return -EINVAL;
}
@@ -9144,6 +6024,7 @@ static int devlink_trap_group_set(struct devlink *devlink,
struct netlink_ext_ack *extack = info->extack;
const struct devlink_trap_policer *policer;
struct nlattr **attrs = info->attrs;
+ u32 policer_id;
int err;
if (!attrs[DEVLINK_ATTR_TRAP_POLICER_ID])
@@ -9152,17 +6033,11 @@ static int devlink_trap_group_set(struct devlink *devlink,
if (!devlink->ops->trap_group_set)
return -EOPNOTSUPP;
- policer_item = group_item->policer_item;
- if (attrs[DEVLINK_ATTR_TRAP_POLICER_ID]) {
- u32 policer_id;
-
- policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
- policer_item = devlink_trap_policer_item_lookup(devlink,
- policer_id);
- if (policer_id && !policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
- return -ENOENT;
- }
+ policer_id = nla_get_u32(attrs[DEVLINK_ATTR_TRAP_POLICER_ID]);
+ policer_item = devlink_trap_policer_item_lookup(devlink, policer_id);
+ if (policer_id && !policer_item) {
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
+ return -ENOENT;
}
policer = policer_item ? policer_item->policer : NULL;
@@ -9190,7 +6065,7 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
group_item = devlink_trap_group_item_get_from_info(devlink, info);
if (!group_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap group");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap group");
return -ENOENT;
}
@@ -9207,7 +6082,7 @@ static int devlink_nl_cmd_trap_group_set_doit(struct sk_buff *skb,
err_trap_group_set:
if (modified)
- NL_SET_ERR_MSG_MOD(extack, "Trap group set failed, but some changes were committed already");
+ NL_SET_ERR_MSG(extack, "Trap group set failed, but some changes were committed already");
return err;
}
@@ -9312,7 +6187,7 @@ static int devlink_nl_cmd_trap_policer_get_doit(struct sk_buff *skb,
policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
if (!policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
return -ENOENT;
}
@@ -9333,46 +6208,40 @@ err_trap_policer_fill:
return err;
}
-static int devlink_nl_cmd_trap_policer_get_dumpit(struct sk_buff *msg,
- struct netlink_callback *cb)
+static int
+devlink_nl_cmd_trap_policer_get_dump_one(struct sk_buff *msg,
+ struct devlink *devlink,
+ struct netlink_callback *cb)
{
- enum devlink_command cmd = DEVLINK_CMD_TRAP_POLICER_NEW;
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
struct devlink_trap_policer_item *policer_item;
- u32 portid = NETLINK_CB(cb->skb).portid;
- struct devlink *devlink;
- int start = cb->args[0];
- unsigned long index;
int idx = 0;
- int err;
+ int err = 0;
- devlinks_xa_for_each_registered_get(sock_net(msg->sk), index, devlink) {
- devl_lock(devlink);
- list_for_each_entry(policer_item, &devlink->trap_policer_list,
- list) {
- if (idx < start) {
- idx++;
- continue;
- }
- err = devlink_nl_trap_policer_fill(msg, devlink,
- policer_item, cmd,
- portid,
- cb->nlh->nlmsg_seq,
- NLM_F_MULTI);
- if (err) {
- devl_unlock(devlink);
- devlink_put(devlink);
- goto out;
- }
+ list_for_each_entry(policer_item, &devlink->trap_policer_list, list) {
+ if (idx < state->idx) {
idx++;
+ continue;
+ }
+ err = devlink_nl_trap_policer_fill(msg, devlink, policer_item,
+ DEVLINK_CMD_TRAP_POLICER_NEW,
+ NETLINK_CB(cb->skb).portid,
+ cb->nlh->nlmsg_seq,
+ NLM_F_MULTI);
+ if (err) {
+ state->idx = idx;
+ break;
}
- devl_unlock(devlink);
- devlink_put(devlink);
+ idx++;
}
-out:
- cb->args[0] = idx;
- return msg->len;
+
+ return err;
}
+const struct devlink_cmd devl_cmd_trap_policer_get = {
+ .dump_one = devlink_nl_cmd_trap_policer_get_dump_one,
+};
+
static int
devlink_trap_policer_set(struct devlink *devlink,
struct devlink_trap_policer_item *policer_item,
@@ -9393,22 +6262,22 @@ devlink_trap_policer_set(struct devlink *devlink,
burst = nla_get_u64(attrs[DEVLINK_ATTR_TRAP_POLICER_BURST]);
if (rate < policer_item->policer->min_rate) {
- NL_SET_ERR_MSG_MOD(extack, "Policer rate lower than limit");
+ NL_SET_ERR_MSG(extack, "Policer rate lower than limit");
return -EINVAL;
}
if (rate > policer_item->policer->max_rate) {
- NL_SET_ERR_MSG_MOD(extack, "Policer rate higher than limit");
+ NL_SET_ERR_MSG(extack, "Policer rate higher than limit");
return -EINVAL;
}
if (burst < policer_item->policer->min_burst) {
- NL_SET_ERR_MSG_MOD(extack, "Policer burst size lower than limit");
+ NL_SET_ERR_MSG(extack, "Policer burst size lower than limit");
return -EINVAL;
}
if (burst > policer_item->policer->max_burst) {
- NL_SET_ERR_MSG_MOD(extack, "Policer burst size higher than limit");
+ NL_SET_ERR_MSG(extack, "Policer burst size higher than limit");
return -EINVAL;
}
@@ -9438,95 +6307,26 @@ static int devlink_nl_cmd_trap_policer_set_doit(struct sk_buff *skb,
policer_item = devlink_trap_policer_item_get_from_info(devlink, info);
if (!policer_item) {
- NL_SET_ERR_MSG_MOD(extack, "Device did not register this trap policer");
+ NL_SET_ERR_MSG(extack, "Device did not register this trap policer");
return -ENOENT;
}
return devlink_trap_policer_set(devlink, policer_item, info);
}
-static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
- [DEVLINK_ATTR_UNSPEC] = { .strict_start_type =
- DEVLINK_ATTR_TRAP_POLICER_ID },
- [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO,
- DEVLINK_PORT_TYPE_IB),
- [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
- [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
- [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY,
- DEVLINK_ESWITCH_MODE_SWITCHDEV),
- [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
- [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
- [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
- [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
- [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
- [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
- [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
- [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
- [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
- NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
- [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
- [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
- [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
- [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
- [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
- [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
- [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
- [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_ACTION_MAX),
- [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
- [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 },
- [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
- [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
- [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
- [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
- [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
- [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
- [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
- [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
- [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
- [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
- [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
- [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
-};
-
-static const struct genl_small_ops devlink_nl_ops[] = {
+const struct genl_small_ops devlink_nl_ops[56] = {
{
.cmd = DEVLINK_CMD_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_get_doit,
- .dumpit = devlink_nl_cmd_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_PORT_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_port_get_doit,
- .dumpit = devlink_nl_cmd_port_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9540,7 +6340,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_RATE_GET,
.doit = devlink_nl_cmd_rate_get_doit,
- .dumpit = devlink_nl_cmd_rate_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_RATE,
/* can be retrieved by unprivileged users */
},
@@ -9588,7 +6388,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_LINECARD_GET,
.doit = devlink_nl_cmd_linecard_get_doit,
- .dumpit = devlink_nl_cmd_linecard_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_LINECARD,
/* can be retrieved by unprivileged users */
},
@@ -9602,14 +6402,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_SB_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_get_doit,
- .dumpit = devlink_nl_cmd_sb_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_SB_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_pool_get_doit,
- .dumpit = devlink_nl_cmd_sb_pool_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9622,7 +6422,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_SB_PORT_POOL_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_port_pool_get_doit,
- .dumpit = devlink_nl_cmd_sb_port_pool_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9637,7 +6437,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_SB_TC_POOL_BIND_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_sb_tc_pool_bind_get_doit,
- .dumpit = devlink_nl_cmd_sb_tc_pool_bind_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9718,7 +6518,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_PARAM_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_param_get_doit,
- .dumpit = devlink_nl_cmd_param_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9746,7 +6546,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_REGION_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_region_get_doit,
- .dumpit = devlink_nl_cmd_region_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.flags = GENL_ADMIN_PERM,
},
{
@@ -9772,14 +6572,14 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.cmd = DEVLINK_CMD_INFO_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_info_get_doit,
- .dumpit = devlink_nl_cmd_info_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
.cmd = DEVLINK_CMD_HEALTH_REPORTER_GET,
.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
.doit = devlink_nl_cmd_health_reporter_get_doit,
- .dumpit = devlink_nl_cmd_health_reporter_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
.internal_flags = DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT,
/* can be retrieved by unprivileged users */
},
@@ -9834,7 +6634,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_TRAP_GET,
.doit = devlink_nl_cmd_trap_get_doit,
- .dumpit = devlink_nl_cmd_trap_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9845,7 +6645,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_TRAP_GROUP_GET,
.doit = devlink_nl_cmd_trap_group_get_doit,
- .dumpit = devlink_nl_cmd_trap_group_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9856,7 +6656,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_TRAP_POLICER_GET,
.doit = devlink_nl_cmd_trap_policer_get_doit,
- .dumpit = devlink_nl_cmd_trap_policer_get_dumpit,
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9867,7 +6667,7 @@ static const struct genl_small_ops devlink_nl_ops[] = {
{
.cmd = DEVLINK_CMD_SELFTESTS_GET,
.doit = devlink_nl_cmd_selftests_get_doit,
- .dumpit = devlink_nl_cmd_selftests_get_dumpit
+ .dumpit = devlink_nl_instance_iter_dumpit,
/* can be retrieved by unprivileged users */
},
{
@@ -9875,148 +6675,9 @@ static const struct genl_small_ops devlink_nl_ops[] = {
.doit = devlink_nl_cmd_selftests_run,
.flags = GENL_ADMIN_PERM,
},
+ /* -- No new ops here! Use split ops going forward! -- */
};
-static struct genl_family devlink_nl_family __ro_after_init = {
- .name = DEVLINK_GENL_NAME,
- .version = DEVLINK_GENL_VERSION,
- .maxattr = DEVLINK_ATTR_MAX,
- .policy = devlink_nl_policy,
- .netnsok = true,
- .parallel_ops = true,
- .pre_doit = devlink_nl_pre_doit,
- .post_doit = devlink_nl_post_doit,
- .module = THIS_MODULE,
- .small_ops = devlink_nl_ops,
- .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
- .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
- .mcgrps = devlink_nl_mcgrps,
- .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
-};
-
-static bool devlink_reload_actions_valid(const struct devlink_ops *ops)
-{
- const struct devlink_reload_combination *comb;
- int i;
-
- if (!devlink_reload_supported(ops)) {
- if (WARN_ON(ops->reload_actions))
- return false;
- return true;
- }
-
- if (WARN_ON(!ops->reload_actions ||
- ops->reload_actions & BIT(DEVLINK_RELOAD_ACTION_UNSPEC) ||
- ops->reload_actions >= BIT(__DEVLINK_RELOAD_ACTION_MAX)))
- return false;
-
- if (WARN_ON(ops->reload_limits & BIT(DEVLINK_RELOAD_LIMIT_UNSPEC) ||
- ops->reload_limits >= BIT(__DEVLINK_RELOAD_LIMIT_MAX)))
- return false;
-
- for (i = 0; i < ARRAY_SIZE(devlink_reload_invalid_combinations); i++) {
- comb = &devlink_reload_invalid_combinations[i];
- if (ops->reload_actions == BIT(comb->action) &&
- ops->reload_limits == BIT(comb->limit))
- return false;
- }
- return true;
-}
-
-/**
- * devlink_set_features - Set devlink supported features
- *
- * @devlink: devlink
- * @features: devlink support features
- *
- * This interface allows us to set reload ops separatelly from
- * the devlink_alloc.
- */
-void devlink_set_features(struct devlink *devlink, u64 features)
-{
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- WARN_ON(features & DEVLINK_F_RELOAD &&
- !devlink_reload_supported(devlink->ops));
- devlink->features = features;
-}
-EXPORT_SYMBOL_GPL(devlink_set_features);
-
-static int devlink_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr);
-
-/**
- * devlink_alloc_ns - Allocate new devlink instance resources
- * in specific namespace
- *
- * @ops: ops
- * @priv_size: size of user private data
- * @net: net namespace
- * @dev: parent device
- *
- * Allocate new devlink instance resources, including devlink index
- * and name.
- */
-struct devlink *devlink_alloc_ns(const struct devlink_ops *ops,
- size_t priv_size, struct net *net,
- struct device *dev)
-{
- struct devlink *devlink;
- static u32 last_id;
- int ret;
-
- WARN_ON(!ops || !dev);
- if (!devlink_reload_actions_valid(ops))
- return NULL;
-
- devlink = kzalloc(sizeof(*devlink) + priv_size, GFP_KERNEL);
- if (!devlink)
- return NULL;
-
- ret = xa_alloc_cyclic(&devlinks, &devlink->index, devlink, xa_limit_31b,
- &last_id, GFP_KERNEL);
- if (ret < 0)
- goto err_xa_alloc;
-
- devlink->netdevice_nb.notifier_call = devlink_netdevice_event;
- ret = register_netdevice_notifier_net(net, &devlink->netdevice_nb);
- if (ret)
- goto err_register_netdevice_notifier;
-
- devlink->dev = dev;
- devlink->ops = ops;
- xa_init_flags(&devlink->ports, XA_FLAGS_ALLOC);
- xa_init_flags(&devlink->snapshot_ids, XA_FLAGS_ALLOC);
- write_pnet(&devlink->_net, net);
- INIT_LIST_HEAD(&devlink->rate_list);
- INIT_LIST_HEAD(&devlink->linecard_list);
- INIT_LIST_HEAD(&devlink->sb_list);
- INIT_LIST_HEAD_RCU(&devlink->dpipe_table_list);
- INIT_LIST_HEAD(&devlink->resource_list);
- INIT_LIST_HEAD(&devlink->param_list);
- INIT_LIST_HEAD(&devlink->region_list);
- INIT_LIST_HEAD(&devlink->reporter_list);
- INIT_LIST_HEAD(&devlink->trap_list);
- INIT_LIST_HEAD(&devlink->trap_group_list);
- INIT_LIST_HEAD(&devlink->trap_policer_list);
- lockdep_register_key(&devlink->lock_key);
- mutex_init(&devlink->lock);
- lockdep_set_class(&devlink->lock, &devlink->lock_key);
- mutex_init(&devlink->reporters_lock);
- mutex_init(&devlink->linecards_lock);
- refcount_set(&devlink->refcount, 1);
- init_completion(&devlink->comp);
-
- return devlink;
-
-err_register_netdevice_notifier:
- xa_erase(&devlinks, devlink->index);
-err_xa_alloc:
- kfree(devlink);
- return NULL;
-}
-EXPORT_SYMBOL_GPL(devlink_alloc_ns);
-
static void
devlink_trap_policer_notify(struct devlink *devlink,
const struct devlink_trap_policer_item *policer_item,
@@ -10029,7 +6690,7 @@ static void devlink_trap_notify(struct devlink *devlink,
const struct devlink_trap_item *trap_item,
enum devlink_command cmd);
-static void devlink_notify_register(struct devlink *devlink)
+void devlink_notify_register(struct devlink *devlink)
{
struct devlink_trap_policer_item *policer_item;
struct devlink_trap_group_item *group_item;
@@ -10040,6 +6701,7 @@ static void devlink_notify_register(struct devlink *devlink)
struct devlink_rate *rate_node;
struct devlink_region *region;
unsigned long port_index;
+ unsigned long param_id;
devlink_notify(devlink, DEVLINK_CMD_NEW);
list_for_each_entry(linecard, &devlink->linecard_list, list)
@@ -10065,12 +6727,12 @@ static void devlink_notify_register(struct devlink *devlink)
list_for_each_entry(region, &devlink->region_list, list)
devlink_nl_region_notify(region, NULL, DEVLINK_CMD_REGION_NEW);
- list_for_each_entry(param_item, &devlink->param_list, list)
+ xa_for_each(&devlink->params, param_id, param_item)
devlink_param_notify(devlink, 0, param_item,
DEVLINK_CMD_PARAM_NEW);
}
-static void devlink_notify_unregister(struct devlink *devlink)
+void devlink_notify_unregister(struct devlink *devlink)
{
struct devlink_trap_policer_item *policer_item;
struct devlink_trap_group_item *group_item;
@@ -10080,8 +6742,9 @@ static void devlink_notify_unregister(struct devlink *devlink)
struct devlink_rate *rate_node;
struct devlink_region *region;
unsigned long port_index;
+ unsigned long param_id;
- list_for_each_entry_reverse(param_item, &devlink->param_list, list)
+ xa_for_each(&devlink->params, param_id, param_item)
devlink_param_notify(devlink, 0, param_item,
DEVLINK_CMD_PARAM_DEL);
@@ -10107,79 +6770,6 @@ static void devlink_notify_unregister(struct devlink *devlink)
devlink_notify(devlink, DEVLINK_CMD_DEL);
}
-/**
- * devlink_register - Register devlink instance
- *
- * @devlink: devlink
- */
-void devlink_register(struct devlink *devlink)
-{
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
- /* Make sure that we are in .probe() routine */
-
- xa_set_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- devlink_notify_register(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_register);
-
-/**
- * devlink_unregister - Unregister devlink instance
- *
- * @devlink: devlink
- */
-void devlink_unregister(struct devlink *devlink)
-{
- ASSERT_DEVLINK_REGISTERED(devlink);
- /* Make sure that we are in .remove() routine */
-
- xa_set_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
- devlink_put(devlink);
- wait_for_completion(&devlink->comp);
-
- devlink_notify_unregister(devlink);
- xa_clear_mark(&devlinks, devlink->index, DEVLINK_REGISTERED);
- xa_clear_mark(&devlinks, devlink->index, DEVLINK_UNREGISTERING);
-}
-EXPORT_SYMBOL_GPL(devlink_unregister);
-
-/**
- * devlink_free - Free devlink instance resources
- *
- * @devlink: devlink
- */
-void devlink_free(struct devlink *devlink)
-{
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- mutex_destroy(&devlink->linecards_lock);
- mutex_destroy(&devlink->reporters_lock);
- mutex_destroy(&devlink->lock);
- lockdep_unregister_key(&devlink->lock_key);
- WARN_ON(!list_empty(&devlink->trap_policer_list));
- WARN_ON(!list_empty(&devlink->trap_group_list));
- WARN_ON(!list_empty(&devlink->trap_list));
- WARN_ON(!list_empty(&devlink->reporter_list));
- WARN_ON(!list_empty(&devlink->region_list));
- WARN_ON(!list_empty(&devlink->param_list));
- WARN_ON(!list_empty(&devlink->resource_list));
- WARN_ON(!list_empty(&devlink->dpipe_table_list));
- WARN_ON(!list_empty(&devlink->sb_list));
- WARN_ON(!list_empty(&devlink->rate_list));
- WARN_ON(!list_empty(&devlink->linecard_list));
- WARN_ON(!xa_empty(&devlink->ports));
-
- xa_destroy(&devlink->snapshot_ids);
- xa_destroy(&devlink->ports);
-
- WARN_ON_ONCE(unregister_netdevice_notifier_net(devlink_net(devlink),
- &devlink->netdevice_nb));
-
- xa_erase(&devlinks, devlink->index);
-
- kfree(devlink);
-}
-EXPORT_SYMBOL_GPL(devlink_free);
-
static void devlink_port_type_warn(struct work_struct *work)
{
WARN(true, "Type was not set for devlink port.");
@@ -10279,12 +6869,9 @@ int devl_port_register(struct devlink *devlink,
devlink_port->index = port_index;
spin_lock_init(&devlink_port->type_lock);
INIT_LIST_HEAD(&devlink_port->reporter_list);
- mutex_init(&devlink_port->reporters_lock);
err = xa_insert(&devlink->ports, port_index, devlink_port, GFP_KERNEL);
- if (err) {
- mutex_destroy(&devlink_port->reporters_lock);
+ if (err)
return err;
- }
INIT_DELAYED_WORK(&devlink_port->type_warn_dw, &devlink_port_type_warn);
devlink_port_type_warn_schedule(devlink_port);
@@ -10335,7 +6922,6 @@ void devl_port_unregister(struct devlink_port *devlink_port)
devlink_port_notify(devlink_port, DEVLINK_CMD_PORT_DEL);
xa_erase(&devlink_port->devlink->ports, devlink_port->index);
WARN_ON(!list_empty(&devlink_port->reporter_list));
- mutex_destroy(&devlink_port->reporters_lock);
devlink_port->registered = false;
}
EXPORT_SYMBOL_GPL(devl_port_unregister);
@@ -10480,8 +7066,8 @@ void devlink_port_type_clear(struct devlink_port *devlink_port)
}
EXPORT_SYMBOL_GPL(devlink_port_type_clear);
-static int devlink_netdevice_event(struct notifier_block *nb,
- unsigned long event, void *ptr)
+int devlink_port_netdevice_event(struct notifier_block *nb,
+ unsigned long event, void *ptr)
{
struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
struct devlink_port *devlink_port = netdev->devlink_port;
@@ -10503,6 +7089,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
break;
case NETDEV_REGISTER:
case NETDEV_CHANGENAME:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Set the netdev on top of previously set type. Note this
* event happens also during net namespace change so here
* we take into account netdev pointer appearing in this
@@ -10512,6 +7100,8 @@ static int devlink_netdevice_event(struct notifier_block *nb,
netdev);
break;
case NETDEV_UNREGISTER:
+ if (devlink_net(devlink) != dev_net(netdev))
+ return NOTIFY_OK;
/* Clear netdev pointer, but not the type. This event happens
* also during net namespace change so we need to clear
* pointer to netdev that is going to another net namespace.
@@ -10911,7 +7501,7 @@ static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
}
/**
- * devlink_linecard_create - Create devlink linecard
+ * devl_linecard_create - Create devlink linecard
*
* @devlink: devlink
* @linecard_index: driver-specific numerical identifier of the linecard
@@ -10924,8 +7514,8 @@ static void devlink_linecard_types_fini(struct devlink_linecard *linecard)
* Return: Line card structure or an ERR_PTR() encoded error code.
*/
struct devlink_linecard *
-devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
- const struct devlink_linecard_ops *ops, void *priv)
+devl_linecard_create(struct devlink *devlink, unsigned int linecard_index,
+ const struct devlink_linecard_ops *ops, void *priv)
{
struct devlink_linecard *linecard;
int err;
@@ -10934,17 +7524,12 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
!ops->types_count || !ops->types_get))
return ERR_PTR(-EINVAL);
- mutex_lock(&devlink->linecards_lock);
- if (devlink_linecard_index_exists(devlink, linecard_index)) {
- mutex_unlock(&devlink->linecards_lock);
+ if (devlink_linecard_index_exists(devlink, linecard_index))
return ERR_PTR(-EEXIST);
- }
linecard = kzalloc(sizeof(*linecard), GFP_KERNEL);
- if (!linecard) {
- mutex_unlock(&devlink->linecards_lock);
+ if (!linecard)
return ERR_PTR(-ENOMEM);
- }
linecard->devlink = devlink;
linecard->index = linecard_index;
@@ -10957,35 +7542,29 @@ devlink_linecard_create(struct devlink *devlink, unsigned int linecard_index,
if (err) {
mutex_destroy(&linecard->state_lock);
kfree(linecard);
- mutex_unlock(&devlink->linecards_lock);
return ERR_PTR(err);
}
list_add_tail(&linecard->list, &devlink->linecard_list);
- refcount_set(&linecard->refcount, 1);
- mutex_unlock(&devlink->linecards_lock);
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_NEW);
return linecard;
}
-EXPORT_SYMBOL_GPL(devlink_linecard_create);
+EXPORT_SYMBOL_GPL(devl_linecard_create);
/**
- * devlink_linecard_destroy - Destroy devlink linecard
+ * devl_linecard_destroy - Destroy devlink linecard
*
* @linecard: devlink linecard
*/
-void devlink_linecard_destroy(struct devlink_linecard *linecard)
+void devl_linecard_destroy(struct devlink_linecard *linecard)
{
- struct devlink *devlink = linecard->devlink;
-
devlink_linecard_notify(linecard, DEVLINK_CMD_LINECARD_DEL);
- mutex_lock(&devlink->linecards_lock);
list_del(&linecard->list);
devlink_linecard_types_fini(linecard);
- mutex_unlock(&devlink->linecards_lock);
- devlink_linecard_put(linecard);
+ mutex_destroy(&linecard->state_lock);
+ kfree(linecard);
}
-EXPORT_SYMBOL_GPL(devlink_linecard_destroy);
+EXPORT_SYMBOL_GPL(devl_linecard_destroy);
/**
* devlink_linecard_provision_set - Set provisioning on linecard
@@ -11588,8 +8167,53 @@ static int devlink_param_verify(const struct devlink_param *param)
return devlink_param_driver_verify(param);
}
+static int devlink_param_register(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+ int err;
+
+ WARN_ON(devlink_param_verify(param));
+ WARN_ON(devlink_param_find_by_name(&devlink->params, param->name));
+
+ if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
+ WARN_ON(param->get || param->set);
+ else
+ WARN_ON(!param->get || !param->set);
+
+ param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
+ if (!param_item)
+ return -ENOMEM;
+
+ param_item->param = param;
+
+ err = xa_insert(&devlink->params, param->id, param_item, GFP_KERNEL);
+ if (err)
+ goto err_xa_insert;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+ return 0;
+
+err_xa_insert:
+ kfree(param_item);
+ return err;
+}
+
+static void devlink_param_unregister(struct devlink *devlink,
+ const struct devlink_param *param)
+{
+ struct devlink_param_item *param_item;
+
+ param_item = devlink_param_find_by_id(&devlink->params, param->id);
+ if (WARN_ON(!param_item))
+ return;
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_DEL);
+ xa_erase(&devlink->params, param->id);
+ kfree(param_item);
+}
+
/**
- * devlink_params_register - register configuration parameters
+ * devl_params_register - register configuration parameters
*
* @devlink: devlink
* @params: configuration parameters array
@@ -11597,14 +8221,14 @@ static int devlink_param_verify(const struct devlink_param *param)
*
* Register the configuration parameters supported by the driver.
*/
-int devlink_params_register(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
+int devl_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
{
const struct devlink_param *param = params;
int i, err;
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ lockdep_assert_held(&devlink->lock);
for (i = 0; i < params_count; i++, param++) {
err = devlink_param_register(devlink, param);
@@ -11621,124 +8245,103 @@ rollback:
devlink_param_unregister(devlink, param);
return err;
}
+EXPORT_SYMBOL_GPL(devl_params_register);
+
+int devlink_params_register(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
+{
+ int err;
+
+ devl_lock(devlink);
+ err = devl_params_register(devlink, params, params_count);
+ devl_unlock(devlink);
+ return err;
+}
EXPORT_SYMBOL_GPL(devlink_params_register);
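/* A registration sketch from a hypothetical driver probe path; the generic
 * parameter ID is real, the foo_* names are illustrative. A driverinit-only
 * parameter must pass NULL get/set callbacks, matching the WARN_ON in
 * devlink_param_register() above:
 */
static const struct devlink_param foo_devlink_params[] = {
	DEVLINK_PARAM_GENERIC(MAX_MACS,
			      BIT(DEVLINK_PARAM_CMODE_DRIVERINIT),
			      NULL, NULL, NULL),
};

static int foo_devlink_params_init(struct devlink *devlink)
{
	/* devlink_params_register() takes devlink->lock itself; callers
	 * already holding the instance lock use devl_params_register()
	 * directly.
	 */
	return devlink_params_register(devlink, foo_devlink_params,
				       ARRAY_SIZE(foo_devlink_params));
}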
/**
- * devlink_params_unregister - unregister configuration parameters
+ * devl_params_unregister - unregister configuration parameters
* @devlink: devlink
* @params: configuration parameters to unregister
* @params_count: number of parameters provided
*/
-void devlink_params_unregister(struct devlink *devlink,
- const struct devlink_param *params,
- size_t params_count)
+void devl_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
{
const struct devlink_param *param = params;
int i;
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ lockdep_assert_held(&devlink->lock);
for (i = 0; i < params_count; i++, param++)
devlink_param_unregister(devlink, param);
}
-EXPORT_SYMBOL_GPL(devlink_params_unregister);
-
-/**
- * devlink_param_register - register one configuration parameter
- *
- * @devlink: devlink
- * @param: one configuration parameter
- *
- * Register the configuration parameter supported by the driver.
- * Return: returns 0 on successful registration or error code otherwise.
- */
-int devlink_param_register(struct devlink *devlink,
- const struct devlink_param *param)
-{
- struct devlink_param_item *param_item;
-
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- WARN_ON(devlink_param_verify(param));
- WARN_ON(devlink_param_find_by_name(&devlink->param_list, param->name));
-
- if (param->supported_cmodes == BIT(DEVLINK_PARAM_CMODE_DRIVERINIT))
- WARN_ON(param->get || param->set);
- else
- WARN_ON(!param->get || !param->set);
-
- param_item = kzalloc(sizeof(*param_item), GFP_KERNEL);
- if (!param_item)
- return -ENOMEM;
-
- param_item->param = param;
-
- list_add_tail(&param_item->list, &devlink->param_list);
- return 0;
-}
-EXPORT_SYMBOL_GPL(devlink_param_register);
+EXPORT_SYMBOL_GPL(devl_params_unregister);
-/**
- * devlink_param_unregister - unregister one configuration parameter
- * @devlink: devlink
- * @param: configuration parameter to unregister
- */
-void devlink_param_unregister(struct devlink *devlink,
- const struct devlink_param *param)
+void devlink_params_unregister(struct devlink *devlink,
+ const struct devlink_param *params,
+ size_t params_count)
{
- struct devlink_param_item *param_item;
-
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
-
- param_item =
- devlink_param_find_by_name(&devlink->param_list, param->name);
- WARN_ON(!param_item);
- list_del(&param_item->list);
- kfree(param_item);
+ devl_lock(devlink);
+ devl_params_unregister(devlink, params, params_count);
+ devl_unlock(devlink);
}
-EXPORT_SYMBOL_GPL(devlink_param_unregister);
+EXPORT_SYMBOL_GPL(devlink_params_unregister);
/**
- * devlink_param_driverinit_value_get - get configuration parameter
- * value for driver initializing
+ * devl_param_driverinit_value_get - get configuration parameter
+ * value for driver initializing
*
* @devlink: devlink
* @param_id: parameter ID
- * @init_val: value of parameter in driverinit configuration mode
+ * @val: pointer to store the value of parameter in driverinit
+ * configuration mode
*
* This function should be used by the driver to get driverinit
* configuration for initialization after reload command.
+ *
+ * Note that a lockless call of this function relies on the
+ * driver to maintain the following basic sane behavior:
+ * 1) Driver ensures a call to this function cannot race with
+ * registering/unregistering the parameter with the same parameter ID.
+ * 2) Driver ensures a call to this function cannot race with
+ * a devl_param_driverinit_value_set() call with the same parameter ID.
+ * 3) Driver ensures a call to this function cannot race with
+ * a reload operation.
+ * If the driver is not able to comply, it has to take the devlink->lock
+ * while calling this function.
*/
-int devlink_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
- union devlink_param_value *init_val)
+int devl_param_driverinit_value_get(struct devlink *devlink, u32 param_id,
+ union devlink_param_value *val)
{
struct devlink_param_item *param_item;
- if (!devlink_reload_supported(devlink->ops))
+ if (WARN_ON(!devlink_reload_supported(devlink->ops)))
return -EOPNOTSUPP;
- param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
if (!param_item)
return -EINVAL;
- if (!param_item->driverinit_value_valid ||
- !devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
+ if (!param_item->driverinit_value_valid)
return -EOPNOTSUPP;
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(init_val->vstr, param_item->driverinit_value.vstr);
- else
- *init_val = param_item->driverinit_value;
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return -EOPNOTSUPP;
+
+ *val = param_item->driverinit_value;
return 0;
}
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_get);
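/* A reload-time usage sketch under the locking rules above, assuming a
 * hypothetical driver private struct that caches the value:
 */
struct foo_priv {
	u32 max_macs;
};

static void foo_params_load(struct devlink *devlink, struct foo_priv *priv)
{
	union devlink_param_value val;

	if (!devl_param_driverinit_value_get(devlink,
					     DEVLINK_PARAM_GENERIC_ID_MAX_MACS,
					     &val))
		priv->max_macs = val.vu32;
}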
/**
- * devlink_param_driverinit_value_set - set value of configuration
- * parameter for driverinit
- * configuration mode
+ * devl_param_driverinit_value_set - set value of configuration
+ * parameter for driverinit
+ * configuration mode
*
* @devlink: devlink
* @param_id: parameter ID
@@ -11747,34 +8350,48 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_get);
* This function should be used by the driver to set driverinit
* configuration mode default value.
*/
-int devlink_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
- union devlink_param_value init_val)
+void devl_param_driverinit_value_set(struct devlink *devlink, u32 param_id,
+ union devlink_param_value init_val)
{
struct devlink_param_item *param_item;
- ASSERT_DEVLINK_NOT_REGISTERED(devlink);
+ devl_assert_locked(devlink);
- param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
- if (!param_item)
- return -EINVAL;
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
+ if (WARN_ON(!param_item))
+ return;
- if (!devlink_param_cmode_is_supported(param_item->param,
- DEVLINK_PARAM_CMODE_DRIVERINIT))
- return -EOPNOTSUPP;
+ if (WARN_ON(!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT)))
+ return;
- if (param_item->param->type == DEVLINK_PARAM_TYPE_STRING)
- strcpy(param_item->driverinit_value.vstr, init_val.vstr);
- else
- param_item->driverinit_value = init_val;
+ param_item->driverinit_value = init_val;
param_item->driverinit_value_valid = true;
- return 0;
+
+ devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
+}
+EXPORT_SYMBOL_GPL(devl_param_driverinit_value_set);
+
+void devlink_params_driverinit_load_new(struct devlink *devlink)
+{
+ struct devlink_param_item *param_item;
+ unsigned long param_id;
+
+ xa_for_each(&devlink->params, param_id, param_item) {
+ if (!devlink_param_cmode_is_supported(param_item->param,
+ DEVLINK_PARAM_CMODE_DRIVERINIT) ||
+ !param_item->driverinit_value_new_valid)
+ continue;
+ param_item->driverinit_value = param_item->driverinit_value_new;
+ param_item->driverinit_value_valid = true;
+ param_item->driverinit_value_new_valid = false;
+ }
}
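/* The staged-value flow this helper completes, sketched: a userspace
 * "devlink dev param set ... cmode driverinit" stores the new value in
 * driverinit_value_new, where it stays until a reload; the core then
 * promotes it via devlink_params_driverinit_load_new() and the driver
 * reads it back with devl_param_driverinit_value_get().
 */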
-EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
/**
- * devlink_param_value_changed - notify devlink on a parameter's value
- * change. Should be called by the driver
- * right after the change.
+ * devl_param_value_changed - notify devlink on a parameter's value
+ * change. Should be called by the driver
+ * right after the change.
*
* @devlink: devlink
* @param_id: parameter ID
@@ -11783,16 +8400,16 @@ EXPORT_SYMBOL_GPL(devlink_param_driverinit_value_set);
* change, excluding driverinit configuration mode.
* For driverinit configuration mode driver should use the function
*/
-void devlink_param_value_changed(struct devlink *devlink, u32 param_id)
+void devl_param_value_changed(struct devlink *devlink, u32 param_id)
{
struct devlink_param_item *param_item;
- param_item = devlink_param_find_by_id(&devlink->param_list, param_id);
+ param_item = devlink_param_find_by_id(&devlink->params, param_id);
WARN_ON(!param_item);
devlink_param_notify(devlink, 0, param_item, DEVLINK_CMD_PARAM_NEW);
}
-EXPORT_SYMBOL_GPL(devlink_param_value_changed);
+EXPORT_SYMBOL_GPL(devl_param_value_changed);
/**
* devl_region_create - create a new address region
@@ -12878,76 +9495,6 @@ devl_trap_policers_unregister(struct devlink *devlink,
}
EXPORT_SYMBOL_GPL(devl_trap_policers_unregister);
-static void __devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- struct devlink_info_req req = {};
- const struct nlattr *nlattr;
- struct sk_buff *msg;
- int rem, err;
-
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
- if (!msg)
- return;
-
- req.msg = msg;
- err = devlink->ops->info_get(devlink, &req, NULL);
- if (err)
- goto free_msg;
-
- nla_for_each_attr(nlattr, (void *)msg->data, msg->len, rem) {
- const struct nlattr *kv;
- int rem_kv;
-
- if (nla_type(nlattr) != DEVLINK_ATTR_INFO_VERSION_RUNNING)
- continue;
-
- nla_for_each_nested(kv, nlattr, rem_kv) {
- if (nla_type(kv) != DEVLINK_ATTR_INFO_VERSION_VALUE)
- continue;
-
- strlcat(buf, nla_data(kv), len);
- strlcat(buf, " ", len);
- }
- }
-free_msg:
- nlmsg_free(msg);
-}
-
-void devlink_compat_running_version(struct devlink *devlink,
- char *buf, size_t len)
-{
- if (!devlink->ops->info_get)
- return;
-
- devl_lock(devlink);
- __devlink_compat_running_version(devlink, buf, len);
- devl_unlock(devlink);
-}
-
-int devlink_compat_flash_update(struct devlink *devlink, const char *file_name)
-{
- struct devlink_flash_update_params params = {};
- int ret;
-
- if (!devlink->ops->flash_update)
- return -EOPNOTSUPP;
-
- ret = request_firmware(&params.fw, file_name, devlink->dev);
- if (ret)
- return ret;
-
- devl_lock(devlink);
- devlink_flash_update_begin_notify(devlink);
- ret = devlink->ops->flash_update(devlink, &params, NULL);
- devlink_flash_update_end_notify(devlink);
- devl_unlock(devlink);
-
- release_firmware(params.fw);
-
- return ret;
-}
-
int devlink_compat_phys_port_name_get(struct net_device *dev,
char *name, size_t len)
{
@@ -12983,47 +9530,3 @@ int devlink_compat_switch_id_get(struct net_device *dev,
return 0;
}
-
-static void __net_exit devlink_pernet_pre_exit(struct net *net)
-{
- struct devlink *devlink;
- u32 actions_performed;
- unsigned long index;
- int err;
-
- /* In case network namespace is getting destroyed, reload
- * all devlink instances from this namespace into init_net.
- */
- devlinks_xa_for_each_registered_get(net, index, devlink) {
- WARN_ON(!(devlink->features & DEVLINK_F_RELOAD));
- mutex_lock(&devlink->lock);
- err = devlink_reload(devlink, &init_net,
- DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
- DEVLINK_RELOAD_LIMIT_UNSPEC,
- &actions_performed, NULL);
- mutex_unlock(&devlink->lock);
- if (err && err != -EOPNOTSUPP)
- pr_warn("Failed to reload devlink instance into init_net\n");
- devlink_put(devlink);
- }
-}
-
-static struct pernet_operations devlink_pernet_ops __net_initdata = {
- .pre_exit = devlink_pernet_pre_exit,
-};
-
-static int __init devlink_init(void)
-{
- int err;
-
- err = genl_register_family(&devlink_nl_family);
- if (err)
- goto out;
- err = register_pernet_subsys(&devlink_pernet_ops);
-
-out:
- WARN_ON(err);
- return err;
-}
-
-subsys_initcall(devlink_init);
diff --git a/net/devlink/netlink.c b/net/devlink/netlink.c
new file mode 100644
index 000000000000..7a332eb70f70
--- /dev/null
+++ b/net/devlink/netlink.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2016 Mellanox Technologies. All rights reserved.
+ * Copyright (c) 2016 Jiri Pirko <jiri@mellanox.com>
+ */
+
+#include <net/genetlink.h>
+#include <net/sock.h>
+
+#include "devl_internal.h"
+
+static const struct genl_multicast_group devlink_nl_mcgrps[] = {
+ [DEVLINK_MCGRP_CONFIG] = { .name = DEVLINK_GENL_MCGRP_CONFIG_NAME },
+};
+
+static const struct nla_policy devlink_nl_policy[DEVLINK_ATTR_MAX + 1] = {
+ [DEVLINK_ATTR_UNSPEC] = { .strict_start_type =
+ DEVLINK_ATTR_TRAP_POLICER_ID },
+ [DEVLINK_ATTR_BUS_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DEV_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_PORT_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_PORT_TYPE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_PORT_TYPE_AUTO,
+ DEVLINK_PORT_TYPE_IB),
+ [DEVLINK_ATTR_PORT_SPLIT_COUNT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_POOL_INDEX] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_SB_POOL_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_SB_POOL_SIZE] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_POOL_THRESHOLD_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_SB_THRESHOLD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_SB_TC_INDEX] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_ESWITCH_MODE] = NLA_POLICY_RANGE(NLA_U16, DEVLINK_ESWITCH_MODE_LEGACY,
+ DEVLINK_ESWITCH_MODE_SWITCHDEV),
+ [DEVLINK_ATTR_ESWITCH_INLINE_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_ESWITCH_ENCAP_MODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_DPIPE_TABLE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_DPIPE_TABLE_COUNTERS_ENABLED] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_RESOURCE_ID] = { .type = NLA_U64},
+ [DEVLINK_ATTR_RESOURCE_SIZE] = { .type = NLA_U64},
+ [DEVLINK_ATTR_PARAM_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_PARAM_TYPE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_PARAM_VALUE_CMODE] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_REGION_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_REGION_SNAPSHOT_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_CHUNK_ADDR] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_REGION_CHUNK_LEN] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_HEALTH_REPORTER_GRACEFUL_PERIOD] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_RECOVER] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_FLASH_UPDATE_FILE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_FLASH_UPDATE_COMPONENT] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_FLASH_UPDATE_OVERWRITE_MASK] =
+ NLA_POLICY_BITFIELD32(DEVLINK_SUPPORTED_FLASH_OVERWRITE_SECTIONS),
+ [DEVLINK_ATTR_TRAP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_TRAP_ACTION] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_GROUP_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_NETNS_PID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_FD] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_NETNS_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_HEALTH_REPORTER_AUTO_DUMP] = { .type = NLA_U8 },
+ [DEVLINK_ATTR_TRAP_POLICER_ID] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_TRAP_POLICER_RATE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_TRAP_POLICER_BURST] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_PORT_FUNCTION] = { .type = NLA_NESTED },
+ [DEVLINK_ATTR_RELOAD_ACTION] = NLA_POLICY_RANGE(NLA_U8, DEVLINK_RELOAD_ACTION_DRIVER_REINIT,
+ DEVLINK_RELOAD_ACTION_MAX),
+ [DEVLINK_ATTR_RELOAD_LIMITS] = NLA_POLICY_BITFIELD32(DEVLINK_RELOAD_LIMITS_VALID_MASK),
+ [DEVLINK_ATTR_PORT_FLAVOUR] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_PCI_PF_NUMBER] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_PORT_PCI_SF_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_PORT_CONTROLLER_NUMBER] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TYPE] = { .type = NLA_U16 },
+ [DEVLINK_ATTR_RATE_TX_SHARE] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_TX_MAX] = { .type = NLA_U64 },
+ [DEVLINK_ATTR_RATE_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_RATE_PARENT_NODE_NAME] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_LINECARD_INDEX] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_LINECARD_TYPE] = { .type = NLA_NUL_STRING },
+ [DEVLINK_ATTR_SELFTESTS] = { .type = NLA_NESTED },
+ [DEVLINK_ATTR_RATE_TX_PRIORITY] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_RATE_TX_WEIGHT] = { .type = NLA_U32 },
+ [DEVLINK_ATTR_REGION_DIRECT] = { .type = NLA_FLAG },
+};
+
+struct devlink *
+devlink_get_from_attrs_lock(struct net *net, struct nlattr **attrs)
+{
+ struct devlink *devlink;
+ unsigned long index;
+ char *busname;
+ char *devname;
+
+ if (!attrs[DEVLINK_ATTR_BUS_NAME] || !attrs[DEVLINK_ATTR_DEV_NAME])
+ return ERR_PTR(-EINVAL);
+
+ busname = nla_data(attrs[DEVLINK_ATTR_BUS_NAME]);
+ devname = nla_data(attrs[DEVLINK_ATTR_DEV_NAME]);
+
+ devlinks_xa_for_each_registered_get(net, index, devlink) {
+ devl_lock(devlink);
+ if (devl_is_registered(devlink) &&
+ strcmp(devlink->dev->bus->name, busname) == 0 &&
+ strcmp(dev_name(devlink->dev), devname) == 0)
+ return devlink;
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ }
+
+ return ERR_PTR(-ENODEV);
+}
+
+static int devlink_nl_pre_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink_linecard *linecard;
+ struct devlink_port *devlink_port;
+ struct devlink *devlink;
+ int err;
+
+ devlink = devlink_get_from_attrs_lock(genl_info_net(info), info->attrs);
+ if (IS_ERR(devlink))
+ return PTR_ERR(devlink);
+
+ info->user_ptr[0] = devlink;
+ if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_PORT) {
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (IS_ERR(devlink_port)) {
+ err = PTR_ERR(devlink_port);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_DEVLINK_OR_PORT) {
+ devlink_port = devlink_port_get_from_info(devlink, info);
+ if (!IS_ERR(devlink_port))
+ info->user_ptr[1] = devlink_port;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE) {
+ struct devlink_rate *devlink_rate;
+
+ devlink_rate = devlink_rate_get_from_info(devlink, info);
+ if (IS_ERR(devlink_rate)) {
+ err = PTR_ERR(devlink_rate);
+ goto unlock;
+ }
+ info->user_ptr[1] = devlink_rate;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_RATE_NODE) {
+ struct devlink_rate *rate_node;
+
+ rate_node = devlink_rate_node_get_from_info(devlink, info);
+ if (IS_ERR(rate_node)) {
+ err = PTR_ERR(rate_node);
+ goto unlock;
+ }
+ info->user_ptr[1] = rate_node;
+ } else if (ops->internal_flags & DEVLINK_NL_FLAG_NEED_LINECARD) {
+ linecard = devlink_linecard_get_from_info(devlink, info);
+ if (IS_ERR(linecard)) {
+ err = PTR_ERR(linecard);
+ goto unlock;
+ }
+ info->user_ptr[1] = linecard;
+ }
+ return 0;
+
+unlock:
+ devl_unlock(devlink);
+ devlink_put(devlink);
+ return err;
+}
+
+static void devlink_nl_post_doit(const struct genl_split_ops *ops,
+ struct sk_buff *skb, struct genl_info *info)
+{
+ struct devlink *devlink;
+
+ devlink = info->user_ptr[0];
+ devl_unlock(devlink);
+ devlink_put(devlink);
+}
+
+static const struct devlink_cmd *devl_cmds[] = {
+ [DEVLINK_CMD_GET] = &devl_cmd_get,
+ [DEVLINK_CMD_PORT_GET] = &devl_cmd_port_get,
+ [DEVLINK_CMD_SB_GET] = &devl_cmd_sb_get,
+ [DEVLINK_CMD_SB_POOL_GET] = &devl_cmd_sb_pool_get,
+ [DEVLINK_CMD_SB_PORT_POOL_GET] = &devl_cmd_sb_port_pool_get,
+ [DEVLINK_CMD_SB_TC_POOL_BIND_GET] = &devl_cmd_sb_tc_pool_bind_get,
+ [DEVLINK_CMD_PARAM_GET] = &devl_cmd_param_get,
+ [DEVLINK_CMD_REGION_GET] = &devl_cmd_region_get,
+ [DEVLINK_CMD_INFO_GET] = &devl_cmd_info_get,
+ [DEVLINK_CMD_HEALTH_REPORTER_GET] = &devl_cmd_health_reporter_get,
+ [DEVLINK_CMD_TRAP_GET] = &devl_cmd_trap_get,
+ [DEVLINK_CMD_TRAP_GROUP_GET] = &devl_cmd_trap_group_get,
+ [DEVLINK_CMD_TRAP_POLICER_GET] = &devl_cmd_trap_policer_get,
+ [DEVLINK_CMD_RATE_GET] = &devl_cmd_rate_get,
+ [DEVLINK_CMD_LINECARD_GET] = &devl_cmd_linecard_get,
+ [DEVLINK_CMD_SELFTESTS_GET] = &devl_cmd_selftests_get,
+};
+
+int devlink_nl_instance_iter_dumpit(struct sk_buff *msg,
+ struct netlink_callback *cb)
+{
+ const struct genl_dumpit_info *info = genl_dumpit_info(cb);
+ struct devlink_nl_dump_state *state = devlink_dump_state(cb);
+ const struct devlink_cmd *cmd;
+ struct devlink *devlink;
+ int err = 0;
+
+ cmd = devl_cmds[info->op.cmd];
+
+ while ((devlink = devlinks_xa_find_get(sock_net(msg->sk),
+ &state->instance))) {
+ devl_lock(devlink);
+
+ if (devl_is_registered(devlink))
+ err = cmd->dump_one(msg, devlink, cb);
+ else
+ err = 0;
+
+ devl_unlock(devlink);
+ devlink_put(devlink);
+
+ if (err)
+ break;
+
+ state->instance++;
+
+ /* restart sub-object walk for the next instance */
+ state->idx = 0;
+ }
+
+ if (err != -EMSGSIZE)
+ return err;
+ return msg->len;
+}
+
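/* For reference, a sketch of the per-dump resume state the iterator relies
 * on (the real definition in devl_internal.h carries extra per-command
 * fields; only the two used above are shown):
 *
 *	struct devlink_nl_dump_state {
 *		unsigned long instance;	- devlink instance to resume at
 *		int idx;		- sub-object index within it
 *	};
 *
 * Returning msg->len when err is -EMSGSIZE tells netlink the skb is full
 * and the dump callback will be invoked again with the same state.
 */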
+struct genl_family devlink_nl_family __ro_after_init = {
+ .name = DEVLINK_GENL_NAME,
+ .version = DEVLINK_GENL_VERSION,
+ .maxattr = DEVLINK_ATTR_MAX,
+ .policy = devlink_nl_policy,
+ .netnsok = true,
+ .parallel_ops = true,
+ .pre_doit = devlink_nl_pre_doit,
+ .post_doit = devlink_nl_post_doit,
+ .module = THIS_MODULE,
+ .small_ops = devlink_nl_ops,
+ .n_small_ops = ARRAY_SIZE(devlink_nl_ops),
+ .resv_start_op = DEVLINK_CMD_SELFTESTS_RUN + 1,
+ .mcgrps = devlink_nl_mcgrps,
+ .n_mcgrps = ARRAY_SIZE(devlink_nl_mcgrps),
+};
diff --git a/net/dsa/master.c b/net/dsa/master.c
index 26d90140d271..22d3f16b0e6d 100644
--- a/net/dsa/master.c
+++ b/net/dsa/master.c
@@ -299,7 +299,7 @@ static ssize_t tagging_show(struct device *d, struct device_attribute *attr,
struct net_device *dev = to_net_dev(d);
struct dsa_port *cpu_dp = dev->dsa_ptr;
- return sprintf(buf, "%s\n",
+ return sysfs_emit(buf, "%s\n",
dsa_tag_protocol_to_str(cpu_dp->tag_ops));
}
@@ -464,9 +464,7 @@ int dsa_master_lag_setup(struct net_device *lag_dev, struct dsa_port *cpu_dp,
err = dsa_port_lag_join(cpu_dp, lag_dev, uinfo, extack);
if (err) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "CPU port failed to join LAG");
+ NL_SET_ERR_MSG_WEAK_MOD(extack, "CPU port failed to join LAG");
goto out_master_teardown;
}
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index aab79c355224..6957971c2db2 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1117,6 +1117,40 @@ static void dsa_slave_net_selftest(struct net_device *ndev,
net_selftest(ndev, etest, buf);
}
+static int dsa_slave_get_mm(struct net_device *dev,
+ struct ethtool_mm_state *state)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->get_mm)
+ return -EOPNOTSUPP;
+
+ return ds->ops->get_mm(ds, dp->index, state);
+}
+
+static int dsa_slave_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
+ struct netlink_ext_ack *extack)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (!ds->ops->set_mm)
+ return -EOPNOTSUPP;
+
+ return ds->ops->set_mm(ds, dp->index, cfg, extack);
+}
+
+static void dsa_slave_get_mm_stats(struct net_device *dev,
+ struct ethtool_mm_stats *stats)
+{
+ struct dsa_port *dp = dsa_slave_to_port(dev);
+ struct dsa_switch *ds = dp->ds;
+
+ if (ds->ops->get_mm_stats)
+ ds->ops->get_mm_stats(ds, dp->index, stats);
+}
+
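/* A sketch of the matching switch-driver hook, assuming a hypothetical
 * driver that tracks MAC Merge state in software; struct ethtool_mm_state
 * is the real linux/ethtool.h type, all foo_* names are illustrative:
 */
#define FOO_MAX_PORTS 8

struct foo_priv {
	bool pmac_enabled[FOO_MAX_PORTS];
	bool tx_enabled[FOO_MAX_PORTS];
};

static int foo_get_mm(struct dsa_switch *ds, int port,
		      struct ethtool_mm_state *state)
{
	struct foo_priv *priv = ds->priv;

	state->pmac_enabled = priv->pmac_enabled[port];
	state->tx_enabled = priv->tx_enabled[port];
	state->tx_min_frag_size = 64;
	return 0;
}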
static void dsa_slave_get_wol(struct net_device *dev, struct ethtool_wolinfo *w)
{
struct dsa_port *dp = dsa_slave_to_port(dev);
@@ -2205,6 +2239,9 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = {
.set_rxnfc = dsa_slave_set_rxnfc,
.get_ts_info = dsa_slave_get_ts_info,
.self_test = dsa_slave_net_selftest,
+ .get_mm = dsa_slave_get_mm,
+ .set_mm = dsa_slave_set_mm,
+ .get_mm_stats = dsa_slave_get_mm_stats,
};
static const struct dcbnl_rtnl_ops __maybe_unused dsa_slave_dcbnl_ops = {
@@ -2655,9 +2692,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (!err)
dsa_bridge_mtu_normalization(dp);
if (err == -EOPNOTSUPP) {
- if (extack && !extack->_msg)
- NL_SET_ERR_MSG_MOD(extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2670,8 +2706,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
err = dsa_port_lag_join(dp, info->upper_dev,
info->upper_info, extack);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
@@ -2683,8 +2719,8 @@ static int dsa_slave_changeupper(struct net_device *dev,
if (info->linking) {
err = dsa_port_hsr_join(dp, info->upper_dev);
if (err == -EOPNOTSUPP) {
- NL_SET_ERR_MSG_MOD(info->info.extack,
- "Offloading not supported");
+ NL_SET_ERR_MSG_WEAK_MOD(extack,
+ "Offloading not supported");
err = 0;
}
err = notifier_from_errno(err);
diff --git a/net/dsa/tag_ksz.c b/net/dsa/tag_ksz.c
index 080e5c369f5b..0eb1c7784c3d 100644
--- a/net/dsa/tag_ksz.c
+++ b/net/dsa/tag_ksz.c
@@ -4,8 +4,10 @@
* Copyright (c) 2017 Microchip Technology
*/
+#include <linux/dsa/ksz_common.h>
#include <linux/etherdevice.h>
#include <linux/list.h>
+#include <linux/ptp_classify.h>
#include <net/dsa.h>
#include "tag.h"
@@ -16,9 +18,71 @@
#define LAN937X_NAME "lan937x"
/* Typically only one byte is used for tail tag. */
+#define KSZ_PTP_TAG_LEN 4
#define KSZ_EGRESS_TAG_LEN 1
#define KSZ_INGRESS_TAG_LEN 1
+#define KSZ_HWTS_EN 0
+
+struct ksz_tagger_private {
+ struct ksz_tagger_data data; /* Must be first */
+ unsigned long state;
+ struct kthread_worker *xmit_worker;
+};
+
+static struct ksz_tagger_private *
+ksz_tagger_private(struct dsa_switch *ds)
+{
+ return ds->tagger_data;
+}
+
+static void ksz_hwtstamp_set_state(struct dsa_switch *ds, bool on)
+{
+ struct ksz_tagger_private *priv = ksz_tagger_private(ds);
+
+ if (on)
+ set_bit(KSZ_HWTS_EN, &priv->state);
+ else
+ clear_bit(KSZ_HWTS_EN, &priv->state);
+}
+
+static void ksz_disconnect(struct dsa_switch *ds)
+{
+ struct ksz_tagger_private *priv = ds->tagger_data;
+
+ kthread_destroy_worker(priv->xmit_worker);
+ kfree(priv);
+ ds->tagger_data = NULL;
+}
+
+static int ksz_connect(struct dsa_switch *ds)
+{
+ struct ksz_tagger_data *tagger_data;
+ struct kthread_worker *xmit_worker;
+ struct ksz_tagger_private *priv;
+ int ret;
+
+ priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+ if (!priv)
+ return -ENOMEM;
+
+ xmit_worker = kthread_create_worker(0, "dsa%d:%d_xmit",
+ ds->dst->index, ds->index);
+ if (IS_ERR(xmit_worker)) {
+ ret = PTR_ERR(xmit_worker);
+ kfree(priv);
+ return ret;
+ }
+
+ priv->xmit_worker = xmit_worker;
+ /* Export functions for switch driver use */
+ tagger_data = &priv->data;
+ tagger_data->hwtstamp_set_state = ksz_hwtstamp_set_state;
+ ds->tagger_data = priv;
+
+ return 0;
+}
+
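On the other end of this handshake, the switch driver reaches the exported hook through ds->tagger_data. A sketch of the expected call site (the wrapper name is hypothetical):

static void example_hwtstamp_enable(struct dsa_switch *ds, bool enable)
{
	struct ksz_tagger_data *tagger_data = ksz_tagger_data(ds);

	/* Tell the tagger whether to emit/parse the 4-byte PTP field */
	if (tagger_data->hwtstamp_set_state)
		tagger_data->hwtstamp_set_state(ds, enable);
}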
static struct sk_buff *ksz_common_rcv(struct sk_buff *skb,
struct net_device *dev,
unsigned int port, unsigned int len)
@@ -92,17 +156,20 @@ DSA_TAG_DRIVER(ksz8795_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
/*
- * For Ingress (Host -> KSZ9477), 2 bytes are added before FCS.
+ * For Ingress (Host -> KSZ9477), 2/6 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|tag1(1byte)|
+ * FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if PTP is enabled in the hardware)
* tag0 : Prioritization (not used now)
* tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x10=port5)
*
- * For Egress (KSZ9477 -> Host), 1 byte is added before FCS.
+ * For Egress (KSZ9477 -> Host), 1/5 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if bit 7 of tag0 is set)
* tag0 : zero-based value represents port
* (eg, 0x00=port1, 0x02=port3, 0x06=port7)
*/
@@ -111,12 +178,100 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ8795, KSZ8795_NAME);
#define KSZ9477_PTP_TAG_LEN 4
#define KSZ9477_PTP_TAG_INDICATION 0x80
+#define KSZ9477_TAIL_TAG_PRIO GENMASK(8, 7)
#define KSZ9477_TAIL_TAG_OVERRIDE BIT(9)
#define KSZ9477_TAIL_TAG_LOOKUP BIT(10)
+static void ksz_rcv_timestamp(struct sk_buff *skb, u8 *tag)
+{
+ u8 *tstamp_raw = tag - KSZ_PTP_TAG_LEN;
+ ktime_t tstamp;
+
+ tstamp = ksz_decode_tstamp(get_unaligned_be32(tstamp_raw));
+ KSZ_SKB_CB(skb)->tstamp = tstamp;
+}
+
+/* The time stamp tag *needs* to be inserted if PTP is enabled in hardware,
+ * regardless of whether the frame is a PTP frame or not.
+ */
+static void ksz_xmit_timestamp(struct dsa_port *dp, struct sk_buff *skb)
+{
+ struct ksz_tagger_private *priv;
+ struct ptp_header *ptp_hdr;
+ unsigned int ptp_type;
+ u32 tstamp_raw = 0;
+ s64 correction;
+
+ priv = ksz_tagger_private(dp->ds);
+
+ if (!test_bit(KSZ_HWTS_EN, &priv->state))
+ return;
+
+ if (!KSZ_SKB_CB(skb)->update_correction)
+ goto output_tag;
+
+ ptp_type = KSZ_SKB_CB(skb)->ptp_type;
+
+ ptp_hdr = ptp_parse_header(skb, ptp_type);
+ if (!ptp_hdr)
+ goto output_tag;
+
+ correction = (s64)get_unaligned_be64(&ptp_hdr->correction);
+
+ if (correction < 0) {
+ struct timespec64 ts;
+
+ ts = ns_to_timespec64(-correction >> 16);
+ tstamp_raw = ((ts.tv_sec & 3) << 30) | ts.tv_nsec;
+
+ /* Set correction field to 0 and update UDP checksum */
+ ptp_header_update_correction(skb, ptp_type, ptp_hdr, 0);
+ }
+
+output_tag:
+ put_unaligned_be32(tstamp_raw, skb_put(skb, KSZ_PTP_TAG_LEN));
+}
+
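The 32-bit tail-tag timestamp packs the seconds counter modulo 4 into the top two bits and the nanoseconds into the low 30 bits, as the encoding above shows. Decoding, used by ksz_rcv_timestamp(), is the mirror image; a sketch of what ksz_decode_tstamp() from <linux/dsa/ksz_common.h> is expected to compute, assuming that bit layout:

static inline ktime_t example_decode_tstamp(u32 tstamp)
{
	u64 ns = ((u64)((tstamp >> 30) & 0x3)) * NSEC_PER_SEC +
		 (tstamp & GENMASK(29, 0));

	return ns_to_ktime(ns);
}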
+/* Defer transmission if an egress time stamp must be collected first. */
+static struct sk_buff *ksz_defer_xmit(struct dsa_port *dp, struct sk_buff *skb)
+{
+ struct ksz_tagger_data *tagger_data = ksz_tagger_data(dp->ds);
+ struct ksz_tagger_private *priv = ksz_tagger_private(dp->ds);
+ void (*xmit_work_fn)(struct kthread_work *work);
+ struct sk_buff *clone = KSZ_SKB_CB(skb)->clone;
+ struct ksz_deferred_xmit_work *xmit_work;
+ struct kthread_worker *xmit_worker;
+
+ if (!clone)
+ return skb; /* no deferred xmit for this packet */
+
+ xmit_work_fn = tagger_data->xmit_work_fn;
+ xmit_worker = priv->xmit_worker;
+
+ if (!xmit_work_fn || !xmit_worker)
+ return NULL;
+
+ xmit_work = kzalloc(sizeof(*xmit_work), GFP_ATOMIC);
+ if (!xmit_work)
+ return NULL;
+
+ kthread_init_work(&xmit_work->work, xmit_work_fn);
+ /* Increase refcount so the kfree_skb in dsa_slave_xmit
+ * won't really free the packet.
+ */
+ xmit_work->dp = dp;
+ xmit_work->skb = skb_get(skb);
+
+ kthread_queue_work(xmit_worker, &xmit_work->work);
+
+ return NULL;
+}
+
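The queued work item is completed by the switch driver's xmit_work_fn. It is expected to arm the port so the egress time stamp lands on the clone, then hand the frame to the conduit; a sketch, assuming dsa_enqueue_skb() remains the transmit path:

static void example_deferred_xmit(struct kthread_work *work)
{
	struct ksz_deferred_xmit_work *xmit_work =
		container_of(work, struct ksz_deferred_xmit_work, work);
	struct sk_buff *skb = xmit_work->skb;

	/* ... program the port so the TX time stamp is reported for
	 * KSZ_SKB_CB(skb)->clone, then transmit for real ...
	 */
	dsa_enqueue_skb(skb, skb->dev);

	kfree(xmit_work);
}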
static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 prio = netdev_txq_to_tc(dev, queue_mapping);
struct dsa_port *dp = dsa_slave_to_port(dev);
__be16 *tag;
u8 *addr;
@@ -126,17 +281,21 @@ static struct sk_buff *ksz9477_xmit(struct sk_buff *skb,
return NULL;
/* Tag encoding */
+ ksz_xmit_timestamp(dp, skb);
+
tag = skb_put(skb, KSZ9477_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
val = BIT(dp->index);
+ val |= FIELD_PREP(KSZ9477_TAIL_TAG_PRIO, prio);
+
if (is_link_local_ether_addr(addr))
val |= KSZ9477_TAIL_TAG_OVERRIDE;
*tag = cpu_to_be16(val);
- return skb;
+ return ksz_defer_xmit(dp, skb);
}
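To make the bit packing concrete: a frame leaving port index 2, on a TX queue mapped to traffic class 3, with a link-local destination, yields

	val  = BIT(2);                               /* 0x0004: port selector  */
	val |= FIELD_PREP(KSZ9477_TAIL_TAG_PRIO, 3); /* 0x0180: queue priority */
	val |= KSZ9477_TAIL_TAG_OVERRIDE;            /* 0x0200: link-local     */

so the tail tag becomes cpu_to_be16(0x0384), optionally preceded by the 4-byte ts field.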
static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
@@ -147,8 +306,10 @@ static struct sk_buff *ksz9477_rcv(struct sk_buff *skb, struct net_device *dev)
unsigned int len = KSZ_EGRESS_TAG_LEN;
/* Extra 4-bytes PTP timestamp */
- if (tag[0] & KSZ9477_PTP_TAG_INDICATION)
- len += KSZ9477_PTP_TAG_LEN;
+ if (tag[0] & KSZ9477_PTP_TAG_INDICATION) {
+ ksz_rcv_timestamp(skb, tag);
+ len += KSZ_PTP_TAG_LEN;
+ }
return ksz_common_rcv(skb, dev, port, len);
}
@@ -158,18 +319,23 @@ static const struct dsa_device_ops ksz9477_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ9477,
.xmit = ksz9477_xmit,
.rcv = ksz9477_rcv,
- .needed_tailroom = KSZ9477_INGRESS_TAG_LEN,
+ .connect = ksz_connect,
+ .disconnect = ksz_disconnect,
+ .needed_tailroom = KSZ9477_INGRESS_TAG_LEN + KSZ_PTP_TAG_LEN,
};
DSA_TAG_DRIVER(ksz9477_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9477, KSZ9477_NAME);
+#define KSZ9893_TAIL_TAG_PRIO GENMASK(4, 3)
#define KSZ9893_TAIL_TAG_OVERRIDE BIT(5)
#define KSZ9893_TAIL_TAG_LOOKUP BIT(6)
static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 prio = netdev_txq_to_tc(dev, queue_mapping);
struct dsa_port *dp = dsa_slave_to_port(dev);
u8 *addr;
u8 *tag;
@@ -178,15 +344,19 @@ static struct sk_buff *ksz9893_xmit(struct sk_buff *skb,
return NULL;
/* Tag encoding */
+ ksz_xmit_timestamp(dp, skb);
+
tag = skb_put(skb, KSZ_INGRESS_TAG_LEN);
addr = skb_mac_header(skb);
*tag = BIT(dp->index);
+ *tag |= FIELD_PREP(KSZ9893_TAIL_TAG_PRIO, prio);
+
if (is_link_local_ether_addr(addr))
*tag |= KSZ9893_TAIL_TAG_OVERRIDE;
- return skb;
+ return ksz_defer_xmit(dp, skb);
}
static const struct dsa_device_ops ksz9893_netdev_ops = {
@@ -194,23 +364,28 @@ static const struct dsa_device_ops ksz9893_netdev_ops = {
.proto = DSA_TAG_PROTO_KSZ9893,
.xmit = ksz9893_xmit,
.rcv = ksz9477_rcv,
- .needed_tailroom = KSZ_INGRESS_TAG_LEN,
+ .connect = ksz_connect,
+ .disconnect = ksz_disconnect,
+ .needed_tailroom = KSZ_INGRESS_TAG_LEN + KSZ_PTP_TAG_LEN,
};
DSA_TAG_DRIVER(ksz9893_netdev_ops);
MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893, KSZ9893_NAME);
-/* For xmit, 2 bytes are added before FCS.
+/* For xmit, 2/6 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|tag1(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|tag1(1byte)|
+ * FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if PTP is enabled in the hardware)
* tag0 : represents tag override, lookup and valid
* tag1 : each bit represents port (eg, 0x01=port1, 0x02=port2, 0x80=port8)
*
- * For rcv, 1 byte is added before FCS.
+ * For rcv, 1/5 bytes are added before FCS.
* ---------------------------------------------------------------------------
- * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|tag0(1byte)|FCS(4bytes)
+ * DA(6bytes)|SA(6bytes)|....|Data(nbytes)|ts(4bytes)|tag0(1byte)|FCS(4bytes)
* ---------------------------------------------------------------------------
+ * ts : time stamp (present only if bit 7 of tag0 is set)
* tag0 : zero-based value represents port
* (eg, 0x00=port1, 0x02=port3, 0x07=port8)
*/
@@ -219,11 +394,14 @@ MODULE_ALIAS_DSA_TAG_DRIVER(DSA_TAG_PROTO_KSZ9893, KSZ9893_NAME);
#define LAN937X_TAIL_TAG_BLOCKING_OVERRIDE BIT(11)
#define LAN937X_TAIL_TAG_LOOKUP BIT(12)
#define LAN937X_TAIL_TAG_VALID BIT(13)
+#define LAN937X_TAIL_TAG_PRIO GENMASK(10, 8)
#define LAN937X_TAIL_TAG_PORT_MASK 7
static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
struct net_device *dev)
{
+ u16 queue_mapping = skb_get_queue_mapping(skb);
+ u8 prio = netdev_txq_to_tc(dev, queue_mapping);
struct dsa_port *dp = dsa_slave_to_port(dev);
const struct ethhdr *hdr = eth_hdr(skb);
__be16 *tag;
@@ -232,10 +410,14 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
if (skb->ip_summed == CHECKSUM_PARTIAL && skb_checksum_help(skb))
return NULL;
+ ksz_xmit_timestamp(dp, skb);
+
tag = skb_put(skb, LAN937X_EGRESS_TAG_LEN);
val = BIT(dp->index);
+ val |= FIELD_PREP(LAN937X_TAIL_TAG_PRIO, prio);
+
if (is_link_local_ether_addr(hdr->h_dest))
val |= LAN937X_TAIL_TAG_BLOCKING_OVERRIDE;
@@ -244,7 +426,7 @@ static struct sk_buff *lan937x_xmit(struct sk_buff *skb,
put_unaligned_be16(val, tag);
- return skb;
+ return ksz_defer_xmit(dp, skb);
}
static const struct dsa_device_ops lan937x_netdev_ops = {
@@ -252,7 +434,9 @@ static const struct dsa_device_ops lan937x_netdev_ops = {
.proto = DSA_TAG_PROTO_LAN937X,
.xmit = lan937x_xmit,
.rcv = ksz9477_rcv,
- .needed_tailroom = LAN937X_EGRESS_TAG_LEN,
+ .connect = ksz_connect,
+ .disconnect = ksz_disconnect,
+ .needed_tailroom = LAN937X_EGRESS_TAG_LEN + KSZ_PTP_TAG_LEN,
};
DSA_TAG_DRIVER(lan937x_netdev_ops);
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 228f13df2e18..504f954a1b28 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -7,5 +7,5 @@ obj-$(CONFIG_ETHTOOL_NETLINK) += ethtool_nl.o
ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \
linkstate.o debug.o wol.o features.o privflags.o rings.o \
channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \
- tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
- pse-pd.o
+ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o module.o \
+ pse-pd.o plca.o mm.o
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index c7e37130647e..61c40e889a4d 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -86,18 +86,6 @@ static int channels_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_channels_request_ops = {
- .request_cmd = ETHTOOL_MSG_CHANNELS_GET,
- .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY,
- .hdr_attr = ETHTOOL_A_CHANNELS_HEADER,
- .req_info_size = sizeof(struct channels_req_info),
- .reply_data_size = sizeof(struct channels_reply_data),
-
- .prepare_data = channels_prepare_data,
- .reply_size = channels_reply_size,
- .fill_reply = channels_fill_reply,
-};
-
/* CHANNELS_SET */
const struct nla_policy ethnl_channels_set_policy[] = {
@@ -109,36 +97,28 @@ const struct nla_policy ethnl_channels_set_policy[] = {
[ETHTOOL_A_CHANNELS_COMBINED_COUNT] = { .type = NLA_U32 },
};
-int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_channels_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_channels && ops->set_channels ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_channels(struct ethnl_req_info *req_info, struct genl_info *info)
{
unsigned int from_channel, old_total, i;
bool mod = false, mod_combined = false;
+ struct net_device *dev = req_info->dev;
struct ethtool_channels channels = {};
- struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
u32 err_attr, max_rxfh_in_use;
- const struct ethtool_ops *ops;
- struct net_device *dev;
u64 max_rxnfc_in_use;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_CHANNELS_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_channels || !ops->set_channels)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ops->get_channels(dev, &channels);
+ dev->ethtool_ops->get_channels(dev, &channels);
old_total = channels.combined_count +
max(channels.rx_count, channels.tx_count);
@@ -151,9 +131,8 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
ethnl_update_u32(&channels.combined_count,
tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod_combined);
mod |= mod_combined;
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
/* ensure new channel counts are within limits */
if (channels.rx_count > channels.max_rx)
@@ -167,10 +146,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
else
err_attr = 0;
if (err_attr) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
"requested channel count exceeds maximum");
- goto out_ops;
+ return -EINVAL;
}
/* ensure there is at least one RX and one TX channel */
@@ -183,10 +161,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
if (err_attr) {
if (mod_combined)
err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
"requested channel counts would result in no RX or TX channel being configured");
- goto out_ops;
+ return -EINVAL;
}
/* ensure the new Rx count fits within the configured Rx flow
@@ -198,14 +175,12 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
ethtool_get_max_rxfh_channel(dev, &max_rxfh_in_use))
max_rxfh_in_use = 0;
if (channels.combined_count + channels.rx_count <= max_rxfh_in_use) {
- ret = -EINVAL;
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings");
- goto out_ops;
+ return -EINVAL;
}
if (channels.combined_count + channels.rx_count <= max_rxnfc_in_use) {
- ret = -EINVAL;
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing ntuple filter settings");
- goto out_ops;
+ return -EINVAL;
}
/* Disabling channels, query zero-copy AF_XDP sockets */
@@ -213,21 +188,26 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
min(channels.rx_count, channels.tx_count);
for (i = from_channel; i < old_total; i++)
if (xsk_get_pool_from_qid(dev, i)) {
- ret = -EINVAL;
GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets");
- goto out_ops;
+ return -EINVAL;
}
ret = dev->ethtool_ops->set_channels(dev, &channels);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_CHANNELS_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_channels_request_ops = {
+ .request_cmd = ETHTOOL_MSG_CHANNELS_GET,
+ .reply_cmd = ETHTOOL_MSG_CHANNELS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_CHANNELS_HEADER,
+ .req_info_size = sizeof(struct channels_req_info),
+ .reply_data_size = sizeof(struct channels_reply_data),
+
+ .prepare_data = channels_prepare_data,
+ .reply_size = channels_reply_size,
+ .fill_reply = channels_fill_reply,
+
+ .set_validate = ethnl_set_channels_validate,
+ .set = ethnl_set_channels,
+ .set_ntf_cmd = ETHTOOL_MSG_CHANNELS_NTF,
+};
diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c
index 487bdf345541..443e7e642c96 100644
--- a/net/ethtool/coalesce.c
+++ b/net/ethtool/coalesce.c
@@ -105,7 +105,10 @@ static int coalesce_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _TX_MAX_FRAMES_HIGH */
nla_total_size(sizeof(u32)) + /* _RATE_SAMPLE_INTERVAL */
nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_TX */
- nla_total_size(sizeof(u8)); /* _USE_CQE_MODE_RX */
+ nla_total_size(sizeof(u8)) + /* _USE_CQE_MODE_RX */
+ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_BYTES */
+ nla_total_size(sizeof(u32)) + /* _TX_AGGR_MAX_FRAMES */
+ nla_total_size(sizeof(u32)); /* _TX_AGGR_TIME_USECS */
}
static bool coalesce_put_u32(struct sk_buff *skb, u16 attr_type, u32 val,
@@ -180,24 +183,18 @@ static int coalesce_fill_reply(struct sk_buff *skb,
coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_TX,
kcoal->use_cqe_mode_tx, supported) ||
coalesce_put_bool(skb, ETHTOOL_A_COALESCE_USE_CQE_MODE_RX,
- kcoal->use_cqe_mode_rx, supported))
+ kcoal->use_cqe_mode_rx, supported) ||
+ coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES,
+ kcoal->tx_aggr_max_bytes, supported) ||
+ coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES,
+ kcoal->tx_aggr_max_frames, supported) ||
+ coalesce_put_u32(skb, ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS,
+ kcoal->tx_aggr_time_usecs, supported))
return -EMSGSIZE;
return 0;
}
-const struct ethnl_request_ops ethnl_coalesce_request_ops = {
- .request_cmd = ETHTOOL_MSG_COALESCE_GET,
- .reply_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_COALESCE_HEADER,
- .req_info_size = sizeof(struct coalesce_req_info),
- .reply_data_size = sizeof(struct coalesce_reply_data),
-
- .prepare_data = coalesce_prepare_data,
- .reply_size = coalesce_reply_size,
- .fill_reply = coalesce_fill_reply,
-};
-
/* COALESCE_SET */
const struct nla_policy ethnl_coalesce_set_policy[] = {
@@ -227,51 +224,49 @@ const struct nla_policy ethnl_coalesce_set_policy[] = {
[ETHTOOL_A_COALESCE_RATE_SAMPLE_INTERVAL] = { .type = NLA_U32 },
[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX] = NLA_POLICY_MAX(NLA_U8, 1),
[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES] = { .type = NLA_U32 },
+ [ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS] = { .type = NLA_U32 },
};
-int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_coalesce_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct kernel_ethtool_coalesce kernel_coalesce = {};
- struct ethtool_coalesce coalesce = {};
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
- struct net_device *dev;
u32 supported_params;
- bool mod = false;
- int ret;
u16 a;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_COALESCE_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
if (!ops->get_coalesce || !ops->set_coalesce)
- goto out_dev;
+ return -EOPNOTSUPP;
/* make sure that only supported parameters are present */
supported_params = ops->supported_coalesce_params;
for (a = ETHTOOL_A_COALESCE_RX_USECS; a < __ETHTOOL_A_COALESCE_CNT; a++)
if (tb[a] && !(supported_params & attr_to_mask(a))) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[a],
"cannot modify an unsupported parameter");
- goto out_dev;
+ return -EINVAL;
}
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ret = ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
- info->extack);
+ return 1;
+}
+
+static int
+ethnl_set_coalesce(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct kernel_ethtool_coalesce kernel_coalesce = {};
+ struct net_device *dev = req_info->dev;
+ struct ethtool_coalesce coalesce = {};
+ struct nlattr **tb = info->attrs;
+ bool mod = false;
+ int ret;
+
+ ret = dev->ethtool_ops->get_coalesce(dev, &coalesce, &kernel_coalesce,
+ info->extack);
if (ret < 0)
- goto out_ops;
+ return ret;
ethnl_update_u32(&coalesce.rx_coalesce_usecs,
tb[ETHTOOL_A_COALESCE_RX_USECS], &mod);
@@ -321,21 +316,32 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_TX], &mod);
ethnl_update_u8(&kernel_coalesce.use_cqe_mode_rx,
tb[ETHTOOL_A_COALESCE_USE_CQE_MODE_RX], &mod);
- ret = 0;
+ ethnl_update_u32(&kernel_coalesce.tx_aggr_max_bytes,
+ tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_BYTES], &mod);
+ ethnl_update_u32(&kernel_coalesce.tx_aggr_max_frames,
+ tb[ETHTOOL_A_COALESCE_TX_AGGR_MAX_FRAMES], &mod);
+ ethnl_update_u32(&kernel_coalesce.tx_aggr_time_usecs,
+ tb[ETHTOOL_A_COALESCE_TX_AGGR_TIME_USECS], &mod);
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_coalesce(dev, &coalesce, &kernel_coalesce,
info->extack);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_COALESCE_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_coalesce_request_ops = {
+ .request_cmd = ETHTOOL_MSG_COALESCE_GET,
+ .reply_cmd = ETHTOOL_MSG_COALESCE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_COALESCE_HEADER,
+ .req_info_size = sizeof(struct coalesce_req_info),
+ .reply_data_size = sizeof(struct coalesce_reply_data),
+
+ .prepare_data = coalesce_prepare_data,
+ .reply_size = coalesce_reply_size,
+ .fill_reply = coalesce_fill_reply,
+
+ .set_validate = ethnl_set_coalesce_validate,
+ .set = ethnl_set_coalesce,
+ .set_ntf_cmd = ETHTOOL_MSG_COALESCE_NTF,
+};
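Drivers opt in to the three new attributes through supported_coalesce_params and consume them in set_coalesce. A driver-side sketch (the ETHTOOL_COALESCE_TX_AGGR_* capability bits are assumed to mirror the attribute names; example_* identifiers are hypothetical):

static int example_set_coalesce(struct net_device *dev,
				struct ethtool_coalesce *coal,
				struct kernel_ethtool_coalesce *kcoal,
				struct netlink_ext_ack *extack)
{
	struct example_priv *priv = netdev_priv(dev);	/* hypothetical */

	if (kcoal->tx_aggr_max_bytes > EXAMPLE_AGGR_MAX) {	/* hypothetical limit */
		NL_SET_ERR_MSG(extack, "TX aggregation size exceeds hardware limit");
		return -EINVAL;
	}

	priv->tx_aggr_max_bytes  = kcoal->tx_aggr_max_bytes;
	priv->tx_aggr_max_frames = kcoal->tx_aggr_max_frames;
	priv->tx_aggr_time_usecs = kcoal->tx_aggr_time_usecs;

	return 0;
}

Unsupported attributes are rejected earlier, in ethnl_set_coalesce_validate(), by checking each one against supported_coalesce_params.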
diff --git a/net/ethtool/common.c b/net/ethtool/common.c
index 6f399afc2ff2..5fb19050991e 100644
--- a/net/ethtool/common.c
+++ b/net/ethtool/common.c
@@ -208,6 +208,9 @@ const char link_mode_names[][ETH_GSTRING_LEN] = {
__DEFINE_LINK_MODE_NAME(800000, DR8_2, Full),
__DEFINE_LINK_MODE_NAME(800000, SR8, Full),
__DEFINE_LINK_MODE_NAME(800000, VR8, Full),
+ __DEFINE_LINK_MODE_NAME(10, T1S, Full),
+ __DEFINE_LINK_MODE_NAME(10, T1S, Half),
+ __DEFINE_LINK_MODE_NAME(10, T1S_P2MP, Half),
};
static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
@@ -244,6 +247,8 @@ static_assert(ARRAY_SIZE(link_mode_names) == __ETHTOOL_LINK_MODE_MASK_NBITS);
#define __LINK_MODE_LANES_X 1
#define __LINK_MODE_LANES_FX 1
#define __LINK_MODE_LANES_T1L 1
+#define __LINK_MODE_LANES_T1S 1
+#define __LINK_MODE_LANES_T1S_P2MP 1
#define __LINK_MODE_LANES_VR8 8
#define __LINK_MODE_LANES_DR8_2 8
@@ -366,6 +371,9 @@ const struct link_mode_info link_mode_params[] = {
__DEFINE_LINK_MODE_PARAMS(800000, DR8_2, Full),
__DEFINE_LINK_MODE_PARAMS(800000, SR8, Full),
__DEFINE_LINK_MODE_PARAMS(800000, VR8, Full),
+ __DEFINE_LINK_MODE_PARAMS(10, T1S, Full),
+ __DEFINE_LINK_MODE_PARAMS(10, T1S, Half),
+ __DEFINE_LINK_MODE_PARAMS(10, T1S_P2MP, Half),
};
static_assert(ARRAY_SIZE(link_mode_params) == __ETHTOOL_LINK_MODE_MASK_NBITS);
diff --git a/net/ethtool/common.h b/net/ethtool/common.h
index b1b9db810eca..28b8aaaf9bcb 100644
--- a/net/ethtool/common.h
+++ b/net/ethtool/common.h
@@ -54,4 +54,6 @@ int ethtool_get_module_info_call(struct net_device *dev,
int ethtool_get_module_eeprom_call(struct net_device *dev,
struct ethtool_eeprom *ee, u8 *data);
+bool __ethtool_dev_mm_supported(struct net_device *dev);
+
#endif /* _ETHTOOL_COMMON_H */
diff --git a/net/ethtool/debug.c b/net/ethtool/debug.c
index d73888c7d19c..e4369769817e 100644
--- a/net/ethtool/debug.c
+++ b/net/ethtool/debug.c
@@ -63,18 +63,6 @@ static int debug_fill_reply(struct sk_buff *skb,
netif_msg_class_names, compact);
}
-const struct ethnl_request_ops ethnl_debug_request_ops = {
- .request_cmd = ETHTOOL_MSG_DEBUG_GET,
- .reply_cmd = ETHTOOL_MSG_DEBUG_GET_REPLY,
- .hdr_attr = ETHTOOL_A_DEBUG_HEADER,
- .req_info_size = sizeof(struct debug_req_info),
- .reply_data_size = sizeof(struct debug_reply_data),
-
- .prepare_data = debug_prepare_data,
- .reply_size = debug_reply_size,
- .fill_reply = debug_fill_reply,
-};
-
/* DEBUG_SET */
const struct nla_policy ethnl_debug_set_policy[] = {
@@ -83,46 +71,47 @@ const struct nla_policy ethnl_debug_set_policy[] = {
[ETHTOOL_A_DEBUG_MSGMASK] = { .type = NLA_NESTED },
};
-int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_debug_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_msglevel && ops->set_msglevel ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_debug(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
bool mod = false;
u32 msg_mask;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_DEBUG_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_msglevel || !dev->ethtool_ops->set_msglevel)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
msg_mask = dev->ethtool_ops->get_msglevel(dev);
ret = ethnl_update_bitset32(&msg_mask, NETIF_MSG_CLASS_COUNT,
tb[ETHTOOL_A_DEBUG_MSGMASK],
netif_msg_class_names, info->extack, &mod);
if (ret < 0 || !mod)
- goto out_ops;
+ return ret;
dev->ethtool_ops->set_msglevel(dev, msg_mask);
- ethtool_notify(dev, ETHTOOL_MSG_DEBUG_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_debug_request_ops = {
+ .request_cmd = ETHTOOL_MSG_DEBUG_GET,
+ .reply_cmd = ETHTOOL_MSG_DEBUG_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_DEBUG_HEADER,
+ .req_info_size = sizeof(struct debug_req_info),
+ .reply_data_size = sizeof(struct debug_reply_data),
+
+ .prepare_data = debug_prepare_data,
+ .reply_size = debug_reply_size,
+ .fill_reply = debug_fill_reply,
+
+ .set_validate = ethnl_set_debug_validate,
+ .set = ethnl_set_debug,
+ .set_ntf_cmd = ETHTOOL_MSG_DEBUG_NTF,
+};
diff --git a/net/ethtool/eee.c b/net/ethtool/eee.c
index 45c42b2d5f17..42104bcb0e47 100644
--- a/net/ethtool/eee.c
+++ b/net/ethtool/eee.c
@@ -108,18 +108,6 @@ static int eee_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_eee_request_ops = {
- .request_cmd = ETHTOOL_MSG_EEE_GET,
- .reply_cmd = ETHTOOL_MSG_EEE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_EEE_HEADER,
- .req_info_size = sizeof(struct eee_req_info),
- .reply_data_size = sizeof(struct eee_reply_data),
-
- .prepare_data = eee_prepare_data,
- .reply_size = eee_reply_size,
- .fill_reply = eee_fill_reply,
-};
-
/* EEE_SET */
const struct nla_policy ethnl_eee_set_policy[] = {
@@ -131,60 +119,56 @@ const struct nla_policy ethnl_eee_set_policy[] = {
[ETHTOOL_A_EEE_TX_LPI_TIMER] = { .type = NLA_U32 },
};
-int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_eee_validate(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_eee && ops->set_eee ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_eee(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
struct ethtool_eee eee = {};
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_EEE_HEADER],
- genl_info_net(info), info->extack,
- true);
+ ret = dev->ethtool_ops->get_eee(dev, &eee);
if (ret < 0)
return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_eee || !ops->set_eee)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ret = ops->get_eee(dev, &eee);
- if (ret < 0)
- goto out_ops;
ret = ethnl_update_bitset32(&eee.advertised, EEE_MODES_COUNT,
tb[ETHTOOL_A_EEE_MODES_OURS],
link_mode_names, info->extack, &mod);
if (ret < 0)
- goto out_ops;
+ return ret;
ethnl_update_bool32(&eee.eee_enabled, tb[ETHTOOL_A_EEE_ENABLED], &mod);
ethnl_update_bool32(&eee.tx_lpi_enabled,
tb[ETHTOOL_A_EEE_TX_LPI_ENABLED], &mod);
ethnl_update_u32(&eee.tx_lpi_timer, tb[ETHTOOL_A_EEE_TX_LPI_TIMER],
&mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_eee(dev, &eee);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_EEE_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_eee_request_ops = {
+ .request_cmd = ETHTOOL_MSG_EEE_GET,
+ .reply_cmd = ETHTOOL_MSG_EEE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_EEE_HEADER,
+ .req_info_size = sizeof(struct eee_req_info),
+ .reply_data_size = sizeof(struct eee_reply_data),
+
+ .prepare_data = eee_prepare_data,
+ .reply_size = eee_reply_size,
+ .fill_reply = eee_fill_reply,
+
+ .set_validate = ethnl_set_eee_validate,
+ .set = ethnl_set_eee,
+ .set_ntf_cmd = ETHTOOL_MSG_EEE_NTF,
+};
diff --git a/net/ethtool/fec.c b/net/ethtool/fec.c
index 9f5a134e2e01..0d9a3d153170 100644
--- a/net/ethtool/fec.c
+++ b/net/ethtool/fec.c
@@ -217,18 +217,6 @@ static int fec_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_fec_request_ops = {
- .request_cmd = ETHTOOL_MSG_FEC_GET,
- .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY,
- .hdr_attr = ETHTOOL_A_FEC_HEADER,
- .req_info_size = sizeof(struct fec_req_info),
- .reply_data_size = sizeof(struct fec_reply_data),
-
- .prepare_data = fec_prepare_data,
- .reply_size = fec_reply_size,
- .fill_reply = fec_fill_reply,
-};
-
/* FEC_SET */
const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = {
@@ -237,36 +225,28 @@ const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1] = {
[ETHTOOL_A_FEC_AUTO] = NLA_POLICY_MAX(NLA_U8, 1),
};
-int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_fec_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_fecparam && ops->set_fecparam ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_fec(struct ethnl_req_info *req_info, struct genl_info *info)
{
__ETHTOOL_DECLARE_LINK_MODE_MASK(fec_link_modes) = {};
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
struct ethtool_fecparam fec = {};
- const struct ethtool_ops *ops;
- struct net_device *dev;
bool mod = false;
u8 fec_auto;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_FEC_HEADER],
- genl_info_net(info), info->extack,
- true);
+ ret = dev->ethtool_ops->get_fecparam(dev, &fec);
if (ret < 0)
return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_fecparam || !ops->set_fecparam)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ret = ops->get_fecparam(dev, &fec);
- if (ret < 0)
- goto out_ops;
ethtool_fec_to_link_modes(fec.fec, fec_link_modes, &fec_auto);
@@ -275,36 +255,39 @@ int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_FEC_MODES],
link_mode_names, info->extack, &mod);
if (ret < 0)
- goto out_ops;
+ return ret;
ethnl_update_u8(&fec_auto, tb[ETHTOOL_A_FEC_AUTO], &mod);
-
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = ethtool_link_modes_to_fecparam(&fec, fec_link_modes, fec_auto);
if (ret) {
NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES],
"invalid FEC modes requested");
- goto out_ops;
+ return ret;
}
if (!fec.fec) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_FEC_MODES],
"no FEC modes set");
- goto out_ops;
+ return -EINVAL;
}
ret = dev->ethtool_ops->set_fecparam(dev, &fec);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_FEC_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_fec_request_ops = {
+ .request_cmd = ETHTOOL_MSG_FEC_GET,
+ .reply_cmd = ETHTOOL_MSG_FEC_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_FEC_HEADER,
+ .req_info_size = sizeof(struct fec_req_info),
+ .reply_data_size = sizeof(struct fec_reply_data),
+
+ .prepare_data = fec_prepare_data,
+ .reply_size = fec_reply_size,
+ .fill_reply = fec_fill_reply,
+
+ .set_validate = ethnl_set_fec_validate,
+ .set = ethnl_set_fec,
+ .set_ntf_cmd = ETHTOOL_MSG_FEC_NTF,
+};
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index c2f1a542e6fa..646b3e490c71 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -2078,58 +2078,91 @@ static int ethtool_get_stats(struct net_device *dev, void __user *useraddr)
return ret;
}
-static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+static int ethtool_vzalloc_stats_array(int n_stats, u64 **data)
{
+ if (n_stats < 0)
+ return n_stats;
+ if (n_stats > S32_MAX / sizeof(u64))
+ return -ENOMEM;
+ if (WARN_ON_ONCE(!n_stats))
+ return -EOPNOTSUPP;
+
+ *data = vzalloc(array_size(n_stats, sizeof(u64)));
+ if (!*data)
+ return -ENOMEM;
+
+ return 0;
+}
+
+static int ethtool_get_phy_stats_phydev(struct phy_device *phydev,
+ struct ethtool_stats *stats,
+ u64 **data)
+{
const struct ethtool_phy_ops *phy_ops = ethtool_phy_ops;
+ int n_stats, ret;
+
+ if (!phy_ops || !phy_ops->get_sset_count || !phy_ops->get_stats)
+ return -EOPNOTSUPP;
+
+ n_stats = phy_ops->get_sset_count(phydev);
+
+ ret = ethtool_vzalloc_stats_array(n_stats, data);
+ if (ret)
+ return ret;
+
+ stats->n_stats = n_stats;
+ return phy_ops->get_stats(phydev, stats, *data);
+}
+
+static int ethtool_get_phy_stats_ethtool(struct net_device *dev,
+ struct ethtool_stats *stats,
+ u64 **data)
+{
const struct ethtool_ops *ops = dev->ethtool_ops;
- struct phy_device *phydev = dev->phydev;
- struct ethtool_stats stats;
- u64 *data;
- int ret, n_stats;
+ int n_stats, ret;
- if (!phydev && (!ops->get_ethtool_phy_stats || !ops->get_sset_count))
+ if (!ops || !ops->get_sset_count || !ops->get_ethtool_phy_stats)
return -EOPNOTSUPP;
- if (phydev && !ops->get_ethtool_phy_stats &&
- phy_ops && phy_ops->get_sset_count)
- n_stats = phy_ops->get_sset_count(phydev);
- else
- n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
- if (n_stats < 0)
- return n_stats;
- if (n_stats > S32_MAX / sizeof(u64))
- return -ENOMEM;
- WARN_ON_ONCE(!n_stats);
+ n_stats = ops->get_sset_count(dev, ETH_SS_PHY_STATS);
+
+ ret = ethtool_vzalloc_stats_array(n_stats, data);
+ if (ret)
+ return ret;
+
+ stats->n_stats = n_stats;
+ ops->get_ethtool_phy_stats(dev, stats, *data);
+
+ return 0;
+}
+
+static int ethtool_get_phy_stats(struct net_device *dev, void __user *useraddr)
+{
+ struct phy_device *phydev = dev->phydev;
+ struct ethtool_stats stats;
+ u64 *data = NULL;
+ int ret = -EOPNOTSUPP;
if (copy_from_user(&stats, useraddr, sizeof(stats)))
return -EFAULT;
- stats.n_stats = n_stats;
+ if (phydev)
+ ret = ethtool_get_phy_stats_phydev(phydev, &stats, &data);
- if (n_stats) {
- data = vzalloc(array_size(n_stats, sizeof(u64)));
- if (!data)
- return -ENOMEM;
+ if (ret == -EOPNOTSUPP)
+ ret = ethtool_get_phy_stats_ethtool(dev, &stats, &data);
- if (phydev && !ops->get_ethtool_phy_stats &&
- phy_ops && phy_ops->get_stats) {
- ret = phy_ops->get_stats(phydev, &stats, data);
- if (ret < 0)
- goto out;
- } else {
- ops->get_ethtool_phy_stats(dev, &stats, data);
- }
- } else {
- data = NULL;
- }
+ if (ret)
+ goto out;
- ret = -EFAULT;
- if (copy_to_user(useraddr, &stats, sizeof(stats)))
+ if (copy_to_user(useraddr, &stats, sizeof(stats))) {
+ ret = -EFAULT;
goto out;
+ }
+
useraddr += sizeof(stats);
- if (n_stats && copy_to_user(useraddr, data, array_size(n_stats, sizeof(u64))))
- goto out;
- ret = 0;
+ if (copy_to_user(useraddr, data, array_size(stats.n_stats, sizeof(u64))))
+ ret = -EFAULT;
out:
vfree(data);
diff --git a/net/ethtool/linkinfo.c b/net/ethtool/linkinfo.c
index efa0f7f48836..310dfe63292a 100644
--- a/net/ethtool/linkinfo.c
+++ b/net/ethtool/linkinfo.c
@@ -73,18 +73,6 @@ static int linkinfo_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_linkinfo_request_ops = {
- .request_cmd = ETHTOOL_MSG_LINKINFO_GET,
- .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY,
- .hdr_attr = ETHTOOL_A_LINKINFO_HEADER,
- .req_info_size = sizeof(struct linkinfo_req_info),
- .reply_data_size = sizeof(struct linkinfo_reply_data),
-
- .prepare_data = linkinfo_prepare_data,
- .reply_size = linkinfo_reply_size,
- .fill_reply = linkinfo_fill_reply,
-};
-
/* LINKINFO_SET */
const struct nla_policy ethnl_linkinfo_set_policy[] = {
@@ -95,37 +83,31 @@ const struct nla_policy ethnl_linkinfo_set_policy[] = {
[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL] = { .type = NLA_U8 },
};
-int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_linkinfo_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ if (!ops->get_link_ksettings || !ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+ return 1;
+}
+
+static int
+ethnl_set_linkinfo(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct ethtool_link_ksettings ksettings = {};
struct ethtool_link_settings *lsettings;
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_LINKINFO_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_link_ksettings ||
- !dev->ethtool_ops->set_link_ksettings)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
ret = __ethtool_get_link_ksettings(dev, &ksettings);
if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
- goto out_ops;
+ return ret;
}
lsettings = &ksettings.base;
@@ -134,21 +116,30 @@ int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info)
&mod);
ethnl_update_u8(&lsettings->eth_tp_mdix_ctrl,
tb[ETHTOOL_A_LINKINFO_TP_MDIX_CTRL], &mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
- if (ret < 0)
+ if (ret < 0) {
GENL_SET_ERR_MSG(info, "link settings update failed");
- else
- ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
+ return ret;
+ }
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_linkinfo_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKINFO_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKINFO_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKINFO_HEADER,
+ .req_info_size = sizeof(struct linkinfo_req_info),
+ .reply_data_size = sizeof(struct linkinfo_reply_data),
+
+ .prepare_data = linkinfo_prepare_data,
+ .reply_size = linkinfo_reply_size,
+ .fill_reply = linkinfo_fill_reply,
+
+ .set_validate = ethnl_set_linkinfo_validate,
+ .set = ethnl_set_linkinfo,
+ .set_ntf_cmd = ETHTOOL_MSG_LINKINFO_NTF,
+};
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 126e06c713a3..fab66c169b9f 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -151,18 +151,6 @@ static int linkmodes_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
- .request_cmd = ETHTOOL_MSG_LINKMODES_GET,
- .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY,
- .hdr_attr = ETHTOOL_A_LINKMODES_HEADER,
- .req_info_size = sizeof(struct linkmodes_req_info),
- .reply_data_size = sizeof(struct linkmodes_reply_data),
-
- .prepare_data = linkmodes_prepare_data,
- .reply_size = linkmodes_reply_size,
- .fill_reply = linkmodes_fill_reply,
-};
-
/* LINKMODES_SET */
const struct nla_policy ethnl_linkmodes_set_policy[] = {
@@ -310,59 +298,64 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
return 0;
}
-int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_linkmodes_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct ethtool_link_ksettings ksettings = {};
- struct ethnl_req_info req_info = {};
- struct nlattr **tb = info->attrs;
- struct net_device *dev;
- bool mod = false;
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
int ret;
- ret = ethnl_check_linkmodes(info, tb);
+ ret = ethnl_check_linkmodes(info, info->attrs);
if (ret < 0)
return ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_LINKMODES_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_link_ksettings ||
- !dev->ethtool_ops->set_link_ksettings)
- goto out_dev;
+ if (!ops->get_link_ksettings || !ops->set_link_ksettings)
+ return -EOPNOTSUPP;
+ return 1;
+}
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
+static int
+ethnl_set_linkmodes(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct ethtool_link_ksettings ksettings = {};
+ struct net_device *dev = req_info->dev;
+ struct nlattr **tb = info->attrs;
+ bool mod = false;
+ int ret;
ret = __ethtool_get_link_ksettings(dev, &ksettings);
if (ret < 0) {
GENL_SET_ERR_MSG(info, "failed to retrieve link settings");
- goto out_ops;
+ return ret;
}
ret = ethnl_update_linkmodes(info, tb, &ksettings, &mod, dev);
if (ret < 0)
- goto out_ops;
+ return ret;
+ if (!mod)
+ return 0;
- if (mod) {
- ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
- if (ret < 0)
- GENL_SET_ERR_MSG(info, "link settings update failed");
- else
- ethtool_notify(dev, ETHTOOL_MSG_LINKMODES_NTF, NULL);
+ ret = dev->ethtool_ops->set_link_ksettings(dev, &ksettings);
+ if (ret < 0) {
+ GENL_SET_ERR_MSG(info, "link settings update failed");
+ return ret;
}
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_linkmodes_request_ops = {
+ .request_cmd = ETHTOOL_MSG_LINKMODES_GET,
+ .reply_cmd = ETHTOOL_MSG_LINKMODES_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_LINKMODES_HEADER,
+ .req_info_size = sizeof(struct linkmodes_req_info),
+ .reply_data_size = sizeof(struct linkmodes_reply_data),
+
+ .prepare_data = linkmodes_prepare_data,
+ .reply_size = linkmodes_reply_size,
+ .fill_reply = linkmodes_fill_reply,
+
+ .set_validate = ethnl_set_linkmodes_validate,
+ .set = ethnl_set_linkmodes,
+ .set_ntf_cmd = ETHTOOL_MSG_LINKMODES_NTF,
+};
diff --git a/net/ethtool/mm.c b/net/ethtool/mm.c
new file mode 100644
index 000000000000..fce3cc2734f9
--- /dev/null
+++ b/net/ethtool/mm.c
@@ -0,0 +1,251 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022-2023 NXP
+ */
+#include "common.h"
+#include "netlink.h"
+
+struct mm_req_info {
+ struct ethnl_req_info base;
+};
+
+struct mm_reply_data {
+ struct ethnl_reply_data base;
+ struct ethtool_mm_state state;
+ struct ethtool_mm_stats stats;
+};
+
+#define MM_REPDATA(__reply_base) \
+ container_of(__reply_base, struct mm_reply_data, base)
+
+#define ETHTOOL_MM_STAT_CNT \
+ (__ETHTOOL_A_MM_STAT_CNT - (ETHTOOL_A_MM_STAT_PAD + 1))
+
+const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1] = {
+ [ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy_stats),
+};
+
+static int mm_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct mm_reply_data *data = MM_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_ops *ops;
+ int ret;
+
+ ops = dev->ethtool_ops;
+
+ if (!ops->get_mm)
+ return -EOPNOTSUPP;
+
+ ethtool_stats_init((u64 *)&data->stats,
+ sizeof(data->stats) / sizeof(u64));
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ return ret;
+
+ ret = ops->get_mm(dev, &data->state);
+ if (ret)
+ goto out_complete;
+
+ if (ops->get_mm_stats && (req_base->flags & ETHTOOL_FLAG_STATS))
+ ops->get_mm_stats(dev, &data->stats);
+
+out_complete:
+ ethnl_ops_complete(dev);
+
+ return ret;
+}
+
+static int mm_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ int len = 0;
+
+ len += nla_total_size(sizeof(u8)); /* _MM_PMAC_ENABLED */
+ len += nla_total_size(sizeof(u8)); /* _MM_TX_ENABLED */
+ len += nla_total_size(sizeof(u8)); /* _MM_TX_ACTIVE */
+ len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_ENABLED */
+ len += nla_total_size(sizeof(u8)); /* _MM_VERIFY_STATUS */
+ len += nla_total_size(sizeof(u32)); /* _MM_VERIFY_TIME */
+ len += nla_total_size(sizeof(u32)); /* _MM_MAX_VERIFY_TIME */
+ len += nla_total_size(sizeof(u32)); /* _MM_TX_MIN_FRAG_SIZE */
+ len += nla_total_size(sizeof(u32)); /* _MM_RX_MIN_FRAG_SIZE */
+
+ if (req_base->flags & ETHTOOL_FLAG_STATS)
+ len += nla_total_size(0) + /* _MM_STATS */
+ nla_total_size_64bit(sizeof(u64)) * ETHTOOL_MM_STAT_CNT;
+
+ return len;
+}
+
+static int mm_put_stat(struct sk_buff *skb, u64 val, u16 attrtype)
+{
+ if (val == ETHTOOL_STAT_NOT_SET)
+ return 0;
+ if (nla_put_u64_64bit(skb, attrtype, val, ETHTOOL_A_MM_STAT_PAD))
+ return -EMSGSIZE;
+ return 0;
+}
+
+static int mm_put_stats(struct sk_buff *skb,
+ const struct ethtool_mm_stats *stats)
+{
+ struct nlattr *nest;
+
+ nest = nla_nest_start(skb, ETHTOOL_A_MM_STATS);
+ if (!nest)
+ return -EMSGSIZE;
+
+ if (mm_put_stat(skb, stats->MACMergeFrameAssErrorCount,
+ ETHTOOL_A_MM_STAT_REASSEMBLY_ERRORS) ||
+ mm_put_stat(skb, stats->MACMergeFrameSmdErrorCount,
+ ETHTOOL_A_MM_STAT_SMD_ERRORS) ||
+ mm_put_stat(skb, stats->MACMergeFrameAssOkCount,
+ ETHTOOL_A_MM_STAT_REASSEMBLY_OK) ||
+ mm_put_stat(skb, stats->MACMergeFragCountRx,
+ ETHTOOL_A_MM_STAT_RX_FRAG_COUNT) ||
+ mm_put_stat(skb, stats->MACMergeFragCountTx,
+ ETHTOOL_A_MM_STAT_TX_FRAG_COUNT) ||
+ mm_put_stat(skb, stats->MACMergeHoldCount,
+ ETHTOOL_A_MM_STAT_HOLD_COUNT))
+ goto err_cancel;
+
+ nla_nest_end(skb, nest);
+ return 0;
+
+err_cancel:
+ nla_nest_cancel(skb, nest);
+ return -EMSGSIZE;
+}
+
+static int mm_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct mm_reply_data *data = MM_REPDATA(reply_base);
+ const struct ethtool_mm_state *state = &data->state;
+
+ if (nla_put_u8(skb, ETHTOOL_A_MM_TX_ENABLED, state->tx_enabled) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_TX_ACTIVE, state->tx_active) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_PMAC_ENABLED, state->pmac_enabled) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_ENABLED, state->verify_enabled) ||
+ nla_put_u8(skb, ETHTOOL_A_MM_VERIFY_STATUS, state->verify_status) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_VERIFY_TIME, state->verify_time) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_MAX_VERIFY_TIME, state->max_verify_time) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_TX_MIN_FRAG_SIZE, state->tx_min_frag_size) ||
+ nla_put_u32(skb, ETHTOOL_A_MM_RX_MIN_FRAG_SIZE, state->rx_min_frag_size))
+ return -EMSGSIZE;
+
+ if (req_base->flags & ETHTOOL_FLAG_STATS &&
+ mm_put_stats(skb, &data->stats))
+ return -EMSGSIZE;
+
+ return 0;
+}
+
+const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1] = {
+ [ETHTOOL_A_MM_HEADER] = NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_MM_VERIFY_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_MM_VERIFY_TIME] = NLA_POLICY_RANGE(NLA_U32, 1, 128),
+ [ETHTOOL_A_MM_TX_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_MM_PMAC_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_MM_TX_MIN_FRAG_SIZE] = NLA_POLICY_RANGE(NLA_U32, 60, 252),
+};
+
+static void mm_state_to_cfg(const struct ethtool_mm_state *state,
+ struct ethtool_mm_cfg *cfg)
+{
+ /* We could also compare state->verify_status against
+ * ETHTOOL_MM_VERIFY_STATUS_DISABLED, but state->verify_enabled
+ * is more like an administrative state which should be seen in
+ * ETHTOOL_MSG_MM_GET replies. For example, a port with verification
+ * disabled might be in the ETHTOOL_MM_VERIFY_STATUS_INITIAL
+ * state if it's down.
+ */
+ cfg->verify_enabled = state->verify_enabled;
+ cfg->verify_time = state->verify_time;
+ cfg->tx_enabled = state->tx_enabled;
+ cfg->pmac_enabled = state->pmac_enabled;
+ cfg->tx_min_frag_size = state->tx_min_frag_size;
+}
+
+static int
+ethnl_set_mm_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_mm && ops->set_mm ? 1 : -EOPNOTSUPP;
+}
+
+static int ethnl_set_mm(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct netlink_ext_ack *extack = info->extack;
+ struct net_device *dev = req_info->dev;
+ struct ethtool_mm_state state = {};
+ struct nlattr **tb = info->attrs;
+ struct ethtool_mm_cfg cfg = {};
+ bool mod = false;
+ int ret;
+
+ ret = dev->ethtool_ops->get_mm(dev, &state);
+ if (ret)
+ return ret;
+
+ mm_state_to_cfg(&state, &cfg);
+
+ ethnl_update_bool(&cfg.verify_enabled, tb[ETHTOOL_A_MM_VERIFY_ENABLED],
+ &mod);
+ ethnl_update_u32(&cfg.verify_time, tb[ETHTOOL_A_MM_VERIFY_TIME], &mod);
+ ethnl_update_bool(&cfg.tx_enabled, tb[ETHTOOL_A_MM_TX_ENABLED], &mod);
+ ethnl_update_bool(&cfg.pmac_enabled, tb[ETHTOOL_A_MM_PMAC_ENABLED],
+ &mod);
+ ethnl_update_u32(&cfg.tx_min_frag_size,
+ tb[ETHTOOL_A_MM_TX_MIN_FRAG_SIZE], &mod);
+
+ if (!mod)
+ return 0;
+
+ if (cfg.verify_time > state.max_verify_time) {
+ NL_SET_ERR_MSG_ATTR(extack, tb[ETHTOOL_A_MM_VERIFY_TIME],
+ "verifyTime exceeds device maximum");
+ return -ERANGE;
+ }
+
+ ret = dev->ethtool_ops->set_mm(dev, &cfg, extack);
+ return ret < 0 ? ret : 1;
+}
+
+const struct ethnl_request_ops ethnl_mm_request_ops = {
+ .request_cmd = ETHTOOL_MSG_MM_GET,
+ .reply_cmd = ETHTOOL_MSG_MM_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_MM_HEADER,
+ .req_info_size = sizeof(struct mm_req_info),
+ .reply_data_size = sizeof(struct mm_reply_data),
+
+ .prepare_data = mm_prepare_data,
+ .reply_size = mm_reply_size,
+ .fill_reply = mm_fill_reply,
+
+ .set_validate = ethnl_set_mm_validate,
+ .set = ethnl_set_mm,
+ .set_ntf_cmd = ETHTOOL_MSG_MM_NTF,
+};
+
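On the driver side, set_mm consumes struct ethtool_mm_cfg and may reject values its hardware cannot honor via extack; the core has already range-checked verify_time against max_verify_time above. A minimal sketch (example_* names and the fixed 60-octet fragment size are hypothetical):

static int example_set_mm(struct net_device *dev, struct ethtool_mm_cfg *cfg,
			  struct netlink_ext_ack *extack)
{
	struct example_priv *priv = netdev_priv(dev);	/* hypothetical */

	if (cfg->tx_min_frag_size != 60) {
		NL_SET_ERR_MSG(extack, "Only 60 octet minimum fragments supported");
		return -EOPNOTSUPP;
	}

	example_mm_commit(priv, cfg->pmac_enabled, cfg->tx_enabled,	/* hypothetical */
			  cfg->verify_enabled, cfg->verify_time);

	return 0;
}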
+/* Returns whether a given device supports the MAC merge layer
+ * (has an eMAC and a pMAC). Must be called under rtnl_lock() and
+ * ethnl_ops_begin().
+ */
+bool __ethtool_dev_mm_supported(struct net_device *dev)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_mm_state state = {};
+ int ret = -EOPNOTSUPP;
+
+ if (ops && ops->get_mm)
+ ret = ops->get_mm(dev, &state);
+
+ return !ret;
+}
diff --git a/net/ethtool/module.c b/net/ethtool/module.c
index 898ed436b9e4..e0d539b21423 100644
--- a/net/ethtool/module.c
+++ b/net/ethtool/module.c
@@ -91,18 +91,6 @@ static int module_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_module_request_ops = {
- .request_cmd = ETHTOOL_MSG_MODULE_GET,
- .reply_cmd = ETHTOOL_MSG_MODULE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_MODULE_HEADER,
- .req_info_size = sizeof(struct module_req_info),
- .reply_data_size = sizeof(struct module_reply_data),
-
- .prepare_data = module_prepare_data,
- .reply_size = module_reply_size,
- .fill_reply = module_fill_reply,
-};
-
/* MODULE_SET */
const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1] = {
@@ -112,69 +100,62 @@ const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLI
ETHTOOL_MODULE_POWER_MODE_POLICY_AUTO),
};
-static int module_set_power_mode(struct net_device *dev, struct nlattr **tb,
- bool *p_mod, struct netlink_ext_ack *extack)
+static int
+ethnl_set_module_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct ethtool_module_power_mode_params power = {};
- struct ethtool_module_power_mode_params power_new;
- const struct ethtool_ops *ops = dev->ethtool_ops;
- int ret;
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+ struct nlattr **tb = info->attrs;
if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY])
return 0;
if (!ops->get_module_power_mode || !ops->set_module_power_mode) {
- NL_SET_ERR_MSG_ATTR(extack,
+ NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY],
"Setting power mode policy is not supported by this device");
return -EOPNOTSUPP;
}
- power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]);
- ret = ops->get_module_power_mode(dev, &power, extack);
- if (ret < 0)
- return ret;
-
- if (power_new.policy == power.policy)
- return 0;
- *p_mod = true;
-
- return ops->set_module_power_mode(dev, &power_new, extack);
+ return 1;
}
-int ethnl_set_module(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_module(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct ethnl_req_info req_info = {};
+ struct ethtool_module_power_mode_params power = {};
+ struct ethtool_module_power_mode_params power_new;
+ const struct ethtool_ops *ops;
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
- bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_MODULE_HEADER],
- genl_info_net(info), info->extack,
- true);
+ ops = dev->ethtool_ops;
+
+ power_new.policy = nla_get_u8(tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]);
+ ret = ops->get_module_power_mode(dev, &power, info->extack);
if (ret < 0)
return ret;
- dev = req_info.dev;
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
+ if (power_new.policy == power.policy)
+ return 0;
- ret = module_set_power_mode(dev, tb, &mod, info->extack);
- if (ret < 0)
- goto out_ops;
+ ret = ops->set_module_power_mode(dev, &power_new, info->extack);
+ return ret < 0 ? ret : 1;
+}
- if (!mod)
- goto out_ops;
+const struct ethnl_request_ops ethnl_module_request_ops = {
+ .request_cmd = ETHTOOL_MSG_MODULE_GET,
+ .reply_cmd = ETHTOOL_MSG_MODULE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_MODULE_HEADER,
+ .req_info_size = sizeof(struct module_req_info),
+ .reply_data_size = sizeof(struct module_reply_data),
- ethtool_notify(dev, ETHTOOL_MSG_MODULE_NTF, NULL);
+ .prepare_data = module_prepare_data,
+ .reply_size = module_reply_size,
+ .fill_reply = module_fill_reply,
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
- ethnl_parse_header_dev_put(&req_info);
- return ret;
-}
+ .set_validate = ethnl_set_module_validate,
+ .set = ethnl_set_module,
+ .set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF,
+};
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index aee98be6237f..08120095cc68 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -269,25 +269,43 @@ static const struct ethnl_request_ops *
ethnl_default_requests[__ETHTOOL_MSG_USER_CNT] = {
[ETHTOOL_MSG_STRSET_GET] = &ethnl_strset_request_ops,
[ETHTOOL_MSG_LINKINFO_GET] = &ethnl_linkinfo_request_ops,
+ [ETHTOOL_MSG_LINKINFO_SET] = &ethnl_linkinfo_request_ops,
[ETHTOOL_MSG_LINKMODES_GET] = &ethnl_linkmodes_request_ops,
+ [ETHTOOL_MSG_LINKMODES_SET] = &ethnl_linkmodes_request_ops,
[ETHTOOL_MSG_LINKSTATE_GET] = &ethnl_linkstate_request_ops,
[ETHTOOL_MSG_DEBUG_GET] = &ethnl_debug_request_ops,
+ [ETHTOOL_MSG_DEBUG_SET] = &ethnl_debug_request_ops,
[ETHTOOL_MSG_WOL_GET] = &ethnl_wol_request_ops,
+ [ETHTOOL_MSG_WOL_SET] = &ethnl_wol_request_ops,
[ETHTOOL_MSG_FEATURES_GET] = &ethnl_features_request_ops,
[ETHTOOL_MSG_PRIVFLAGS_GET] = &ethnl_privflags_request_ops,
+ [ETHTOOL_MSG_PRIVFLAGS_SET] = &ethnl_privflags_request_ops,
[ETHTOOL_MSG_RINGS_GET] = &ethnl_rings_request_ops,
+ [ETHTOOL_MSG_RINGS_SET] = &ethnl_rings_request_ops,
[ETHTOOL_MSG_CHANNELS_GET] = &ethnl_channels_request_ops,
+ [ETHTOOL_MSG_CHANNELS_SET] = &ethnl_channels_request_ops,
[ETHTOOL_MSG_COALESCE_GET] = &ethnl_coalesce_request_ops,
+ [ETHTOOL_MSG_COALESCE_SET] = &ethnl_coalesce_request_ops,
[ETHTOOL_MSG_PAUSE_GET] = &ethnl_pause_request_ops,
+ [ETHTOOL_MSG_PAUSE_SET] = &ethnl_pause_request_ops,
[ETHTOOL_MSG_EEE_GET] = &ethnl_eee_request_ops,
+ [ETHTOOL_MSG_EEE_SET] = &ethnl_eee_request_ops,
[ETHTOOL_MSG_FEC_GET] = &ethnl_fec_request_ops,
+ [ETHTOOL_MSG_FEC_SET] = &ethnl_fec_request_ops,
[ETHTOOL_MSG_TSINFO_GET] = &ethnl_tsinfo_request_ops,
[ETHTOOL_MSG_MODULE_EEPROM_GET] = &ethnl_module_eeprom_request_ops,
[ETHTOOL_MSG_STATS_GET] = &ethnl_stats_request_ops,
[ETHTOOL_MSG_PHC_VCLOCKS_GET] = &ethnl_phc_vclocks_request_ops,
[ETHTOOL_MSG_MODULE_GET] = &ethnl_module_request_ops,
+ [ETHTOOL_MSG_MODULE_SET] = &ethnl_module_request_ops,
[ETHTOOL_MSG_PSE_GET] = &ethnl_pse_request_ops,
+ [ETHTOOL_MSG_PSE_SET] = &ethnl_pse_request_ops,
[ETHTOOL_MSG_RSS_GET] = &ethnl_rss_request_ops,
+ [ETHTOOL_MSG_PLCA_GET_CFG] = &ethnl_plca_cfg_request_ops,
+ [ETHTOOL_MSG_PLCA_SET_CFG] = &ethnl_plca_cfg_request_ops,
+ [ETHTOOL_MSG_PLCA_GET_STATUS] = &ethnl_plca_status_request_ops,
+ [ETHTOOL_MSG_MM_GET] = &ethnl_mm_request_ops,
+ [ETHTOOL_MSG_MM_SET] = &ethnl_mm_request_ops,
};
static struct ethnl_dump_ctx *ethnl_dump_context(struct netlink_callback *cb)
@@ -588,6 +606,52 @@ static int ethnl_default_done(struct netlink_callback *cb)
return 0;
}
+static int ethnl_default_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+ const struct ethnl_request_ops *ops;
+ struct ethnl_req_info req_info = {};
+ const u8 cmd = info->genlhdr->cmd;
+ int ret;
+
+ ops = ethnl_default_requests[cmd];
+ if (WARN_ONCE(!ops, "cmd %u has no ethnl_request_ops\n", cmd))
+ return -EOPNOTSUPP;
+ if (GENL_REQ_ATTR_CHECK(info, ops->hdr_attr))
+ return -EINVAL;
+
+ ret = ethnl_parse_header_dev_get(&req_info, info->attrs[ops->hdr_attr],
+ genl_info_net(info), info->extack,
+ true);
+ if (ret < 0)
+ return ret;
+
+ if (ops->set_validate) {
+ ret = ops->set_validate(&req_info, info);
+ /* 0 means nothing to do */
+ if (ret <= 0)
+ goto out_dev;
+ }
+
+ rtnl_lock();
+ ret = ethnl_ops_begin(req_info.dev);
+ if (ret < 0)
+ goto out_rtnl;
+
+ ret = ops->set(&req_info, info);
+ if (ret <= 0)
+ goto out_ops;
+ ethtool_notify(req_info.dev, ops->set_ntf_cmd, NULL);
+
+ ret = 0;
+out_ops:
+ ethnl_ops_complete(req_info.dev);
+out_rtnl:
+ rtnl_unlock();
+out_dev:
+ ethnl_parse_header_dev_put(&req_info);
+ return ret;
+}
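
/* Editor's illustration (not part of the patch): the contract the new
 * ethnl_default_set_doit() expects from a converted command. Everything
 * named "foo" below is hypothetical; get_foo/set_foo stand in for the
 * command's real ethtool_ops callbacks. ->set_validate() runs without any
 * locks and returns 1 to proceed, 0 to treat the request as a no-op, or a
 * negative errno; ->set() runs under rtnl_lock()/ethnl_ops_begin() and
 * returns 1 when configuration changed (which triggers ->set_ntf_cmd),
 * 0 when nothing changed, or a negative errno.
 */
struct foo_config {
	u32 val;
};

static int ethnl_set_foo_validate(struct ethnl_req_info *req_info,
				  struct genl_info *info)
{
	const struct ethtool_ops *ops = req_info->dev->ethtool_ops;

	return ops->get_foo && ops->set_foo ? 1 : -EOPNOTSUPP;
}

static int ethnl_set_foo(struct ethnl_req_info *req_info,
			 struct genl_info *info)
{
	struct net_device *dev = req_info->dev;
	struct foo_config cfg;
	bool mod = false;
	int ret;

	dev->ethtool_ops->get_foo(dev, &cfg);
	ethnl_update_u32(&cfg.val, info->attrs[ETHTOOL_A_FOO_VAL], &mod);
	if (!mod)
		return 0;

	ret = dev->ethtool_ops->set_foo(dev, &cfg);
	return ret < 0 ? ret : 1;
}
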
+
static const struct ethnl_request_ops *
ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
[ETHTOOL_MSG_LINKINFO_NTF] = &ethnl_linkinfo_request_ops,
@@ -603,6 +667,8 @@ ethnl_default_notify_ops[ETHTOOL_MSG_KERNEL_MAX + 1] = {
[ETHTOOL_MSG_EEE_NTF] = &ethnl_eee_request_ops,
[ETHTOOL_MSG_FEC_NTF] = &ethnl_fec_request_ops,
[ETHTOOL_MSG_MODULE_NTF] = &ethnl_module_request_ops,
+ [ETHTOOL_MSG_PLCA_NTF] = &ethnl_plca_cfg_request_ops,
+ [ETHTOOL_MSG_MM_NTF] = &ethnl_mm_request_ops,
};
/* default notification handler */
@@ -696,6 +762,8 @@ static const ethnl_notify_handler_t ethnl_notify_handlers[] = {
[ETHTOOL_MSG_EEE_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_FEC_NTF] = ethnl_default_notify,
[ETHTOOL_MSG_MODULE_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_PLCA_NTF] = ethnl_default_notify,
+ [ETHTOOL_MSG_MM_NTF] = ethnl_default_notify,
};
void ethtool_notify(struct net_device *dev, unsigned int cmd, const void *data)
@@ -760,7 +828,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_LINKINFO_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_linkinfo,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_linkinfo_set_policy,
.maxattr = ARRAY_SIZE(ethnl_linkinfo_set_policy) - 1,
},
@@ -776,7 +844,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_LINKMODES_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_linkmodes,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_linkmodes_set_policy,
.maxattr = ARRAY_SIZE(ethnl_linkmodes_set_policy) - 1,
},
@@ -801,7 +869,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_DEBUG_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_debug,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_debug_set_policy,
.maxattr = ARRAY_SIZE(ethnl_debug_set_policy) - 1,
},
@@ -818,7 +886,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_WOL_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_wol,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_wol_set_policy,
.maxattr = ARRAY_SIZE(ethnl_wol_set_policy) - 1,
},
@@ -850,7 +918,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_PRIVFLAGS_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_privflags,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_privflags_set_policy,
.maxattr = ARRAY_SIZE(ethnl_privflags_set_policy) - 1,
},
@@ -866,7 +934,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_RINGS_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_rings,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_rings_set_policy,
.maxattr = ARRAY_SIZE(ethnl_rings_set_policy) - 1,
},
@@ -882,7 +950,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_CHANNELS_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_channels,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_channels_set_policy,
.maxattr = ARRAY_SIZE(ethnl_channels_set_policy) - 1,
},
@@ -898,7 +966,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_COALESCE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_coalesce,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_coalesce_set_policy,
.maxattr = ARRAY_SIZE(ethnl_coalesce_set_policy) - 1,
},
@@ -914,7 +982,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_PAUSE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_pause,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_pause_set_policy,
.maxattr = ARRAY_SIZE(ethnl_pause_set_policy) - 1,
},
@@ -930,7 +998,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_EEE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_eee,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_eee_set_policy,
.maxattr = ARRAY_SIZE(ethnl_eee_set_policy) - 1,
},
@@ -977,7 +1045,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_FEC_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_fec,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_fec_set_policy,
.maxattr = ARRAY_SIZE(ethnl_fec_set_policy) - 1,
},
@@ -1021,7 +1089,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_MODULE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_module,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_module_set_policy,
.maxattr = ARRAY_SIZE(ethnl_module_set_policy) - 1,
},
@@ -1037,7 +1105,7 @@ static const struct genl_ops ethtool_genl_ops[] = {
{
.cmd = ETHTOOL_MSG_PSE_SET,
.flags = GENL_UNS_ADMIN_PERM,
- .doit = ethnl_set_pse,
+ .doit = ethnl_default_set_doit,
.policy = ethnl_pse_set_policy,
.maxattr = ARRAY_SIZE(ethnl_pse_set_policy) - 1,
},
@@ -1047,6 +1115,47 @@ static const struct genl_ops ethtool_genl_ops[] = {
.policy = ethnl_rss_get_policy,
.maxattr = ARRAY_SIZE(ethnl_rss_get_policy) - 1,
},
+ {
+ .cmd = ETHTOOL_MSG_PLCA_GET_CFG,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_plca_get_cfg_policy,
+ .maxattr = ARRAY_SIZE(ethnl_plca_get_cfg_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_PLCA_SET_CFG,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_default_set_doit,
+ .policy = ethnl_plca_set_cfg_policy,
+ .maxattr = ARRAY_SIZE(ethnl_plca_set_cfg_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_PLCA_GET_STATUS,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_plca_get_status_policy,
+ .maxattr = ARRAY_SIZE(ethnl_plca_get_status_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_MM_GET,
+ .doit = ethnl_default_doit,
+ .start = ethnl_default_start,
+ .dumpit = ethnl_default_dumpit,
+ .done = ethnl_default_done,
+ .policy = ethnl_mm_get_policy,
+ .maxattr = ARRAY_SIZE(ethnl_mm_get_policy) - 1,
+ },
+ {
+ .cmd = ETHTOOL_MSG_MM_SET,
+ .flags = GENL_UNS_ADMIN_PERM,
+ .doit = ethnl_default_set_doit,
+ .policy = ethnl_mm_set_policy,
+ .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1,
+ },
};
static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 3753787ba233..f7b189ed96b2 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -138,6 +138,32 @@ static inline void ethnl_update_bool32(u32 *dst, const struct nlattr *attr,
}
/**
+ * ethnl_update_bool() - update bool from NLA_U8 attribute
+ * @dst: value to update
+ * @attr: netlink attribute with new value or null
+ * @mod: pointer to bool for modification tracking
+ *
+ * Use the bool value from NLA_U8 netlink attribute @attr to set the bool
+ * pointed to by @dst to false (if zero) or true (if not); do nothing if
+ * @attr is null. The bool pointed to by @mod is set to true if this function
+ * changed the logical value of *dst, otherwise it is left as is.
+ */
+static inline void ethnl_update_bool(bool *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ u8 val;
+
+ if (!attr)
+ return;
+ val = !!nla_get_u8(attr);
+ if (!!*dst == val)
+ return;
+
+ *dst = val;
+ *mod = true;
+}
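
/* Editor's example of the helper above; ETHTOOL_A_FOO_ACTIVE is a
 * hypothetical NLA_U8 attribute. Any non-zero payload normalizes to true,
 * so an attribute carrying 2 leaves both active and mod untouched here,
 * while an attribute carrying 0 flips active to false and sets mod.
 */
static void foo_example(struct nlattr **tb)
{
	bool active = true;
	bool mod = false;

	ethnl_update_bool(&active, tb[ETHTOOL_A_FOO_ACTIVE], &mod);
}
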
+
+/**
* ethnl_update_binary() - update binary data from NLA_BINARY attribute
* @dst: value to update
* @len: destination buffer length
@@ -258,13 +284,14 @@ int ethnl_ops_begin(struct net_device *dev);
void ethnl_ops_complete(struct net_device *dev);
/**
- * struct ethnl_request_ops - unified handling of GET requests
+ * struct ethnl_request_ops - unified handling of GET and SET requests
* @request_cmd: command id for request (GET)
* @reply_cmd: command id for reply (GET_REPLY)
* @hdr_attr: attribute type for request header
* @req_info_size: size of request info
* @reply_data_size: size of reply data
* @allow_nodev_do: allow non-dump request with no device identification
+ * @set_ntf_cmd: notification to generate on changes (SET)
* @parse_request:
* Parse request except common header (struct ethnl_req_info). Common
* header is already filled on entry, the rest up to @repdata_offset
@@ -293,6 +320,18 @@ void ethnl_ops_complete(struct net_device *dev);
* used e.g. to free any additional data structures outside the main
* structure which were allocated by ->prepare_data(). When processing
* dump requests, ->cleanup() is called for each message.
+ * @set_validate:
+ * Check if set operation is supported for a given device, and perform
+ * extra input checks. Expected return values:
+ * - 0 if the operation is a noop for the device (rare)
+ * - 1 if operation should proceed to calling @set
+ * - negative errno on errors
+ * Called without any locks, just a reference on the netdev.
+ * @set:
+ * Execute the set operation. The implementation should return
+ * - 0 if no configuration has changed
+ * - 1 if configuration changed and notification should be generated
+ * - negative errno on errors
*
* Description of variable parts of GET request handling when using the
* unified infrastructure. When used, a pointer to an instance of this
@@ -309,6 +348,7 @@ struct ethnl_request_ops {
unsigned int req_info_size;
unsigned int reply_data_size;
bool allow_nodev_do;
+ u8 set_ntf_cmd;
int (*parse_request)(struct ethnl_req_info *req_info,
struct nlattr **tb,
@@ -322,6 +362,11 @@ struct ethnl_request_ops {
const struct ethnl_req_info *req_info,
const struct ethnl_reply_data *reply_data);
void (*cleanup_data)(struct ethnl_reply_data *reply_data);
+
+ int (*set_validate)(struct ethnl_req_info *req_info,
+ struct genl_info *info);
+ int (*set)(struct ethnl_req_info *req_info,
+ struct genl_info *info);
};
/* request handlers */
@@ -347,6 +392,9 @@ extern const struct ethnl_request_ops ethnl_phc_vclocks_request_ops;
extern const struct ethnl_request_ops ethnl_module_request_ops;
extern const struct ethnl_request_ops ethnl_pse_request_ops;
extern const struct ethnl_request_ops ethnl_rss_request_ops;
+extern const struct ethnl_request_ops ethnl_plca_cfg_request_ops;
+extern const struct ethnl_request_ops ethnl_plca_status_request_ops;
+extern const struct ethnl_request_ops ethnl_mm_request_ops;
extern const struct nla_policy ethnl_header_policy[ETHTOOL_A_HEADER_FLAGS + 1];
extern const struct nla_policy ethnl_header_policy_stats[ETHTOOL_A_HEADER_FLAGS + 1];
@@ -365,12 +413,12 @@ extern const struct nla_policy ethnl_features_set_policy[ETHTOOL_A_FEATURES_WANT
extern const struct nla_policy ethnl_privflags_get_policy[ETHTOOL_A_PRIVFLAGS_HEADER + 1];
extern const struct nla_policy ethnl_privflags_set_policy[ETHTOOL_A_PRIVFLAGS_FLAGS + 1];
extern const struct nla_policy ethnl_rings_get_policy[ETHTOOL_A_RINGS_HEADER + 1];
-extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_TX_PUSH + 1];
+extern const struct nla_policy ethnl_rings_set_policy[ETHTOOL_A_RINGS_RX_PUSH + 1];
extern const struct nla_policy ethnl_channels_get_policy[ETHTOOL_A_CHANNELS_HEADER + 1];
extern const struct nla_policy ethnl_channels_set_policy[ETHTOOL_A_CHANNELS_COMBINED_COUNT + 1];
extern const struct nla_policy ethnl_coalesce_get_policy[ETHTOOL_A_COALESCE_HEADER + 1];
extern const struct nla_policy ethnl_coalesce_set_policy[ETHTOOL_A_COALESCE_MAX + 1];
-extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_HEADER + 1];
+extern const struct nla_policy ethnl_pause_get_policy[ETHTOOL_A_PAUSE_STATS_SRC + 1];
extern const struct nla_policy ethnl_pause_set_policy[ETHTOOL_A_PAUSE_TX + 1];
extern const struct nla_policy ethnl_eee_get_policy[ETHTOOL_A_EEE_HEADER + 1];
extern const struct nla_policy ethnl_eee_set_policy[ETHTOOL_A_EEE_TX_LPI_TIMER + 1];
@@ -381,33 +429,25 @@ extern const struct nla_policy ethnl_tunnel_info_get_policy[ETHTOOL_A_TUNNEL_INF
extern const struct nla_policy ethnl_fec_get_policy[ETHTOOL_A_FEC_HEADER + 1];
extern const struct nla_policy ethnl_fec_set_policy[ETHTOOL_A_FEC_AUTO + 1];
extern const struct nla_policy ethnl_module_eeprom_get_policy[ETHTOOL_A_MODULE_EEPROM_I2C_ADDRESS + 1];
-extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1];
+extern const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1];
extern const struct nla_policy ethnl_phc_vclocks_get_policy[ETHTOOL_A_PHC_VCLOCKS_HEADER + 1];
extern const struct nla_policy ethnl_module_get_policy[ETHTOOL_A_MODULE_HEADER + 1];
extern const struct nla_policy ethnl_module_set_policy[ETHTOOL_A_MODULE_POWER_MODE_POLICY + 1];
extern const struct nla_policy ethnl_pse_get_policy[ETHTOOL_A_PSE_HEADER + 1];
extern const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1];
extern const struct nla_policy ethnl_rss_get_policy[ETHTOOL_A_RSS_CONTEXT + 1];
+extern const struct nla_policy ethnl_plca_get_cfg_policy[ETHTOOL_A_PLCA_HEADER + 1];
+extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1];
+extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1];
+extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1];
+extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1];
-int ethnl_set_linkinfo(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_linkmodes(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_debug(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info);
int ethnl_set_features(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info);
int ethnl_tunnel_info_start(struct netlink_callback *cb);
int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
-int ethnl_set_fec(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_module(struct sk_buff *skb, struct genl_info *info);
-int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info);
extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN];
extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN];
diff --git a/net/ethtool/pause.c b/net/ethtool/pause.c
index a8c113d244db..6657d0b888d8 100644
--- a/net/ethtool/pause.c
+++ b/net/ethtool/pause.c
@@ -5,8 +5,12 @@
struct pause_req_info {
struct ethnl_req_info base;
+ enum ethtool_mac_stats_src src;
};
+#define PAUSE_REQINFO(__req_base) \
+ container_of(__req_base, struct pause_req_info, base)
+
struct pause_reply_data {
struct ethnl_reply_data base;
struct ethtool_pauseparam pauseparam;
@@ -19,13 +23,40 @@ struct pause_reply_data {
const struct nla_policy ethnl_pause_get_policy[] = {
[ETHTOOL_A_PAUSE_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy_stats),
+ [ETHTOOL_A_PAUSE_STATS_SRC] =
+ NLA_POLICY_MAX(NLA_U32, ETHTOOL_MAC_STATS_SRC_PMAC),
};
+static int pause_parse_request(struct ethnl_req_info *req_base,
+ struct nlattr **tb,
+ struct netlink_ext_ack *extack)
+{
+ enum ethtool_mac_stats_src src = ETHTOOL_MAC_STATS_SRC_AGGREGATE;
+ struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
+
+ if (tb[ETHTOOL_A_PAUSE_STATS_SRC]) {
+ if (!(req_base->flags & ETHTOOL_FLAG_STATS)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "ETHTOOL_FLAG_STATS must be set when requesting a source of stats");
+ return -EINVAL;
+ }
+
+ src = nla_get_u32(tb[ETHTOOL_A_PAUSE_STATS_SRC]);
+ }
+
+ req_info->src = src;
+
+ return 0;
+}
+
static int pause_prepare_data(const struct ethnl_req_info *req_base,
struct ethnl_reply_data *reply_base,
struct genl_info *info)
{
+ const struct pause_req_info *req_info = PAUSE_REQINFO(req_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct pause_reply_data *data = PAUSE_REPDATA(reply_base);
+ enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
int ret;
@@ -34,14 +65,26 @@ static int pause_prepare_data(const struct ethnl_req_info *req_base,
ethtool_stats_init((u64 *)&data->pausestat,
sizeof(data->pausestat) / 8);
+ data->pausestat.src = src;
ret = ethnl_ops_begin(dev);
if (ret < 0)
return ret;
+
+ if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
+ src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
+ !__ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device does not support MAC merge layer");
+ ethnl_ops_complete(dev);
+ return -EOPNOTSUPP;
+ }
+
dev->ethtool_ops->get_pauseparam(dev, &data->pauseparam);
if (req_base->flags & ETHTOOL_FLAG_STATS &&
dev->ethtool_ops->get_pause_stats)
dev->ethtool_ops->get_pause_stats(dev, &data->pausestat);
+
ethnl_ops_complete(dev);
return 0;
@@ -56,6 +99,7 @@ static int pause_reply_size(const struct ethnl_req_info *req_base,
if (req_base->flags & ETHTOOL_FLAG_STATS)
n += nla_total_size(0) + /* _PAUSE_STATS */
+ nla_total_size(sizeof(u32)) + /* _PAUSE_STATS_SRC */
nla_total_size_64bit(sizeof(u64)) * ETHTOOL_PAUSE_STAT_CNT;
return n;
}
@@ -77,6 +121,9 @@ static int pause_put_stats(struct sk_buff *skb,
const u16 pad = ETHTOOL_A_PAUSE_STAT_PAD;
struct nlattr *nest;
+ if (nla_put_u32(skb, ETHTOOL_A_PAUSE_STATS_SRC, pause_stats->src))
+ return -EMSGSIZE;
+
nest = nla_nest_start(skb, ETHTOOL_A_PAUSE_STATS);
if (!nest)
return -EMSGSIZE;
@@ -114,18 +161,6 @@ static int pause_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_pause_request_ops = {
- .request_cmd = ETHTOOL_MSG_PAUSE_GET,
- .reply_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_PAUSE_HEADER,
- .req_info_size = sizeof(struct pause_req_info),
- .reply_data_size = sizeof(struct pause_reply_data),
-
- .prepare_data = pause_prepare_data,
- .reply_size = pause_reply_size,
- .fill_reply = pause_fill_reply,
-};
-
/* PAUSE_SET */
const struct nla_policy ethnl_pause_set_policy[] = {
@@ -136,51 +171,49 @@ const struct nla_policy ethnl_pause_set_policy[] = {
[ETHTOOL_A_PAUSE_TX] = { .type = NLA_U8 },
};
-int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_pause_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_pauseparam && ops->set_pauseparam ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_pause(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
struct ethtool_pauseparam params = {};
- struct ethnl_req_info req_info = {};
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_PAUSE_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_pauseparam || !ops->set_pauseparam)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ops->get_pauseparam(dev, &params);
+ dev->ethtool_ops->get_pauseparam(dev, &params);
ethnl_update_bool32(&params.autoneg, tb[ETHTOOL_A_PAUSE_AUTONEG], &mod);
ethnl_update_bool32(&params.rx_pause, tb[ETHTOOL_A_PAUSE_RX], &mod);
ethnl_update_bool32(&params.tx_pause, tb[ETHTOOL_A_PAUSE_TX], &mod);
- ret = 0;
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_pauseparam(dev, &params);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_PAUSE_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_pause_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PAUSE_GET,
+ .reply_cmd = ETHTOOL_MSG_PAUSE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PAUSE_HEADER,
+ .req_info_size = sizeof(struct pause_req_info),
+ .reply_data_size = sizeof(struct pause_reply_data),
+
+ .parse_request = pause_parse_request,
+ .prepare_data = pause_prepare_data,
+ .reply_size = pause_reply_size,
+ .fill_reply = pause_fill_reply,
+
+ .set_validate = ethnl_set_pause_validate,
+ .set = ethnl_set_pause,
+ .set_ntf_cmd = ETHTOOL_MSG_PAUSE_NTF,
+};
diff --git a/net/ethtool/plca.c b/net/ethtool/plca.c
new file mode 100644
index 000000000000..5a8cab4df0c9
--- /dev/null
+++ b/net/ethtool/plca.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/phy.h>
+#include <linux/ethtool_netlink.h>
+
+#include "netlink.h"
+#include "common.h"
+
+struct plca_req_info {
+ struct ethnl_req_info base;
+};
+
+struct plca_reply_data {
+ struct ethnl_reply_data base;
+ struct phy_plca_cfg plca_cfg;
+ struct phy_plca_status plca_st;
+};
+
+// Helpers ------------------------------------------------------------------ //
+
+#define PLCA_REPDATA(__reply_base) \
+ container_of(__reply_base, struct plca_reply_data, base)
+
+static void plca_update_sint(int *dst, const struct nlattr *attr,
+ bool *mod)
+{
+ if (!attr)
+ return;
+
+ *dst = nla_get_u32(attr);
+ *mod = true;
+}
+
+// PLCA get configuration message ------------------------------------------- //
+
+const struct nla_policy ethnl_plca_get_cfg_policy[] = {
+ [ETHTOOL_A_PLCA_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int plca_get_cfg_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_phy_ops *ops;
+ int ret;
+
+ // check that the PHY device is available and connected
+ if (!dev->phydev) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ // note: rtnl_lock is held already by ethnl_default_doit
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->get_plca_cfg) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out;
+
+ memset(&data->plca_cfg, 0xff,
+ sizeof_field(struct plca_reply_data, plca_cfg));
+
+ ret = ops->get_plca_cfg(dev->phydev, &data->plca_cfg);
+ ethnl_ops_complete(dev);
+
+out:
+ return ret;
+}
+
+static int plca_get_cfg_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ return nla_total_size(sizeof(u16)) + /* _VERSION */
+ nla_total_size(sizeof(u8)) + /* _ENABLED */
+ nla_total_size(sizeof(u32)) + /* _NODE_CNT */
+ nla_total_size(sizeof(u32)) + /* _NODE_ID */
+ nla_total_size(sizeof(u32)) + /* _TO_TIMER */
+ nla_total_size(sizeof(u32)) + /* _BURST_COUNT */
+ nla_total_size(sizeof(u32)); /* _BURST_TIMER */
+}
+
+static int plca_get_cfg_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ const struct phy_plca_cfg *plca = &data->plca_cfg;
+
+ if ((plca->version >= 0 &&
+ nla_put_u16(skb, ETHTOOL_A_PLCA_VERSION, plca->version)) ||
+ (plca->enabled >= 0 &&
+ nla_put_u8(skb, ETHTOOL_A_PLCA_ENABLED, !!plca->enabled)) ||
+ (plca->node_id >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_ID, plca->node_id)) ||
+ (plca->node_cnt >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_NODE_CNT, plca->node_cnt)) ||
+ (plca->to_tmr >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_TO_TMR, plca->to_tmr)) ||
+ (plca->burst_cnt >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_CNT, plca->burst_cnt)) ||
+ (plca->burst_tmr >= 0 &&
+ nla_put_u32(skb, ETHTOOL_A_PLCA_BURST_TMR, plca->burst_tmr)))
+ return -EMSGSIZE;
+
+ return 0;
+};
+
+// PLCA set configuration message ------------------------------------------- //
+
+const struct nla_policy ethnl_plca_set_cfg_policy[] = {
+ [ETHTOOL_A_PLCA_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+ [ETHTOOL_A_PLCA_ENABLED] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_PLCA_NODE_ID] = NLA_POLICY_MAX(NLA_U32, 255),
+ [ETHTOOL_A_PLCA_NODE_CNT] = NLA_POLICY_RANGE(NLA_U32, 1, 255),
+ [ETHTOOL_A_PLCA_TO_TMR] = NLA_POLICY_MAX(NLA_U32, 255),
+ [ETHTOOL_A_PLCA_BURST_CNT] = NLA_POLICY_MAX(NLA_U32, 255),
+ [ETHTOOL_A_PLCA_BURST_TMR] = NLA_POLICY_MAX(NLA_U32, 255),
+};
+
+static int
+ethnl_set_plca(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
+ const struct ethtool_phy_ops *ops;
+ struct nlattr **tb = info->attrs;
+ struct phy_plca_cfg plca_cfg;
+ bool mod = false;
+ int ret;
+
+ // check that the PHY device is available and connected
+ if (!dev->phydev)
+ return -EOPNOTSUPP;
+
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->set_plca_cfg)
+ return -EOPNOTSUPP;
+
+ memset(&plca_cfg, 0xff, sizeof(plca_cfg));
+ plca_update_sint(&plca_cfg.enabled, tb[ETHTOOL_A_PLCA_ENABLED], &mod);
+ plca_update_sint(&plca_cfg.node_id, tb[ETHTOOL_A_PLCA_NODE_ID], &mod);
+ plca_update_sint(&plca_cfg.node_cnt, tb[ETHTOOL_A_PLCA_NODE_CNT], &mod);
+ plca_update_sint(&plca_cfg.to_tmr, tb[ETHTOOL_A_PLCA_TO_TMR], &mod);
+ plca_update_sint(&plca_cfg.burst_cnt, tb[ETHTOOL_A_PLCA_BURST_CNT],
+ &mod);
+ plca_update_sint(&plca_cfg.burst_tmr, tb[ETHTOOL_A_PLCA_BURST_TMR],
+ &mod);
+ if (!mod)
+ return 0;
+
+ ret = ops->set_plca_cfg(dev->phydev, &plca_cfg, info->extack);
+ return ret < 0 ? ret : 1;
+}
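
/* Editor's sketch of the consumer side; the foo_phy_* helpers are
 * hypothetical. Because ethnl_set_plca() memsets plca_cfg to 0xff, every
 * signed field the user did not supply reads as -1, so a PHY driver's
 * set_plca_cfg() applies a sparse update; the GET path's fill_reply above
 * mirrors this by emitting only attributes whose fields are >= 0.
 */
static int foo_phy_set_plca_cfg(struct phy_device *phydev,
				const struct phy_plca_cfg *plca_cfg,
				struct netlink_ext_ack *extack)
{
	if (plca_cfg->enabled >= 0)
		foo_phy_write_enable(phydev, plca_cfg->enabled);
	if (plca_cfg->node_id >= 0)
		foo_phy_write_node_id(phydev, plca_cfg->node_id);
	return 0;
}
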
+
+const struct ethnl_request_ops ethnl_plca_cfg_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PLCA_GET_CFG,
+ .reply_cmd = ETHTOOL_MSG_PLCA_GET_CFG_REPLY,
+ .hdr_attr = ETHTOOL_A_PLCA_HEADER,
+ .req_info_size = sizeof(struct plca_req_info),
+ .reply_data_size = sizeof(struct plca_reply_data),
+
+ .prepare_data = plca_get_cfg_prepare_data,
+ .reply_size = plca_get_cfg_reply_size,
+ .fill_reply = plca_get_cfg_fill_reply,
+
+ .set = ethnl_set_plca,
+ .set_ntf_cmd = ETHTOOL_MSG_PLCA_NTF,
+};
+
+// PLCA get status message -------------------------------------------------- //
+
+const struct nla_policy ethnl_plca_get_status_policy[] = {
+ [ETHTOOL_A_PLCA_HEADER] =
+ NLA_POLICY_NESTED(ethnl_header_policy),
+};
+
+static int plca_get_status_prepare_data(const struct ethnl_req_info *req_base,
+ struct ethnl_reply_data *reply_base,
+ struct genl_info *info)
+{
+ struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ struct net_device *dev = reply_base->dev;
+ const struct ethtool_phy_ops *ops;
+ int ret;
+
+ // check that the PHY device is available and connected
+ if (!dev->phydev) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ // note: rtnl_lock is held already by ethnl_default_doit
+ ops = ethtool_phy_ops;
+ if (!ops || !ops->get_plca_status) {
+ ret = -EOPNOTSUPP;
+ goto out;
+ }
+
+ ret = ethnl_ops_begin(dev);
+ if (ret < 0)
+ goto out;
+
+ memset(&data->plca_st, 0xff,
+ sizeof_field(struct plca_reply_data, plca_st));
+
+ ret = ops->get_plca_status(dev->phydev, &data->plca_st);
+ ethnl_ops_complete(dev);
+out:
+ return ret;
+}
+
+static int plca_get_status_reply_size(const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ return nla_total_size(sizeof(u8)); /* _STATUS */
+}
+
+static int plca_get_status_fill_reply(struct sk_buff *skb,
+ const struct ethnl_req_info *req_base,
+ const struct ethnl_reply_data *reply_base)
+{
+ const struct plca_reply_data *data = PLCA_REPDATA(reply_base);
+ const u8 status = data->plca_st.pst;
+
+ if (nla_put_u8(skb, ETHTOOL_A_PLCA_STATUS, !!status))
+ return -EMSGSIZE;
+
+ return 0;
+};
+
+const struct ethnl_request_ops ethnl_plca_status_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PLCA_GET_STATUS,
+ .reply_cmd = ETHTOOL_MSG_PLCA_GET_STATUS_REPLY,
+ .hdr_attr = ETHTOOL_A_PLCA_HEADER,
+ .req_info_size = sizeof(struct plca_req_info),
+ .reply_data_size = sizeof(struct plca_reply_data),
+
+ .prepare_data = plca_get_status_prepare_data,
+ .reply_size = plca_get_status_reply_size,
+ .fill_reply = plca_get_status_fill_reply,
+};
diff --git a/net/ethtool/privflags.c b/net/ethtool/privflags.c
index 4c7bfa81e4ab..23264a1ebf12 100644
--- a/net/ethtool/privflags.c
+++ b/net/ethtool/privflags.c
@@ -118,19 +118,6 @@ static void privflags_cleanup_data(struct ethnl_reply_data *reply_data)
kfree(data->priv_flag_names);
}
-const struct ethnl_request_ops ethnl_privflags_request_ops = {
- .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET,
- .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
- .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER,
- .req_info_size = sizeof(struct privflags_req_info),
- .reply_data_size = sizeof(struct privflags_reply_data),
-
- .prepare_data = privflags_prepare_data,
- .reply_size = privflags_reply_size,
- .fill_reply = privflags_fill_reply,
- .cleanup_data = privflags_cleanup_data,
-};
-
/* PRIVFLAGS_SET */
const struct nla_policy ethnl_privflags_set_policy[] = {
@@ -139,63 +126,70 @@ const struct nla_policy ethnl_privflags_set_policy[] = {
[ETHTOOL_A_PRIVFLAGS_FLAGS] = { .type = NLA_NESTED },
};
-int ethnl_set_privflags(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_privflags_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ if (!info->attrs[ETHTOOL_A_PRIVFLAGS_FLAGS])
+ return -EINVAL;
+
+ if (!ops->get_priv_flags || !ops->set_priv_flags ||
+ !ops->get_sset_count || !ops->get_strings)
+ return -EOPNOTSUPP;
+ return 1;
+}
+
+static int
+ethnl_set_privflags(struct ethnl_req_info *req_info, struct genl_info *info)
{
const char (*names)[ETH_GSTRING_LEN] = NULL;
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- const struct ethtool_ops *ops;
- struct net_device *dev;
unsigned int nflags;
bool mod = false;
bool compact;
u32 flags;
int ret;
- if (!tb[ETHTOOL_A_PRIVFLAGS_FLAGS])
- return -EINVAL;
ret = ethnl_bitset_is_compact(tb[ETHTOOL_A_PRIVFLAGS_FLAGS], &compact);
if (ret < 0)
return ret;
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_PRIVFLAGS_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_priv_flags || !ops->set_priv_flags ||
- !ops->get_sset_count || !ops->get_strings)
- goto out_dev;
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
ret = ethnl_get_priv_flags_info(dev, &nflags, compact ? NULL : &names);
if (ret < 0)
- goto out_ops;
- flags = ops->get_priv_flags(dev);
+ return ret;
+ flags = dev->ethtool_ops->get_priv_flags(dev);
ret = ethnl_update_bitset32(&flags, nflags,
tb[ETHTOOL_A_PRIVFLAGS_FLAGS], names,
info->extack, &mod);
if (ret < 0 || !mod)
goto out_free;
- ret = ops->set_priv_flags(dev, flags);
+ ret = dev->ethtool_ops->set_priv_flags(dev, flags);
if (ret < 0)
goto out_free;
- ethtool_notify(dev, ETHTOOL_MSG_PRIVFLAGS_NTF, NULL);
+ ret = 1;
out_free:
kfree(names);
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
return ret;
}
+
+const struct ethnl_request_ops ethnl_privflags_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PRIVFLAGS_GET,
+ .reply_cmd = ETHTOOL_MSG_PRIVFLAGS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PRIVFLAGS_HEADER,
+ .req_info_size = sizeof(struct privflags_req_info),
+ .reply_data_size = sizeof(struct privflags_reply_data),
+
+ .prepare_data = privflags_prepare_data,
+ .reply_size = privflags_reply_size,
+ .fill_reply = privflags_fill_reply,
+ .cleanup_data = privflags_cleanup_data,
+
+ .set_validate = ethnl_set_privflags_validate,
+ .set = ethnl_set_privflags,
+ .set_ntf_cmd = ETHTOOL_MSG_PRIVFLAGS_NTF,
+};
diff --git a/net/ethtool/pse-pd.c b/net/ethtool/pse-pd.c
index e8683e485dc9..530b8b99e6df 100644
--- a/net/ethtool/pse-pd.c
+++ b/net/ethtool/pse-pd.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
//
-// ethtool interface for for Ethernet PSE (Power Sourcing Equipment)
+// ethtool interface for Ethernet PSE (Power Sourcing Equipment)
// and PD (Powered Device)
//
// Copyright (c) 2022 Pengutronix, Oleksij Rempel <kernel@pengutronix.de>
@@ -106,18 +106,6 @@ static int pse_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_pse_request_ops = {
- .request_cmd = ETHTOOL_MSG_PSE_GET,
- .reply_cmd = ETHTOOL_MSG_PSE_GET_REPLY,
- .hdr_attr = ETHTOOL_A_PSE_HEADER,
- .req_info_size = sizeof(struct pse_req_info),
- .reply_data_size = sizeof(struct pse_reply_data),
-
- .prepare_data = pse_prepare_data,
- .reply_size = pse_reply_size,
- .fill_reply = pse_fill_reply,
-};
-
/* PSE_SET */
const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
@@ -127,59 +115,50 @@ const struct nla_policy ethnl_pse_set_policy[ETHTOOL_A_PSE_MAX + 1] = {
ETHTOOL_PODL_PSE_ADMIN_STATE_ENABLED),
};
-static int pse_set_pse_config(struct net_device *dev,
- struct netlink_ext_ack *extack,
- struct nlattr **tb)
+static int
+ethnl_set_pse_validate(struct ethnl_req_info *req_info, struct genl_info *info)
{
- struct phy_device *phydev = dev->phydev;
- struct pse_control_config config = {};
+ return !!info->attrs[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL];
+}
- /* Optional attribute. Do not return error if not set. */
- if (!tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL])
- return 0;
+static int
+ethnl_set_pse(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct net_device *dev = req_info->dev;
+ struct pse_control_config config = {};
+ struct nlattr **tb = info->attrs;
+ struct phy_device *phydev;
	/* these values are already validated by ethnl_pse_set_policy */
config.admin_cotrol = nla_get_u32(tb[ETHTOOL_A_PODL_PSE_ADMIN_CONTROL]);
+ phydev = dev->phydev;
if (!phydev) {
- NL_SET_ERR_MSG(extack, "No PHY is attached");
+ NL_SET_ERR_MSG(info->extack, "No PHY is attached");
return -EOPNOTSUPP;
}
if (!phydev->psec) {
- NL_SET_ERR_MSG(extack, "No PSE is attached");
+ NL_SET_ERR_MSG(info->extack, "No PSE is attached");
return -EOPNOTSUPP;
}
- return pse_ethtool_set_config(phydev->psec, extack, &config);
+ /* Return errno directly - PSE has no notification */
+ return pse_ethtool_set_config(phydev->psec, info->extack, &config);
}
-int ethnl_set_pse(struct sk_buff *skb, struct genl_info *info)
-{
- struct ethnl_req_info req_info = {};
- struct nlattr **tb = info->attrs;
- struct net_device *dev;
- int ret;
-
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_PSE_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
-
- dev = req_info.dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
- ret = pse_set_pse_config(dev, info->extack, tb);
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
+const struct ethnl_request_ops ethnl_pse_request_ops = {
+ .request_cmd = ETHTOOL_MSG_PSE_GET,
+ .reply_cmd = ETHTOOL_MSG_PSE_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_PSE_HEADER,
+ .req_info_size = sizeof(struct pse_req_info),
+ .reply_data_size = sizeof(struct pse_reply_data),
- ethnl_parse_header_dev_put(&req_info);
+ .prepare_data = pse_prepare_data,
+ .reply_size = pse_reply_size,
+ .fill_reply = pse_fill_reply,
- return ret;
-}
+ .set_validate = ethnl_set_pse_validate,
+ .set = ethnl_set_pse,
+ /* PSE has no notification */
+};
diff --git a/net/ethtool/rings.c b/net/ethtool/rings.c
index fa3ec8d438f7..f358cd57d094 100644
--- a/net/ethtool/rings.c
+++ b/net/ethtool/rings.c
@@ -56,7 +56,8 @@ static int rings_reply_size(const struct ethnl_req_info *req_base,
nla_total_size(sizeof(u32)) + /* _RINGS_RX_BUF_LEN */
nla_total_size(sizeof(u8)) + /* _RINGS_TCP_DATA_SPLIT */
nla_total_size(sizeof(u32) + /* _RINGS_CQE_SIZE */
- nla_total_size(sizeof(u8))); /* _RINGS_TX_PUSH */
+ nla_total_size(sizeof(u8)) + /* _RINGS_TX_PUSH */
+ nla_total_size(sizeof(u8))); /* _RINGS_RX_PUSH */
}
static int rings_fill_reply(struct sk_buff *skb,
@@ -96,24 +97,13 @@ static int rings_fill_reply(struct sk_buff *skb,
kr->tcp_data_split))) ||
(kr->cqe_size &&
(nla_put_u32(skb, ETHTOOL_A_RINGS_CQE_SIZE, kr->cqe_size))) ||
- nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push))
+ nla_put_u8(skb, ETHTOOL_A_RINGS_TX_PUSH, !!kr->tx_push) ||
+ nla_put_u8(skb, ETHTOOL_A_RINGS_RX_PUSH, !!kr->rx_push))
return -EMSGSIZE;
return 0;
}
-const struct ethnl_request_ops ethnl_rings_request_ops = {
- .request_cmd = ETHTOOL_MSG_RINGS_GET,
- .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY,
- .hdr_attr = ETHTOOL_A_RINGS_HEADER,
- .req_info_size = sizeof(struct rings_req_info),
- .reply_data_size = sizeof(struct rings_reply_data),
-
- .prepare_data = rings_prepare_data,
- .reply_size = rings_reply_size,
- .fill_reply = rings_fill_reply,
-};
-
/* RINGS_SET */
const struct nla_policy ethnl_rings_set_policy[] = {
@@ -126,64 +116,64 @@ const struct nla_policy ethnl_rings_set_policy[] = {
[ETHTOOL_A_RINGS_RX_BUF_LEN] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_CQE_SIZE] = NLA_POLICY_MIN(NLA_U32, 1),
[ETHTOOL_A_RINGS_TX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
+ [ETHTOOL_A_RINGS_RX_PUSH] = NLA_POLICY_MAX(NLA_U8, 1),
};
-int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_rings_validate(struct ethnl_req_info *req_info,
+ struct genl_info *info)
{
- struct kernel_ethtool_ringparam kernel_ringparam = {};
- struct ethtool_ringparam ringparam = {};
- struct ethnl_req_info req_info = {};
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
struct nlattr **tb = info->attrs;
- const struct nlattr *err_attr;
- const struct ethtool_ops *ops;
- struct net_device *dev;
- bool mod = false;
- int ret;
-
- ret = ethnl_parse_header_dev_get(&req_info,
- tb[ETHTOOL_A_RINGS_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ops = dev->ethtool_ops;
- ret = -EOPNOTSUPP;
- if (!ops->get_ringparam || !ops->set_ringparam)
- goto out_dev;
if (tb[ETHTOOL_A_RINGS_RX_BUF_LEN] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_RX_BUF_LEN)) {
- ret = -EOPNOTSUPP;
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_RINGS_RX_BUF_LEN],
"setting rx buf len not supported");
- goto out_dev;
+ return -EOPNOTSUPP;
}
if (tb[ETHTOOL_A_RINGS_CQE_SIZE] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_CQE_SIZE)) {
- ret = -EOPNOTSUPP;
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_RINGS_CQE_SIZE],
"setting cqe size not supported");
- goto out_dev;
+ return -EOPNOTSUPP;
}
if (tb[ETHTOOL_A_RINGS_TX_PUSH] &&
!(ops->supported_ring_params & ETHTOOL_RING_USE_TX_PUSH)) {
- ret = -EOPNOTSUPP;
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_RINGS_TX_PUSH],
"setting tx push not supported");
- goto out_dev;
+ return -EOPNOTSUPP;
}
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
- ops->get_ringparam(dev, &ringparam, &kernel_ringparam, info->extack);
+ if (tb[ETHTOOL_A_RINGS_RX_PUSH] &&
+ !(ops->supported_ring_params & ETHTOOL_RING_USE_RX_PUSH)) {
+ NL_SET_ERR_MSG_ATTR(info->extack,
+ tb[ETHTOOL_A_RINGS_RX_PUSH],
+ "setting rx push not supported");
+ return -EOPNOTSUPP;
+ }
+
+ return ops->get_ringparam && ops->set_ringparam ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_rings(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ struct kernel_ethtool_ringparam kernel_ringparam = {};
+ struct ethtool_ringparam ringparam = {};
+ struct net_device *dev = req_info->dev;
+ struct nlattr **tb = info->attrs;
+ const struct nlattr *err_attr;
+ bool mod = false;
+ int ret;
+
+ dev->ethtool_ops->get_ringparam(dev, &ringparam,
+ &kernel_ringparam, info->extack);
ethnl_update_u32(&ringparam.rx_pending, tb[ETHTOOL_A_RINGS_RX], &mod);
ethnl_update_u32(&ringparam.rx_mini_pending,
@@ -197,9 +187,10 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
tb[ETHTOOL_A_RINGS_CQE_SIZE], &mod);
ethnl_update_u8(&kernel_ringparam.tx_push,
tb[ETHTOOL_A_RINGS_TX_PUSH], &mod);
- ret = 0;
+ ethnl_update_u8(&kernel_ringparam.rx_push,
+ tb[ETHTOOL_A_RINGS_RX_PUSH], &mod);
if (!mod)
- goto out_ops;
+ return 0;
/* ensure new ring parameters are within limits */
if (ringparam.rx_pending > ringparam.rx_max_pending)
@@ -213,23 +204,28 @@ int ethnl_set_rings(struct sk_buff *skb, struct genl_info *info)
else
err_attr = NULL;
if (err_attr) {
- ret = -EINVAL;
NL_SET_ERR_MSG_ATTR(info->extack, err_attr,
"requested ring size exceeds maximum");
- goto out_ops;
+ return -EINVAL;
}
ret = dev->ethtool_ops->set_ringparam(dev, &ringparam,
&kernel_ringparam, info->extack);
- if (ret < 0)
- goto out_ops;
- ethtool_notify(dev, ETHTOOL_MSG_RINGS_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return ret < 0 ? ret : 1;
}
+
+const struct ethnl_request_ops ethnl_rings_request_ops = {
+ .request_cmd = ETHTOOL_MSG_RINGS_GET,
+ .reply_cmd = ETHTOOL_MSG_RINGS_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_RINGS_HEADER,
+ .req_info_size = sizeof(struct rings_req_info),
+ .reply_data_size = sizeof(struct rings_reply_data),
+
+ .prepare_data = rings_prepare_data,
+ .reply_size = rings_reply_size,
+ .fill_reply = rings_fill_reply,
+
+ .set_validate = ethnl_set_rings_validate,
+ .set = ethnl_set_rings,
+ .set_ntf_cmd = ETHTOOL_MSG_RINGS_NTF,
+};
diff --git a/net/ethtool/rss.c b/net/ethtool/rss.c
index ebe6145aed3f..be260ab34e58 100644
--- a/net/ethtool/rss.c
+++ b/net/ethtool/rss.c
@@ -122,10 +122,13 @@ rss_fill_reply(struct sk_buff *skb, const struct ethnl_req_info *req_base,
{
const struct rss_reply_data *data = RSS_REPDATA(reply_base);
- if (nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc) ||
- nla_put(skb, ETHTOOL_A_RSS_INDIR,
- sizeof(u32) * data->indir_size, data->indir_table) ||
- nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey))
+ if ((data->hfunc &&
+ nla_put_u32(skb, ETHTOOL_A_RSS_HFUNC, data->hfunc)) ||
+ (data->indir_size &&
+ nla_put(skb, ETHTOOL_A_RSS_INDIR,
+ sizeof(u32) * data->indir_size, data->indir_table)) ||
+ (data->hkey_size &&
+ nla_put(skb, ETHTOOL_A_RSS_HKEY, data->hkey_size, data->hkey)))
return -EMSGSIZE;
return 0;
diff --git a/net/ethtool/stats.c b/net/ethtool/stats.c
index a20e0a24ff61..010ed19ccc99 100644
--- a/net/ethtool/stats.c
+++ b/net/ethtool/stats.c
@@ -7,6 +7,7 @@
struct stats_req_info {
struct ethnl_req_info base;
DECLARE_BITMAP(stat_mask, __ETHTOOL_STATS_CNT);
+ enum ethtool_mac_stats_src src;
};
#define STATS_REQINFO(__req_base) \
@@ -75,16 +76,19 @@ const char stats_rmon_names[__ETHTOOL_A_STATS_RMON_CNT][ETH_GSTRING_LEN] = {
[ETHTOOL_A_STATS_RMON_JABBER] = "etherStatsJabbers",
};
-const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_GROUPS + 1] = {
+const struct nla_policy ethnl_stats_get_policy[ETHTOOL_A_STATS_SRC + 1] = {
[ETHTOOL_A_STATS_HEADER] =
NLA_POLICY_NESTED(ethnl_header_policy),
[ETHTOOL_A_STATS_GROUPS] = { .type = NLA_NESTED },
+ [ETHTOOL_A_STATS_SRC] =
+ NLA_POLICY_MAX(NLA_U32, ETHTOOL_MAC_STATS_SRC_PMAC),
};
static int stats_parse_request(struct ethnl_req_info *req_base,
struct nlattr **tb,
struct netlink_ext_ack *extack)
{
+ enum ethtool_mac_stats_src src = ETHTOOL_MAC_STATS_SRC_AGGREGATE;
struct stats_req_info *req_info = STATS_REQINFO(req_base);
bool mod = false;
int err;
@@ -100,6 +104,11 @@ static int stats_parse_request(struct ethnl_req_info *req_base,
return -EINVAL;
}
+ if (tb[ETHTOOL_A_STATS_SRC])
+ src = nla_get_u32(tb[ETHTOOL_A_STATS_SRC]);
+
+ req_info->src = src;
+
return 0;
}
@@ -108,7 +117,9 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
struct genl_info *info)
{
const struct stats_req_info *req_info = STATS_REQINFO(req_base);
+ struct netlink_ext_ack *extack = info ? info->extack : NULL;
struct stats_reply_data *data = STATS_REPDATA(reply_base);
+ enum ethtool_mac_stats_src src = req_info->src;
struct net_device *dev = reply_base->dev;
int ret;
@@ -116,11 +127,25 @@ static int stats_prepare_data(const struct ethnl_req_info *req_base,
if (ret < 0)
return ret;
+ if ((src == ETHTOOL_MAC_STATS_SRC_EMAC ||
+ src == ETHTOOL_MAC_STATS_SRC_PMAC) &&
+ !__ethtool_dev_mm_supported(dev)) {
+ NL_SET_ERR_MSG_MOD(extack,
+ "Device does not support MAC merge layer");
+ ethnl_ops_complete(dev);
+ return -EOPNOTSUPP;
+ }
+
/* Mark all stats as unset (see ETHTOOL_STAT_NOT_SET) to prevent them
* from being reported to user space in case driver did not set them.
*/
memset(&data->stats, 0xff, sizeof(data->stats));
+ data->phy_stats.src = src;
+ data->mac_stats.src = src;
+ data->ctrl_stats.src = src;
+ data->rmon_stats.src = src;
+
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask) &&
dev->ethtool_ops->get_eth_phy_stats)
dev->ethtool_ops->get_eth_phy_stats(dev, &data->phy_stats);
@@ -146,6 +171,8 @@ static int stats_reply_size(const struct ethnl_req_info *req_base,
unsigned int n_grps = 0, n_stats = 0;
int len = 0;
+ len += nla_total_size(sizeof(u32)); /* _STATS_SRC */
+
if (test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask)) {
n_stats += sizeof(struct ethtool_eth_phy_stats) / sizeof(u64);
n_grps++;
@@ -379,6 +406,9 @@ static int stats_fill_reply(struct sk_buff *skb,
const struct stats_reply_data *data = STATS_REPDATA(reply_base);
int ret = 0;
+ if (nla_put_u32(skb, ETHTOOL_A_STATS_SRC, req_info->src))
+ return -EMSGSIZE;
+
if (!ret && test_bit(ETHTOOL_STATS_ETH_PHY, req_info->stat_mask))
ret = stats_put_stats(skb, data, ETHTOOL_STATS_ETH_PHY,
ETH_SS_STATS_ETH_PHY,
@@ -410,3 +440,130 @@ const struct ethnl_request_ops ethnl_stats_request_ops = {
.reply_size = stats_reply_size,
.fill_reply = stats_fill_reply,
};
+
+static u64 ethtool_stats_sum(u64 a, u64 b)
+{
+ if (a == ETHTOOL_STAT_NOT_SET)
+ return b;
+ if (b == ETHTOOL_STAT_NOT_SET)
+ return a;
+ return a + b;
+}
+
+/* Avoid modifying the aggregation procedure every time a new counter is added
+ * by treating the structures as an array of u64 statistics.
+ */
+static void ethtool_aggregate_stats(void *aggr_stats, const void *emac_stats,
+ const void *pmac_stats, size_t stats_size,
+ size_t stats_offset)
+{
+ size_t num_stats = stats_size / sizeof(u64);
+ const u64 *s1 = emac_stats + stats_offset;
+ const u64 *s2 = pmac_stats + stats_offset;
+ u64 *s = aggr_stats + stats_offset;
+ int i;
+
+ for (i = 0; i < num_stats; i++)
+ s[i] = ethtool_stats_sum(s1[i], s2[i]);
+}
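
/* Editor's note: ETHTOOL_STAT_NOT_SET is ~0ULL, which is why the helpers
 * below memset their scratch structs to 0xff before calling the driver;
 * counters the driver never fills stay "not set" and are skipped by
 * ethtool_stats_sum() instead of poisoning the total. The scheme assumes
 * the counters sit in one contiguous u64 block, e.g. (hypothetical):
 */
struct foo_mac_stats {
	enum ethtool_mac_stats_src src;	/* header, excluded via stats_offset */
	struct {
		u64 frames_ok;
		u64 frames_err;
	} stats;			/* aggregated as a u64[2] array */
};
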
+
+void ethtool_aggregate_mac_stats(struct net_device *dev,
+ struct ethtool_eth_mac_stats *mac_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_eth_mac_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_eth_mac_stats(dev, &emac);
+ ops->get_eth_mac_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(mac_stats, &emac, &pmac,
+ sizeof(mac_stats->stats),
+ offsetof(struct ethtool_eth_mac_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_mac_stats);
+
+void ethtool_aggregate_phy_stats(struct net_device *dev,
+ struct ethtool_eth_phy_stats *phy_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_eth_phy_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_eth_phy_stats(dev, &emac);
+ ops->get_eth_phy_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(phy_stats, &emac, &pmac,
+ sizeof(phy_stats->stats),
+ offsetof(struct ethtool_eth_phy_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_phy_stats);
+
+void ethtool_aggregate_ctrl_stats(struct net_device *dev,
+ struct ethtool_eth_ctrl_stats *ctrl_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_eth_ctrl_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_eth_ctrl_stats(dev, &emac);
+ ops->get_eth_ctrl_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(ctrl_stats, &emac, &pmac,
+ sizeof(ctrl_stats->stats),
+ offsetof(struct ethtool_eth_ctrl_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_ctrl_stats);
+
+void ethtool_aggregate_pause_stats(struct net_device *dev,
+ struct ethtool_pause_stats *pause_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ struct ethtool_pause_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_pause_stats(dev, &emac);
+ ops->get_pause_stats(dev, &pmac);
+
+ ethtool_aggregate_stats(pause_stats, &emac, &pmac,
+ sizeof(pause_stats->stats),
+ offsetof(struct ethtool_pause_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_pause_stats);
+
+void ethtool_aggregate_rmon_stats(struct net_device *dev,
+ struct ethtool_rmon_stats *rmon_stats)
+{
+ const struct ethtool_ops *ops = dev->ethtool_ops;
+ const struct ethtool_rmon_hist_range *dummy;
+ struct ethtool_rmon_stats pmac, emac;
+
+ memset(&emac, 0xff, sizeof(emac));
+ memset(&pmac, 0xff, sizeof(pmac));
+ emac.src = ETHTOOL_MAC_STATS_SRC_EMAC;
+ pmac.src = ETHTOOL_MAC_STATS_SRC_PMAC;
+
+ ops->get_rmon_stats(dev, &emac, &dummy);
+ ops->get_rmon_stats(dev, &pmac, &dummy);
+
+ ethtool_aggregate_stats(rmon_stats, &emac, &pmac,
+ sizeof(rmon_stats->stats),
+ offsetof(struct ethtool_rmon_stats, stats));
+}
+EXPORT_SYMBOL(ethtool_aggregate_rmon_stats);
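
/* Editor's sketch of how an MM-capable driver is expected to plug into the
 * helpers above (foo_hw_read_pause_stats() is hypothetical). The aggregation
 * helper calls back into ops->get_pause_stats() with src preset to EMAC and
 * PMAC, so the dispatch below terminates instead of recursing.
 */
static void foo_get_pause_stats(struct net_device *dev,
				struct ethtool_pause_stats *stats)
{
	switch (stats->src) {
	case ETHTOOL_MAC_STATS_SRC_EMAC:
		foo_hw_read_pause_stats(dev, stats, false);
		break;
	case ETHTOOL_MAC_STATS_SRC_PMAC:
		foo_hw_read_pause_stats(dev, stats, true);
		break;
	case ETHTOOL_MAC_STATS_SRC_AGGREGATE:
		ethtool_aggregate_pause_stats(dev, stats);
		break;
	}
}
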
diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c
index 88f435e76481..a4a43d9e6e9d 100644
--- a/net/ethtool/wol.c
+++ b/net/ethtool/wol.c
@@ -82,18 +82,6 @@ static int wol_fill_reply(struct sk_buff *skb,
return 0;
}
-const struct ethnl_request_ops ethnl_wol_request_ops = {
- .request_cmd = ETHTOOL_MSG_WOL_GET,
- .reply_cmd = ETHTOOL_MSG_WOL_GET_REPLY,
- .hdr_attr = ETHTOOL_A_WOL_HEADER,
- .req_info_size = sizeof(struct wol_req_info),
- .reply_data_size = sizeof(struct wol_reply_data),
-
- .prepare_data = wol_prepare_data,
- .reply_size = wol_reply_size,
- .fill_reply = wol_fill_reply,
-};
-
/* WOL_SET */
const struct nla_policy ethnl_wol_set_policy[] = {
@@ -104,67 +92,66 @@ const struct nla_policy ethnl_wol_set_policy[] = {
.len = SOPASS_MAX },
};
-int ethnl_set_wol(struct sk_buff *skb, struct genl_info *info)
+static int
+ethnl_set_wol_validate(struct ethnl_req_info *req_info, struct genl_info *info)
+{
+ const struct ethtool_ops *ops = req_info->dev->ethtool_ops;
+
+ return ops->get_wol && ops->set_wol ? 1 : -EOPNOTSUPP;
+}
+
+static int
+ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info)
{
struct ethtool_wolinfo wol = { .cmd = ETHTOOL_GWOL };
- struct ethnl_req_info req_info = {};
+ struct net_device *dev = req_info->dev;
struct nlattr **tb = info->attrs;
- struct net_device *dev;
bool mod = false;
int ret;
- ret = ethnl_parse_header_dev_get(&req_info, tb[ETHTOOL_A_WOL_HEADER],
- genl_info_net(info), info->extack,
- true);
- if (ret < 0)
- return ret;
- dev = req_info.dev;
- ret = -EOPNOTSUPP;
- if (!dev->ethtool_ops->get_wol || !dev->ethtool_ops->set_wol)
- goto out_dev;
-
- rtnl_lock();
- ret = ethnl_ops_begin(dev);
- if (ret < 0)
- goto out_rtnl;
-
dev->ethtool_ops->get_wol(dev, &wol);
ret = ethnl_update_bitset32(&wol.wolopts, WOL_MODE_COUNT,
tb[ETHTOOL_A_WOL_MODES], wol_mode_names,
info->extack, &mod);
if (ret < 0)
- goto out_ops;
+ return ret;
if (wol.wolopts & ~wol.supported) {
NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_WOL_MODES],
"cannot enable unsupported WoL mode");
- ret = -EINVAL;
- goto out_ops;
+ return -EINVAL;
}
if (tb[ETHTOOL_A_WOL_SOPASS]) {
if (!(wol.supported & WAKE_MAGICSECURE)) {
NL_SET_ERR_MSG_ATTR(info->extack,
tb[ETHTOOL_A_WOL_SOPASS],
"magicsecure not supported, cannot set password");
- ret = -EINVAL;
- goto out_ops;
+ return -EINVAL;
}
ethnl_update_binary(wol.sopass, sizeof(wol.sopass),
tb[ETHTOOL_A_WOL_SOPASS], &mod);
}
if (!mod)
- goto out_ops;
+ return 0;
ret = dev->ethtool_ops->set_wol(dev, &wol);
if (ret)
- goto out_ops;
+ return ret;
dev->wol_enabled = !!wol.wolopts;
- ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL);
-
-out_ops:
- ethnl_ops_complete(dev);
-out_rtnl:
- rtnl_unlock();
-out_dev:
- ethnl_parse_header_dev_put(&req_info);
- return ret;
+ return 1;
}
+
+const struct ethnl_request_ops ethnl_wol_request_ops = {
+ .request_cmd = ETHTOOL_MSG_WOL_GET,
+ .reply_cmd = ETHTOOL_MSG_WOL_GET_REPLY,
+ .hdr_attr = ETHTOOL_A_WOL_HEADER,
+ .req_info_size = sizeof(struct wol_req_info),
+ .reply_data_size = sizeof(struct wol_reply_data),
+
+ .prepare_data = wol_prepare_data,
+ .reply_size = wol_reply_size,
+ .fill_reply = wol_fill_reply,
+
+ .set_validate = ethnl_set_wol_validate,
+ .set = ethnl_set_wol,
+ .set_ntf_cmd = ETHTOOL_MSG_WOL_NTF,
+};
diff --git a/net/ieee802154/header_ops.c b/net/ieee802154/header_ops.c
index af337cf62764..35d384dfe29d 100644
--- a/net/ieee802154/header_ops.c
+++ b/net/ieee802154/header_ops.c
@@ -120,6 +120,30 @@ ieee802154_hdr_push(struct sk_buff *skb, struct ieee802154_hdr *hdr)
}
EXPORT_SYMBOL_GPL(ieee802154_hdr_push);
+int ieee802154_beacon_push(struct sk_buff *skb,
+ struct ieee802154_beacon_frame *beacon)
+{
+ struct ieee802154_beacon_hdr *mac_pl = &beacon->mac_pl;
+ struct ieee802154_hdr *mhr = &beacon->mhr;
+ int ret;
+
+ skb_reserve(skb, sizeof(*mhr));
+ ret = ieee802154_hdr_push(skb, mhr);
+ if (ret < 0)
+ return ret;
+
+ skb_reset_mac_header(skb);
+ skb->mac_len = ret;
+
+ skb_put_data(skb, mac_pl, sizeof(*mac_pl));
+
+ if (mac_pl->pend_short_addr_count || mac_pl->pend_ext_addr_count)
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(ieee802154_beacon_push);
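
/* Editor's usage sketch for the new helper; filling in the MHR and beacon
 * payload of "frame" is elided. ieee802154_beacon_push() itself reserves
 * room for the MHR, pushes it, then appends the beacon payload, so the
 * caller only needs a sufficiently sized skb.
 */
static struct sk_buff *foo_build_beacon(struct ieee802154_beacon_frame *frame)
{
	struct sk_buff *skb;

	skb = alloc_skb(IEEE802154_MTU, GFP_KERNEL);
	if (!skb)
		return NULL;

	if (ieee802154_beacon_push(skb, frame)) {
		kfree_skb(skb);
		return NULL;
	}

	return skb;
}
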
+
static int
ieee802154_hdr_get_addr(const u8 *buf, int mode, bool omit_pan,
struct ieee802154_addr *addr)
diff --git a/net/ieee802154/nl802154.c b/net/ieee802154/nl802154.c
index 248ad5e46969..2215f576ee37 100644
--- a/net/ieee802154/nl802154.c
+++ b/net/ieee802154/nl802154.c
@@ -187,8 +187,8 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_WPAN_DEV] = { .type = NLA_U64 },
- [NL802154_ATTR_PAGE] = { .type = NLA_U8, },
- [NL802154_ATTR_CHANNEL] = { .type = NLA_U8, },
+ [NL802154_ATTR_PAGE] = NLA_POLICY_MAX(NLA_U8, IEEE802154_MAX_PAGE),
+ [NL802154_ATTR_CHANNEL] = NLA_POLICY_MAX(NLA_U8, IEEE802154_MAX_CHANNEL),
[NL802154_ATTR_TX_POWER] = { .type = NLA_S32, },
@@ -221,6 +221,20 @@ static const struct nla_policy nl802154_policy[NL802154_ATTR_MAX+1] = {
[NL802154_ATTR_COORDINATOR] = { .type = NLA_NESTED },
+ [NL802154_ATTR_SCAN_TYPE] =
+ NLA_POLICY_RANGE(NLA_U8, NL802154_SCAN_ED, NL802154_SCAN_RIT_PASSIVE),
+ [NL802154_ATTR_SCAN_CHANNELS] =
+ NLA_POLICY_MASK(NLA_U32, GENMASK(IEEE802154_MAX_CHANNEL, 0)),
+ [NL802154_ATTR_SCAN_PREAMBLE_CODES] = { .type = NLA_REJECT },
+ [NL802154_ATTR_SCAN_MEAN_PRF] = { .type = NLA_REJECT },
+ [NL802154_ATTR_SCAN_DURATION] =
+ NLA_POLICY_MAX(NLA_U8, IEEE802154_MAX_SCAN_DURATION),
+ [NL802154_ATTR_SCAN_DONE_REASON] =
+ NLA_POLICY_RANGE(NLA_U8, NL802154_SCAN_DONE_REASON_FINISHED,
+ NL802154_SCAN_DONE_REASON_ABORTED),
+ [NL802154_ATTR_BEACON_INTERVAL] =
+ NLA_POLICY_MAX(NLA_U8, IEEE802154_MAX_SCAN_DURATION),
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
[NL802154_ATTR_SEC_ENABLED] = { .type = NLA_U8, },
[NL802154_ATTR_SEC_OUT_LEVEL] = { .type = NLA_U32, },
@@ -969,8 +983,7 @@ static int nl802154_set_channel(struct sk_buff *skb, struct genl_info *info)
channel = nla_get_u8(info->attrs[NL802154_ATTR_CHANNEL]);
/* check 802.15.4 constraints */
- if (page > IEEE802154_MAX_PAGE || channel > IEEE802154_MAX_CHANNEL ||
- !(rdev->wpan_phy.supported.channels[page] & BIT(channel)))
+ if (!ieee802154_chan_is_valid(&rdev->wpan_phy, page, channel))
return -EINVAL;
return rdev_set_channel(rdev, page, channel);
@@ -1384,6 +1397,236 @@ int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
}
EXPORT_SYMBOL_GPL(nl802154_scan_event);
+static int nl802154_trigger_scan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ struct wpan_phy *wpan_phy = &rdev->wpan_phy;
+ struct cfg802154_scan_request *request;
+ u8 type;
+ int err;
+
+ if (wpan_dev->iftype == NL802154_IFTYPE_MONITOR) {
+ NL_SET_ERR_MSG(info->extack, "Monitors are not allowed to perform scans");
+ return -EOPNOTSUPP;
+ }
+
+ if (!info->attrs[NL802154_ATTR_SCAN_TYPE]) {
+ NL_SET_ERR_MSG(info->extack, "Malformed request, missing scan type");
+ return -EINVAL;
+ }
+
+ request = kzalloc(sizeof(*request), GFP_KERNEL);
+ if (!request)
+ return -ENOMEM;
+
+ request->wpan_dev = wpan_dev;
+ request->wpan_phy = wpan_phy;
+
+ type = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_TYPE]);
+ switch (type) {
+ case NL802154_SCAN_PASSIVE:
+ request->type = type;
+ break;
+ default:
+ NL_SET_ERR_MSG_FMT(info->extack, "Unsupported scan type: %d", type);
+ err = -EINVAL;
+ goto free_request;
+ }
+
+ /* Use current page by default */
+ if (info->attrs[NL802154_ATTR_PAGE])
+ request->page = nla_get_u8(info->attrs[NL802154_ATTR_PAGE]);
+ else
+ request->page = wpan_phy->current_page;
+
+ /* Scan all supported channels by default */
+ if (info->attrs[NL802154_ATTR_SCAN_CHANNELS])
+ request->channels = nla_get_u32(info->attrs[NL802154_ATTR_SCAN_CHANNELS]);
+ else
+ request->channels = wpan_phy->supported.channels[request->page];
+
+ /* Use maximum duration order by default */
+ if (info->attrs[NL802154_ATTR_SCAN_DURATION])
+ request->duration = nla_get_u8(info->attrs[NL802154_ATTR_SCAN_DURATION]);
+ else
+ request->duration = IEEE802154_MAX_SCAN_DURATION;
+
+ err = rdev_trigger_scan(rdev, request);
+ if (err) {
+ pr_err("Failure starting scanning (%d)\n", err);
+ goto free_request;
+ }
+
+ return 0;
+
+free_request:
+ kfree(request);
+
+ return err;
+}
+
+static int nl802154_prep_scan_msg(struct sk_buff *msg,
+ struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev, u32 portid,
+ u32 seq, int flags, u8 cmd, u8 arg)
+{
+ void *hdr;
+
+ hdr = nl802154hdr_put(msg, portid, seq, flags, cmd);
+ if (!hdr)
+ return -ENOBUFS;
+
+ if (nla_put_u32(msg, NL802154_ATTR_WPAN_PHY, rdev->wpan_phy_idx))
+ goto nla_put_failure;
+
+ if (wpan_dev->netdev &&
+ nla_put_u32(msg, NL802154_ATTR_IFINDEX, wpan_dev->netdev->ifindex))
+ goto nla_put_failure;
+
+ if (nla_put_u64_64bit(msg, NL802154_ATTR_WPAN_DEV,
+ wpan_dev_id(wpan_dev), NL802154_ATTR_PAD))
+ goto nla_put_failure;
+
+ if (cmd == NL802154_CMD_SCAN_DONE &&
+ nla_put_u8(msg, NL802154_ATTR_SCAN_DONE_REASON, arg))
+ goto nla_put_failure;
+
+ genlmsg_end(msg, hdr);
+
+ return 0;
+
+nla_put_failure:
+ genlmsg_cancel(msg, hdr);
+
+ return -EMSGSIZE;
+}
+
+static int nl802154_send_scan_msg(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev, u8 cmd, u8 arg)
+{
+ struct sk_buff *msg;
+ int ret;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return -ENOMEM;
+
+ ret = nl802154_prep_scan_msg(msg, rdev, wpan_dev, 0, 0, 0, cmd, arg);
+ if (ret < 0) {
+ nlmsg_free(msg);
+ return ret;
+ }
+
+ return genlmsg_multicast_netns(&nl802154_fam,
+ wpan_phy_net(&rdev->wpan_phy), msg, 0,
+ NL802154_MCGRP_SCAN, GFP_KERNEL);
+}
+
+int nl802154_scan_started(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev)
+{
+ struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(wpan_phy);
+ int err;
+
+ /* Ignore errors when there are no listeners */
+ err = nl802154_send_scan_msg(rdev, wpan_dev, NL802154_CMD_TRIGGER_SCAN, 0);
+ if (err == -ESRCH)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nl802154_scan_started);
+
+int nl802154_scan_done(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ enum nl802154_scan_done_reasons reason)
+{
+ struct cfg802154_registered_device *rdev = wpan_phy_to_rdev(wpan_phy);
+ int err;
+
+ /* Ignore errors when there are no listeners */
+ err = nl802154_send_scan_msg(rdev, wpan_dev, NL802154_CMD_SCAN_DONE, reason);
+ if (err == -ESRCH)
+ err = 0;
+
+ return err;
+}
+EXPORT_SYMBOL_GPL(nl802154_scan_done);
+
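nl802154_scan_started() and nl802154_scan_done() bracket a scan from the driver side: the lower layer reports when the scan actually begins and how it ended, and -ESRCH is swallowed because an empty multicast group is not an error. A hedged sketch of a completion path in a MAC layer, where "aborted" is an assumed local flag:

	/* Hypothetical completion path: report the outcome on the scan
	 * multicast group using the reasons validated by the policy above. */
	reason = aborted ? NL802154_SCAN_DONE_REASON_ABORTED
			 : NL802154_SCAN_DONE_REASON_FINISHED;
	nl802154_scan_done(wpan_phy, wpan_dev, reason);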
+static int nl802154_abort_scan(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+
+ /* Resources are released in the notification helper above */
+ return rdev_abort_scan(rdev, wpan_dev);
+}
+
+static int
+nl802154_send_beacons(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+ struct wpan_phy *wpan_phy = &rdev->wpan_phy;
+ struct cfg802154_beacon_request *request;
+ int err;
+
+ if (wpan_dev->iftype != NL802154_IFTYPE_COORD) {
+ NL_SET_ERR_MSG(info->extack, "Only coordinators can send beacons");
+ return -EOPNOTSUPP;
+ }
+
+ if (wpan_dev->pan_id == cpu_to_le16(IEEE802154_PANID_BROADCAST)) {
+ NL_SET_ERR_MSG(info->extack, "Device is not part of any PAN");
+ return -EPERM;
+ }
+
+ request = kzalloc(sizeof(*request), GFP_KERNEL);
+ if (!request)
+ return -ENOMEM;
+
+ request->wpan_dev = wpan_dev;
+ request->wpan_phy = wpan_phy;
+
+ /* Use the maximum interval order by default */
+ if (info->attrs[NL802154_ATTR_BEACON_INTERVAL])
+ request->interval = nla_get_u8(info->attrs[NL802154_ATTR_BEACON_INTERVAL]);
+ else
+ request->interval = IEEE802154_MAX_SCAN_DURATION;
+
+ err = rdev_send_beacons(rdev, request);
+ if (err) {
+ pr_err("Failure starting sending beacons (%d)\n", err);
+ goto free_request;
+ }
+
+ return 0;
+
+free_request:
+ kfree(request);
+
+ return err;
+}
+
+void nl802154_beaconing_done(struct wpan_dev *wpan_dev)
+{
+ /* NOP */
+}
+EXPORT_SYMBOL_GPL(nl802154_beaconing_done);
+
+static int
+nl802154_stop_beacons(struct sk_buff *skb, struct genl_info *info)
+{
+ struct cfg802154_registered_device *rdev = info->user_ptr[0];
+ struct net_device *dev = info->user_ptr[1];
+ struct wpan_dev *wpan_dev = dev->ieee802154_ptr;
+
+ /* Resources are released in the notification helper above */
+ return rdev_stop_beacons(rdev, wpan_dev);
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static const struct nla_policy nl802154_dev_addr_policy[NL802154_DEV_ADDR_ATTR_MAX + 1] = {
[NL802154_DEV_ADDR_ATTR_PAN_ID] = { .type = NLA_U16 },
@@ -2474,6 +2717,38 @@ static const struct genl_ops nl802154_ops[] = {
.internal_flags = NL802154_FLAG_NEED_NETDEV |
NL802154_FLAG_NEED_RTNL,
},
+ {
+ .cmd = NL802154_CMD_TRIGGER_SCAN,
+ .doit = nl802154_trigger_scan,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_ABORT_SCAN,
+ .doit = nl802154_abort_scan,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_SEND_BEACONS,
+ .doit = nl802154_send_beacons,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
+ {
+ .cmd = NL802154_CMD_STOP_BEACONS,
+ .doit = nl802154_stop_beacons,
+ .flags = GENL_ADMIN_PERM,
+ .internal_flags = NL802154_FLAG_NEED_NETDEV |
+ NL802154_FLAG_CHECK_NETDEV_UP |
+ NL802154_FLAG_NEED_RTNL,
+ },
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
{
.cmd = NL802154_CMD_SET_SEC_PARAMS,
diff --git a/net/ieee802154/nl802154.h b/net/ieee802154/nl802154.h
index 89b805500032..d69d950f9a6a 100644
--- a/net/ieee802154/nl802154.h
+++ b/net/ieee802154/nl802154.h
@@ -6,5 +6,9 @@ int nl802154_init(void);
void nl802154_exit(void);
int nl802154_scan_event(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
struct ieee802154_coord_desc *desc);
+int nl802154_scan_started(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev);
+int nl802154_scan_done(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev,
+ enum nl802154_scan_done_reasons reason);
+void nl802154_beaconing_done(struct wpan_dev *wpan_dev);
#endif /* __IEEE802154_NL802154_H */
diff --git a/net/ieee802154/rdev-ops.h b/net/ieee802154/rdev-ops.h
index 598f5af49775..5eaae15c610e 100644
--- a/net/ieee802154/rdev-ops.h
+++ b/net/ieee802154/rdev-ops.h
@@ -209,6 +209,62 @@ rdev_set_ackreq_default(struct cfg802154_registered_device *rdev,
return ret;
}
+static inline int rdev_trigger_scan(struct cfg802154_registered_device *rdev,
+ struct cfg802154_scan_request *request)
+{
+ int ret;
+
+ if (!rdev->ops->trigger_scan)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_trigger_scan(&rdev->wpan_phy, request);
+ ret = rdev->ops->trigger_scan(&rdev->wpan_phy, request);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int rdev_abort_scan(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev)
+{
+ int ret;
+
+ if (!rdev->ops->abort_scan)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_abort_scan(&rdev->wpan_phy, wpan_dev);
+ ret = rdev->ops->abort_scan(&rdev->wpan_phy, wpan_dev);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int rdev_send_beacons(struct cfg802154_registered_device *rdev,
+ struct cfg802154_beacon_request *request)
+{
+ int ret;
+
+ if (!rdev->ops->send_beacons)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_send_beacons(&rdev->wpan_phy, request);
+ ret = rdev->ops->send_beacons(&rdev->wpan_phy, request);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
+static inline int rdev_stop_beacons(struct cfg802154_registered_device *rdev,
+ struct wpan_dev *wpan_dev)
+{
+ int ret;
+
+ if (!rdev->ops->stop_beacons)
+ return -EOPNOTSUPP;
+
+ trace_802154_rdev_stop_beacons(&rdev->wpan_phy, wpan_dev);
+ ret = rdev->ops->stop_beacons(&rdev->wpan_phy, wpan_dev);
+ trace_802154_rdev_return_int(&rdev->wpan_phy, ret);
+ return ret;
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
/* TODO this is already a nl802154, so move into ieee802154 */
static inline void
diff --git a/net/ieee802154/trace.h b/net/ieee802154/trace.h
index 19c2e5d60e76..e5d8439b9e45 100644
--- a/net/ieee802154/trace.h
+++ b/net/ieee802154/trace.h
@@ -295,6 +295,67 @@ TRACE_EVENT(802154_rdev_set_ackreq_default,
WPAN_DEV_PR_ARG, BOOL_TO_STR(__entry->ackreq))
);
+TRACE_EVENT(802154_rdev_trigger_scan,
+ TP_PROTO(struct wpan_phy *wpan_phy,
+ struct cfg802154_scan_request *request),
+ TP_ARGS(wpan_phy, request),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __field(u8, page)
+ __field(u32, channels)
+ __field(u8, duration)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __entry->page = request->page;
+ __entry->channels = request->channels;
+ __entry->duration = request->duration;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", scan, page: %d, channels: %x, duration %d",
+ WPAN_PHY_PR_ARG, __entry->page, __entry->channels, __entry->duration)
+);
+
+TRACE_EVENT(802154_rdev_send_beacons,
+ TP_PROTO(struct wpan_phy *wpan_phy,
+ struct cfg802154_beacon_request *request),
+ TP_ARGS(wpan_phy, request),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ __field(u8, interval)
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ __entry->interval = request->interval;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", sending beacons (interval order: %d)",
+ WPAN_PHY_PR_ARG, __entry->interval)
+);
+
+DECLARE_EVENT_CLASS(802154_wdev_template,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev),
+ TP_ARGS(wpan_phy, wpan_dev),
+ TP_STRUCT__entry(
+ WPAN_PHY_ENTRY
+ WPAN_DEV_ENTRY
+ ),
+ TP_fast_assign(
+ WPAN_PHY_ASSIGN;
+ WPAN_DEV_ASSIGN;
+ ),
+ TP_printk(WPAN_PHY_PR_FMT ", " WPAN_DEV_PR_FMT,
+ WPAN_PHY_PR_ARG, WPAN_DEV_PR_ARG)
+);
+
+DEFINE_EVENT(802154_wdev_template, 802154_rdev_abort_scan,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev),
+ TP_ARGS(wpan_phy, wpan_dev)
+);
+
+DEFINE_EVENT(802154_wdev_template, 802154_rdev_stop_beacons,
+ TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev),
+ TP_ARGS(wpan_phy, wpan_dev)
+);
+
TRACE_EVENT(802154_rdev_return_int,
TP_PROTO(struct wpan_phy *wpan_phy, int ret),
TP_ARGS(wpan_phy, ret),
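Since abort_scan and stop_beacons trace the same data (phy plus wpan_dev), they share the 802154_wdev_template event class: DECLARE_EVENT_CLASS() emits the struct/assign/print machinery once and each DEFINE_EVENT() only adds a registration, which is cheaper in both source and object size than a full TRACE_EVENT(). Any future wpan_dev-scoped op could reuse it; the event name below is illustrative only:

	DEFINE_EVENT(802154_wdev_template, 802154_rdev_example_op,
		TP_PROTO(struct wpan_phy *wpan_phy, struct wpan_dev *wpan_dev),
		TP_ARGS(wpan_phy, wpan_dev)
	);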
diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile
index af7d2cf490fb..880277c9fd07 100644
--- a/net/ipv4/Makefile
+++ b/net/ipv4/Makefile
@@ -26,6 +26,7 @@ obj-$(CONFIG_IP_MROUTE) += ipmr.o
obj-$(CONFIG_IP_MROUTE_COMMON) += ipmr_base.o
obj-$(CONFIG_NET_IPIP) += ipip.o
gre-y := gre_demux.o
+fou-y := fou_core.o fou_nl.o
obj-$(CONFIG_NET_FOU) += fou.o
obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o
obj-$(CONFIG_NET_IPGRE) += ip_gre.o
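The Makefile change above relies on kbuild's composite-object rule: because fou-y lists the parts, the unchanged "obj-$(CONFIG_NET_FOU) += fou.o" line now links fou.o (and thus fou.ko) from fou_core.o plus the generated fou_nl.o, preserving the module name across the source split. The general pattern, with made-up names:

	# mymod.o (and mymod.ko) is linked from the mymod-y objects
	mymod-y := mymod_core.o mymod_nl.o
	obj-$(CONFIG_MYMOD) += mymod.o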
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index ab4a06be489b..8db6747f892f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -156,7 +156,6 @@ void inet_sock_destruct(struct sock *sk)
kfree(rcu_dereference_protected(inet->inet_opt, 1));
dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1));
dst_release(rcu_dereference_protected(sk->sk_rx_dst, 1));
- sk_refcnt_debug_dec(sk);
}
EXPORT_SYMBOL(inet_sock_destruct);
@@ -347,6 +346,7 @@ lookup_protocol:
sk->sk_destruct = inet_sock_destruct;
sk->sk_protocol = protocol;
sk->sk_backlog_rcv = sk->sk_prot->backlog_rcv;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
inet->uc_ttl = -1;
inet->mc_loop = 1;
@@ -356,8 +356,6 @@ lookup_protocol:
inet->mc_list = NULL;
inet->rcv_tos = 0;
- sk_refcnt_debug_inc(sk);
-
if (inet->inet_num) {
/* It assumes that any protocol which allows
* the user to assign a number at socket
@@ -1485,6 +1483,7 @@ struct sk_buff *inet_gro_receive(struct list_head *head, struct sk_buff *skb)
if (unlikely(ip_fast_csum((u8 *)iph, 5)))
goto out;
+ NAPI_GRO_CB(skb)->proto = proto;
id = ntohl(*(__be32 *)&iph->id);
flush = (u16)((ntohl(*(__be32 *)iph) ^ skb_gro_len(skb)) | (id & ~IP_DF));
id >>= 16;
@@ -1618,9 +1617,9 @@ int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
int inet_gro_complete(struct sk_buff *skb, int nhoff)
{
- __be16 newlen = htons(skb->len - nhoff);
struct iphdr *iph = (struct iphdr *)(skb->data + nhoff);
const struct net_offload *ops;
+ __be16 totlen = iph->tot_len;
int proto = iph->protocol;
int err = -ENOSYS;
@@ -1629,8 +1628,8 @@ int inet_gro_complete(struct sk_buff *skb, int nhoff)
skb_set_inner_network_header(skb, nhoff);
}
- csum_replace2(&iph->check, iph->tot_len, newlen);
- iph->tot_len = newlen;
+ iph_set_totlen(iph, skb->len - nhoff);
+ csum_replace2(&iph->check, totlen, iph->tot_len);
ops = rcu_dereference(inet_offloads[proto]);
if (WARN_ON(!ops || !ops->callbacks.gro_complete))
@@ -1665,6 +1664,7 @@ int inet_ctl_sock_create(struct sock **sk, unsigned short family,
if (rc == 0) {
*sk = sock->sk;
(*sk)->sk_allocation = GFP_ATOMIC;
+ (*sk)->sk_use_task_frag = false;
/*
* Unhash it so that IP input processing does not even see it,
* we do not wish this socket to see incoming packets.
diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c
index ee4e578c7f20..015c0f4ec5ba 100644
--- a/net/ipv4/ah4.c
+++ b/net/ipv4/ah4.c
@@ -117,11 +117,11 @@ static int ip_clear_mutable_options(const struct iphdr *iph, __be32 *daddr)
return 0;
}
-static void ah_output_done(struct crypto_async_request *base, int err)
+static void ah_output_done(void *data, int err)
{
u8 *icv;
struct iphdr *iph;
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct xfrm_state *x = skb_dst(skb)->xfrm;
struct ah_data *ahp = x->data;
struct iphdr *top_iph = ip_hdr(skb);
@@ -262,12 +262,12 @@ out:
return err;
}
-static void ah_input_done(struct crypto_async_request *base, int err)
+static void ah_input_done(void *data, int err)
{
u8 *auth_data;
u8 *icv;
struct iphdr *work_iph;
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct xfrm_state *x = xfrm_input_state(skb);
struct ah_data *ahp = x->data;
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
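The ah_output_done()/ah_input_done() signature change mirrors a crypto API update: completion callbacks are now invoked with the data pointer that was registered on the request, rather than a struct crypto_async_request to dereference. Registration is unchanged; only the callback prototype differs. A sketch, assuming the usual AH request setup (not part of this hunk):

	/* The skb is still registered as callback data; the completion now
	 * receives it directly as the first argument. */
	ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
				   ah_output_done, skb);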
diff --git a/net/ipv4/bpf_tcp_ca.c b/net/ipv4/bpf_tcp_ca.c
index 4517d2bd186a..13fc0c185cd9 100644
--- a/net/ipv4/bpf_tcp_ca.c
+++ b/net/ipv4/bpf_tcp_ca.c
@@ -248,7 +248,8 @@ static int bpf_tcp_ca_init_member(const struct btf_type *t,
}
static int bpf_tcp_ca_check_member(const struct btf_type *t,
- const struct btf_member *member)
+ const struct btf_member *member,
+ const struct bpf_prog *prog)
{
if (is_unsupported(__btf_member_bit_offset(t, member) / 8))
return -ENOTSUPP;
diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c
index 6cd3b6c559f0..79ae7204e8ed 100644
--- a/net/ipv4/cipso_ipv4.c
+++ b/net/ipv4/cipso_ipv4.c
@@ -2222,7 +2222,7 @@ int cipso_v4_skbuff_setattr(struct sk_buff *skb,
memset((char *)(iph + 1) + buf_len, 0, opt_len - buf_len);
if (len_delta != 0) {
iph->ihl = 5 + (opt_len >> 2);
- iph->tot_len = htons(skb->len);
+ iph_set_totlen(iph, skb->len);
}
ip_send_check(iph);
diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c
index 52c8047efedb..ba06ed42e428 100644
--- a/net/ipv4/esp4.c
+++ b/net/ipv4/esp4.c
@@ -244,9 +244,9 @@ static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
}
#endif
-static void esp_output_done(struct crypto_async_request *base, int err)
+static void esp_output_done(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct xfrm_offload *xo = xfrm_offload(skb);
void *tmp;
struct xfrm_state *x;
@@ -332,12 +332,12 @@ static struct ip_esp_hdr *esp_output_set_extra(struct sk_buff *skb,
return esph;
}
-static void esp_output_done_esn(struct crypto_async_request *base, int err)
+static void esp_output_done_esn(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
esp_output_restore_header(skb);
- esp_output_done(base, err);
+ esp_output_done(data, err);
}
static struct ip_esp_hdr *esp_output_udp_encap(struct sk_buff *skb,
@@ -830,9 +830,9 @@ out:
}
EXPORT_SYMBOL_GPL(esp_input_done2);
-static void esp_input_done(struct crypto_async_request *base, int err)
+static void esp_input_done(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
xfrm_input_resume(skb, esp_input_done2(skb, err));
}
@@ -860,12 +860,12 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
}
}
-static void esp_input_done_esn(struct crypto_async_request *base, int err)
+static void esp_input_done_esn(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
esp_input_restore_header(skb);
- esp_input_done(base, err);
+ esp_input_done(data, err);
}
/*
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index ce9ff3c62e84..3bb890a40ed7 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -30,6 +30,7 @@
#include <linux/slab.h>
#include <linux/netlink.h>
#include <linux/hash.h>
+#include <linux/nospec.h>
#include <net/arp.h>
#include <net/inet_dscp.h>
@@ -1022,6 +1023,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi)
if (type > RTAX_MAX)
return false;
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
bool ecn_ca = false;
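array_index_nospec() complements the existing "type > RTAX_MAX" check: the branch protects architectural execution, but a mispredicted branch could still read out of bounds speculatively (Spectre v1), so the index is clamped to the valid range before it is used. The general pattern, with illustrative names:

	if (idx >= ARRAY_SIZE(table))
		return -EINVAL;
	/* constrain speculation before the index touches memory */
	idx = array_index_nospec(idx, ARRAY_SIZE(table));
	val = table[idx];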
diff --git a/net/ipv4/fou.c b/net/ipv4/fou_core.c
index 0c3c6d0cee29..cafec9b4eee0 100644
--- a/net/ipv4/fou.c
+++ b/net/ipv4/fou_core.c
@@ -19,6 +19,8 @@
#include <uapi/linux/fou.h>
#include <uapi/linux/genetlink.h>
+#include "fou_nl.h"
+
struct fou {
struct socket *sock;
u8 protocol;
@@ -640,20 +642,6 @@ static int fou_destroy(struct net *net, struct fou_cfg *cfg)
static struct genl_family fou_nl_family;
-static const struct nla_policy fou_nl_policy[FOU_ATTR_MAX + 1] = {
- [FOU_ATTR_PORT] = { .type = NLA_U16, },
- [FOU_ATTR_AF] = { .type = NLA_U8, },
- [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
- [FOU_ATTR_TYPE] = { .type = NLA_U8, },
- [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
- [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
- [FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
- [FOU_ATTR_LOCAL_V6] = { .len = sizeof(struct in6_addr), },
- [FOU_ATTR_PEER_V6] = { .len = sizeof(struct in6_addr), },
- [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
- [FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
-};
-
static int parse_nl_config(struct genl_info *info,
struct fou_cfg *cfg)
{
@@ -745,7 +733,7 @@ static int parse_nl_config(struct genl_info *info,
return 0;
}
-static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
+int fou_nl_add_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct fou_cfg cfg;
@@ -758,7 +746,7 @@ static int fou_nl_cmd_add_port(struct sk_buff *skb, struct genl_info *info)
return fou_create(net, &cfg, NULL);
}
-static int fou_nl_cmd_rm_port(struct sk_buff *skb, struct genl_info *info)
+int fou_nl_del_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct fou_cfg cfg;
@@ -827,7 +815,7 @@ nla_put_failure:
return -EMSGSIZE;
}
-static int fou_nl_cmd_get_port(struct sk_buff *skb, struct genl_info *info)
+int fou_nl_get_doit(struct sk_buff *skb, struct genl_info *info)
{
struct net *net = genl_info_net(info);
struct fou_net *fn = net_generic(net, fou_net_id);
@@ -874,7 +862,7 @@ out_free:
return ret;
}
-static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
+int fou_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb)
{
struct net *net = sock_net(skb->sk);
struct fou_net *fn = net_generic(net, fou_net_id);
@@ -897,33 +885,12 @@ static int fou_nl_dump(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
-static const struct genl_small_ops fou_nl_ops[] = {
- {
- .cmd = FOU_CMD_ADD,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = fou_nl_cmd_add_port,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = FOU_CMD_DEL,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = fou_nl_cmd_rm_port,
- .flags = GENL_ADMIN_PERM,
- },
- {
- .cmd = FOU_CMD_GET,
- .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
- .doit = fou_nl_cmd_get_port,
- .dumpit = fou_nl_dump,
- },
-};
-
static struct genl_family fou_nl_family __ro_after_init = {
.hdrsize = 0,
.name = FOU_GENL_NAME,
.version = FOU_GENL_VERSION,
.maxattr = FOU_ATTR_MAX,
- .policy = fou_nl_policy,
+ .policy = fou_nl_policy,
.netnsok = true,
.module = THIS_MODULE,
.small_ops = fou_nl_ops,
diff --git a/net/ipv4/fou_nl.c b/net/ipv4/fou_nl.c
new file mode 100644
index 000000000000..6c3820f41dd5
--- /dev/null
+++ b/net/ipv4/fou_nl.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: BSD-3-Clause
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN kernel source */
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include "fou_nl.h"
+
+#include <linux/fou.h>
+
+/* Global operation policy for fou */
+const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1] = {
+ [FOU_ATTR_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_AF] = { .type = NLA_U8, },
+ [FOU_ATTR_IPPROTO] = { .type = NLA_U8, },
+ [FOU_ATTR_TYPE] = { .type = NLA_U8, },
+ [FOU_ATTR_REMCSUM_NOPARTIAL] = { .type = NLA_FLAG, },
+ [FOU_ATTR_LOCAL_V4] = { .type = NLA_U32, },
+ [FOU_ATTR_LOCAL_V6] = { .len = 16, },
+ [FOU_ATTR_PEER_V4] = { .type = NLA_U32, },
+ [FOU_ATTR_PEER_V6] = { .len = 16, },
+ [FOU_ATTR_PEER_PORT] = { .type = NLA_U16, },
+ [FOU_ATTR_IFINDEX] = { .type = NLA_S32, },
+};
+
+/* Ops table for fou */
+const struct genl_small_ops fou_nl_ops[3] = {
+ {
+ .cmd = FOU_CMD_ADD,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = fou_nl_add_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_DEL,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = fou_nl_del_doit,
+ .flags = GENL_ADMIN_PERM,
+ },
+ {
+ .cmd = FOU_CMD_GET,
+ .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+ .doit = fou_nl_get_doit,
+ .dumpit = fou_nl_get_dumpit,
+ },
+};
diff --git a/net/ipv4/fou_nl.h b/net/ipv4/fou_nl.h
new file mode 100644
index 000000000000..b7a68121ce6f
--- /dev/null
+++ b/net/ipv4/fou_nl.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: BSD-3-Clause */
+/* Do not edit directly, auto-generated from: */
+/* Documentation/netlink/specs/fou.yaml */
+/* YNL-GEN kernel header */
+
+#ifndef _LINUX_FOU_GEN_H
+#define _LINUX_FOU_GEN_H
+
+#include <net/netlink.h>
+#include <net/genetlink.h>
+
+#include <linux/fou.h>
+
+/* Global operation policy for fou */
+extern const struct nla_policy fou_nl_policy[FOU_ATTR_IFINDEX + 1];
+
+/* Ops table for fou */
+extern const struct genl_small_ops fou_nl_ops[3];
+
+int fou_nl_add_doit(struct sk_buff *skb, struct genl_info *info);
+int fou_nl_del_doit(struct sk_buff *skb, struct genl_info *info);
+int fou_nl_get_doit(struct sk_buff *skb, struct genl_info *info);
+int fou_nl_get_dumpit(struct sk_buff *skb, struct netlink_callback *cb);
+
+#endif /* _LINUX_FOU_GEN_H */
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index 46aa2d65e40a..8cebb476b3ab 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -296,6 +296,7 @@ static bool icmpv4_global_allow(struct net *net, int type, int code)
if (icmp_global_allow())
return true;
+ __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -325,6 +326,8 @@ static bool icmpv4_xrlim_allow(struct net *net, struct rtable *rt,
if (peer)
inet_putpeer(peer);
out:
+ if (!rc)
+ __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITHOST);
return rc;
}
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index b366ab9148f2..65ad4251f6fd 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -117,7 +117,7 @@ bool inet_rcv_saddr_any(const struct sock *sk)
return !sk->sk_rcv_saddr;
}
-void inet_get_local_port_range(struct net *net, int *low, int *high)
+void inet_get_local_port_range(const struct net *net, int *low, int *high)
{
unsigned int seq;
@@ -130,6 +130,27 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
}
EXPORT_SYMBOL(inet_get_local_port_range);
+void inet_sk_get_local_port_range(const struct sock *sk, int *low, int *high)
+{
+ const struct inet_sock *inet = inet_sk(sk);
+ const struct net *net = sock_net(sk);
+ int lo, hi, sk_lo, sk_hi;
+
+ inet_get_local_port_range(net, &lo, &hi);
+
+ sk_lo = inet->local_port_range.lo;
+ sk_hi = inet->local_port_range.hi;
+
+ if (unlikely(lo <= sk_lo && sk_lo <= hi))
+ lo = sk_lo;
+ if (unlikely(lo <= sk_hi && sk_hi <= hi))
+ hi = sk_hi;
+
+ *low = lo;
+ *high = hi;
+}
+EXPORT_SYMBOL(inet_sk_get_local_port_range);
+
static bool inet_use_bhash2_on_bind(const struct sock *sk)
{
#if IS_ENABLED(CONFIG_IPV6)
@@ -173,22 +194,40 @@ static bool inet_bind_conflict(const struct sock *sk, struct sock *sk2,
return false;
}
+static bool __inet_bhash2_conflict(const struct sock *sk, struct sock *sk2,
+ kuid_t sk_uid, bool relax,
+ bool reuseport_cb_ok, bool reuseport_ok)
+{
+ if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
+ return false;
+
+ return inet_bind_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok);
+}
+
static bool inet_bhash2_conflict(const struct sock *sk,
const struct inet_bind2_bucket *tb2,
kuid_t sk_uid,
bool relax, bool reuseport_cb_ok,
bool reuseport_ok)
{
+ struct inet_timewait_sock *tw2;
struct sock *sk2;
sk_for_each_bound_bhash2(sk2, &tb2->owners) {
- if (sk->sk_family == AF_INET && ipv6_only_sock(sk2))
- continue;
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
+ return true;
+ }
+
+ twsk_for_each_bound_bhash2(tw2, &tb2->deathrow) {
+ sk2 = (struct sock *)tw2;
- if (inet_bind_conflict(sk, sk2, sk_uid, relax,
- reuseport_cb_ok, reuseport_ok))
+ if (__inet_bhash2_conflict(sk, sk2, sk_uid, relax,
+ reuseport_cb_ok, reuseport_ok))
return true;
}
+
return false;
}
@@ -298,7 +337,7 @@ inet_csk_find_open_port(const struct sock *sk, struct inet_bind_bucket **tb_ret,
ports_exhausted:
attempt_half = (sk->sk_reuse == SK_CAN_REUSE) ? 1 : 0;
other_half_scan:
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
high++; /* [32768, 60999] -> [32768, 61000[ */
if (high - low < 4)
attempt_half = 0;
@@ -1083,8 +1122,7 @@ static void inet_clone_ulp(const struct request_sock *req, struct sock *newsk,
if (!icsk->icsk_ulp_ops)
return;
- if (icsk->icsk_ulp_ops->clone)
- icsk->icsk_ulp_ops->clone(req, newsk, priority);
+ icsk->icsk_ulp_ops->clone(req, newsk, priority);
}
/**
@@ -1160,8 +1198,6 @@ void inet_csk_destroy_sock(struct sock *sk)
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
-
this_cpu_dec(*sk->sk_prot->orphan_count);
sock_put(sk);
@@ -1182,20 +1218,31 @@ void inet_csk_prepare_forced_close(struct sock *sk)
}
EXPORT_SYMBOL(inet_csk_prepare_forced_close);
+static int inet_ulp_can_listen(const struct sock *sk)
+{
+ const struct inet_connection_sock *icsk = inet_csk(sk);
+
+ if (icsk->icsk_ulp_ops && !icsk->icsk_ulp_ops->clone)
+ return -EINVAL;
+
+ return 0;
+}
+
int inet_csk_listen_start(struct sock *sk)
{
struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_sock *inet = inet_sk(sk);
int err;
+ err = inet_ulp_can_listen(sk);
+ if (unlikely(err))
+ return err;
+
reqsk_queue_alloc(&icsk->icsk_accept_queue);
sk->sk_ack_backlog = 0;
inet_csk_delack_init(sk);
- if (sk->sk_txrehash == SOCK_TXREHASH_DEFAULT)
- sk->sk_txrehash = READ_ONCE(sock_net(sk)->core.sysctl_txrehash);
-
/* There is race window here: we announce ourselves listening,
* but this transition is still not validated by get_port().
* It is OK, because this socket enters to hash table only
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index d039b4e732a3..e41fdc38ce19 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -116,6 +116,7 @@ static void inet_bind2_bucket_init(struct inet_bind2_bucket *tb,
#endif
tb->rcv_saddr = sk->sk_rcv_saddr;
INIT_HLIST_HEAD(&tb->owners);
+ INIT_HLIST_HEAD(&tb->deathrow);
hlist_add_head(&tb->node, &head->chain);
}
@@ -137,7 +138,7 @@ struct inet_bind2_bucket *inet_bind2_bucket_create(struct kmem_cache *cachep,
/* Caller must hold hashbucket lock for this tb with local BH disabled */
void inet_bind2_bucket_destroy(struct kmem_cache *cachep, struct inet_bind2_bucket *tb)
{
- if (hlist_empty(&tb->owners)) {
+ if (hlist_empty(&tb->owners) && hlist_empty(&tb->deathrow)) {
__hlist_del(&tb->node);
kmem_cache_free(cachep, tb);
}
@@ -649,8 +650,20 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
spin_lock(lock);
if (osk) {
WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
- ret = sk_nulls_del_node_init_rcu(osk);
- } else if (found_dup_sk) {
+ ret = sk_hashed(osk);
+ if (ret) {
+ /* Before deleting the node, we insert a new one to make
+ * sure that the look-up-sk process would not miss either
+ * of them and that at least one node would exist in ehash
+ * table all the time. Otherwise there's a tiny chance
+ * that lookup process could find nothing in ehash table.
+ */
+ __sk_nulls_add_node_tail_rcu(sk, list);
+ sk_nulls_del_node_init_rcu(osk);
+ }
+ goto unlock;
+ }
+ if (found_dup_sk) {
*found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
if (*found_dup_sk)
ret = false;
@@ -659,6 +672,7 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
if (ret)
__sk_nulls_add_node_rcu(sk, list);
+unlock:
spin_unlock(lock);
return ret;
@@ -994,17 +1008,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
u32 index;
if (port) {
- head = &hinfo->bhash[inet_bhashfn(net, port,
- hinfo->bhash_size)];
- tb = inet_csk(sk)->icsk_bind_hash;
- spin_lock_bh(&head->lock);
- if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
- inet_ehash_nolisten(sk, NULL, NULL);
- spin_unlock_bh(&head->lock);
- return 0;
- }
- spin_unlock(&head->lock);
- /* No definite answer... Walk to established hash table */
+ local_bh_disable();
ret = check_established(death_row, sk, port, NULL);
local_bh_enable();
return ret;
@@ -1012,7 +1016,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
l3mdev = inet_sk_bound_l3mdev(sk);
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
high++; /* [32768, 60999] -> [32768, 61000[ */
remaining = high - low;
if (likely(remaining > 1))
@@ -1103,15 +1107,16 @@ ok:
/* Head lock still held and bh's disabled */
inet_bind_hash(sk, tb, tb2, port);
- spin_unlock(&head2->lock);
-
if (sk_unhashed(sk)) {
inet_sk(sk)->inet_sport = htons(port);
inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
}
if (tw)
inet_twsk_bind_unhash(tw, hinfo);
+
+ spin_unlock(&head2->lock);
spin_unlock(&head->lock);
+
if (tw)
inet_twsk_deschedule_put(tw);
local_bh_enable();
diff --git a/net/ipv4/inet_timewait_sock.c b/net/ipv4/inet_timewait_sock.c
index 66fc940f9521..40052414c7c7 100644
--- a/net/ipv4/inet_timewait_sock.c
+++ b/net/ipv4/inet_timewait_sock.c
@@ -29,6 +29,7 @@
void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
struct inet_hashinfo *hashinfo)
{
+ struct inet_bind2_bucket *tb2 = tw->tw_tb2;
struct inet_bind_bucket *tb = tw->tw_tb;
if (!tb)
@@ -37,6 +38,11 @@ void inet_twsk_bind_unhash(struct inet_timewait_sock *tw,
__hlist_del(&tw->tw_bind_node);
tw->tw_tb = NULL;
inet_bind_bucket_destroy(hashinfo->bind_bucket_cachep, tb);
+
+ __hlist_del(&tw->tw_bind2_node);
+ tw->tw_tb2 = NULL;
+ inet_bind2_bucket_destroy(hashinfo->bind2_bucket_cachep, tb2);
+
__sock_put((struct sock *)tw);
}
@@ -45,7 +51,7 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
{
struct inet_hashinfo *hashinfo = tw->tw_dr->hashinfo;
spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
spin_lock(lock);
sk_nulls_del_node_init_rcu((struct sock *)tw);
@@ -54,9 +60,13 @@ static void inet_twsk_kill(struct inet_timewait_sock *tw)
/* Disassociate with bind bucket. */
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, (struct sock *)tw,
+ twsk_net(tw), tw->tw_num);
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
inet_twsk_bind_unhash(tw, hashinfo);
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
refcount_dec(&tw->tw_dr->tw_refcount);
@@ -67,9 +77,6 @@ void inet_twsk_free(struct inet_timewait_sock *tw)
{
struct module *owner = tw->tw_prot->owner;
twsk_destructor((struct sock *)tw);
-#ifdef SOCK_REFCNT_DEBUG
- pr_debug("%s timewait_sock %p released\n", tw->tw_prot->name, tw);
-#endif
kmem_cache_free(tw->tw_prot->twsk_prot->twsk_slab, tw);
module_put(owner);
}
@@ -81,10 +88,10 @@ void inet_twsk_put(struct inet_timewait_sock *tw)
}
EXPORT_SYMBOL_GPL(inet_twsk_put);
-static void inet_twsk_add_node_rcu(struct inet_timewait_sock *tw,
- struct hlist_nulls_head *list)
+static void inet_twsk_add_node_tail_rcu(struct inet_timewait_sock *tw,
+ struct hlist_nulls_head *list)
{
- hlist_nulls_add_head_rcu(&tw->tw_node, list);
+ hlist_nulls_add_tail_rcu(&tw->tw_node, list);
}
static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
@@ -93,6 +100,12 @@ static void inet_twsk_add_bind_node(struct inet_timewait_sock *tw,
hlist_add_head(&tw->tw_bind_node, list);
}
+static void inet_twsk_add_bind2_node(struct inet_timewait_sock *tw,
+ struct hlist_head *list)
+{
+ hlist_add_head(&tw->tw_bind2_node, list);
+}
+
/*
* Enter the time wait state. This is called with locally disabled BH.
* Essentially we whip up a timewait bucket, copy the relevant info into it
@@ -105,22 +118,33 @@ void inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
const struct inet_connection_sock *icsk = inet_csk(sk);
struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
- struct inet_bind_hashbucket *bhead;
+ struct inet_bind_hashbucket *bhead, *bhead2;
+
/* Step 1: Put TW into bind hash. Original socket stays there too.
Note, that any socket with inet->num != 0 MUST be bound in
binding cache, even if it is closed.
*/
bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), inet->inet_num,
hashinfo->bhash_size)];
+ bhead2 = inet_bhashfn_portaddr(hashinfo, sk, twsk_net(tw), inet->inet_num);
+
spin_lock(&bhead->lock);
+ spin_lock(&bhead2->lock);
+
tw->tw_tb = icsk->icsk_bind_hash;
WARN_ON(!icsk->icsk_bind_hash);
inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
+
+ tw->tw_tb2 = icsk->icsk_bind2_hash;
+ WARN_ON(!icsk->icsk_bind2_hash);
+ inet_twsk_add_bind2_node(tw, &tw->tw_tb2->deathrow);
+
+ spin_unlock(&bhead2->lock);
spin_unlock(&bhead->lock);
spin_lock(lock);
- inet_twsk_add_node_rcu(tw, &ehead->chain);
+ inet_twsk_add_node_tail_rcu(tw, &ehead->chain);
/* Step 3: Remove SK from hash chain */
if (__sk_nulls_del_node_init_rcu(sk))
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index e880ce77322a..fe9ead9ee863 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -511,7 +511,7 @@ static struct sk_buff *ip_rcv_core(struct sk_buff *skb, struct net *net)
if (unlikely(ip_fast_csum((u8 *)iph, iph->ihl)))
goto csum_error;
- len = ntohs(iph->tot_len);
+ len = iph_totlen(skb, iph);
if (skb->len < len) {
drop_reason = SKB_DROP_REASON_PKT_TOO_SMALL;
__IP_INC_STATS(net, IPSTATS_MIB_INTRUNCATEDPKTS);
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 922c87ef1ab5..4e4e308c3230 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -100,7 +100,7 @@ int __ip_local_out(struct net *net, struct sock *sk, struct sk_buff *skb)
{
struct iphdr *iph = ip_hdr(skb);
- iph->tot_len = htons(skb->len);
+ iph_set_totlen(iph, skb->len);
ip_send_check(iph);
/* if egress device is enslaved to an L3 master device pass the
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 9f92ae35bb01..b511ff0adc0a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -923,6 +923,7 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
case IP_CHECKSUM:
case IP_RECVFRAGSIZE:
case IP_RECVERR_RFC4884:
+ case IP_LOCAL_PORT_RANGE:
if (optlen >= sizeof(int)) {
if (copy_from_sockptr(&val, optval, sizeof(val)))
return -EFAULT;
@@ -1365,6 +1366,20 @@ int do_ip_setsockopt(struct sock *sk, int level, int optname,
WRITE_ONCE(inet->min_ttl, val);
break;
+ case IP_LOCAL_PORT_RANGE:
+ {
+ const __u16 lo = val;
+ const __u16 hi = val >> 16;
+
+ if (optlen != sizeof(__u32))
+ goto e_inval;
+ if (lo != 0 && hi != 0 && lo > hi)
+ goto e_inval;
+
+ inet->local_port_range.lo = lo;
+ inet->local_port_range.hi = hi;
+ break;
+ }
default:
err = -ENOPROTOOPT;
break;
@@ -1743,6 +1758,9 @@ int do_ip_getsockopt(struct sock *sk, int level, int optname,
case IP_MINTTL:
val = inet->min_ttl;
break;
+ case IP_LOCAL_PORT_RANGE:
+ val = inet->local_port_range.hi << 16 | inet->local_port_range.lo;
+ break;
default:
sockopt_release_sock(sk);
return -ENOPROTOOPT;
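IP_LOCAL_PORT_RANGE packs both bounds into one u32 (low 16 bits = lower bound, high 16 bits = upper bound, zero meaning unrestricted), exactly as the set/get paths above encode it, and inet_sk_get_local_port_range() only lets the per-socket values narrow the netns-wide range. A userspace sketch, assuming IP_LOCAL_PORT_RANGE is available from the uapi headers:

	/* Restrict this socket's ephemeral ports to [40000, 40999]. */
	uint32_t range = (40999u << 16) | 40000u;	/* hi << 16 | lo */

	if (setsockopt(fd, IPPROTO_IP, IP_LOCAL_PORT_RANGE,
		       &range, sizeof(range)) < 0)
		perror("setsockopt(IP_LOCAL_PORT_RANGE)");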
diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c
index 7fcfdfd8f9de..0e3ee1532848 100644
--- a/net/ipv4/metrics.c
+++ b/net/ipv4/metrics.c
@@ -1,5 +1,6 @@
// SPDX-License-Identifier: GPL-2.0-only
#include <linux/netlink.h>
+#include <linux/nospec.h>
#include <linux/rtnetlink.h>
#include <linux/types.h>
#include <net/ip.h>
@@ -25,6 +26,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx,
return -EINVAL;
}
+ type = array_index_nospec(type, RTAX_MAX + 1);
if (type == RTAX_CC_ALGO) {
char tmp[TCP_CA_NAME_MAX];
diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig
index aab384126f61..f71a7e9a7de6 100644
--- a/net/ipv4/netfilter/Kconfig
+++ b/net/ipv4/netfilter/Kconfig
@@ -259,20 +259,6 @@ config IP_NF_MANGLE
To compile it as a module, choose M here. If unsure, say N.
-config IP_NF_TARGET_CLUSTERIP
- tristate "CLUSTERIP target support"
- depends on IP_NF_MANGLE
- depends on NF_CONNTRACK
- depends on NETFILTER_ADVANCED
- select NF_CONNTRACK_MARK
- select NETFILTER_FAMILY_ARP
- help
- The CLUSTERIP target allows you to build load-balancing clusters of
- network servers without having a dedicated load-balancing
- router/server/switch.
-
- To compile it as a module, choose M here. If unsure, say N.
-
config IP_NF_TARGET_ECN
tristate "ECN target support"
depends on IP_NF_MANGLE
diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile
index 93bad1184251..5a26f9de1ab9 100644
--- a/net/ipv4/netfilter/Makefile
+++ b/net/ipv4/netfilter/Makefile
@@ -39,7 +39,6 @@ obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o
# targets
-obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o
obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o
obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o
obj-$(CONFIG_IP_NF_TARGET_SYNPROXY) += ipt_SYNPROXY.o
diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c
index ffc0cab7cf18..2407066b0fec 100644
--- a/net/ipv4/netfilter/arp_tables.c
+++ b/net/ipv4/netfilter/arp_tables.c
@@ -1525,6 +1525,10 @@ int arpt_register_table(struct net *net,
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
+ struct arpt_entry *iter;
+
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
xt_free_table_info(newinfo);
return PTR_ERR(new_table);
}
diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c
index 2ed7c58b471a..da5998011ab9 100644
--- a/net/ipv4/netfilter/ip_tables.c
+++ b/net/ipv4/netfilter/ip_tables.c
@@ -1045,7 +1045,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_counters *counters;
struct ipt_entry *iter;
- ret = 0;
counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
@@ -1091,7 +1090,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("iptables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- return ret;
+ return 0;
put_module:
module_put(t->me);
@@ -1742,6 +1741,10 @@ int ipt_register_table(struct net *net, const struct xt_table *table,
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
+ struct ipt_entry *iter;
+
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
xt_free_table_info(newinfo);
return PTR_ERR(new_table);
}
diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c
deleted file mode 100644
index b3cc416ed292..000000000000
--- a/net/ipv4/netfilter/ipt_CLUSTERIP.c
+++ /dev/null
@@ -1,929 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Cluster IP hashmark target
- * (C) 2003-2004 by Harald Welte <laforge@netfilter.org>
- * based on ideas of Fabio Olive Leite <olive@unixforge.org>
- *
- * Development of this code funded by SuSE Linux AG, https://www.suse.com/
- */
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-#include <linux/module.h>
-#include <linux/proc_fs.h>
-#include <linux/jhash.h>
-#include <linux/bitops.h>
-#include <linux/skbuff.h>
-#include <linux/slab.h>
-#include <linux/ip.h>
-#include <linux/tcp.h>
-#include <linux/udp.h>
-#include <linux/icmp.h>
-#include <linux/if_arp.h>
-#include <linux/seq_file.h>
-#include <linux/refcount.h>
-#include <linux/netfilter_arp.h>
-#include <linux/netfilter/x_tables.h>
-#include <linux/netfilter_ipv4/ip_tables.h>
-#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
-#include <net/netfilter/nf_conntrack.h>
-#include <net/net_namespace.h>
-#include <net/netns/generic.h>
-#include <net/checksum.h>
-#include <net/ip.h>
-
-#define CLUSTERIP_VERSION "0.8"
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
-MODULE_DESCRIPTION("Xtables: CLUSTERIP target");
-
-struct clusterip_config {
- struct list_head list; /* list of all configs */
- refcount_t refcount; /* reference count */
- refcount_t entries; /* number of entries/rules
- * referencing us */
-
- __be32 clusterip; /* the IP address */
- u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
- int ifindex; /* device ifindex */
- u_int16_t num_total_nodes; /* total number of nodes */
- unsigned long local_nodes; /* node number array */
-
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *pde; /* proc dir entry */
-#endif
- enum clusterip_hashmode hash_mode; /* which hashing mode */
- u_int32_t hash_initval; /* hash initialization */
- struct rcu_head rcu; /* for call_rcu */
- struct net *net; /* netns for pernet list */
- char ifname[IFNAMSIZ]; /* device ifname */
-};
-
-#ifdef CONFIG_PROC_FS
-static const struct proc_ops clusterip_proc_ops;
-#endif
-
-struct clusterip_net {
- struct list_head configs;
- /* lock protects the configs list */
- spinlock_t lock;
-
- bool clusterip_deprecated_warning;
-#ifdef CONFIG_PROC_FS
- struct proc_dir_entry *procdir;
- /* mutex protects the config->pde*/
- struct mutex mutex;
-#endif
- unsigned int hook_users;
-};
-
-static unsigned int clusterip_arp_mangle(void *priv, struct sk_buff *skb, const struct nf_hook_state *state);
-
-static const struct nf_hook_ops cip_arp_ops = {
- .hook = clusterip_arp_mangle,
- .pf = NFPROTO_ARP,
- .hooknum = NF_ARP_OUT,
- .priority = -1
-};
-
-static unsigned int clusterip_net_id __read_mostly;
-static inline struct clusterip_net *clusterip_pernet(struct net *net)
-{
- return net_generic(net, clusterip_net_id);
-}
-
-static inline void
-clusterip_config_get(struct clusterip_config *c)
-{
- refcount_inc(&c->refcount);
-}
-
-static void clusterip_config_rcu_free(struct rcu_head *head)
-{
- struct clusterip_config *config;
- struct net_device *dev;
-
- config = container_of(head, struct clusterip_config, rcu);
- dev = dev_get_by_name(config->net, config->ifname);
- if (dev) {
- dev_mc_del(dev, config->clustermac);
- dev_put(dev);
- }
- kfree(config);
-}
-
-static inline void
-clusterip_config_put(struct clusterip_config *c)
-{
- if (refcount_dec_and_test(&c->refcount))
- call_rcu(&c->rcu, clusterip_config_rcu_free);
-}
-
-/* decrease the count of entries using/referencing this config. If last
- * entry(rule) is removed, remove the config from lists, but don't free it
- * yet, since proc-files could still be holding references */
-static inline void
-clusterip_config_entry_put(struct clusterip_config *c)
-{
- struct clusterip_net *cn = clusterip_pernet(c->net);
-
- local_bh_disable();
- if (refcount_dec_and_lock(&c->entries, &cn->lock)) {
- list_del_rcu(&c->list);
- spin_unlock(&cn->lock);
- local_bh_enable();
- /* In case anyone still accesses the file, the open/close
- * functions are also incrementing the refcount on their own,
- * so it's safe to remove the entry even if it's in use. */
-#ifdef CONFIG_PROC_FS
- mutex_lock(&cn->mutex);
- if (cn->procdir)
- proc_remove(c->pde);
- mutex_unlock(&cn->mutex);
-#endif
- return;
- }
- local_bh_enable();
-}
-
-static struct clusterip_config *
-__clusterip_config_find(struct net *net, __be32 clusterip)
-{
- struct clusterip_config *c;
- struct clusterip_net *cn = clusterip_pernet(net);
-
- list_for_each_entry_rcu(c, &cn->configs, list) {
- if (c->clusterip == clusterip)
- return c;
- }
-
- return NULL;
-}
-
-static inline struct clusterip_config *
-clusterip_config_find_get(struct net *net, __be32 clusterip, int entry)
-{
- struct clusterip_config *c;
-
- rcu_read_lock_bh();
- c = __clusterip_config_find(net, clusterip);
- if (c) {
-#ifdef CONFIG_PROC_FS
- if (!c->pde)
- c = NULL;
- else
-#endif
- if (unlikely(!refcount_inc_not_zero(&c->refcount)))
- c = NULL;
- else if (entry) {
- if (unlikely(!refcount_inc_not_zero(&c->entries))) {
- clusterip_config_put(c);
- c = NULL;
- }
- }
- }
- rcu_read_unlock_bh();
-
- return c;
-}
-
-static void
-clusterip_config_init_nodelist(struct clusterip_config *c,
- const struct ipt_clusterip_tgt_info *i)
-{
- int n;
-
- for (n = 0; n < i->num_local_nodes; n++)
- set_bit(i->local_nodes[n] - 1, &c->local_nodes);
-}
-
-static int
-clusterip_netdev_event(struct notifier_block *this, unsigned long event,
- void *ptr)
-{
- struct net_device *dev = netdev_notifier_info_to_dev(ptr);
- struct net *net = dev_net(dev);
- struct clusterip_net *cn = clusterip_pernet(net);
- struct clusterip_config *c;
-
- spin_lock_bh(&cn->lock);
- list_for_each_entry_rcu(c, &cn->configs, list) {
- switch (event) {
- case NETDEV_REGISTER:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- }
- break;
- case NETDEV_UNREGISTER:
- if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- case NETDEV_CHANGENAME:
- if (!strcmp(dev->name, c->ifname)) {
- c->ifindex = dev->ifindex;
- dev_mc_add(dev, c->clustermac);
- } else if (dev->ifindex == c->ifindex) {
- dev_mc_del(dev, c->clustermac);
- c->ifindex = -1;
- }
- break;
- }
- }
- spin_unlock_bh(&cn->lock);
-
- return NOTIFY_DONE;
-}
-
-static struct clusterip_config *
-clusterip_config_init(struct net *net, const struct ipt_clusterip_tgt_info *i,
- __be32 ip, const char *iniface)
-{
- struct clusterip_net *cn = clusterip_pernet(net);
- struct clusterip_config *c;
- struct net_device *dev;
- int err;
-
- if (iniface[0] == '\0') {
- pr_info("Please specify an interface name\n");
- return ERR_PTR(-EINVAL);
- }
-
- c = kzalloc(sizeof(*c), GFP_ATOMIC);
- if (!c)
- return ERR_PTR(-ENOMEM);
-
- dev = dev_get_by_name(net, iniface);
- if (!dev) {
- pr_info("no such interface %s\n", iniface);
- kfree(c);
- return ERR_PTR(-ENOENT);
- }
- c->ifindex = dev->ifindex;
- strcpy(c->ifname, dev->name);
- memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
- dev_mc_add(dev, c->clustermac);
- dev_put(dev);
-
- c->clusterip = ip;
- c->num_total_nodes = i->num_total_nodes;
- clusterip_config_init_nodelist(c, i);
- c->hash_mode = i->hash_mode;
- c->hash_initval = i->hash_initval;
- c->net = net;
- refcount_set(&c->refcount, 1);
-
- spin_lock_bh(&cn->lock);
- if (__clusterip_config_find(net, ip)) {
- err = -EBUSY;
- goto out_config_put;
- }
-
- list_add_rcu(&c->list, &cn->configs);
- spin_unlock_bh(&cn->lock);
-
-#ifdef CONFIG_PROC_FS
- {
- char buffer[16];
-
- /* create proc dir entry */
- sprintf(buffer, "%pI4", &ip);
- mutex_lock(&cn->mutex);
- c->pde = proc_create_data(buffer, 0600,
- cn->procdir,
- &clusterip_proc_ops, c);
- mutex_unlock(&cn->mutex);
- if (!c->pde) {
- err = -ENOMEM;
- goto err;
- }
- }
-#endif
-
- refcount_set(&c->entries, 1);
- return c;
-
-#ifdef CONFIG_PROC_FS
-err:
-#endif
- spin_lock_bh(&cn->lock);
- list_del_rcu(&c->list);
-out_config_put:
- spin_unlock_bh(&cn->lock);
- clusterip_config_put(c);
- return ERR_PTR(err);
-}
-
-#ifdef CONFIG_PROC_FS
-static int
-clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
-{
-
- if (nodenum == 0 ||
- nodenum > c->num_total_nodes)
- return 1;
-
- /* check if we already have this number in our bitfield */
- if (test_and_set_bit(nodenum - 1, &c->local_nodes))
- return 1;
-
- return 0;
-}
-
-static bool
-clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
-{
- if (nodenum == 0 ||
- nodenum > c->num_total_nodes)
- return true;
-
- if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
- return false;
-
- return true;
-}
-#endif
-
-static inline u_int32_t
-clusterip_hashfn(const struct sk_buff *skb,
- const struct clusterip_config *config)
-{
- const struct iphdr *iph = ip_hdr(skb);
- unsigned long hashval;
- u_int16_t sport = 0, dport = 0;
- int poff;
-
- poff = proto_ports_offset(iph->protocol);
- if (poff >= 0) {
- const u_int16_t *ports;
- u16 _ports[2];
-
- ports = skb_header_pointer(skb, iph->ihl * 4 + poff, 4, _ports);
- if (ports) {
- sport = ports[0];
- dport = ports[1];
- }
- } else {
- net_info_ratelimited("unknown protocol %u\n", iph->protocol);
- }
-
- switch (config->hash_mode) {
- case CLUSTERIP_HASHMODE_SIP:
- hashval = jhash_1word(ntohl(iph->saddr),
- config->hash_initval);
- break;
- case CLUSTERIP_HASHMODE_SIP_SPT:
- hashval = jhash_2words(ntohl(iph->saddr), sport,
- config->hash_initval);
- break;
- case CLUSTERIP_HASHMODE_SIP_SPT_DPT:
- hashval = jhash_3words(ntohl(iph->saddr), sport, dport,
- config->hash_initval);
- break;
- default:
- /* to make gcc happy */
- hashval = 0;
- /* This cannot happen, unless the check function wasn't called
- * at rule load time */
- pr_info("unknown mode %u\n", config->hash_mode);
- BUG();
- break;
- }
-
- /* node numbers are 1..n, not 0..n */
- return reciprocal_scale(hashval, config->num_total_nodes) + 1;
-}
-
-static inline int
-clusterip_responsible(const struct clusterip_config *config, u_int32_t hash)
-{
- return test_bit(hash - 1, &config->local_nodes);
-}
-
-/***********************************************************************
- * IPTABLES TARGET
- ***********************************************************************/
-
-static unsigned int
-clusterip_tg(struct sk_buff *skb, const struct xt_action_param *par)
-{
- const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
- struct nf_conn *ct;
- enum ip_conntrack_info ctinfo;
- u_int32_t hash;
-
- /* don't need to clusterip_config_get() here, since refcount
- * is only decremented by destroy() - and ip_tables guarantees
- * that the ->target() function isn't called after ->destroy() */
-
- ct = nf_ct_get(skb, &ctinfo);
- if (ct == NULL)
- return NF_DROP;
-
- /* special case: ICMP error handling. conntrack distinguishes between
- * error messages (RELATED) and information requests (see below) */
- if (ip_hdr(skb)->protocol == IPPROTO_ICMP &&
- (ctinfo == IP_CT_RELATED ||
- ctinfo == IP_CT_RELATED_REPLY))
- return XT_CONTINUE;
-
- /* nf_conntrack_proto_icmp guarantees us that we only have ICMP_ECHO,
- * TIMESTAMP, INFO_REQUEST or ICMP_ADDRESS type icmp packets from here
- * on, which all have an ID field [relevant for hashing]. */
-
- hash = clusterip_hashfn(skb, cipinfo->config);
-
- switch (ctinfo) {
- case IP_CT_NEW:
- WRITE_ONCE(ct->mark, hash);
- break;
- case IP_CT_RELATED:
- case IP_CT_RELATED_REPLY:
- /* FIXME: we don't handle expectations at the moment.
- * They can arrive on a different node than
- * the master connection (e.g. FTP passive mode) */
- case IP_CT_ESTABLISHED:
- case IP_CT_ESTABLISHED_REPLY:
- break;
- default: /* Prevent gcc warnings */
- break;
- }
-
-#ifdef DEBUG
- nf_ct_dump_tuple_ip(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
-#endif
- pr_debug("hash=%u ct_hash=%u ", hash, READ_ONCE(ct->mark));
- if (!clusterip_responsible(cipinfo->config, hash)) {
- pr_debug("not responsible\n");
- return NF_DROP;
- }
- pr_debug("responsible\n");
-
- /* despite being received via linklayer multicast, this is
- * actually a unicast IP packet. TCP doesn't like PACKET_MULTICAST */
- skb->pkt_type = PACKET_HOST;
-
- return XT_CONTINUE;
-}
-
-static int clusterip_tg_check(const struct xt_tgchk_param *par)
-{
- struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
- struct clusterip_net *cn = clusterip_pernet(par->net);
- const struct ipt_entry *e = par->entryinfo;
- struct clusterip_config *config;
- int ret, i;
-
- if (par->nft_compat) {
- pr_err("cannot use CLUSTERIP target from nftables compat\n");
- return -EOPNOTSUPP;
- }
-
- if (cn->hook_users == UINT_MAX)
- return -EOVERFLOW;
-
- if (cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP &&
- cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT &&
- cipinfo->hash_mode != CLUSTERIP_HASHMODE_SIP_SPT_DPT) {
- pr_info("unknown mode %u\n", cipinfo->hash_mode);
- return -EINVAL;
-
- }
- if (e->ip.dmsk.s_addr != htonl(0xffffffff) ||
- e->ip.dst.s_addr == 0) {
- pr_info("Please specify destination IP\n");
- return -EINVAL;
- }
- if (cipinfo->num_local_nodes > ARRAY_SIZE(cipinfo->local_nodes)) {
- pr_info("bad num_local_nodes %u\n", cipinfo->num_local_nodes);
- return -EINVAL;
- }
- for (i = 0; i < cipinfo->num_local_nodes; i++) {
- if (cipinfo->local_nodes[i] - 1 >=
- sizeof(config->local_nodes) * 8) {
- pr_info("bad local_nodes[%d] %u\n",
- i, cipinfo->local_nodes[i]);
- return -EINVAL;
- }
- }
-
- config = clusterip_config_find_get(par->net, e->ip.dst.s_addr, 1);
- if (!config) {
- if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
- pr_info("no config found for %pI4, need 'new'\n",
- &e->ip.dst.s_addr);
- return -EINVAL;
- } else {
- config = clusterip_config_init(par->net, cipinfo,
- e->ip.dst.s_addr,
- e->ip.iniface);
- if (IS_ERR(config))
- return PTR_ERR(config);
- }
- } else if (memcmp(&config->clustermac, &cipinfo->clustermac, ETH_ALEN)) {
- clusterip_config_entry_put(config);
- clusterip_config_put(config);
- return -EINVAL;
- }
-
- ret = nf_ct_netns_get(par->net, par->family);
- if (ret < 0) {
- pr_info("cannot load conntrack support for proto=%u\n",
- par->family);
- clusterip_config_entry_put(config);
- clusterip_config_put(config);
- return ret;
- }
-
- if (cn->hook_users == 0) {
- ret = nf_register_net_hook(par->net, &cip_arp_ops);
-
- if (ret < 0) {
- clusterip_config_entry_put(config);
- clusterip_config_put(config);
- nf_ct_netns_put(par->net, par->family);
- return ret;
- }
- }
-
- cn->hook_users++;
-
- if (!cn->clusterip_deprecated_warning) {
- pr_info("ipt_CLUSTERIP is deprecated and it will removed soon, "
- "use xt_cluster instead\n");
- cn->clusterip_deprecated_warning = true;
- }
-
- cipinfo->config = config;
- return ret;
-}
-
-/* drop reference count of cluster config when rule is deleted */
-static void clusterip_tg_destroy(const struct xt_tgdtor_param *par)
-{
- const struct ipt_clusterip_tgt_info *cipinfo = par->targinfo;
- struct clusterip_net *cn = clusterip_pernet(par->net);
-
- /* if no more entries are referencing the config, remove it
- * from the list and destroy the proc entry */
- clusterip_config_entry_put(cipinfo->config);
-
- clusterip_config_put(cipinfo->config);
-
- nf_ct_netns_put(par->net, par->family);
- cn->hook_users--;
-
- if (cn->hook_users == 0)
- nf_unregister_net_hook(par->net, &cip_arp_ops);
-}
-
-#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
-struct compat_ipt_clusterip_tgt_info
-{
- u_int32_t flags;
- u_int8_t clustermac[6];
- u_int16_t num_total_nodes;
- u_int16_t num_local_nodes;
- u_int16_t local_nodes[CLUSTERIP_MAX_NODES];
- u_int32_t hash_mode;
- u_int32_t hash_initval;
- compat_uptr_t config;
-};
-#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
-
-static struct xt_target clusterip_tg_reg __read_mostly = {
- .name = "CLUSTERIP",
- .family = NFPROTO_IPV4,
- .target = clusterip_tg,
- .checkentry = clusterip_tg_check,
- .destroy = clusterip_tg_destroy,
- .targetsize = sizeof(struct ipt_clusterip_tgt_info),
- .usersize = offsetof(struct ipt_clusterip_tgt_info, config),
-#ifdef CONFIG_NETFILTER_XTABLES_COMPAT
- .compatsize = sizeof(struct compat_ipt_clusterip_tgt_info),
-#endif /* CONFIG_NETFILTER_XTABLES_COMPAT */
- .me = THIS_MODULE
-};
-
-
-/***********************************************************************
- * ARP MANGLING CODE
- ***********************************************************************/
-
-/* hardcoded for 48bit ethernet and 32bit ipv4 addresses */
-struct arp_payload {
- u_int8_t src_hw[ETH_ALEN];
- __be32 src_ip;
- u_int8_t dst_hw[ETH_ALEN];
- __be32 dst_ip;
-} __packed;
-
-#ifdef DEBUG
-static void arp_print(struct arp_payload *payload)
-{
-#define HBUFFERLEN 30
- char hbuffer[HBUFFERLEN];
- int j, k;
-
- for (k = 0, j = 0; k < HBUFFERLEN - 3 && j < ETH_ALEN; j++) {
- hbuffer[k++] = hex_asc_hi(payload->src_hw[j]);
- hbuffer[k++] = hex_asc_lo(payload->src_hw[j]);
- hbuffer[k++] = ':';
- }
- hbuffer[--k] = '\0';
-
- pr_debug("src %pI4@%s, dst %pI4\n",
- &payload->src_ip, hbuffer, &payload->dst_ip);
-}
-#endif
-
-static unsigned int
-clusterip_arp_mangle(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
-{
- struct arphdr *arp = arp_hdr(skb);
- struct arp_payload *payload;
- struct clusterip_config *c;
- struct net *net = state->net;
-
- /* we don't care about non-ethernet and non-ipv4 ARP */
- if (arp->ar_hrd != htons(ARPHRD_ETHER) ||
- arp->ar_pro != htons(ETH_P_IP) ||
- arp->ar_pln != 4 || arp->ar_hln != ETH_ALEN)
- return NF_ACCEPT;
-
- /* we only want to mangle arp requests and replies */
- if (arp->ar_op != htons(ARPOP_REPLY) &&
- arp->ar_op != htons(ARPOP_REQUEST))
- return NF_ACCEPT;
-
- payload = (void *)(arp+1);
-
- /* if there is no clusterip configuration for the arp reply's
- * source ip, we don't want to mangle it */
- c = clusterip_config_find_get(net, payload->src_ip, 0);
- if (!c)
- return NF_ACCEPT;
-
- /* normally the Linux kernel always replies to arp queries of
- * addresses on different interfaces. However, in the CLUSTERIP case
- * this wouldn't work, since we didn't subscribe the mcast group on
- * other interfaces */
- if (c->ifindex != state->out->ifindex) {
- pr_debug("not mangling arp reply on different interface: cip'%d'-skb'%d'\n",
- c->ifindex, state->out->ifindex);
- clusterip_config_put(c);
- return NF_ACCEPT;
- }
-
- /* mangle reply hardware address */
- memcpy(payload->src_hw, c->clustermac, arp->ar_hln);
-
-#ifdef DEBUG
- pr_debug("mangled arp reply: ");
- arp_print(payload);
-#endif
-
- clusterip_config_put(c);
-
- return NF_ACCEPT;
-}
-
-/***********************************************************************
- * PROC DIR HANDLING
- ***********************************************************************/
-
-#ifdef CONFIG_PROC_FS
-
-struct clusterip_seq_position {
- unsigned int pos; /* position */
- unsigned int weight; /* number of bits set == size */
- unsigned int bit; /* current bit */
- unsigned long val; /* current value */
-};
-
-static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
-{
- struct clusterip_config *c = s->private;
- unsigned int weight;
- u_int32_t local_nodes;
- struct clusterip_seq_position *idx;
-
- /* FIXME: possible race */
- local_nodes = c->local_nodes;
- weight = hweight32(local_nodes);
- if (*pos >= weight)
- return NULL;
-
- idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
- if (!idx)
- return ERR_PTR(-ENOMEM);
-
- idx->pos = *pos;
- idx->weight = weight;
- idx->bit = ffs(local_nodes);
- idx->val = local_nodes;
- clear_bit(idx->bit - 1, &idx->val);
-
- return idx;
-}
-
-static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
-{
- struct clusterip_seq_position *idx = v;
-
- *pos = ++idx->pos;
- if (*pos >= idx->weight) {
- kfree(v);
- return NULL;
- }
- idx->bit = ffs(idx->val);
- clear_bit(idx->bit - 1, &idx->val);
- return idx;
-}
-
-static void clusterip_seq_stop(struct seq_file *s, void *v)
-{
- if (!IS_ERR(v))
- kfree(v);
-}
-
-static int clusterip_seq_show(struct seq_file *s, void *v)
-{
- struct clusterip_seq_position *idx = v;
-
- if (idx->pos != 0)
- seq_putc(s, ',');
-
- seq_printf(s, "%u", idx->bit);
-
- if (idx->pos == idx->weight - 1)
- seq_putc(s, '\n');
-
- return 0;
-}
-
-static const struct seq_operations clusterip_seq_ops = {
- .start = clusterip_seq_start,
- .next = clusterip_seq_next,
- .stop = clusterip_seq_stop,
- .show = clusterip_seq_show,
-};
-
-static int clusterip_proc_open(struct inode *inode, struct file *file)
-{
- int ret = seq_open(file, &clusterip_seq_ops);
-
- if (!ret) {
- struct seq_file *sf = file->private_data;
- struct clusterip_config *c = pde_data(inode);
-
- sf->private = c;
-
- clusterip_config_get(c);
- }
-
- return ret;
-}
-
-static int clusterip_proc_release(struct inode *inode, struct file *file)
-{
- struct clusterip_config *c = pde_data(inode);
- int ret;
-
- ret = seq_release(inode, file);
-
- if (!ret)
- clusterip_config_put(c);
-
- return ret;
-}
-
-static ssize_t clusterip_proc_write(struct file *file, const char __user *input,
- size_t size, loff_t *ofs)
-{
- struct clusterip_config *c = pde_data(file_inode(file));
-#define PROC_WRITELEN 10
- char buffer[PROC_WRITELEN+1];
- unsigned long nodenum;
- int rc;
-
- if (size > PROC_WRITELEN)
- return -EIO;
- if (copy_from_user(buffer, input, size))
- return -EFAULT;
- buffer[size] = 0;
-
- if (*buffer == '+') {
- rc = kstrtoul(buffer+1, 10, &nodenum);
- if (rc)
- return rc;
- if (clusterip_add_node(c, nodenum))
- return -ENOMEM;
- } else if (*buffer == '-') {
- rc = kstrtoul(buffer+1, 10, &nodenum);
- if (rc)
- return rc;
- if (clusterip_del_node(c, nodenum))
- return -ENOENT;
- } else
- return -EIO;
-
- return size;
-}
-
-static const struct proc_ops clusterip_proc_ops = {
- .proc_open = clusterip_proc_open,
- .proc_read = seq_read,
- .proc_write = clusterip_proc_write,
- .proc_lseek = seq_lseek,
- .proc_release = clusterip_proc_release,
-};
-
-#endif /* CONFIG_PROC_FS */
-
-static int clusterip_net_init(struct net *net)
-{
- struct clusterip_net *cn = clusterip_pernet(net);
-
- INIT_LIST_HEAD(&cn->configs);
-
- spin_lock_init(&cn->lock);
-
-#ifdef CONFIG_PROC_FS
- cn->procdir = proc_mkdir("ipt_CLUSTERIP", net->proc_net);
- if (!cn->procdir) {
- pr_err("Unable to proc dir entry\n");
- return -ENOMEM;
- }
- mutex_init(&cn->mutex);
-#endif /* CONFIG_PROC_FS */
-
- return 0;
-}
-
-static void clusterip_net_exit(struct net *net)
-{
-#ifdef CONFIG_PROC_FS
- struct clusterip_net *cn = clusterip_pernet(net);
-
- mutex_lock(&cn->mutex);
- proc_remove(cn->procdir);
- cn->procdir = NULL;
- mutex_unlock(&cn->mutex);
-#endif
-}
-
-static struct pernet_operations clusterip_net_ops = {
- .init = clusterip_net_init,
- .exit = clusterip_net_exit,
- .id = &clusterip_net_id,
- .size = sizeof(struct clusterip_net),
-};
-
-static struct notifier_block cip_netdev_notifier = {
- .notifier_call = clusterip_netdev_event
-};
-
-static int __init clusterip_tg_init(void)
-{
- int ret;
-
- ret = register_pernet_subsys(&clusterip_net_ops);
- if (ret < 0)
- return ret;
-
- ret = xt_register_target(&clusterip_tg_reg);
- if (ret < 0)
- goto cleanup_subsys;
-
- ret = register_netdevice_notifier(&cip_netdev_notifier);
- if (ret < 0)
- goto unregister_target;
-
- pr_info("ClusterIP Version %s loaded successfully\n",
- CLUSTERIP_VERSION);
-
- return 0;
-
-unregister_target:
- xt_unregister_target(&clusterip_tg_reg);
-cleanup_subsys:
- unregister_pernet_subsys(&clusterip_net_ops);
- return ret;
-}
-
-static void __exit clusterip_tg_exit(void)
-{
- pr_info("ClusterIP Version %s unloading\n", CLUSTERIP_VERSION);
-
- unregister_netdevice_notifier(&cip_netdev_notifier);
- xt_unregister_target(&clusterip_tg_reg);
- unregister_pernet_subsys(&clusterip_net_ops);
-
- /* Wait for completion of call_rcu()'s (clusterip_config_rcu_free) */
- rcu_barrier();
-}
-
-module_init(clusterip_tg_init);
-module_exit(clusterip_tg_exit);
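
The deleted proc handler above drove cluster membership with single-character commands: a leading '+' or '-' followed by a decimal node number. A minimal userspace sketch of that retired interface (the proc path and node number are illustrative, not taken from this diff):

#include <fcntl.h>
#include <unistd.h>

/* Enable node 2 on a hypothetical CLUSTERIP config; mirrors what
 * clusterip_proc_write() parsed: "+<node>" to add, "-<node>" to drop.
 */
static int clusterip_enable_node(void)
{
	int fd = open("/proc/net/ipt_CLUSTERIP/192.168.0.5", O_WRONLY);

	if (fd < 0)
		return -1;
	if (write(fd, "+2", 2) != 2) {
		close(fd);
		return -1;
	}
	return close(fd);
}
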
diff --git a/net/ipv4/netfilter/nf_reject_ipv4.c b/net/ipv4/netfilter/nf_reject_ipv4.c
index d640adcaf1b1..f33aeab9424f 100644
--- a/net/ipv4/netfilter/nf_reject_ipv4.c
+++ b/net/ipv4/netfilter/nf_reject_ipv4.c
@@ -280,6 +280,7 @@ void nf_send_reset(struct net *net, struct sock *sk, struct sk_buff *oldskb,
goto free_nskb;
nf_ct_attach(nskb, oldskb);
+ nf_ct_set_closing(skb_nfct(oldskb));
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip_local_out for bridged traffic, the MAC source on
diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c
index f88daace9de3..eaf1d3113b62 100644
--- a/net/ipv4/proc.c
+++ b/net/ipv4/proc.c
@@ -353,7 +353,7 @@ static void icmp_put(struct seq_file *seq)
seq_puts(seq, "\nIcmp: InMsgs InErrors InCsumErrors");
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " In%s", icmpmibmap[i].name);
- seq_puts(seq, " OutMsgs OutErrors");
+ seq_puts(seq, " OutMsgs OutErrors OutRateLimitGlobal OutRateLimitHost");
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " Out%s", icmpmibmap[i].name);
seq_printf(seq, "\nIcmp: %lu %lu %lu",
@@ -363,9 +363,11 @@ static void icmp_put(struct seq_file *seq)
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + icmpmibmap[i].index));
- seq_printf(seq, " %lu %lu",
+ seq_printf(seq, " %lu %lu %lu %lu",
snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTMSGS),
- snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS));
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_OUTERRORS),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITGLOBAL),
+ snmp_fold_field(net->mib.icmp_statistics, ICMP_MIB_RATELIMITHOST));
for (i = 0; icmpmibmap[i].name; i++)
seq_printf(seq, " %lu",
atomic_long_read(ptr + (icmpmibmap[i].index | 0x100)));
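
For context on the hunk above: /proc/net/snmp prints the ICMP MIB as a header row followed by a value row, so the two new counters appear as extra columns after OutErrors in both rows. A small, hedged check that a running kernel exposes the new fields (file layout assumed from the code above):

#include <stdio.h>
#include <string.h>

/* Returns 1 if the Icmp header line advertises the rate-limit
 * counters added by this patch, 0 otherwise.
 */
static int snmp_has_ratelimit_counters(void)
{
	char line[1024];
	FILE *f = fopen("/proc/net/snmp", "r");
	int found = 0;

	if (!f)
		return 0;
	while (fgets(line, sizeof(line), f)) {
		if (!strncmp(line, "Icmp:", 5) &&
		    strstr(line, "OutRateLimitGlobal")) {
			found = 1;
			break;
		}
	}
	fclose(f);
	return found;
}
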
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 006c1f0ed8b4..94df935ee0c5 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -93,7 +93,7 @@ int raw_hash_sk(struct sock *sk)
struct raw_hashinfo *h = sk->sk_prot->h.raw_hash;
struct hlist_nulls_head *hlist;
- hlist = &h->ht[inet_sk(sk)->inet_num & (RAW_HTABLE_SIZE - 1)];
+ hlist = &h->ht[raw_hashfunc(sock_net(sk), inet_sk(sk)->inet_num)];
spin_lock(&h->lock);
__sk_nulls_add_node_rcu(sk, hlist);
@@ -160,9 +160,9 @@ static int icmp_filter(const struct sock *sk, const struct sk_buff *skb)
* RFC 1122: SHOULD pass TOS value up to the transport layer.
* -> It does. And not only TOS, but all IP header.
*/
-static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
+static int raw_v4_input(struct net *net, struct sk_buff *skb,
+ const struct iphdr *iph, int hash)
{
- struct net *net = dev_net(skb->dev);
struct hlist_nulls_head *hlist;
struct hlist_nulls_node *hnode;
int sdif = inet_sdif(skb);
@@ -193,9 +193,10 @@ static int raw_v4_input(struct sk_buff *skb, const struct iphdr *iph, int hash)
int raw_local_deliver(struct sk_buff *skb, int protocol)
{
- int hash = protocol & (RAW_HTABLE_SIZE - 1);
+ struct net *net = dev_net(skb->dev);
- return raw_v4_input(skb, ip_hdr(skb), hash);
+ return raw_v4_input(net, skb, ip_hdr(skb),
+ raw_hashfunc(net, protocol));
}
static void raw_err(struct sock *sk, struct sk_buff *skb, u32 info)
@@ -271,7 +272,7 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
struct sock *sk;
int hash;
- hash = protocol & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, protocol);
hlist = &raw_v4_hashinfo.ht[hash];
rcu_read_lock();
@@ -287,11 +288,13 @@ void raw_icmp_error(struct sk_buff *skb, int protocol, u32 info)
static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
/* Charge it to the socket. */
ipv4_pktinfo_prepare(sk, skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -302,7 +305,7 @@ int raw_rcv(struct sock *sk, struct sk_buff *skb)
{
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
nf_reset_ct(skb);
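
The raw.c hunks above swap the open-coded `protocol & (RAW_HTABLE_SIZE - 1)` for raw_hashfunc(), which also takes the netns. The helper itself is not shown in this section; a plausible sketch consistent with the call sites, assuming a namespace mix like the one used elsewhere in the stack:

/* Sketch only: fold a per-netns value into the protocol so raw
 * sockets from different namespaces land in different buckets,
 * which hardens the table against cross-netns bucket flooding.
 * The net_hash_mix() usage here is an assumption.
 */
static inline int raw_hashfunc(const struct net *net, u32 proto)
{
	return hash_32(net_hash_mix(net) ^ proto, ilog2(RAW_HTABLE_SIZE));
}
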
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index c567d5e8053e..288693981b00 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -435,6 +435,7 @@ void tcp_init_sock(struct sock *sk)
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
+ tp->rate_app_limited = 1;
/* See draft-stevens-tcpca-spec-01 for discussion of the
* initialization of these values.
@@ -1890,10 +1891,10 @@ int tcp_mmap(struct file *file, struct socket *sock,
{
if (vma->vm_flags & (VM_WRITE | VM_EXEC))
return -EPERM;
- vma->vm_flags &= ~(VM_MAYWRITE | VM_MAYEXEC);
+ vm_flags_clear(vma, VM_MAYWRITE | VM_MAYEXEC);
/* Instruct vm_insert_page() to not mmap_read_lock(mm) */
- vma->vm_flags |= VM_MIXEDMAP;
+ vm_flags_set(vma, VM_MIXEDMAP);
vma->vm_ops = &tcp_vm_ops;
return 0;
@@ -2092,7 +2093,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
maybe_zap_len = total_bytes_to_map - /* All bytes to map */
*length + /* Mapped or pending */
(pages_remaining * PAGE_SIZE); /* Failed map. */
- zap_page_range(vma, *address, maybe_zap_len);
+ zap_page_range_single(vma, *address, maybe_zap_len, NULL);
err = 0;
}
@@ -2100,7 +2101,7 @@ static int tcp_zerocopy_vm_insert_batch_error(struct vm_area_struct *vma,
unsigned long leftover_pages = pages_remaining;
int bytes_mapped;
- /* We called zap_page_range, try to reinsert. */
+ /* We called zap_page_range_single, try to reinsert. */
err = vm_insert_pages(vma, *address,
pending_pages,
&pages_remaining);
@@ -2234,7 +2235,8 @@ static int tcp_zerocopy_receive(struct sock *sk,
total_bytes_to_map = avail_len & ~(PAGE_SIZE - 1);
if (total_bytes_to_map) {
if (!(zc->flags & TCP_RECEIVE_ZEROCOPY_FLAG_TLB_CLEAN_HINT))
- zap_page_range(vma, address, total_bytes_to_map);
+ zap_page_range_single(vma, address, total_bytes_to_map,
+ NULL);
zc->length = total_bytes_to_map;
zc->recv_skip_hint = 0;
} else {
@@ -3178,6 +3180,7 @@ int tcp_disconnect(struct sock *sk, int flags)
tp->plb_rehash = 0;
/* There's a bubble in the pipe until at least the first ACK. */
tp->app_limited = ~0U;
+ tp->rate_app_limited = 1;
tp->rack.mstamp = 0;
tp->rack.advanced = 0;
tp->rack.reo_wnd_steps = 1;
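
Two mm API migrations run through the tcp.c hunks above: direct vma->vm_flags writes become vm_flags_set()/vm_flags_clear(), and zap_page_range() becomes zap_page_range_single(). A signature sketch of the replacements, as assumed from the call sites:

/* Single-VMA variant: never walks past @vma, and takes an optional
 * details argument, which the zerocopy callers pass as NULL.
 */
void zap_page_range_single(struct vm_area_struct *vma, unsigned long address,
			   unsigned long size, struct zap_details *details);

/* Wrappers around vm_flags updates; typically they also assert that
 * the mmap write lock is held (assumption).
 */
void vm_flags_set(struct vm_area_struct *vma, vm_flags_t flags);
void vm_flags_clear(struct vm_area_struct *vma, vm_flags_t flags);
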
diff --git a/net/ipv4/tcp_bbr.c b/net/ipv4/tcp_bbr.c
index d2c470524e58..146792cd26fe 100644
--- a/net/ipv4/tcp_bbr.c
+++ b/net/ipv4/tcp_bbr.c
@@ -295,7 +295,7 @@ static void bbr_set_pacing_rate(struct sock *sk, u32 bw, int gain)
}
/* override sysctl_tcp_min_tso_segs */
-static u32 bbr_min_tso_segs(struct sock *sk)
+__bpf_kfunc static u32 bbr_min_tso_segs(struct sock *sk)
{
return sk->sk_pacing_rate < (bbr_min_tso_rate >> 3) ? 1 : 2;
}
@@ -328,7 +328,7 @@ static void bbr_save_cwnd(struct sock *sk)
bbr->prior_cwnd = max(bbr->prior_cwnd, tcp_snd_cwnd(tp));
}
-static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void bbr_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
@@ -1023,7 +1023,7 @@ static void bbr_update_model(struct sock *sk, const struct rate_sample *rs)
bbr_update_gains(sk);
}
-static void bbr_main(struct sock *sk, const struct rate_sample *rs)
+__bpf_kfunc static void bbr_main(struct sock *sk, const struct rate_sample *rs)
{
struct bbr *bbr = inet_csk_ca(sk);
u32 bw;
@@ -1035,7 +1035,7 @@ static void bbr_main(struct sock *sk, const struct rate_sample *rs)
bbr_set_cwnd(sk, rs, rs->acked_sacked, bw, bbr->cwnd_gain);
}
-static void bbr_init(struct sock *sk)
+__bpf_kfunc static void bbr_init(struct sock *sk)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bbr *bbr = inet_csk_ca(sk);
@@ -1077,7 +1077,7 @@ static void bbr_init(struct sock *sk)
cmpxchg(&sk->sk_pacing_status, SK_PACING_NONE, SK_PACING_NEEDED);
}
-static u32 bbr_sndbuf_expand(struct sock *sk)
+__bpf_kfunc static u32 bbr_sndbuf_expand(struct sock *sk)
{
/* Provision 3 * cwnd since BBR may slow-start even during recovery. */
return 3;
@@ -1086,7 +1086,7 @@ static u32 bbr_sndbuf_expand(struct sock *sk)
/* In theory BBR does not need to undo the cwnd since it does not
* always reduce cwnd on losses (see bbr_main()). Keep it for now.
*/
-static u32 bbr_undo_cwnd(struct sock *sk)
+__bpf_kfunc static u32 bbr_undo_cwnd(struct sock *sk)
{
struct bbr *bbr = inet_csk_ca(sk);
@@ -1097,7 +1097,7 @@ static u32 bbr_undo_cwnd(struct sock *sk)
}
/* Entering loss recovery, so save cwnd for when we exit or undo recovery. */
-static u32 bbr_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 bbr_ssthresh(struct sock *sk)
{
bbr_save_cwnd(sk);
return tcp_sk(sk)->snd_ssthresh;
@@ -1125,7 +1125,7 @@ static size_t bbr_get_info(struct sock *sk, u32 ext, int *attr,
return 0;
}
-static void bbr_set_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void bbr_set_state(struct sock *sk, u8 new_state)
{
struct bbr *bbr = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c
index 94aad3870c5f..cf26d65ca389 100644
--- a/net/ipv4/tcp_bpf.c
+++ b/net/ipv4/tcp_bpf.c
@@ -6,6 +6,7 @@
#include <linux/bpf.h>
#include <linux/init.h>
#include <linux/wait.h>
+#include <linux/util_macros.h>
#include <net/inet_common.h>
#include <net/tls.h>
@@ -639,10 +640,9 @@ EXPORT_SYMBOL_GPL(tcp_bpf_update_proto);
*/
void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
{
- int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
struct proto *prot = newsk->sk_prot;
- if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+ if (is_insidevar(prot, tcp_bpf_prots))
newsk->sk_prot = sk->sk_prot_creator;
}
#endif /* CONFIG_BPF_SYSCALL */
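
The tcp_bpf_clone() change above no longer computes the exact [family][TCP_BPF_BASE] slot; it only needs to know whether @prot points anywhere inside the tcp_bpf_prots array. A sketch of the is_insidevar() helper pulled in via linux/util_macros.h (modeled on its use here; the exact definition is assumed):

/* True iff @ptr points into the storage of @var; with an array
 * argument this covers every element in one comparison.
 */
#define is_insidevar(ptr, var)					\
	((uintptr_t)(ptr) >= (uintptr_t)(var) &&		\
	 (uintptr_t)(ptr) <  (uintptr_t)(var) + sizeof(var))
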
diff --git a/net/ipv4/tcp_cong.c b/net/ipv4/tcp_cong.c
index d3cae40749e8..db8b4b488c31 100644
--- a/net/ipv4/tcp_cong.c
+++ b/net/ipv4/tcp_cong.c
@@ -403,7 +403,7 @@ int tcp_set_congestion_control(struct sock *sk, const char *name, bool load,
* ABC caps N to 2. Slow start exits when cwnd grows over ssthresh and
* returns the leftover acks to adjust cwnd in congestion avoidance mode.
*/
-u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
+__bpf_kfunc u32 tcp_slow_start(struct tcp_sock *tp, u32 acked)
{
u32 cwnd = min(tcp_snd_cwnd(tp) + acked, tp->snd_ssthresh);
@@ -417,7 +417,7 @@ EXPORT_SYMBOL_GPL(tcp_slow_start);
/* In theory this is tp->snd_cwnd += 1 / tp->snd_cwnd (or alternative w),
* for every packet that was ACKed.
*/
-void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
+__bpf_kfunc void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked)
{
/* If credits accumulated at a higher w, apply them gently now. */
if (tp->snd_cwnd_cnt >= w) {
@@ -443,7 +443,7 @@ EXPORT_SYMBOL_GPL(tcp_cong_avoid_ai);
/* This is Jacobson's slow start and congestion avoidance.
* SIGCOMM '88, p. 328.
*/
-void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__bpf_kfunc void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
@@ -462,7 +462,7 @@ void tcp_reno_cong_avoid(struct sock *sk, u32 ack, u32 acked)
EXPORT_SYMBOL_GPL(tcp_reno_cong_avoid);
/* Slow start threshold is half the congestion window (min 2) */
-u32 tcp_reno_ssthresh(struct sock *sk)
+__bpf_kfunc u32 tcp_reno_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -470,7 +470,7 @@ u32 tcp_reno_ssthresh(struct sock *sk)
}
EXPORT_SYMBOL_GPL(tcp_reno_ssthresh);
-u32 tcp_reno_undo_cwnd(struct sock *sk)
+__bpf_kfunc u32 tcp_reno_undo_cwnd(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
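
The __bpf_kfunc annotations sprinkled over tcp_bbr.c and tcp_cong.c above (and tcp_cubic.c/tcp_dctcp.c below) mark congestion-control helpers as callable from BPF programs. A hedged sketch of what the marker and a registration set typically look like (the set name below is illustrative):

/* The annotation mainly keeps the symbol alive and visible to BTF
 * even when all in-kernel callers are inlined away.
 */
#define __bpf_kfunc __used noinline

/* Illustrative registration: kfuncs are grouped into a BTF ID set
 * that the verifier consults.
 */
BTF_SET8_START(example_tcp_ca_kfunc_ids)
BTF_ID_FLAGS(func, tcp_slow_start)
BTF_ID_FLAGS(func, tcp_reno_cong_avoid)
BTF_SET8_END(example_tcp_ca_kfunc_ids)
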
diff --git a/net/ipv4/tcp_cubic.c b/net/ipv4/tcp_cubic.c
index 768c10c1f649..0fd78ecb67e7 100644
--- a/net/ipv4/tcp_cubic.c
+++ b/net/ipv4/tcp_cubic.c
@@ -126,7 +126,7 @@ static inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-static void cubictcp_init(struct sock *sk)
+__bpf_kfunc static void cubictcp_init(struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -139,7 +139,7 @@ static void cubictcp_init(struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
+__bpf_kfunc static void cubictcp_cwnd_event(struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -321,7 +321,7 @@ tcp_friendliness:
ca->cnt = max(ca->cnt, 2U);
}
-static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
+__bpf_kfunc static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -338,7 +338,7 @@ static void cubictcp_cong_avoid(struct sock *sk, u32 ack, u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-static u32 cubictcp_recalc_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 cubictcp_recalc_ssthresh(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -355,7 +355,7 @@ static u32 cubictcp_recalc_ssthresh(struct sock *sk)
return max((tcp_snd_cwnd(tp) * beta) / BICTCP_BETA_SCALE, 2U);
}
-static void cubictcp_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void cubictcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -445,7 +445,7 @@ static void hystart_update(struct sock *sk, u32 delay)
}
}
-static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
+__bpf_kfunc static void cubictcp_acked(struct sock *sk, const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
diff --git a/net/ipv4/tcp_dctcp.c b/net/ipv4/tcp_dctcp.c
index e0a2ca7456ff..bb23bb5b387a 100644
--- a/net/ipv4/tcp_dctcp.c
+++ b/net/ipv4/tcp_dctcp.c
@@ -75,7 +75,7 @@ static void dctcp_reset(const struct tcp_sock *tp, struct dctcp *ca)
ca->old_delivered_ce = tp->delivered_ce;
}
-static void dctcp_init(struct sock *sk)
+__bpf_kfunc static void dctcp_init(struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
@@ -104,7 +104,7 @@ static void dctcp_init(struct sock *sk)
INET_ECN_dontxmit(sk);
}
-static u32 dctcp_ssthresh(struct sock *sk)
+__bpf_kfunc static u32 dctcp_ssthresh(struct sock *sk)
{
struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
@@ -113,7 +113,7 @@ static u32 dctcp_ssthresh(struct sock *sk)
return max(tcp_snd_cwnd(tp) - ((tcp_snd_cwnd(tp) * ca->dctcp_alpha) >> 11U), 2U);
}
-static void dctcp_update_alpha(struct sock *sk, u32 flags)
+__bpf_kfunc static void dctcp_update_alpha(struct sock *sk, u32 flags)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct dctcp *ca = inet_csk_ca(sk);
@@ -169,7 +169,7 @@ static void dctcp_react_to_loss(struct sock *sk)
tp->snd_ssthresh = max(tcp_snd_cwnd(tp) >> 1U, 2U);
}
-static void dctcp_state(struct sock *sk, u8 new_state)
+__bpf_kfunc static void dctcp_state(struct sock *sk, u8 new_state)
{
if (new_state == TCP_CA_Recovery &&
new_state != inet_csk(sk)->icsk_ca_state)
@@ -179,7 +179,7 @@ static void dctcp_state(struct sock *sk, u8 new_state)
*/
}
-static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
+__bpf_kfunc static void dctcp_cwnd_event(struct sock *sk, enum tcp_ca_event ev)
{
struct dctcp *ca = inet_csk_ca(sk);
@@ -229,7 +229,7 @@ static size_t dctcp_get_info(struct sock *sk, u32 ext, int *attr,
return 0;
}
-static u32 dctcp_cwnd_undo(struct sock *sk)
+__bpf_kfunc static u32 dctcp_cwnd_undo(struct sock *sk)
{
const struct dctcp *ca = inet_csk_ca(sk);
struct tcp_sock *tp = tcp_sk(sk);
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 8320d0ecb13a..ea370afa70ed 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2102,6 +2102,7 @@ process:
/* min_ttl can be changed concurrently from do_ip_setsockopt() */
if (unlikely(iph->ttl < READ_ONCE(inet_sk(sk)->min_ttl))) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ drop_reason = SKB_DROP_REASON_TCP_MINTTL;
goto discard_and_relse;
}
}
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index e002f2e1d4f2..9a7ef7732c24 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -597,6 +597,9 @@ EXPORT_SYMBOL(tcp_create_openreq_child);
* validation and inside tcp_v4_reqsk_send_ack(). Can we do better?
*
* We don't need to initialize tmp_opt.sack_ok as we don't use the results
+ *
+ * Note: If @fastopen is true, this can be called from process context.
+ * Otherwise, this is from BH context.
*/
struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
@@ -748,7 +751,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
&tcp_rsk(req)->last_oow_ack_time))
req->rsk_ops->send_ack(sk, skb, req);
if (paws_reject)
- __NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
+ NET_INC_STATS(sock_net(sk), LINUX_MIB_PAWSESTABREJECTED);
return NULL;
}
@@ -767,7 +770,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
* "fourth, check the SYN bit"
*/
if (flg & (TCP_FLAG_RST|TCP_FLAG_SYN)) {
- __TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
+ TCP_INC_STATS(sock_net(sk), TCP_MIB_ATTEMPTFAILS);
goto embryonic_reset;
}
diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c
index 9ae50b1bd844..2aa442128630 100644
--- a/net/ipv4/tcp_ulp.c
+++ b/net/ipv4/tcp_ulp.c
@@ -139,6 +139,10 @@ static int __tcp_set_ulp(struct sock *sk, const struct tcp_ulp_ops *ulp_ops)
if (sk->sk_socket)
clear_bit(SOCK_SUPPORT_ZC, &sk->sk_socket->flags);
+ err = -ENOTCONN;
+ if (!ulp_ops->clone && sk->sk_state == TCP_LISTEN)
+ goto out_err;
+
err = ulp_ops->init(sk);
if (err)
goto out_err;
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 9592fe3e444a..c605d171eb2d 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -248,7 +248,7 @@ int udp_lib_get_port(struct sock *sk, unsigned short snum,
int low, high, remaining;
unsigned int rand;
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
remaining = (high - low) + 1;
rand = get_random_u32();
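
udp_lib_get_port() now asks inet_sk_get_local_port_range() instead of the per-netns helper, which lets an individual socket narrow its ephemeral range. The helper is not part of this section; a sketch under the assumption that a per-socket override is intersected with the namespace-wide range (the field names are invented for illustration):

/* Sketch: honor a per-socket [lo, hi] override when present,
 * clamped to the netns ip_local_port_range.
 */
static void inet_sk_get_local_port_range(const struct sock *sk,
					 int *low, int *high)
{
	const struct inet_sock *inet = inet_sk(sk);
	int lo, hi;

	inet_get_local_port_range(sock_net(sk), &lo, &hi);
	if (inet->local_port_range_hi) {	/* assumed field */
		lo = max(lo, (int)inet->local_port_range_lo);
		hi = min(hi, (int)inet->local_port_range_hi);
	}
	*low = lo;
	*high = hi;
}
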
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index f7a84a4acffc..faa47f9ea73a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -3127,17 +3127,17 @@ static void add_v4_addrs(struct inet6_dev *idev)
offset = sizeof(struct in6_addr) - 4;
memcpy(&addr.s6_addr32[3], idev->dev->dev_addr + offset, 4);
- if (idev->dev->flags&IFF_POINTOPOINT) {
+ if (!(idev->dev->flags & IFF_POINTOPOINT) && idev->dev->type == ARPHRD_SIT) {
+ scope = IPV6_ADDR_COMPATv4;
+ plen = 96;
+ pflags |= RTF_NONEXTHOP;
+ } else {
if (idev->cnf.addr_gen_mode == IN6_ADDR_GEN_MODE_NONE)
return;
addr.s6_addr32[0] = htonl(0xfe800000);
scope = IFA_LINK;
plen = 64;
- } else {
- scope = IPV6_ADDR_COMPATv4;
- plen = 96;
- pflags |= RTF_NONEXTHOP;
}
if (addr.s6_addr32[3]) {
@@ -3447,6 +3447,30 @@ static void addrconf_gre_config(struct net_device *dev)
}
#endif
+static void addrconf_init_auto_addrs(struct net_device *dev)
+{
+ switch (dev->type) {
+#if IS_ENABLED(CONFIG_IPV6_SIT)
+ case ARPHRD_SIT:
+ addrconf_sit_config(dev);
+ break;
+#endif
+#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
+ case ARPHRD_IP6GRE:
+ case ARPHRD_IPGRE:
+ addrconf_gre_config(dev);
+ break;
+#endif
+ case ARPHRD_LOOPBACK:
+ init_loopback(dev);
+ break;
+
+ default:
+ addrconf_dev_config(dev);
+ break;
+ }
+}
+
static int fixup_permanent_addr(struct net *net,
struct inet6_dev *idev,
struct inet6_ifaddr *ifp)
@@ -3615,26 +3639,7 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event,
run_pending = 1;
}
- switch (dev->type) {
-#if IS_ENABLED(CONFIG_IPV6_SIT)
- case ARPHRD_SIT:
- addrconf_sit_config(dev);
- break;
-#endif
-#if IS_ENABLED(CONFIG_NET_IPGRE) || IS_ENABLED(CONFIG_IPV6_GRE)
- case ARPHRD_IP6GRE:
- case ARPHRD_IPGRE:
- addrconf_gre_config(dev);
- break;
-#endif
- case ARPHRD_LOOPBACK:
- init_loopback(dev);
- break;
-
- default:
- addrconf_dev_config(dev);
- break;
- }
+ addrconf_init_auto_addrs(dev);
if (!IS_ERR_OR_NULL(idev)) {
if (run_pending)
@@ -6397,7 +6402,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
} else if (&net->ipv6.devconf_all->addr_gen_mode == ctl->data) {
struct net_device *dev;
@@ -6408,7 +6413,7 @@ static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
if (idev &&
idev->cnf.addr_gen_mode != new_val) {
idev->cnf.addr_gen_mode = new_val;
- addrconf_dev_config(idev->dev);
+ addrconf_init_auto_addrs(idev->dev);
}
}
}
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index fee9163382c2..38689bedfce7 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -222,6 +222,7 @@ lookup_protocol:
np->pmtudisc = IPV6_PMTUDISC_WANT;
np->repflow = net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ESTABLISHED;
sk->sk_ipv6only = net->ipv6.sysctl.bindv6only;
+ sk->sk_txrehash = READ_ONCE(net->core.sysctl_txrehash);
/* Init the ipv4 part of the socket since we can have sockets
* using v6 API for ipv4.
@@ -238,16 +239,6 @@ lookup_protocol:
inet->pmtudisc = IP_PMTUDISC_DONT;
else
inet->pmtudisc = IP_PMTUDISC_WANT;
- /*
- * Increment only the relevant sk_prot->socks debug field, this changes
- * the previous behaviour of incrementing both the equivalent to
- * answer->prot->socks (inet6_sock_nr) and inet_sock_nr.
- *
- * This allows better debug granularity as we'll know exactly how many
- * UDPv6, TCPv6, etc socks were allocated, not the sum of all IPv6
- * transport protocol socks. -acme
- */
- sk_refcnt_debug_inc(sk);
if (inet->inet_num) {
/* It assumes that any protocol which allows
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 5228d2716289..01005035ad10 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -281,12 +281,12 @@ static int ipv6_clear_mutable_options(struct ipv6hdr *iph, int len, int dir)
return 0;
}
-static void ah6_output_done(struct crypto_async_request *base, int err)
+static void ah6_output_done(void *data, int err)
{
int extlen;
u8 *iph_base;
u8 *icv;
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct xfrm_state *x = skb_dst(skb)->xfrm;
struct ah_data *ahp = x->data;
struct ipv6hdr *top_iph = ipv6_hdr(skb);
@@ -451,12 +451,12 @@ out:
return err;
}
-static void ah6_input_done(struct crypto_async_request *base, int err)
+static void ah6_input_done(void *data, int err)
{
u8 *auth_data;
u8 *icv;
u8 *work_iph;
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct xfrm_state *x = xfrm_input_state(skb);
struct ah_data *ahp = x->data;
struct ip_auth_hdr *ah = ip_auth_hdr(skb);
diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c
index e624497fa992..9b6818453afe 100644
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -51,7 +51,7 @@ static void ip6_datagram_flow_key_init(struct flowi6 *fl6, struct sock *sk)
fl6->flowi6_mark = sk->sk_mark;
fl6->fl6_dport = inet->inet_dport;
fl6->fl6_sport = inet->inet_sport;
- fl6->flowlabel = np->flow_label;
+ fl6->flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6->flowi6_uid = sk->sk_uid;
if (!oif)
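
With the datagram.c change above, connected-socket flow keys carry the traffic class as well as the flow label, so routing and flow dissection can see DSCP. A sketch of ip6_make_flowinfo() consistent with that use (modeled on the include/net/ipv6.h helper):

/* Pack tclass and flow label into the IPv6 flowinfo word. */
static inline __be32 ip6_make_flowinfo(unsigned int tclass, __be32 flowlabel)
{
	return htonl(tclass << IPV6_TCLASS_SHIFT) | flowlabel;
}
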
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 14ed868680c6..fddd0cbdede1 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -278,9 +278,9 @@ static void esp_output_encap_csum(struct sk_buff *skb)
}
}
-static void esp_output_done(struct crypto_async_request *base, int err)
+static void esp_output_done(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct xfrm_offload *xo = xfrm_offload(skb);
void *tmp;
struct xfrm_state *x;
@@ -368,12 +368,12 @@ static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
return esph;
}
-static void esp_output_done_esn(struct crypto_async_request *base, int err)
+static void esp_output_done_esn(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
esp_output_restore_header(skb);
- esp_output_done(base, err);
+ esp_output_done(data, err);
}
static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
@@ -879,9 +879,9 @@ out:
}
EXPORT_SYMBOL_GPL(esp6_input_done2);
-static void esp_input_done(struct crypto_async_request *base, int err)
+static void esp_input_done(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
xfrm_input_resume(skb, esp6_input_done2(skb, err));
}
@@ -909,12 +909,12 @@ static void esp_input_set_header(struct sk_buff *skb, __be32 *seqhi)
}
}
-static void esp_input_done_esn(struct crypto_async_request *base, int err)
+static void esp_input_done_esn(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
esp_input_restore_header(skb);
- esp_input_done(base, err);
+ esp_input_done(data, err);
}
static int esp6_input(struct xfrm_state *x, struct sk_buff *skb)
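
The ah6.c and esp6.c hunks above all follow one crypto API migration: completion callbacks now receive the request's data pointer directly instead of the wrapping struct crypto_async_request. A before/after sketch of the callback shape:

/* Old style: dig the context out of the request wrapper. */
static void done_old(struct crypto_async_request *base, int err)
{
	struct sk_buff *skb = base->data;

	pr_debug("completion for %p: %d\n", skb, err);
	/* ... resume processing of skb, as esp_input_done() does ... */
}

/* New style: the crypto layer hands over base->data itself. */
static void done_new(void *data, int err)
{
	struct sk_buff *skb = data;

	pr_debug("completion for %p: %d\n", skb, err);
	/* ... resume processing of skb, as esp_input_done() does ... */
}
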
diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c
index 9d92d51c4757..1f53f2a74480 100644
--- a/net/ipv6/icmp.c
+++ b/net/ipv6/icmp.c
@@ -183,6 +183,7 @@ static bool icmpv6_global_allow(struct net *net, int type)
if (icmp_global_allow())
return true;
+ __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
return false;
}
@@ -224,6 +225,9 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
if (peer)
inet_putpeer(peer);
}
+ if (!res)
+ __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
+ ICMP6_MIB_RATELIMITHOST);
dst_release(dst);
return res;
}
@@ -328,7 +332,6 @@ static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt
{
struct ipv6hdr *iph = ipv6_hdr(skb);
struct ipv6_destopt_hao *hao;
- struct in6_addr tmp;
int off;
if (opt->dsthao) {
@@ -336,9 +339,7 @@ static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt
if (likely(off >= 0)) {
hao = (struct ipv6_destopt_hao *)
(skb_network_header(skb) + off);
- tmp = iph->saddr;
- iph->saddr = hao->addr;
- hao->addr = tmp;
+ swap(iph->saddr, hao->addr);
}
}
}
@@ -704,7 +705,7 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
}
EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
-static void icmpv6_echo_reply(struct sk_buff *skb)
+static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
{
struct net *net = dev_net(skb->dev);
struct sock *sk;
@@ -718,18 +719,19 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
struct dst_entry *dst;
struct ipcm6_cookie ipc6;
u32 mark = IP6_REPLY_MARK(net, skb->mark);
+ SKB_DR(reason);
bool acast;
u8 type;
if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
- return;
+ return reason;
saddr = &ipv6_hdr(skb)->daddr;
acast = ipv6_anycast_destination(skb_dst(skb), saddr);
if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
- return;
+ return reason;
if (!ipv6_unicast_destination(skb) &&
!(net->ipv6.sysctl.anycast_src_echo_reply && acast))
@@ -803,6 +805,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb)
} else {
icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
skb->len + sizeof(struct icmp6hdr));
+ reason = SKB_CONSUMED;
}
out_dst_release:
dst_release(dst);
@@ -810,18 +813,22 @@ out:
icmpv6_xmit_unlock(sk);
out_bh_enable:
local_bh_enable();
+ return reason;
}
-void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
+enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
+ u8 code, __be32 info)
{
struct inet6_skb_parm *opt = IP6CB(skb);
+ struct net *net = dev_net(skb->dev);
const struct inet6_protocol *ipprot;
+ enum skb_drop_reason reason;
int inner_offset;
__be16 frag_off;
u8 nexthdr;
- struct net *net = dev_net(skb->dev);
- if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+ reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
+ if (reason != SKB_NOT_DROPPED_YET)
goto out;
seg6_icmp_srh(skb, opt);
@@ -831,14 +838,17 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
/* now skip over extension headers */
inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
&nexthdr, &frag_off);
- if (inner_offset < 0)
+ if (inner_offset < 0) {
+ SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
goto out;
+ }
} else {
inner_offset = sizeof(struct ipv6hdr);
}
/* Check the header, including 8 bytes of the inner protocol header. */
- if (!pskb_may_pull(skb, inner_offset+8))
+ reason = pskb_may_pull_reason(skb, inner_offset + 8);
+ if (reason != SKB_NOT_DROPPED_YET)
goto out;
/* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
@@ -853,10 +863,11 @@ void icmpv6_notify(struct sk_buff *skb, u8 type, u8 code, __be32 info)
ipprot->err_handler(skb, opt, type, code, inner_offset, info);
raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
- return;
+ return SKB_CONSUMED;
out:
__ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
+ return reason;
}
/*
@@ -921,12 +932,12 @@ static int icmpv6_rcv(struct sk_buff *skb)
switch (type) {
case ICMPV6_ECHO_REQUEST:
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
- icmpv6_echo_reply(skb);
+ reason = icmpv6_echo_reply(skb);
break;
case ICMPV6_EXT_ECHO_REQUEST:
if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
- icmpv6_echo_reply(skb);
+ reason = icmpv6_echo_reply(skb);
break;
case ICMPV6_ECHO_REPLY:
@@ -952,7 +963,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
case ICMPV6_DEST_UNREACH:
case ICMPV6_TIME_EXCEED:
case ICMPV6_PARAMPROB:
- icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ reason = icmpv6_notify(skb, type, hdr->icmp6_code,
+ hdr->icmp6_mtu);
break;
case NDISC_ROUTER_SOLICITATION:
@@ -960,7 +972,7 @@ static int icmpv6_rcv(struct sk_buff *skb)
case NDISC_NEIGHBOUR_SOLICITATION:
case NDISC_NEIGHBOUR_ADVERTISEMENT:
case NDISC_REDIRECT:
- ndisc_rcv(skb);
+ reason = ndisc_rcv(skb);
break;
case ICMPV6_MGM_QUERY:
@@ -994,7 +1006,8 @@ static int icmpv6_rcv(struct sk_buff *skb)
* must pass to upper level
*/
- icmpv6_notify(skb, type, hdr->icmp6_code, hdr->icmp6_mtu);
+ reason = icmpv6_notify(skb, type, hdr->icmp6_code,
+ hdr->icmp6_mtu);
}
/* until the v6 path can be better sorted assume failure and
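
The reason plumbing in icmp.c (and in ndisc.c below) leans on two small helpers for declaring and setting drop reasons. A sketch, modeled on include/net/dropreason.h:

/* Declare a drop reason preset to NOT_SPECIFIED, and set it by the
 * short suffix of the SKB_DROP_REASON_* enum.
 */
#define SKB_DR_INIT(name, reason)	\
	enum skb_drop_reason name = SKB_DROP_REASON_##reason
#define SKB_DR(name)	SKB_DR_INIT(name, NOT_SPECIFIED)
#define SKB_DR_SET(name, reason)	(name = SKB_DROP_REASON_##reason)
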
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 60fd91bb5171..c314fdde0097 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -547,7 +547,20 @@ int ip6_forward(struct sk_buff *skb)
pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
int proxied = ip6_forward_proxy_check(skb);
if (proxied > 0) {
- hdr->hop_limit--;
+ /* It's tempting to decrease the hop limit
+ * here by 1, as we do at the end of the
+ * function too.
+ *
+ * But that would be incorrect, as proxying is
+ * not forwarding. The ip6_input function
+ * will handle this packet locally, and it
+ * depends on the hop limit being unchanged.
+ *
+ * One example is the NDP hop limit, which
+ * always has to stay 255; others would be
+ * similar checks around RA packets, where the
+ * user can even change the desired limit.
+ */
return ip6_input(skb);
} else if (proxied < 0) {
__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 9ce51680290b..2917dd8d198c 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -464,13 +464,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
__ipv6_sock_mc_close(sk);
__ipv6_sock_ac_close(sk);
- /*
- * Sock is moving from IPv6 to IPv4 (sk_prot), so
- * remove it from the refcnt debug socks count in the
- * original family...
- */
- sk_refcnt_debug_dec(sk);
-
if (sk->sk_protocol == IPPROTO_TCP) {
struct inet_connection_sock *icsk = inet_csk(sk);
@@ -507,11 +500,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
inet6_cleanup_sock(sk);
- /*
- * ... and add it to the refcnt debug socks count
- * in the new family. -acme
- */
- sk_refcnt_debug_inc(sk);
module_put(THIS_MODULE);
retv = 0;
break;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 3a553494ff16..c4be62c99f73 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -783,7 +783,7 @@ void ndisc_update(const struct net_device *dev, struct neighbour *neigh,
ndisc_ops_update(dev, neigh, flags, icmp6_type, ndopts);
}
-static void ndisc_recv_ns(struct sk_buff *skb)
+static enum skb_drop_reason ndisc_recv_ns(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
@@ -797,18 +797,17 @@ static void ndisc_recv_ns(struct sk_buff *skb)
struct inet6_dev *idev = NULL;
struct neighbour *neigh;
int dad = ipv6_addr_any(saddr);
- bool inc;
int is_router = -1;
+ SKB_DR(reason);
u64 nonce = 0;
+ bool inc;
- if (skb->len < sizeof(struct nd_msg)) {
- ND_PRINTK(2, warn, "NS: packet too short\n");
- return;
- }
+ if (skb->len < sizeof(struct nd_msg))
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK(2, warn, "NS: multicast target address\n");
- return;
+ return reason;
}
/*
@@ -817,20 +816,18 @@ static void ndisc_recv_ns(struct sk_buff *skb)
*/
if (dad && !ipv6_addr_is_solict_mult(daddr)) {
ND_PRINTK(2, warn, "NS: bad DAD packet (wrong destination)\n");
- return;
+ return reason;
}
- if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
- ND_PRINTK(2, warn, "NS: invalid ND options\n");
- return;
- }
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr, dev);
if (!lladdr) {
ND_PRINTK(2, warn,
"NS: invalid link-layer address length\n");
- return;
+ return reason;
}
/* RFC2461 7.1.1:
@@ -841,7 +838,7 @@ static void ndisc_recv_ns(struct sk_buff *skb)
if (dad) {
ND_PRINTK(2, warn,
"NS: bad DAD packet (link-layer address option)\n");
- return;
+ return reason;
}
}
if (ndopts.nd_opts_nonce && ndopts.nd_opts_nonce->nd_opt_len == 1)
@@ -869,7 +866,7 @@ have_ifp:
* so fail our DAD process
*/
addrconf_dad_failure(skb, ifp);
- return;
+ return reason;
} else {
/*
* This is not a dad solicitation.
@@ -901,7 +898,7 @@ have_ifp:
idev = in6_dev_get(dev);
if (!idev) {
/* XXX: count this drop? */
- return;
+ return reason;
}
if (ipv6_chk_acast_addr(net, dev, &msg->target) ||
@@ -924,8 +921,10 @@ have_ifp:
pneigh_enqueue(&nd_tbl, idev->nd_parms, n);
goto out;
}
- } else
+ } else {
+ SKB_DR_SET(reason, IPV6_NDISC_NS_OTHERHOST);
goto out;
+ }
}
if (is_router < 0)
@@ -958,6 +957,7 @@ have_ifp:
true, (ifp != NULL && inc), inc);
if (neigh)
neigh_release(neigh);
+ reason = SKB_CONSUMED;
}
out:
@@ -965,6 +965,7 @@ out:
in6_ifa_put(ifp);
else
in6_dev_put(idev);
+ return reason;
}
static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
@@ -986,7 +987,7 @@ static int accept_untracked_na(struct net_device *dev, struct in6_addr *saddr)
}
}
-static void ndisc_recv_na(struct sk_buff *skb)
+static enum skb_drop_reason ndisc_recv_na(struct sk_buff *skb)
{
struct nd_msg *msg = (struct nd_msg *)skb_transport_header(skb);
struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
@@ -999,22 +1000,21 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct inet6_dev *idev = __in6_dev_get(dev);
struct inet6_ifaddr *ifp;
struct neighbour *neigh;
+ SKB_DR(reason);
u8 new_state;
- if (skb->len < sizeof(struct nd_msg)) {
- ND_PRINTK(2, warn, "NA: packet too short\n");
- return;
- }
+ if (skb->len < sizeof(struct nd_msg))
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
if (ipv6_addr_is_multicast(&msg->target)) {
ND_PRINTK(2, warn, "NA: target address is multicast\n");
- return;
+ return reason;
}
if (ipv6_addr_is_multicast(daddr) &&
msg->icmph.icmp6_solicited) {
ND_PRINTK(2, warn, "NA: solicited NA is multicasted\n");
- return;
+ return reason;
}
/* For some 802.11 wireless deployments (and possibly other networks),
@@ -1024,18 +1024,17 @@ static void ndisc_recv_na(struct sk_buff *skb)
*/
if (!msg->icmph.icmp6_solicited && idev &&
idev->cnf.drop_unsolicited_na)
- return;
+ return reason;
+
+ if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts))
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
- if (!ndisc_parse_options(dev, msg->opt, ndoptlen, &ndopts)) {
- ND_PRINTK(2, warn, "NS: invalid ND option\n");
- return;
- }
if (ndopts.nd_opts_tgt_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_tgt_lladdr, dev);
if (!lladdr) {
ND_PRINTK(2, warn,
"NA: invalid link-layer address length\n");
- return;
+ return reason;
}
}
ifp = ipv6_get_ifaddr(dev_net(dev), &msg->target, dev, 1);
@@ -1043,7 +1042,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
if (skb->pkt_type != PACKET_LOOPBACK
&& (ifp->flags & IFA_F_TENTATIVE)) {
addrconf_dad_failure(skb, ifp);
- return;
+ return reason;
}
/* What should we make now? The advertisement
is invalid, but ndisc specs say nothing
@@ -1059,7 +1058,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
"NA: %pM advertised our address %pI6c on %s!\n",
eth_hdr(skb)->h_source, &ifp->addr, ifp->idev->dev->name);
in6_ifa_put(ifp);
- return;
+ return reason;
}
neigh = neigh_lookup(&nd_tbl, &msg->target, dev);
@@ -1120,13 +1119,14 @@ static void ndisc_recv_na(struct sk_buff *skb)
*/
rt6_clean_tohost(dev_net(dev), saddr);
}
-
+ reason = SKB_CONSUMED;
out:
neigh_release(neigh);
}
+ return reason;
}
-static void ndisc_recv_rs(struct sk_buff *skb)
+static enum skb_drop_reason ndisc_recv_rs(struct sk_buff *skb)
{
struct rs_msg *rs_msg = (struct rs_msg *)skb_transport_header(skb);
unsigned long ndoptlen = skb->len - sizeof(*rs_msg);
@@ -1135,14 +1135,15 @@ static void ndisc_recv_rs(struct sk_buff *skb)
const struct in6_addr *saddr = &ipv6_hdr(skb)->saddr;
struct ndisc_options ndopts;
u8 *lladdr = NULL;
+ SKB_DR(reason);
if (skb->len < sizeof(*rs_msg))
- return;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
idev = __in6_dev_get(skb->dev);
if (!idev) {
ND_PRINTK(1, err, "RS: can't find in6 device\n");
- return;
+ return reason;
}
/* Don't accept RS if we're not in router mode */
@@ -1157,10 +1158,8 @@ static void ndisc_recv_rs(struct sk_buff *skb)
goto out;
/* Parse ND options */
- if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts)) {
- ND_PRINTK(2, notice, "NS: invalid ND option, ignored\n");
- goto out;
- }
+ if (!ndisc_parse_options(skb->dev, rs_msg->opt, ndoptlen, &ndopts))
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
if (ndopts.nd_opts_src_lladdr) {
lladdr = ndisc_opt_addr_data(ndopts.nd_opts_src_lladdr,
@@ -1177,9 +1176,10 @@ static void ndisc_recv_rs(struct sk_buff *skb)
NEIGH_UPDATE_F_OVERRIDE_ISROUTER,
NDISC_ROUTER_SOLICITATION, &ndopts);
neigh_release(neigh);
+ reason = SKB_CONSUMED;
}
out:
- return;
+ return reason;
}
static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt)
@@ -1228,20 +1228,21 @@ errout:
rtnl_set_sk_err(net, RTNLGRP_ND_USEROPT, err);
}
-static void ndisc_router_discovery(struct sk_buff *skb)
+static enum skb_drop_reason ndisc_router_discovery(struct sk_buff *skb)
{
struct ra_msg *ra_msg = (struct ra_msg *)skb_transport_header(skb);
+ bool send_ifinfo_notify = false;
struct neighbour *neigh = NULL;
- struct inet6_dev *in6_dev;
+ struct ndisc_options ndopts;
struct fib6_info *rt = NULL;
+ struct inet6_dev *in6_dev;
u32 defrtr_usr_metric;
+ unsigned int pref = 0;
+ __u32 old_if_flags;
struct net *net;
+ SKB_DR(reason);
int lifetime;
- struct ndisc_options ndopts;
int optlen;
- unsigned int pref = 0;
- __u32 old_if_flags;
- bool send_ifinfo_notify = false;
__u8 *opt = (__u8 *)(ra_msg + 1);
@@ -1253,17 +1254,15 @@ static void ndisc_router_discovery(struct sk_buff *skb)
__func__, skb->dev->name);
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn, "RA: source address is not link-local\n");
- return;
- }
- if (optlen < 0) {
- ND_PRINTK(2, warn, "RA: packet too short\n");
- return;
+ return reason;
}
+ if (optlen < 0)
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
if (skb->ndisc_nodetype == NDISC_NODETYPE_HOST) {
ND_PRINTK(2, warn, "RA: from host or unauthorized router\n");
- return;
+ return reason;
}
#endif
@@ -1275,13 +1274,11 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (!in6_dev) {
ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n",
skb->dev->name);
- return;
+ return reason;
}
- if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts)) {
- ND_PRINTK(2, warn, "RA: invalid ND options\n");
- return;
- }
+ if (!ndisc_parse_options(skb->dev, opt, optlen, &ndopts))
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
if (!ipv6_accept_ra(in6_dev)) {
ND_PRINTK(2, info,
@@ -1362,7 +1359,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
"RA: %s got default router without neighbour\n",
__func__);
fib6_info_release(rt);
- return;
+ return reason;
}
}
/* Set default route metric as specified by user */
@@ -1387,7 +1384,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
ND_PRINTK(0, err,
"RA: %s failed to add default route\n",
__func__);
- return;
+ return reason;
}
neigh = ip6_neigh_lookup(&rt->fib6_nh->fib_nh_gw6,
@@ -1398,7 +1395,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
"RA: %s got default router without neighbour\n",
__func__);
fib6_info_release(rt);
- return;
+ return reason;
}
neigh->flags |= NTF_ROUTER;
} else if (rt && IPV6_EXTRACT_PREF(rt->fib6_flags) != pref) {
@@ -1485,6 +1482,7 @@ skip_linkparms:
NEIGH_UPDATE_F_OVERRIDE_ISROUTER|
NEIGH_UPDATE_F_ISROUTER,
NDISC_ROUTER_ADVERTISEMENT, &ndopts);
+ reason = SKB_CONSUMED;
}
if (!ipv6_accept_ra(in6_dev)) {
@@ -1595,15 +1593,17 @@ out:
fib6_info_release(rt);
if (neigh)
neigh_release(neigh);
+ return reason;
}
-static void ndisc_redirect_rcv(struct sk_buff *skb)
+static enum skb_drop_reason ndisc_redirect_rcv(struct sk_buff *skb)
{
- u8 *hdr;
- struct ndisc_options ndopts;
struct rd_msg *msg = (struct rd_msg *)skb_transport_header(skb);
u32 ndoptlen = skb_tail_pointer(skb) - (skb_transport_header(skb) +
offsetof(struct rd_msg, opt));
+ struct ndisc_options ndopts;
+ SKB_DR(reason);
+ u8 *hdr;
#ifdef CONFIG_IPV6_NDISC_NODETYPE
switch (skb->ndisc_nodetype) {
@@ -1611,31 +1611,31 @@ static void ndisc_redirect_rcv(struct sk_buff *skb)
case NDISC_NODETYPE_NODEFAULT:
ND_PRINTK(2, warn,
"Redirect: from host or unauthorized router\n");
- return;
+ return reason;
}
#endif
if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) {
ND_PRINTK(2, warn,
"Redirect: source address is not link-local\n");
- return;
+ return reason;
}
if (!ndisc_parse_options(skb->dev, msg->opt, ndoptlen, &ndopts))
- return;
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_OPTIONS;
if (!ndopts.nd_opts_rh) {
ip6_redirect_no_header(skb, dev_net(skb->dev),
skb->dev->ifindex);
- return;
+ return reason;
}
hdr = (u8 *)ndopts.nd_opts_rh;
hdr += 8;
if (!pskb_pull(skb, hdr - skb_transport_header(skb)))
- return;
+ return SKB_DROP_REASON_PKT_TOO_SMALL;
- icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
+ return icmpv6_notify(skb, NDISC_REDIRECT, 0, 0);
}
static void ndisc_fill_redirect_hdr_option(struct sk_buff *skb,
@@ -1781,8 +1781,9 @@ release:
static void pndisc_redo(struct sk_buff *skb)
{
- ndisc_recv_ns(skb);
- kfree_skb(skb);
+ enum skb_drop_reason reason = ndisc_recv_ns(skb);
+
+ kfree_skb_reason(skb, reason);
}
static int ndisc_is_multicast(const void *pkey)
@@ -1804,15 +1805,16 @@ static bool ndisc_suppress_frag_ndisc(struct sk_buff *skb)
return false;
}
-int ndisc_rcv(struct sk_buff *skb)
+enum skb_drop_reason ndisc_rcv(struct sk_buff *skb)
{
struct nd_msg *msg;
+ SKB_DR(reason);
if (ndisc_suppress_frag_ndisc(skb))
- return 0;
+ return SKB_DROP_REASON_IPV6_NDISC_FRAG;
if (skb_linearize(skb))
- return 0;
+ return SKB_DROP_REASON_NOMEM;
msg = (struct nd_msg *)skb_transport_header(skb);
@@ -1821,39 +1823,39 @@ int ndisc_rcv(struct sk_buff *skb)
if (ipv6_hdr(skb)->hop_limit != 255) {
ND_PRINTK(2, warn, "NDISC: invalid hop-limit: %d\n",
ipv6_hdr(skb)->hop_limit);
- return 0;
+ return SKB_DROP_REASON_IPV6_NDISC_HOP_LIMIT;
}
if (msg->icmph.icmp6_code != 0) {
ND_PRINTK(2, warn, "NDISC: invalid ICMPv6 code: %d\n",
msg->icmph.icmp6_code);
- return 0;
+ return SKB_DROP_REASON_IPV6_NDISC_BAD_CODE;
}
switch (msg->icmph.icmp6_type) {
case NDISC_NEIGHBOUR_SOLICITATION:
memset(NEIGH_CB(skb), 0, sizeof(struct neighbour_cb));
- ndisc_recv_ns(skb);
+ reason = ndisc_recv_ns(skb);
break;
case NDISC_NEIGHBOUR_ADVERTISEMENT:
- ndisc_recv_na(skb);
+ reason = ndisc_recv_na(skb);
break;
case NDISC_ROUTER_SOLICITATION:
- ndisc_recv_rs(skb);
+ reason = ndisc_recv_rs(skb);
break;
case NDISC_ROUTER_ADVERTISEMENT:
- ndisc_router_discovery(skb);
+ reason = ndisc_router_discovery(skb);
break;
case NDISC_REDIRECT:
- ndisc_redirect_rcv(skb);
+ reason = ndisc_redirect_rcv(skb);
break;
}
- return 0;
+ return reason;
}
static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr)
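
Since ndisc_rcv() now reports why a packet was not consumed, callers can attribute frees precisely; pndisc_redo() above already shows the pattern. A caller-side sketch:

/* Hypothetical dispatch helper: kfree_skb_reason() traces
 * SKB_CONSUMED as a consume, anything else as a reasoned drop.
 */
static void example_ndisc_dispatch(struct sk_buff *skb)
{
	enum skb_drop_reason reason = ndisc_rcv(skb);

	kfree_skb_reason(skb, reason);
}
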
diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c
index 2d816277f2c5..0ce0ed17c758 100644
--- a/net/ipv6/netfilter/ip6_tables.c
+++ b/net/ipv6/netfilter/ip6_tables.c
@@ -1062,7 +1062,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
struct xt_counters *counters;
struct ip6t_entry *iter;
- ret = 0;
counters = xt_counters_alloc(num_counters);
if (!counters) {
ret = -ENOMEM;
@@ -1108,7 +1107,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks,
net_warn_ratelimited("ip6tables: counters copy to user failed while replacing table\n");
}
vfree(counters);
- return ret;
+ return 0;
put_module:
module_put(t->me);
@@ -1751,6 +1750,10 @@ int ip6t_register_table(struct net *net, const struct xt_table *table,
new_table = xt_register_table(net, table, &bootstrap, newinfo);
if (IS_ERR(new_table)) {
+ struct ip6t_entry *iter;
+
+ xt_entry_foreach(iter, loc_cpu_entry, newinfo->size)
+ cleanup_entry(iter, net);
xt_free_table_info(newinfo);
return PTR_ERR(new_table);
}
diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c
index a01d9b842bd0..67c87a88cde4 100644
--- a/net/ipv6/netfilter/ip6t_rpfilter.c
+++ b/net/ipv6/netfilter/ip6t_rpfilter.c
@@ -72,7 +72,9 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb,
goto out;
}
- if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE))
+ if (rt->rt6i_idev->dev == dev ||
+ l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex ||
+ (flags & XT_RPFILTER_LOOSE))
ret = true;
out:
ip6_rt_put(rt);
diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c
index f61d4f18e1cf..58ccdb08c0fd 100644
--- a/net/ipv6/netfilter/nf_reject_ipv6.c
+++ b/net/ipv6/netfilter/nf_reject_ipv6.c
@@ -345,6 +345,7 @@ void nf_send_reset6(struct net *net, struct sock *sk, struct sk_buff *oldskb,
nf_reject_ip6_tcphdr_put(nskb, oldskb, otcph, otcplen);
nf_ct_attach(nskb, oldskb);
+ nf_ct_set_closing(skb_nfct(oldskb));
#if IS_ENABLED(CONFIG_BRIDGE_NETFILTER)
/* If we use ip6_local_out for bridged traffic, the MAC source on
diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c
index d6306aa46bb1..e20b3705c2d2 100644
--- a/net/ipv6/proc.c
+++ b/net/ipv6/proc.c
@@ -94,6 +94,7 @@ static const struct snmp_mib snmp6_icmp6_list[] = {
SNMP_MIB_ITEM("Icmp6OutMsgs", ICMP6_MIB_OUTMSGS),
SNMP_MIB_ITEM("Icmp6OutErrors", ICMP6_MIB_OUTERRORS),
SNMP_MIB_ITEM("Icmp6InCsumErrors", ICMP6_MIB_CSUMERRORS),
+ SNMP_MIB_ITEM("Icmp6OutRateLimitHost", ICMP6_MIB_RATELIMITHOST),
SNMP_MIB_SENTINEL
};
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index a06a9f847db5..bac9ba747bde 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -152,7 +152,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr)
saddr = &ipv6_hdr(skb)->saddr;
daddr = saddr + 1;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -338,7 +338,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
struct sock *sk;
int hash;
- hash = nexthdr & (RAW_HTABLE_SIZE - 1);
+ hash = raw_hashfunc(net, nexthdr);
hlist = &raw_v6_hashinfo.ht[hash];
rcu_read_lock();
sk_nulls_for_each(sk, hnode, hlist) {
@@ -355,17 +355,19 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr,
static inline int rawv6_rcv_skb(struct sock *sk, struct sk_buff *skb)
{
+ enum skb_drop_reason reason;
+
if ((raw6_sk(sk)->checksum || rcu_access_pointer(sk->sk_filter)) &&
skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
/* Charge it to the socket. */
skb_dst_drop(skb);
- if (sock_queue_rcv_skb(sk, skb) < 0) {
- kfree_skb(skb);
+ if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
+ kfree_skb_reason(skb, reason);
return NET_RX_DROP;
}
@@ -386,7 +388,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_XFRM_POLICY);
return NET_RX_DROP;
}
@@ -410,7 +412,7 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb)
if (inet->hdrincl) {
if (skb_checksum_complete(skb)) {
atomic_inc(&sk->sk_drops);
- kfree_skb(skb);
+ kfree_skb_reason(skb, SKB_DROP_REASON_SKB_CSUM);
return NET_RX_DROP;
}
}
@@ -505,6 +507,7 @@ csum_copy_err:
static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
struct raw6_sock *rp)
{
+ struct ipv6_txoptions *opt;
struct sk_buff *skb;
int err = 0;
int offset;
@@ -522,6 +525,9 @@ static int rawv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
offset = rp->offset;
total_len = inet_sk(sk)->cork.base.length;
+ opt = inet6_sk(sk)->cork.opt;
+ total_len -= opt ? opt->opt_flen : 0;
+
if (offset >= total_len - 1) {
err = -EINVAL;
ip6_flush_pending_frames(sk);
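For context on the hunk above: cork.base.length counts the queued extension-header bytes (opt->opt_flen) together with the transport payload, while the IPV6_CHECKSUM offset must land inside the transport payload only. Subtracting opt_flen keeps the bounds check honest; a worked example with assumed figures:

/* assumed figures: 8-byte ICMPv6 payload, opt_flen = 16
 *
 *   cork.base.length = 8 + 16 = 24
 *   offset = 20  -> old check: 20 >= 24 - 1 is false, so the bogus
 *                   offset was accepted although it points past the
 *                   8-byte payload
 *   total_len = 24 - 16 = 8
 *                -> new check: 20 >= 8 - 1, rejected with -EINVAL
 */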
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index e74e0361fd92..0fdb03df2287 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -91,7 +91,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *);
static void ip6_dst_destroy(struct dst_entry *);
static void ip6_dst_ifdown(struct dst_entry *,
struct net_device *dev, int how);
-static int ip6_dst_gc(struct dst_ops *ops);
+static void ip6_dst_gc(struct dst_ops *ops);
static int ip6_pkt_discard(struct sk_buff *skb);
static int ip6_pkt_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -2593,9 +2593,10 @@ INDIRECT_CALLABLE_SCOPE struct rt6_info *ip6_pol_route_output(struct net *net,
return ip6_pol_route(net, table, fl6->flowi6_oif, fl6, skb, flags);
}
-struct dst_entry *ip6_route_output_flags_noref(struct net *net,
- const struct sock *sk,
- struct flowi6 *fl6, int flags)
+static struct dst_entry *ip6_route_output_flags_noref(struct net *net,
+ const struct sock *sk,
+ struct flowi6 *fl6,
+ int flags)
{
bool any_src;
@@ -2624,7 +2625,6 @@ struct dst_entry *ip6_route_output_flags_noref(struct net *net,
return fib6_rule_lookup(net, fl6, NULL, flags, ip6_pol_route_output);
}
-EXPORT_SYMBOL_GPL(ip6_route_output_flags_noref);
struct dst_entry *ip6_route_output_flags(struct net *net,
const struct sock *sk,
@@ -3284,23 +3284,17 @@ out:
return dst;
}
-static int ip6_dst_gc(struct dst_ops *ops)
+static void ip6_dst_gc(struct dst_ops *ops)
{
struct net *net = container_of(ops, struct net, ipv6.ip6_dst_ops);
int rt_min_interval = net->ipv6.sysctl.ip6_rt_gc_min_interval;
- int rt_max_size = net->ipv6.sysctl.ip6_rt_max_size;
int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity;
int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout;
unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc;
unsigned int val;
int entries;
- entries = dst_entries_get_fast(ops);
- if (entries > rt_max_size)
- entries = dst_entries_get_slow(ops);
-
- if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
- entries <= rt_max_size)
+ if (time_after(rt_last_gc + rt_min_interval, jiffies))
goto out;
fib6_run_gc(atomic_inc_return(&net->ipv6.ip6_rt_gc_expire), net, true);
@@ -3310,7 +3304,6 @@ static int ip6_dst_gc(struct dst_ops *ops)
out:
val = atomic_read(&net->ipv6.ip6_rt_gc_expire);
atomic_set(&net->ipv6.ip6_rt_gc_expire, val - (val >> rt_elasticity));
- return entries > rt_max_size;
}
static int ip6_nh_lookup_table(struct net *net, struct fib6_config *cfg,
@@ -5540,16 +5533,17 @@ static size_t rt6_nlmsg_size(struct fib6_info *f6i)
nexthop_for_each_fib6_nh(f6i->nh, rt6_nh_nlmsg_size,
&nexthop_len);
} else {
+ struct fib6_info *sibling, *next_sibling;
struct fib6_nh *nh = f6i->fib6_nh;
nexthop_len = 0;
if (f6i->fib6_nsiblings) {
- nexthop_len = nla_total_size(0) /* RTA_MULTIPATH */
- + NLA_ALIGN(sizeof(struct rtnexthop))
- + nla_total_size(16) /* RTA_GATEWAY */
- + lwtunnel_get_encap_size(nh->fib_nh_lws);
+ rt6_nh_nlmsg_size(nh, &nexthop_len);
- nexthop_len *= f6i->fib6_nsiblings;
+ list_for_each_entry_safe(sibling, next_sibling,
+ &f6i->fib6_siblings, fib6_siblings) {
+ rt6_nh_nlmsg_size(sibling->fib6_nh, &nexthop_len);
+ }
}
nexthop_len += lwtunnel_get_encap_size(nh->fib_nh_lws);
}
@@ -6512,7 +6506,7 @@ static int __net_init ip6_route_net_init(struct net *net)
#endif
net->ipv6.sysctl.flush_delay = 0;
- net->ipv6.sysctl.ip6_rt_max_size = 4096;
+ net->ipv6.sysctl.ip6_rt_max_size = INT_MAX;
net->ipv6.sysctl.ip6_rt_gc_min_interval = HZ / 2;
net->ipv6.sysctl.ip6_rt_gc_timeout = 60*HZ;
net->ipv6.sysctl.ip6_rt_gc_interval = 30*HZ;
diff --git a/net/ipv6/rpl_iptunnel.c b/net/ipv6/rpl_iptunnel.c
index ff691d9f4a04..b1c028df686e 100644
--- a/net/ipv6/rpl_iptunnel.c
+++ b/net/ipv6/rpl_iptunnel.c
@@ -13,7 +13,7 @@
#include <net/rpl.h>
struct rpl_iptunnel_encap {
- struct ipv6_rpl_sr_hdr srh[0];
+ DECLARE_FLEX_ARRAY(struct ipv6_rpl_sr_hdr, srh);
};
struct rpl_lwt {
diff --git a/net/ipv6/seg6_local.c b/net/ipv6/seg6_local.c
index 487f8e98deaa..dd433cc265c8 100644
--- a/net/ipv6/seg6_local.c
+++ b/net/ipv6/seg6_local.c
@@ -109,8 +109,15 @@ struct bpf_lwt_prog {
#define next_csid_chk_lcnode_fn_bits(flen) \
next_csid_chk_lcblock_bits(flen)
+#define SEG6_F_LOCAL_FLV_OP(flvname) BIT(SEG6_LOCAL_FLV_OP_##flvname)
+#define SEG6_F_LOCAL_FLV_PSP SEG6_F_LOCAL_FLV_OP(PSP)
+
+/* Supported RFC8986 Flavor operations are reported in this bitmask */
+#define SEG6_LOCAL_FLV8986_SUPP_OPS SEG6_F_LOCAL_FLV_PSP
+
/* Supported Flavor operations are reported in this bitmask */
-#define SEG6_LOCAL_FLV_SUPP_OPS (BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID))
+#define SEG6_LOCAL_FLV_SUPP_OPS (SEG6_F_LOCAL_FLV_OP(NEXT_CSID) | \
+ SEG6_LOCAL_FLV8986_SUPP_OPS)
struct seg6_flavors_info {
/* Flavor operations */
@@ -364,6 +371,14 @@ static void seg6_next_csid_advance_arg(struct in6_addr *addr,
memset(&addr->s6_addr[16 - fnc_octects], 0x00, fnc_octects);
}
+static int input_action_end_finish(struct sk_buff *skb,
+ struct seg6_local_lwt *slwt)
+{
+ seg6_lookup_nexthop(skb, NULL, 0);
+
+ return dst_input(skb);
+}
+
static int input_action_end_core(struct sk_buff *skb,
struct seg6_local_lwt *slwt)
{
@@ -375,9 +390,7 @@ static int input_action_end_core(struct sk_buff *skb,
advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
- seg6_lookup_nexthop(skb, NULL, 0);
-
- return dst_input(skb);
+ return input_action_end_finish(skb, slwt);
drop:
kfree_skb(skb);
@@ -395,9 +408,7 @@ static int end_next_csid_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
/* update DA */
seg6_next_csid_advance_arg(daddr, finfo);
- seg6_lookup_nexthop(skb, NULL, 0);
-
- return dst_input(skb);
+ return input_action_end_finish(skb, slwt);
}
static bool seg6_next_csid_enabled(__u32 fops)
@@ -405,15 +416,331 @@ static bool seg6_next_csid_enabled(__u32 fops)
return fops & BIT(SEG6_LOCAL_FLV_OP_NEXT_CSID);
}
+/* We describe the packet state in relation to the absence/presence of the SRH
+ * and the Segment Left (SL) field.
+ * For our purposes, it is not necessary to record the exact value of the SL
+ * when the SID List consists of two or more segments.
+ */
+enum seg6_local_pktinfo {
+ /* the order really matters! */
+ SEG6_LOCAL_PKTINFO_NOHDR = 0,
+ SEG6_LOCAL_PKTINFO_SL_ZERO,
+ SEG6_LOCAL_PKTINFO_SL_ONE,
+ SEG6_LOCAL_PKTINFO_SL_MORE,
+ __SEG6_LOCAL_PKTINFO_MAX,
+};
+
+#define SEG6_LOCAL_PKTINFO_MAX (__SEG6_LOCAL_PKTINFO_MAX - 1)
+
+static enum seg6_local_pktinfo seg6_get_srh_pktinfo(struct ipv6_sr_hdr *srh)
+{
+ __u8 sgl;
+
+ if (!srh)
+ return SEG6_LOCAL_PKTINFO_NOHDR;
+
+ sgl = srh->segments_left;
+ if (sgl < 2)
+ return SEG6_LOCAL_PKTINFO_SL_ZERO + sgl;
+
+ return SEG6_LOCAL_PKTINFO_SL_MORE;
+}
+
+enum seg6_local_flv_action {
+ SEG6_LOCAL_FLV_ACT_UNSPEC = 0,
+ SEG6_LOCAL_FLV_ACT_END,
+ SEG6_LOCAL_FLV_ACT_PSP,
+ SEG6_LOCAL_FLV_ACT_USP,
+ SEG6_LOCAL_FLV_ACT_USD,
+ __SEG6_LOCAL_FLV_ACT_MAX
+};
+
+#define SEG6_LOCAL_FLV_ACT_MAX (__SEG6_LOCAL_FLV_ACT_MAX - 1)
+
+/* The action table for RFC8986 flavors (see the flv8986_act_tbl below)
+ * contains the actions (i.e. processing operations) to be applied on packets
+ * when flavors are configured for an End* behavior.
+ * By combining the pktinfo data and the flavors mask, the macro
+ * computes the index used to access the elements (actions) stored in the
+ * action table. The index is structured as follows:
+ *
+ * index
+ * _______________/\________________
+ * / \
+ * +----------------+----------------+
+ * | pf | afm |
+ * +----------------+----------------+
+ * ph-1 ... p1 p0 fk-1 ... f1 f0
+ * MSB LSB
+ *
+ * where:
+ * - 'afm' (adjusted flavor mask) is the mask containing a combination of the
+ * RFC8986 flavors currently supported. 'afm' corresponds to the @fm
+ * argument of the macro whose value is right-shifted by 1 bit. By doing so,
+ * we discard the SEG6_LOCAL_FLV_OP_UNSPEC flag (bit 0 in @fm) which is
+ * never used here;
+ * - 'pf' encodes the packet info (pktinfo) regarding the presence/absence of
+ * the SRH, SL = 0, etc. 'pf' is set with the value of @pf provided as
+ * argument to the macro.
+ */
+#define flv8986_act_tbl_idx(pf, fm) \
+ ((((pf) << bits_per(SEG6_LOCAL_FLV8986_SUPP_OPS)) | \
+ ((fm) & SEG6_LOCAL_FLV8986_SUPP_OPS)) >> SEG6_LOCAL_FLV_OP_PSP)
+
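To make the packing concrete, here is the macro evaluated by hand under the current definitions, taking SEG6_LOCAL_FLV_OP_PSP = 1 (as in the UAPI), so SEG6_LOCAL_FLV8986_SUPP_OPS = 0x2 and bits_per() of it is 2:

/* flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_SL_ONE, SEG6_F_LOCAL_FLV_PSP)
 *
 *   pf = 2 (SL_ONE), fm = 0x2 (PSP)
 *   ((2 << 2) | (0x2 & 0x2)) >> 1 = (8 | 2) >> 1 = 5
 *
 * the largest index, SL_MORE (pf = 3) with PSP, is (12 | 2) >> 1 = 7,
 * which FLV8986_ACT_TBL_SIZE rounds up to 8 entries
 */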
+/* We compute the size of the action table by considering the RFC8986 flavors
+ * actually supported by the kernel. In this way, the size is automatically
+ * adjusted when new flavors are supported.
+ */
+#define FLV8986_ACT_TBL_SIZE \
+ roundup_pow_of_two(flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_MAX, \
+ SEG6_LOCAL_FLV8986_SUPP_OPS))
+
+/* tbl_cfg(act, pf, fm) macro is used to easily configure the action
+ * table; it accepts 3 arguments:
+ * i) @act, the suffix from SEG6_LOCAL_FLV_ACT_{act} representing
+ * the action that should be applied on the packet;
+ * ii) @pf, the suffix from SEG6_LOCAL_PKTINFO_{pf} reporting the packet
+ * info about the lack/presence of SRH, SRH with SL = 0, etc;
+ * iii) @fm, the mask of flavors.
+ */
+#define tbl_cfg(act, pf, fm) \
+ [flv8986_act_tbl_idx(SEG6_LOCAL_PKTINFO_##pf, \
+ (fm))] = SEG6_LOCAL_FLV_ACT_##act
+
+/* shorthand for improving readability */
+#define F_PSP SEG6_F_LOCAL_FLV_PSP
+
+/* The table contains, for each combination of the pktinfo data and
+ * flavors, the action that should be taken on a packet (e.g.
+ * "standard" Endpoint processing, Penultimate Segment Pop, etc).
+ *
+ * By default, table entries not explicitly configured are initialized with the
+ * SEG6_LOCAL_FLV_ACT_UNSPEC action, which generally has the effect of
+ * discarding the processed packet.
+ */
+static const u8 flv8986_act_tbl[FLV8986_ACT_TBL_SIZE] = {
+ /* PSP variant for packets carrying an SRH with SL = 1 */
+ tbl_cfg(PSP, SL_ONE, F_PSP),
+ /* standard End processing for packets carrying an SRH with SL > 1 */
+ tbl_cfg(END, SL_MORE, F_PSP),
+};
+
+#undef F_PSP
+#undef tbl_cfg
+
+/* For each flavor defined in RFC8986 (or a combination of them) an action is
+ * performed on the packet. The specific action depends on:
+ * - info extracted from the packet (i.e. pktinfo data) regarding the
+ * lack/presence of the SRH, and if the SRH is available, on the value of
+ * Segment Left field;
+ * - the mask of flavors configured for the specific SRv6 End* behavior.
+ *
+ * The function combines the pktinfo data and the flavors mask to evaluate the
+ * corresponding action to be taken on the packet.
+ */
+static enum seg6_local_flv_action
+seg6_local_flv8986_act_lookup(enum seg6_local_pktinfo pinfo, __u32 flvmask)
+{
+ unsigned long index;
+
+ /* check if the provided mask of flavors is supported */
+ if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS))
+ return SEG6_LOCAL_FLV_ACT_UNSPEC;
+
+ index = flv8986_act_tbl_idx(pinfo, flvmask);
+ if (unlikely(index >= FLV8986_ACT_TBL_SIZE))
+ return SEG6_LOCAL_FLV_ACT_UNSPEC;
+
+ return flv8986_act_tbl[index];
+}
+
+/* skb->data must be aligned with skb->network_header */
+static bool seg6_pop_srh(struct sk_buff *skb, int srhoff)
+{
+ struct ipv6_sr_hdr *srh;
+ struct ipv6hdr *iph;
+ __u8 srh_nexthdr;
+ int thoff = -1;
+ int srhlen;
+ int nhlen;
+
+ if (unlikely(srhoff < sizeof(*iph) ||
+ !pskb_may_pull(skb, srhoff + sizeof(*srh))))
+ return false;
+
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srhlen = ipv6_optlen(srh);
+
+ /* we are about to mangle the packet, let's check that we can write to it */
+ if (unlikely(skb_ensure_writable(skb, srhoff + srhlen)))
+ return false;
+
+ /* skb_ensure_writable() may change skb pointers; evaluate srh again */
+ srh = (struct ipv6_sr_hdr *)(skb->data + srhoff);
+ srh_nexthdr = srh->nexthdr;
+
+ if (unlikely(!skb_transport_header_was_set(skb)))
+ goto pull;
+
+ nhlen = skb_network_header_len(skb);
+ /* we have to deal with the transport header: it could be set before
+ * the SRH, after the SRH, or within it (which is considered wrong,
+ * however).
+ */
+ if (likely(nhlen <= srhoff))
+ thoff = nhlen;
+ else if (nhlen >= srhoff + srhlen)
+ /* transport_header is set after the SRH */
+ thoff = nhlen - srhlen;
+ else
+ /* transport_header falls inside the SRH; hence, we can't
+ * restore the transport_header pointer properly after
+ * the SRH has been removed.
+ */
+ return false;
+pull:
+ /* we need to pop the SRH:
+ * 1) first, we pull out everything from the IPv6 header up to and
+ * including the SRH, updating the rcsum along the way;
+ * 2) we overwrite (and thereby remove) the SRH by moving the IPv6
+ * header, along with any extension headers that precede the SRH,
+ * forward in the buffer;
+ * 3) at the end, we push back the pulled headers (except for the SRH,
+ * obviously).
+ */
+ skb_pull_rcsum(skb, srhoff + srhlen);
+ memmove(skb_network_header(skb) + srhlen, skb_network_header(skb),
+ srhoff);
+ skb_push(skb, srhoff);
+
+ skb_reset_network_header(skb);
+ skb_mac_header_rebuild(skb);
+ if (likely(thoff >= 0))
+ skb_set_transport_header(skb, thoff);
+
+ iph = ipv6_hdr(skb);
+ if (iph->nexthdr == NEXTHDR_ROUTING) {
+ iph->nexthdr = srh_nexthdr;
+ } else {
+ /* we must look for the extension header (EXTH, for short) that
+ * immediately precedes the SRH we have just removed.
+ * Then, we update the value of the EXTH nexthdr with the one
+ * contained in the SRH nexthdr.
+ */
+ unsigned int off = sizeof(*iph);
+ struct ipv6_opt_hdr *hp, _hdr;
+ __u8 nexthdr = iph->nexthdr;
+
+ for (;;) {
+ if (unlikely(!ipv6_ext_hdr(nexthdr) ||
+ nexthdr == NEXTHDR_NONE))
+ return false;
+
+ hp = skb_header_pointer(skb, off, sizeof(_hdr), &_hdr);
+ if (unlikely(!hp))
+ return false;
+
+ if (hp->nexthdr == NEXTHDR_ROUTING) {
+ hp->nexthdr = srh_nexthdr;
+ break;
+ }
+
+ switch (nexthdr) {
+ case NEXTHDR_FRAGMENT:
+ fallthrough;
+ case NEXTHDR_AUTH:
+ /* we expect SRH before FRAG and AUTH */
+ return false;
+ default:
+ off += ipv6_optlen(hp);
+ break;
+ }
+
+ nexthdr = hp->nexthdr;
+ }
+ }
+
+ iph->payload_len = htons(skb->len - sizeof(struct ipv6hdr));
+
+ skb_postpush_rcsum(skb, iph, srhoff);
+
+ return true;
+}
+
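The three-step pop boils down to sliding the headers that precede the SRH forward over it, instead of copying the (typically larger) payload backwards. A toy userspace sketch of the same move on a flat buffer:

#include <string.h>

/* remove `len` bytes at offset `off` by sliding the prefix forward,
 * mirroring the skb_pull_rcsum()/memmove()/skb_push() sequence above
 */
static unsigned char *pop_middle(unsigned char *buf, size_t off, size_t len)
{
	memmove(buf + len, buf, off);	/* prefix overwrites the header */
	return buf + len;		/* new start of the packet */
}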
+/* process the packet on the basis of the RFC8986 flavors set for the given
+ * SRv6 End behavior instance.
+ */
+static int end_flv8986_core(struct sk_buff *skb, struct seg6_local_lwt *slwt)
+{
+ const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ enum seg6_local_flv_action action;
+ enum seg6_local_pktinfo pinfo;
+ struct ipv6_sr_hdr *srh;
+ __u32 flvmask;
+ int srhoff;
+
+ srh = seg6_get_srh(skb, 0);
+ srhoff = srh ? ((unsigned char *)srh - skb->data) : 0;
+ pinfo = seg6_get_srh_pktinfo(srh);
+#ifdef CONFIG_IPV6_SEG6_HMAC
+ if (srh && !seg6_hmac_validate_skb(skb))
+ goto drop;
+#endif
+ flvmask = finfo->flv_ops;
+ if (unlikely(flvmask & ~SEG6_LOCAL_FLV8986_SUPP_OPS)) {
+ pr_warn_once("seg6local: invalid RFC8986 flavors\n");
+ goto drop;
+ }
+
+ /* retrieve the action triggered by the combination of pktinfo data and
+ * the flavors mask.
+ */
+ action = seg6_local_flv8986_act_lookup(pinfo, flvmask);
+ switch (action) {
+ case SEG6_LOCAL_FLV_ACT_END:
+ /* process the packet as the "standard" End behavior */
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+ break;
+ case SEG6_LOCAL_FLV_ACT_PSP:
+ advance_nextseg(srh, &ipv6_hdr(skb)->daddr);
+
+ if (unlikely(!seg6_pop_srh(skb, srhoff)))
+ goto drop;
+ break;
+ case SEG6_LOCAL_FLV_ACT_UNSPEC:
+ fallthrough;
+ default:
+ /* by default, we drop the packet since we could not find a
+ * suitable action.
+ */
+ goto drop;
+ }
+
+ return input_action_end_finish(skb, slwt);
+
+drop:
+ kfree_skb(skb);
+ return -EINVAL;
+}
+
/* regular endpoint function */
static int input_action_end(struct sk_buff *skb, struct seg6_local_lwt *slwt)
{
const struct seg6_flavors_info *finfo = &slwt->flv_info;
+ __u32 fops = finfo->flv_ops;
- if (seg6_next_csid_enabled(finfo->flv_ops))
+ if (!fops)
+ return input_action_end_core(skb, slwt);
+
+ /* check for the presence of NEXT-C-SID since it applies first */
+ if (seg6_next_csid_enabled(fops))
return end_next_csid_core(skb, slwt);
- return input_action_end_core(skb, slwt);
+ /* the specific processing function to be performed on the packet
+ * depends on the combination of flavors defined in RFC8986 and some
+ * information extracted from the packet, e.g. presence/absence of SRH,
+ * Segment Left = 0, etc.
+ */
+ return end_flv8986_core(skb, slwt);
}
/* regular endpoint, and forward to specified nexthop */
@@ -2300,6 +2627,13 @@ int __init seg6_local_init(void)
BUILD_BUG_ON(next_csid_chk_lcblock_bits(SEG6_LOCAL_LCBLOCK_DBITS));
BUILD_BUG_ON(next_csid_chk_lcnode_fn_bits(SEG6_LOCAL_LCNODE_FN_DBITS));
+ /* To be memory efficient, we use 'u8' to represent the different
+ * actions related to RFC8986 flavors. If the kernel build stops here,
+ * it means that it is not possible to correctly encode these actions
+ * with the data type chosen for the action table.
+ */
+ BUILD_BUG_ON(SEG6_LOCAL_FLV_ACT_MAX > (typeof(flv8986_act_tbl[0]))~0U);
+
return lwtunnel_encap_add_ops(&seg6_local_ops,
LWTUNNEL_ENCAP_SEG6_LOCAL);
}
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 11b736a76bd7..1bf93b61aa06 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -272,6 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
fl6.flowi6_proto = IPPROTO_TCP;
fl6.daddr = sk->sk_v6_daddr;
fl6.saddr = saddr ? *saddr : np->saddr;
+ fl6.flowlabel = ip6_make_flowinfo(np->tclass, np->flow_label);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
fl6.fl6_dport = usin->sin6_port;
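ip6_make_flowinfo() packs the traffic class into the bits just above the 20-bit flow label; conceptually, with illustrative values in host byte order:

/* tclass = 0xb8 (EF), flow label = 0x12345
 *
 *   flowinfo = (0xb8 << 20) | 0x12345 = 0x0b812345
 *
 * the top version nibble stays clear and is filled in by the IPv6
 * output path
 */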
@@ -1387,14 +1388,11 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *
/* Clone pktoptions received with SYN, if we own the req */
if (ireq->pktopts) {
- newnp->pktoptions = skb_clone(ireq->pktopts,
- sk_gfp_mask(sk, GFP_ATOMIC));
+ newnp->pktoptions = skb_clone_and_charge_r(ireq->pktopts, newsk);
consume_skb(ireq->pktopts);
ireq->pktopts = NULL;
- if (newnp->pktoptions) {
+ if (newnp->pktoptions)
tcp_v6_restore_cb(newnp->pktoptions);
- skb_set_owner_r(newnp->pktoptions, newsk);
- }
}
} else {
if (!req_unhash && found_dup_sk) {
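skb_clone_and_charge_r() folds the old skb_clone() + skb_set_owner_r() pair into one helper that also checks the receive buffer budget before charging the clone. Roughly (a sketch inferred from the call sites, not the helper's verbatim definition):

/* sketch: clone skb and charge the clone to sk's receive memory,
 * dropping it when the receive budget cannot be scheduled
 */
static struct sk_buff *clone_and_charge_r_sketch(struct sk_buff *skb,
						 struct sock *sk)
{
	skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
	if (!skb)
		return NULL;

	if (!sk_rmem_schedule(sk, skb, skb->truesize)) {
		kfree_skb(skb);
		return NULL;
	}

	skb_set_owner_r(skb, sk);
	return skb;
}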
@@ -1466,7 +1464,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
--ANK (980728)
*/
if (np->rxopt.all)
- opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
+ opt_skb = skb_clone_and_charge_r(skb, sk);
reason = SKB_DROP_REASON_NOT_SPECIFIED;
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1552,7 +1550,6 @@ ipv6_pktoptions:
if (np->repflow)
np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
- skb_set_owner_r(opt_skb, sk);
tcp_v6_restore_cb(opt_skb);
opt_skb = xchg(&np->pktoptions, opt_skb);
} else {
@@ -1708,8 +1705,9 @@ process:
if (static_branch_unlikely(&ip6_min_hopcount)) {
/* min_hopcount can be changed concurrently from do_ipv6_setsockopt() */
- if (hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount)) {
+ if (unlikely(hdr->hop_limit < READ_ONCE(tcp_inet6_sk(sk)->min_hopcount))) {
__NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
+ drop_reason = SKB_DROP_REASON_TCP_MINTTL;
goto discard_and_relse;
}
}
diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c
index 890a2423f559..cfe828bd7fc6 100644
--- a/net/kcm/kcmsock.c
+++ b/net/kcm/kcmsock.c
@@ -28,6 +28,7 @@
#include <net/netns/generic.h>
#include <net/sock.h>
#include <uapi/linux/kcm.h>
+#include <trace/events/sock.h>
unsigned int kcm_net_id;
@@ -349,6 +350,8 @@ static void psock_data_ready(struct sock *sk)
{
struct kcm_psock *psock;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
psock = (struct kcm_psock *)sk->sk_user_data;
diff --git a/net/key/af_key.c b/net/key/af_key.c
index 2bdbcec781cd..a815f5ab4c49 100644
--- a/net/key/af_key.c
+++ b/net/key/af_key.c
@@ -1261,7 +1261,7 @@ static struct xfrm_state * pfkey_msg2xfrm_state(struct net *net,
const struct sadb_x_nat_t_type* n_type;
struct xfrm_encap_tmpl *natt;
- x->encap = kmalloc(sizeof(*x->encap), GFP_KERNEL);
+ x->encap = kzalloc(sizeof(*x->encap), GFP_KERNEL);
if (!x->encap) {
err = -ENOMEM;
goto out;
diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c
index 9a1415fe3fa7..03608d3ded4b 100644
--- a/net/l2tp/l2tp_core.c
+++ b/net/l2tp/l2tp_core.c
@@ -104,9 +104,9 @@ static struct workqueue_struct *l2tp_wq;
/* per-net private data for this module */
static unsigned int l2tp_net_id;
struct l2tp_net {
- struct list_head l2tp_tunnel_list;
- /* Lock for write access to l2tp_tunnel_list */
- spinlock_t l2tp_tunnel_list_lock;
+ /* Lock for write access to l2tp_tunnel_idr */
+ spinlock_t l2tp_tunnel_idr_lock;
+ struct idr l2tp_tunnel_idr;
struct hlist_head l2tp_session_hlist[L2TP_HASH_SIZE_2];
/* Lock for write access to l2tp_session_hlist */
spinlock_t l2tp_session_hlist_lock;
@@ -208,13 +208,10 @@ struct l2tp_tunnel *l2tp_tunnel_get(const struct net *net, u32 tunnel_id)
struct l2tp_tunnel *tunnel;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (tunnel->tunnel_id == tunnel_id &&
- refcount_inc_not_zero(&tunnel->ref_count)) {
- rcu_read_unlock_bh();
-
- return tunnel;
- }
+ tunnel = idr_find(&pn->l2tp_tunnel_idr, tunnel_id);
+ if (tunnel && refcount_inc_not_zero(&tunnel->ref_count)) {
+ rcu_read_unlock_bh();
+ return tunnel;
}
rcu_read_unlock_bh();
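The lookup keeps the usual RCU discipline: an object found via idr_find() is handed out only if its refcount can be raised from non-zero, which closes the race with a concurrent destructor. The pattern in isolation (struct obj is an assumed placeholder):

/* RCU + refcount lookup sketch */
static struct obj *obj_get(struct idr *idr, u32 id)
{
	struct obj *o;

	rcu_read_lock_bh();
	o = idr_find(idr, id);			/* no reference held yet */
	if (o && !refcount_inc_not_zero(&o->ref_count))
		o = NULL;			/* lost the race with teardown */
	rcu_read_unlock_bh();

	return o;				/* non-NULL => referenced */
}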
@@ -224,13 +221,14 @@ EXPORT_SYMBOL_GPL(l2tp_tunnel_get);
struct l2tp_tunnel *l2tp_tunnel_get_nth(const struct net *net, int nth)
{
- const struct l2tp_net *pn = l2tp_pernet(net);
+ struct l2tp_net *pn = l2tp_pernet(net);
+ unsigned long tunnel_id, tmp;
struct l2tp_tunnel *tunnel;
int count = 0;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- if (++count > nth &&
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel && ++count > nth &&
refcount_inc_not_zero(&tunnel->ref_count)) {
rcu_read_unlock_bh();
return tunnel;
@@ -1043,7 +1041,7 @@ static int l2tp_xmit_core(struct l2tp_session *session, struct sk_buff *skb, uns
IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED);
nf_reset_ct(skb);
- bh_lock_sock(sk);
+ bh_lock_sock_nested(sk);
if (sock_owned_by_user(sk)) {
kfree_skb(skb);
ret = NET_XMIT_DROP;
@@ -1227,6 +1225,15 @@ static void l2tp_udp_encap_destroy(struct sock *sk)
l2tp_tunnel_delete(tunnel);
}
+static void l2tp_tunnel_remove(struct net *net, struct l2tp_tunnel *tunnel)
+{
+ struct l2tp_net *pn = l2tp_pernet(net);
+
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ idr_remove(&pn->l2tp_tunnel_idr, tunnel->tunnel_id);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+}
+
/* Workqueue tunnel deletion function */
static void l2tp_tunnel_del_work(struct work_struct *work)
{
@@ -1234,7 +1241,6 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
del_work);
struct sock *sk = tunnel->sock;
struct socket *sock = sk->sk_socket;
- struct l2tp_net *pn;
l2tp_tunnel_closeall(tunnel);
@@ -1248,12 +1254,7 @@ static void l2tp_tunnel_del_work(struct work_struct *work)
}
}
- /* Remove the tunnel struct from the tunnel list */
- pn = l2tp_pernet(tunnel->l2tp_net);
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_del_rcu(&tunnel->list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
+ l2tp_tunnel_remove(tunnel->l2tp_net, tunnel);
/* drop initial ref */
l2tp_tunnel_dec_refcount(tunnel);
@@ -1384,8 +1385,6 @@ out:
return err;
}
-static struct lock_class_key l2tp_socket_class;
-
int l2tp_tunnel_create(int fd, int version, u32 tunnel_id, u32 peer_tunnel_id,
struct l2tp_tunnel_cfg *cfg, struct l2tp_tunnel **tunnelp)
{
@@ -1455,12 +1454,19 @@ static int l2tp_validate_socket(const struct sock *sk, const struct net *net,
int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
struct l2tp_tunnel_cfg *cfg)
{
- struct l2tp_tunnel *tunnel_walk;
- struct l2tp_net *pn;
+ struct l2tp_net *pn = l2tp_pernet(net);
+ u32 tunnel_id = tunnel->tunnel_id;
struct socket *sock;
struct sock *sk;
int ret;
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ ret = idr_alloc_u32(&pn->l2tp_tunnel_idr, NULL, &tunnel_id, tunnel_id,
+ GFP_ATOMIC);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
+ if (ret)
+ return ret == -ENOSPC ? -EEXIST : ret;
+
if (tunnel->fd < 0) {
ret = l2tp_tunnel_sock_create(net, tunnel->tunnel_id,
tunnel->peer_tunnel_id, cfg,
@@ -1474,6 +1480,7 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
}
sk = sock->sk;
+ lock_sock(sk);
write_lock_bh(&sk->sk_callback_lock);
ret = l2tp_validate_socket(sk, net, tunnel->encap);
if (ret < 0)
@@ -1481,24 +1488,6 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
rcu_assign_sk_user_data(sk, tunnel);
write_unlock_bh(&sk->sk_callback_lock);
- tunnel->l2tp_net = net;
- pn = l2tp_pernet(net);
-
- sock_hold(sk);
- tunnel->sock = sk;
-
- spin_lock_bh(&pn->l2tp_tunnel_list_lock);
- list_for_each_entry(tunnel_walk, &pn->l2tp_tunnel_list, list) {
- if (tunnel_walk->tunnel_id == tunnel->tunnel_id) {
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
- sock_put(sk);
- ret = -EEXIST;
- goto err_sock;
- }
- }
- list_add_rcu(&tunnel->list, &pn->l2tp_tunnel_list);
- spin_unlock_bh(&pn->l2tp_tunnel_list_lock);
-
if (tunnel->encap == L2TP_ENCAPTYPE_UDP) {
struct udp_tunnel_sock_cfg udp_cfg = {
.sk_user_data = tunnel,
@@ -1512,9 +1501,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
tunnel->old_sk_destruct = sk->sk_destruct;
sk->sk_destruct = &l2tp_tunnel_destruct;
- lockdep_set_class_and_name(&sk->sk_lock.slock, &l2tp_socket_class,
- "l2tp_sock");
sk->sk_allocation = GFP_ATOMIC;
+ release_sock(sk);
+
+ sock_hold(sk);
+ tunnel->sock = sk;
+ tunnel->l2tp_net = net;
+
+ spin_lock_bh(&pn->l2tp_tunnel_idr_lock);
+ idr_replace(&pn->l2tp_tunnel_idr, tunnel, tunnel->tunnel_id);
+ spin_unlock_bh(&pn->l2tp_tunnel_idr_lock);
trace_register_tunnel(tunnel);
@@ -1523,17 +1519,16 @@ int l2tp_tunnel_register(struct l2tp_tunnel *tunnel, struct net *net,
return 0;
-err_sock:
- write_lock_bh(&sk->sk_callback_lock);
- rcu_assign_sk_user_data(sk, NULL);
err_inval_sock:
write_unlock_bh(&sk->sk_callback_lock);
+ release_sock(sk);
if (tunnel->fd < 0)
sock_release(sock);
else
sockfd_put(sock);
err:
+ l2tp_tunnel_remove(net, tunnel);
return ret;
}
EXPORT_SYMBOL_GPL(l2tp_tunnel_register);
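The registration path now follows a reserve-then-publish pattern: the tunnel ID is claimed early with a NULL placeholder (so a duplicate ID fails fast with -EEXIST), and the pointer only becomes visible via idr_replace() once the socket side is fully set up. Reduced to its essentials (idr, lock, obj and id are assumed):

spin_lock_bh(&lock);
ret = idr_alloc_u32(&idr, NULL, &id, id, GFP_ATOMIC);	/* reserve */
spin_unlock_bh(&lock);
if (ret)
	return ret == -ENOSPC ? -EEXIST : ret;

/* ... initialize obj; concurrent lookups of `id` see NULL ... */

spin_lock_bh(&lock);
idr_replace(&idr, obj, id);				/* publish */
spin_unlock_bh(&lock);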
@@ -1647,8 +1642,8 @@ static __net_init int l2tp_init_net(struct net *net)
struct l2tp_net *pn = net_generic(net, l2tp_net_id);
int hash;
- INIT_LIST_HEAD(&pn->l2tp_tunnel_list);
- spin_lock_init(&pn->l2tp_tunnel_list_lock);
+ idr_init(&pn->l2tp_tunnel_idr);
+ spin_lock_init(&pn->l2tp_tunnel_idr_lock);
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
INIT_HLIST_HEAD(&pn->l2tp_session_hlist[hash]);
@@ -1662,11 +1657,13 @@ static __net_exit void l2tp_exit_net(struct net *net)
{
struct l2tp_net *pn = l2tp_pernet(net);
struct l2tp_tunnel *tunnel = NULL;
+ unsigned long tunnel_id, tmp;
int hash;
rcu_read_lock_bh();
- list_for_each_entry_rcu(tunnel, &pn->l2tp_tunnel_list, list) {
- l2tp_tunnel_delete(tunnel);
+ idr_for_each_entry_ul(&pn->l2tp_tunnel_idr, tunnel, tmp, tunnel_id) {
+ if (tunnel)
+ l2tp_tunnel_delete(tunnel);
}
rcu_read_unlock_bh();
@@ -1676,6 +1673,7 @@ static __net_exit void l2tp_exit_net(struct net *net)
for (hash = 0; hash < L2TP_HASH_SIZE_2; hash++)
WARN_ON_ONCE(!hlist_empty(&pn->l2tp_session_hlist[hash]));
+ idr_destroy(&pn->l2tp_tunnel_idr);
}
static struct pernet_operations l2tp_net_ops = {
diff --git a/net/l2tp/l2tp_ppp.c b/net/l2tp/l2tp_ppp.c
index db2e584c625e..f011af6601c9 100644
--- a/net/l2tp/l2tp_ppp.c
+++ b/net/l2tp/l2tp_ppp.c
@@ -650,54 +650,22 @@ static int pppol2tp_tunnel_mtu(const struct l2tp_tunnel *tunnel)
return mtu - PPPOL2TP_HEADER_OVERHEAD;
}
-/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
- */
-static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
- int sockaddr_len, int flags)
+static struct l2tp_tunnel *pppol2tp_tunnel_get(struct net *net,
+ const struct l2tp_connect_info *info,
+ bool *new_tunnel)
{
- struct sock *sk = sock->sk;
- struct pppox_sock *po = pppox_sk(sk);
- struct l2tp_session *session = NULL;
- struct l2tp_connect_info info;
struct l2tp_tunnel *tunnel;
- struct pppol2tp_session *ps;
- struct l2tp_session_cfg cfg = { 0, };
- bool drop_refcnt = false;
- bool drop_tunnel = false;
- bool new_session = false;
- bool new_tunnel = false;
int error;
- error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info);
- if (error < 0)
- return error;
+ *new_tunnel = false;
- lock_sock(sk);
-
- /* Check for already bound sockets */
- error = -EBUSY;
- if (sk->sk_state & PPPOX_CONNECTED)
- goto end;
-
- /* We don't supporting rebinding anyway */
- error = -EALREADY;
- if (sk->sk_user_data)
- goto end; /* socket is already attached */
-
- /* Don't bind if tunnel_id is 0 */
- error = -EINVAL;
- if (!info.tunnel_id)
- goto end;
-
- tunnel = l2tp_tunnel_get(sock_net(sk), info.tunnel_id);
- if (tunnel)
- drop_tunnel = true;
+ tunnel = l2tp_tunnel_get(net, info->tunnel_id);
/* Special case: create tunnel context if session_id and
* peer_session_id is 0. Otherwise look up tunnel using supplied
* tunnel id.
*/
- if (!info.session_id && !info.peer_session_id) {
+ if (!info->session_id && !info->peer_session_id) {
if (!tunnel) {
struct l2tp_tunnel_cfg tcfg = {
.encap = L2TP_ENCAPTYPE_UDP,
@@ -706,40 +674,82 @@ static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
/* Prevent l2tp_tunnel_register() from trying to set up
* a kernel socket.
*/
- if (info.fd < 0) {
- error = -EBADF;
- goto end;
- }
+ if (info->fd < 0)
+ return ERR_PTR(-EBADF);
- error = l2tp_tunnel_create(info.fd,
- info.version,
- info.tunnel_id,
- info.peer_tunnel_id, &tcfg,
+ error = l2tp_tunnel_create(info->fd,
+ info->version,
+ info->tunnel_id,
+ info->peer_tunnel_id, &tcfg,
&tunnel);
if (error < 0)
- goto end;
+ return ERR_PTR(error);
l2tp_tunnel_inc_refcount(tunnel);
- error = l2tp_tunnel_register(tunnel, sock_net(sk),
- &tcfg);
+ error = l2tp_tunnel_register(tunnel, net, &tcfg);
if (error < 0) {
kfree(tunnel);
- goto end;
+ return ERR_PTR(error);
}
- drop_tunnel = true;
- new_tunnel = true;
+
+ *new_tunnel = true;
}
} else {
/* Error if we can't find the tunnel */
- error = -ENOENT;
if (!tunnel)
- goto end;
+ return ERR_PTR(-ENOENT);
/* Error if socket is not prepped */
- if (!tunnel->sock)
- goto end;
+ if (!tunnel->sock) {
+ l2tp_tunnel_dec_refcount(tunnel);
+ return ERR_PTR(-ENOENT);
+ }
}
+ return tunnel;
+}
+
+/* connect() handler. Attach a PPPoX socket to a tunnel UDP socket
+ */
+static int pppol2tp_connect(struct socket *sock, struct sockaddr *uservaddr,
+ int sockaddr_len, int flags)
+{
+ struct sock *sk = sock->sk;
+ struct pppox_sock *po = pppox_sk(sk);
+ struct l2tp_session *session = NULL;
+ struct l2tp_connect_info info;
+ struct l2tp_tunnel *tunnel;
+ struct pppol2tp_session *ps;
+ struct l2tp_session_cfg cfg = { 0, };
+ bool drop_refcnt = false;
+ bool new_session = false;
+ bool new_tunnel = false;
+ int error;
+
+ error = pppol2tp_sockaddr_get_info(uservaddr, sockaddr_len, &info);
+ if (error < 0)
+ return error;
+
+ /* Don't bind if tunnel_id is 0 */
+ if (!info.tunnel_id)
+ return -EINVAL;
+
+ tunnel = pppol2tp_tunnel_get(sock_net(sk), &info, &new_tunnel);
+ if (IS_ERR(tunnel))
+ return PTR_ERR(tunnel);
+
+ lock_sock(sk);
+
+ /* Check for already bound sockets */
+ error = -EBUSY;
+ if (sk->sk_state & PPPOX_CONNECTED)
+ goto end;
+
+ /* We don't support rebinding anyway */
+ error = -EALREADY;
+ if (sk->sk_user_data)
+ goto end; /* socket is already attached */
+
if (tunnel->peer_tunnel_id == 0)
tunnel->peer_tunnel_id = info.peer_tunnel_id;
@@ -840,8 +850,7 @@ end:
}
if (drop_refcnt)
l2tp_session_dec_refcount(session);
- if (drop_tunnel)
- l2tp_tunnel_dec_refcount(tunnel);
+ l2tp_tunnel_dec_refcount(tunnel);
release_sock(sk);
return error;
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 9c40f8d3bce8..f9514bacbd4a 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -491,7 +491,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
{
struct tid_ampdu_tx *tid_tx;
struct ieee80211_local *local = sta->local;
- struct ieee80211_sub_if_data *sdata = sta->sdata;
+ struct ieee80211_sub_if_data *sdata;
struct ieee80211_ampdu_params params = {
.sta = &sta->sta,
.action = IEEE80211_AMPDU_TX_START,
@@ -511,8 +511,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
clear_bit(HT_AGG_STATE_WANT_START, &tid_tx->state);
- ieee80211_agg_stop_txq(sta, tid);
-
/*
* Make sure no packets are being processed. This ensures that
* we have a valid starting sequence number and that in-flight
@@ -521,6 +519,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
synchronize_net();
+ sdata = sta->sdata;
params.ssn = sta->tid_seq[tid] >> 4;
ret = drv_ampdu_action(local, sdata, &params);
tid_tx->ssn = params.ssn;
@@ -534,6 +533,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
*/
set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state);
} else if (ret) {
+ if (!sdata)
+ return;
+
ht_dbg(sdata,
"BA request denied - HW unavailable for %pM tid %d\n",
sta->sta.addr, tid);
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 8f9a2ab502b3..8eb342300868 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -147,6 +147,7 @@ static int ieee80211_set_ap_mbssid_options(struct ieee80211_sub_if_data *sdata,
link_conf->bssid_index = 0;
link_conf->nontransmitted = false;
link_conf->ema_ap = false;
+ link_conf->bssid_indicator = 0;
if (sdata->vif.type != NL80211_IFTYPE_AP || !params.tx_wdev)
return -EINVAL;
@@ -1219,7 +1220,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
struct ieee80211_local *local = sdata->local;
struct beacon_data *old;
struct ieee80211_sub_if_data *vlan;
- u32 changed = BSS_CHANGED_BEACON_INT |
+ u64 changed = BSS_CHANGED_BEACON_INT |
BSS_CHANGED_BEACON_ENABLED |
BSS_CHANGED_BEACON |
BSS_CHANGED_P2P_PS |
@@ -1251,6 +1252,21 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
prev_beacon_int = link_conf->beacon_int;
link_conf->beacon_int = params->beacon_interval;
+ if (params->vht_cap) {
+ link_conf->vht_su_beamformer =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE);
+ link_conf->vht_su_beamformee =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE);
+ link_conf->vht_mu_beamformer =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE);
+ link_conf->vht_mu_beamformee =
+ params->vht_cap->vht_cap_info &
+ cpu_to_le32(IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE);
+ }
+
if (params->he_cap && params->he_oper) {
link_conf->he_support = true;
link_conf->htc_trig_based_pkt_ext =
@@ -1265,6 +1281,26 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
changed |= BSS_CHANGED_HE_BSS_COLOR;
}
+ if (params->he_cap) {
+ link_conf->he_su_beamformer =
+ params->he_cap->phy_cap_info[3] &
+ IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER;
+ link_conf->he_su_beamformee =
+ params->he_cap->phy_cap_info[4] &
+ IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE;
+ link_conf->he_mu_beamformer =
+ params->he_cap->phy_cap_info[4] &
+ IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER;
+ link_conf->he_full_ul_mumimo =
+ params->he_cap->phy_cap_info[2] &
+ IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO;
+ }
+
+ if (params->eht_cap) {
+ link_conf->eht_puncturing = params->punct_bitmap;
+ changed |= BSS_CHANGED_EHT_PUNCTURING;
+ }
+
if (sdata->vif.type == NL80211_IFTYPE_AP &&
params->mbssid_config.tx_wdev) {
err = ieee80211_set_ap_mbssid_options(sdata,
@@ -1511,6 +1547,12 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev,
kfree(link_conf->ftmr_params);
link_conf->ftmr_params = NULL;
+ sdata->vif.mbssid_tx_vif = NULL;
+ link_conf->bssid_index = 0;
+ link_conf->nontransmitted = false;
+ link_conf->ema_ap = false;
+ link_conf->bssid_indicator = 0;
+
__sta_info_flush(sdata, true);
ieee80211_free_keys(sdata, true);
@@ -2727,7 +2769,7 @@ static int ieee80211_scan(struct wiphy *wiphy,
* If the scan has been forced (and the driver supports
* forcing), don't care about being beaconing already.
* This will create problems to the attached stations (e.g. all
- * the frames sent while scanning on other channel will be
+ * the frames sent while scanning on other channels will be
* lost)
*/
if (sdata->deflink.u.ap.beacon &&
@@ -3509,6 +3551,12 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
lockdep_assert_held(&local->mtx);
lockdep_assert_held(&local->chanctx_mtx);
+ if (sdata->vif.bss_conf.eht_puncturing != sdata->vif.bss_conf.csa_punct_bitmap) {
+ sdata->vif.bss_conf.eht_puncturing =
+ sdata->vif.bss_conf.csa_punct_bitmap;
+ changed |= BSS_CHANGED_EHT_PUNCTURING;
+ }
+
/*
* using reservation isn't immediate as it may be deferred until later
* with multi-vif. once reservation is complete it will re-schedule the
@@ -3550,7 +3598,8 @@ static int __ieee80211_csa_finalize(struct ieee80211_sub_if_data *sdata)
if (err)
return err;
- cfg80211_ch_switch_notify(sdata->dev, &sdata->deflink.csa_chandef, 0);
+ cfg80211_ch_switch_notify(sdata->dev, &sdata->deflink.csa_chandef, 0,
+ sdata->vif.bss_conf.eht_puncturing);
return 0;
}
@@ -3812,9 +3861,13 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
goto out;
}
+ if (params->punct_bitmap && !sdata->vif.bss_conf.eht_support)
+ goto out;
+
sdata->deflink.csa_chandef = params->chandef;
sdata->deflink.csa_block_tx = params->block_tx;
sdata->vif.bss_conf.csa_active = true;
+ sdata->vif.bss_conf.csa_punct_bitmap = params->punct_bitmap;
if (sdata->deflink.csa_block_tx)
ieee80211_stop_vif_queues(local, sdata,
@@ -3822,7 +3875,8 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
cfg80211_ch_switch_started_notify(sdata->dev,
&sdata->deflink.csa_chandef, 0,
- params->count, params->block_tx);
+ params->count, params->block_tx,
+ sdata->vif.bss_conf.csa_punct_bitmap);
if (changed) {
ieee80211_link_info_change_notify(sdata, &sdata->deflink,
@@ -4134,7 +4188,7 @@ static int ieee80211_set_ap_chanwidth(struct wiphy *wiphy,
struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
struct ieee80211_link_data *link;
int ret;
- u32 changed = 0;
+ u64 changed = 0;
link = sdata_dereference(sdata->link[link_id], sdata);
@@ -4615,6 +4669,19 @@ unlock:
sdata_unlock(sdata);
}
+void ieee80211_color_collision_detection_work(struct work_struct *work)
+{
+ struct delayed_work *delayed_work = to_delayed_work(work);
+ struct ieee80211_link_data *link =
+ container_of(delayed_work, struct ieee80211_link_data,
+ color_collision_detect_work);
+ struct ieee80211_sub_if_data *sdata = link->sdata;
+
+ sdata_lock(sdata);
+ cfg80211_obss_color_collision_notify(sdata->dev, link->color_bitmap);
+ sdata_unlock(sdata);
+}
+
void ieee80211_color_change_finish(struct ieee80211_vif *vif)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
@@ -4625,17 +4692,27 @@ void ieee80211_color_change_finish(struct ieee80211_vif *vif)
EXPORT_SYMBOL_GPL(ieee80211_color_change_finish);
void
-ieeee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
+ieee80211_obss_color_collision_notify(struct ieee80211_vif *vif,
u64 color_bitmap, gfp_t gfp)
{
struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif);
+ struct ieee80211_link_data *link = &sdata->deflink;
if (sdata->vif.bss_conf.color_change_active || sdata->vif.bss_conf.csa_active)
return;
- cfg80211_obss_color_collision_notify(sdata->dev, color_bitmap, gfp);
+ if (delayed_work_pending(&link->color_collision_detect_work))
+ return;
+
+ link->color_bitmap = color_bitmap;
+ /* queue the color collision detection event every 500 ms in order to
+ * avoid sending too many netlink messages to userspace.
+ */
+ ieee80211_queue_delayed_work(&sdata->local->hw,
+ &link->color_collision_detect_work,
+ msecs_to_jiffies(500));
}
-EXPORT_SYMBOL_GPL(ieeee80211_obss_color_collision_notify);
+EXPORT_SYMBOL_GPL(ieee80211_obss_color_collision_notify);
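Guarding the queue with delayed_work_pending() is a plain debounce: bursts of collision events collapse into at most one notification per 500 ms window, with the first bitmap seen in a window being the one reported. Stripped of the mac80211 specifics (struct ctx is an assumed placeholder):

/* debounce sketch: at most one report per interval */
static void report_collision(struct ctx *c, u64 bitmap)
{
	if (delayed_work_pending(&c->work))
		return;			/* a report is already scheduled */

	c->bitmap = bitmap;
	schedule_delayed_work(&c->work, msecs_to_jiffies(500));
}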
static int
ieee80211_color_change(struct wiphy *wiphy, struct net_device *dev,
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index e72cf0749d49..dbc34fbe7c8f 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -1916,7 +1916,7 @@ int ieee80211_link_use_reserved_context(struct ieee80211_link_data *link)
int ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
const struct cfg80211_chan_def *chandef,
- u32 *changed)
+ u64 *changed)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_bss_conf *link_conf = link->conf;
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index c87e1137e5da..0bac9af3ca96 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -603,8 +603,6 @@ IEEE80211_IF_FILE(fwded_mcast, u.mesh.mshstats.fwded_mcast, DEC);
IEEE80211_IF_FILE(fwded_unicast, u.mesh.mshstats.fwded_unicast, DEC);
IEEE80211_IF_FILE(fwded_frames, u.mesh.mshstats.fwded_frames, DEC);
IEEE80211_IF_FILE(dropped_frames_ttl, u.mesh.mshstats.dropped_frames_ttl, DEC);
-IEEE80211_IF_FILE(dropped_frames_congestion,
- u.mesh.mshstats.dropped_frames_congestion, DEC);
IEEE80211_IF_FILE(dropped_frames_no_route,
u.mesh.mshstats.dropped_frames_no_route, DEC);
@@ -740,7 +738,6 @@ static void add_mesh_stats(struct ieee80211_sub_if_data *sdata)
MESHSTATS_ADD(fwded_frames);
MESHSTATS_ADD(dropped_frames_ttl);
MESHSTATS_ADD(dropped_frames_no_route);
- MESHSTATS_ADD(dropped_frames_congestion);
#undef MESHSTATS_ADD
}
diff --git a/net/mac80211/debugfs_sta.c b/net/mac80211/debugfs_sta.c
index 7a3d7893e19d..f1914bf39f0e 100644
--- a/net/mac80211/debugfs_sta.c
+++ b/net/mac80211/debugfs_sta.c
@@ -167,7 +167,7 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
continue;
txqi = to_txq_info(sta->sta.txq[i]);
p += scnprintf(p, bufsz + buf - p,
- "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s)\n",
+ "%d %d %u %u %u %u %u %u %u %u %u 0x%lx(%s%s%s%s)\n",
txqi->txq.tid,
txqi->txq.ac,
txqi->tin.backlog_bytes,
@@ -182,7 +182,8 @@ static ssize_t sta_aqm_read(struct file *file, char __user *userbuf,
txqi->flags,
test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ? "STOP" : "RUN",
test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags) ? " AMPDU" : "",
- test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "");
+ test_bit(IEEE80211_TXQ_NO_AMSDU, &txqi->flags) ? " NO-AMSDU" : "",
+ test_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ? " DIRTY" : "");
}
rcu_read_unlock();
diff --git a/net/mac80211/driver-ops.c b/net/mac80211/driver-ops.c
index d737db4e07e2..cfb09e4aed4d 100644
--- a/net/mac80211/driver-ops.c
+++ b/net/mac80211/driver-ops.c
@@ -392,6 +392,9 @@ int drv_ampdu_action(struct ieee80211_local *local,
might_sleep();
+ if (!sdata)
+ return -EIO;
+
sdata = get_bss_sdata(sdata);
if (!check_sdata_in_driver(sdata))
return -EIO;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 809bad53e15b..5d13a3dfd366 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1199,7 +1199,7 @@ static inline void drv_wake_tx_queue(struct ieee80211_local *local,
/* In reconfig don't transmit now, but mark for waking later */
if (local->in_reconfig) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txq->flags);
+ set_bit(IEEE80211_TXQ_DIRTY, &txq->flags);
return;
}
diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c
index 83bc41346ae7..5315ab750280 100644
--- a/net/mac80211/ht.c
+++ b/net/mac80211/ht.c
@@ -391,6 +391,37 @@ void ieee80211_ba_session_work(struct work_struct *work)
tid_tx = sta->ampdu_mlme.tid_start_tx[tid];
if (!blocked && tid_tx) {
+ struct txq_info *txqi = to_txq_info(sta->sta.txq[tid]);
+ struct ieee80211_sub_if_data *sdata =
+ vif_to_sdata(txqi->txq.vif);
+ struct fq *fq = &sdata->local->fq;
+
+ spin_lock_bh(&fq->lock);
+
+ /* Allow only frags to be dequeued */
+ set_bit(IEEE80211_TXQ_STOP, &txqi->flags);
+
+ if (!skb_queue_empty(&txqi->frags)) {
+ /* Fragmented Tx is ongoing, wait for it to
+ * finish. Reschedule worker to retry later.
+ */
+
+ spin_unlock_bh(&fq->lock);
+ spin_unlock_bh(&sta->lock);
+
+ /* Give the task working on the txq a chance
+ * to send out the queued frags
+ */
+ synchronize_net();
+
+ mutex_unlock(&sta->ampdu_mlme.mtx);
+
+ ieee80211_queue_work(&sdata->local->hw, work);
+ return;
+ }
+
+ spin_unlock_bh(&fq->lock);
+
/*
* Assign it over to the normal tid_tx array
* where it "goes live".
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 63ff0d2524b6..ecc232eb1ee8 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -327,7 +327,6 @@ struct mesh_stats {
__u32 fwded_frames; /* Mesh total forwarded frames */
__u32 dropped_frames_ttl; /* Not transmitted since mesh_ttl == 0*/
__u32 dropped_frames_no_route; /* Not transmitted, no route found */
- __u32 dropped_frames_congestion;/* Not forwarded due to congestion */
};
#define PREQ_Q_F_START 0x1
@@ -838,7 +837,7 @@ enum txq_info_flags {
IEEE80211_TXQ_STOP,
IEEE80211_TXQ_AMPDU,
IEEE80211_TXQ_NO_AMSDU,
- IEEE80211_TXQ_STOP_NETIF_TX,
+ IEEE80211_TXQ_DIRTY,
};
/**
@@ -974,6 +973,8 @@ struct ieee80211_link_data {
struct cfg80211_chan_def csa_chandef;
struct work_struct color_change_finalize_work;
+ struct delayed_work color_collision_detect_work;
+ u64 color_bitmap;
/* context reservation -- protected with chanctx_mtx */
struct ieee80211_chanctx *reserved_chanctx;
@@ -1929,6 +1930,7 @@ int ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
/* color change handling */
void ieee80211_color_change_finalize_work(struct work_struct *work);
+void ieee80211_color_collision_detection_work(struct work_struct *work);
/* interface handling */
#define MAC80211_SUPPORTED_FEATURES_TX (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | \
@@ -2478,7 +2480,7 @@ int ieee80211_link_unreserve_chanctx(struct ieee80211_link_data *link);
int __must_check
ieee80211_link_change_bandwidth(struct ieee80211_link_data *link,
const struct cfg80211_chan_def *chandef,
- u32 *changed);
+ u64 *changed);
void ieee80211_link_release_channel(struct ieee80211_link_data *link);
void ieee80211_link_vlan_copy_chanctx(struct ieee80211_link_data *link);
void ieee80211_link_copy_chanctx_to_vlans(struct ieee80211_link_data *link,
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d49a5906a943..23ed13f15067 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -364,7 +364,9 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata,
/* No support for VLAN with MLO yet */
if (iftype == NL80211_IFTYPE_AP_VLAN &&
- nsdata->wdev.use_4addr)
+ sdata->wdev.use_4addr &&
+ nsdata->vif.type == NL80211_IFTYPE_AP &&
+ nsdata->vif.valid_links)
return -EOPNOTSUPP;
/*
@@ -2195,7 +2197,6 @@ int ieee80211_if_add(struct ieee80211_local *local, const char *name,
ret = cfg80211_register_netdevice(ndev);
if (ret) {
- ieee80211_if_free(ndev);
free_netdev(ndev);
return ret;
}
diff --git a/net/mac80211/link.c b/net/mac80211/link.c
index d1f5a9f7c647..8c8869cc1fb4 100644
--- a/net/mac80211/link.c
+++ b/net/mac80211/link.c
@@ -39,6 +39,8 @@ void ieee80211_link_init(struct ieee80211_sub_if_data *sdata,
ieee80211_csa_finalize_work);
INIT_WORK(&link->color_change_finalize_work,
ieee80211_color_change_finalize_work);
+ INIT_DELAYED_WORK(&link->color_collision_detect_work,
+ ieee80211_color_collision_detection_work);
INIT_LIST_HEAD(&link->assigned_chanctx_list);
INIT_LIST_HEAD(&link->reserved_chanctx_list);
INIT_DELAYED_WORK(&link->dfs_cac_timer_work,
@@ -66,6 +68,7 @@ void ieee80211_link_stop(struct ieee80211_link_data *link)
if (link->sdata->vif.type == NL80211_IFTYPE_STATION)
ieee80211_mgd_stop_link(link);
+ cancel_delayed_work_sync(&link->color_collision_detect_work);
ieee80211_link_release_channel(link);
}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 0aee2392dd29..60792dfabc9d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <flamingice@sourmilk.net>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2022 Intel Corporation
+ * Copyright (C) 2018 - 2023 Intel Corporation
*/
#include <linux/delay.h>
@@ -89,6 +89,80 @@ MODULE_PARM_DESC(probe_wait_ms,
#define IEEE80211_SIGNAL_AVE_MIN_COUNT 4
/*
+ * Extract from the given disabled subchannel bitmap (raw format
+ * from the EHT Operation Element) the bits for the subchannel
+ * we're using right now.
+ */
+static u16
+ieee80211_extract_dis_subch_bmap(const struct ieee80211_eht_operation *eht_oper,
+ struct cfg80211_chan_def *chandef, u16 bitmap)
+{
+ struct ieee80211_eht_operation_info *info = (void *)eht_oper->optional;
+ struct cfg80211_chan_def ap_chandef = *chandef;
+ u32 ap_center_freq, local_center_freq;
+ u32 ap_bw, local_bw;
+ int ap_start_freq, local_start_freq;
+ u16 shift, mask;
+
+ if (!(eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) ||
+ !(eht_oper->params &
+ IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT))
+ return 0;
+
+ /* set 160/320 supported to get the full AP definition */
+ ieee80211_chandef_eht_oper(eht_oper, true, true, &ap_chandef);
+ ap_center_freq = ap_chandef.center_freq1;
+ ap_bw = 20 * BIT(u8_get_bits(info->control,
+ IEEE80211_EHT_OPER_CHAN_WIDTH));
+ ap_start_freq = ap_center_freq - ap_bw / 2;
+ local_center_freq = chandef->center_freq1;
+ local_bw = 20 * BIT(ieee80211_chan_width_to_rx_bw(chandef->width));
+ local_start_freq = local_center_freq - local_bw / 2;
+ shift = (local_start_freq - ap_start_freq) / 20;
+ mask = BIT(local_bw / 20) - 1;
+
+ return (bitmap >> shift) & mask;
+}
+
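The shift/mask arithmetic projects the AP's full-bandwidth bitmap onto the 20 MHz subchannels covered by our own chandef; a worked example with assumed frequencies:

/* AP: 160 MHz starting at 5955 MHz; local: 80 MHz starting at 6035 MHz
 *
 *   shift = (6035 - 5955) / 20 = 4
 *   mask  = BIT(80 / 20) - 1  = 0xf
 *
 * so the local view is (bitmap >> 4) & 0xf: bits 4..7 of the AP's
 * map, one bit per 20 MHz subchannel
 */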
+/*
+ * Handle the puncturing bitmap, possibly downgrading bandwidth to get a
+ * valid bitmap.
+ */
+static void
+ieee80211_handle_puncturing_bitmap(struct ieee80211_link_data *link,
+ const struct ieee80211_eht_operation *eht_oper,
+ u16 bitmap, u64 *changed)
+{
+ struct cfg80211_chan_def *chandef = &link->conf->chandef;
+ u16 extracted;
+ u64 _changed = 0;
+
+ if (!changed)
+ changed = &_changed;
+
+ while (chandef->width > NL80211_CHAN_WIDTH_40) {
+ extracted =
+ ieee80211_extract_dis_subch_bmap(eht_oper, chandef,
+ bitmap);
+
+ if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
+ chandef))
+ break;
+ link->u.mgd.conn_flags |=
+ ieee80211_chandef_downgrade(chandef);
+ *changed |= BSS_CHANGED_BANDWIDTH;
+ }
+
+ if (chandef->width <= NL80211_CHAN_WIDTH_40)
+ extracted = 0;
+
+ if (link->conf->eht_puncturing != extracted) {
+ link->conf->eht_puncturing = extracted;
+ *changed |= BSS_CHANGED_EHT_PUNCTURING;
+ }
+}
+
+/*
* We can have multiple work items (and connection probing)
* scheduling this timer, but we need to take care to only
* reschedule it when it should fire _earlier_ than it was
@@ -413,7 +487,7 @@ static int ieee80211_config_bw(struct ieee80211_link_data *link,
const struct ieee80211_he_operation *he_oper,
const struct ieee80211_eht_operation *eht_oper,
const struct ieee80211_s1g_oper_ie *s1g_oper,
- const u8 *bssid, u32 *changed)
+ const u8 *bssid, u64 *changed)
{
struct ieee80211_sub_if_data *sdata = link->sdata;
struct ieee80211_local *local = sdata->local;
@@ -1704,7 +1778,7 @@ static void ieee80211_chswitch_post_beacon(struct ieee80211_link_data *link)
return;
}
- cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0);
+ cfg80211_ch_switch_notify(sdata->dev, &link->reserved_chandef, 0, 0);
}
void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success)
@@ -1914,7 +1988,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_link_data *link,
mutex_unlock(&local->mtx);
cfg80211_ch_switch_started_notify(sdata->dev, &csa_ie.chandef, 0,
- csa_ie.count, csa_ie.mode);
+ csa_ie.count, csa_ie.mode, 0);
if (local->ops->channel_switch) {
/* use driver's channel switch callback */
@@ -4145,6 +4219,7 @@ static bool ieee80211_assoc_config_link(struct ieee80211_link_data *link,
link_sta);
bss_conf->eht_support = link_sta->pub->eht_cap.has_eht;
+ *changed |= BSS_CHANGED_EHT_PUNCTURING;
} else {
bss_conf->eht_support = false;
}
@@ -5477,6 +5552,45 @@ static bool ieee80211_rx_our_beacon(const u8 *tx_bssid,
return ether_addr_equal(tx_bssid, bss->transmitted_bss->bssid);
}
+static bool ieee80211_config_puncturing(struct ieee80211_link_data *link,
+ const struct ieee80211_eht_operation *eht_oper,
+ u64 *changed)
+{
+ u16 bitmap = 0, extracted;
+
+ if ((eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
+ (eht_oper->params &
+ IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) {
+ const struct ieee80211_eht_operation_info *info =
+ (void *)eht_oper->optional;
+ const u8 *disable_subchannel_bitmap = info->optional;
+
+ bitmap = get_unaligned_le16(disable_subchannel_bitmap);
+ }
+
+ extracted = ieee80211_extract_dis_subch_bmap(eht_oper,
+ &link->conf->chandef,
+ bitmap);
+
+ /* accept if there are no changes */
+ if (!(*changed & BSS_CHANGED_BANDWIDTH) &&
+ extracted == link->conf->eht_puncturing)
+ return true;
+
+ if (!cfg80211_valid_disable_subchannel_bitmap(&bitmap,
+ &link->conf->chandef)) {
+ link_info(link,
+ "Got an invalid disable subchannel bitmap from AP %pM: bitmap = 0x%x, bw = 0x%x. disconnect\n",
+ link->u.mgd.bssid,
+ bitmap,
+ link->conf->chandef.width);
+ return false;
+ }
+
+ ieee80211_handle_puncturing_bitmap(link, eht_oper, bitmap, changed);
+ return true;
+}
+
static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_hdr *hdr, size_t len,
struct ieee80211_rx_status *rx_status)
@@ -5494,7 +5608,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
struct ieee80211_channel *chan;
struct link_sta_info *link_sta;
struct sta_info *sta;
- u32 changed = 0;
+ u64 changed = 0;
bool erp_valid;
u8 erp_value = 0;
u32 ncrc = 0;
@@ -5791,6 +5905,21 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_link_data *link,
elems->pwr_constr_elem,
elems->cisco_dtpc_elem);
+ if (elems->eht_operation &&
+ !(link->u.mgd.conn_flags & IEEE80211_CONN_DISABLE_EHT)) {
+ if (!ieee80211_config_puncturing(link, elems->eht_operation,
+ &changed)) {
+ ieee80211_set_disassoc(sdata, IEEE80211_STYPE_DEAUTH,
+ WLAN_REASON_DEAUTH_LEAVING,
+ true, deauth_buf);
+ ieee80211_report_disconnect(sdata, deauth_buf,
+ sizeof(deauth_buf), true,
+ WLAN_REASON_DEAUTH_LEAVING,
+ false);
+ goto free;
+ }
+ }
+
ieee80211_link_info_change_notify(sdata, link, changed);
free:
kfree(elems);
@@ -6892,9 +7021,12 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
}
+ link->conf->eht_puncturing = 0;
+
rcu_read_lock();
beacon_ies = rcu_dereference(cbss->beacon_ies);
if (beacon_ies) {
+ const struct ieee80211_eht_operation *eht_oper;
const struct element *elem;
u8 dtim_count = 0;
@@ -6923,6 +7055,31 @@ ieee80211_setup_assoc_link(struct ieee80211_sub_if_data *sdata,
link->conf->ema_ap = true;
else
link->conf->ema_ap = false;
+
+ elem = cfg80211_find_ext_elem(WLAN_EID_EXT_EHT_OPERATION,
+ beacon_ies->data, beacon_ies->len);
+ eht_oper = elem ? (const void *)(elem->data + 1) : NULL;
+
+ if (elem &&
+ ieee80211_eht_oper_size_ok((const void *)(elem->data + 1),
+ elem->datalen - 1) &&
+ (eht_oper->params & IEEE80211_EHT_OPER_INFO_PRESENT) &&
+ (eht_oper->params & IEEE80211_EHT_OPER_DISABLED_SUBCHANNEL_BITMAP_PRESENT)) {
+ const struct ieee80211_eht_operation_info *info =
+ (void *)eht_oper->optional;
+ const u8 *disable_subchannel_bitmap = info->optional;
+ u16 bitmap;
+
+ bitmap = get_unaligned_le16(disable_subchannel_bitmap);
+ if (cfg80211_valid_disable_subchannel_bitmap(&bitmap,
+ &link->conf->chandef))
+ ieee80211_handle_puncturing_bitmap(link,
+ eht_oper,
+ bitmap,
+ NULL);
+ else
+ conn_flags |= IEEE80211_CONN_DISABLE_EHT;
+ }
}
rcu_read_unlock();
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 7e3ab6e1b28f..f7fdfe710951 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -2720,6 +2720,174 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx)
}
}
+static ieee80211_rx_result
+ieee80211_rx_mesh_data(struct ieee80211_sub_if_data *sdata, struct sta_info *sta,
+ struct sk_buff *skb)
+{
+#ifdef CONFIG_MAC80211_MESH
+ struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
+ struct ieee80211_local *local = sdata->local;
+ uint16_t fc = IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_DATA;
+ struct ieee80211_hdr hdr = {
+ .frame_control = cpu_to_le16(fc)
+ };
+ struct ieee80211_hdr *fwd_hdr;
+ struct ieee80211s_hdr *mesh_hdr;
+ struct ieee80211_tx_info *info;
+ struct sk_buff *fwd_skb;
+ struct ethhdr *eth;
+ bool multicast;
+ int tailroom = 0;
+ int hdrlen, mesh_hdrlen;
+ u8 *qos;
+
+ if (!ieee80211_vif_is_mesh(&sdata->vif))
+ return RX_CONTINUE;
+
+ if (!pskb_may_pull(skb, sizeof(*eth) + 6))
+ return RX_DROP_MONITOR;
+
+ mesh_hdr = (struct ieee80211s_hdr *)(skb->data + sizeof(*eth));
+ mesh_hdrlen = ieee80211_get_mesh_hdrlen(mesh_hdr);
+
+ if (!pskb_may_pull(skb, sizeof(*eth) + mesh_hdrlen))
+ return RX_DROP_MONITOR;
+
+ eth = (struct ethhdr *)skb->data;
+ multicast = is_multicast_ether_addr(eth->h_dest);
+
+ mesh_hdr = (struct ieee80211s_hdr *)(eth + 1);
+ if (!mesh_hdr->ttl)
+ return RX_DROP_MONITOR;
+
+ /* frame is in RMC, don't forward */
+ if (is_multicast_ether_addr(eth->h_dest) &&
+ mesh_rmc_check(sdata, eth->h_source, mesh_hdr))
+ return RX_DROP_MONITOR;
+
+ /* Frame has reached destination. Don't forward */
+ if (ether_addr_equal(sdata->vif.addr, eth->h_dest))
+ goto rx_accept;
+
+ if (!ifmsh->mshcfg.dot11MeshForwarding) {
+ if (is_multicast_ether_addr(eth->h_dest))
+ goto rx_accept;
+
+ return RX_DROP_MONITOR;
+ }
+
+ /* forward packet */
+ if (sdata->crypto_tx_tailroom_needed_cnt)
+ tailroom = IEEE80211_ENCRYPT_TAILROOM;
+
+ if (!--mesh_hdr->ttl) {
+ if (multicast)
+ goto rx_accept;
+
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_ttl);
+ return RX_DROP_MONITOR;
+ }
+
+ if (mesh_hdr->flags & MESH_FLAGS_AE) {
+ struct mesh_path *mppath;
+ char *proxied_addr;
+
+ if (multicast)
+ proxied_addr = mesh_hdr->eaddr1;
+ else if ((mesh_hdr->flags & MESH_FLAGS_AE) == MESH_FLAGS_AE_A5_A6)
+ /* has_a4 already checked in ieee80211_rx_mesh_check */
+ proxied_addr = mesh_hdr->eaddr2;
+ else
+ return RX_DROP_MONITOR;
+
+ rcu_read_lock();
+ mppath = mpp_path_lookup(sdata, proxied_addr);
+ if (!mppath) {
+ mpp_path_add(sdata, proxied_addr, eth->h_source);
+ } else {
+ spin_lock_bh(&mppath->state_lock);
+ if (!ether_addr_equal(mppath->mpp, eth->h_source))
+ memcpy(mppath->mpp, eth->h_source, ETH_ALEN);
+ mppath->exp_time = jiffies;
+ spin_unlock_bh(&mppath->state_lock);
+ }
+ rcu_read_unlock();
+ }
+
+ skb_set_queue_mapping(skb, ieee802_1d_to_ac[skb->priority]);
+
+ ieee80211_fill_mesh_addresses(&hdr, &hdr.frame_control,
+ eth->h_dest, eth->h_source);
+ hdrlen = ieee80211_hdrlen(hdr.frame_control);
+ if (multicast) {
+ int extra_head = sizeof(struct ieee80211_hdr) - sizeof(*eth);
+
+ fwd_skb = skb_copy_expand(skb, local->tx_headroom + extra_head +
+ IEEE80211_ENCRYPT_HEADROOM,
+ tailroom, GFP_ATOMIC);
+ if (!fwd_skb)
+ goto rx_accept;
+ } else {
+ fwd_skb = skb;
+ skb = NULL;
+
+ if (skb_cow_head(fwd_skb, hdrlen - sizeof(struct ethhdr)))
+ return RX_DROP_UNUSABLE;
+ }
+
+ fwd_hdr = skb_push(fwd_skb, hdrlen - sizeof(struct ethhdr));
+ memcpy(fwd_hdr, &hdr, hdrlen - 2);
+ qos = ieee80211_get_qos_ctl(fwd_hdr);
+ qos[0] = qos[1] = 0;
+
+ skb_reset_mac_header(fwd_skb);
+ hdrlen += mesh_hdrlen;
+ if (ieee80211_get_8023_tunnel_proto(fwd_skb->data + hdrlen,
+ &fwd_skb->protocol))
+ hdrlen += ETH_ALEN;
+ else
+ fwd_skb->protocol = htons(fwd_skb->len - hdrlen);
+ skb_set_network_header(fwd_skb, hdrlen);
+
+ info = IEEE80211_SKB_CB(fwd_skb);
+ memset(info, 0, sizeof(*info));
+ info->control.flags |= IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
+ info->control.vif = &sdata->vif;
+ info->control.jiffies = jiffies;
+ if (multicast) {
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast);
+ memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
+ /* update power mode indication when forwarding */
+ ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr);
+ } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) {
+ /* mesh power mode flags updated in mesh_nexthop_lookup */
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast);
+ } else {
+ /* unable to resolve next hop */
+ if (sta)
+ mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl,
+ hdr.addr3, 0,
+ WLAN_REASON_MESH_PATH_NOFORWARD,
+ sta->sta.addr);
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route);
+ kfree_skb(fwd_skb);
+ goto rx_accept;
+ }
+
+ IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
+ fwd_skb->dev = sdata->dev;
+ ieee80211_add_pending_skb(local, fwd_skb);
+
+rx_accept:
+ if (!skb)
+ return RX_QUEUED;
+
+ ieee80211_strip_8023_mesh_hdr(skb);
+#endif
+
+ return RX_CONTINUE;
+}
+
static ieee80211_rx_result debug_noinline
__ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
{
@@ -2728,6 +2896,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
__le16 fc = hdr->frame_control;
struct sk_buff_head frame_list;
+ ieee80211_rx_result res;
struct ethhdr ethhdr;
const u8 *check_da = ethhdr.h_dest, *check_sa = ethhdr.h_source;
@@ -2746,6 +2915,7 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
break;
case NL80211_IFTYPE_MESH_POINT:
check_sa = NULL;
+ check_da = NULL;
break;
default:
break;
@@ -2760,20 +2930,43 @@ __ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx, u8 data_offset)
data_offset, true))
return RX_DROP_UNUSABLE;
+ if (rx->sta && rx->sta->amsdu_mesh_control < 0) {
+ bool valid_std = ieee80211_is_valid_amsdu(skb, true);
+ bool valid_nonstd = ieee80211_is_valid_amsdu(skb, false);
+
+ if (valid_std && !valid_nonstd)
+ rx->sta->amsdu_mesh_control = 1;
+ else if (valid_nonstd && !valid_std)
+ rx->sta->amsdu_mesh_control = 0;
+ }
+
ieee80211_amsdu_to_8023s(skb, &frame_list, dev->dev_addr,
rx->sdata->vif.type,
rx->local->hw.extra_tx_headroom,
- check_da, check_sa);
+ check_da, check_sa,
+ rx->sta ? rx->sta->amsdu_mesh_control : -1);
while (!skb_queue_empty(&frame_list)) {
rx->skb = __skb_dequeue(&frame_list);
- if (!ieee80211_frame_allowed(rx, fc)) {
- dev_kfree_skb(rx->skb);
+ res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
+ switch (res) {
+ case RX_QUEUED:
continue;
+ case RX_CONTINUE:
+ break;
+ default:
+ goto free;
}
+ if (!ieee80211_frame_allowed(rx, fc))
+ goto free;
+
ieee80211_deliver_skb(rx);
+ continue;
+
+free:
+ dev_kfree_skb(rx->skb);
}
return RX_QUEUED;
@@ -2806,6 +2999,8 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
if (!rx->sdata->u.mgd.use_4addr)
return RX_DROP_UNUSABLE;
break;
+ case NL80211_IFTYPE_MESH_POINT:
+ break;
default:
return RX_DROP_UNUSABLE;
}
@@ -2834,160 +3029,6 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx)
return __ieee80211_rx_h_amsdu(rx, 0);
}
-#ifdef CONFIG_MAC80211_MESH
-static ieee80211_rx_result
-ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx)
-{
- struct ieee80211_hdr *fwd_hdr, *hdr;
- struct ieee80211_tx_info *info;
- struct ieee80211s_hdr *mesh_hdr;
- struct sk_buff *skb = rx->skb, *fwd_skb;
- struct ieee80211_local *local = rx->local;
- struct ieee80211_sub_if_data *sdata = rx->sdata;
- struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
- u16 ac, q, hdrlen;
- int tailroom = 0;
-
- hdr = (struct ieee80211_hdr *) skb->data;
- hdrlen = ieee80211_hdrlen(hdr->frame_control);
-
- /* make sure fixed part of mesh header is there, also checks skb len */
- if (!pskb_may_pull(rx->skb, hdrlen + 6))
- return RX_DROP_MONITOR;
-
- mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
-
- /* make sure full mesh header is there, also checks skb len */
- if (!pskb_may_pull(rx->skb,
- hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr)))
- return RX_DROP_MONITOR;
-
- /* reload pointers */
- hdr = (struct ieee80211_hdr *) skb->data;
- mesh_hdr = (struct ieee80211s_hdr *) (skb->data + hdrlen);
-
- if (ieee80211_drop_unencrypted(rx, hdr->frame_control)) {
- int offset = hdrlen + ieee80211_get_mesh_hdrlen(mesh_hdr) +
- sizeof(rfc1042_header);
- __be16 ethertype;
-
- if (!ether_addr_equal(hdr->addr1, rx->sdata->vif.addr) ||
- skb_copy_bits(rx->skb, offset, &ethertype, 2) != 0 ||
- ethertype != rx->sdata->control_port_protocol)
- return RX_DROP_MONITOR;
- }
-
- /* frame is in RMC, don't forward */
- if (ieee80211_is_data(hdr->frame_control) &&
- is_multicast_ether_addr(hdr->addr1) &&
- mesh_rmc_check(rx->sdata, hdr->addr3, mesh_hdr))
- return RX_DROP_MONITOR;
-
- if (!ieee80211_is_data(hdr->frame_control))
- return RX_CONTINUE;
-
- if (!mesh_hdr->ttl)
- return RX_DROP_MONITOR;
-
- if (mesh_hdr->flags & MESH_FLAGS_AE) {
- struct mesh_path *mppath;
- char *proxied_addr;
- char *mpp_addr;
-
- if (is_multicast_ether_addr(hdr->addr1)) {
- mpp_addr = hdr->addr3;
- proxied_addr = mesh_hdr->eaddr1;
- } else if ((mesh_hdr->flags & MESH_FLAGS_AE) ==
- MESH_FLAGS_AE_A5_A6) {
- /* has_a4 already checked in ieee80211_rx_mesh_check */
- mpp_addr = hdr->addr4;
- proxied_addr = mesh_hdr->eaddr2;
- } else {
- return RX_DROP_MONITOR;
- }
-
- rcu_read_lock();
- mppath = mpp_path_lookup(sdata, proxied_addr);
- if (!mppath) {
- mpp_path_add(sdata, proxied_addr, mpp_addr);
- } else {
- spin_lock_bh(&mppath->state_lock);
- if (!ether_addr_equal(mppath->mpp, mpp_addr))
- memcpy(mppath->mpp, mpp_addr, ETH_ALEN);
- mppath->exp_time = jiffies;
- spin_unlock_bh(&mppath->state_lock);
- }
- rcu_read_unlock();
- }
-
- /* Frame has reached destination. Don't forward */
- if (!is_multicast_ether_addr(hdr->addr1) &&
- ether_addr_equal(sdata->vif.addr, hdr->addr3))
- return RX_CONTINUE;
-
- ac = ieee802_1d_to_ac[skb->priority];
- q = sdata->vif.hw_queue[ac];
- if (ieee80211_queue_stopped(&local->hw, q)) {
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_congestion);
- return RX_DROP_MONITOR;
- }
- skb_set_queue_mapping(skb, ac);
-
- if (!--mesh_hdr->ttl) {
- if (!is_multicast_ether_addr(hdr->addr1))
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh,
- dropped_frames_ttl);
- goto out;
- }
-
- if (!ifmsh->mshcfg.dot11MeshForwarding)
- goto out;
-
- if (sdata->crypto_tx_tailroom_needed_cnt)
- tailroom = IEEE80211_ENCRYPT_TAILROOM;
-
- fwd_skb = skb_copy_expand(skb, local->tx_headroom +
- IEEE80211_ENCRYPT_HEADROOM,
- tailroom, GFP_ATOMIC);
- if (!fwd_skb)
- goto out;
-
- fwd_skb->dev = sdata->dev;
- fwd_hdr = (struct ieee80211_hdr *) fwd_skb->data;
- fwd_hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_RETRY);
- info = IEEE80211_SKB_CB(fwd_skb);
- memset(info, 0, sizeof(*info));
- info->control.flags |= IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
- info->control.vif = &rx->sdata->vif;
- info->control.jiffies = jiffies;
- if (is_multicast_ether_addr(fwd_hdr->addr1)) {
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_mcast);
- memcpy(fwd_hdr->addr2, sdata->vif.addr, ETH_ALEN);
- /* update power mode indication when forwarding */
- ieee80211_mps_set_frame_flags(sdata, NULL, fwd_hdr);
- } else if (!mesh_nexthop_lookup(sdata, fwd_skb)) {
- /* mesh power mode flags updated in mesh_nexthop_lookup */
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_unicast);
- } else {
- /* unable to resolve next hop */
- mesh_path_error_tx(sdata, ifmsh->mshcfg.element_ttl,
- fwd_hdr->addr3, 0,
- WLAN_REASON_MESH_PATH_NOFORWARD,
- fwd_hdr->addr2);
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, dropped_frames_no_route);
- kfree_skb(fwd_skb);
- return RX_DROP_MONITOR;
- }
-
- IEEE80211_IFSTA_MESH_CTR_INC(ifmsh, fwded_frames);
- ieee80211_add_pending_skb(local, fwd_skb);
- out:
- if (is_multicast_ether_addr(hdr->addr1))
- return RX_CONTINUE;
- return RX_DROP_MONITOR;
-}
-#endif
-
static ieee80211_rx_result debug_noinline
ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
{
@@ -2996,6 +3037,7 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
struct net_device *dev = sdata->dev;
struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data;
__le16 fc = hdr->frame_control;
+ ieee80211_rx_result res;
bool port_control;
int err;
@@ -3022,6 +3064,10 @@ ieee80211_rx_h_data(struct ieee80211_rx_data *rx)
if (unlikely(err))
return RX_DROP_UNUSABLE;
+ res = ieee80211_rx_mesh_data(rx->sdata, rx->sta, rx->skb);
+ if (res != RX_CONTINUE)
+ return res;
+
if (!ieee80211_frame_allowed(rx, fc))
return RX_DROP_MONITOR;
@@ -3219,9 +3265,9 @@ ieee80211_rx_check_bss_color_collision(struct ieee80211_rx_data *rx)
color = le32_get_bits(he_oper->he_oper_params,
IEEE80211_HE_OPERATION_BSS_COLOR_MASK);
if (color == bss_conf->he_bss_color.color)
- ieeee80211_obss_color_collision_notify(&rx->sdata->vif,
- BIT_ULL(color),
- GFP_ATOMIC);
+ ieee80211_obss_color_collision_notify(&rx->sdata->vif,
+ BIT_ULL(color),
+ GFP_ATOMIC);
}
}
@@ -3992,10 +4038,6 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
CALL_RXH(ieee80211_rx_h_defragment);
CALL_RXH(ieee80211_rx_h_michael_mic_verify);
/* must be after MMIC verify so header is counted in MPDU mic */
-#ifdef CONFIG_MAC80211_MESH
- if (ieee80211_vif_is_mesh(&rx->sdata->vif))
- CALL_RXH(ieee80211_rx_h_mesh_fwding);
-#endif
CALL_RXH(ieee80211_rx_h_amsdu);
CALL_RXH(ieee80211_rx_h_data);
@@ -4049,6 +4091,52 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx)
#undef CALL_RXH
}
+static bool
+ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
+{
+ return !!(sta->valid_links & BIT(link_id));
+}
+
+static bool ieee80211_rx_data_set_link(struct ieee80211_rx_data *rx,
+ u8 link_id)
+{
+ rx->link_id = link_id;
+ rx->link = rcu_dereference(rx->sdata->link[link_id]);
+
+ if (!rx->sta)
+ return rx->link;
+
+ if (!ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta, link_id))
+ return false;
+
+ rx->link_sta = rcu_dereference(rx->sta->link[link_id]);
+
+ return rx->link && rx->link_sta;
+}
+
+static bool ieee80211_rx_data_set_sta(struct ieee80211_rx_data *rx,
+ struct sta_info *sta, int link_id)
+{
+ rx->link_id = link_id;
+ rx->sta = sta;
+
+ if (sta) {
+ rx->local = sta->sdata->local;
+ if (!rx->sdata)
+ rx->sdata = sta->sdata;
+ rx->link_sta = &sta->deflink;
+ } else {
+ rx->link_sta = NULL;
+ }
+
+ if (link_id < 0)
+ rx->link = &rx->sdata->deflink;
+ else if (!ieee80211_rx_data_set_link(rx, link_id))
+ return false;
+
+ return true;
+}
+
/*
* This function makes calls into the RX path, therefore
* it has to be invoked under RCU read lock.
@@ -4057,16 +4145,19 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
{
struct sk_buff_head frames;
struct ieee80211_rx_data rx = {
- .sta = sta,
- .sdata = sta->sdata,
- .local = sta->local,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .link_id = -1,
};
struct tid_ampdu_rx *tid_agg_rx;
- u8 link_id;
+ int link_id = -1;
+
+ /* FIXME: statistics won't be right with this */
+ if (sta->sta.valid_links)
+ link_id = ffs(sta->sta.valid_links) - 1;
+
+ if (!ieee80211_rx_data_set_sta(&rx, sta, link_id))
+ return;
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
if (!tid_agg_rx)
@@ -4086,10 +4177,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid)
};
drv_event_callback(rx.local, rx.sdata, &event);
}
- /* FIXME: statistics won't be right with this */
- link_id = sta->sta.valid_links ? ffs(sta->sta.valid_links) - 1 : 0;
- rx.link = rcu_dereference(sta->sdata->link[link_id]);
- rx.link_sta = rcu_dereference(sta->link[link_id]);
ieee80211_rx_handlers(&rx, &frames);
}
@@ -4105,7 +4192,6 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
/* This is OK -- must be QoS data frame */
.security_idx = tid,
.seqno_idx = tid,
- .link_id = -1,
};
int i, diff;
@@ -4116,10 +4202,8 @@ void ieee80211_mark_rx_ba_filtered_frames(struct ieee80211_sta *pubsta, u8 tid,
sta = container_of(pubsta, struct sta_info, sta);
- rx.sta = sta;
- rx.sdata = sta->sdata;
- rx.link = &rx.sdata->deflink;
- rx.local = sta->local;
+ if (!ieee80211_rx_data_set_sta(&rx, sta, -1))
+ return;
rcu_read_lock();
tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]);
@@ -4197,7 +4281,8 @@ static bool ieee80211_accept_frame(struct ieee80211_rx_data *rx)
case NL80211_IFTYPE_STATION:
if (!bssid && !sdata->u.mgd.use_4addr)
return false;
- if (ieee80211_is_robust_mgmt_frame(skb) && !rx->sta)
+ if (ieee80211_is_first_frag(hdr->seq_ctrl) &&
+ ieee80211_is_robust_mgmt_frame(skb) && !rx->sta)
return false;
if (multicast)
return true;
@@ -4506,15 +4591,6 @@ void ieee80211_check_fast_rx_iface(struct ieee80211_sub_if_data *sdata)
mutex_unlock(&local->sta_mtx);
}
-static bool
-ieee80211_rx_is_valid_sta_link_id(struct ieee80211_sta *sta, u8 link_id)
-{
- if (!sta->mlo)
- return false;
-
- return !!(sta->valid_links & BIT(link_id));
-}
-
static void ieee80211_rx_8023(struct ieee80211_rx_data *rx,
struct ieee80211_fast_rx *fast_rx,
int orig_len)
@@ -4625,7 +4701,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
struct sk_buff *skb = rx->skb;
struct ieee80211_hdr *hdr = (void *)skb->data;
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- struct sta_info *sta = rx->sta;
int orig_len = skb->len;
int hdrlen = ieee80211_hdrlen(hdr->frame_control);
int snap_offs = hdrlen;
@@ -4637,7 +4712,6 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
u8 da[ETH_ALEN];
u8 sa[ETH_ALEN];
} addrs __aligned(2);
- struct link_sta_info *link_sta;
struct ieee80211_sta_rx_stats *stats;
/* for parallel-rx, we need to have DUP_VALIDATED, otherwise we write
@@ -4740,18 +4814,10 @@ static bool ieee80211_invoke_fast_rx(struct ieee80211_rx_data *rx,
drop:
dev_kfree_skb(skb);
- if (rx->link_id >= 0) {
- link_sta = rcu_dereference(sta->link[rx->link_id]);
- if (!link_sta)
- return true;
- } else {
- link_sta = &sta->deflink;
- }
-
if (fast_rx->uses_rss)
- stats = this_cpu_ptr(link_sta->pcpu_rx_stats);
+ stats = this_cpu_ptr(rx->link_sta->pcpu_rx_stats);
else
- stats = &link_sta->rx_stats;
+ stats = &rx->link_sta->rx_stats;
stats->dropped++;
return true;
@@ -4769,8 +4835,8 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
struct ieee80211_local *local = rx->local;
struct ieee80211_sub_if_data *sdata = rx->sdata;
struct ieee80211_hdr *hdr = (void *)skb->data;
- struct link_sta_info *link_sta = NULL;
- struct ieee80211_link_data *link;
+ struct link_sta_info *link_sta = rx->link_sta;
+ struct ieee80211_link_data *link = rx->link;
rx->skb = skb;
@@ -4792,35 +4858,6 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
if (!ieee80211_accept_frame(rx))
return false;
- if (rx->link_id >= 0) {
- link = rcu_dereference(rx->sdata->link[rx->link_id]);
-
- /* we might race link removal */
- if (!link)
- return true;
- rx->link = link;
-
- if (rx->sta) {
- rx->link_sta =
- rcu_dereference(rx->sta->link[rx->link_id]);
- if (!rx->link_sta)
- return true;
- }
- } else {
- if (rx->sta)
- rx->link_sta = &rx->sta->deflink;
-
- rx->link = &sdata->deflink;
- }
-
- if (unlikely(!is_multicast_ether_addr(hdr->addr1) &&
- rx->link_id >= 0 && rx->sta && rx->sta->sta.mlo)) {
- link_sta = rcu_dereference(rx->sta->link[rx->link_id]);
-
- if (WARN_ON_ONCE(!link_sta))
- return true;
- }
-
if (!consume) {
struct skb_shared_hwtstamps *shwt;
@@ -4838,9 +4875,13 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx,
*/
shwt = skb_hwtstamps(rx->skb);
shwt->hwtstamp = skb_hwtstamps(skb)->hwtstamp;
+
+ /* Update the hdr pointer to the new skb for translation below */
+ hdr = (struct ieee80211_hdr *)rx->skb->data;
}
- if (unlikely(link_sta)) {
+ if (unlikely(rx->sta && rx->sta->sta.mlo) &&
+ is_unicast_ether_addr(hdr->addr1)) {
/* translate to MLD addresses */
if (ether_addr_equal(link->conf->addr, hdr->addr1))
ether_addr_copy(hdr->addr1, rx->sdata->vif.addr);
@@ -4870,6 +4911,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
struct ieee80211_fast_rx *fast_rx;
struct ieee80211_rx_data rx;
+ struct sta_info *sta;
+ int link_id = -1;
memset(&rx, 0, sizeof(rx));
rx.skb = skb;
@@ -4886,12 +4929,8 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
if (!pubsta)
goto drop;
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
-
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(pubsta, status->link_id))
- goto drop;
+ if (status->link_valid)
+ link_id = status->link_id;
/*
* TODO: Should the frame be dropped if the right link_id is not
@@ -4900,19 +4939,9 @@ static void __ieee80211_rx_handle_8023(struct ieee80211_hw *hw,
* link_id is used only for stats purpose and updating the stats on
* the deflink is fine?
*/
- if (status->link_valid)
- rx.link_id = status->link_id;
-
- if (rx.link_id >= 0) {
- struct ieee80211_link_data *link;
-
- link = rcu_dereference(rx.sdata->link[rx.link_id]);
- if (!link)
- goto drop;
- rx.link = link;
- } else {
- rx.link = &rx.sdata->deflink;
- }
+ sta = container_of(pubsta, struct sta_info, sta);
+ if (!ieee80211_rx_data_set_sta(&rx, sta, link_id))
+ goto drop;
fast_rx = rcu_dereference(rx.sta->fast_rx);
if (!fast_rx)
@@ -4930,6 +4959,8 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
{
struct link_sta_info *link_sta;
struct ieee80211_hdr *hdr = (void *)skb->data;
+ struct sta_info *sta;
+ int link_id = -1;
/*
* Look up link station first, in case there's a
@@ -4939,24 +4970,19 @@ static bool ieee80211_rx_for_interface(struct ieee80211_rx_data *rx,
*/
link_sta = link_sta_info_get_bss(rx->sdata, hdr->addr2);
if (link_sta) {
- rx->sta = link_sta->sta;
- rx->link_id = link_sta->link_id;
+ sta = link_sta->sta;
+ link_id = link_sta->link_id;
} else {
struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
- rx->sta = sta_info_get_bss(rx->sdata, hdr->addr2);
- if (rx->sta) {
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&rx->sta->sta,
- status->link_id))
- return false;
-
- rx->link_id = status->link_valid ? status->link_id : -1;
- } else {
- rx->link_id = -1;
- }
+ sta = sta_info_get_bss(rx->sdata, hdr->addr2);
+ if (status->link_valid)
+ link_id = status->link_id;
}
+ if (!ieee80211_rx_data_set_sta(rx, sta, link_id))
+ return false;
+
return ieee80211_prepare_and_rx_handle(rx, skb, consume);
}
@@ -5015,19 +5041,16 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (ieee80211_is_data(fc)) {
struct sta_info *sta, *prev_sta;
- u8 link_id = status->link_id;
+ int link_id = -1;
- if (pubsta) {
- rx.sta = container_of(pubsta, struct sta_info, sta);
- rx.sdata = rx.sta->sdata;
+ if (status->link_valid)
+ link_id = status->link_id;
- if (status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(pubsta, link_id))
+ if (pubsta) {
+ sta = container_of(pubsta, struct sta_info, sta);
+ if (!ieee80211_rx_data_set_sta(&rx, sta, link_id))
goto out;
- if (status->link_valid)
- rx.link_id = status->link_id;
-
/*
* In MLO connection, fetch the link_id using addr2
* when the driver does not pass link_id in status.
@@ -5045,7 +5068,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
if (!link_sta)
goto out;
- rx.link_id = link_sta->link_id;
+ ieee80211_rx_data_set_link(&rx, link_sta->link_id);
}
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
@@ -5061,30 +5084,25 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw,
continue;
}
- if ((status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
- link_id)) ||
- (!status->link_valid && prev_sta->sta.mlo))
+ rx.sdata = prev_sta->sdata;
+ if (!ieee80211_rx_data_set_sta(&rx, prev_sta, link_id))
+ goto out;
+
+ if (!status->link_valid && prev_sta->sta.mlo)
continue;
- rx.link_id = status->link_valid ? link_id : -1;
- rx.sta = prev_sta;
- rx.sdata = prev_sta->sdata;
ieee80211_prepare_and_rx_handle(&rx, skb, false);
prev_sta = sta;
}
if (prev_sta) {
- if ((status->link_valid &&
- !ieee80211_rx_is_valid_sta_link_id(&prev_sta->sta,
- link_id)) ||
- (!status->link_valid && prev_sta->sta.mlo))
+ rx.sdata = prev_sta->sdata;
+ if (!ieee80211_rx_data_set_sta(&rx, prev_sta, link_id))
goto out;
- rx.link_id = status->link_valid ? link_id : -1;
- rx.sta = prev_sta;
- rx.sdata = prev_sta->sdata;
+ if (!status->link_valid && prev_sta->sta.mlo)
+ goto out;
if (ieee80211_prepare_and_rx_handle(&rx, skb, true))
return;
@@ -5215,6 +5233,15 @@ void ieee80211_rx_list(struct ieee80211_hw *hw, struct ieee80211_sta *pubsta,
status->rate_idx, status->nss))
goto drop;
break;
+ case RX_ENC_EHT:
+ if (WARN_ONCE(status->rate_idx > 15 ||
+ !status->nss ||
+ status->nss > 8 ||
+ status->eht.gi > NL80211_RATE_INFO_EHT_GI_3_2,
+ "Rate marked as an EHT rate but data is invalid: MCS:%d, NSS:%d, GI:%d\n",
+ status->rate_idx, status->nss, status->eht.gi))
+ goto drop;
+ break;
default:
WARN_ON_ONCE(1);
fallthrough;
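
The amsdu_mesh_control logic above learns the peer's mesh A-MSDU framing lazily: the value stays at -1 (unknown) until a subframe parses validly as exactly one of the two formats. The tri-state decision, condensed into a stand-alone sketch (names are illustrative, not kernel API):

#include <stdbool.h>
#include <stdio.h>

enum { AMSDU_FMT_UNKNOWN = -1, AMSDU_FMT_NONSTD = 0, AMSDU_FMT_STD = 1 };

static int learn_amsdu_format(int state, bool valid_std, bool valid_nonstd)
{
	if (state != AMSDU_FMT_UNKNOWN)
		return state;			/* already decided, keep it */
	if (valid_std && !valid_nonstd)
		return AMSDU_FMT_STD;
	if (valid_nonstd && !valid_std)
		return AMSDU_FMT_NONSTD;
	return AMSDU_FMT_UNKNOWN;		/* ambiguous frame, try again later */
}

int main(void)
{
	int state = AMSDU_FMT_UNKNOWN;

	state = learn_amsdu_format(state, true, true);	/* still unknown */
	state = learn_amsdu_format(state, true, false);	/* now standard */
	printf("%d\n", state);				/* 1 */
	return 0;
}
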
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index 04e0f132b1d9..7d68dbc872d7 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -4,7 +4,7 @@
* Copyright 2006-2007 Jiri Benc <jbenc@suse.cz>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2021 Intel Corporation
+ * Copyright (C) 2018-2022 Intel Corporation
*/
#include <linux/module.h>
@@ -594,6 +594,9 @@ __sta_info_alloc(struct ieee80211_sub_if_data *sdata,
sta->sta_state = IEEE80211_STA_NONE;
+ if (sdata->vif.type == NL80211_IFTYPE_MESH_POINT)
+ sta->amsdu_mesh_control = -1;
+
/* Mark TID as unreserved */
sta->reserved_tid = IEEE80211_TID_UNRESERVED;
@@ -2406,12 +2409,19 @@ static void sta_stats_decode_rate(struct ieee80211_local *local, u32 rate,
rinfo->he_ru_alloc = STA_STATS_GET(HE_RU, rate);
rinfo->he_dcm = STA_STATS_GET(HE_DCM, rate);
break;
+ case STA_STATS_RATE_TYPE_EHT:
+ rinfo->flags = RATE_INFO_FLAGS_EHT_MCS;
+ rinfo->mcs = STA_STATS_GET(EHT_MCS, rate);
+ rinfo->nss = STA_STATS_GET(EHT_NSS, rate);
+ rinfo->eht_gi = STA_STATS_GET(EHT_GI, rate);
+ rinfo->eht_ru_alloc = STA_STATS_GET(EHT_RU, rate);
+ break;
}
}
static int sta_set_rate_info_rx(struct sta_info *sta, struct rate_info *rinfo)
{
- u16 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
+ u32 rate = READ_ONCE(sta_get_last_rx_stats(sta)->last_rate);
if (rate == STA_STATS_RATE_INVALID)
return -EINVAL;
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 69820b551668..e8e482a82d77 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -622,6 +622,8 @@ struct link_sta_info {
* taken from HT/VHT capabilities or VHT operating mode notification
* @cparams: CoDel parameters for this station.
* @reserved_tid: reserved TID (if any, otherwise IEEE80211_TID_UNRESERVED)
+ * @amsdu_mesh_control: track the mesh A-MSDU format used by the peer
+ * (-1: not yet known, 0: non-standard [without mesh header], 1: standard)
* @fast_tx: TX fastpath information
* @fast_rx: RX fastpath information
* @tdls_chandef: a TDLS peer can have a wider chandef that is compatible to
@@ -707,6 +709,7 @@ struct sta_info {
struct codel_params cparams;
u8 reserved_tid;
+ s8 amsdu_mesh_control;
struct cfg80211_chan_def tdls_chandef;
@@ -936,6 +939,7 @@ enum sta_stats_type {
STA_STATS_RATE_TYPE_VHT,
STA_STATS_RATE_TYPE_HE,
STA_STATS_RATE_TYPE_S1G,
+ STA_STATS_RATE_TYPE_EHT,
};
#define STA_STATS_FIELD_HT_MCS GENMASK( 7, 0)
@@ -945,12 +949,16 @@ enum sta_stats_type {
#define STA_STATS_FIELD_VHT_NSS GENMASK( 7, 4)
#define STA_STATS_FIELD_HE_MCS GENMASK( 3, 0)
#define STA_STATS_FIELD_HE_NSS GENMASK( 7, 4)
-#define STA_STATS_FIELD_BW GENMASK(11, 8)
-#define STA_STATS_FIELD_SGI GENMASK(12, 12)
-#define STA_STATS_FIELD_TYPE GENMASK(15, 13)
-#define STA_STATS_FIELD_HE_RU GENMASK(18, 16)
-#define STA_STATS_FIELD_HE_GI GENMASK(20, 19)
-#define STA_STATS_FIELD_HE_DCM GENMASK(21, 21)
+#define STA_STATS_FIELD_EHT_MCS GENMASK( 3, 0)
+#define STA_STATS_FIELD_EHT_NSS GENMASK( 7, 4)
+#define STA_STATS_FIELD_BW GENMASK(12, 8)
+#define STA_STATS_FIELD_SGI GENMASK(13, 13)
+#define STA_STATS_FIELD_TYPE GENMASK(16, 14)
+#define STA_STATS_FIELD_HE_RU GENMASK(19, 17)
+#define STA_STATS_FIELD_HE_GI GENMASK(21, 20)
+#define STA_STATS_FIELD_HE_DCM GENMASK(22, 22)
+#define STA_STATS_FIELD_EHT_RU GENMASK(20, 17)
+#define STA_STATS_FIELD_EHT_GI GENMASK(22, 21)
#define STA_STATS_FIELD(_n, _v) FIELD_PREP(STA_STATS_FIELD_ ## _n, _v)
#define STA_STATS_GET(_n, _v) FIELD_GET(STA_STATS_FIELD_ ## _n, _v)
@@ -989,6 +997,13 @@ static inline u32 sta_stats_encode_rate(struct ieee80211_rx_status *s)
r |= STA_STATS_FIELD(HE_RU, s->he_ru);
r |= STA_STATS_FIELD(HE_DCM, s->he_dcm);
break;
+ case RX_ENC_EHT:
+ r |= STA_STATS_FIELD(TYPE, STA_STATS_RATE_TYPE_EHT);
+ r |= STA_STATS_FIELD(EHT_NSS, s->nss);
+ r |= STA_STATS_FIELD(EHT_MCS, s->rate_idx);
+ r |= STA_STATS_FIELD(EHT_GI, s->eht.gi);
+ r |= STA_STATS_FIELD(EHT_RU, s->eht.ru);
+ break;
default:
WARN_ON(1);
return STA_STATS_RATE_INVALID;
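
STA_STATS_FIELD()/STA_STATS_GET() above are FIELD_PREP()/FIELD_GET() wrappers packing a rate into one word; with TYPE now ending at bit 16, an encoded rate no longer fits in 16 bits, which is what motivates the u16 to u32 widening of last_rate in sta_info.c. A user-space sketch with minimal stand-in macros (not the kernel definitions; the type value 6 is only an example):

#include <stdint.h>
#include <stdio.h>

#define GENMASK(h, l)	(((~0u) << (l)) & (~0u >> (31 - (h))))
#define FIELD_PREP(mask, val)	(((uint32_t)(val) << __builtin_ctz(mask)) & (mask))
#define FIELD_GET(mask, reg)	(((reg) & (mask)) >> __builtin_ctz(mask))

#define FIELD_EHT_MCS	GENMASK(3, 0)
#define FIELD_EHT_NSS	GENMASK(7, 4)
#define FIELD_TYPE	GENMASK(16, 14)

int main(void)
{
	uint32_t r = FIELD_PREP(FIELD_EHT_MCS, 11) |
		     FIELD_PREP(FIELD_EHT_NSS, 2) |
		     FIELD_PREP(FIELD_TYPE, 6);	/* example type index */

	/* raw value exceeds 0xffff, hence the wider last_rate */
	printf("mcs=%u nss=%u type=%u raw=0x%x\n",
	       FIELD_GET(FIELD_EHT_MCS, r), FIELD_GET(FIELD_EHT_NSS, r),
	       FIELD_GET(FIELD_TYPE, r), r);	/* mcs=11 nss=2 type=6 */
	return 0;
}
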
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 2171cd1ca807..7699fb410670 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -1129,7 +1129,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
struct sk_buff *purge_skb = NULL;
if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
reset_agg_timer = true;
} else if (test_bit(HT_AGG_STATE_WANT_START, &tid_tx->state)) {
/*
@@ -1161,7 +1160,6 @@ static bool ieee80211_tx_prep_agg(struct ieee80211_tx_data *tx,
if (!tid_tx) {
/* do nothing, let packet pass through */
} else if (test_bit(HT_AGG_STATE_OPERATIONAL, &tid_tx->state)) {
- info->flags |= IEEE80211_TX_CTL_AMPDU;
reset_agg_timer = true;
} else {
queued = true;
@@ -3677,8 +3675,7 @@ static void __ieee80211_xmit_fast(struct ieee80211_sub_if_data *sdata,
info->band = fast_tx->band;
info->control.vif = &sdata->vif;
info->flags = IEEE80211_TX_CTL_FIRST_FRAGMENT |
- IEEE80211_TX_CTL_DONTFRAG |
- (ampdu ? IEEE80211_TX_CTL_AMPDU : 0);
+ IEEE80211_TX_CTL_DONTFRAG;
info->control.flags = IEEE80211_TX_CTRL_FAST_XMIT |
u32_encode_bits(IEEE80211_LINK_UNSPECIFIED,
IEEE80211_TX_CTRL_MLO_LINK);
@@ -3783,6 +3780,8 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
struct ieee80211_tx_data tx;
ieee80211_tx_result r;
struct ieee80211_vif *vif = txq->vif;
+ int q = vif->hw_queue[txq->ac];
+ bool q_stopped;
WARN_ON_ONCE(softirq_count() == 0);
@@ -3790,17 +3789,18 @@ struct sk_buff *ieee80211_tx_dequeue(struct ieee80211_hw *hw,
return NULL;
begin:
- spin_lock_bh(&fq->lock);
-
- if (test_bit(IEEE80211_TXQ_STOP, &txqi->flags) ||
- test_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags))
- goto out;
+ spin_lock(&local->queue_stop_reason_lock);
+ q_stopped = local->queue_stop_reasons[q];
+ spin_unlock(&local->queue_stop_reason_lock);
- if (vif->txqs_stopped[txq->ac]) {
- set_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags);
- goto out;
+ if (unlikely(q_stopped)) {
+ /* mark for waking later */
+ set_bit(IEEE80211_TXQ_DIRTY, &txqi->flags);
+ return NULL;
}
+ spin_lock_bh(&fq->lock);
+
/* Make sure fragments stay together. */
skb = __skb_dequeue(&txqi->frags);
if (unlikely(skb)) {
@@ -3810,6 +3810,9 @@ begin:
IEEE80211_SKB_CB(skb)->control.flags &=
~IEEE80211_TX_INTCFL_NEED_TXPROCESSING;
} else {
+ if (unlikely(test_bit(IEEE80211_TXQ_STOP, &txqi->flags)))
+ goto out;
+
skb = fq_tin_dequeue(fq, tin, fq_tin_dequeue_func);
}
@@ -3860,9 +3863,8 @@ begin:
}
if (test_bit(IEEE80211_TXQ_AMPDU, &txqi->flags))
- info->flags |= IEEE80211_TX_CTL_AMPDU;
- else
- info->flags &= ~IEEE80211_TX_CTL_AMPDU;
+ info->flags |= (IEEE80211_TX_CTL_AMPDU |
+ IEEE80211_TX_CTL_DONTFRAG);
if (info->flags & IEEE80211_TX_CTL_HW_80211_ENCAP) {
if (!ieee80211_hw_check(&local->hw, HAS_RATE_CONTROL)) {
@@ -4432,7 +4434,7 @@ static void ieee80211_mlo_multicast_tx(struct net_device *dev,
u32 ctrl_flags = IEEE80211_TX_CTRL_MCAST_MLO_FIRST_TX;
if (hweight16(links) == 1) {
- ctrl_flags |= u32_encode_bits(ffs(links) - 1,
+ ctrl_flags |= u32_encode_bits(__ffs(links),
IEEE80211_TX_CTRL_MLO_LINK);
__ieee80211_subif_start_xmit(skb, sdata->dev, 0, ctrl_flags,
@@ -4596,8 +4598,6 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
info = IEEE80211_SKB_CB(skb);
memset(info, 0, sizeof(*info));
- if (tid_tx)
- info->flags |= IEEE80211_TX_CTL_AMPDU;
info->hw_queue = sdata->vif.hw_queue[queue];
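
In the ieee80211_mlo_multicast_tx() hunk above, ffs(links) - 1 becomes __ffs(links); both return the index of the lowest set bit when the word is non-zero (guaranteed here by hweight16(links) == 1), ffs() counting from 1 and __ffs() from 0. A quick stand-alone check, using the GCC/Clang builtin as a stand-in for the kernel's __ffs():

#include <assert.h>
#include <strings.h>	/* ffs() */

/* 0-based lowest set bit, caller guarantees word != 0 (like __ffs()) */
static unsigned int lowest_bit(unsigned long word)
{
	return __builtin_ctzl(word);
}

int main(void)
{
	unsigned long links = 0x4;	/* only link 2 is active */

	assert(ffs(links) - 1 == 2);
	assert(lowest_bit(links) == 2);
	return 0;
}
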
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 6f5407038459..1a28fe5cb614 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -292,22 +292,12 @@ static void wake_tx_push_queue(struct ieee80211_local *local,
struct ieee80211_sub_if_data *sdata,
struct ieee80211_txq *queue)
{
- int q = sdata->vif.hw_queue[queue->ac];
struct ieee80211_tx_control control = {
.sta = queue->sta,
};
struct sk_buff *skb;
- unsigned long flags;
- bool q_stopped;
while (1) {
- spin_lock_irqsave(&local->queue_stop_reason_lock, flags);
- q_stopped = local->queue_stop_reasons[q];
- spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags);
-
- if (q_stopped)
- break;
-
skb = ieee80211_tx_dequeue(&local->hw, queue);
if (!skb)
break;
@@ -347,8 +337,6 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
local_bh_disable();
spin_lock(&fq->lock);
- sdata->vif.txqs_stopped[ac] = false;
-
if (!test_bit(SDATA_STATE_RUNNING, &sdata->state))
goto out;
@@ -370,7 +358,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
if (ac != txq->ac)
continue;
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX,
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY,
&txqi->flags))
continue;
@@ -385,7 +373,7 @@ static void __ieee80211_wake_txqs(struct ieee80211_sub_if_data *sdata, int ac)
txqi = to_txq_info(vif->txq);
- if (!test_and_clear_bit(IEEE80211_TXQ_STOP_NETIF_TX, &txqi->flags) ||
+ if (!test_and_clear_bit(IEEE80211_TXQ_DIRTY, &txqi->flags) ||
(ps && atomic_read(&ps->num_sta_ps)) || ac != vif->txq->ac)
goto out;
@@ -517,8 +505,6 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
bool refcounted)
{
struct ieee80211_local *local = hw_to_local(hw);
- struct ieee80211_sub_if_data *sdata;
- int n_acs = IEEE80211_NUM_ACS;
trace_stop_queue(local, queue, reason);
@@ -530,29 +516,7 @@ static void __ieee80211_stop_queue(struct ieee80211_hw *hw, int queue,
else
local->q_stop_reasons[queue][reason]++;
- if (__test_and_set_bit(reason, &local->queue_stop_reasons[queue]))
- return;
-
- if (local->hw.queues < IEEE80211_NUM_ACS)
- n_acs = 1;
-
- rcu_read_lock();
- list_for_each_entry_rcu(sdata, &local->interfaces, list) {
- int ac;
-
- if (!sdata->dev)
- continue;
-
- for (ac = 0; ac < n_acs; ac++) {
- if (sdata->vif.hw_queue[ac] == queue ||
- sdata->vif.cab_queue == queue) {
- spin_lock(&local->fq.lock);
- sdata->vif.txqs_stopped[ac] = true;
- spin_unlock(&local->fq.lock);
- }
- }
- }
- rcu_read_unlock();
+ set_bit(reason, &local->queue_stop_reasons[queue]);
}
void ieee80211_stop_queue_by_reason(struct ieee80211_hw *hw, int queue,
@@ -868,19 +832,6 @@ static void __iterate_stations(struct ieee80211_local *local,
}
}
-void ieee80211_iterate_stations(struct ieee80211_hw *hw,
- void (*iterator)(void *data,
- struct ieee80211_sta *sta),
- void *data)
-{
- struct ieee80211_local *local = hw_to_local(hw);
-
- mutex_lock(&local->sta_mtx);
- __iterate_stations(local, iterator, data);
- mutex_unlock(&local->sta_mtx);
-}
-EXPORT_SYMBOL_GPL(ieee80211_iterate_stations);
-
void ieee80211_iterate_stations_atomic(struct ieee80211_hw *hw,
void (*iterator)(void *data,
struct ieee80211_sta *sta),
@@ -4069,6 +4020,19 @@ u64 ieee80211_calculate_rx_timestamp(struct ieee80211_local *local,
/* Fill cfg80211 rate info */
switch (status->encoding) {
+ case RX_ENC_EHT:
+ ri.flags |= RATE_INFO_FLAGS_EHT_MCS;
+ ri.mcs = status->rate_idx;
+ ri.nss = status->nss;
+ ri.eht_ru_alloc = status->eht.ru;
+ if (status->enc_flags & RX_ENC_FLAG_SHORT_GI)
+ ri.flags |= RATE_INFO_FLAGS_SHORT_GI;
+ /* TODO/FIXME: is this right? handle other PPDUs */
+ if (status->flag & RX_FLAG_MACTIME_PLCP_START) {
+ mpdu_offset += 2;
+ ts += 36;
+ }
+ break;
case RX_ENC_HE:
ri.flags |= RATE_INFO_FLAGS_HE_MCS;
ri.mcs = status->rate_idx;
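
On the util.c side, __ieee80211_stop_queue() now only maintains the per-queue stop-reason state: a refcount per (queue, reason) pair plus the queue_stop_reasons bitmask that ieee80211_tx_dequeue() tests. A user-space sketch of that bookkeeping, with simplified types rather than the kernel structures:

#include <stdio.h>

#define NUM_REASONS 8

struct queue_state {
	int refs[NUM_REASONS];
	unsigned long stop_reasons;	/* bit set while refs[reason] > 0 */
};

static void stop_queue(struct queue_state *q, int reason)
{
	q->refs[reason]++;
	q->stop_reasons |= 1UL << reason;
}

static void wake_queue(struct queue_state *q, int reason)
{
	if (q->refs[reason] > 0 && --q->refs[reason] == 0)
		q->stop_reasons &= ~(1UL << reason);
}

int main(void)
{
	struct queue_state q = { 0 };

	stop_queue(&q, 2);
	stop_queue(&q, 2);
	wake_queue(&q, 2);
	printf("stopped=%d\n", q.stop_reasons != 0);	/* still 1 */
	wake_queue(&q, 2);
	printf("stopped=%d\n", q.stop_reasons != 0);	/* now 0 */
	return 0;
}
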
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 803de5881485..c1250aa47808 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -625,7 +625,7 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
enum ieee80211_sta_rx_bandwidth new_bw;
struct sta_opmode_info sta_opmode = {};
u32 changed = 0;
- u8 nss;
+ u8 nss, cur_nss;
/* ignore - no support for BF yet */
if (opmode & IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF)
@@ -636,10 +636,25 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
nss += 1;
if (link_sta->pub->rx_nss != nss) {
- link_sta->pub->rx_nss = nss;
- sta_opmode.rx_nss = nss;
- changed |= IEEE80211_RC_NSS_CHANGED;
- sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
+ cur_nss = link_sta->pub->rx_nss;
+ /* Reset rx_nss and call ieee80211_sta_set_rx_nss() which
+ * will set the same to max nss value calculated based on capability.
+ */
+ link_sta->pub->rx_nss = 0;
+ ieee80211_sta_set_rx_nss(link_sta);
+ /* Do not allow an nss change to rx_nss greater than max_nss
+ * negotiated and capped to APs capability during association.
+ */
+ if (nss <= link_sta->pub->rx_nss) {
+ link_sta->pub->rx_nss = nss;
+ sta_opmode.rx_nss = nss;
+ changed |= IEEE80211_RC_NSS_CHANGED;
+ sta_opmode.changed |= STA_OPMODE_N_SS_CHANGED;
+ } else {
+ link_sta->pub->rx_nss = cur_nss;
+ pr_warn_ratelimited("Ignoring NSS change in VHT Operating Mode Notification from %pM with invalid nss %d",
+ link_sta->pub->addr, nss);
+ }
}
switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
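
The vht.c change only honours an operating-mode NSS that stays within the maximum derived from capabilities during association; a larger value is logged and the previous one restored. The clamp reduced to its essentials, with illustrative names:

#include <stdio.h>

static unsigned int apply_opmode_nss(unsigned int cur_nss,
				     unsigned int requested,
				     unsigned int cap_max)
{
	if (requested <= cap_max)
		return requested;	/* accept the new, valid value */

	printf("ignoring invalid nss %u (max %u)\n", requested, cap_max);
	return cur_nss;			/* restore what was negotiated */
}

int main(void)
{
	unsigned int nss = 2;

	nss = apply_opmode_nss(nss, 1, 2);	/* ok: 1 */
	nss = apply_opmode_nss(nss, 4, 2);	/* rejected: stays 1 */
	printf("%u\n", nss);
	return 0;
}
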
diff --git a/net/mac802154/Makefile b/net/mac802154/Makefile
index 4059295fdbf8..43d1347b37ee 100644
--- a/net/mac802154/Makefile
+++ b/net/mac802154/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
obj-$(CONFIG_MAC802154) += mac802154.o
mac802154-objs := main.o rx.o tx.o mac_cmd.o mib.o \
- iface.o llsec.o util.o cfg.o trace.o
+ iface.o llsec.o util.o cfg.o scan.o trace.o
CFLAGS_trace.o := -I$(src)
diff --git a/net/mac802154/cfg.c b/net/mac802154/cfg.c
index dc2d918fac68..5c3cb019f751 100644
--- a/net/mac802154/cfg.c
+++ b/net/mac802154/cfg.c
@@ -114,11 +114,15 @@ ieee802154_set_channel(struct wpan_phy *wpan_phy, u8 page, u8 channel)
wpan_phy->current_channel == channel)
return 0;
+ /* Refuse to change channels during scanning or beaconing */
+ if (mac802154_is_scanning(local) || mac802154_is_beaconing(local))
+ return -EBUSY;
+
ret = drv_set_channel(local, page, channel);
if (!ret) {
wpan_phy->current_page = page;
wpan_phy->current_channel = channel;
- ieee802154_configure_durations(wpan_phy);
+ ieee802154_configure_durations(wpan_phy, page, channel);
}
return ret;
@@ -261,6 +265,56 @@ ieee802154_set_ackreq_default(struct wpan_phy *wpan_phy,
return 0;
}
+static int mac802154_trigger_scan(struct wpan_phy *wpan_phy,
+ struct cfg802154_scan_request *request)
+{
+ struct ieee802154_sub_if_data *sdata;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(request->wpan_dev);
+
+ ASSERT_RTNL();
+
+ return mac802154_trigger_scan_locked(sdata, request);
+}
+
+static int mac802154_abort_scan(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ struct ieee802154_sub_if_data *sdata;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ ASSERT_RTNL();
+
+ return mac802154_abort_scan_locked(local, sdata);
+}
+
+static int mac802154_send_beacons(struct wpan_phy *wpan_phy,
+ struct cfg802154_beacon_request *request)
+{
+ struct ieee802154_sub_if_data *sdata;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(request->wpan_dev);
+
+ ASSERT_RTNL();
+
+ return mac802154_send_beacons_locked(sdata, request);
+}
+
+static int mac802154_stop_beacons(struct wpan_phy *wpan_phy,
+ struct wpan_dev *wpan_dev)
+{
+ struct ieee802154_local *local = wpan_phy_priv(wpan_phy);
+ struct ieee802154_sub_if_data *sdata;
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(wpan_dev);
+
+ ASSERT_RTNL();
+
+ return mac802154_stop_beacons_locked(local, sdata);
+}
+
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
static void
ieee802154_get_llsec_table(struct wpan_phy *wpan_phy,
@@ -468,6 +522,10 @@ const struct cfg802154_ops mac802154_config_ops = {
.set_max_frame_retries = ieee802154_set_max_frame_retries,
.set_lbt_mode = ieee802154_set_lbt_mode,
.set_ackreq_default = ieee802154_set_ackreq_default,
+ .trigger_scan = mac802154_trigger_scan,
+ .abort_scan = mac802154_abort_scan,
+ .send_beacons = mac802154_send_beacons,
+ .stop_beacons = mac802154_stop_beacons,
#ifdef CONFIG_IEEE802154_NL802154_EXPERIMENTAL
.get_llsec_table = ieee802154_get_llsec_table,
.lock_llsec_table = ieee802154_lock_llsec_table,
diff --git a/net/mac802154/ieee802154_i.h b/net/mac802154/ieee802154_i.h
index 509e0172fe82..63bab99ed368 100644
--- a/net/mac802154/ieee802154_i.h
+++ b/net/mac802154/ieee802154_i.h
@@ -21,6 +21,11 @@
#include "llsec.h"
+enum ieee802154_ongoing {
+ IEEE802154_IS_SCANNING = BIT(0),
+ IEEE802154_IS_BEACONING = BIT(1),
+};
+
/* mac802154 device private data */
struct ieee802154_local {
struct ieee802154_hw hw;
@@ -43,15 +48,32 @@ struct ieee802154_local {
struct list_head interfaces;
struct mutex iflist_mtx;
- /* This one is used for scanning and other jobs not to be interfered
- * with serial driver.
- */
+ /* Data related workqueue */
struct workqueue_struct *workqueue;
+ /* MAC commands related workqueue */
+ struct workqueue_struct *mac_wq;
struct hrtimer ifs_timer;
+ /* Scanning */
+ u8 scan_page;
+ u8 scan_channel;
+ struct cfg802154_scan_request __rcu *scan_req;
+ struct delayed_work scan_work;
+
+ /* Beaconing */
+ unsigned int beacon_interval;
+ struct ieee802154_beacon_frame beacon;
+ struct cfg802154_beacon_request __rcu *beacon_req;
+ struct delayed_work beacon_work;
+
+ /* Asynchronous tasks */
+ struct list_head rx_beacon_list;
+ struct work_struct rx_beacon_work;
+
bool started;
bool suspended;
+ unsigned long ongoing;
struct tasklet_struct tasklet;
struct sk_buff_head skb_queue;
@@ -141,10 +163,16 @@ int ieee802154_mlme_op_pre(struct ieee802154_local *local);
int ieee802154_mlme_tx(struct ieee802154_local *local,
struct ieee802154_sub_if_data *sdata,
struct sk_buff *skb);
+int ieee802154_mlme_tx_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
void ieee802154_mlme_op_post(struct ieee802154_local *local);
int ieee802154_mlme_tx_one(struct ieee802154_local *local,
struct ieee802154_sub_if_data *sdata,
struct sk_buff *skb);
+int ieee802154_mlme_tx_one_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb);
netdev_tx_t
ieee802154_monitor_start_xmit(struct sk_buff *skb, struct net_device *dev);
netdev_tx_t
@@ -220,6 +248,33 @@ void mac802154_unlock_table(struct net_device *dev);
int mac802154_wpan_update_llsec(struct net_device *dev);
+/* PAN management handling */
+void mac802154_scan_worker(struct work_struct *work);
+int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata,
+ struct cfg802154_scan_request *request);
+int mac802154_abort_scan_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata);
+int mac802154_process_beacon(struct ieee802154_local *local,
+ struct sk_buff *skb,
+ u8 page, u8 channel);
+void mac802154_rx_beacon_worker(struct work_struct *work);
+
+static inline bool mac802154_is_scanning(struct ieee802154_local *local)
+{
+ return test_bit(IEEE802154_IS_SCANNING, &local->ongoing);
+}
+
+void mac802154_beacon_worker(struct work_struct *work);
+int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
+ struct cfg802154_beacon_request *request);
+int mac802154_stop_beacons_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata);
+
+static inline bool mac802154_is_beaconing(struct ieee802154_local *local)
+{
+ return test_bit(IEEE802154_IS_BEACONING, &local->ongoing);
+}
+
/* interface handling */
int ieee802154_iface_init(void);
void ieee802154_iface_exit(void);
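
The new "ongoing" member is a plain flag word, one bit per long-running MLME operation, read through the mac802154_is_scanning()/mac802154_is_beaconing() helpers above. A user-space approximation (the kernel uses the atomic set_bit()/test_bit() bitops):

#include <stdbool.h>
#include <stdio.h>

enum { IS_SCANNING = 1UL << 0, IS_BEACONING = 1UL << 1 };

struct local_state { unsigned long ongoing; };

static bool is_scanning(const struct local_state *l)
{
	return l->ongoing & IS_SCANNING;
}

static bool is_beaconing(const struct local_state *l)
{
	return l->ongoing & IS_BEACONING;
}

int main(void)
{
	struct local_state l = { .ongoing = IS_SCANNING };

	printf("%d %d\n", is_scanning(&l), is_beaconing(&l));	/* 1 0 */
	return 0;
}
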
diff --git a/net/mac802154/iface.c b/net/mac802154/iface.c
index ac0b28025fb0..c0e2da5072be 100644
--- a/net/mac802154/iface.c
+++ b/net/mac802154/iface.c
@@ -302,6 +302,12 @@ static int mac802154_slave_close(struct net_device *dev)
ASSERT_RTNL();
+ if (mac802154_is_scanning(local))
+ mac802154_abort_scan_locked(local, sdata);
+
+ if (mac802154_is_beaconing(local))
+ mac802154_stop_beacons_locked(local, sdata);
+
netif_stop_queue(dev);
local->open_count--;
diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c
index 55550ead2ced..8d2eabc71bbe 100644
--- a/net/mac802154/llsec.c
+++ b/net/mac802154/llsec.c
@@ -707,7 +707,10 @@ int mac802154_llsec_encrypt(struct mac802154_llsec *sec, struct sk_buff *skb)
hlen = ieee802154_hdr_pull(skb, &hdr);
- if (hlen < 0 || hdr.fc.type != IEEE802154_FC_TYPE_DATA)
+ /* TODO: control frames security support */
+ if (hlen < 0 ||
+ (hdr.fc.type != IEEE802154_FC_TYPE_DATA &&
+ hdr.fc.type != IEEE802154_FC_TYPE_BEACON))
return -EINVAL;
if (!hdr.fc.security_enabled ||
diff --git a/net/mac802154/main.c b/net/mac802154/main.c
index 3ed31daf7b9c..ee23e234b998 100644
--- a/net/mac802154/main.c
+++ b/net/mac802154/main.c
@@ -89,6 +89,7 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
local->ops = ops;
INIT_LIST_HEAD(&local->interfaces);
+ INIT_LIST_HEAD(&local->rx_beacon_list);
mutex_init(&local->iflist_mtx);
tasklet_setup(&local->tasklet, ieee802154_tasklet_handler);
@@ -96,6 +97,9 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
skb_queue_head_init(&local->skb_queue);
INIT_WORK(&local->sync_tx_work, ieee802154_xmit_sync_worker);
+ INIT_DELAYED_WORK(&local->scan_work, mac802154_scan_worker);
+ INIT_WORK(&local->rx_beacon_work, mac802154_rx_beacon_worker);
+ INIT_DELAYED_WORK(&local->beacon_work, mac802154_beacon_worker);
/* init supported flags with 802.15.4 default ranges */
phy->supported.max_minbe = 8;
@@ -113,32 +117,33 @@ ieee802154_alloc_hw(size_t priv_data_len, const struct ieee802154_ops *ops)
}
EXPORT_SYMBOL(ieee802154_alloc_hw);
-void ieee802154_configure_durations(struct wpan_phy *phy)
+void ieee802154_configure_durations(struct wpan_phy *phy,
+ unsigned int page, unsigned int channel)
{
u32 duration = 0;
- switch (phy->current_page) {
+ switch (page) {
case 0:
- if (BIT(phy->current_channel) & 0x1)
+ if (BIT(channel) & 0x1)
/* 868 MHz BPSK 802.15.4-2003: 20 ksym/s */
duration = 50 * NSEC_PER_USEC;
- else if (BIT(phy->current_channel) & 0x7FE)
+ else if (BIT(channel) & 0x7FE)
/* 915 MHz BPSK 802.15.4-2003: 40 ksym/s */
duration = 25 * NSEC_PER_USEC;
- else if (BIT(phy->current_channel) & 0x7FFF800)
+ else if (BIT(channel) & 0x7FFF800)
/* 2400 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */
duration = 16 * NSEC_PER_USEC;
break;
case 2:
- if (BIT(phy->current_channel) & 0x1)
+ if (BIT(channel) & 0x1)
/* 868 MHz O-QPSK 802.15.4-2006: 25 ksym/s */
duration = 40 * NSEC_PER_USEC;
- else if (BIT(phy->current_channel) & 0x7FE)
+ else if (BIT(channel) & 0x7FE)
/* 915 MHz O-QPSK 802.15.4-2006: 62.5 ksym/s */
duration = 16 * NSEC_PER_USEC;
break;
case 3:
- if (BIT(phy->current_channel) & 0x3FFF)
+ if (BIT(channel) & 0x3FFF)
/* 2.4 GHz CSS 802.15.4a-2007: 1/6 Msym/s */
duration = 6 * NSEC_PER_USEC;
break;
@@ -184,6 +189,7 @@ static void ieee802154_setup_wpan_phy_pib(struct wpan_phy *wpan_phy)
int ieee802154_register_hw(struct ieee802154_hw *hw)
{
struct ieee802154_local *local = hw_to_local(hw);
+ char mac_wq_name[IFNAMSIZ + 10] = {};
struct net_device *dev;
int rc = -ENOSYS;
@@ -194,6 +200,13 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
goto out;
}
+ snprintf(mac_wq_name, IFNAMSIZ + 10, "%s-mac-cmds", wpan_phy_name(local->phy));
+ local->mac_wq = create_singlethread_workqueue(mac_wq_name);
+ if (!local->mac_wq) {
+ rc = -ENOMEM;
+ goto out_wq;
+ }
+
hrtimer_init(&local->ifs_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
local->ifs_timer.function = ieee802154_xmit_ifs_timer;
@@ -201,7 +214,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
ieee802154_setup_wpan_phy_pib(local->phy);
- ieee802154_configure_durations(local->phy);
+ ieee802154_configure_durations(local->phy, local->phy->current_page,
+ local->phy->current_channel);
if (!(hw->flags & IEEE802154_HW_CSMA_PARAMS)) {
local->phy->supported.min_csma_backoffs = 4;
@@ -222,7 +236,7 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
rc = wpan_phy_register(local->phy);
if (rc < 0)
- goto out_wq;
+ goto out_mac_wq;
rtnl_lock();
@@ -241,6 +255,8 @@ int ieee802154_register_hw(struct ieee802154_hw *hw)
out_phy:
wpan_phy_unregister(local->phy);
+out_mac_wq:
+ destroy_workqueue(local->mac_wq);
out_wq:
destroy_workqueue(local->workqueue);
out:
@@ -261,6 +277,7 @@ void ieee802154_unregister_hw(struct ieee802154_hw *hw)
rtnl_unlock();
+ destroy_workqueue(local->mac_wq);
destroy_workqueue(local->workqueue);
wpan_phy_unregister(local->phy);
}
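
ieee802154_configure_durations() above is a pure (page, channel) to symbol-duration lookup. A compact user-space rendering of the page-0 branch, with a worked example: channel 15 falls in the 2.4 GHz O-QPSK mask 0x7FFF800, giving 16 us symbols:

#include <stdio.h>

static unsigned int symbol_duration_ns(unsigned int page, unsigned int channel)
{
	if (page == 0) {
		if ((1UL << channel) & 0x1)
			return 50000;	/* 868 MHz BPSK, 20 ksym/s */
		if ((1UL << channel) & 0x7FE)
			return 25000;	/* 915 MHz BPSK, 40 ksym/s */
		if ((1UL << channel) & 0x7FFF800)
			return 16000;	/* 2.4 GHz O-QPSK, 62.5 ksym/s */
	}
	return 0;	/* other pages omitted in this sketch */
}

int main(void)
{
	printf("%u ns\n", symbol_duration_ns(0, 15));	/* 16000 */
	return 0;
}
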
diff --git a/net/mac802154/rx.c b/net/mac802154/rx.c
index c2aae2a6d6a6..da0628ee3c89 100644
--- a/net/mac802154/rx.c
+++ b/net/mac802154/rx.c
@@ -29,12 +29,31 @@ static int ieee802154_deliver_skb(struct sk_buff *skb)
return netif_receive_skb(skb);
}
+void mac802154_rx_beacon_worker(struct work_struct *work)
+{
+ struct ieee802154_local *local =
+ container_of(work, struct ieee802154_local, rx_beacon_work);
+ struct cfg802154_mac_pkt *mac_pkt;
+
+ mac_pkt = list_first_entry_or_null(&local->rx_beacon_list,
+ struct cfg802154_mac_pkt, node);
+ if (!mac_pkt)
+ return;
+
+ mac802154_process_beacon(local, mac_pkt->skb, mac_pkt->page, mac_pkt->channel);
+
+ list_del(&mac_pkt->node);
+ kfree_skb(mac_pkt->skb);
+ kfree(mac_pkt);
+}
+
static int
ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
struct sk_buff *skb, const struct ieee802154_hdr *hdr)
{
- struct wpan_dev *wpan_dev = &sdata->wpan_dev;
struct wpan_phy *wpan_phy = sdata->local->hw.phy;
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct cfg802154_mac_pkt *mac_pkt;
__le16 span, sshort;
int rc;
@@ -106,6 +125,21 @@ ieee802154_subif_frame(struct ieee802154_sub_if_data *sdata,
switch (mac_cb(skb)->type) {
case IEEE802154_FC_TYPE_BEACON:
+ dev_dbg(&sdata->dev->dev, "BEACON received\n");
+ if (!mac802154_is_scanning(sdata->local))
+ goto fail;
+
+ mac_pkt = kzalloc(sizeof(*mac_pkt), GFP_ATOMIC);
+ if (!mac_pkt)
+ goto fail;
+
+ mac_pkt->skb = skb_get(skb);
+ mac_pkt->sdata = sdata;
+ mac_pkt->page = sdata->local->scan_page;
+ mac_pkt->channel = sdata->local->scan_channel;
+ list_add_tail(&mac_pkt->node, &sdata->local->rx_beacon_list);
+ queue_work(sdata->local->mac_wq, &sdata->local->rx_beacon_work);
+ return NET_RX_SUCCESS;
case IEEE802154_FC_TYPE_ACK:
case IEEE802154_FC_TYPE_MAC_CMD:
goto fail;
@@ -213,7 +247,6 @@ __ieee802154_rx_handle_packet(struct ieee802154_local *local,
ret = ieee802154_parse_frame_start(skb, &hdr);
if (ret) {
pr_debug("got invalid frame\n");
- kfree_skb(skb);
return;
}
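
The scan worker in the new scan.c below dwells on each channel for aBaseSuperframeDuration * (2^duration + 1) symbols, as computed by mac802154_scan_get_channel_time(). A worked example, assuming the usual 802.15.4 constants aBaseSlotDuration = 60 symbols and aNumSuperframeSlots = 16, and 16 us symbols (2.4 GHz O-QPSK): duration order 3 gives 960 * 16 us * 9 = 138.24 ms per channel.

#include <stdint.h>
#include <stdio.h>

#define SUPERFRAME_SYMBOLS	(60 * 16)	/* aBaseSuperframeDuration */

static uint64_t channel_time_us(unsigned int duration_order,
				unsigned int symbol_us)
{
	return (uint64_t)SUPERFRAME_SYMBOLS * symbol_us *
	       ((1UL << duration_order) + 1);
}

int main(void)
{
	printf("%llu us\n",
	       (unsigned long long)channel_time_us(3, 16));	/* 138240 */
	return 0;
}
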
diff --git a/net/mac802154/scan.c b/net/mac802154/scan.c
new file mode 100644
index 000000000000..9b0933a185eb
--- /dev/null
+++ b/net/mac802154/scan.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * IEEE 802.15.4 scanning management
+ *
+ * Copyright (C) 2021 Qorvo US, Inc
+ * Authors:
+ * - David Girault <david.girault@qorvo.com>
+ * - Miquel Raynal <miquel.raynal@bootlin.com>
+ */
+
+#include <linux/module.h>
+#include <linux/rtnetlink.h>
+#include <net/mac802154.h>
+
+#include "ieee802154_i.h"
+#include "driver-ops.h"
+#include "../ieee802154/nl802154.h"
+
+#define IEEE802154_BEACON_MHR_SZ 13
+#define IEEE802154_BEACON_PL_SZ 4
+#define IEEE802154_BEACON_SKB_SZ (IEEE802154_BEACON_MHR_SZ + \
+ IEEE802154_BEACON_PL_SZ)
+
+/* mac802154_scan_cleanup_locked() must be called upon scan completion or abort.
+ * - Completions are asynchronous, not locked by the rtnl and decided by the
+ * scan worker.
+ * - Aborts are decided by userspace, and locked by the rtnl.
+ *
+ * Concurrent modifications to the PHY, the interfaces or the hardware are in
+ * general prevented by the rtnl. So in most cases we don't need additional
+ * protection.
+ *
+ * However, the scan worker gets triggered without anybody noticing and thus we
+ * must ensure the presence of the devices as well as data consistency:
+ * - The sub-interface and device driver module both get their reference
+ * counters incremented whenever we start a scan, so they cannot disappear
+ * during operation.
+ * - Data consistency is achieved by the use of rcu protected pointers.
+ */
+static int mac802154_scan_cleanup_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata,
+ bool aborted)
+{
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct wpan_phy *wpan_phy = local->phy;
+ struct cfg802154_scan_request *request;
+ u8 arg;
+
+ /* Prevent any further use of the scan request */
+ clear_bit(IEEE802154_IS_SCANNING, &local->ongoing);
+ cancel_delayed_work(&local->scan_work);
+ request = rcu_replace_pointer(local->scan_req, NULL, 1);
+ if (!request)
+ return 0;
+ kfree_rcu(request);
+
+ /* Advertise first, while we know the devices cannot be removed */
+ if (aborted)
+ arg = NL802154_SCAN_DONE_REASON_ABORTED;
+ else
+ arg = NL802154_SCAN_DONE_REASON_FINISHED;
+ nl802154_scan_done(wpan_phy, wpan_dev, arg);
+
+ /* Cleanup software stack */
+ ieee802154_mlme_op_post(local);
+
+ /* Set the hardware back in its original state */
+ drv_set_channel(local, wpan_phy->current_page,
+ wpan_phy->current_channel);
+ ieee802154_configure_durations(wpan_phy, wpan_phy->current_page,
+ wpan_phy->current_channel);
+ drv_stop(local);
+ synchronize_net();
+ sdata->required_filtering = sdata->iface_default_filtering;
+ drv_start(local, sdata->required_filtering, &local->addr_filt);
+
+ return 0;
+}
+
+int mac802154_abort_scan_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata)
+{
+ ASSERT_RTNL();
+
+ if (!mac802154_is_scanning(local))
+ return -ESRCH;
+
+ return mac802154_scan_cleanup_locked(local, sdata, true);
+}
+
+static unsigned int mac802154_scan_get_channel_time(u8 duration_order,
+ u8 symbol_duration)
+{
+ u64 base_super_frame_duration = (u64)symbol_duration *
+ IEEE802154_SUPERFRAME_PERIOD * IEEE802154_SLOT_PERIOD;
+
+ return usecs_to_jiffies(base_super_frame_duration *
+ (BIT(duration_order) + 1));
+}
+
+static void mac802154_flush_queued_beacons(struct ieee802154_local *local)
+{
+ struct cfg802154_mac_pkt *mac_pkt, *tmp;
+
+ list_for_each_entry_safe(mac_pkt, tmp, &local->rx_beacon_list, node) {
+ list_del(&mac_pkt->node);
+ kfree_skb(mac_pkt->skb);
+ kfree(mac_pkt);
+ }
+}
+
+static void
+mac802154_scan_get_next_channel(struct ieee802154_local *local,
+ struct cfg802154_scan_request *scan_req,
+ u8 *channel)
+{
+ (*channel)++;
+ *channel = find_next_bit((const unsigned long *)&scan_req->channels,
+ IEEE802154_MAX_CHANNEL + 1,
+ *channel);
+}
+
+static int mac802154_scan_find_next_chan(struct ieee802154_local *local,
+ struct cfg802154_scan_request *scan_req,
+ u8 page, u8 *channel)
+{
+ mac802154_scan_get_next_channel(local, scan_req, channel);
+ if (*channel > IEEE802154_MAX_CHANNEL)
+ return -EINVAL;
+
+ return 0;
+}
+
+void mac802154_scan_worker(struct work_struct *work)
+{
+ struct ieee802154_local *local =
+ container_of(work, struct ieee802154_local, scan_work.work);
+ struct cfg802154_scan_request *scan_req;
+ struct ieee802154_sub_if_data *sdata;
+ unsigned int scan_duration = 0;
+ struct wpan_phy *wpan_phy;
+ u8 scan_req_duration;
+ u8 page, channel;
+ int ret;
+
+ /* Ensure the device receiver is turned off when changing channels
+ * because there is no atomic way to change the channel and know on
+ * which one a beacon might have been received.
+ */
+ drv_stop(local);
+ synchronize_net();
+ mac802154_flush_queued_beacons(local);
+
+ rcu_read_lock();
+ scan_req = rcu_dereference(local->scan_req);
+ if (unlikely(!scan_req)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(scan_req->wpan_dev);
+
+ /* Wait an arbitrary amount of time in case we cannot use the device */
+ if (local->suspended || !ieee802154_sdata_running(sdata)) {
+ rcu_read_unlock();
+ queue_delayed_work(local->mac_wq, &local->scan_work,
+ msecs_to_jiffies(1000));
+ return;
+ }
+
+ wpan_phy = scan_req->wpan_phy;
+ scan_req_duration = scan_req->duration;
+
+ /* Look for the next valid chan */
+ page = local->scan_page;
+ channel = local->scan_channel;
+ do {
+ ret = mac802154_scan_find_next_chan(local, scan_req, page, &channel);
+ if (ret) {
+ rcu_read_unlock();
+ goto end_scan;
+ }
+ } while (!ieee802154_chan_is_valid(scan_req->wpan_phy, page, channel));
+
+ rcu_read_unlock();
+
+ /* Bypass the stack on purpose when changing the channel */
+ rtnl_lock();
+ ret = drv_set_channel(local, page, channel);
+ rtnl_unlock();
+ if (ret) {
+ dev_err(&sdata->dev->dev,
+ "Channel change failure during scan, aborting (%d)\n", ret);
+ goto end_scan;
+ }
+
+ local->scan_page = page;
+ local->scan_channel = channel;
+
+ rtnl_lock();
+ ret = drv_start(local, IEEE802154_FILTERING_3_SCAN, &local->addr_filt);
+ rtnl_unlock();
+ if (ret) {
+ dev_err(&sdata->dev->dev,
+ "Restarting failure after channel change, aborting (%d)\n", ret);
+ goto end_scan;
+ }
+
+ ieee802154_configure_durations(wpan_phy, page, channel);
+ scan_duration = mac802154_scan_get_channel_time(scan_req_duration,
+ wpan_phy->symbol_duration);
+ dev_dbg(&sdata->dev->dev,
+ "Scan page %u channel %u for %ums\n",
+ page, channel, jiffies_to_msecs(scan_duration));
+ queue_delayed_work(local->mac_wq, &local->scan_work, scan_duration);
+ return;
+
+end_scan:
+ rtnl_lock();
+ mac802154_scan_cleanup_locked(local, sdata, false);
+ rtnl_unlock();
+}
+
+int mac802154_trigger_scan_locked(struct ieee802154_sub_if_data *sdata,
+ struct cfg802154_scan_request *request)
+{
+ struct ieee802154_local *local = sdata->local;
+
+ ASSERT_RTNL();
+
+ if (mac802154_is_scanning(local))
+ return -EBUSY;
+
+ /* TODO: support other scanning types */
+ if (request->type != NL802154_SCAN_PASSIVE)
+ return -EOPNOTSUPP;
+
+ /* Store scanning parameters */
+ rcu_assign_pointer(local->scan_req, request);
+
+ /* Software scanning requires to set promiscuous mode, so we need to
+ * pause the Tx queue during the entire operation.
+ */
+ ieee802154_mlme_op_pre(local);
+
+ sdata->required_filtering = IEEE802154_FILTERING_3_SCAN;
+ local->scan_page = request->page;
+ local->scan_channel = -1;
+ set_bit(IEEE802154_IS_SCANNING, &local->ongoing);
+
+ nl802154_scan_started(request->wpan_phy, request->wpan_dev);
+
+ queue_delayed_work(local->mac_wq, &local->scan_work, 0);
+
+ return 0;
+}
+
+int mac802154_process_beacon(struct ieee802154_local *local,
+ struct sk_buff *skb,
+ u8 page, u8 channel)
+{
+ struct ieee802154_beacon_hdr *bh = (void *)skb->data;
+ struct ieee802154_addr *src = &mac_cb(skb)->source;
+ struct cfg802154_scan_request *scan_req;
+ struct ieee802154_coord_desc desc;
+
+ if (skb->len != sizeof(*bh))
+ return -EINVAL;
+
+ if (unlikely(src->mode == IEEE802154_ADDR_NONE))
+ return -EINVAL;
+
+ dev_dbg(&skb->dev->dev,
+ "BEACON received on page %u channel %u\n",
+ page, channel);
+
+ memcpy(&desc.addr, src, sizeof(desc.addr));
+ desc.page = page;
+ desc.channel = channel;
+ desc.link_quality = mac_cb(skb)->lqi;
+ desc.superframe_spec = get_unaligned_le16(skb->data);
+ desc.gts_permit = bh->gts_permit;
+
+ trace_802154_scan_event(&desc);
+
+ rcu_read_lock();
+ scan_req = rcu_dereference(local->scan_req);
+ if (likely(scan_req))
+ nl802154_scan_event(scan_req->wpan_phy, scan_req->wpan_dev, &desc);
+ rcu_read_unlock();
+
+ return 0;
+}
+
+static int mac802154_transmit_beacon(struct ieee802154_local *local,
+ struct wpan_dev *wpan_dev)
+{
+ struct cfg802154_beacon_request *beacon_req;
+ struct ieee802154_sub_if_data *sdata;
+ struct sk_buff *skb;
+ int ret;
+
+ /* Update the sequence number */
+ local->beacon.mhr.seq = atomic_inc_return(&wpan_dev->bsn) & 0xFF;
+
+ skb = alloc_skb(IEEE802154_BEACON_SKB_SZ, GFP_KERNEL);
+ if (!skb)
+ return -ENOBUFS;
+
+ rcu_read_lock();
+ beacon_req = rcu_dereference(local->beacon_req);
+ if (unlikely(!beacon_req)) {
+ rcu_read_unlock();
+ kfree_skb(skb);
+ return -EINVAL;
+ }
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(beacon_req->wpan_dev);
+ skb->dev = sdata->dev;
+
+ rcu_read_unlock();
+
+ ret = ieee802154_beacon_push(skb, &local->beacon);
+ if (ret) {
+ kfree_skb(skb);
+ return ret;
+ }
+
+ /* Using the MLME transmission helper for sending beacons is a bit
+ * overkill because we do not really care about the final outcome.
+ *
+	 * Even so, going through the whole net stack with a regular
+	 * dev_queue_xmit() is not a good fit either because we want beacons to
+	 * be sent "now" rather than be deferred by the net stack scheduling
+	 * (qdisc & co).
+ *
+ * Finally, using ieee802154_subif_start_xmit() would only be an option
+ * if we had a generic transmit helper which would acquire the
+ * HARD_TX_LOCK() to prevent buffer handling conflicts with regular
+ * packets.
+ *
+	 * So for now we keep it simple and send beacons with our MLME helper,
+	 * even if it stops the ieee802154 queue entirely during these
+	 * transmissions, which anyway does not have a huge impact on
+	 * performance given the current design of the stack.
+ */
+ return ieee802154_mlme_tx(local, sdata, skb);
+}
+
+void mac802154_beacon_worker(struct work_struct *work)
+{
+ struct ieee802154_local *local =
+ container_of(work, struct ieee802154_local, beacon_work.work);
+ struct cfg802154_beacon_request *beacon_req;
+ struct ieee802154_sub_if_data *sdata;
+ struct wpan_dev *wpan_dev;
+ int ret;
+
+ rcu_read_lock();
+ beacon_req = rcu_dereference(local->beacon_req);
+ if (unlikely(!beacon_req)) {
+ rcu_read_unlock();
+ return;
+ }
+
+ sdata = IEEE802154_WPAN_DEV_TO_SUB_IF(beacon_req->wpan_dev);
+
+ /* Wait an arbitrary amount of time in case we cannot use the device */
+ if (local->suspended || !ieee802154_sdata_running(sdata)) {
+ rcu_read_unlock();
+ queue_delayed_work(local->mac_wq, &local->beacon_work,
+ msecs_to_jiffies(1000));
+ return;
+ }
+
+ wpan_dev = beacon_req->wpan_dev;
+
+ rcu_read_unlock();
+
+ dev_dbg(&sdata->dev->dev, "Sending beacon\n");
+ ret = mac802154_transmit_beacon(local, wpan_dev);
+ if (ret)
+ dev_err(&sdata->dev->dev,
+ "Beacon could not be transmitted (%d)\n", ret);
+
+ queue_delayed_work(local->mac_wq, &local->beacon_work,
+ local->beacon_interval);
+}
+
+int mac802154_stop_beacons_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata)
+{
+ struct wpan_dev *wpan_dev = &sdata->wpan_dev;
+ struct cfg802154_beacon_request *request;
+
+ ASSERT_RTNL();
+
+ if (!mac802154_is_beaconing(local))
+ return -ESRCH;
+
+ clear_bit(IEEE802154_IS_BEACONING, &local->ongoing);
+ cancel_delayed_work(&local->beacon_work);
+ request = rcu_replace_pointer(local->beacon_req, NULL, 1);
+ if (!request)
+ return 0;
+ kfree_rcu(request);
+
+ nl802154_beaconing_done(wpan_dev);
+
+ return 0;
+}
+
+int mac802154_send_beacons_locked(struct ieee802154_sub_if_data *sdata,
+ struct cfg802154_beacon_request *request)
+{
+ struct ieee802154_local *local = sdata->local;
+
+ ASSERT_RTNL();
+
+ if (mac802154_is_beaconing(local))
+ mac802154_stop_beacons_locked(local, sdata);
+
+ /* Store beaconing parameters */
+ rcu_assign_pointer(local->beacon_req, request);
+
+ set_bit(IEEE802154_IS_BEACONING, &local->ongoing);
+
+ memset(&local->beacon, 0, sizeof(local->beacon));
+ local->beacon.mhr.fc.type = IEEE802154_FC_TYPE_BEACON;
+ local->beacon.mhr.fc.security_enabled = 0;
+ local->beacon.mhr.fc.frame_pending = 0;
+ local->beacon.mhr.fc.ack_request = 0;
+ local->beacon.mhr.fc.intra_pan = 0;
+ local->beacon.mhr.fc.dest_addr_mode = IEEE802154_NO_ADDRESSING;
+ local->beacon.mhr.fc.version = IEEE802154_2003_STD;
+ local->beacon.mhr.fc.source_addr_mode = IEEE802154_EXTENDED_ADDRESSING;
+ atomic_set(&request->wpan_dev->bsn, -1);
+ local->beacon.mhr.source.mode = IEEE802154_ADDR_LONG;
+ local->beacon.mhr.source.pan_id = request->wpan_dev->pan_id;
+ local->beacon.mhr.source.extended_addr = request->wpan_dev->extended_addr;
+ local->beacon.mac_pl.beacon_order = request->interval;
+ local->beacon.mac_pl.superframe_order = request->interval;
+ local->beacon.mac_pl.final_cap_slot = 0xf;
+ local->beacon.mac_pl.battery_life_ext = 0;
+ /* TODO: Fill this field depending on the coordinator capacity */
+ local->beacon.mac_pl.pan_coordinator = 1;
+ local->beacon.mac_pl.assoc_permit = 1;
+
+ /* Start the beacon work */
+ local->beacon_interval =
+ mac802154_scan_get_channel_time(request->interval,
+ request->wpan_phy->symbol_duration);
+ queue_delayed_work(local->mac_wq, &local->beacon_work, 0);
+
+ return 0;
+}
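
Both the per-channel scan time and the beacon interval above come from mac802154_scan_get_channel_time(). Assuming it follows the usual IEEE 802.15.4 formula of aBaseSuperframeDuration * (2^n + 1) symbols (an assumption, not lifted from the helper's source), a sketch of the arithmetic:

#include <stdint.h>
#include <stdio.h>

/* IEEE 802.15.4 scan-duration arithmetic: aBaseSuperframeDuration is
 * 960 symbols, and the time spent on a channel for duration order n is
 * 960 * (2^n + 1) symbols.
 */
static uint64_t channel_time_us(uint8_t order, uint64_t symbol_us)
{
	return 960ULL * ((1ULL << order) + 1) * symbol_us;
}

int main(void)
{
	/* 16 us per symbol: the 2.4 GHz O-QPSK PHY */
	for (uint8_t n = 0; n <= 5; n++)
		printf("order %u -> %llu us\n", n,
		       (unsigned long long)channel_time_us(n, 16));
	return 0;
}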
diff --git a/net/mac802154/tx.c b/net/mac802154/tx.c
index 9d8d43cf1e64..2a6f1ed763c9 100644
--- a/net/mac802154/tx.c
+++ b/net/mac802154/tx.c
@@ -137,34 +137,37 @@ int ieee802154_mlme_op_pre(struct ieee802154_local *local)
return ieee802154_sync_and_hold_queue(local);
}
-int ieee802154_mlme_tx(struct ieee802154_local *local,
- struct ieee802154_sub_if_data *sdata,
- struct sk_buff *skb)
+int ieee802154_mlme_tx_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
{
- int ret;
-
/* Avoid possible calls to ->ndo_stop() when we asynchronously perform
* MLME transmissions.
*/
- rtnl_lock();
+ ASSERT_RTNL();
/* Ensure the device was not stopped, otherwise error out */
- if (!local->open_count) {
- rtnl_unlock();
+ if (!local->open_count)
return -ENETDOWN;
- }
/* Warn if the ieee802154 core thinks MLME frames can be sent while the
* net interface expects this cannot happen.
*/
- if (WARN_ON_ONCE(!netif_running(sdata->dev))) {
- rtnl_unlock();
+ if (WARN_ON_ONCE(!netif_running(sdata->dev)))
return -ENETDOWN;
- }
ieee802154_tx(local, skb);
- ret = ieee802154_sync_queue(local);
+ return ieee802154_sync_queue(local);
+}
+
+int ieee802154_mlme_tx(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ int ret;
+ rtnl_lock();
+ ret = ieee802154_mlme_tx_locked(local, sdata, skb);
rtnl_unlock();
return ret;
@@ -188,6 +191,19 @@ int ieee802154_mlme_tx_one(struct ieee802154_local *local,
return ret;
}
+int ieee802154_mlme_tx_one_locked(struct ieee802154_local *local,
+ struct ieee802154_sub_if_data *sdata,
+ struct sk_buff *skb)
+{
+ int ret;
+
+ ieee802154_mlme_op_pre(local);
+ ret = ieee802154_mlme_tx_locked(local, sdata, skb);
+ ieee802154_mlme_op_post(local);
+
+ return ret;
+}
+
static bool ieee802154_queue_is_stopped(struct ieee802154_local *local)
{
return test_bit(WPAN_PHY_FLAG_STATE_QUEUE_STOPPED, &local->phy->flags);
diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c
index fc9e728b6333..3150f3f0c872 100644
--- a/net/mctp/af_mctp.c
+++ b/net/mctp/af_mctp.c
@@ -544,9 +544,6 @@ static int mctp_sk_init(struct sock *sk)
static void mctp_sk_close(struct sock *sk, long timeout)
{
- struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk);
-
- del_timer_sync(&msk->key_expiry);
sk_common_release(sk);
}
@@ -580,7 +577,19 @@ static void mctp_sk_unhash(struct sock *sk)
spin_lock_irqsave(&key->lock, fl2);
__mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED);
}
+ sock_set_flag(sk, SOCK_DEAD);
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
+
+ /* Since there are no more tag allocations (we have removed all of the
+	 * keys), stop any pending expiry events. The timer cannot be re-queued
+	 * as the sk is no longer observable.
+ */
+ del_timer_sync(&msk->key_expiry);
+}
+
+static void mctp_sk_destruct(struct sock *sk)
+{
+ skb_queue_purge(&sk->sk_receive_queue);
}
static struct proto mctp_proto = {
@@ -619,6 +628,7 @@ static int mctp_pf_create(struct net *net, struct socket *sock,
return -ENOMEM;
sock_init_data(sock, sk);
+ sk->sk_destruct = mctp_sk_destruct;
rc = 0;
if (sk->sk_prot->init)
diff --git a/net/mctp/route.c b/net/mctp/route.c
index f9a80b82dc51..f51a05ec7162 100644
--- a/net/mctp/route.c
+++ b/net/mctp/route.c
@@ -147,6 +147,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk,
key->valid = true;
spin_lock_init(&key->lock);
refcount_set(&key->refs, 1);
+ sock_hold(key->sk);
return key;
}
@@ -165,6 +166,7 @@ void mctp_key_unref(struct mctp_sk_key *key)
mctp_dev_release_key(key->dev, key);
spin_unlock_irqrestore(&key->lock, flags);
+ sock_put(key->sk);
kfree(key);
}
@@ -177,6 +179,11 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
spin_lock_irqsave(&net->mctp.keys_lock, flags);
+ if (sock_flag(&msk->sk, SOCK_DEAD)) {
+ rc = -EINVAL;
+ goto out_unlock;
+ }
+
hlist_for_each_entry(tmp, &net->mctp.keys, hlist) {
if (mctp_key_match(tmp, key->local_addr, key->peer_addr,
key->tag)) {
@@ -198,6 +205,7 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk)
hlist_add_head(&key->sklist, &msk->keys);
}
+out_unlock:
spin_unlock_irqrestore(&net->mctp.keys_lock, flags);
return rc;
@@ -315,8 +323,8 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb)
static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
{
+ struct mctp_sk_key *key, *any_key = NULL;
struct net *net = dev_net(skb->dev);
- struct mctp_sk_key *key;
struct mctp_sock *msk;
struct mctp_hdr *mh;
unsigned long f;
@@ -361,13 +369,11 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* key for reassembly - we'll create a more specific
* one for future packets if required (ie, !EOM).
*/
- key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
- if (key) {
- msk = container_of(key->sk,
+ any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f);
+ if (any_key) {
+ msk = container_of(any_key->sk,
struct mctp_sock, sk);
- spin_unlock_irqrestore(&key->lock, f);
- mctp_key_unref(key);
- key = NULL;
+ spin_unlock_irqrestore(&any_key->lock, f);
}
}
@@ -419,14 +425,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb)
* this function.
*/
rc = mctp_key_add(key, msk);
- if (rc) {
- kfree(key);
- } else {
+ if (!rc)
trace_mctp_key_acquire(key);
- /* we don't need to release key->lock on exit */
- mctp_key_unref(key);
- }
+ /* we don't need to release key->lock on exit, so
+ * clean up here and suppress the unlock via
+ * setting to NULL
+ */
+ mctp_key_unref(key);
key = NULL;
} else {
@@ -473,6 +479,8 @@ out_unlock:
spin_unlock_irqrestore(&key->lock, f);
mctp_key_unref(key);
}
+ if (any_key)
+ mctp_key_unref(any_key);
out:
if (rc)
kfree_skb(skb);
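
The interplay of the mctp hunks above is subtle: mctp_sk_unhash() sweeps the keys and marks the socket SOCK_DEAD under net->mctp.keys_lock, and mctp_key_add() now refuses new keys under that same lock, so del_timer_sync() can no longer race with a fresh key re-arming the expiry timer. A toy pthread model of that handshake (names hypothetical):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Toy model of the SOCK_DEAD handshake: teardown marks the socket dead
 * under the same lock that guards key insertion, so no key can slip in
 * after the unhash path has swept the list.
 */
static pthread_mutex_t keys_lock = PTHREAD_MUTEX_INITIALIZER;
static bool sock_dead;
static int nkeys;

static int key_add(void)
{
	int rc = 0;

	pthread_mutex_lock(&keys_lock);
	if (sock_dead)
		rc = -1;	/* -EINVAL in the kernel */
	else
		nkeys++;
	pthread_mutex_unlock(&keys_lock);
	return rc;
}

static void sk_unhash(void)
{
	pthread_mutex_lock(&keys_lock);
	nkeys = 0;		/* __mctp_key_remove() for each key */
	sock_dead = true;
	pthread_mutex_unlock(&keys_lock);
	/* now safe to del_timer_sync(): no new key can arm the timer */
}

int main(void)
{
	key_add();
	sk_unhash();
	printf("add after unhash: %d\n", key_add());	/* prints -1 */
	return 0;
}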
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 35b5f806fdda..dc5165d3eec4 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1428,6 +1428,7 @@ static int mpls_dev_sysctl_register(struct net_device *dev,
free:
kfree(table);
out:
+ mdev->sysctl = NULL;
return -ENOBUFS;
}
@@ -1437,6 +1438,9 @@ static void mpls_dev_sysctl_unregister(struct net_device *dev,
struct net *net = dev_net(dev);
struct ctl_table *table;
+ if (!mdev->sysctl)
+ return;
+
table = mdev->sysctl->ctl_table_arg;
unregister_net_sysctl_table(mdev->sysctl);
kfree(table);
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 5ded85e2c374..b30cea2fbf3f 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -1594,8 +1594,7 @@ mp_rst:
TCPOLEN_MPTCP_PRIO,
opts->backup, TCPOPT_NOP);
- MPTCP_INC_STATS(sock_net((const struct sock *)tp),
- MPTCP_MIB_MPPRIOTX);
+ MPTCP_INC_STATS(sock_net(ssk), MPTCP_MIB_MPPRIOTX);
}
mp_capable_done:
diff --git a/net/mptcp/pm.c b/net/mptcp/pm.c
index 45e2a48397b9..70f0ced3ca86 100644
--- a/net/mptcp/pm.c
+++ b/net/mptcp/pm.c
@@ -420,6 +420,31 @@ void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk)
}
}
+/* If sk is ipv4 or ipv6-only, allow only same-family local and remote
+ * addresses; otherwise allow any matching local/remote pair.
+ */
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem)
+{
+ bool mptcp_is_v4 = sk->sk_family == AF_INET;
+
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ bool loc_is_v4 = loc->family == AF_INET || ipv6_addr_v4mapped(&loc->addr6);
+ bool rem_is_v4 = rem->family == AF_INET || ipv6_addr_v4mapped(&rem->addr6);
+
+ if (mptcp_is_v4)
+ return loc_is_v4 && rem_is_v4;
+
+ if (ipv6_only_sock(sk))
+ return !loc_is_v4 && !rem_is_v4;
+
+ return loc_is_v4 == rem_is_v4;
+#else
+ return mptcp_is_v4 && loc->family == AF_INET && rem->family == AF_INET;
+#endif
+}
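
For a quick sanity check, the matching rules condense to three cases; the sketch below is a userspace restatement of the function above, taking pre-computed booleans instead of sockaddr families:

#include <stdbool.h>
#include <stdio.h>

/* Condensed truth table for the family-matching rules above:
 * v4 socket -> both endpoints must be v4 (or v4-mapped),
 * v6-only   -> both must be v6,
 * plain v6  -> both must simply agree.
 */
static bool families_match(bool sk_is_v4, bool v6only,
			   bool loc_is_v4, bool rem_is_v4)
{
	if (sk_is_v4)
		return loc_is_v4 && rem_is_v4;
	if (v6only)
		return !loc_is_v4 && !rem_is_v4;
	return loc_is_v4 == rem_is_v4;
}

int main(void)
{
	printf("%d\n", families_match(false, false, true, true));	/* 1 */
	printf("%d\n", families_match(false, true, true, false));	/* 0 */
	return 0;
}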
+
void mptcp_pm_data_reset(struct mptcp_sock *msk)
{
u8 pm_type = mptcp_get_pm_type(sock_net((struct sock *)msk));
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index 2ea7eae43bdb..56628b52d100 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -152,7 +152,6 @@ static struct mptcp_pm_addr_entry *
select_local_address(const struct pm_nl_pernet *pernet,
const struct mptcp_sock *msk)
{
- const struct sock *sk = (const struct sock *)msk;
struct mptcp_pm_addr_entry *entry, *ret = NULL;
msk_owned_by_me(msk);
@@ -165,16 +164,6 @@ select_local_address(const struct pm_nl_pernet *pernet,
if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
continue;
- if (entry->addr.family != sk->sk_family) {
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if ((entry->addr.family == AF_INET &&
- !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
- (sk->sk_family == AF_INET &&
- !ipv6_addr_v4mapped(&entry->addr.addr6)))
-#endif
- continue;
- }
-
ret = entry;
break;
}
@@ -423,7 +412,9 @@ static bool lookup_address_in_vec(const struct mptcp_addr_info *addrs, unsigned
/* Fill all the remote addresses into the array addrs[],
* and return the array size.
*/
-static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullmesh,
+static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *local,
+ bool fullmesh,
struct mptcp_addr_info *addrs)
{
bool deny_id0 = READ_ONCE(msk->pm.remote_deny_join_id0);
@@ -443,6 +434,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
if (deny_id0)
return 0;
+ if (!mptcp_pm_addr_families_match(sk, local, &remote))
+ return 0;
+
msk->pm.subflows++;
addrs[i++] = remote;
} else {
@@ -453,6 +447,9 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
if (deny_id0 && !addrs[i].id)
continue;
+ if (!mptcp_pm_addr_families_match(sk, local, &addrs[i]))
+ continue;
+
if (!lookup_address_in_vec(addrs, i, &addrs[i]) &&
msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@@ -603,9 +600,11 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
fullmesh = !!(local->flags & MPTCP_PM_ADDR_FLAG_FULLMESH);
msk->pm.local_addr_used++;
- nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
- if (nr)
- __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
+ nr = fill_remote_addresses_vec(msk, &local->addr, fullmesh, addrs);
+ if (nr == 0)
+ continue;
+
spin_unlock_bh(&msk->pm.lock);
for (i = 0; i < nr; i++)
__mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
@@ -628,11 +627,11 @@ static void mptcp_pm_nl_subflow_established(struct mptcp_sock *msk)
* and return the array size.
*/
static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
+ struct mptcp_addr_info *remote,
struct mptcp_addr_info *addrs)
{
struct sock *sk = (struct sock *)msk;
struct mptcp_pm_addr_entry *entry;
- struct mptcp_addr_info local;
struct pm_nl_pernet *pernet;
unsigned int subflows_max;
int i = 0;
@@ -645,15 +644,8 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
if (!(entry->flags & MPTCP_PM_ADDR_FLAG_FULLMESH))
continue;
- if (entry->addr.family != sk->sk_family) {
-#if IS_ENABLED(CONFIG_MPTCP_IPV6)
- if ((entry->addr.family == AF_INET &&
- !ipv6_addr_v4mapped(&sk->sk_v6_daddr)) ||
- (sk->sk_family == AF_INET &&
- !ipv6_addr_v4mapped(&entry->addr.addr6)))
-#endif
- continue;
- }
+ if (!mptcp_pm_addr_families_match(sk, &entry->addr, remote))
+ continue;
if (msk->pm.subflows < subflows_max) {
msk->pm.subflows++;
@@ -666,8 +658,18 @@ static unsigned int fill_local_addresses_vec(struct mptcp_sock *msk,
* 'IPADDRANY' local address
*/
if (!i) {
+ struct mptcp_addr_info local;
+
memset(&local, 0, sizeof(local));
- local.family = msk->pm.remote.family;
+ local.family =
+#if IS_ENABLED(CONFIG_MPTCP_IPV6)
+ remote->family == AF_INET6 &&
+ ipv6_addr_v4mapped(&remote->addr6) ? AF_INET :
+#endif
+ remote->family;
+
+ if (!mptcp_pm_addr_families_match(sk, &local, remote))
+ return 0;
msk->pm.subflows++;
addrs[i++] = local;
@@ -706,7 +708,9 @@ static void mptcp_pm_nl_add_addr_received(struct mptcp_sock *msk)
/* connect to the specified remote address, using whatever
* local address the routing configuration will pick.
*/
- nr = fill_local_addresses_vec(msk, addrs);
+ nr = fill_local_addresses_vec(msk, &remote, addrs);
+ if (nr == 0)
+ return;
msk->pm.add_addr_accepted++;
if (msk->pm.add_addr_accepted >= add_addr_accept_max ||
@@ -998,8 +1002,8 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
{
int addrlen = sizeof(struct sockaddr_in);
struct sockaddr_storage addr;
- struct mptcp_sock *msk;
struct socket *ssock;
+ struct sock *newsk;
int backlog = 1024;
int err;
@@ -1008,11 +1012,13 @@ static int mptcp_pm_nl_create_listen_socket(struct sock *sk,
if (err)
return err;
- msk = mptcp_sk(entry->lsk->sk);
- if (!msk)
+ newsk = entry->lsk->sk;
+ if (!newsk)
return -EINVAL;
- ssock = __mptcp_nmpc_socket(msk);
+ lock_sock(newsk);
+ ssock = __mptcp_nmpc_socket(mptcp_sk(newsk));
+ release_sock(newsk);
if (!ssock)
return -EINVAL;
@@ -1143,7 +1149,7 @@ void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ss
if (!tcp_rtx_and_write_queues_empty(ssk)) {
subflow->stale = 1;
__mptcp_retransmit_pending_data(sk);
- MPTCP_INC_STATS(sock_net(sk), MPTCP_MIB_SUBFLOWSTALE);
+ MPTCP_INC_STATS(net, MPTCP_MIB_SUBFLOWSTALE);
}
unlock_sock_fast(ssk, slow);
@@ -1903,8 +1909,7 @@ static int mptcp_nl_cmd_set_flags(struct sk_buff *skb, struct genl_info *info)
}
if (token)
- return mptcp_userspace_pm_set_flags(sock_net(skb->sk),
- token, &addr, &remote, bkup);
+ return mptcp_userspace_pm_set_flags(net, token, &addr, &remote, bkup);
spin_lock_bh(&pernet->lock);
entry = __lookup_addr(pernet, &addr.addr, lookup_by_id);
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 65dcc55a8ad8..a02d3cbf2a1b 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -59,8 +59,8 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
*/
e = sock_kmalloc(sk, sizeof(*e), GFP_ATOMIC);
if (!e) {
- spin_unlock_bh(&msk->pm.lock);
- return -ENOMEM;
+ ret = -ENOMEM;
+ goto append_err;
}
*e = *entry;
@@ -74,6 +74,7 @@ int mptcp_userspace_pm_append_new_local_addr(struct mptcp_sock *msk,
ret = entry->addr.id;
}
+append_err:
spin_unlock_bh(&msk->pm.lock);
return ret;
}
@@ -294,6 +295,13 @@ int mptcp_nl_cmd_sf_create(struct sk_buff *skb, struct genl_info *info)
}
sk = (struct sock *)msk;
+
+ if (!mptcp_pm_addr_families_match(sk, &addr_l, &addr_r)) {
+ GENL_SET_ERR_MSG(info, "families mismatch");
+ err = -EINVAL;
+ goto create_err;
+ }
+
lock_sock(sk);
err = __mptcp_subflow_connect(sk, &addr_l, &addr_r);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index f6f93957275b..3ad9c46202fc 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -98,7 +98,7 @@ static int __mptcp_socket_create(struct mptcp_sock *msk)
struct socket *ssock;
int err;
- err = mptcp_subflow_create_socket(sk, &ssock);
+ err = mptcp_subflow_create_socket(sk, sk->sk_family, &ssock);
if (err)
return err;
@@ -923,9 +923,8 @@ static void mptcp_check_for_eof(struct mptcp_sock *msk)
static struct sock *mptcp_subflow_recv_lookup(const struct mptcp_sock *msk)
{
struct mptcp_subflow_context *subflow;
- struct sock *sk = (struct sock *)msk;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
mptcp_for_each_subflow(msk, subflow) {
if (READ_ONCE(subflow->data_avail))
@@ -1408,7 +1407,7 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
u64 linger_time;
long tout = 0;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
if (__mptcp_check_fallback(msk)) {
if (!msk->first)
@@ -1662,6 +1661,8 @@ static void mptcp_set_nospace(struct sock *sk)
set_bit(MPTCP_NOSPACE, &mptcp_sk(sk)->flags);
}
+static int mptcp_disconnect(struct sock *sk, int flags);
+
static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msghdr *msg,
size_t len, int *copied_syn)
{
@@ -1672,9 +1673,9 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
lock_sock(ssk);
msg->msg_flags |= MSG_DONTWAIT;
msk->connect_flags = O_NONBLOCK;
- msk->is_sendmsg = 1;
+ msk->fastopening = 1;
ret = tcp_sendmsg_fastopen(ssk, msg, copied_syn, len, NULL);
- msk->is_sendmsg = 0;
+ msk->fastopening = 0;
msg->msg_flags = saved_flags;
release_sock(ssk);
@@ -1688,6 +1689,8 @@ static int mptcp_sendmsg_fastopen(struct sock *sk, struct sock *ssk, struct msgh
*/
if (ret && ret != -EINPROGRESS && ret != -ERESTARTSYS && ret != -EINTR)
*copied_syn = 0;
+ } else if (ret && ret != -EINPROGRESS) {
+ mptcp_disconnect(sk, 0);
}
return ret;
@@ -1886,7 +1889,7 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
u32 time, advmss = 1;
u64 rtt_us, mstamp;
- sock_owned_by_me(sk);
+ msk_owned_by_me(msk);
if (copied <= 0)
return;
@@ -2213,7 +2216,7 @@ static struct sock *mptcp_subflow_get_retrans(struct mptcp_sock *msk)
struct mptcp_subflow_context *subflow;
int min_stale_count = INT_MAX;
- sock_owned_by_me((const struct sock *)msk);
+ msk_owned_by_me(msk);
if (__mptcp_check_fallback(msk))
return NULL;
@@ -2353,7 +2356,7 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
/* otherwise tcp will dispose of the ssk and subflow ctx */
if (ssk->sk_state == TCP_LISTEN) {
tcp_set_state(ssk, TCP_CLOSE);
- mptcp_subflow_queue_clean(ssk);
+ mptcp_subflow_queue_clean(sk, ssk);
inet_csk_listen_stop(ssk);
mptcp_event_pm_listener(ssk, MPTCP_EVENT_LISTENER_CLOSED);
}
@@ -2720,8 +2723,8 @@ static int mptcp_init_sock(struct sock *sk)
mptcp_ca_reset(sk);
sk_sockets_allocated_inc(sk);
- sk->sk_rcvbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_rmem[1]);
- sk->sk_sndbuf = READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_wmem[1]);
+ sk->sk_rcvbuf = READ_ONCE(net->ipv4.sysctl_tcp_rmem[1]);
+ sk->sk_sndbuf = READ_ONCE(net->ipv4.sysctl_tcp_wmem[1]);
return 0;
}
@@ -2872,7 +2875,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
sk_stream_kill_queues(sk);
xfrm_sk_free_policy(sk);
- sk_refcnt_debug_release(sk);
sock_put(sk);
}
@@ -2888,15 +2890,23 @@ static __poll_t mptcp_check_readable(struct mptcp_sock *msk)
return EPOLLIN | EPOLLRDNORM;
}
+static void mptcp_listen_inuse_dec(struct sock *sk)
+{
+ if (inet_sk_state_load(sk) == TCP_LISTEN)
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
+}
+
bool __mptcp_close(struct sock *sk, long timeout)
{
struct mptcp_subflow_context *subflow;
struct mptcp_sock *msk = mptcp_sk(sk);
bool do_cancel_work = false;
+ int subflows_alive = 0;
sk->sk_shutdown = SHUTDOWN_MASK;
if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE)) {
+ mptcp_listen_inuse_dec(sk);
inet_sk_state_store(sk, TCP_CLOSE);
goto cleanup;
}
@@ -2918,6 +2928,8 @@ cleanup:
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
bool slow = lock_sock_fast_nested(ssk);
+ subflows_alive += ssk->sk_state != TCP_CLOSE;
+
/* since the close timeout takes precedence on the fail one,
* cancel the latter
*/
@@ -2933,6 +2945,12 @@ cleanup:
}
sock_orphan(sk);
+ /* all the subflows are closed, only timeout can change the msk
+	 * state, let's not keep resources busy for no reason
+ */
+ if (subflows_alive == 0)
+ inet_sk_state_store(sk, TCP_CLOSE);
+
sock_hold(sk);
pr_debug("msk=%p state=%d", sk, sk->sk_state);
if (msk->token)
@@ -2989,6 +3007,15 @@ static int mptcp_disconnect(struct sock *sk, int flags)
{
struct mptcp_sock *msk = mptcp_sk(sk);
+ /* We are on the fastopen error path. We can't call straight into the
+	 * subflow cleanup code due to lock nesting (we are already under
+	 * the first subflow's socket lock). Do nothing and leave the
+	 * cleanup to the caller.
+ */
+ if (msk->fastopening)
+ return 0;
+
+ mptcp_listen_inuse_dec(sk);
inet_sk_state_store(sk, TCP_CLOSE);
mptcp_stop_timer(sk);
@@ -3532,7 +3559,7 @@ static int mptcp_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len)
/* if reaching here via the fastopen/sendmsg path, the caller already
* acquired the subflow socket lock, too.
*/
- if (msk->is_sendmsg)
+ if (msk->fastopening)
err = __inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags, 1);
else
err = inet_stream_connect(ssock, uaddr, addr_len, msk->connect_flags);
@@ -3627,12 +3654,13 @@ static int mptcp_stream_connect(struct socket *sock, struct sockaddr *uaddr,
static int mptcp_listen(struct socket *sock, int backlog)
{
struct mptcp_sock *msk = mptcp_sk(sock->sk);
+ struct sock *sk = sock->sk;
struct socket *ssock;
int err;
pr_debug("msk=%p", msk);
- lock_sock(sock->sk);
+ lock_sock(sk);
ssock = __mptcp_nmpc_socket(msk);
if (!ssock) {
err = -EINVAL;
@@ -3640,18 +3668,20 @@ static int mptcp_listen(struct socket *sock, int backlog)
}
mptcp_token_destroy(msk);
- inet_sk_state_store(sock->sk, TCP_LISTEN);
- sock_set_flag(sock->sk, SOCK_RCU_FREE);
+ inet_sk_state_store(sk, TCP_LISTEN);
+ sock_set_flag(sk, SOCK_RCU_FREE);
err = ssock->ops->listen(ssock, backlog);
- inet_sk_state_store(sock->sk, inet_sk_state_load(ssock->sk));
- if (!err)
- mptcp_copy_inaddrs(sock->sk, ssock->sk);
+ inet_sk_state_store(sk, inet_sk_state_load(ssock->sk));
+ if (!err) {
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
+ mptcp_copy_inaddrs(sk, ssock->sk);
+ }
mptcp_event_pm_listener(ssock->sk, MPTCP_EVENT_LISTENER_CREATED);
unlock:
- release_sock(sock->sk);
+ release_sock(sk);
return err;
}
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index 955fb3d88eb3..61fd8eabfca2 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -295,7 +295,7 @@ struct mptcp_sock {
u8 recvmsg_inq:1,
cork:1,
nodelay:1,
- is_sendmsg:1;
+ fastopening:1;
int connect_flags;
struct work_struct work;
struct sk_buff *ooo_last_skb;
@@ -628,7 +628,7 @@ void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
struct mptcp_subflow_context *subflow);
void __mptcp_subflow_send_ack(struct sock *ssk);
void mptcp_subflow_reset(struct sock *ssk);
-void mptcp_subflow_queue_clean(struct sock *ssk);
+void mptcp_subflow_queue_clean(struct sock *sk, struct sock *ssk);
void mptcp_sock_graft(struct sock *sk, struct socket *parent);
struct socket *__mptcp_nmpc_socket(const struct mptcp_sock *msk);
bool __mptcp_close(struct sock *sk, long timeout);
@@ -641,7 +641,8 @@ bool mptcp_addresses_equal(const struct mptcp_addr_info *a,
/* called with sk socket lock held */
int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
const struct mptcp_addr_info *remote);
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock);
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock);
void mptcp_info2sockaddr(const struct mptcp_addr_info *info,
struct sockaddr_storage *addr,
unsigned short family);
@@ -754,7 +755,7 @@ static inline void mptcp_token_init_request(struct request_sock *req)
int mptcp_token_new_request(struct request_sock *req);
void mptcp_token_destroy_request(struct request_sock *req);
-int mptcp_token_new_connect(struct sock *sk);
+int mptcp_token_new_connect(struct sock *ssk);
void mptcp_token_accept(struct mptcp_subflow_request_sock *r,
struct mptcp_sock *msk);
bool mptcp_token_exists(u32 token);
@@ -776,6 +777,9 @@ int mptcp_pm_parse_addr(struct nlattr *attr, struct genl_info *info,
int mptcp_pm_parse_entry(struct nlattr *attr, struct genl_info *info,
bool require_family,
struct mptcp_pm_addr_entry *entry);
+bool mptcp_pm_addr_families_match(const struct sock *sk,
+ const struct mptcp_addr_info *loc,
+ const struct mptcp_addr_info *rem);
void mptcp_pm_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_nl_subflow_chk_stale(const struct mptcp_sock *msk, struct sock *ssk);
void mptcp_pm_new_connection(struct mptcp_sock *msk, const struct sock *ssk, int server_side);
diff --git a/net/mptcp/sockopt.c b/net/mptcp/sockopt.c
index d4b1e6ec1b36..8a9656248b0f 100644
--- a/net/mptcp/sockopt.c
+++ b/net/mptcp/sockopt.c
@@ -18,7 +18,7 @@
static struct sock *__mptcp_tcp_fallback(struct mptcp_sock *msk)
{
- sock_owned_by_me((const struct sock *)msk);
+ msk_owned_by_me(msk);
if (likely(!__mptcp_check_fallback(msk)))
return NULL;
@@ -760,14 +760,21 @@ static int mptcp_setsockopt_v4(struct mptcp_sock *msk, int optname,
static int mptcp_setsockopt_first_sf_only(struct mptcp_sock *msk, int level, int optname,
sockptr_t optval, unsigned int optlen)
{
+ struct sock *sk = (struct sock *)msk;
struct socket *sock;
+ int ret = -EINVAL;
/* Limit to first subflow, before the connection establishment */
+ lock_sock(sk);
sock = __mptcp_nmpc_socket(msk);
if (!sock)
- return -EINVAL;
+ goto unlock;
+
+ ret = tcp_setsockopt(sock->sk, level, optname, optval, optlen);
- return tcp_setsockopt(sock->sk, level, optname, optval, optlen);
+unlock:
+ release_sock(sk);
+ return ret;
}
static int mptcp_setsockopt_sol_tcp(struct mptcp_sock *msk, int optname,
@@ -1255,6 +1262,7 @@ static void sync_socket_options(struct mptcp_sock *msk, struct sock *ssk)
ssk->sk_priority = sk->sk_priority;
ssk->sk_bound_dev_if = sk->sk_bound_dev_if;
ssk->sk_incoming_cpu = sk->sk_incoming_cpu;
+ ssk->sk_ipv6only = sk->sk_ipv6only;
__ip_sock_set_tos(ssk, inet_sk(sk)->tos);
if (sk->sk_userlocks & tx_rx_locks) {
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index d1d32a66ae3f..4ae1a7304cf0 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -26,6 +26,7 @@
#include "mib.h"
#include <trace/events/mptcp.h>
+#include <trace/events/sock.h>
static void mptcp_subflow_ops_undo_override(struct sock *ssk);
@@ -1399,6 +1400,7 @@ void __mptcp_error_report(struct sock *sk)
mptcp_for_each_subflow(msk, subflow) {
struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
int err = sock_error(ssk);
+ int ssk_state;
if (!err)
continue;
@@ -1409,7 +1411,14 @@ void __mptcp_error_report(struct sock *sk)
if (sk->sk_state != TCP_SYN_SENT && !__mptcp_check_fallback(msk))
continue;
- inet_sk_state_store(sk, inet_sk_state_load(ssk));
+ /* We need to propagate only transition to CLOSE state.
+ * Orphaned socket will see such state change via
+ * subflow_sched_work_if_closed() and that path will properly
+ * destroy the msk as needed.
+ */
+ ssk_state = inet_sk_state_load(ssk);
+ if (ssk_state == TCP_CLOSE && !sock_flag(sk, SOCK_DEAD))
+ inet_sk_state_store(sk, ssk_state);
sk->sk_err = -err;
/* This barrier is coupled with smp_rmb() in mptcp_poll() */
@@ -1438,6 +1447,8 @@ static void subflow_data_ready(struct sock *sk)
struct sock *parent = subflow->conn;
struct mptcp_sock *msk;
+ trace_sk_data_ready(sk);
+
msk = mptcp_sk(parent);
if (state & TCPF_LISTEN) {
/* MPJ subflow are removed from accept queue before reaching here,
@@ -1547,7 +1558,7 @@ int __mptcp_subflow_connect(struct sock *sk, const struct mptcp_addr_info *loc,
if (!mptcp_is_fully_established(sk))
goto err_out;
- err = mptcp_subflow_create_socket(sk, &sf);
+ err = mptcp_subflow_create_socket(sk, loc->family, &sf);
if (err)
goto err_out;
@@ -1660,7 +1671,9 @@ static void mptcp_subflow_ops_undo_override(struct sock *ssk)
#endif
ssk->sk_prot = &tcp_prot;
}
-int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
+
+int mptcp_subflow_create_socket(struct sock *sk, unsigned short family,
+ struct socket **new_sock)
{
struct mptcp_subflow_context *subflow;
struct net *net = sock_net(sk);
@@ -1673,12 +1686,11 @@ int mptcp_subflow_create_socket(struct sock *sk, struct socket **new_sock)
if (unlikely(!sk->sk_socket))
return -EINVAL;
- err = sock_create_kern(net, sk->sk_family, SOCK_STREAM, IPPROTO_TCP,
- &sf);
+ err = sock_create_kern(net, family, SOCK_STREAM, IPPROTO_TCP, &sf);
if (err)
return err;
- lock_sock(sf->sk);
+ lock_sock_nested(sf->sk, SINGLE_DEPTH_NESTING);
/* the newly created socket has to be in the same cgroup as its parent */
mptcp_attach_cgroup(sk, sf->sk);
@@ -1791,7 +1803,7 @@ static void subflow_state_change(struct sock *sk)
}
}
-void mptcp_subflow_queue_clean(struct sock *listener_ssk)
+void mptcp_subflow_queue_clean(struct sock *listener_sk, struct sock *listener_ssk)
{
struct request_sock_queue *queue = &inet_csk(listener_ssk)->icsk_accept_queue;
struct mptcp_sock *msk, *next, *head = NULL;
@@ -1840,8 +1852,23 @@ void mptcp_subflow_queue_clean(struct sock *listener_ssk)
do_cancel_work = __mptcp_close(sk, 0);
release_sock(sk);
- if (do_cancel_work)
+ if (do_cancel_work) {
+ /* lockdep will report a false positive ABBA deadlock
+ * between cancel_work_sync and the listener socket.
+ * The involved locks belong to different sockets WRT
+ * the existing AB chain.
+			 * Using a per-socket key is problematic as key
+			 * deregistration requires process context, while it
+			 * would have to happen at socket disposal time, in
+			 * atomic context.
+ * Just tell lockdep to consider the listener socket
+ * released here.
+ */
+ mutex_release(&listener_sk->sk_lock.dep_map, _RET_IP_);
mptcp_cancel_work(sk);
+ mutex_acquire(&listener_sk->sk_lock.dep_map,
+ SINGLE_DEPTH_NESTING, 0, _RET_IP_);
+ }
sock_put(sk);
}
diff --git a/net/mptcp/token.c b/net/mptcp/token.c
index 65430f314a68..5bb924534387 100644
--- a/net/mptcp/token.c
+++ b/net/mptcp/token.c
@@ -134,7 +134,7 @@ int mptcp_token_new_request(struct request_sock *req)
/**
* mptcp_token_new_connect - create new key/idsn/token for subflow
- * @sk: the socket that will initiate a connection
+ * @ssk: the socket that will initiate a connection
*
* This function is called when a new outgoing mptcp connection is
* initiated.
@@ -148,11 +148,12 @@ int mptcp_token_new_request(struct request_sock *req)
*
* returns 0 on success.
*/
-int mptcp_token_new_connect(struct sock *sk)
+int mptcp_token_new_connect(struct sock *ssk)
{
- struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+ struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
struct mptcp_sock *msk = mptcp_sk(subflow->conn);
int retries = MPTCP_TOKEN_MAX_RETRIES;
+ struct sock *sk = subflow->conn;
struct token_bucket *bucket;
again:
@@ -169,12 +170,13 @@ again:
}
pr_debug("ssk=%p, local_key=%llu, token=%u, idsn=%llu\n",
- sk, subflow->local_key, subflow->token, subflow->idsn);
+ ssk, subflow->local_key, subflow->token, subflow->idsn);
WRITE_ONCE(msk->token, subflow->token);
__sk_nulls_add_node_rcu((struct sock *)msk, &bucket->msk_chain);
bucket->chain_len++;
spin_unlock_bh(&bucket->lock);
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
return 0;
}
@@ -190,8 +192,10 @@ void mptcp_token_accept(struct mptcp_subflow_request_sock *req,
struct mptcp_sock *msk)
{
struct mptcp_subflow_request_sock *pos;
+ struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
bucket = token_bucket(req->token);
spin_lock_bh(&bucket->lock);
@@ -370,12 +374,14 @@ void mptcp_token_destroy_request(struct request_sock *req)
*/
void mptcp_token_destroy(struct mptcp_sock *msk)
{
+ struct sock *sk = (struct sock *)msk;
struct token_bucket *bucket;
struct mptcp_sock *pos;
if (sk_unhashed((struct sock *)msk))
return;
+ sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
bucket = token_bucket(msk->token);
spin_lock_bh(&bucket->lock);
pos = __token_lookup_msk(bucket, msk->token);
diff --git a/net/mptcp/token_test.c b/net/mptcp/token_test.c
index 5d984bec1cd8..0758865ab658 100644
--- a/net/mptcp/token_test.c
+++ b/net/mptcp/token_test.c
@@ -57,6 +57,9 @@ static struct mptcp_sock *build_msk(struct kunit *test)
KUNIT_EXPECT_NOT_ERR_OR_NULL(test, msk);
refcount_set(&((struct sock *)msk)->sk_refcnt, 1);
sock_net_set((struct sock *)msk, &init_net);
+
+ /* be sure the token helpers can dereference sk->sk_prot */
+ ((struct sock *)msk)->sk_prot = &tcp_prot;
return msk;
}
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index f71b41c7ce2f..4d6737160857 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -189,6 +189,9 @@ config NF_CONNTRACK_LABELS
to connection tracking entries. It can be used with xtables connlabel
match and the nftables ct expression.
+config NF_CONNTRACK_OVS
+ bool
+
config NF_CT_PROTO_DCCP
bool 'DCCP protocol connection tracking support'
depends on NETFILTER_ADVANCED
diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile
index 3754eb06fb41..5ffef1cd6143 100644
--- a/net/netfilter/Makefile
+++ b/net/netfilter/Makefile
@@ -11,6 +11,7 @@ nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMEOUT) += nf_conntrack_timeout.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_TIMESTAMP) += nf_conntrack_timestamp.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_EVENTS) += nf_conntrack_ecache.o
nf_conntrack-$(CONFIG_NF_CONNTRACK_LABELS) += nf_conntrack_labels.o
+nf_conntrack-$(CONFIG_NF_CONNTRACK_OVS) += nf_conntrack_ovs.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_DCCP) += nf_conntrack_proto_dccp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_SCTP) += nf_conntrack_proto_sctp.o
nf_conntrack-$(CONFIG_NF_CT_PROTO_GRE) += nf_conntrack_proto_gre.o
@@ -98,6 +99,12 @@ nf_tables-objs += nft_set_pipapo_avx2.o
endif
endif
+ifdef CONFIG_NFT_CT
+ifdef CONFIG_RETPOLINE
+nf_tables-objs += nft_ct_fast.o
+endif
+endif
+
obj-$(CONFIG_NF_TABLES) += nf_tables.o
obj-$(CONFIG_NFT_COMPAT) += nft_compat.o
obj-$(CONFIG_NFT_CONNLIMIT) += nft_connlimit.o
diff --git a/net/netfilter/core.c b/net/netfilter/core.c
index 5a6705a0e4ec..358220b58521 100644
--- a/net/netfilter/core.c
+++ b/net/netfilter/core.c
@@ -669,6 +669,9 @@ const struct nf_ct_hook __rcu *nf_ct_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_ct_hook);
#if IS_ENABLED(CONFIG_NF_CONNTRACK)
+u8 nf_ctnetlink_has_listener;
+EXPORT_SYMBOL_GPL(nf_ctnetlink_has_listener);
+
const struct nf_nat_hook __rcu *nf_nat_hook __read_mostly;
EXPORT_SYMBOL_GPL(nf_nat_hook);
@@ -702,6 +705,22 @@ void nf_conntrack_destroy(struct nf_conntrack *nfct)
}
EXPORT_SYMBOL(nf_conntrack_destroy);
+void nf_ct_set_closing(struct nf_conntrack *nfct)
+{
+ const struct nf_ct_hook *ct_hook;
+
+ if (!nfct)
+ return;
+
+ rcu_read_lock();
+ ct_hook = rcu_dereference(nf_ct_hook);
+ if (ct_hook)
+ ct_hook->set_closing(nfct);
+
+ rcu_read_unlock();
+}
+EXPORT_SYMBOL_GPL(nf_ct_set_closing);
+
bool nf_ct_get_tuple_skb(struct nf_conntrack_tuple *dst_tuple,
const struct sk_buff *skb)
{
diff --git a/net/netfilter/ipset/Kconfig b/net/netfilter/ipset/Kconfig
index 3c273483df23..b1ea054bb82c 100644
--- a/net/netfilter/ipset/Kconfig
+++ b/net/netfilter/ipset/Kconfig
@@ -30,7 +30,7 @@ config IP_SET_BITMAP_IP
depends on IP_SET
help
This option adds the bitmap:ip set type support, by which one
- can store IPv4 addresses (or network addresse) from a range.
+ can store IPv4 addresses (or network addresses) from a range.
To compile it as a module, choose M here. If unsure, say N.
diff --git a/net/netfilter/ipset/ip_set_bitmap_ip.c b/net/netfilter/ipset/ip_set_bitmap_ip.c
index a8ce04a4bb72..e4fa00abde6a 100644
--- a/net/netfilter/ipset/ip_set_bitmap_ip.c
+++ b/net/netfilter/ipset/ip_set_bitmap_ip.c
@@ -308,8 +308,8 @@ bitmap_ip_create(struct net *net, struct ip_set *set, struct nlattr *tb[],
return -IPSET_ERR_BITMAP_RANGE;
pr_debug("mask_bits %u, netmask %u\n", mask_bits, netmask);
- hosts = 2 << (32 - netmask - 1);
- elements = 2 << (netmask - mask_bits - 1);
+ hosts = 2U << (32 - netmask - 1);
+ elements = 2UL << (netmask - mask_bits - 1);
}
if (elements > IPSET_BITMAP_MAX_RANGE + 1)
return -IPSET_ERR_BITMAP_RANGE_SIZE;
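
The one-character suffixes in the hunk above matter: with a small netmask the old `2 << (32 - netmask - 1)` shifts into the sign bit of a signed int (undefined behaviour), and `elements` could overflow 32 bits for wide ranges. A small demonstration of the now well-defined arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned int netmask = 8, mask_bits = 0;

	/* '2 << 31' (signed) shifts into the sign bit, which is
	 * undefined behaviour; 2U and 2UL keep the shifts well-defined
	 * and give 'elements' extra headroom on 64-bit builds.
	 */
	unsigned int hosts = 2U << (32 - netmask - 1);
	unsigned long elements = 2UL << (netmask - mask_bits - 1);

	printf("hosts=%u elements=%lu\n", hosts, elements);
	return 0;
}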
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index e7ba5b6dd2b7..46ebee9400da 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1698,9 +1698,10 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
retried = true;
- } while (ret == -EAGAIN &&
- set->variant->resize &&
- (ret = set->variant->resize(set, retried)) == 0);
+ } while (ret == -ERANGE ||
+ (ret == -EAGAIN &&
+ set->variant->resize &&
+ (ret = set->variant->resize(set, retried)) == 0));
if (!ret || (ret == -IPSET_ERR_EXIST && eexist))
return 0;
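
This retry loop pairs with the per-type hunks below: instead of rejecting large ranges up front, each uadt handler now adds up to IPSET_MAX_RANGE elements, records the resume point in h->next via the hash_*_data_next() helpers, and returns -ERANGE so call_ad() re-enters it with `retried` set. A toy model of that resumable iteration (names hypothetical):

#include <stdio.h>

#define MAX_RANGE 4

/* Toy version of the resumable loop: bail out after MAX_RANGE
 * elements, remembering where to pick up, so the caller can retry
 * (the kernel re-enters uadt with 'retried' set and h->next valid).
 */
static unsigned int next_ip;

static int add_range(unsigned int from, unsigned int to, int retried)
{
	unsigned int ip = retried ? next_ip : from;
	unsigned int i = 0;

	for (; ip <= to; ip++, i++) {
		if (i > MAX_RANGE) {
			next_ip = ip;	/* hash_*_data_next() equivalent */
			return -1;	/* -ERANGE */
		}
		printf("add %u\n", ip);
	}
	return 0;
}

int main(void)
{
	int retried = 0;

	while (add_range(10, 20, retried))
		retried = 1;	/* call_ad() loops on -ERANGE */
	return 0;
}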
diff --git a/net/netfilter/ipset/ip_set_hash_ip.c b/net/netfilter/ipset/ip_set_hash_ip.c
index e30513cefd90..c9f4e3859663 100644
--- a/net/netfilter/ipset/ip_set_hash_ip.c
+++ b/net/netfilter/ipset/ip_set_hash_ip.c
@@ -100,11 +100,11 @@ static int
hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ip4 *h = set->data;
+ struct hash_ip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ip4_elem e = { 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, hosts;
+ u32 ip = 0, ip_to = 0, hosts, i = 0;
int ret = 0;
if (tb[IPSET_ATTR_LINENO])
@@ -149,14 +149,14 @@ hash_ip4_uadt(struct ip_set *set, struct nlattr *tb[],
hosts = h->netmask == 32 ? 1 : 2 << (32 - h->netmask - 1);
- /* 64bit division is not allowed on 32bit */
- if (((u64)ip_to - ip + 1) >> (32 - h->netmask) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to;) {
+ for (; ip <= ip_to; i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_ipmark.c b/net/netfilter/ipset/ip_set_hash_ipmark.c
index 153de3457423..a22ec1a6f6ec 100644
--- a/net/netfilter/ipset/ip_set_hash_ipmark.c
+++ b/net/netfilter/ipset/ip_set_hash_ipmark.c
@@ -97,11 +97,11 @@ static int
hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipmark4 *h = set->data;
+ struct hash_ipmark4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipmark4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0;
+ u32 ip, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -148,13 +148,14 @@ hash_ipmark4_uadt(struct ip_set *set, struct nlattr *tb[],
ip_set_mask_from_to(ip, ip_to, cidr);
}
- if (((u64)ip_to - ip + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
- for (; ip <= ip_to; ip++) {
+ for (; ip <= ip_to; ip++, i++) {
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipmark4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipport.c b/net/netfilter/ipset/ip_set_hash_ipport.c
index 2ffbd0b78a8c..e977b5a9c48d 100644
--- a/net/netfilter/ipset/ip_set_hash_ipport.c
+++ b/net/netfilter/ipset/ip_set_hash_ipport.c
@@ -112,11 +112,11 @@ static int
hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipport4 *h = set->data;
+ struct hash_ipport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipport4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -184,17 +184,18 @@ hash_ipport4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportip.c b/net/netfilter/ipset/ip_set_hash_ipportip.c
index 334fb1ad0e86..39a01934b153 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportip.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportip.c
@@ -108,11 +108,11 @@ static int
hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportip4 *h = set->data;
+ struct hash_ipportip4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportip4_elem e = { .ip = 0 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip, ip_to = 0, p = 0, port, port_to;
+ u32 ip, ip_to = 0, p = 0, port, port_to, i = 0;
bool with_ports = false;
int ret;
@@ -180,17 +180,18 @@ hash_ipportip4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
if (retried)
ip = ntohl(h->next.ip);
for (; ip <= ip_to; ip++) {
p = retried && ip == ntohl(h->next.ip) ? ntohs(h->next.port)
: port;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.ip = htonl(ip);
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportip4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_ipportnet.c b/net/netfilter/ipset/ip_set_hash_ipportnet.c
index 7df94f437f60..5c6de605a9fb 100644
--- a/net/netfilter/ipset/ip_set_hash_ipportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_ipportnet.c
@@ -160,12 +160,12 @@ static int
hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_ipportnet4 *h = set->data;
+ struct hash_ipportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_ipportnet4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -253,9 +253,6 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
swap(port, port_to);
}
- if (((u64)ip_to - ip + 1)*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
-
ip2_to = ip2_from;
if (tb[IPSET_ATTR_IP2_TO]) {
ret = ip_set_get_hostipaddr4(tb[IPSET_ATTR_IP2_TO], &ip2_to);
@@ -282,9 +279,15 @@ hash_ipportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip2 = htonl(ip2);
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &cidr);
e.cidr = cidr - 1;
+ if (i > IPSET_MAX_RANGE) {
+ hash_ipportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_net.c b/net/netfilter/ipset/ip_set_hash_net.c
index 1422739d9aa2..ce0a9ce5a91f 100644
--- a/net/netfilter/ipset/ip_set_hash_net.c
+++ b/net/netfilter/ipset/ip_set_hash_net.c
@@ -136,11 +136,11 @@ static int
hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_net4 *h = set->data;
+ struct hash_net4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_net4_elem e = { .cidr = HOST_MASK };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -188,19 +188,16 @@ hash_net4_uadt(struct ip_set *set, struct nlattr *tb[],
if (ip + UINT_MAX == ip_to)
return -IPSET_ERR_HASH_RANGE;
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_net4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 9810f5bf63f5..031073286236 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -202,7 +202,7 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netiface4_elem e = { .cidr = HOST_MASK, .elem = 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 ip = 0, ip_to = 0, ipn, n = 0;
+ u32 ip = 0, ip_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -256,19 +256,16 @@ hash_netiface4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried)
ip = ntohl(h->next.ip);
do {
+ i++;
e.ip = htonl(ip);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netiface4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr);
ret = adtfn(set, &e, &ext, &ext, flags);
diff --git a/net/netfilter/ipset/ip_set_hash_netnet.c b/net/netfilter/ipset/ip_set_hash_netnet.c
index cdfb78c6e0d3..8fbe649c9dd3 100644
--- a/net/netfilter/ipset/ip_set_hash_netnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netnet.c
@@ -166,13 +166,12 @@ static int
hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netnet4 *h = set->data;
+ struct hash_netnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0;
- u32 ip2 = 0, ip2_from = 0, ip2_to = 0, ipn;
- u64 n = 0, m = 0;
+ u32 ip2 = 0, ip2_from = 0, ip2_to = 0, i = 0;
int ret;
if (tb[IPSET_ATTR_LINENO])
@@ -248,19 +247,6 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -273,7 +259,12 @@ hash_netnet4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip[0] = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
do {
+ i++;
e.ip[1] = htonl(ip2);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netnet4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ip2 = ip_set_range_to_cidr(ip2, ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
diff --git a/net/netfilter/ipset/ip_set_hash_netport.c b/net/netfilter/ipset/ip_set_hash_netport.c
index 09cf72eb37f8..d1a0628df4ef 100644
--- a/net/netfilter/ipset/ip_set_hash_netport.c
+++ b/net/netfilter/ipset/ip_set_hash_netport.c
@@ -154,12 +154,11 @@ static int
hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netport4 *h = set->data;
+ struct hash_netport4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netport4_elem e = { .cidr = HOST_MASK - 1 };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
- u32 port, port_to, p = 0, ip = 0, ip_to = 0, ipn;
- u64 n = 0;
+ u32 port, port_to, p = 0, ip = 0, ip_to = 0, i = 0;
bool with_ports = false;
u8 cidr;
int ret;
@@ -236,14 +235,6 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip, ip_to, e.cidr + 1);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &cidr);
- n++;
- } while (ipn++ < ip_to);
-
- if (n*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip);
@@ -255,8 +246,12 @@ hash_netport4_uadt(struct ip_set *set, struct nlattr *tb[],
e.ip = htonl(ip);
ip = ip_set_range_to_cidr(ip, ip_to, &cidr);
e.cidr = cidr - 1;
- for (; p <= port_to; p++) {
+ for (; p <= port_to; p++, i++) {
e.port = htons(p);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netport4_data_next(&h->next, &e);
+ return -ERANGE;
+ }
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
diff --git a/net/netfilter/ipset/ip_set_hash_netportnet.c b/net/netfilter/ipset/ip_set_hash_netportnet.c
index 19bcdb3141f6..005a7ce87217 100644
--- a/net/netfilter/ipset/ip_set_hash_netportnet.c
+++ b/net/netfilter/ipset/ip_set_hash_netportnet.c
@@ -173,17 +173,26 @@ hash_netportnet4_kadt(struct ip_set *set, const struct sk_buff *skb,
return adtfn(set, &e, &ext, &opt->ext, opt->cmdflags);
}
+static u32
+hash_netportnet4_range_to_cidr(u32 from, u32 to, u8 *cidr)
+{
+ if (from == 0 && to == UINT_MAX) {
+ *cidr = 0;
+ return to;
+ }
+ return ip_set_range_to_cidr(from, to, cidr);
+}
+
static int
hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
enum ipset_adt adt, u32 *lineno, u32 flags, bool retried)
{
- const struct hash_netportnet4 *h = set->data;
+ struct hash_netportnet4 *h = set->data;
ipset_adtfn adtfn = set->variant->adt[adt];
struct hash_netportnet4_elem e = { };
struct ip_set_ext ext = IP_SET_INIT_UEXT(set);
u32 ip = 0, ip_to = 0, p = 0, port, port_to;
- u32 ip2_from = 0, ip2_to = 0, ip2, ipn;
- u64 n = 0, m = 0;
+ u32 ip2_from = 0, ip2_to = 0, ip2, i = 0;
bool with_ports = false;
int ret;
@@ -285,19 +294,6 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
} else {
ip_set_mask_from_to(ip2_from, ip2_to, e.cidr[1]);
}
- ipn = ip;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip_to, &e.cidr[0]);
- n++;
- } while (ipn++ < ip_to);
- ipn = ip2_from;
- do {
- ipn = ip_set_range_to_cidr(ipn, ip2_to, &e.cidr[1]);
- m++;
- } while (ipn++ < ip2_to);
-
- if (n*m*(port_to - port + 1) > IPSET_MAX_RANGE)
- return -ERANGE;
if (retried) {
ip = ntohl(h->next.ip[0]);
@@ -310,13 +306,19 @@ hash_netportnet4_uadt(struct ip_set *set, struct nlattr *tb[],
do {
e.ip[0] = htonl(ip);
- ip = ip_set_range_to_cidr(ip, ip_to, &e.cidr[0]);
+ ip = hash_netportnet4_range_to_cidr(ip, ip_to, &e.cidr[0]);
for (; p <= port_to; p++) {
e.port = htons(p);
do {
+ i++;
e.ip[1] = htonl(ip2);
- ip2 = ip_set_range_to_cidr(ip2, ip2_to,
- &e.cidr[1]);
+ if (i > IPSET_MAX_RANGE) {
+ hash_netportnet4_data_next(&h->next,
+ &e);
+ return -ERANGE;
+ }
+ ip2 = hash_netportnet4_range_to_cidr(ip2,
+ ip2_to, &e.cidr[1]);
ret = adtfn(set, &e, &ext, &ext, flags);
if (ret && !ip_set_eexist(ret, flags))
return ret;
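
The hash_netportnet4_range_to_cidr() helper added above special-cases the full 0.0.0.0-255.255.255.255 range; my reading is that the usual `next = last + 1` stepping wraps past UINT_MAX so the cidr walk would never terminate, hence the helper emits a single /0 block instead. The wrap itself:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t last = UINT32_MAX;	/* 255.255.255.255 */
	uint32_t next = last + 1;	/* well-defined unsigned wrap to 0 */

	printf("next after 0xffffffff: %u\n", next);
	return 0;
}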
diff --git a/net/netfilter/ipvs/ip_vs_est.c b/net/netfilter/ipvs/ip_vs_est.c
index ce2a1549b304..c5970ba416ae 100644
--- a/net/netfilter/ipvs/ip_vs_est.c
+++ b/net/netfilter/ipvs/ip_vs_est.c
@@ -549,7 +549,7 @@ void ip_vs_stop_estimator(struct netns_ipvs *ipvs, struct ip_vs_stats *stats)
__set_bit(row, kd->avail);
if (!kd->tick_len[row]) {
RCU_INIT_POINTER(kd->ticks[row], NULL);
- kfree_rcu(td);
+ kfree_rcu(td, rcu_head);
}
kd->est_count--;
if (kd->est_count) {
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index 029171379884..80448885c3d7 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -994,7 +994,7 @@ ip_vs_prepare_tunneled_skb(struct sk_buff *skb, int skb_af,
old_dsfield = ipv4_get_dsfield(old_iph);
*ttl = old_iph->ttl;
if (payload_len)
- *payload_len = ntohs(old_iph->tot_len);
+ *payload_len = skb_ip_totlen(skb);
}
/* Implement full-functionality option for ECN encapsulation */
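
ntohs(old_iph->tot_len) is no longer trustworthy here: with IPv4 BIG TCP,
GSO packets larger than 64KB carry tot_len == 0, and skb_ip_totlen()
falls back to the skb's real length in that case. A hedged sketch of
roughly what that helper computes (simplified, not the actual
definition):

    #include <linux/ip.h>
    #include <linux/skbuff.h>

    /* If an IPv4 GSO TCP packet reports tot_len == 0, derive the true
     * length from the skb; otherwise trust the header. Sketch only. */
    static unsigned int totlen_sketch(const struct sk_buff *skb,
                                      const struct iphdr *iph)
    {
        u32 len = ntohs(iph->tot_len);

        if (!len && skb_is_gso(skb) && skb_is_gso_tcp(skb))
            return skb->len - skb_network_offset(skb);  /* BIG TCP */
        return len;
    }
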
diff --git a/net/netfilter/nf_conntrack_bpf.c b/net/netfilter/nf_conntrack_bpf.c
index 24002bc61e07..cd99e6dc1f35 100644
--- a/net/netfilter/nf_conntrack_bpf.c
+++ b/net/netfilter/nf_conntrack_bpf.c
@@ -249,7 +249,7 @@ __diag_ignore_all("-Wmissing-prototypes",
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn___init *
+__bpf_kfunc struct nf_conn___init *
bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -283,7 +283,7 @@ bpf_xdp_ct_alloc(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn *
+__bpf_kfunc struct nf_conn *
bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -316,7 +316,7 @@ bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn___init *
+__bpf_kfunc struct nf_conn___init *
bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -351,7 +351,7 @@ bpf_skb_ct_alloc(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
* @opts__sz - Length of the bpf_ct_opts structure
* Must be NF_BPF_CT_OPTS_SZ (12)
*/
-struct nf_conn *
+__bpf_kfunc struct nf_conn *
bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
u32 tuple__sz, struct bpf_ct_opts *opts, u32 opts__sz)
{
@@ -376,12 +376,11 @@ bpf_skb_ct_lookup(struct __sk_buff *skb_ctx, struct bpf_sock_tuple *bpf_tuple,
* @nfct - Pointer to referenced nf_conn___init object, obtained
* using bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
*/
-struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
+__bpf_kfunc struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
{
struct nf_conn *nfct = (struct nf_conn *)nfct_i;
int err;
- nfct->status |= IPS_CONFIRMED;
err = nf_conntrack_hash_check_insert(nfct);
if (err < 0) {
nf_conntrack_free(nfct);
@@ -400,7 +399,7 @@ struct nf_conn *bpf_ct_insert_entry(struct nf_conn___init *nfct_i)
* @nf_conn - Pointer to referenced nf_conn object, obtained using
* bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
*/
-void bpf_ct_release(struct nf_conn *nfct)
+__bpf_kfunc void bpf_ct_release(struct nf_conn *nfct)
{
if (!nfct)
return;
@@ -417,7 +416,7 @@ void bpf_ct_release(struct nf_conn *nfct)
* bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
* @timeout - Timeout in msecs.
*/
-void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
+__bpf_kfunc void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
{
__nf_ct_set_timeout((struct nf_conn *)nfct, msecs_to_jiffies(timeout));
}
@@ -432,7 +431,7 @@ void bpf_ct_set_timeout(struct nf_conn___init *nfct, u32 timeout)
* bpf_ct_insert_entry, bpf_xdp_ct_lookup, or bpf_skb_ct_lookup.
* @timeout - New timeout in msecs.
*/
-int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
+__bpf_kfunc int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
{
return __nf_ct_change_timeout(nfct, msecs_to_jiffies(timeout));
}
@@ -447,7 +446,7 @@ int bpf_ct_change_timeout(struct nf_conn *nfct, u32 timeout)
* bpf_xdp_ct_alloc or bpf_skb_ct_alloc.
* @status - New status value.
*/
-int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
+__bpf_kfunc int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
{
return nf_ct_change_status_common((struct nf_conn *)nfct, status);
}
@@ -462,7 +461,7 @@ int bpf_ct_set_status(const struct nf_conn___init *nfct, u32 status)
* bpf_ct_insert_entry, bpf_xdp_ct_lookup or bpf_skb_ct_lookup.
* @status - New status value.
*/
-int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
+__bpf_kfunc int bpf_ct_change_status(struct nf_conn *nfct, u32 status)
{
return nf_ct_change_status_common(nfct, status);
}
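
Every conntrack kfunc above gains the __bpf_kfunc annotation, which keeps
the function out-of-line and prevents the compiler (and LTO) from
dropping or mangling symbols that only BPF programs call. A sketch of how
a kfunc is declared and registered; the function and set names here are
illustrative, not from this patch:

    #include <linux/btf.h>
    #include <linux/btf_ids.h>
    #include <linux/module.h>

    __bpf_kfunc u32 bpf_demo_double(u32 x)
    {
        return x * 2;   /* trivially small, hence the need to keep it
                         * from being inlined or discarded */
    }

    BTF_SET8_START(demo_kfunc_ids)
    BTF_ID_FLAGS(func, bpf_demo_double)
    BTF_SET8_END(demo_kfunc_ids)

    static const struct btf_kfunc_id_set demo_kfunc_set = {
        .owner = THIS_MODULE,
        .set   = &demo_kfunc_ids,
    };

    /* register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, &demo_kfunc_set)
     * in module init would expose it to XDP programs. */
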
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 496c4920505b..7250082e7de5 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -514,7 +514,6 @@ EXPORT_SYMBOL_GPL(nf_ct_get_id);
static void
clean_from_lists(struct nf_conn *ct)
{
- pr_debug("clean_from_lists(%p)\n", ct);
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
hlist_nulls_del_rcu(&ct->tuplehash[IP_CT_DIR_REPLY].hnnode);
@@ -582,7 +581,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
- pr_debug("%s(%p)\n", __func__, ct);
WARN_ON(refcount_read(&nfct->use) != 0);
if (unlikely(nf_ct_is_template(ct))) {
@@ -603,7 +601,6 @@ void nf_ct_destroy(struct nf_conntrack *nfct)
if (ct->master)
nf_ct_put(ct->master);
- pr_debug("%s: returning ct=%p to slab\n", __func__, ct);
nf_conntrack_free(ct);
}
EXPORT_SYMBOL(nf_ct_destroy);
@@ -786,8 +783,6 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
- rcu_read_lock();
-
h = ____nf_conntrack_find(net, zone, tuple, hash);
if (h) {
/* We have a candidate that matches the tuple we're interested
@@ -799,7 +794,7 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
smp_acquire__after_ctrl_dep();
if (likely(nf_ct_key_equal(h, tuple, zone, net)))
- goto found;
+ return h;
/* TYPESAFE_BY_RCU recycled the candidate */
nf_ct_put(ct);
@@ -807,8 +802,6 @@ __nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
h = NULL;
}
-found:
- rcu_read_unlock();
return h;
}
@@ -820,16 +813,21 @@ nf_conntrack_find_get(struct net *net, const struct nf_conntrack_zone *zone,
unsigned int rid, zone_id = nf_ct_zone_id(zone, IP_CT_DIR_ORIGINAL);
struct nf_conntrack_tuple_hash *thash;
+ rcu_read_lock();
+
thash = __nf_conntrack_find_get(net, zone, tuple,
hash_conntrack_raw(tuple, zone_id, net));
if (thash)
- return thash;
+ goto out_unlock;
rid = nf_ct_zone_id(zone, IP_CT_DIR_REPLY);
if (rid != zone_id)
- return __nf_conntrack_find_get(net, zone, tuple,
- hash_conntrack_raw(tuple, rid, net));
+ thash = __nf_conntrack_find_get(net, zone, tuple,
+ hash_conntrack_raw(tuple, rid, net));
+
+out_unlock:
+ rcu_read_unlock();
return thash;
}
EXPORT_SYMBOL_GPL(nf_conntrack_find_get);
@@ -886,10 +884,8 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
zone = nf_ct_zone(ct);
- if (!nf_ct_ext_valid_pre(ct->ext)) {
- NF_CT_STAT_INC_ATOMIC(net, insert_failed);
- return -ETIMEDOUT;
- }
+ if (!nf_ct_ext_valid_pre(ct->ext))
+ return -EAGAIN;
local_bh_disable();
do {
@@ -924,6 +920,19 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
goto chaintoolong;
}
+ /* If genid has changed, we can't insert anymore because ct
+ * extensions could have stale pointers and nf_ct_iterate_destroy
+ * might have completed its table scan already.
+ *
+ * Increment of the ext genid right after this check is fine:
+ * nf_ct_iterate_destroy blocks until locks are released.
+ */
+ if (!nf_ct_ext_valid_post(ct->ext)) {
+ err = -EAGAIN;
+ goto out;
+ }
+
+ ct->status |= IPS_CONFIRMED;
smp_wmb();
/* The caller holds a reference to this object */
refcount_set(&ct->ct_general.use, 2);
@@ -932,12 +941,6 @@ nf_conntrack_hash_check_insert(struct nf_conn *ct)
NF_CT_STAT_INC(net, insert);
local_bh_enable();
- if (!nf_ct_ext_valid_post(ct->ext)) {
- nf_ct_kill(ct);
- NF_CT_STAT_INC_ATOMIC(net, drop);
- return -ETIMEDOUT;
- }
-
return 0;
chaintoolong:
NF_CT_STAT_INC(net, chaintoolong);
@@ -1210,7 +1213,6 @@ __nf_conntrack_confirm(struct sk_buff *skb)
goto dying;
}
- pr_debug("Confirming conntrack %p\n", ct);
/* We have to check the DYING flag after unlink to prevent
* a race against nf_ct_get_next_corpse() possibly called from
* user context, else we insert an already 'dead' hash, blocking
@@ -1374,9 +1376,6 @@ static unsigned int early_drop_list(struct net *net,
hlist_nulls_for_each_entry_rcu(h, n, head, hnnode) {
tmp = nf_ct_tuplehash_to_ctrack(h);
- if (test_bit(IPS_OFFLOAD_BIT, &tmp->status))
- continue;
-
if (nf_ct_is_expired(tmp)) {
nf_ct_gc_expired(tmp);
continue;
@@ -1446,11 +1445,14 @@ static bool gc_worker_skip_ct(const struct nf_conn *ct)
static bool gc_worker_can_early_drop(const struct nf_conn *ct)
{
const struct nf_conntrack_l4proto *l4proto;
+ u8 protonum = nf_ct_protonum(ct);
+ if (test_bit(IPS_OFFLOAD_BIT, &ct->status) && protonum != IPPROTO_UDP)
+ return false;
if (!test_bit(IPS_ASSURED_BIT, &ct->status))
return true;
- l4proto = nf_ct_l4proto_find(nf_ct_protonum(ct));
+ l4proto = nf_ct_l4proto_find(protonum);
if (l4proto->can_early_drop && l4proto->can_early_drop(ct))
return true;
@@ -1507,7 +1509,8 @@ static void gc_worker(struct work_struct *work)
if (test_bit(IPS_OFFLOAD_BIT, &tmp->status)) {
nf_ct_offload_timeout(tmp);
- continue;
+ if (!nf_conntrack_max95)
+ continue;
}
if (expired_count > GC_SCAN_EXPIRED_MAX) {
@@ -1721,10 +1724,8 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
struct nf_conntrack_zone tmp;
struct nf_conntrack_net *cnet;
- if (!nf_ct_invert_tuple(&repl_tuple, tuple)) {
- pr_debug("Can't invert tuple.\n");
+ if (!nf_ct_invert_tuple(&repl_tuple, tuple))
return NULL;
- }
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
ct = __nf_conntrack_alloc(net, zone, tuple, &repl_tuple, GFP_ATOMIC,
@@ -1764,8 +1765,6 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
spin_lock_bh(&nf_conntrack_expect_lock);
exp = nf_ct_find_expectation(net, zone, tuple);
if (exp) {
- pr_debug("expectation arrives ct=%p exp=%p\n",
- ct, exp);
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
/* exp->master safe, refcnt bumped in nf_ct_find_expectation */
@@ -1829,10 +1828,8 @@ resolve_normal_ct(struct nf_conn *tmpl,
if (!nf_ct_get_tuple(skb, skb_network_offset(skb),
dataoff, state->pf, protonum, state->net,
- &tuple)) {
- pr_debug("Can't get tuple\n");
+ &tuple))
return 0;
- }
/* look for tuple match */
zone = nf_ct_zone_tmpl(tmpl, skb, &tmp);
@@ -1864,17 +1861,15 @@ resolve_normal_ct(struct nf_conn *tmpl,
if (NF_CT_DIRECTION(h) == IP_CT_DIR_REPLY) {
ctinfo = IP_CT_ESTABLISHED_REPLY;
} else {
+ unsigned long status = READ_ONCE(ct->status);
+
/* Once we've had two way comms, always ESTABLISHED. */
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
- pr_debug("normal packet for %p\n", ct);
+ if (likely(status & IPS_SEEN_REPLY))
ctinfo = IP_CT_ESTABLISHED;
- } else if (test_bit(IPS_EXPECTED_BIT, &ct->status)) {
- pr_debug("related packet for %p\n", ct);
+ else if (status & IPS_EXPECTED)
ctinfo = IP_CT_RELATED;
- } else {
- pr_debug("new packet for %p\n", ct);
+ else
ctinfo = IP_CT_NEW;
- }
}
nf_ct_set(skb, ct, ctinfo);
return 0;
@@ -1988,7 +1983,6 @@ nf_conntrack_in(struct sk_buff *skb, const struct nf_hook_state *state)
/* rcu_read_lock()ed by nf_hook_thresh */
dataoff = get_l4proto(skb, skb_network_offset(skb), state->pf, &protonum);
if (dataoff <= 0) {
- pr_debug("not prepared to track yet or error occurred\n");
NF_CT_STAT_INC_ATOMIC(state->net, invalid);
ret = NF_ACCEPT;
goto out;
@@ -2027,7 +2021,6 @@ repeat:
if (ret <= 0) {
/* Invalid: inverse of the return code tells
* the netfilter core what to do */
- pr_debug("nf_conntrack_in: Can't track with proto module\n");
nf_ct_put(ct);
skb->_nfct = 0;
/* Special case: TCP tracker reports an attempt to reopen a
@@ -2066,7 +2059,6 @@ void nf_conntrack_alter_reply(struct nf_conn *ct,
/* Should be unconfirmed, so not in hash table yet */
WARN_ON(nf_ct_is_confirmed(ct));
- pr_debug("Altering reply tuple of %p to ", ct);
nf_ct_dump_tuple(newreply);
ct->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
@@ -2761,11 +2753,23 @@ err_cachep:
return ret;
}
+static void nf_conntrack_set_closing(struct nf_conntrack *nfct)
+{
+ struct nf_conn *ct = nf_ct_to_nf_conn(nfct);
+
+ switch (nf_ct_protonum(ct)) {
+ case IPPROTO_TCP:
+ nf_conntrack_tcp_set_closing(ct);
+ break;
+ }
+}
+
static const struct nf_ct_hook nf_conntrack_hook = {
.update = nf_conntrack_update,
.destroy = nf_ct_destroy,
.get_tuple_skb = nf_conntrack_get_tuple_skb,
.attach = nf_conntrack_attach,
+ .set_closing = nf_conntrack_set_closing,
};
void nf_conntrack_init_end(void)
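
The new .set_closing hook gives the rest of the stack a way to tell
conntrack that a flow is being torn down locally, so the TCP entry can be
moved to the short CLOSE timeout (see nf_conntrack_tcp_set_closing()
further down) instead of idling in ESTABLISHED; dispatch is by protocol
number so other trackers can opt in later. A hedged sketch of the caller
side, using the global nf_ct_hook table:

    #include <linux/netfilter.h>
    #include <linux/rcupdate.h>

    /* Illustrative caller: a subsystem tearing down a connection could
     * nudge conntrack like this (module-reference handling omitted). */
    static void flow_is_closing(struct nf_conntrack *nfct)
    {
        const struct nf_ct_hook *hook;

        rcu_read_lock();
        hook = rcu_dereference(nf_ct_hook);
        if (hook && hook->set_closing)
            hook->set_closing(nfct);
        rcu_read_unlock();
    }
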
diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c
index 8698b3424646..69948e1d6974 100644
--- a/net/netfilter/nf_conntrack_ecache.c
+++ b/net/netfilter/nf_conntrack_ecache.c
@@ -309,7 +309,7 @@ bool nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp
break;
return true;
case 2: /* autodetect: no event listener, don't allocate extension. */
- if (!READ_ONCE(net->ct.ctnetlink_has_listener))
+ if (!READ_ONCE(nf_ctnetlink_has_listener))
return true;
fallthrough;
case 1:
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 48ea6d0264b5..0c4db2f2ac43 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -242,104 +242,6 @@ int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
}
EXPORT_SYMBOL_GPL(__nf_ct_try_assign_helper);
-/* 'skb' should already be pulled to nh_ofs. */
-int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
- enum ip_conntrack_info ctinfo, u16 proto)
-{
- const struct nf_conntrack_helper *helper;
- const struct nf_conn_help *help;
- unsigned int protoff;
- int err;
-
- if (ctinfo == IP_CT_RELATED_REPLY)
- return NF_ACCEPT;
-
- help = nfct_help(ct);
- if (!help)
- return NF_ACCEPT;
-
- helper = rcu_dereference(help->helper);
- if (!helper)
- return NF_ACCEPT;
-
- if (helper->tuple.src.l3num != NFPROTO_UNSPEC &&
- helper->tuple.src.l3num != proto)
- return NF_ACCEPT;
-
- switch (proto) {
- case NFPROTO_IPV4:
- protoff = ip_hdrlen(skb);
- proto = ip_hdr(skb)->protocol;
- break;
- case NFPROTO_IPV6: {
- u8 nexthdr = ipv6_hdr(skb)->nexthdr;
- __be16 frag_off;
- int ofs;
-
- ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
- &frag_off);
- if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
- pr_debug("proto header not found\n");
- return NF_ACCEPT;
- }
- protoff = ofs;
- proto = nexthdr;
- break;
- }
- default:
- WARN_ONCE(1, "helper invoked on non-IP family!");
- return NF_DROP;
- }
-
- if (helper->tuple.dst.protonum != proto)
- return NF_ACCEPT;
-
- err = helper->help(skb, protoff, ct, ctinfo);
- if (err != NF_ACCEPT)
- return err;
-
- /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
- * FTP with NAT) adusting the TCP payload size when mangling IP
- * addresses and/or port numbers in the text-based control connection.
- */
- if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
- !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
- return NF_DROP;
- return NF_ACCEPT;
-}
-EXPORT_SYMBOL_GPL(nf_ct_helper);
-
-int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
- u8 proto, bool nat, struct nf_conntrack_helper **hp)
-{
- struct nf_conntrack_helper *helper;
- struct nf_conn_help *help;
- int ret = 0;
-
- helper = nf_conntrack_helper_try_module_get(name, family, proto);
- if (!helper)
- return -EINVAL;
-
- help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
- if (!help) {
- nf_conntrack_helper_put(helper);
- return -ENOMEM;
- }
-#if IS_ENABLED(CONFIG_NF_NAT)
- if (nat) {
- ret = nf_nat_helper_try_module_get(name, family, proto);
- if (ret) {
- nf_conntrack_helper_put(helper);
- return ret;
- }
- }
-#endif
- rcu_assign_pointer(help->helper, helper);
- *hp = helper;
- return ret;
-}
-EXPORT_SYMBOL_GPL(nf_ct_add_helper);
-
/* appropriate ct lock protecting must be taken by caller */
static int unhelp(struct nf_conn *ct, void *me)
{
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 1286ae7d4609..c11dff91d52d 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -2316,9 +2316,6 @@ ctnetlink_create_conntrack(struct net *net,
nfct_seqadj_ext_add(ct);
nfct_synproxy_ext_add(ct);
- /* we must add conntrack extensions before confirmation. */
- ct->status |= IPS_CONFIRMED;
-
if (cda[CTA_STATUS]) {
err = ctnetlink_change_status(ct, cda);
if (err < 0)
@@ -2375,12 +2372,15 @@ ctnetlink_create_conntrack(struct net *net,
err = nf_conntrack_hash_check_insert(ct);
if (err < 0)
- goto err2;
+ goto err3;
rcu_read_unlock();
return ct;
+err3:
+ if (ct->master)
+ nf_ct_put(ct->master);
err2:
rcu_read_unlock();
err1:
@@ -3866,7 +3866,7 @@ static int __init ctnetlink_init(void)
{
int ret;
- BUILD_BUG_ON(sizeof(struct ctnetlink_list_dump_ctx) > sizeof_field(struct netlink_callback, ctx));
+ NL_ASSERT_DUMP_CTX_FITS(struct ctnetlink_list_dump_ctx);
ret = nfnetlink_subsys_register(&ctnl_subsys);
if (ret < 0) {
diff --git a/net/netfilter/nf_conntrack_ovs.c b/net/netfilter/nf_conntrack_ovs.c
new file mode 100644
index 000000000000..52b776bdf526
--- /dev/null
+++ b/net/netfilter/nf_conntrack_ovs.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Support ct functions for openvswitch, used by both OVS and TC conntrack. */
+
+#include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_seqadj.h>
+#include <net/netfilter/ipv6/nf_defrag_ipv6.h>
+#include <net/ipv6_frag.h>
+#include <net/ip.h>
+
+/* 'skb' should already be pulled to nh_ofs. */
+int nf_ct_helper(struct sk_buff *skb, struct nf_conn *ct,
+ enum ip_conntrack_info ctinfo, u16 proto)
+{
+ const struct nf_conntrack_helper *helper;
+ const struct nf_conn_help *help;
+ unsigned int protoff;
+ int err;
+
+ if (ctinfo == IP_CT_RELATED_REPLY)
+ return NF_ACCEPT;
+
+ help = nfct_help(ct);
+ if (!help)
+ return NF_ACCEPT;
+
+ helper = rcu_dereference(help->helper);
+ if (!helper)
+ return NF_ACCEPT;
+
+ if (helper->tuple.src.l3num != NFPROTO_UNSPEC &&
+ helper->tuple.src.l3num != proto)
+ return NF_ACCEPT;
+
+ switch (proto) {
+ case NFPROTO_IPV4:
+ protoff = ip_hdrlen(skb);
+ proto = ip_hdr(skb)->protocol;
+ break;
+ case NFPROTO_IPV6: {
+ u8 nexthdr = ipv6_hdr(skb)->nexthdr;
+ __be16 frag_off;
+ int ofs;
+
+ ofs = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr,
+ &frag_off);
+ if (ofs < 0 || (frag_off & htons(~0x7)) != 0) {
+ pr_debug("proto header not found\n");
+ return NF_ACCEPT;
+ }
+ protoff = ofs;
+ proto = nexthdr;
+ break;
+ }
+ default:
+ WARN_ONCE(1, "helper invoked on non-IP family!");
+ return NF_DROP;
+ }
+
+ if (helper->tuple.dst.protonum != proto)
+ return NF_ACCEPT;
+
+ err = helper->help(skb, protoff, ct, ctinfo);
+ if (err != NF_ACCEPT)
+ return err;
+
+ /* Adjust seqs after helper. This is needed due to some helpers (e.g.,
+ * FTP with NAT) adjusting the TCP payload size when mangling IP
+ * addresses and/or port numbers in the text-based control connection.
+ */
+ if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) &&
+ !nf_ct_seq_adjust(skb, ct, ctinfo, protoff))
+ return NF_DROP;
+ return NF_ACCEPT;
+}
+EXPORT_SYMBOL_GPL(nf_ct_helper);
+
+int nf_ct_add_helper(struct nf_conn *ct, const char *name, u8 family,
+ u8 proto, bool nat, struct nf_conntrack_helper **hp)
+{
+ struct nf_conntrack_helper *helper;
+ struct nf_conn_help *help;
+ int ret = 0;
+
+ helper = nf_conntrack_helper_try_module_get(name, family, proto);
+ if (!helper)
+ return -EINVAL;
+
+ help = nf_ct_helper_ext_add(ct, GFP_KERNEL);
+ if (!help) {
+ nf_conntrack_helper_put(helper);
+ return -ENOMEM;
+ }
+#if IS_ENABLED(CONFIG_NF_NAT)
+ if (nat) {
+ ret = nf_nat_helper_try_module_get(name, family, proto);
+ if (ret) {
+ nf_conntrack_helper_put(helper);
+ return ret;
+ }
+ }
+#endif
+ rcu_assign_pointer(help->helper, helper);
+ *hp = helper;
+ return ret;
+}
+EXPORT_SYMBOL_GPL(nf_ct_add_helper);
+
+/* Trim the skb to the length specified by the IP/IPv6 header,
+ * removing any trailing lower-layer padding. This prepares the skb
+ * for higher-layer processing that assumes skb->len excludes padding
+ * (such as nf_ip_checksum). The caller needs to pull the skb to the
+ * network header, and ensure ip_hdr/ipv6_hdr point to valid data.
+ */
+int nf_ct_skb_network_trim(struct sk_buff *skb, int family)
+{
+ unsigned int len;
+
+ switch (family) {
+ case NFPROTO_IPV4:
+ len = skb_ip_totlen(skb);
+ break;
+ case NFPROTO_IPV6:
+ len = sizeof(struct ipv6hdr)
+ + ntohs(ipv6_hdr(skb)->payload_len);
+ break;
+ default:
+ len = skb->len;
+ }
+
+ return pskb_trim_rcsum(skb, len);
+}
+EXPORT_SYMBOL_GPL(nf_ct_skb_network_trim);
+
+/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
+ * value if 'skb' is freed.
+ */
+int nf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
+ u16 zone, u8 family, u8 *proto, u16 *mru)
+{
+ int err;
+
+ if (family == NFPROTO_IPV4) {
+ enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
+
+ memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
+ local_bh_disable();
+ err = ip_defrag(net, skb, user);
+ local_bh_enable();
+ if (err)
+ return err;
+
+ *mru = IPCB(skb)->frag_max_size;
+#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
+ } else if (family == NFPROTO_IPV6) {
+ enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
+
+ memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
+ err = nf_ct_frag6_gather(net, skb, user);
+ if (err) {
+ if (err != -EINPROGRESS)
+ kfree_skb(skb);
+ return err;
+ }
+
+ *proto = ipv6_hdr(skb)->nexthdr;
+ *mru = IP6CB(skb)->frag_max_size;
+#endif
+ } else {
+ kfree_skb(skb);
+ return -EPFNOSUPPORT;
+ }
+
+ skb_clear_hash(skb);
+ skb->ignore_df = 1;
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(nf_ct_handle_fragments);
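
The new file consolidates helpers that OVS and act_ct previously carried
separately; nf_ct_helper() and nf_ct_add_helper() move here verbatim from
nf_conntrack_helper.c, and nf_ct_skb_network_trim() plus
nf_ct_handle_fragments() are the shared pre-conntrack packet fixups. A
hedged sketch of how a consumer might chain the latter two (ordering and
error handling here are illustrative):

    #include <net/netfilter/nf_conntrack.h>

    static int prepare_for_conntrack(struct net *net, struct sk_buff *skb,
                                     u16 zone, u8 family)
    {
        u16 mru = 0;
        u8 proto = 0;
        int err;

        /* reassemble first; -EINPROGRESS means the skb was stolen and
         * must not be touched again */
        err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
        if (err)
            return err;

        /* then drop trailing lower-layer padding beyond the IP length */
        return nf_ct_skb_network_trim(skb, family);
    }
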
diff --git a/net/netfilter/nf_conntrack_proto.c b/net/netfilter/nf_conntrack_proto.c
index 99323fb12d0f..c928ff63b10e 100644
--- a/net/netfilter/nf_conntrack_proto.c
+++ b/net/netfilter/nf_conntrack_proto.c
@@ -141,6 +141,7 @@ unsigned int nf_confirm(void *priv,
struct nf_conn *ct;
bool seqadj_needed;
__be16 frag_off;
+ int start;
u8 pnum;
ct = nf_ct_get(skb, &ctinfo);
@@ -163,9 +164,11 @@ unsigned int nf_confirm(void *priv,
break;
case NFPROTO_IPV6:
pnum = ipv6_hdr(skb)->nexthdr;
- protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
- if (protoff < 0 || (frag_off & htons(~0x7)) != 0)
+ start = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, &frag_off);
+ if (start < 0 || (frag_off & htons(~0x7)) != 0)
return nf_conntrack_confirm(skb);
+
+ protoff = start;
break;
default:
return nf_conntrack_confirm(skb);
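
The nf_confirm hunk fixes a sign bug: protoff is unsigned, so the old
"protoff < 0" test against ipv6_skip_exthdr()'s int return could never
fire and a parse failure went unnoticed. Catching the result in a signed
int first restores the check. A standalone illustration of the pitfall:

    #include <stdio.h>

    static int skip_exthdr_stub(void) { return -1; }   /* parse failure */

    int main(void)
    {
        unsigned int protoff = skip_exthdr_stub();  /* wraps to huge value */
        int start = skip_exthdr_stub();

        printf("%d\n", protoff < 0);   /* 0: check is always false */
        printf("%d\n", start < 0);     /* 1: failure is detected */
        return 0;
    }
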
@@ -281,16 +284,11 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
/* We only do TCP and SCTP at the moment: is there a better way? */
if (tuple.dst.protonum != IPPROTO_TCP &&
- tuple.dst.protonum != IPPROTO_SCTP) {
- pr_debug("SO_ORIGINAL_DST: Not a TCP/SCTP socket\n");
+ tuple.dst.protonum != IPPROTO_SCTP)
return -ENOPROTOOPT;
- }
- if ((unsigned int)*len < sizeof(struct sockaddr_in)) {
- pr_debug("SO_ORIGINAL_DST: len %d not %zu\n",
- *len, sizeof(struct sockaddr_in));
+ if ((unsigned int)*len < sizeof(struct sockaddr_in))
return -EINVAL;
- }
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
if (h) {
@@ -304,17 +302,12 @@ getorigdst(struct sock *sk, int optval, void __user *user, int *len)
.tuple.dst.u3.ip;
memset(sin.sin_zero, 0, sizeof(sin.sin_zero));
- pr_debug("SO_ORIGINAL_DST: %pI4 %u\n",
- &sin.sin_addr.s_addr, ntohs(sin.sin_port));
nf_ct_put(ct);
if (copy_to_user(user, &sin, sizeof(sin)) != 0)
return -EFAULT;
else
return 0;
}
- pr_debug("SO_ORIGINAL_DST: Can't find %pI4/%u-%pI4/%u.\n",
- &tuple.src.u3.ip, ntohs(tuple.src.u.tcp.port),
- &tuple.dst.u3.ip, ntohs(tuple.dst.u.tcp.port));
return -ENOENT;
}
@@ -357,12 +350,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len)
return -EINVAL;
h = nf_conntrack_find_get(sock_net(sk), &nf_ct_zone_dflt, &tuple);
- if (!h) {
- pr_debug("IP6T_SO_ORIGINAL_DST: Can't find %pI6c/%u-%pI6c/%u.\n",
- &tuple.src.u3.ip6, ntohs(tuple.src.u.tcp.port),
- &tuple.dst.u3.ip6, ntohs(tuple.dst.u.tcp.port));
+ if (!h)
return -ENOENT;
- }
ct = nf_ct_tuplehash_to_ctrack(h);
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index d88b92a8ffca..91eacc9b0b98 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -27,22 +27,16 @@
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_conntrack_timeout.h>
-/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
- closely. They're more complex. --RR
-
- And so for me for SCTP :D -Kiran */
-
static const char *const sctp_conntrack_names[] = {
- "NONE",
- "CLOSED",
- "COOKIE_WAIT",
- "COOKIE_ECHOED",
- "ESTABLISHED",
- "SHUTDOWN_SENT",
- "SHUTDOWN_RECD",
- "SHUTDOWN_ACK_SENT",
- "HEARTBEAT_SENT",
- "HEARTBEAT_ACKED",
+ [SCTP_CONNTRACK_NONE] = "NONE",
+ [SCTP_CONNTRACK_CLOSED] = "CLOSED",
+ [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT",
+ [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED",
+ [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED",
+ [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT",
+ [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD",
+ [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT",
+ [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT",
};
#define SECS * HZ
@@ -54,13 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
[SCTP_CONNTRACK_CLOSED] = 10 SECS,
[SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS,
[SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS,
- [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS,
+ [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS,
[SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000,
[SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS,
[SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS,
- [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS,
- [SCTP_CONNTRACK_DATA_SENT] = 30 SECS,
};
#define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1
@@ -74,8 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = {
#define sSR SCTP_CONNTRACK_SHUTDOWN_RECD
#define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT
#define sHS SCTP_CONNTRACK_HEARTBEAT_SENT
-#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED
-#define sDS SCTP_CONNTRACK_DATA_SENT
#define sIV SCTP_CONNTRACK_MAX
/*
@@ -98,10 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite
CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of
the SHUTDOWN chunk. Connection is closed.
HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow.
-HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK/DATA/SACK in the direction
- opposite to that of the HEARTBEAT/DATA chunk. Secondary connection
- is established.
-DATA_SENT - We have seen a DATA/SACK in a new flow.
*/
/* TODO
@@ -115,38 +101,36 @@ cookie echoed to closed.
*/
/* SCTP conntrack state transitions */
-static const u8 sctp_conntracks[2][12][SCTP_CONNTRACK_MAX] = {
+static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = {
{
/* ORIGINAL */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA, sCW},
-/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},
-/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
-/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS, sCL},
-/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA, sSA},
-/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't have Stale cookie*/
-/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* 5.2.4 - Big TODO */
-/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't come in orig dir */
-/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA, sCL},
-/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS},
-/* data/sack */ {sDS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW},
+/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},
+/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL},
+/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL},
+/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA},
+/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/
+/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */
+/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */
+/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL},
+/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
},
{
/* REPLY */
-/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */
-/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* INIT in sCL Big TODO */
-/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL, sIV},
-/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR, sIV},
-/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA, sIV},
-/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* Can't come in reply dir */
-/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA, sIV},
-/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA, sIV},
-/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sHA},
-/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
-/* data/sack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA},
+/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */
+/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */
+/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV},
+/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV},
+/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV},
+/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV},
+/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV},
+/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */
+/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV},
+/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV},
+/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS},
+/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES},
}
};
@@ -158,6 +142,7 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct)
}
#endif
+/* do_basic_checks ensures sch->length > 0; do not use this macro before it runs */
#define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \
for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
(offset) < (skb)->len && \
@@ -168,7 +153,8 @@ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \
static int do_basic_checks(struct nf_conn *ct,
const struct sk_buff *skb,
unsigned int dataoff,
- unsigned long *map)
+ unsigned long *map,
+ const struct nf_hook_state *state)
{
u_int32_t offset, count;
struct sctp_chunkhdr _sch, *sch;
@@ -177,8 +163,6 @@ static int do_basic_checks(struct nf_conn *ct,
flag = 0;
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
- pr_debug("Chunk Num: %d Type: %d\n", count, sch->type);
-
if (sch->type == SCTP_CID_INIT ||
sch->type == SCTP_CID_INIT_ACK ||
sch->type == SCTP_CID_SHUTDOWN_COMPLETE)
@@ -193,7 +177,9 @@ static int do_basic_checks(struct nf_conn *ct,
sch->type == SCTP_CID_COOKIE_ECHO ||
flag) &&
count != 0) || !sch->length) {
- pr_debug("Basic checks failed\n");
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "%s failed. chunk num %d, type %d, len %d flag %d\n",
+ __func__, count, sch->type, sch->length, flag);
return 1;
}
@@ -201,7 +187,6 @@ static int do_basic_checks(struct nf_conn *ct,
set_bit(sch->type, map);
}
- pr_debug("Basic checks passed\n");
return count == 0;
}
@@ -211,69 +196,47 @@ static int sctp_new_state(enum ip_conntrack_dir dir,
{
int i;
- pr_debug("Chunk type: %d\n", chunk_type);
-
switch (chunk_type) {
case SCTP_CID_INIT:
- pr_debug("SCTP_CID_INIT\n");
i = 0;
break;
case SCTP_CID_INIT_ACK:
- pr_debug("SCTP_CID_INIT_ACK\n");
i = 1;
break;
case SCTP_CID_ABORT:
- pr_debug("SCTP_CID_ABORT\n");
i = 2;
break;
case SCTP_CID_SHUTDOWN:
- pr_debug("SCTP_CID_SHUTDOWN\n");
i = 3;
break;
case SCTP_CID_SHUTDOWN_ACK:
- pr_debug("SCTP_CID_SHUTDOWN_ACK\n");
i = 4;
break;
case SCTP_CID_ERROR:
- pr_debug("SCTP_CID_ERROR\n");
i = 5;
break;
case SCTP_CID_COOKIE_ECHO:
- pr_debug("SCTP_CID_COOKIE_ECHO\n");
i = 6;
break;
case SCTP_CID_COOKIE_ACK:
- pr_debug("SCTP_CID_COOKIE_ACK\n");
i = 7;
break;
case SCTP_CID_SHUTDOWN_COMPLETE:
- pr_debug("SCTP_CID_SHUTDOWN_COMPLETE\n");
i = 8;
break;
case SCTP_CID_HEARTBEAT:
- pr_debug("SCTP_CID_HEARTBEAT");
i = 9;
break;
case SCTP_CID_HEARTBEAT_ACK:
- pr_debug("SCTP_CID_HEARTBEAT_ACK");
i = 10;
break;
- case SCTP_CID_DATA:
- case SCTP_CID_SACK:
- pr_debug("SCTP_CID_DATA/SACK");
- i = 11;
- break;
default:
/* Other chunks like DATA or SACK do not change the state */
- pr_debug("Unknown chunk type, Will stay in %s\n",
- sctp_conntrack_names[cur_state]);
+ pr_debug("Unknown chunk type %d, Will stay in %s\n",
+ chunk_type, sctp_conntrack_names[cur_state]);
return cur_state;
}
- pr_debug("dir: %d cur_state: %s chunk_type: %d new_state: %s\n",
- dir, sctp_conntrack_names[cur_state], chunk_type,
- sctp_conntrack_names[sctp_conntracks[dir][i][cur_state]]);
-
return sctp_conntracks[dir][i][cur_state];
}
@@ -316,9 +279,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
ih->init_tag);
ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag;
- } else if (sch->type == SCTP_CID_HEARTBEAT ||
- sch->type == SCTP_CID_DATA ||
- sch->type == SCTP_CID_SACK) {
+ } else if (sch->type == SCTP_CID_HEARTBEAT) {
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
@@ -392,7 +353,7 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
if (sh == NULL)
goto out;
- if (do_basic_checks(ct, skb, dataoff, map) != 0)
+ if (do_basic_checks(ct, skb, dataoff, map, state) != 0)
goto out;
if (!nf_ct_is_confirmed(ct)) {
@@ -404,19 +365,21 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
if (!sctp_new(ct, skb, sh, dataoff))
return -NF_ACCEPT;
- } else {
- /* Check the verification tag (Sec 8.5) */
- if (!test_bit(SCTP_CID_INIT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
- !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
- !test_bit(SCTP_CID_ABORT, map) &&
- !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
- !test_bit(SCTP_CID_HEARTBEAT, map) &&
- !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
- sh->vtag != ct->proto.sctp.vtag[dir]) {
- pr_debug("Verification tag check failed\n");
- goto out;
- }
+ }
+
+ /* Check the verification tag (Sec 8.5) */
+ if (!test_bit(SCTP_CID_INIT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) &&
+ !test_bit(SCTP_CID_COOKIE_ECHO, map) &&
+ !test_bit(SCTP_CID_ABORT, map) &&
+ !test_bit(SCTP_CID_SHUTDOWN_ACK, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT, map) &&
+ !test_bit(SCTP_CID_HEARTBEAT_ACK, map) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) {
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "verification tag check failed %x vs %x for dir %d",
+ sh->vtag, ct->proto.sctp.vtag[dir], dir);
+ goto out;
}
old_state = new_state = SCTP_CONNTRACK_NONE;
@@ -424,22 +387,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) {
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
- /* Sec 8.5.1 (A) */
+ /* (A) vtag MUST be zero */
if (sh->vtag != 0)
goto out_unlock;
} else if (sch->type == SCTP_CID_ABORT) {
- /* Sec 8.5.1 (B) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir])
+ /* (B) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
- /* Sec 8.5.1 (C) */
- if (sh->vtag != ct->proto.sctp.vtag[dir] &&
- sh->vtag != ct->proto.sctp.vtag[!dir] &&
- sch->flags & SCTP_CHUNK_FLAG_T)
+ /* (C) vtag MUST match own vtag if T flag is unset OR
+ * MUST match peer's vtag if T flag is set
+ */
+ if ((!(sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[dir]) ||
+ ((sch->flags & SCTP_CHUNK_FLAG_T) &&
+ sh->vtag != ct->proto.sctp.vtag[!dir]))
goto out_unlock;
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
- /* Sec 8.5.1 (D) */
+ /* (D) vtag must be the same as the init_vtag found in the INIT_ACK */
if (sh->vtag != ct->proto.sctp.vtag[dir])
goto out_unlock;
} else if (sch->type == SCTP_CID_HEARTBEAT) {
@@ -476,11 +446,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
} else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) {
ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED;
}
- } else if (sch->type == SCTP_CID_DATA || sch->type == SCTP_CID_SACK) {
- if (ct->proto.sctp.vtag[dir] == 0) {
- pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir);
- ct->proto.sctp.vtag[dir] = sh->vtag;
- }
}
old_state = ct->proto.sctp.state;
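
The reworked ABORT and SHUTDOWN COMPLETE branches encode the verification
tag rule of RFC 9260 sec. 8.5.1 (B)/(C): the packet must carry the
receiver's expected vtag when the T bit is clear, and the sender's own
(reflected) vtag when the T bit is set; the old code accepted either tag
regardless of the flag. A standalone predicate capturing the rule, with
illustrative names:

    #include <stdbool.h>
    #include <stdint.h>

    #define CHUNK_FLAG_T 0x01   /* same value as SCTP_CHUNK_FLAG_T */

    /* own_vtag ~ ct->proto.sctp.vtag[dir], peer_vtag ~ vtag[!dir] */
    static bool vtag_ok(uint8_t flags, uint32_t pkt_vtag,
                        uint32_t own_vtag, uint32_t peer_vtag)
    {
        if (flags & CHUNK_FLAG_T)
            return pkt_vtag == peer_vtag;   /* reflected tag */
        return pkt_vtag == own_vtag;        /* normal case */
    }
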
@@ -488,9 +453,10 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
/* Invalid */
if (new_state == SCTP_CONNTRACK_MAX) {
- pr_debug("nf_conntrack_sctp: Invalid dir=%i ctype=%u "
- "conntrack=%u\n",
- dir, sch->type, old_state);
+ nf_ct_l4proto_log_invalid(skb, ct, state,
+ "Invalid, old_state %d, dir %d, type %d",
+ old_state, dir, sch->type);
+
goto out_unlock;
}
@@ -518,8 +484,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
}
ct->proto.sctp.state = new_state;
- if (old_state != new_state)
+ if (old_state != new_state) {
nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+ if (new_state == SCTP_CONNTRACK_ESTABLISHED &&
+ !test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
+ nf_conntrack_event_cache(IPCT_ASSURED, ct);
+ }
}
spin_unlock_bh(&ct->lock);
@@ -533,14 +503,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct,
nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]);
- if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED &&
- dir == IP_CT_DIR_REPLY &&
- new_state == SCTP_CONNTRACK_ESTABLISHED) {
- pr_debug("Setting assured bit\n");
- set_bit(IPS_ASSURED_BIT, &ct->status);
- nf_conntrack_event_cache(IPCT_ASSURED, ct);
- }
-
return NF_ACCEPT;
out_unlock:
@@ -701,7 +663,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = {
[CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 },
[CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 },
- [CTA_TIMEOUT_SCTP_DATA_SENT] = { .type = NLA_U32 },
};
#endif /* CONFIG_NF_CONNTRACK_TIMEOUT */
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index 656631083177..4018acb1d674 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -911,6 +911,41 @@ static bool tcp_can_early_drop(const struct nf_conn *ct)
return false;
}
+void nf_conntrack_tcp_set_closing(struct nf_conn *ct)
+{
+ enum tcp_conntrack old_state;
+ const unsigned int *timeouts;
+ u32 timeout;
+
+ if (!nf_ct_is_confirmed(ct))
+ return;
+
+ spin_lock_bh(&ct->lock);
+ old_state = ct->proto.tcp.state;
+ ct->proto.tcp.state = TCP_CONNTRACK_CLOSE;
+
+ if (old_state == TCP_CONNTRACK_CLOSE ||
+ test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status)) {
+ spin_unlock_bh(&ct->lock);
+ return;
+ }
+
+ timeouts = nf_ct_timeout_lookup(ct);
+ if (!timeouts) {
+ const struct nf_tcp_net *tn;
+
+ tn = nf_tcp_pernet(nf_ct_net(ct));
+ timeouts = tn->timeouts;
+ }
+
+ timeout = timeouts[TCP_CONNTRACK_CLOSE];
+ WRITE_ONCE(ct->timeout, timeout + nfct_time_stamp);
+
+ spin_unlock_bh(&ct->lock);
+
+ nf_conntrack_event_cache(IPCT_PROTOINFO, ct);
+}
+
static void nf_ct_tcp_state_reset(struct ip_ct_tcp_state *state)
{
state->td_end = 0;
@@ -930,7 +965,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
{
struct net *net = nf_ct_net(ct);
struct nf_tcp_net *tn = nf_tcp_pernet(net);
- struct nf_conntrack_tuple *tuple;
enum tcp_conntrack new_state, old_state;
unsigned int index, *timeouts;
enum nf_ct_tcp_action res;
@@ -954,7 +988,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(th);
new_state = tcp_conntracks[dir][index][old_state];
- tuple = &ct->tuplehash[dir].tuple;
switch (new_state) {
case TCP_CONNTRACK_SYN_SENT:
@@ -1068,6 +1101,13 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_flags |=
IP_CT_EXP_CHALLENGE_ACK;
}
+
+ /* possible challenge ack reply to syn */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ index == TCP_ACK_SET &&
+ dir == IP_CT_DIR_REPLY)
+ ct->proto.tcp.last_ack = ntohl(th->ack_seq);
+
spin_unlock_bh(&ct->lock);
nf_ct_l4proto_log_invalid(skb, ct, state,
"packet (index %d) in dir %d ignored, state %s",
@@ -1193,6 +1233,14 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
* segments we ignored. */
goto in_window;
}
+
+ /* Reset in response to a challenge-ack we let through earlier */
+ if (old_state == TCP_CONNTRACK_SYN_SENT &&
+ ct->proto.tcp.last_index == TCP_ACK_SET &&
+ ct->proto.tcp.last_dir == IP_CT_DIR_REPLY &&
+ ntohl(th->seq) == ct->proto.tcp.last_ack)
+ goto in_window;
+
break;
default:
/* Keep compilers happy. */
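
Together, the two TCP hunks recognize the RFC 5961 challenge-ACK
exchange: a peer that still holds an old established connection answers a
fresh SYN with a bare ACK, and the initiator then resets it with seq
equal to that ACK value. The first hunk records ack_seq when such an ACK
arrives in SYN_SENT; the second lets the matching RST through so the
stale entry can be torn down. A standalone sketch of the match, with
illustrative names:

    #include <stdbool.h>
    #include <stdint.h>

    struct ct_tcp_sketch {
        uint32_t last_ack;   /* ack_seq of the challenge ACK we saw */
        bool     challenge;  /* it arrived in reply dir during SYN_SENT */
    };

    static bool rst_clears_stale(const struct ct_tcp_sketch *ct,
                                 uint32_t rst_seq)
    {
        /* A reset answering a challenge ACK carries seq == last_ack. */
        return ct->challenge && rst_seq == ct->last_ack;
    }
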
@@ -1217,13 +1265,6 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
ct->proto.tcp.last_index = index;
ct->proto.tcp.last_dir = dir;
- pr_debug("tcp_conntracks: ");
- nf_ct_dump_tuple(tuple);
- pr_debug("syn=%i ack=%i fin=%i rst=%i old=%i new=%i\n",
- (th->syn ? 1 : 0), (th->ack ? 1 : 0),
- (th->fin ? 1 : 0), (th->rst ? 1 : 0),
- old_state, new_state);
-
ct->proto.tcp.state = new_state;
if (old_state != new_state
&& new_state == TCP_CONNTRACK_FIN_WAIT)
diff --git a/net/netfilter/nf_conntrack_proto_udp.c b/net/netfilter/nf_conntrack_proto_udp.c
index 3b516cffc779..0030fbe8885c 100644
--- a/net/netfilter/nf_conntrack_proto_udp.c
+++ b/net/netfilter/nf_conntrack_proto_udp.c
@@ -88,6 +88,7 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
const struct nf_hook_state *state)
{
unsigned int *timeouts;
+ unsigned long status;
if (udp_error(skb, dataoff, state))
return -NF_ACCEPT;
@@ -96,26 +97,27 @@ int nf_conntrack_udp_packet(struct nf_conn *ct,
if (!timeouts)
timeouts = udp_get_timeouts(nf_ct_net(ct));
- if (!nf_ct_is_confirmed(ct))
+ status = READ_ONCE(ct->status);
+ if ((status & IPS_CONFIRMED) == 0)
ct->proto.udp.stream_ts = 2 * HZ + jiffies;
/* If we've seen traffic both ways, this is some kind of UDP
* stream. Set Assured.
*/
- if (test_bit(IPS_SEEN_REPLY_BIT, &ct->status)) {
+ if (status & IPS_SEEN_REPLY) {
unsigned long extra = timeouts[UDP_CT_UNREPLIED];
bool stream = false;
/* Still active after two seconds? Extend timeout. */
if (time_after(jiffies, ct->proto.udp.stream_ts)) {
extra = timeouts[UDP_CT_REPLIED];
- stream = true;
+ stream = (status & IPS_ASSURED) == 0;
}
nf_ct_refresh_acct(ct, ctinfo, skb, extra);
/* never set ASSURED for IPS_NAT_CLASH, they time out soon */
- if (unlikely((ct->status & IPS_NAT_CLASH)))
+ if (unlikely((status & IPS_NAT_CLASH)))
return NF_ACCEPT;
/* Also, more likely to be important, and not a probe */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 0250725e38a4..57f6724c99a7 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -275,7 +275,7 @@ static const char* l4proto_name(u16 proto)
return "unknown";
}
-static unsigned int
+static void
seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
{
struct nf_conn_acct *acct;
@@ -283,14 +283,12 @@ seq_print_acct(struct seq_file *s, const struct nf_conn *ct, int dir)
acct = nf_conn_acct_find(ct);
if (!acct)
- return 0;
+ return;
counter = acct->counter;
seq_printf(s, "packets=%llu bytes=%llu ",
(unsigned long long)atomic64_read(&counter[dir].packets),
(unsigned long long)atomic64_read(&counter[dir].bytes));
-
- return 0;
}
/* return 0 on success, 1 in case of error */
@@ -342,8 +340,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
if (seq_has_overflowed(s))
goto release;
- if (seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL))
- goto release;
+ seq_print_acct(s, ct, IP_CT_DIR_ORIGINAL);
if (!(test_bit(IPS_SEEN_REPLY_BIT, &ct->status)))
seq_puts(s, "[UNREPLIED] ");
@@ -352,8 +349,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
ct_show_zone(s, ct, NF_CT_ZONE_DIR_REPL);
- if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
- goto release;
+ seq_print_acct(s, ct, IP_CT_DIR_REPLY);
if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
seq_puts(s, "[HW_OFFLOAD] ");
@@ -601,8 +597,6 @@ enum nf_ct_sysctl_index {
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT,
NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED,
- NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT,
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST,
@@ -887,18 +881,6 @@ static struct ctl_table nf_ct_sysctl_table[] = {
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = {
- .procname = "nf_conntrack_sctp_timeout_heartbeat_acked",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
- [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT] = {
- .procname = "nf_conntrack_sctp_timeout_data_sent",
- .maxlen = sizeof(unsigned int),
- .mode = 0644,
- .proc_handler = proc_dointvec_jiffies,
- },
#endif
#ifdef CONFIG_NF_CT_PROTO_DCCP
[NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = {
@@ -1042,8 +1024,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net,
XASSIGN(SHUTDOWN_RECD, sn);
XASSIGN(SHUTDOWN_ACK_SENT, sn);
XASSIGN(HEARTBEAT_SENT, sn);
- XASSIGN(HEARTBEAT_ACKED, sn);
- XASSIGN(DATA_SENT, sn);
#undef XASSIGN
#endif
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 81c26a96c30b..04bd0ed4d2ae 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -193,8 +193,11 @@ static void flow_offload_fixup_ct(struct nf_conn *ct)
timeout -= tn->offload_timeout;
} else if (l4num == IPPROTO_UDP) {
struct nf_udp_net *tn = nf_udp_pernet(net);
+ enum udp_conntrack state =
+ test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ UDP_CT_REPLIED : UDP_CT_UNREPLIED;
- timeout = tn->timeouts[UDP_CT_REPLIED];
+ timeout = tn->timeouts[state];
timeout -= tn->offload_timeout;
} else {
return;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 0ccabf3fa6aa..9505f9d188ff 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -39,7 +39,7 @@ nf_flow_offload_inet_hook(void *priv, struct sk_buff *skb,
}
static int nf_flow_rule_route_inet(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 4d9b99abe37d..1c26f03fc661 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -679,7 +679,7 @@ nf_flow_rule_route_common(struct net *net, const struct flow_offload *flow,
return 0;
}
-int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@@ -704,7 +704,7 @@ int nf_flow_rule_route_ipv4(struct net *net, const struct flow_offload *flow,
}
EXPORT_SYMBOL_GPL(nf_flow_rule_route_ipv4);
-int nf_flow_rule_route_ipv6(struct net *net, const struct flow_offload *flow,
+int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
enum flow_offload_tuple_dir dir,
struct nf_flow_rule *flow_rule)
{
@@ -735,7 +735,7 @@ nf_flow_offload_rule_alloc(struct net *net,
{
const struct nf_flowtable *flowtable = offload->flowtable;
const struct flow_offload_tuple *tuple, *other_tuple;
- const struct flow_offload *flow = offload->flow;
+ struct flow_offload *flow = offload->flow;
struct dst_entry *other_dst = NULL;
struct nf_flow_rule *flow_rule;
int err = -ENOMEM;
@@ -895,8 +895,9 @@ static int flow_offload_rule_add(struct flow_offload_work *offload,
ok_count += flow_offload_tuple_add(offload, flow_rule[0],
FLOW_OFFLOAD_DIR_ORIGINAL);
- ok_count += flow_offload_tuple_add(offload, flow_rule[1],
- FLOW_OFFLOAD_DIR_REPLY);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ ok_count += flow_offload_tuple_add(offload, flow_rule[1],
+ FLOW_OFFLOAD_DIR_REPLY);
if (ok_count == 0)
return -ENOENT;
@@ -926,7 +927,8 @@ static void flow_offload_work_del(struct flow_offload_work *offload)
{
clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
- flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
}
@@ -946,7 +948,9 @@ static void flow_offload_work_stats(struct flow_offload_work *offload)
u64 lastused;
flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_ORIGINAL, &stats[0]);
- flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY, &stats[1]);
+ if (test_bit(NF_FLOW_HW_BIDIRECTIONAL, &offload->flow->flags))
+ flow_offload_tuple_stats(offload, FLOW_OFFLOAD_DIR_REPLY,
+ &stats[1]);
lastused = max_t(u64, stats[0].lastused, stats[1].lastused);
offload->flow->timeout = max_t(u64, offload->flow->timeout,
diff --git a/net/netfilter/nf_log_syslog.c b/net/netfilter/nf_log_syslog.c
index cb894f0d63e9..c66689ad2b49 100644
--- a/net/netfilter/nf_log_syslog.c
+++ b/net/netfilter/nf_log_syslog.c
@@ -322,7 +322,7 @@ dump_ipv4_packet(struct net *net, struct nf_log_buf *m,
/* Max length: 46 "LEN=65535 TOS=0xFF PREC=0xFF TTL=255 ID=65535 " */
nf_log_buf_add(m, "LEN=%u TOS=0x%02X PREC=0x%02X TTL=%u ID=%u ",
- ntohs(ih->tot_len), ih->tos & IPTOS_TOS_MASK,
+ iph_totlen(skb, ih), ih->tos & IPTOS_TOS_MASK,
ih->tos & IPTOS_PREC_MASK, ih->ttl, ntohs(ih->id));
/* Max length: 6 "CE DF MF " */
diff --git a/net/netfilter/nf_nat_bpf.c b/net/netfilter/nf_nat_bpf.c
index 0fa5a0bbb0ff..141ee7783223 100644
--- a/net/netfilter/nf_nat_bpf.c
+++ b/net/netfilter/nf_nat_bpf.c
@@ -30,9 +30,9 @@ __diag_ignore_all("-Wmissing-prototypes",
* interpreted as selecting a random port.
* @manip - NF_NAT_MANIP_SRC or NF_NAT_MANIP_DST
*/
-int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
- union nf_inet_addr *addr, int port,
- enum nf_nat_manip_type manip)
+__bpf_kfunc int bpf_ct_set_nat_info(struct nf_conn___init *nfct,
+ union nf_inet_addr *addr, int port,
+ enum nf_nat_manip_type manip)
{
struct nf_conn *ct = (struct nf_conn *)nfct;
u16 proto = nf_ct_l3num(ct);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 832b881f7c17..6004d4b24451 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -465,8 +465,9 @@ static int nft_delrule_by_chain(struct nft_ctx *ctx)
return 0;
}
-static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
- struct nft_set *set)
+static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set,
+ const struct nft_set_desc *desc)
{
struct nft_trans *trans;
@@ -474,17 +475,28 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
if (trans == NULL)
return -ENOMEM;
- if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] != NULL) {
+ if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) {
nft_trans_set_id(trans) =
ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID]));
nft_activate_next(ctx->net, set);
}
nft_trans_set(trans) = set;
+ if (desc) {
+ nft_trans_set_update(trans) = true;
+ nft_trans_set_gc_int(trans) = desc->gc_int;
+ nft_trans_set_timeout(trans) = desc->timeout;
+ }
nft_trans_commit_list_add_tail(ctx->net, trans);
return 0;
}
+static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
+ struct nft_set *set)
+{
+ return __nft_trans_set_add(ctx, msg_type, set, NULL);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -1389,6 +1401,10 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info,
}
if (IS_ERR(table)) {
+ if (PTR_ERR(table) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYTABLE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(table);
}
@@ -2627,6 +2643,10 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
chain = nft_chain_lookup(net, table, attr, genmask);
}
if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYCHAIN)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(chain);
}
@@ -3704,6 +3724,10 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
chain = nft_chain_lookup(net, table, nla[NFTA_RULE_CHAIN],
genmask);
if (IS_ERR(chain)) {
+ if (PTR_ERR(chain) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN]);
return PTR_ERR(chain);
}
@@ -3717,6 +3741,10 @@ static int nf_tables_delrule(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_RULE_HANDLE]) {
rule = nft_rule_lookup(chain, nla[NFTA_RULE_HANDLE]);
if (IS_ERR(rule)) {
+ if (PTR_ERR(rule) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYRULE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_HANDLE]);
return PTR_ERR(rule);
}
@@ -3780,8 +3808,7 @@ static bool nft_set_ops_candidate(const struct nft_set_type *type, u32 flags)
static const struct nft_set_ops *
nft_select_set_ops(const struct nft_ctx *ctx,
const struct nlattr * const nla[],
- const struct nft_set_desc *desc,
- enum nft_set_policies policy)
+ const struct nft_set_desc *desc)
{
struct nftables_pernet *nft_net = nft_pernet(ctx->net);
const struct nft_set_ops *ops, *bops;
@@ -3810,7 +3837,7 @@ nft_select_set_ops(const struct nft_ctx *ctx,
if (!ops->estimate(desc, flags, &est))
continue;
- switch (policy) {
+ switch (desc->policy) {
case NFT_SET_POL_PERFORMANCE:
if (est.lookup < best.lookup)
break;
@@ -4045,8 +4072,10 @@ static int nf_tables_fill_set_concat(struct sk_buff *skb,
static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
const struct nft_set *set, u16 event, u16 flags)
{
- struct nlmsghdr *nlh;
+ u64 timeout = READ_ONCE(set->timeout);
+ u32 gc_int = READ_ONCE(set->gc_int);
u32 portid = ctx->portid;
+ struct nlmsghdr *nlh;
struct nlattr *nest;
u32 seq = ctx->seq;
int i;
@@ -4082,13 +4111,13 @@ static int nf_tables_fill_set(struct sk_buff *skb, const struct nft_ctx *ctx,
nla_put_be32(skb, NFTA_SET_OBJ_TYPE, htonl(set->objtype)))
goto nla_put_failure;
- if (set->timeout &&
+ if (timeout &&
nla_put_be64(skb, NFTA_SET_TIMEOUT,
- nf_jiffies64_to_msecs(set->timeout),
+ nf_jiffies64_to_msecs(timeout),
NFTA_SET_PAD))
goto nla_put_failure;
- if (set->gc_int &&
- nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(set->gc_int)))
+ if (gc_int &&
+ nla_put_be32(skb, NFTA_SET_GC_INTERVAL, htonl(gc_int)))
goto nla_put_failure;
if (set->policy != NFT_SET_POL_PERFORMANCE) {
@@ -4389,15 +4418,94 @@ static int nf_tables_set_desc_parse(struct nft_set_desc *desc,
return err;
}
+static int nft_set_expr_alloc(struct nft_ctx *ctx, struct nft_set *set,
+ const struct nlattr * const *nla,
+ struct nft_expr **exprs, int *num_exprs,
+ u32 flags)
+{
+ struct nft_expr *expr;
+ int err, i;
+
+ if (nla[NFTA_SET_EXPR]) {
+ expr = nft_set_elem_expr_alloc(ctx, set, nla[NFTA_SET_EXPR]);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[0] = expr;
+ (*num_exprs)++;
+ } else if (nla[NFTA_SET_EXPRESSIONS]) {
+ struct nlattr *tmp;
+ int left;
+
+ if (!(flags & NFT_SET_EXPR)) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ i = 0;
+ nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
+ if (i == NFT_SET_EXPR_MAX) {
+ err = -E2BIG;
+ goto err_set_expr_alloc;
+ }
+ if (nla_type(tmp) != NFTA_LIST_ELEM) {
+ err = -EINVAL;
+ goto err_set_expr_alloc;
+ }
+ expr = nft_set_elem_expr_alloc(ctx, set, tmp);
+ if (IS_ERR(expr)) {
+ err = PTR_ERR(expr);
+ goto err_set_expr_alloc;
+ }
+ exprs[i++] = expr;
+ (*num_exprs)++;
+ }
+ }
+
+ return 0;
+
+err_set_expr_alloc:
+ for (i = 0; i < *num_exprs; i++)
+ nft_expr_destroy(ctx, exprs[i]);
+
+ return err;
+}
+
+static bool nft_set_is_same(const struct nft_set *set,
+ const struct nft_set_desc *desc,
+ struct nft_expr *exprs[], u32 num_exprs, u32 flags)
+{
+ int i;
+
+ if (set->ktype != desc->ktype ||
+ set->dtype != desc->dtype ||
+ set->flags != flags ||
+ set->klen != desc->klen ||
+ set->dlen != desc->dlen ||
+ set->field_count != desc->field_count ||
+ set->num_exprs != num_exprs)
+ return false;
+
+ for (i = 0; i < desc->field_count; i++) {
+ if (set->field_len[i] != desc->field_len[i])
+ return false;
+ }
+
+ for (i = 0; i < num_exprs; i++) {
+ if (set->exprs[i]->ops != exprs[i]->ops)
+ return false;
+ }
+
+ return true;
+}
+
static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const nla[])
{
- u32 ktype, dtype, flags, policy, gc_int, objtype;
struct netlink_ext_ack *extack = info->extack;
u8 genmask = nft_genmask_next(info->net);
u8 family = info->nfmsg->nfgen_family;
const struct nft_set_ops *ops;
- struct nft_expr *expr = NULL;
struct net *net = info->net;
struct nft_set_desc desc;
struct nft_table *table;
@@ -4405,10 +4513,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
struct nft_set *set;
struct nft_ctx ctx;
size_t alloc_size;
- u64 timeout;
+ int num_exprs = 0;
char *name;
int err, i;
u16 udlen;
+ u32 flags;
u64 size;
if (nla[NFTA_SET_TABLE] == NULL ||
@@ -4419,10 +4528,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
memset(&desc, 0, sizeof(desc));
- ktype = NFT_DATA_VALUE;
+ desc.ktype = NFT_DATA_VALUE;
if (nla[NFTA_SET_KEY_TYPE] != NULL) {
- ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
- if ((ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
+ desc.ktype = ntohl(nla_get_be32(nla[NFTA_SET_KEY_TYPE]));
+ if ((desc.ktype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK)
return -EINVAL;
}
@@ -4447,17 +4556,17 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
}
- dtype = 0;
+ desc.dtype = 0;
if (nla[NFTA_SET_DATA_TYPE] != NULL) {
if (!(flags & NFT_SET_MAP))
return -EINVAL;
- dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
- if ((dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
- dtype != NFT_DATA_VERDICT)
+ desc.dtype = ntohl(nla_get_be32(nla[NFTA_SET_DATA_TYPE]));
+ if ((desc.dtype & NFT_DATA_RESERVED_MASK) == NFT_DATA_RESERVED_MASK &&
+ desc.dtype != NFT_DATA_VERDICT)
return -EINVAL;
- if (dtype != NFT_DATA_VERDICT) {
+ if (desc.dtype != NFT_DATA_VERDICT) {
if (nla[NFTA_SET_DATA_LEN] == NULL)
return -EINVAL;
desc.dlen = ntohl(nla_get_be32(nla[NFTA_SET_DATA_LEN]));
@@ -4472,34 +4581,34 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_OBJECT))
return -EINVAL;
- objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
- if (objtype == NFT_OBJECT_UNSPEC ||
- objtype > NFT_OBJECT_MAX)
+ desc.objtype = ntohl(nla_get_be32(nla[NFTA_SET_OBJ_TYPE]));
+ if (desc.objtype == NFT_OBJECT_UNSPEC ||
+ desc.objtype > NFT_OBJECT_MAX)
return -EOPNOTSUPP;
} else if (flags & NFT_SET_OBJECT)
return -EINVAL;
else
- objtype = NFT_OBJECT_UNSPEC;
+ desc.objtype = NFT_OBJECT_UNSPEC;
- timeout = 0;
+ desc.timeout = 0;
if (nla[NFTA_SET_TIMEOUT] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &timeout);
+ err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
}
- gc_int = 0;
+ desc.gc_int = 0;
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
- gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
+ desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
- policy = NFT_SET_POL_PERFORMANCE;
+ desc.policy = NFT_SET_POL_PERFORMANCE;
if (nla[NFTA_SET_POLICY] != NULL)
- policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
+ desc.policy = ntohl(nla_get_be32(nla[NFTA_SET_POLICY]));
if (nla[NFTA_SET_DESC] != NULL) {
err = nf_tables_set_desc_parse(&desc, nla[NFTA_SET_DESC]);
@@ -4531,6 +4640,8 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
return PTR_ERR(set);
}
} else {
+ struct nft_expr *exprs[NFT_SET_EXPR_MAX] = {};
+
if (info->nlh->nlmsg_flags & NLM_F_EXCL) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
return -EEXIST;
@@ -4538,13 +4649,29 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- return 0;
+ err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
+ if (err < 0)
+ return err;
+
+ err = 0;
+ if (!nft_set_is_same(set, &desc, exprs, num_exprs, flags)) {
+ NL_SET_BAD_ATTR(extack, nla[NFTA_SET_NAME]);
+ err = -EEXIST;
+ }
+
+ for (i = 0; i < num_exprs; i++)
+ nft_expr_destroy(&ctx, exprs[i]);
+
+ if (err < 0)
+ return err;
+
+ return __nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set, &desc);
}
if (!(info->nlh->nlmsg_flags & NLM_F_CREATE))
return -ENOENT;
- ops = nft_select_set_ops(&ctx, nla, &desc, policy);
+ ops = nft_select_set_ops(&ctx, nla, &desc);
if (IS_ERR(ops))
return PTR_ERR(ops);
@@ -4584,18 +4711,18 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->table = table;
write_pnet(&set->net, net);
set->ops = ops;
- set->ktype = ktype;
+ set->ktype = desc.ktype;
set->klen = desc.klen;
- set->dtype = dtype;
- set->objtype = objtype;
+ set->dtype = desc.dtype;
+ set->objtype = desc.objtype;
set->dlen = desc.dlen;
set->flags = flags;
set->size = desc.size;
- set->policy = policy;
+ set->policy = desc.policy;
set->udlen = udlen;
set->udata = udata;
- set->timeout = timeout;
- set->gc_int = gc_int;
+ set->timeout = desc.timeout;
+ set->gc_int = desc.gc_int;
set->field_count = desc.field_count;
for (i = 0; i < desc.field_count; i++)
@@ -4605,43 +4732,11 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (err < 0)
goto err_set_init;
- if (nla[NFTA_SET_EXPR]) {
- expr = nft_set_elem_expr_alloc(&ctx, set, nla[NFTA_SET_EXPR]);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[0] = expr;
- set->num_exprs++;
- } else if (nla[NFTA_SET_EXPRESSIONS]) {
- struct nft_expr *expr;
- struct nlattr *tmp;
- int left;
-
- if (!(flags & NFT_SET_EXPR)) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- i = 0;
- nla_for_each_nested(tmp, nla[NFTA_SET_EXPRESSIONS], left) {
- if (i == NFT_SET_EXPR_MAX) {
- err = -E2BIG;
- goto err_set_expr_alloc;
- }
- if (nla_type(tmp) != NFTA_LIST_ELEM) {
- err = -EINVAL;
- goto err_set_expr_alloc;
- }
- expr = nft_set_elem_expr_alloc(&ctx, set, tmp);
- if (IS_ERR(expr)) {
- err = PTR_ERR(expr);
- goto err_set_expr_alloc;
- }
- set->exprs[i++] = expr;
- set->num_exprs++;
- }
- }
+ err = nft_set_expr_alloc(&ctx, set, nla, set->exprs, &num_exprs, flags);
+ if (err < 0)
+ goto err_set_destroy;
+ set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
@@ -4655,7 +4750,7 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
-
+err_set_destroy:
ops->destroy(set);
err_set_init:
kfree(set->name);
@@ -4729,6 +4824,10 @@ static int nf_tables_delset(struct sk_buff *skb, const struct nfnl_info *info,
}
if (IS_ERR(set)) {
+ if (PTR_ERR(set) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSET)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(set);
}
@@ -5408,7 +5507,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
- genmask, NETLINK_CB(skb).portid);
+ genmask, 0);
if (IS_ERR(table)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_SET_ELEM_LIST_TABLE]);
return PTR_ERR(table);
@@ -6008,7 +6107,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
} else if (set->flags & NFT_SET_TIMEOUT &&
!(flags & NFT_SET_ELEM_INTERVAL_END)) {
- timeout = set->timeout;
+ timeout = READ_ONCE(set->timeout);
}
expiration = 0;
@@ -6109,7 +6208,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (err < 0)
goto err_parse_key_end;
- if (timeout != set->timeout) {
+ if (timeout != READ_ONCE(set->timeout)) {
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_TIMEOUT);
if (err < 0)
goto err_parse_key_end;
@@ -6611,6 +6710,10 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
err = nft_del_setelem(&ctx, set, attr);
+ if (err == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYSETELEM)
+ continue;
+
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
break;
@@ -6920,6 +7023,9 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
type = __nft_obj_type_get(objtype);
+ if (WARN_ON_ONCE(!type))
+ return -ENOENT;
+
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
return nf_tables_updobj(&ctx, type, nla[NFTA_OBJ_DATA], obj);
@@ -7255,6 +7361,10 @@ static int nf_tables_delobj(struct sk_buff *skb, const struct nfnl_info *info,
}
if (IS_ERR(obj)) {
+ if (PTR_ERR(obj) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYOBJ)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(obj);
}
@@ -7885,6 +7995,10 @@ static int nf_tables_delflowtable(struct sk_buff *skb,
}
if (IS_ERR(flowtable)) {
+ if (PTR_ERR(flowtable) == -ENOENT &&
+ NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_DESTROYFLOWTABLE)
+ return 0;
+
NL_SET_BAD_ATTR(extack, attr);
return PTR_ERR(flowtable);
}
@@ -8294,6 +8408,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_TABLE_MAX,
.policy = nft_table_policy,
},
+ [NFT_MSG_DESTROYTABLE] = {
+ .call = nf_tables_deltable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_TABLE_MAX,
+ .policy = nft_table_policy,
+ },
[NFT_MSG_NEWCHAIN] = {
.call = nf_tables_newchain,
.type = NFNL_CB_BATCH,
@@ -8312,6 +8432,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_CHAIN_MAX,
.policy = nft_chain_policy,
},
+ [NFT_MSG_DESTROYCHAIN] = {
+ .call = nf_tables_delchain,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_CHAIN_MAX,
+ .policy = nft_chain_policy,
+ },
[NFT_MSG_NEWRULE] = {
.call = nf_tables_newrule,
.type = NFNL_CB_BATCH,
@@ -8336,6 +8462,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_RULE_MAX,
.policy = nft_rule_policy,
},
+ [NFT_MSG_DESTROYRULE] = {
+ .call = nf_tables_delrule,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_RULE_MAX,
+ .policy = nft_rule_policy,
+ },
[NFT_MSG_NEWSET] = {
.call = nf_tables_newset,
.type = NFNL_CB_BATCH,
@@ -8354,6 +8486,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_MAX,
.policy = nft_set_policy,
},
+ [NFT_MSG_DESTROYSET] = {
+ .call = nf_tables_delset,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_MAX,
+ .policy = nft_set_policy,
+ },
[NFT_MSG_NEWSETELEM] = {
.call = nf_tables_newsetelem,
.type = NFNL_CB_BATCH,
@@ -8372,6 +8510,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_DESTROYSETELEM] = {
+ .call = nf_tables_delsetelem,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
[NFT_MSG_GETGEN] = {
.call = nf_tables_getgen,
.type = NFNL_CB_RCU,
@@ -8394,6 +8538,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_OBJ_MAX,
.policy = nft_obj_policy,
},
+ [NFT_MSG_DESTROYOBJ] = {
+ .call = nf_tables_delobj,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_OBJ_MAX,
+ .policy = nft_obj_policy,
+ },
[NFT_MSG_GETOBJ_RESET] = {
.call = nf_tables_getobj,
.type = NFNL_CB_RCU,
@@ -8418,6 +8568,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_FLOWTABLE_MAX,
.policy = nft_flowtable_policy,
},
+ [NFT_MSG_DESTROYFLOWTABLE] = {
+ .call = nf_tables_delflowtable,
+ .type = NFNL_CB_BATCH,
+ .attr_count = NFTA_FLOWTABLE_MAX,
+ .policy = nft_flowtable_policy,
+ },
};
static int nf_tables_validate(struct net *net)
@@ -8511,6 +8667,7 @@ static void nft_commit_release(struct nft_trans *trans)
{
switch (trans->msg_type) {
case NFT_MSG_DELTABLE:
+ case NFT_MSG_DESTROYTABLE:
nf_tables_table_destroy(&trans->ctx);
break;
case NFT_MSG_NEWCHAIN:
@@ -8518,23 +8675,29 @@ static void nft_commit_release(struct nft_trans *trans)
kfree(nft_trans_chain_name(trans));
break;
case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
nf_tables_chain_destroy(&trans->ctx);
break;
case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans));
break;
case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
nft_set_destroy(&trans->ctx, nft_trans_set(trans));
break;
case NFT_MSG_DELSETELEM:
+ case NFT_MSG_DESTROYSETELEM:
nf_tables_set_elem_destroy(&trans->ctx,
nft_trans_elem_set(trans),
nft_trans_elem(trans).priv);
break;
case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
break;
case NFT_MSG_DELFLOWTABLE:
+ case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans))
nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
else
@@ -8986,8 +9149,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELTABLE:
+ case NFT_MSG_DESTROYTABLE:
list_del_rcu(&trans->ctx.table->list);
- nf_tables_table_notify(&trans->ctx, NFT_MSG_DELTABLE);
+ nf_tables_table_notify(&trans->ctx, trans->msg_type);
break;
case NFT_MSG_NEWCHAIN:
if (nft_trans_chain_update(trans)) {
@@ -9002,8 +9166,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
break;
case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
nft_chain_del(trans->ctx.chain);
- nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN);
+ nf_tables_chain_notify(&trans->ctx, trans->msg_type);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
trans->ctx.chain);
@@ -9019,10 +9184,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
list_del_rcu(&nft_trans_rule(trans)->list);
nf_tables_rule_notify(&trans->ctx,
nft_trans_rule(trans),
- NFT_MSG_DELRULE);
+ trans->msg_type);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
NFT_TRANS_COMMIT);
@@ -9031,22 +9197,29 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_NEWSET:
- nft_clear(net, nft_trans_set(trans));
- /* This avoids hitting -EBUSY when deleting the table
- * from the transaction.
- */
- if (nft_set_is_anonymous(nft_trans_set(trans)) &&
- !list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ if (nft_trans_set_update(trans)) {
+ struct nft_set *set = nft_trans_set(trans);
+ WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
+ WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+ } else {
+ nft_clear(net, nft_trans_set(trans));
+ /* This avoids hitting -EBUSY when deleting the table
+ * from the transaction.
+ */
+ if (nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !list_empty(&nft_trans_set(trans)->bindings))
+ trans->ctx.table->use--;
+ }
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
list_del_rcu(&nft_trans_set(trans)->list);
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
- NFT_MSG_DELSET, GFP_KERNEL);
+ trans->msg_type, GFP_KERNEL);
break;
case NFT_MSG_NEWSETELEM:
te = (struct nft_trans_elem *)trans->data;
@@ -9058,11 +9231,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
+ case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
- NFT_MSG_DELSETELEM);
+ trans->msg_type);
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem)) {
atomic_dec(&te->set->nelems);
@@ -9084,9 +9258,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
break;
case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
nft_obj_del(nft_trans_obj(trans));
nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans),
- NFT_MSG_DELOBJ);
+ trans->msg_type);
break;
case NFT_MSG_NEWFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
@@ -9108,11 +9283,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nft_trans_destroy(trans);
break;
case NFT_MSG_DELFLOWTABLE:
+ case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable_hooks(trans),
- NFT_MSG_DELFLOWTABLE);
+ trans->msg_type);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
@@ -9120,7 +9296,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_flowtable_notify(&trans->ctx,
nft_trans_flowtable(trans),
&nft_trans_flowtable(trans)->hook_list,
- NFT_MSG_DELFLOWTABLE);
+ trans->msg_type);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
}
@@ -9216,6 +9392,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELTABLE:
+ case NFT_MSG_DESTROYTABLE:
nft_clear(trans->ctx.net, trans->ctx.table);
nft_trans_destroy(trans);
break;
@@ -9237,6 +9414,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELCHAIN:
+ case NFT_MSG_DESTROYCHAIN:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, trans->ctx.chain);
nft_trans_destroy(trans);
@@ -9251,6 +9429,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_flow_rule_destroy(nft_trans_flow_rule(trans));
break;
case NFT_MSG_DELRULE:
+ case NFT_MSG_DESTROYRULE:
trans->ctx.chain->use++;
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
@@ -9260,6 +9439,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSET:
+ if (nft_trans_set_update(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
trans->ctx.table->use--;
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
@@ -9268,6 +9451,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_del_rcu(&nft_trans_set(trans)->list);
break;
case NFT_MSG_DELSET:
+ case NFT_MSG_DESTROYSET:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_set(trans));
nft_trans_destroy(trans);
@@ -9283,6 +9467,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
atomic_dec(&te->set->nelems);
break;
case NFT_MSG_DELSETELEM:
+ case NFT_MSG_DESTROYSETELEM:
te = (struct nft_trans_elem *)trans->data;
nft_setelem_data_activate(net, te->set, &te->elem);
@@ -9302,6 +9487,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELOBJ:
+ case NFT_MSG_DESTROYOBJ:
trans->ctx.table->use++;
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
@@ -9318,6 +9504,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
break;
case NFT_MSG_DELFLOWTABLE:
+ case NFT_MSG_DESTROYFLOWTABLE:
if (nft_trans_flowtable_update(trans)) {
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
diff --git a/net/netfilter/nf_tables_core.c b/net/netfilter/nf_tables_core.c
index 709a736c301c..6ecd0ba2e546 100644
--- a/net/netfilter/nf_tables_core.c
+++ b/net/netfilter/nf_tables_core.c
@@ -21,6 +21,26 @@
#include <net/netfilter/nf_log.h>
#include <net/netfilter/nft_meta.h>
+#if defined(CONFIG_RETPOLINE) && defined(CONFIG_X86)
+
+static struct static_key_false nf_tables_skip_direct_calls;
+
+static bool nf_skip_indirect_calls(void)
+{
+ return static_branch_likely(&nf_tables_skip_direct_calls);
+}
+
+static void __init nf_skip_indirect_calls_enable(void)
+{
+ if (!cpu_feature_enabled(X86_FEATURE_RETPOLINE))
+ static_branch_enable(&nf_tables_skip_direct_calls);
+}
+#else
+static inline bool nf_skip_indirect_calls(void) { return false; }
+
+static inline void nf_skip_indirect_calls_enable(void) { }
+#endif
+
static noinline void __nft_trace_packet(struct nft_traceinfo *info,
const struct nft_chain *chain,
enum nft_trace_types type)
@@ -193,7 +213,12 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
struct nft_pktinfo *pkt)
{
#ifdef CONFIG_RETPOLINE
- unsigned long e = (unsigned long)expr->ops->eval;
+ unsigned long e;
+
+ if (nf_skip_indirect_calls())
+ goto indirect_call;
+
+ e = (unsigned long)expr->ops->eval;
#define X(e, fun) \
do { if ((e) == (unsigned long)(fun)) \
return fun(expr, regs, pkt); } while (0)
@@ -203,13 +228,19 @@ static void expr_call_ops_eval(const struct nft_expr *expr,
X(e, nft_counter_eval);
X(e, nft_meta_get_eval);
X(e, nft_lookup_eval);
+#if IS_ENABLED(CONFIG_NFT_CT)
+ X(e, nft_ct_get_fast_eval);
+#endif
X(e, nft_range_eval);
X(e, nft_immediate_eval);
X(e, nft_byteorder_eval);
X(e, nft_dynset_eval);
X(e, nft_rt_get_eval);
X(e, nft_bitwise_eval);
+ X(e, nft_objref_eval);
+ X(e, nft_objref_map_eval);
#undef X
+indirect_call:
#endif /* CONFIG_RETPOLINE */
expr->ops->eval(expr, regs, pkt);
}
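
With retpolines enabled, every expr->ops->eval() would pay the indirect-call penalty, so the dispatcher compares the stored pointer against a list of known evaluators and calls the match directly; the new static branch skips even those comparisons on CPUs that do not need retpolines. The shape of the pattern as a self-contained sketch (illustrative names, not kernel APIs):

    /* Hedged sketch of the devirtualization pattern in expr_call_ops_eval():
     * compare a stored function pointer against known handlers and make a
     * direct (predictable, retpoline-free) call on a match, falling back
     * to the indirect call otherwise. */
    #include <stdio.h>

    static void eval_imm(int r) { printf("immediate %d\n", r); }
    static void eval_cmp(int r) { printf("cmp %d\n", r); }

    static void dispatch(void (*eval)(int), int reg)
    {
    #define X(fn) do { if (eval == (fn)) { fn(reg); return; } } while (0)
        X(eval_imm);
        X(eval_cmp);
    #undef X
        eval(reg);              /* unknown handler: indirect call */
    }

    int main(void)
    {
        dispatch(eval_imm, 1);  /* direct call */
        dispatch(eval_cmp, 2);  /* direct call */
        return 0;
    }
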
@@ -369,6 +400,8 @@ int __init nf_tables_core_module_init(void)
goto err;
}
+ nf_skip_indirect_calls_enable();
+
return 0;
err:
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index 6d18fb346868..81c7737c803a 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -29,6 +29,7 @@
#include <net/netlink.h>
#include <net/netns/generic.h>
+#include <linux/netfilter.h>
#include <linux/netfilter/nfnetlink.h>
MODULE_LICENSE("GPL");
@@ -685,12 +686,12 @@ static void nfnetlink_bind_event(struct net *net, unsigned int group)
group_bit = (1 << group);
spin_lock(&nfnl_grp_active_lock);
- v = READ_ONCE(net->ct.ctnetlink_has_listener);
+ v = READ_ONCE(nf_ctnetlink_has_listener);
if ((v & group_bit) == 0) {
v |= group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
- WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
+ WRITE_ONCE(nf_ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
@@ -744,12 +745,12 @@ static void nfnetlink_unbind(struct net *net, int group)
spin_lock(&nfnl_grp_active_lock);
if (!nfnetlink_has_listeners(net, group)) {
- u8 v = READ_ONCE(net->ct.ctnetlink_has_listener);
+ u8 v = READ_ONCE(nf_ctnetlink_has_listener);
v &= ~group_bit;
/* read concurrently without nfnl_grp_active_lock held. */
- WRITE_ONCE(net->ct.ctnetlink_has_listener, v);
+ WRITE_ONCE(nf_ctnetlink_has_listener, v);
}
spin_unlock(&nfnl_grp_active_lock);
#endif
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index c68e2151defe..b9c84499438b 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -12,7 +12,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
-#include <net/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_tuple.h>
@@ -23,16 +23,6 @@
#include <net/netfilter/nf_conntrack_l4proto.h>
#include <net/netfilter/nf_conntrack_expect.h>
-struct nft_ct {
- enum nft_ct_keys key:8;
- enum ip_conntrack_dir dir:8;
- u8 len;
- union {
- u8 dreg;
- u8 sreg;
- };
-};
-
struct nft_ct_helper_obj {
struct nf_conntrack_helper *helper4;
struct nf_conntrack_helper *helper6;
@@ -759,6 +749,18 @@ static bool nft_ct_set_reduce(struct nft_regs_track *track,
return false;
}
+#ifdef CONFIG_RETPOLINE
+static const struct nft_expr_ops nft_ct_get_fast_ops = {
+ .type = &nft_ct_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
+ .eval = nft_ct_get_fast_eval,
+ .init = nft_ct_get_init,
+ .destroy = nft_ct_get_destroy,
+ .dump = nft_ct_get_dump,
+ .reduce = nft_ct_set_reduce,
+};
+#endif
+
static const struct nft_expr_ops nft_ct_set_ops = {
.type = &nft_ct_type,
.size = NFT_EXPR_SIZE(sizeof(struct nft_ct)),
@@ -791,8 +793,21 @@ nft_ct_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_CT_DREG] && tb[NFTA_CT_SREG])
return ERR_PTR(-EINVAL);
- if (tb[NFTA_CT_DREG])
+ if (tb[NFTA_CT_DREG]) {
+#ifdef CONFIG_RETPOLINE
+ u32 k = ntohl(nla_get_be32(tb[NFTA_CT_KEY]));
+
+ switch (k) {
+ case NFT_CT_STATE:
+ case NFT_CT_DIRECTION:
+ case NFT_CT_STATUS:
+ case NFT_CT_MARK:
+ case NFT_CT_SECMARK:
+ return &nft_ct_get_fast_ops;
+ }
+#endif
return &nft_ct_get_ops;
+ }
if (tb[NFTA_CT_SREG]) {
#ifdef CONFIG_NF_CONNTRACK_ZONES
diff --git a/net/netfilter/nft_ct_fast.c b/net/netfilter/nft_ct_fast.c
new file mode 100644
index 000000000000..89983b0613fa
--- /dev/null
+++ b/net/netfilter/nft_ct_fast.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#if IS_ENABLED(CONFIG_NFT_CT)
+#include <linux/netfilter/nf_tables.h>
+#include <net/netfilter/nf_tables_core.h>
+#include <net/netfilter/nf_conntrack.h>
+
+void nft_ct_get_fast_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ const struct nft_ct *priv = nft_expr_priv(expr);
+ u32 *dest = &regs->data[priv->dreg];
+ enum ip_conntrack_info ctinfo;
+ const struct nf_conn *ct;
+ unsigned int state;
+
+ ct = nf_ct_get(pkt->skb, &ctinfo);
+ if (!ct) {
+ regs->verdict.code = NFT_BREAK;
+ return;
+ }
+
+ switch (priv->key) {
+ case NFT_CT_STATE:
+ if (ct)
+ state = NF_CT_STATE_BIT(ctinfo);
+ else if (ctinfo == IP_CT_UNTRACKED)
+ state = NF_CT_STATE_UNTRACKED_BIT;
+ else
+ state = NF_CT_STATE_INVALID_BIT;
+ *dest = state;
+ return;
+ case NFT_CT_DIRECTION:
+ nft_reg_store8(dest, CTINFO2DIR(ctinfo));
+ return;
+ case NFT_CT_STATUS:
+ *dest = ct->status;
+ return;
+#ifdef CONFIG_NF_CONNTRACK_MARK
+ case NFT_CT_MARK:
+ *dest = ct->mark;
+ return;
+#endif
+#ifdef CONFIG_NF_CONNTRACK_SECMARK
+ case NFT_CT_SECMARK:
+ *dest = ct->secmark;
+ return;
+#endif
+ default:
+ WARN_ON_ONCE(1);
+ regs->verdict.code = NFT_BREAK;
+ break;
+ }
+}
+EXPORT_SYMBOL_GPL(nft_ct_get_fast_eval);
+#endif
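
The new fast evaluator handles only ct keys whose values can be computed from ctinfo or read straight out of struct nf_conn without helper lookups: state, direction, status and, config permitting, mark and secmark. nft_ct_select_ops() commits to the fast ops once at rule-load time, so the per-packet path never re-checks the key. A sketch of that select-specialized-ops-at-init idea (illustrative, not the kernel code):

    /* Hedged sketch: choose a specialized vtable once, at init time, so
     * the hot path pays no per-call branching for the common cases. */
    #include <stdio.h>

    struct ops { void (*eval)(int key); };

    static void eval_generic(int key) { printf("slow eval, key %d\n", key); }
    static void eval_fast(int key)    { printf("fast eval, key %d\n", key); }

    static const struct ops generic_ops = { .eval = eval_generic };
    static const struct ops fast_ops    = { .eval = eval_fast };

    static const struct ops *select_ops(int key)
    {
        switch (key) {
        case 0: case 1: case 2: /* keys readable without helpers */
            return &fast_ops;
        }
        return &generic_ops;
    }

    int main(void)
    {
        select_ops(1)->eval(1); /* fast */
        select_ops(9)->eval(9); /* generic */
        return 0;
    }
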
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index 7b01aa2ef653..cb37169608ba 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -13,9 +13,9 @@
#define nft_objref_priv(expr) *((struct nft_object **)nft_expr_priv(expr))
-static void nft_objref_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_objref_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_object *obj = nft_objref_priv(expr);
@@ -100,9 +100,9 @@ struct nft_objref_map {
struct nft_set_binding binding;
};
-static void nft_objref_map_eval(const struct nft_expr *expr,
- struct nft_regs *regs,
- const struct nft_pktinfo *pkt)
+void nft_objref_map_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
{
struct nft_objref_map *priv = nft_expr_priv(expr);
const struct nft_set *set = priv->set;
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 17b418a5a593..3a3c7746e88f 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -63,7 +63,7 @@ nft_payload_copy_vlan(u32 *d, const struct sk_buff *skb, u8 offset, u8 len)
return false;
if (offset + len > VLAN_ETH_HLEN + vlan_hlen)
- ethlen -= offset + len - VLAN_ETH_HLEN + vlan_hlen;
+ ethlen -= offset + len - VLAN_ETH_HLEN - vlan_hlen;
memcpy(dst_u8, vlanh + offset - vlan_hlen, ethlen);
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 7325bee7d144..19ea4d3c3553 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
return !nft_rbtree_interval_end(rbe);
}
-static bool nft_rbtree_equal(const struct nft_set *set, const void *this,
- const struct nft_rbtree_elem *interval)
+static int nft_rbtree_cmp(const struct nft_set *set,
+ const struct nft_rbtree_elem *e1,
+ const struct nft_rbtree_elem *e2)
{
- return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0;
+ return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
+ set->klen);
}
static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
@@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
const struct nft_rbtree_elem *rbe, *interval = NULL;
u8 genmask = nft_genmask_cur(net);
const struct rb_node *parent;
- const void *this;
int d;
parent = rcu_dereference_raw(priv->root.rb_node);
@@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- this = nft_set_ext_key(&rbe->ext);
- d = memcmp(this, key, set->klen);
+ d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen);
if (d < 0) {
parent = rcu_dereference_raw(parent->rb_left);
if (interval &&
- nft_rbtree_equal(set, this, interval) &&
+ !nft_rbtree_cmp(set, rbe, interval) &&
nft_rbtree_interval_end(rbe) &&
nft_rbtree_interval_start(interval))
continue;
@@ -215,154 +215,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set,
return rbe;
}
+static int nft_rbtree_gc_elem(const struct nft_set *__set,
+ struct nft_rbtree *priv,
+ struct nft_rbtree_elem *rbe)
+{
+ struct nft_set *set = (struct nft_set *)__set;
+ struct rb_node *prev = rb_prev(&rbe->node);
+ struct nft_rbtree_elem *rbe_prev;
+ struct nft_set_gc_batch *gcb;
+
+ gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC);
+ if (!gcb)
+ return -ENOMEM;
+
+ /* search for the expired end interval coming before this element. */
+ do {
+ rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
+ if (nft_rbtree_interval_end(rbe_prev))
+ break;
+
+ prev = rb_prev(prev);
+ } while (prev != NULL);
+
+ rb_erase(&rbe_prev->node, &priv->root);
+ rb_erase(&rbe->node, &priv->root);
+ atomic_sub(2, &set->nelems);
+
+ nft_set_gc_batch_add(gcb, rbe);
+ nft_set_gc_batch_complete(gcb);
+
+ return 0;
+}
+
+static bool nft_rbtree_update_first(const struct nft_set *set,
+ struct nft_rbtree_elem *rbe,
+ struct rb_node *first)
+{
+ struct nft_rbtree_elem *first_elem;
+
+ first_elem = rb_entry(first, struct nft_rbtree_elem, node);
+ /* this element is closest to where the new element is to be inserted:
+ * make it the first element of the ordered overlap walk.
+ */
+ if (nft_rbtree_cmp(set, rbe, first_elem) < 0)
+ return true;
+
+ return false;
+}
+
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree_elem *new,
struct nft_set_ext **ext)
{
- bool overlap = false, dup_end_left = false, dup_end_right = false;
+ struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL;
+ struct rb_node *node, *parent, **p, *first = NULL;
struct nft_rbtree *priv = nft_set_priv(set);
u8 genmask = nft_genmask_next(net);
- struct nft_rbtree_elem *rbe;
- struct rb_node *parent, **p;
- int d;
+ int d, err;
- /* Detect overlaps as we descend the tree. Set the flag in these cases:
- *
- * a1. _ _ __>| ?_ _ __| (insert end before existing end)
- * a2. _ _ ___| ?_ _ _>| (insert end after existing end)
- * a3. _ _ ___? >|_ _ __| (insert start before existing end)
- *
- * and clear it later on, as we eventually reach the points indicated by
- * '?' above, in the cases described below. We'll always meet these
- * later, locally, due to tree ordering, and overlaps for the intervals
- * that are the closest together are always evaluated last.
- *
- * b1. _ _ __>| !_ _ __| (insert end before existing start)
- * b2. _ _ ___| !_ _ _>| (insert end after existing start)
- * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf)
- * '--' no nodes falling in this range
- * b4. >|_ _ ! (insert start before existing start)
- *
- * Case a3. resolves to b3.:
- * - if the inserted start element is the leftmost, because the '0'
- * element in the tree serves as end element
- * - otherwise, if an existing end is found immediately to the left. If
- * there are existing nodes in between, we need to further descend the
- * tree before we can conclude the new start isn't causing an overlap
- *
- * or to b4., which, preceded by a3., means we already traversed one or
- * more existing intervals entirely, from the right.
- *
- * For a new, rightmost pair of elements, we'll hit cases b3. and b2.,
- * in that order.
- *
- * The flag is also cleared in two special cases:
- *
- * b5. |__ _ _!|<_ _ _ (insert start right before existing end)
- * b6. |__ _ >|!__ _ _ (insert end right after existing start)
- *
- * which always happen as last step and imply that no further
- * overlapping is possible.
- *
- * Another special case comes from the fact that start elements matching
- * an already existing start element are allowed: insertion is not
- * performed but we return -EEXIST in that case, and the error will be
- * cleared by the caller if NLM_F_EXCL is not present in the request.
- * This way, request for insertion of an exact overlap isn't reported as
- * error to userspace if not desired.
- *
- * However, if the existing start matches a pre-existing start, but the
- * end element doesn't match the corresponding pre-existing end element,
- * we need to report a partial overlap. This is a local condition that
- * can be noticed without need for a tracking flag, by checking for a
- * local duplicated end for a corresponding start, from left and right,
- * separately.
+ /* Descend the tree to search for an existing element whose key is
+ * greater than that of the new element. That element is the starting
+ * point for walking the ordered elements to detect a possible overlap.
+ */
-
parent = NULL;
p = &priv->root.rb_node;
while (*p != NULL) {
parent = *p;
rbe = rb_entry(parent, struct nft_rbtree_elem, node);
- d = memcmp(nft_set_ext_key(&rbe->ext),
- nft_set_ext_key(&new->ext),
- set->klen);
+ d = nft_rbtree_cmp(set, rbe, new);
+
if (d < 0) {
p = &parent->rb_left;
-
- if (nft_rbtree_interval_start(new)) {
- if (nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext) && !*p)
- overlap = false;
- } else {
- if (dup_end_left && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_right = true;
- continue;
- }
- }
} else if (d > 0) {
- p = &parent->rb_right;
+ if (!first ||
+ nft_rbtree_update_first(set, rbe, first))
+ first = &rbe->node;
- if (nft_rbtree_interval_end(new)) {
- if (dup_end_right && !*p)
- return -ENOTEMPTY;
-
- overlap = nft_rbtree_interval_end(rbe) &&
- nft_set_elem_active(&rbe->ext,
- genmask) &&
- !nft_set_elem_expired(&rbe->ext);
-
- if (overlap) {
- dup_end_left = true;
- continue;
- }
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- overlap = nft_rbtree_interval_end(rbe);
- }
+ p = &parent->rb_right;
} else {
- if (nft_rbtree_interval_end(rbe) &&
- nft_rbtree_interval_start(new)) {
+ if (nft_rbtree_interval_end(rbe))
p = &parent->rb_left;
-
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_rbtree_interval_start(rbe) &&
- nft_rbtree_interval_end(new)) {
+ else
p = &parent->rb_right;
+ }
+ }
+
+ if (!first)
+ first = rb_first(&priv->root);
+
+ /* Detect overlap by going through the list of valid tree nodes.
+ * Values stored in the tree are in reversed order, starting from
+ * highest to lowest value.
+ */
+ for (node = first; node != NULL; node = rb_next(node)) {
+ rbe = rb_entry(node, struct nft_rbtree_elem, node);
- if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext))
- overlap = false;
- } else if (nft_set_elem_active(&rbe->ext, genmask) &&
- !nft_set_elem_expired(&rbe->ext)) {
- *ext = &rbe->ext;
- return -EEXIST;
- } else {
- overlap = false;
- if (nft_rbtree_interval_end(rbe))
- p = &parent->rb_left;
- else
- p = &parent->rb_right;
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* perform garbage collection to avoid bogus overlap reports. */
+ if (nft_set_elem_expired(&rbe->ext)) {
+ err = nft_rbtree_gc_elem(set, priv, rbe);
+ if (err < 0)
+ return err;
+
+ continue;
+ }
+
+ d = nft_rbtree_cmp(set, rbe, new);
+ if (d == 0) {
+ /* Matching end element: no need to look for an
+ * overlapping greater or equal element.
+ */
+ if (nft_rbtree_interval_end(rbe)) {
+ rbe_le = rbe;
+ break;
+ }
+
+ /* first element that is greater than or equal to the key value. */
+ if (!rbe_ge) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* this is a closer greater-or-equal element; update it. */
+ if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
+ rbe_ge = rbe;
+ continue;
+ }
+
+ /* element is equal to the key value: make sure the flags
+ * match, so that an existing greater-or-equal start element
+ * is not replaced by a greater-or-equal end element.
+ */
+ if ((nft_rbtree_interval_start(new) &&
+ nft_rbtree_interval_start(rbe_ge)) ||
+ (nft_rbtree_interval_end(new) &&
+ nft_rbtree_interval_end(rbe_ge))) {
+ rbe_ge = rbe;
+ continue;
}
+ } else if (d > 0) {
+ /* annotate element greater than the new element. */
+ rbe_ge = rbe;
+ continue;
+ } else if (d < 0) {
+ /* annotate element less than the new element. */
+ rbe_le = rbe;
+ break;
}
+ }
- dup_end_left = dup_end_right = false;
+ /* - new start element matching existing start element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
+ nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
+ *ext = &rbe_ge->ext;
+ return -EEXIST;
}
- if (overlap)
+ /* - new end element matching existing end element: full overlap
+ * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given.
+ */
+ if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
+ nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
+ *ext = &rbe_le->ext;
+ return -EEXIST;
+ }
+
+ /* - new start element with existing closest, less or equal key value
+ * being a start element: partial overlap, reported as -ENOTEMPTY.
+ * Anonymous sets allow two consecutive start elements since they
+ * are constant; skip them to avoid bogus overlap reports.
+ */
+ if (!nft_set_is_anonymous(set) && rbe_le &&
+ nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new))
+ return -ENOTEMPTY;
+
+ /* - new end element with existing closest, less or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY.
+ */
+ if (rbe_le &&
+ nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
return -ENOTEMPTY;
+ /* - new end element with existing closest, greater or equal key value
+ * being an end element: partial overlap, reported as -ENOTEMPTY
+ */
+ if (rbe_ge &&
+ nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
+ return -ENOTEMPTY;
+
+ /* Accepted element: pick insertion point depending on key value */
+ parent = NULL;
+ p = &priv->root.rb_node;
+ while (*p != NULL) {
+ parent = *p;
+ rbe = rb_entry(parent, struct nft_rbtree_elem, node);
+ d = nft_rbtree_cmp(set, rbe, new);
+
+ if (d < 0)
+ p = &parent->rb_left;
+ else if (d > 0)
+ p = &parent->rb_right;
+ else if (nft_rbtree_interval_end(rbe))
+ p = &parent->rb_left;
+ else
+ p = &parent->rb_right;
+ }
+
rb_link_node_rcu(&new->node, parent, p);
rb_insert_color(&new->node, &priv->root);
return 0;
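
The rewritten insertion drops the descent-time overlap flags. It first walks the ordered boundary elements to find the closest greater-or-equal (rbe_ge) and less-or-equal (rbe_le) neighbours, garbage-collecting expired entries on the way, then classifies the result: a matching boundary of the same kind is a full overlap (-EEXIST, cleared by the caller when NLM_F_EXCL is absent), while two consecutive starts or two consecutive ends signal a partial overlap (-ENOTEMPTY). Only then does a second, plain descent link the node. A tiny illustration of the boundary encoding these checks rely on (userspace sketch, not kernel code):

    /* Hedged sketch: intervals are stored as separate start/end boundary
     * elements.  Two neighbouring boundaries of the same kind belonging
     * to different intervals signal a partial overlap. */
    #include <stdbool.h>
    #include <stdio.h>

    struct boundary {
        unsigned int key;
        bool is_end;
    };

    /* 'le' is the closest existing boundary with key <= new->key */
    static bool partial_overlap(const struct boundary *le,
                                const struct boundary *new)
    {
        return le && le->is_end == new->is_end;
    }

    int main(void)
    {
        struct boundary start10 = { .key = 10, .is_end = false };
        struct boundary start15 = { .key = 15, .is_end = false };

        /* inserting a start at 15 next to an open interval starting at
         * 10 gives two consecutive starts -> -ENOTEMPTY in the kernel */
        printf("%s\n", partial_overlap(&start10, &start15) ?
               "partial overlap" : "ok");
        return 0;
    }
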
@@ -501,23 +563,37 @@ static void nft_rbtree_gc(struct work_struct *work)
struct nft_rbtree *priv;
struct rb_node *node;
struct nft_set *set;
+ struct net *net;
+ u8 genmask;
priv = container_of(work, struct nft_rbtree, gc_work.work);
set = nft_set_container_of(priv);
+ net = read_pnet(&set->net);
+ genmask = nft_genmask_cur(net);
write_lock_bh(&priv->lock);
write_seqcount_begin(&priv->count);
for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) {
rbe = rb_entry(node, struct nft_rbtree_elem, node);
+ if (!nft_set_elem_active(&rbe->ext, genmask))
+ continue;
+
+ /* elements are reversed in the rbtree for historical reasons,
+ * from highest to lowest value; that is why the end element is
+ * always visited before the start element.
+ */
if (nft_rbtree_interval_end(rbe)) {
rbe_end = rbe;
continue;
}
if (!nft_set_elem_expired(&rbe->ext))
continue;
- if (nft_set_elem_mark_busy(&rbe->ext))
+
+ if (nft_set_elem_mark_busy(&rbe->ext)) {
+ rbe_end = NULL;
continue;
+ }
if (rbe_prev) {
rb_erase(&rbe_prev->node, &priv->root);
diff --git a/net/netfilter/xt_length.c b/net/netfilter/xt_length.c
index 1873da3a945a..ca730cedb5d4 100644
--- a/net/netfilter/xt_length.c
+++ b/net/netfilter/xt_length.c
@@ -21,7 +21,7 @@ static bool
length_mt(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_length_info *info = par->matchinfo;
- u_int16_t pktlen = ntohs(ip_hdr(skb)->tot_len);
+ u32 pktlen = skb_ip_totlen(skb);
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
@@ -30,8 +30,7 @@ static bool
length_mt6(const struct sk_buff *skb, struct xt_action_param *par)
{
const struct xt_length_info *info = par->matchinfo;
- const u_int16_t pktlen = ntohs(ipv6_hdr(skb)->payload_len) +
- sizeof(struct ipv6hdr);
+ u32 pktlen = skb->len;
return (pktlen >= info->min && pktlen <= info->max) ^ info->invert;
}
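
Both length matches stop deriving packet length from the on-wire header fields alone. For IPv4, skb_ip_totlen() comes from the BIG TCP work so that oversized GSO packets, whose 16-bit tot_len reads 0, still report their real length; for IPv6 the match now uses skb->len directly, which also covers jumbogram-style packets with a zero payload_len. As an assumption about the helper's behaviour rather than a quote of it, skb_ip_totlen() acts roughly like:

    /* Hedged sketch of the recalled skb_ip_totlen() behaviour: trust
     * iph->tot_len unless it is 0 on a GSO TCP packet, then fall back
     * to the skb-derived length.  Not the kernel's definition. */
    #include <stdbool.h>

    static unsigned int ip_totlen_sketch(unsigned int tot_len_field,
                                         bool gso_tcp,
                                         unsigned int skb_l3_len)
    {
        if (tot_len_field == 0 && gso_tcp)
            return skb_l3_len;  /* > 64k packet, header field overflowed */
        return tot_len_field;
    }

    int main(void)
    {
        return ip_totlen_sketch(0, true, 70000) != 70000;
    }
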
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index bca2a470ccad..c64277659753 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -580,7 +580,9 @@ static int netlink_insert(struct sock *sk, u32 portid)
if (nlk_sk(sk)->bound)
goto err;
- nlk_sk(sk)->portid = portid;
+ /* portid can be read locklessly from netlink_getname(). */
+ WRITE_ONCE(nlk_sk(sk)->portid, portid);
+
sock_hold(sk);
err = __netlink_insert(table, sk);
@@ -1096,9 +1098,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
return -EINVAL;
if (addr->sa_family == AF_UNSPEC) {
- sk->sk_state = NETLINK_UNCONNECTED;
- nlk->dst_portid = 0;
- nlk->dst_group = 0;
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, 0);
+ WRITE_ONCE(nlk->dst_group, 0);
return 0;
}
if (addr->sa_family != AF_NETLINK)
@@ -1119,9 +1123,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr,
err = netlink_autobind(sock);
if (err == 0) {
- sk->sk_state = NETLINK_CONNECTED;
- nlk->dst_portid = nladdr->nl_pid;
- nlk->dst_group = ffs(nladdr->nl_groups);
+ /* paired with READ_ONCE() in netlink_getsockbyportid() */
+ WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED);
+ /* dst_portid and dst_group can be read locklessly */
+ WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid);
+ WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups));
}
return err;
@@ -1138,10 +1144,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr,
nladdr->nl_pad = 0;
if (peer) {
- nladdr->nl_pid = nlk->dst_portid;
- nladdr->nl_groups = netlink_group_mask(nlk->dst_group);
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ nladdr->nl_pid = READ_ONCE(nlk->dst_portid);
+ nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group));
} else {
- nladdr->nl_pid = nlk->portid;
+ /* Paired with WRITE_ONCE() in netlink_insert() */
+ nladdr->nl_pid = READ_ONCE(nlk->portid);
netlink_lock_table();
nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0;
netlink_unlock_table();
@@ -1168,8 +1176,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid)
/* Don't bother queuing skb if kernel socket has no input function */
nlk = nlk_sk(sock);
- if (sock->sk_state == NETLINK_CONNECTED &&
- nlk->dst_portid != nlk_sk(ssk)->portid) {
+ /* dst_portid and sk_state can be changed in netlink_connect() */
+ if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED &&
+ READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) {
sock_put(sock);
return ERR_PTR(-ECONNREFUSED);
}
@@ -1886,8 +1895,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
goto out;
netlink_skb_flags |= NETLINK_SKB_DST;
} else {
- dst_portid = nlk->dst_portid;
- dst_group = nlk->dst_group;
+ /* Paired with WRITE_ONCE() in netlink_connect() */
+ dst_portid = READ_ONCE(nlk->dst_portid);
+ dst_group = READ_ONCE(nlk->dst_group);
}
/* Paired with WRITE_ONCE() in netlink_insert() */
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 600993c80050..04c4036bf406 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -13,7 +13,7 @@
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
-#include <linux/string.h>
+#include <linux/string_helpers.h>
#include <linux/skbuff.h>
#include <linux/mutex.h>
#include <linux/bitmap.h>
@@ -457,7 +457,7 @@ static int genl_validate_assign_mc_groups(struct genl_family *family)
if (WARN_ON(grp->name[0] == '\0'))
return -EINVAL;
- if (WARN_ON(memchr(grp->name, '\0', GENL_NAMSIZ) == NULL))
+ if (WARN_ON(!string_is_terminated(grp->name, GENL_NAMSIZ)))
return -EINVAL;
}
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 6f7f4392cffb..5a4cb796150f 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -400,6 +400,11 @@ static int nr_listen(struct socket *sock, int backlog)
struct sock *sk = sock->sk;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&nr_sk(sk)->user_addr, 0, AX25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c
index a8da88db7893..4e7c968cde2d 100644
--- a/net/netrom/nr_timer.c
+++ b/net/netrom/nr_timer.c
@@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t)
is accepted() it isn't 'dead' so doesn't get removed. */
if (sock_flag(sk, SOCK_DESTROY) ||
(sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) {
+ sock_hold(sk);
bh_unlock_sock(sk);
nr_destroy_socket(sk);
goto out;
diff --git a/net/nfc/llcp_core.c b/net/nfc/llcp_core.c
index 3364caabef8b..a27e1842b2a0 100644
--- a/net/nfc/llcp_core.c
+++ b/net/nfc/llcp_core.c
@@ -157,6 +157,7 @@ static void local_cleanup(struct nfc_llcp_local *local)
cancel_work_sync(&local->rx_work);
cancel_work_sync(&local->timeout_work);
kfree_skb(local->rx_pending);
+ local->rx_pending = NULL;
del_timer_sync(&local->sdreq_timer);
cancel_work_sync(&local->sdreq_timeout_work);
nfc_llcp_free_sdp_tlv_list(&local->pending_sdreqs);
diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c
index 9d91087b9399..348bf561bc9f 100644
--- a/net/nfc/netlink.c
+++ b/net/nfc/netlink.c
@@ -1442,7 +1442,11 @@ static int nfc_se_io(struct nfc_dev *dev, u32 se_idx,
rc = dev->ops->se_io(dev, se_idx, apdu,
apdu_length, cb, cb_context);
+ device_unlock(&dev->dev);
+ return rc;
+
error:
+ kfree(cb_context);
device_unlock(&dev->dev);
return rc;
}
@@ -1497,6 +1501,7 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
u32 dev_idx, se_idx;
u8 *apdu;
size_t apdu_len;
+ int rc;
if (!info->attrs[NFC_ATTR_DEVICE_INDEX] ||
!info->attrs[NFC_ATTR_SE_INDEX] ||
@@ -1510,25 +1515,37 @@ static int nfc_genl_se_io(struct sk_buff *skb, struct genl_info *info)
if (!dev)
return -ENODEV;
- if (!dev->ops || !dev->ops->se_io)
- return -ENOTSUPP;
+ if (!dev->ops || !dev->ops->se_io) {
+ rc = -EOPNOTSUPP;
+ goto put_dev;
+ }
apdu_len = nla_len(info->attrs[NFC_ATTR_SE_APDU]);
- if (apdu_len == 0)
- return -EINVAL;
+ if (apdu_len == 0) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
apdu = nla_data(info->attrs[NFC_ATTR_SE_APDU]);
- if (!apdu)
- return -EINVAL;
+ if (!apdu) {
+ rc = -EINVAL;
+ goto put_dev;
+ }
ctx = kzalloc(sizeof(struct se_io_ctx), GFP_KERNEL);
- if (!ctx)
- return -ENOMEM;
+ if (!ctx) {
+ rc = -ENOMEM;
+ goto put_dev;
+ }
ctx->dev_idx = dev_idx;
ctx->se_idx = se_idx;
- return nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+ rc = nfc_se_io(dev, se_idx, apdu, apdu_len, se_io_cb, ctx);
+
+put_dev:
+ nfc_put_device(dev);
+ return rc;
}
static int nfc_genl_vendor_cmd(struct sk_buff *skb,
@@ -1551,14 +1568,21 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
subcmd = nla_get_u32(info->attrs[NFC_ATTR_VENDOR_SUBCMD]);
dev = nfc_get_device(dev_idx);
- if (!dev || !dev->vendor_cmds || !dev->n_vendor_cmds)
+ if (!dev)
return -ENODEV;
+ if (!dev->vendor_cmds || !dev->n_vendor_cmds) {
+ err = -ENODEV;
+ goto put_dev;
+ }
+
if (info->attrs[NFC_ATTR_VENDOR_DATA]) {
data = nla_data(info->attrs[NFC_ATTR_VENDOR_DATA]);
data_len = nla_len(info->attrs[NFC_ATTR_VENDOR_DATA]);
- if (data_len == 0)
- return -EINVAL;
+ if (data_len == 0) {
+ err = -EINVAL;
+ goto put_dev;
+ }
} else {
data = NULL;
data_len = 0;
@@ -1573,10 +1597,14 @@ static int nfc_genl_vendor_cmd(struct sk_buff *skb,
dev->cur_cmd_info = info;
err = cmd->doit(dev, data, data_len);
dev->cur_cmd_info = NULL;
- return err;
+ goto put_dev;
}
- return -EOPNOTSUPP;
+ err = -EOPNOTSUPP;
+
+put_dev:
+ nfc_put_device(dev);
+ return err;
}
/* message building helper */
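
Both nfc/netlink.c fixes address the same leak family: nfc_get_device() takes a device reference that every early return used to drop on the floor, and nfc_se_io() additionally leaked cb_context on its error path. The cure is the usual centralized unwind: acquire once, route every exit through one label that releases. In miniature (illustrative names, userspace sketch):

    /* Hedged sketch of the acquire-once / put-on-every-path pattern. */
    #include <errno.h>

    struct dev { int refcnt; };

    static struct dev *get_device(void)
    {
        static struct dev d;

        d.refcnt++;
        return &d;
    }

    static void put_device(struct dev *d) { d->refcnt--; }

    static int do_io(int apdu_len)
    {
        struct dev *dev;
        int rc = 0;

        dev = get_device();
        if (!dev)
            return -ENODEV;

        if (apdu_len == 0) {
            rc = -EINVAL;
            goto put_dev;       /* every failure drops the reference */
        }

        /* ... actual work ... */

    put_dev:
        put_device(dev);
        return rc;
    }

    int main(void) { return do_io(4); }
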
diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig
index 747d537a3f06..29a7081858cd 100644
--- a/net/openvswitch/Kconfig
+++ b/net/openvswitch/Kconfig
@@ -15,6 +15,7 @@ config OPENVSWITCH
select NET_MPLS_GSO
select DST_CACHE
select NET_NSH
+ select NF_CONNTRACK_OVS if NF_CONNTRACK
select NF_NAT_OVS if NF_NAT
help
Open vSwitch is a multilayer Ethernet switch targeted at virtualized
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index c8b137649ca4..331730fd3580 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -9,6 +9,7 @@
#include <linux/udp.h>
#include <linux/sctp.h>
#include <linux/static_key.h>
+#include <linux/string_helpers.h>
#include <net/ip.h>
#include <net/genetlink.h>
#include <net/netfilter/nf_conntrack_core.h>
@@ -434,52 +435,21 @@ static int ovs_ct_set_labels(struct nf_conn *ct, struct sw_flow_key *key,
return 0;
}
-/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
- * value if 'skb' is freed.
- */
-static int handle_fragments(struct net *net, struct sw_flow_key *key,
- u16 zone, struct sk_buff *skb)
+static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
+ u16 zone, int family, struct sk_buff *skb)
{
struct ovs_skb_cb ovs_cb = *OVS_CB(skb);
int err;
- if (key->eth.type == htons(ETH_P_IP)) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- err = ip_defrag(net, skb, user);
- if (err)
- return err;
-
- ovs_cb.mru = IPCB(skb)->frag_max_size;
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- } else if (key->eth.type == htons(ETH_P_IPV6)) {
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err) {
- if (err != -EINPROGRESS)
- kfree_skb(skb);
- return err;
- }
-
- key->ip.proto = ipv6_hdr(skb)->nexthdr;
- ovs_cb.mru = IP6CB(skb)->frag_max_size;
-#endif
- } else {
- kfree_skb(skb);
- return -EPFNOSUPPORT;
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &key->ip.proto, &ovs_cb.mru);
+ if (err)
+ return err;
/* The key extracted from the fragment that completed this datagram
* likely didn't have an L4 header, so regenerate it.
*/
ovs_flow_key_update_l3l4(skb, key);
-
key->ip.frag = OVS_FRAG_TYPE_NONE;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
*OVS_CB(skb) = ovs_cb;
return 0;
@@ -1090,36 +1060,6 @@ static int ovs_ct_commit(struct net *net, struct sw_flow_key *key,
return 0;
}
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int ovs_skb_network_trim(struct sk_buff *skb)
-{
- unsigned int len;
- int err;
-
- switch (skb->protocol) {
- case htons(ETH_P_IP):
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case htons(ETH_P_IPV6):
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- err = pskb_trim_rcsum(skb, len);
- if (err)
- kfree_skb(skb);
-
- return err;
-}
-
/* Returns 0 on success, -EINPROGRESS if 'skb' is stolen, or other nonzero
* value if 'skb' is freed.
*/
@@ -1134,12 +1074,15 @@ int ovs_ct_execute(struct net *net, struct sk_buff *skb,
nh_ofs = skb_network_offset(skb);
skb_pull_rcsum(skb, nh_ofs);
- err = ovs_skb_network_trim(skb);
- if (err)
+ err = nf_ct_skb_network_trim(skb, info->family);
+ if (err) {
+ kfree_skb(skb);
return err;
+ }
if (key->ip.frag != OVS_FRAG_TYPE_NONE) {
- err = handle_fragments(net, key, info->zone.id, skb);
+ err = ovs_ct_handle_fragments(net, key, info->zone.id,
+ info->family, skb);
if (err)
return err;
}
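
handle_fragments() and ovs_skb_network_trim() are replaced here by the shared conntrack helpers (nf_ct_handle_fragments(), nf_ct_skb_network_trim()) selected via NF_CONNTRACK_OVS in the Kconfig hunk above, so TC and OVS share one implementation. Note the ownership change that comes with it: the old OVS trim helper freed the skb on failure, while the shared helper leaves that to the caller, which is why ovs_ct_execute() now calls kfree_skb() itself. A sketch of that caller-frees contract (illustrative, not the kernel code):

    /* Hedged sketch of the ownership change: the callee reports failure
     * but does not free; the caller owns the buffer on every path. */
    #include <stdlib.h>

    struct buf { char *data; };

    static int trim(struct buf *b, unsigned int len)
    {
        if (!b->data)
            return -1;          /* failure: caller still owns b->data */
        /* ... shrink the buffer to len bytes ... */
        (void)len;
        return 0;
    }

    static int process(struct buf *b)
    {
        int err = trim(b, 64);

        if (err) {              /* caller-frees contract */
            free(b->data);
            b->data = NULL;
        }
        return err;
    }

    int main(void)
    {
        struct buf b = { .data = NULL };

        return process(&b) ? 0 : 1;
    }
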
@@ -1383,7 +1326,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#endif
case OVS_CT_ATTR_HELPER:
*helper = nla_data(a);
- if (!memchr(*helper, '\0', nla_len(a))) {
+ if (!string_is_terminated(*helper, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack helper");
return -EINVAL;
}
@@ -1404,7 +1347,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info,
#ifdef CONFIG_NF_CONNTRACK_TIMEOUT
case OVS_CT_ATTR_TIMEOUT:
memcpy(info->timeout, nla_data(a), nla_len(a));
- if (!memchr(info->timeout, '\0', nla_len(a))) {
+ if (!string_is_terminated(info->timeout, nla_len(a))) {
OVS_NLERR(log, "Invalid conntrack timeout");
return -EINVAL;
}
diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c
index 9ca721c9fa71..fcee6012293b 100644
--- a/net/openvswitch/datapath.c
+++ b/net/openvswitch/datapath.c
@@ -1004,14 +1004,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
key = kzalloc(sizeof(*key), GFP_KERNEL);
if (!key) {
error = -ENOMEM;
- goto err_kfree_key;
+ goto err_kfree_flow;
}
ovs_match_init(&match, key, false, &mask);
error = ovs_nla_get_match(net, &match, a[OVS_FLOW_ATTR_KEY],
a[OVS_FLOW_ATTR_MASK], log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
ovs_flow_mask_key(&new_flow->key, key, true, &mask);
@@ -1019,14 +1019,14 @@ static int ovs_flow_cmd_new(struct sk_buff *skb, struct genl_info *info)
error = ovs_nla_get_identifier(&new_flow->id, a[OVS_FLOW_ATTR_UFID],
key, log);
if (error)
- goto err_kfree_flow;
+ goto err_kfree_key;
/* Validate actions. */
error = ovs_nla_copy_actions(net, a[OVS_FLOW_ATTR_ACTIONS],
&new_flow->key, &acts, log);
if (error) {
OVS_NLERR(log, "Flow actions may not be safe on all matching packets.");
- goto err_kfree_flow;
+ goto err_kfree_key;
}
reply = ovs_flow_cmd_alloc_info(acts, &new_flow->id, info, false,
@@ -1126,10 +1126,10 @@ err_unlock_ovs:
kfree_skb(reply);
err_kfree_acts:
ovs_nla_free_flow_actions(acts);
-err_kfree_flow:
- ovs_flow_free(new_flow, false);
err_kfree_key:
kfree(key);
+err_kfree_flow:
+ ovs_flow_free(new_flow, false);
error:
return error;
}
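
This relabeling in ovs_flow_cmd_new() is a memory-leak fix: a key allocation failure used to jump to the label that freed only the (still NULL) key, leaking the already-allocated flow. Cleanup labels work when they stack in reverse allocation order, each falling through to undo what was allocated before it, which the reordering restores. Schematically (illustrative, not the OVS code):

    /* Hedged sketch: unwind labels stacked in reverse allocation order. */
    #include <errno.h>
    #include <stdlib.h>

    static int validate(void) { return -EINVAL; } /* stand-in for ovs_nla_get_match() */

    static int make_flow(void)
    {
        char *flow, *key;
        int err;

        flow = malloc(32);
        if (!flow)
            return -ENOMEM;

        key = malloc(16);
        if (!key) {
            err = -ENOMEM;
            goto err_free_flow; /* only the flow exists yet */
        }

        err = validate();
        if (err)
            goto err_free_key;  /* both exist: free key, then flow */

        /* success: 'flow' would be handed off to a table here */
        free(key);
        free(flow);
        return 0;

    err_free_key:
        free(key);
    err_free_flow:
        free(flow);
        return err;
    }

    int main(void)
    {
        return make_flow() == -EINVAL ? 0 : 1;
    }
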
@@ -1861,7 +1861,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
if (!vport->upcall_stats) {
err = -ENOMEM;
- goto err_destroy_portids;
+ goto err_destroy_vport;
}
err = ovs_dp_cmd_fill_info(dp, reply, info->snd_portid,
@@ -1876,6 +1876,8 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info)
ovs_notify(&dp_datapath_genl_family, reply, info);
return 0;
+err_destroy_vport:
+ ovs_dp_detach_port(vport);
err_destroy_portids:
kfree(rcu_dereference_raw(dp->upcall_portids));
err_unlock_and_destroy_meters:
@@ -2323,7 +2325,7 @@ restart:
vport->upcall_stats = netdev_alloc_pcpu_stats(struct vport_upcall_stats_percpu);
if (!vport->upcall_stats) {
err = -ENOMEM;
- goto exit_unlock_free;
+ goto exit_unlock_free_vport;
}
err = ovs_vport_cmd_fill_info(vport, reply, genl_info_net(info),
@@ -2343,6 +2345,8 @@ restart:
ovs_notify(&dp_vport_genl_family, reply, info);
return 0;
+exit_unlock_free_vport:
+ ovs_dp_detach_port(vport);
exit_unlock_free:
ovs_unlock();
kfree_skb(reply);
diff --git a/net/openvswitch/flow.c b/net/openvswitch/flow.c
index e20d1a973417..33b21a0c0548 100644
--- a/net/openvswitch/flow.c
+++ b/net/openvswitch/flow.c
@@ -107,7 +107,8 @@ void ovs_flow_stats_update(struct sw_flow *flow, __be16 tcp_flags,
rcu_assign_pointer(flow->stats[cpu],
new_stats);
- cpumask_set_cpu(cpu, &flow->cpu_used_mask);
+ cpumask_set_cpu(cpu,
+ flow->cpu_used_mask);
goto unlock;
}
}
@@ -135,7 +136,8 @@ void ovs_flow_stats_get(const struct sw_flow *flow,
memset(ovs_stats, 0, sizeof(*ovs_stats));
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
struct sw_flow_stats *stats = rcu_dereference_ovsl(flow->stats[cpu]);
if (stats) {
@@ -159,7 +161,8 @@ void ovs_flow_stats_clear(struct sw_flow *flow)
int cpu;
/* We open code this to make sure cpu 0 is always considered */
- for (cpu = 0; cpu < nr_cpu_ids; cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ for (cpu = 0; cpu < nr_cpu_ids;
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
struct sw_flow_stats *stats = ovsl_dereference(flow->stats[cpu]);
if (stats) {
@@ -1038,7 +1041,8 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
if (tc_skb_ext_tc_enabled()) {
tc_ext = skb_ext_find(skb, TC_SKB_EXT);
- key->recirc_id = tc_ext ? tc_ext->chain : 0;
+ key->recirc_id = tc_ext && !tc_ext->act_miss ?
+ tc_ext->chain : 0;
OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
post_ct = tc_ext ? tc_ext->post_ct : false;
post_ct_snat = post_ct ? tc_ext->post_ct_snat : false;
diff --git a/net/openvswitch/flow.h b/net/openvswitch/flow.h
index 073ab73ffeaa..b5711aff6e76 100644
--- a/net/openvswitch/flow.h
+++ b/net/openvswitch/flow.h
@@ -229,7 +229,7 @@ struct sw_flow {
*/
struct sw_flow_key key;
struct sw_flow_id id;
- struct cpumask cpu_used_mask;
+ struct cpumask *cpu_used_mask;
struct sw_flow_mask *mask;
struct sw_flow_actions __rcu *sf_acts;
struct sw_flow_stats __rcu *stats[]; /* One for each CPU. First one
diff --git a/net/openvswitch/flow_table.c b/net/openvswitch/flow_table.c
index 0a0e4c283f02..4f3b1798e0b2 100644
--- a/net/openvswitch/flow_table.c
+++ b/net/openvswitch/flow_table.c
@@ -79,6 +79,7 @@ struct sw_flow *ovs_flow_alloc(void)
return ERR_PTR(-ENOMEM);
flow->stats_last_writer = -1;
+ flow->cpu_used_mask = (struct cpumask *)&flow->stats[nr_cpu_ids];
/* Initialize the default stat node. */
stats = kmem_cache_alloc_node(flow_stats_cache,
@@ -91,7 +92,7 @@ struct sw_flow *ovs_flow_alloc(void)
RCU_INIT_POINTER(flow->stats[0], stats);
- cpumask_set_cpu(0, &flow->cpu_used_mask);
+ cpumask_set_cpu(0, flow->cpu_used_mask);
return flow;
err:
@@ -115,7 +116,7 @@ static void flow_free(struct sw_flow *flow)
flow->sf_acts);
/* We open code this to make sure cpu 0 is always considered */
for (cpu = 0; cpu < nr_cpu_ids;
- cpu = cpumask_next(cpu, &flow->cpu_used_mask)) {
+ cpu = cpumask_next(cpu, flow->cpu_used_mask)) {
if (flow->stats[cpu])
kmem_cache_free(flow_stats_cache,
(struct sw_flow_stats __force *)flow->stats[cpu]);
@@ -1012,7 +1013,7 @@ static int flow_mask_insert(struct flow_table *tbl, struct sw_flow *flow,
mask = flow_mask_find(tbl, new);
if (!mask) {
- /* Allocate a new mask if none exsits. */
+ /* Allocate a new mask if none exists. */
mask = mask_alloc();
if (!mask)
return -ENOMEM;
@@ -1196,7 +1197,8 @@ int ovs_flow_init(void)
flow_cache = kmem_cache_create("sw_flow", sizeof(struct sw_flow)
+ (nr_cpu_ids
- * sizeof(struct sw_flow_stats *)),
+ * sizeof(struct sw_flow_stats *))
+ + cpumask_size(),
0, 0, NULL);
if (flow_cache == NULL)
return -ENOMEM;
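
The flow.h pointer change, the flow_table.c initialisation and the enlarged kmem_cache all implement one idea: instead of embedding a fixed-size struct cpumask (sized for NR_CPUS) in every flow, reserve cpumask_size() bytes - which scale with nr_cpu_ids - after the trailing stats[] array and point cpu_used_mask into that tail. A simplified sketch of the layout, using a hypothetical structure rather than the real struct sw_flow:

	#include <linux/cpumask.h>
	#include <linux/slab.h>

	struct example_flow {
		struct cpumask *cpu_used_mask;	/* points into the tail below */
		void __rcu *stats[];		/* one slot per possible CPU */
	};

	static struct example_flow *example_flow_alloc(void)
	{
		struct example_flow *f;

		f = kzalloc(struct_size(f, stats, nr_cpu_ids) + cpumask_size(),
			    GFP_KERNEL);
		if (!f)
			return NULL;

		/* cpumask storage starts right after stats[nr_cpu_ids - 1] */
		f->cpu_used_mask = (struct cpumask *)&f->stats[nr_cpu_ids];
		cpumask_set_cpu(0, f->cpu_used_mask);	/* CPU 0 always tracked */
		return f;
	}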
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 6e38f68f88c2..f2698d2316df 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -449,7 +449,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
err = attach_meter(meter_tbl, meter);
if (err)
- goto exit_unlock;
+ goto exit_free_old_meter;
ovs_unlock();
@@ -472,6 +472,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
genlmsg_end(reply, ovs_reply_header);
return genlmsg_reply(reply, info);
+exit_free_old_meter:
+ ovs_meter_free(old_meter);
exit_unlock:
ovs_unlock();
nlmsg_free(reply);
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index b5ab98ca2511..d4e76e2ae153 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -1335,8 +1335,6 @@ static void packet_sock_destruct(struct sock *sk)
pr_err("Attempt to release alive packet socket: %p\n", sk);
return;
}
-
- sk_refcnt_debug_dec(sk);
}
static bool fanout_flow_is_huge(struct packet_sock *po, struct sk_buff *skb)
@@ -2296,6 +2294,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev,
else if (skb->pkt_type != PACKET_OUTGOING &&
skb_csum_unnecessary(skb))
status |= TP_STATUS_CSUM_VALID;
+ if (skb_is_gso(skb) && skb_is_gso_tcp(skb))
+ status |= TP_STATUS_GSO_TCP;
if (snaplen > res)
snaplen = res;
@@ -3172,7 +3172,6 @@ static int packet_release(struct socket *sock)
skb_queue_purge(&sk->sk_receive_queue);
packet_free_pending(po);
- sk_refcnt_debug_release(sk);
sock_put(sk);
return 0;
@@ -3362,7 +3361,6 @@ static int packet_create(struct net *net, struct socket *sock, int protocol,
packet_cached_dev_reset(po);
sk->sk_destruct = packet_sock_destruct;
- sk_refcnt_debug_inc(sk);
/*
* Attach a protocol block
@@ -3522,6 +3520,8 @@ static int packet_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
else if (skb->pkt_type != PACKET_OUTGOING &&
skb_csum_unnecessary(skb))
aux.tp_status |= TP_STATUS_CSUM_VALID;
+ if (skb_is_gso(skb) && skb_is_gso_tcp(skb))
+ aux.tp_status |= TP_STATUS_GSO_TCP;
aux.tp_len = origlen;
aux.tp_snaplen = skb->len;
diff --git a/net/phonet/pep-gprs.c b/net/phonet/pep-gprs.c
index 1f5df0432d37..7f68d8662cfb 100644
--- a/net/phonet/pep-gprs.c
+++ b/net/phonet/pep-gprs.c
@@ -19,6 +19,8 @@
#include <net/tcp_states.h>
#include <net/phonet/gprs.h>
+#include <trace/events/sock.h>
+
#define GPRS_DEFAULT_MTU 1400
struct gprs_dev {
@@ -138,6 +140,8 @@ static void gprs_data_ready(struct sock *sk)
struct gprs_dev *gp = sk->sk_user_data;
struct sk_buff *skb;
+ trace_sk_data_ready(sk);
+
while ((skb = pep_read(sk)) != NULL) {
skb_orphan(skb);
gprs_recv(gp, skb);
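
This is one instance of a tree-wide pattern also visible in the qrtr and rds hunks below: every ->sk_data_ready() callback now fires the generic sock:sk_data_ready tracepoint before doing its own work, giving a uniform observability hook across protocols. The shape of the convention, with a hypothetical callback:

	#include <net/sock.h>
	#include <trace/events/sock.h>

	static void example_data_ready(struct sock *sk)	/* hypothetical */
	{
		trace_sk_data_ready(sk);

		/* ...protocol-specific receive processing... */
	}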
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index 1990d496fcfc..722936f7dd98 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -12,6 +12,7 @@
#include "qrtr.h"
+#include <trace/events/sock.h>
#define CREATE_TRACE_POINTS
#include <trace/events/qrtr.h>
@@ -83,7 +84,10 @@ static struct qrtr_node *node_get(unsigned int node_id)
node->id = node_id;
- radix_tree_insert(&nodes, node_id, node);
+ if (radix_tree_insert(&nodes, node_id, node)) {
+ kfree(node);
+ return NULL;
+ }
return node;
}
@@ -752,6 +756,8 @@ static void qrtr_ns_worker(struct work_struct *work)
static void qrtr_ns_data_ready(struct sock *sk)
{
+ trace_sk_data_ready(sk);
+
queue_work(qrtr_ns.workqueue, &qrtr_ns.work);
}
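
Independent of the tracepoint, the node_get() hunk stops ignoring the return value of radix_tree_insert(), which can fail (e.g. with -ENOMEM) when no preloading is done. The pattern reduced to essentials, with hypothetical names:

	#include <linux/radix-tree.h>
	#include <linux/slab.h>

	static RADIX_TREE(example_tree, GFP_KERNEL);

	static void *example_insert(unsigned long id, size_t size)
	{
		void *obj = kzalloc(size, GFP_KERNEL);

		if (!obj)
			return NULL;
		if (radix_tree_insert(&example_tree, id, obj)) {
			kfree(obj);	/* insertion failed: undo the allocation */
			return NULL;
		}
		return obj;
	}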
diff --git a/net/rds/ib_recv.c b/net/rds/ib_recv.c
index cfbf0e129cba..e53b7f266bd7 100644
--- a/net/rds/ib_recv.c
+++ b/net/rds/ib_recv.c
@@ -31,6 +31,7 @@
*
*/
#include <linux/kernel.h>
+#include <linux/sched/clock.h>
#include <linux/slab.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
diff --git a/net/rds/message.c b/net/rds/message.c
index b47e4f0a1639..7af59d2443e5 100644
--- a/net/rds/message.c
+++ b/net/rds/message.c
@@ -104,9 +104,9 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
spin_lock_irqsave(&q->lock, flags);
head = &q->zcookie_head;
if (!list_empty(head)) {
- info = list_entry(head, struct rds_msg_zcopy_info,
- rs_zcookie_next);
- if (info && rds_zcookie_add(info, cookie)) {
+ info = list_first_entry(head, struct rds_msg_zcopy_info,
+ rs_zcookie_next);
+ if (rds_zcookie_add(info, cookie)) {
spin_unlock_irqrestore(&q->lock, flags);
kfree(rds_info_from_znotifier(znotif));
/* caller invokes rds_wake_sk_sleep() */
@@ -118,7 +118,7 @@ static void rds_rm_zerocopy_callback(struct rds_sock *rs,
ck = &info->zcookies;
memset(ck, 0, sizeof(*ck));
WARN_ON(!rds_zcookie_add(info, cookie));
- list_add_tail(&q->zcookie_head, &info->rs_zcookie_next);
+ list_add_tail(&info->rs_zcookie_next, &q->zcookie_head);
spin_unlock_irqrestore(&q->lock, flags);
/* caller invokes rds_wake_sk_sleep() */
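
The hunk fixes two classic list API slips: list_entry() was applied to the list head itself, which is not an element - after the list_empty() check, list_first_entry() is the right accessor, and since it never returns NULL the old info check was dead - and list_add_tail() had its arguments reversed, as it takes the new entry first and the head second. A small sketch of the corrected idioms, with a hypothetical type:

	#include <linux/list.h>

	struct item {
		struct list_head link;
		int v;
	};

	static void demo(struct list_head *head, struct item *it)
	{
		struct item *first;

		if (!list_empty(head)) {
			first = list_first_entry(head, struct item, link);
			first->v++;
		}
		list_add_tail(&it->link, head);	/* (new, head), not (head, new) */
	}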
diff --git a/net/rds/recv.c b/net/rds/recv.c
index 5b426dc3634d..c71b923764fd 100644
--- a/net/rds/recv.c
+++ b/net/rds/recv.c
@@ -35,6 +35,7 @@
#include <net/sock.h>
#include <linux/in.h>
#include <linux/export.h>
+#include <linux/sched/clock.h>
#include <linux/time.h>
#include <linux/rds.h>
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 7edf2e69d3fe..014fa24418c1 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -34,6 +34,7 @@
#include <linux/gfp.h>
#include <linux/in.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include "rds.h"
#include "tcp.h"
@@ -234,6 +235,7 @@ void rds_tcp_listen_data_ready(struct sock *sk)
{
void (*ready)(struct sock *sk);
+ trace_sk_data_ready(sk);
rdsdebug("listen data ready sk %p\n", sk);
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index f4ee13da90c7..c00f04a1a534 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -33,6 +33,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <net/tcp.h>
+#include <trace/events/sock.h>
#include "rds.h"
#include "tcp.h"
@@ -309,6 +310,7 @@ void rds_tcp_data_ready(struct sock *sk)
struct rds_conn_path *cp;
struct rds_tcp_connection *tc;
+ trace_sk_data_ready(sk);
rdsdebug("data ready sk %p\n", sk);
read_lock_bh(&sk->sk_callback_lock);
diff --git a/net/rfkill/core.c b/net/rfkill/core.c
index b390ff245d5e..01fca7a10b4b 100644
--- a/net/rfkill/core.c
+++ b/net/rfkill/core.c
@@ -685,7 +685,7 @@ static ssize_t name_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%s\n", rfkill->name);
+ return sysfs_emit(buf, "%s\n", rfkill->name);
}
static DEVICE_ATTR_RO(name);
@@ -694,7 +694,7 @@ static ssize_t type_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%s\n", rfkill_types[rfkill->type]);
+ return sysfs_emit(buf, "%s\n", rfkill_types[rfkill->type]);
}
static DEVICE_ATTR_RO(type);
@@ -703,7 +703,7 @@ static ssize_t index_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%d\n", rfkill->idx);
+ return sysfs_emit(buf, "%d\n", rfkill->idx);
}
static DEVICE_ATTR_RO(index);
@@ -712,7 +712,7 @@ static ssize_t persistent_show(struct device *dev,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%d\n", rfkill->persistent);
+ return sysfs_emit(buf, "%d\n", rfkill->persistent);
}
static DEVICE_ATTR_RO(persistent);
@@ -721,7 +721,7 @@ static ssize_t hard_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0 );
+ return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_HW) ? 1 : 0);
}
static DEVICE_ATTR_RO(hard);
@@ -730,7 +730,7 @@ static ssize_t soft_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0 );
+ return sysfs_emit(buf, "%d\n", (rfkill->state & RFKILL_BLOCK_SW) ? 1 : 0);
}
static ssize_t soft_store(struct device *dev, struct device_attribute *attr,
@@ -764,7 +764,7 @@ static ssize_t hard_block_reasons_show(struct device *dev,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "0x%lx\n", rfkill->hard_block_reasons);
+ return sysfs_emit(buf, "0x%lx\n", rfkill->hard_block_reasons);
}
static DEVICE_ATTR_RO(hard_block_reasons);
@@ -783,7 +783,7 @@ static ssize_t state_show(struct device *dev, struct device_attribute *attr,
{
struct rfkill *rfkill = to_rfkill(dev);
- return sprintf(buf, "%d\n", user_state_from_blocked(rfkill->state));
+ return sysfs_emit(buf, "%d\n", user_state_from_blocked(rfkill->state));
}
static ssize_t state_store(struct device *dev, struct device_attribute *attr,
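
These conversions are mechanical but not merely cosmetic: sysfs_emit() checks that buf is the page-aligned, PAGE_SIZE buffer sysfs hands to show() methods and bounds the output accordingly, where sprintf() trusts the caller blindly. The shape of a converted attribute, with a hypothetical name:

	#include <linux/device.h>
	#include <linux/sysfs.h>

	static ssize_t example_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
	{
		return sysfs_emit(buf, "%d\n", 1);	/* clamped to PAGE_SIZE */
	}
	static DEVICE_ATTR_RO(example);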
diff --git a/net/rfkill/rfkill-gpio.c b/net/rfkill/rfkill-gpio.c
index f5afc9bcdee6..786dbfdad772 100644
--- a/net/rfkill/rfkill-gpio.c
+++ b/net/rfkill/rfkill-gpio.c
@@ -75,6 +75,8 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
{
struct rfkill_gpio_data *rfkill;
struct gpio_desc *gpio;
+ const char *name_property;
+ const char *type_property;
const char *type_name;
int ret;
@@ -82,8 +84,15 @@ static int rfkill_gpio_probe(struct platform_device *pdev)
if (!rfkill)
return -ENOMEM;
- device_property_read_string(&pdev->dev, "name", &rfkill->name);
- device_property_read_string(&pdev->dev, "type", &type_name);
+ if (dev_of_node(&pdev->dev)) {
+ name_property = "label";
+ type_property = "radio-type";
+ } else {
+ name_property = "name";
+ type_property = "type";
+ }
+ device_property_read_string(&pdev->dev, name_property, &rfkill->name);
+ device_property_read_string(&pdev->dev, type_property, &type_name);
if (!rfkill->name)
rfkill->name = dev_name(&pdev->dev);
@@ -157,12 +166,19 @@ static const struct acpi_device_id rfkill_acpi_match[] = {
MODULE_DEVICE_TABLE(acpi, rfkill_acpi_match);
#endif
+static const struct of_device_id rfkill_of_match[] __maybe_unused = {
+ { .compatible = "rfkill-gpio", },
+ { },
+};
+MODULE_DEVICE_TABLE(of, rfkill_of_match);
+
static struct platform_driver rfkill_gpio_driver = {
.probe = rfkill_gpio_probe,
.remove = rfkill_gpio_remove,
.driver = {
.name = "rfkill_gpio",
.acpi_match_table = ACPI_PTR(rfkill_acpi_match),
+ .of_match_table = of_match_ptr(rfkill_of_match),
},
};
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 36fefc3957d7..ca2b17f32670 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -488,6 +488,12 @@ static int rose_listen(struct socket *sock, int backlog)
{
struct sock *sk = sock->sk;
+ lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ release_sock(sk);
+ return -EINVAL;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
struct rose_sock *rose = rose_sk(sk);
@@ -497,8 +503,10 @@ static int rose_listen(struct socket *sock, int backlog)
memset(rose->dest_digis, 0, AX25_ADDR_LEN * ROSE_MAX_DIGIS);
sk->sk_max_ack_backlog = backlog;
sk->sk_state = TCP_LISTEN;
+ release_sock(sk);
return 0;
}
+ release_sock(sk);
return -EOPNOTSUPP;
}
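
The fix brackets the whole listen transition with lock_sock(), and the new SS_UNCONNECTED check closes a race in which a concurrent connect() could change the socket state between the check and the transition. Reduced to its skeleton - a hypothetical protocol, but the same locking discipline:

	#include <net/sock.h>
	#include <net/tcp_states.h>

	static int example_listen(struct socket *sock, int backlog)
	{
		struct sock *sk = sock->sk;
		int rc = -EINVAL;

		lock_sock(sk);
		if (sock->state == SS_UNCONNECTED && sk->sk_state != TCP_LISTEN) {
			sk->sk_max_ack_backlog = backlog;
			sk->sk_state = TCP_LISTEN;
			rc = 0;
		}
		release_sock(sk);	/* every path drops the lock */
		return rc;
	}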
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 7ae023b37a83..a20986806fea 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -36,6 +36,15 @@ config AF_RXRPC_INJECT_LOSS
Say Y here to inject packet loss by discarding some received and some
transmitted packets.
+config AF_RXRPC_INJECT_RX_DELAY
+ bool "Inject delay into packet reception"
+ depends on SYSCTL
+ help
+ Say Y here to inject a delay into packet reception, allowing an
+ extended RTT time to be modelled. The delay can be configured using
+ /proc/sys/net/rxrpc/rxrpc_inject_rx_delay, setting a number of
+ milliseconds up to 0.5s (note that the granularity is actually in
+ jiffies).
config AF_RXRPC_DEBUG
bool "RxRPC dynamic debugging"
diff --git a/net/rxrpc/Makefile b/net/rxrpc/Makefile
index e76d3459d78e..ac5caf5a48e1 100644
--- a/net/rxrpc/Makefile
+++ b/net/rxrpc/Makefile
@@ -10,6 +10,7 @@ rxrpc-y := \
call_accept.o \
call_event.o \
call_object.o \
+ call_state.o \
conn_client.o \
conn_event.o \
conn_object.o \
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 7ea576f6ba4b..102f5cbff91a 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -155,10 +155,10 @@ static int rxrpc_bind(struct socket *sock, struct sockaddr *saddr, int len)
if (service_id) {
write_lock(&local->services_lock);
- if (rcu_access_pointer(local->service))
+ if (local->service)
goto service_in_use;
rx->local = local;
- rcu_assign_pointer(local->service, rx);
+ local->service = rx;
write_unlock(&local->services_lock);
rx->sk.sk_state = RXRPC_SERVER_BOUND;
@@ -328,7 +328,6 @@ struct rxrpc_call *rxrpc_kernel_begin_call(struct socket *sock,
mutex_unlock(&call->user_mutex);
}
- rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p", call);
return call;
}
@@ -374,13 +373,17 @@ EXPORT_SYMBOL(rxrpc_kernel_end_call);
* @sock: The socket the call is on
* @call: The call to check
*
- * Allow a kernel service to find out whether a call is still alive -
- * ie. whether it has completed.
+ * Allow a kernel service to find out whether a call is still alive - whether
+ * it has completed successfully and all received data has been consumed.
*/
bool rxrpc_kernel_check_life(const struct socket *sock,
const struct rxrpc_call *call)
{
- return call->state != RXRPC_CALL_COMPLETE;
+ if (!rxrpc_call_is_complete(call))
+ return true;
+ if (call->completion != RXRPC_CALL_SUCCEEDED)
+ return false;
+ return !skb_queue_empty(&call->recvmsg_queue);
}
EXPORT_SYMBOL(rxrpc_kernel_check_life);
@@ -783,7 +786,7 @@ static int rxrpc_create(struct net *net, struct socket *sock, int protocol,
INIT_LIST_HEAD(&rx->sock_calls);
INIT_LIST_HEAD(&rx->to_be_accepted);
INIT_LIST_HEAD(&rx->recvmsg_q);
- rwlock_init(&rx->recvmsg_lock);
+ spin_lock_init(&rx->recvmsg_lock);
rwlock_init(&rx->call_lock);
memset(&rx->srx, 0, sizeof(rx->srx));
@@ -872,9 +875,9 @@ static int rxrpc_release_sock(struct sock *sk)
sk->sk_state = RXRPC_CLOSE;
- if (rx->local && rcu_access_pointer(rx->local->service) == rx) {
+ if (rx->local && rx->local->service == rx) {
write_lock(&rx->local->services_lock);
- rcu_assign_pointer(rx->local->service, NULL);
+ rx->local->service = NULL;
write_unlock(&rx->local->services_lock);
}
@@ -957,16 +960,9 @@ static const struct net_proto_family rxrpc_family_ops = {
static int __init af_rxrpc_init(void)
{
int ret = -1;
- unsigned int tmp;
BUILD_BUG_ON(sizeof(struct rxrpc_skb_priv) > sizeof_field(struct sk_buff, cb));
- get_random_bytes(&tmp, sizeof(tmp));
- tmp &= 0x3fffffff;
- if (tmp == 0)
- tmp = 1;
- idr_set_cursor(&rxrpc_client_conn_ids, tmp);
-
ret = -ENOMEM;
rxrpc_call_jar = kmem_cache_create(
"rxrpc_call_jar", sizeof(struct rxrpc_call), 0,
@@ -1062,7 +1058,6 @@ static void __exit af_rxrpc_exit(void)
* are released.
*/
rcu_barrier();
- rxrpc_destroy_client_conn_ids();
destroy_workqueue(rxrpc_workqueue);
rxrpc_exit_security();
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index 18092526d3c8..67b0a894162d 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -38,6 +38,7 @@ struct rxrpc_txbuf;
enum rxrpc_skb_mark {
RXRPC_SKB_MARK_PACKET, /* Received packet */
RXRPC_SKB_MARK_ERROR, /* Error notification */
+ RXRPC_SKB_MARK_SERVICE_CONN_SECURED, /* Service connection response has been verified */
RXRPC_SKB_MARK_REJECT_BUSY, /* Reject with BUSY */
RXRPC_SKB_MARK_REJECT_ABORT, /* Reject with ABORT (code in skb->priority) */
};
@@ -75,13 +76,7 @@ struct rxrpc_net {
bool live;
- bool kill_all_client_conns;
atomic_t nr_client_conns;
- spinlock_t client_conn_cache_lock; /* Lock for ->*_client_conns */
- struct mutex client_conn_discard_lock; /* Prevent multiple discarders */
- struct list_head idle_client_conns;
- struct work_struct client_conn_reaper;
- struct timer_list client_conn_reap_timer;
struct hlist_head local_endpoints;
struct mutex local_mutex; /* Lock for ->local_endpoints */
@@ -154,7 +149,7 @@ struct rxrpc_sock {
struct list_head sock_calls; /* List of calls owned by this socket */
struct list_head to_be_accepted; /* calls awaiting acceptance */
struct list_head recvmsg_q; /* Calls awaiting recvmsg's attention */
- rwlock_t recvmsg_lock; /* Lock for recvmsg_q */
+ spinlock_t recvmsg_lock; /* Lock for recvmsg_q */
struct key *key; /* security for this socket */
struct key *securities; /* list of server security descriptors */
struct rb_root calls; /* User ID -> call mapping */
@@ -202,6 +197,7 @@ struct rxrpc_host_header {
* - max 48 bytes (struct sk_buff::cb)
*/
struct rxrpc_skb_priv {
+ struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
u16 offset; /* Offset of data */
u16 len; /* Length of data */
u8 flags;
@@ -262,13 +258,11 @@ struct rxrpc_security {
/* respond to a challenge */
int (*respond_to_challenge)(struct rxrpc_connection *,
- struct sk_buff *,
- u32 *);
+ struct sk_buff *);
/* verify a response */
int (*verify_response)(struct rxrpc_connection *,
- struct sk_buff *,
- u32 *);
+ struct sk_buff *);
/* clear connection security */
void (*clear)(struct rxrpc_connection *);
@@ -283,22 +277,36 @@ struct rxrpc_local {
struct rcu_head rcu;
atomic_t active_users; /* Number of users of the local endpoint */
refcount_t ref; /* Number of references to the structure */
- struct rxrpc_net *rxnet; /* The network ns in which this resides */
+ struct net *net; /* The network namespace */
+ struct rxrpc_net *rxnet; /* Our bits in the network namespace */
struct hlist_node link;
struct socket *socket; /* my UDP socket */
struct task_struct *io_thread;
struct completion io_thread_ready; /* Indication that the I/O thread started */
- struct rxrpc_sock __rcu *service; /* Service(s) listening on this endpoint */
- struct rw_semaphore defrag_sem; /* control re-enablement of IP DF bit */
+ struct rxrpc_sock *service; /* Service(s) listening on this endpoint */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ struct sk_buff_head rx_delay_queue; /* Delay injection queue */
+#endif
struct sk_buff_head rx_queue; /* Received packets */
+ struct list_head conn_attend_q; /* Conns requiring immediate attention */
struct list_head call_attend_q; /* Calls requiring immediate attention */
+
struct rb_root client_bundles; /* Client connection bundles by socket params */
spinlock_t client_bundles_lock; /* Lock for client_bundles */
+ bool kill_all_client_conns;
+ struct list_head idle_client_conns;
+ struct timer_list client_conn_reap_timer;
+ unsigned long client_conn_flags;
+#define RXRPC_CLIENT_CONN_REAP_TIMER 0 /* The client conn reap timer expired */
+
spinlock_t lock; /* access lock */
rwlock_t services_lock; /* lock for services list */
int debug_id; /* debug ID for printks */
bool dead;
bool service_closed; /* Service socket closed */
+ struct idr conn_ids; /* List of connection IDs */
+ struct list_head new_client_calls; /* Newly created client calls need connection */
+ spinlock_t client_call_lock; /* Lock for ->new_client_calls */
struct sockaddr_rxrpc srx; /* local address */
};
@@ -356,7 +364,6 @@ struct rxrpc_conn_proto {
struct rxrpc_conn_parameters {
struct rxrpc_local *local; /* Representation of local endpoint */
- struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
@@ -365,10 +372,21 @@ struct rxrpc_conn_parameters {
};
/*
+ * Call completion condition (state == RXRPC_CALL_COMPLETE).
+ */
+enum rxrpc_call_completion {
+ RXRPC_CALL_SUCCEEDED, /* - Normal termination */
+ RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
+ RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
+ RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
+ RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
+ NR__RXRPC_CALL_COMPLETIONS
+};
+
+/*
* Bits in the connection flags.
*/
enum rxrpc_conn_flag {
- RXRPC_CONN_HAS_IDR, /* Has a client conn ID assigned */
RXRPC_CONN_IN_SERVICE_CONNS, /* Conn is in peer->service_conns */
RXRPC_CONN_DONT_REUSE, /* Don't reuse this connection */
RXRPC_CONN_PROBING_FOR_UPGRADE, /* Probing for service upgrade */
@@ -388,6 +406,7 @@ enum rxrpc_conn_flag {
*/
enum rxrpc_conn_event {
RXRPC_CONN_EV_CHALLENGE, /* Send challenge packet */
+ RXRPC_CONN_EV_ABORT_CALLS, /* Abort attached calls */
};
/*
@@ -395,13 +414,13 @@ enum rxrpc_conn_event {
*/
enum rxrpc_conn_proto_state {
RXRPC_CONN_UNUSED, /* Connection not yet attempted */
+ RXRPC_CONN_CLIENT_UNSECURED, /* Client connection needs security init */
RXRPC_CONN_CLIENT, /* Client connection */
RXRPC_CONN_SERVICE_PREALLOC, /* Service connection preallocation */
RXRPC_CONN_SERVICE_UNSECURED, /* Service unsecured connection */
RXRPC_CONN_SERVICE_CHALLENGING, /* Service challenging for security */
RXRPC_CONN_SERVICE, /* Service secured connection */
- RXRPC_CONN_REMOTELY_ABORTED, /* Conn aborted by peer */
- RXRPC_CONN_LOCALLY_ABORTED, /* Conn aborted locally */
+ RXRPC_CONN_ABORTED, /* Conn aborted */
RXRPC_CONN__NR_STATES
};
@@ -412,17 +431,16 @@ struct rxrpc_bundle {
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_peer *peer; /* Remote endpoint */
struct key *key; /* Security details */
+ const struct rxrpc_security *security; /* applied security module */
refcount_t ref;
atomic_t active; /* Number of active users */
unsigned int debug_id;
u32 security_level; /* Security level selected */
u16 service_id; /* Service ID for this connection */
bool try_upgrade; /* True if the bundle is attempting upgrade */
- bool alloc_conn; /* True if someone's getting a conn */
bool exclusive; /* T if conn is exclusive */
bool upgrade; /* T if service ID can be upgraded */
- short alloc_error; /* Error from last conn allocation */
- spinlock_t channel_lock;
+ unsigned short alloc_error; /* Error from last conn allocation */
struct rb_node local_node; /* Node in local->client_conns */
struct list_head waiting_calls; /* Calls waiting for channels */
unsigned long avail_chans; /* Mask of available channels */
@@ -440,6 +458,7 @@ struct rxrpc_connection {
struct rxrpc_peer *peer; /* Remote endpoint */
struct rxrpc_net *rxnet; /* Network namespace to which call belongs */
struct key *key; /* Security details */
+ struct list_head attend_link; /* Link in local->conn_attend_q */
refcount_t ref;
atomic_t active; /* Active count for service conns */
@@ -449,7 +468,7 @@ struct rxrpc_connection {
unsigned char act_chans; /* Mask of active channels */
struct rxrpc_channel {
unsigned long final_ack_at; /* Time at which to issue final ACK */
- struct rxrpc_call __rcu *call; /* Active call */
+ struct rxrpc_call *call; /* Active call */
unsigned int call_debug_id; /* call->debug_id */
u32 call_id; /* ID of current call */
u32 call_counter; /* Call ID counter */
@@ -470,6 +489,7 @@ struct rxrpc_connection {
struct list_head link; /* link in master connection list */
struct sk_buff_head rx_queue; /* received conn-level packets */
+ struct mutex security_lock; /* Lock for security management */
const struct rxrpc_security *security; /* applied security module */
union {
struct {
@@ -483,7 +503,8 @@ struct rxrpc_connection {
unsigned long idle_timestamp; /* Time at which last became idle */
spinlock_t state_lock; /* state-change lock */
enum rxrpc_conn_proto_state state; /* current state of connection */
- u32 abort_code; /* Abort code of connection abort */
+ enum rxrpc_call_completion completion; /* Completion condition */
+ s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
atomic_t serial; /* packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
@@ -527,7 +548,8 @@ enum rxrpc_call_flag {
RXRPC_CALL_KERNEL, /* The call was made by the kernel */
RXRPC_CALL_UPGRADE, /* Service upgrade was requested for the call */
RXRPC_CALL_EXCLUSIVE, /* The call uses a once-only connection */
- RXRPC_CALL_RX_IS_IDLE, /* Reception is idle - send an ACK */
+ RXRPC_CALL_RX_IS_IDLE, /* recvmsg() is idle - send an ACK */
+ RXRPC_CALL_RECVMSG_READ_ALL, /* recvmsg() read all of the received data */
};
/*
@@ -558,18 +580,6 @@ enum rxrpc_call_state {
};
/*
- * Call completion condition (state == RXRPC_CALL_COMPLETE).
- */
-enum rxrpc_call_completion {
- RXRPC_CALL_SUCCEEDED, /* - Normal termination */
- RXRPC_CALL_REMOTELY_ABORTED, /* - call aborted by peer */
- RXRPC_CALL_LOCALLY_ABORTED, /* - call aborted locally on error or close */
- RXRPC_CALL_LOCAL_ERROR, /* - call failed due to local error */
- RXRPC_CALL_NETWORK_ERROR, /* - call terminated by network error */
- NR__RXRPC_CALL_COMPLETIONS
-};
-
-/*
* Call Tx congestion management modes.
*/
enum rxrpc_congest_mode {
@@ -587,6 +597,7 @@ enum rxrpc_congest_mode {
struct rxrpc_call {
struct rcu_head rcu;
struct rxrpc_connection *conn; /* connection carrying call */
+ struct rxrpc_bundle *bundle; /* Connection bundle to use */
struct rxrpc_peer *peer; /* Peer record for remote address */
struct rxrpc_local *local; /* Representation of local endpoint */
struct rxrpc_sock __rcu *socket; /* socket responsible */
@@ -609,7 +620,7 @@ struct rxrpc_call {
struct work_struct destroyer; /* In-process-context destroyer */
rxrpc_notify_rx_t notify_rx; /* kernel service Rx notification function */
struct list_head link; /* link in master call list */
- struct list_head chan_wait_link; /* Link in conn->bundle->waiting_calls */
+ struct list_head wait_link; /* Link in local->new_client_calls */
struct hlist_node error_link; /* link in error distribution list */
struct list_head accept_link; /* Link in rx->acceptq */
struct list_head recvmsg_link; /* Link in rx->recvmsg_q */
@@ -623,10 +634,13 @@ struct rxrpc_call {
unsigned long flags;
unsigned long events;
spinlock_t notify_lock; /* Kernel notification lock */
- rwlock_t state_lock; /* lock for state transition */
- u32 abort_code; /* Local/remote abort code */
+ unsigned int send_abort_why; /* Why the abort [enum rxrpc_abort_reason] */
+ s32 send_abort; /* Abort code to be sent */
+ short send_abort_err; /* Error to be associated with the abort */
+ rxrpc_seq_t send_abort_seq; /* DATA packet that incurred the abort (or 0) */
+ s32 abort_code; /* Local/remote abort code */
int error; /* Local error incurred */
- enum rxrpc_call_state state; /* current state of call */
+ enum rxrpc_call_state _state; /* Current state of call (needs barrier) */
enum rxrpc_call_completion completion; /* Call completion condition */
refcount_t ref;
u8 security_ix; /* Security type */
@@ -676,9 +690,11 @@ struct rxrpc_call {
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
+ u16 ackr_sack_base; /* Starting slot in SACK table ring */
rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
- atomic64_t ackr_window; /* Base (in LSW) and top (in MSW) of SACK window */
- atomic_t ackr_nr_unacked; /* Number of unacked packets */
+ rxrpc_seq_t ackr_window; /* Base of SACK window */
+ rxrpc_seq_t ackr_wtop; /* Top of SACK window */
+ unsigned int ackr_nr_unacked; /* Number of unacked packets */
atomic_t ackr_nr_consumed; /* Number of packets needing hard ACK */
struct {
#define RXRPC_SACK_SIZE 256
@@ -783,7 +799,7 @@ struct rxrpc_txbuf {
u8 data[RXRPC_JUMBO_DATALEN]; /* Data packet */
struct {
struct rxrpc_ackpacket ack;
- u8 acks[0];
+ DECLARE_FLEX_ARRAY(u8, acks);
};
};
} __aligned(64);
@@ -812,9 +828,11 @@ extern struct workqueue_struct *rxrpc_workqueue;
*/
int rxrpc_service_prealloc(struct rxrpc_sock *, gfp_t);
void rxrpc_discard_prealloc(struct rxrpc_sock *);
-int rxrpc_new_incoming_call(struct rxrpc_local *, struct rxrpc_peer *,
- struct rxrpc_connection *, struct sockaddr_rxrpc *,
- struct sk_buff *);
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb);
void rxrpc_accept_incoming_calls(struct rxrpc_local *);
int rxrpc_user_charge_accept(struct rxrpc_sock *, unsigned long);
@@ -834,7 +852,7 @@ void rxrpc_reduce_call_timer(struct rxrpc_call *call,
unsigned long now,
enum rxrpc_timer_trace why);
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb);
/*
* call_object.c
@@ -851,6 +869,7 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *,
struct sockaddr_rxrpc *,
struct rxrpc_call_params *, gfp_t,
unsigned int);
+void rxrpc_start_call_timer(struct rxrpc_call *call);
void rxrpc_incoming_call(struct rxrpc_sock *, struct rxrpc_call *,
struct sk_buff *);
void rxrpc_release_call(struct rxrpc_sock *, struct rxrpc_call *);
@@ -873,32 +892,88 @@ static inline bool rxrpc_is_client_call(const struct rxrpc_call *call)
}
/*
+ * call_state.c
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error);
+bool rxrpc_call_completed(struct rxrpc_call *call);
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why);
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error);
+
+static inline void rxrpc_set_call_state(struct rxrpc_call *call,
+ enum rxrpc_call_state state)
+{
+ /* Order write of completion info before write of ->state. */
+ smp_store_release(&call->_state, state);
+ wake_up(&call->waitq);
+}
+
+static inline enum rxrpc_call_state __rxrpc_call_state(const struct rxrpc_call *call)
+{
+ return call->_state; /* Only inside I/O thread */
+}
+
+static inline bool __rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return __rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline enum rxrpc_call_state rxrpc_call_state(const struct rxrpc_call *call)
+{
+ /* Order read ->state before read of completion info. */
+ return smp_load_acquire(&call->_state);
+}
+
+static inline bool rxrpc_call_is_complete(const struct rxrpc_call *call)
+{
+ return rxrpc_call_state(call) == RXRPC_CALL_COMPLETE;
+}
+
+static inline bool rxrpc_call_has_failed(const struct rxrpc_call *call)
+{
+ return rxrpc_call_is_complete(call) && call->completion != RXRPC_CALL_SUCCEEDED;
+}
+
+/*
* conn_client.c
*/
extern unsigned int rxrpc_reap_client_connections;
extern unsigned long rxrpc_conn_idle_client_expiry;
extern unsigned long rxrpc_conn_idle_client_fast_expiry;
-extern struct idr rxrpc_client_conn_ids;
-void rxrpc_destroy_client_conn_ids(void);
+void rxrpc_purge_client_connections(struct rxrpc_local *local);
struct rxrpc_bundle *rxrpc_get_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
void rxrpc_put_bundle(struct rxrpc_bundle *, enum rxrpc_bundle_trace);
-int rxrpc_connect_call(struct rxrpc_sock *, struct rxrpc_call *,
- struct rxrpc_conn_parameters *, struct sockaddr_rxrpc *,
- gfp_t);
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp);
+void rxrpc_connect_client_calls(struct rxrpc_local *local);
void rxrpc_expose_client_call(struct rxrpc_call *);
void rxrpc_disconnect_client_call(struct rxrpc_bundle *, struct rxrpc_call *);
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
void rxrpc_put_client_conn(struct rxrpc_connection *, enum rxrpc_conn_trace);
-void rxrpc_discard_expired_client_conns(struct work_struct *);
-void rxrpc_destroy_all_client_connections(struct rxrpc_net *);
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local);
void rxrpc_clean_up_local_conns(struct rxrpc_local *);
/*
* conn_event.c
*/
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn, struct sk_buff *skb,
+ unsigned int channel);
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why);
void rxrpc_process_connection(struct work_struct *);
void rxrpc_process_delayed_final_acks(struct rxrpc_connection *, bool);
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb);
+
+static inline bool rxrpc_is_conn_aborted(const struct rxrpc_connection *conn)
+{
+ /* Order reading the abort info after the state check. */
+ return smp_load_acquire(&conn->state) == RXRPC_CONN_ABORTED;
+}
/*
* conn_object.c
@@ -906,6 +981,7 @@ int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb);
extern unsigned int rxrpc_connection_expiry;
extern unsigned int rxrpc_closed_conn_expiry;
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why);
struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *, gfp_t);
struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *,
struct sockaddr_rxrpc *,
@@ -961,12 +1037,19 @@ void rxrpc_implicit_end_call(struct rxrpc_call *, struct sk_buff *);
*/
int rxrpc_encap_rcv(struct sock *, struct sk_buff *);
void rxrpc_error_report(struct sock *);
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err);
int rxrpc_io_thread(void *data);
static inline void rxrpc_wake_up_io_thread(struct rxrpc_local *local)
{
wake_up_process(local->io_thread);
}
+static inline bool rxrpc_protocol_error(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EPROTO);
+}
+
/*
* insecure.c
*/
@@ -1030,6 +1113,9 @@ extern unsigned long rxrpc_idle_ack_delay;
extern unsigned int rxrpc_rx_window_size;
extern unsigned int rxrpc_rx_mtu;
extern unsigned int rxrpc_rx_jumbo_max;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+extern unsigned long rxrpc_inject_rx_delay;
+#endif
/*
* net_ns.c
@@ -1048,6 +1134,7 @@ static inline struct rxrpc_net *rxrpc_net(struct net *net)
int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
int rxrpc_send_abort_packet(struct rxrpc_call *);
int rxrpc_send_data_packet(struct rxrpc_call *, struct rxrpc_txbuf *);
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn);
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb);
void rxrpc_send_keepalive(struct rxrpc_peer *);
void rxrpc_transmit_one(struct rxrpc_call *call, struct rxrpc_txbuf *txb);
@@ -1063,12 +1150,11 @@ void rxrpc_peer_keepalive_worker(struct work_struct *);
*/
struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *,
const struct sockaddr_rxrpc *);
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *, struct rxrpc_local *,
- struct sockaddr_rxrpc *, gfp_t);
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
+ struct sockaddr_rxrpc *srx, gfp_t gfp);
struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *, gfp_t,
enum rxrpc_peer_trace);
-void rxrpc_new_incoming_peer(struct rxrpc_sock *, struct rxrpc_local *,
- struct rxrpc_peer *);
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer);
void rxrpc_destroy_all_peers(struct rxrpc_net *);
struct rxrpc_peer *rxrpc_get_peer(struct rxrpc_peer *, enum rxrpc_peer_trace);
struct rxrpc_peer *rxrpc_get_peer_maybe(struct rxrpc_peer *, enum rxrpc_peer_trace);
@@ -1086,33 +1172,22 @@ extern const struct seq_operations rxrpc_local_seq_ops;
* recvmsg.c
*/
void rxrpc_notify_socket(struct rxrpc_call *);
-bool __rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
-bool rxrpc_set_call_completion(struct rxrpc_call *, enum rxrpc_call_completion, u32, int);
-bool __rxrpc_call_completed(struct rxrpc_call *);
-bool rxrpc_call_completed(struct rxrpc_call *);
-bool __rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
-bool rxrpc_abort_call(const char *, struct rxrpc_call *, rxrpc_seq_t, u32, int);
int rxrpc_recvmsg(struct socket *, struct msghdr *, size_t, int);
/*
* Abort a call due to a protocol error.
*/
-static inline bool __rxrpc_abort_eproto(struct rxrpc_call *call,
- struct sk_buff *skb,
- const char *eproto_why,
- const char *why,
- u32 abort_code)
+static inline int rxrpc_abort_eproto(struct rxrpc_call *call,
+ struct sk_buff *skb,
+ s32 abort_code,
+ enum rxrpc_abort_reason why)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- trace_rxrpc_rx_eproto(call, sp->hdr.serial, eproto_why);
- return rxrpc_abort_call(why, call, sp->hdr.seq, abort_code, -EPROTO);
+ rxrpc_abort_call(call, sp->hdr.seq, abort_code, -EPROTO, why);
+ return -EPROTO;
}
-#define rxrpc_abort_eproto(call, skb, eproto_why, abort_why, abort_code) \
- __rxrpc_abort_eproto((call), (skb), tracepoint_string(eproto_why), \
- (abort_why), (abort_code))
-
/*
* rtt.c
*/
@@ -1144,6 +1219,8 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *,
/*
* sendmsg.c
*/
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why);
int rxrpc_do_sendmsg(struct rxrpc_sock *, struct msghdr *, size_t);
/*
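
The new call-state accessors above are the heart of this refactor: rxrpc_set_call_state() publishes with smp_store_release() and rxrpc_call_state() reads with smp_load_acquire(), so completion details written before the state change are guaranteed visible to any thread that observes RXRPC_CALL_COMPLETE - which is what lets the old state_lock rwlock go away. The same pattern in isolation, with a hypothetical structure:

	#include <linux/atomic.h>
	#include <linux/types.h>

	struct job {
		int result;	/* written before 'state' is released */
		int state;	/* 0 = running, 1 = complete */
	};

	static void job_complete(struct job *j, int result)
	{
		j->result = result;
		smp_store_release(&j->state, 1);	/* publish result first */
	}

	static bool job_result(struct job *j, int *result)
	{
		if (smp_load_acquire(&j->state) != 1)	/* pairs with release */
			return false;
		*result = j->result;			/* guaranteed visible */
		return true;
	}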
diff --git a/net/rxrpc/call_accept.c b/net/rxrpc/call_accept.c
index c02401656fa9..0f5a1d77b890 100644
--- a/net/rxrpc/call_accept.c
+++ b/net/rxrpc/call_accept.c
@@ -99,7 +99,7 @@ static int rxrpc_service_prealloc_one(struct rxrpc_sock *rx,
if (!call)
return -ENOMEM;
call->flags |= (1 << RXRPC_CALL_IS_SERVICE);
- call->state = RXRPC_CALL_SERVER_PREALLOC;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_PREALLOC);
__set_bit(RXRPC_CALL_EV_INITIAL_PING, &call->events);
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
@@ -195,7 +195,7 @@ void rxrpc_discard_prealloc(struct rxrpc_sock *rx)
tail = b->peer_backlog_tail;
while (CIRC_CNT(head, tail, size) > 0) {
struct rxrpc_peer *peer = b->peer_backlog[tail];
- rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_conn);
+ rxrpc_put_local(peer->local, rxrpc_local_put_prealloc_peer);
kfree(peer);
tail = (tail + 1) & (size - 1);
}
@@ -280,7 +280,7 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
(peer_tail + 1) &
(RXRPC_BACKLOG_MAX - 1));
- rxrpc_new_incoming_peer(rx, local, peer);
+ rxrpc_new_incoming_peer(local, peer);
}
/* Now allocate and set up the connection */
@@ -326,11 +326,11 @@ static struct rxrpc_call *rxrpc_alloc_incoming_call(struct rxrpc_sock *rx,
* If we want to report an error, we mark the skb with the packet type and
* abort code and return false.
*/
-int rxrpc_new_incoming_call(struct rxrpc_local *local,
- struct rxrpc_peer *peer,
- struct rxrpc_connection *conn,
- struct sockaddr_rxrpc *peer_srx,
- struct sk_buff *skb)
+bool rxrpc_new_incoming_call(struct rxrpc_local *local,
+ struct rxrpc_peer *peer,
+ struct rxrpc_connection *conn,
+ struct sockaddr_rxrpc *peer_srx,
+ struct sk_buff *skb)
{
const struct rxrpc_security *sec = NULL;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
@@ -339,18 +339,17 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
_enter("");
- /* Don't set up a call for anything other than the first DATA packet. */
- if (sp->hdr.seq != 1 ||
- sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
- return 0; /* Just discard */
+ /* Don't set up a call for anything other than a DATA packet. */
+ if (sp->hdr.type != RXRPC_PACKET_TYPE_DATA)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_service_call);
- rcu_read_lock();
+ read_lock(&local->services_lock);
/* Weed out packets to services we're not offering. Packets that would
* begin a call are explicitly rejected and the rest are just
* discarded.
*/
- rx = rcu_dereference(local->service);
+ rx = local->service;
if (!rx || (sp->hdr.serviceId != rx->srx.srx_service &&
sp->hdr.serviceId != rx->second_service)
) {
@@ -363,16 +362,14 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
if (!conn) {
sec = rxrpc_get_incoming_security(rx, skb);
if (!sec)
- goto reject;
+ goto unsupported_security;
}
spin_lock(&rx->incoming_lock);
if (rx->sk.sk_state == RXRPC_SERVER_LISTEN_DISABLED ||
rx->sk.sk_state == RXRPC_CLOSE) {
- trace_rxrpc_abort(0, "CLS", sp->hdr.cid, sp->hdr.callNumber,
- sp->hdr.seq, RX_INVALID_OPERATION, ESHUTDOWN);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = RX_INVALID_OPERATION;
+ rxrpc_direct_abort(skb, rxrpc_abort_shut_down,
+ RX_INVALID_OPERATION, -ESHUTDOWN);
goto no_call;
}
@@ -402,7 +399,7 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
spin_unlock(&conn->state_lock);
spin_unlock(&rx->incoming_lock);
- rcu_read_unlock();
+ read_unlock(&local->services_lock);
if (hlist_unhashed(&call->error_link)) {
spin_lock(&call->peer->lock);
@@ -413,22 +410,24 @@ int rxrpc_new_incoming_call(struct rxrpc_local *local,
_leave(" = %p{%d}", call, call->debug_id);
rxrpc_input_call_event(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return 0;
+ return true;
unsupported_service:
- trace_rxrpc_abort(0, "INV", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EOPNOTSUPP);
- skb->priority = RX_INVALID_OPERATION;
- goto reject;
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EOPNOTSUPP);
+unsupported_security:
+ read_unlock(&local->services_lock);
+ return rxrpc_direct_abort(skb, rxrpc_abort_service_not_offered,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
no_call:
spin_unlock(&rx->incoming_lock);
-reject:
- rcu_read_unlock();
+ read_unlock(&local->services_lock);
_leave(" = f [%u]", skb->mark);
- return -EPROTO;
+ return false;
discard:
- rcu_read_unlock();
- return 0;
+ read_unlock(&local->services_lock);
+ return true;
}
/*
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index b2cf448fb02c..e363f21a2014 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -251,6 +251,41 @@ out:
_leave("");
}
+/*
+ * Start transmitting the reply to a service. This cancels the need to ACK the
+ * request if we haven't yet done so.
+ */
+static void rxrpc_begin_service_reply(struct rxrpc_call *call)
+{
+ unsigned long now = jiffies;
+
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SEND_REPLY);
+ WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
+ if (call->ackr_reason == RXRPC_ACK_DELAY)
+ call->ackr_reason = 0;
+ trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
+}
+
+/*
+ * Close the transmission phase. After this point there is no more data to be
+ * transmitted in the call.
+ */
+static void rxrpc_close_tx_phase(struct rxrpc_call *call)
+{
+ _debug("________awaiting reply/ACK__________");
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ break;
+ case RXRPC_CALL_SERVER_SEND_REPLY:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_AWAIT_ACK);
+ break;
+ default:
+ break;
+ }
+}
+
static bool rxrpc_tx_window_has_space(struct rxrpc_call *call)
{
unsigned int winsize = min_t(unsigned int, call->tx_winsize,
@@ -270,9 +305,11 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
{
struct rxrpc_txbuf *txb;
- if (rxrpc_is_client_call(call) &&
- !test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ if (!test_bit(RXRPC_CALL_EXPOSED, &call->flags)) {
+ if (list_empty(&call->tx_sendmsg))
+ return;
rxrpc_expose_client_call(call);
+ }
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
@@ -283,6 +320,9 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
call->tx_top = txb->seq;
list_add_tail(&txb->call_link, &call->tx_buffer);
+ if (txb->wire.flags & RXRPC_LAST_PACKET)
+ rxrpc_close_tx_phase(call);
+
rxrpc_transmit_one(call, txb);
if (!rxrpc_tx_window_has_space(call))
@@ -292,16 +332,15 @@ static void rxrpc_decant_prepared_tx(struct rxrpc_call *call)
static void rxrpc_transmit_some_data(struct rxrpc_call *call)
{
- switch (call->state) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_ACK_REQUEST:
if (list_empty(&call->tx_sendmsg))
return;
+ rxrpc_begin_service_reply(call);
fallthrough;
case RXRPC_CALL_SERVER_SEND_REPLY:
- case RXRPC_CALL_SERVER_AWAIT_ACK:
case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_CLIENT_AWAIT_REPLY:
if (!rxrpc_tx_window_has_space(call))
return;
if (list_empty(&call->tx_sendmsg)) {
@@ -331,20 +370,30 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
/*
* Handle retransmission and deferred ACK/abort generation.
*/
-void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
+bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
+ s32 abort_code;
rxrpc_see_call(call, rxrpc_call_see_input);
//printk("\n--------------------\n");
_enter("{%d,%s,%lx}",
- call->debug_id, rxrpc_call_states[call->state], call->events);
+ call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)],
+ call->events);
+
+ if (__rxrpc_call_is_complete(call))
+ goto out;
- if (call->state == RXRPC_CALL_COMPLETE)
+ /* Handle abort request locklessly, vs rxrpc_propose_abort(). */
+ abort_code = smp_load_acquire(&call->send_abort);
+ if (abort_code) {
+ rxrpc_abort_call(call, 0, call->send_abort, call->send_abort_err,
+ call->send_abort_why);
goto out;
+ }
if (skb && skb->mark == RXRPC_SKB_MARK_ERROR)
goto out;
@@ -358,7 +407,7 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
}
t = READ_ONCE(call->expect_req_by);
- if (call->state == RXRPC_CALL_SERVER_RECV_REQUEST &&
+ if (__rxrpc_call_state(call) == RXRPC_CALL_SERVER_RECV_REQUEST &&
time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_idle, now);
expired = true;
@@ -429,11 +478,12 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (test_bit(RXRPC_CALL_RX_HEARD, &call->flags) &&
(int)call->conn->hi_serial - (int)call->rx_serial > 0) {
trace_rxrpc_call_reset(call);
- rxrpc_abort_call("EXP", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_reset);
} else {
- rxrpc_abort_call("EXP", call, 0, RX_CALL_TIMEOUT, -ETIME);
+ rxrpc_abort_call(call, 0, RX_CALL_TIMEOUT, -ETIME,
+ rxrpc_abort_call_timeout);
}
- rxrpc_send_abort_packet(call);
goto out;
}
@@ -441,19 +491,28 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
rxrpc_propose_ack_ping_for_lost_ack);
- if (resend && call->state != RXRPC_CALL_CLIENT_RECV_REPLY)
+ if (resend && __rxrpc_call_state(call) != RXRPC_CALL_CLIENT_RECV_REPLY)
rxrpc_resend(call, NULL);
if (test_and_clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
rxrpc_propose_ack_rx_idle);
- if (atomic_read(&call->ackr_nr_unacked) > 2)
- rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
- rxrpc_propose_ack_input_data);
+ if (call->ackr_nr_unacked > 2) {
+ if (call->peer->rtt_count < 3)
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_rtt);
+ else if (ktime_before(ktime_add_ms(call->peer->rtt_last_req, 1000),
+ ktime_get_real()))
+ rxrpc_send_ACK(call, RXRPC_ACK_PING, 0,
+ rxrpc_propose_ack_ping_for_old_rtt);
+ else
+ rxrpc_send_ACK(call, RXRPC_ACK_IDLE, 0,
+ rxrpc_propose_ack_input_data);
+ }
/* Make sure the timer is restarted */
- if (call->state != RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
next = call->expect_rx_by;
#define set(T) { t = READ_ONCE(T); if (time_before(t, next)) next = t; }
@@ -474,9 +533,15 @@ void rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
}
out:
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call)) {
del_timer_sync(&call->timer);
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
+ rxrpc_disconnect_call(call);
+ if (call->security)
+ call->security->free_call_crypto(call);
+ }
if (call->acks_hard_ack != call->tx_bottom)
rxrpc_shrink_call_tx_buffer(call);
_leave("");
+ return true;
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 89dcf60b1158..e9f1f49d18c2 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -50,16 +50,18 @@ void rxrpc_poke_call(struct rxrpc_call *call, enum rxrpc_call_poke_trace what)
struct rxrpc_local *local = call->local;
bool busy;
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!test_bit(RXRPC_CALL_DISCONNECTED, &call->flags)) {
spin_lock_bh(&local->lock);
busy = !list_empty(&call->attend_link);
trace_rxrpc_poke_call(call, busy, what);
+ if (!busy && !rxrpc_try_get_call(call, rxrpc_call_get_poke))
+ busy = true;
if (!busy) {
- rxrpc_get_call(call, rxrpc_call_get_poke);
list_add_tail(&call->attend_link, &local->call_attend_q);
}
spin_unlock_bh(&local->lock);
- rxrpc_wake_up_io_thread(local);
+ if (!busy)
+ rxrpc_wake_up_io_thread(local);
}
}
@@ -69,7 +71,7 @@ static void rxrpc_call_timer_expired(struct timer_list *t)
_enter("%d", call->debug_id);
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
trace_rxrpc_timer_expired(call, jiffies);
rxrpc_poke_call(call, rxrpc_call_poke_timer);
}
@@ -150,7 +152,7 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
timer_setup(&call->timer, rxrpc_call_timer_expired, 0);
INIT_WORK(&call->destroyer, rxrpc_destroy_call);
INIT_LIST_HEAD(&call->link);
- INIT_LIST_HEAD(&call->chan_wait_link);
+ INIT_LIST_HEAD(&call->wait_link);
INIT_LIST_HEAD(&call->accept_link);
INIT_LIST_HEAD(&call->recvmsg_link);
INIT_LIST_HEAD(&call->sock_link);
@@ -162,13 +164,13 @@ struct rxrpc_call *rxrpc_alloc_call(struct rxrpc_sock *rx, gfp_t gfp,
init_waitqueue_head(&call->waitq);
spin_lock_init(&call->notify_lock);
spin_lock_init(&call->tx_lock);
- rwlock_init(&call->state_lock);
refcount_set(&call->ref, 1);
call->debug_id = debug_id;
call->tx_total_len = -1;
call->next_rx_timo = 20 * HZ;
call->next_req_timo = 1 * HZ;
- atomic64_set(&call->ackr_window, 0x100000001ULL);
+ call->ackr_window = 1;
+ call->ackr_wtop = 1;
memset(&call->sock_node, 0xed, sizeof(call->sock_node));
@@ -211,7 +213,6 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
now = ktime_get_real();
call->acks_latest_ts = now;
call->cong_tstamp = now;
- call->state = RXRPC_CALL_CLIENT_AWAIT_CONN;
call->dest_srx = *srx;
call->interruptibility = p->interruptibility;
call->tx_total_len = p->tx_total_len;
@@ -227,11 +228,13 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
ret = rxrpc_init_client_call_security(call);
if (ret < 0) {
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, ret);
rxrpc_put_call(call, rxrpc_call_put_discard_error);
return ERR_PTR(ret);
}
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_CONN);
+
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
p->user_call_ID, rxrpc_call_new_client);
@@ -242,7 +245,7 @@ static struct rxrpc_call *rxrpc_alloc_client_call(struct rxrpc_sock *rx,
/*
* Initiate the call ack/resend/expiry timer.
*/
-static void rxrpc_start_call_timer(struct rxrpc_call *call)
+void rxrpc_start_call_timer(struct rxrpc_call *call)
{
unsigned long now = jiffies;
unsigned long j = now + MAX_JIFFY_OFFSET;
@@ -287,6 +290,39 @@ static void rxrpc_put_call_slot(struct rxrpc_call *call)
}
/*
+ * Start the process of connecting a call. We obtain a peer and a connection
+ * bundle, but the actual association of a call with a connection is offloaded
+ * to the I/O thread to simplify locking.
+ */
+static int rxrpc_connect_call(struct rxrpc_call *call, gfp_t gfp)
+{
+ struct rxrpc_local *local = call->local;
+ int ret = -ENOMEM;
+
+ _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
+
+ call->peer = rxrpc_lookup_peer(local, &call->dest_srx, gfp);
+ if (!call->peer)
+ goto error;
+
+ ret = rxrpc_look_up_bundle(call, gfp);
+ if (ret < 0)
+ goto error;
+
+ trace_rxrpc_client(NULL, -1, rxrpc_client_queue_new_call);
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+ spin_lock(&local->client_call_lock);
+ list_add_tail(&call->wait_link, &local->new_client_calls);
+ spin_unlock(&local->client_call_lock);
+ rxrpc_wake_up_io_thread(local);
+ return 0;
+
+error:
+ __set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ return ret;
+}
+
+/*
* Set up a call for the given parameters.
* - Called with the socket lock held, which it must release.
* - If it returns a call, the call's lock will need releasing by the caller.
@@ -365,14 +401,10 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
/* Set up or get a connection record and set the protocol parameters,
* including channel number and call ID.
*/
- ret = rxrpc_connect_call(rx, call, cp, srx, gfp);
+ ret = rxrpc_connect_call(call, gfp);
if (ret < 0)
goto error_attached_to_socket;
- rxrpc_see_call(call, rxrpc_call_see_connected);
-
- rxrpc_start_call_timer(call);
-
_leave(" = %p [new]", call);
return call;
@@ -384,27 +416,23 @@ struct rxrpc_call *rxrpc_new_client_call(struct rxrpc_sock *rx,
error_dup_user_ID:
write_unlock(&rx->call_lock);
release_sock(&rx->sk);
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, -EEXIST);
+ rxrpc_prefail_call(call, RXRPC_CALL_LOCAL_ERROR, -EEXIST);
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), 0,
rxrpc_call_see_userid_exists);
- rxrpc_release_call(rx, call);
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_userid_exists);
_leave(" = -EEXIST");
return ERR_PTR(-EEXIST);
/* We got an error, but the call is attached to the socket and is in
- * need of release. However, we might now race with recvmsg() when
- * completing the call queues it. Return 0 from sys_sendmsg() and
+ * need of release. However, we might now race with recvmsg() when its
+ * completion notifies the socket. Return 0 from sys_sendmsg() and
* leave the error to recvmsg() to deal with.
*/
error_attached_to_socket:
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref), ret,
rxrpc_call_see_connect_failed);
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
- __rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR,
- RX_CALL_DEAD, ret);
+ rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
_leave(" = c=%08x [err]", call->debug_id);
return call;
}
@@ -427,32 +455,32 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
call->call_id = sp->hdr.callNumber;
call->dest_srx.srx_service = sp->hdr.serviceId;
call->cid = sp->hdr.cid;
- call->state = RXRPC_CALL_SERVER_SECURING;
call->cong_tstamp = skb->tstamp;
+ __set_bit(RXRPC_CALL_EXPOSED, &call->flags);
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
+
spin_lock(&conn->state_lock);
switch (conn->state) {
case RXRPC_CONN_SERVICE_UNSECURED:
case RXRPC_CONN_SERVICE_CHALLENGING:
- call->state = RXRPC_CALL_SERVER_SECURING;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_SECURING);
break;
case RXRPC_CONN_SERVICE:
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
break;
- case RXRPC_CONN_REMOTELY_ABORTED:
- __rxrpc_set_call_completion(call, RXRPC_CALL_REMOTELY_ABORTED,
- conn->abort_code, conn->error);
- break;
- case RXRPC_CONN_LOCALLY_ABORTED:
- __rxrpc_abort_call("CON", call, 1,
- conn->abort_code, conn->error);
+ case RXRPC_CONN_ABORTED:
+ rxrpc_set_call_completion(call, conn->completion,
+ conn->abort_code, conn->error);
break;
default:
BUG();
}
+ rxrpc_get_call(call, rxrpc_call_get_io_thread);
+
/* Set the channel for this call. We don't get channel_lock as we're
* only defending against the data_ready handler (which we're called
* from) and the RESPONSE packet parser (which is only really
@@ -462,7 +490,7 @@ void rxrpc_incoming_call(struct rxrpc_sock *rx,
chan = sp->hdr.cid & RXRPC_CHANNELMASK;
conn->channels[chan].call_counter = call->call_id;
conn->channels[chan].call_id = call->call_id;
- rcu_assign_pointer(conn->channels[chan].call, call);
+ conn->channels[chan].call = call;
spin_unlock(&conn->state_lock);
spin_lock(&conn->peer->lock);
@@ -522,23 +550,20 @@ static void rxrpc_cleanup_ring(struct rxrpc_call *call)
void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
- bool put = false;
+ bool put = false, putu = false;
_enter("{%d,%d}", call->debug_id, refcount_read(&call->ref));
trace_rxrpc_call(call->debug_id, refcount_read(&call->ref),
call->flags, rxrpc_call_see_release);
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
-
if (test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags))
BUG();
rxrpc_put_call_slot(call);
- del_timer_sync(&call->timer);
/* Make sure we don't get any more notifications */
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (!list_empty(&call->recvmsg_link)) {
_debug("unlinking once-pending call %p { e=%lx f=%lx }",
@@ -551,7 +576,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
call->recvmsg_link.next = NULL;
call->recvmsg_link.prev = NULL;
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (put)
rxrpc_put_call(call, rxrpc_call_put_unnotify);
@@ -560,7 +585,7 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
if (test_and_clear_bit(RXRPC_CALL_HAS_USERID, &call->flags)) {
rb_erase(&call->sock_node, &rx->calls);
memset(&call->sock_node, 0xdd, sizeof(call->sock_node));
- rxrpc_put_call(call, rxrpc_call_put_userid_exists);
+ putu = true;
}
list_del(&call->sock_link);
@@ -568,10 +593,9 @@ void rxrpc_release_call(struct rxrpc_sock *rx, struct rxrpc_call *call)
_debug("RELEASE CALL %p (%d CONN %p)", call, call->debug_id, conn);
- if (conn && !test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
- rxrpc_disconnect_call(call);
- if (call->security)
- call->security->free_call_crypto(call);
+ if (putu)
+ rxrpc_put_call(call, rxrpc_call_put_userid);
+
_leave("");
}
@@ -588,7 +612,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->to_be_accepted.next,
struct rxrpc_call, accept_link);
list_del(&call->accept_link);
- rxrpc_abort_call("SKR", call, 0, RX_CALL_DEAD, -ECONNRESET);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release_tba);
rxrpc_put_call(call, rxrpc_call_put_release_sock_tba);
}
@@ -596,8 +621,8 @@ void rxrpc_release_calls_on_socket(struct rxrpc_sock *rx)
call = list_entry(rx->sock_calls.next,
struct rxrpc_call, sock_link);
rxrpc_get_call(call, rxrpc_call_get_release_sock);
- rxrpc_abort_call("SKT", call, 0, RX_CALL_DEAD, -ECONNRESET);
- rxrpc_send_abort_packet(call);
+ rxrpc_propose_abort(call, RX_CALL_DEAD, -ECONNRESET,
+ rxrpc_abort_call_sock_release);
rxrpc_release_call(rx, call);
rxrpc_put_call(call, rxrpc_call_put_release_sock);
}
@@ -620,7 +645,7 @@ void rxrpc_put_call(struct rxrpc_call *call, enum rxrpc_call_trace why)
dead = __refcount_dec_and_test(&call->ref, &r);
trace_rxrpc_call(debug_id, r - 1, 0, why);
if (dead) {
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
if (!list_empty(&call->link)) {
spin_lock(&rxnet->call_lock);
@@ -669,6 +694,8 @@ static void rxrpc_destroy_call(struct work_struct *work)
rxrpc_put_txbuf(call->tx_pending, rxrpc_txbuf_put_cleaned);
rxrpc_put_connection(call->conn, rxrpc_conn_put_call);
+ rxrpc_deactivate_bundle(call->bundle);
+ rxrpc_put_bundle(call->bundle, rxrpc_bundle_put_call);
rxrpc_put_peer(call->peer, rxrpc_peer_put_call);
rxrpc_put_local(call->local, rxrpc_local_put_call);
call_rcu(&call->rcu, rxrpc_rcu_free_call);
@@ -681,7 +708,7 @@ void rxrpc_cleanup_call(struct rxrpc_call *call)
{
memset(&call->sock_node, 0xcd, sizeof(call->sock_node));
- ASSERTCMP(call->state, ==, RXRPC_CALL_COMPLETE);
+ ASSERTCMP(__rxrpc_call_state(call), ==, RXRPC_CALL_COMPLETE);
ASSERT(test_bit(RXRPC_CALL_RELEASED, &call->flags));
del_timer(&call->timer);
@@ -719,7 +746,7 @@ void rxrpc_destroy_all_calls(struct rxrpc_net *rxnet)
pr_err("Call %p still in use (%d,%s,%lx,%lx)!\n",
call, refcount_read(&call->ref),
- rxrpc_call_states[call->state],
+ rxrpc_call_states[__rxrpc_call_state(call)],
call->flags, call->events);
spin_unlock(&rxnet->call_lock);
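One idiom recurs throughout this file's puts: snapshot whatever the tracepoint needs, decrement, trace with the post-decrement count, and destroy only on the final put. A minimal sketch of the shape, assuming a simple kfree()-able object (trace_printk() stands in for the rxrpc tracepoints):

#include <linux/kernel.h>
#include <linux/refcount.h>
#include <linux/slab.h>

struct traced_obj {
	refcount_t	ref;
	unsigned int	debug_id;
};

/* Snapshot the debug ID first: once our ref is dropped, another CPU may
 * drop the last remaining ref and free the object, so only locals may be
 * used after the decrement.
 */
static void put_traced_obj(struct traced_obj *obj)
{
	unsigned int debug_id = obj->debug_id;
	bool dead;
	int r;

	dead = __refcount_dec_and_test(&obj->ref, &r);
	trace_printk("obj %08x ref=%d\n", debug_id, r - 1);
	if (dead)
		kfree(obj);
}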
diff --git a/net/rxrpc/call_state.c b/net/rxrpc/call_state.c
new file mode 100644
index 000000000000..6afb54373ebb
--- /dev/null
+++ b/net/rxrpc/call_state.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Call state changing functions.
+ *
+ * Copyright (C) 2022 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells (dhowells@redhat.com)
+ */
+
+#include "ar-internal.h"
+
+/*
+ * Transition a call to the complete state.
+ */
+bool rxrpc_set_call_completion(struct rxrpc_call *call,
+ enum rxrpc_call_completion compl,
+ u32 abort_code,
+ int error)
+{
+ if (__rxrpc_call_state(call) == RXRPC_CALL_COMPLETE)
+ return false;
+
+ call->abort_code = abort_code;
+ call->error = error;
+ call->completion = compl;
+ /* Allow reader of completion state to operate locklessly */
+ rxrpc_set_call_state(call, RXRPC_CALL_COMPLETE);
+ trace_rxrpc_call_complete(call);
+ wake_up(&call->waitq);
+ rxrpc_notify_socket(call);
+ return true;
+}
+
+/*
+ * Record that a call successfully completed.
+ */
+bool rxrpc_call_completed(struct rxrpc_call *call)
+{
+ return rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
+}
+
+/*
+ * Record that a call is locally aborted.
+ */
+bool rxrpc_abort_call(struct rxrpc_call *call, rxrpc_seq_t seq,
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
+{
+ trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
+ abort_code, error);
+ if (!rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
+ abort_code, error))
+ return false;
+ if (test_bit(RXRPC_CALL_EXPOSED, &call->flags))
+ rxrpc_send_abort_packet(call);
+ return true;
+}
+
+/*
+ * Record that a call errored out before even getting off the ground, thereby
+ * setting the state to allow it to be destroyed.
+ */
+void rxrpc_prefail_call(struct rxrpc_call *call, enum rxrpc_call_completion compl,
+ int error)
+{
+ call->abort_code = RX_CALL_DEAD;
+ call->error = error;
+ call->completion = compl;
+ call->_state = RXRPC_CALL_COMPLETE;
+ trace_rxrpc_call_complete(call);
+ WARN_ON_ONCE(__test_and_set_bit(RXRPC_CALL_RELEASED, &call->flags));
+}
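The "operate locklessly" comment above depends on ordering rather than locks: the state may be published as RXRPC_CALL_COMPLETE only after abort_code, error and completion have been written, and readers must use an acquire load before consuming them. A minimal sketch of that release/acquire pairing, assuming a plain _state field as rxrpc_prefail_call() suggests; the real rxrpc helpers may differ in detail:

#include <linux/atomic.h>
#include <linux/types.h>

enum sketch_state { CALL_ACTIVE, CALL_COMPLETE };

struct call_sketch {
	unsigned int	_state;		/* enum sketch_state */
	u32		abort_code;
	int		error;
};

/* Writer: fill in the completion payload, then publish the state with
 * release semantics so the payload is visible before the state change.
 */
static void publish_completion(struct call_sketch *call, u32 abort_code,
			       int error)
{
	call->abort_code = abort_code;
	call->error = error;
	smp_store_release(&call->_state, CALL_COMPLETE);
}

/* Reader: the acquire load orders the state check before the payload
 * reads, so no lock is needed to consume the result.
 */
static bool completion_error(struct call_sketch *call, int *_error)
{
	if (smp_load_acquire(&call->_state) != CALL_COMPLETE)
		return false;
	*_error = call->error;
	return true;
}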
diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c
index 87efa0373aed..981ca5b98bcb 100644
--- a/net/rxrpc/conn_client.c
+++ b/net/rxrpc/conn_client.c
@@ -34,104 +34,59 @@ __read_mostly unsigned int rxrpc_reap_client_connections = 900;
__read_mostly unsigned long rxrpc_conn_idle_client_expiry = 2 * 60 * HZ;
__read_mostly unsigned long rxrpc_conn_idle_client_fast_expiry = 2 * HZ;
-/*
- * We use machine-unique IDs for our client connections.
- */
-DEFINE_IDR(rxrpc_client_conn_ids);
-static DEFINE_SPINLOCK(rxrpc_conn_id_lock);
-
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle);
-
-/*
- * Get a connection ID and epoch for a client connection from the global pool.
- * The connection struct pointer is then recorded in the idr radix tree. The
- * epoch doesn't change until the client is rebooted (or, at least, unless the
- * module is unloaded).
- */
-static int rxrpc_get_client_connection_id(struct rxrpc_connection *conn,
- gfp_t gfp)
+static void rxrpc_activate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_net *rxnet = conn->rxnet;
- int id;
-
- _enter("");
-
- idr_preload(gfp);
- spin_lock(&rxrpc_conn_id_lock);
-
- id = idr_alloc_cyclic(&rxrpc_client_conn_ids, conn,
- 1, 0x40000000, GFP_NOWAIT);
- if (id < 0)
- goto error;
-
- spin_unlock(&rxrpc_conn_id_lock);
- idr_preload_end();
-
- conn->proto.epoch = rxnet->epoch;
- conn->proto.cid = id << RXRPC_CIDSHIFT;
- set_bit(RXRPC_CONN_HAS_IDR, &conn->flags);
- _leave(" [CID %x]", conn->proto.cid);
- return 0;
-
-error:
- spin_unlock(&rxrpc_conn_id_lock);
- idr_preload_end();
- _leave(" = %d", id);
- return id;
+ atomic_inc(&bundle->active);
}
/*
- * Release a connection ID for a client connection from the global pool.
+ * Release a connection ID for a client connection.
*/
-static void rxrpc_put_client_connection_id(struct rxrpc_connection *conn)
+static void rxrpc_put_client_connection_id(struct rxrpc_local *local,
+ struct rxrpc_connection *conn)
{
- if (test_bit(RXRPC_CONN_HAS_IDR, &conn->flags)) {
- spin_lock(&rxrpc_conn_id_lock);
- idr_remove(&rxrpc_client_conn_ids,
- conn->proto.cid >> RXRPC_CIDSHIFT);
- spin_unlock(&rxrpc_conn_id_lock);
- }
+ idr_remove(&local->conn_ids, conn->proto.cid >> RXRPC_CIDSHIFT);
}
/*
* Destroy the client connection ID tree.
*/
-void rxrpc_destroy_client_conn_ids(void)
+static void rxrpc_destroy_client_conn_ids(struct rxrpc_local *local)
{
struct rxrpc_connection *conn;
int id;
- if (!idr_is_empty(&rxrpc_client_conn_ids)) {
- idr_for_each_entry(&rxrpc_client_conn_ids, conn, id) {
+ if (!idr_is_empty(&local->conn_ids)) {
+ idr_for_each_entry(&local->conn_ids, conn, id) {
pr_err("AF_RXRPC: Leaked client conn %p {%d}\n",
conn, refcount_read(&conn->ref));
}
BUG();
}
- idr_destroy(&rxrpc_client_conn_ids);
+ idr_destroy(&local->conn_ids);
}
/*
* Allocate a connection bundle.
*/
-static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_conn_parameters *cp,
+static struct rxrpc_bundle *rxrpc_alloc_bundle(struct rxrpc_call *call,
gfp_t gfp)
{
struct rxrpc_bundle *bundle;
bundle = kzalloc(sizeof(*bundle), gfp);
if (bundle) {
- bundle->local = cp->local;
- bundle->peer = rxrpc_get_peer(cp->peer, rxrpc_peer_get_bundle);
- bundle->key = cp->key;
- bundle->exclusive = cp->exclusive;
- bundle->upgrade = cp->upgrade;
- bundle->service_id = cp->service_id;
- bundle->security_level = cp->security_level;
+ bundle->local = call->local;
+ bundle->peer = rxrpc_get_peer(call->peer, rxrpc_peer_get_bundle);
+ bundle->key = key_get(call->key);
+ bundle->security = call->security;
+ bundle->exclusive = test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags);
+ bundle->upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
+ bundle->service_id = call->dest_srx.srx_service;
+ bundle->security_level = call->security_level;
refcount_set(&bundle->ref, 1);
atomic_set(&bundle->active, 1);
- spin_lock_init(&bundle->channel_lock);
INIT_LIST_HEAD(&bundle->waiting_calls);
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_new);
}
@@ -152,84 +107,87 @@ static void rxrpc_free_bundle(struct rxrpc_bundle *bundle)
{
trace_rxrpc_bundle(bundle->debug_id, 1, rxrpc_bundle_free);
rxrpc_put_peer(bundle->peer, rxrpc_peer_put_bundle);
+ key_put(bundle->key);
kfree(bundle);
}
void rxrpc_put_bundle(struct rxrpc_bundle *bundle, enum rxrpc_bundle_trace why)
{
- unsigned int id = bundle->debug_id;
+ unsigned int id;
bool dead;
int r;
- dead = __refcount_dec_and_test(&bundle->ref, &r);
- trace_rxrpc_bundle(id, r - 1, why);
- if (dead)
- rxrpc_free_bundle(bundle);
+ if (bundle) {
+ id = bundle->debug_id;
+ dead = __refcount_dec_and_test(&bundle->ref, &r);
+ trace_rxrpc_bundle(id, r - 1, why);
+ if (dead)
+ rxrpc_free_bundle(bundle);
+ }
+}
+
+/*
+ * Get rid of outstanding client connection preallocations when a local
+ * endpoint is destroyed.
+ */
+void rxrpc_purge_client_connections(struct rxrpc_local *local)
+{
+ rxrpc_destroy_client_conn_ids(local);
}
/*
* Allocate a client connection.
*/
static struct rxrpc_connection *
-rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle, gfp_t gfp)
+rxrpc_alloc_client_connection(struct rxrpc_bundle *bundle)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet = bundle->local->rxnet;
- int ret;
+ struct rxrpc_local *local = bundle->local;
+ struct rxrpc_net *rxnet = local->rxnet;
+ int id;
_enter("");
- conn = rxrpc_alloc_connection(rxnet, gfp);
- if (!conn) {
- _leave(" = -ENOMEM");
+ conn = rxrpc_alloc_connection(rxnet, GFP_ATOMIC | __GFP_NOWARN);
+ if (!conn)
return ERR_PTR(-ENOMEM);
+
+ id = idr_alloc_cyclic(&local->conn_ids, conn, 1, 0x40000000,
+ GFP_ATOMIC | __GFP_NOWARN);
+ if (id < 0) {
+ kfree(conn);
+ return ERR_PTR(id);
}
refcount_set(&conn->ref, 1);
- conn->bundle = bundle;
- conn->local = bundle->local;
- conn->peer = bundle->peer;
- conn->key = bundle->key;
+ conn->proto.cid = id << RXRPC_CIDSHIFT;
+ conn->proto.epoch = local->rxnet->epoch;
+ conn->out_clientflag = RXRPC_CLIENT_INITIATED;
+ conn->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
+ conn->local = rxrpc_get_local(bundle->local, rxrpc_local_get_client_conn);
+ conn->peer = rxrpc_get_peer(bundle->peer, rxrpc_peer_get_client_conn);
+ conn->key = key_get(bundle->key);
+ conn->security = bundle->security;
conn->exclusive = bundle->exclusive;
conn->upgrade = bundle->upgrade;
conn->orig_service_id = bundle->service_id;
conn->security_level = bundle->security_level;
- conn->out_clientflag = RXRPC_CLIENT_INITIATED;
- conn->state = RXRPC_CONN_CLIENT;
+ conn->state = RXRPC_CONN_CLIENT_UNSECURED;
conn->service_id = conn->orig_service_id;
- ret = rxrpc_get_client_connection_id(conn, gfp);
- if (ret < 0)
- goto error_0;
-
- ret = rxrpc_init_client_conn_security(conn);
- if (ret < 0)
- goto error_1;
+ if (conn->security == &rxrpc_no_security)
+ conn->state = RXRPC_CONN_CLIENT;
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
list_add_tail(&conn->proc_link, &rxnet->conn_proc_list);
write_unlock(&rxnet->conn_lock);
- rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_conn);
- rxrpc_get_peer(conn->peer, rxrpc_peer_get_client_conn);
- rxrpc_get_local(conn->local, rxrpc_local_get_client_conn);
- key_get(conn->key);
-
- trace_rxrpc_conn(conn->debug_id, refcount_read(&conn->ref),
- rxrpc_conn_new_client);
+ rxrpc_see_connection(conn, rxrpc_conn_new_client);
atomic_inc(&rxnet->nr_client_conns);
trace_rxrpc_client(conn, -1, rxrpc_client_alloc);
- _leave(" = %p", conn);
return conn;
-
-error_1:
- rxrpc_put_client_connection_id(conn);
-error_0:
- kfree(conn);
- _leave(" = %d", ret);
- return ERR_PTR(ret);
}
/*
@@ -247,7 +205,8 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
if (test_bit(RXRPC_CONN_DONT_REUSE, &conn->flags))
goto dont_reuse;
- if (conn->state != RXRPC_CONN_CLIENT ||
+ if ((conn->state != RXRPC_CONN_CLIENT_UNSECURED &&
+ conn->state != RXRPC_CONN_CLIENT) ||
conn->proto.epoch != rxnet->epoch)
goto mark_dont_reuse;
@@ -257,7 +216,7 @@ static bool rxrpc_may_reuse_conn(struct rxrpc_connection *conn)
* times the maximum number of client conns away from the current
* allocation point to try and keep the IDs concentrated.
*/
- id_cursor = idr_get_cursor(&rxrpc_client_conn_ids);
+ id_cursor = idr_get_cursor(&conn->local->conn_ids);
id = conn->proto.cid >> RXRPC_CIDSHIFT;
distance = id - id_cursor;
if (distance < 0)
@@ -278,20 +237,23 @@ dont_reuse:
* Look up the conn bundle that matches the connection parameters, adding it if
* it doesn't yet exist.
*/
-static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *cp,
- gfp_t gfp)
+int rxrpc_look_up_bundle(struct rxrpc_call *call, gfp_t gfp)
{
static atomic_t rxrpc_bundle_id;
struct rxrpc_bundle *bundle, *candidate;
- struct rxrpc_local *local = cp->local;
+ struct rxrpc_local *local = call->local;
struct rb_node *p, **pp, *parent;
long diff;
+ bool upgrade = test_bit(RXRPC_CALL_UPGRADE, &call->flags);
_enter("{%px,%x,%u,%u}",
- cp->peer, key_serial(cp->key), cp->security_level, cp->upgrade);
+ call->peer, key_serial(call->key), call->security_level,
+ upgrade);
- if (cp->exclusive)
- return rxrpc_alloc_bundle(cp, gfp);
+ if (test_bit(RXRPC_CALL_EXCLUSIVE, &call->flags)) {
+ call->bundle = rxrpc_alloc_bundle(call, gfp);
+ return call->bundle ? 0 : -ENOMEM;
+ }
/* First, see if the bundle is already there. */
_debug("search 1");
@@ -300,11 +262,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
while (p) {
bundle = rb_entry(p, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level) ?:
- cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
#undef cmp
if (diff < 0)
p = p->rb_left;
@@ -317,9 +279,9 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
_debug("not found");
/* It wasn't. We need to add one. */
- candidate = rxrpc_alloc_bundle(cp, gfp);
+ candidate = rxrpc_alloc_bundle(call, gfp);
if (!candidate)
- return NULL;
+ return -ENOMEM;
_debug("search 2");
spin_lock(&local->client_bundles_lock);
@@ -329,11 +291,11 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
parent = *pp;
bundle = rb_entry(parent, struct rxrpc_bundle, local_node);
-#define cmp(X) ((long)bundle->X - (long)cp->X)
- diff = (cmp(peer) ?:
- cmp(key) ?:
- cmp(security_level) ?:
- cmp(upgrade));
+#define cmp(X, Y) ((long)(X) - (long)(Y))
+ diff = (cmp(bundle->peer, call->peer) ?:
+ cmp(bundle->key, call->key) ?:
+ cmp(bundle->security_level, call->security_level) ?:
+ cmp(bundle->upgrade, upgrade));
#undef cmp
if (diff < 0)
pp = &(*pp)->rb_left;
@@ -347,178 +309,89 @@ static struct rxrpc_bundle *rxrpc_look_up_bundle(struct rxrpc_conn_parameters *c
candidate->debug_id = atomic_inc_return(&rxrpc_bundle_id);
rb_link_node(&candidate->local_node, parent, pp);
rb_insert_color(&candidate->local_node, &local->client_bundles);
- rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
+ call->bundle = rxrpc_get_bundle(candidate, rxrpc_bundle_get_client_call);
spin_unlock(&local->client_bundles_lock);
- _leave(" = %u [new]", candidate->debug_id);
- return candidate;
+ _leave(" = B=%u [new]", call->bundle->debug_id);
+ return 0;
found_bundle_free:
rxrpc_free_bundle(candidate);
found_bundle:
- rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
- atomic_inc(&bundle->active);
+ call->bundle = rxrpc_get_bundle(bundle, rxrpc_bundle_get_client_call);
+ rxrpc_activate_bundle(bundle);
spin_unlock(&local->client_bundles_lock);
- _leave(" = %u [found]", bundle->debug_id);
- return bundle;
-}
-
-/*
- * Create or find a client bundle to use for a call.
- *
- * If we return with a connection, the call will be on its waiting list. It's
- * left to the caller to assign a channel and wake up the call.
- */
-static struct rxrpc_bundle *rxrpc_prep_call(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
-{
- struct rxrpc_bundle *bundle;
-
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
- cp->peer = rxrpc_lookup_peer(rx, cp->local, srx, gfp);
- if (!cp->peer)
- goto error;
-
- call->tx_last_sent = ktime_get_real();
- call->cong_ssthresh = cp->peer->cong_ssthresh;
- if (call->cong_cwnd >= call->cong_ssthresh)
- call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
- else
- call->cong_mode = RXRPC_CALL_SLOW_START;
- if (cp->upgrade)
- __set_bit(RXRPC_CALL_UPGRADE, &call->flags);
-
- /* Find the client connection bundle. */
- bundle = rxrpc_look_up_bundle(cp, gfp);
- if (!bundle)
- goto error;
-
- /* Get this call queued. Someone else may activate it whilst we're
- * lining up a new connection, but that's fine.
- */
- spin_lock(&bundle->channel_lock);
- list_add_tail(&call->chan_wait_link, &bundle->waiting_calls);
- spin_unlock(&bundle->channel_lock);
-
- _leave(" = [B=%x]", bundle->debug_id);
- return bundle;
-
-error:
- _leave(" = -ENOMEM");
- return ERR_PTR(-ENOMEM);
+ _leave(" = B=%u [found]", call->bundle->debug_id);
+ return 0;
}
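The rewritten lookup keys the bundle rbtree on (peer, key, security_level, upgrade) by chaining subtractions with ?:, so the first non-zero difference picks the descent direction. The same technique on a hypothetical two-field key:

/* First non-zero difference decides the branch; equal keys yield 0.  The
 * a ?: b form is the GNU extension the patch uses: a is evaluated once
 * and b is taken only when a is zero.
 */
struct bundle_key_sketch {
	void		*peer;
	unsigned int	security_level;
};

static long bundle_key_cmp(const struct bundle_key_sketch *a,
			   const struct bundle_key_sketch *b)
{
	return ((long)a->peer - (long)b->peer) ?:
	       ((long)a->security_level - (long)b->security_level);
}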
/*
* Allocate a new connection and add it into a bundle.
*/
-static void rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle, gfp_t gfp)
- __releases(bundle->channel_lock)
+static bool rxrpc_add_conn_to_bundle(struct rxrpc_bundle *bundle,
+ unsigned int slot)
{
- struct rxrpc_connection *candidate = NULL, *old = NULL;
- bool conflict;
- int i;
-
- _enter("");
-
- conflict = bundle->alloc_conn;
- if (!conflict)
- bundle->alloc_conn = true;
- spin_unlock(&bundle->channel_lock);
- if (conflict) {
- _leave(" [conf]");
- return;
- }
-
- candidate = rxrpc_alloc_client_connection(bundle, gfp);
-
- spin_lock(&bundle->channel_lock);
- bundle->alloc_conn = false;
-
- if (IS_ERR(candidate)) {
- bundle->alloc_error = PTR_ERR(candidate);
- spin_unlock(&bundle->channel_lock);
- _leave(" [err %ld]", PTR_ERR(candidate));
- return;
- }
-
- bundle->alloc_error = 0;
-
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
- unsigned int shift = i * RXRPC_MAXCALLS;
- int j;
-
- old = bundle->conns[i];
- if (!rxrpc_may_reuse_conn(old)) {
- if (old)
- trace_rxrpc_client(old, -1, rxrpc_client_replace);
- candidate->bundle_shift = shift;
- atomic_inc(&bundle->active);
- bundle->conns[i] = candidate;
- for (j = 0; j < RXRPC_MAXCALLS; j++)
- set_bit(shift + j, &bundle->avail_chans);
- candidate = NULL;
- break;
- }
+ struct rxrpc_connection *conn, *old;
+ unsigned int shift = slot * RXRPC_MAXCALLS;
+ unsigned int i;
- old = NULL;
+ old = bundle->conns[slot];
+ if (old) {
+ bundle->conns[slot] = NULL;
+ trace_rxrpc_client(old, -1, rxrpc_client_replace);
+ rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
}
- spin_unlock(&bundle->channel_lock);
-
- if (candidate) {
- _debug("discard C=%x", candidate->debug_id);
- trace_rxrpc_client(candidate, -1, rxrpc_client_duplicate);
- rxrpc_put_connection(candidate, rxrpc_conn_put_discard);
+ conn = rxrpc_alloc_client_connection(bundle);
+ if (IS_ERR(conn)) {
+ bundle->alloc_error = PTR_ERR(conn);
+ return false;
}
- rxrpc_put_connection(old, rxrpc_conn_put_noreuse);
- _leave("");
+ rxrpc_activate_bundle(bundle);
+ conn->bundle_shift = shift;
+ bundle->conns[slot] = conn;
+ for (i = 0; i < RXRPC_MAXCALLS; i++)
+ set_bit(shift + i, &bundle->avail_chans);
+ return true;
}
/*
* Add a connection to a bundle if there are no usable connections or we have
* connections waiting for extra capacity.
*/
-static void rxrpc_maybe_add_conn(struct rxrpc_bundle *bundle, gfp_t gfp)
+static bool rxrpc_bundle_has_space(struct rxrpc_bundle *bundle)
{
- struct rxrpc_call *call;
- int i, usable;
+ int slot = -1, i, usable;
_enter("");
- spin_lock(&bundle->channel_lock);
+ bundle->alloc_error = 0;
/* See if there are any usable connections. */
usable = 0;
- for (i = 0; i < ARRAY_SIZE(bundle->conns); i++)
+ for (i = 0; i < ARRAY_SIZE(bundle->conns); i++) {
if (rxrpc_may_reuse_conn(bundle->conns[i]))
usable++;
-
- if (!usable && !list_empty(&bundle->waiting_calls)) {
- call = list_first_entry(&bundle->waiting_calls,
- struct rxrpc_call, chan_wait_link);
- if (test_bit(RXRPC_CALL_UPGRADE, &call->flags))
- bundle->try_upgrade = true;
+ else if (slot == -1)
+ slot = i;
}
+ if (!usable && bundle->upgrade)
+ bundle->try_upgrade = true;
+
if (!usable)
goto alloc_conn;
if (!bundle->avail_chans &&
!bundle->try_upgrade &&
- !list_empty(&bundle->waiting_calls) &&
usable < ARRAY_SIZE(bundle->conns))
goto alloc_conn;
- spin_unlock(&bundle->channel_lock);
_leave("");
- return;
+ return usable;
alloc_conn:
- return rxrpc_add_conn_to_bundle(bundle, gfp);
+ return slot >= 0 ? rxrpc_add_conn_to_bundle(bundle, slot) : false;
}
/*
@@ -532,11 +405,13 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
struct rxrpc_channel *chan = &conn->channels[channel];
struct rxrpc_bundle *bundle = conn->bundle;
struct rxrpc_call *call = list_entry(bundle->waiting_calls.next,
- struct rxrpc_call, chan_wait_link);
+ struct rxrpc_call, wait_link);
u32 call_id = chan->call_counter + 1;
_enter("C=%x,%u", conn->debug_id, channel);
+ list_del_init(&call->wait_link);
+
trace_rxrpc_client(conn, channel, rxrpc_client_chan_activate);
/* Cancel the final ACK on the previous call if it hasn't been sent yet
@@ -546,68 +421,50 @@ static void rxrpc_activate_one_channel(struct rxrpc_connection *conn,
clear_bit(conn->bundle_shift + channel, &bundle->avail_chans);
rxrpc_see_call(call, rxrpc_call_see_activate_client);
- list_del_init(&call->chan_wait_link);
- call->peer = rxrpc_get_peer(conn->peer, rxrpc_peer_get_activate_call);
call->conn = rxrpc_get_connection(conn, rxrpc_conn_get_activate_call);
call->cid = conn->proto.cid | channel;
call->call_id = call_id;
call->dest_srx.srx_service = conn->service_id;
-
- trace_rxrpc_connect_call(call);
-
- write_lock(&call->state_lock);
- call->state = RXRPC_CALL_CLIENT_SEND_REQUEST;
- write_unlock(&call->state_lock);
-
- /* Paired with the read barrier in rxrpc_connect_call(). This orders
- * cid and epoch in the connection wrt to call_id without the need to
- * take the channel_lock.
- *
- * We provisionally assign a callNumber at this point, but we don't
- * confirm it until the call is about to be exposed.
- *
- * TODO: Pair with a barrier in the data_ready handler when that looks
- * at the call ID through a connection channel.
- */
- smp_wmb();
+ call->cong_ssthresh = call->peer->cong_ssthresh;
+ if (call->cong_cwnd >= call->cong_ssthresh)
+ call->cong_mode = RXRPC_CALL_CONGEST_AVOIDANCE;
+ else
+ call->cong_mode = RXRPC_CALL_SLOW_START;
chan->call_id = call_id;
chan->call_debug_id = call->debug_id;
- rcu_assign_pointer(chan->call, call);
+ chan->call = call;
+
+ rxrpc_see_call(call, rxrpc_call_see_connected);
+ trace_rxrpc_connect_call(call);
+ call->tx_last_sent = ktime_get_real();
+ rxrpc_start_call_timer(call);
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_SEND_REQUEST);
wake_up(&call->waitq);
}
/*
* Remove a connection from the idle list if it's on it.
*/
-static void rxrpc_unidle_conn(struct rxrpc_bundle *bundle, struct rxrpc_connection *conn)
+static void rxrpc_unidle_conn(struct rxrpc_connection *conn)
{
- struct rxrpc_net *rxnet = bundle->local->rxnet;
- bool drop_ref;
-
if (!list_empty(&conn->cache_link)) {
- drop_ref = false;
- spin_lock(&rxnet->client_conn_cache_lock);
- if (!list_empty(&conn->cache_link)) {
- list_del_init(&conn->cache_link);
- drop_ref = true;
- }
- spin_unlock(&rxnet->client_conn_cache_lock);
- if (drop_ref)
- rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
+ list_del_init(&conn->cache_link);
+ rxrpc_put_connection(conn, rxrpc_conn_put_unidle);
}
}
/*
- * Assign channels and callNumbers to waiting calls with channel_lock
- * held by caller.
+ * Assign channels and callNumbers to waiting calls.
*/
-static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
+static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
{
struct rxrpc_connection *conn;
unsigned long avail, mask;
unsigned int channel, slot;
+ trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
+
if (bundle->try_upgrade)
mask = 1;
else
@@ -627,7 +484,7 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
if (bundle->try_upgrade)
set_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags);
- rxrpc_unidle_conn(bundle, conn);
+ rxrpc_unidle_conn(conn);
channel &= (RXRPC_MAXCALLS - 1);
conn->act_chans |= 1 << channel;
@@ -636,132 +493,24 @@ static void rxrpc_activate_channels_locked(struct rxrpc_bundle *bundle)
}
/*
- * Assign channels and callNumbers to waiting calls.
- */
-static void rxrpc_activate_channels(struct rxrpc_bundle *bundle)
-{
- _enter("B=%x", bundle->debug_id);
-
- trace_rxrpc_client(NULL, -1, rxrpc_client_activate_chans);
-
- if (!bundle->avail_chans)
- return;
-
- spin_lock(&bundle->channel_lock);
- rxrpc_activate_channels_locked(bundle);
- spin_unlock(&bundle->channel_lock);
- _leave("");
-}
-
-/*
- * Wait for a callNumber and a channel to be granted to a call.
- */
-static int rxrpc_wait_for_channel(struct rxrpc_bundle *bundle,
- struct rxrpc_call *call, gfp_t gfp)
-{
- DECLARE_WAITQUEUE(myself, current);
- int ret = 0;
-
- _enter("%d", call->debug_id);
-
- if (!gfpflags_allow_blocking(gfp)) {
- rxrpc_maybe_add_conn(bundle, gfp);
- rxrpc_activate_channels(bundle);
- ret = bundle->alloc_error ?: -EAGAIN;
- goto out;
- }
-
- add_wait_queue_exclusive(&call->waitq, &myself);
- for (;;) {
- rxrpc_maybe_add_conn(bundle, gfp);
- rxrpc_activate_channels(bundle);
- ret = bundle->alloc_error;
- if (ret < 0)
- break;
-
- switch (call->interruptibility) {
- case RXRPC_INTERRUPTIBLE:
- case RXRPC_PREINTERRUPTIBLE:
- set_current_state(TASK_INTERRUPTIBLE);
- break;
- case RXRPC_UNINTERRUPTIBLE:
- default:
- set_current_state(TASK_UNINTERRUPTIBLE);
- break;
- }
- if (READ_ONCE(call->state) != RXRPC_CALL_CLIENT_AWAIT_CONN)
- break;
- if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
- call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
- signal_pending(current)) {
- ret = -ERESTARTSYS;
- break;
- }
- schedule();
- }
- remove_wait_queue(&call->waitq, &myself);
- __set_current_state(TASK_RUNNING);
-
-out:
- _leave(" = %d", ret);
- return ret;
-}
-
-/*
- * find a connection for a call
- * - called in process context with IRQs enabled
+ * Connect waiting channels (called from the I/O thread).
*/
-int rxrpc_connect_call(struct rxrpc_sock *rx,
- struct rxrpc_call *call,
- struct rxrpc_conn_parameters *cp,
- struct sockaddr_rxrpc *srx,
- gfp_t gfp)
+void rxrpc_connect_client_calls(struct rxrpc_local *local)
{
- struct rxrpc_bundle *bundle;
- struct rxrpc_net *rxnet = cp->local->rxnet;
- int ret = 0;
-
- _enter("{%d,%lx},", call->debug_id, call->user_call_ID);
-
- rxrpc_discard_expired_client_conns(&rxnet->client_conn_reaper);
+ struct rxrpc_call *call;
- bundle = rxrpc_prep_call(rx, call, cp, srx, gfp);
- if (IS_ERR(bundle)) {
- ret = PTR_ERR(bundle);
- goto out;
- }
+ while ((call = list_first_entry_or_null(&local->new_client_calls,
+ struct rxrpc_call, wait_link))
+ ) {
+ struct rxrpc_bundle *bundle = call->bundle;
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_CONN) {
- ret = rxrpc_wait_for_channel(bundle, call, gfp);
- if (ret < 0)
- goto wait_failed;
- }
+ spin_lock(&local->client_call_lock);
+ list_move_tail(&call->wait_link, &bundle->waiting_calls);
+ spin_unlock(&local->client_call_lock);
-granted_channel:
- /* Paired with the write barrier in rxrpc_activate_one_channel(). */
- smp_rmb();
-
-out_put_bundle:
- rxrpc_deactivate_bundle(bundle);
- rxrpc_put_bundle(bundle, rxrpc_bundle_get_client_call);
-out:
- _leave(" = %d", ret);
- return ret;
-
-wait_failed:
- spin_lock(&bundle->channel_lock);
- list_del_init(&call->chan_wait_link);
- spin_unlock(&bundle->channel_lock);
-
- if (call->state != RXRPC_CALL_CLIENT_AWAIT_CONN) {
- ret = 0;
- goto granted_channel;
+ if (rxrpc_bundle_has_space(bundle))
+ rxrpc_activate_channels(bundle);
}
-
- trace_rxrpc_client(call->conn, ret, rxrpc_client_chan_wait_failed);
- rxrpc_set_call_completion(call, RXRPC_CALL_LOCAL_ERROR, 0, ret);
- rxrpc_disconnect_client_call(bundle, call);
- goto out_put_bundle;
}
/*
@@ -794,14 +543,14 @@ void rxrpc_expose_client_call(struct rxrpc_call *call)
/*
* Set the reap timer.
*/
-static void rxrpc_set_client_reap_timer(struct rxrpc_net *rxnet)
+static void rxrpc_set_client_reap_timer(struct rxrpc_local *local)
{
- if (!rxnet->kill_all_client_conns) {
+ if (!local->kill_all_client_conns) {
unsigned long now = jiffies;
unsigned long reap_at = now + rxrpc_conn_idle_client_expiry;
- if (rxnet->live)
- timer_reduce(&rxnet->client_conn_reap_timer, reap_at);
+ if (local->rxnet->live)
+ timer_reduce(&local->client_conn_reap_timer, reap_at);
}
}
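timer_reduce() is what makes it safe to call the reap-timer setter from every disconnect: it only rearms the timer if the new expiry is sooner than the current one (or the timer is idle). A trivial sketch:

#include <linux/jiffies.h>
#include <linux/timer.h>

/* Cheap to call from hot paths: an already-armed, earlier deadline is
 * never pushed back.
 */
static void nudge_reap_timer(struct timer_list *reap_timer,
			     unsigned long idle_expiry)
{
	timer_reduce(reap_timer, jiffies + idle_expiry);
}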
@@ -812,16 +561,13 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
{
struct rxrpc_connection *conn;
struct rxrpc_channel *chan = NULL;
- struct rxrpc_net *rxnet = bundle->local->rxnet;
+ struct rxrpc_local *local = bundle->local;
unsigned int channel;
bool may_reuse;
u32 cid;
_enter("c=%x", call->debug_id);
- spin_lock(&bundle->channel_lock);
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
-
/* Calls that have never actually been assigned a channel can simply be
* discarded.
*/
@@ -830,8 +576,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
_debug("call is waiting");
ASSERTCMP(call->call_id, ==, 0);
ASSERT(!test_bit(RXRPC_CALL_EXPOSED, &call->flags));
- list_del_init(&call->chan_wait_link);
- goto out;
+ list_del_init(&call->wait_link);
+ return;
}
cid = call->cid;
@@ -839,10 +585,8 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
chan = &conn->channels[channel];
trace_rxrpc_client(conn, channel, rxrpc_client_chan_disconnect);
- if (rcu_access_pointer(chan->call) != call) {
- spin_unlock(&bundle->channel_lock);
- BUG();
- }
+ if (WARN_ON(chan->call != call))
+ return;
may_reuse = rxrpc_may_reuse_conn(conn);
@@ -863,16 +607,15 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
trace_rxrpc_client(conn, channel, rxrpc_client_to_active);
bundle->try_upgrade = false;
if (may_reuse)
- rxrpc_activate_channels_locked(bundle);
+ rxrpc_activate_channels(bundle);
}
-
}
/* See if we can pass the channel directly to another call. */
if (may_reuse && !list_empty(&bundle->waiting_calls)) {
trace_rxrpc_client(conn, channel, rxrpc_client_chan_pass);
rxrpc_activate_one_channel(conn, channel);
- goto out;
+ return;
}
/* Schedule the final ACK to be transmitted in a short while so that it
@@ -890,7 +633,7 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
}
/* Deactivate the channel. */
- rcu_assign_pointer(chan->call, NULL);
+ chan->call = NULL;
set_bit(conn->bundle_shift + channel, &conn->bundle->avail_chans);
conn->act_chans &= ~(1 << channel);
@@ -903,17 +646,10 @@ void rxrpc_disconnect_client_call(struct rxrpc_bundle *bundle, struct rxrpc_call
conn->idle_timestamp = jiffies;
rxrpc_get_connection(conn, rxrpc_conn_get_idle);
- spin_lock(&rxnet->client_conn_cache_lock);
- list_move_tail(&conn->cache_link, &rxnet->idle_client_conns);
- spin_unlock(&rxnet->client_conn_cache_lock);
+ list_move_tail(&conn->cache_link, &local->idle_client_conns);
- rxrpc_set_client_reap_timer(rxnet);
+ rxrpc_set_client_reap_timer(local);
}
-
-out:
- spin_unlock(&bundle->channel_lock);
- _leave("");
- return;
}
/*
@@ -923,7 +659,6 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
{
struct rxrpc_bundle *bundle = conn->bundle;
unsigned int bindex;
- bool need_drop = false;
int i;
_enter("C=%x", conn->debug_id);
@@ -931,18 +666,13 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
rxrpc_process_delayed_final_acks(conn, true);
- spin_lock(&bundle->channel_lock);
bindex = conn->bundle_shift / RXRPC_MAXCALLS;
if (bundle->conns[bindex] == conn) {
_debug("clear slot %u", bindex);
bundle->conns[bindex] = NULL;
for (i = 0; i < RXRPC_MAXCALLS; i++)
clear_bit(conn->bundle_shift + i, &bundle->avail_chans);
- need_drop = true;
- }
- spin_unlock(&bundle->channel_lock);
-
- if (need_drop) {
+ rxrpc_put_client_connection_id(bundle->local, conn);
rxrpc_deactivate_bundle(bundle);
rxrpc_put_connection(conn, rxrpc_conn_put_unbundle);
}
@@ -951,11 +681,15 @@ static void rxrpc_unbundle_conn(struct rxrpc_connection *conn)
/*
* Drop the active count on a bundle.
*/
-static void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
+void rxrpc_deactivate_bundle(struct rxrpc_bundle *bundle)
{
- struct rxrpc_local *local = bundle->local;
+ struct rxrpc_local *local;
bool need_put = false;
+ if (!bundle)
+ return;
+
+ local = bundle->local;
if (atomic_dec_and_lock(&bundle->active, &local->client_bundles_lock)) {
if (!bundle->exclusive) {
_debug("erase bundle");
@@ -982,7 +716,7 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
trace_rxrpc_client(conn, -1, rxrpc_client_cleanup);
atomic_dec(&rxnet->nr_client_conns);
- rxrpc_put_client_connection_id(conn);
+ rxrpc_put_client_connection_id(local, conn);
}
/*
@@ -992,42 +726,26 @@ void rxrpc_kill_client_conn(struct rxrpc_connection *conn)
* This may be called from conn setup or from a work item, so it must be
* assumed to be reentrant.
*/
-void rxrpc_discard_expired_client_conns(struct work_struct *work)
+void rxrpc_discard_expired_client_conns(struct rxrpc_local *local)
{
struct rxrpc_connection *conn;
- struct rxrpc_net *rxnet =
- container_of(work, struct rxrpc_net, client_conn_reaper);
unsigned long expiry, conn_expires_at, now;
unsigned int nr_conns;
_enter("");
- if (list_empty(&rxnet->idle_client_conns)) {
- _leave(" [empty]");
- return;
- }
-
- /* Don't double up on the discarding */
- if (!mutex_trylock(&rxnet->client_conn_discard_lock)) {
- _leave(" [already]");
- return;
- }
-
/* We keep an estimate of what the number of conns ought to be after
* we've discarded some so that we don't overdo the discarding.
*/
- nr_conns = atomic_read(&rxnet->nr_client_conns);
+ nr_conns = atomic_read(&local->rxnet->nr_client_conns);
next:
- spin_lock(&rxnet->client_conn_cache_lock);
-
- if (list_empty(&rxnet->idle_client_conns))
- goto out;
-
- conn = list_entry(rxnet->idle_client_conns.next,
- struct rxrpc_connection, cache_link);
+ conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link);
+ if (!conn)
+ return;
- if (!rxnet->kill_all_client_conns) {
+ if (!local->kill_all_client_conns) {
/* If the number of connections is over the reap limit, we
* expedite discard by reducing the expiry timeout. We must,
* however, have at least a short grace period to be able to do
@@ -1050,8 +768,6 @@ next:
trace_rxrpc_client(conn, -1, rxrpc_client_discard);
list_del_init(&conn->cache_link);
- spin_unlock(&rxnet->client_conn_cache_lock);
-
rxrpc_unbundle_conn(conn);
/* Drop the ->cache_link ref */
rxrpc_put_connection(conn, rxrpc_conn_put_discard_idle);
@@ -1068,31 +784,8 @@ not_yet_expired:
* then things get messier.
*/
_debug("not yet");
- if (!rxnet->kill_all_client_conns)
- timer_reduce(&rxnet->client_conn_reap_timer, conn_expires_at);
-
-out:
- spin_unlock(&rxnet->client_conn_cache_lock);
- mutex_unlock(&rxnet->client_conn_discard_lock);
- _leave("");
-}
-
-/*
- * Preemptively destroy all the client connection records rather than waiting
- * for them to time out
- */
-void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
-{
- _enter("");
-
- spin_lock(&rxnet->client_conn_cache_lock);
- rxnet->kill_all_client_conns = true;
- spin_unlock(&rxnet->client_conn_cache_lock);
-
- del_timer_sync(&rxnet->client_conn_reap_timer);
-
- if (!rxrpc_queue_work(&rxnet->client_conn_reaper))
- _debug("destroy: queue failed");
+ if (!local->kill_all_client_conns)
+ timer_reduce(&local->client_conn_reap_timer, conn_expires_at);
_leave("");
}
@@ -1102,29 +795,19 @@ void rxrpc_destroy_all_client_connections(struct rxrpc_net *rxnet)
*/
void rxrpc_clean_up_local_conns(struct rxrpc_local *local)
{
- struct rxrpc_connection *conn, *tmp;
- struct rxrpc_net *rxnet = local->rxnet;
- LIST_HEAD(graveyard);
+ struct rxrpc_connection *conn;
_enter("");
- spin_lock(&rxnet->client_conn_cache_lock);
-
- list_for_each_entry_safe(conn, tmp, &rxnet->idle_client_conns,
- cache_link) {
- if (conn->local == local) {
- atomic_dec(&conn->active);
- trace_rxrpc_client(conn, -1, rxrpc_client_discard);
- list_move(&conn->cache_link, &graveyard);
- }
- }
+ local->kill_all_client_conns = true;
- spin_unlock(&rxnet->client_conn_cache_lock);
+ del_timer_sync(&local->client_conn_reap_timer);
- while (!list_empty(&graveyard)) {
- conn = list_entry(graveyard.next,
- struct rxrpc_connection, cache_link);
+ while ((conn = list_first_entry_or_null(&local->idle_client_conns,
+ struct rxrpc_connection, cache_link))) {
list_del_init(&conn->cache_link);
+ atomic_dec(&conn->active);
+ trace_rxrpc_client(conn, -1, rxrpc_client_discard);
rxrpc_unbundle_conn(conn);
rxrpc_put_connection(conn, rxrpc_conn_put_local_dead);
}
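With the ID space moved into struct rxrpc_local, a client CID is simply the per-endpoint idr slot shifted up by RXRPC_CIDSHIFT so that the low bits can carry the channel number. A minimal sketch of the allocation as the patch performs it (error handling trimmed; cid_shift is a stand-in parameter):

#include <linux/gfp.h>
#include <linux/idr.h>
#include <linux/types.h>

/* Allocate a cyclic ID in [1, 0x40000000) and derive the CID from it; the
 * bits below cid_shift select one of the connection's four channels.
 * GFP_ATOMIC because the patch now allocates from the I/O thread.
 */
static int alloc_client_cid(struct idr *conn_ids, void *conn,
			    unsigned int cid_shift, u32 *_cid)
{
	int id = idr_alloc_cyclic(conn_ids, conn, 1, 0x40000000,
				  GFP_ATOMIC | __GFP_NOWARN);

	if (id < 0)
		return id;
	*_cid = id << cid_shift;
	return 0;
}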
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 480364bcbf85..95f4bc206b3d 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -17,11 +17,65 @@
#include "ar-internal.h"
/*
+ * Set the completion state on an aborted connection.
+ */
+static bool rxrpc_set_conn_aborted(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err,
+ enum rxrpc_call_completion compl)
+{
+ bool aborted = false;
+
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ spin_lock(&conn->state_lock);
+ if (conn->state != RXRPC_CONN_ABORTED) {
+ conn->abort_code = abort_code;
+ conn->error = err;
+ conn->completion = compl;
+ /* Order the abort info before the state change. */
+ smp_store_release(&conn->state, RXRPC_CONN_ABORTED);
+ set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
+ set_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events);
+ aborted = true;
+ }
+ spin_unlock(&conn->state_lock);
+ }
+
+ return aborted;
+}
+
+/*
+ * Mark a socket buffer to indicate that the connection it's on should be aborted.
+ */
+int rxrpc_abort_conn(struct rxrpc_connection *conn, struct sk_buff *skb,
+ s32 abort_code, int err, enum rxrpc_abort_reason why)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ if (rxrpc_set_conn_aborted(conn, skb, abort_code, err,
+ RXRPC_CALL_LOCALLY_ABORTED)) {
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber,
+ sp->hdr.seq, abort_code, err);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_abort);
+ }
+ return -EPROTO;
+}
+
+/*
+ * Mark a connection as being remotely aborted.
+ */
+static bool rxrpc_input_conn_abort(struct rxrpc_connection *conn,
+ struct sk_buff *skb)
+{
+ return rxrpc_set_conn_aborted(conn, skb, skb->priority, -ECONNABORTED,
+ RXRPC_CALL_REMOTELY_ABORTED);
+}
+
+/*
* Retransmit terminal ACK or ABORT of the previous call.
*/
-static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- unsigned int channel)
+void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
+ struct sk_buff *skb,
+ unsigned int channel)
{
struct rxrpc_skb_priv *sp = skb ? rxrpc_skb(skb) : NULL;
struct rxrpc_channel *chan;
@@ -46,9 +100,7 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
/* If the last call got moved on whilst we were waiting to run, just
* ignore this packet.
*/
- call_id = READ_ONCE(chan->last_call);
- /* Sync with __rxrpc_disconnect_call() */
- smp_rmb();
+ call_id = chan->last_call;
if (skb && call_id != sp->hdr.callNumber)
return;
@@ -65,9 +117,12 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
+ serial = atomic_inc_return(&conn->serial);
+
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
pkt.whdr.callNumber = htonl(call_id);
+ pkt.whdr.serial = htonl(serial);
pkt.whdr.seq = 0;
pkt.whdr.type = chan->last_type;
pkt.whdr.flags = conn->out_clientflag;
@@ -104,31 +159,15 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[0].iov_len += sizeof(pkt.ack);
len += sizeof(pkt.ack) + 3 + sizeof(ack_info);
ioc = 3;
- break;
-
- default:
- return;
- }
-
- /* Resync with __rxrpc_disconnect_call() and check that the last call
- * didn't get advanced whilst we were filling out the packets.
- */
- smp_rmb();
- if (READ_ONCE(chan->last_call) != call_id)
- return;
-
- serial = atomic_inc_return(&conn->serial);
- pkt.whdr.serial = htonl(serial);
- switch (chan->last_type) {
- case RXRPC_PACKET_TYPE_ABORT:
- break;
- case RXRPC_PACKET_TYPE_ACK:
trace_rxrpc_tx_ack(chan->call_debug_id, serial,
ntohl(pkt.ack.firstPacket),
ntohl(pkt.ack.serial),
- pkt.ack.reason, 0);
+ pkt.ack.reason, 0, rxrpc_rx_window_size);
break;
+
+ default:
+ return;
}
ret = kernel_sendmsg(conn->local->socket, &msg, iov, ioc, len);
@@ -146,131 +185,34 @@ static void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
/*
* pass a connection-level abort onto all calls on that connection
*/
-static void rxrpc_abort_calls(struct rxrpc_connection *conn,
- enum rxrpc_call_completion compl,
- rxrpc_serial_t serial)
+static void rxrpc_abort_calls(struct rxrpc_connection *conn)
{
struct rxrpc_call *call;
int i;
_enter("{%d},%x", conn->debug_id, conn->abort_code);
- spin_lock(&conn->bundle->channel_lock);
-
for (i = 0; i < RXRPC_MAXCALLS; i++) {
- call = rcu_dereference_protected(
- conn->channels[i].call,
- lockdep_is_held(&conn->bundle->channel_lock));
- if (call) {
- if (compl == RXRPC_CALL_LOCALLY_ABORTED)
- trace_rxrpc_abort(call->debug_id,
- "CON", call->cid,
- call->call_id, 0,
+ call = conn->channels[i].call;
+ if (call)
+ rxrpc_set_call_completion(call,
+ conn->completion,
conn->abort_code,
conn->error);
- else
- trace_rxrpc_rx_abort(call, serial,
- conn->abort_code);
- rxrpc_set_call_completion(call, compl,
- conn->abort_code,
- conn->error);
- }
}
- spin_unlock(&conn->bundle->channel_lock);
_leave("");
}
/*
- * generate a connection-level abort
- */
-static int rxrpc_abort_connection(struct rxrpc_connection *conn,
- int error, u32 abort_code)
-{
- struct rxrpc_wire_header whdr;
- struct msghdr msg;
- struct kvec iov[2];
- __be32 word;
- size_t len;
- u32 serial;
- int ret;
-
- _enter("%d,,%u,%u", conn->debug_id, error, abort_code);
-
- /* generate a connection-level abort */
- spin_lock(&conn->state_lock);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- spin_unlock(&conn->state_lock);
- _leave(" = 0 [already dead]");
- return 0;
- }
-
- conn->error = error;
- conn->abort_code = abort_code;
- conn->state = RXRPC_CONN_LOCALLY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- spin_unlock(&conn->state_lock);
-
- msg.msg_name = &conn->peer->srx.transport;
- msg.msg_namelen = conn->peer->srx.transport_len;
- msg.msg_control = NULL;
- msg.msg_controllen = 0;
- msg.msg_flags = 0;
-
- whdr.epoch = htonl(conn->proto.epoch);
- whdr.cid = htonl(conn->proto.cid);
- whdr.callNumber = 0;
- whdr.seq = 0;
- whdr.type = RXRPC_PACKET_TYPE_ABORT;
- whdr.flags = conn->out_clientflag;
- whdr.userStatus = 0;
- whdr.securityIndex = conn->security_ix;
- whdr._rsvd = 0;
- whdr.serviceId = htons(conn->service_id);
-
- word = htonl(conn->abort_code);
-
- iov[0].iov_base = &whdr;
- iov[0].iov_len = sizeof(whdr);
- iov[1].iov_base = &word;
- iov[1].iov_len = sizeof(word);
-
- len = iov[0].iov_len + iov[1].iov_len;
-
- serial = atomic_inc_return(&conn->serial);
- rxrpc_abort_calls(conn, RXRPC_CALL_LOCALLY_ABORTED, serial);
- whdr.serial = htonl(serial);
-
- ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
- if (ret < 0) {
- trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
- rxrpc_tx_point_conn_abort);
- _debug("sendmsg failed: %d", ret);
- return -EAGAIN;
- }
-
- trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
-
- conn->peer->last_tx_at = ktime_get_seconds();
-
- _leave(" = 0");
- return 0;
-}
-
-/*
* mark a call as being on a now-secured channel
* - must be called with BH's disabled.
*/
static void rxrpc_call_is_secure(struct rxrpc_call *call)
{
- _enter("%p", call);
- if (call) {
- write_lock(&call->state_lock);
- if (call->state == RXRPC_CALL_SERVER_SECURING) {
- call->state = RXRPC_CALL_SERVER_RECV_REQUEST;
- rxrpc_notify_socket(call);
- }
- write_unlock(&call->state_lock);
+ if (call && __rxrpc_call_state(call) == RXRPC_CALL_SERVER_SECURING) {
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_RECV_REQUEST);
+ rxrpc_notify_socket(call);
}
}
@@ -278,44 +220,22 @@ static void rxrpc_call_is_secure(struct rxrpc_call *call)
* connection-level Rx packet processor
*/
static int rxrpc_process_event(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- int loop, ret;
+ int ret;
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- _leave(" = -ECONNABORTED [%u]", conn->state);
+ if (conn->state == RXRPC_CONN_ABORTED)
return -ECONNABORTED;
- }
_enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_DATA:
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb,
- sp->hdr.cid & RXRPC_CHANNELMASK);
- return 0;
-
- case RXRPC_PACKET_TYPE_BUSY:
- /* Just ignore BUSY packets for now. */
- return 0;
-
- case RXRPC_PACKET_TYPE_ABORT:
- conn->error = -ECONNABORTED;
- conn->abort_code = skb->priority;
- conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
- return -ECONNABORTED;
-
case RXRPC_PACKET_TYPE_CHALLENGE:
- return conn->security->respond_to_challenge(conn, skb,
- _abort_code);
+ return conn->security->respond_to_challenge(conn, skb);
case RXRPC_PACKET_TYPE_RESPONSE:
- ret = conn->security->verify_response(conn, skb, _abort_code);
+ ret = conn->security->verify_response(conn, skb);
if (ret < 0)
return ret;
@@ -324,27 +244,25 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
if (ret < 0)
return ret;
- spin_lock(&conn->bundle->channel_lock);
spin_lock(&conn->state_lock);
-
- if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING) {
+ if (conn->state == RXRPC_CONN_SERVICE_CHALLENGING)
conn->state = RXRPC_CONN_SERVICE;
- spin_unlock(&conn->state_lock);
- for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
- rxrpc_call_is_secure(
- rcu_dereference_protected(
- conn->channels[loop].call,
- lockdep_is_held(&conn->bundle->channel_lock)));
- } else {
- spin_unlock(&conn->state_lock);
- }
+ spin_unlock(&conn->state_lock);
- spin_unlock(&conn->bundle->channel_lock);
+ if (conn->state == RXRPC_CONN_SERVICE) {
+ /* Offload call state flipping to the I/O thread. As
+ * we've already received the packet, put it on the
+ * front of the queue.
+ */
+ skb->mark = RXRPC_SKB_MARK_SERVICE_CONN_SECURED;
+ rxrpc_get_skb(skb, rxrpc_skb_get_conn_secured);
+ skb_queue_head(&conn->local->rx_queue, skb);
+ rxrpc_wake_up_io_thread(conn->local);
+ }
return 0;
default:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_conn_pkt"));
+ WARN_ON_ONCE(1);
return -EPROTO;
}
}
@@ -354,26 +272,9 @@ static int rxrpc_process_event(struct rxrpc_connection *conn,
*/
static void rxrpc_secure_connection(struct rxrpc_connection *conn)
{
- u32 abort_code;
- int ret;
-
- _enter("{%d}", conn->debug_id);
-
- ASSERT(conn->security_ix != 0);
-
- if (conn->security->issue_challenge(conn) < 0) {
- abort_code = RX_CALL_DEAD;
- ret = -ENOMEM;
- goto abort;
- }
-
- _leave("");
- return;
-
-abort:
- _debug("abort %d, %d", ret, abort_code);
- rxrpc_abort_connection(conn, ret, abort_code);
- _leave(" [aborted]");
+ if (conn->security->issue_challenge(conn) < 0)
+ rxrpc_abort_conn(conn, NULL, RX_CALL_DEAD, -ENOMEM,
+ rxrpc_abort_nomem);
}
/*
@@ -395,9 +296,7 @@ again:
if (!test_bit(RXRPC_CONN_FINAL_ACK_0 + channel, &conn->flags))
continue;
- smp_rmb(); /* vs rxrpc_disconnect_client_call */
- ack_at = READ_ONCE(chan->final_ack_at);
-
+ ack_at = chan->final_ack_at;
if (time_before(j, ack_at) && !force) {
if (time_before(ack_at, next_j)) {
next_j = ack_at;
@@ -424,47 +323,27 @@ again:
static void rxrpc_do_process_connection(struct rxrpc_connection *conn)
{
struct sk_buff *skb;
- u32 abort_code = RX_PROTOCOL_ERROR;
int ret;
if (test_and_clear_bit(RXRPC_CONN_EV_CHALLENGE, &conn->events))
rxrpc_secure_connection(conn);
- /* Process delayed ACKs whose time has come. */
- if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
- rxrpc_process_delayed_final_acks(conn, false);
-
/* go through the conn-level event packets, releasing the ref on this
* connection that each one has when we've finished with it */
while ((skb = skb_dequeue(&conn->rx_queue))) {
rxrpc_see_skb(skb, rxrpc_skb_see_conn_work);
- ret = rxrpc_process_event(conn, skb, &abort_code);
+ ret = rxrpc_process_event(conn, skb);
switch (ret) {
- case -EPROTO:
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- goto protocol_error;
case -ENOMEM:
case -EAGAIN:
- goto requeue_and_leave;
- case -ECONNABORTED:
+ skb_queue_head(&conn->rx_queue, skb);
+ rxrpc_queue_conn(conn, rxrpc_conn_queue_retry_work);
+ break;
default:
rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
break;
}
}
-
- return;
-
-requeue_and_leave:
- skb_queue_head(&conn->rx_queue, skb);
- return;
-
-protocol_error:
- if (rxrpc_abort_connection(conn, ret, abort_code) < 0)
- goto requeue_and_leave;
- rxrpc_free_skb(skb, rxrpc_skb_put_conn_work);
- return;
}
void rxrpc_process_connection(struct work_struct *work)
@@ -498,44 +377,59 @@ static void rxrpc_post_packet_to_conn(struct rxrpc_connection *conn,
/*
* Input a connection-level packet.
*/
-int rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
+bool rxrpc_input_conn_packet(struct rxrpc_connection *conn, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- if (conn->state >= RXRPC_CONN_REMOTELY_ABORTED) {
- _leave(" = -ECONNABORTED [%u]", conn->state);
- return -ECONNABORTED;
- }
-
- _enter("{%d},{%u,%%%u},", conn->debug_id, sp->hdr.type, sp->hdr.serial);
-
switch (sp->hdr.type) {
- case RXRPC_PACKET_TYPE_DATA:
- case RXRPC_PACKET_TYPE_ACK:
- rxrpc_conn_retransmit_call(conn, skb,
- sp->hdr.cid & RXRPC_CHANNELMASK);
- return 0;
-
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets for now. */
- return 0;
+ return true;
case RXRPC_PACKET_TYPE_ABORT:
- conn->error = -ECONNABORTED;
- conn->abort_code = skb->priority;
- conn->state = RXRPC_CONN_REMOTELY_ABORTED;
- set_bit(RXRPC_CONN_DONT_REUSE, &conn->flags);
- rxrpc_abort_calls(conn, RXRPC_CALL_REMOTELY_ABORTED, sp->hdr.serial);
- return -ECONNABORTED;
+ if (rxrpc_is_conn_aborted(conn))
+ return true;
+ rxrpc_input_conn_abort(conn, skb);
+ rxrpc_abort_calls(conn);
+ return true;
case RXRPC_PACKET_TYPE_CHALLENGE:
case RXRPC_PACKET_TYPE_RESPONSE:
+ if (rxrpc_is_conn_aborted(conn)) {
+ if (conn->completion == RXRPC_CALL_LOCALLY_ABORTED)
+ rxrpc_send_conn_abort(conn);
+ return true;
+ }
rxrpc_post_packet_to_conn(conn, skb);
- return 0;
+ return true;
default:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_conn_pkt"));
- return -EPROTO;
+ WARN_ON_ONCE(1);
+ return true;
}
}
+
+/*
+ * Input a connection event.
+ */
+void rxrpc_input_conn_event(struct rxrpc_connection *conn, struct sk_buff *skb)
+{
+ unsigned int loop;
+
+ if (test_and_clear_bit(RXRPC_CONN_EV_ABORT_CALLS, &conn->events))
+ rxrpc_abort_calls(conn);
+
+ switch (skb->mark) {
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ if (conn->state != RXRPC_CONN_SERVICE)
+ break;
+
+ for (loop = 0; loop < RXRPC_MAXCALLS; loop++)
+ rxrpc_call_is_secure(conn->channels[loop].call);
+ break;
+ }
+
+ /* Process delayed ACKs whose time has come. */
+ if (conn->flags & RXRPC_CONN_FINAL_ACK_MASK)
+ rxrpc_process_delayed_final_acks(conn, false);
+}
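rxrpc_set_conn_aborted() above is a set-once publication: test outside the lock, retest inside, then store RXRPC_CONN_ABORTED with release semantics so that any reader observing the aborted state also observes abort_code, error and completion. A sketch of that shape with stand-in names (rxrpc_is_conn_aborted() is presumably the matching acquire-side check):

#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/types.h>

enum conn_sketch_state { CONN_ACTIVE, CONN_ABORTED };

struct conn_sketch {
	spinlock_t	state_lock;
	unsigned int	state;
	u32		abort_code;
	int		error;
};

static bool mark_conn_aborted(struct conn_sketch *conn, u32 code, int error)
{
	bool newly_aborted = false;

	if (READ_ONCE(conn->state) != CONN_ABORTED) {
		spin_lock(&conn->state_lock);
		if (conn->state != CONN_ABORTED) {
			conn->abort_code = code;
			conn->error = error;
			/* Order the abort info before the state change. */
			smp_store_release(&conn->state, CONN_ABORTED);
			newly_aborted = true;
		}
		spin_unlock(&conn->state_lock);
	}
	return newly_aborted;
}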
diff --git a/net/rxrpc/conn_object.c b/net/rxrpc/conn_object.c
index 3c8f83dacb2b..ac85d4644a3c 100644
--- a/net/rxrpc/conn_object.c
+++ b/net/rxrpc/conn_object.c
@@ -23,12 +23,30 @@ static void rxrpc_clean_up_connection(struct work_struct *work);
static void rxrpc_set_service_reap_timer(struct rxrpc_net *rxnet,
unsigned long reap_at);
+void rxrpc_poke_conn(struct rxrpc_connection *conn, enum rxrpc_conn_trace why)
+{
+ struct rxrpc_local *local = conn->local;
+ bool busy;
+
+ if (WARN_ON_ONCE(!local))
+ return;
+
+ spin_lock_bh(&local->lock);
+ busy = !list_empty(&conn->attend_link);
+ if (!busy) {
+ rxrpc_get_connection(conn, why);
+ list_add_tail(&conn->attend_link, &local->conn_attend_q);
+ }
+ spin_unlock_bh(&local->lock);
+ rxrpc_wake_up_io_thread(local);
+}
+
static void rxrpc_connection_timer(struct timer_list *timer)
{
struct rxrpc_connection *conn =
container_of(timer, struct rxrpc_connection, timer);
- rxrpc_queue_conn(conn, rxrpc_conn_queue_timer);
+ rxrpc_poke_conn(conn, rxrpc_conn_get_poke_timer);
}
/*
@@ -49,6 +67,7 @@ struct rxrpc_connection *rxrpc_alloc_connection(struct rxrpc_net *rxnet,
INIT_WORK(&conn->destructor, rxrpc_clean_up_connection);
INIT_LIST_HEAD(&conn->proc_link);
INIT_LIST_HEAD(&conn->link);
+ mutex_init(&conn->security_lock);
skb_queue_head_init(&conn->rx_queue);
conn->rxnet = rxnet;
conn->security = &rxrpc_no_security;
@@ -82,10 +101,10 @@ struct rxrpc_connection *rxrpc_find_client_connection_rcu(struct rxrpc_local *lo
_enter(",%x", sp->hdr.cid & RXRPC_CIDMASK);
- /* Look up client connections by connection ID alone as their IDs are
- * unique for this machine.
+ /* Look up client connections by connection ID alone as their
+ * IDs are unique for this machine.
*/
- conn = idr_find(&rxrpc_client_conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
+ conn = idr_find(&local->conn_ids, sp->hdr.cid >> RXRPC_CIDSHIFT);
if (!conn || refcount_read(&conn->ref) == 0) {
_debug("no conn");
goto not_found;
@@ -139,7 +158,7 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
_enter("%d,%x", conn->debug_id, call->cid);
- if (rcu_access_pointer(chan->call) == call) {
+ if (chan->call == call) {
/* Save the result of the call so that we can repeat it if necessary
* through the channel, whilst disposing of the actual call record.
*/
@@ -159,12 +178,9 @@ void __rxrpc_disconnect_call(struct rxrpc_connection *conn,
break;
}
- /* Sync with rxrpc_conn_retransmit(). */
- smp_wmb();
chan->last_call = chan->call_id;
chan->call_id = chan->call_counter;
-
- rcu_assign_pointer(chan->call, NULL);
+ chan->call = NULL;
}
_leave("");
@@ -178,6 +194,9 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
{
struct rxrpc_connection *conn = call->conn;
+ set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
+ rxrpc_see_call(call, rxrpc_call_see_disconnected);
+
call->peer->cong_ssthresh = call->cong_ssthresh;
if (!hlist_unhashed(&call->error_link)) {
@@ -186,18 +205,17 @@ void rxrpc_disconnect_call(struct rxrpc_call *call)
spin_unlock(&call->peer->lock);
}
- if (rxrpc_is_client_call(call))
- return rxrpc_disconnect_client_call(conn->bundle, call);
-
- spin_lock(&conn->bundle->channel_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&conn->bundle->channel_lock);
+ if (rxrpc_is_client_call(call)) {
+ rxrpc_disconnect_client_call(call->bundle, call);
+ } else {
+ __rxrpc_disconnect_call(conn, call);
+ conn->idle_timestamp = jiffies;
+ if (atomic_dec_and_test(&conn->active))
+ rxrpc_set_service_reap_timer(conn->rxnet,
+ jiffies + rxrpc_connection_expiry);
+ }
- set_bit(RXRPC_CALL_DISCONNECTED, &call->flags);
- conn->idle_timestamp = jiffies;
- if (atomic_dec_and_test(&conn->active))
- rxrpc_set_service_reap_timer(conn->rxnet,
- jiffies + rxrpc_connection_expiry);
+ rxrpc_put_call(call, rxrpc_call_put_io_thread);
}
/*
@@ -293,10 +311,10 @@ static void rxrpc_clean_up_connection(struct work_struct *work)
container_of(work, struct rxrpc_connection, destructor);
struct rxrpc_net *rxnet = conn->rxnet;
- ASSERT(!rcu_access_pointer(conn->channels[0].call) &&
- !rcu_access_pointer(conn->channels[1].call) &&
- !rcu_access_pointer(conn->channels[2].call) &&
- !rcu_access_pointer(conn->channels[3].call));
+ ASSERT(!conn->channels[0].call &&
+ !conn->channels[1].call &&
+ !conn->channels[2].call &&
+ !conn->channels[3].call);
ASSERT(list_empty(&conn->cache_link));
del_timer_sync(&conn->timer);
@@ -447,7 +465,6 @@ void rxrpc_destroy_all_connections(struct rxrpc_net *rxnet)
_enter("");
atomic_dec(&rxnet->nr_conns);
- rxrpc_destroy_all_client_connections(rxnet);
del_timer_sync(&rxnet->service_conn_reap_timer);
rxrpc_queue_work(&rxnet->service_conn_reaper);
diff --git a/net/rxrpc/conn_service.c b/net/rxrpc/conn_service.c
index 2a55a88b2a5b..89ac05a711a4 100644
--- a/net/rxrpc/conn_service.c
+++ b/net/rxrpc/conn_service.c
@@ -8,12 +8,6 @@
#include <linux/slab.h>
#include "ar-internal.h"
-static struct rxrpc_bundle rxrpc_service_dummy_bundle = {
- .ref = REFCOUNT_INIT(1),
- .debug_id = UINT_MAX,
- .channel_lock = __SPIN_LOCK_UNLOCKED(&rxrpc_service_dummy_bundle.channel_lock),
-};
-
/*
* Find a service connection under RCU conditions.
*
@@ -133,8 +127,6 @@ struct rxrpc_connection *rxrpc_prealloc_service_connection(struct rxrpc_net *rxn
*/
conn->state = RXRPC_CONN_SERVICE_PREALLOC;
refcount_set(&conn->ref, 2);
- conn->bundle = rxrpc_get_bundle(&rxrpc_service_dummy_bundle,
- rxrpc_bundle_get_service_conn);
atomic_inc(&rxnet->nr_conns);
write_lock(&rxnet->conn_lock);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index d0e20e946e48..030d64f282f3 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -9,11 +9,10 @@
#include "ar-internal.h"
-static void rxrpc_proto_abort(const char *why,
- struct rxrpc_call *call, rxrpc_seq_t seq)
+static void rxrpc_proto_abort(struct rxrpc_call *call, rxrpc_seq_t seq,
+ enum rxrpc_abort_reason why)
{
- if (rxrpc_abort_call(why, call, seq, RX_PROTOCOL_ERROR, -EBADMSG))
- rxrpc_send_abort_packet(call);
+ rxrpc_abort_call(call, seq, RX_PROTOCOL_ERROR, -EBADMSG, why);
}
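
Replacing the old three-letter string tags with enum rxrpc_abort_reason lets abort sites be declared once, compared cheaply and decoded symbolically in traces. A freestanding sketch of the pattern, with made-up reason names:

/* Enumerated abort reasons plus a parallel name table for decoding. */
#include <stdio.h>

enum abort_reason {
	eproto_early_reply,
	eproto_unexpected_ack,
	badmsg_short_ack,
	nr_abort_reasons
};

static const char *const abort_reason_names[nr_abort_reasons] = {
	[eproto_early_reply]	= "early-reply",
	[eproto_unexpected_ack]	= "unexpected-ack",
	[badmsg_short_ack]	= "short-ack",
};

static void trace_abort(unsigned int call_id, enum abort_reason why)
{
	printf("call %u aborted: %s\n", call_id, abort_reason_names[why]);
}

int main(void)
{
	trace_abort(1, badmsg_short_ack);
	return 0;
}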
/*
@@ -185,7 +184,7 @@ void rxrpc_congestion_degrade(struct rxrpc_call *call)
if (call->cong_mode != RXRPC_CALL_SLOW_START &&
call->cong_mode != RXRPC_CALL_CONGEST_AVOIDANCE)
return;
- if (call->state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
+ if (__rxrpc_call_state(call) == RXRPC_CALL_CLIENT_AWAIT_REPLY)
return;
rtt = ns_to_ktime(call->peer->srtt_us * (1000 / 8));
@@ -250,47 +249,34 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
* This occurs when we get an ACKALL packet, the first DATA packet of a reply,
* or a final ACK packet.
*/
-static bool rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
- const char *abort_why)
+static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
+ enum rxrpc_abort_reason abort_why)
{
- unsigned int state;
-
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
- write_lock(&call->state_lock);
-
- state = call->state;
- switch (state) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
- if (reply_begun)
- call->state = state = RXRPC_CALL_CLIENT_RECV_REPLY;
- else
- call->state = state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
+ if (reply_begun) {
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_RECV_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
+ break;
+ }
+
+ rxrpc_set_call_state(call, RXRPC_CALL_CLIENT_AWAIT_REPLY);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
break;
case RXRPC_CALL_SERVER_AWAIT_ACK:
- __rxrpc_call_completed(call);
- state = call->state;
+ rxrpc_call_completed(call);
+ trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
break;
default:
- goto bad_state;
+ kdebug("end_tx %s", rxrpc_call_states[__rxrpc_call_state(call)]);
+ rxrpc_proto_abort(call, call->tx_top, abort_why);
+ break;
}
-
- write_unlock(&call->state_lock);
- if (state == RXRPC_CALL_CLIENT_AWAIT_REPLY)
- trace_rxrpc_txqueue(call, rxrpc_txqueue_await_reply);
- else
- trace_rxrpc_txqueue(call, rxrpc_txqueue_end);
- _leave(" = ok");
- return true;
-
-bad_state:
- write_unlock(&call->state_lock);
- kdebug("end_tx %s", rxrpc_call_states[call->state]);
- rxrpc_proto_abort(abort_why, call, call->tx_top);
- return false;
}
/*
@@ -305,24 +291,55 @@ static bool rxrpc_receiving_reply(struct rxrpc_call *call)
if (call->ackr_reason) {
now = jiffies;
timo = now + MAX_JIFFY_OFFSET;
- WRITE_ONCE(call->resend_at, timo);
+
WRITE_ONCE(call->delay_ack_at, timo);
trace_rxrpc_timer(call, rxrpc_timer_init_for_reply, now);
}
if (!test_bit(RXRPC_CALL_TX_LAST, &call->flags)) {
if (!rxrpc_rotate_tx_window(call, top, &summary)) {
- rxrpc_proto_abort("TXL", call, top);
+ rxrpc_proto_abort(call, top, rxrpc_eproto_early_reply);
return false;
}
}
- return rxrpc_end_tx_phase(call, true, "ETD");
+
+ rxrpc_end_tx_phase(call, true, rxrpc_eproto_unexpected_reply);
+ return true;
+}
+
+/*
+ * End the packet reception phase.
+ */
+static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
+{
+ rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
+
+ _enter("%d,%s", call->debug_id, rxrpc_call_states[__rxrpc_call_state(call)]);
+
+ trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
+
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_RECV_REPLY:
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
+ rxrpc_call_completed(call);
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST:
+ rxrpc_set_call_state(call, RXRPC_CALL_SERVER_ACK_REQUEST);
+ call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
+ rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_processing_op);
+ break;
+
+ default:
+ break;
+ }
}
static void rxrpc_input_update_ack_window(struct rxrpc_call *call,
rxrpc_seq_t window, rxrpc_seq_t wtop)
{
- atomic64_set_release(&call->ackr_window, ((u64)wtop) << 32 | window);
+ call->ackr_window = window;
+ call->ackr_wtop = wtop;
}
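
Previously window and wtop were packed into a single atomic64 so a reader on another CPU always saw a consistent pair; now that reception is confined to the I/O thread, two plain fields suffice. A small demo of the old packing for contrast:

/* Pack (window, wtop) into one 64-bit value, as the removed code did. */
#include <stdint.h>
#include <stdio.h>

static uint64_t pack(uint32_t window, uint32_t wtop)
{
	return ((uint64_t)wtop << 32) | window;
}

int main(void)
{
	uint64_t w = pack(5, 9);
	uint32_t window = (uint32_t)w;		/* lower_32_bits() */
	uint32_t wtop   = (uint32_t)(w >> 32);	/* upper_32_bits() */

	printf("window=%u wtop=%u pending=%u\n", window, wtop, wtop - window);
	return 0;
}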
/*
@@ -337,8 +354,9 @@ static void rxrpc_input_queue_data(struct rxrpc_call *call, struct sk_buff *skb,
__skb_queue_tail(&call->recvmsg_queue, skb);
rxrpc_input_update_ack_window(call, window, wtop);
-
trace_rxrpc_receive(call, last ? why + 1 : why, sp->hdr.serial, sp->hdr.seq);
+ if (last)
+ rxrpc_end_rx_phase(call, sp->hdr.serial);
}
/*
@@ -350,9 +368,9 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct sk_buff *oos;
rxrpc_serial_t serial = sp->hdr.serial;
- u64 win = atomic64_read(&call->ackr_window);
- rxrpc_seq_t window = lower_32_bits(win);
- rxrpc_seq_t wtop = upper_32_bits(win);
+ unsigned int sack = call->ackr_sack_base;
+ rxrpc_seq_t window = call->ackr_window;
+ rxrpc_seq_t wtop = call->ackr_wtop;
rxrpc_seq_t wlimit = window + call->rx_winsize - 1;
rxrpc_seq_t seq = sp->hdr.seq;
bool last = sp->hdr.flags & RXRPC_LAST_PACKET;
@@ -366,17 +384,14 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
if (last) {
if (test_and_set_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
- seq + 1 != wtop) {
- rxrpc_proto_abort("LSN", call, seq);
- return;
- }
+ seq + 1 != wtop)
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_different_last);
} else {
if (test_bit(RXRPC_CALL_RX_LAST, &call->flags) &&
after_eq(seq, wtop)) {
pr_warn("Packet beyond last: c=%x q=%x window=%x-%x wlimit=%x\n",
call->debug_id, seq, window, wtop, wlimit);
- rxrpc_proto_abort("LSA", call, seq);
- return;
+ return rxrpc_proto_abort(call, seq, rxrpc_eproto_data_after_last);
}
}
@@ -396,20 +411,23 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
/* Queue the packet. */
if (seq == window) {
- rxrpc_seq_t reset_from;
- bool reset_sack = false;
-
if (sp->hdr.flags & RXRPC_REQUEST_ACK)
ack_reason = RXRPC_ACK_REQUESTED;
/* Send an immediate ACK if we fill in a hole */
else if (!skb_queue_empty(&call->rx_oos_queue))
ack_reason = RXRPC_ACK_DELAY;
else
- atomic_inc_return(&call->ackr_nr_unacked);
+ call->ackr_nr_unacked++;
window++;
- if (after(window, wtop))
+ if (after(window, wtop)) {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_none);
wtop = window;
+ } else {
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_advance);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
+ }
+
rxrpc_get_skb(skb, rxrpc_skb_get_to_recvmsg);
@@ -426,43 +444,39 @@ static void rxrpc_input_data_one(struct rxrpc_call *call, struct sk_buff *skb,
__skb_unlink(oos, &call->rx_oos_queue);
last = osp->hdr.flags & RXRPC_LAST_PACKET;
seq = osp->hdr.seq;
- if (!reset_sack) {
- reset_from = seq;
- reset_sack = true;
- }
+ call->ackr_sack_table[sack] = 0;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_fill);
+ sack = (sack + 1) % RXRPC_SACK_SIZE;
window++;
rxrpc_input_queue_data(call, oos, window, wtop,
- rxrpc_receive_queue_oos);
+ rxrpc_receive_queue_oos);
}
spin_unlock(&call->recvmsg_queue.lock);
- if (reset_sack) {
- do {
- call->ackr_sack_table[reset_from % RXRPC_SACK_SIZE] = 0;
- } while (reset_from++, before(reset_from, window));
- }
+ call->ackr_sack_base = sack;
} else {
- bool keep = false;
+ unsigned int slot;
ack_reason = RXRPC_ACK_OUT_OF_SEQUENCE;
- if (!call->ackr_sack_table[seq % RXRPC_SACK_SIZE]) {
- call->ackr_sack_table[seq % RXRPC_SACK_SIZE] = 1;
- keep = 1;
+ slot = seq - window;
+ sack = (sack + slot) % RXRPC_SACK_SIZE;
+
+ if (call->ackr_sack_table[sack % RXRPC_SACK_SIZE]) {
+ ack_reason = RXRPC_ACK_DUPLICATE;
+ goto send_ack;
}
+ call->ackr_sack_table[sack % RXRPC_SACK_SIZE] |= 1;
+ trace_rxrpc_sack(call, seq, sack, rxrpc_sack_oos);
+
if (after(seq + 1, wtop)) {
wtop = seq + 1;
rxrpc_input_update_ack_window(call, window, wtop);
}
- if (!keep) {
- ack_reason = RXRPC_ACK_DUPLICATE;
- goto send_ack;
- }
-
skb_queue_walk(&call->rx_oos_queue, oos) {
struct rxrpc_skb_priv *osp = rxrpc_skb(oos);
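
A worked example of the rotating SACK base used in the hunk above: the table is a ring of RXRPC_SACK_SIZE entries whose base index tracks the bottom of the window, so an out-of-sequence packet lands at (base + (seq - window)) modulo the ring size:

/* Map a sequence number to its slot in the rotating SACK ring. */
#include <stdio.h>

#define SACK_SIZE 256

int main(void)
{
	unsigned int base = 250;	/* call->ackr_sack_base */
	unsigned int window = 1000;	/* next in-order seq expected */
	unsigned int seq = 1010;	/* out-of-sequence arrival */
	unsigned int slot = seq - window;
	unsigned int sack = (base + slot) % SACK_SIZE;

	printf("seq %u -> slot %u -> ring index %u\n", seq, slot, sack);
	/* seq 1010 -> slot 10 -> ring index 4 (wrapped past 255) */
	return 0;
}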
@@ -550,19 +564,27 @@ protocol_error:
static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- enum rxrpc_call_state state;
rxrpc_serial_t serial = sp->hdr.serial;
rxrpc_seq_t seq0 = sp->hdr.seq;
- _enter("{%llx,%x},{%u,%x}",
- atomic64_read(&call->ackr_window), call->rx_highest_seq,
+ _enter("{%x,%x,%x},{%u,%x}",
+ call->ackr_window, call->ackr_wtop, call->rx_highest_seq,
skb->len, seq0);
- state = READ_ONCE(call->state);
- if (state >= RXRPC_CALL_COMPLETE)
+ if (__rxrpc_call_is_complete(call))
return;
- if (state == RXRPC_CALL_SERVER_RECV_REQUEST) {
+ switch (__rxrpc_call_state(call)) {
+ case RXRPC_CALL_CLIENT_SEND_REQUEST:
+ case RXRPC_CALL_CLIENT_AWAIT_REPLY:
+ /* Received data implicitly ACKs all of the request
+ * packets we sent when we're acting as a client.
+ */
+ if (!rxrpc_receiving_reply(call))
+ goto out_notify;
+ break;
+
+ case RXRPC_CALL_SERVER_RECV_REQUEST: {
unsigned long timo = READ_ONCE(call->next_req_timo);
unsigned long now, expect_req_by;
@@ -573,21 +595,18 @@ static void rxrpc_input_data(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_reduce_call_timer(call, expect_req_by, now,
rxrpc_timer_set_for_idle);
}
+ break;
}
- /* Received data implicitly ACKs all of the request packets we sent
- * when we're acting as a client.
- */
- if ((state == RXRPC_CALL_CLIENT_SEND_REQUEST ||
- state == RXRPC_CALL_CLIENT_AWAIT_REPLY) &&
- !rxrpc_receiving_reply(call))
- goto out_notify;
+ default:
+ break;
+ }
if (!rxrpc_input_split_jumbo(call, skb)) {
- rxrpc_proto_abort("VLD", call, sp->hdr.seq);
+ rxrpc_proto_abort(call, sp->hdr.seq, rxrpc_badmsg_bad_jumbo);
goto out_notify;
}
- skb = NULL;
+ return;
out_notify:
trace_rxrpc_notify_socket(call->debug_id, serial);
@@ -765,7 +784,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
offset = sizeof(struct rxrpc_wire_header);
if (skb_copy_bits(skb, offset, &ack, sizeof(ack)) < 0)
- return rxrpc_proto_abort("XAK", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack);
offset += sizeof(ack);
ack_serial = sp->hdr.serial;
@@ -845,7 +864,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
ioffset = offset + nr_acks + 3;
if (skb->len >= ioffset + sizeof(info) &&
skb_copy_bits(skb, ioffset, &info, sizeof(info)) < 0)
- return rxrpc_proto_abort("XAI", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_badmsg_short_ack_info);
if (nr_acks > 0)
skb_condense(skb);
@@ -868,10 +887,10 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
rxrpc_input_ackinfo(call, skb, &info);
if (first_soft_ack == 0)
- return rxrpc_proto_abort("AK0", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_zero);
/* Ignore ACKs unless we are or have just been transmitting. */
- switch (READ_ONCE(call->state)) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
case RXRPC_CALL_SERVER_SEND_REPLY:
@@ -883,20 +902,20 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (before(hard_ack, call->acks_hard_ack) ||
after(hard_ack, call->tx_top))
- return rxrpc_proto_abort("AKW", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_outside_window);
if (nr_acks > call->tx_top - hard_ack)
- return rxrpc_proto_abort("AKN", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_sack_overflow);
if (after(hard_ack, call->acks_hard_ack)) {
if (rxrpc_rotate_tx_window(call, hard_ack, &summary)) {
- rxrpc_end_tx_phase(call, false, "ETA");
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ack);
return;
}
}
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
- return rxrpc_proto_abort("XSA", call, 0);
+ return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
nr_acks, &summary);
}
@@ -918,7 +937,7 @@ static void rxrpc_input_ackall(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_ack_summary summary = { 0 };
if (rxrpc_rotate_tx_window(call, call->tx_top, &summary))
- rxrpc_end_tx_phase(call, false, "ETL");
+ rxrpc_end_tx_phase(call, false, rxrpc_eproto_unexpected_ackall);
}
/*
@@ -963,27 +982,23 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_DATA:
- rxrpc_input_data(call, skb);
- break;
+ return rxrpc_input_data(call, skb);
case RXRPC_PACKET_TYPE_ACK:
- rxrpc_input_ack(call, skb);
- break;
+ return rxrpc_input_ack(call, skb);
case RXRPC_PACKET_TYPE_BUSY:
/* Just ignore BUSY packets from the server; the retry and
* lifespan timers will take care of business. BUSY packets
* from the client don't make sense.
*/
- break;
+ return;
case RXRPC_PACKET_TYPE_ABORT:
- rxrpc_input_abort(call, skb);
- break;
+ return rxrpc_input_abort(call, skb);
case RXRPC_PACKET_TYPE_ACKALL:
- rxrpc_input_ackall(call, skb);
- break;
+ return rxrpc_input_ackall(call, skb);
default:
break;
@@ -998,24 +1013,18 @@ void rxrpc_input_call_packet(struct rxrpc_call *call, struct sk_buff *skb)
*/
void rxrpc_implicit_end_call(struct rxrpc_call *call, struct sk_buff *skb)
{
- struct rxrpc_connection *conn = call->conn;
-
- switch (READ_ONCE(call->state)) {
+ switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_SERVER_AWAIT_ACK:
rxrpc_call_completed(call);
fallthrough;
case RXRPC_CALL_COMPLETE:
break;
default:
- if (rxrpc_abort_call("IMP", call, 0, RX_CALL_DEAD, -ESHUTDOWN))
- rxrpc_send_abort_packet(call);
+ rxrpc_abort_call(call, 0, RX_CALL_DEAD, -ESHUTDOWN,
+ rxrpc_eproto_improper_term);
trace_rxrpc_improper_term(call);
break;
}
rxrpc_input_call_event(call, skb);
-
- spin_lock(&conn->bundle->channel_lock);
- __rxrpc_disconnect_call(conn, call);
- spin_unlock(&conn->bundle->channel_lock);
}
diff --git a/net/rxrpc/insecure.c b/net/rxrpc/insecure.c
index 0eb8471bfc53..34353b6e584b 100644
--- a/net/rxrpc/insecure.c
+++ b/net/rxrpc/insecure.c
@@ -43,25 +43,17 @@ static void none_free_call_crypto(struct rxrpc_call *call)
}
static int none_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("chall_none"));
- return -EPROTO;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_challenge);
}
static int none_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
-
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("resp_none"));
- return -EPROTO;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxrpc_eproto_rxnull_response);
}
static void none_clear(struct rxrpc_connection *conn)
diff --git a/net/rxrpc/io_thread.c b/net/rxrpc/io_thread.c
index 1ad067d66fb6..4a3a08a0e2cd 100644
--- a/net/rxrpc/io_thread.c
+++ b/net/rxrpc/io_thread.c
@@ -25,6 +25,7 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
*/
int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
{
+ struct sk_buff_head *rx_queue;
struct rxrpc_local *local = rcu_dereference_sk_user_data(udp_sk);
if (unlikely(!local)) {
@@ -36,7 +37,16 @@ int rxrpc_encap_rcv(struct sock *udp_sk, struct sk_buff *skb)
skb->mark = RXRPC_SKB_MARK_PACKET;
rxrpc_new_skb(skb, rxrpc_skb_new_encap_rcv);
- skb_queue_tail(&local->rx_queue, skb);
+ rx_queue = &local->rx_queue;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ if (rxrpc_inject_rx_delay ||
+ !skb_queue_empty(&local->rx_delay_queue)) {
+ skb->tstamp = ktime_add_ms(skb->tstamp, rxrpc_inject_rx_delay);
+ rx_queue = &local->rx_delay_queue;
+ }
+#endif
+
+ skb_queue_tail(rx_queue, skb);
rxrpc_wake_up_io_thread(local);
return 0;
}
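
Each delayed packet's release time is its arrival tstamp plus the configured delay, and once anything sits in rx_delay_queue all later arrivals are routed there too, so packets cannot overtake each other. A trivial model of the stamping (ktime values reduced to plain nanosecond counts):

/* Compute a packet's release time from its arrival time plus a delay in
 * milliseconds, as ktime_add_ms() does above.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t ktime_ns;

static ktime_ns add_ms(ktime_ns t, unsigned long ms)
{
	return t + (ktime_ns)ms * 1000000;
}

int main(void)
{
	ktime_ns arrival = 1000000000;	/* 1s */
	unsigned long inject_ms = 250;
	ktime_ns release = add_ms(arrival, inject_ms);

	printf("hold packet until t=%lld ns\n", (long long)release);
	return 0;
}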
@@ -67,9 +77,31 @@ void rxrpc_error_report(struct sock *sk)
}
/*
+ * Directly produce an abort from a packet.
+ */
+bool rxrpc_direct_abort(struct sk_buff *skb, enum rxrpc_abort_reason why,
+ s32 abort_code, int err)
+{
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+
+ trace_rxrpc_abort(0, why, sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
+ abort_code, err);
+ skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
+ skb->priority = abort_code;
+ return false;
+}
+
+static bool rxrpc_bad_message(struct sk_buff *skb, enum rxrpc_abort_reason why)
+{
+ return rxrpc_direct_abort(skb, why, RX_PROTOCOL_ERROR, -EBADMSG);
+}
+
+#define just_discard true
+
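
The new convention: input handlers return true when the packet has been fully dealt with (including deliberately dropping it, hence the just_discard alias) and false when the caller should reject it with an abort. A freestanding sketch of how a caller consumes that, with illustrative names:

/* Handlers return true when done with the packet, false to have the
 * caller reject it -- mirroring the rxrpc_input_packet() call site in
 * the I/O thread further down.
 */
#include <stdbool.h>
#include <stdio.h>

#define just_discard true

static bool input_packet(int type)
{
	switch (type) {
	case 1:
		return just_discard;	/* ignorable packet type */
	case 2:
		return false;		/* malformed: caller sends an abort */
	default:
		return true;		/* consumed normally */
	}
}

int main(void)
{
	int type;

	for (type = 1; type <= 3; type++)
		if (!input_packet(type))
			printf("type %d: reject packet\n", type);
	return 0;
}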
+/*
* Process event packets targeted at a local endpoint.
*/
-static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
+static bool rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
{
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
char v;
@@ -81,22 +113,21 @@ static void rxrpc_input_version(struct rxrpc_local *local, struct sk_buff *skb)
if (v == 0)
rxrpc_send_version_request(local, &sp->hdr, skb);
}
+
+ return true;
}
/*
* Extract the wire header from a packet and translate the byte order.
*/
-static noinline
-int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
+static bool rxrpc_extract_header(struct rxrpc_skb_priv *sp,
+ struct sk_buff *skb)
{
struct rxrpc_wire_header whdr;
/* dig out the RxRPC connection details */
- if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0) {
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial,
- tracepoint_string("bad_hdr"));
- return -EBADMSG;
- }
+ if (skb_copy_bits(skb, 0, &whdr, sizeof(whdr)) < 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_short_hdr);
memset(sp, 0, sizeof(*sp));
sp->hdr.epoch = ntohl(whdr.epoch);
@@ -110,7 +141,7 @@ int rxrpc_extract_header(struct rxrpc_skb_priv *sp, struct sk_buff *skb)
sp->hdr.securityIndex = whdr.securityIndex;
sp->hdr._rsvd = ntohs(whdr._rsvd);
sp->hdr.serviceId = ntohs(whdr.serviceId);
- return 0;
+ return true;
}
/*
@@ -130,28 +161,28 @@ static bool rxrpc_extract_abort(struct sk_buff *skb)
/*
* Process packets received on the local endpoint
*/
-static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
+static bool rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
{
struct rxrpc_connection *conn;
struct sockaddr_rxrpc peer_srx;
struct rxrpc_skb_priv *sp;
struct rxrpc_peer *peer = NULL;
struct sk_buff *skb = *_skb;
- int ret = 0;
+ bool ret = false;
skb_pull(skb, sizeof(struct udphdr));
sp = rxrpc_skb(skb);
/* dig out the RxRPC connection details */
- if (rxrpc_extract_header(sp, skb) < 0)
- goto bad_message;
+ if (!rxrpc_extract_header(sp, skb))
+ return just_discard;
if (IS_ENABLED(CONFIG_AF_RXRPC_INJECT_LOSS)) {
static int lose;
if ((lose++ & 7) == 7) {
trace_rxrpc_rx_lose(sp);
- return 0;
+ return just_discard;
}
}
@@ -160,28 +191,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
switch (sp->hdr.type) {
case RXRPC_PACKET_TYPE_VERSION:
if (rxrpc_to_client(sp))
- return 0;
- rxrpc_input_version(local, skb);
- return 0;
+ return just_discard;
+ return rxrpc_input_version(local, skb);
case RXRPC_PACKET_TYPE_BUSY:
if (rxrpc_to_server(sp))
- return 0;
+ return just_discard;
fallthrough;
case RXRPC_PACKET_TYPE_ACK:
case RXRPC_PACKET_TYPE_ACKALL:
if (sp->hdr.callNumber == 0)
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
break;
case RXRPC_PACKET_TYPE_ABORT:
if (!rxrpc_extract_abort(skb))
- return 0; /* Just discard if malformed */
+ return just_discard; /* Just discard if malformed */
break;
case RXRPC_PACKET_TYPE_DATA:
- if (sp->hdr.callNumber == 0 ||
- sp->hdr.seq == 0)
- goto bad_message;
+ if (sp->hdr.callNumber == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_call);
+ if (sp->hdr.seq == 0)
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_seq);
/* Unshare the packet so that it can be modified for in-place
* decryption.
@@ -191,7 +222,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
if (!skb) {
rxrpc_eaten_skb(*_skb, rxrpc_skb_eaten_by_unshare_nomem);
*_skb = NULL;
- return 0;
+ return just_discard;
}
if (skb != *_skb) {
@@ -205,28 +236,28 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
case RXRPC_PACKET_TYPE_CHALLENGE:
if (rxrpc_to_server(sp))
- return 0;
+ return just_discard;
break;
case RXRPC_PACKET_TYPE_RESPONSE:
if (rxrpc_to_client(sp))
- return 0;
+ return just_discard;
break;
/* Packet types 9-11 should just be ignored. */
case RXRPC_PACKET_TYPE_PARAMS:
case RXRPC_PACKET_TYPE_10:
case RXRPC_PACKET_TYPE_11:
- return 0;
+ return just_discard;
default:
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_unsupported_packet);
}
if (sp->hdr.serviceId == 0)
- goto bad_message;
+ return rxrpc_bad_message(skb, rxrpc_badmsg_zero_service);
if (WARN_ON_ONCE(rxrpc_extract_addr_from_skb(&peer_srx, skb) < 0))
- return true; /* Unsupported address type - discard. */
+ return just_discard; /* Unsupported address type. */
if (peer_srx.transport.family != local->srx.transport.family &&
(peer_srx.transport.family == AF_INET &&
@@ -234,7 +265,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
pr_warn_ratelimited("AF_RXRPC: Protocol mismatch %u not %u\n",
peer_srx.transport.family,
local->srx.transport.family);
- return true; /* Wrong address type - discard. */
+ return just_discard; /* Wrong address type. */
}
if (rxrpc_to_client(sp)) {
@@ -242,12 +273,8 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
conn = rxrpc_find_client_connection_rcu(local, &peer_srx, skb);
conn = rxrpc_get_connection_maybe(conn, rxrpc_conn_get_call_input);
rcu_read_unlock();
- if (!conn) {
- trace_rxrpc_abort(0, "NCC", sp->hdr.cid,
- sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- goto protocol_error;
- }
+ if (!conn)
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_conn);
ret = rxrpc_input_packet_on_conn(conn, &peer_srx, skb);
rxrpc_put_connection(conn, rxrpc_conn_put_call_input);
@@ -280,19 +307,7 @@ static int rxrpc_input_packet(struct rxrpc_local *local, struct sk_buff **_skb)
ret = rxrpc_new_incoming_call(local, peer, NULL, &peer_srx, skb);
rxrpc_put_peer(peer, rxrpc_peer_put_input);
- if (ret < 0)
- goto reject_packet;
- return 0;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- rxrpc_reject_packet(local, skb);
- return 0;
+ return ret;
}
/*
@@ -306,21 +321,23 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
struct rxrpc_channel *chan;
struct rxrpc_call *call = NULL;
unsigned int channel;
+ bool ret;
if (sp->hdr.securityIndex != conn->security_ix)
- goto wrong_security;
+ return rxrpc_direct_abort(skb, rxrpc_eproto_wrong_security,
+ RXKADINCONSISTENCY, -EBADMSG);
if (sp->hdr.serviceId != conn->service_id) {
int old_id;
if (!test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags))
- goto reupgrade;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_reupgrade);
+
old_id = cmpxchg(&conn->service_id, conn->orig_service_id,
sp->hdr.serviceId);
-
if (old_id != conn->orig_service_id &&
old_id != sp->hdr.serviceId)
- goto reupgrade;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_bad_upgrade);
}
if (after(sp->hdr.serial, conn->hi_serial))
@@ -336,19 +353,19 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
/* Ignore really old calls */
if (sp->hdr.callNumber < chan->last_call)
- return 0;
+ return just_discard;
if (sp->hdr.callNumber == chan->last_call) {
if (chan->call ||
sp->hdr.type == RXRPC_PACKET_TYPE_ABORT)
- return 0;
+ return just_discard;
/* For the previous service call, if completed successfully, we
* discard all further packets.
*/
if (rxrpc_conn_is_service(conn) &&
chan->last_type == RXRPC_PACKET_TYPE_ACK)
- return 0;
+ return just_discard;
/* But otherwise we need to retransmit the final packet from
* data cached in the connection record.
@@ -358,19 +375,17 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
sp->hdr.seq,
sp->hdr.serial,
sp->hdr.flags);
- rxrpc_input_conn_packet(conn, skb);
- return 0;
+ rxrpc_conn_retransmit_call(conn, skb, channel);
+ return just_discard;
}
- rcu_read_lock();
- call = rxrpc_try_get_call(rcu_dereference(chan->call),
- rxrpc_call_get_input);
- rcu_read_unlock();
+ call = rxrpc_try_get_call(chan->call, rxrpc_call_get_input);
if (sp->hdr.callNumber > chan->call_id) {
if (rxrpc_to_client(sp)) {
rxrpc_put_call(call, rxrpc_call_put_input);
- goto reject_packet;
+ return rxrpc_protocol_error(skb,
+ rxrpc_eproto_unexpected_implicit_end);
}
if (call) {
@@ -382,38 +397,14 @@ static int rxrpc_input_packet_on_conn(struct rxrpc_connection *conn,
if (!call) {
if (rxrpc_to_client(sp))
- goto bad_message;
- if (rxrpc_new_incoming_call(conn->local, conn->peer, conn,
- peer_srx, skb) == 0)
- return 0;
- goto reject_packet;
+ return rxrpc_protocol_error(skb, rxrpc_eproto_no_client_call);
+ return rxrpc_new_incoming_call(conn->local, conn->peer, conn,
+ peer_srx, skb);
}
- rxrpc_input_call_event(call, skb);
+ ret = rxrpc_input_call_event(call, skb);
rxrpc_put_call(call, rxrpc_call_put_input);
- return 0;
-
-wrong_security:
- trace_rxrpc_abort(0, "SEC", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RXKADINCONSISTENCY, EBADMSG);
- skb->priority = RXKADINCONSISTENCY;
- goto post_abort;
-
-reupgrade:
- trace_rxrpc_abort(0, "UPG", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
- goto protocol_error;
-
-bad_message:
- trace_rxrpc_abort(0, "BAD", sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_PROTOCOL_ERROR, EBADMSG);
-protocol_error:
- skb->priority = RX_PROTOCOL_ERROR;
-post_abort:
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
-reject_packet:
- rxrpc_reject_packet(conn->local, skb);
- return 0;
+ return ret;
}
/*
@@ -421,10 +412,14 @@ reject_packet:
*/
int rxrpc_io_thread(void *data)
{
+ struct rxrpc_connection *conn;
struct sk_buff_head rx_queue;
struct rxrpc_local *local = data;
struct rxrpc_call *call;
struct sk_buff *skb;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ ktime_t now;
+#endif
bool should_stop;
complete(&local->io_thread_ready);
@@ -436,6 +431,24 @@ int rxrpc_io_thread(void *data)
for (;;) {
rxrpc_inc_stat(local->rxnet, stat_io_loop);
+ /* Deal with connections that want immediate attention. */
+ conn = list_first_entry_or_null(&local->conn_attend_q,
+ struct rxrpc_connection,
+ attend_link);
+ if (conn) {
+ spin_lock_bh(&local->lock);
+ list_del_init(&conn->attend_link);
+ spin_unlock_bh(&local->lock);
+
+ rxrpc_input_conn_event(conn, NULL);
+ rxrpc_put_connection(conn, rxrpc_conn_put_poke);
+ continue;
+ }
+
+ if (test_and_clear_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags))
+ rxrpc_discard_expired_client_conns(local);
+
/* Deal with calls that want immediate attention. */
if ((call = list_first_entry_or_null(&local->call_attend_q,
struct rxrpc_call,
@@ -450,12 +463,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ if (!list_empty(&local->new_client_calls))
+ rxrpc_connect_client_calls(local);
+
/* Process received packets and errors. */
if ((skb = __skb_dequeue(&rx_queue))) {
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
switch (skb->mark) {
case RXRPC_SKB_MARK_PACKET:
skb->priority = 0;
- rxrpc_input_packet(local, &skb);
+ if (!rxrpc_input_packet(local, &skb))
+ rxrpc_reject_packet(local, skb);
trace_rxrpc_rx_done(skb->mark, skb->priority);
rxrpc_free_skb(skb, rxrpc_skb_put_input);
break;
@@ -463,6 +481,11 @@ int rxrpc_io_thread(void *data)
rxrpc_input_error(local, skb);
rxrpc_free_skb(skb, rxrpc_skb_put_error_report);
break;
+ case RXRPC_SKB_MARK_SERVICE_CONN_SECURED:
+ rxrpc_input_conn_event(sp->conn, skb);
+ rxrpc_put_connection(sp->conn, rxrpc_conn_put_poke);
+ rxrpc_free_skb(skb, rxrpc_skb_put_conn_secured);
+ break;
default:
WARN_ON_ONCE(1);
rxrpc_free_skb(skb, rxrpc_skb_put_unknown);
@@ -471,6 +494,17 @@ int rxrpc_io_thread(void *data)
continue;
}
+ /* Inject a delay into packets if requested. */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ now = ktime_get_real();
+ while ((skb = skb_peek(&local->rx_delay_queue))) {
+ if (ktime_before(now, skb->tstamp))
+ break;
+ skb = skb_dequeue(&local->rx_delay_queue);
+ skb_queue_tail(&local->rx_queue, skb);
+ }
+#endif
+
if (!skb_queue_empty(&local->rx_queue)) {
spin_lock_irq(&local->rx_queue.lock);
skb_queue_splice_tail_init(&local->rx_queue, &rx_queue);
@@ -481,13 +515,39 @@ int rxrpc_io_thread(void *data)
set_current_state(TASK_INTERRUPTIBLE);
should_stop = kthread_should_stop();
if (!skb_queue_empty(&local->rx_queue) ||
- !list_empty(&local->call_attend_q)) {
+ !list_empty(&local->call_attend_q) ||
+ !list_empty(&local->conn_attend_q) ||
+ !list_empty(&local->new_client_calls) ||
+ test_bit(RXRPC_CLIENT_CONN_REAP_TIMER,
+ &local->client_conn_flags)) {
__set_current_state(TASK_RUNNING);
continue;
}
if (should_stop)
break;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb = skb_peek(&local->rx_delay_queue);
+ if (skb) {
+ unsigned long timeout;
+ ktime_t tstamp = skb->tstamp;
+ ktime_t now = ktime_get_real();
+ s64 delay_ns = ktime_to_ns(ktime_sub(tstamp, now));
+
+ if (delay_ns <= 0) {
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+
+ timeout = nsecs_to_jiffies(delay_ns);
+ timeout = max(timeout, 1UL);
+ schedule_timeout(timeout);
+ __set_current_state(TASK_RUNNING);
+ continue;
+ }
+#endif
+
schedule();
}
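
The sleep computation above converts the time until the head of the delay queue matures into a tick count and never sleeps for zero ticks. A runnable model, using a simplified nsecs-to-jiffies conversion (the kernel helper's rounding differs in detail) and an assumed HZ of 100:

/* Turn a nanosecond delay into a non-zero jiffy timeout. */
#include <stdint.h>
#include <stdio.h>

#define HZ 100UL
#define NSEC_PER_JIFFY (1000000000UL / HZ)

static unsigned long ns_to_jiffies(uint64_t ns)
{
	return (ns + NSEC_PER_JIFFY - 1) / NSEC_PER_JIFFY;	/* round up */
}

int main(void)
{
	int64_t delay_ns = 3500000;	/* 3.5ms until head of queue is due */
	unsigned long timeout;

	if (delay_ns <= 0) {
		puts("due now: don't sleep, loop again");
		return 0;
	}
	timeout = ns_to_jiffies((uint64_t)delay_ns);
	if (timeout < 1)
		timeout = 1;	/* max(timeout, 1UL) in the patch */
	printf("schedule_timeout(%lu jiffies)\n", timeout);
	return 0;
}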
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 270b63d8f37a..7d910aee4f8c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -82,31 +82,61 @@ static long rxrpc_local_cmp_key(const struct rxrpc_local *local,
}
}
+static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
+{
+ struct rxrpc_local *local =
+ container_of(timer, struct rxrpc_local, client_conn_reap_timer);
+
+ if (local->kill_all_client_conns &&
+ !test_and_set_bit(RXRPC_CLIENT_CONN_REAP_TIMER, &local->client_conn_flags))
+ rxrpc_wake_up_io_thread(local);
+}
+
/*
* Allocate a new local endpoint.
*/
-static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
+static struct rxrpc_local *rxrpc_alloc_local(struct net *net,
const struct sockaddr_rxrpc *srx)
{
struct rxrpc_local *local;
+ u32 tmp;
local = kzalloc(sizeof(struct rxrpc_local), GFP_KERNEL);
if (local) {
refcount_set(&local->ref, 1);
atomic_set(&local->active_users, 1);
- local->rxnet = rxnet;
+ local->net = net;
+ local->rxnet = rxrpc_net(net);
INIT_HLIST_NODE(&local->link);
- init_rwsem(&local->defrag_sem);
init_completion(&local->io_thread_ready);
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ skb_queue_head_init(&local->rx_delay_queue);
+#endif
skb_queue_head_init(&local->rx_queue);
+ INIT_LIST_HEAD(&local->conn_attend_q);
INIT_LIST_HEAD(&local->call_attend_q);
+
local->client_bundles = RB_ROOT;
spin_lock_init(&local->client_bundles_lock);
+ local->kill_all_client_conns = false;
+ INIT_LIST_HEAD(&local->idle_client_conns);
+ timer_setup(&local->client_conn_reap_timer,
+ rxrpc_client_conn_reap_timeout, 0);
+
spin_lock_init(&local->lock);
rwlock_init(&local->services_lock);
local->debug_id = atomic_inc_return(&rxrpc_debug_id);
memcpy(&local->srx, srx, sizeof(*srx));
local->srx.srx_service = 0;
+ idr_init(&local->conn_ids);
+ get_random_bytes(&tmp, sizeof(tmp));
+ tmp &= 0x3fffffff;
+ if (tmp == 0)
+ tmp = 1;
+ idr_set_cursor(&local->conn_ids, tmp);
+ INIT_LIST_HEAD(&local->new_client_calls);
+ spin_lock_init(&local->client_call_lock);
+
trace_rxrpc_local(local->debug_id, rxrpc_local_new, 1, 1);
}
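
The connection-ID IDR is now per-local-endpoint, and its cursor is seeded with a random non-zero 30-bit value so the first IDs handed out are not predictable. A demo of the seeding clamp, with rand() standing in for get_random_bytes():

/* Mask a random word to 30 bits and avoid zero, which is reserved. */
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	unsigned int tmp = (unsigned int)rand();

	tmp &= 0x3fffffff;	/* conn ID cursor is 30 bits wide here */
	if (tmp == 0)
		tmp = 1;	/* zero is never handed out */
	printf("first conn ID cursor: %#x\n", tmp);
	return 0;
}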
@@ -248,7 +278,7 @@ struct rxrpc_local *rxrpc_lookup_local(struct net *net,
goto found;
}
- local = rxrpc_alloc_local(rxnet, srx);
+ local = rxrpc_alloc_local(net, srx);
if (!local)
goto nomem;
@@ -406,7 +436,11 @@ void rxrpc_destroy_local(struct rxrpc_local *local)
/* At this point, there should be no more packets coming in to the
* local endpoint.
*/
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ rxrpc_purge_queue(&local->rx_delay_queue);
+#endif
rxrpc_purge_queue(&local->rx_queue);
+ rxrpc_purge_client_connections(local);
}
/*
diff --git a/net/rxrpc/misc.c b/net/rxrpc/misc.c
index 056c428d8bf3..825b81183046 100644
--- a/net/rxrpc/misc.c
+++ b/net/rxrpc/misc.c
@@ -53,3 +53,10 @@ unsigned int rxrpc_rx_mtu = 5692;
* sender that we're willing to handle.
*/
unsigned int rxrpc_rx_jumbo_max = 4;
+
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+/*
+ * The delay to inject into packet reception.
+ */
+unsigned long rxrpc_inject_rx_delay;
+#endif
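
The knob is presumably exposed as a writable sysctl elsewhere in this patch (the wiring is not shown in this excerpt). Under that assumption, a plausible ctl_table entry for an unsigned long value would look like this sketch:

/* Assumed sysctl wiring for the knob above -- not shown in this diff.
 * proc_doulongvec_minmax is the stock handler for unsigned long values.
 */
static struct ctl_table rxrpc_sysctl_table[] = {
	{
		.procname	= "inject_rx_delay",
		.data		= &rxrpc_inject_rx_delay,
		.maxlen		= sizeof(unsigned long),
		.mode		= 0644,
		.proc_handler	= proc_doulongvec_minmax,
	},
	{}
};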
diff --git a/net/rxrpc/net_ns.c b/net/rxrpc/net_ns.c
index 5905530e2f33..a0319c040c25 100644
--- a/net/rxrpc/net_ns.c
+++ b/net/rxrpc/net_ns.c
@@ -10,15 +10,6 @@
unsigned int rxrpc_net_id;
-static void rxrpc_client_conn_reap_timeout(struct timer_list *timer)
-{
- struct rxrpc_net *rxnet =
- container_of(timer, struct rxrpc_net, client_conn_reap_timer);
-
- if (rxnet->live)
- rxrpc_queue_work(&rxnet->client_conn_reaper);
-}
-
static void rxrpc_service_conn_reap_timeout(struct timer_list *timer)
{
struct rxrpc_net *rxnet =
@@ -63,14 +54,6 @@ static __net_init int rxrpc_init_net(struct net *net)
rxrpc_service_conn_reap_timeout, 0);
atomic_set(&rxnet->nr_client_conns, 0);
- rxnet->kill_all_client_conns = false;
- spin_lock_init(&rxnet->client_conn_cache_lock);
- mutex_init(&rxnet->client_conn_discard_lock);
- INIT_LIST_HEAD(&rxnet->idle_client_conns);
- INIT_WORK(&rxnet->client_conn_reaper,
- rxrpc_discard_expired_client_conns);
- timer_setup(&rxnet->client_conn_reap_timer,
- rxrpc_client_conn_reap_timeout, 0);
INIT_HLIST_HEAD(&rxnet->local_endpoints);
mutex_init(&rxnet->local_mutex);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index 3d8c9f830ee0..5e53429c6922 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -80,62 +80,40 @@ static void rxrpc_set_keepalive(struct rxrpc_call *call)
*/
static size_t rxrpc_fill_out_ack(struct rxrpc_connection *conn,
struct rxrpc_call *call,
- struct rxrpc_txbuf *txb)
+ struct rxrpc_txbuf *txb,
+ u16 *_rwind)
{
struct rxrpc_ackinfo ackinfo;
- unsigned int qsize;
- rxrpc_seq_t window, wtop, wrap_point, ix, first;
+ unsigned int qsize, sack, wrap, to;
+ rxrpc_seq_t window, wtop;
int rsize;
- u64 wtmp;
u32 mtu, jmax;
u8 *ackp = txb->acks;
- u8 sack_buffer[sizeof(call->ackr_sack_table)] __aligned(8);
- atomic_set(&call->ackr_nr_unacked, 0);
+ call->ackr_nr_unacked = 0;
atomic_set(&call->ackr_nr_consumed, 0);
rxrpc_inc_stat(call->rxnet, stat_tx_ack_fill);
+ clear_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags);
- /* Barrier against rxrpc_input_data(). */
-retry:
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
+ window = call->ackr_window;
+ wtop = call->ackr_wtop;
+ sack = call->ackr_sack_base % RXRPC_SACK_SIZE;
txb->ack.firstPacket = htonl(window);
- txb->ack.nAcks = 0;
+ txb->ack.nAcks = wtop - window;
if (after(wtop, window)) {
- /* Try to copy the SACK ring locklessly. We can use the copy,
- * only if the now-current top of the window didn't go past the
- * previously read base - otherwise we can't know whether we
- * have old data or new data.
- */
- memcpy(sack_buffer, call->ackr_sack_table, sizeof(sack_buffer));
- wrap_point = window + RXRPC_SACK_SIZE - 1;
- wtmp = atomic64_read_acquire(&call->ackr_window);
- window = lower_32_bits(wtmp);
- wtop = upper_32_bits(wtmp);
- if (after(wtop, wrap_point)) {
- cond_resched();
- goto retry;
- }
-
- /* The buffer is maintained as a ring with an invariant mapping
- * between bit position and sequence number, so we'll probably
- * need to rotate it.
- */
- txb->ack.nAcks = wtop - window;
- ix = window % RXRPC_SACK_SIZE;
- first = sizeof(sack_buffer) - ix;
+ wrap = RXRPC_SACK_SIZE - sack;
+ to = min_t(unsigned int, txb->ack.nAcks, RXRPC_SACK_SIZE);
- if (ix + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
- memcpy(txb->acks, sack_buffer + ix, txb->ack.nAcks);
+ if (sack + txb->ack.nAcks <= RXRPC_SACK_SIZE) {
+ memcpy(txb->acks, call->ackr_sack_table + sack, txb->ack.nAcks);
} else {
- memcpy(txb->acks, sack_buffer + ix, first);
- memcpy(txb->acks + first, sack_buffer,
- txb->ack.nAcks - first);
+ memcpy(txb->acks, call->ackr_sack_table + sack, wrap);
+ memcpy(txb->acks + wrap, call->ackr_sack_table,
+ to - wrap);
}
- ackp += txb->ack.nAcks;
+ ackp += to;
} else if (before(wtop, window)) {
pr_warn("ack window backward %x %x", window, wtop);
} else if (txb->ack.reason == RXRPC_ACK_DELAY) {
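
A worked example of the two-part copy above: with the ring base at 250, 10 acks to report and a 256-slot table, six bytes come from the tail of the ring and four from the front:

/* Copy a contiguous run of SACK bytes out of a wrapping ring buffer. */
#include <stdio.h>
#include <string.h>

#define SACK_SIZE 256

int main(void)
{
	unsigned char ring[SACK_SIZE], out[SACK_SIZE];
	unsigned int sack = 250, nacks = 10;
	unsigned int wrap = SACK_SIZE - sack;	/* 6 slots before wrapping */

	memset(ring, 1, sizeof(ring));
	if (sack + nacks <= SACK_SIZE) {
		memcpy(out, ring + sack, nacks);
	} else {
		memcpy(out, ring + sack, wrap);		/* slots 250..255 */
		memcpy(out + wrap, ring, nacks - wrap);	/* slots 0..3 */
	}
	printf("copied %u+%u SACK bytes\n", wrap, nacks - wrap);
	return 0;
}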
@@ -147,6 +125,7 @@ retry:
jmax = rxrpc_rx_jumbo_max;
qsize = (window - 1) - call->rx_consumed;
rsize = max_t(int, call->rx_winsize - qsize, 0);
+ *_rwind = rsize;
ackinfo.rxMTU = htonl(rxrpc_rx_mtu);
ackinfo.maxMTU = htonl(mtu);
ackinfo.rwind = htonl(rsize);
@@ -213,6 +192,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
rxrpc_serial_t serial;
size_t len, n;
int ret, rtt_slot = -1;
+ u16 rwind;
if (test_bit(RXRPC_CALL_DISCONNECTED, &call->flags))
return -ECONNRESET;
@@ -228,7 +208,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
if (txb->ack.reason == RXRPC_ACK_PING)
txb->wire.flags |= RXRPC_REQUEST_ACK;
- n = rxrpc_fill_out_ack(conn, call, txb);
+ n = rxrpc_fill_out_ack(conn, call, txb, &rwind);
if (n == 0)
return 0;
@@ -240,7 +220,8 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
- ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks);
+ ntohl(txb->ack.serial), txb->ack.reason, txb->ack.nAcks,
+ rwind);
if (txb->ack.reason == RXRPC_ACK_PING)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_ping);
@@ -253,15 +234,18 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, len);
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
call->peer->last_tx_at = ktime_get_seconds();
- if (ret < 0)
+ if (ret < 0) {
trace_rxrpc_tx_fail(call->debug_id, serial, ret,
rxrpc_tx_point_call_ack);
- else
+ } else {
trace_rxrpc_tx_packet(call->debug_id, &txb->wire,
rxrpc_tx_point_call_ack);
+ if (txb->wire.flags & RXRPC_REQUEST_ACK)
+ call->peer->rtt_last_req = ktime_get_real();
+ }
rxrpc_tx_backoff(call, ret);
- if (call->state < RXRPC_CALL_COMPLETE) {
+ if (!__rxrpc_call_is_complete(call)) {
if (ret < 0)
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
rxrpc_set_keepalive(call);
@@ -429,8 +413,6 @@ dont_set_request_ack:
if (txb->len >= call->peer->maxdata)
goto send_fragmentable;
- down_read(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -445,7 +427,6 @@ dont_set_request_ack:
ret = do_udp_sendmsg(conn->local->socket, &msg, len);
conn->peer->last_tx_at = ktime_get_seconds();
- up_read(&conn->local->defrag_sem);
if (ret < 0) {
rxrpc_inc_stat(call->rxnet, stat_tx_data_send_fail);
rxrpc_cancel_rtt_probe(call, serial, rtt_slot);
@@ -506,8 +487,6 @@ send_fragmentable:
/* attempt to send this message with fragmentation enabled */
_debug("send fragment");
- down_write(&conn->local->defrag_sem);
-
txb->last_sent = ktime_get_real();
if (txb->wire.flags & RXRPC_REQUEST_ACK)
rtt_slot = rxrpc_begin_rtt_probe(call, serial, rxrpc_rtt_tx_data);
@@ -539,12 +518,66 @@ send_fragmentable:
rxrpc_tx_point_call_data_frag);
}
rxrpc_tx_backoff(call, ret);
-
- up_write(&conn->local->defrag_sem);
goto done;
}
/*
+ * Transmit a connection-level abort.
+ */
+void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
+{
+ struct rxrpc_wire_header whdr;
+ struct msghdr msg;
+ struct kvec iov[2];
+ __be32 word;
+ size_t len;
+ u32 serial;
+ int ret;
+
+ msg.msg_name = &conn->peer->srx.transport;
+ msg.msg_namelen = conn->peer->srx.transport_len;
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ msg.msg_flags = 0;
+
+ whdr.epoch = htonl(conn->proto.epoch);
+ whdr.cid = htonl(conn->proto.cid);
+ whdr.callNumber = 0;
+ whdr.seq = 0;
+ whdr.type = RXRPC_PACKET_TYPE_ABORT;
+ whdr.flags = conn->out_clientflag;
+ whdr.userStatus = 0;
+ whdr.securityIndex = conn->security_ix;
+ whdr._rsvd = 0;
+ whdr.serviceId = htons(conn->service_id);
+
+ word = htonl(conn->abort_code);
+
+ iov[0].iov_base = &whdr;
+ iov[0].iov_len = sizeof(whdr);
+ iov[1].iov_base = &word;
+ iov[1].iov_len = sizeof(word);
+
+ len = iov[0].iov_len + iov[1].iov_len;
+
+ serial = atomic_inc_return(&conn->serial);
+ whdr.serial = htonl(serial);
+
+ iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
+ ret = do_udp_sendmsg(conn->local->socket, &msg, len);
+ if (ret < 0) {
+ trace_rxrpc_tx_fail(conn->debug_id, serial, ret,
+ rxrpc_tx_point_conn_abort);
+ _debug("sendmsg failed: %d", ret);
+ return;
+ }
+
+ trace_rxrpc_tx_packet(conn->debug_id, &whdr, rxrpc_tx_point_conn_abort);
+
+ conn->peer->last_tx_at = ktime_get_seconds();
+}
+
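
The abort datagram is just the wire header followed by the 4-byte abort code, gathered from two kvecs, everything in network byte order. A userspace model of the same wire image, with a cut-down header struct and demo values:

/* Assemble header + abort code into one buffer, as the two iovecs above
 * do for the socket.  The struct is a stand-in, not the real wire header.
 */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

struct wire_header {
	uint32_t cid;
	uint32_t serial;
	uint8_t type;
};

int main(void)
{
	struct wire_header whdr = { .cid = htonl(0x12345678),
				    .serial = htonl(1),
				    .type = 4 /* ABORT */ };
	uint32_t code = htonl(17);	/* demo abort code */
	unsigned char pkt[sizeof(whdr) + sizeof(code)];

	memcpy(pkt, &whdr, sizeof(whdr));
	memcpy(pkt + sizeof(whdr), &code, sizeof(code));
	printf("abort packet is %zu bytes\n", sizeof(pkt));
	return 0;
}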
+/*
* Reject a packet through the local endpoint.
*/
void rxrpc_reject_packet(struct rxrpc_local *local, struct sk_buff *skb)
@@ -667,7 +700,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *peer)
static inline void rxrpc_instant_resend(struct rxrpc_call *call,
struct rxrpc_txbuf *txb)
{
- if (call->state < RXRPC_CALL_COMPLETE)
+ if (!__rxrpc_call_is_complete(call))
kdebug("resend");
}
diff --git a/net/rxrpc/peer_object.c b/net/rxrpc/peer_object.c
index 4eecea2be307..8d7a715a0bb1 100644
--- a/net/rxrpc/peer_object.c
+++ b/net/rxrpc/peer_object.c
@@ -147,10 +147,10 @@ struct rxrpc_peer *rxrpc_lookup_peer_rcu(struct rxrpc_local *local,
* assess the MTU size for the network interface through which this peer is
* reached
*/
-static void rxrpc_assess_MTU_size(struct rxrpc_sock *rx,
+static void rxrpc_assess_MTU_size(struct rxrpc_local *local,
struct rxrpc_peer *peer)
{
- struct net *net = sock_net(&rx->sk);
+ struct net *net = local->net;
struct dst_entry *dst;
struct rtable *rt;
struct flowi fl;
@@ -236,11 +236,11 @@ struct rxrpc_peer *rxrpc_alloc_peer(struct rxrpc_local *local, gfp_t gfp,
/*
* Initialise peer record.
*/
-static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
+static void rxrpc_init_peer(struct rxrpc_local *local, struct rxrpc_peer *peer,
unsigned long hash_key)
{
peer->hash_key = hash_key;
- rxrpc_assess_MTU_size(rx, peer);
+ rxrpc_assess_MTU_size(local, peer);
peer->mtu = peer->if_mtu;
peer->rtt_last_req = ktime_get_real();
@@ -272,8 +272,7 @@ static void rxrpc_init_peer(struct rxrpc_sock *rx, struct rxrpc_peer *peer,
/*
* Set up a new peer.
*/
-static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
- struct rxrpc_local *local,
+static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx,
unsigned long hash_key,
gfp_t gfp)
@@ -285,7 +284,7 @@ static struct rxrpc_peer *rxrpc_create_peer(struct rxrpc_sock *rx,
peer = rxrpc_alloc_peer(local, gfp, rxrpc_peer_new_client);
if (peer) {
memcpy(&peer->srx, srx, sizeof(*srx));
- rxrpc_init_peer(rx, peer, hash_key);
+ rxrpc_init_peer(local, peer, hash_key);
}
_leave(" = %p", peer);
@@ -304,14 +303,13 @@ static void rxrpc_free_peer(struct rxrpc_peer *peer)
* since we've already done a search in the list from the non-reentrant context
* (the data_ready handler) that is the only place we can add new peers.
*/
-void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
- struct rxrpc_peer *peer)
+void rxrpc_new_incoming_peer(struct rxrpc_local *local, struct rxrpc_peer *peer)
{
struct rxrpc_net *rxnet = local->rxnet;
unsigned long hash_key;
hash_key = rxrpc_peer_hash_key(local, &peer->srx);
- rxrpc_init_peer(rx, peer, hash_key);
+ rxrpc_init_peer(local, peer, hash_key);
spin_lock(&rxnet->peer_hash_lock);
hash_add_rcu(rxnet->peer_hash, &peer->hash_link, hash_key);
@@ -322,8 +320,7 @@ void rxrpc_new_incoming_peer(struct rxrpc_sock *rx, struct rxrpc_local *local,
/*
* obtain a remote transport endpoint for the specified address
*/
-struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
- struct rxrpc_local *local,
+struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_local *local,
struct sockaddr_rxrpc *srx, gfp_t gfp)
{
struct rxrpc_peer *peer, *candidate;
@@ -343,7 +340,7 @@ struct rxrpc_peer *rxrpc_lookup_peer(struct rxrpc_sock *rx,
/* The peer is not yet present in hash - create a candidate
* for a new record and then redo the search.
*/
- candidate = rxrpc_create_peer(rx, local, srx, hash_key, gfp);
+ candidate = rxrpc_create_peer(local, srx, hash_key, gfp);
if (!candidate) {
_leave(" = NULL [nomem]");
return NULL;
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 3a59591ec061..682636d3b060 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -12,13 +12,13 @@
static const char *const rxrpc_conn_states[RXRPC_CONN__NR_STATES] = {
[RXRPC_CONN_UNUSED] = "Unused ",
+ [RXRPC_CONN_CLIENT_UNSECURED] = "ClUnsec ",
[RXRPC_CONN_CLIENT] = "Client ",
[RXRPC_CONN_SERVICE_PREALLOC] = "SvPrealc",
[RXRPC_CONN_SERVICE_UNSECURED] = "SvUnsec ",
[RXRPC_CONN_SERVICE_CHALLENGING] = "SvChall ",
[RXRPC_CONN_SERVICE] = "SvSecure",
- [RXRPC_CONN_REMOTELY_ABORTED] = "RmtAbort",
- [RXRPC_CONN_LOCALLY_ABORTED] = "LocAbort",
+ [RXRPC_CONN_ABORTED] = "Aborted ",
};
/*
@@ -51,10 +51,10 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
struct rxrpc_local *local;
struct rxrpc_call *call;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ enum rxrpc_call_state state;
unsigned long timeout = 0;
rxrpc_seq_t acks_hard_ack;
char lbuff[50], rbuff[50];
- u64 wtmp;
if (v == &rxnet->calls) {
seq_puts(seq,
@@ -75,13 +75,13 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
sprintf(rbuff, "%pISpc", &call->dest_srx.transport);
- if (call->state != RXRPC_CALL_SERVER_PREALLOC) {
+ state = rxrpc_call_state(call);
+ if (state != RXRPC_CALL_SERVER_PREALLOC) {
timeout = READ_ONCE(call->expect_rx_by);
timeout -= jiffies;
}
acks_hard_ack = READ_ONCE(call->acks_hard_ack);
- wtmp = atomic64_read_acquire(&call->ackr_window);
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %08x %s %3u"
" %-8.8s %08x %08x %08x %02x %08x %02x %08x %02x %06lx\n",
@@ -92,11 +92,11 @@ static int rxrpc_call_seq_show(struct seq_file *seq, void *v)
call->call_id,
rxrpc_is_service_call(call) ? "Svc" : "Clt",
refcount_read(&call->ref),
- rxrpc_call_states[call->state],
+ rxrpc_call_states[state],
call->abort_code,
call->debug_id,
acks_hard_ack, READ_ONCE(call->tx_top) - acks_hard_ack,
- lower_32_bits(wtmp), upper_32_bits(wtmp) - lower_32_bits(wtmp),
+ call->ackr_window, call->ackr_wtop - call->ackr_window,
call->rx_serial,
call->cong_cwnd,
timeout);
@@ -143,6 +143,7 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
{
struct rxrpc_connection *conn;
struct rxrpc_net *rxnet = rxrpc_net(seq_file_net(seq));
+ const char *state;
char lbuff[50], rbuff[50];
if (v == &rxnet->conn_proc_list) {
@@ -163,9 +164,11 @@ static int rxrpc_connection_seq_show(struct seq_file *seq, void *v)
}
sprintf(lbuff, "%pISpc", &conn->local->srx.transport);
-
sprintf(rbuff, "%pISpc", &conn->peer->srx.transport);
print:
+ state = rxrpc_is_conn_aborted(conn) ?
+ rxrpc_call_completions[conn->completion] :
+ rxrpc_conn_states[conn->state];
seq_printf(seq,
"UDP %-47.47s %-47.47s %4x %08x %s %3u %3d"
" %s %08x %08x %08x %08x %08x %08x %08x\n",
@@ -176,7 +179,7 @@ print:
rxrpc_conn_is_service(conn) ? "Svc" : "Clt",
refcount_read(&conn->ref),
atomic_read(&conn->active),
- rxrpc_conn_states[conn->state],
+ state,
key_serial(conn->key),
atomic_read(&conn->serial),
conn->hi_serial,
diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c
index 36b25d003cf0..a482f88c5fc5 100644
--- a/net/rxrpc/recvmsg.c
+++ b/net/rxrpc/recvmsg.c
@@ -40,12 +40,12 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
call->notify_rx(sk, call, call->user_call_ID);
spin_unlock(&call->notify_lock);
} else {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
if (list_empty(&call->recvmsg_link)) {
rxrpc_get_call(call, rxrpc_call_get_notify_socket);
list_add_tail(&call->recvmsg_link, &rx->recvmsg_q);
}
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
if (!sock_flag(sk, SOCK_DEAD)) {
_debug("call %ps", sk->sk_data_ready);
@@ -59,85 +59,6 @@ void rxrpc_notify_socket(struct rxrpc_call *call)
}
/*
- * Transition a call to the complete state.
- */
-bool __rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- if (call->state < RXRPC_CALL_COMPLETE) {
- call->abort_code = abort_code;
- call->error = error;
- call->completion = compl;
- call->state = RXRPC_CALL_COMPLETE;
- trace_rxrpc_call_complete(call);
- wake_up(&call->waitq);
- rxrpc_notify_socket(call);
- return true;
- }
- return false;
-}
-
-bool rxrpc_set_call_completion(struct rxrpc_call *call,
- enum rxrpc_call_completion compl,
- u32 abort_code,
- int error)
-{
- bool ret = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock(&call->state_lock);
- ret = __rxrpc_set_call_completion(call, compl, abort_code, error);
- write_unlock(&call->state_lock);
- }
- return ret;
-}
-
-/*
- * Record that a call successfully completed.
- */
-bool __rxrpc_call_completed(struct rxrpc_call *call)
-{
- return __rxrpc_set_call_completion(call, RXRPC_CALL_SUCCEEDED, 0, 0);
-}
-
-bool rxrpc_call_completed(struct rxrpc_call *call)
-{
- bool ret = false;
-
- if (call->state < RXRPC_CALL_COMPLETE) {
- write_lock(&call->state_lock);
- ret = __rxrpc_call_completed(call);
- write_unlock(&call->state_lock);
- }
- return ret;
-}
-
-/*
- * Record that a call is locally aborted.
- */
-bool __rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- trace_rxrpc_abort(call->debug_id, why, call->cid, call->call_id, seq,
- abort_code, error);
- return __rxrpc_set_call_completion(call, RXRPC_CALL_LOCALLY_ABORTED,
- abort_code, error);
-}
-
-bool rxrpc_abort_call(const char *why, struct rxrpc_call *call,
- rxrpc_seq_t seq, u32 abort_code, int error)
-{
- bool ret;
-
- write_lock(&call->state_lock);
- ret = __rxrpc_abort_call(why, call, seq, abort_code, error);
- write_unlock(&call->state_lock);
- return ret;
-}
-
-/*
* Pass a call terminating message to userspace.
*/
static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
@@ -168,53 +89,18 @@ static int rxrpc_recvmsg_term(struct rxrpc_call *call, struct msghdr *msg)
ret = put_cmsg(msg, SOL_RXRPC, RXRPC_LOCAL_ERROR, 4, &tmp);
break;
default:
- pr_err("Invalid terminal call state %u\n", call->state);
+ pr_err("Invalid terminal call state %u\n", call->completion);
BUG();
break;
}
trace_rxrpc_recvdata(call, rxrpc_recvmsg_terminal,
- lower_32_bits(atomic64_read(&call->ackr_window)) - 1,
+ call->ackr_window - 1,
call->rx_pkt_offset, call->rx_pkt_len, ret);
return ret;
}
/*
- * End the packet reception phase.
- */
-static void rxrpc_end_rx_phase(struct rxrpc_call *call, rxrpc_serial_t serial)
-{
- rxrpc_seq_t whigh = READ_ONCE(call->rx_highest_seq);
-
- _enter("%d,%s", call->debug_id, rxrpc_call_states[call->state]);
-
- trace_rxrpc_receive(call, rxrpc_receive_end, 0, whigh);
-
- if (call->state == RXRPC_CALL_CLIENT_RECV_REPLY)
- rxrpc_propose_delay_ACK(call, serial, rxrpc_propose_ack_terminal_ack);
-
- write_lock(&call->state_lock);
-
- switch (call->state) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- __rxrpc_call_completed(call);
- write_unlock(&call->state_lock);
- break;
-
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- call->state = RXRPC_CALL_SERVER_ACK_REQUEST;
- call->expect_req_by = jiffies + MAX_JIFFY_OFFSET;
- write_unlock(&call->state_lock);
- rxrpc_propose_delay_ACK(call, serial,
- rxrpc_propose_ack_processing_op);
- break;
- default:
- write_unlock(&call->state_lock);
- break;
- }
-}
-
-/*
* Discard a packet we've used up and advance the Rx window by one.
*/
static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
@@ -244,15 +130,14 @@ static void rxrpc_rotate_rx_window(struct rxrpc_call *call)
trace_rxrpc_receive(call, last ? rxrpc_receive_rotate_last : rxrpc_receive_rotate,
serial, call->rx_consumed);
- if (last) {
- rxrpc_end_rx_phase(call, serial);
- return;
- }
+
+ if (last)
+ set_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags);
/* Check to see if there's an ACK that needs sending. */
acked = atomic_add_return(call->rx_consumed - old_consumed,
&call->ackr_nr_consumed);
- if (acked > 2 &&
+ if (acked > 8 &&
!test_and_set_bit(RXRPC_CALL_RX_IS_IDLE, &call->flags))
rxrpc_poke_call(call, rxrpc_call_poke_idle);
}
@@ -272,7 +157,8 @@ static int rxrpc_verify_data(struct rxrpc_call *call, struct sk_buff *skb)
/*
* Deliver messages to a call. This keeps processing packets until the buffer
* is filled and we find either more DATA (returns 0) or the end of the DATA
- * (returns 1). If more packets are required, it returns -EAGAIN.
+ * (returns 1). If more packets are required, it returns -EAGAIN; if the
+ * call has failed, it returns -EIO.
*/
static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
struct msghdr *msg, struct iov_iter *iter,
@@ -288,8 +174,14 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
rx_pkt_offset = call->rx_pkt_offset;
rx_pkt_len = call->rx_pkt_len;
- if (call->state >= RXRPC_CALL_SERVER_ACK_REQUEST) {
- seq = lower_32_bits(atomic64_read(&call->ackr_window)) - 1;
+ if (rxrpc_call_has_failed(call)) {
+ seq = call->ackr_window - 1;
+ ret = -EIO;
+ goto done;
+ }
+
+ if (test_bit(RXRPC_CALL_RECVMSG_READ_ALL, &call->flags)) {
+ seq = call->ackr_window - 1;
ret = 1;
goto done;
}
@@ -312,14 +204,15 @@ static int rxrpc_recvmsg_data(struct socket *sock, struct rxrpc_call *call,
if (rx_pkt_offset == 0) {
ret2 = rxrpc_verify_data(call, skb);
- rx_pkt_offset = sp->offset;
- rx_pkt_len = sp->len;
trace_rxrpc_recvdata(call, rxrpc_recvmsg_next, seq,
- rx_pkt_offset, rx_pkt_len, ret2);
+ sp->offset, sp->len, ret2);
if (ret2 < 0) {
+ kdebug("verify = %d", ret2);
ret = ret2;
goto out;
}
+ rx_pkt_offset = sp->offset;
+ rx_pkt_len = sp->len;
} else {
trace_rxrpc_recvdata(call, rxrpc_recvmsg_cont, seq,
rx_pkt_offset, rx_pkt_len, 0);
@@ -388,13 +281,14 @@ int rxrpc_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
struct rxrpc_call *call;
struct rxrpc_sock *rx = rxrpc_sk(sock->sk);
struct list_head *l;
+ unsigned int call_debug_id = 0;
size_t copied = 0;
long timeo;
int ret;
DEFINE_WAIT(wait);
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_enter, 0);
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_enter, 0);
if (flags & (MSG_OOB | MSG_TRUNC))
return -EOPNOTSUPP;
@@ -431,7 +325,7 @@ try_again:
if (list_empty(&rx->recvmsg_q)) {
if (signal_pending(current))
goto wait_interrupted;
- trace_rxrpc_recvmsg(NULL, rxrpc_recvmsg_wait, 0);
+ trace_rxrpc_recvmsg(0, rxrpc_recvmsg_wait, 0);
timeo = schedule_timeout(timeo);
}
finish_wait(sk_sleep(&rx->sk), &wait);
@@ -440,17 +334,31 @@ try_again:
/* Find the next call and dequeue it if we're not just peeking. If we
* do dequeue it, that comes with a ref that we will need to release.
+ * We also want to weed out calls that got requeued whilst we were
+ * shovelling data out.
*/
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
l = rx->recvmsg_q.next;
call = list_entry(l, struct rxrpc_call, recvmsg_link);
+
+ if (!rxrpc_call_is_complete(call) &&
+ skb_queue_empty(&call->recvmsg_queue)) {
+ list_del_init(&call->recvmsg_link);
+ spin_unlock(&rx->recvmsg_lock);
+ release_sock(&rx->sk);
+ trace_rxrpc_recvmsg(call->debug_id, rxrpc_recvmsg_unqueue, 0);
+ rxrpc_put_call(call, rxrpc_call_put_recvmsg);
+ goto try_again;
+ }
+
if (!(flags & MSG_PEEK))
list_del_init(&call->recvmsg_link);
else
rxrpc_get_call(call, rxrpc_call_get_recvmsg);
- write_unlock(&rx->recvmsg_lock);
+ spin_unlock(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_dequeue, 0);
+ call_debug_id = call->debug_id;
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_dequeue, 0);
/* We're going to drop the socket lock, so we need to lock the call
* against interference by sendmsg.
@@ -492,36 +400,37 @@ try_again:
msg->msg_namelen = len;
}
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
- flags, &copied);
- if (ret == -EAGAIN)
- ret = 0;
-
- if (!skb_queue_empty(&call->recvmsg_queue))
- rxrpc_notify_socket(call);
- break;
- default:
+ ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
+ flags, &copied);
+ if (ret == -EAGAIN)
ret = 0;
- break;
- }
-
+ if (ret == -EIO)
+ goto call_failed;
if (ret < 0)
goto error_unlock_call;
- if (call->state == RXRPC_CALL_COMPLETE) {
- ret = rxrpc_recvmsg_term(call, msg);
- if (ret < 0)
- goto error_unlock_call;
- if (!(flags & MSG_PEEK))
- rxrpc_release_call(rx, call);
- msg->msg_flags |= MSG_EOR;
- ret = 1;
- }
+ if (rxrpc_call_is_complete(call) &&
+ skb_queue_empty(&call->recvmsg_queue))
+ goto call_complete;
+ if (rxrpc_call_has_failed(call))
+ goto call_failed;
+
+ if (!skb_queue_empty(&call->recvmsg_queue))
+ rxrpc_notify_socket(call);
+ goto not_yet_complete;
+call_failed:
+ rxrpc_purge_queue(&call->recvmsg_queue);
+call_complete:
+ ret = rxrpc_recvmsg_term(call, msg);
+ if (ret < 0)
+ goto error_unlock_call;
+ if (!(flags & MSG_PEEK))
+ rxrpc_release_call(rx, call);
+ msg->msg_flags |= MSG_EOR;
+ ret = 1;
+
+not_yet_complete:
if (ret == 0)
msg->msg_flags |= MSG_MORE;
else
@@ -531,22 +440,22 @@ try_again:
error_unlock_call:
mutex_unlock(&call->user_mutex);
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
return ret;
error_requeue_call:
if (!(flags & MSG_PEEK)) {
- write_lock(&rx->recvmsg_lock);
+ spin_lock(&rx->recvmsg_lock);
list_add(&call->recvmsg_link, &rx->recvmsg_q);
- write_unlock(&rx->recvmsg_lock);
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_requeue, 0);
+ spin_unlock(&rx->recvmsg_lock);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_requeue, 0);
} else {
rxrpc_put_call(call, rxrpc_call_put_recvmsg);
}
error_no_call:
release_sock(&rx->sk);
error_trace:
- trace_rxrpc_recvmsg(call, rxrpc_recvmsg_return, ret);
+ trace_rxrpc_recvmsg(call_debug_id, rxrpc_recvmsg_return, ret);
return ret;
wait_interrupted:
@@ -584,49 +493,34 @@ int rxrpc_kernel_recv_data(struct socket *sock, struct rxrpc_call *call,
size_t offset = 0;
int ret;
- _enter("{%d,%s},%zu,%d",
- call->debug_id, rxrpc_call_states[call->state],
- *_len, want_more);
-
- ASSERTCMP(call->state, !=, RXRPC_CALL_SERVER_SECURING);
+ _enter("{%d},%zu,%d", call->debug_id, *_len, want_more);
mutex_lock(&call->user_mutex);
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_RECV_REPLY:
- case RXRPC_CALL_SERVER_RECV_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- ret = rxrpc_recvmsg_data(sock, call, NULL, iter,
- *_len, 0, &offset);
- *_len -= offset;
- if (ret < 0)
- goto out;
-
- /* We can only reach here with a partially full buffer if we
- * have reached the end of the data. We must otherwise have a
- * full buffer or have been given -EAGAIN.
- */
- if (ret == 1) {
- if (iov_iter_count(iter) > 0)
- goto short_data;
- if (!want_more)
- goto read_phase_complete;
- ret = 0;
- goto out;
- }
-
- if (!want_more)
- goto excess_data;
+ ret = rxrpc_recvmsg_data(sock, call, NULL, iter, *_len, 0, &offset);
+ *_len -= offset;
+ if (ret == -EIO)
+ goto call_failed;
+ if (ret < 0)
goto out;
- case RXRPC_CALL_COMPLETE:
- goto call_complete;
-
- default:
- ret = -EINPROGRESS;
+ /* We can only reach here with a partially full buffer if we have
+ * reached the end of the data. We must otherwise have a full buffer
+ * or have been given -EAGAIN.
+ */
+ if (ret == 1) {
+ if (iov_iter_count(iter) > 0)
+ goto short_data;
+ if (!want_more)
+ goto read_phase_complete;
+ ret = 0;
goto out;
}
+ if (!want_more)
+ goto excess_data;
+ goto out;
+
read_phase_complete:
ret = 1;
out:
@@ -637,14 +531,18 @@ out:
return ret;
short_data:
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("short_data"));
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_short_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EBADMSG);
ret = -EBADMSG;
goto out;
excess_data:
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("excess_data"));
+ trace_rxrpc_abort(call->debug_id, rxrpc_recvmsg_excess_data,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EMSGSIZE);
ret = -EMSGSIZE;
goto out;
-call_complete:
+call_failed:
*_abort = call->abort_code;
ret = call->error;
if (call->completion == RXRPC_CALL_SUCCEEDED) {
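The rxrpc_recvmsg_data() return convention after this patch is: 0 = more DATA expected, 1 = end of DATA reached, -EAGAIN = nothing queued yet, -EIO = the call has failed. A minimal sketch of the caller-side ladder this implies, assuming the helper above; the labels are hypothetical and this is illustrative, not the committed code:

	ret = rxrpc_recvmsg_data(sock, call, msg, &msg->msg_iter, len,
				 flags, &copied);
	switch (ret) {
	case -EAGAIN:		/* Nothing queued yet: report no progress. */
		ret = 0;
		break;
	case -EIO:		/* Call failed: purge the queue, report error. */
		goto failed;	/* hypothetical label */
	case 1:			/* End of DATA: emit the terminal message. */
		goto complete;	/* hypothetical label */
	default:		/* 0, or a real error from the copy. */
		break;
	}

This mirrors how rxrpc_recvmsg() in the hunks above maps -EAGAIN to 0 and routes -EIO to the call_failed label.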
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index d1233720e05f..1bf571a66e02 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -411,18 +411,15 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist sg[16];
- bool aborted;
u32 data_size, buf;
u16 check;
int ret;
_enter("");
- if (sp->len < 8) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_hdr", "V1H",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
@@ -442,11 +439,9 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
skcipher_request_zero(req);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_len", "XV1",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_encdata);
sp->offset += sizeof(sechdr);
sp->len -= sizeof(sechdr);
@@ -456,26 +451,16 @@ static int rxkad_verify_packet_1(struct rxrpc_call *call, struct sk_buff *skb,
check = buf >> 16;
check ^= seq ^ call->call_id;
check &= 0xffff;
- if (check != 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_check", "V1C",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
-
- if (data_size > sp->len) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_1_datalen", "V1L",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_1_short_check);
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_1_short_data);
sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
}
/*
@@ -490,18 +475,15 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv;
struct scatterlist _sg[4], *sg;
- bool aborted;
u32 data_size, buf;
u16 check;
int nsg, ret;
_enter(",{%d}", sp->len);
- if (sp->len < 8) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_hdr", "V2H",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (sp->len < 8)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_header);
/* Decrypt the skbuff in-place. TODO: We really want to decrypt
* directly into the target buffer.
@@ -513,7 +495,7 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
} else {
sg = kmalloc_array(nsg, sizeof(*sg), GFP_NOIO);
if (!sg)
- goto nomem;
+ return -ENOMEM;
}
sg_init_table(sg, nsg);
@@ -537,11 +519,9 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
kfree(sg);
/* Extract the decrypted packet length */
- if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_len", "XV2",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (skb_copy_bits(skb, sp->offset, &sechdr, sizeof(sechdr)) < 0)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_len);
sp->offset += sizeof(sechdr);
sp->len -= sizeof(sechdr);
@@ -551,30 +531,17 @@ static int rxkad_verify_packet_2(struct rxrpc_call *call, struct sk_buff *skb,
check = buf >> 16;
check ^= seq ^ call->call_id;
check &= 0xffff;
- if (check != 0) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_check", "V2C",
- RXKADSEALEDINCON);
- goto protocol_error;
- }
+ if (check != 0)
+ return rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_2_short_check);
- if (data_size > sp->len) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_2_datalen", "V2L",
- RXKADDATALEN);
- goto protocol_error;
- }
+ if (data_size > sp->len)
+ return rxrpc_abort_eproto(call, skb, RXKADDATALEN,
+ rxkad_abort_2_short_data);
sp->len = data_size;
_leave(" = 0 [dlen=%x]", data_size);
return 0;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
-
-nomem:
- _leave(" = -ENOMEM");
- return -ENOMEM;
}
/*
@@ -590,7 +557,6 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
__be32 buf[2];
} crypto __aligned(8);
rxrpc_seq_t seq = sp->hdr.seq;
- bool aborted;
int ret;
u16 cksum;
u32 x, y;
@@ -627,9 +593,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
cksum = 1; /* zero checksums are not permitted */
if (cksum != sp->hdr.cksum) {
- aborted = rxrpc_abort_eproto(call, skb, "rxkad_csum", "VCK",
- RXKADSEALEDINCON);
- goto protocol_error;
+ ret = rxrpc_abort_eproto(call, skb, RXKADSEALEDINCON,
+ rxkad_abort_bad_checksum);
+ goto out;
}
switch (call->conn->security_level) {
@@ -647,13 +613,9 @@ static int rxkad_verify_packet(struct rxrpc_call *call, struct sk_buff *skb)
break;
}
+out:
skcipher_request_free(req);
return ret;
-
-protocol_error:
- if (aborted)
- rxrpc_send_abort_packet(call);
- return -EPROTO;
}
/*
@@ -821,34 +783,30 @@ static int rxkad_encrypt_response(struct rxrpc_connection *conn,
* respond to a challenge packet
*/
static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
const struct rxrpc_key_token *token;
struct rxkad_challenge challenge;
struct rxkad_response *resp;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
- const char *eproto;
- u32 version, nonce, min_level, abort_code;
- int ret;
+ u32 version, nonce, min_level;
+ int ret = -EPROTO;
_enter("{%d,%x}", conn->debug_id, key_serial(conn->key));
- eproto = tracepoint_string("chall_no_key");
- abort_code = RX_PROTOCOL_ERROR;
if (!conn->key)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RX_PROTOCOL_ERROR, -EPROTO,
+ rxkad_abort_chall_no_key);
- abort_code = RXKADEXPIRED;
ret = key_validate(conn->key);
if (ret < 0)
- goto other_error;
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_chall_key_expired);
- eproto = tracepoint_string("chall_short");
- abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
&challenge, sizeof(challenge)) < 0)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_chall_short);
version = ntohl(challenge.version);
nonce = ntohl(challenge.nonce);
@@ -856,15 +814,13 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
trace_rxrpc_rx_challenge(conn, sp->hdr.serial, version, nonce, min_level);
- eproto = tracepoint_string("chall_ver");
- abort_code = RXKADINCONSISTENCY;
if (version != RXKAD_VERSION)
- goto protocol_error;
+ return rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_chall_version);
- abort_code = RXKADLEVELFAIL;
- ret = -EACCES;
if (conn->security_level < min_level)
- goto other_error;
+ return rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EACCES,
+ rxkad_abort_chall_level);
token = conn->key->payload.data[0];
@@ -893,13 +849,6 @@ static int rxkad_respond_to_challenge(struct rxrpc_connection *conn,
ret = rxkad_send_response(conn, &sp->hdr, resp, token->kad);
kfree(resp);
return ret;
-
-protocol_error:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
- ret = -EPROTO;
-other_error:
- *_abort_code = abort_code;
- return ret;
}
/*
@@ -910,20 +859,15 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
struct sk_buff *skb,
void *ticket, size_t ticket_len,
struct rxrpc_crypt *_session_key,
- time64_t *_expiry,
- u32 *_abort_code)
+ time64_t *_expiry)
{
struct skcipher_request *req;
- struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt iv, key;
struct scatterlist sg[1];
struct in_addr addr;
unsigned int life;
- const char *eproto;
time64_t issue, now;
bool little_endian;
- int ret;
- u32 abort_code;
u8 *p, *q, *name, *end;
_enter("{%d},{%x}", conn->debug_id, key_serial(server_key));
@@ -935,10 +879,9 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
memcpy(&iv, &server_key->payload.data[2], sizeof(iv));
- ret = -ENOMEM;
req = skcipher_request_alloc(server_key->payload.data[0], GFP_NOFS);
if (!req)
- goto temporary_error;
+ return -ENOMEM;
sg_init_one(&sg[0], ticket, ticket_len);
skcipher_request_set_callback(req, 0, NULL, NULL);
@@ -949,18 +892,21 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p = ticket;
end = p + ticket_len;
-#define Z(field) \
- ({ \
- u8 *__str = p; \
- eproto = tracepoint_string("rxkad_bad_"#field); \
- q = memchr(p, 0, end - p); \
- if (!q || q - p > (field##_SZ)) \
- goto bad_ticket; \
- for (; p < q; p++) \
- if (!isprint(*p)) \
- goto bad_ticket; \
- p++; \
- __str; \
+#define Z(field, fieldl) \
+ ({ \
+ u8 *__str = p; \
+ q = memchr(p, 0, end - p); \
+ if (!q || q - p > field##_SZ) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ for (; p < q; p++) \
+ if (!isprint(*p)) \
+ return rxrpc_abort_conn( \
+ conn, skb, RXKADBADTICKET, -EPROTO, \
+ rxkad_abort_resp_tkt_##fieldl); \
+ p++; \
+ __str; \
})
/* extract the ticket flags */
@@ -969,20 +915,20 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
p++;
/* extract the authentication name */
- name = Z(ANAME);
+ name = Z(ANAME, aname);
_debug("KIV ANAME: %s", name);
/* extract the principal's instance */
- name = Z(INST);
+ name = Z(INST, inst);
_debug("KIV INST : %s", name);
/* extract the principal's authentication domain */
- name = Z(REALM);
+ name = Z(REALM, realm);
_debug("KIV REALM: %s", name);
- eproto = tracepoint_string("rxkad_bad_len");
if (end - p < 4 + 8 + 4 + 2)
- goto bad_ticket;
+ return rxrpc_abort_conn(conn, skb, RXKADBADTICKET, -EPROTO,
+ rxkad_abort_resp_tkt_short);
/* get the IPv4 address of the entity that requested the ticket */
memcpy(&addr, p, sizeof(addr));
@@ -1014,38 +960,23 @@ static int rxkad_decrypt_ticket(struct rxrpc_connection *conn,
_debug("KIV ISSUE: %llx [%llx]", issue, now);
/* check the ticket is in date */
- if (issue > now) {
- abort_code = RXKADNOAUTH;
- ret = -EKEYREJECTED;
- goto other_error;
- }
-
- if (issue < now - life) {
- abort_code = RXKADEXPIRED;
- ret = -EKEYEXPIRED;
- goto other_error;
- }
+ if (issue > now)
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, -EKEYREJECTED,
+ rxkad_abort_resp_tkt_future);
+ if (issue < now - life)
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, -EKEYEXPIRED,
+ rxkad_abort_resp_tkt_expired);
*_expiry = issue + life;
/* get the service name */
- name = Z(SNAME);
+ name = Z(SNAME, sname);
_debug("KIV SNAME: %s", name);
/* get the service instance name */
- name = Z(INST);
+ name = Z(INST, sinst);
_debug("KIV SINST: %s", name);
return 0;
-
-bad_ticket:
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
- abort_code = RXKADBADTICKET;
- ret = -EPROTO;
-other_error:
- *_abort_code = abort_code;
- return ret;
-temporary_error:
- return ret;
}
/*
@@ -1086,17 +1017,15 @@ static void rxkad_decrypt_response(struct rxrpc_connection *conn,
* verify a response
*/
static int rxkad_verify_response(struct rxrpc_connection *conn,
- struct sk_buff *skb,
- u32 *_abort_code)
+ struct sk_buff *skb)
{
struct rxkad_response *response;
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_crypt session_key;
struct key *server_key;
- const char *eproto;
time64_t expiry;
void *ticket;
- u32 abort_code, version, kvno, ticket_len, level;
+ u32 version, kvno, ticket_len, level;
__be32 csum;
int ret, i;
@@ -1104,22 +1033,18 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
server_key = rxrpc_look_up_server_security(conn, skb, 0, 0);
if (IS_ERR(server_key)) {
- switch (PTR_ERR(server_key)) {
+ ret = PTR_ERR(server_key);
+ switch (ret) {
case -ENOKEY:
- abort_code = RXKADUNKNOWNKEY;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, ret,
+ rxkad_abort_resp_nokey);
case -EKEYEXPIRED:
- abort_code = RXKADEXPIRED;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADEXPIRED, ret,
+ rxkad_abort_resp_key_expired);
default:
- abort_code = RXKADNOAUTH;
- break;
+ return rxrpc_abort_conn(conn, skb, RXKADNOAUTH, ret,
+ rxkad_abort_resp_key_rejected);
}
- trace_rxrpc_abort(0, "SVK",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- abort_code, PTR_ERR(server_key));
- *_abort_code = abort_code;
- return -EPROTO;
}
ret = -ENOMEM;
@@ -1127,11 +1052,12 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (!response)
goto temporary_error;
- eproto = tracepoint_string("rxkad_rsp_short");
- abort_code = RXKADPACKETSHORT;
if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
- response, sizeof(*response)) < 0)
+ response, sizeof(*response)) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short);
goto protocol_error;
+ }
version = ntohl(response->version);
ticket_len = ntohl(response->ticket_len);
@@ -1139,20 +1065,23 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
trace_rxrpc_rx_response(conn, sp->hdr.serial, version, kvno, ticket_len);
- eproto = tracepoint_string("rxkad_rsp_ver");
- abort_code = RXKADINCONSISTENCY;
- if (version != RXKAD_VERSION)
+ if (version != RXKAD_VERSION) {
+ rxrpc_abort_conn(conn, skb, RXKADINCONSISTENCY, -EPROTO,
+ rxkad_abort_resp_version);
goto protocol_error;
+ }
- eproto = tracepoint_string("rxkad_rsp_tktlen");
- abort_code = RXKADTICKETLEN;
- if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN)
+ if (ticket_len < 4 || ticket_len > MAXKRB5TICKETLEN) {
+ rxrpc_abort_conn(conn, skb, RXKADTICKETLEN, -EPROTO,
+ rxkad_abort_resp_tkt_len);
goto protocol_error;
+ }
- eproto = tracepoint_string("rxkad_rsp_unkkey");
- abort_code = RXKADUNKNOWNKEY;
- if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5)
+ if (kvno >= RXKAD_TKT_TYPE_KERBEROS_V5) {
+ rxrpc_abort_conn(conn, skb, RXKADUNKNOWNKEY, -EPROTO,
+ rxkad_abort_resp_unknown_tkt);
goto protocol_error;
+ }
/* extract the kerberos ticket and decrypt and decode it */
ret = -ENOMEM;
@@ -1160,15 +1089,15 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
if (!ticket)
goto temporary_error_free_resp;
- eproto = tracepoint_string("rxkad_tkt_short");
- abort_code = RXKADPACKETSHORT;
- ret = skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
- ticket, ticket_len);
- if (ret < 0)
- goto temporary_error_free_ticket;
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header) + sizeof(*response),
+ ticket, ticket_len) < 0) {
+ rxrpc_abort_conn(conn, skb, RXKADPACKETSHORT, -EPROTO,
+ rxkad_abort_resp_short_tkt);
+ goto protocol_error;
+ }
ret = rxkad_decrypt_ticket(conn, server_key, skb, ticket, ticket_len,
- &session_key, &expiry, _abort_code);
+ &session_key, &expiry);
if (ret < 0)
goto temporary_error_free_ticket;
@@ -1176,56 +1105,61 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
* response */
rxkad_decrypt_response(conn, response, &session_key);
- eproto = tracepoint_string("rxkad_rsp_param");
- abort_code = RXKADSEALEDINCON;
- if (ntohl(response->encrypted.epoch) != conn->proto.epoch)
- goto protocol_error_free;
- if (ntohl(response->encrypted.cid) != conn->proto.cid)
- goto protocol_error_free;
- if (ntohl(response->encrypted.securityIndex) != conn->security_ix)
+ if (ntohl(response->encrypted.epoch) != conn->proto.epoch ||
+ ntohl(response->encrypted.cid) != conn->proto.cid ||
+ ntohl(response->encrypted.securityIndex) != conn->security_ix) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_param);
goto protocol_error_free;
+ }
+
csum = response->encrypted.checksum;
response->encrypted.checksum = 0;
rxkad_calc_response_checksum(response);
- eproto = tracepoint_string("rxkad_rsp_csum");
- if (response->encrypted.checksum != csum)
+ if (response->encrypted.checksum != csum) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_checksum);
goto protocol_error_free;
+ }
- spin_lock(&conn->bundle->channel_lock);
for (i = 0; i < RXRPC_MAXCALLS; i++) {
- struct rxrpc_call *call;
u32 call_id = ntohl(response->encrypted.call_id[i]);
+ u32 counter = READ_ONCE(conn->channels[i].call_counter);
+
+ if (call_id > INT_MAX) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_bad_callid);
+ goto protocol_error_free;
+ }
- eproto = tracepoint_string("rxkad_rsp_callid");
- if (call_id > INT_MAX)
- goto protocol_error_unlock;
-
- eproto = tracepoint_string("rxkad_rsp_callctr");
- if (call_id < conn->channels[i].call_counter)
- goto protocol_error_unlock;
-
- eproto = tracepoint_string("rxkad_rsp_callst");
- if (call_id > conn->channels[i].call_counter) {
- call = rcu_dereference_protected(
- conn->channels[i].call,
- lockdep_is_held(&conn->bundle->channel_lock));
- if (call && call->state < RXRPC_CALL_COMPLETE)
- goto protocol_error_unlock;
+ if (call_id < counter) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_ctr);
+ goto protocol_error_free;
+ }
+
+ if (call_id > counter) {
+ if (conn->channels[i].call) {
+ rxrpc_abort_conn(conn, skb, RXKADSEALEDINCON, -EPROTO,
+ rxkad_abort_resp_call_state);
+ goto protocol_error_free;
+ }
conn->channels[i].call_counter = call_id;
}
}
- spin_unlock(&conn->bundle->channel_lock);
- eproto = tracepoint_string("rxkad_rsp_seq");
- abort_code = RXKADOUTOFSEQUENCE;
- if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1)
+ if (ntohl(response->encrypted.inc_nonce) != conn->rxkad.nonce + 1) {
+ rxrpc_abort_conn(conn, skb, RXKADOUTOFSEQUENCE, -EPROTO,
+ rxkad_abort_resp_ooseq);
goto protocol_error_free;
+ }
- eproto = tracepoint_string("rxkad_rsp_level");
- abort_code = RXKADLEVELFAIL;
level = ntohl(response->encrypted.level);
- if (level > RXRPC_SECURITY_ENCRYPT)
+ if (level > RXRPC_SECURITY_ENCRYPT) {
+ rxrpc_abort_conn(conn, skb, RXKADLEVELFAIL, -EPROTO,
+ rxkad_abort_resp_level);
goto protocol_error_free;
+ }
conn->security_level = level;
/* create a key to hold the security data and expiration time - after
@@ -1240,15 +1174,11 @@ static int rxkad_verify_response(struct rxrpc_connection *conn,
_leave(" = 0");
return 0;
-protocol_error_unlock:
- spin_unlock(&conn->bundle->channel_lock);
protocol_error_free:
kfree(ticket);
protocol_error:
kfree(response);
- trace_rxrpc_rx_eproto(NULL, sp->hdr.serial, eproto);
key_put(server_key);
- *_abort_code = abort_code;
return -EPROTO;
temporary_error_free_ticket:
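Throughout rxkad.c this patch replaces the old "stash abort_code, goto protocol_error" idiom with direct returns through rxrpc_abort_eproto()/rxrpc_abort_conn(), each tagged with an enumerated reason. A standalone user-space sketch of the shape of that pattern; the enum values and the record_abort() helper are invented for illustration, and the real helpers additionally schedule the ABORT packet and fire a tracepoint:

	#include <errno.h>

	enum abort_reason { ABORT_SHORT_HEADER, ABORT_BAD_CHECKSUM };

	struct conn {
		int abort_code;			/* goes out in the ABORT packet */
		enum abort_reason why;		/* enumerated trace reason */
	};

	static int record_abort(struct conn *c, int abort_code, int err,
				enum abort_reason why)
	{
		c->abort_code = abort_code;
		c->why = why;
		return err;			/* callers return this directly */
	}

	static int verify(struct conn *c, const unsigned char *p, int len)
	{
		if (len < 8)
			return record_abort(c, 1, -EPROTO, ABORT_SHORT_HEADER);
		if (p[len - 1] != 0)		/* stand-in integrity check */
			return record_abort(c, 2, -EPROTO, ABORT_BAD_CHECKSUM);
		return 0;
	}

Because the helper returns the error value itself, every validation site collapses from four lines plus a shared label into a single return, and the abort reason is machine-readable rather than a tracepoint string.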
diff --git a/net/rxrpc/rxperf.c b/net/rxrpc/rxperf.c
index d33a109e846c..4a2e90015ca7 100644
--- a/net/rxrpc/rxperf.c
+++ b/net/rxrpc/rxperf.c
@@ -10,6 +10,8 @@
#include <linux/slab.h>
#include <net/sock.h>
#include <net/af_rxrpc.h>
+#define RXRPC_TRACE_ONLY_DEFINE_ENUMS
+#include <trace/events/rxrpc.h>
MODULE_DESCRIPTION("rxperf test server (afs)");
MODULE_AUTHOR("Red Hat, Inc.");
@@ -307,12 +309,14 @@ static void rxperf_deliver_to_call(struct work_struct *work)
case -EOPNOTSUPP:
abort_code = RXGEN_OPCODE;
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- abort_code, ret, "GOP");
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
goto call_complete;
case -ENOTSUPP:
abort_code = RX_USER_ABORT;
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- abort_code, ret, "GUA");
+ abort_code, ret,
+ rxperf_abort_op_not_supported);
goto call_complete;
case -EIO:
pr_err("Call %u in bad state %u\n",
@@ -324,11 +328,13 @@ static void rxperf_deliver_to_call(struct work_struct *work)
case -ENOMEM:
case -EFAULT:
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RXGEN_SS_UNMARSHAL, ret, "GUM");
+ RXGEN_SS_UNMARSHAL, ret,
+ rxperf_abort_unmarshal_error);
goto call_complete;
default:
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RX_CALL_DEAD, ret, "GER");
+ RX_CALL_DEAD, ret,
+ rxperf_abort_general_error);
goto call_complete;
}
}
@@ -487,7 +493,7 @@ static int rxperf_deliver_request(struct rxperf_call *call)
static int rxperf_process_call(struct rxperf_call *call)
{
struct msghdr msg = {};
- struct bio_vec bv[1];
+ struct bio_vec bv;
struct kvec iov[1];
ssize_t n;
size_t reply_len = call->reply_len, len;
@@ -497,10 +503,8 @@ static int rxperf_process_call(struct rxperf_call *call)
while (reply_len > 0) {
len = min_t(size_t, reply_len, PAGE_SIZE);
- bv[0].bv_page = ZERO_PAGE(0);
- bv[0].bv_offset = 0;
- bv[0].bv_len = len;
- iov_iter_bvec(&msg.msg_iter, WRITE, bv, 1, len);
+ bvec_set_page(&bv, ZERO_PAGE(0), len, 0);
+ iov_iter_bvec(&msg.msg_iter, WRITE, &bv, 1, len);
msg.msg_flags = MSG_MORE;
n = rxrpc_kernel_send_data(rxperf_socket, call->rxcall, &msg,
len, rxperf_notify_end_reply_tx);
@@ -523,7 +527,8 @@ static int rxperf_process_call(struct rxperf_call *call)
if (n == -ENOMEM)
rxrpc_kernel_abort_call(rxperf_socket, call->rxcall,
- RXGEN_SS_MARSHAL, -ENOMEM, "GOM");
+ RXGEN_SS_MARSHAL, -ENOMEM,
+ rxperf_abort_oom);
return n;
}
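bvec_set_page(), used above in place of the open-coded bio_vec field assignments, initialises the vector in one call. For reference it is roughly equivalent to the assignments it replaces (sketch after upstream include/linux/bvec.h, not the authoritative definition):

	static inline void bvec_set_page(struct bio_vec *bv, struct page *page,
					 unsigned int len, unsigned int offset)
	{
		bv->bv_page = page;
		bv->bv_len = len;
		bv->bv_offset = offset;
	}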
diff --git a/net/rxrpc/security.c b/net/rxrpc/security.c
index ab968f65a490..cb8dd1d3b1d4 100644
--- a/net/rxrpc/security.c
+++ b/net/rxrpc/security.c
@@ -97,38 +97,31 @@ found:
*/
int rxrpc_init_client_conn_security(struct rxrpc_connection *conn)
{
- const struct rxrpc_security *sec;
struct rxrpc_key_token *token;
struct key *key = conn->key;
- int ret;
+ int ret = 0;
_enter("{%d},{%x}", conn->debug_id, key_serial(key));
- if (!key)
- return 0;
-
- ret = key_validate(key);
- if (ret < 0)
- return ret;
-
for (token = key->payload.data[0]; token; token = token->next) {
- sec = rxrpc_security_lookup(token->security_index);
- if (sec)
+ if (token->security_index == conn->security->security_index)
goto found;
}
return -EKEYREJECTED;
found:
- conn->security = sec;
-
- ret = conn->security->init_connection_security(conn, token);
- if (ret < 0) {
- conn->security = &rxrpc_no_security;
- return ret;
+ mutex_lock(&conn->security_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = conn->security->init_connection_security(conn, token);
+ if (ret == 0) {
+ spin_lock(&conn->state_lock);
+ if (conn->state == RXRPC_CONN_CLIENT_UNSECURED)
+ conn->state = RXRPC_CONN_CLIENT;
+ spin_unlock(&conn->state_lock);
+ }
}
-
- _leave(" = 0");
- return 0;
+ mutex_unlock(&conn->security_lock);
+ return ret;
}
/*
@@ -144,21 +137,15 @@ const struct rxrpc_security *rxrpc_get_incoming_security(struct rxrpc_sock *rx,
sec = rxrpc_security_lookup(sp->hdr.securityIndex);
if (!sec) {
- trace_rxrpc_abort(0, "SVS",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EKEYREJECTED);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = RX_INVALID_OPERATION;
+ rxrpc_direct_abort(skb, rxrpc_abort_unsupported_security,
+ RX_INVALID_OPERATION, -EKEYREJECTED);
return NULL;
}
if (sp->hdr.securityIndex != RXRPC_SECURITY_NONE &&
!rx->securities) {
- trace_rxrpc_abort(0, "SVR",
- sp->hdr.cid, sp->hdr.callNumber, sp->hdr.seq,
- RX_INVALID_OPERATION, EKEYREJECTED);
- skb->mark = RXRPC_SKB_MARK_REJECT_ABORT;
- skb->priority = sec->no_key_abort;
+ rxrpc_direct_abort(skb, rxrpc_abort_no_service_key,
+ sec->no_key_abort, -EKEYREJECTED);
return NULL;
}
@@ -191,9 +178,9 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
sprintf(kdesc, "%u:%u",
sp->hdr.serviceId, sp->hdr.securityIndex);
- rcu_read_lock();
+ read_lock(&conn->local->services_lock);
- rx = rcu_dereference(conn->local->service);
+ rx = conn->local->service;
if (!rx)
goto out;
@@ -215,6 +202,6 @@ struct key *rxrpc_look_up_server_security(struct rxrpc_connection *conn,
}
out:
- rcu_read_unlock();
+ read_unlock(&conn->local->services_lock);
return key;
}
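The lookup above moves from RCU to a read/write lock on local->services_lock. The implied pairing, assuming the writer lives where the service socket is bound and unbound (which this hunk does not show), looks like:

	/* Reader side, as in rxrpc_look_up_server_security() above: */
	read_lock(&local->services_lock);
	rx = local->service;		/* stable while the read lock is held */
	if (rx)
		key = look_up_key(rx);	/* hypothetical helper */
	read_unlock(&local->services_lock);

	/* Writer side, elsewhere: */
	write_lock(&local->services_lock);
	local->service = new_rx;	/* or NULL on unbind */
	write_unlock(&local->services_lock);

Unlike the RCU version, the reader may now sleep-free dereference rx without a grace-period protocol, at the cost of the writer briefly excluding readers.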
diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c
index cde1e65f16b4..da49fcf1c456 100644
--- a/net/rxrpc/sendmsg.c
+++ b/net/rxrpc/sendmsg.c
@@ -18,6 +18,81 @@
#include "ar-internal.h"
/*
+ * Propose an abort to be made in the I/O thread.
+ */
+bool rxrpc_propose_abort(struct rxrpc_call *call, s32 abort_code, int error,
+ enum rxrpc_abort_reason why)
+{
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
+
+ if (!call->send_abort && !rxrpc_call_is_complete(call)) {
+ call->send_abort_why = why;
+ call->send_abort_err = error;
+ call->send_abort_seq = 0;
+ /* Request abort locklessly vs rxrpc_input_call_event(). */
+ smp_store_release(&call->send_abort, abort_code);
+ rxrpc_poke_call(call, rxrpc_call_poke_abort);
+ return true;
+ }
+
+ return false;
+}
+
+/*
+ * Wait for a call to become connected. Interruption here doesn't cause the
+ * call to be aborted.
+ */
+static int rxrpc_wait_to_be_connected(struct rxrpc_call *call, long *timeo)
+{
+ DECLARE_WAITQUEUE(myself, current);
+ int ret = 0;
+
+ _enter("%d", call->debug_id);
+
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN)
+ return call->error;
+
+ add_wait_queue_exclusive(&call->waitq, &myself);
+
+ for (;;) {
+ ret = call->error;
+ if (ret < 0)
+ break;
+
+ switch (call->interruptibility) {
+ case RXRPC_INTERRUPTIBLE:
+ case RXRPC_PREINTERRUPTIBLE:
+ set_current_state(TASK_INTERRUPTIBLE);
+ break;
+ case RXRPC_UNINTERRUPTIBLE:
+ default:
+ set_current_state(TASK_UNINTERRUPTIBLE);
+ break;
+ }
+ if (rxrpc_call_state(call) != RXRPC_CALL_CLIENT_AWAIT_CONN) {
+ ret = call->error;
+ break;
+ }
+ if ((call->interruptibility == RXRPC_INTERRUPTIBLE ||
+ call->interruptibility == RXRPC_PREINTERRUPTIBLE) &&
+ signal_pending(current)) {
+ ret = sock_intr_errno(*timeo);
+ break;
+ }
+ *timeo = schedule_timeout(*timeo);
+ }
+
+ remove_wait_queue(&call->waitq, &myself);
+ __set_current_state(TASK_RUNNING);
+
+ if (ret == 0 && rxrpc_call_is_complete(call))
+ ret = call->error;
+
+ _leave(" = %d", ret);
+ return ret;
+}
+
+/*
* Return true if there's sufficient Tx queue space.
*/
static bool rxrpc_check_tx_space(struct rxrpc_call *call, rxrpc_seq_t *_tx_win)
@@ -39,7 +114,7 @@ static int rxrpc_wait_for_tx_window_intr(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, NULL))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
if (signal_pending(current))
@@ -74,7 +149,7 @@ static int rxrpc_wait_for_tx_window_waitall(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, &tx_win))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
if (timeout == 0 &&
@@ -103,7 +178,7 @@ static int rxrpc_wait_for_tx_window_nonintr(struct rxrpc_sock *rx,
if (rxrpc_check_tx_space(call, NULL))
return 0;
- if (call->state >= RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
return call->error;
trace_rxrpc_txqueue(call, rxrpc_txqueue_wait);
@@ -168,7 +243,6 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
struct rxrpc_txbuf *txb,
rxrpc_notify_end_tx_t notify_end_tx)
{
- unsigned long now;
rxrpc_seq_t seq = txb->seq;
bool last = test_bit(RXRPC_TXBUF_LAST, &txb->flags), poke;
@@ -191,36 +265,10 @@ static void rxrpc_queue_packet(struct rxrpc_sock *rx, struct rxrpc_call *call,
poke = list_empty(&call->tx_sendmsg);
list_add_tail(&txb->call_link, &call->tx_sendmsg);
call->tx_prepared = seq;
+ if (last)
+ rxrpc_notify_end_tx(rx, call, notify_end_tx);
spin_unlock(&call->tx_lock);
- if (last || call->state == RXRPC_CALL_SERVER_ACK_REQUEST) {
- _debug("________awaiting reply/ACK__________");
- write_lock(&call->state_lock);
- switch (call->state) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- call->state = RXRPC_CALL_CLIENT_AWAIT_REPLY;
- rxrpc_notify_end_tx(rx, call, notify_end_tx);
- break;
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- call->state = RXRPC_CALL_SERVER_SEND_REPLY;
- now = jiffies;
- WRITE_ONCE(call->delay_ack_at, now + MAX_JIFFY_OFFSET);
- if (call->ackr_reason == RXRPC_ACK_DELAY)
- call->ackr_reason = 0;
- trace_rxrpc_timer(call, rxrpc_timer_init_for_send_reply, now);
- if (!last)
- break;
- fallthrough;
- case RXRPC_CALL_SERVER_SEND_REPLY:
- call->state = RXRPC_CALL_SERVER_AWAIT_ACK;
- rxrpc_notify_end_tx(rx, call, notify_end_tx);
- break;
- default:
- break;
- }
- write_unlock(&call->state_lock);
- }
-
if (poke)
rxrpc_poke_call(call, rxrpc_call_poke_start);
}
@@ -245,6 +293,16 @@ static int rxrpc_send_data(struct rxrpc_sock *rx,
timeo = sock_sndtimeo(sk, msg->msg_flags & MSG_DONTWAIT);
+ ret = rxrpc_wait_to_be_connected(call, &timeo);
+ if (ret < 0)
+ return ret;
+
+ if (call->conn->state == RXRPC_CONN_CLIENT_UNSECURED) {
+ ret = rxrpc_init_client_conn_security(call->conn);
+ if (ret < 0)
+ return ret;
+ }
+
/* this should be in poll */
sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk);
@@ -252,15 +310,20 @@ reload:
ret = -EPIPE;
if (sk->sk_shutdown & SEND_SHUTDOWN)
goto maybe_error;
- state = READ_ONCE(call->state);
+ state = rxrpc_call_state(call);
ret = -ESHUTDOWN;
if (state >= RXRPC_CALL_COMPLETE)
goto maybe_error;
ret = -EPROTO;
if (state != RXRPC_CALL_CLIENT_SEND_REQUEST &&
state != RXRPC_CALL_SERVER_ACK_REQUEST &&
- state != RXRPC_CALL_SERVER_SEND_REPLY)
+ state != RXRPC_CALL_SERVER_SEND_REPLY) {
+ /* Request phase complete for this client call */
+ trace_rxrpc_abort(call->debug_id, rxrpc_sendmsg_late_send,
+ call->cid, call->call_id, call->rx_consumed,
+ 0, -EPROTO);
goto maybe_error;
+ }
ret = -EMSGSIZE;
if (call->tx_total_len != -1) {
@@ -329,7 +392,7 @@ reload:
/* check for the far side aborting the call or a network error
* occurring */
- if (call->state == RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
goto call_terminated;
/* add the packet to the send queue if it's now full */
@@ -354,12 +417,9 @@ reload:
success:
ret = copied;
- if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE) {
- read_lock(&call->state_lock);
- if (call->error < 0)
- ret = call->error;
- read_unlock(&call->state_lock);
- }
+ if (rxrpc_call_is_complete(call) &&
+ call->error < 0)
+ ret = call->error;
out:
call->tx_pending = txb;
_leave(" = %d", ret);
@@ -543,7 +603,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
atomic_inc_return(&rxrpc_debug_id));
/* The socket is now unlocked */
- rxrpc_put_peer(cp.peer, rxrpc_peer_put_discard_tmp);
_leave(" = %p\n", call);
return call;
}
@@ -556,7 +615,6 @@ rxrpc_new_client_call_for_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg,
int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
__releases(&rx->sk.sk_lock.slock)
{
- enum rxrpc_call_state state;
struct rxrpc_call *call;
unsigned long now, j;
bool dropped_lock = false;
@@ -598,10 +656,10 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
return PTR_ERR(call);
/* ... and we have the call lock. */
ret = 0;
- if (READ_ONCE(call->state) == RXRPC_CALL_COMPLETE)
+ if (rxrpc_call_is_complete(call))
goto out_put_unlock;
} else {
- switch (READ_ONCE(call->state)) {
+ switch (rxrpc_call_state(call)) {
case RXRPC_CALL_UNINITIALISED:
case RXRPC_CALL_CLIENT_AWAIT_CONN:
case RXRPC_CALL_SERVER_PREALLOC:
@@ -655,17 +713,13 @@ int rxrpc_do_sendmsg(struct rxrpc_sock *rx, struct msghdr *msg, size_t len)
break;
}
- state = READ_ONCE(call->state);
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, state, call->conn);
-
- if (state >= RXRPC_CALL_COMPLETE) {
+ if (rxrpc_call_is_complete(call)) {
/* it's too late for this call */
ret = -ESHUTDOWN;
} else if (p.command == RXRPC_CMD_SEND_ABORT) {
+ rxrpc_propose_abort(call, p.abort_code, -ECONNABORTED,
+ rxrpc_abort_call_sendmsg);
ret = 0;
- if (rxrpc_abort_call("CMD", call, 0, p.abort_code, -ECONNABORTED))
- ret = rxrpc_send_abort_packet(call);
} else if (p.command != RXRPC_CMD_SEND_DATA) {
ret = -EINVAL;
} else {
@@ -705,34 +759,17 @@ int rxrpc_kernel_send_data(struct socket *sock, struct rxrpc_call *call,
bool dropped_lock = false;
int ret;
- _enter("{%d,%s},", call->debug_id, rxrpc_call_states[call->state]);
+ _enter("{%d},", call->debug_id);
ASSERTCMP(msg->msg_name, ==, NULL);
ASSERTCMP(msg->msg_control, ==, NULL);
mutex_lock(&call->user_mutex);
- _debug("CALL %d USR %lx ST %d on CONN %p",
- call->debug_id, call->user_call_ID, call->state, call->conn);
-
- switch (READ_ONCE(call->state)) {
- case RXRPC_CALL_CLIENT_SEND_REQUEST:
- case RXRPC_CALL_SERVER_ACK_REQUEST:
- case RXRPC_CALL_SERVER_SEND_REPLY:
- ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
- notify_end_tx, &dropped_lock);
- break;
- case RXRPC_CALL_COMPLETE:
- read_lock(&call->state_lock);
+ ret = rxrpc_send_data(rxrpc_sk(sock->sk), call, msg, len,
+ notify_end_tx, &dropped_lock);
+ if (ret == -ESHUTDOWN)
ret = call->error;
- read_unlock(&call->state_lock);
- break;
- default:
- /* Request phase complete for this client call */
- trace_rxrpc_rx_eproto(call, 0, tracepoint_string("late_send"));
- ret = -EPROTO;
- break;
- }
if (!dropped_lock)
mutex_unlock(&call->user_mutex);
@@ -747,24 +784,20 @@ EXPORT_SYMBOL(rxrpc_kernel_send_data);
* @call: The call to be aborted
* @abort_code: The abort code to stick into the ABORT packet
* @error: Local error value
- * @why: 3-char string indicating why.
+ * @why: Enumerated reason for the abort, recorded in tracing.
*
* Allow a kernel service to abort a call, if it's still in an abortable state
* and return true if the call was aborted, false if it was already complete.
*/
bool rxrpc_kernel_abort_call(struct socket *sock, struct rxrpc_call *call,
- u32 abort_code, int error, const char *why)
+ u32 abort_code, int error, enum rxrpc_abort_reason why)
{
bool aborted;
- _enter("{%d},%d,%d,%s", call->debug_id, abort_code, error, why);
+ _enter("{%d},%d,%d,%u", call->debug_id, abort_code, error, why);
mutex_lock(&call->user_mutex);
-
- aborted = rxrpc_abort_call(why, call, 0, abort_code, error);
- if (aborted)
- rxrpc_send_abort_packet(call);
-
+ aborted = rxrpc_propose_abort(call, abort_code, error, why);
mutex_unlock(&call->user_mutex);
return aborted;
}
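rxrpc_propose_abort() above relies on publish/subscribe ordering: the auxiliary fields are filled with plain stores, then the abort code is release-stored so that the I/O thread, assumed to read it with an acquire load as the "locklessly vs rxrpc_input_call_event()" comment implies, sees the fields consistently. A self-contained C11 analogue of that pattern, with invented names:

	#include <stdatomic.h>

	struct abort_req {
		int why, err;
		_Atomic int code;	/* 0 = no abort requested */
	};

	static void propose(struct abort_req *a, int code, int err, int why)
	{
		a->why = why;		/* plain stores first ... */
		a->err = err;
		/* ... then publish with release ordering. */
		atomic_store_explicit(&a->code, code, memory_order_release);
	}

	static int consume(struct abort_req *a, int *err, int *why)
	{
		int code = atomic_load_explicit(&a->code, memory_order_acquire);

		if (code) {		/* acquire pairs with the release above */
			*err = a->err;
			*why = a->why;
		}
		return code;
	}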
diff --git a/net/rxrpc/skbuff.c b/net/rxrpc/skbuff.c
index ebe0c75e7b07..3bcd6ee80396 100644
--- a/net/rxrpc/skbuff.c
+++ b/net/rxrpc/skbuff.c
@@ -63,7 +63,7 @@ void rxrpc_free_skb(struct sk_buff *skb, enum rxrpc_skb_trace why)
if (skb) {
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n, why);
- kfree_skb(skb);
+ consume_skb(skb);
}
}
@@ -78,6 +78,6 @@ void rxrpc_purge_queue(struct sk_buff_head *list)
int n = atomic_dec_return(select_skb_count(skb));
trace_rxrpc_skb(skb, refcount_read(&skb->users), n,
rxrpc_skb_put_purge);
- kfree_skb(skb);
+ consume_skb(skb);
}
}
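Both helpers free the skb; they differ only in which tracepoint fires, so drop monitors (dropwatch, perf trace on skb:kfree_skb) count kfree_skb() as a packet drop while consume_skb() marks a normal end of life. That is why the patch switches these success paths. The intended split, in sketch form:

	if (processed_ok)		/* hypothetical condition */
		consume_skb(skb);	/* normal end of life, not a drop */
	else
		kfree_skb(skb);		/* counted by drop monitoring */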
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index cde3224a5cd2..ecaeb4ecfb58 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -17,6 +17,9 @@ static const unsigned int n_65535 = 65535;
static const unsigned int n_max_acks = 255;
static const unsigned long one_jiffy = 1;
static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+static const unsigned long max_500 = 500;
+#endif
/*
* RxRPC operating parameters.
@@ -63,6 +66,19 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra2 = (void *)&max_jiffies,
},
+ /* Values used in milliseconds */
+#ifdef CONFIG_AF_RXRPC_INJECT_RX_DELAY
+ {
+ .procname = "inject_rx_delay",
+ .data = &rxrpc_inject_rx_delay,
+ .maxlen = sizeof(unsigned long),
+ .mode = 0644,
+ .proc_handler = proc_doulongvec_minmax,
+ .extra1 = (void *)SYSCTL_LONG_ZERO,
+ .extra2 = (void *)&max_500,
+ },
+#endif
+
/* Non-time values */
{
.procname = "reap_client_conns",
@@ -109,7 +125,6 @@ static struct ctl_table rxrpc_sysctl_table[] = {
.extra1 = (void *)SYSCTL_ONE,
.extra2 = (void *)&four,
},
-
{ }
};
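The inject_rx_delay entry above follows the standard shape for a bounded unsigned-long sysctl. A generic sketch of the same shape; my_value, my_table and the bounds are hypothetical, and note that proc_doulongvec_minmax() expects extra1/extra2 to point at unsigned longs, which is why max_500 above is declared as one:

	static unsigned long my_value;
	static const unsigned long my_min = 0, my_max = 500;

	static struct ctl_table my_table[] = {
		{
			.procname	= "my_value",
			.data		= &my_value,
			.maxlen		= sizeof(unsigned long),
			.mode		= 0644,
			.proc_handler	= proc_doulongvec_minmax,
			.extra1		= (void *)&my_min,
			.extra2		= (void *)&my_max,
		},
		{ }	/* sentinel */
	};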
diff --git a/net/rxrpc/txbuf.c b/net/rxrpc/txbuf.c
index d2cf2aac3adb..d43be8512386 100644
--- a/net/rxrpc/txbuf.c
+++ b/net/rxrpc/txbuf.c
@@ -110,12 +110,8 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
_enter("%x/%x/%x", call->tx_bottom, call->acks_hard_ack, call->tx_top);
- for (;;) {
- spin_lock(&call->tx_lock);
- txb = list_first_entry_or_null(&call->tx_buffer,
- struct rxrpc_txbuf, call_link);
- if (!txb)
- break;
+ while ((txb = list_first_entry_or_null(&call->tx_buffer,
+ struct rxrpc_txbuf, call_link))) {
hard_ack = smp_load_acquire(&call->acks_hard_ack);
if (before(hard_ack, txb->seq))
break;
@@ -128,15 +124,11 @@ void rxrpc_shrink_call_tx_buffer(struct rxrpc_call *call)
trace_rxrpc_txqueue(call, rxrpc_txqueue_dequeue);
- spin_unlock(&call->tx_lock);
-
rxrpc_put_txbuf(txb, rxrpc_txbuf_put_rotated);
if (after(call->acks_hard_ack, call->tx_bottom + 128))
wake = true;
}
- spin_unlock(&call->tx_lock);
-
if (wake)
wake_up(&call->waitq);
}
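The rewritten shrink loop drains hard-acked buffers from the head of the Tx queue; smp_load_acquire() on acks_hard_ack is assumed to pair with a release store where the hard-ACK point advances (in the Tx-rotation code, not shown here). The before() test it gates on is the usual wrap-safe 32-bit sequence comparison, sketched below; the kernel defines these helpers per-protocol, so this is illustrative:

	static inline int seq_before(u32 a, u32 b)
	{
		return (s32)(a - b) < 0;	/* true if a precedes b, modulo 2^32 */
	}

The cast to signed makes the comparison correct even when the sequence space wraps, as long as the two values are within 2^31 of each other.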
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index 777d6b50505c..4b95cb1ac435 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -45,23 +45,6 @@ if NET_SCHED
comment "Queueing/Scheduling"
-config NET_SCH_CBQ
- tristate "Class Based Queueing (CBQ)"
- help
- Say Y here if you want to use the Class-Based Queueing (CBQ) packet
- scheduling algorithm. This algorithm classifies the waiting packets
- into a tree-like hierarchy of classes; the leaves of this tree are
- in turn scheduled by separate algorithms.
-
- See the top of <file:net/sched/sch_cbq.c> for more details.
-
- CBQ is a commonly used scheduler, so if you're unsure, you should
- say Y here. Then say Y to all the queueing algorithms below that you
- want to use as leaf disciplines.
-
- To compile this code as a module, choose M here: the
- module will be called sch_cbq.
-
config NET_SCH_HTB
tristate "Hierarchical Token Bucket (HTB)"
help
@@ -85,20 +68,6 @@ config NET_SCH_HFSC
To compile this code as a module, choose M here: the
module will be called sch_hfsc.
-config NET_SCH_ATM
- tristate "ATM Virtual Circuits (ATM)"
- depends on ATM
- help
- Say Y here if you want to use the ATM pseudo-scheduler. This
- provides a framework for invoking classifiers, which in turn
- select classes of this queuing discipline. Each class maps
- the flow(s) it is handling to a given virtual circuit.
-
- See the top of <file:net/sched/sch_atm.c> for more details.
-
- To compile this code as a module, choose M here: the
- module will be called sch_atm.
-
config NET_SCH_PRIO
tristate "Multi Band Priority Queueing (PRIO)"
help
@@ -195,8 +164,14 @@ config NET_SCH_ETF
To compile this code as a module, choose M here: the
module will be called sch_etf.
+config NET_SCH_MQPRIO_LIB
+ tristate
+ help
+ Common library for manipulating mqprio queue configurations.
+
config NET_SCH_TAPRIO
tristate "Time Aware Priority (taprio) Scheduler"
+ select NET_SCH_MQPRIO_LIB
help
Say Y here if you want to use the Time Aware Priority (taprio) packet
scheduling algorithm.
@@ -217,17 +192,6 @@ config NET_SCH_GRED
To compile this code as a module, choose M here: the
module will be called sch_gred.
-config NET_SCH_DSMARK
- tristate "Differentiated Services marker (DSMARK)"
- help
- Say Y if you want to schedule packets according to the
- Differentiated Services architecture proposed in RFC 2475.
- Technical information on this method, with pointers to associated
- RFCs, is available at <http://www.gta.ufrj.br/diffserv/>.
-
- To compile this code as a module, choose M here: the
- module will be called sch_dsmark.
-
config NET_SCH_NETEM
tristate "Network emulator (NETEM)"
help
@@ -253,6 +217,7 @@ config NET_SCH_DRR
config NET_SCH_MQPRIO
tristate "Multi-queue priority scheduler (MQPRIO)"
+ select NET_SCH_MQPRIO_LIB
help
Say Y here if you want to use the Multi-queue Priority scheduler.
This scheduler allows QOS to be offloaded on NICs that have support
@@ -337,7 +302,7 @@ config NET_SCH_FQ
Say Y here if you want to use the FQ packet scheduling algorithm.
FQ does flow separation, and is able to respect pacing requirements
- set by TCP stack into sk->sk_pacing_rate (for localy generated
+ set by TCP stack into sk->sk_pacing_rate (for locally generated
traffic)
To compile this driver as a module, choose M here: the module
@@ -503,17 +468,6 @@ config NET_CLS_BASIC
To compile this code as a module, choose M here: the
module will be called cls_basic.
-config NET_CLS_TCINDEX
- tristate "Traffic-Control Index (TCINDEX)"
- select NET_CLS
- help
- Say Y here if you want to be able to classify packets based on
- traffic control indices. You will want this feature if you want
- to implement Differentiated Services together with DSMARK.
-
- To compile this code as a module, choose M here: the
- module will be called cls_tcindex.
-
config NET_CLS_ROUTE4
tristate "Routing decision (ROUTE)"
depends on INET
@@ -559,34 +513,6 @@ config CLS_U32_MARK
help
Say Y here to be able to use netfilter marks as u32 key.
-config NET_CLS_RSVP
- tristate "IPv4 Resource Reservation Protocol (RSVP)"
- select NET_CLS
- help
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp.
-
-config NET_CLS_RSVP6
- tristate "IPv6 Resource Reservation Protocol (RSVP6)"
- select NET_CLS
- help
- The Resource Reservation Protocol (RSVP) permits end systems to
- request a minimum and maximum data flow rate for a connection; this
- is important for real time data such as streaming sound or video.
-
- Say Y here if you want to be able to classify outgoing packets based
- on their RSVP requests and you are using the IPv6 protocol.
-
- To compile this code as a module, choose M here: the
- module will be called cls_rsvp6.
-
config NET_CLS_FLOW
tristate "Flow classifier"
select NET_CLS
@@ -977,6 +903,7 @@ config NET_ACT_TUNNEL_KEY
config NET_ACT_CT
tristate "connection tracking tc action"
depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE
+ select NF_CONNTRACK_OVS
select NF_NAT_OVS if NF_NAT
help
Say Y here to allow sending the packets to conntrack module.
diff --git a/net/sched/Makefile b/net/sched/Makefile
index dd14ef413fda..b5fd49641d91 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -33,25 +33,23 @@ obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
obj-$(CONFIG_NET_ACT_CT) += act_ct.o
obj-$(CONFIG_NET_ACT_GATE) += act_gate.o
obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o
-obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o
obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o
obj-$(CONFIG_NET_SCH_HFSC) += sch_hfsc.o
obj-$(CONFIG_NET_SCH_RED) += sch_red.o
obj-$(CONFIG_NET_SCH_GRED) += sch_gred.o
obj-$(CONFIG_NET_SCH_INGRESS) += sch_ingress.o
-obj-$(CONFIG_NET_SCH_DSMARK) += sch_dsmark.o
obj-$(CONFIG_NET_SCH_SFB) += sch_sfb.o
obj-$(CONFIG_NET_SCH_SFQ) += sch_sfq.o
obj-$(CONFIG_NET_SCH_TBF) += sch_tbf.o
obj-$(CONFIG_NET_SCH_TEQL) += sch_teql.o
obj-$(CONFIG_NET_SCH_PRIO) += sch_prio.o
obj-$(CONFIG_NET_SCH_MULTIQ) += sch_multiq.o
-obj-$(CONFIG_NET_SCH_ATM) += sch_atm.o
obj-$(CONFIG_NET_SCH_NETEM) += sch_netem.o
obj-$(CONFIG_NET_SCH_DRR) += sch_drr.o
obj-$(CONFIG_NET_SCH_PLUG) += sch_plug.o
obj-$(CONFIG_NET_SCH_ETS) += sch_ets.o
obj-$(CONFIG_NET_SCH_MQPRIO) += sch_mqprio.o
+obj-$(CONFIG_NET_SCH_MQPRIO_LIB) += sch_mqprio_lib.o
obj-$(CONFIG_NET_SCH_SKBPRIO) += sch_skbprio.o
obj-$(CONFIG_NET_SCH_CHOKE) += sch_choke.o
obj-$(CONFIG_NET_SCH_QFQ) += sch_qfq.o
@@ -69,9 +67,6 @@ obj-$(CONFIG_NET_SCH_TAPRIO) += sch_taprio.o
obj-$(CONFIG_NET_CLS_U32) += cls_u32.o
obj-$(CONFIG_NET_CLS_ROUTE4) += cls_route.o
obj-$(CONFIG_NET_CLS_FW) += cls_fw.o
-obj-$(CONFIG_NET_CLS_RSVP) += cls_rsvp.o
-obj-$(CONFIG_NET_CLS_TCINDEX) += cls_tcindex.o
-obj-$(CONFIG_NET_CLS_RSVP6) += cls_rsvp6.o
obj-$(CONFIG_NET_CLS_BASIC) += cls_basic.o
obj-$(CONFIG_NET_CLS_FLOW) += cls_flow.o
obj-$(CONFIG_NET_CLS_CGROUP) += cls_cgroup.o
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index 5b3c0ac495be..34c508675041 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -125,7 +125,7 @@ static void free_tcf(struct tc_action *p)
free_percpu(p->cpu_bstats_hw);
free_percpu(p->cpu_qstats);
- tcf_set_action_cookie(&p->act_cookie, NULL);
+ tcf_set_action_cookie(&p->user_cookie, NULL);
if (chain)
tcf_chain_put_by_act(chain);
@@ -169,11 +169,6 @@ static bool tc_act_skip_sw(u32 flags)
return (flags & TCA_ACT_FLAGS_SKIP_SW) ? true : false;
}
-static bool tc_act_in_hw(struct tc_action *act)
-{
- return !!act->in_hw_count;
-}
-
/* SKIP_HW and SKIP_SW are mutually exclusive flags. */
static bool tc_act_flags_valid(u32 flags)
{
@@ -192,6 +187,7 @@ static int offload_action_init(struct flow_offload_action *fl_action,
fl_action->extack = extack;
fl_action->command = cmd;
fl_action->index = act->tcfa_index;
+ fl_action->cookie = (unsigned long)act;
if (act->ops->offload_act_setup) {
spin_lock_bh(&act->tcfa_lock);
@@ -272,7 +268,7 @@ static int tcf_action_offload_add_ex(struct tc_action *action,
if (err)
goto fl_err;
- err = tc_setup_action(&fl_action->action, actions, extack);
+ err = tc_setup_action(&fl_action->action, actions, 0, extack);
if (err) {
NL_SET_ERR_MSG_MOD(extack,
"Failed to setup tc actions for offload");
@@ -307,9 +303,6 @@ int tcf_action_update_hw_stats(struct tc_action *action)
struct flow_offload_action fl_act = {};
int err;
- if (!tc_act_in_hw(action))
- return -EOPNOTSUPP;
-
err = offload_action_init(&fl_act, action, FLOW_ACT_STATS, NULL);
if (err)
return err;
@@ -438,14 +431,14 @@ EXPORT_SYMBOL(tcf_idr_release);
static size_t tcf_action_shared_attrs_size(const struct tc_action *act)
{
- struct tc_cookie *act_cookie;
+ struct tc_cookie *user_cookie;
u32 cookie_len = 0;
rcu_read_lock();
- act_cookie = rcu_dereference(act->act_cookie);
+ user_cookie = rcu_dereference(act->user_cookie);
- if (act_cookie)
- cookie_len = nla_total_size(act_cookie->len);
+ if (user_cookie)
+ cookie_len = nla_total_size(user_cookie->len);
rcu_read_unlock();
return nla_total_size(0) /* action number nested */
@@ -495,7 +488,7 @@ tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a, bool from_act)
goto nla_put_failure;
rcu_read_lock();
- cookie = rcu_dereference(a->act_cookie);
+ cookie = rcu_dereference(a->user_cookie);
if (cookie) {
if (nla_put(skb, TCA_ACT_COOKIE, cookie->len, cookie->data)) {
rcu_read_unlock();
@@ -539,6 +532,8 @@ static int tcf_dump_walker(struct tcf_idrinfo *idrinfo, struct sk_buff *skb,
(unsigned long)p->tcfa_tm.lastuse))
continue;
+ tcf_action_update_hw_stats(p);
+
nest = nla_nest_start_noflag(skb, n_i);
if (!nest) {
index--;
@@ -1367,9 +1362,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
{
bool police = flags & TCA_ACT_FLAGS_POLICE;
struct nla_bitfield32 userflags = { 0, 0 };
+ struct tc_cookie *user_cookie = NULL;
u8 hw_stats = TCA_ACT_HW_STATS_ANY;
struct nlattr *tb[TCA_ACT_MAX + 1];
- struct tc_cookie *cookie = NULL;
struct tc_action *a;
int err;
@@ -1380,8 +1375,8 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
if (err < 0)
return ERR_PTR(err);
if (tb[TCA_ACT_COOKIE]) {
- cookie = nla_memdup_cookie(tb);
- if (!cookie) {
+ user_cookie = nla_memdup_cookie(tb);
+ if (!user_cookie) {
NL_SET_ERR_MSG(extack, "No memory to generate TC cookie");
err = -ENOMEM;
goto err_out;
@@ -1407,7 +1402,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
*init_res = err;
if (!police && tb[TCA_ACT_COOKIE])
- tcf_set_action_cookie(&a->act_cookie, cookie);
+ tcf_set_action_cookie(&a->user_cookie, user_cookie);
if (!police)
a->hw_stats = hw_stats;
@@ -1415,9 +1410,9 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
return a;
err_out:
- if (cookie) {
- kfree(cookie->data);
- kfree(cookie);
+ if (user_cookie) {
+ kfree(user_cookie->data);
+ kfree(user_cookie);
}
return ERR_PTR(err);
}
@@ -1539,9 +1534,6 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *p,
if (p == NULL)
goto errout;
- /* update hw stats for this action */
- tcf_action_update_hw_stats(p);
-
/* compat_mode being true specifies a call that is supposed
* to add additional backward compatibility statistic TLVs.
*/
@@ -1582,7 +1574,7 @@ errout:
static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
u32 portid, u32 seq, u16 flags, int event, int bind,
- int ref)
+ int ref, struct netlink_ext_ack *extack)
{
struct tcamsg *t;
struct nlmsghdr *nlh;
@@ -1604,9 +1596,14 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
goto out_nlmsg_trim;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nla_nest_end(skb, nest);
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1625,7 +1622,7 @@ tcf_get_notify(struct net *net, u32 portid, struct nlmsghdr *n,
if (!skb)
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, event,
- 0, 1) <= 0) {
+ 0, 1, NULL) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
@@ -1799,7 +1796,7 @@ tcf_reoffload_del_notify(struct net *net, struct tc_action *action)
if (!skb)
return -ENOBUFS;
- if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1) <= 0) {
+ if (tca_get_fill(skb, actions, 0, 0, 0, RTM_DELACTION, 0, 1, NULL) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1886,7 +1883,7 @@ tcf_del_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, 0, RTM_DELACTION,
- 0, 2) <= 0) {
+ 0, 2, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink TC action attributes");
kfree_skb(skb);
return -EINVAL;
@@ -1965,7 +1962,7 @@ tcf_add_notify(struct net *net, struct nlmsghdr *n, struct tc_action *actions[],
return -ENOBUFS;
if (tca_get_fill(skb, actions, portid, n->nlmsg_seq, n->nlmsg_flags,
- RTM_NEWACTION, 0, 0) <= 0) {
+ RTM_NEWACTION, 0, 0, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to fill netlink attributes while adding TC action");
kfree_skb(skb);
return -EINVAL;
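tca_get_fill() now threads the extack through so that a recorded warning message is echoed back to the requester inside the reply as a TCA_EXT_WARN_MSG string attribute. The pattern as a hedged stand-alone helper; the real code open-codes it at each fill site:

#include <net/netlink.h>
#include <uapi/linux/rtnetlink.h>

/* append the recorded extack message, if any, before closing the msg */
static int demo_put_warn_msg(struct sk_buff *skb,
			     const struct netlink_ext_ack *extack)
{
	if (!extack || !extack->_msg)
		return 0;	/* nothing to report */
	return nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg);
}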
diff --git a/net/sched/act_connmark.c b/net/sched/act_connmark.c
index 7e63ff7e3ed7..8dabfb52ea3d 100644
--- a/net/sched/act_connmark.c
+++ b/net/sched/act_connmark.c
@@ -36,13 +36,15 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
struct nf_conntrack_tuple tuple;
enum ip_conntrack_info ctinfo;
struct tcf_connmark_info *ca = to_connmark(a);
+ struct tcf_connmark_parms *parms;
struct nf_conntrack_zone zone;
struct nf_conn *c;
int proto;
- spin_lock(&ca->tcf_lock);
tcf_lastuse_update(&ca->tcf_tm);
- bstats_update(&ca->tcf_bstats, skb);
+ tcf_action_update_bstats(&ca->common, skb);
+
+ parms = rcu_dereference_bh(ca->parms);
switch (skb_protocol(skb, true)) {
case htons(ETH_P_IP):
@@ -64,31 +66,29 @@ TC_INDIRECT_SCOPE int tcf_connmark_act(struct sk_buff *skb,
c = nf_ct_get(skb, &ctinfo);
if (c) {
skb->mark = READ_ONCE(c->mark);
- /* using overlimits stats to count how many packets marked */
- ca->tcf_qstats.overlimits++;
- goto out;
+ goto count;
}
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
- proto, ca->net, &tuple))
+ if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, parms->net,
+ &tuple))
goto out;
- zone.id = ca->zone;
+ zone.id = parms->zone;
zone.dir = NF_CT_DEFAULT_ZONE_DIR;
- thash = nf_conntrack_find_get(ca->net, &zone, &tuple);
+ thash = nf_conntrack_find_get(parms->net, &zone, &tuple);
if (!thash)
goto out;
c = nf_ct_tuplehash_to_ctrack(thash);
- /* using overlimits stats to count how many packets marked */
- ca->tcf_qstats.overlimits++;
skb->mark = READ_ONCE(c->mark);
nf_ct_put(c);
+count:
+ /* using overlimits stats to count how many packets were marked */
+ tcf_action_inc_overlimit_qstats(&ca->common);
out:
- spin_unlock(&ca->tcf_lock);
- return ca->tcf_action;
+ return READ_ONCE(ca->tcf_action);
}
static const struct nla_policy connmark_policy[TCA_CONNMARK_MAX + 1] = {
@@ -101,6 +101,7 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
struct netlink_ext_ack *extack)
{
struct tc_action_net *tn = net_generic(net, act_connmark_ops.net_id);
+ struct tcf_connmark_parms *nparms, *oparms;
struct nlattr *tb[TCA_CONNMARK_MAX + 1];
bool bind = flags & TCA_ACT_FLAGS_BIND;
struct tcf_chain *goto_ch = NULL;
@@ -120,52 +121,66 @@ static int tcf_connmark_init(struct net *net, struct nlattr *nla,
if (!tb[TCA_CONNMARK_PARMS])
return -EINVAL;
+ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+ if (!nparms)
+ return -ENOMEM;
+
parm = nla_data(tb[TCA_CONNMARK_PARMS]);
index = parm->index;
ret = tcf_idr_check_alloc(tn, &index, a, bind);
if (!ret) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_connmark_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_connmark_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
- return ret;
+ err = ret;
+ goto out_free;
}
ci = to_connmark(*a);
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
- extack);
- if (err < 0)
- goto release_idr;
- tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- ci->net = net;
- ci->zone = parm->zone;
+
+ nparms->net = net;
+ nparms->zone = parm->zone;
ret = ACT_P_CREATED;
} else if (ret > 0) {
ci = to_connmark(*a);
- if (bind)
- return 0;
- if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ if (bind) {
+ err = 0;
+ goto out_free;
}
- err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch,
- extack);
- if (err < 0)
+ if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ err = -EEXIST;
goto release_idr;
- /* replacing action and zone */
- spin_lock_bh(&ci->tcf_lock);
- goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
- ci->zone = parm->zone;
- spin_unlock_bh(&ci->tcf_lock);
- if (goto_ch)
- tcf_chain_put_by_act(goto_ch);
+ }
+
+ nparms->net = rtnl_dereference(ci->parms)->net;
+ nparms->zone = parm->zone;
+
ret = 0;
}
+ err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+ if (err < 0)
+ goto release_idr;
+
+ spin_lock_bh(&ci->tcf_lock);
+ goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparms = rcu_replace_pointer(ci->parms, nparms, lockdep_is_held(&ci->tcf_lock));
+ spin_unlock_bh(&ci->tcf_lock);
+
+ if (goto_ch)
+ tcf_chain_put_by_act(goto_ch);
+
+ if (oparms)
+ kfree_rcu(oparms, rcu);
+
return ret;
+
release_idr:
tcf_idr_release(*a, bind);
+out_free:
+ kfree(nparms);
return err;
}
@@ -179,11 +194,14 @@ static inline int tcf_connmark_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&ci->tcf_refcnt) - ref,
.bindcnt = atomic_read(&ci->tcf_bindcnt) - bind,
};
+ struct tcf_connmark_parms *parms;
struct tcf_t t;
spin_lock_bh(&ci->tcf_lock);
+ parms = rcu_dereference_protected(ci->parms, lockdep_is_held(&ci->tcf_lock));
+
opt.action = ci->tcf_action;
- opt.zone = ci->zone;
+ opt.zone = parms->zone;
if (nla_put(skb, TCA_CONNMARK_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -201,6 +219,16 @@ nla_put_failure:
return -1;
}
+static void tcf_connmark_cleanup(struct tc_action *a)
+{
+ struct tcf_connmark_info *ci = to_connmark(a);
+ struct tcf_connmark_parms *parms;
+
+ parms = rcu_dereference_protected(ci->parms, 1);
+ if (parms)
+ kfree_rcu(parms, rcu);
+}
+
static struct tc_action_ops act_connmark_ops = {
.kind = "connmark",
.id = TCA_ID_CONNMARK,
@@ -208,6 +236,7 @@ static struct tc_action_ops act_connmark_ops = {
.act = tcf_connmark_act,
.dump = tcf_connmark_dump,
.init = tcf_connmark_init,
+ .cleanup = tcf_connmark_cleanup,
.size = sizeof(struct tcf_connmark_info),
};
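act_connmark now keeps its zone and netns in an RCU-managed parms struct so the datapath no longer takes the per-action spinlock. A minimal sketch of the writer side of that pattern, assuming a hypothetical demo_parms struct; readers pair it with rcu_dereference_bh() as in tcf_connmark_act() above:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct demo_parms {
	u16 zone;
	struct rcu_head rcu;
};

/* publish new parameters; old ones are freed after a grace period */
static void demo_replace(struct demo_parms __rcu **slot,
			 struct demo_parms *nparms, spinlock_t *lock)
{
	struct demo_parms *old;

	spin_lock_bh(lock);
	old = rcu_replace_pointer(*slot, nparms, lockdep_is_held(lock));
	spin_unlock_bh(lock);

	if (old)
		kfree_rcu(old, rcu);
}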
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 0ca2bb8ed026..9cc0bc7c71ed 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -170,11 +170,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple,
static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
enum ip_conntrack_dir dir,
+ enum ip_conntrack_info ctinfo,
struct flow_action *action)
{
struct nf_conn_labels *ct_labels;
struct flow_action_entry *entry;
- enum ip_conntrack_info ctinfo;
u32 *act_ct_labels;
entry = tcf_ct_flow_table_flow_action_get_next(action);
@@ -182,8 +182,6 @@ static void tcf_ct_flow_table_add_action_meta(struct nf_conn *ct,
#if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK)
entry->ct_metadata.mark = READ_ONCE(ct->mark);
#endif
- ctinfo = dir == IP_CT_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
/* aligns with the CT reference on the SKB nf_ct_set */
entry->ct_metadata.cookie = (unsigned long)ct | ctinfo;
entry->ct_metadata.orig_dir = dir == IP_CT_DIR_ORIGINAL;
@@ -237,22 +235,28 @@ static int tcf_ct_flow_table_add_action_nat(struct net *net,
}
static int tcf_ct_flow_table_fill_actions(struct net *net,
- const struct flow_offload *flow,
+ struct flow_offload *flow,
enum flow_offload_tuple_dir tdir,
struct nf_flow_rule *flow_rule)
{
struct flow_action *action = &flow_rule->rule->action;
int num_entries = action->num_entries;
struct nf_conn *ct = flow->ct;
+ enum ip_conntrack_info ctinfo;
enum ip_conntrack_dir dir;
int i, err;
switch (tdir) {
case FLOW_OFFLOAD_DIR_ORIGINAL:
dir = IP_CT_DIR_ORIGINAL;
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ if (ctinfo == IP_CT_ESTABLISHED)
+ set_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
break;
case FLOW_OFFLOAD_DIR_REPLY:
dir = IP_CT_DIR_REPLY;
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
break;
default:
return -EOPNOTSUPP;
@@ -262,7 +266,7 @@ static int tcf_ct_flow_table_fill_actions(struct net *net,
if (err)
goto err_nat;
- tcf_ct_flow_table_add_action_meta(ct, dir, action);
+ tcf_ct_flow_table_add_action_meta(ct, dir, ctinfo, action);
return 0;
err_nat:
@@ -365,7 +369,7 @@ static void tcf_ct_flow_tc_ifidx(struct flow_offload *entry,
static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
- bool tcp)
+ bool tcp, bool bidirectional)
{
struct nf_conn_act_ct_ext *act_ct_ext;
struct flow_offload *entry;
@@ -384,6 +388,8 @@ static void tcf_ct_flow_table_add(struct tcf_ct_flow_table *ct_ft,
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
ct->proto.tcp.seen[1].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
}
+ if (bidirectional)
+ __set_bit(NF_FLOW_HW_BIDIRECTIONAL, &entry->flags);
act_ct_ext = nf_conn_act_ct_ext_find(ct);
if (act_ct_ext) {
@@ -407,26 +413,34 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
struct nf_conn *ct,
enum ip_conntrack_info ctinfo)
{
- bool tcp = false;
-
- if ((ctinfo != IP_CT_ESTABLISHED && ctinfo != IP_CT_ESTABLISHED_REPLY) ||
- !test_bit(IPS_ASSURED_BIT, &ct->status))
- return;
+ bool tcp = false, bidirectional = true;
switch (nf_ct_protonum(ct)) {
case IPPROTO_TCP:
- tcp = true;
- if (ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->proto.tcp.state != TCP_CONNTRACK_ESTABLISHED)
return;
+
+ tcp = true;
break;
case IPPROTO_UDP:
+ if (!nf_ct_is_confirmed(ct))
+ return;
+ if (!test_bit(IPS_ASSURED_BIT, &ct->status))
+ bidirectional = false;
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE: {
struct nf_conntrack_tuple *tuple;
- if (ct->status & IPS_NAT_MASK)
+ if ((ctinfo != IP_CT_ESTABLISHED &&
+ ctinfo != IP_CT_ESTABLISHED_REPLY) ||
+ !test_bit(IPS_ASSURED_BIT, &ct->status) ||
+ ct->status & IPS_NAT_MASK)
return;
+
tuple = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
/* No support for GRE v1 */
if (tuple->src.u.gre.key || tuple->dst.u.gre.key)
@@ -442,7 +456,7 @@ static void tcf_ct_flow_table_process_conn(struct tcf_ct_flow_table *ct_ft,
ct->status & IPS_SEQ_ADJUST)
return;
- tcf_ct_flow_table_add(ct_ft, ct, tcp);
+ tcf_ct_flow_table_add(ct_ft, ct, tcp, bidirectional);
}
static bool
@@ -621,13 +635,30 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
ct = flow->ct;
+ if (dir == FLOW_OFFLOAD_DIR_REPLY &&
+ !test_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags)) {
+ /* Only offload reply direction after connection became
+ * assured.
+ */
+ if (test_bit(IPS_ASSURED_BIT, &ct->status))
+ set_bit(NF_FLOW_HW_BIDIRECTIONAL, &flow->flags);
+ else if (test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags))
+ /* If flow_table flow has already been updated to the
+ * established state, then don't refresh.
+ */
+ return false;
+ }
+
if (tcph && (unlikely(tcph->fin || tcph->rst))) {
flow_offload_teardown(flow);
return false;
}
- ctinfo = dir == FLOW_OFFLOAD_DIR_ORIGINAL ? IP_CT_ESTABLISHED :
- IP_CT_ESTABLISHED_REPLY;
+ if (dir == FLOW_OFFLOAD_DIR_ORIGINAL)
+ ctinfo = test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
+ IP_CT_ESTABLISHED : IP_CT_NEW;
+ else
+ ctinfo = IP_CT_ESTABLISHED_REPLY;
flow_offload_refresh(nf_ft, flow);
nf_conntrack_get(&ct->ct_general);
@@ -695,31 +726,6 @@ drop_ct:
return false;
}
-/* Trim the skb to the length specified by the IP/IPv6 header,
- * removing any trailing lower-layer padding. This prepares the skb
- * for higher-layer processing that assumes skb->len excludes padding
- * (such as nf_ip_checksum). The caller needs to pull the skb to the
- * network header, and ensure ip_hdr/ipv6_hdr points to valid data.
- */
-static int tcf_ct_skb_network_trim(struct sk_buff *skb, int family)
-{
- unsigned int len;
-
- switch (family) {
- case NFPROTO_IPV4:
- len = ntohs(ip_hdr(skb)->tot_len);
- break;
- case NFPROTO_IPV6:
- len = sizeof(struct ipv6hdr)
- + ntohs(ipv6_hdr(skb)->payload_len);
- break;
- default:
- len = skb->len;
- }
-
- return pskb_trim_rcsum(skb, len);
-}
-
static u8 tcf_ct_skb_nf_family(struct sk_buff *skb)
{
u8 family = NFPROTO_UNSPEC;
@@ -779,6 +785,7 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
struct nf_conn *ct;
int err = 0;
bool frag;
+ u8 proto;
u16 mru;
/* Previously seen (loopback)? Ignore. */
@@ -794,50 +801,14 @@ static int tcf_ct_handle_fragments(struct net *net, struct sk_buff *skb,
return err;
skb_get(skb);
- mru = tc_skb_cb(skb)->mru;
-
- if (family == NFPROTO_IPV4) {
- enum ip_defrag_users user = IP_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IPCB(skb), 0, sizeof(struct inet_skb_parm));
- local_bh_disable();
- err = ip_defrag(net, skb, user);
- local_bh_enable();
- if (err && err != -EINPROGRESS)
- return err;
-
- if (!err) {
- *defrag = true;
- mru = IPCB(skb)->frag_max_size;
- }
- } else { /* NFPROTO_IPV6 */
-#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
- enum ip6_defrag_users user = IP6_DEFRAG_CONNTRACK_IN + zone;
-
- memset(IP6CB(skb), 0, sizeof(struct inet6_skb_parm));
- err = nf_ct_frag6_gather(net, skb, user);
- if (err && err != -EINPROGRESS)
- goto out_free;
-
- if (!err) {
- *defrag = true;
- mru = IP6CB(skb)->frag_max_size;
- }
-#else
- err = -EOPNOTSUPP;
- goto out_free;
-#endif
- }
+ err = nf_ct_handle_fragments(net, skb, zone, family, &proto, &mru);
+ if (err)
+ return err;
- if (err != -EINPROGRESS)
- tc_skb_cb(skb)->mru = mru;
- skb_clear_hash(skb);
- skb->ignore_df = 1;
- return err;
+ *defrag = true;
+ tc_skb_cb(skb)->mru = mru;
-out_free:
- kfree_skb(skb);
- return err;
+ return 0;
}
static void tcf_ct_params_free(struct tcf_ct_params *params)
@@ -980,7 +951,7 @@ TC_INDIRECT_SCOPE int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
if (err)
goto drop;
- err = tcf_ct_skb_network_trim(skb, family);
+ err = nf_ct_skb_network_trim(skb, family);
if (err)
goto drop;
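The act_ct changes above let UDP flows be offloaded before they are ASSURED: the original direction is installed first (unidirectionally), the reply direction is enabled once the connection becomes assured, and the ctinfo reported to hardware reflects whether a reply has been seen. A sketch of the ctinfo choice for the original direction, illustrative only:

#include <net/netfilter/nf_conntrack.h>

static enum ip_conntrack_info demo_orig_ctinfo(const struct nf_conn *ct)
{
	/* a reply was seen, so report ESTABLISHED; otherwise offload
	 * the flow as NEW and let the reply direction upgrade it later
	 */
	return test_bit(IPS_SEEN_REPLY_BIT, &ct->status) ?
	       IP_CT_ESTABLISHED : IP_CT_NEW;
}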
diff --git a/net/sched/act_ctinfo.c b/net/sched/act_ctinfo.c
index 4b1b59da5c0b..4d15b6a6169c 100644
--- a/net/sched/act_ctinfo.c
+++ b/net/sched/act_ctinfo.c
@@ -93,7 +93,7 @@ TC_INDIRECT_SCOPE int tcf_ctinfo_act(struct sk_buff *skb,
cp = rcu_dereference_bh(ca->params);
tcf_lastuse_update(&ca->tcf_tm);
- bstats_update(&ca->tcf_bstats, skb);
+ tcf_action_update_bstats(&ca->common, skb);
action = READ_ONCE(ca->tcf_action);
wlen = skb_network_offset(skb);
@@ -212,8 +212,8 @@ static int tcf_ctinfo_init(struct net *net, struct nlattr *nla,
index = actparm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_ctinfo_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_ctinfo_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
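Switching to tcf_idr_create_from_flags() lets these actions allocate per-CPU counters (unless userspace set TCA_ACT_FLAGS_NO_PERCPU_STATS), which is what makes the lock-free tcf_action_update_bstats() calls above safe. A rough model of why per-CPU stats need no spinlock; the real helpers additionally use u64_stats sync on 32-bit kernels:

#include <linux/percpu-defs.h>
#include <linux/skbuff.h>

struct demo_bstats {
	u64 bytes;
	u64 packets;
};

static DEFINE_PER_CPU(struct demo_bstats, demo_stats);

static void demo_bstats_update(const struct sk_buff *skb)
{
	struct demo_bstats *s = this_cpu_ptr(&demo_stats);

	/* each CPU only ever touches its own slot, so no lock needed */
	s->bytes += skb->len;
	s->packets++;
}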
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 9b8def0be41e..c9a811f4c7ee 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -119,35 +119,37 @@ TC_INDIRECT_SCOPE int tcf_gate_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_gate *gact = to_gate(a);
-
- spin_lock(&gact->tcf_lock);
+ int action = READ_ONCE(gact->tcf_action);
tcf_lastuse_update(&gact->tcf_tm);
- bstats_update(&gact->tcf_bstats, skb);
+ tcf_action_update_bstats(&gact->common, skb);
+ spin_lock(&gact->tcf_lock);
if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
spin_unlock(&gact->tcf_lock);
- return gact->tcf_action;
+ return action;
}
- if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
+ if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN)) {
+ spin_unlock(&gact->tcf_lock);
goto drop;
+ }
if (gact->current_max_octets >= 0) {
gact->current_entry_octets += qdisc_pkt_len(skb);
if (gact->current_entry_octets > gact->current_max_octets) {
- gact->tcf_qstats.overlimits++;
- goto drop;
+ spin_unlock(&gact->tcf_lock);
+ goto overlimit;
}
}
-
spin_unlock(&gact->tcf_lock);
- return gact->tcf_action;
-drop:
- gact->tcf_qstats.drops++;
- spin_unlock(&gact->tcf_lock);
+ return action;
+overlimit:
+ tcf_action_inc_overlimit_qstats(&gact->common);
+drop:
+ tcf_action_inc_drop_qstats(&gact->common);
return TC_ACT_SHOT;
}
@@ -357,8 +359,8 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
return 0;
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_gate_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_gate_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
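The tcf_gate_act() rework above narrows the lock to the gate state it actually protects: stats move to the lock-free per-CPU helpers, and every branch now releases the lock before jumping to its exit label. A control-flow sketch of the resulting shape, with hypothetical names:

#include <linux/spinlock.h>

static int demo_gate(spinlock_t *lock, bool open, bool over_budget)
{
	spin_lock(lock);
	if (!open) {
		spin_unlock(lock);	/* unlock before the jump */
		goto drop;
	}
	if (over_budget) {
		spin_unlock(lock);
		goto overlimit;
	}
	spin_unlock(lock);
	return 0;			/* pass */

overlimit:
	/* bump the overlimit counter here, then fall through */
drop:
	/* bump the drop counter here */
	return -1;			/* shot */
}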
diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c
index 7284bcea7b0b..8037ec9b1d31 100644
--- a/net/sched/act_mirred.c
+++ b/net/sched/act_mirred.c
@@ -29,8 +29,8 @@
static LIST_HEAD(mirred_list);
static DEFINE_SPINLOCK(mirred_list_lock);
-#define MIRRED_RECURSION_LIMIT 4
-static DEFINE_PER_CPU(unsigned int, mirred_rec_level);
+#define MIRRED_NEST_LIMIT 4
+static DEFINE_PER_CPU(unsigned int, mirred_nest_level);
static bool tcf_mirred_is_act_redirect(int action)
{
@@ -206,12 +206,19 @@ release_idr:
return err;
}
+static bool is_mirred_nested(void)
+{
+ return unlikely(__this_cpu_read(mirred_nest_level) > 1);
+}
+
static int tcf_mirred_forward(bool want_ingress, struct sk_buff *skb)
{
int err;
if (!want_ingress)
err = tcf_dev_queue_xmit(skb, dev_queue_xmit);
+ else if (is_mirred_nested())
+ err = netif_rx(skb);
else
err = netif_receive_skb(skb);
@@ -226,7 +233,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
struct sk_buff *skb2 = skb;
bool m_mac_header_xmit;
struct net_device *dev;
- unsigned int rec_level;
+ unsigned int nest_level;
int retval, err = 0;
bool use_reinsert;
bool want_ingress;
@@ -237,11 +244,11 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
int mac_len;
bool at_nh;
- rec_level = __this_cpu_inc_return(mirred_rec_level);
- if (unlikely(rec_level > MIRRED_RECURSION_LIMIT)) {
+ nest_level = __this_cpu_inc_return(mirred_nest_level);
+ if (unlikely(nest_level > MIRRED_NEST_LIMIT)) {
net_warn_ratelimited("Packet exceeded mirred recursion limit on dev %s\n",
netdev_name(skb->dev));
- __this_cpu_dec(mirred_rec_level);
+ __this_cpu_dec(mirred_nest_level);
return TC_ACT_SHOT;
}
@@ -310,7 +317,7 @@ TC_INDIRECT_SCOPE int tcf_mirred_act(struct sk_buff *skb,
err = tcf_mirred_forward(want_ingress, skb);
if (err)
tcf_action_inc_overlimit_qstats(&m->common);
- __this_cpu_dec(mirred_rec_level);
+ __this_cpu_dec(mirred_nest_level);
return TC_ACT_CONSUMED;
}
}
@@ -322,7 +329,7 @@ out:
if (tcf_mirred_is_act_redirect(m_eaction))
retval = TC_ACT_SHOT;
}
- __this_cpu_dec(mirred_rec_level);
+ __this_cpu_dec(mirred_nest_level);
return retval;
}
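The mirred rename from "recursion" to "nesting" comes with a behavioural fix: once a mirred action is already on this CPU's call stack, further ingress redirects are queued through the backlog (netif_rx()) instead of re-entering netif_receive_skb(), which could otherwise grow the stack until the nest limit fired. A condensed sketch of the guard:

#include <linux/netdevice.h>
#include <linux/percpu-defs.h>

#define DEMO_NEST_LIMIT 4
static DEFINE_PER_CPU(unsigned int, demo_nest_level);

static int demo_forward(struct sk_buff *skb, bool want_ingress)
{
	unsigned int depth = __this_cpu_inc_return(demo_nest_level);
	int err;

	if (depth > DEMO_NEST_LIMIT) {
		__this_cpu_dec(demo_nest_level);
		kfree_skb(skb);
		return -ELOOP;
	}

	if (!want_ingress)
		err = dev_queue_xmit(skb);
	else if (depth > 1)		/* nested: defer via the backlog */
		err = netif_rx(skb);
	else
		err = netif_receive_skb(skb);

	__this_cpu_dec(demo_nest_level);
	return err;
}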
diff --git a/net/sched/act_mpls.c b/net/sched/act_mpls.c
index ff47ce4d3968..809f7928a1be 100644
--- a/net/sched/act_mpls.c
+++ b/net/sched/act_mpls.c
@@ -134,6 +134,11 @@ static int valid_label(const struct nlattr *attr,
{
const u32 *label = nla_data(attr);
+ if (nla_len(attr) != sizeof(*label)) {
+ NL_SET_ERR_MSG_MOD(extack, "Invalid MPLS label length");
+ return -EINVAL;
+ }
+
if (*label & ~MPLS_LABEL_MASK || *label == MPLS_LABEL_IMPLNULL) {
NL_SET_ERR_MSG_MOD(extack, "MPLS label out of range");
return -EINVAL;
@@ -145,7 +150,8 @@ static int valid_label(const struct nlattr *attr,
static const struct nla_policy mpls_policy[TCA_MPLS_MAX + 1] = {
[TCA_MPLS_PARMS] = NLA_POLICY_EXACT_LEN(sizeof(struct tc_mpls)),
[TCA_MPLS_PROTO] = { .type = NLA_U16 },
- [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_U32, valid_label),
+ [TCA_MPLS_LABEL] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
+ valid_label),
[TCA_MPLS_TC] = NLA_POLICY_RANGE(NLA_U8, 0, 7),
[TCA_MPLS_TTL] = NLA_POLICY_MIN(NLA_U8, 1),
[TCA_MPLS_BOS] = NLA_POLICY_RANGE(NLA_U8, 0, 1),
@@ -184,40 +190,67 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
parm = nla_data(tb[TCA_MPLS_PARMS]);
index = parm->index;
+ err = tcf_idr_check_alloc(tn, &index, a, bind);
+ if (err < 0)
+ return err;
+ exists = err;
+ if (exists && bind)
+ return 0;
+
+ if (!exists) {
+ ret = tcf_idr_create(tn, index, est, a, &act_mpls_ops, bind,
+ true, flags);
+ if (ret) {
+ tcf_idr_cleanup(tn, index);
+ return ret;
+ }
+
+ ret = ACT_P_CREATED;
+ } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
+ tcf_idr_release(*a, bind);
+ return -EEXIST;
+ }
+
/* Verify parameters against action type. */
switch (parm->m_action) {
case TCA_MPLS_ACT_POP:
if (!tb[TCA_MPLS_PROTO]) {
NL_SET_ERR_MSG_MOD(extack, "Protocol must be set for MPLS pop");
- return -EINVAL;
+ err = -EINVAL;
+ goto release_idr;
}
if (!eth_proto_is_802_3(nla_get_be16(tb[TCA_MPLS_PROTO]))) {
NL_SET_ERR_MSG_MOD(extack, "Invalid protocol type for MPLS pop");
- return -EINVAL;
+ err = -EINVAL;
+ goto release_idr;
}
if (tb[TCA_MPLS_LABEL] || tb[TCA_MPLS_TTL] || tb[TCA_MPLS_TC] ||
tb[TCA_MPLS_BOS]) {
NL_SET_ERR_MSG_MOD(extack, "Label, TTL, TC or BOS cannot be used with MPLS pop");
- return -EINVAL;
+ err = -EINVAL;
+ goto release_idr;
}
break;
case TCA_MPLS_ACT_DEC_TTL:
if (tb[TCA_MPLS_PROTO] || tb[TCA_MPLS_LABEL] ||
tb[TCA_MPLS_TTL] || tb[TCA_MPLS_TC] || tb[TCA_MPLS_BOS]) {
NL_SET_ERR_MSG_MOD(extack, "Label, TTL, TC, BOS or protocol cannot be used with MPLS dec_ttl");
- return -EINVAL;
+ err = -EINVAL;
+ goto release_idr;
}
break;
case TCA_MPLS_ACT_PUSH:
case TCA_MPLS_ACT_MAC_PUSH:
if (!tb[TCA_MPLS_LABEL]) {
NL_SET_ERR_MSG_MOD(extack, "Label is required for MPLS push");
- return -EINVAL;
+ err = -EINVAL;
+ goto release_idr;
}
if (tb[TCA_MPLS_PROTO] &&
!eth_p_mpls(nla_get_be16(tb[TCA_MPLS_PROTO]))) {
NL_SET_ERR_MSG_MOD(extack, "Protocol must be an MPLS type for MPLS push");
- return -EPROTONOSUPPORT;
+ err = -EPROTONOSUPPORT;
+ goto release_idr;
}
/* Push needs a TTL - if not specified, set a default value. */
if (!tb[TCA_MPLS_TTL]) {
@@ -232,33 +265,14 @@ static int tcf_mpls_init(struct net *net, struct nlattr *nla,
case TCA_MPLS_ACT_MODIFY:
if (tb[TCA_MPLS_PROTO]) {
NL_SET_ERR_MSG_MOD(extack, "Protocol cannot be used with MPLS modify");
- return -EINVAL;
+ err = -EINVAL;
+ goto release_idr;
}
break;
default:
NL_SET_ERR_MSG_MOD(extack, "Unknown MPLS action");
- return -EINVAL;
- }
-
- err = tcf_idr_check_alloc(tn, &index, a, bind);
- if (err < 0)
- return err;
- exists = err;
- if (exists && bind)
- return 0;
-
- if (!exists) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_mpls_ops, bind, true, flags);
- if (ret) {
- tcf_idr_cleanup(tn, index);
- return ret;
- }
-
- ret = ACT_P_CREATED;
- } else if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
- tcf_idr_release(*a, bind);
- return -EEXIST;
+ err = -EINVAL;
+ goto release_idr;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
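Two independent fixes meet in act_mpls: the TCA_MPLS_LABEL policy moves from NLA_U32 to NLA_BINARY, with the u32 length now checked explicitly in valid_label() to avoid the warning the old policy type could trigger on malformed input, and parameter checks run after the index is reserved so every failure funnels through release_idr. A sketch of a validator that owns its length check, under the same assumption:

#include <net/netlink.h>

/* hypothetical validator: with NLA_BINARY the callback must check the
 * payload length itself before touching the data
 */
static int demo_valid_u32(const struct nlattr *attr,
			  struct netlink_ext_ack *extack)
{
	const u32 *val;

	if (nla_len(attr) != sizeof(*val)) {
		NL_SET_ERR_MSG_MOD(extack, "Invalid attribute length");
		return -EINVAL;
	}

	val = nla_data(attr);
	if (*val > 0xfffff) {	/* example 20-bit range check */
		NL_SET_ERR_MSG_MOD(extack, "Value out of range");
		return -EINVAL;
	}
	return 0;
}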
diff --git a/net/sched/act_nat.c b/net/sched/act_nat.c
index 74c74be33048..4184af5abbf3 100644
--- a/net/sched/act_nat.c
+++ b/net/sched/act_nat.c
@@ -38,6 +38,7 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
{
struct tc_action_net *tn = net_generic(net, act_nat_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
+ struct tcf_nat_parms *nparm, *oparm;
struct nlattr *tb[TCA_NAT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
struct tc_nat *parm;
@@ -59,8 +60,8 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_nat_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a, &act_nat_ops,
+ bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
return ret;
@@ -79,19 +80,31 @@ static int tcf_nat_init(struct net *net, struct nlattr *nla, struct nlattr *est,
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
+
+ nparm = kzalloc(sizeof(*nparm), GFP_KERNEL);
+ if (!nparm) {
+ err = -ENOMEM;
+ goto release_idr;
+ }
+
+ nparm->old_addr = parm->old_addr;
+ nparm->new_addr = parm->new_addr;
+ nparm->mask = parm->mask;
+ nparm->flags = parm->flags;
+
p = to_tcf_nat(*a);
spin_lock_bh(&p->tcf_lock);
- p->old_addr = parm->old_addr;
- p->new_addr = parm->new_addr;
- p->mask = parm->mask;
- p->flags = parm->flags;
-
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparm = rcu_replace_pointer(p->parms, nparm, lockdep_is_held(&p->tcf_lock));
spin_unlock_bh(&p->tcf_lock);
+
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+ if (oparm)
+ kfree_rcu(oparm, rcu);
+
return ret;
release_idr:
tcf_idr_release(*a, bind);
@@ -103,6 +116,7 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
struct tcf_result *res)
{
struct tcf_nat *p = to_tcf_nat(a);
+ struct tcf_nat_parms *parms;
struct iphdr *iph;
__be32 old_addr;
__be32 new_addr;
@@ -113,18 +127,16 @@ TC_INDIRECT_SCOPE int tcf_nat_act(struct sk_buff *skb,
int ihl;
int noff;
- spin_lock(&p->tcf_lock);
-
tcf_lastuse_update(&p->tcf_tm);
- old_addr = p->old_addr;
- new_addr = p->new_addr;
- mask = p->mask;
- egress = p->flags & TCA_NAT_FLAG_EGRESS;
- action = p->tcf_action;
+ tcf_action_update_bstats(&p->common, skb);
- bstats_update(&p->tcf_bstats, skb);
+ action = READ_ONCE(p->tcf_action);
- spin_unlock(&p->tcf_lock);
+ parms = rcu_dereference_bh(p->parms);
+ old_addr = parms->old_addr;
+ new_addr = parms->new_addr;
+ mask = parms->mask;
+ egress = parms->flags & TCA_NAT_FLAG_EGRESS;
if (unlikely(action == TC_ACT_SHOT))
goto drop;
@@ -248,9 +260,7 @@ out:
return action;
drop:
- spin_lock(&p->tcf_lock);
- p->tcf_qstats.drops++;
- spin_unlock(&p->tcf_lock);
+ tcf_action_inc_drop_qstats(&p->common);
return TC_ACT_SHOT;
}
@@ -264,15 +274,20 @@ static int tcf_nat_dump(struct sk_buff *skb, struct tc_action *a,
.refcnt = refcount_read(&p->tcf_refcnt) - ref,
.bindcnt = atomic_read(&p->tcf_bindcnt) - bind,
};
+ struct tcf_nat_parms *parms;
struct tcf_t t;
spin_lock_bh(&p->tcf_lock);
- opt.old_addr = p->old_addr;
- opt.new_addr = p->new_addr;
- opt.mask = p->mask;
- opt.flags = p->flags;
+
opt.action = p->tcf_action;
+ parms = rcu_dereference_protected(p->parms, lockdep_is_held(&p->tcf_lock));
+
+ opt.old_addr = parms->old_addr;
+ opt.new_addr = parms->new_addr;
+ opt.mask = parms->mask;
+ opt.flags = parms->flags;
+
if (nla_put(skb, TCA_NAT_PARMS, sizeof(opt), &opt))
goto nla_put_failure;
@@ -289,6 +304,16 @@ nla_put_failure:
return -1;
}
+static void tcf_nat_cleanup(struct tc_action *a)
+{
+ struct tcf_nat *p = to_tcf_nat(a);
+ struct tcf_nat_parms *parms;
+
+ parms = rcu_dereference_protected(p->parms, 1);
+ if (parms)
+ kfree_rcu(parms, rcu);
+}
+
static struct tc_action_ops act_nat_ops = {
.kind = "nat",
.id = TCA_ID_NAT,
@@ -296,6 +321,7 @@ static struct tc_action_ops act_nat_ops = {
.act = tcf_nat_act,
.dump = tcf_nat_dump,
.init = tcf_nat_init,
+ .cleanup = tcf_nat_cleanup,
.size = sizeof(struct tcf_nat),
};
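act_nat follows the same RCU-parms conversion as act_connmark: the datapath reads parms with rcu_dereference_bh() and no lock, while the dump path, already serialized against writers by tcf_lock, can use rcu_dereference_protected(). A compressed sketch of the dump side, with hypothetical demo_* types:

#include <linux/rcupdate.h>
#include <linux/spinlock.h>

struct demo_parms {
	u32 mask;
	struct rcu_head rcu;
};

struct demo_act {
	spinlock_t lock;
	struct demo_parms __rcu *parms;
};

static u32 demo_dump_mask(struct demo_act *p)
{
	struct demo_parms *parms;
	u32 mask;

	spin_lock_bh(&p->lock);
	/* the lock excludes writers, so no RCU read section is needed */
	parms = rcu_dereference_protected(p->parms,
					  lockdep_is_held(&p->lock));
	mask = parms->mask;	/* snapshot under the lock */
	spin_unlock_bh(&p->lock);

	return mask;
}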
diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c
index a0378e9f0121..4559a1507ea5 100644
--- a/net/sched/act_pedit.c
+++ b/net/sched/act_pedit.c
@@ -134,6 +134,17 @@ nla_failure:
return -EINVAL;
}
+static void tcf_pedit_cleanup_rcu(struct rcu_head *head)
+{
+ struct tcf_pedit_parms *parms =
+ container_of(head, struct tcf_pedit_parms, rcu);
+
+ kfree(parms->tcfp_keys_ex);
+ kfree(parms->tcfp_keys);
+
+ kfree(parms);
+}
+
static int tcf_pedit_init(struct net *net, struct nlattr *nla,
struct nlattr *est, struct tc_action **a,
struct tcf_proto *tp, u32 flags,
@@ -141,10 +152,9 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
{
struct tc_action_net *tn = net_generic(net, act_pedit_ops.net_id);
bool bind = flags & TCA_ACT_FLAGS_BIND;
- struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tcf_chain *goto_ch = NULL;
- struct tc_pedit_key *keys = NULL;
- struct tcf_pedit_key_ex *keys_ex;
+ struct tcf_pedit_parms *oparms, *nparms;
+ struct nlattr *tb[TCA_PEDIT_MAX + 1];
struct tc_pedit *parm;
struct nlattr *pattr;
struct tcf_pedit *p;
@@ -171,109 +181,125 @@ static int tcf_pedit_init(struct net *net, struct nlattr *nla,
}
parm = nla_data(pattr);
- if (!parm->nkeys) {
- NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
- return -EINVAL;
- }
- ksize = parm->nkeys * sizeof(struct tc_pedit_key);
- if (nla_len(pattr) < sizeof(*parm) + ksize) {
- NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
- return -EINVAL;
- }
-
- keys_ex = tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
- if (IS_ERR(keys_ex))
- return PTR_ERR(keys_ex);
index = parm->index;
err = tcf_idr_check_alloc(tn, &index, a, bind);
if (!err) {
- ret = tcf_idr_create(tn, index, est, a,
- &act_pedit_ops, bind, false, flags);
+ ret = tcf_idr_create_from_flags(tn, index, est, a,
+ &act_pedit_ops, bind, flags);
if (ret) {
tcf_idr_cleanup(tn, index);
- goto out_free;
+ return ret;
}
ret = ACT_P_CREATED;
} else if (err > 0) {
if (bind)
- goto out_free;
+ return 0;
if (!(flags & TCA_ACT_FLAGS_REPLACE)) {
ret = -EEXIST;
goto out_release;
}
} else {
- ret = err;
+ return err;
+ }
+
+ if (!parm->nkeys) {
+ NL_SET_ERR_MSG_MOD(extack, "Pedit requires keys to be passed");
+ ret = -EINVAL;
+ goto out_release;
+ }
+ ksize = parm->nkeys * sizeof(struct tc_pedit_key);
+ if (nla_len(pattr) < sizeof(*parm) + ksize) {
+ NL_SET_ERR_MSG_ATTR(extack, pattr, "Length of TCA_PEDIT_PARMS or TCA_PEDIT_PARMS_EX pedit attribute is invalid");
+ ret = -EINVAL;
+ goto out_release;
+ }
+
+ nparms = kzalloc(sizeof(*nparms), GFP_KERNEL);
+ if (!nparms) {
+ ret = -ENOMEM;
+ goto out_release;
+ }
+
+ nparms->tcfp_keys_ex =
+ tcf_pedit_keys_ex_parse(tb[TCA_PEDIT_KEYS_EX], parm->nkeys);
+ if (IS_ERR(nparms->tcfp_keys_ex)) {
+ ret = PTR_ERR(nparms->tcfp_keys_ex);
goto out_free;
}
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0) {
ret = err;
- goto out_release;
+ goto out_free_ex;
}
- p = to_pedit(*a);
- spin_lock_bh(&p->tcf_lock);
- if (ret == ACT_P_CREATED ||
- (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys)) {
- keys = kmalloc(ksize, GFP_ATOMIC);
- if (!keys) {
- spin_unlock_bh(&p->tcf_lock);
- ret = -ENOMEM;
- goto put_chain;
- }
- kfree(p->tcfp_keys);
- p->tcfp_keys = keys;
- p->tcfp_nkeys = parm->nkeys;
+ nparms->tcfp_off_max_hint = 0;
+ nparms->tcfp_flags = parm->flags;
+ nparms->tcfp_nkeys = parm->nkeys;
+
+ nparms->tcfp_keys = kmalloc(ksize, GFP_KERNEL);
+ if (!nparms->tcfp_keys) {
+ ret = -ENOMEM;
+ goto put_chain;
}
- memcpy(p->tcfp_keys, parm->keys, ksize);
- p->tcfp_off_max_hint = 0;
- for (i = 0; i < p->tcfp_nkeys; ++i) {
- u32 cur = p->tcfp_keys[i].off;
+
+ memcpy(nparms->tcfp_keys, parm->keys, ksize);
+
+ for (i = 0; i < nparms->tcfp_nkeys; ++i) {
+ u32 cur = nparms->tcfp_keys[i].off;
/* sanitize the shift value for any later use */
- p->tcfp_keys[i].shift = min_t(size_t, BITS_PER_TYPE(int) - 1,
- p->tcfp_keys[i].shift);
+ nparms->tcfp_keys[i].shift = min_t(size_t,
+ BITS_PER_TYPE(int) - 1,
+ nparms->tcfp_keys[i].shift);
/* The AT option can read a single byte, we can bound the actual
* value with uchar max.
*/
- cur += (0xff & p->tcfp_keys[i].offmask) >> p->tcfp_keys[i].shift;
+ cur += (0xff & nparms->tcfp_keys[i].offmask) >> nparms->tcfp_keys[i].shift;
/* Each key touches 4 bytes starting from the computed offset */
- p->tcfp_off_max_hint = max(p->tcfp_off_max_hint, cur + 4);
+ nparms->tcfp_off_max_hint =
+ max(nparms->tcfp_off_max_hint, cur + 4);
}
- p->tcfp_flags = parm->flags;
+ p = to_pedit(*a);
+
+ spin_lock_bh(&p->tcf_lock);
goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+ oparms = rcu_replace_pointer(p->parms, nparms, 1);
+ spin_unlock_bh(&p->tcf_lock);
- kfree(p->tcfp_keys_ex);
- p->tcfp_keys_ex = keys_ex;
+ if (oparms)
+ call_rcu(&oparms->rcu, tcf_pedit_cleanup_rcu);
- spin_unlock_bh(&p->tcf_lock);
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+
return ret;
put_chain:
if (goto_ch)
tcf_chain_put_by_act(goto_ch);
+out_free_ex:
+ kfree(nparms->tcfp_keys_ex);
+out_free:
+ kfree(nparms);
out_release:
tcf_idr_release(*a, bind);
-out_free:
- kfree(keys_ex);
return ret;
-
}
static void tcf_pedit_cleanup(struct tc_action *a)
{
struct tcf_pedit *p = to_pedit(a);
- struct tc_pedit_key *keys = p->tcfp_keys;
+ struct tcf_pedit_parms *parms;
+
+ parms = rcu_dereference_protected(p->parms, 1);
- kfree(keys);
- kfree(p->tcfp_keys_ex);
+ if (parms)
+ call_rcu(&parms->rcu, tcf_pedit_cleanup_rcu);
}
static bool offset_valid(struct sk_buff *skb, int offset)
@@ -324,109 +350,105 @@ TC_INDIRECT_SCOPE int tcf_pedit_act(struct sk_buff *skb,
const struct tc_action *a,
struct tcf_result *res)
{
+ enum pedit_header_type htype = TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
+ enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
struct tcf_pedit *p = to_pedit(a);
+ struct tcf_pedit_key_ex *tkey_ex;
+ struct tcf_pedit_parms *parms;
+ struct tc_pedit_key *tkey;
u32 max_offset;
int i;
- spin_lock(&p->tcf_lock);
+ parms = rcu_dereference_bh(p->parms);
max_offset = (skb_transport_header_was_set(skb) ?
skb_transport_offset(skb) :
skb_network_offset(skb)) +
- p->tcfp_off_max_hint;
+ parms->tcfp_off_max_hint;
if (skb_ensure_writable(skb, min(skb->len, max_offset)))
- goto unlock;
+ goto done;
tcf_lastuse_update(&p->tcf_tm);
+ tcf_action_update_bstats(&p->common, skb);
- if (p->tcfp_nkeys > 0) {
- struct tc_pedit_key *tkey = p->tcfp_keys;
- struct tcf_pedit_key_ex *tkey_ex = p->tcfp_keys_ex;
- enum pedit_header_type htype =
- TCA_PEDIT_KEY_EX_HDR_TYPE_NETWORK;
- enum pedit_cmd cmd = TCA_PEDIT_KEY_EX_CMD_SET;
-
- for (i = p->tcfp_nkeys; i > 0; i--, tkey++) {
- u32 *ptr, hdata;
- int offset = tkey->off;
- int hoffset;
- u32 val;
- int rc;
-
- if (tkey_ex) {
- htype = tkey_ex->htype;
- cmd = tkey_ex->cmd;
-
- tkey_ex++;
- }
+ tkey = parms->tcfp_keys;
+ tkey_ex = parms->tcfp_keys_ex;
- rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
- if (rc) {
- pr_info("tc action pedit bad header type specified (0x%x)\n",
- htype);
- goto bad;
- }
+ for (i = parms->tcfp_nkeys; i > 0; i--, tkey++) {
+ int offset = tkey->off;
+ u32 *ptr, hdata;
+ int hoffset;
+ u32 val;
+ int rc;
- if (tkey->offmask) {
- u8 *d, _d;
-
- if (!offset_valid(skb, hoffset + tkey->at)) {
- pr_info("tc action pedit 'at' offset %d out of bounds\n",
- hoffset + tkey->at);
- goto bad;
- }
- d = skb_header_pointer(skb, hoffset + tkey->at,
- sizeof(_d), &_d);
- if (!d)
- goto bad;
- offset += (*d & tkey->offmask) >> tkey->shift;
- }
+ if (tkey_ex) {
+ htype = tkey_ex->htype;
+ cmd = tkey_ex->cmd;
- if (offset % 4) {
- pr_info("tc action pedit offset must be on 32 bit boundaries\n");
- goto bad;
- }
+ tkey_ex++;
+ }
- if (!offset_valid(skb, hoffset + offset)) {
- pr_info("tc action pedit offset %d out of bounds\n",
- hoffset + offset);
- goto bad;
- }
+ rc = pedit_skb_hdr_offset(skb, htype, &hoffset);
+ if (rc) {
+ pr_info("tc action pedit bad header type specified (0x%x)\n",
+ htype);
+ goto bad;
+ }
- ptr = skb_header_pointer(skb, hoffset + offset,
- sizeof(hdata), &hdata);
- if (!ptr)
- goto bad;
- /* just do it, baby */
- switch (cmd) {
- case TCA_PEDIT_KEY_EX_CMD_SET:
- val = tkey->val;
- break;
- case TCA_PEDIT_KEY_EX_CMD_ADD:
- val = (*ptr + tkey->val) & ~tkey->mask;
- break;
- default:
- pr_info("tc action pedit bad command (%d)\n",
- cmd);
+ if (tkey->offmask) {
+ u8 *d, _d;
+
+ if (!offset_valid(skb, hoffset + tkey->at)) {
+ pr_info("tc action pedit 'at' offset %d out of bounds\n",
+ hoffset + tkey->at);
goto bad;
}
+ d = skb_header_pointer(skb, hoffset + tkey->at,
+ sizeof(_d), &_d);
+ if (!d)
+ goto bad;
+ offset += (*d & tkey->offmask) >> tkey->shift;
+ }
- *ptr = ((*ptr & tkey->mask) ^ val);
- if (ptr == &hdata)
- skb_store_bits(skb, hoffset + offset, ptr, 4);
+ if (offset % 4) {
+ pr_info("tc action pedit offset must be on 32 bit boundaries\n");
+ goto bad;
}
- goto done;
- } else {
- WARN(1, "pedit BUG: index %d\n", p->tcf_index);
+ if (!offset_valid(skb, hoffset + offset)) {
+ pr_info("tc action pedit offset %d out of bounds\n",
+ hoffset + offset);
+ goto bad;
+ }
+
+ ptr = skb_header_pointer(skb, hoffset + offset,
+ sizeof(hdata), &hdata);
+ if (!ptr)
+ goto bad;
+ /* just do it, baby */
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ val = tkey->val;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ val = (*ptr + tkey->val) & ~tkey->mask;
+ break;
+ default:
+ pr_info("tc action pedit bad command (%d)\n",
+ cmd);
+ goto bad;
+ }
+
+ *ptr = ((*ptr & tkey->mask) ^ val);
+ if (ptr == &hdata)
+ skb_store_bits(skb, hoffset + offset, ptr, 4);
}
+ goto done;
+
bad:
- p->tcf_qstats.overlimits++;
+ tcf_action_inc_overlimit_qstats(&p->common);
done:
- bstats_update(&p->tcf_bstats, skb);
-unlock:
- spin_unlock(&p->tcf_lock);
return p->tcf_action;
}
@@ -445,30 +467,33 @@ static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,
{
unsigned char *b = skb_tail_pointer(skb);
struct tcf_pedit *p = to_pedit(a);
+ struct tcf_pedit_parms *parms;
struct tc_pedit *opt;
struct tcf_t t;
int s;
- s = struct_size(opt, keys, p->tcfp_nkeys);
+ spin_lock_bh(&p->tcf_lock);
+ parms = rcu_dereference_protected(p->parms, 1);
+ s = struct_size(opt, keys, parms->tcfp_nkeys);
- /* netlink spinlocks held above us - must use ATOMIC */
opt = kzalloc(s, GFP_ATOMIC);
- if (unlikely(!opt))
+ if (unlikely(!opt)) {
+ spin_unlock_bh(&p->tcf_lock);
return -ENOBUFS;
+ }
- spin_lock_bh(&p->tcf_lock);
- memcpy(opt->keys, p->tcfp_keys, flex_array_size(opt, keys, p->tcfp_nkeys));
+ memcpy(opt->keys, parms->tcfp_keys,
+ flex_array_size(opt, keys, parms->tcfp_nkeys));
opt->index = p->tcf_index;
- opt->nkeys = p->tcfp_nkeys;
- opt->flags = p->tcfp_flags;
+ opt->nkeys = parms->tcfp_nkeys;
+ opt->flags = parms->tcfp_flags;
opt->action = p->tcf_action;
opt->refcnt = refcount_read(&p->tcf_refcnt) - ref;
opt->bindcnt = atomic_read(&p->tcf_bindcnt) - bind;
- if (p->tcfp_keys_ex) {
- if (tcf_pedit_key_ex_dump(skb,
- p->tcfp_keys_ex,
- p->tcfp_nkeys))
+ if (parms->tcfp_keys_ex) {
+ if (tcf_pedit_key_ex_dump(skb, parms->tcfp_keys_ex,
+ parms->tcfp_nkeys))
goto nla_put_failure;
if (nla_put(skb, TCA_PEDIT_PARMS_EX, s, opt))
@@ -522,7 +547,28 @@ static int tcf_pedit_offload_act_setup(struct tc_action *act, void *entry_data,
}
*index_inc = k;
} else {
- return -EOPNOTSUPP;
+ struct flow_offload_action *fl_action = entry_data;
+ u32 cmd = tcf_pedit_cmd(act, 0);
+ int k;
+
+ switch (cmd) {
+ case TCA_PEDIT_KEY_EX_CMD_SET:
+ fl_action->id = FLOW_ACTION_MANGLE;
+ break;
+ case TCA_PEDIT_KEY_EX_CMD_ADD:
+ fl_action->id = FLOW_ACTION_ADD;
+ break;
+ default:
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
+ return -EOPNOTSUPP;
+ }
+
+ for (k = 1; k < tcf_pedit_nkeys(act); k++) {
+ if (cmd != tcf_pedit_cmd(act, k)) {
+ NL_SET_ERR_MSG_MOD(extack, "Unsupported pedit command offload");
+ return -EOPNOTSUPP;
+ }
+ }
}
return 0;
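The new tcf_pedit_offload_act_setup() branch lets a driver learn a pedit action's flow-action id even when only querying the action: all keys must share one command, since the whole pedit maps onto a single FLOW_ACTION id. That mapping as a sketch, taking the per-key commands as a plain array rather than the tcf_pedit_cmd() accessor used above:

#include <net/flow_offload.h>
#include <uapi/linux/tc_act/tc_pedit.h>

/* returns a FLOW_ACTION_* id, or -EOPNOTSUPP for mixed/unknown cmds */
static int demo_pedit_offload_id(const u32 *cmds, int nkeys)
{
	int k;

	for (k = 1; k < nkeys; k++)
		if (cmds[k] != cmds[0])
			return -EOPNOTSUPP;	/* mixed SET/ADD keys */

	switch (cmds[0]) {
	case TCA_PEDIT_KEY_EX_CMD_SET:
		return FLOW_ACTION_MANGLE;
	case TCA_PEDIT_KEY_EX_CMD_ADD:
		return FLOW_ACTION_ADD;
	default:
		return -EOPNOTSUPP;
	}
}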
diff --git a/net/sched/act_sample.c b/net/sched/act_sample.c
index f7416b5598e0..4c670e7568dc 100644
--- a/net/sched/act_sample.c
+++ b/net/sched/act_sample.c
@@ -55,8 +55,8 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
sample_policy, NULL);
if (ret < 0)
return ret;
- if (!tb[TCA_SAMPLE_PARMS] || !tb[TCA_SAMPLE_RATE] ||
- !tb[TCA_SAMPLE_PSAMPLE_GROUP])
+
+ if (!tb[TCA_SAMPLE_PARMS])
return -EINVAL;
parm = nla_data(tb[TCA_SAMPLE_PARMS]);
@@ -80,6 +80,13 @@ static int tcf_sample_init(struct net *net, struct nlattr *nla,
tcf_idr_release(*a, bind);
return -EEXIST;
}
+
+ if (!tb[TCA_SAMPLE_RATE] || !tb[TCA_SAMPLE_PSAMPLE_GROUP]) {
+ NL_SET_ERR_MSG(extack, "sample rate and group are required");
+ err = -EINVAL;
+ goto release_idr;
+ }
+
err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
if (err < 0)
goto release_idr;
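act_sample now defers the rate/group requirement until after the index bookkeeping, so a failure releases the reserved index instead of leaking it, and a bind to an existing action no longer demands attributes it does not need. The ordering reduced to a stand-alone sketch with stand-in helpers:

#include <linux/errno.h>

static int demo_reserve_index(void)  { return 0; }	/* stand-in */
static void demo_release_index(void) { }		/* stand-in */

static int demo_init(bool have_rate, bool have_group)
{
	int err = demo_reserve_index();

	if (err)
		return err;

	/* validate optional attributes only once the index is held */
	if (!have_rate || !have_group) {
		err = -EINVAL;
		goto release_idr;
	}
	return 0;

release_idr:
	demo_release_index();	/* undo the reservation on failure */
	return err;
}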
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 668130f08903..2a6b6be0811b 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -22,6 +22,7 @@
#include <linux/idr.h>
#include <linux/jhash.h>
#include <linux/rculist.h>
+#include <linux/rhashtable.h>
#include <net/net_namespace.h>
#include <net/sock.h>
#include <net/netlink.h>
@@ -50,6 +51,109 @@ static LIST_HEAD(tcf_proto_base);
/* Protects list of registered TC modules. It is pure SMP lock. */
static DEFINE_RWLOCK(cls_mod_lock);
+static struct xarray tcf_exts_miss_cookies_xa;
+struct tcf_exts_miss_cookie_node {
+ const struct tcf_chain *chain;
+ const struct tcf_proto *tp;
+ const struct tcf_exts *exts;
+ u32 chain_index;
+ u32 tp_prio;
+ u32 handle;
+ u32 miss_cookie_base;
+ struct rcu_head rcu;
+};
+
+/* Each tc action entry cookie is composed of a 32-bit miss_cookie_base plus
+ * the action index in the exts tc actions array.
+ */
+union tcf_exts_miss_cookie {
+ struct {
+ u32 miss_cookie_base;
+ u32 act_index;
+ };
+ u64 miss_cookie;
+};
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+static int
+tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
+ u32 handle)
+{
+ struct tcf_exts_miss_cookie_node *n;
+ static u32 next;
+ int err;
+
+ if (WARN_ON(!handle || !tp->ops->get_exts))
+ return -EINVAL;
+
+ n = kzalloc(sizeof(*n), GFP_KERNEL);
+ if (!n)
+ return -ENOMEM;
+
+ n->chain_index = tp->chain->index;
+ n->chain = tp->chain;
+ n->tp_prio = tp->prio;
+ n->tp = tp;
+ n->exts = exts;
+ n->handle = handle;
+
+ err = xa_alloc_cyclic(&tcf_exts_miss_cookies_xa, &n->miss_cookie_base,
+ n, xa_limit_32b, &next, GFP_KERNEL);
+ if (err)
+ goto err_xa_alloc;
+
+ exts->miss_cookie_node = n;
+ return 0;
+
+err_xa_alloc:
+ kfree(n);
+ return err;
+}
+
+static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
+{
+ struct tcf_exts_miss_cookie_node *n;
+
+ if (!exts->miss_cookie_node)
+ return;
+
+ n = exts->miss_cookie_node;
+ xa_erase(&tcf_exts_miss_cookies_xa, n->miss_cookie_base);
+ kfree_rcu(n, rcu);
+}
+
+static struct tcf_exts_miss_cookie_node *
+tcf_exts_miss_cookie_lookup(u64 miss_cookie, int *act_index)
+{
+ union tcf_exts_miss_cookie mc = { .miss_cookie = miss_cookie, };
+
+ *act_index = mc.act_index;
+ return xa_load(&tcf_exts_miss_cookies_xa, mc.miss_cookie_base);
+}
+#else /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
+static int
+tcf_exts_miss_cookie_base_alloc(struct tcf_exts *exts, struct tcf_proto *tp,
+ u32 handle)
+{
+ return 0;
+}
+
+static void tcf_exts_miss_cookie_base_destroy(struct tcf_exts *exts)
+{
+}
+#endif /* IS_ENABLED(CONFIG_NET_TC_SKB_EXT) */
+
+static u64 tcf_exts_miss_cookie_get(u32 miss_cookie_base, int act_index)
+{
+ union tcf_exts_miss_cookie mc = { .act_index = act_index, };
+
+ if (!miss_cookie_base)
+ return 0;
+
+ mc.miss_cookie_base = miss_cookie_base;
+ return mc.miss_cookie;
+}
+
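A worked example of the packing above may help: the 64-bit cookie handed to hardware is simply { miss_cookie_base, act_index } overlaid on a u64, so the encode side needs no lookup and the decode side recovers both halves before a single xa_load(). demo_pack() below is illustrative only:

static u64 demo_pack(u32 miss_cookie_base, u32 act_index)
{
	union tcf_exts_miss_cookie mc = {
		.miss_cookie_base = miss_cookie_base,
		.act_index = act_index,
	};

	return mc.miss_cookie;
}

/* e.g. tcf_exts_miss_cookie_lookup(demo_pack(42, 3), &idx) returns the
 * node stored under base 42 and sets idx to 3
 */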
#ifdef CONFIG_NET_CLS_ACT
DEFINE_STATIC_KEY_FALSE(tc_skb_ext_tc);
EXPORT_SYMBOL(tc_skb_ext_tc);
@@ -488,7 +592,8 @@ static struct tcf_chain *tcf_chain_lookup_rcu(const struct tcf_block *block,
#endif
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
- u32 seq, u16 flags, int event, bool unicast);
+ u32 seq, u16 flags, int event, bool unicast,
+ struct netlink_ext_ack *extack);
static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
u32 chain_index, bool create,
@@ -521,7 +626,7 @@ static struct tcf_chain *__tcf_chain_get(struct tcf_block *block,
*/
if (is_first_reference && !by_act)
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
- RTM_NEWCHAIN, false);
+ RTM_NEWCHAIN, false, NULL);
return chain;
@@ -1548,6 +1653,8 @@ static inline int __tcf_classify(struct sk_buff *skb,
const struct tcf_proto *orig_tp,
struct tcf_result *res,
bool compat_mode,
+ struct tcf_exts_miss_cookie_node *n,
+ int act_index,
u32 *last_executed_chain)
{
#ifdef CONFIG_NET_CLS_ACT
@@ -1559,13 +1666,36 @@ reclassify:
#endif
for (; tp; tp = rcu_dereference_bh(tp->next)) {
__be16 protocol = skb_protocol(skb, false);
- int err;
+ int err = 0;
- if (tp->protocol != protocol &&
- tp->protocol != htons(ETH_P_ALL))
- continue;
+ if (n) {
+ struct tcf_exts *exts;
+
+ if (n->tp_prio != tp->prio)
+ continue;
+
+ /* We re-look up the tp and chain by index instead of
+ * holding hard refs and locks on them, so sanity-check
+ * whether any of tp, chain, or exts was replaced by the
+ * time we got here with a cookie from hardware.
+ */
+ if (unlikely(n->tp != tp || n->tp->chain != n->chain ||
+ !tp->ops->get_exts))
+ return TC_ACT_SHOT;
+
+ exts = tp->ops->get_exts(tp, n->handle);
+ if (unlikely(!exts || n->exts != exts))
+ return TC_ACT_SHOT;
- err = tc_classify(skb, tp, res);
+ n = NULL;
+ err = tcf_exts_exec_ex(skb, exts, act_index, res);
+ } else {
+ if (tp->protocol != protocol &&
+ tp->protocol != htons(ETH_P_ALL))
+ continue;
+
+ err = tc_classify(skb, tp, res);
+ }
#ifdef CONFIG_NET_CLS_ACT
if (unlikely(err == TC_ACT_RECLASSIFY && !compat_mode)) {
first_tp = orig_tp;
@@ -1581,6 +1711,9 @@ reclassify:
return err;
}
+ if (unlikely(n))
+ return TC_ACT_SHOT;
+
return TC_ACT_UNSPEC; /* signal: continue lookup */
#ifdef CONFIG_NET_CLS_ACT
reset:
@@ -1605,21 +1738,35 @@ int tcf_classify(struct sk_buff *skb,
#if !IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
u32 last_executed_chain = 0;
- return __tcf_classify(skb, tp, tp, res, compat_mode,
+ return __tcf_classify(skb, tp, tp, res, compat_mode, NULL, 0,
&last_executed_chain);
#else
u32 last_executed_chain = tp ? tp->chain->index : 0;
+ struct tcf_exts_miss_cookie_node *n = NULL;
const struct tcf_proto *orig_tp = tp;
struct tc_skb_ext *ext;
+ int act_index = 0;
int ret;
if (block) {
ext = skb_ext_find(skb, TC_SKB_EXT);
- if (ext && ext->chain) {
+ if (ext && (ext->chain || ext->act_miss)) {
struct tcf_chain *fchain;
+ u32 chain;
+
+ if (ext->act_miss) {
+ n = tcf_exts_miss_cookie_lookup(ext->act_miss_cookie,
+ &act_index);
+ if (!n)
+ return TC_ACT_SHOT;
- fchain = tcf_chain_lookup_rcu(block, ext->chain);
+ chain = n->chain_index;
+ } else {
+ chain = ext->chain;
+ }
+
+ fchain = tcf_chain_lookup_rcu(block, chain);
if (!fchain)
return TC_ACT_SHOT;
@@ -1631,7 +1778,7 @@ int tcf_classify(struct sk_buff *skb,
}
}
- ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode,
+ ret = __tcf_classify(skb, tp, orig_tp, res, compat_mode, n, act_index,
&last_executed_chain);
if (tc_skb_ext_tc_enabled()) {
@@ -1817,7 +1964,8 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
struct tcf_proto *tp, struct tcf_block *block,
struct Qdisc *q, u32 parent, void *fh,
u32 portid, u32 seq, u16 flags, int event,
- bool terse_dump, bool rtnl_held)
+ bool terse_dump, bool rtnl_held,
+ struct netlink_ext_ack *extack)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1857,7 +2005,13 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
goto nla_put_failure;
}
+
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto nla_put_failure;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1871,7 +2025,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct tcf_proto *tp,
struct tcf_block *block, struct Qdisc *q,
u32 parent, void *fh, int event, bool unicast,
- bool rtnl_held)
+ bool rtnl_held, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1883,7 +2037,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, event,
- false, rtnl_held) <= 0) {
+ false, rtnl_held, extack) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1912,7 +2066,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
- false, rtnl_held) <= 0) {
+ false, rtnl_held, extack) <= 0) {
NL_SET_ERR_MSG(extack, "Failed to build del event notification");
kfree_skb(skb);
return -EINVAL;
@@ -1938,14 +2092,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
static void tfilter_notify_chain(struct net *net, struct sk_buff *oskb,
struct tcf_block *block, struct Qdisc *q,
u32 parent, struct nlmsghdr *n,
- struct tcf_chain *chain, int event)
+ struct tcf_chain *chain, int event,
+ struct netlink_ext_ack *extack)
{
struct tcf_proto *tp;
for (tp = tcf_get_next_proto(chain, NULL);
tp; tp = tcf_get_next_proto(chain, tp))
- tfilter_notify(net, oskb, n, tp, block,
- q, parent, NULL, event, false, true);
+ tfilter_notify(net, oskb, n, tp, block, q, parent, NULL,
+ event, false, true, extack);
}
static void tfilter_put(struct tcf_proto *tp, void *fh)
@@ -2156,7 +2311,7 @@ replay:
flags, extack);
if (err == 0) {
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_NEWTFILTER, false, rtnl_held);
+ RTM_NEWTFILTER, false, rtnl_held, extack);
tfilter_put(tp, fh);
/* q pointer is NULL for shared blocks */
if (q)
@@ -2284,7 +2439,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
if (prio == 0) {
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, extack);
tcf_chain_flush(chain, rtnl_held);
err = 0;
goto errout;
@@ -2308,7 +2463,7 @@ static int tc_del_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
tcf_proto_put(tp, rtnl_held, NULL);
tfilter_notify(net, skb, n, tp, block, q, parent, fh,
- RTM_DELTFILTER, false, rtnl_held);
+ RTM_DELTFILTER, false, rtnl_held, extack);
err = 0;
goto errout;
}
@@ -2452,7 +2607,7 @@ static int tc_get_tfilter(struct sk_buff *skb, struct nlmsghdr *n,
err = -ENOENT;
} else {
err = tfilter_notify(net, skb, n, tp, block, q, parent,
- fh, RTM_NEWTFILTER, true, rtnl_held);
+ fh, RTM_NEWTFILTER, true, rtnl_held, NULL);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send filter notify message");
}
@@ -2490,7 +2645,7 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
n, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, a->terse_dump, true);
+ RTM_NEWTFILTER, a->terse_dump, true, NULL);
}
static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
@@ -2524,7 +2679,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTFILTER, false, true) <= 0)
+ RTM_NEWTFILTER, false, true, NULL) <= 0)
goto errout;
cb->args[1] = 1;
}
@@ -2667,7 +2822,8 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
void *tmplt_priv, u32 chain_index,
struct net *net, struct sk_buff *skb,
struct tcf_block *block,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ struct netlink_ext_ack *extack)
{
unsigned char *b = skb_tail_pointer(skb);
const struct tcf_proto_ops *ops;
@@ -2704,7 +2860,12 @@ static int tc_chain_fill_node(const struct tcf_proto_ops *tmplt_ops,
goto nla_put_failure;
}
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -2714,7 +2875,8 @@ nla_put_failure:
}
static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
- u32 seq, u16 flags, int event, bool unicast)
+ u32 seq, u16 flags, int event, bool unicast,
+ struct netlink_ext_ack *extack)
{
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
struct tcf_block *block = chain->block;
@@ -2728,7 +2890,7 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb,
if (tc_chain_fill_node(chain->tmplt_ops, chain->tmplt_priv,
chain->index, net, skb, block, portid,
- seq, flags, event) <= 0) {
+ seq, flags, event, extack) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2756,7 +2918,7 @@ static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops,
return -ENOBUFS;
if (tc_chain_fill_node(tmplt_ops, tmplt_priv, chain_index, net, skb,
- block, portid, seq, flags, RTM_DELCHAIN) <= 0) {
+ block, portid, seq, flags, RTM_DELCHAIN, NULL) <= 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2908,11 +3070,11 @@ replay:
}
tc_chain_notify(chain, NULL, 0, NLM_F_CREATE | NLM_F_EXCL,
- RTM_NEWCHAIN, false);
+ RTM_NEWCHAIN, false, extack);
break;
case RTM_DELCHAIN:
tfilter_notify_chain(net, skb, block, q, parent, n,
- chain, RTM_DELTFILTER);
+ chain, RTM_DELTFILTER, extack);
/* Flush the chain first as the user requested chain removal. */
tcf_chain_flush(chain, true);
/* In case the chain was successfully deleted, put a reference
@@ -2922,7 +3084,7 @@ replay:
break;
case RTM_GETCHAIN:
err = tc_chain_notify(chain, skb, n->nlmsg_seq,
- n->nlmsg_flags, n->nlmsg_type, true);
+ n->nlmsg_flags, n->nlmsg_type, true, extack);
if (err < 0)
NL_SET_ERR_MSG(extack, "Failed to send chain notify message");
break;
@@ -3022,7 +3184,7 @@ static int tc_dump_chain(struct sk_buff *skb, struct netlink_callback *cb)
chain->index, net, skb, block,
NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWCHAIN);
+ RTM_NEWCHAIN, NULL);
if (err <= 0)
break;
index++;
@@ -3040,8 +3202,47 @@ out:
return skb->len;
}
+int tcf_exts_init_ex(struct tcf_exts *exts, struct net *net, int action,
+ int police, struct tcf_proto *tp, u32 handle,
+ bool use_action_miss)
+{
+ int err = 0;
+
+#ifdef CONFIG_NET_CLS_ACT
+ exts->type = 0;
+ exts->nr_actions = 0;
+ /* Note: we do not yet own a reference on net.
+ * This reference might be taken later from tcf_exts_get_net().
+ */
+ exts->net = net;
+ exts->actions = kcalloc(TCA_ACT_MAX_PRIO, sizeof(struct tc_action *),
+ GFP_KERNEL);
+ if (!exts->actions)
+ return -ENOMEM;
+#endif
+
+ exts->action = action;
+ exts->police = police;
+
+ if (!use_action_miss)
+ return 0;
+
+ err = tcf_exts_miss_cookie_base_alloc(exts, tp, handle);
+ if (err)
+ goto err_miss_alloc;
+
+ return 0;
+
+err_miss_alloc:
+ tcf_exts_destroy(exts);
+ return err;
+}
+EXPORT_SYMBOL(tcf_exts_init_ex);
+
void tcf_exts_destroy(struct tcf_exts *exts)
{
+ tcf_exts_miss_cookie_base_destroy(exts);
+
#ifdef CONFIG_NET_CLS_ACT
if (exts->actions) {
tcf_action_destroy(exts->actions, TCA_ACT_UNBIND);
@@ -3474,28 +3675,28 @@ int tc_setup_cb_reoffload(struct tcf_block *block, struct tcf_proto *tp,
}
EXPORT_SYMBOL(tc_setup_cb_reoffload);
-static int tcf_act_get_cookie(struct flow_action_entry *entry,
- const struct tc_action *act)
+static int tcf_act_get_user_cookie(struct flow_action_entry *entry,
+ const struct tc_action *act)
{
- struct tc_cookie *cookie;
+ struct tc_cookie *user_cookie;
int err = 0;
rcu_read_lock();
- cookie = rcu_dereference(act->act_cookie);
- if (cookie) {
- entry->cookie = flow_action_cookie_create(cookie->data,
- cookie->len,
- GFP_ATOMIC);
- if (!entry->cookie)
+ user_cookie = rcu_dereference(act->user_cookie);
+ if (user_cookie) {
+ entry->user_cookie = flow_action_cookie_create(user_cookie->data,
+ user_cookie->len,
+ GFP_ATOMIC);
+ if (!entry->user_cookie)
err = -ENOMEM;
}
rcu_read_unlock();
return err;
}
-static void tcf_act_put_cookie(struct flow_action_entry *entry)
+static void tcf_act_put_user_cookie(struct flow_action_entry *entry)
{
- flow_action_cookie_destroy(entry->cookie);
+ flow_action_cookie_destroy(entry->user_cookie);
}
void tc_cleanup_offload_action(struct flow_action *flow_action)
@@ -3504,7 +3705,7 @@ void tc_cleanup_offload_action(struct flow_action *flow_action)
int i;
flow_action_for_each(i, entry, flow_action) {
- tcf_act_put_cookie(entry);
+ tcf_act_put_user_cookie(entry);
if (entry->destructor)
entry->destructor(entry->destructor_priv);
}
@@ -3531,6 +3732,7 @@ static int tc_setup_offload_act(struct tc_action *act,
int tc_setup_action(struct flow_action *flow_action,
struct tc_action *actions[],
+ u32 miss_cookie_base,
struct netlink_ext_ack *extack)
{
int i, j, k, index, err = 0;
@@ -3549,7 +3751,7 @@ int tc_setup_action(struct flow_action *flow_action,
entry = &flow_action->entries[j];
spin_lock_bh(&act->tcfa_lock);
- err = tcf_act_get_cookie(entry, act);
+ err = tcf_act_get_user_cookie(entry, act);
if (err)
goto err_out_locked;
@@ -3561,6 +3763,9 @@ int tc_setup_action(struct flow_action *flow_action,
for (k = 0; k < index ; k++) {
entry[k].hw_stats = tc_act_hw_stats(act->hw_stats);
entry[k].hw_index = act->tcfa_index;
+ entry[k].cookie = (unsigned long)act;
+ entry[k].miss_cookie =
+ tcf_exts_miss_cookie_get(miss_cookie_base, i);
}
j += index;
@@ -3583,10 +3788,15 @@ int tc_setup_offload_action(struct flow_action *flow_action,
struct netlink_ext_ack *extack)
{
#ifdef CONFIG_NET_CLS_ACT
+ u32 miss_cookie_base;
+
if (!exts)
return 0;
- return tc_setup_action(flow_action, exts->actions, extack);
+ miss_cookie_base = exts->miss_cookie_node ?
+ exts->miss_cookie_node->miss_cookie_base : 0;
+ return tc_setup_action(flow_action, exts->actions, miss_cookie_base,
+ extack);
#else
return 0;
#endif
@@ -3754,6 +3964,8 @@ static int __init tc_filter_init(void)
if (err)
goto err_register_pernet_subsys;
+ xa_init_flags(&tcf_exts_miss_cookies_xa, XA_FLAGS_ALLOC1);
+
rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
RTNL_FLAG_DOIT_UNLOCKED);
rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
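tc_filter_init() initializes the miss-cookie xarray with XA_FLAGS_ALLOC1 so id 0 stays unused and can mean "no cookie". A minimal stand-alone sketch of the cyclic 32-bit id allocation that tcf_exts_miss_cookie_base_alloc() relies on:

#include <linux/xarray.h>

static struct xarray demo_xa;
static u32 demo_next;

static void demo_xa_setup(void)
{
	/* ALLOC1 keeps id 0 free, so 0 can encode "no cookie" */
	xa_init_flags(&demo_xa, XA_FLAGS_ALLOC1);
}

static int demo_store(void *obj, u32 *id)
{
	/* cyclic allocation reduces id reuse right after an erase;
	 * returns 0 on success, 1 if the counter wrapped, or -errno
	 */
	return xa_alloc_cyclic(&demo_xa, id, obj, xa_limit_32b,
			       &demo_next, GFP_KERNEL);
}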
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0b15698b3531..e960a46b0520 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -502,12 +502,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f,
tc_setup_cb_call(block, TC_SETUP_CLSFLOWER, &cls_flower, false,
rtnl_held);
- tcf_exts_hw_stats_update(&f->exts, cls_flower.stats.bytes,
- cls_flower.stats.pkts,
- cls_flower.stats.drops,
- cls_flower.stats.lastused,
- cls_flower.stats.used_hw_stats,
- cls_flower.stats.used_hw_stats_valid);
+ tcf_exts_hw_stats_update(&f->exts, &cls_flower.stats, cls_flower.use_act_stats);
}
static void __fl_put(struct cls_fl_filter *f)
@@ -534,6 +529,15 @@ static struct cls_fl_filter *__fl_get(struct cls_fl_head *head, u32 handle)
return f;
}
+static struct tcf_exts *fl_get_exts(const struct tcf_proto *tp, u32 handle)
+{
+ struct cls_fl_head *head = rcu_dereference_bh(tp->root);
+ struct cls_fl_filter *f;
+
+ f = idr_find(&head->handle_idr, handle);
+ return f ? &f->exts : NULL;
+}
+
static int __fl_delete(struct tcf_proto *tp, struct cls_fl_filter *f,
bool *last, bool rtnl_held,
struct netlink_ext_ack *extack)
@@ -2192,10 +2196,6 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
INIT_LIST_HEAD(&fnew->hw_list);
refcount_set(&fnew->refcnt, 1);
- err = tcf_exts_init(&fnew->exts, net, TCA_FLOWER_ACT, 0);
- if (err < 0)
- goto errout;
-
if (tb[TCA_FLOWER_FLAGS]) {
fnew->flags = nla_get_u32(tb[TCA_FLOWER_FLAGS]);
@@ -2205,15 +2205,46 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
}
}
+ if (!fold) {
+ spin_lock(&tp->lock);
+ if (!handle) {
+ handle = 1;
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ INT_MAX, GFP_ATOMIC);
+ } else {
+ err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
+ handle, GFP_ATOMIC);
+
+ /* Filter with specified handle was concurrently
+ * inserted after initial check in cls_api. This is not
+ * necessarily an error if NLM_F_EXCL is not set in
+ * message flags. Returning EAGAIN will cause cls_api to
+ * try to update concurrently inserted rule.
+ */
+ if (err == -ENOSPC)
+ err = -EAGAIN;
+ }
+ spin_unlock(&tp->lock);
+
+ if (err)
+ goto errout;
+ }
+ fnew->handle = handle;
+
+ err = tcf_exts_init_ex(&fnew->exts, net, TCA_FLOWER_ACT, 0, tp, handle,
+ !tc_skip_hw(fnew->flags));
+ if (err < 0)
+ goto errout_idr;
+
err = fl_set_parms(net, tp, fnew, mask, base, tb, tca[TCA_RATE],
tp->chain->tmplt_priv, flags, fnew->flags,
extack);
if (err)
- goto errout;
+ goto errout_idr;
err = fl_check_assign_mask(head, fnew, fold, mask);
if (err)
- goto errout;
+ goto errout_idr;
err = fl_ht_insert_unique(fnew, fold, &in_ht);
if (err)
@@ -2279,29 +2310,9 @@ static int fl_change(struct net *net, struct sk_buff *in_skb,
refcount_dec(&fold->refcnt);
__fl_put(fold);
} else {
- if (handle) {
- /* user specifies a handle and it doesn't exist */
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- handle, GFP_ATOMIC);
-
- /* Filter with specified handle was concurrently
- * inserted after initial check in cls_api. This is not
- * necessarily an error if NLM_F_EXCL is not set in
- * message flags. Returning EAGAIN will cause cls_api to
- * try to update concurrently inserted rule.
- */
- if (err == -ENOSPC)
- err = -EAGAIN;
- } else {
- handle = 1;
- err = idr_alloc_u32(&head->handle_idr, fnew, &handle,
- INT_MAX, GFP_ATOMIC);
- }
- if (err)
- goto errout_hw;
+ idr_replace(&head->handle_idr, fnew, fnew->handle);
refcount_inc(&fnew->refcnt);
- fnew->handle = handle;
list_add_tail_rcu(&fnew->list, &fnew->mask->filters);
spin_unlock(&tp->lock);
}
@@ -2324,6 +2335,8 @@ errout_hw:
fnew->mask->filter_ht_params);
errout_mask:
fl_mask_put(head, fnew->mask);
+errout_idr:
+ idr_remove(&head->handle_idr, fnew->handle);
errout:
__fl_put(fnew);
errout_tb:
@@ -3441,6 +3454,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
.tmplt_dump = fl_tmplt_dump,
+ .get_exts = fl_get_exts,
.owner = THIS_MODULE,
.flags = TCF_PROTO_OPS_DOIT_UNLOCKED,
};
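
The reshuffling in fl_change() above reserves the filter handle in the IDR before tcf_exts_init_ex() runs, so the miss cookie can be derived from a stable handle; on the no-fold path the entry is then confirmed with idr_replace() at commit time. A minimal standalone sketch of that reserve-then-publish pattern (kernel IDR API; names are illustrative, not the flower code itself):

#include <linux/idr.h>
#include <linux/limits.h>

static DEFINE_IDR(example_idr);

/* Reserve an ID early so it can be embedded in dependent state, then
 * publish the final object under the same ID once setup succeeded.
 */
static int reserve_and_publish(void *placeholder, void *final_obj)
{
	u32 id = 1;
	int err;

	err = idr_alloc_u32(&example_idr, placeholder, &id, INT_MAX,
			    GFP_KERNEL);
	if (err)
		return err;

	/* ... initialize state that records 'id' and may fail ... */

	idr_replace(&example_idr, final_obj, id);
	return 0;
}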
diff --git a/net/sched/cls_matchall.c b/net/sched/cls_matchall.c
index 705f63da2c21..fa3bbd187eb9 100644
--- a/net/sched/cls_matchall.c
+++ b/net/sched/cls_matchall.c
@@ -331,11 +331,7 @@ static void mall_stats_hw_filter(struct tcf_proto *tp,
tc_setup_cb_call(block, TC_SETUP_CLSMATCHALL, &cls_mall, false, true);
- tcf_exts_hw_stats_update(&head->exts, cls_mall.stats.bytes,
- cls_mall.stats.pkts, cls_mall.stats.drops,
- cls_mall.stats.lastused,
- cls_mall.stats.used_hw_stats,
- cls_mall.stats.used_hw_stats_valid);
+ tcf_exts_hw_stats_update(&head->exts, &cls_mall.stats, cls_mall.use_act_stats);
}
static int mall_dump(struct net *net, struct tcf_proto *tp, void *fh,
diff --git a/net/sched/cls_rsvp.c b/net/sched/cls_rsvp.c
deleted file mode 100644
index 03d8619bd9c6..000000000000
--- a/net/sched/cls_rsvp.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp.c Special RSVP packet classifier for IPv4.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/ip.h>
-#include <net/netlink.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/tc_wrapper.h>
-
-#define RSVP_DST_LEN 1
-#define RSVP_ID "rsvp"
-#define RSVP_OPS cls_rsvp_ops
-#define RSVP_CLS rsvp_classify
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_rsvp.h b/net/sched/cls_rsvp.h
deleted file mode 100644
index 869efba9f834..000000000000
--- a/net/sched/cls_rsvp.h
+++ /dev/null
@@ -1,764 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-/*
- Compared to the general packet classification problem,
- RSVP needs only several relatively simple rules:
-
- * (dst, protocol) are always specified,
- so that we are able to hash them.
- * src may be exact, or may be wildcard, so that
- we can keep a hash table plus one wildcard entry.
- * source port (or flow label) is important only if src is given.
-
- IMPLEMENTATION.
-
- We use a two level hash table: The top level is keyed by
- destination address and protocol ID; every bucket contains a list
- of "rsvp sessions", identified by destination address, protocol and
- DPI(="Destination Port ID"): triple (key, mask, offset).
-
- Every bucket has a smaller hash table keyed by source address
- (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
- Every bucket is again a list of "RSVP flows", selected by
- source address and SPI(="Source Port ID" here rather than
- "security parameter index"): triple (key, mask, offset).
-
-
- NOTE 1. All packets with IPv6 extension headers (except AH and ESP)
- and all fragmented packets go to the best-effort traffic class.
-
-
- NOTE 2. Two "port id"'s seems to be redundant, rfc2207 requires
- only one "Generalized Port Identifier". So that for classic
- ah, esp (and udp,tcp) both *pi should coincide or one of them
- should be wildcard.
-
- At first sight, this redundancy is just a waste of CPU
- resources. But DPI and SPI make it possible to assign different
- priorities to GPIs. Look also at note 4 about tunnels below.
-
-
- NOTE 3. One complication is the case of tunneled packets.
- We implement it as follows: if the first lookup
- matches a special session with "tunnelhdr" value not zero,
- flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
- In this case, we pull tunnelhdr bytes and restart lookup
- with tunnel ID added to the list of keys. Simple and stupid 8)8)
- It's enough for PIMREG and IPIP.
-
-
- NOTE 4. Two GPIs make it possible to parse even GRE packets.
- E.g., DPI can select ETH_P_IP (and necessary flags to make
- tunnelhdr correct) in GRE protocol field and SPI matches
- GRE key. Is it not nice? 8)8)
-
-
- Well, as a result, despite its simplicity, we get a pretty
- powerful classification engine. */
-
-
-struct rsvp_head {
- u32 tmap[256/32];
- u32 hgenerator;
- u8 tgenerator;
- struct rsvp_session __rcu *ht[256];
- struct rcu_head rcu;
-};
-
-struct rsvp_session {
- struct rsvp_session __rcu *next;
- __be32 dst[RSVP_DST_LEN];
- struct tc_rsvp_gpi dpi;
- u8 protocol;
- u8 tunnelid;
- /* 16 (src,sport) hash slots, and one wildcard source slot */
- struct rsvp_filter __rcu *ht[16 + 1];
- struct rcu_head rcu;
-};
-
-
-struct rsvp_filter {
- struct rsvp_filter __rcu *next;
- __be32 src[RSVP_DST_LEN];
- struct tc_rsvp_gpi spi;
- u8 tunnelhdr;
-
- struct tcf_result res;
- struct tcf_exts exts;
-
- u32 handle;
- struct rsvp_session *sess;
- struct rcu_work rwork;
-};
-
-static inline unsigned int hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
-{
- unsigned int h = (__force __u32)dst[RSVP_DST_LEN - 1];
-
- h ^= h>>16;
- h ^= h>>8;
- return (h ^ protocol ^ tunnelid) & 0xFF;
-}
-
-static inline unsigned int hash_src(__be32 *src)
-{
- unsigned int h = (__force __u32)src[RSVP_DST_LEN-1];
-
- h ^= h>>16;
- h ^= h>>8;
- h ^= h>>4;
- return h & 0xF;
-}
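
Both helpers XOR-fold the low 32 bits of the address down to a small bucket index (8 bits for the destination table, 4 bits for the per-session source table). A standalone userspace illustration of the same arithmetic, with a made-up sample address:

#include <stdint.h>
#include <stdio.h>

/* XOR-fold a 32-bit value to 8 bits (as hash_dst does before mixing
 * in protocol and tunnel id) and to 4 bits (as hash_src does).
 */
static unsigned int fold8(uint32_t h)
{
	h ^= h >> 16;
	h ^= h >> 8;
	return h & 0xFF;
}

static unsigned int fold4(uint32_t h)
{
	h ^= h >> 16;
	h ^= h >> 8;
	h ^= h >> 4;
	return h & 0xF;
}

int main(void)
{
	uint32_t addr = 0x0a00002a;	/* hypothetical address bits */

	printf("dst bucket %u, src bucket %u\n", fold8(addr), fold4(addr));
	return 0;
}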
-
-#define RSVP_APPLY_RESULT() \
-{ \
- int r = tcf_exts_exec(skb, &f->exts, res); \
- if (r < 0) \
- continue; \
- else if (r > 0) \
- return r; \
-}
-
-TC_INDIRECT_SCOPE int RSVP_CLS(struct sk_buff *skb, const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct rsvp_head *head = rcu_dereference_bh(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1, h2;
- __be32 *dst, *src;
- u8 protocol;
- u8 tunnelid = 0;
- u8 *xprt;
-#if RSVP_DST_LEN == 4
- struct ipv6hdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ipv6_hdr(skb);
-#else
- struct iphdr *nhptr;
-
- if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
- return -1;
- nhptr = ip_hdr(skb);
-#endif
-restart:
-
-#if RSVP_DST_LEN == 4
- src = &nhptr->saddr.s6_addr32[0];
- dst = &nhptr->daddr.s6_addr32[0];
- protocol = nhptr->nexthdr;
- xprt = ((u8 *)nhptr) + sizeof(struct ipv6hdr);
-#else
- src = &nhptr->saddr;
- dst = &nhptr->daddr;
- protocol = nhptr->protocol;
- xprt = ((u8 *)nhptr) + (nhptr->ihl<<2);
- if (ip_is_fragment(nhptr))
- return -1;
-#endif
-
- h1 = hash_dst(dst, protocol, tunnelid);
- h2 = hash_src(src);
-
- for (s = rcu_dereference_bh(head->ht[h1]); s;
- s = rcu_dereference_bh(s->next)) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN - 1] &&
- protocol == s->protocol &&
- !(s->dpi.mask &
- (*(u32 *)(xprt + s->dpi.offset) ^ s->dpi.key)) &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- tunnelid == s->tunnelid) {
-
- for (f = rcu_dereference_bh(s->ht[h2]); f;
- f = rcu_dereference_bh(f->next)) {
- if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN - 1] &&
- !(f->spi.mask & (*(u32 *)(xprt + f->spi.offset) ^ f->spi.key))
-#if RSVP_DST_LEN == 4
- &&
- src[0] == f->src[0] &&
- src[1] == f->src[1] &&
- src[2] == f->src[2]
-#endif
- ) {
- *res = f->res;
- RSVP_APPLY_RESULT();
-
-matched:
- if (f->tunnelhdr == 0)
- return 0;
-
- tunnelid = f->res.classid;
- nhptr = (void *)(xprt + f->tunnelhdr - sizeof(*nhptr));
- goto restart;
- }
- }
-
- /* And wildcard bucket... */
- for (f = rcu_dereference_bh(s->ht[16]); f;
- f = rcu_dereference_bh(f->next)) {
- *res = f->res;
- RSVP_APPLY_RESULT();
- goto matched;
- }
- return -1;
- }
- }
- return -1;
-}
-
-static void rsvp_replace(struct tcf_proto *tp, struct rsvp_filter *n, u32 h)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter __rcu **ins;
- struct rsvp_filter *pins;
- unsigned int h1 = h & 0xFF;
- unsigned int h2 = (h >> 8) & 0xFF;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (ins = &s->ht[h2], pins = rtnl_dereference(*ins); ;
- ins = &pins->next, pins = rtnl_dereference(*ins)) {
- if (pins->handle == h) {
- RCU_INIT_POINTER(n->next, pins->next);
- rcu_assign_pointer(*ins, n);
- return;
- }
- }
- }
-
- /* Something went wrong if we are trying to replace a non-existent
- * node. Might as well halt instead of silently failing.
- */
- BUG_ON(1);
-}
-
-static void *rsvp_get(struct tcf_proto *tp, u32 handle)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_session *s;
- struct rsvp_filter *f;
- unsigned int h1 = handle & 0xFF;
- unsigned int h2 = (handle >> 8) & 0xFF;
-
- if (h2 > 16)
- return NULL;
-
- for (s = rtnl_dereference(head->ht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->handle == handle)
- return f;
- }
- }
- return NULL;
-}
-
-static int rsvp_init(struct tcf_proto *tp)
-{
- struct rsvp_head *data;
-
- data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
- if (data) {
- rcu_assign_pointer(tp->root, data);
- return 0;
- }
- return -ENOBUFS;
-}
-
-static void __rsvp_delete_filter(struct rsvp_filter *f)
-{
- tcf_exts_destroy(&f->exts);
- tcf_exts_put_net(&f->exts);
- kfree(f);
-}
-
-static void rsvp_delete_filter_work(struct work_struct *work)
-{
- struct rsvp_filter *f = container_of(to_rcu_work(work),
- struct rsvp_filter,
- rwork);
- rtnl_lock();
- __rsvp_delete_filter(f);
- rtnl_unlock();
-}
-
-static void rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
-{
- tcf_unbind_filter(tp, &f->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (tcf_exts_get_net(&f->exts))
- tcf_queue_work(&f->rwork, rsvp_delete_filter_work);
- else
- __rsvp_delete_filter(f);
-}
-
-static void rsvp_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int h1, h2;
-
- if (data == NULL)
- return;
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
-
- while ((s = rtnl_dereference(data->ht[h1])) != NULL) {
- RCU_INIT_POINTER(data->ht[h1], s->next);
-
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- while ((f = rtnl_dereference(s->ht[h2])) != NULL) {
- rcu_assign_pointer(s->ht[h2], f->next);
- rsvp_delete_filter(tp, f);
- }
- }
- kfree_rcu(s, rcu);
- }
- }
- kfree_rcu(data, rcu);
-}
-
-static int rsvp_delete(struct tcf_proto *tp, void *arg, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- struct rsvp_filter *nfp, *f = arg;
- struct rsvp_filter __rcu **fp;
- unsigned int h = f->handle;
- struct rsvp_session __rcu **sp;
- struct rsvp_session *nsp, *s = f->sess;
- int i, h1;
-
- fp = &s->ht[(h >> 8) & 0xFF];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- if (nfp == f) {
- RCU_INIT_POINTER(*fp, f->next);
- rsvp_delete_filter(tp, f);
-
- /* Strip tree */
-
- for (i = 0; i <= 16; i++)
- if (s->ht[i])
- goto out;
-
- /* OK, session has no flows */
- sp = &head->ht[h & 0xFF];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if (nsp == s) {
- RCU_INIT_POINTER(*sp, s->next);
- kfree_rcu(s, rcu);
- goto out;
- }
- }
-
- break;
- }
- }
-
-out:
- *last = true;
- for (h1 = 0; h1 < 256; h1++) {
- if (rcu_access_pointer(head->ht[h1])) {
- *last = false;
- break;
- }
- }
-
- return 0;
-}
-
-static unsigned int gen_handle(struct tcf_proto *tp, unsigned salt)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- int i = 0xFFFF;
-
- while (i-- > 0) {
- u32 h;
-
- if ((data->hgenerator += 0x10000) == 0)
- data->hgenerator = 0x10000;
- h = data->hgenerator|salt;
- if (!rsvp_get(tp, h))
- return h;
- }
- return 0;
-}
-
-static int tunnel_bts(struct rsvp_head *data)
-{
- int n = data->tgenerator >> 5;
- u32 b = 1 << (data->tgenerator & 0x1F);
-
- if (data->tmap[n] & b)
- return 0;
- data->tmap[n] |= b;
- return 1;
-}
-
-static void tunnel_recycle(struct rsvp_head *data)
-{
- struct rsvp_session __rcu **sht = data->ht;
- u32 tmap[256/32];
- int h1, h2;
-
- memset(tmap, 0, sizeof(tmap));
-
- for (h1 = 0; h1 < 256; h1++) {
- struct rsvp_session *s;
- for (s = rtnl_dereference(sht[h1]); s;
- s = rtnl_dereference(s->next)) {
- for (h2 = 0; h2 <= 16; h2++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h2]); f;
- f = rtnl_dereference(f->next)) {
- if (f->tunnelhdr == 0)
- continue;
- data->tgenerator = f->res.classid;
- tunnel_bts(data);
- }
- }
- }
- }
-
- memcpy(data->tmap, tmap, sizeof(tmap));
-}
-
-static u32 gen_tunnel(struct rsvp_head *data)
-{
- int i, k;
-
- for (k = 0; k < 2; k++) {
- for (i = 255; i > 0; i--) {
- if (++data->tgenerator == 0)
- data->tgenerator = 1;
- if (tunnel_bts(data))
- return data->tgenerator;
- }
- tunnel_recycle(data);
- }
- return 0;
-}
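
tunnel_bts() treats tmap[] as a 256-bit bitmap: tunnel id n lives in word n >> 5 under mask 1 << (n & 0x1F). A quick standalone check of that indexing:

#include <assert.h>
#include <stdint.h>

int main(void)
{
	uint32_t tmap[256 / 32] = { 0 };
	uint8_t id = 77;	/* arbitrary tunnel id: 77 = 2 * 32 + 13 */

	tmap[id >> 5] |= 1u << (id & 0x1F);
	assert(tmap[2] == 1u << 13);
	return 0;
}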
-
-static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
- [TCA_RSVP_CLASSID] = { .type = NLA_U32 },
- [TCA_RSVP_DST] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_SRC] = { .len = RSVP_DST_LEN * sizeof(u32) },
- [TCA_RSVP_PINFO] = { .len = sizeof(struct tc_rsvp_pinfo) },
-};
-
-static int rsvp_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base,
- u32 handle, struct nlattr **tca,
- void **arg, u32 flags,
- struct netlink_ext_ack *extack)
-{
- struct rsvp_head *data = rtnl_dereference(tp->root);
- struct rsvp_filter *f, *nfp;
- struct rsvp_filter __rcu **fp;
- struct rsvp_session *nsp, *s;
- struct rsvp_session __rcu **sp;
- struct tc_rsvp_pinfo *pinfo = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_RSVP_MAX + 1];
- struct tcf_exts e;
- unsigned int h1, h2;
- __be32 *dst;
- int err;
-
- if (opt == NULL)
- return handle ? -EINVAL : 0;
-
- err = nla_parse_nested_deprecated(tb, TCA_RSVP_MAX, opt, rsvp_policy,
- NULL);
- if (err < 0)
- return err;
-
- err = tcf_exts_init(&e, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, tca[TCA_RATE], &e, flags,
- extack);
- if (err < 0)
- goto errout2;
-
- f = *arg;
- if (f) {
- /* Node exists: adjust only classid */
- struct rsvp_filter *n;
-
- if (f->handle != handle && handle)
- goto errout2;
-
- n = kmemdup(f, sizeof(*f), GFP_KERNEL);
- if (!n) {
- err = -ENOMEM;
- goto errout2;
- }
-
- err = tcf_exts_init(&n->exts, net, TCA_RSVP_ACT,
- TCA_RSVP_POLICE);
- if (err < 0) {
- kfree(n);
- goto errout2;
- }
-
- if (tb[TCA_RSVP_CLASSID]) {
- n->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
- tcf_bind_filter(tp, &n->res, base);
- }
-
- tcf_exts_change(&n->exts, &e);
- rsvp_replace(tp, n, handle);
- return 0;
- }
-
- /* Now more serious part... */
- err = -EINVAL;
- if (handle)
- goto errout2;
- if (tb[TCA_RSVP_DST] == NULL)
- goto errout2;
-
- err = -ENOBUFS;
- f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
- if (f == NULL)
- goto errout2;
-
- err = tcf_exts_init(&f->exts, net, TCA_RSVP_ACT, TCA_RSVP_POLICE);
- if (err < 0)
- goto errout;
- h2 = 16;
- if (tb[TCA_RSVP_SRC]) {
- memcpy(f->src, nla_data(tb[TCA_RSVP_SRC]), sizeof(f->src));
- h2 = hash_src(f->src);
- }
- if (tb[TCA_RSVP_PINFO]) {
- pinfo = nla_data(tb[TCA_RSVP_PINFO]);
- f->spi = pinfo->spi;
- f->tunnelhdr = pinfo->tunnelhdr;
- }
- if (tb[TCA_RSVP_CLASSID])
- f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID]);
-
- dst = nla_data(tb[TCA_RSVP_DST]);
- h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
-
- err = -ENOMEM;
- if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
- goto errout;
-
- if (f->tunnelhdr) {
- err = -EINVAL;
- if (f->res.classid > 255)
- goto errout;
-
- err = -ENOMEM;
- if (f->res.classid == 0 &&
- (f->res.classid = gen_tunnel(data)) == 0)
- goto errout;
- }
-
- for (sp = &data->ht[h1];
- (s = rtnl_dereference(*sp)) != NULL;
- sp = &s->next) {
- if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
- pinfo && pinfo->protocol == s->protocol &&
- memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
-#if RSVP_DST_LEN == 4
- dst[0] == s->dst[0] &&
- dst[1] == s->dst[1] &&
- dst[2] == s->dst[2] &&
-#endif
- pinfo->tunnelid == s->tunnelid) {
-
-insert:
- /* OK, we found appropriate session */
-
- fp = &s->ht[h2];
-
- f->sess = s;
- if (f->tunnelhdr == 0)
- tcf_bind_filter(tp, &f->res, base);
-
- tcf_exts_change(&f->exts, &e);
-
- fp = &s->ht[h2];
- for (nfp = rtnl_dereference(*fp); nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp)) {
- __u32 mask = nfp->spi.mask & f->spi.mask;
-
- if (mask != f->spi.mask)
- break;
- }
- RCU_INIT_POINTER(f->next, nfp);
- rcu_assign_pointer(*fp, f);
-
- *arg = f;
- return 0;
- }
- }
-
- /* No session found. Create new one. */
-
- err = -ENOBUFS;
- s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
- if (s == NULL)
- goto errout;
- memcpy(s->dst, dst, sizeof(s->dst));
-
- if (pinfo) {
- s->dpi = pinfo->dpi;
- s->protocol = pinfo->protocol;
- s->tunnelid = pinfo->tunnelid;
- }
- sp = &data->ht[h1];
- for (nsp = rtnl_dereference(*sp); nsp;
- sp = &nsp->next, nsp = rtnl_dereference(*sp)) {
- if ((nsp->dpi.mask & s->dpi.mask) != s->dpi.mask)
- break;
- }
- RCU_INIT_POINTER(s->next, nsp);
- rcu_assign_pointer(*sp, s);
-
- goto insert;
-
-errout:
- tcf_exts_destroy(&f->exts);
- kfree(f);
-errout2:
- tcf_exts_destroy(&e);
- return err;
-}
-
-static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg,
- bool rtnl_held)
-{
- struct rsvp_head *head = rtnl_dereference(tp->root);
- unsigned int h, h1;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < 256; h++) {
- struct rsvp_session *s;
-
- for (s = rtnl_dereference(head->ht[h]); s;
- s = rtnl_dereference(s->next)) {
- for (h1 = 0; h1 <= 16; h1++) {
- struct rsvp_filter *f;
-
- for (f = rtnl_dereference(s->ht[h1]); f;
- f = rtnl_dereference(f->next)) {
- if (!tc_cls_stats_dump(tp, arg, f))
- return;
- }
- }
- }
- }
-}
-
-static int rsvp_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
- struct rsvp_filter *f = fh;
- struct rsvp_session *s;
- struct nlattr *nest;
- struct tc_rsvp_pinfo pinfo;
-
- if (f == NULL)
- return skb->len;
- s = f->sess;
-
- t->tcm_handle = f->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst))
- goto nla_put_failure;
- pinfo.dpi = s->dpi;
- pinfo.spi = f->spi;
- pinfo.protocol = s->protocol;
- pinfo.tunnelid = s->tunnelid;
- pinfo.tunnelhdr = f->tunnelhdr;
- pinfo.pad = 0;
- if (nla_put(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo))
- goto nla_put_failure;
- if (f->res.classid &&
- nla_put_u32(skb, TCA_RSVP_CLASSID, f->res.classid))
- goto nla_put_failure;
- if (((f->handle >> 8) & 0xFF) != 16 &&
- nla_put(skb, TCA_RSVP_SRC, sizeof(f->src), f->src))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &f->exts) < 0)
- goto nla_put_failure;
-
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &f->exts) < 0)
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void rsvp_bind_class(void *fh, u32 classid, unsigned long cl, void *q,
- unsigned long base)
-{
- struct rsvp_filter *f = fh;
-
- tc_cls_bind_class(classid, cl, q, &f->res, base);
-}
-
-static struct tcf_proto_ops RSVP_OPS __read_mostly = {
- .kind = RSVP_ID,
- .classify = RSVP_CLS,
- .init = rsvp_init,
- .destroy = rsvp_destroy,
- .get = rsvp_get,
- .change = rsvp_change,
- .delete = rsvp_delete,
- .walk = rsvp_walk,
- .dump = rsvp_dump,
- .bind_class = rsvp_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_rsvp(void)
-{
- return register_tcf_proto_ops(&RSVP_OPS);
-}
-
-static void __exit exit_rsvp(void)
-{
- unregister_tcf_proto_ops(&RSVP_OPS);
-}
-
-module_init(init_rsvp)
-module_exit(exit_rsvp)
diff --git a/net/sched/cls_rsvp6.c b/net/sched/cls_rsvp6.c
deleted file mode 100644
index e627cc32d633..000000000000
--- a/net/sched/cls_rsvp6.c
+++ /dev/null
@@ -1,26 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/cls_rsvp6.c Special RSVP packet classifier for IPv6.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/ipv6.h>
-#include <linux/skbuff.h>
-#include <net/act_api.h>
-#include <net/pkt_cls.h>
-#include <net/netlink.h>
-#include <net/tc_wrapper.h>
-
-#define RSVP_DST_LEN 4
-#define RSVP_ID "rsvp6"
-#define RSVP_OPS cls_rsvp6_ops
-#define RSVP_CLS rsvp6_classify
-
-#include "cls_rsvp.h"
-MODULE_LICENSE("GPL");
diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c
deleted file mode 100644
index eb0e9458e722..000000000000
--- a/net/sched/cls_tcindex.c
+++ /dev/null
@@ -1,724 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * net/sched/cls_tcindex.c Packet classifier for skb->tc_index
- *
- * Written 1998,1999 by Werner Almesberger, EPFL ICA
- */
-
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/skbuff.h>
-#include <linux/errno.h>
-#include <linux/slab.h>
-#include <linux/refcount.h>
-#include <net/act_api.h>
-#include <net/netlink.h>
-#include <net/pkt_cls.h>
-#include <net/sch_generic.h>
-#include <net/tc_wrapper.h>
-
-/*
- * Passing parameters to the root seems to be done more awkwardly than really
- * necessary. At least, u32 doesn't seem to use such dirty hacks. To be
- * verified. FIXME.
- */
-
-#define PERFECT_HASH_THRESHOLD 64 /* use perfect hash if not bigger */
-#define DEFAULT_HASH_SIZE 64 /* optimized for diffserv */
-
-
-struct tcindex_data;
-
-struct tcindex_filter_result {
- struct tcf_exts exts;
- struct tcf_result res;
- struct tcindex_data *p;
- struct rcu_work rwork;
-};
-
-struct tcindex_filter {
- u16 key;
- struct tcindex_filter_result result;
- struct tcindex_filter __rcu *next;
- struct rcu_work rwork;
-};
-
-
-struct tcindex_data {
- struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */
- struct tcindex_filter __rcu **h; /* imperfect hash; */
- struct tcf_proto *tp;
- u16 mask; /* AND key with mask */
- u32 shift; /* shift ANDed key to the right */
- u32 hash; /* hash table size; 0 if undefined */
- u32 alloc_hash; /* allocated size */
- u32 fall_through; /* 0: only classify if explicit match */
- refcount_t refcnt; /* a temporary refcnt for perfect hash */
- struct rcu_work rwork;
-};
-
-static inline int tcindex_filter_is_set(struct tcindex_filter_result *r)
-{
- return tcf_exts_has_actions(&r->exts) || r->res.classid;
-}
-
-static void tcindex_data_get(struct tcindex_data *p)
-{
- refcount_inc(&p->refcnt);
-}
-
-static void tcindex_data_put(struct tcindex_data *p)
-{
- if (refcount_dec_and_test(&p->refcnt)) {
- kfree(p->perfect);
- kfree(p->h);
- kfree(p);
- }
-}
-
-static struct tcindex_filter_result *tcindex_lookup(struct tcindex_data *p,
- u16 key)
-{
- if (p->perfect) {
- struct tcindex_filter_result *f = p->perfect + key;
-
- return tcindex_filter_is_set(f) ? f : NULL;
- } else if (p->h) {
- struct tcindex_filter __rcu **fp;
- struct tcindex_filter *f;
-
- fp = &p->h[key % p->hash];
- for (f = rcu_dereference_bh_rtnl(*fp);
- f;
- fp = &f->next, f = rcu_dereference_bh_rtnl(*fp))
- if (f->key == key)
- return &f->result;
- }
-
- return NULL;
-}
-
-TC_INDIRECT_SCOPE int tcindex_classify(struct sk_buff *skb,
- const struct tcf_proto *tp,
- struct tcf_result *res)
-{
- struct tcindex_data *p = rcu_dereference_bh(tp->root);
- struct tcindex_filter_result *f;
- int key = (skb->tc_index & p->mask) >> p->shift;
-
- pr_debug("tcindex_classify(skb %p,tp %p,res %p),p %p\n",
- skb, tp, res, p);
-
- f = tcindex_lookup(p, key);
- if (!f) {
- struct Qdisc *q = tcf_block_q(tp->chain->block);
-
- if (!p->fall_through)
- return -1;
- res->classid = TC_H_MAKE(TC_H_MAJ(q->handle), key);
- res->class = 0;
- pr_debug("alg 0x%x\n", res->classid);
- return 0;
- }
- *res = f->res;
- pr_debug("map 0x%x\n", res->classid);
-
- return tcf_exts_exec(skb, &f->exts, res);
-}
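
The lookup key is nothing more than (skb->tc_index & mask) >> shift; a standalone illustration with hypothetical parameters:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint16_t tc_index = 0x1234;	/* made-up skb->tc_index */
	uint16_t mask = 0x00f0;
	uint32_t shift = 4;

	/* 0x1234 & 0x00f0 = 0x0030; 0x0030 >> 4 = 3 */
	printf("key = %u\n", (tc_index & mask) >> shift);
	return 0;
}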
-
-
-static void *tcindex_get(struct tcf_proto *tp, u32 handle)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r;
-
- pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle);
- if (p->perfect && handle >= p->alloc_hash)
- return NULL;
- r = tcindex_lookup(p, handle);
- return r && tcindex_filter_is_set(r) ? r : NULL;
-}
-
-static int tcindex_init(struct tcf_proto *tp)
-{
- struct tcindex_data *p;
-
- pr_debug("tcindex_init(tp %p)\n", tp);
- p = kzalloc(sizeof(struct tcindex_data), GFP_KERNEL);
- if (!p)
- return -ENOMEM;
-
- p->mask = 0xffff;
- p->hash = DEFAULT_HASH_SIZE;
- p->fall_through = 1;
- refcount_set(&p->refcnt, 1); /* Paired with tcindex_destroy_work() */
-
- rcu_assign_pointer(tp->root, p);
- return 0;
-}
-
-static void __tcindex_destroy_rexts(struct tcindex_filter_result *r)
-{
- tcf_exts_destroy(&r->exts);
- tcf_exts_put_net(&r->exts);
- tcindex_data_put(r->p);
-}
-
-static void tcindex_destroy_rexts_work(struct work_struct *work)
-{
- struct tcindex_filter_result *r;
-
- r = container_of(to_rcu_work(work),
- struct tcindex_filter_result,
- rwork);
- rtnl_lock();
- __tcindex_destroy_rexts(r);
- rtnl_unlock();
-}
-
-static void __tcindex_destroy_fexts(struct tcindex_filter *f)
-{
- tcf_exts_destroy(&f->result.exts);
- tcf_exts_put_net(&f->result.exts);
- kfree(f);
-}
-
-static void tcindex_destroy_fexts_work(struct work_struct *work)
-{
- struct tcindex_filter *f = container_of(to_rcu_work(work),
- struct tcindex_filter,
- rwork);
-
- rtnl_lock();
- __tcindex_destroy_fexts(f);
- rtnl_unlock();
-}
-
-static int tcindex_delete(struct tcf_proto *tp, void *arg, bool *last,
- bool rtnl_held, struct netlink_ext_ack *extack)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = arg;
- struct tcindex_filter __rcu **walk;
- struct tcindex_filter *f = NULL;
-
- pr_debug("tcindex_delete(tp %p,arg %p),p %p\n", tp, arg, p);
- if (p->perfect) {
- if (!r->res.class)
- return -ENOENT;
- } else {
- int i;
-
- for (i = 0; i < p->hash; i++) {
- walk = p->h + i;
- for (f = rtnl_dereference(*walk); f;
- walk = &f->next, f = rtnl_dereference(*walk)) {
- if (&f->result == r)
- goto found;
- }
- }
- return -ENOENT;
-
-found:
- rcu_assign_pointer(*walk, rtnl_dereference(f->next));
- }
- tcf_unbind_filter(tp, &r->res);
- /* all classifiers are required to call tcf_exts_destroy() after rcu
- * grace period, since converted-to-rcu actions are relying on that
- * in cleanup() callback
- */
- if (f) {
- if (tcf_exts_get_net(&f->result.exts))
- tcf_queue_work(&f->rwork, tcindex_destroy_fexts_work);
- else
- __tcindex_destroy_fexts(f);
- } else {
- tcindex_data_get(p);
-
- if (tcf_exts_get_net(&r->exts))
- tcf_queue_work(&r->rwork, tcindex_destroy_rexts_work);
- else
- __tcindex_destroy_rexts(r);
- }
-
- *last = false;
- return 0;
-}
-
-static void tcindex_destroy_work(struct work_struct *work)
-{
- struct tcindex_data *p = container_of(to_rcu_work(work),
- struct tcindex_data,
- rwork);
-
- tcindex_data_put(p);
-}
-
-static inline int
-valid_perfect_hash(struct tcindex_data *p)
-{
- return p->hash > (p->mask >> p->shift);
-}
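
In other words, the perfect hash is valid only when the table covers the whole key space: the largest possible key is mask >> shift, so hash must exceed it. For example, mask 0x00f0 with shift 4 yields keys 0..15, so any hash size of 16 or more qualifies.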
-
-static const struct nla_policy tcindex_policy[TCA_TCINDEX_MAX + 1] = {
- [TCA_TCINDEX_HASH] = { .type = NLA_U32 },
- [TCA_TCINDEX_MASK] = { .type = NLA_U16 },
- [TCA_TCINDEX_SHIFT] = { .type = NLA_U32 },
- [TCA_TCINDEX_FALL_THROUGH] = { .type = NLA_U32 },
- [TCA_TCINDEX_CLASSID] = { .type = NLA_U32 },
-};
-
-static int tcindex_filter_result_init(struct tcindex_filter_result *r,
- struct tcindex_data *p,
- struct net *net)
-{
- memset(r, 0, sizeof(*r));
- r->p = p;
- return tcf_exts_init(&r->exts, net, TCA_TCINDEX_ACT,
- TCA_TCINDEX_POLICE);
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp);
-
-static void tcindex_partial_destroy_work(struct work_struct *work)
-{
- struct tcindex_data *p = container_of(to_rcu_work(work),
- struct tcindex_data,
- rwork);
-
- rtnl_lock();
- if (p->perfect)
- tcindex_free_perfect_hash(p);
- kfree(p);
- rtnl_unlock();
-}
-
-static void tcindex_free_perfect_hash(struct tcindex_data *cp)
-{
- int i;
-
- for (i = 0; i < cp->hash; i++)
- tcf_exts_destroy(&cp->perfect[i].exts);
- kfree(cp->perfect);
-}
-
-static int tcindex_alloc_perfect_hash(struct net *net, struct tcindex_data *cp)
-{
- int i, err = 0;
-
- cp->perfect = kcalloc(cp->hash, sizeof(struct tcindex_filter_result),
- GFP_KERNEL | __GFP_NOWARN);
- if (!cp->perfect)
- return -ENOMEM;
-
- for (i = 0; i < cp->hash; i++) {
- err = tcf_exts_init(&cp->perfect[i].exts, net,
- TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- if (err < 0)
- goto errout;
- cp->perfect[i].p = cp;
- }
-
- return 0;
-
-errout:
- tcindex_free_perfect_hash(cp);
- return err;
-}
-
-static int
-tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base,
- u32 handle, struct tcindex_data *p,
- struct tcindex_filter_result *r, struct nlattr **tb,
- struct nlattr *est, u32 flags, struct netlink_ext_ack *extack)
-{
- struct tcindex_filter_result new_filter_result, *old_r = r;
- struct tcindex_data *cp = NULL, *oldp;
- struct tcindex_filter *f = NULL; /* make gcc behave */
- struct tcf_result cr = {};
- int err, balloc = 0;
- struct tcf_exts e;
-
- err = tcf_exts_init(&e, net, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE);
- if (err < 0)
- return err;
- err = tcf_exts_validate(net, tp, tb, est, &e, flags, extack);
- if (err < 0)
- goto errout;
-
- err = -ENOMEM;
- /* tcindex_data attributes must look atomic to classifier/lookup, so
- * allocate new tcindex data and RCU-assign it onto root, keeping the
- * perfect hash and hash pointers from the old data.
- */
- cp = kzalloc(sizeof(*cp), GFP_KERNEL);
- if (!cp)
- goto errout;
-
- cp->mask = p->mask;
- cp->shift = p->shift;
- cp->hash = p->hash;
- cp->alloc_hash = p->alloc_hash;
- cp->fall_through = p->fall_through;
- cp->tp = tp;
- refcount_set(&cp->refcnt, 1); /* Paired with tcindex_destroy_work() */
-
- if (tb[TCA_TCINDEX_HASH])
- cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]);
-
- if (tb[TCA_TCINDEX_MASK])
- cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]);
-
- if (tb[TCA_TCINDEX_SHIFT]) {
- cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]);
- if (cp->shift > 16) {
- err = -EINVAL;
- goto errout;
- }
- }
- if (!cp->hash) {
- /* Hash not specified, use perfect hash if the upper limit
- * of the hashing index is below the threshold.
- */
- if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD)
- cp->hash = (cp->mask >> cp->shift) + 1;
- else
- cp->hash = DEFAULT_HASH_SIZE;
- }
-
- if (p->perfect) {
- int i;
-
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout;
- cp->alloc_hash = cp->hash;
- for (i = 0; i < min(cp->hash, p->hash); i++)
- cp->perfect[i].res = p->perfect[i].res;
- balloc = 1;
- }
- cp->h = p->h;
-
- err = tcindex_filter_result_init(&new_filter_result, cp, net);
- if (err < 0)
- goto errout_alloc;
- if (old_r)
- cr = r->res;
-
- err = -EBUSY;
-
- /* Hash already allocated, make sure that we still meet the
- * requirements for the allocated hash.
- */
- if (cp->perfect) {
- if (!valid_perfect_hash(cp) ||
- cp->hash > cp->alloc_hash)
- goto errout_alloc;
- } else if (cp->h && cp->hash != cp->alloc_hash) {
- goto errout_alloc;
- }
-
- err = -EINVAL;
- if (tb[TCA_TCINDEX_FALL_THROUGH])
- cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]);
-
- if (!cp->perfect && !cp->h)
- cp->alloc_hash = cp->hash;
-
- /* Note: this could be as restrictive as if (handle & ~(mask >> shift))
- * but then, we'd fail handles that may become valid after some future
- * mask change. While this is extremely unlikely to ever matter,
- * the check below is safer (and also more backwards-compatible).
- */
- if (cp->perfect || valid_perfect_hash(cp))
- if (handle >= cp->alloc_hash)
- goto errout_alloc;
-
-
- err = -ENOMEM;
- if (!cp->perfect && !cp->h) {
- if (valid_perfect_hash(cp)) {
- if (tcindex_alloc_perfect_hash(net, cp) < 0)
- goto errout_alloc;
- balloc = 1;
- } else {
- struct tcindex_filter __rcu **hash;
-
- hash = kcalloc(cp->hash,
- sizeof(struct tcindex_filter *),
- GFP_KERNEL);
-
- if (!hash)
- goto errout_alloc;
-
- cp->h = hash;
- balloc = 2;
- }
- }
-
- if (cp->perfect)
- r = cp->perfect + handle;
- else
- r = tcindex_lookup(cp, handle) ? : &new_filter_result;
-
- if (r == &new_filter_result) {
- f = kzalloc(sizeof(*f), GFP_KERNEL);
- if (!f)
- goto errout_alloc;
- f->key = handle;
- f->next = NULL;
- err = tcindex_filter_result_init(&f->result, cp, net);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
- if (tb[TCA_TCINDEX_CLASSID]) {
- cr.classid = nla_get_u32(tb[TCA_TCINDEX_CLASSID]);
- tcf_bind_filter(tp, &cr, base);
- }
-
- if (old_r && old_r != r) {
- err = tcindex_filter_result_init(old_r, cp, net);
- if (err < 0) {
- kfree(f);
- goto errout_alloc;
- }
- }
-
- oldp = p;
- r->res = cr;
- tcf_exts_change(&r->exts, &e);
-
- rcu_assign_pointer(tp->root, cp);
-
- if (r == &new_filter_result) {
- struct tcindex_filter *nfp;
- struct tcindex_filter __rcu **fp;
-
- f->result.res = r->res;
- tcf_exts_change(&f->result.exts, &r->exts);
-
- fp = cp->h + (handle % cp->hash);
- for (nfp = rtnl_dereference(*fp);
- nfp;
- fp = &nfp->next, nfp = rtnl_dereference(*fp))
- ; /* nothing */
-
- rcu_assign_pointer(*fp, f);
- } else {
- tcf_exts_destroy(&new_filter_result.exts);
- }
-
- if (oldp)
- tcf_queue_work(&oldp->rwork, tcindex_partial_destroy_work);
- return 0;
-
-errout_alloc:
- if (balloc == 1)
- tcindex_free_perfect_hash(cp);
- else if (balloc == 2)
- kfree(cp->h);
- tcf_exts_destroy(&new_filter_result.exts);
-errout:
- kfree(cp);
- tcf_exts_destroy(&e);
- return err;
-}
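
The function above follows the classic RCU copy-update discipline: build a complete replacement (cp), swap it in with rcu_assign_pointer(), and let readers drain before the old data is reclaimed. A minimal sketch of that pattern in isolation (names illustrative; assumes RTNL as the writer-side lock, as here):

#include <linux/rcupdate.h>
#include <linux/rtnetlink.h>
#include <linux/slab.h>

struct cfg {
	int value;
	struct rcu_head rcu;
};

static struct cfg __rcu *active_cfg;

/* Writers replace the whole object; readers under rcu_read_lock()
 * always see either the old or the new configuration, never a mix.
 */
static int update_cfg(int new_value)
{
	struct cfg *old, *new;

	new = kzalloc(sizeof(*new), GFP_KERNEL);
	if (!new)
		return -ENOMEM;
	new->value = new_value;

	old = rtnl_dereference(active_cfg);
	rcu_assign_pointer(active_cfg, new);
	if (old)
		kfree_rcu(old, rcu);
	return 0;
}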
-
-static int
-tcindex_change(struct net *net, struct sk_buff *in_skb,
- struct tcf_proto *tp, unsigned long base, u32 handle,
- struct nlattr **tca, void **arg, u32 flags,
- struct netlink_ext_ack *extack)
-{
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_TCINDEX_MAX + 1];
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = *arg;
- int err;
-
- pr_debug("tcindex_change(tp %p,handle 0x%08x,tca %p,arg %p),opt %p,"
- "p %p,r %p,*arg %p\n",
- tp, handle, tca, arg, opt, p, r, *arg);
-
- if (!opt)
- return 0;
-
- err = nla_parse_nested_deprecated(tb, TCA_TCINDEX_MAX, opt,
- tcindex_policy, NULL);
- if (err < 0)
- return err;
-
- return tcindex_set_parms(net, tp, base, handle, p, r, tb,
- tca[TCA_RATE], flags, extack);
-}
-
-static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker,
- bool rtnl_held)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter *f, *next;
- int i;
-
- pr_debug("tcindex_walk(tp %p,walker %p),p %p\n", tp, walker, p);
- if (p->perfect) {
- for (i = 0; i < p->hash; i++) {
- if (!p->perfect[i].res.class)
- continue;
- if (!tc_cls_stats_dump(tp, walker, p->perfect + i))
- return;
- }
- }
- if (!p->h)
- return;
- for (i = 0; i < p->hash; i++) {
- for (f = rtnl_dereference(p->h[i]); f; f = next) {
- next = rtnl_dereference(f->next);
- if (!tc_cls_stats_dump(tp, walker, &f->result))
- return;
- }
- }
-}
-
-static void tcindex_destroy(struct tcf_proto *tp, bool rtnl_held,
- struct netlink_ext_ack *extack)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- int i;
-
- pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p);
-
- if (p->perfect) {
- for (i = 0; i < p->hash; i++) {
- struct tcindex_filter_result *r = p->perfect + i;
-
- /* tcf_queue_work() does not guarantee the ordering we
- * want, so we have to take this refcnt temporarily to
- * ensure 'p' is freed only after all the tcindex_filter_result
- * entries here. The imperfect hash does not need this, because it
- * uses linked lists rather than an array.
- */
- tcindex_data_get(p);
-
- tcf_unbind_filter(tp, &r->res);
- if (tcf_exts_get_net(&r->exts))
- tcf_queue_work(&r->rwork,
- tcindex_destroy_rexts_work);
- else
- __tcindex_destroy_rexts(r);
- }
- }
-
- for (i = 0; p->h && i < p->hash; i++) {
- struct tcindex_filter *f, *next;
- bool last;
-
- for (f = rtnl_dereference(p->h[i]); f; f = next) {
- next = rtnl_dereference(f->next);
- tcindex_delete(tp, &f->result, &last, rtnl_held, NULL);
- }
- }
-
- tcf_queue_work(&p->rwork, tcindex_destroy_work);
-}
-
-
-static int tcindex_dump(struct net *net, struct tcf_proto *tp, void *fh,
- struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
-{
- struct tcindex_data *p = rtnl_dereference(tp->root);
- struct tcindex_filter_result *r = fh;
- struct nlattr *nest;
-
- pr_debug("tcindex_dump(tp %p,fh %p,skb %p,t %p),p %p,r %p\n",
- tp, fh, skb, t, p, r);
- pr_debug("p->perfect %p p->h %p\n", p->perfect, p->h);
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (!fh) {
- t->tcm_handle = ~0; /* whatever ... */
- if (nla_put_u32(skb, TCA_TCINDEX_HASH, p->hash) ||
- nla_put_u16(skb, TCA_TCINDEX_MASK, p->mask) ||
- nla_put_u32(skb, TCA_TCINDEX_SHIFT, p->shift) ||
- nla_put_u32(skb, TCA_TCINDEX_FALL_THROUGH, p->fall_through))
- goto nla_put_failure;
- nla_nest_end(skb, nest);
- } else {
- if (p->perfect) {
- t->tcm_handle = r - p->perfect;
- } else {
- struct tcindex_filter *f;
- struct tcindex_filter __rcu **fp;
- int i;
-
- t->tcm_handle = 0;
- for (i = 0; !t->tcm_handle && i < p->hash; i++) {
- fp = &p->h[i];
- for (f = rtnl_dereference(*fp);
- !t->tcm_handle && f;
- fp = &f->next, f = rtnl_dereference(*fp)) {
- if (&f->result == r)
- t->tcm_handle = f->key;
- }
- }
- }
- pr_debug("handle = %d\n", t->tcm_handle);
- if (r->res.class &&
- nla_put_u32(skb, TCA_TCINDEX_CLASSID, r->res.classid))
- goto nla_put_failure;
-
- if (tcf_exts_dump(skb, &r->exts) < 0)
- goto nla_put_failure;
- nla_nest_end(skb, nest);
-
- if (tcf_exts_dump_stats(skb, &r->exts) < 0)
- goto nla_put_failure;
- }
-
- return skb->len;
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static void tcindex_bind_class(void *fh, u32 classid, unsigned long cl,
- void *q, unsigned long base)
-{
- struct tcindex_filter_result *r = fh;
-
- tc_cls_bind_class(classid, cl, q, &r->res, base);
-}
-
-static struct tcf_proto_ops cls_tcindex_ops __read_mostly = {
- .kind = "tcindex",
- .classify = tcindex_classify,
- .init = tcindex_init,
- .destroy = tcindex_destroy,
- .get = tcindex_get,
- .change = tcindex_change,
- .delete = tcindex_delete,
- .walk = tcindex_walk,
- .dump = tcindex_dump,
- .bind_class = tcindex_bind_class,
- .owner = THIS_MODULE,
-};
-
-static int __init init_tcindex(void)
-{
- return register_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-static void __exit exit_tcindex(void)
-{
- unregister_tcf_proto_ops(&cls_tcindex_ops);
-}
-
-module_init(init_tcindex)
-module_exit(exit_tcindex)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 2317db02c764..aba789c30a2e 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -902,7 +902,8 @@ static void qdisc_offload_graft_root(struct net_device *dev,
}
static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
- u32 portid, u32 seq, u16 flags, int event)
+ u32 portid, u32 seq, u16 flags, int event,
+ struct netlink_ext_ack *extack)
{
struct gnet_stats_basic_sync __percpu *cpu_bstats = NULL;
struct gnet_stats_queue __percpu *cpu_qstats = NULL;
@@ -970,7 +971,12 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid,
if (gnet_stats_finish_copy(&d) < 0)
goto nla_put_failure;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
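
With this change, qdisc and class notifications can carry the extack message in TCA_EXT_WARN_MSG even when the operation succeeds. A hedged userspace sketch of picking the string up with libmnl while walking a RTM_NEWQDISC message's attributes (assumes TCA_EXT_WARN_MSG from the matching uapi headers; callback wiring elided):

#include <libmnl/libmnl.h>
#include <linux/rtnetlink.h>
#include <stdio.h>

/* Attribute callback: print the kernel's warning string if present. */
static int attr_cb(const struct nlattr *attr, void *data)
{
	if (mnl_attr_get_type(attr) == TCA_EXT_WARN_MSG &&
	    mnl_attr_validate(attr, MNL_TYPE_NUL_STRING) >= 0)
		printf("kernel warning: %s\n", mnl_attr_get_str(attr));
	return MNL_CB_OK;
}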
@@ -991,7 +997,8 @@ static bool tc_qdisc_dump_ignore(struct Qdisc *q, bool dump_invisible)
static int qdisc_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
+ struct Qdisc *old, struct Qdisc *new,
+ struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1002,12 +1009,12 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb,
if (old && !tc_qdisc_dump_ignore(old, false)) {
if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq,
- 0, RTM_DELQDISC) < 0)
+ 0, RTM_DELQDISC, extack) < 0)
goto err_out;
}
if (new && !tc_qdisc_dump_ignore(new, false)) {
if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq,
- old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0)
+ old ? NLM_F_REPLACE : 0, RTM_NEWQDISC, extack) < 0)
goto err_out;
}
@@ -1022,10 +1029,11 @@ err_out:
static void notify_and_destroy(struct net *net, struct sk_buff *skb,
struct nlmsghdr *n, u32 clid,
- struct Qdisc *old, struct Qdisc *new)
+ struct Qdisc *old, struct Qdisc *new,
+ struct netlink_ext_ack *extack)
{
if (new || old)
- qdisc_notify(net, skb, n, clid, old, new);
+ qdisc_notify(net, skb, n, clid, old, new, extack);
if (old)
qdisc_put(old);
@@ -1105,12 +1113,12 @@ skip:
qdisc_refcount_inc(new);
rcu_assign_pointer(dev->qdisc, new ? : &noop_qdisc);
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
if (new && new->ops->attach)
new->ops->attach(new);
} else {
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
}
if (dev->flags & IFF_UP)
@@ -1133,10 +1141,15 @@ skip:
return -ENOENT;
}
+ if (new && new->ops == &noqueue_qdisc_ops) {
+ NL_SET_ERR_MSG(extack, "Cannot assign noqueue to a class");
+ return -EINVAL;
+ }
+
err = cops->graft(parent, cl, new, &old, extack);
if (err)
return err;
- notify_and_destroy(net, skb, n, classid, old, new);
+ notify_and_destroy(net, skb, n, classid, old, new, extack);
}
return 0;
}
@@ -1269,20 +1282,21 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
if (err)
goto err_out3;
- if (ops->init) {
- err = ops->init(sch, tca[TCA_OPTIONS], extack);
- if (err != 0)
- goto err_out5;
- }
-
if (tca[TCA_STAB]) {
stab = qdisc_get_stab(tca[TCA_STAB], extack);
if (IS_ERR(stab)) {
err = PTR_ERR(stab);
- goto err_out4;
+ goto err_out3;
}
rcu_assign_pointer(sch->stab, stab);
}
+
+ if (ops->init) {
+ err = ops->init(sch, tca[TCA_OPTIONS], extack);
+ if (err != 0)
+ goto err_out4;
+ }
+
if (tca[TCA_RATE]) {
err = -EOPNOTSUPP;
if (sch->flags & TCQ_F_MQROOT) {
@@ -1307,10 +1321,13 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
return sch;
-err_out5:
- /* ops->init() failed, we call ->destroy() like qdisc_create_dflt() */
+err_out4:
+ /* Even if ops->init() failed, we call ops->destroy()
+ * like qdisc_create_dflt().
+ */
if (ops->destroy)
ops->destroy(sch);
+ qdisc_put_stab(rtnl_dereference(sch->stab));
err_out3:
netdev_put(dev, &sch->dev_tracker);
qdisc_free(sch);
@@ -1319,16 +1336,6 @@ err_out2:
err_out:
*errp = err;
return NULL;
-
-err_out4:
- /*
- * Any broken qdiscs that would require a ops->reset() here?
- * The qdisc was never in action so it shouldn't be necessary.
- */
- qdisc_put_stab(rtnl_dereference(sch->stab));
- if (ops->destroy)
- ops->destroy(sch);
- goto err_out3;
}
static int qdisc_change(struct Qdisc *sch, struct nlattr **tca,
@@ -1504,7 +1511,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n,
if (err != 0)
return err;
} else {
- qdisc_notify(net, skb, n, clid, NULL, q);
+ qdisc_notify(net, skb, n, clid, NULL, q, NULL);
}
return 0;
}
@@ -1643,7 +1650,7 @@ replay:
}
err = qdisc_change(q, tca, extack);
if (err == 0)
- qdisc_notify(net, skb, n, clid, NULL, q);
+ qdisc_notify(net, skb, n, clid, NULL, q, extack);
return err;
create_n_graft:
@@ -1710,7 +1717,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC) <= 0)
+ RTM_NEWQDISC, NULL) <= 0)
goto done;
q_idx++;
}
@@ -1732,7 +1739,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb,
if (!tc_qdisc_dump_ignore(q, dump_invisible) &&
tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid,
cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWQDISC) <= 0)
+ RTM_NEWQDISC, NULL) <= 0)
goto done;
q_idx++;
}
@@ -1805,8 +1812,8 @@ done:
************************************************/
static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
- unsigned long cl,
- u32 portid, u32 seq, u16 flags, int event)
+ unsigned long cl, u32 portid, u32 seq, u16 flags,
+ int event, struct netlink_ext_ack *extack)
{
struct tcmsg *tcm;
struct nlmsghdr *nlh;
@@ -1841,7 +1848,12 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q,
if (gnet_stats_finish_copy(&d) < 0)
goto nla_put_failure;
+ if (extack && extack->_msg &&
+ nla_put_string(skb, TCA_EXT_WARN_MSG, extack->_msg))
+ goto out_nlmsg_trim;
+
nlh->nlmsg_len = skb_tail_pointer(skb) - b;
+
return skb->len;
out_nlmsg_trim:
@@ -1852,7 +1864,7 @@ nla_put_failure:
static int tclass_notify(struct net *net, struct sk_buff *oskb,
struct nlmsghdr *n, struct Qdisc *q,
- unsigned long cl, int event)
+ unsigned long cl, int event, struct netlink_ext_ack *extack)
{
struct sk_buff *skb;
u32 portid = oskb ? NETLINK_CB(oskb).portid : 0;
@@ -1861,7 +1873,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb,
if (!skb)
return -ENOBUFS;
- if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) {
+ if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event, extack) < 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -1888,7 +1900,7 @@ static int tclass_del_notify(struct net *net,
return -ENOBUFS;
if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0,
- RTM_DELTCLASS) < 0) {
+ RTM_DELTCLASS, extack) < 0) {
kfree_skb(skb);
return -EINVAL;
}
@@ -2095,7 +2107,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
tc_bind_tclass(q, portid, clid, 0);
goto out;
case RTM_GETTCLASS:
- err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS);
+ err = tclass_notify(net, skb, n, q, cl, RTM_NEWTCLASS, extack);
goto out;
default:
err = -EINVAL;
@@ -2113,7 +2125,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n,
if (cops->change)
err = cops->change(q, clid, portid, tca, &new_cl, extack);
if (err == 0) {
- tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS);
+ tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS, extack);
/* We just create a new class, need to do reverse binding. */
if (cl != new_cl)
tc_bind_tclass(q, portid, clid, new_cl);
@@ -2135,7 +2147,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl,
return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid,
a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
- RTM_NEWTCLASS);
+ RTM_NEWTCLASS, NULL);
}
static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c
deleted file mode 100644
index f52255fea652..000000000000
--- a/net/sched/sch_atm.c
+++ /dev/null
@@ -1,703 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_atm.c - ATM VC selection "queueing discipline" */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/atmdev.h>
-#include <linux/atmclip.h>
-#include <linux/rtnetlink.h>
-#include <linux/file.h> /* for fput */
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-/*
- * The ATM queuing discipline provides a framework for invoking classifiers
- * (aka "filters"), which in turn select classes of this queuing discipline.
- * Each class maps the flow(s) it is handling to a given VC. Multiple classes
- * may share the same VC.
- *
- * When creating a class, VCs are specified by passing the number of the open
- * socket descriptor by which the calling process references the VC. The kernel
- * keeps the VC open at least until all classes using it are removed.
- *
- * In this file, most functions are named atm_tc_* to avoid confusion with all
- * the atm_* in net/atm. This naming convention differs from what's used in the
- * rest of net/sched.
- *
- * Known bugs:
- * - sometimes messes up the IP stack
- * - any manipulations besides the few operations described in the README are
- * untested and likely to crash the system
- * - should lock the flow while there is data in the queue (?)
- */
-
-#define VCC2FLOW(vcc) ((struct atm_flow_data *) ((vcc)->user_back))
-
-struct atm_flow_data {
- struct Qdisc_class_common common;
- struct Qdisc *q; /* FIFO, TBF, etc. */
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
- struct atm_vcc *vcc; /* VCC; NULL if VCC is closed */
- void (*old_pop)(struct atm_vcc *vcc,
- struct sk_buff *skb); /* chaining */
- struct atm_qdisc_data *parent; /* parent qdisc */
- struct socket *sock; /* for closing */
- int ref; /* reference count */
- struct gnet_stats_basic_sync bstats;
- struct gnet_stats_queue qstats;
- struct list_head list;
- struct atm_flow_data *excess; /* flow for excess traffic;
- NULL to set CLP instead */
- int hdr_len;
- unsigned char hdr[]; /* header data; MUST BE LAST */
-};
-
-struct atm_qdisc_data {
- struct atm_flow_data link; /* unclassified skbs go here */
- struct list_head flows; /* NB: "link" is also on this
- list */
- struct tasklet_struct task; /* dequeue tasklet */
-};
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static inline struct atm_flow_data *lookup_flow(struct Qdisc *sch, u32 classid)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- list_for_each_entry(flow, &p->flows, list) {
- if (flow->common.classid == classid)
- return flow;
- }
- return NULL;
-}
-
-static int atm_tc_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- pr_debug("atm_tc_graft(sch %p,[qdisc %p],flow %p,new %p,old %p)\n",
- sch, p, flow, new, old);
- if (list_empty(&flow->list))
- return -EINVAL;
- if (!new)
- new = &noop_qdisc;
- *old = flow->q;
- flow->q = new;
- if (*old)
- qdisc_reset(*old);
- return 0;
-}
-
-static struct Qdisc *atm_tc_leaf(struct Qdisc *sch, unsigned long cl)
-{
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_leaf(sch %p,flow %p)\n", sch, flow);
- return flow ? flow->q : NULL;
-}
-
-static unsigned long atm_tc_find(struct Qdisc *sch, u32 classid)
-{
- struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
- flow = lookup_flow(sch, classid);
- pr_debug("%s: flow %p\n", __func__, flow);
- return (unsigned long)flow;
-}
-
-static unsigned long atm_tc_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
-{
- struct atm_qdisc_data *p __maybe_unused = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n", __func__, sch, p, classid);
- flow = lookup_flow(sch, classid);
- if (flow)
- flow->ref++;
- pr_debug("%s: flow %p\n", __func__, flow);
- return (unsigned long)flow;
-}
-
-/*
- * atm_tc_put handles all destructions, including the ones that are explicitly
- * requested (atm_tc_destroy, etc.). The assumption here is that we never drop
- * anything that still seems to be in use.
- */
-static void atm_tc_put(struct Qdisc *sch, unsigned long cl)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_put(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- if (--flow->ref)
- return;
- pr_debug("atm_tc_put: destroying\n");
- list_del_init(&flow->list);
- pr_debug("atm_tc_put: qdisc %p\n", flow->q);
- qdisc_put(flow->q);
- tcf_block_put(flow->block);
- if (flow->sock) {
- pr_debug("atm_tc_put: f_count %ld\n",
- file_count(flow->sock->file));
- flow->vcc->pop = flow->old_pop;
- sockfd_put(flow->sock);
- }
- if (flow->excess)
- atm_tc_put(sch, (unsigned long)flow->excess);
- if (flow != &p->link)
- kfree(flow);
- /*
- * If flow == &p->link, the qdisc no longer works at this point and
- * needs to be removed. (By the caller of atm_tc_put.)
- */
-}
-
-static void sch_atm_pop(struct atm_vcc *vcc, struct sk_buff *skb)
-{
- struct atm_qdisc_data *p = VCC2FLOW(vcc)->parent;
-
- pr_debug("sch_atm_pop(vcc %p,skb %p,[qdisc %p])\n", vcc, skb, p);
- VCC2FLOW(vcc)->old_pop(vcc, skb);
- tasklet_schedule(&p->task);
-}
-
-static const u8 llc_oui_ip[] = {
- 0xaa, /* DSAP: non-ISO */
- 0xaa, /* SSAP: non-ISO */
- 0x03, /* Ctrl: Unnumbered Information Command PDU */
- 0x00, /* OUI: EtherType */
- 0x00, 0x00,
- 0x08, 0x00
-}; /* Ethertype IP (0800) */
-
-static const struct nla_policy atm_policy[TCA_ATM_MAX + 1] = {
- [TCA_ATM_FD] = { .type = NLA_U32 },
- [TCA_ATM_EXCESS] = { .type = NLA_U32 },
-};
-
-static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)*arg;
- struct atm_flow_data *excess = NULL;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_ATM_MAX + 1];
- struct socket *sock;
- int fd, error, hdr_len;
- void *hdr;
-
- pr_debug("atm_tc_change(sch %p,[qdisc %p],classid %x,parent %x,"
- "flow %p,opt %p)\n", sch, p, classid, parent, flow, opt);
- /*
- * The concept of parents doesn't apply to this qdisc.
- */
- if (parent && parent != TC_H_ROOT && parent != sch->handle)
- return -EINVAL;
- /*
- * ATM classes cannot be changed. In order to change properties of the
- * ATM connection, that socket needs to be modified directly (via the
- * native ATM API. In order to send a flow to a different VC, the old
- * class needs to be removed and a new one added. (This may be changed
- * later.)
- */
- if (flow)
- return -EBUSY;
- if (opt == NULL)
- return -EINVAL;
-
- error = nla_parse_nested_deprecated(tb, TCA_ATM_MAX, opt, atm_policy,
- NULL);
- if (error < 0)
- return error;
-
- if (!tb[TCA_ATM_FD])
- return -EINVAL;
- fd = nla_get_u32(tb[TCA_ATM_FD]);
- pr_debug("atm_tc_change: fd %d\n", fd);
- if (tb[TCA_ATM_HDR]) {
- hdr_len = nla_len(tb[TCA_ATM_HDR]);
- hdr = nla_data(tb[TCA_ATM_HDR]);
- } else {
- hdr_len = RFC1483LLC_LEN;
- hdr = NULL; /* default LLC/SNAP for IP */
- }
- if (!tb[TCA_ATM_EXCESS])
- excess = NULL;
- else {
- excess = (struct atm_flow_data *)
- atm_tc_find(sch, nla_get_u32(tb[TCA_ATM_EXCESS]));
- if (!excess)
- return -ENOENT;
- }
- pr_debug("atm_tc_change: type %d, payload %d, hdr_len %d\n",
- opt->nla_type, nla_len(opt), hdr_len);
- sock = sockfd_lookup(fd, &error);
- if (!sock)
- return error; /* f_count++ */
- pr_debug("atm_tc_change: f_count %ld\n", file_count(sock->file));
- if (sock->ops->family != PF_ATMSVC && sock->ops->family != PF_ATMPVC) {
- error = -EPROTOTYPE;
- goto err_out;
- }
- /* @@@ should check if the socket is really operational or we'll crash
- on vcc->send */
- if (classid) {
- if (TC_H_MAJ(classid ^ sch->handle)) {
- pr_debug("atm_tc_change: classid mismatch\n");
- error = -EINVAL;
- goto err_out;
- }
- } else {
- int i;
- unsigned long cl;
-
- for (i = 1; i < 0x8000; i++) {
- classid = TC_H_MAKE(sch->handle, 0x8000 | i);
- cl = atm_tc_find(sch, classid);
- if (!cl)
- break;
- }
- }
- pr_debug("atm_tc_change: new id %x\n", classid);
- flow = kzalloc(sizeof(struct atm_flow_data) + hdr_len, GFP_KERNEL);
- pr_debug("atm_tc_change: flow %p\n", flow);
- if (!flow) {
- error = -ENOBUFS;
- goto err_out;
- }
-
- error = tcf_block_get(&flow->block, &flow->filter_list, sch,
- extack);
- if (error) {
- kfree(flow);
- goto err_out;
- }
-
- flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
- extack);
- if (!flow->q)
- flow->q = &noop_qdisc;
- pr_debug("atm_tc_change: qdisc %p\n", flow->q);
- flow->sock = sock;
- flow->vcc = ATM_SD(sock); /* speedup */
- flow->vcc->user_back = flow;
- pr_debug("atm_tc_change: vcc %p\n", flow->vcc);
- flow->old_pop = flow->vcc->pop;
- flow->parent = p;
- flow->vcc->pop = sch_atm_pop;
- flow->common.classid = classid;
- flow->ref = 1;
- flow->excess = excess;
- list_add(&flow->list, &p->link.list);
- flow->hdr_len = hdr_len;
- if (hdr)
- memcpy(flow->hdr, hdr, hdr_len);
- else
- memcpy(flow->hdr, llc_oui_ip, sizeof(llc_oui_ip));
- *arg = (unsigned long)flow;
- return 0;
-err_out:
- sockfd_put(sock);
- return error;
-}
-
-static int atm_tc_delete(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- pr_debug("atm_tc_delete(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- if (list_empty(&flow->list))
- return -EINVAL;
- if (rcu_access_pointer(flow->filter_list) || flow == &p->link)
- return -EBUSY;
- /*
- * Reference count must be 2: one for "keepalive" (set at class
- * creation), and one for the reference held when calling delete.
- */
- if (flow->ref < 2) {
- pr_err("atm_tc_delete: flow->ref == %d\n", flow->ref);
- return -EINVAL;
- }
- if (flow->ref > 2)
- return -EBUSY; /* catch references via excess, etc. */
- atm_tc_put(sch, arg);
- return 0;
-}
-
-static void atm_tc_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("atm_tc_walk(sch %p,[qdisc %p],walker %p)\n", sch, p, walker);
- if (walker->stop)
- return;
- list_for_each_entry(flow, &p->flows, list) {
- if (!tc_qdisc_stats_dump(sch, (unsigned long)flow, walker))
- break;
- }
-}
-
-static struct tcf_block *atm_tc_tcf_block(struct Qdisc *sch, unsigned long cl,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
-
- pr_debug("atm_tc_find_tcf(sch %p,[qdisc %p],flow %p)\n", sch, p, flow);
- return flow ? flow->block : p->link.block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int atm_tc_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
- struct tcf_result res;
- int result;
- int ret = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-
- pr_debug("atm_tc_enqueue(skb %p,sch %p,[qdisc %p])\n", skb, sch, p);
- result = TC_ACT_OK; /* be nice to gcc */
- flow = NULL;
- if (TC_H_MAJ(skb->priority) != sch->handle ||
- !(flow = (struct atm_flow_data *)atm_tc_find(sch, skb->priority))) {
- struct tcf_proto *fl;
-
- list_for_each_entry(flow, &p->flows, list) {
- fl = rcu_dereference_bh(flow->filter_list);
- if (fl) {
- result = tcf_classify(skb, NULL, fl, &res, true);
- if (result < 0)
- continue;
- flow = (struct atm_flow_data *)res.class;
- if (!flow)
- flow = lookup_flow(sch, res.classid);
- goto done;
- }
- }
- flow = NULL;
-done:
- ;
- }
- if (!flow) {
- flow = &p->link;
- } else {
- if (flow->vcc)
- ATM_SKB(skb)->atm_options = flow->vcc->atm_options;
- /*@@@ looks good ... but it's not supposed to work :-) */
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- __qdisc_drop(skb, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- case TC_ACT_SHOT:
- __qdisc_drop(skb, to_free);
- goto drop;
- case TC_ACT_RECLASSIFY:
- if (flow->excess)
- flow = flow->excess;
- else
- ATM_SKB(skb)->atm_options |= ATM_ATMOPT_CLP;
- break;
- }
-#endif
- }
-
- ret = qdisc_enqueue(skb, flow->q, to_free);
- if (ret != NET_XMIT_SUCCESS) {
-drop: __maybe_unused
- if (net_xmit_drop_count(ret)) {
- qdisc_qstats_drop(sch);
- if (flow)
- flow->qstats.drops++;
- }
- return ret;
- }
- /*
- * Okay, this may seem weird. We pretend we've dropped the packet if
- * it goes via ATM. The reason for this is that the outer qdisc
- * expects to be able to q->dequeue the packet later on if we return
- * success at this place. Also, sch->q.qlen needs to reflect whether
- * there is a packet eligible for dequeuing or not. Note that the
- * statistics of the outer qdisc are necessarily wrong because of all
- * this. There's currently no correct solution for this.
- */
- if (flow == &p->link) {
- sch->q.qlen++;
- return NET_XMIT_SUCCESS;
- }
- tasklet_schedule(&p->task);
- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
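The SUCCESS|__NET_XMIT_BYPASS return above only works because of how parent qdiscs account enqueue results. A hedged sketch of that convention (constant values as I recall them from the kernel headers; the names here are illustrative, not the real macros):

	/* Low bits carry the verdict; high bits are scheduler-internal
	 * flags stripped before the result reaches the stack. */
	enum {
		SKETCH_XMIT_SUCCESS = 0x00,
		SKETCH_XMIT_STOLEN  = 0x00010000,	/* cf. __NET_XMIT_STOLEN */
		SKETCH_XMIT_BYPASS  = 0x00020000,	/* cf. __NET_XMIT_BYPASS */
	};

	static int parent_enqueue(int child_ret, unsigned int *qlen)
	{
		/* Only a plain success increments the parent's qlen;
		 * SUCCESS|BYPASS (returned above for ATM-bound packets)
		 * does not, so the parent never tries to dequeue the skb. */
		if (child_ret == SKETCH_XMIT_SUCCESS)
			(*qlen)++;
		return child_ret & 0xffff;		/* flags stripped */
	}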
-
-/*
- * Dequeue packets and send them over ATM. Note that we quite deliberately
- * avoid checking net_device's flow control here, simply because sch_atm
- * uses its own channels, which have nothing to do with CLIP, LANE, or
- * non-ATM interfaces.
- */
-
-static void sch_atm_dequeue(struct tasklet_struct *t)
-{
- struct atm_qdisc_data *p = from_tasklet(p, t, task);
- struct Qdisc *sch = qdisc_from_priv(p);
- struct atm_flow_data *flow;
- struct sk_buff *skb;
-
- pr_debug("sch_atm_dequeue(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- if (flow == &p->link)
- continue;
- /*
- * If traffic is properly shaped, this won't generate nasty
- * little bursts. Otherwise, it may ... (but that's okay)
- */
- while ((skb = flow->q->ops->peek(flow->q))) {
- if (!atm_may_send(flow->vcc, skb->truesize))
- break;
-
- skb = qdisc_dequeue_peeked(flow->q);
- if (unlikely(!skb))
- break;
-
- qdisc_bstats_update(sch, skb);
- bstats_update(&flow->bstats, skb);
- pr_debug("atm_tc_dequeue: sending on class %p\n", flow);
- /* remove any LL header somebody else has attached */
- skb_pull(skb, skb_network_offset(skb));
- if (skb_headroom(skb) < flow->hdr_len) {
- struct sk_buff *new;
-
- new = skb_realloc_headroom(skb, flow->hdr_len);
- dev_kfree_skb(skb);
- if (!new)
- continue;
- skb = new;
- }
- pr_debug("sch_atm_dequeue: ip %p, data %p\n",
- skb_network_header(skb), skb->data);
- ATM_SKB(skb)->vcc = flow->vcc;
- memcpy(skb_push(skb, flow->hdr_len), flow->hdr,
- flow->hdr_len);
- refcount_add(skb->truesize,
- &sk_atm(flow->vcc)->sk_wmem_alloc);
- /* atm.atm_options are already set by atm_tc_enqueue */
- flow->vcc->send(flow->vcc, skb);
- }
- }
-}
-
-static struct sk_buff *atm_tc_dequeue(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct sk_buff *skb;
-
- pr_debug("atm_tc_dequeue(sch %p,[qdisc %p])\n", sch, p);
- tasklet_schedule(&p->task);
- skb = qdisc_dequeue_peeked(p->link.q);
- if (skb)
- sch->q.qlen--;
- return skb;
-}
-
-static struct sk_buff *atm_tc_peek(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("atm_tc_peek(sch %p,[qdisc %p])\n", sch, p);
-
- return p->link.q->ops->peek(p->link.q);
-}
-
-static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("atm_tc_init(sch %p,[qdisc %p],opt %p)\n", sch, p, opt);
- INIT_LIST_HEAD(&p->flows);
- INIT_LIST_HEAD(&p->link.list);
- gnet_stats_basic_sync_init(&p->link.bstats);
- list_add(&p->link.list, &p->flows);
- p->link.q = qdisc_create_dflt(sch->dev_queue,
- &pfifo_qdisc_ops, sch->handle, extack);
- if (!p->link.q)
- p->link.q = &noop_qdisc;
- pr_debug("atm_tc_init: link (%p) qdisc %p\n", &p->link, p->link.q);
- p->link.vcc = NULL;
- p->link.sock = NULL;
- p->link.common.classid = sch->handle;
- p->link.ref = 1;
-
- err = tcf_block_get(&p->link.block, &p->link.filter_list, sch,
- extack);
- if (err)
- return err;
-
- tasklet_setup(&p->task, sch_atm_dequeue);
- return 0;
-}
-
-static void atm_tc_reset(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow;
-
- pr_debug("atm_tc_reset(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list)
- qdisc_reset(flow->q);
-}
-
-static void atm_tc_destroy(struct Qdisc *sch)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow, *tmp;
-
- pr_debug("atm_tc_destroy(sch %p,[qdisc %p])\n", sch, p);
- list_for_each_entry(flow, &p->flows, list) {
- tcf_block_put(flow->block);
- flow->block = NULL;
- }
-
- list_for_each_entry_safe(flow, tmp, &p->flows, list) {
- if (flow->ref > 1)
- pr_err("atm_destroy: %p->ref = %d\n", flow, flow->ref);
- atm_tc_put(sch, (unsigned long)flow);
- }
- tasklet_kill(&p->task);
-}
-
-static int atm_tc_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct atm_qdisc_data *p = qdisc_priv(sch);
- struct atm_flow_data *flow = (struct atm_flow_data *)cl;
- struct nlattr *nest;
-
- pr_debug("atm_tc_dump_class(sch %p,[qdisc %p],flow %p,skb %p,tcm %p)\n",
- sch, p, flow, skb, tcm);
- if (list_empty(&flow->list))
- return -EINVAL;
- tcm->tcm_handle = flow->common.classid;
- tcm->tcm_info = flow->q->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
-
- if (nla_put(skb, TCA_ATM_HDR, flow->hdr_len, flow->hdr))
- goto nla_put_failure;
- if (flow->vcc) {
- struct sockaddr_atmpvc pvc;
- int state;
-
- memset(&pvc, 0, sizeof(pvc));
- pvc.sap_family = AF_ATMPVC;
- pvc.sap_addr.itf = flow->vcc->dev ? flow->vcc->dev->number : -1;
- pvc.sap_addr.vpi = flow->vcc->vpi;
- pvc.sap_addr.vci = flow->vcc->vci;
- if (nla_put(skb, TCA_ATM_ADDR, sizeof(pvc), &pvc))
- goto nla_put_failure;
- state = ATM_VF2VS(flow->vcc->flags);
- if (nla_put_u32(skb, TCA_ATM_STATE, state))
- goto nla_put_failure;
- }
- if (flow->excess) {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, flow->common.classid))
- goto nla_put_failure;
- } else {
- if (nla_put_u32(skb, TCA_ATM_EXCESS, 0))
- goto nla_put_failure;
- }
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-atm_tc_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
-{
- struct atm_flow_data *flow = (struct atm_flow_data *)arg;
-
- if (gnet_stats_copy_basic(d, NULL, &flow->bstats, true) < 0 ||
- gnet_stats_copy_queue(d, NULL, &flow->qstats, flow->q->q.qlen) < 0)
- return -1;
-
- return 0;
-}
-
-static int atm_tc_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- return 0;
-}
-
-static const struct Qdisc_class_ops atm_class_ops = {
- .graft = atm_tc_graft,
- .leaf = atm_tc_leaf,
- .find = atm_tc_find,
- .change = atm_tc_change,
- .delete = atm_tc_delete,
- .walk = atm_tc_walk,
- .tcf_block = atm_tc_tcf_block,
- .bind_tcf = atm_tc_bind_filter,
- .unbind_tcf = atm_tc_put,
- .dump = atm_tc_dump_class,
- .dump_stats = atm_tc_dump_class_stats,
-};
-
-static struct Qdisc_ops atm_qdisc_ops __read_mostly = {
- .cl_ops = &atm_class_ops,
- .id = "atm",
- .priv_size = sizeof(struct atm_qdisc_data),
- .enqueue = atm_tc_enqueue,
- .dequeue = atm_tc_dequeue,
- .peek = atm_tc_peek,
- .init = atm_tc_init,
- .reset = atm_tc_reset,
- .destroy = atm_tc_destroy,
- .dump = atm_tc_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init atm_init(void)
-{
- return register_qdisc(&atm_qdisc_ops);
-}
-
-static void __exit atm_exit(void)
-{
- unregister_qdisc(&atm_qdisc_ops);
-}
-
-module_init(atm_init)
-module_exit(atm_exit)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 3ed0c3342189..7970217b565a 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -1209,7 +1209,7 @@ static struct sk_buff *cake_ack_filter(struct cake_sched_data *q,
iph_check->daddr != iph->daddr)
continue;
- seglen = ntohs(iph_check->tot_len) -
+ seglen = iph_totlen(skb, iph_check) -
(4 * iph_check->ihl);
} else if (iph_check->version == 6) {
ipv6h = (struct ipv6hdr *)iph;
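Context for this one-line change (hedged): iph_totlen() is, as far as I can tell, the helper introduced for IPv4 BIG TCP, where tot_len may be zero and the true length must be derived from the skb. Using it keeps the ACK filter's segment-length arithmetic correct for such oversized packets while remaining equivalent to ntohs(tot_len) for ordinary ones.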
diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c
deleted file mode 100644
index 6568e17c4c63..000000000000
--- a/net/sched/sch_cbq.c
+++ /dev/null
@@ -1,1727 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * net/sched/sch_cbq.c Class-Based Queueing discipline.
- *
- * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
- */
-
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/kernel.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <net/netlink.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-
-
-/* Class-Based Queueing (CBQ) algorithm.
- =======================================
-
- Sources: [1] Sally Floyd and Van Jacobson, "Link-sharing and Resource
- Management Models for Packet Networks",
- IEEE/ACM Transactions on Networking, Vol.3, No.4, 1995
-
- [2] Sally Floyd, "Notes on CBQ and Guaranteed Service", 1995
-
- [3] Sally Floyd, "Notes on Class-Based Queueing: Setting
- Parameters", 1996
-
- [4] Sally Floyd and Michael Speer, "Experimental Results
- for Class-Based Queueing", 1998, not published.
-
- -----------------------------------------------------------------------
-
- The algorithm skeleton was taken from the NS simulator's cbq.cc.
- If someone wants to check this code against the LBL version,
- they should take into account that ONLY the skeleton was borrowed;
- the implementation is different. In particular:
-
- --- The WRR algorithm is different. Our version looks more
- reasonable (I hope) and works when quanta are allowed to be
- less than the MTU, which is always the case when real-time classes
- have small rates. Note that the statement in [3] is
- incomplete: delay may actually be estimated even if the class's
- per-round allotment is less than the MTU. Namely, if the per-round
- allotment is W*r_i, and r_1+...+r_k = r < 1
-
- delay_i <= ([MTU/(W*r_i)]*W*r + W*r + k*MTU)/B
-
- In the worst case we have IntServ estimate with D = W*r+k*MTU
- and C = MTU*r. The proof (if correct at all) is trivial.
-
-
- --- It seems that cbq-2.0 is not very accurate. At least, I cannot
- interpret some places, which look like wrong translations
- from NS. Anyone is advised to find these differences
- and explain to me why I am wrong 8).
-
- --- Linux has no EOI event, so we cannot estimate true class
- idle time. The workaround is to treat the next dequeue event
- as a sign that the previous packet has finished. This is wrong because of
- internal device queueing, but on a permanently loaded link it holds.
- Moreover, combined with the clock integrator, this scheme looks
- very close to an ideal solution. */
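To make the delay bound above concrete, here is a standalone sketch that evaluates it for one hypothetical class (all numbers invented for illustration):

	#include <math.h>
	#include <stdio.h>

	/* delay_i <= (ceil(MTU/(W*r_i))*W*r + W*r + k*MTU) / B */
	int main(void)
	{
		double MTU = 1500.0, B = 125000000.0;	/* 1 Gbit/s in bytes/s */
		double W = 15000.0;			/* bytes per WRR round */
		double r_i = 0.01, r = 0.5;		/* class share, total share */
		int k = 10;				/* number of classes */
		double bound = (ceil(MTU / (W * r_i)) * W * r + W * r + k * MTU) / B;

		printf("worst-case delay bound: %.3f ms\n", bound * 1e3);	/* ~0.780 ms */
		return 0;
	}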
-
-struct cbq_sched_data;
-
-
-struct cbq_class {
- struct Qdisc_class_common common;
- struct cbq_class *next_alive; /* next class with backlog in this priority band */
-
-/* Parameters */
- unsigned char priority; /* class priority */
- unsigned char priority2; /* priority to be used after overlimit */
- unsigned char ewma_log; /* time constant for idle time calculation */
-
- u32 defmap;
-
- /* Link-sharing scheduler parameters */
- long maxidle; /* Class parameters: see below. */
- long offtime;
- long minidle;
- u32 avpkt;
- struct qdisc_rate_table *R_tab;
-
- /* General scheduler (WRR) parameters */
- long allot;
- long quantum; /* Allotment per WRR round */
- long weight; /* Relative allotment: see below */
-
- struct Qdisc *qdisc; /* Ptr to CBQ discipline */
- struct cbq_class *split; /* Ptr to split node */
- struct cbq_class *share; /* Ptr to LS parent in the class tree */
- struct cbq_class *tparent; /* Ptr to tree parent in the class tree */
- struct cbq_class *borrow; /* NULL if class is bandwidth limited;
- parent otherwise */
- struct cbq_class *sibling; /* Sibling chain */
- struct cbq_class *children; /* Pointer to children chain */
-
- struct Qdisc *q; /* Elementary queueing discipline */
-
-
-/* Variables */
- unsigned char cpriority; /* Effective priority */
- unsigned char delayed;
- unsigned char level; /* level of the class in hierarchy:
- 0 for leaf classes, and maximal
- level of children + 1 for nodes.
- */
-
- psched_time_t last; /* Last end of service */
- psched_time_t undertime;
- long avgidle;
- long deficit; /* Saved deficit for WRR */
- psched_time_t penalized;
- struct gnet_stats_basic_sync bstats;
- struct gnet_stats_queue qstats;
- struct net_rate_estimator __rcu *rate_est;
- struct tc_cbq_xstats xstats;
-
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
-
- int filters;
-
- struct cbq_class *defaults[TC_PRIO_MAX + 1];
-};
-
-struct cbq_sched_data {
- struct Qdisc_class_hash clhash; /* Hash table of all classes */
- int nclasses[TC_CBQ_MAXPRIO + 1];
- unsigned int quanta[TC_CBQ_MAXPRIO + 1];
-
- struct cbq_class link;
-
- unsigned int activemask;
- struct cbq_class *active[TC_CBQ_MAXPRIO + 1]; /* List of all classes
- with backlog */
-
-#ifdef CONFIG_NET_CLS_ACT
- struct cbq_class *rx_class;
-#endif
- struct cbq_class *tx_class;
- struct cbq_class *tx_borrowed;
- int tx_len;
- psched_time_t now; /* Cached timestamp */
- unsigned int pmask;
-
- struct qdisc_watchdog watchdog; /* Watchdog timer,
- started when CBQ has
- backlog, but cannot
- transmit just now */
- psched_tdiff_t wd_expires;
- int toplevel;
- u32 hgenerator;
-};
-
-
-#define L2T(cl, len) qdisc_l2t((cl)->R_tab, len)
-
-static inline struct cbq_class *
-cbq_class_lookup(struct cbq_sched_data *q, u32 classid)
-{
- struct Qdisc_class_common *clc;
-
- clc = qdisc_class_find(&q->clhash, classid);
- if (clc == NULL)
- return NULL;
- return container_of(clc, struct cbq_class, common);
-}
-
-#ifdef CONFIG_NET_CLS_ACT
-
-static struct cbq_class *
-cbq_reclassify(struct sk_buff *skb, struct cbq_class *this)
-{
- struct cbq_class *cl;
-
- for (cl = this->tparent; cl; cl = cl->tparent) {
- struct cbq_class *new = cl->defaults[TC_PRIO_BESTEFFORT];
-
- if (new != NULL && new != this)
- return new;
- }
- return NULL;
-}
-
-#endif
-
-/* Classify packet. The procedure is pretty complicated, but
- * it allows us to combine link sharing and priority scheduling
- * transparently.
- *
- * Namely, you can put link-sharing rules (e.g. route-based) at the root of CBQ,
- * so that it resolves to split nodes. Then packets are classified
- * by logical priority, or a more specific classifier may be attached
- * to the split node.
- */
-
-static struct cbq_class *
-cbq_classify(struct sk_buff *skb, struct Qdisc *sch, int *qerr)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *head = &q->link;
- struct cbq_class **defmap;
- struct cbq_class *cl = NULL;
- u32 prio = skb->priority;
- struct tcf_proto *fl;
- struct tcf_result res;
-
- /*
- * Step 1. If skb->priority points to one of our classes, use it.
- */
- if (TC_H_MAJ(prio ^ sch->handle) == 0 &&
- (cl = cbq_class_lookup(q, prio)) != NULL)
- return cl;
-
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
- for (;;) {
- int result = 0;
- defmap = head->defaults;
-
- fl = rcu_dereference_bh(head->filter_list);
- /*
- * Step 2+n. Apply classifier.
- */
- result = tcf_classify(skb, NULL, fl, &res, true);
- if (!fl || result < 0)
- goto fallback;
-
- cl = (void *)res.class;
- if (!cl) {
- if (TC_H_MAJ(res.classid))
- cl = cbq_class_lookup(q, res.classid);
- else if ((cl = defmap[res.classid & TC_PRIO_MAX]) == NULL)
- cl = defmap[TC_PRIO_BESTEFFORT];
-
- if (cl == NULL)
- goto fallback;
- }
- if (cl->level >= head->level)
- goto fallback;
-#ifdef CONFIG_NET_CLS_ACT
- switch (result) {
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- *qerr = NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
- fallthrough;
- case TC_ACT_SHOT:
- return NULL;
- case TC_ACT_RECLASSIFY:
- return cbq_reclassify(skb, cl);
- }
-#endif
- if (cl->level == 0)
- return cl;
-
- /*
- * Step 3+n. If classifier selected a link sharing class,
- * apply its agency-specific classifier.
- * Repeat this procedure until we hit a leaf node.
- */
- head = cl;
- }
-
-fallback:
- cl = head;
-
- /*
- * Step 4. No success...
- */
- if (TC_H_MAJ(prio) == 0 &&
- !(cl = head->defaults[prio & TC_PRIO_MAX]) &&
- !(cl = head->defaults[TC_PRIO_BESTEFFORT]))
- return head;
-
- return cl;
-}
-
-/*
- * A packet has just been enqueued to an empty class.
- * cbq_activate_class adds it to the tail of the active class list
- * of its priority band.
- */
-
-static inline void cbq_activate_class(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- int prio = cl->cpriority;
- struct cbq_class *cl_tail;
-
- cl_tail = q->active[prio];
- q->active[prio] = cl;
-
- if (cl_tail != NULL) {
- cl->next_alive = cl_tail->next_alive;
- cl_tail->next_alive = cl;
- } else {
- cl->next_alive = cl;
- q->activemask |= (1<<prio);
- }
-}
-
-/*
- * Unlink class from active chain.
- * Note that this same procedure is done directly in cbq_dequeue*
- * during the round-robin procedure.
- */
-
-static void cbq_deactivate_class(struct cbq_class *this)
-{
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- int prio = this->cpriority;
- struct cbq_class *cl;
- struct cbq_class *cl_prev = q->active[prio];
-
- do {
- cl = cl_prev->next_alive;
- if (cl == this) {
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
-
- if (cl == q->active[prio]) {
- q->active[prio] = cl_prev;
- if (cl == q->active[prio]) {
- q->active[prio] = NULL;
- q->activemask &= ~(1<<prio);
- return;
- }
- }
- return;
- }
- } while ((cl_prev = cl) != q->active[prio]);
-}
-
-static void
-cbq_mark_toplevel(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- int toplevel = q->toplevel;
-
- if (toplevel > cl->level) {
- psched_time_t now = psched_get_time();
-
- do {
- if (cl->undertime < now) {
- q->toplevel = cl->level;
- return;
- }
- } while ((cl = cl->borrow) != NULL && toplevel > cl->level);
- }
-}
-
-static int
-cbq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- int ret;
- struct cbq_class *cl = cbq_classify(skb, sch, &ret);
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = cl;
-#endif
- if (cl == NULL) {
- if (ret & __NET_XMIT_BYPASS)
- qdisc_qstats_drop(sch);
- __qdisc_drop(skb, to_free);
- return ret;
- }
-
- ret = qdisc_enqueue(skb, cl->q, to_free);
- if (ret == NET_XMIT_SUCCESS) {
- sch->q.qlen++;
- cbq_mark_toplevel(q, cl);
- if (!cl->next_alive)
- cbq_activate_class(cl);
- return ret;
- }
-
- if (net_xmit_drop_count(ret)) {
- qdisc_qstats_drop(sch);
- cbq_mark_toplevel(q, cl);
- cl->qstats.drops++;
- }
- return ret;
-}
-
-/* Overlimit action: penalize leaf class by adding offtime */
-static void cbq_overlimit(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- psched_tdiff_t delay = cl->undertime - q->now;
-
- if (!cl->delayed) {
- delay += cl->offtime;
-
- /*
- * The class goes to sleep, so it will have no
- * chance to work off its avgidle. Let's forgive it 8)
- *
- * BTW cbq-2.0 has a bug in this
- * place; apparently they forgot to shift it by cl->ewma_log.
- */
- if (cl->avgidle < 0)
- delay -= (-cl->avgidle) - ((-cl->avgidle) >> cl->ewma_log);
- if (cl->avgidle < cl->minidle)
- cl->avgidle = cl->minidle;
- if (delay <= 0)
- delay = 1;
- cl->undertime = q->now + delay;
-
- cl->xstats.overactions++;
- cl->delayed = 1;
- }
- if (q->wd_expires == 0 || q->wd_expires > delay)
- q->wd_expires = delay;
-
- /* Dirty work! We must schedule wakeups based on
- * the real available rate, rather than the leaf rate,
- * which may be tiny (even zero).
- */
- if (q->toplevel == TC_CBQ_MAXLEVEL) {
- struct cbq_class *b;
- psched_tdiff_t base_delay = q->wd_expires;
-
- for (b = cl->borrow; b; b = b->borrow) {
- delay = b->undertime - q->now;
- if (delay < base_delay) {
- if (delay <= 0)
- delay = 1;
- base_delay = delay;
- }
- }
-
- q->wd_expires = base_delay;
- }
-}
-
-/*
- * This is a mission-critical procedure.
- *
- * We "regenerate" the toplevel cutoff if the transmitting class
- * has backlog and is not regulated. This is not part of the
- * original CBQ description, but looks more reasonable.
- * It is probably wrong. This question needs further investigation.
- */
-
-static inline void
-cbq_update_toplevel(struct cbq_sched_data *q, struct cbq_class *cl,
- struct cbq_class *borrowed)
-{
- if (cl && q->toplevel >= borrowed->level) {
- if (cl->q->q.qlen > 1) {
- do {
- if (borrowed->undertime == PSCHED_PASTPERFECT) {
- q->toplevel = borrowed->level;
- return;
- }
- } while ((borrowed = borrowed->borrow) != NULL);
- }
-#if 0
- /* It is not necessary now. Uncommenting it
- will save CPU cycles, but decrease fairness.
- */
- q->toplevel = TC_CBQ_MAXLEVEL;
-#endif
- }
-}
-
-static void
-cbq_update(struct cbq_sched_data *q)
-{
- struct cbq_class *this = q->tx_class;
- struct cbq_class *cl = this;
- int len = q->tx_len;
- psched_time_t now;
-
- q->tx_class = NULL;
- /* Time integrator. We calculate EOS time
- * by adding the expected packet transmission time.
- */
- now = q->now + L2T(&q->link, len);
-
- for ( ; cl; cl = cl->share) {
- long avgidle = cl->avgidle;
- long idle;
-
- _bstats_update(&cl->bstats, len, 1);
-
- /*
- * (now - last) is total time between packet right edges.
- * (last_pktlen/rate) is "virtual" busy time, so that
- *
- * idle = (now - last) - last_pktlen/rate
- */
-
- idle = now - cl->last;
- if ((unsigned long)idle > 128*1024*1024) {
- avgidle = cl->maxidle;
- } else {
- idle -= L2T(cl, len);
-
- /* true_avgidle := (1-W)*true_avgidle + W*idle,
- * where W=2^{-ewma_log}. But cl->avgidle is scaled:
- * cl->avgidle == true_avgidle/W,
- * hence:
- */
- avgidle += idle - (avgidle>>cl->ewma_log);
- }
-
- if (avgidle <= 0) {
- /* Overlimit or at-limit */
-
- if (avgidle < cl->minidle)
- avgidle = cl->minidle;
-
- cl->avgidle = avgidle;
-
- /* Calculate the expected time when this class
- * will be allowed to send.
- * It will occur when:
- * (1-W)*true_avgidle + W*delay = 0, i.e.
- * idle = (1/W - 1)*(-true_avgidle)
- * or
- * idle = (1 - W)*(-cl->avgidle);
- */
- idle = (-avgidle) - ((-avgidle) >> cl->ewma_log);
-
- /*
- * That is not all.
- * To maintain the rate allocated to the class,
- * we add to undertime the virtual clock time
- * needed to complete the transmitted packet.
- * (len/phys_bandwidth has already elapsed
- * by the moment of cbq_update.)
- */
-
- idle -= L2T(&q->link, len);
- idle += L2T(cl, len);
-
- cl->undertime = now + idle;
- } else {
- /* Underlimit */
-
- cl->undertime = PSCHED_PASTPERFECT;
- if (avgidle > cl->maxidle)
- cl->avgidle = cl->maxidle;
- else
- cl->avgidle = avgidle;
- }
- if ((s64)(now - cl->last) > 0)
- cl->last = now;
- }
-
- cbq_update_toplevel(q, this, q->tx_borrowed);
-}
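The avgidle arithmetic in cbq_update() is a fixed-point EWMA. A standalone sketch showing that the scaled update tracks the true estimator (illustrative; the integer shift truncates, so the two columns agree only approximately):

	#include <stdio.h>

	/* With W = 2^-ewma_log the true estimator is
	 *     true_avgidle = (1 - W) * true_avgidle + W * idle
	 * CBQ stores avgidle = true_avgidle / W, so the update reduces to
	 *     avgidle += idle - (avgidle >> ewma_log);
	 * i.e. one shift instead of a multiply. */
	int main(void)
	{
		const int ewma_log = 5;			/* W = 1/32 */
		const double W = 1.0 / 32.0;
		long avgidle = 0;			/* scaled estimator */
		double true_avg = 0.0;
		long idle[] = { 100, 40, 250, 10, 80 };

		for (int i = 0; i < 5; i++) {
			avgidle += idle[i] - (avgidle >> ewma_log);
			true_avg = (1.0 - W) * true_avg + W * idle[i];
			printf("scaled*W = %7.3f   true = %7.3f\n",
			       (double)avgidle * W, true_avg);
		}
		return 0;
	}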
-
-static inline struct cbq_class *
-cbq_under_limit(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *this_cl = cl;
-
- if (cl->tparent == NULL)
- return cl;
-
- if (cl->undertime == PSCHED_PASTPERFECT || q->now >= cl->undertime) {
- cl->delayed = 0;
- return cl;
- }
-
- do {
- /* It is very suspicious place. Now overlimit
- * action is generated for not bounded classes
- * only if link is completely congested.
- * Though it is in agree with ancestor-only paradigm,
- * it looks very stupid. Particularly,
- * it means that this chunk of code will either
- * never be called or result in strong amplification
- * of burstiness. Dangerous, silly, and, however,
- * no another solution exists.
- */
- cl = cl->borrow;
- if (!cl) {
- this_cl->qstats.overlimits++;
- cbq_overlimit(this_cl);
- return NULL;
- }
- if (cl->level > q->toplevel)
- return NULL;
- } while (cl->undertime != PSCHED_PASTPERFECT && q->now < cl->undertime);
-
- cl->delayed = 0;
- return cl;
-}
-
-static inline struct sk_buff *
-cbq_dequeue_prio(struct Qdisc *sch, int prio)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl_tail, *cl_prev, *cl;
- struct sk_buff *skb;
- int deficit;
-
- cl_tail = cl_prev = q->active[prio];
- cl = cl_prev->next_alive;
-
- do {
- deficit = 0;
-
- /* Start round */
- do {
- struct cbq_class *borrow = cl;
-
- if (cl->q->q.qlen &&
- (borrow = cbq_under_limit(cl)) == NULL)
- goto skip_class;
-
- if (cl->deficit <= 0) {
- /* Class exhausted its allotment for
- * this round. Switch to the next one.
- */
- deficit = 1;
- cl->deficit += cl->quantum;
- goto next_class;
- }
-
- skb = cl->q->dequeue(cl->q);
-
- /* Class did not give us any skb :-(
- * This can occur even if cl->q->q.qlen != 0,
- * e.g. if cl->q == "tbf".
- */
- if (skb == NULL)
- goto skip_class;
-
- cl->deficit -= qdisc_pkt_len(skb);
- q->tx_class = cl;
- q->tx_borrowed = borrow;
- if (borrow != cl) {
-#ifndef CBQ_XSTATS_BORROWS_BYTES
- borrow->xstats.borrows++;
- cl->xstats.borrows++;
-#else
- borrow->xstats.borrows += qdisc_pkt_len(skb);
- cl->xstats.borrows += qdisc_pkt_len(skb);
-#endif
- }
- q->tx_len = qdisc_pkt_len(skb);
-
- if (cl->deficit <= 0) {
- q->active[prio] = cl;
- cl = cl->next_alive;
- cl->deficit += cl->quantum;
- }
- return skb;
-
-skip_class:
- if (cl->q->q.qlen == 0 || prio != cl->cpriority) {
- /* Class is empty or penalized.
- * Unlink it from active chain.
- */
- cl_prev->next_alive = cl->next_alive;
- cl->next_alive = NULL;
-
- /* Did cl_tail point to it? */
- if (cl == cl_tail) {
- /* Repair it! */
- cl_tail = cl_prev;
-
- /* Was it the last class in this band? */
- if (cl == cl_tail) {
- /* Kill the band! */
- q->active[prio] = NULL;
- q->activemask &= ~(1<<prio);
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
- return NULL;
- }
-
- q->active[prio] = cl_tail;
- }
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
-
- cl = cl_prev;
- }
-
-next_class:
- cl_prev = cl;
- cl = cl->next_alive;
- } while (cl_prev != cl_tail);
- } while (deficit);
-
- q->active[prio] = cl_prev;
-
- return NULL;
-}
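cbq_dequeue_prio() above is a deficit-flavored weighted round robin. A compact userspace sketch of just the deficit mechanism (illustrative: "queues" are plain length arrays, and the class layout is invented):

	#include <stdio.h>

	struct cls { int quantum, deficit, n, next, pkt[4]; };

	/* A class may send while its deficit is positive; once exhausted
	 * it receives another quantum and the scheduler moves on. */
	static int drr_pick(struct cls *c, int ncls, int *from)
	{
		for (int pass = 0; pass < 2 * ncls; pass++) {
			struct cls *cl = &c[*from];

			if (cl->next < cl->n && cl->deficit > 0) {
				int len = cl->pkt[cl->next++];

				cl->deficit -= len;
				return len;		/* "dequeued" */
			}
			cl->deficit += cl->quantum;	/* new allotment */
			*from = (*from + 1) % ncls;	/* next class */
		}
		return -1;				/* all queues empty */
	}

	int main(void)
	{
		struct cls c[2] = {
			{ .quantum = 1500, .deficit = 1500, .n = 3, .pkt = { 1000, 1000, 1000 } },
			{ .quantum = 500,  .deficit = 500,  .n = 3, .pkt = { 400, 400, 400 } },
		};
		int from = 0, len;

		while ((len = drr_pick(c, 2, &from)) > 0)
			printf("sent %d from class %d\n", len, from);
		return 0;
	}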
-
-static inline struct sk_buff *
-cbq_dequeue_1(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct sk_buff *skb;
- unsigned int activemask;
-
- activemask = q->activemask & 0xFF;
- while (activemask) {
- int prio = ffz(~activemask);
- activemask &= ~(1<<prio);
- skb = cbq_dequeue_prio(sch, prio);
- if (skb)
- return skb;
- }
- return NULL;
-}
-
-static struct sk_buff *
-cbq_dequeue(struct Qdisc *sch)
-{
- struct sk_buff *skb;
- struct cbq_sched_data *q = qdisc_priv(sch);
- psched_time_t now;
-
- now = psched_get_time();
-
- if (q->tx_class)
- cbq_update(q);
-
- q->now = now;
-
- for (;;) {
- q->wd_expires = 0;
-
- skb = cbq_dequeue_1(sch);
- if (skb) {
- qdisc_bstats_update(sch, skb);
- sch->q.qlen--;
- return skb;
- }
-
- /* All the classes are overlimit.
- *
- * This is possible if:
- *
- * 1. Scheduler is empty.
- * 2. Toplevel cutoff inhibited borrowing.
- * 3. Root class is overlimit.
- *
- * Reset the 2nd and 3rd conditions and retry.
- *
- * Note that NS and cbq-2.0 are buggy here: picking
- * an arbitrary class is appropriate for ancestor-only
- * sharing, but not for the toplevel algorithm.
- *
- * Our version is better but slower because it requires
- * two passes; that is unavoidable with top-level sharing.
- */
-
- if (q->toplevel == TC_CBQ_MAXLEVEL &&
- q->link.undertime == PSCHED_PASTPERFECT)
- break;
-
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->link.undertime = PSCHED_PASTPERFECT;
- }
-
- /* No packets in scheduler or nobody wants to give them to us :-(
- * Sigh... Start the watchdog timer in the latter case.
- */
-
- if (sch->q.qlen) {
- qdisc_qstats_overlimit(sch);
- if (q->wd_expires)
- qdisc_watchdog_schedule(&q->watchdog,
- now + q->wd_expires);
- }
- return NULL;
-}
-
-/* CBQ class maintenance routines */
-
-static void cbq_adjust_levels(struct cbq_class *this)
-{
- if (this == NULL)
- return;
-
- do {
- int level = 0;
- struct cbq_class *cl;
-
- cl = this->children;
- if (cl) {
- do {
- if (cl->level > level)
- level = cl->level;
- } while ((cl = cl->sibling) != this->children);
- }
- this->level = level + 1;
- } while ((this = this->tparent) != NULL);
-}
-
-static void cbq_normalize_quanta(struct cbq_sched_data *q, int prio)
-{
- struct cbq_class *cl;
- unsigned int h;
-
- if (q->quanta[prio] == 0)
- return;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- /* BUGGGG... Beware! This expression suffers from
- * arithmetic overflows!
- */
- if (cl->priority == prio) {
- cl->quantum = (cl->weight*cl->allot*q->nclasses[prio])/
- q->quanta[prio];
- }
- if (cl->quantum <= 0 ||
- cl->quantum > 32*qdisc_dev(cl->qdisc)->mtu) {
- pr_warn("CBQ: class %08x has bad quantum==%ld, repaired.\n",
- cl->common.classid, cl->quantum);
- cl->quantum = qdisc_dev(cl->qdisc)->mtu/2 + 1;
- }
- }
- }
-}
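A worked example of the normalization above (illustrative numbers): in a band with nclasses = 2, weights 3 and 1 (so quanta = 4) and allot = 1500 for both classes,

	quantum_a = 3 * 1500 * 2 / 4 = 2250
	quantum_b = 1 * 1500 * 2 / 4 =  750

so the quanta follow the weight ratio while their average stays near allot. The overflow warning applies because weight * allot * nclasses is computed in plain long arithmetic.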
-
-static void cbq_sync_defmap(struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
- struct cbq_class *split = cl->split;
- unsigned int h;
- int i;
-
- if (split == NULL)
- return;
-
- for (i = 0; i <= TC_PRIO_MAX; i++) {
- if (split->defaults[i] == cl && !(cl->defmap & (1<<i)))
- split->defaults[i] = NULL;
- }
-
- for (i = 0; i <= TC_PRIO_MAX; i++) {
- int level = split->level;
-
- if (split->defaults[i])
- continue;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- struct cbq_class *c;
-
- hlist_for_each_entry(c, &q->clhash.hash[h],
- common.hnode) {
- if (c->split == split && c->level < level &&
- c->defmap & (1<<i)) {
- split->defaults[i] = c;
- level = c->level;
- }
- }
- }
- }
-}
-
-static void cbq_change_defmap(struct cbq_class *cl, u32 splitid, u32 def, u32 mask)
-{
- struct cbq_class *split = NULL;
-
- if (splitid == 0) {
- split = cl->split;
- if (!split)
- return;
- splitid = split->common.classid;
- }
-
- if (split == NULL || split->common.classid != splitid) {
- for (split = cl->tparent; split; split = split->tparent)
- if (split->common.classid == splitid)
- break;
- }
-
- if (split == NULL)
- return;
-
- if (cl->split != split) {
- cl->defmap = 0;
- cbq_sync_defmap(cl);
- cl->split = split;
- cl->defmap = def & mask;
- } else
- cl->defmap = (cl->defmap & ~mask) | (def & mask);
-
- cbq_sync_defmap(cl);
-}
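A gloss on the split/defmap machinery above: every class carries a bitmask of the logical priorities it volunteers to be the default for, and the split node's defaults[] table is rebuilt by choosing, per priority, the lowest-level volunteer. A hedged standalone sketch of that selection rule (types and names invented):

	#include <stdint.h>

	struct scls { int level; uint32_t defmap; };

	/* Return the index of the default class for a priority: among the
	 * classes whose defmap bit is set, pick the one lowest in the
	 * hierarchy (smallest level), mirroring cbq_sync_defmap(). */
	static int pick_default(const struct scls *cls, int n, int prio)
	{
		int best = -1, best_level = 1 << 30;

		for (int i = 0; i < n; i++) {
			if ((cls[i].defmap & (1u << prio)) &&
			    cls[i].level < best_level) {
				best = i;
				best_level = cls[i].level;
			}
		}
		return best;	/* -1 if nobody volunteers */
	}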
-
-static void cbq_unlink_class(struct cbq_class *this)
-{
- struct cbq_class *cl, **clp;
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
-
- qdisc_class_hash_remove(&q->clhash, &this->common);
-
- if (this->tparent) {
- clp = &this->sibling;
- cl = *clp;
- do {
- if (cl == this) {
- *clp = cl->sibling;
- break;
- }
- clp = &cl->sibling;
- } while ((cl = *clp) != this->sibling);
-
- if (this->tparent->children == this) {
- this->tparent->children = this->sibling;
- if (this->sibling == this)
- this->tparent->children = NULL;
- }
- } else {
- WARN_ON(this->sibling != this);
- }
-}
-
-static void cbq_link_class(struct cbq_class *this)
-{
- struct cbq_sched_data *q = qdisc_priv(this->qdisc);
- struct cbq_class *parent = this->tparent;
-
- this->sibling = this;
- qdisc_class_hash_insert(&q->clhash, &this->common);
-
- if (parent == NULL)
- return;
-
- if (parent->children == NULL) {
- parent->children = this;
- } else {
- this->sibling = parent->children->sibling;
- parent->children->sibling = this;
- }
-}
-
-static void
-cbq_reset(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- int prio;
- unsigned int h;
-
- q->activemask = 0;
- q->pmask = 0;
- q->tx_class = NULL;
- q->tx_borrowed = NULL;
- qdisc_watchdog_cancel(&q->watchdog);
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->now = psched_get_time();
-
- for (prio = 0; prio <= TC_CBQ_MAXPRIO; prio++)
- q->active[prio] = NULL;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- qdisc_reset(cl->q);
-
- cl->next_alive = NULL;
- cl->undertime = PSCHED_PASTPERFECT;
- cl->avgidle = cl->maxidle;
- cl->deficit = cl->quantum;
- cl->cpriority = cl->priority;
- }
- }
-}
-
-
-static void cbq_set_lss(struct cbq_class *cl, struct tc_cbq_lssopt *lss)
-{
- if (lss->change & TCF_CBQ_LSS_FLAGS) {
- cl->share = (lss->flags & TCF_CBQ_LSS_ISOLATED) ? NULL : cl->tparent;
- cl->borrow = (lss->flags & TCF_CBQ_LSS_BOUNDED) ? NULL : cl->tparent;
- }
- if (lss->change & TCF_CBQ_LSS_EWMA)
- cl->ewma_log = lss->ewma_log;
- if (lss->change & TCF_CBQ_LSS_AVPKT)
- cl->avpkt = lss->avpkt;
- if (lss->change & TCF_CBQ_LSS_MINIDLE)
- cl->minidle = -(long)lss->minidle;
- if (lss->change & TCF_CBQ_LSS_MAXIDLE) {
- cl->maxidle = lss->maxidle;
- cl->avgidle = lss->maxidle;
- }
- if (lss->change & TCF_CBQ_LSS_OFFTIME)
- cl->offtime = lss->offtime;
-}
-
-static void cbq_rmprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- q->nclasses[cl->priority]--;
- q->quanta[cl->priority] -= cl->weight;
- cbq_normalize_quanta(q, cl->priority);
-}
-
-static void cbq_addprio(struct cbq_sched_data *q, struct cbq_class *cl)
-{
- q->nclasses[cl->priority]++;
- q->quanta[cl->priority] += cl->weight;
- cbq_normalize_quanta(q, cl->priority);
-}
-
-static int cbq_set_wrr(struct cbq_class *cl, struct tc_cbq_wrropt *wrr)
-{
- struct cbq_sched_data *q = qdisc_priv(cl->qdisc);
-
- if (wrr->allot)
- cl->allot = wrr->allot;
- if (wrr->weight)
- cl->weight = wrr->weight;
- if (wrr->priority) {
- cl->priority = wrr->priority - 1;
- cl->cpriority = cl->priority;
- if (cl->priority >= cl->priority2)
- cl->priority2 = TC_CBQ_MAXPRIO - 1;
- }
-
- cbq_addprio(q, cl);
- return 0;
-}
-
-static int cbq_set_fopt(struct cbq_class *cl, struct tc_cbq_fopt *fopt)
-{
- cbq_change_defmap(cl, fopt->split, fopt->defmap, fopt->defchange);
- return 0;
-}
-
-static const struct nla_policy cbq_policy[TCA_CBQ_MAX + 1] = {
- [TCA_CBQ_LSSOPT] = { .len = sizeof(struct tc_cbq_lssopt) },
- [TCA_CBQ_WRROPT] = { .len = sizeof(struct tc_cbq_wrropt) },
- [TCA_CBQ_FOPT] = { .len = sizeof(struct tc_cbq_fopt) },
- [TCA_CBQ_OVL_STRATEGY] = { .len = sizeof(struct tc_cbq_ovl) },
- [TCA_CBQ_RATE] = { .len = sizeof(struct tc_ratespec) },
- [TCA_CBQ_RTAB] = { .type = NLA_BINARY, .len = TC_RTAB_SIZE },
- [TCA_CBQ_POLICE] = { .len = sizeof(struct tc_cbq_police) },
-};
-
-static int cbq_opt_parse(struct nlattr *tb[TCA_CBQ_MAX + 1],
- struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- int err;
-
- if (!opt) {
- NL_SET_ERR_MSG(extack, "CBQ options are required for this operation");
- return -EINVAL;
- }
-
- err = nla_parse_nested_deprecated(tb, TCA_CBQ_MAX, opt,
- cbq_policy, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_CBQ_WRROPT]) {
- const struct tc_cbq_wrropt *wrr = nla_data(tb[TCA_CBQ_WRROPT]);
-
- if (wrr->priority > TC_CBQ_MAXPRIO) {
- NL_SET_ERR_MSG(extack, "priority is bigger than TC_CBQ_MAXPRIO");
- err = -EINVAL;
- }
- }
- return err;
-}
-
-static int cbq_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct nlattr *tb[TCA_CBQ_MAX + 1];
- struct tc_ratespec *r;
- int err;
-
- qdisc_watchdog_init(&q->watchdog, sch);
-
- err = cbq_opt_parse(tb, opt, extack);
- if (err < 0)
- return err;
-
- if (!tb[TCA_CBQ_RTAB] || !tb[TCA_CBQ_RATE]) {
- NL_SET_ERR_MSG(extack, "Rate specification missing or incomplete");
- return -EINVAL;
- }
-
- r = nla_data(tb[TCA_CBQ_RATE]);
-
- q->link.R_tab = qdisc_get_rtab(r, tb[TCA_CBQ_RTAB], extack);
- if (!q->link.R_tab)
- return -EINVAL;
-
- err = tcf_block_get(&q->link.block, &q->link.filter_list, sch, extack);
- if (err)
- goto put_rtab;
-
- err = qdisc_class_hash_init(&q->clhash);
- if (err < 0)
- goto put_block;
-
- q->link.sibling = &q->link;
- q->link.common.classid = sch->handle;
- q->link.qdisc = sch;
- q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle, NULL);
- if (!q->link.q)
- q->link.q = &noop_qdisc;
- else
- qdisc_hash_add(q->link.q, true);
-
- q->link.priority = TC_CBQ_MAXPRIO - 1;
- q->link.priority2 = TC_CBQ_MAXPRIO - 1;
- q->link.cpriority = TC_CBQ_MAXPRIO - 1;
- q->link.allot = psched_mtu(qdisc_dev(sch));
- q->link.quantum = q->link.allot;
- q->link.weight = q->link.R_tab->rate.rate;
-
- q->link.ewma_log = TC_CBQ_DEF_EWMA;
- q->link.avpkt = q->link.allot/2;
- q->link.minidle = -0x7FFFFFFF;
-
- q->toplevel = TC_CBQ_MAXLEVEL;
- q->now = psched_get_time();
-
- cbq_link_class(&q->link);
-
- if (tb[TCA_CBQ_LSSOPT])
- cbq_set_lss(&q->link, nla_data(tb[TCA_CBQ_LSSOPT]));
-
- cbq_addprio(q, &q->link);
- return 0;
-
-put_block:
- tcf_block_put(q->link.block);
-
-put_rtab:
- qdisc_put_rtab(q->link.R_tab);
- return err;
-}
-
-static int cbq_dump_rate(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
-
- if (nla_put(skb, TCA_CBQ_RATE, sizeof(cl->R_tab->rate), &cl->R_tab->rate))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_lss(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_lssopt opt;
-
- opt.flags = 0;
- if (cl->borrow == NULL)
- opt.flags |= TCF_CBQ_LSS_BOUNDED;
- if (cl->share == NULL)
- opt.flags |= TCF_CBQ_LSS_ISOLATED;
- opt.ewma_log = cl->ewma_log;
- opt.level = cl->level;
- opt.avpkt = cl->avpkt;
- opt.maxidle = cl->maxidle;
- opt.minidle = (u32)(-cl->minidle);
- opt.offtime = cl->offtime;
- opt.change = ~0;
- if (nla_put(skb, TCA_CBQ_LSSOPT, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_wrr(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_wrropt opt;
-
- memset(&opt, 0, sizeof(opt));
- opt.flags = 0;
- opt.allot = cl->allot;
- opt.priority = cl->priority + 1;
- opt.cpriority = cl->cpriority + 1;
- opt.weight = cl->weight;
- if (nla_put(skb, TCA_CBQ_WRROPT, sizeof(opt), &opt))
- goto nla_put_failure;
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_fopt(struct sk_buff *skb, struct cbq_class *cl)
-{
- unsigned char *b = skb_tail_pointer(skb);
- struct tc_cbq_fopt opt;
-
- if (cl->split || cl->defmap) {
- opt.split = cl->split ? cl->split->common.classid : 0;
- opt.defmap = cl->defmap;
- opt.defchange = ~0;
- if (nla_put(skb, TCA_CBQ_FOPT, sizeof(opt), &opt))
- goto nla_put_failure;
- }
- return skb->len;
-
-nla_put_failure:
- nlmsg_trim(skb, b);
- return -1;
-}
-
-static int cbq_dump_attr(struct sk_buff *skb, struct cbq_class *cl)
-{
- if (cbq_dump_lss(skb, cl) < 0 ||
- cbq_dump_rate(skb, cl) < 0 ||
- cbq_dump_wrr(skb, cl) < 0 ||
- cbq_dump_fopt(skb, cl) < 0)
- return -1;
- return 0;
-}
-
-static int cbq_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct nlattr *nest;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- if (cbq_dump_attr(skb, &q->link) < 0)
- goto nla_put_failure;
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-cbq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- q->link.xstats.avgidle = q->link.avgidle;
- return gnet_stats_copy_app(d, &q->link.xstats, sizeof(q->link.xstats));
-}
-
-static int
-cbq_dump_class(struct Qdisc *sch, unsigned long arg,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
- struct nlattr *nest;
-
- if (cl->tparent)
- tcm->tcm_parent = cl->tparent->common.classid;
- else
- tcm->tcm_parent = TC_H_ROOT;
- tcm->tcm_handle = cl->common.classid;
- tcm->tcm_info = cl->q->handle;
-
- nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (nest == NULL)
- goto nla_put_failure;
- if (cbq_dump_attr(skb, cl) < 0)
- goto nla_put_failure;
- return nla_nest_end(skb, nest);
-
-nla_put_failure:
- nla_nest_cancel(skb, nest);
- return -1;
-}
-
-static int
-cbq_dump_class_stats(struct Qdisc *sch, unsigned long arg,
- struct gnet_dump *d)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
- __u32 qlen;
-
- cl->xstats.avgidle = cl->avgidle;
- cl->xstats.undertime = 0;
- qdisc_qstats_qlen_backlog(cl->q, &qlen, &cl->qstats.backlog);
-
- if (cl->undertime != PSCHED_PASTPERFECT)
- cl->xstats.undertime = cl->undertime - q->now;
-
- if (gnet_stats_copy_basic(d, NULL, &cl->bstats, true) < 0 ||
- gnet_stats_copy_rate_est(d, &cl->rate_est) < 0 ||
- gnet_stats_copy_queue(d, NULL, &cl->qstats, qlen) < 0)
- return -1;
-
- return gnet_stats_copy_app(d, &cl->xstats, sizeof(cl->xstats));
-}
-
-static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new,
- struct Qdisc **old, struct netlink_ext_ack *extack)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- cl->common.classid, extack);
- if (new == NULL)
- return -ENOBUFS;
- }
-
- *old = qdisc_replace(sch, new, &cl->q);
- return 0;
-}
-
-static struct Qdisc *cbq_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- return cl->q;
-}
-
-static void cbq_qlen_notify(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- cbq_deactivate_class(cl);
-}
-
-static unsigned long cbq_find(struct Qdisc *sch, u32 classid)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- return (unsigned long)cbq_class_lookup(q, classid);
-}
-
-static void cbq_destroy_class(struct Qdisc *sch, struct cbq_class *cl)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
-
- WARN_ON(cl->filters);
-
- tcf_block_put(cl->block);
- qdisc_put(cl->q);
- qdisc_put_rtab(cl->R_tab);
- gen_kill_estimator(&cl->rate_est);
- if (cl != &q->link)
- kfree(cl);
-}
-
-static void cbq_destroy(struct Qdisc *sch)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct hlist_node *next;
- struct cbq_class *cl;
- unsigned int h;
-
-#ifdef CONFIG_NET_CLS_ACT
- q->rx_class = NULL;
-#endif
- /*
- * Filters must be destroyed first because we don't destroy the
- * classes from root to leaves, which means that filters can still
- * be bound to classes which have been destroyed already. --TGR '04
- */
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- tcf_block_put(cl->block);
- cl->block = NULL;
- }
- }
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry_safe(cl, next, &q->clhash.hash[h],
- common.hnode)
- cbq_destroy_class(sch, cl);
- }
- qdisc_class_hash_destroy(&q->clhash);
-}
-
-static int
-cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **tca,
- unsigned long *arg, struct netlink_ext_ack *extack)
-{
- int err;
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)*arg;
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_CBQ_MAX + 1];
- struct cbq_class *parent;
- struct qdisc_rate_table *rtab = NULL;
-
- err = cbq_opt_parse(tb, opt, extack);
- if (err < 0)
- return err;
-
- if (tb[TCA_CBQ_OVL_STRATEGY] || tb[TCA_CBQ_POLICE]) {
- NL_SET_ERR_MSG(extack, "Neither overlimit strategy nor policing attributes can be used for changing class params");
- return -EOPNOTSUPP;
- }
-
- if (cl) {
- /* Check parent */
- if (parentid) {
- if (cl->tparent &&
- cl->tparent->common.classid != parentid) {
- NL_SET_ERR_MSG(extack, "Invalid parent id");
- return -EINVAL;
- }
- if (!cl->tparent && parentid != TC_H_ROOT) {
- NL_SET_ERR_MSG(extack, "Parent must be root");
- return -EINVAL;
- }
- }
-
- if (tb[TCA_CBQ_RATE]) {
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]),
- tb[TCA_CBQ_RTAB], extack);
- if (rtab == NULL)
- return -EINVAL;
- }
-
- if (tca[TCA_RATE]) {
- err = gen_replace_estimator(&cl->bstats, NULL,
- &cl->rate_est,
- NULL,
- true,
- tca[TCA_RATE]);
- if (err) {
- NL_SET_ERR_MSG(extack, "Failed to replace specified rate estimator");
- qdisc_put_rtab(rtab);
- return err;
- }
- }
-
- /* Change class parameters */
- sch_tree_lock(sch);
-
- if (cl->next_alive != NULL)
- cbq_deactivate_class(cl);
-
- if (rtab) {
- qdisc_put_rtab(cl->R_tab);
- cl->R_tab = rtab;
- }
-
- if (tb[TCA_CBQ_LSSOPT])
- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
-
- if (tb[TCA_CBQ_WRROPT]) {
- cbq_rmprio(q, cl);
- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- }
-
- if (tb[TCA_CBQ_FOPT])
- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
-
- if (cl->q->q.qlen)
- cbq_activate_class(cl);
-
- sch_tree_unlock(sch);
-
- return 0;
- }
-
- if (parentid == TC_H_ROOT)
- return -EINVAL;
-
- if (!tb[TCA_CBQ_WRROPT] || !tb[TCA_CBQ_RATE] || !tb[TCA_CBQ_LSSOPT]) {
- NL_SET_ERR_MSG(extack, "One of the following attributes MUST be specified: WRR, rate or link sharing");
- return -EINVAL;
- }
-
- rtab = qdisc_get_rtab(nla_data(tb[TCA_CBQ_RATE]), tb[TCA_CBQ_RTAB],
- extack);
- if (rtab == NULL)
- return -EINVAL;
-
- if (classid) {
- err = -EINVAL;
- if (TC_H_MAJ(classid ^ sch->handle) ||
- cbq_class_lookup(q, classid)) {
- NL_SET_ERR_MSG(extack, "Specified class not found");
- goto failure;
- }
- } else {
- int i;
- classid = TC_H_MAKE(sch->handle, 0x8000);
-
- for (i = 0; i < 0x8000; i++) {
- if (++q->hgenerator >= 0x8000)
- q->hgenerator = 1;
- if (cbq_class_lookup(q, classid|q->hgenerator) == NULL)
- break;
- }
- err = -ENOSR;
- if (i >= 0x8000) {
- NL_SET_ERR_MSG(extack, "Unable to generate classid");
- goto failure;
- }
- classid = classid|q->hgenerator;
- }
-
- parent = &q->link;
- if (parentid) {
- parent = cbq_class_lookup(q, parentid);
- err = -EINVAL;
- if (!parent) {
- NL_SET_ERR_MSG(extack, "Failed to find parentid");
- goto failure;
- }
- }
-
- err = -ENOBUFS;
- cl = kzalloc(sizeof(*cl), GFP_KERNEL);
- if (cl == NULL)
- goto failure;
-
- gnet_stats_basic_sync_init(&cl->bstats);
- err = tcf_block_get(&cl->block, &cl->filter_list, sch, extack);
- if (err) {
- kfree(cl);
- goto failure;
- }
-
- if (tca[TCA_RATE]) {
- err = gen_new_estimator(&cl->bstats, NULL, &cl->rate_est,
- NULL, true, tca[TCA_RATE]);
- if (err) {
- NL_SET_ERR_MSG(extack, "Couldn't create new estimator");
- tcf_block_put(cl->block);
- kfree(cl);
- goto failure;
- }
- }
-
- cl->R_tab = rtab;
- rtab = NULL;
- cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid,
- NULL);
- if (!cl->q)
- cl->q = &noop_qdisc;
- else
- qdisc_hash_add(cl->q, true);
-
- cl->common.classid = classid;
- cl->tparent = parent;
- cl->qdisc = sch;
- cl->allot = parent->allot;
- cl->quantum = cl->allot;
- cl->weight = cl->R_tab->rate.rate;
-
- sch_tree_lock(sch);
- cbq_link_class(cl);
- cl->borrow = cl->tparent;
- if (cl->tparent != &q->link)
- cl->share = cl->tparent;
- cbq_adjust_levels(parent);
- cl->minidle = -0x7FFFFFFF;
- cbq_set_lss(cl, nla_data(tb[TCA_CBQ_LSSOPT]));
- cbq_set_wrr(cl, nla_data(tb[TCA_CBQ_WRROPT]));
- if (cl->ewma_log == 0)
- cl->ewma_log = q->link.ewma_log;
- if (cl->maxidle == 0)
- cl->maxidle = q->link.maxidle;
- if (cl->avpkt == 0)
- cl->avpkt = q->link.avpkt;
- if (tb[TCA_CBQ_FOPT])
- cbq_set_fopt(cl, nla_data(tb[TCA_CBQ_FOPT]));
- sch_tree_unlock(sch);
-
- qdisc_class_hash_grow(sch, &q->clhash);
-
- *arg = (unsigned long)cl;
- return 0;
-
-failure:
- qdisc_put_rtab(rtab);
- return err;
-}
-
-static int cbq_delete(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (cl->filters || cl->children || cl == &q->link)
- return -EBUSY;
-
- sch_tree_lock(sch);
-
- qdisc_purge_queue(cl->q);
-
- if (cl->next_alive)
- cbq_deactivate_class(cl);
-
- if (q->tx_borrowed == cl)
- q->tx_borrowed = q->tx_class;
- if (q->tx_class == cl) {
- q->tx_class = NULL;
- q->tx_borrowed = NULL;
- }
-#ifdef CONFIG_NET_CLS_ACT
- if (q->rx_class == cl)
- q->rx_class = NULL;
-#endif
-
- cbq_unlink_class(cl);
- cbq_adjust_levels(cl->tparent);
- cl->defmap = 0;
- cbq_sync_defmap(cl);
-
- cbq_rmprio(q, cl);
- sch_tree_unlock(sch);
-
- cbq_destroy_class(sch, cl);
- return 0;
-}
-
-static struct tcf_block *cbq_tcf_block(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- if (cl == NULL)
- cl = &q->link;
-
- return cl->block;
-}
-
-static unsigned long cbq_bind_filter(struct Qdisc *sch, unsigned long parent,
- u32 classid)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *p = (struct cbq_class *)parent;
- struct cbq_class *cl = cbq_class_lookup(q, classid);
-
- if (cl) {
- if (p && p->level <= cl->level)
- return 0;
- cl->filters++;
- return (unsigned long)cl;
- }
- return 0;
-}
-
-static void cbq_unbind_filter(struct Qdisc *sch, unsigned long arg)
-{
- struct cbq_class *cl = (struct cbq_class *)arg;
-
- cl->filters--;
-}
-
-static void cbq_walk(struct Qdisc *sch, struct qdisc_walker *arg)
-{
- struct cbq_sched_data *q = qdisc_priv(sch);
- struct cbq_class *cl;
- unsigned int h;
-
- if (arg->stop)
- return;
-
- for (h = 0; h < q->clhash.hashsize; h++) {
- hlist_for_each_entry(cl, &q->clhash.hash[h], common.hnode) {
- if (!tc_qdisc_stats_dump(sch, (unsigned long)cl, arg))
- return;
- }
- }
-}
-
-static const struct Qdisc_class_ops cbq_class_ops = {
- .graft = cbq_graft,
- .leaf = cbq_leaf,
- .qlen_notify = cbq_qlen_notify,
- .find = cbq_find,
- .change = cbq_change_class,
- .delete = cbq_delete,
- .walk = cbq_walk,
- .tcf_block = cbq_tcf_block,
- .bind_tcf = cbq_bind_filter,
- .unbind_tcf = cbq_unbind_filter,
- .dump = cbq_dump_class,
- .dump_stats = cbq_dump_class_stats,
-};
-
-static struct Qdisc_ops cbq_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &cbq_class_ops,
- .id = "cbq",
- .priv_size = sizeof(struct cbq_sched_data),
- .enqueue = cbq_enqueue,
- .dequeue = cbq_dequeue,
- .peek = qdisc_peek_dequeued,
- .init = cbq_init,
- .reset = cbq_reset,
- .destroy = cbq_destroy,
- .change = NULL,
- .dump = cbq_dump,
- .dump_stats = cbq_dump_stats,
- .owner = THIS_MODULE,
-};
-
-static int __init cbq_module_init(void)
-{
- return register_qdisc(&cbq_qdisc_ops);
-}
-
-static void __exit cbq_module_exit(void)
-{
- unregister_qdisc(&cbq_qdisc_ops);
-}
-module_init(cbq_module_init)
-module_exit(cbq_module_exit)
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c
deleted file mode 100644
index 401ffaf87d62..000000000000
--- a/net/sched/sch_dsmark.c
+++ /dev/null
@@ -1,518 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* net/sched/sch_dsmark.c - Differentiated Services field marker */
-
-/* Written 1998-2000 by Werner Almesberger, EPFL ICA */
-
-
-#include <linux/module.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/types.h>
-#include <linux/string.h>
-#include <linux/errno.h>
-#include <linux/skbuff.h>
-#include <linux/rtnetlink.h>
-#include <linux/bitops.h>
-#include <net/pkt_sched.h>
-#include <net/pkt_cls.h>
-#include <net/dsfield.h>
-#include <net/inet_ecn.h>
-#include <asm/byteorder.h>
-
-/*
- * classid            class      marking
- * -------            -----      -------
- * n/a                0          n/a
- * x:0                1          use entry [0]
- * ...                ...        ...
- * x:y, y > 0         y+1        use entry [y]
- * ...                ...        ...
- * x:indices-1        indices    use entry [indices-1]
- * ...                ...        ...
- * x:y, y >= indices  y+1        use entry [y & (indices-1)]
- * ...                ...        ...
- * 0xffff             0x10000    use entry [indices-1]
- */
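Each table entry above selects a (mask, value) pair; on dequeue, dsmark rewrites the packet's DS byte with it. A small userspace sketch of the remarking operation (values illustrative; 0xb8 is DSCP EF shifted into the DS byte, and mask 0x03 preserves the two ECN bits):

	#include <stdint.h>
	#include <stdio.h>

	/* Keep the bits selected by mask, overlay value. */
	static uint8_t remark_ds(uint8_t old_ds, uint8_t mask, uint8_t value)
	{
		return (old_ds & mask) | value;
	}

	int main(void)
	{
		printf("0x%02x\n", remark_ds(0x02, 0x03, 0xb8));	/* 0xba */
		return 0;
	}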
-
-
-#define NO_DEFAULT_INDEX (1 << 16)
-
-struct mask_value {
- u8 mask;
- u8 value;
-};
-
-struct dsmark_qdisc_data {
- struct Qdisc *q;
- struct tcf_proto __rcu *filter_list;
- struct tcf_block *block;
- struct mask_value *mv;
- u16 indices;
- u8 set_tc_index;
- u32 default_index; /* index range is 0...0xffff */
-#define DSMARK_EMBEDDED_SZ 16
- struct mask_value embedded[DSMARK_EMBEDDED_SZ];
-};
-
-static inline int dsmark_valid_index(struct dsmark_qdisc_data *p, u16 index)
-{
- return index <= p->indices && index > 0;
-}
-
-/* ------------------------- Class/flow operations ------------------------- */
-
-static int dsmark_graft(struct Qdisc *sch, unsigned long arg,
- struct Qdisc *new, struct Qdisc **old,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p],new %p,old %p)\n",
- __func__, sch, p, new, old);
-
- if (new == NULL) {
- new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops,
- sch->handle, NULL);
- if (new == NULL)
- new = &noop_qdisc;
- }
-
- *old = qdisc_replace(sch, new, &p->q);
- return 0;
-}
-
-static struct Qdisc *dsmark_leaf(struct Qdisc *sch, unsigned long arg)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- return p->q;
-}
-
-static unsigned long dsmark_find(struct Qdisc *sch, u32 classid)
-{
- return TC_H_MIN(classid) + 1;
-}
-
-static unsigned long dsmark_bind_filter(struct Qdisc *sch,
- unsigned long parent, u32 classid)
-{
- pr_debug("%s(sch %p,[qdisc %p],classid %x)\n",
- __func__, sch, qdisc_priv(sch), classid);
-
- return dsmark_find(sch, classid);
-}
-
-static void dsmark_unbind_filter(struct Qdisc *sch, unsigned long cl)
-{
-}
-
-static const struct nla_policy dsmark_policy[TCA_DSMARK_MAX + 1] = {
- [TCA_DSMARK_INDICES] = { .type = NLA_U16 },
- [TCA_DSMARK_DEFAULT_INDEX] = { .type = NLA_U16 },
- [TCA_DSMARK_SET_TC_INDEX] = { .type = NLA_FLAG },
- [TCA_DSMARK_MASK] = { .type = NLA_U8 },
- [TCA_DSMARK_VALUE] = { .type = NLA_U8 },
-};
-
-static int dsmark_change(struct Qdisc *sch, u32 classid, u32 parent,
- struct nlattr **tca, unsigned long *arg,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opt = tca[TCA_OPTIONS];
- struct nlattr *tb[TCA_DSMARK_MAX + 1];
- int err = -EINVAL;
-
- pr_debug("%s(sch %p,[qdisc %p],classid %x,parent %x), arg 0x%lx\n",
- __func__, sch, p, classid, parent, *arg);
-
- if (!dsmark_valid_index(p, *arg)) {
- err = -ENOENT;
- goto errout;
- }
-
- if (!opt)
- goto errout;
-
- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
- dsmark_policy, NULL);
- if (err < 0)
- goto errout;
-
- if (tb[TCA_DSMARK_VALUE])
- p->mv[*arg - 1].value = nla_get_u8(tb[TCA_DSMARK_VALUE]);
-
- if (tb[TCA_DSMARK_MASK])
- p->mv[*arg - 1].mask = nla_get_u8(tb[TCA_DSMARK_MASK]);
-
- err = 0;
-
-errout:
- return err;
-}
-
-static int dsmark_delete(struct Qdisc *sch, unsigned long arg,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- if (!dsmark_valid_index(p, arg))
- return -EINVAL;
-
- p->mv[arg - 1].mask = 0xff;
- p->mv[arg - 1].value = 0;
-
- return 0;
-}
-
-static void dsmark_walk(struct Qdisc *sch, struct qdisc_walker *walker)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int i;
-
- pr_debug("%s(sch %p,[qdisc %p],walker %p)\n",
- __func__, sch, p, walker);
-
- if (walker->stop)
- return;
-
- for (i = 0; i < p->indices; i++) {
- if (p->mv[i].mask == 0xff && !p->mv[i].value) {
- walker->count++;
- continue;
- }
- if (!tc_qdisc_stats_dump(sch, i + 1, walker))
- break;
- }
-}
-
-static struct tcf_block *dsmark_tcf_block(struct Qdisc *sch, unsigned long cl,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- return p->block;
-}
-
-/* --------------------------- Qdisc operations ---------------------------- */
-
-static int dsmark_enqueue(struct sk_buff *skb, struct Qdisc *sch,
- struct sk_buff **to_free)
-{
- unsigned int len = qdisc_pkt_len(skb);
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- int err;
-
- pr_debug("%s(skb %p,sch %p,[qdisc %p])\n", __func__, skb, sch, p);
-
- if (p->set_tc_index) {
- int wlen = skb_network_offset(skb);
-
- switch (skb_protocol(skb, true)) {
- case htons(ETH_P_IP):
- wlen += sizeof(struct iphdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
- goto drop;
-
- skb->tc_index = ipv4_get_dsfield(ip_hdr(skb))
- & ~INET_ECN_MASK;
- break;
-
- case htons(ETH_P_IPV6):
- wlen += sizeof(struct ipv6hdr);
- if (!pskb_may_pull(skb, wlen) ||
- skb_try_make_writable(skb, wlen))
- goto drop;
-
- skb->tc_index = ipv6_get_dsfield(ipv6_hdr(skb))
- & ~INET_ECN_MASK;
- break;
- default:
- skb->tc_index = 0;
- break;
- }
- }
-
- if (TC_H_MAJ(skb->priority) == sch->handle)
- skb->tc_index = TC_H_MIN(skb->priority);
- else {
- struct tcf_result res;
- struct tcf_proto *fl = rcu_dereference_bh(p->filter_list);
- int result = tcf_classify(skb, NULL, fl, &res, false);
-
- pr_debug("result %d class 0x%04x\n", result, res.classid);
-
- switch (result) {
-#ifdef CONFIG_NET_CLS_ACT
- case TC_ACT_QUEUED:
- case TC_ACT_STOLEN:
- case TC_ACT_TRAP:
- __qdisc_drop(skb, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_STOLEN;
-
- case TC_ACT_SHOT:
- goto drop;
-#endif
- case TC_ACT_OK:
- skb->tc_index = TC_H_MIN(res.classid);
- break;
-
- default:
- if (p->default_index != NO_DEFAULT_INDEX)
- skb->tc_index = p->default_index;
- break;
- }
- }
-
- err = qdisc_enqueue(skb, p->q, to_free);
- if (err != NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(err))
- qdisc_qstats_drop(sch);
- return err;
- }
-
- sch->qstats.backlog += len;
- sch->q.qlen++;
-
- return NET_XMIT_SUCCESS;
-
-drop:
- qdisc_drop(skb, sch, to_free);
- return NET_XMIT_SUCCESS | __NET_XMIT_BYPASS;
-}
-
-static struct sk_buff *dsmark_dequeue(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct sk_buff *skb;
- u32 index;
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- skb = qdisc_dequeue_peeked(p->q);
- if (skb == NULL)
- return NULL;
-
- qdisc_bstats_update(sch, skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
-
- index = skb->tc_index & (p->indices - 1);
- pr_debug("index %d->%d\n", skb->tc_index, index);
-
- switch (skb_protocol(skb, true)) {
- case htons(ETH_P_IP):
- ipv4_change_dsfield(ip_hdr(skb), p->mv[index].mask,
- p->mv[index].value);
- break;
- case htons(ETH_P_IPV6):
- ipv6_change_dsfield(ipv6_hdr(skb), p->mv[index].mask,
- p->mv[index].value);
- break;
- default:
- /*
- * Only complain if a change was actually attempted.
- * This way, we can send non-IP traffic through dsmark
- * and don't need yet another qdisc as a bypass.
- */
- if (p->mv[index].mask != 0xff || p->mv[index].value)
- pr_warn("%s: unsupported protocol %d\n",
- __func__, ntohs(skb_protocol(skb, true)));
- break;
- }
-
- return skb;
-}
-
-static struct sk_buff *dsmark_peek(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- return p->q->ops->peek(p->q);
-}
-
-static int dsmark_init(struct Qdisc *sch, struct nlattr *opt,
- struct netlink_ext_ack *extack)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *tb[TCA_DSMARK_MAX + 1];
- int err = -EINVAL;
- u32 default_index = NO_DEFAULT_INDEX;
- u16 indices;
- int i;
-
- pr_debug("%s(sch %p,[qdisc %p],opt %p)\n", __func__, sch, p, opt);
-
- if (!opt)
- goto errout;
-
- err = tcf_block_get(&p->block, &p->filter_list, sch, extack);
- if (err)
- return err;
-
- err = nla_parse_nested_deprecated(tb, TCA_DSMARK_MAX, opt,
- dsmark_policy, NULL);
- if (err < 0)
- goto errout;
-
- err = -EINVAL;
- if (!tb[TCA_DSMARK_INDICES])
- goto errout;
- indices = nla_get_u16(tb[TCA_DSMARK_INDICES]);
-
- if (hweight32(indices) != 1)
- goto errout;
-
- if (tb[TCA_DSMARK_DEFAULT_INDEX])
- default_index = nla_get_u16(tb[TCA_DSMARK_DEFAULT_INDEX]);
-
- if (indices <= DSMARK_EMBEDDED_SZ)
- p->mv = p->embedded;
- else
- p->mv = kmalloc_array(indices, sizeof(*p->mv), GFP_KERNEL);
- if (!p->mv) {
- err = -ENOMEM;
- goto errout;
- }
- for (i = 0; i < indices; i++) {
- p->mv[i].mask = 0xff;
- p->mv[i].value = 0;
- }
- p->indices = indices;
- p->default_index = default_index;
- p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]);
-
- p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle,
- NULL);
- if (p->q == NULL)
- p->q = &noop_qdisc;
- else
- qdisc_hash_add(p->q, true);
-
- pr_debug("%s: qdisc %p\n", __func__, p->q);
-
- err = 0;
-errout:
- return err;
-}
-
-static void dsmark_reset(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
- if (p->q)
- qdisc_reset(p->q);
-}
-
-static void dsmark_destroy(struct Qdisc *sch)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
-
- pr_debug("%s(sch %p,[qdisc %p])\n", __func__, sch, p);
-
- tcf_block_put(p->block);
- qdisc_put(p->q);
- if (p->mv != p->embedded)
- kfree(p->mv);
-}
-
-static int dsmark_dump_class(struct Qdisc *sch, unsigned long cl,
- struct sk_buff *skb, struct tcmsg *tcm)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opts = NULL;
-
- pr_debug("%s(sch %p,[qdisc %p],class %ld\n", __func__, sch, p, cl);
-
- if (!dsmark_valid_index(p, cl))
- return -EINVAL;
-
- tcm->tcm_handle = TC_H_MAKE(TC_H_MAJ(sch->handle), cl - 1);
- tcm->tcm_info = p->q->handle;
-
- opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (opts == NULL)
- goto nla_put_failure;
- if (nla_put_u8(skb, TCA_DSMARK_MASK, p->mv[cl - 1].mask) ||
- nla_put_u8(skb, TCA_DSMARK_VALUE, p->mv[cl - 1].value))
- goto nla_put_failure;
-
- return nla_nest_end(skb, opts);
-
-nla_put_failure:
- nla_nest_cancel(skb, opts);
- return -EMSGSIZE;
-}
-
-static int dsmark_dump(struct Qdisc *sch, struct sk_buff *skb)
-{
- struct dsmark_qdisc_data *p = qdisc_priv(sch);
- struct nlattr *opts = NULL;
-
- opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
- if (opts == NULL)
- goto nla_put_failure;
- if (nla_put_u16(skb, TCA_DSMARK_INDICES, p->indices))
- goto nla_put_failure;
-
- if (p->default_index != NO_DEFAULT_INDEX &&
- nla_put_u16(skb, TCA_DSMARK_DEFAULT_INDEX, p->default_index))
- goto nla_put_failure;
-
- if (p->set_tc_index &&
- nla_put_flag(skb, TCA_DSMARK_SET_TC_INDEX))
- goto nla_put_failure;
-
- return nla_nest_end(skb, opts);
-
-nla_put_failure:
- nla_nest_cancel(skb, opts);
- return -EMSGSIZE;
-}
-
-static const struct Qdisc_class_ops dsmark_class_ops = {
- .graft = dsmark_graft,
- .leaf = dsmark_leaf,
- .find = dsmark_find,
- .change = dsmark_change,
- .delete = dsmark_delete,
- .walk = dsmark_walk,
- .tcf_block = dsmark_tcf_block,
- .bind_tcf = dsmark_bind_filter,
- .unbind_tcf = dsmark_unbind_filter,
- .dump = dsmark_dump_class,
-};
-
-static struct Qdisc_ops dsmark_qdisc_ops __read_mostly = {
- .next = NULL,
- .cl_ops = &dsmark_class_ops,
- .id = "dsmark",
- .priv_size = sizeof(struct dsmark_qdisc_data),
- .enqueue = dsmark_enqueue,
- .dequeue = dsmark_dequeue,
- .peek = dsmark_peek,
- .init = dsmark_init,
- .reset = dsmark_reset,
- .destroy = dsmark_destroy,
- .change = NULL,
- .dump = dsmark_dump,
- .owner = THIS_MODULE,
-};
-
-static int __init dsmark_module_init(void)
-{
- return register_qdisc(&dsmark_qdisc_ops);
-}
-
-static void __exit dsmark_module_exit(void)
-{
- unregister_qdisc(&dsmark_qdisc_ops);
-}
-
-module_init(dsmark_module_init)
-module_exit(dsmark_module_exit)
-
-MODULE_LICENSE("GPL");
diff --git a/net/sched/sch_gred.c b/net/sched/sch_gred.c
index a661b062cca8..872d127c9db4 100644
--- a/net/sched/sch_gred.c
+++ b/net/sched/sch_gred.c
@@ -377,6 +377,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
/* Even if driver returns failure adjust the stats - in case offload
* ended but driver still wants to adjust the values.
*/
+ sch_tree_lock(sch);
for (i = 0; i < MAX_DPs; i++) {
if (!table->tab[i])
continue;
@@ -393,6 +394,7 @@ static int gred_offload_dump_stats(struct Qdisc *sch)
sch->qstats.overlimits += hw_stats->stats.qstats[i].overlimits;
}
_bstats_update(&sch->bstats, bytes, packets);
+ sch_tree_unlock(sch);
kfree(hw_stats);
return ret;
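
The fix above takes the qdisc tree lock around the per-virtual-queue stats folding, so a concurrent reconfiguration cannot free table->tab[i] while the loop dereferences it. The shape of the pattern, reduced to a minimal sketch (not the real gred code; the helper name is made up):

#include <net/sch_generic.h>

/* Fold externally gathered counters into the qdisc's stats under the
 * tree lock, mirroring the bracket added to gred_offload_dump_stats().
 */
static void example_fold_hw_stats(struct Qdisc *sch, u64 bytes, u32 packets)
{
        sch_tree_lock(sch);
        _bstats_update(&sch->bstats, bytes, packets);
        sch_tree_unlock(sch);
}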
diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c
index e5b4bbf3ce3d..92f2975b6a82 100644
--- a/net/sched/sch_htb.c
+++ b/net/sched/sch_htb.c
@@ -199,8 +199,14 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle)
{
return (unsigned long)htb_find(handle, sch);
}
+
+#define HTB_DIRECT ((struct htb_class *)-1L)
+
/**
* htb_classify - classify a packet into class
+ * @skb: the socket buffer
+ * @sch: the active queue discipline
+ * @qerr: pointer for returned status code
*
* It returns NULL if the packet should be dropped or -1 if the packet
* should be passed directly thru. In all other cases leaf class is returned.
@@ -211,8 +217,6 @@ static unsigned long htb_search(struct Qdisc *sch, u32 handle)
* have no valid leaf we try to use MAJOR:default leaf. If still unsuccessful
* then finish and return direct queue.
*/
-#define HTB_DIRECT ((struct htb_class *)-1L)
-
static struct htb_class *htb_classify(struct sk_buff *skb, struct Qdisc *sch,
int *qerr)
{
@@ -427,7 +431,10 @@ static void htb_activate_prios(struct htb_sched *q, struct htb_class *cl)
while (cl->cmode == HTB_MAY_BORROW && p && mask) {
m = mask;
while (m) {
- int prio = ffz(~m);
+ unsigned int prio = ffz(~m);
+
+ if (WARN_ON_ONCE(prio >= ARRAY_SIZE(p->inner.clprio)))
+ break;
m &= ~(1 << prio);
if (p->inner.clprio[prio].feed.rb_node)
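
In the hunk above, ffz(~m) returns the index of the lowest set bit of the priority mask, and the added WARN_ON_ONCE() keeps a corrupted mask from indexing past p->inner.clprio[]. The same iteration in stand-alone form, with __builtin_ctz() standing in for ffz(~m):

#include <stdio.h>

int main(void)
{
        unsigned int m = 0x29;  /* priorities 0, 3 and 5 active */

        while (m) {
                unsigned int prio = __builtin_ctz(m);   /* == ffz(~m) */

                m &= ~(1u << prio);
                printf("activating prio %u\n", prio);   /* 0, then 3, then 5 */
        }
        return 0;
}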
@@ -1545,7 +1552,7 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
struct tc_htb_qopt_offload offload_opt;
struct netdev_queue *dev_queue;
struct Qdisc *q = cl->leaf.q;
- struct Qdisc *old = NULL;
+ struct Qdisc *old;
int err;
if (cl->level)
@@ -1553,14 +1560,17 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
WARN_ON(!q);
dev_queue = htb_offload_get_queue(cl);
- old = htb_graft_helper(dev_queue, NULL);
- if (destroying)
- /* Before HTB is destroyed, the kernel grafts noop_qdisc to
- * all queues.
+ /* When destroying, caller qdisc_graft grafts the new qdisc and invokes
+ * qdisc_put for the qdisc being destroyed. htb_destroy_class_offload
+ * does not need to graft or qdisc_put the qdisc being destroyed.
+ */
+ if (!destroying) {
+ old = htb_graft_helper(dev_queue, NULL);
+ /* Last qdisc grafted should be the same as cl->leaf.q when
+ * calling htb_delete.
*/
- WARN_ON(!(old->flags & TCQ_F_BUILTIN));
- else
WARN_ON(old != q);
+ }
if (cl->parent) {
_bstats_update(&cl->parent->bstats_bias,
@@ -1577,10 +1587,12 @@ static int htb_destroy_class_offload(struct Qdisc *sch, struct htb_class *cl,
};
err = htb_offload(qdisc_dev(sch), &offload_opt);
- if (!err || destroying)
- qdisc_put(old);
- else
- htb_graft_helper(dev_queue, old);
+ if (!destroying) {
+ if (!err)
+ qdisc_put(old);
+ else
+ htb_graft_helper(dev_queue, old);
+ }
if (last_child)
return err;
diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c
index 4c68abaa289b..48ed87b91086 100644
--- a/net/sched/sch_mqprio.c
+++ b/net/sched/sch_mqprio.c
@@ -17,6 +17,8 @@
#include <net/sch_generic.h>
#include <net/pkt_cls.h>
+#include "sch_mqprio_lib.h"
+
struct mqprio_sched {
struct Qdisc **qdiscs;
u16 mode;
@@ -27,6 +29,62 @@ struct mqprio_sched {
u64 max_rate[TC_QOPT_MAX_QUEUE];
};
+static int mqprio_enable_offload(struct Qdisc *sch,
+ const struct tc_mqprio_qopt *qopt,
+ struct netlink_ext_ack *extack)
+{
+ struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int err, i;
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
+ return -EINVAL;
+ break;
+ case TC_MQPRIO_MODE_CHANNEL:
+ mqprio.flags = priv->flags;
+ if (priv->flags & TC_MQPRIO_F_MODE)
+ mqprio.mode = priv->mode;
+ if (priv->flags & TC_MQPRIO_F_SHAPER)
+ mqprio.shaper = priv->shaper;
+ if (priv->flags & TC_MQPRIO_F_MIN_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.min_rate[i] = priv->min_rate[i];
+ if (priv->flags & TC_MQPRIO_F_MAX_RATE)
+ for (i = 0; i < mqprio.qopt.num_tc; i++)
+ mqprio.max_rate[i] = priv->max_rate[i];
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ if (err)
+ return err;
+
+ priv->hw_offload = mqprio.qopt.hw;
+
+ return 0;
+}
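
mqprio_enable_offload() hands the assembled tc_mqprio_qopt_offload to the driver via ndo_setup_tc() and records whatever offload level the driver echoes back in qopt.hw. For context, a hedged sketch of a driver-side handler (the handler name and its DCB-only policy are hypothetical, not from any in-tree driver):

#include <linux/netdevice.h>
#include <net/pkt_cls.h>

static int example_setup_tc(struct net_device *dev, enum tc_setup_type type,
                            void *type_data)
{
        struct tc_mqprio_qopt_offload *mqprio = type_data;

        if (type != TC_SETUP_QDISC_MQPRIO)
                return -EOPNOTSUPP;

        /* This sketch only accepts the default DCB mode. */
        if (mqprio->mode != TC_MQPRIO_MODE_DCB)
                return -EOPNOTSUPP;

        /* ... program the TC-to-queue mapping into hardware here ... */

        mqprio->qopt.hw = TC_MQPRIO_HW_OFFLOAD_TCS;     /* echo offload level */
        return 0;
}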
+
+static void mqprio_disable_offload(struct Qdisc *sch)
+{
+ struct tc_mqprio_qopt_offload mqprio = { { 0 } };
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+
+ switch (priv->mode) {
+ case TC_MQPRIO_MODE_DCB:
+ case TC_MQPRIO_MODE_CHANNEL:
+ dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_QDISC_MQPRIO,
+ &mqprio);
+ break;
+ }
+}
+
static void mqprio_destroy(struct Qdisc *sch)
{
struct net_device *dev = qdisc_dev(sch);
@@ -41,37 +99,17 @@ static void mqprio_destroy(struct Qdisc *sch)
kfree(priv->qdiscs);
}
- if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc) {
- struct tc_mqprio_qopt_offload mqprio = { { 0 } };
-
- switch (priv->mode) {
- case TC_MQPRIO_MODE_DCB:
- case TC_MQPRIO_MODE_CHANNEL:
- dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_MQPRIO,
- &mqprio);
- break;
- default:
- return;
- }
- } else {
+ if (priv->hw_offload && dev->netdev_ops->ndo_setup_tc)
+ mqprio_disable_offload(sch);
+ else
netdev_set_num_tc(dev, 0);
- }
}
-static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ const struct tc_mqprio_caps *caps,
+ struct netlink_ext_ack *extack)
{
- int i, j;
-
- /* Verify num_tc is not out of max range */
- if (qopt->num_tc > TC_MAX_QUEUE)
- return -EINVAL;
-
- /* Verify priority mapping uses valid tcs */
- for (i = 0; i < TC_BITMASK + 1; i++) {
- if (qopt->prio_tc_map[i] >= qopt->num_tc)
- return -EINVAL;
- }
+ int err;
/* Limit qopt->hw to maximum supported offload value. Drivers have
* the option of overriding this later if they don't support a
@@ -80,31 +118,23 @@ static int mqprio_parse_opt(struct net_device *dev, struct tc_mqprio_qopt *qopt)
if (qopt->hw > TC_MQPRIO_HW_OFFLOAD_MAX)
qopt->hw = TC_MQPRIO_HW_OFFLOAD_MAX;
- /* If hardware offload is requested we will leave it to the device
- * to either populate the queue counts itself or to validate the
- * provided queue counts. If ndo_setup_tc is not present then
- * hardware doesn't support offload and we should return an error.
+ /* If hardware offload is requested, we will leave 3 options to the
+ * device driver:
+ * - populate the queue counts itself (and ignore what was requested)
+ * - validate the provided queue counts by itself (and apply them)
+ * - request queue count validation here (and apply them)
*/
- if (qopt->hw)
- return dev->netdev_ops->ndo_setup_tc ? 0 : -EINVAL;
-
- for (i = 0; i < qopt->num_tc; i++) {
- unsigned int last = qopt->offset[i] + qopt->count[i];
-
- /* Verify the queue count is in tx range being equal to the
- * real_num_tx_queues indicates the last queue is in use.
- */
- if (qopt->offset[i] >= dev->real_num_tx_queues ||
- !qopt->count[i] ||
- last > dev->real_num_tx_queues)
- return -EINVAL;
-
- /* Verify that the offset and counts do not overlap */
- for (j = i + 1; j < qopt->num_tc; j++) {
- if (last > qopt->offset[j])
- return -EINVAL;
- }
- }
+ err = mqprio_validate_qopt(dev, qopt,
+ !qopt->hw || caps->validate_queue_counts,
+ false, extack);
+ if (err)
+ return err;
+
+ /* If ndo_setup_tc is not present then hardware doesn't support offload
+ * and we should return an error.
+ */
+ if (qopt->hw && !dev->netdev_ops->ndo_setup_tc)
+ return -EINVAL;
return 0;
}
@@ -130,6 +160,67 @@ static int parse_attr(struct nlattr *tb[], int maxtype, struct nlattr *nla,
return 0;
}
+static int mqprio_parse_nlattr(struct Qdisc *sch, struct tc_mqprio_qopt *qopt,
+ struct nlattr *opt)
+{
+ struct mqprio_sched *priv = qdisc_priv(sch);
+ struct nlattr *tb[TCA_MQPRIO_MAX + 1];
+ struct nlattr *attr;
+ int i, rem, err;
+
+ err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
+ sizeof(*qopt));
+ if (err < 0)
+ return err;
+
+ if (!qopt->hw)
+ return -EINVAL;
+
+ if (tb[TCA_MQPRIO_MODE]) {
+ priv->flags |= TC_MQPRIO_F_MODE;
+ priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
+ }
+
+ if (tb[TCA_MQPRIO_SHAPER]) {
+ priv->flags |= TC_MQPRIO_F_SHAPER;
+ priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
+ }
+
+ if (tb[TCA_MQPRIO_MIN_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->min_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MIN_RATE;
+ }
+
+ if (tb[TCA_MQPRIO_MAX_RATE64]) {
+ if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
+ return -EINVAL;
+ i = 0;
+ nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
+ rem) {
+ if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
+ return -EINVAL;
+ if (i >= qopt->num_tc)
+ break;
+ priv->max_rate[i] = *(u64 *)nla_data(attr);
+ i++;
+ }
+ priv->flags |= TC_MQPRIO_F_MAX_RATE;
+ }
+
+ return 0;
+}
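
The per-TC rate limits arrive as nested netlink attributes, one TCA_MQPRIO_MIN_RATE64 (or TCA_MQPRIO_MAX_RATE64) child per traffic class, which is why the loops above pair nla_for_each_nested() with an index capped at num_tc. The iteration contract, condensed into a sketch (hypothetical helper, min-rate case only):

#include <linux/pkt_sched.h>
#include <net/netlink.h>

/* Walk a nested container whose children each hold one u64 rate. */
static int example_parse_rates(struct nlattr *container, u64 *rates,
                               int num_tc)
{
        struct nlattr *attr;
        int i = 0, rem;

        nla_for_each_nested(attr, container, rem) {
                if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
                        return -EINVAL;
                if (i >= num_tc)
                        break;          /* ignore surplus entries */
                rates[i++] = nla_get_u64(attr);
        }
        return 0;
}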
+
static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
@@ -139,9 +230,7 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
struct Qdisc *qdisc;
int i, err = -EOPNOTSUPP;
struct tc_mqprio_qopt *qopt = NULL;
- struct nlattr *tb[TCA_MQPRIO_MAX + 1];
- struct nlattr *attr;
- int rem;
+ struct tc_mqprio_caps caps;
int len;
BUILD_BUG_ON(TC_MAX_QUEUE != TC_QOPT_MAX_QUEUE);
@@ -160,61 +249,18 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
if (!opt || nla_len(opt) < sizeof(*qopt))
return -EINVAL;
+ qdisc_offload_query_caps(dev, TC_SETUP_QDISC_MQPRIO,
+ &caps, sizeof(caps));
+
qopt = nla_data(opt);
- if (mqprio_parse_opt(dev, qopt))
+ if (mqprio_parse_opt(dev, qopt, &caps, extack))
return -EINVAL;
len = nla_len(opt) - NLA_ALIGN(sizeof(*qopt));
if (len > 0) {
- err = parse_attr(tb, TCA_MQPRIO_MAX, opt, mqprio_policy,
- sizeof(*qopt));
- if (err < 0)
+ err = mqprio_parse_nlattr(sch, qopt, opt);
+ if (err)
return err;
-
- if (!qopt->hw)
- return -EINVAL;
-
- if (tb[TCA_MQPRIO_MODE]) {
- priv->flags |= TC_MQPRIO_F_MODE;
- priv->mode = *(u16 *)nla_data(tb[TCA_MQPRIO_MODE]);
- }
-
- if (tb[TCA_MQPRIO_SHAPER]) {
- priv->flags |= TC_MQPRIO_F_SHAPER;
- priv->shaper = *(u16 *)nla_data(tb[TCA_MQPRIO_SHAPER]);
- }
-
- if (tb[TCA_MQPRIO_MIN_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MIN_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MIN_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->min_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MIN_RATE;
- }
-
- if (tb[TCA_MQPRIO_MAX_RATE64]) {
- if (priv->shaper != TC_MQPRIO_SHAPER_BW_RATE)
- return -EINVAL;
- i = 0;
- nla_for_each_nested(attr, tb[TCA_MQPRIO_MAX_RATE64],
- rem) {
- if (nla_type(attr) != TCA_MQPRIO_MAX_RATE64)
- return -EINVAL;
- if (i >= qopt->num_tc)
- break;
- priv->max_rate[i] = *(u64 *)nla_data(attr);
- i++;
- }
- priv->flags |= TC_MQPRIO_F_MAX_RATE;
- }
}
/* pre-allocate qdisc, attachment can't fail */
@@ -241,36 +287,9 @@ static int mqprio_init(struct Qdisc *sch, struct nlattr *opt,
* supplied and verified mapping
*/
if (qopt->hw) {
- struct tc_mqprio_qopt_offload mqprio = {.qopt = *qopt};
-
- switch (priv->mode) {
- case TC_MQPRIO_MODE_DCB:
- if (priv->shaper != TC_MQPRIO_SHAPER_DCB)
- return -EINVAL;
- break;
- case TC_MQPRIO_MODE_CHANNEL:
- mqprio.flags = priv->flags;
- if (priv->flags & TC_MQPRIO_F_MODE)
- mqprio.mode = priv->mode;
- if (priv->flags & TC_MQPRIO_F_SHAPER)
- mqprio.shaper = priv->shaper;
- if (priv->flags & TC_MQPRIO_F_MIN_RATE)
- for (i = 0; i < mqprio.qopt.num_tc; i++)
- mqprio.min_rate[i] = priv->min_rate[i];
- if (priv->flags & TC_MQPRIO_F_MAX_RATE)
- for (i = 0; i < mqprio.qopt.num_tc; i++)
- mqprio.max_rate[i] = priv->max_rate[i];
- break;
- default:
- return -EINVAL;
- }
- err = dev->netdev_ops->ndo_setup_tc(dev,
- TC_SETUP_QDISC_MQPRIO,
- &mqprio);
+ err = mqprio_enable_offload(sch, qopt, extack);
if (err)
return err;
-
- priv->hw_offload = mqprio.qopt.hw;
} else {
netdev_set_num_tc(dev, qopt->num_tc);
for (i = 0; i < qopt->num_tc; i++)
@@ -387,7 +406,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct nlattr *nla = (struct nlattr *)skb_tail_pointer(skb);
struct tc_mqprio_qopt opt = { 0 };
struct Qdisc *qdisc;
- unsigned int ntx, tc;
+ unsigned int ntx;
sch->q.qlen = 0;
gnet_stats_basic_sync_init(&sch->bstats);
@@ -411,15 +430,9 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb)
spin_unlock_bh(qdisc_lock(qdisc));
}
- opt.num_tc = netdev_get_num_tc(dev);
- memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
+ mqprio_qopt_reconstruct(dev, &opt);
opt.hw = priv->hw_offload;
- for (tc = 0; tc < netdev_get_num_tc(dev); tc++) {
- opt.count[tc] = dev->tc_to_txq[tc].count;
- opt.offset[tc] = dev->tc_to_txq[tc].offset;
- }
-
if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
goto nla_put_failure;
diff --git a/net/sched/sch_mqprio_lib.c b/net/sched/sch_mqprio_lib.c
new file mode 100644
index 000000000000..c58a533b8ec5
--- /dev/null
+++ b/net/sched/sch_mqprio_lib.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/net.h>
+#include <linux/netdevice.h>
+#include <linux/netlink.h>
+#include <linux/types.h>
+#include <net/pkt_sched.h>
+
+#include "sch_mqprio_lib.h"
+
+/* Returns true if the intervals [a, b) and [c, d) overlap. */
+static bool intervals_overlap(int a, int b, int c, int d)
+{
+ int left = max(a, c), right = min(b, d);
+
+ return left < right;
+}
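
The predicate is the classic half-open interval test: [a, b) and [c, d) intersect exactly when max(a, c) < min(b, d). A quick stand-alone check with hypothetical TXQ ranges:

#include <stdio.h>

/* Same test as intervals_overlap() above, for half-open ranges. */
static int overlaps(int a, int b, int c, int d)
{
        int left = a > c ? a : c;
        int right = b < d ? b : d;

        return left < right;
}

int main(void)
{
        printf("%d\n", overlaps(0, 4, 2, 6));   /* 1: TXQs 2 and 3 shared */
        printf("%d\n", overlaps(0, 4, 4, 8));   /* 0: adjacent is fine */
        return 0;
}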
+
+static int mqprio_validate_queue_counts(struct net_device *dev,
+ const struct tc_mqprio_qopt *qopt,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack)
+{
+ int i, j;
+
+ for (i = 0; i < qopt->num_tc; i++) {
+ unsigned int last = qopt->offset[i] + qopt->count[i];
+
+ if (!qopt->count[i]) {
+ NL_SET_ERR_MSG_FMT_MOD(extack, "No queues for TC %d",
+ i);
+ return -EINVAL;
+ }
+
+ /* Verify the queue count is in the TX range; a range that ends exactly
+ * at real_num_tx_queues means the last queue is in use.
+ */
+ if (qopt->offset[i] >= dev->real_num_tx_queues ||
+ last > dev->real_num_tx_queues) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "Queues %d:%d for TC %d exceed the %d TX queues available",
+ qopt->count[i], qopt->offset[i],
+ i, dev->real_num_tx_queues);
+ return -EINVAL;
+ }
+
+ if (allow_overlapping_txqs)
+ continue;
+
+ /* Verify that the offset and counts do not overlap */
+ for (j = i + 1; j < qopt->num_tc; j++) {
+ if (intervals_overlap(qopt->offset[i], last,
+ qopt->offset[j],
+ qopt->offset[j] +
+ qopt->count[j])) {
+ NL_SET_ERR_MSG_FMT_MOD(extack,
+ "TC %d queues %d@%d overlap with TC %d queues %d@%d",
+ i, qopt->count[i], qopt->offset[i],
+ j, qopt->count[j], qopt->offset[j]);
+ return -EINVAL;
+ }
+ }
+ }
+
+ return 0;
+}
+
+int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ bool validate_queue_counts,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack)
+{
+ int i, err;
+
+ /* Verify num_tc is not out of max range */
+ if (qopt->num_tc > TC_MAX_QUEUE) {
+ NL_SET_ERR_MSG(extack,
+ "Number of traffic classes is outside valid range");
+ return -EINVAL;
+ }
+
+ /* Verify priority mapping uses valid tcs */
+ for (i = 0; i <= TC_BITMASK; i++) {
+ if (qopt->prio_tc_map[i] >= qopt->num_tc) {
+ NL_SET_ERR_MSG(extack,
+ "Invalid traffic class in priority to traffic class mapping");
+ return -EINVAL;
+ }
+ }
+
+ if (validate_queue_counts) {
+ err = mqprio_validate_queue_counts(dev, qopt,
+ allow_overlapping_txqs,
+ extack);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+EXPORT_SYMBOL_GPL(mqprio_validate_qopt);
+
+void mqprio_qopt_reconstruct(struct net_device *dev, struct tc_mqprio_qopt *qopt)
+{
+ int tc, num_tc = netdev_get_num_tc(dev);
+
+ qopt->num_tc = num_tc;
+ memcpy(qopt->prio_tc_map, dev->prio_tc_map, sizeof(qopt->prio_tc_map));
+
+ for (tc = 0; tc < num_tc; tc++) {
+ qopt->count[tc] = dev->tc_to_txq[tc].count;
+ qopt->offset[tc] = dev->tc_to_txq[tc].offset;
+ }
+}
+EXPORT_SYMBOL_GPL(mqprio_qopt_reconstruct);
+
+MODULE_LICENSE("GPL");
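
A dump path consumes the second helper like this (a sketch modelled on the mqprio_dump() hunk above; the function name is hypothetical and netlink error handling is trimmed):

#include <linux/pkt_sched.h>
#include <net/netlink.h>
#include "sch_mqprio_lib.h"

static int example_fill_qopt(struct net_device *dev, struct sk_buff *skb)
{
        struct tc_mqprio_qopt opt = { 0 };

        /* Rebuild num_tc, prio_tc_map and the per-TC count/offset pairs
         * from the device's current state instead of open-coding it.
         */
        mqprio_qopt_reconstruct(dev, &opt);

        return nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt);
}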
diff --git a/net/sched/sch_mqprio_lib.h b/net/sched/sch_mqprio_lib.h
new file mode 100644
index 000000000000..63f725ab8761
--- /dev/null
+++ b/net/sched/sch_mqprio_lib.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __SCH_MQPRIO_LIB_H
+#define __SCH_MQPRIO_LIB_H
+
+#include <linux/types.h>
+
+struct net_device;
+struct netlink_ext_ack;
+struct tc_mqprio_qopt;
+
+int mqprio_validate_qopt(struct net_device *dev, struct tc_mqprio_qopt *qopt,
+ bool validate_queue_counts,
+ bool allow_overlapping_txqs,
+ struct netlink_ext_ack *extack);
+void mqprio_qopt_reconstruct(struct net_device *dev,
+ struct tc_mqprio_qopt *qopt);
+
+#endif
diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c
index 570389f6cdd7..1f469861eae3 100644
--- a/net/sched/sch_taprio.c
+++ b/net/sched/sch_taprio.c
@@ -26,7 +26,11 @@
#include <net/sock.h>
#include <net/tcp.h>
+#include "sch_mqprio_lib.h"
+
static LIST_HEAD(taprio_list);
+static struct static_key_false taprio_have_broken_mqprio;
+static struct static_key_false taprio_have_working_mqprio;
#define TAPRIO_ALL_GATES_OPEN -1
@@ -35,15 +39,19 @@ static LIST_HEAD(taprio_list);
#define TAPRIO_FLAGS_INVALID U32_MAX
struct sched_entry {
- struct list_head list;
-
- /* The instant that this entry "closes" and the next one
- * should open, the qdisc will make some effort so that no
- * packet leaves after this time.
+ /* Durations between this GCL entry and the GCL entry where the
+ * respective traffic class gate closes
*/
- ktime_t close_time;
+ u64 gate_duration[TC_MAX_QUEUE];
+ atomic_t budget[TC_MAX_QUEUE];
+ /* The qdisc makes some effort so that no packet leaves
+ * after this time
+ */
+ ktime_t gate_close_time[TC_MAX_QUEUE];
+ struct list_head list;
+ /* Used to calculate when to advance the schedule */
+ ktime_t end_time;
ktime_t next_txtime;
- atomic_t budget;
int index;
u32 gate_mask;
u32 interval;
@@ -51,10 +59,16 @@ struct sched_entry {
};
struct sched_gate_list {
+ /* Longest non-zero contiguous gate durations per traffic class,
+ * or 0 if a traffic class gate never opens during the schedule.
+ */
+ u64 max_open_gate_duration[TC_MAX_QUEUE];
+ u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
+ u32 max_sdu[TC_MAX_QUEUE]; /* for dump */
struct rcu_head rcu;
struct list_head entries;
size_t num_entries;
- ktime_t cycle_close_time;
+ ktime_t cycle_end_time;
s64 cycle_time;
s64 cycle_time_extension;
s64 base_time;
@@ -67,6 +81,8 @@ struct taprio_sched {
enum tk_offsets tk_offset;
int clockid;
bool offloaded;
+ bool detected_mqprio;
+ bool broken_mqprio;
atomic64_t picos_per_byte; /* Using picoseconds because for 10Gbps+
* speeds it's sub-nanoseconds per byte
*/
@@ -78,8 +94,8 @@ struct taprio_sched {
struct sched_gate_list __rcu *admin_sched;
struct hrtimer advance_timer;
struct list_head taprio_list;
- u32 max_frm_len[TC_MAX_QUEUE]; /* for the fast path */
- u32 max_sdu[TC_MAX_QUEUE]; /* for dump and offloading */
+ int cur_txq[TC_MAX_QUEUE];
+ u32 max_sdu[TC_MAX_QUEUE]; /* save info from the user */
u32 txtime_delay;
};
@@ -88,6 +104,57 @@ struct __tc_taprio_qopt_offload {
struct tc_taprio_qopt_offload offload;
};
+static void taprio_calculate_gate_durations(struct taprio_sched *q,
+ struct sched_gate_list *sched)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
+ struct sched_entry *entry, *cur;
+ int tc;
+
+ list_for_each_entry(entry, &sched->entries, list) {
+ u32 gates_still_open = entry->gate_mask;
+
+ /* For each traffic class, calculate each open gate duration,
+ * starting at this schedule entry and ending at the schedule
+ * entry containing a gate close event for that TC.
+ */
+ cur = entry;
+
+ do {
+ if (!gates_still_open)
+ break;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (!(gates_still_open & BIT(tc)))
+ continue;
+
+ if (cur->gate_mask & BIT(tc))
+ entry->gate_duration[tc] += cur->interval;
+ else
+ gates_still_open &= ~BIT(tc);
+ }
+
+ cur = list_next_entry_circular(cur, &sched->entries, list);
+ } while (cur != entry);
+
+ /* Keep track of the maximum gate duration for each traffic
+ * class, taking care to not confuse a traffic class which is
+ * temporarily closed with one that is always closed.
+ */
+ for (tc = 0; tc < num_tc; tc++)
+ if (entry->gate_duration[tc] &&
+ sched->max_open_gate_duration[tc] < entry->gate_duration[tc])
+ sched->max_open_gate_duration[tc] = entry->gate_duration[tc];
+ }
+}
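
To make the walk above concrete, take a hypothetical three-entry schedule: entry 0 runs 100 us with gates {TC0, TC1} open, entry 1 runs 200 us with {TC0}, entry 2 runs 300 us with {TC1}. Starting at entry 0, TC0 stays open through entries 0 and 1 (300 us) while TC1 closes after entry 0 (100 us). A stand-alone simulation of the circular accumulation:

#include <stdio.h>

#define NUM_ENTRIES     3
#define NUM_TC          2

int main(void)
{
        unsigned int interval[NUM_ENTRIES] = { 100, 200, 300 };   /* us */
        unsigned int gate_mask[NUM_ENTRIES] = { 0x3, 0x1, 0x2 };
        unsigned long long gate_duration[NUM_ENTRIES][NUM_TC] = { { 0 } };
        int e, k, tc;

        for (e = 0; e < NUM_ENTRIES; e++) {
                unsigned int still_open = gate_mask[e];

                /* Visit each entry once, circularly, starting at e. */
                for (k = 0; k < NUM_ENTRIES && still_open; k++) {
                        int cur = (e + k) % NUM_ENTRIES;

                        for (tc = 0; tc < NUM_TC; tc++) {
                                if (!(still_open & (1u << tc)))
                                        continue;
                                if (gate_mask[cur] & (1u << tc))
                                        gate_duration[e][tc] += interval[cur];
                                else
                                        still_open &= ~(1u << tc);
                        }
                }
                printf("entry %d: TC0=%llu us, TC1=%llu us\n", e,
                       gate_duration[e][0], gate_duration[e][1]);
        }
        return 0;
}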
+
+static bool taprio_entry_allows_tx(ktime_t skb_end_time,
+ struct sched_entry *entry, int tc)
+{
+ return ktime_before(skb_end_time, entry->gate_close_time[tc]);
+}
+
static ktime_t sched_base_time(const struct sched_gate_list *sched)
{
if (!sched)
@@ -180,6 +247,63 @@ static int length_to_duration(struct taprio_sched *q, int len)
return div_u64(len * atomic64_read(&q->picos_per_byte), PSEC_PER_NSEC);
}
+static int duration_to_length(struct taprio_sched *q, u64 duration)
+{
+ return div_u64(duration * PSEC_PER_NSEC, atomic64_read(&q->picos_per_byte));
+}
+
+/* Sets sched->max_sdu[] and sched->max_frm_len[] to the minimum between the
+ * q->max_sdu[] requested by the user and the max_sdu dynamically determined by
+ * the maximum open gate durations at the given link speed.
+ */
+static void taprio_update_queue_max_sdu(struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct qdisc_size_table *stab)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
+ u32 max_sdu_from_user;
+ u32 max_sdu_dynamic;
+ u32 max_sdu;
+ int tc;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ max_sdu_from_user = q->max_sdu[tc] ?: U32_MAX;
+
+ /* TC gate never closes => keep the queueMaxSDU
+ * selected by the user
+ */
+ if (sched->max_open_gate_duration[tc] == sched->cycle_time) {
+ max_sdu_dynamic = U32_MAX;
+ } else {
+ u32 max_frm_len;
+
+ max_frm_len = duration_to_length(q, sched->max_open_gate_duration[tc]);
+ /* Compensate for L1 overhead from size table,
+ * but don't let the frame size go negative
+ */
+ if (stab) {
+ max_frm_len -= stab->szopts.overhead;
+ max_frm_len = max_t(int, max_frm_len,
+ dev->hard_header_len + 1);
+ }
+ max_sdu_dynamic = max_frm_len - dev->hard_header_len;
+ if (max_sdu_dynamic > dev->max_mtu)
+ max_sdu_dynamic = U32_MAX;
+ }
+
+ max_sdu = min(max_sdu_dynamic, max_sdu_from_user);
+
+ if (max_sdu != U32_MAX) {
+ sched->max_frm_len[tc] = max_sdu + dev->hard_header_len;
+ sched->max_sdu[tc] = max_sdu;
+ } else {
+ sched->max_frm_len[tc] = U32_MAX; /* never oversized */
+ sched->max_sdu[tc] = 0;
+ }
+ }
+}
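
The dynamic limit above is plain line-rate arithmetic: at 1 Gbps a byte occupies 8 ns (8000 ps) on the wire, so a 100 us open gate fits at most 12500 bytes of frame. A sketch of the duration_to_length() conversion with those hypothetical numbers:

#include <stdio.h>

int main(void)
{
        /* bytes = duration_ns * 1000 / picos_per_byte, as in
         * duration_to_length(); 1 Gbps => 8000 ps per byte. The size
         * table overhead and max_mtu corrections above are skipped.
         */
        unsigned long long picos_per_byte = 8000;
        unsigned long long gate_ns = 100 * 1000ULL;     /* 100 us gate */
        unsigned long long max_frm_len = gate_ns * 1000 / picos_per_byte;

        printf("max frame %llu bytes, max SDU %llu bytes\n",
               max_frm_len, max_frm_len - 14);          /* Ethernet header */
        return 0;
}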
+
/* Returns the entry corresponding to next available interval. If
* validate_interval is set, it only validates whether the timestamp occurs
* when the gate corresponding to the skb's traffic class is open.
@@ -413,14 +537,33 @@ done:
return txtime;
}
-static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
- struct Qdisc *child, struct sk_buff **to_free)
+/* Devices with full offload are expected to honor this in hardware */
+static bool taprio_skb_exceeds_queue_max_sdu(struct Qdisc *sch,
+ struct sk_buff *skb)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
+ struct sched_gate_list *sched;
int prio = skb->priority;
+ bool exceeds = false;
u8 tc;
+ tc = netdev_get_prio_tc_map(dev, prio);
+
+ rcu_read_lock();
+ sched = rcu_dereference(q->oper_sched);
+ if (sched && skb->len > sched->max_frm_len[tc])
+ exceeds = true;
+ rcu_read_unlock();
+
+ return exceeds;
+}
+
+static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
+ struct Qdisc *child, struct sk_buff **to_free)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+
/* sk_flags are only safe to use on full sockets. */
if (skb->sk && sk_fullsock(skb->sk) && sock_flag(skb->sk, SOCK_TXTIME)) {
if (!is_valid_interval(skb, sch))
@@ -431,17 +574,53 @@ static int taprio_enqueue_one(struct sk_buff *skb, struct Qdisc *sch,
return qdisc_drop(skb, sch, to_free);
}
- /* Devices with full offload are expected to honor this in hardware */
- tc = netdev_get_prio_tc_map(dev, prio);
- if (skb->len > q->max_frm_len[tc])
- return qdisc_drop(skb, sch, to_free);
-
qdisc_qstats_backlog_inc(sch, skb);
sch->q.qlen++;
return qdisc_enqueue(skb, child, to_free);
}
+static int taprio_enqueue_segmented(struct sk_buff *skb, struct Qdisc *sch,
+ struct Qdisc *child,
+ struct sk_buff **to_free)
+{
+ unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
+ netdev_features_t features = netif_skb_features(skb);
+ struct sk_buff *segs, *nskb;
+ int ret;
+
+ segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
+ if (IS_ERR_OR_NULL(segs))
+ return qdisc_drop(skb, sch, to_free);
+
+ skb_list_walk_safe(segs, segs, nskb) {
+ skb_mark_not_on_list(segs);
+ qdisc_skb_cb(segs)->pkt_len = segs->len;
+ slen += segs->len;
+
+ /* FIXME: we should be segmenting to a smaller size
+ * rather than dropping these
+ */
+ if (taprio_skb_exceeds_queue_max_sdu(sch, segs))
+ ret = qdisc_drop(segs, sch, to_free);
+ else
+ ret = taprio_enqueue_one(segs, sch, child, to_free);
+
+ if (ret != NET_XMIT_SUCCESS) {
+ if (net_xmit_drop_count(ret))
+ qdisc_qstats_drop(sch);
+ } else {
+ numsegs++;
+ }
+ }
+
+ if (numsegs > 1)
+ qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
+ consume_skb(skb);
+
+ return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+}
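
The closing qdisc_tree_reduce_backlog() call reconciles the accounting: the parent saw one skb of len bytes enter, but numsegs skbs totalling slen bytes were actually queued, so the tree's packet count is reduced by 1 - numsegs and its backlog by len - slen (both usually negative, meaning the counters grow). With hypothetical numbers:

#include <stdio.h>

int main(void)
{
        int len = 9000, slen = 9120, numsegs = 6;       /* made-up GSO split */

        /* Arguments as passed to qdisc_tree_reduce_backlog(sch, ...) */
        printf("reduce qlen by %d, reduce backlog by %d\n",
               1 - numsegs, len - slen);        /* -5 and -120: counters grow */
        return 0;
}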
+
/* Will not be called in the full offload case, since the TX queues are
* attached to the Qdisc created using qdisc_create_dflt()
*/
@@ -458,97 +637,190 @@ static int taprio_enqueue(struct sk_buff *skb, struct Qdisc *sch,
if (unlikely(!child))
return qdisc_drop(skb, sch, to_free);
- /* Large packets might not be transmitted when the transmission duration
- * exceeds any configured interval. Therefore, segment the skb into
- * smaller chunks. Drivers with full offload are expected to handle
- * this in hardware.
- */
- if (skb_is_gso(skb)) {
- unsigned int slen = 0, numsegs = 0, len = qdisc_pkt_len(skb);
- netdev_features_t features = netif_skb_features(skb);
- struct sk_buff *segs, *nskb;
- int ret;
-
- segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
- if (IS_ERR_OR_NULL(segs))
- return qdisc_drop(skb, sch, to_free);
+ if (taprio_skb_exceeds_queue_max_sdu(sch, skb)) {
+ /* Large packets might not be transmitted when the transmission
+ * duration exceeds any configured interval. Therefore, segment
+ * the skb into smaller chunks. Drivers with full offload are
+ * expected to handle this in hardware.
+ */
+ if (skb_is_gso(skb))
+ return taprio_enqueue_segmented(skb, sch, child,
+ to_free);
- skb_list_walk_safe(segs, segs, nskb) {
- skb_mark_not_on_list(segs);
- qdisc_skb_cb(segs)->pkt_len = segs->len;
- slen += segs->len;
+ return qdisc_drop(skb, sch, to_free);
+ }
- ret = taprio_enqueue_one(segs, sch, child, to_free);
- if (ret != NET_XMIT_SUCCESS) {
- if (net_xmit_drop_count(ret))
- qdisc_qstats_drop(sch);
- } else {
- numsegs++;
- }
- }
+ return taprio_enqueue_one(skb, sch, child, to_free);
+}
- if (numsegs > 1)
- qdisc_tree_reduce_backlog(sch, 1 - numsegs, len - slen);
- consume_skb(skb);
+static struct sk_buff *taprio_peek(struct Qdisc *sch)
+{
+ WARN_ONCE(1, "taprio only supports operating as root qdisc, peek() not implemented");
+ return NULL;
+}
+
+static void taprio_set_budgets(struct taprio_sched *q,
+ struct sched_gate_list *sched,
+ struct sched_entry *entry)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
+ int tc, budget;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ /* Traffic classes which never close have infinite budget */
+ if (entry->gate_duration[tc] == sched->cycle_time)
+ budget = INT_MAX;
+ else
+ budget = div64_u64((u64)entry->gate_duration[tc] * PSEC_PER_NSEC,
+ atomic64_read(&q->picos_per_byte));
+
+ atomic_set(&entry->budget[tc], budget);
+ }
+}
- return numsegs > 0 ? NET_XMIT_SUCCESS : NET_XMIT_DROP;
+/* When an skb is sent, it consumes from the budget of all traffic classes */
+static int taprio_update_budgets(struct sched_entry *entry, size_t len,
+ int tc_consumed, int num_tc)
+{
+ int tc, budget, new_budget = 0;
+
+ for (tc = 0; tc < num_tc; tc++) {
+ budget = atomic_read(&entry->budget[tc]);
+ /* Don't consume from infinite budget */
+ if (budget == INT_MAX) {
+ if (tc == tc_consumed)
+ new_budget = budget;
+ continue;
+ }
+
+ if (tc == tc_consumed)
+ new_budget = atomic_sub_return(len, &entry->budget[tc]);
+ else
+ atomic_sub(len, &entry->budget[tc]);
}
- return taprio_enqueue_one(skb, sch, child, to_free);
+ return new_budget;
}
-/* Will not be called in the full offload case, since the TX queues are
- * attached to the Qdisc created using qdisc_create_dflt()
- */
-static struct sk_buff *taprio_peek(struct Qdisc *sch)
+static struct sk_buff *taprio_dequeue_from_txq(struct Qdisc *sch, int txq,
+ struct sched_entry *entry,
+ u32 gate_mask)
{
struct taprio_sched *q = qdisc_priv(sch);
struct net_device *dev = qdisc_dev(sch);
- struct sched_entry *entry;
+ struct Qdisc *child = q->qdiscs[txq];
+ int num_tc = netdev_get_num_tc(dev);
struct sk_buff *skb;
- u32 gate_mask;
- int i;
+ ktime_t guard;
+ int prio;
+ int len;
+ u8 tc;
- rcu_read_lock();
- entry = rcu_dereference(q->current_entry);
- gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
- rcu_read_unlock();
+ if (unlikely(!child))
+ return NULL;
- if (!gate_mask)
+ if (TXTIME_ASSIST_IS_ENABLED(q->flags))
+ goto skip_peek_checks;
+
+ skb = child->ops->peek(child);
+ if (!skb)
return NULL;
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct Qdisc *child = q->qdiscs[i];
- int prio;
- u8 tc;
+ prio = skb->priority;
+ tc = netdev_get_prio_tc_map(dev, prio);
- if (unlikely(!child))
- continue;
+ if (!(gate_mask & BIT(tc)))
+ return NULL;
- skb = child->ops->peek(child);
- if (!skb)
- continue;
+ len = qdisc_pkt_len(skb);
+ guard = ktime_add_ns(taprio_get_time(q), length_to_duration(q, len));
- if (TXTIME_ASSIST_IS_ENABLED(q->flags))
- return skb;
+ /* In the case that there's no gate entry, there's no
+ * guard band ...
+ */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ !taprio_entry_allows_tx(guard, entry, tc))
+ return NULL;
+
+ /* ... and no budget. */
+ if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
+ taprio_update_budgets(entry, len, tc, num_tc) < 0)
+ return NULL;
+
+skip_peek_checks:
+ skb = child->ops->dequeue(child);
+ if (unlikely(!skb))
+ return NULL;
+
+ qdisc_bstats_update(sch, skb);
+ qdisc_qstats_backlog_dec(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+static void taprio_next_tc_txq(struct net_device *dev, int tc, int *txq)
+{
+ int offset = dev->tc_to_txq[tc].offset;
+ int count = dev->tc_to_txq[tc].count;
- prio = skb->priority;
- tc = netdev_get_prio_tc_map(dev, prio);
+ (*txq)++;
+ if (*txq == offset + count)
+ *txq = offset;
+}
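
taprio_next_tc_txq() is a circular increment across the TC's TXQ range; a TC with offset 2 and count 3 owns TXQs {2, 3, 4} and cycles 2 -> 3 -> 4 -> 2. A quick stand-alone check:

#include <stdio.h>

static void next_tc_txq(int offset, int count, int *txq)
{
        (*txq)++;
        if (*txq == offset + count)
                *txq = offset;          /* wrap back to the range start */
}

int main(void)
{
        int i, txq = 2;

        for (i = 0; i < 4; i++) {
                printf("%d ", txq);     /* prints: 2 3 4 2 */
                next_tc_txq(2, 3, &txq);
        }
        printf("\n");
        return 0;
}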
+
+/* Prioritize higher traffic classes, and select among TXQs belonging to the
+ * same TC using round robin
+ */
+static struct sk_buff *taprio_dequeue_tc_priority(struct Qdisc *sch,
+ struct sched_entry *entry,
+ u32 gate_mask)
+{
+ struct taprio_sched *q = qdisc_priv(sch);
+ struct net_device *dev = qdisc_dev(sch);
+ int num_tc = netdev_get_num_tc(dev);
+ struct sk_buff *skb;
+ int tc;
+
+ for (tc = num_tc - 1; tc >= 0; tc--) {
+ int first_txq = q->cur_txq[tc];
if (!(gate_mask & BIT(tc)))
continue;
- return skb;
+ do {
+ skb = taprio_dequeue_from_txq(sch, q->cur_txq[tc],
+ entry, gate_mask);
+
+ taprio_next_tc_txq(dev, tc, &q->cur_txq[tc]);
+
+ if (skb)
+ return skb;
+ } while (q->cur_txq[tc] != first_txq);
}
return NULL;
}
-static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
+/* Broken way of prioritizing smaller TXQ indices and ignoring the traffic
+ * class other than to determine whether the gate is open or not
+ */
+static struct sk_buff *taprio_dequeue_txq_priority(struct Qdisc *sch,
+ struct sched_entry *entry,
+ u32 gate_mask)
{
- atomic_set(&entry->budget,
- div64_u64((u64)entry->interval * PSEC_PER_NSEC,
- atomic64_read(&q->picos_per_byte)));
+ struct net_device *dev = qdisc_dev(sch);
+ struct sk_buff *skb;
+ int i;
+
+ for (i = 0; i < dev->num_tx_queues; i++) {
+ skb = taprio_dequeue_from_txq(sch, i, entry, gate_mask);
+ if (skb)
+ return skb;
+ }
+
+ return NULL;
}
/* Will not be called in the full offload case, since the TX queues are
@@ -557,11 +829,9 @@ static void taprio_set_budget(struct taprio_sched *q, struct sched_entry *entry)
static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
{
struct taprio_sched *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
struct sk_buff *skb = NULL;
struct sched_entry *entry;
u32 gate_mask;
- int i;
rcu_read_lock();
entry = rcu_dereference(q->current_entry);
@@ -571,69 +841,23 @@ static struct sk_buff *taprio_dequeue(struct Qdisc *sch)
* "AdminGateStates"
*/
gate_mask = entry ? entry->gate_mask : TAPRIO_ALL_GATES_OPEN;
-
if (!gate_mask)
goto done;
- for (i = 0; i < dev->num_tx_queues; i++) {
- struct Qdisc *child = q->qdiscs[i];
- ktime_t guard;
- int prio;
- int len;
- u8 tc;
-
- if (unlikely(!child))
- continue;
-
- if (TXTIME_ASSIST_IS_ENABLED(q->flags)) {
- skb = child->ops->dequeue(child);
- if (!skb)
- continue;
- goto skb_found;
- }
-
- skb = child->ops->peek(child);
- if (!skb)
- continue;
-
- prio = skb->priority;
- tc = netdev_get_prio_tc_map(dev, prio);
-
- if (!(gate_mask & BIT(tc))) {
- skb = NULL;
- continue;
- }
-
- len = qdisc_pkt_len(skb);
- guard = ktime_add_ns(taprio_get_time(q),
- length_to_duration(q, len));
-
- /* In the case that there's no gate entry, there's no
- * guard band ...
- */
- if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- ktime_after(guard, entry->close_time)) {
- skb = NULL;
- continue;
- }
-
- /* ... and no budget. */
- if (gate_mask != TAPRIO_ALL_GATES_OPEN &&
- atomic_sub_return(len, &entry->budget) < 0) {
- skb = NULL;
- continue;
- }
-
- skb = child->ops->dequeue(child);
- if (unlikely(!skb))
- goto done;
-
-skb_found:
- qdisc_bstats_update(sch, skb);
- qdisc_qstats_backlog_dec(sch, skb);
- sch->q.qlen--;
-
- goto done;
+ if (static_branch_unlikely(&taprio_have_broken_mqprio) &&
+ !static_branch_likely(&taprio_have_working_mqprio)) {
+ /* Single NIC kind which is broken */
+ skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
+ } else if (static_branch_likely(&taprio_have_working_mqprio) &&
+ !static_branch_unlikely(&taprio_have_broken_mqprio)) {
+ /* Single NIC kind which prioritizes properly */
+ skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
+ } else {
+ /* Mixed NIC kinds present in system, need dynamic testing */
+ if (q->broken_mqprio)
+ skb = taprio_dequeue_txq_priority(sch, entry, gate_mask);
+ else
+ skb = taprio_dequeue_tc_priority(sch, entry, gate_mask);
}
done:
@@ -648,7 +872,7 @@ static bool should_restart_cycle(const struct sched_gate_list *oper,
if (list_is_last(&entry->list, &oper->entries))
return true;
- if (ktime_compare(entry->close_time, oper->cycle_close_time) == 0)
+ if (ktime_compare(entry->end_time, oper->cycle_end_time) == 0)
return true;
return false;
@@ -656,7 +880,7 @@ static bool should_restart_cycle(const struct sched_gate_list *oper,
static bool should_change_schedules(const struct sched_gate_list *admin,
const struct sched_gate_list *oper,
- ktime_t close_time)
+ ktime_t end_time)
{
ktime_t next_base_time, extension_time;
@@ -665,18 +889,18 @@ static bool should_change_schedules(const struct sched_gate_list *admin,
next_base_time = sched_base_time(admin);
- /* This is the simple case, the close_time would fall after
+ /* This is the simple case, the end_time would fall after
* the next schedule base_time.
*/
- if (ktime_compare(next_base_time, close_time) <= 0)
+ if (ktime_compare(next_base_time, end_time) <= 0)
return true;
- /* This is the cycle_time_extension case, if the close_time
+ /* This is the cycle_time_extension case, if the end_time
* plus the amount that can be extended would fall after the
* next schedule base_time, we can extend the current schedule
* for that amount.
*/
- extension_time = ktime_add_ns(close_time, oper->cycle_time_extension);
+ extension_time = ktime_add_ns(end_time, oper->cycle_time_extension);
/* FIXME: the IEEE 802.1Q-2018 Specification isn't clear about
* how precisely the extension should be made. So after
@@ -692,10 +916,13 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
{
struct taprio_sched *q = container_of(timer, struct taprio_sched,
advance_timer);
+ struct net_device *dev = qdisc_dev(q->root);
struct sched_gate_list *oper, *admin;
+ int num_tc = netdev_get_num_tc(dev);
struct sched_entry *entry, *next;
struct Qdisc *sch = q->root;
- ktime_t close_time;
+ ktime_t end_time;
+ int tc;
spin_lock(&q->current_entry_lock);
entry = rcu_dereference_protected(q->current_entry,
@@ -714,41 +941,49 @@ static enum hrtimer_restart advance_sched(struct hrtimer *timer)
* entry of all schedules are pre-calculated during the
* schedule initialization.
*/
- if (unlikely(!entry || entry->close_time == oper->base_time)) {
+ if (unlikely(!entry || entry->end_time == oper->base_time)) {
next = list_first_entry(&oper->entries, struct sched_entry,
list);
- close_time = next->close_time;
+ end_time = next->end_time;
goto first_run;
}
if (should_restart_cycle(oper, entry)) {
next = list_first_entry(&oper->entries, struct sched_entry,
list);
- oper->cycle_close_time = ktime_add_ns(oper->cycle_close_time,
- oper->cycle_time);
+ oper->cycle_end_time = ktime_add_ns(oper->cycle_end_time,
+ oper->cycle_time);
} else {
next = list_next_entry(entry, list);
}
- close_time = ktime_add_ns(entry->close_time, next->interval);
- close_time = min_t(ktime_t, close_time, oper->cycle_close_time);
+ end_time = ktime_add_ns(entry->end_time, next->interval);
+ end_time = min_t(ktime_t, end_time, oper->cycle_end_time);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (next->gate_duration[tc] == oper->cycle_time)
+ next->gate_close_time[tc] = KTIME_MAX;
+ else
+ next->gate_close_time[tc] = ktime_add_ns(entry->end_time,
+ next->gate_duration[tc]);
+ }
- if (should_change_schedules(admin, oper, close_time)) {
+ if (should_change_schedules(admin, oper, end_time)) {
/* Set things so the next time this runs, the new
* schedule runs.
*/
- close_time = sched_base_time(admin);
+ end_time = sched_base_time(admin);
switch_schedules(q, &admin, &oper);
}
- next->close_time = close_time;
- taprio_set_budget(q, next);
+ next->end_time = end_time;
+ taprio_set_budgets(q, oper, next);
first_run:
rcu_assign_pointer(q->current_entry, next);
spin_unlock(&q->current_entry_lock);
- hrtimer_set_expires(&q->advance_timer, close_time);
+ hrtimer_set_expires(&q->advance_timer, end_time);
rcu_read_lock();
__netif_schedule(sch);
@@ -916,6 +1151,8 @@ static int parse_taprio_schedule(struct taprio_sched *q, struct nlattr **tb,
new->cycle_time = cycle;
}
+ taprio_calculate_gate_durations(q, new);
+
return 0;
}
@@ -924,7 +1161,7 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
struct netlink_ext_ack *extack,
u32 taprio_flags)
{
- int i, j;
+ bool allow_overlapping_txqs = TXTIME_ASSIST_IS_ENABLED(taprio_flags);
if (!qopt && !dev->num_tc) {
NL_SET_ERR_MSG(extack, "'mqprio' configuration is necessary");
@@ -937,52 +1174,17 @@ static int taprio_parse_mqprio_opt(struct net_device *dev,
if (dev->num_tc)
return 0;
- /* Verify num_tc is not out of max range */
- if (qopt->num_tc > TC_MAX_QUEUE) {
- NL_SET_ERR_MSG(extack, "Number of traffic classes is outside valid range");
- return -EINVAL;
- }
-
/* taprio imposes that traffic classes map 1:n to tx queues */
if (qopt->num_tc > dev->num_tx_queues) {
NL_SET_ERR_MSG(extack, "Number of traffic classes is greater than number of HW queues");
return -EINVAL;
}
- /* Verify priority mapping uses valid tcs */
- for (i = 0; i <= TC_BITMASK; i++) {
- if (qopt->prio_tc_map[i] >= qopt->num_tc) {
- NL_SET_ERR_MSG(extack, "Invalid traffic class in priority to traffic class mapping");
- return -EINVAL;
- }
- }
-
- for (i = 0; i < qopt->num_tc; i++) {
- unsigned int last = qopt->offset[i] + qopt->count[i];
-
- /* Verify the queue count is in tx range being equal to the
- * real_num_tx_queues indicates the last queue is in use.
- */
- if (qopt->offset[i] >= dev->num_tx_queues ||
- !qopt->count[i] ||
- last > dev->real_num_tx_queues) {
- NL_SET_ERR_MSG(extack, "Invalid queue in traffic class to queue mapping");
- return -EINVAL;
- }
-
- if (TXTIME_ASSIST_IS_ENABLED(taprio_flags))
- continue;
-
- /* Verify that the offset and counts do not overlap */
- for (j = i + 1; j < qopt->num_tc; j++) {
- if (last > qopt->offset[j]) {
- NL_SET_ERR_MSG(extack, "Detected overlap in the traffic class to queue mapping");
- return -EINVAL;
- }
- }
- }
-
- return 0;
+ /* For some reason, in txtime-assist mode, we allow TXQ ranges for
+ * different TCs to overlap, and just validate the TXQ ranges.
+ */
+ return mqprio_validate_qopt(dev, qopt, true, allow_overlapping_txqs,
+ extack);
}
static int taprio_get_start_time(struct Qdisc *sch,
@@ -1019,11 +1221,14 @@ static int taprio_get_start_time(struct Qdisc *sch,
return 0;
}
-static void setup_first_close_time(struct taprio_sched *q,
- struct sched_gate_list *sched, ktime_t base)
+static void setup_first_end_time(struct taprio_sched *q,
+ struct sched_gate_list *sched, ktime_t base)
{
+ struct net_device *dev = qdisc_dev(q->root);
+ int num_tc = netdev_get_num_tc(dev);
struct sched_entry *first;
ktime_t cycle;
+ int tc;
first = list_first_entry(&sched->entries,
struct sched_entry, list);
@@ -1031,10 +1236,18 @@ static void setup_first_close_time(struct taprio_sched *q,
cycle = sched->cycle_time;
/* FIXME: find a better place to do this */
- sched->cycle_close_time = ktime_add_ns(base, cycle);
+ sched->cycle_end_time = ktime_add_ns(base, cycle);
+
+ first->end_time = ktime_add_ns(base, first->interval);
+ taprio_set_budgets(q, sched, first);
+
+ for (tc = 0; tc < num_tc; tc++) {
+ if (first->gate_duration[tc] == sched->cycle_time)
+ first->gate_close_time[tc] = KTIME_MAX;
+ else
+ first->gate_close_time[tc] = ktime_add_ns(base, first->gate_duration[tc]);
+ }
- first->close_time = ktime_add_ns(base, first->interval);
- taprio_set_budget(q, first);
rcu_assign_pointer(q->current_entry, NULL);
}
@@ -1088,6 +1301,8 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct sched_gate_list *oper, *admin;
+ struct qdisc_size_table *stab;
struct taprio_sched *q;
ASSERT_RTNL();
@@ -1100,6 +1315,17 @@ static int taprio_dev_notifier(struct notifier_block *nb, unsigned long event,
continue;
taprio_set_picos_per_byte(dev, q);
+
+ stab = rtnl_dereference(q->root->stab);
+
+ oper = rtnl_dereference(q->oper_sched);
+ if (oper)
+ taprio_update_queue_max_sdu(q, oper, stab);
+
+ admin = rtnl_dereference(q->admin_sched);
+ if (admin)
+ taprio_update_queue_max_sdu(q, admin, stab);
+
break;
}
@@ -1203,7 +1429,8 @@ static u32 tc_map_to_queue_mask(struct net_device *dev, u32 tc_mask)
static void taprio_sched_to_offload(struct net_device *dev,
struct sched_gate_list *sched,
- struct tc_taprio_qopt_offload *offload)
+ struct tc_taprio_qopt_offload *offload,
+ const struct tc_taprio_caps *caps)
{
struct sched_entry *entry;
int i = 0;
@@ -1217,7 +1444,11 @@ static void taprio_sched_to_offload(struct net_device *dev,
e->command = entry->command;
e->interval = entry->interval;
- e->gate_mask = tc_map_to_queue_mask(dev, entry->gate_mask);
+ if (caps->gate_mask_per_txq)
+ e->gate_mask = tc_map_to_queue_mask(dev,
+ entry->gate_mask);
+ else
+ e->gate_mask = entry->gate_mask;
i++;
}
@@ -1225,6 +1456,34 @@ static void taprio_sched_to_offload(struct net_device *dev,
offload->num_entries = i;
}
+static void taprio_detect_broken_mqprio(struct taprio_sched *q)
+{
+ struct net_device *dev = qdisc_dev(q->root);
+ struct tc_taprio_caps caps;
+
+ qdisc_offload_query_caps(dev, TC_SETUP_QDISC_TAPRIO,
+ &caps, sizeof(caps));
+
+ q->broken_mqprio = caps.broken_mqprio;
+ if (q->broken_mqprio)
+ static_branch_inc(&taprio_have_broken_mqprio);
+ else
+ static_branch_inc(&taprio_have_working_mqprio);
+
+ q->detected_mqprio = true;
+}
+
+static void taprio_cleanup_broken_mqprio(struct taprio_sched *q)
+{
+ if (!q->detected_mqprio)
+ return;
+
+ if (q->broken_mqprio)
+ static_branch_dec(&taprio_have_broken_mqprio);
+ else
+ static_branch_dec(&taprio_have_working_mqprio);
+}
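
The two static keys act as system-wide reference counts over taprio instances: the dequeue fast path only needs the per-qdisc flag when NICs of both kinds coexist. A plain-C model of that three-way dispatch (ordinary counters stand in for static_branch_inc()/static_branch_dec()):

#include <stdbool.h>
#include <stdio.h>

static int have_broken_mqprio;          /* static key counters in-kernel */
static int have_working_mqprio;

static const char *pick_dequeue(bool this_qdisc_broken)
{
        if (have_broken_mqprio && !have_working_mqprio)
                return "txq_priority";  /* only broken NICs present */
        if (have_working_mqprio && !have_broken_mqprio)
                return "tc_priority";   /* only working NICs present */
        /* Mixed system: fall back to the per-qdisc flag. */
        return this_qdisc_broken ? "txq_priority" : "tc_priority";
}

int main(void)
{
        have_broken_mqprio++;                   /* first taprio instance */
        printf("%s\n", pick_dequeue(true));     /* txq_priority */

        have_working_mqprio++;                  /* second NIC, working */
        printf("%s\n", pick_dequeue(false));    /* tc_priority */
        return 0;
}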
+
static int taprio_enable_offload(struct net_device *dev,
struct taprio_sched *q,
struct sched_gate_list *sched,
@@ -1261,7 +1520,8 @@ static int taprio_enable_offload(struct net_device *dev,
return -ENOMEM;
}
offload->enable = 1;
- taprio_sched_to_offload(dev, sched, offload);
+ mqprio_qopt_reconstruct(dev, &offload->mqprio.qopt);
+ taprio_sched_to_offload(dev, sched, offload, &caps);
for (tc = 0; tc < TC_MAX_QUEUE; tc++)
offload->max_sdu[tc] = q->max_sdu[tc];
@@ -1452,7 +1712,6 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
struct netlink_ext_ack *extack)
{
struct taprio_sched *q = qdisc_priv(sch);
- struct net_device *dev = qdisc_dev(sch);
u32 max_sdu[TC_QOPT_MAX_QUEUE];
unsigned long seen_tcs = 0;
struct nlattr *n;
@@ -1466,18 +1725,14 @@ static int taprio_parse_tc_entries(struct Qdisc *sch,
if (nla_type(n) != TCA_TAPRIO_ATTR_TC_ENTRY)
continue;
- err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs, extack);
+ err = taprio_parse_tc_entry(sch, n, max_sdu, &seen_tcs,
+ extack);
if (err)
goto out;
}
- for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++) {
+ for (tc = 0; tc < TC_QOPT_MAX_QUEUE; tc++)
q->max_sdu[tc] = max_sdu[tc];
- if (max_sdu[tc])
- q->max_frm_len[tc] = max_sdu[tc] + dev->hard_header_len;
- else
- q->max_frm_len[tc] = U32_MAX; /* never oversized */
- }
out:
return err;
@@ -1533,6 +1788,7 @@ static int taprio_new_flags(const struct nlattr *attr, u32 old,
static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
struct netlink_ext_ack *extack)
{
+ struct qdisc_size_table *stab = rtnl_dereference(sch->stab);
struct nlattr *tb[TCA_TAPRIO_ATTR_MAX + 1] = { };
struct sched_gate_list *oper, *admin, *new_admin;
struct taprio_sched *q = qdisc_priv(sch);
@@ -1585,6 +1841,23 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto free_sched;
}
+ if (mqprio) {
+ err = netdev_set_num_tc(dev, mqprio->num_tc);
+ if (err)
+ goto free_sched;
+ for (i = 0; i < mqprio->num_tc; i++) {
+ netdev_set_tc_queue(dev, i,
+ mqprio->count[i],
+ mqprio->offset[i]);
+ q->cur_txq[i] = mqprio->offset[i];
+ }
+
+ /* Always use supplied priority mappings */
+ for (i = 0; i <= TC_BITMASK; i++)
+ netdev_set_prio_tc_map(dev, i,
+ mqprio->prio_tc_map[i]);
+ }
+
err = parse_taprio_schedule(q, tb, new_admin, extack);
if (err < 0)
goto free_sched;
@@ -1600,21 +1873,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
goto free_sched;
taprio_set_picos_per_byte(dev, q);
-
- if (mqprio) {
- err = netdev_set_num_tc(dev, mqprio->num_tc);
- if (err)
- goto free_sched;
- for (i = 0; i < mqprio->num_tc; i++)
- netdev_set_tc_queue(dev, i,
- mqprio->count[i],
- mqprio->offset[i]);
-
- /* Always use supplied priority mappings */
- for (i = 0; i <= TC_BITMASK; i++)
- netdev_set_prio_tc_map(dev, i,
- mqprio->prio_tc_map[i]);
- }
+ taprio_update_queue_max_sdu(q, new_admin, stab);
if (FULL_OFFLOAD_IS_ENABLED(q->flags))
err = taprio_enable_offload(dev, q, new_admin, extack);
@@ -1663,7 +1922,7 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
} else {
- setup_first_close_time(q, new_admin, start);
+ setup_first_end_time(q, new_admin, start);
/* Protects against advance_sched() */
spin_lock_irqsave(&q->current_entry_lock, flags);
@@ -1683,6 +1942,10 @@ static int taprio_change(struct Qdisc *sch, struct nlattr *opt,
new_admin = NULL;
err = 0;
+ if (!stab)
+ NL_SET_ERR_MSG_MOD(extack,
+ "Size table not specified, frame length estimations may be inaccurate");
+
unlock:
spin_unlock_bh(qdisc_lock(sch));
@@ -1700,6 +1963,7 @@ static void taprio_reset(struct Qdisc *sch)
int i;
hrtimer_cancel(&q->advance_timer);
+
if (q->qdiscs) {
for (i = 0; i < dev->num_tx_queues; i++)
if (q->qdiscs[i])
@@ -1720,6 +1984,7 @@ static void taprio_destroy(struct Qdisc *sch)
* happens in qdisc_create(), after taprio_init() has been called.
*/
hrtimer_cancel(&q->advance_timer);
+ qdisc_synchronize(sch);
taprio_disable_offload(dev, q, NULL);
@@ -1741,6 +2006,8 @@ static void taprio_destroy(struct Qdisc *sch)
if (admin)
call_rcu(&admin->rcu, taprio_free_sched_cb);
+
+ taprio_cleanup_broken_mqprio(q);
}
static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
@@ -1805,6 +2072,8 @@ static int taprio_init(struct Qdisc *sch, struct nlattr *opt,
q->qdiscs[i] = qdisc;
}
+ taprio_detect_broken_mqprio(q);
+
return taprio_change(sch, opt, extack);
}
@@ -1945,7 +2214,8 @@ error_nest:
return -1;
}
-static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
+static int taprio_dump_tc_entries(struct sk_buff *skb,
+ struct sched_gate_list *sched)
{
struct nlattr *n;
int tc;
@@ -1959,7 +2229,7 @@ static int taprio_dump_tc_entries(struct taprio_sched *q, struct sk_buff *skb)
goto nla_put_failure;
if (nla_put_u32(skb, TCA_TAPRIO_TC_ENTRY_MAX_SDU,
- q->max_sdu[tc]))
+ sched->max_sdu[tc]))
goto nla_put_failure;
nla_nest_end(skb, n);
@@ -1979,18 +2249,11 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
struct sched_gate_list *oper, *admin;
struct tc_mqprio_qopt opt = { 0 };
struct nlattr *nest, *sched_nest;
- unsigned int i;
oper = rtnl_dereference(q->oper_sched);
admin = rtnl_dereference(q->admin_sched);
- opt.num_tc = netdev_get_num_tc(dev);
- memcpy(opt.prio_tc_map, dev->prio_tc_map, sizeof(opt.prio_tc_map));
-
- for (i = 0; i < netdev_get_num_tc(dev); i++) {
- opt.count[i] = dev->tc_to_txq[i].count;
- opt.offset[i] = dev->tc_to_txq[i].offset;
- }
+ mqprio_qopt_reconstruct(dev, &opt);
nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
if (!nest)
@@ -2010,7 +2273,7 @@ static int taprio_dump(struct Qdisc *sch, struct sk_buff *skb)
nla_put_u32(skb, TCA_TAPRIO_ATTR_TXTIME_DELAY, q->txtime_delay))
goto options_error;
- if (taprio_dump_tc_entries(q, skb))
+ if (oper && taprio_dump_tc_entries(skb, oper))
goto options_error;
if (oper && dump_schedule(skb, oper))
diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c
index 59e653b528b1..6b95d3ba8fe1 100644
--- a/net/sctp/bind_addr.c
+++ b/net/sctp/bind_addr.c
@@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest,
}
}
+ /* If somehow no addresses were found that can be used with this
+ * scope, it's an error.
+ */
+ if (list_empty(&dest->address_list))
+ error = -ENETUNREACH;
+
out:
if (error)
sctp_bind_addr_clean(dest);
diff --git a/net/sctp/diag.c b/net/sctp/diag.c
index a557009e9832..c3d6b92dd386 100644
--- a/net/sctp/diag.c
+++ b/net/sctp/diag.c
@@ -343,11 +343,9 @@ static int sctp_sock_filter(struct sctp_endpoint *ep, struct sctp_transport *tsp
struct sctp_comm_param *commp = p;
struct sock *sk = ep->base.sk;
const struct inet_diag_req_v2 *r = commp->r;
- struct sctp_association *assoc =
- list_entry(ep->asocs.next, struct sctp_association, asocs);
/* find the ep only once through the transports by this condition */
- if (tsp->asoc != assoc)
+ if (!list_is_first(&tsp->asoc->asocs, &ep->asocs))
return 0;
if (r->sdiag_family != AF_UNSPEC && sk->sk_family != r->sdiag_family)
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index 097bd60ce964..62b436a2c8fe 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -807,8 +807,6 @@ static struct sock *sctp_v6_create_accept_sk(struct sock *sk,
newsk->sk_v6_rcv_saddr = sk->sk_v6_rcv_saddr;
- sk_refcnt_debug_inc(newsk);
-
if (newsk->sk_prot->init(newsk)) {
sk_common_release(newsk);
newsk = NULL;
diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c
index 909a89a1cff4..c365df24ad33 100644
--- a/net/sctp/protocol.c
+++ b/net/sctp/protocol.c
@@ -601,8 +601,6 @@ static struct sock *sctp_v4_create_accept_sk(struct sock *sk,
newinet->inet_daddr = asoc->peer.primary_addr.v4.sin_addr.s_addr;
- sk_refcnt_debug_inc(newsk);
-
if (newsk->sk_prot->init(newsk)) {
sk_common_release(newsk);
newsk = NULL;
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 84021a6c4f9d..b91616f819de 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -59,6 +59,7 @@
#include <net/ipv6.h>
#include <net/inet_common.h>
#include <net/busy_poll.h>
+#include <trace/events/sock.h>
#include <linux/socket.h> /* for sa_family_t */
#include <linux/export.h>
@@ -8321,7 +8322,7 @@ static int sctp_get_port_local(struct sock *sk, union sctp_addr *addr)
int low, high, remaining, index;
unsigned int rover;
- inet_get_local_port_range(net, &low, &high);
+ inet_sk_get_local_port_range(sk, &low, &high);
remaining = (high - low) + 1;
rover = get_random_u32_below(remaining) + low;
@@ -9244,6 +9245,8 @@ void sctp_data_ready(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
diff --git a/net/sctp/stream_sched_prio.c b/net/sctp/stream_sched_prio.c
index 42d4800f263d..4d4d9da331f4 100644
--- a/net/sctp/stream_sched_prio.c
+++ b/net/sctp/stream_sched_prio.c
@@ -25,6 +25,18 @@
static void sctp_sched_prio_unsched_all(struct sctp_stream *stream);
+static struct sctp_stream_priorities *sctp_sched_prio_head_get(struct sctp_stream_priorities *p)
+{
+ p->users++;
+ return p;
+}
+
+static void sctp_sched_prio_head_put(struct sctp_stream_priorities *p)
+{
+ if (p && --p->users == 0)
+ kfree(p);
+}
+
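The two helpers above replace the ad-hoc "scan all streams for another user" logic with a plain use count: every lookup takes a reference, every release drops one, and the final put frees the head. A compact userspace model of that lifecycle (names hypothetical):

/* Sketch of the use-count lifecycle introduced above: lookup takes a
 * reference, release drops it, the last put frees. Plain C model.
 */
#include <stdlib.h>

struct prio_head {
	int prio;
	int users;
};

static struct prio_head *head_get(struct prio_head *p)
{
	p->users++;
	return p;
}

static void head_put(struct prio_head *p)
{
	if (p && --p->users == 0)
		free(p);
}

static struct prio_head *head_new(int prio)
{
	struct prio_head *p = calloc(1, sizeof(*p));

	if (p) {
		p->prio = prio;
		p->users = 1;	/* caller's reference */
	}
	return p;
}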
static struct sctp_stream_priorities *sctp_sched_prio_new_head(
struct sctp_stream *stream, int prio, gfp_t gfp)
{
@@ -38,6 +50,7 @@ static struct sctp_stream_priorities *sctp_sched_prio_new_head(
INIT_LIST_HEAD(&p->active);
p->next = NULL;
p->prio = prio;
+ p->users = 1;
return p;
}
@@ -53,7 +66,7 @@ static struct sctp_stream_priorities *sctp_sched_prio_get_head(
*/
list_for_each_entry(p, &stream->prio_list, prio_sched) {
if (p->prio == prio)
- return p;
+ return sctp_sched_prio_head_get(p);
if (p->prio > prio)
break;
}
@@ -70,7 +83,7 @@ static struct sctp_stream_priorities *sctp_sched_prio_get_head(
*/
break;
if (p->prio == prio)
- return p;
+ return sctp_sched_prio_head_get(p);
}
/* If not even there, allocate a new one. */
@@ -154,32 +167,21 @@ static int sctp_sched_prio_set(struct sctp_stream *stream, __u16 sid,
struct sctp_stream_out_ext *soute = sout->ext;
struct sctp_stream_priorities *prio_head, *old;
bool reschedule = false;
- int i;
+
+ old = soute->prio_head;
+ if (old && old->prio == prio)
+ return 0;
prio_head = sctp_sched_prio_get_head(stream, prio, gfp);
if (!prio_head)
return -ENOMEM;
reschedule = sctp_sched_prio_unsched(soute);
- old = soute->prio_head;
soute->prio_head = prio_head;
if (reschedule)
sctp_sched_prio_sched(stream, soute);
- if (!old)
- /* Happens when we set the priority for the first time */
- return 0;
-
- for (i = 0; i < stream->outcnt; i++) {
- soute = SCTP_SO(stream, i)->ext;
- if (soute && soute->prio_head == old)
- /* It's still in use, nothing else to do here. */
- return 0;
- }
-
- /* No hits, we are good to free it. */
- kfree(old);
-
+ sctp_sched_prio_head_put(old);
return 0;
}
@@ -206,20 +208,8 @@ static int sctp_sched_prio_init_sid(struct sctp_stream *stream, __u16 sid,
static void sctp_sched_prio_free_sid(struct sctp_stream *stream, __u16 sid)
{
- struct sctp_stream_priorities *prio = SCTP_SO(stream, sid)->ext->prio_head;
- int i;
-
- if (!prio)
- return;
-
+ sctp_sched_prio_head_put(SCTP_SO(stream, sid)->ext->prio_head);
SCTP_SO(stream, sid)->ext->prio_head = NULL;
- for (i = 0; i < stream->outcnt; i++) {
- if (SCTP_SO(stream, i)->ext &&
- SCTP_SO(stream, i)->ext->prio_head == prio)
- return;
- }
-
- kfree(prio);
}
static void sctp_sched_prio_enqueue(struct sctp_outq *q,
diff --git a/net/sctp/transport.c b/net/sctp/transport.c
index ca1eba95c293..2f66a2006517 100644
--- a/net/sctp/transport.c
+++ b/net/sctp/transport.c
@@ -196,9 +196,7 @@ void sctp_transport_reset_hb_timer(struct sctp_transport *transport)
/* When a data chunk is sent, reset the heartbeat interval. */
expires = jiffies + sctp_transport_timeout(transport);
- if ((time_before(transport->hb_timer.expires, expires) ||
- !timer_pending(&transport->hb_timer)) &&
- !mod_timer(&transport->hb_timer,
+ if (!mod_timer(&transport->hb_timer,
expires + get_random_u32_below(transport->rto)))
sctp_transport_hold(transport);
}
diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e12d4fa5aece..a4cccdfdc00a 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -27,6 +27,7 @@
#include <linux/if_vlan.h>
#include <linux/rcupdate_wait.h>
#include <linux/ctype.h>
+#include <linux/splice.h>
#include <net/sock.h>
#include <net/tcp.h>
@@ -359,8 +360,6 @@ static void smc_destruct(struct sock *sk)
return;
if (!sock_flag(sk, SOCK_DEAD))
return;
-
- sk_refcnt_debug_dec(sk);
}
static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
@@ -389,7 +388,6 @@ static struct sock *smc_sock_alloc(struct net *net, struct socket *sock,
spin_lock_init(&smc->accept_q_lock);
spin_lock_init(&smc->conn.send_lock);
sk->sk_prot->hash(sk);
- sk_refcnt_debug_inc(sk);
mutex_init(&smc->clcsock_release_lock);
smc_init_saved_callbacks(smc);
@@ -501,7 +499,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
return -EINVAL;
/* protect against parallel smcr_link_reg_buf() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -509,7 +507,7 @@ static int smcr_lgr_reg_sndbufs(struct smc_link *link,
if (rc)
break;
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
return rc;
}
@@ -518,15 +516,30 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
struct smc_buf_desc *rmb_desc)
{
struct smc_link_group *lgr = link->lgr;
+ bool do_slow = false;
int i, rc = 0;
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (rc)
return rc;
+
+ down_read(&lgr->llc_conf_mutex);
+ for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+ if (!smc_link_active(&lgr->lnk[i]))
+ continue;
+ if (!rmb_desc->is_reg_mr[link->link_idx]) {
+ up_read(&lgr->llc_conf_mutex);
+ goto slow_path;
+ }
+ }
+ /* MR already registered on all active links */

+ goto fast_path;
+slow_path:
+ do_slow = true;
/* protect against parallel smc_llc_cli_rkey_exchange() and
* parallel smcr_link_reg_buf()
*/
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (!smc_link_active(&lgr->lnk[i]))
continue;
@@ -534,7 +547,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
if (rc)
goto out;
}
-
+fast_path:
/* exchange confirm_rkey msg with peer */
rc = smc_llc_do_confirm_rkey(link, rmb_desc);
if (rc) {
@@ -543,7 +556,7 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
}
rmb_desc->is_conf_rkey = true;
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ do_slow ? up_write(&lgr->llc_conf_mutex) : up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
return rc;
}
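The reworked smcr_lgr_reg_rmbs() probes under the shared lock and escalates to the exclusive lock only when some link still needs MR registration; the closing ternary releases whichever side was taken. A reduced sketch of that read-probe/write-fallback shape, assuming the kernel rwsem API (work_needed() and do_work() are stand-ins; note that state may change between up_read() and down_write(), which is why the real code redoes its check-and-register loop under the write lock):

/* Sketch of the fast/slow locking shape used above; not the SMC code. */
extern bool work_needed(void);
extern int do_work(void);

static int reg_bufs(struct rw_semaphore *sem)
{
	bool do_slow = false;
	int rc = 0;

	down_read(sem);
	if (!work_needed())
		goto done;	/* fast path: stay on the shared lock */
	up_read(sem);

	do_slow = true;
	down_write(sem);	/* exclusive: safe to mutate shared state */
	rc = do_work();		/* must re-validate; state may have changed */
done:
	do_slow ? up_write(sem) : up_read(sem);
	return rc;
}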
@@ -1826,8 +1839,10 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
smc_llc_link_active(link);
smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
+ down_write(&link->lgr->llc_conf_mutex);
/* initial contact - try to establish second link */
smc_llc_srv_add_link(link, NULL);
+ up_write(&link->lgr->llc_conf_mutex);
return 0;
}
@@ -3382,12 +3397,14 @@ static int __init smc_init(void)
if (rc)
goto out_pernet_subsys;
- smc_ism_init();
+ rc = smc_ism_init();
+ if (rc)
+ goto out_pernet_subsys_stat;
smc_clc_init();
rc = smc_nl_init();
if (rc)
- goto out_pernet_subsys_stat;
+ goto out_ism;
rc = smc_pnet_init();
if (rc)
@@ -3480,6 +3497,8 @@ out_pnet:
smc_pnet_exit();
out_nl:
smc_nl_exit();
+out_ism:
+ smc_ism_exit();
out_pernet_subsys_stat:
unregister_pernet_subsys(&smc_net_stat_ops);
out_pernet_subsys:
@@ -3495,6 +3514,7 @@ static void __exit smc_exit(void)
sock_unregister(PF_SMC);
smc_core_exit();
smc_ib_unregister_client();
+ smc_ism_exit();
destroy_workqueue(smc_close_wq);
destroy_workqueue(smc_tcp_ls_wq);
destroy_workqueue(smc_hs_wq);
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index dfb9797f7bc6..b9b8b07aa702 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -813,6 +813,7 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
struct smc_clc_v2_extension *v2_ext;
struct smc_clc_msg_smcd *pclc_smcd;
struct smc_clc_msg_trail *trl;
+ struct smcd_dev *smcd;
int len, i, plen, rc;
int reason_code = 0;
struct kvec vec[8];
@@ -868,7 +869,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
if (smcd_indicated(ini->smc_type_v1)) {
/* add SMC-D specifics */
if (ini->ism_dev[0]) {
- pclc_smcd->ism.gid = htonll(ini->ism_dev[0]->local_gid);
+ smcd = ini->ism_dev[0];
+ pclc_smcd->ism.gid =
+ htonll(smcd->ops->get_local_gid(smcd));
pclc_smcd->ism.chid =
htons(smc_ism_get_chid(ini->ism_dev[0]));
}
@@ -914,8 +917,9 @@ int smc_clc_send_proposal(struct smc_sock *smc, struct smc_init_info *ini)
plen += sizeof(*smcd_v2_ext);
if (ini->ism_offered_cnt) {
for (i = 1; i <= ini->ism_offered_cnt; i++) {
+ smcd = ini->ism_dev[i];
gidchids[i - 1].gid =
- htonll(ini->ism_dev[i]->local_gid);
+ htonll(smcd->ops->get_local_gid(smcd));
gidchids[i - 1].chid =
htons(smc_ism_get_chid(ini->ism_dev[i]));
}
@@ -1000,7 +1004,8 @@ static int smc_clc_send_confirm_accept(struct smc_sock *smc,
memcpy(clc->hdr.eyecatcher, SMCD_EYECATCHER,
sizeof(SMCD_EYECATCHER));
clc->hdr.typev1 = SMC_TYPE_D;
- clc->d0.gid = conn->lgr->smcd->local_gid;
+ clc->d0.gid =
+ conn->lgr->smcd->ops->get_local_gid(conn->lgr->smcd);
clc->d0.token = conn->rmb_desc->token;
clc->d0.dmbe_size = conn->rmbe_size_short;
clc->d0.dmbe_idx = 0;
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c305d8dd23f8..d52060b2680c 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -500,6 +500,7 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
struct netlink_callback *cb)
{
char smc_pnet[SMC_MAX_PNETID_LEN + 1];
+ struct smcd_dev *smcd = lgr->smcd;
struct nlattr *attrs;
void *nlh;
@@ -515,8 +516,9 @@ static int smc_nl_fill_smcd_lgr(struct smc_link_group *lgr,
if (nla_put_u32(skb, SMC_NLA_LGR_D_ID, *((u32 *)&lgr->id)))
goto errattr;
- if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID, lgr->smcd->local_gid,
- SMC_NLA_LGR_D_PAD))
+ if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_GID,
+ smcd->ops->get_local_gid(smcd),
+ SMC_NLA_LGR_D_PAD))
goto errattr;
if (nla_put_u64_64bit(skb, SMC_NLA_LGR_D_PEER_GID, lgr->peer_gid,
SMC_NLA_LGR_D_PAD))
@@ -820,6 +822,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
{
struct smc_link_group *lgr;
struct list_head *lgr_list;
+ struct smcd_dev *smcd;
struct smc_link *lnk;
spinlock_t *lgr_lock;
u8 link_idx;
@@ -851,8 +854,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->freeing = 0;
lgr->vlan_id = ini->vlan_id;
refcount_set(&lgr->refcnt, 1); /* set lgr refcnt to 1 */
- mutex_init(&lgr->sndbufs_lock);
- mutex_init(&lgr->rmbs_lock);
+ init_rwsem(&lgr->sndbufs_lock);
+ init_rwsem(&lgr->rmbs_lock);
rwlock_init(&lgr->conns_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
INIT_LIST_HEAD(&lgr->sndbufs[i]);
@@ -866,7 +869,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
lgr->conns_all = RB_ROOT;
if (ini->is_smcd) {
/* SMC-D specific settings */
- get_device(&ini->ism_dev[ini->ism_selected]->dev);
+ smcd = ini->ism_dev[ini->ism_selected];
+ get_device(smcd->ops->get_dev(smcd));
lgr->peer_gid = ini->ism_peer_gid[ini->ism_selected];
lgr->smcd = ini->ism_dev[ini->ism_selected];
lgr_list = &ini->ism_dev[ini->ism_selected]->lgr_list;
@@ -1094,7 +1098,7 @@ err_out:
static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
struct smc_link_group *lgr)
{
- struct mutex *lock; /* lock buffer list */
+ struct rw_semaphore *lock; /* lock buffer list */
int rc;
if (is_rmb && buf_desc->is_conf_rkey && !list_empty(&lgr->list)) {
@@ -1102,10 +1106,10 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
if (!rc) {
/* protect against smc_llc_cli_rkey_exchange() */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
smc_llc_do_delete_rkey(lgr, buf_desc);
buf_desc->is_conf_rkey = false;
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
}
}
@@ -1114,14 +1118,15 @@ static void smcr_buf_unuse(struct smc_buf_desc *buf_desc, bool is_rmb,
/* buf registration failed, reuse not possible */
lock = is_rmb ? &lgr->rmbs_lock :
&lgr->sndbufs_lock;
- mutex_lock(lock);
+ down_write(lock);
list_del(&buf_desc->list);
- mutex_unlock(lock);
+ up_write(lock);
smc_buf_free(lgr, is_rmb, buf_desc);
} else {
- buf_desc->used = 0;
- memset(buf_desc->cpu_addr, 0, buf_desc->len);
+ /* memzero_explicit provides potential memory barrier semantics */
+ memzero_explicit(buf_desc->cpu_addr, buf_desc->len);
+ WRITE_ONCE(buf_desc->used, 0);
}
}
@@ -1132,19 +1137,17 @@ static void smc_buf_unuse(struct smc_connection *conn,
if (!lgr->is_smcd && conn->sndbuf_desc->is_vm) {
smcr_buf_unuse(conn->sndbuf_desc, false, lgr);
} else {
- conn->sndbuf_desc->used = 0;
- memset(conn->sndbuf_desc->cpu_addr, 0,
- conn->sndbuf_desc->len);
+ memzero_explicit(conn->sndbuf_desc->cpu_addr, conn->sndbuf_desc->len);
+ WRITE_ONCE(conn->sndbuf_desc->used, 0);
}
}
if (conn->rmb_desc) {
if (!lgr->is_smcd) {
smcr_buf_unuse(conn->rmb_desc, true, lgr);
} else {
- conn->rmb_desc->used = 0;
- memset(conn->rmb_desc->cpu_addr, 0,
- conn->rmb_desc->len +
- sizeof(struct smcd_cdc_msg));
+ memzero_explicit(conn->rmb_desc->cpu_addr,
+ conn->rmb_desc->len + sizeof(struct smcd_cdc_msg));
+ WRITE_ONCE(conn->rmb_desc->used, 0);
}
}
}
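The ordering in these SMC-D branches is the point of the change: the buffer is scrubbed first, and only afterwards is `used` published as 0 with WRITE_ONCE(). That way a concurrent claimant that wins the cmpxchg in smc_buf_get_slot() can never inherit stale data, and a late clear can no longer wipe out data the new owner has already written.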
@@ -1220,15 +1223,16 @@ static void smcr_buf_unmap_lgr(struct smc_link *lnk)
int i;
for (i = 0; i < SMC_RMBE_SIZES; i++) {
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
smcr_buf_unmap_link(buf_desc, true, lnk);
- mutex_unlock(&lgr->rmbs_lock);
- mutex_lock(&lgr->sndbufs_lock);
+ up_write(&lgr->rmbs_lock);
+
+ down_write(&lgr->sndbufs_lock);
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
list)
smcr_buf_unmap_link(buf_desc, false, lnk);
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
}
}
@@ -1373,19 +1377,19 @@ static void smc_lgr_free(struct smc_link_group *lgr)
int i;
if (!lgr->is_smcd) {
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
if (lgr->lnk[i].state != SMC_LNK_UNUSED)
smcr_link_clear(&lgr->lnk[i], false);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
smc_llc_lgr_clear(lgr);
}
destroy_workqueue(lgr->tx_wq);
if (lgr->is_smcd) {
smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
- put_device(&lgr->smcd->dev);
+ put_device(lgr->smcd->ops->get_dev(lgr->smcd));
}
smc_lgr_put(lgr); /* theoretically last lgr_put */
}
@@ -1692,12 +1696,12 @@ static void smcr_link_down(struct smc_link *lnk)
} else {
if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
/* another llc task is ongoing */
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
wait_event_timeout(lgr->llc_flow_waiter,
(list_empty(&lgr->list) ||
lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
SMC_LLC_WAIT_TIME);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
}
if (!list_empty(&lgr->list)) {
smc_llc_send_delete_link(to_lnk, del_link_id,
@@ -1757,9 +1761,9 @@ static void smc_link_down_work(struct work_struct *work)
if (list_empty(&lgr->list))
return;
wake_up_all(&lgr->llc_msg_waiter);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
smcr_link_down(link);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_vlan_by_tcpsk_walk(struct net_device *lower_dev,
@@ -1986,19 +1990,19 @@ int smc_uncompress_bufsize(u8 compressed)
* buffer size; if not available, return NULL
*/
static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
- struct mutex *lock,
+ struct rw_semaphore *lock,
struct list_head *buf_list)
{
struct smc_buf_desc *buf_slot;
- mutex_lock(lock);
+ down_read(lock);
list_for_each_entry(buf_slot, buf_list, list) {
if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
- mutex_unlock(lock);
+ up_read(lock);
return buf_slot;
}
}
- mutex_unlock(lock);
+ up_read(lock);
return NULL;
}
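Converting these buffer-list locks from mutexes to rw_semaphores works because claiming a free descriptor does not modify the list: scanners hold the lock shared and claim with an atomic cmpxchg on `used`, so only list insertion and removal need the write side. A small C11 model of the claim step (names hypothetical):

/* Sketch: concurrent slot claim with an atomic compare-and-swap, the
 * reason a shared (read) lock suffices while scanning the list above.
 */
#include <stdatomic.h>
#include <stdbool.h>

struct buf_slot {
	_Atomic int used;	/* 0 = free, 1 = claimed */
	/* ... buffer fields ... */
};

static bool try_claim(struct buf_slot *slot)
{
	int expected = 0;

	/* succeeds for exactly one contender, like cmpxchg(&used, 0, 1) */
	return atomic_compare_exchange_strong(&slot->used, &expected, 1);
}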
@@ -2107,13 +2111,13 @@ int smcr_link_reg_buf(struct smc_link *link, struct smc_buf_desc *buf_desc)
return 0;
}
-static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct rw_semaphore *lock,
struct list_head *lst, bool is_rmb)
{
struct smc_buf_desc *buf_desc, *bf;
int rc = 0;
- mutex_lock(lock);
+ down_write(lock);
list_for_each_entry_safe(buf_desc, bf, lst, list) {
if (!buf_desc->used)
continue;
@@ -2122,7 +2126,7 @@ static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
goto out;
}
out:
- mutex_unlock(lock);
+ up_write(lock);
return rc;
}
@@ -2155,37 +2159,37 @@ int smcr_buf_reg_lgr(struct smc_link *lnk)
int i, rc = 0;
/* reg all RMBs for a new link */
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
if (!buf_desc->used)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
if (lgr->buf_type == SMCR_PHYS_CONT_BUFS)
return rc;
/* reg all vzalloced sndbufs for a new link */
- mutex_lock(&lgr->sndbufs_lock);
+ down_write(&lgr->sndbufs_lock);
for (i = 0; i < SMC_RMBE_SIZES; i++) {
list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i], list) {
if (!buf_desc->used || !buf_desc->is_vm)
continue;
rc = smcr_link_reg_buf(lnk, buf_desc);
if (rc) {
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
}
}
- mutex_unlock(&lgr->sndbufs_lock);
+ up_write(&lgr->sndbufs_lock);
return rc;
}
@@ -2243,7 +2247,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
int i, rc = 0, cnt = 0;
/* protect against parallel link reconfiguration */
- mutex_lock(&lgr->llc_conf_mutex);
+ down_read(&lgr->llc_conf_mutex);
for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
struct smc_link *lnk = &lgr->lnk[i];
@@ -2256,7 +2260,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
cnt++;
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_read(&lgr->llc_conf_mutex);
if (!rc && !cnt)
rc = -EINVAL;
return rc;
@@ -2305,8 +2309,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
struct smc_link_group *lgr = conn->lgr;
struct list_head *buf_list;
int bufsize, bufsize_short;
+ struct rw_semaphore *lock; /* lock buffer list */
bool is_dgraded = false;
- struct mutex *lock; /* lock buffer list */
int sk_buf_size;
if (is_rmb)
@@ -2354,9 +2358,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
SMC_STAT_RMB_ALLOC(smc, is_smcd, is_rmb);
SMC_STAT_RMB_SIZE(smc, is_smcd, is_rmb, bufsize);
buf_desc->used = 1;
- mutex_lock(lock);
+ down_write(lock);
list_add(&buf_desc->list, buf_list);
- mutex_unlock(lock);
+ up_write(lock);
break; /* found */
}
@@ -2430,9 +2434,9 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd)
/* create rmb */
rc = __smc_buf_create(smc, is_smcd, true);
if (rc) {
- mutex_lock(&smc->conn.lgr->sndbufs_lock);
+ down_write(&smc->conn.lgr->sndbufs_lock);
list_del(&smc->conn.sndbuf_desc->list);
- mutex_unlock(&smc->conn.lgr->sndbufs_lock);
+ up_write(&smc->conn.lgr->sndbufs_lock);
smc_buf_free(smc->conn.lgr, false, smc->conn.sndbuf_desc);
smc->conn.sndbuf_desc = NULL;
}
@@ -2595,6 +2599,7 @@ static int smc_core_reboot_event(struct notifier_block *this,
{
smc_lgrs_shutdown();
smc_ib_unregister_client();
+ smc_ism_exit();
return 0;
}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 285f9bd8e232..08b457c2d294 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -252,9 +252,9 @@ struct smc_link_group {
unsigned short vlan_id; /* vlan id of link group */
struct list_head sndbufs[SMC_RMBE_SIZES];/* tx buffers */
- struct mutex sndbufs_lock; /* protects tx buffers */
+ struct rw_semaphore sndbufs_lock; /* protects tx buffers */
struct list_head rmbs[SMC_RMBE_SIZES]; /* rx buffers */
- struct mutex rmbs_lock; /* protects rx buffers */
+ struct rw_semaphore rmbs_lock; /* protects rx buffers */
u8 id[SMC_LGR_ID_SIZE]; /* unique lgr id */
struct delayed_work free_work; /* delayed freeing of an lgr */
@@ -298,7 +298,7 @@ struct smc_link_group {
/* queue for llc events */
spinlock_t llc_event_q_lock;
/* protects llc_event_q */
- struct mutex llc_conf_mutex;
+ struct rw_semaphore llc_conf_mutex;
/* protects lgr reconfig. */
struct work_struct llc_add_link_work;
struct work_struct llc_del_link_work;
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 80ea7d954ece..7ff2152971a5 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -167,12 +167,13 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
!list_empty(&smc->conn.lgr->list)) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo;
+ struct smcd_dev *smcd = conn->lgr->smcd;
memset(&dinfo, 0, sizeof(dinfo));
dinfo.linkid = *((u32 *)conn->lgr->id);
dinfo.peer_gid = conn->lgr->peer_gid;
- dinfo.my_gid = conn->lgr->smcd->local_gid;
+ dinfo.my_gid = smcd->ops->get_local_gid(smcd);
dinfo.token = conn->rmb_desc->token;
dinfo.peer_token = conn->peer_token;
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 911fe08bc54b..3b0b7710c6b0 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -17,6 +17,7 @@
#include "smc_ism.h"
#include "smc_pnet.h"
#include "smc_netlink.h"
+#include "linux/ism.h"
struct smcd_dev_list smcd_dev_list = {
.list = LIST_HEAD_INIT(smcd_dev_list.list),
@@ -26,6 +27,22 @@ struct smcd_dev_list smcd_dev_list = {
static bool smc_ism_v2_capable;
static u8 smc_ism_v2_system_eid[SMC_MAX_EID_LEN];
+#if IS_ENABLED(CONFIG_ISM)
+static void smcd_register_dev(struct ism_dev *ism);
+static void smcd_unregister_dev(struct ism_dev *ism);
+static void smcd_handle_event(struct ism_dev *ism, struct ism_event *event);
+static void smcd_handle_irq(struct ism_dev *ism, unsigned int dmbno,
+ u16 dmbemask);
+
+static struct ism_client smc_ism_client = {
+ .name = "SMC-D",
+ .add = smcd_register_dev,
+ .remove = smcd_unregister_dev,
+ .handle_event = smcd_handle_event,
+ .handle_irq = smcd_handle_irq,
+};
+#endif
+
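With this rework SMC-D stops owning the ISM device lifetime and instead registers an ism_client: the ISM driver calls back on device add/remove and forwards events and IRQs, with per-client private state attached via ism_set_priv()/ism_get_priv(). A skeletal model of that contract (plain C toy, all names illustrative):

/* Sketch of the client-registration contract above; a toy model of how
 * a driver core fans callbacks out to registered clients.
 */
struct dev;				/* opaque, stands in for ism_dev */

struct client {
	const char *name;
	void (*add)(struct dev *d);	/* device appeared */
	void (*remove)(struct dev *d);	/* device going away */
};

static void bus_device_add(struct client **clients, int n, struct dev *d)
{
	int i;

	for (i = 0; i < n; i++)		/* notify every registered client */
		if (clients[i]->add)
			clients[i]->add(d);
}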
/* Test if an ISM communication is possible - same CPC */
int smc_ism_cantalk(u64 peer_gid, unsigned short vlan_id, struct smcd_dev *smcd)
{
@@ -183,6 +200,7 @@ int smc_ism_unregister_dmb(struct smcd_dev *smcd, struct smc_buf_desc *dmb_desc)
int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
struct smc_buf_desc *dmb_desc)
{
+#if IS_ENABLED(CONFIG_ISM)
struct smcd_dmb dmb;
int rc;
@@ -191,7 +209,7 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb.sba_idx = dmb_desc->sba_idx;
dmb.vlan_id = lgr->vlan_id;
dmb.rgid = lgr->peer_gid;
- rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb);
+ rc = lgr->smcd->ops->register_dmb(lgr->smcd, &dmb, &smc_ism_client);
if (!rc) {
dmb_desc->sba_idx = dmb.sba_idx;
dmb_desc->token = dmb.dmb_tok;
@@ -200,6 +218,9 @@ int smc_ism_register_dmb(struct smc_link_group *lgr, int dmb_len,
dmb_desc->len = dmb.dmb_len;
}
return rc;
+#else
+ return 0;
+#endif
}
static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
@@ -210,9 +231,11 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
struct smc_pci_dev smc_pci_dev;
struct nlattr *port_attrs;
struct nlattr *attrs;
+ struct ism_dev *ism;
int use_cnt = 0;
void *nlh;
+ ism = smcd->priv;
nlh = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq,
&smc_gen_nl_family, NLM_F_MULTI,
SMC_NETLINK_GET_DEV_SMCD);
@@ -227,7 +250,7 @@ static int smc_nl_handle_smcd_dev(struct smcd_dev *smcd,
if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, use_cnt > 0))
goto errattr;
memset(&smc_pci_dev, 0, sizeof(smc_pci_dev));
- smc_set_pci_values(to_pci_dev(smcd->dev.parent), &smc_pci_dev);
+ smc_set_pci_values(to_pci_dev(ism->dev.parent), &smc_pci_dev);
if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid))
goto errattr;
if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid))
@@ -293,10 +316,11 @@ int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb)
return skb->len;
}
+#if IS_ENABLED(CONFIG_ISM)
struct smc_ism_event_work {
struct work_struct work;
struct smcd_dev *smcd;
- struct smcd_event event;
+ struct ism_event event;
};
#define ISM_EVENT_REQUEST 0x0001
@@ -336,24 +360,6 @@ static void smcd_handle_sw_event(struct smc_ism_event_work *wrk)
}
}
-int smc_ism_signal_shutdown(struct smc_link_group *lgr)
-{
- int rc;
- union smcd_sw_event_info ev_info;
-
- if (lgr->peer_shutdown)
- return 0;
-
- memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
- ev_info.vlan_id = lgr->vlan_id;
- ev_info.code = ISM_EVENT_REQUEST;
- rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
- ISM_EVENT_REQUEST_IR,
- ISM_EVENT_CODE_SHUTDOWN,
- ev_info.info);
- return rc;
-}
-
/* worker for SMC-D events */
static void smc_ism_event_work(struct work_struct *work)
{
@@ -373,44 +379,25 @@ static void smc_ism_event_work(struct work_struct *work)
kfree(wrk);
}
-static void smcd_release(struct device *dev)
-{
- struct smcd_dev *smcd = container_of(dev, struct smcd_dev, dev);
-
- kfree(smcd->conn);
- kfree(smcd);
-}
-
-struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
- const struct smcd_ops *ops, int max_dmbs)
+static struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
+ const struct smcd_ops *ops, int max_dmbs)
{
struct smcd_dev *smcd;
- smcd = kzalloc(sizeof(*smcd), GFP_KERNEL);
+ smcd = devm_kzalloc(parent, sizeof(*smcd), GFP_KERNEL);
if (!smcd)
return NULL;
- smcd->conn = kcalloc(max_dmbs, sizeof(struct smc_connection *),
- GFP_KERNEL);
- if (!smcd->conn) {
- kfree(smcd);
+ smcd->conn = devm_kcalloc(parent, max_dmbs,
+ sizeof(struct smc_connection *), GFP_KERNEL);
+ if (!smcd->conn)
return NULL;
- }
smcd->event_wq = alloc_ordered_workqueue("ism_evt_wq-%s",
WQ_MEM_RECLAIM, name);
- if (!smcd->event_wq) {
- kfree(smcd->conn);
- kfree(smcd);
+ if (!smcd->event_wq)
return NULL;
- }
- smcd->dev.parent = parent;
- smcd->dev.release = smcd_release;
- device_initialize(&smcd->dev);
- dev_set_name(&smcd->dev, name);
smcd->ops = ops;
- if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
- smc_pnetid_by_table_smcd(smcd);
spin_lock_init(&smcd->lock);
spin_lock_init(&smcd->lgr_lock);
@@ -419,11 +406,23 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
init_waitqueue_head(&smcd->lgrs_deleted);
return smcd;
}
-EXPORT_SYMBOL_GPL(smcd_alloc_dev);
-int smcd_register_dev(struct smcd_dev *smcd)
+static void smcd_register_dev(struct ism_dev *ism)
{
- int rc;
+ const struct smcd_ops *ops = ism_get_smcd_ops();
+ struct smcd_dev *smcd;
+
+ if (!ops)
+ return;
+
+ smcd = smcd_alloc_dev(&ism->pdev->dev, dev_name(&ism->pdev->dev), ops,
+ ISM_NR_DMBS);
+ if (!smcd)
+ return;
+ smcd->priv = ism;
+ ism_set_priv(ism, &smc_ism_client, smcd);
+ if (smc_pnetid_by_dev_port(&ism->pdev->dev, 0, smcd->pnetid))
+ smc_pnetid_by_table_smcd(smcd);
mutex_lock(&smcd_dev_list.mutex);
if (list_empty(&smcd_dev_list.list)) {
@@ -444,43 +443,28 @@ int smcd_register_dev(struct smcd_dev *smcd)
mutex_unlock(&smcd_dev_list.mutex);
pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
- dev_name(&smcd->dev), smcd->pnetid,
+ dev_name(&ism->dev), smcd->pnetid,
smcd->pnetid_by_user ? " (user defined)" : "");
- rc = device_add(&smcd->dev);
- if (rc) {
- mutex_lock(&smcd_dev_list.mutex);
- list_del(&smcd->list);
- mutex_unlock(&smcd_dev_list.mutex);
- }
-
- return rc;
+ return;
}
-EXPORT_SYMBOL_GPL(smcd_register_dev);
-void smcd_unregister_dev(struct smcd_dev *smcd)
+static void smcd_unregister_dev(struct ism_dev *ism)
{
+ struct smcd_dev *smcd = ism_get_priv(ism, &smc_ism_client);
+
pr_warn_ratelimited("smc: removing smcd device %s\n",
- dev_name(&smcd->dev));
+ dev_name(&ism->dev));
+ smcd->going_away = 1;
+ smc_smcd_terminate_all(smcd);
mutex_lock(&smcd_dev_list.mutex);
list_del_init(&smcd->list);
mutex_unlock(&smcd_dev_list.mutex);
- smcd->going_away = 1;
- smc_smcd_terminate_all(smcd);
destroy_workqueue(smcd->event_wq);
-
- device_del(&smcd->dev);
-}
-EXPORT_SYMBOL_GPL(smcd_unregister_dev);
-
-void smcd_free_dev(struct smcd_dev *smcd)
-{
- put_device(&smcd->dev);
}
-EXPORT_SYMBOL_GPL(smcd_free_dev);
/* SMCD Device event handler. Called from ISM device interrupt handler.
- * Parameters are smcd device pointer,
+ * Parameters are ism device pointer,
* - event->type (0 --> DMB, 1 --> GID),
* - event->code (event code),
* - event->tok (either DMB token when event type 0, or GID when event type 1)
@@ -490,8 +474,9 @@ EXPORT_SYMBOL_GPL(smcd_free_dev);
* Context:
* - Function called in IRQ context from ISM device driver event handler.
*/
-void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
+static void smcd_handle_event(struct ism_dev *ism, struct ism_event *event)
{
+ struct smcd_dev *smcd = ism_get_priv(ism, &smc_ism_client);
struct smc_ism_event_work *wrk;
if (smcd->going_away)
@@ -505,17 +490,18 @@ void smcd_handle_event(struct smcd_dev *smcd, struct smcd_event *event)
wrk->event = *event;
queue_work(smcd->event_wq, &wrk->work);
}
-EXPORT_SYMBOL_GPL(smcd_handle_event);
/* SMCD Device interrupt handler. Called from ISM device interrupt handler.
- * Parameters are smcd device pointer, DMB number, and the DMBE bitmask.
+ * Parameters are the ism device pointer, DMB number, and the DMBE bitmask.
* Find the connection and schedule the tasklet for this connection.
*
* Context:
* - Function called in IRQ context from ISM device driver IRQ handler.
*/
-void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask)
+static void smcd_handle_irq(struct ism_dev *ism, unsigned int dmbno,
+ u16 dmbemask)
{
+ struct smcd_dev *smcd = ism_get_priv(ism, &smc_ism_client);
struct smc_connection *conn = NULL;
unsigned long flags;
@@ -525,10 +511,44 @@ void smcd_handle_irq(struct smcd_dev *smcd, unsigned int dmbno, u16 dmbemask)
tasklet_schedule(&conn->rx_tsklet);
spin_unlock_irqrestore(&smcd->lock, flags);
}
-EXPORT_SYMBOL_GPL(smcd_handle_irq);
+#endif
+
+int smc_ism_signal_shutdown(struct smc_link_group *lgr)
+{
+ int rc = 0;
+#if IS_ENABLED(CONFIG_ISM)
+ union smcd_sw_event_info ev_info;
+
+ if (lgr->peer_shutdown)
+ return 0;
+
+ memcpy(ev_info.uid, lgr->id, SMC_LGR_ID_SIZE);
+ ev_info.vlan_id = lgr->vlan_id;
+ ev_info.code = ISM_EVENT_REQUEST;
+ rc = lgr->smcd->ops->signal_event(lgr->smcd, lgr->peer_gid,
+ ISM_EVENT_REQUEST_IR,
+ ISM_EVENT_CODE_SHUTDOWN,
+ ev_info.info);
+#endif
+ return rc;
+}
-void __init smc_ism_init(void)
+int smc_ism_init(void)
{
+ int rc = 0;
+
+#if IS_ENABLED(CONFIG_ISM)
smc_ism_v2_capable = false;
memset(smc_ism_v2_system_eid, 0, SMC_MAX_EID_LEN);
+
+ rc = ism_register_client(&smc_ism_client);
+#endif
+ return rc;
+}
+
+void smc_ism_exit(void)
+{
+#if IS_ENABLED(CONFIG_ISM)
+ ism_unregister_client(&smc_ism_client);
+#endif
}
diff --git a/net/smc/smc_ism.h b/net/smc/smc_ism.h
index d6b2db604fe8..832b2f42d79f 100644
--- a/net/smc/smc_ism.h
+++ b/net/smc/smc_ism.h
@@ -42,7 +42,8 @@ int smc_ism_signal_shutdown(struct smc_link_group *lgr);
void smc_ism_get_system_eid(u8 **eid);
u16 smc_ism_get_chid(struct smcd_dev *dev);
bool smc_ism_is_v2_capable(void);
-void smc_ism_init(void);
+int smc_ism_init(void);
+void smc_ism_exit(void);
int smcd_nl_get_device(struct sk_buff *skb, struct netlink_callback *cb);
static inline int smc_ism_write(struct smcd_dev *smcd, u64 dmb_tok,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 524649d0ab65..a0840b8c935b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -608,7 +608,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
prim_lnk_idx = link->link_idx;
lnk_idx = link_new->link_idx;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
ext->num_rkeys = lgr->conns_num;
if (!ext->num_rkeys)
goto out;
@@ -628,7 +628,7 @@ static int smc_llc_fill_ext_v2(struct smc_llc_msg_add_link_v2_ext *ext,
}
len += i * sizeof(ext->rt[0]);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return len;
}
@@ -889,7 +889,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -916,7 +916,7 @@ static int smc_llc_cli_rkey_exchange(struct smc_link *link,
break;
} while (num_rkeys_send || num_rkeys_recv);
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -999,14 +999,14 @@ static void smc_llc_save_add_link_rkeys(struct smc_link *link,
ext = (struct smc_llc_msg_add_link_v2_ext *)((u8 *)lgr->wr_rx_buf_v2 +
SMC_WR_TX_SIZE);
max = min_t(u8, ext->num_rkeys, SMC_LLC_RKEYS_PER_MSG_V2);
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
for (i = 0; i < max; i++) {
smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
ext->rt[i].rmb_key,
ext->rt[i].rmb_vaddr_new,
ext->rt[i].rmb_key_new);
}
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
}
static void smc_llc_save_add_link_info(struct smc_link *link,
@@ -1202,12 +1202,12 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
if (smc_llc_is_local_add_link(&qentry->msg))
smc_llc_cli_add_link_invite(qentry->link, qentry);
else
smc_llc_cli_add_link(qentry->link, qentry);
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
}
static int smc_llc_active_link_count(struct smc_link_group *lgr)
@@ -1313,7 +1313,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
int rc = 0;
int i;
- mutex_lock(&lgr->rmbs_lock);
+ down_write(&lgr->rmbs_lock);
num_rkeys_send = lgr->conns_num;
buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
do {
@@ -1338,7 +1338,7 @@ static int smc_llc_srv_rkey_exchange(struct smc_link *link,
smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
} while (num_rkeys_send || num_rkeys_recv);
out:
- mutex_unlock(&lgr->rmbs_lock);
+ up_write(&lgr->rmbs_lock);
return rc;
}
@@ -1509,13 +1509,13 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
rc = smc_llc_srv_add_link(link, qentry);
if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
/* delete any asymmetric link */
smc_llc_delete_asym_link(lgr);
}
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -1582,7 +1582,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
goto out;
}
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
/* delete single link */
for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
@@ -1616,7 +1616,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
smc_lgr_terminate_sched(lgr);
}
out_unlock:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
out:
kfree(qentry);
}
@@ -1652,7 +1652,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
int active_links;
int i;
- mutex_lock(&lgr->llc_conf_mutex);
+ down_write(&lgr->llc_conf_mutex);
qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
lnk = qentry->link;
del_llc = &qentry->msg.delete_link;
@@ -1708,7 +1708,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
smc_llc_add_link_local(lnk);
}
out:
- mutex_unlock(&lgr->llc_conf_mutex);
+ up_write(&lgr->llc_conf_mutex);
kfree(qentry);
}
@@ -2126,7 +2126,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
spin_lock_init(&lgr->llc_flow_lock);
init_waitqueue_head(&lgr->llc_flow_waiter);
init_waitqueue_head(&lgr->llc_msg_waiter);
- mutex_init(&lgr->llc_conf_mutex);
+ init_rwsem(&lgr->llc_conf_mutex);
lgr->llc_testlink_time = READ_ONCE(net->smc.sysctl_smcr_testlink_time);
}
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 25fb2fd186e2..11775401df68 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -103,7 +103,7 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
struct smc_pnetentry *pnetelem, *tmp_pe;
struct smc_pnettable *pnettable;
struct smc_ib_device *ibdev;
- struct smcd_dev *smcd_dev;
+ struct smcd_dev *smcd;
struct smc_net *sn;
int rc = -ENOENT;
int ibport;
@@ -162,16 +162,17 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
mutex_unlock(&smc_ib_devices.mutex);
/* remove smcd devices */
mutex_lock(&smcd_dev_list.mutex);
- list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
- if (smcd_dev->pnetid_by_user &&
+ list_for_each_entry(smcd, &smcd_dev_list.list, list) {
+ if (smcd->pnetid_by_user &&
(!pnet_name ||
- smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+ smc_pnet_match(pnet_name, smcd->pnetid))) {
pr_warn_ratelimited("smc: smcd device %s "
"erased user defined pnetid "
- "%.16s\n", dev_name(&smcd_dev->dev),
- smcd_dev->pnetid);
- memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
- smcd_dev->pnetid_by_user = false;
+ "%.16s\n",
+ dev_name(smcd->ops->get_dev(smcd)),
+ smcd->pnetid);
+ memset(smcd->pnetid, 0, SMC_MAX_PNETID_LEN);
+ smcd->pnetid_by_user = false;
rc = 0;
}
}
@@ -331,8 +332,8 @@ static struct smcd_dev *smc_pnet_find_smcd(char *smcd_name)
mutex_lock(&smcd_dev_list.mutex);
list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
- if (!strncmp(dev_name(&smcd_dev->dev), smcd_name,
- IB_DEVICE_NAME_MAX - 1))
+ if (!strncmp(dev_name(smcd_dev->ops->get_dev(smcd_dev)),
+ smcd_name, IB_DEVICE_NAME_MAX - 1))
goto out;
}
smcd_dev = NULL;
@@ -411,7 +412,8 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
struct smc_ib_device *ib_dev;
bool smcddev_applied = true;
bool ibdev_applied = true;
- struct smcd_dev *smcd_dev;
+ struct smcd_dev *smcd;
+ struct device *dev;
bool new_ibdev;
/* try to apply the pnetid to active devices */
@@ -425,14 +427,16 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
ib_port,
ib_dev->pnetid[ib_port - 1]);
}
- smcd_dev = smc_pnet_find_smcd(ib_name);
- if (smcd_dev) {
- smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
- if (smcddev_applied)
+ smcd = smc_pnet_find_smcd(ib_name);
+ if (smcd) {
+ smcddev_applied = smc_pnet_apply_smcd(smcd, pnet_name);
+ if (smcddev_applied) {
+ dev = smcd->ops->get_dev(smcd);
pr_warn_ratelimited("smc: smcd device %s "
"applied user defined pnetid "
- "%.16s\n", dev_name(&smcd_dev->dev),
- smcd_dev->pnetid);
+ "%.16s\n", dev_name(dev),
+ smcd->pnetid);
+ }
}
/* Apply fails when a device has a hardware-defined pnetid set, do not
* add a pnet table entry in that case.
@@ -1181,7 +1185,7 @@ int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
*/
int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
{
- const char *ib_name = dev_name(&smcddev->dev);
+ const char *ib_name = dev_name(smcddev->ops->get_dev(smcddev));
struct smc_pnettable *pnettable;
struct smc_pnetentry *tmp_pe;
struct smc_net *sn;
diff --git a/net/smc/smc_rx.c b/net/smc/smc_rx.c
index 17c5aee7ee4f..4380d32f5a5f 100644
--- a/net/smc/smc_rx.c
+++ b/net/smc/smc_rx.c
@@ -13,8 +13,10 @@
#include <linux/net.h>
#include <linux/rcupdate.h>
#include <linux/sched/signal.h>
+#include <linux/splice.h>
#include <net/sock.h>
+#include <trace/events/sock.h>
#include "smc.h"
#include "smc_core.h"
@@ -31,6 +33,8 @@ static void smc_rx_wake_up(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
/* derived from sock_def_readable() */
/* called already in smc_listen_work() */
rcu_read_lock();
diff --git a/net/socket.c b/net/socket.c
index 888cd618a968..6bae8ce7059e 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -106,6 +106,7 @@
#include <net/busy_poll.h>
#include <linux/errqueue.h>
#include <linux/ptp_clock_kernel.h>
+#include <trace/events/sock.h>
#ifdef CONFIG_NET_RX_BUSY_POLL
unsigned int sysctl_net_busy_read __read_mostly;
@@ -385,7 +386,7 @@ static const struct xattr_handler sockfs_xattr_handler = {
};
static int sockfs_security_xattr_set(const struct xattr_handler *handler,
- struct user_namespace *mnt_userns,
+ struct mnt_idmap *idmap,
struct dentry *dentry, struct inode *inode,
const char *suffix, const void *value,
size_t size, int flags)
@@ -589,10 +590,10 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
return used;
}
-static int sockfs_setattr(struct user_namespace *mnt_userns,
+static int sockfs_setattr(struct mnt_idmap *idmap,
struct dentry *dentry, struct iattr *iattr)
{
- int err = simple_setattr(&init_user_ns, dentry, iattr);
+ int err = simple_setattr(&nop_mnt_idmap, dentry, iattr);
if (!err && (iattr->ia_valid & ATTR_UID)) {
struct socket *sock = SOCKET_I(d_inode(dentry));
@@ -709,12 +710,22 @@ INDIRECT_CALLABLE_DECLARE(int inet_sendmsg(struct socket *, struct msghdr *,
size_t));
INDIRECT_CALLABLE_DECLARE(int inet6_sendmsg(struct socket *, struct msghdr *,
size_t));
+
+static noinline void call_trace_sock_send_length(struct sock *sk, int ret,
+ int flags)
+{
+ trace_sock_send_length(sk, ret, 0);
+}
+
static inline int sock_sendmsg_nosec(struct socket *sock, struct msghdr *msg)
{
int ret = INDIRECT_CALL_INET(sock->ops->sendmsg, inet6_sendmsg,
inet_sendmsg, sock, msg,
msg_data_left(msg));
BUG_ON(ret == -EIOCBQUEUED);
+
+ if (trace_sock_send_length_enabled())
+ call_trace_sock_send_length(sock->sk, ret, 0);
return ret;
}
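The send and recv paths guard the tracepoint behind trace_*_enabled() (a static-key test, effectively free when tracing is off) and push the actual call into a noinline helper, keeping the hot function's stack frame and I-cache footprint small. The shape of that idiom (the tracepoint names are the ones in the hunk; the caller is illustrative):

/* Sketch of the static-key + noinline tracepoint idiom used above.
 * trace_sock_send_length{,_enabled}() come from the sock trace events;
 * fast_path() is a hypothetical caller.
 */
static noinline void call_trace_send(struct sock *sk, int ret)
{
	trace_sock_send_length(sk, ret, 0);	/* slow path, out of line */
}

static inline int fast_path(struct sock *sk, int ret)
{
	/* compiles to a patched-out branch until the tracepoint is enabled */
	if (trace_sock_send_length_enabled())
		call_trace_send(sk, ret);
	return ret;
}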
@@ -971,9 +982,12 @@ static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk,
static void sock_recv_mark(struct msghdr *msg, struct sock *sk,
struct sk_buff *skb)
{
- if (sock_flag(sk, SOCK_RCVMARK) && skb)
- put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32),
- &skb->mark);
+ if (sock_flag(sk, SOCK_RCVMARK) && skb) {
+ /* We must use a bounce buffer for CONFIG_HARDENED_USERCOPY=y */
+ __u32 mark = skb->mark;
+
+ put_cmsg(msg, SOL_SOCKET, SO_MARK, sizeof(__u32), &mark);
+ }
}
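The stack copy above is what CONFIG_HARDENED_USERCOPY demands: put_cmsg() ultimately copies to user space, and hardened usercopy rejects sources that point into the middle of a slab object, such as &skb->mark inside a kmem_cache-allocated sk_buff, unless that region is explicitly whitelisted. Bouncing the field through a local variable makes the copy source a stack object, which the checks allow.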
void __sock_recv_cmsgs(struct msghdr *msg, struct sock *sk,
@@ -989,12 +1003,21 @@ INDIRECT_CALLABLE_DECLARE(int inet_recvmsg(struct socket *, struct msghdr *,
size_t, int));
INDIRECT_CALLABLE_DECLARE(int inet6_recvmsg(struct socket *, struct msghdr *,
size_t, int));
+
+static noinline void call_trace_sock_recv_length(struct sock *sk, int ret, int flags)
+{
+ trace_sock_recv_length(sk, ret, flags);
+}
+
static inline int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg,
int flags)
{
- return INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
- inet_recvmsg, sock, msg, msg_data_left(msg),
- flags);
+ int ret = INDIRECT_CALL_INET(sock->ops->recvmsg, inet6_recvmsg,
+ inet_recvmsg, sock, msg,
+ msg_data_left(msg), flags);
+ if (trace_sock_recv_length_enabled())
+ call_trace_sock_recv_length(sock->sk, ret, flags);
+ return ret;
}
/**
@@ -1044,6 +1067,7 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
{
struct socket *sock;
int flags;
+ int ret;
sock = file->private_data;
@@ -1051,7 +1075,11 @@ static ssize_t sock_sendpage(struct file *file, struct page *page,
/* more is a combination of MSG_MORE and MSG_SENDPAGE_NOTLAST */
flags |= more;
- return kernel_sendpage(sock, page, offset, size, flags);
+ ret = kernel_sendpage(sock, page, offset, size, flags);
+
+ if (trace_sock_send_length_enabled())
+ call_trace_sock_send_length(sock->sk, ret, 0);
+ return ret;
}
static ssize_t sock_splice_read(struct file *file, loff_t *ppos,
diff --git a/net/sunrpc/.kunitconfig b/net/sunrpc/.kunitconfig
new file mode 100644
index 000000000000..a55a00fa649b
--- /dev/null
+++ b/net/sunrpc/.kunitconfig
@@ -0,0 +1,30 @@
+CONFIG_KUNIT=y
+CONFIG_UBSAN=y
+CONFIG_STACKTRACE=y
+CONFIG_NET=y
+CONFIG_NETWORK_FILESYSTEMS=y
+CONFIG_INET=y
+CONFIG_FILE_LOCKING=y
+CONFIG_MULTIUSER=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_CBC=y
+CONFIG_CRYPTO_CTS=y
+CONFIG_CRYPTO_ECB=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_CMAC=y
+CONFIG_CRYPTO_MD5=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_SHA512=y
+CONFIG_CRYPTO_DES=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_CAMELLIA=y
+CONFIG_NFS_FS=y
+CONFIG_SUNRPC=y
+CONFIG_SUNRPC_GSS=y
+CONFIG_RPCSEC_GSS_KRB5=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA=y
+CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2=y
+CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST=y
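The config fragment above makes the new tests self-contained: an invocation along the lines of ./tools/testing/kunit/kunit.py run --kunitconfig net/sunrpc/.kunitconfig should build a kernel with exactly these options and run the krb5 suite (command shown for illustration; see Documentation/dev-tools/kunit/ for the canonical usage).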
diff --git a/net/sunrpc/Kconfig b/net/sunrpc/Kconfig
index bbbb5af0af13..4afc5fd71d44 100644
--- a/net/sunrpc/Kconfig
+++ b/net/sunrpc/Kconfig
@@ -19,10 +19,10 @@ config SUNRPC_SWAP
config RPCSEC_GSS_KRB5
tristate "Secure RPC: Kerberos V mechanism"
depends on SUNRPC && CRYPTO
- depends on CRYPTO_MD5 && CRYPTO_DES && CRYPTO_CBC && CRYPTO_CTS
- depends on CRYPTO_ECB && CRYPTO_HMAC && CRYPTO_SHA1 && CRYPTO_AES
default y
select SUNRPC_GSS
+ select CRYPTO_SKCIPHER
+ select CRYPTO_HASH
help
Choose Y here to enable Secure RPC using the Kerberos version 5
GSS-API mechanism (RFC 1964).
@@ -34,21 +34,93 @@ config RPCSEC_GSS_KRB5
If unsure, say Y.
-config SUNRPC_DISABLE_INSECURE_ENCTYPES
- bool "Secure RPC: Disable insecure Kerberos encryption types"
+config RPCSEC_GSS_KRB5_SIMPLIFIED
+ bool
+ depends on RPCSEC_GSS_KRB5
+
+config RPCSEC_GSS_KRB5_CRYPTOSYSTEM
+ bool
+ depends on RPCSEC_GSS_KRB5
+
+config RPCSEC_GSS_KRB5_ENCTYPES_DES
+ bool "Enable Kerberos enctypes based on DES (deprecated)"
+ depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_ECB
+ depends on CRYPTO_HMAC && CRYPTO_MD5 && CRYPTO_SHA1
+ depends on CRYPTO_DES
+ default n
+ select RPCSEC_GSS_KRB5_SIMPLIFIED
+ help
+ Choose Y to enable the use of deprecated Kerberos 5
+ encryption types that utilize Data Encryption Standard
+ (DES) based ciphers. These include des-cbc-md5,
+ des-cbc-crc, and des-cbc-md4, which were deprecated by
+ RFC 6649, and des3-cbc-sha1, which was deprecated by RFC
+ 8429.
+
+ These encryption types are known to be insecure; therefore,
+ the default setting of this option is N. Support for these
+ encryption types is available only for compatibility with
+ legacy NFS client and server implementations.
+
+ Removal of support is planned for a subsequent kernel
+ release.
+
+config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1
+ bool "Enable Kerberos enctypes based on AES and SHA-1"
depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS
+ depends on CRYPTO_HMAC && CRYPTO_SHA1
+ depends on CRYPTO_AES
+ default y
+ select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
+ help
+ Choose Y to enable the use of Kerberos 5 encryption types
+ that utilize Advanced Encryption Standard (AES) ciphers and
+ SHA-1 digests. These include aes128-cts-hmac-sha1-96 and
+ aes256-cts-hmac-sha1-96.
+
+config RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA
+ bool "Enable Kerberos encryption types based on Camellia and CMAC"
+ depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS && CRYPTO_CAMELLIA
+ depends on CRYPTO_CMAC
+ default n
+ select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
+ help
+ Choose Y to enable the use of Kerberos 5 encryption types
+ that utilize Camellia ciphers (RFC 3713) and CMAC digests
+ (NIST Special Publication 800-38B). These include
+ camellia128-cts-cmac and camellia256-cts-cmac.
+
+config RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2
+ bool "Enable Kerberos enctypes based on AES and SHA-2"
+ depends on RPCSEC_GSS_KRB5
+ depends on CRYPTO_CBC && CRYPTO_CTS
+ depends on CRYPTO_HMAC && CRYPTO_SHA256 && CRYPTO_SHA512
+ depends on CRYPTO_AES
default n
+ select RPCSEC_GSS_KRB5_CRYPTOSYSTEM
+ help
+ Choose Y to enable the use of Kerberos 5 encryption types
+ that utilize Advanced Encryption Standard (AES) ciphers and
+ SHA-2 digests. These include aes128-cts-hmac-sha256-128 and
+ aes256-cts-hmac-sha384-192.
+
+config RPCSEC_GSS_KRB5_KUNIT_TEST
+ tristate "KUnit tests for RPCSEC GSS Kerberos" if !KUNIT_ALL_TESTS
+ depends on RPCSEC_GSS_KRB5 && KUNIT
+ default KUNIT_ALL_TESTS
help
- Choose Y here to disable the use of deprecated encryption types
- with the Kerberos version 5 GSS-API mechanism (RFC 1964). The
- deprecated encryption types include DES-CBC-MD5, DES-CBC-CRC,
- and DES-CBC-MD4. These types were deprecated by RFC 6649 because
- they were found to be insecure.
-
- N is the default because many sites have deployed KDCs and
- keytabs that contain only these deprecated encryption types.
- Choosing Y prevents the use of known-insecure encryption types
- but might result in compatibility problems.
+ This builds the KUnit tests for RPCSEC GSS Kerberos 5.
+
+ KUnit tests run during boot and output the results to the debug
+ log in TAP format (https://testanything.org/). The tests are only
+ useful for kernel developers running the KUnit test harness; they
+ are not intended for inclusion in a production build.
+
+ For more information on KUnit and unit tests in general, refer
+ to the KUnit documentation in Documentation/dev-tools/kunit/.
config SUNRPC_DEBUG
bool "RPC: Enable dprintk debugging"
diff --git a/net/sunrpc/auth_gss/Makefile b/net/sunrpc/auth_gss/Makefile
index 4a29f4c5dac4..012ae1720689 100644
--- a/net/sunrpc/auth_gss/Makefile
+++ b/net/sunrpc/auth_gss/Makefile
@@ -13,3 +13,5 @@ obj-$(CONFIG_RPCSEC_GSS_KRB5) += rpcsec_gss_krb5.o
rpcsec_gss_krb5-y := gss_krb5_mech.o gss_krb5_seal.o gss_krb5_unseal.o \
gss_krb5_seqnum.o gss_krb5_wrap.o gss_krb5_crypto.o gss_krb5_keys.o
+
+obj-$(CONFIG_RPCSEC_GSS_KRB5_KUNIT_TEST) += gss_krb5_test.o
diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c
index 7bb247c51e2f..1af71fbb0d80 100644
--- a/net/sunrpc/auth_gss/auth_gss.c
+++ b/net/sunrpc/auth_gss/auth_gss.c
@@ -49,6 +49,22 @@ static unsigned int gss_key_expire_timeo = GSS_KEY_EXPIRE_TIMEO;
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+/*
+ * This compile-time check verifies that we will not exceed the
+ * slack space allotted by the client and server auth_gss code
+ * before they call gss_wrap().
+ */
+#define GSS_KRB5_MAX_SLACK_NEEDED \
+ (GSS_KRB5_TOK_HDR_LEN /* gss token header */ \
+ + GSS_KRB5_MAX_CKSUM_LEN /* gss token checksum */ \
+ + GSS_KRB5_MAX_BLOCKSIZE /* confounder */ \
+ + GSS_KRB5_MAX_BLOCKSIZE /* possible padding */ \
+ + GSS_KRB5_TOK_HDR_LEN /* encrypted hdr in v2 token */ \
+ + GSS_KRB5_MAX_CKSUM_LEN /* encryption hmac */ \
+ + XDR_UNIT * 2 /* RPC verifier */ \
+ + GSS_KRB5_TOK_HDR_LEN \
+ + GSS_KRB5_MAX_CKSUM_LEN)
+
#define GSS_CRED_SLACK (RPC_MAX_AUTH_SIZE * 2)
/* length of a krb5 verifier (48), plus data added before arguments when
* using integrity (two 4-byte integers): */
@@ -302,7 +318,7 @@ __gss_find_upcall(struct rpc_pipe *pipe, kuid_t uid, const struct gss_auth *auth
list_for_each_entry(pos, &pipe->in_downcall, list) {
if (!uid_eq(pos->uid, uid))
continue;
- if (auth && pos->auth->service != auth->service)
+ if (pos->auth->service != auth->service)
continue;
refcount_inc(&pos->count);
return pos;
@@ -686,6 +702,21 @@ out:
return err;
}
+static struct gss_upcall_msg *
+gss_find_downcall(struct rpc_pipe *pipe, kuid_t uid)
+{
+ struct gss_upcall_msg *pos;
+ list_for_each_entry(pos, &pipe->in_downcall, list) {
+ if (!uid_eq(pos->uid, uid))
+ continue;
+ if (!rpc_msg_is_inflight(&pos->msg))
+ continue;
+ refcount_inc(&pos->count);
+ return pos;
+ }
+ return NULL;
+}
+
#define MSG_BUF_MAXSIZE 1024
static ssize_t
@@ -732,7 +763,7 @@ gss_pipe_downcall(struct file *filp, const char __user *src, size_t mlen)
err = -ENOENT;
/* Find a matching upcall */
spin_lock(&pipe->lock);
- gss_msg = __gss_find_upcall(pipe, uid, NULL);
+ gss_msg = gss_find_downcall(pipe, uid);
if (gss_msg == NULL) {
spin_unlock(&pipe->lock);
goto err_put_ctx;
@@ -1027,6 +1058,7 @@ gss_create_new(const struct rpc_auth_create_args *args, struct rpc_clnt *clnt)
goto err_put_mech;
auth = &gss_auth->rpc_auth;
auth->au_cslack = GSS_CRED_SLACK >> 2;
+ BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
auth->au_rslack = GSS_KRB5_MAX_SLACK_NEEDED >> 2;
auth->au_verfsize = GSS_VERF_SLACK >> 2;
auth->au_ralign = GSS_VERF_SLACK >> 2;
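A quick sanity check of the new compile-time guard (a sketch, not part of the patch; the constant values are assumptions taken from the sunrpc headers, where GSS_KRB5_TOK_HDR_LEN is 16, GSS_KRB5_MAX_CKSUM_LEN is 24, GSS_KRB5_MAX_BLOCKSIZE is 16, XDR_UNIT is 4, and RPC_MAX_AUTH_SIZE is 400):

        /* Illustrative arithmetic only, mirroring the macro term by term */
        unsigned int slack = 16        /* gss token header */
                           + 24        /* gss token checksum */
                           + 16        /* confounder */
                           + 16        /* possible padding */
                           + 16        /* encrypted hdr in v2 token */
                           + 24        /* encryption hmac */
                           + 4 * 2     /* RPC verifier */
                           + 16
                           + 24;
        /* slack == 160, comfortably below RPC_MAX_AUTH_SIZE (400),
         * so the BUILD_BUG_ON in gss_create_new() passes. */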
diff --git a/net/sunrpc/auth_gss/gss_krb5_crypto.c b/net/sunrpc/auth_gss/gss_krb5_crypto.c
index 3ea58175e159..6c7c52eeed4f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_crypto.c
+++ b/net/sunrpc/auth_gss/gss_krb5_crypto.c
@@ -46,11 +46,59 @@
#include <linux/random.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
+#include <kunit/visibility.h>
+
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+/**
+ * krb5_make_confounder - Generate a confounder string
+ * @p: memory location into which to write the string
+ * @conflen: string length to write, in octets
+ *
+ * RFCs 1964 and 3961 mention only "a random confounder" without going
+ * into detail about its function or cryptographic requirements. The
+ * assumed purpose is to prevent repeated encryption of a plaintext with
+ * the same key from generating the same ciphertext. It is also used to
+ * pad minimum plaintext length to at least a single cipher block.
+ *
+ * However, in situations like the GSS Kerberos 5 mechanism, where the
+ * encryption IV is always all zeroes, the confounder also effectively
+ * functions like an IV. Thus, not only must it be unique from message
+ * to message, but it must also be difficult to predict. Otherwise an
+ * attacker can correlate the confounder to previous or future values,
+ * making the encryption easier to break.
+ *
+ * Given that the primary consumer of this encryption mechanism is a
+ * network storage protocol, a type of traffic that often carries
+ * predictable payloads (e.g., all zeroes when reading unallocated blocks
+ * from a file), our confounder generation has to be cryptographically
+ * strong.
+ */
+void krb5_make_confounder(u8 *p, int conflen)
+{
+ get_random_bytes(p, conflen);
+}
+
+/**
+ * krb5_encrypt - simple encryption of an RPCSEC GSS payload
+ * @tfm: initialized cipher transform
+ * @iv: pointer to an IV
+ * @in: plaintext to encrypt
+ * @out: OUT: ciphertext
+ * @length: length of input and output buffers, in bytes
+ *
+ * @iv may be NULL to force the use of an all-zero IV.
+ * The buffer containing the IV must be as large as the
+ * cipher's ivsize.
+ *
+ * Return values:
+ * %0: @in successfully encrypted into @out
+ * negative errno: @in not encrypted
+ */
u32
krb5_encrypt(
struct crypto_sync_skcipher *tfm,
@@ -90,6 +138,22 @@ out:
return ret;
}
+/**
+ * krb5_decrypt - simple decryption of an RPCSEC GSS payload
+ * @tfm: initialized cipher transform
+ * @iv: pointer to an IV
+ * @in: ciphertext to decrypt
+ * @out: OUT: plaintext
+ * @length: length of input and output buffers, in bytes
+ *
+ * @iv may be NULL to force the use of an all-zero IV.
+ * The buffer containing the IV must be as large as the
+ * cipher's ivsize.
+ *
+ * Return values:
+ * %0: @in successfully decrypted into @out
+ * negative errno: @in not decrypted
+ */
u32
krb5_decrypt(
struct crypto_sync_skcipher *tfm,
@@ -203,8 +267,8 @@ make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
switch (kctx->gk5e->ctype) {
case CKSUMTYPE_RSA_MD5:
- err = kctx->gk5e->encrypt(kctx->seq, NULL, checksumdata,
- checksumdata, checksumlen);
+ err = krb5_encrypt(kctx->seq, NULL, checksumdata,
+ checksumdata, checksumlen);
if (err)
goto out;
memcpy(cksumout->data,
@@ -228,92 +292,76 @@ out_free_cksum:
return err ? GSS_S_FAILURE : 0;
}
-/*
- * checksum the plaintext data and hdrlen bytes of the token header
- * Per rfc4121, sec. 4.2.4, the checksum is performed over the data
- * body then over the first 16 octets of the MIC token
- * Inclusion of the header data in the calculation of the
- * checksum is optional.
+/**
+ * gss_krb5_checksum - Compute the MAC for a GSS Wrap or MIC token
+ * @tfm: an initialized hash transform
+ * @header: pointer to a buffer containing the token header, or NULL
+ * @hdrlen: number of octets in @header
+ * @body: xdr_buf containing an RPC message (body.len is the message length)
+ * @body_offset: byte offset into @body to start checksumming
+ * @cksumout: OUT: a buffer to be filled in with the computed HMAC
+ *
+ * Usually expressed as H = HMAC(K, message)[1..h] .
+ *
+ * Caller provides the truncation length of the output token (h) in
+ * cksumout.len.
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Digest computed, @cksumout filled in
+ * %GSS_S_FAILURE: Call failed
*/
u32
-make_checksum_v2(struct krb5_ctx *kctx, char *header, int hdrlen,
- struct xdr_buf *body, int body_offset, u8 *cksumkey,
- unsigned int usage, struct xdr_netobj *cksumout)
+gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen,
+ const struct xdr_buf *body, int body_offset,
+ struct xdr_netobj *cksumout)
{
- struct crypto_ahash *tfm;
struct ahash_request *req;
- struct scatterlist sg[1];
- int err = -1;
+ int err = -ENOMEM;
u8 *checksumdata;
- if (kctx->gk5e->keyed_cksum == 0) {
- dprintk("%s: expected keyed hash for %s\n",
- __func__, kctx->gk5e->name);
- return GSS_S_FAILURE;
- }
- if (cksumkey == NULL) {
- dprintk("%s: no key supplied for %s\n",
- __func__, kctx->gk5e->name);
- return GSS_S_FAILURE;
- }
-
- checksumdata = kmalloc(GSS_KRB5_MAX_CKSUM_LEN, GFP_KERNEL);
+ checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
if (!checksumdata)
return GSS_S_FAILURE;
- tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(tfm))
- goto out_free_cksum;
-
req = ahash_request_alloc(tfm, GFP_KERNEL);
if (!req)
- goto out_free_ahash;
-
+ goto out_free_cksum;
ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
-
- err = crypto_ahash_setkey(tfm, cksumkey, kctx->gk5e->keylength);
- if (err)
- goto out;
-
err = crypto_ahash_init(req);
if (err)
- goto out;
+ goto out_free_ahash;
+
+ /*
+ * Per RFC 4121 Section 4.2.4, the checksum is performed over the
+ * data body first, then over the octets in "header".
+ */
err = xdr_process_buf(body, body_offset, body->len - body_offset,
checksummer, req);
if (err)
- goto out;
- if (header != NULL) {
+ goto out_free_ahash;
+ if (header) {
+ struct scatterlist sg[1];
+
sg_init_one(sg, header, hdrlen);
ahash_request_set_crypt(req, sg, NULL, hdrlen);
err = crypto_ahash_update(req);
if (err)
- goto out;
+ goto out_free_ahash;
}
+
ahash_request_set_crypt(req, NULL, checksumdata, 0);
err = crypto_ahash_final(req);
if (err)
- goto out;
-
- cksumout->len = kctx->gk5e->cksumlength;
+ goto out_free_ahash;
+ memcpy(cksumout->data, checksumdata, cksumout->len);
- switch (kctx->gk5e->ctype) {
- case CKSUMTYPE_HMAC_SHA1_96_AES128:
- case CKSUMTYPE_HMAC_SHA1_96_AES256:
- /* note that this truncates the hash */
- memcpy(cksumout->data, checksumdata, kctx->gk5e->cksumlength);
- break;
- default:
- BUG();
- break;
- }
-out:
- ahash_request_free(req);
out_free_ahash:
- crypto_free_ahash(tfm);
+ ahash_request_free(req);
out_free_cksum:
- kfree(checksumdata);
- return err ? GSS_S_FAILURE : 0;
+ kfree_sensitive(checksumdata);
+ return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
}
+EXPORT_SYMBOL_IF_KUNIT(gss_krb5_checksum);
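As the kernel-doc notes, the caller selects the truncation length by presetting cksumout.len. A minimal sketch of how a v2 MIC path might invoke this helper; the token_header and rpc_msg variables are hypothetical, and only the calling convention is taken from the function above:

        u8 mic_data[GSS_KRB5_MAX_CKSUM_LEN];
        struct xdr_netobj mic = {
                .len    = 12,           /* h: e.g. a 96-bit truncated HMAC */
                .data   = mic_data,
        };
        u32 maj_stat;

        /* Digest the message body first, then the 16-octet token header */
        maj_stat = gss_krb5_checksum(kctx->acceptor_sign, token_header,
                                     GSS_KRB5_TOK_HDR_LEN, &rpc_msg, 0, &mic);
        if (maj_stat != GSS_S_COMPLETE)
                return maj_stat;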
struct encryptor_desc {
u8 iv[GSS_KRB5_MAX_BLOCKSIZE];
@@ -526,7 +574,6 @@ xdr_extend_head(struct xdr_buf *buf, unsigned int base, unsigned int shiftlen)
if (shiftlen == 0)
return 0;
- BUILD_BUG_ON(GSS_KRB5_MAX_SLACK_NEEDED > RPC_MAX_AUTH_SIZE);
BUG_ON(shiftlen > RPC_MAX_AUTH_SIZE);
p = buf->head[0].iov_base + base;
@@ -595,40 +642,157 @@ out:
return ret;
}
+/**
+ * krb5_cbc_cts_encrypt - encrypt in CBC mode with CTS
+ * @cts_tfm: CBC cipher with CTS
+ * @cbc_tfm: base CBC cipher
+ * @offset: starting byte offset for plaintext
+ * @buf: OUT: output buffer
+ * @pages: plaintext
+ * @iv: output CBC initialization vector, or NULL
+ * @ivsize: size of @iv, in octets
+ *
+ * To provide confidentiality, encrypt using cipher block chaining
+ * with ciphertext stealing. Message integrity is handled separately.
+ *
+ * Return values:
+ * %0: encryption successful
+ * negative errno: encryption could not be completed
+ */
+VISIBLE_IF_KUNIT
+int krb5_cbc_cts_encrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm,
+ u32 offset, struct xdr_buf *buf, struct page **pages,
+ u8 *iv, unsigned int ivsize)
+{
+ u32 blocksize, nbytes, nblocks, cbcbytes;
+ struct encryptor_desc desc;
+ int err;
+
+ blocksize = crypto_sync_skcipher_blocksize(cts_tfm);
+ nbytes = buf->len - offset;
+ nblocks = (nbytes + blocksize - 1) / blocksize;
+ cbcbytes = 0;
+ if (nblocks > 2)
+ cbcbytes = (nblocks - 2) * blocksize;
+
+ memset(desc.iv, 0, sizeof(desc.iv));
+
+ /* Handle block-sized chunks of plaintext with CBC. */
+ if (cbcbytes) {
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, cbc_tfm);
+
+ desc.pos = offset;
+ desc.fragno = 0;
+ desc.fraglen = 0;
+ desc.pages = pages;
+ desc.outbuf = buf;
+ desc.req = req;
+
+ skcipher_request_set_sync_tfm(req, cbc_tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+
+ sg_init_table(desc.infrags, 4);
+ sg_init_table(desc.outfrags, 4);
+
+ err = xdr_process_buf(buf, offset, cbcbytes, encryptor, &desc);
+ skcipher_request_zero(req);
+ if (err)
+ return err;
+ }
+
+ /* Remaining plaintext is handled with CBC-CTS. */
+ err = gss_krb5_cts_crypt(cts_tfm, buf, offset + cbcbytes,
+ desc.iv, pages, 1);
+ if (err)
+ return err;
+
+ if (unlikely(iv))
+ memcpy(iv, desc.iv, ivsize);
+ return 0;
+}
+EXPORT_SYMBOL_IF_KUNIT(krb5_cbc_cts_encrypt);
+
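The split between the two transforms deserves a worked example (assumed values, not from the patch): with AES's 16-octet blocks and a 100-octet plaintext, all but the final two blocks take the plain-CBC path, and CTS absorbs the tail:

        u32 blocksize = 16;                     /* AES block size */
        u32 nbytes = 100;                       /* plaintext length */
        u32 nblocks = (nbytes + blocksize - 1) / blocksize;        /* 7 */
        u32 cbcbytes = nblocks > 2 ? (nblocks - 2) * blocksize : 0;

        /* cbcbytes == 80: octets 0..79 are encrypted with cbc(aes);
         * the remaining 20 octets go through cts(cbc(aes)), which
         * handles the partial final block without padding. */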
+/**
+ * krb5_cbc_cts_decrypt - decrypt in CBC mode with CTS
+ * @cts_tfm: CBC cipher with CTS
+ * @cbc_tfm: base CBC cipher
+ * @offset: starting byte offset for plaintext
+ * @buf: OUT: output buffer
+ *
+ * Return values:
+ * %0: decryption successful
+ * negative errno: decryption could not be completed
+ */
+VISIBLE_IF_KUNIT
+int krb5_cbc_cts_decrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm,
+ u32 offset, struct xdr_buf *buf)
+{
+ u32 blocksize, nblocks, cbcbytes;
+ struct decryptor_desc desc;
+ int err;
+
+ blocksize = crypto_sync_skcipher_blocksize(cts_tfm);
+ nblocks = (buf->len + blocksize - 1) / blocksize;
+ cbcbytes = 0;
+ if (nblocks > 2)
+ cbcbytes = (nblocks - 2) * blocksize;
+
+ memset(desc.iv, 0, sizeof(desc.iv));
+
+ /* Handle block-sized chunks of plaintext with CBC. */
+ if (cbcbytes) {
+ SYNC_SKCIPHER_REQUEST_ON_STACK(req, cbc_tfm);
+
+ desc.fragno = 0;
+ desc.fraglen = 0;
+ desc.req = req;
+
+ skcipher_request_set_sync_tfm(req, cbc_tfm);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+
+ sg_init_table(desc.frags, 4);
+
+ err = xdr_process_buf(buf, 0, cbcbytes, decryptor, &desc);
+ skcipher_request_zero(req);
+ if (err)
+ return err;
+ }
+
+ /* Remaining plaintext is handled with CBC-CTS. */
+ return gss_krb5_cts_crypt(cts_tfm, buf, cbcbytes, desc.iv, NULL, 0);
+}
+EXPORT_SYMBOL_IF_KUNIT(krb5_cbc_cts_decrypt);
+
u32
gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
struct xdr_buf *buf, struct page **pages)
{
u32 err;
struct xdr_netobj hmac;
- u8 *cksumkey;
u8 *ecptr;
struct crypto_sync_skcipher *cipher, *aux_cipher;
- int blocksize;
+ struct crypto_ahash *ahash;
struct page **save_pages;
- int nblocks, nbytes;
- struct encryptor_desc desc;
- u32 cbcbytes;
- unsigned int usage;
+ unsigned int conflen;
if (kctx->initiate) {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
- cksumkey = kctx->initiator_integ;
- usage = KG_USAGE_INITIATOR_SEAL;
+ ahash = kctx->initiator_integ;
} else {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
- cksumkey = kctx->acceptor_integ;
- usage = KG_USAGE_ACCEPTOR_SEAL;
+ ahash = kctx->acceptor_integ;
}
- blocksize = crypto_sync_skcipher_blocksize(cipher);
+ conflen = crypto_sync_skcipher_blocksize(cipher);
/* hide the gss token header and insert the confounder */
offset += GSS_KRB5_TOK_HDR_LEN;
- if (xdr_extend_head(buf, offset, kctx->gk5e->conflen))
+ if (xdr_extend_head(buf, offset, conflen))
return GSS_S_FAILURE;
- gss_krb5_make_confounder(buf->head[0].iov_base + offset, kctx->gk5e->conflen);
+ krb5_make_confounder(buf->head[0].iov_base + offset, conflen);
offset -= GSS_KRB5_TOK_HDR_LEN;
if (buf->tail[0].iov_base != NULL) {
@@ -659,152 +823,322 @@ gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
save_pages = buf->pages;
buf->pages = pages;
- err = make_checksum_v2(kctx, NULL, 0, buf,
- offset + GSS_KRB5_TOK_HDR_LEN,
- cksumkey, usage, &hmac);
+ err = gss_krb5_checksum(ahash, NULL, 0, buf,
+ offset + GSS_KRB5_TOK_HDR_LEN, &hmac);
buf->pages = save_pages;
if (err)
return GSS_S_FAILURE;
- nbytes = buf->len - offset - GSS_KRB5_TOK_HDR_LEN;
- nblocks = (nbytes + blocksize - 1) / blocksize;
- cbcbytes = 0;
- if (nblocks > 2)
- cbcbytes = (nblocks - 2) * blocksize;
-
- memset(desc.iv, 0, sizeof(desc.iv));
-
- if (cbcbytes) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
-
- desc.pos = offset + GSS_KRB5_TOK_HDR_LEN;
- desc.fragno = 0;
- desc.fraglen = 0;
- desc.pages = pages;
- desc.outbuf = buf;
- desc.req = req;
-
- skcipher_request_set_sync_tfm(req, aux_cipher);
- skcipher_request_set_callback(req, 0, NULL, NULL);
-
- sg_init_table(desc.infrags, 4);
- sg_init_table(desc.outfrags, 4);
-
- err = xdr_process_buf(buf, offset + GSS_KRB5_TOK_HDR_LEN,
- cbcbytes, encryptor, &desc);
- skcipher_request_zero(req);
- if (err)
- goto out_err;
- }
-
- /* Make sure IV carries forward from any CBC results. */
- err = gss_krb5_cts_crypt(cipher, buf,
- offset + GSS_KRB5_TOK_HDR_LEN + cbcbytes,
- desc.iv, pages, 1);
- if (err) {
- err = GSS_S_FAILURE;
- goto out_err;
- }
+ err = krb5_cbc_cts_encrypt(cipher, aux_cipher,
+ offset + GSS_KRB5_TOK_HDR_LEN,
+ buf, pages, NULL, 0);
+ if (err)
+ return GSS_S_FAILURE;
/* Now update buf to account for HMAC */
buf->tail[0].iov_len += kctx->gk5e->cksumlength;
buf->len += kctx->gk5e->cksumlength;
-out_err:
- if (err)
- err = GSS_S_FAILURE;
- return err;
+ return GSS_S_COMPLETE;
}
u32
gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
{
- struct xdr_buf subbuf;
- u32 ret = 0;
- u8 *cksum_key;
struct crypto_sync_skcipher *cipher, *aux_cipher;
+ struct crypto_ahash *ahash;
struct xdr_netobj our_hmac_obj;
u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
- int nblocks, blocksize, cbcbytes;
- struct decryptor_desc desc;
- unsigned int usage;
+ struct xdr_buf subbuf;
+ u32 ret = 0;
if (kctx->initiate) {
cipher = kctx->acceptor_enc;
aux_cipher = kctx->acceptor_enc_aux;
- cksum_key = kctx->acceptor_integ;
- usage = KG_USAGE_ACCEPTOR_SEAL;
+ ahash = kctx->acceptor_integ;
} else {
cipher = kctx->initiator_enc;
aux_cipher = kctx->initiator_enc_aux;
- cksum_key = kctx->initiator_integ;
- usage = KG_USAGE_INITIATOR_SEAL;
+ ahash = kctx->initiator_integ;
}
- blocksize = crypto_sync_skcipher_blocksize(cipher);
-
/* create a segment skipping the header and leaving out the checksum */
xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
(len - offset - GSS_KRB5_TOK_HDR_LEN -
kctx->gk5e->cksumlength));
- nblocks = (subbuf.len + blocksize - 1) / blocksize;
+ ret = krb5_cbc_cts_decrypt(cipher, aux_cipher, 0, &subbuf);
+ if (ret)
+ goto out_err;
- cbcbytes = 0;
- if (nblocks > 2)
- cbcbytes = (nblocks - 2) * blocksize;
+ /* Calculate our hmac over the plaintext data */
+ our_hmac_obj.len = sizeof(our_hmac);
+ our_hmac_obj.data = our_hmac;
+ ret = gss_krb5_checksum(ahash, NULL, 0, &subbuf, 0, &our_hmac_obj);
+ if (ret)
+ goto out_err;
- memset(desc.iv, 0, sizeof(desc.iv));
+ /* Get the packet's hmac value */
+ ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
+ pkt_hmac, kctx->gk5e->cksumlength);
+ if (ret)
+ goto out_err;
- if (cbcbytes) {
- SYNC_SKCIPHER_REQUEST_ON_STACK(req, aux_cipher);
+ if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
+ ret = GSS_S_BAD_SIG;
+ goto out_err;
+ }
+ *headskip = crypto_sync_skcipher_blocksize(cipher);
+ *tailskip = kctx->gk5e->cksumlength;
+out_err:
+ if (ret && ret != GSS_S_BAD_SIG)
+ ret = GSS_S_FAILURE;
+ return ret;
+}
- desc.fragno = 0;
- desc.fraglen = 0;
- desc.req = req;
+/**
+ * krb5_etm_checksum - Compute a MAC for a GSS Wrap token
+ * @cipher: an initialized cipher transform
+ * @tfm: an initialized hash transform
+ * @body: xdr_buf containing an RPC message (body.len is the message length)
+ * @body_offset: byte offset into @body to start checksumming
+ * @cksumout: OUT: a buffer to be filled in with the computed HMAC
+ *
+ * Usually expressed as H = HMAC(K, IV | ciphertext)[1..h] .
+ *
+ * Caller provides the truncation length of the output token (h) in
+ * cksumout.len.
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Digest computed, @cksumout filled in
+ * %GSS_S_FAILURE: Call failed
+ */
+VISIBLE_IF_KUNIT
+u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
+ struct crypto_ahash *tfm, const struct xdr_buf *body,
+ int body_offset, struct xdr_netobj *cksumout)
+{
+ unsigned int ivsize = crypto_sync_skcipher_ivsize(cipher);
+ struct ahash_request *req;
+ struct scatterlist sg[1];
+ u8 *iv, *checksumdata;
+ int err = -ENOMEM;
- skcipher_request_set_sync_tfm(req, aux_cipher);
- skcipher_request_set_callback(req, 0, NULL, NULL);
+ checksumdata = kmalloc(crypto_ahash_digestsize(tfm), GFP_KERNEL);
+ if (!checksumdata)
+ return GSS_S_FAILURE;
+ /* For RPCSEC, the "initial cipher state" is always all zeroes. */
+ iv = kzalloc(ivsize, GFP_KERNEL);
+ if (!iv)
+ goto out_free_mem;
- sg_init_table(desc.frags, 4);
+ req = ahash_request_alloc(tfm, GFP_KERNEL);
+ if (!req)
+ goto out_free_mem;
+ ahash_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+ err = crypto_ahash_init(req);
+ if (err)
+ goto out_free_ahash;
- ret = xdr_process_buf(&subbuf, 0, cbcbytes, decryptor, &desc);
- skcipher_request_zero(req);
- if (ret)
- goto out_err;
+ sg_init_one(sg, iv, ivsize);
+ ahash_request_set_crypt(req, sg, NULL, ivsize);
+ err = crypto_ahash_update(req);
+ if (err)
+ goto out_free_ahash;
+ err = xdr_process_buf(body, body_offset, body->len - body_offset,
+ checksummer, req);
+ if (err)
+ goto out_free_ahash;
+
+ ahash_request_set_crypt(req, NULL, checksumdata, 0);
+ err = crypto_ahash_final(req);
+ if (err)
+ goto out_free_ahash;
+ memcpy(cksumout->data, checksumdata, cksumout->len);
+
+out_free_ahash:
+ ahash_request_free(req);
+out_free_mem:
+ kfree(iv);
+ kfree_sensitive(checksumdata);
+ return err ? GSS_S_FAILURE : GSS_S_COMPLETE;
+}
+EXPORT_SYMBOL_IF_KUNIT(krb5_etm_checksum);
+
+/**
+ * krb5_etm_encrypt - Encrypt using the RFC 8009 rules
+ * @kctx: Kerberos context
+ * @offset: starting offset of the payload, in bytes
+ * @buf: OUT: send buffer to contain the encrypted payload
+ * @pages: plaintext payload
+ *
+ * The main difference from gss_krb5_aes_encrypt() is that "The HMAC is
+ * calculated over the cipher state concatenated with the AES
+ * output, instead of being calculated over the confounder and
+ * plaintext. This allows the message receiver to verify the
+ * integrity of the message before decrypting the message."
+ *
+ * RFC 8009 Section 5:
+ *
+ * encryption function: as follows, where E() is AES encryption in
+ * CBC-CS3 mode, and h is the size of truncated HMAC (128 bits or
+ * 192 bits as described above).
+ *
+ * N = random value of length 128 bits (the AES block size)
+ * IV = cipher state
+ * C = E(Ke, N | plaintext, IV)
+ * H = HMAC(Ki, IV | C)
+ * ciphertext = C | H[1..h]
+ *
+ * This encryption formula provides AEAD EtM with key separation.
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Encryption successful
+ * %GSS_S_FAILURE: Encryption failed
+ */
+u32
+krb5_etm_encrypt(struct krb5_ctx *kctx, u32 offset,
+ struct xdr_buf *buf, struct page **pages)
+{
+ struct crypto_sync_skcipher *cipher, *aux_cipher;
+ struct crypto_ahash *ahash;
+ struct xdr_netobj hmac;
+ unsigned int conflen;
+ u8 *ecptr;
+ u32 err;
+
+ if (kctx->initiate) {
+ cipher = kctx->initiator_enc;
+ aux_cipher = kctx->initiator_enc_aux;
+ ahash = kctx->initiator_integ;
+ } else {
+ cipher = kctx->acceptor_enc;
+ aux_cipher = kctx->acceptor_enc_aux;
+ ahash = kctx->acceptor_integ;
}
+ conflen = crypto_sync_skcipher_blocksize(cipher);
- /* Make sure IV carries forward from any CBC results. */
- ret = gss_krb5_cts_crypt(cipher, &subbuf, cbcbytes, desc.iv, NULL, 0);
- if (ret)
+ offset += GSS_KRB5_TOK_HDR_LEN;
+ if (xdr_extend_head(buf, offset, conflen))
+ return GSS_S_FAILURE;
+ krb5_make_confounder(buf->head[0].iov_base + offset, conflen);
+ offset -= GSS_KRB5_TOK_HDR_LEN;
+
+ if (buf->tail[0].iov_base) {
+ ecptr = buf->tail[0].iov_base + buf->tail[0].iov_len;
+ } else {
+ buf->tail[0].iov_base = buf->head[0].iov_base
+ + buf->head[0].iov_len;
+ buf->tail[0].iov_len = 0;
+ ecptr = buf->tail[0].iov_base;
+ }
+
+ memcpy(ecptr, buf->head[0].iov_base + offset, GSS_KRB5_TOK_HDR_LEN);
+ buf->tail[0].iov_len += GSS_KRB5_TOK_HDR_LEN;
+ buf->len += GSS_KRB5_TOK_HDR_LEN;
+
+ err = krb5_cbc_cts_encrypt(cipher, aux_cipher,
+ offset + GSS_KRB5_TOK_HDR_LEN,
+ buf, pages, NULL, 0);
+ if (err)
+ return GSS_S_FAILURE;
+
+ hmac.data = buf->tail[0].iov_base + buf->tail[0].iov_len;
+ hmac.len = kctx->gk5e->cksumlength;
+ err = krb5_etm_checksum(cipher, ahash,
+ buf, offset + GSS_KRB5_TOK_HDR_LEN, &hmac);
+ if (err)
goto out_err;
+ buf->tail[0].iov_len += kctx->gk5e->cksumlength;
+ buf->len += kctx->gk5e->cksumlength;
+ return GSS_S_COMPLETE;
- /* Calculate our hmac over the plaintext data */
- our_hmac_obj.len = sizeof(our_hmac);
- our_hmac_obj.data = our_hmac;
+out_err:
+ return GSS_S_FAILURE;
+}
+
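Per the construction above, each EtM Wrap token carries a fixed per-message overhead. A back-of-the-envelope sketch for aes128-cts-hmac-sha256-128 (values assumed from the enctype table added in gss_krb5_mech.c below):

        unsigned int conflen  = 16;             /* AES block size */
        unsigned int cksumlen = 128 / 8;        /* truncated HMAC-SHA-256 */
        unsigned int overhead = 16              /* clear GSS token header */
                              + conflen         /* confounder N */
                              + 16              /* header copy inside C */
                              + cksumlen;       /* H[1..h] */
        /* overhead == 64 octets added to every Wrap token */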
+/**
+ * krb5_etm_decrypt - Decrypt using the RFC 8009 rules
+ * @kctx: Kerberos context
+ * @offset: starting offset of the ciphertext, in bytes
+ * @len: length of the received Wrap token, in bytes
+ * @buf: xdr_buf containing the Wrap token to decrypt
+ * @headskip: OUT: the enctype's confounder length, in octets
+ * @tailskip: OUT: the enctype's HMAC length, in octets
+ *
+ * RFC 8009 Section 5:
+ *
+ * decryption function: as follows, where D() is AES decryption in
+ * CBC-CS3 mode, and h is the size of truncated HMAC.
+ *
+ * (C, H) = ciphertext
+ * (Note: H is the last h bits of the ciphertext.)
+ * IV = cipher state
+ * if H != HMAC(Ki, IV | C)[1..h]
+ * stop, report error
+ * (N, P) = D(Ke, C, IV)
+ *
+ * Return values:
+ * %GSS_S_COMPLETE: Decryption successful
+ * %GSS_S_BAD_SIG: computed HMAC != received HMAC
+ * %GSS_S_FAILURE: Decryption failed
+ */
+u32
+krb5_etm_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip)
+{
+ struct crypto_sync_skcipher *cipher, *aux_cipher;
+ u8 our_hmac[GSS_KRB5_MAX_CKSUM_LEN];
+ u8 pkt_hmac[GSS_KRB5_MAX_CKSUM_LEN];
+ struct xdr_netobj our_hmac_obj;
+ struct crypto_ahash *ahash;
+ struct xdr_buf subbuf;
+ u32 ret = 0;
- ret = make_checksum_v2(kctx, NULL, 0, &subbuf, 0,
- cksum_key, usage, &our_hmac_obj);
+ if (kctx->initiate) {
+ cipher = kctx->acceptor_enc;
+ aux_cipher = kctx->acceptor_enc_aux;
+ ahash = kctx->acceptor_integ;
+ } else {
+ cipher = kctx->initiator_enc;
+ aux_cipher = kctx->initiator_enc_aux;
+ ahash = kctx->initiator_integ;
+ }
+
+ /* Extract the ciphertext into @subbuf. */
+ xdr_buf_subsegment(buf, &subbuf, offset + GSS_KRB5_TOK_HDR_LEN,
+ (len - offset - GSS_KRB5_TOK_HDR_LEN -
+ kctx->gk5e->cksumlength));
+
+ our_hmac_obj.data = our_hmac;
+ our_hmac_obj.len = kctx->gk5e->cksumlength;
+ ret = krb5_etm_checksum(cipher, ahash, &subbuf, 0, &our_hmac_obj);
if (ret)
goto out_err;
-
- /* Get the packet's hmac value */
ret = read_bytes_from_xdr_buf(buf, len - kctx->gk5e->cksumlength,
pkt_hmac, kctx->gk5e->cksumlength);
if (ret)
goto out_err;
-
if (crypto_memneq(pkt_hmac, our_hmac, kctx->gk5e->cksumlength) != 0) {
ret = GSS_S_BAD_SIG;
goto out_err;
}
- *headskip = kctx->gk5e->conflen;
+
+ ret = krb5_cbc_cts_decrypt(cipher, aux_cipher, 0, &subbuf);
+ if (ret) {
+ ret = GSS_S_FAILURE;
+ goto out_err;
+ }
+
+ *headskip = crypto_sync_skcipher_blocksize(cipher);
*tailskip = kctx->gk5e->cksumlength;
+ return GSS_S_COMPLETE;
+
out_err:
- if (ret && ret != GSS_S_BAD_SIG)
+ if (ret != GSS_S_BAD_SIG)
ret = GSS_S_FAILURE;
return ret;
}
diff --git a/net/sunrpc/auth_gss/gss_krb5_internal.h b/net/sunrpc/auth_gss/gss_krb5_internal.h
new file mode 100644
index 000000000000..b673e2626acb
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_internal.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 or BSD-3-Clause */
+/*
+ * SunRPC GSS Kerberos 5 mechanism internal definitions
+ *
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ */
+
+#ifndef _NET_SUNRPC_AUTH_GSS_KRB5_INTERNAL_H
+#define _NET_SUNRPC_AUTH_GSS_KRB5_INTERNAL_H
+
+/*
+ * The RFCs often specify payload lengths in bits. This helper
+ * converts a specified bit-length to the number of octets/bytes.
+ */
+#define BITS2OCTETS(x) ((x) / 8)
+
+struct krb5_ctx;
+
+struct gss_krb5_enctype {
+ const u32 etype; /* encryption (key) type */
+ const u32 ctype; /* checksum type */
+ const char *name; /* "friendly" name */
+ const char *encrypt_name; /* crypto encrypt name */
+ const char *aux_cipher; /* aux encrypt cipher name */
+ const char *cksum_name; /* crypto checksum name */
+ const u16 signalg; /* signing algorithm */
+ const u16 sealalg; /* sealing algorithm */
+ const u32 cksumlength; /* checksum length */
+ const u32 keyed_cksum; /* is it a keyed cksum? */
+ const u32 keybytes; /* raw key len, in bytes */
+ const u32 keylength; /* protocol key length, in octets */
+ const u32 Kc_length; /* checksum subkey length, in octets */
+ const u32 Ke_length; /* encryption subkey length, in octets */
+ const u32 Ki_length; /* integrity subkey length, in octets */
+
+ int (*import_ctx)(struct krb5_ctx *ctx, gfp_t gfp_mask);
+ int (*derive_key)(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *in,
+ struct xdr_netobj *out,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask);
+ u32 (*encrypt)(struct krb5_ctx *kctx, u32 offset,
+ struct xdr_buf *buf, struct page **pages);
+ u32 (*decrypt)(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip);
+ u32 (*get_mic)(struct krb5_ctx *kctx, struct xdr_buf *text,
+ struct xdr_netobj *token);
+ u32 (*verify_mic)(struct krb5_ctx *kctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token);
+ u32 (*wrap)(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages);
+ u32 (*unwrap)(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align);
+};
+
+/* krb5_ctx flags definitions */
+#define KRB5_CTX_FLAG_INITIATOR 0x00000001
+#define KRB5_CTX_FLAG_ACCEPTOR_SUBKEY 0x00000004
+
+struct krb5_ctx {
+ int initiate; /* 1 = initiating, 0 = accepting */
+ u32 enctype;
+ u32 flags;
+ const struct gss_krb5_enctype *gk5e; /* enctype-specific info */
+ struct crypto_sync_skcipher *enc;
+ struct crypto_sync_skcipher *seq;
+ struct crypto_sync_skcipher *acceptor_enc;
+ struct crypto_sync_skcipher *initiator_enc;
+ struct crypto_sync_skcipher *acceptor_enc_aux;
+ struct crypto_sync_skcipher *initiator_enc_aux;
+ struct crypto_ahash *acceptor_sign;
+ struct crypto_ahash *initiator_sign;
+ struct crypto_ahash *initiator_integ;
+ struct crypto_ahash *acceptor_integ;
+ u8 Ksess[GSS_KRB5_MAX_KEYLEN]; /* session key */
+ u8 cksum[GSS_KRB5_MAX_KEYLEN];
+ atomic_t seq_send;
+ atomic64_t seq_send64;
+ time64_t endtime;
+ struct xdr_netobj mech_used;
+};
+
+/*
+ * GSS Kerberos 5 mechanism Per-Message calls.
+ */
+
+u32 gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
+ struct xdr_netobj *token);
+u32 gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
+ struct xdr_netobj *token);
+
+u32 gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token);
+u32 gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token);
+
+u32 gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages);
+u32 gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages);
+
+u32 gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align);
+u32 gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align);
+
+/*
+ * Implementation internal functions
+ */
+
+/* Key Derivation Functions */
+
+int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask);
+
+int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask);
+
+int krb5_kdf_hmac_sha2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *in_constant,
+ gfp_t gfp_mask);
+
+int krb5_kdf_feedback_cmac(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *in_constant,
+ gfp_t gfp_mask);
+
+/**
+ * krb5_derive_key - Derive a subkey from a protocol key
+ * @kctx: Kerberos 5 context
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @usage: key usage value
+ * @seed: key usage seed (one octet)
+ * @gfp_mask: memory allocation control flags
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+static inline int krb5_derive_key(struct krb5_ctx *kctx,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ u32 usage, u8 seed, gfp_t gfp_mask)
+{
+ const struct gss_krb5_enctype *gk5e = kctx->gk5e;
+ u8 label_data[GSS_KRB5_K5CLENGTH];
+ struct xdr_netobj label = {
+ .len = sizeof(label_data),
+ .data = label_data,
+ };
+ __be32 *p = (__be32 *)label_data;
+
+ *p = cpu_to_be32(usage);
+ label_data[4] = seed;
+ return gk5e->derive_key(gk5e, inkey, outkey, &label, gfp_mask);
+}
+
+s32 krb5_make_seq_num(struct krb5_ctx *kctx, struct crypto_sync_skcipher *key,
+ int direction, u32 seqnum, unsigned char *cksum,
+ unsigned char *buf);
+
+s32 krb5_get_seq_num(struct krb5_ctx *kctx, unsigned char *cksum,
+ unsigned char *buf, int *direction, u32 *seqnum);
+
+void krb5_make_confounder(u8 *p, int conflen);
+
+u32 make_checksum(struct krb5_ctx *kctx, char *header, int hdrlen,
+ struct xdr_buf *body, int body_offset, u8 *cksumkey,
+ unsigned int usage, struct xdr_netobj *cksumout);
+
+u32 gss_krb5_checksum(struct crypto_ahash *tfm, char *header, int hdrlen,
+ const struct xdr_buf *body, int body_offset,
+ struct xdr_netobj *cksumout);
+
+u32 krb5_encrypt(struct crypto_sync_skcipher *key, void *iv, void *in,
+ void *out, int length);
+
+u32 krb5_decrypt(struct crypto_sync_skcipher *key, void *iv, void *in,
+ void *out, int length);
+
+int xdr_extend_head(struct xdr_buf *buf, unsigned int base,
+ unsigned int shiftlen);
+
+int gss_encrypt_xdr_buf(struct crypto_sync_skcipher *tfm,
+ struct xdr_buf *outbuf, int offset,
+ struct page **pages);
+
+int gss_decrypt_xdr_buf(struct crypto_sync_skcipher *tfm,
+ struct xdr_buf *inbuf, int offset);
+
+u32 gss_krb5_aes_encrypt(struct krb5_ctx *kctx, u32 offset,
+ struct xdr_buf *buf, struct page **pages);
+
+u32 gss_krb5_aes_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *plainoffset, u32 *plainlen);
+
+u32 krb5_etm_encrypt(struct krb5_ctx *kctx, u32 offset, struct xdr_buf *buf,
+ struct page **pages);
+
+u32 krb5_etm_decrypt(struct krb5_ctx *kctx, u32 offset, u32 len,
+ struct xdr_buf *buf, u32 *headskip, u32 *tailskip);
+
+#if IS_ENABLED(CONFIG_KUNIT)
+void krb5_nfold(u32 inbits, const u8 *in, u32 outbits, u8 *out);
+const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype);
+int krb5_cbc_cts_encrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm, u32 offset,
+ struct xdr_buf *buf, struct page **pages,
+ u8 *iv, unsigned int ivsize);
+int krb5_cbc_cts_decrypt(struct crypto_sync_skcipher *cts_tfm,
+ struct crypto_sync_skcipher *cbc_tfm,
+ u32 offset, struct xdr_buf *buf);
+u32 krb5_etm_checksum(struct crypto_sync_skcipher *cipher,
+ struct crypto_ahash *tfm, const struct xdr_buf *body,
+ int body_offset, struct xdr_netobj *cksumout);
+#endif
+
+#endif /* _NET_SUNRPC_AUTH_GSS_KRB5_INTERNAL_H */
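A minimal sketch of how a context-import path is expected to use krb5_derive_key() to produce the initiator seal subkey Ke. The usage and seed constants (KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION) are assumed to come from <linux/sunrpc/gss_krb5.h>, and session_key stands in for an xdr_netobj built from kctx->Ksess:

        u8 keydata[GSS_KRB5_MAX_KEYLEN];
        struct xdr_netobj Ke = {
                .len    = kctx->gk5e->Ke_length,
                .data   = keydata,
        };
        int err;

        err = krb5_derive_key(kctx, &session_key, &Ke,
                              KG_USAGE_INITIATOR_SEAL,
                              KEY_USAGE_SEED_ENCRYPTION, GFP_KERNEL);
        if (err)
                return err;
        /* Ke now keys the initiator_enc cipher used for Wrap tokens */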
diff --git a/net/sunrpc/auth_gss/gss_krb5_keys.c b/net/sunrpc/auth_gss/gss_krb5_keys.c
index 726c076950c0..5347fe1cc93f 100644
--- a/net/sunrpc/auth_gss/gss_krb5_keys.c
+++ b/net/sunrpc/auth_gss/gss_krb5_keys.c
@@ -60,18 +60,27 @@
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
#include <linux/lcm.h>
+#include <crypto/hash.h>
+#include <kunit/visibility.h>
+
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-/*
+/**
+ * krb5_nfold - n-fold function
+ * @inbits: number of bits in @in
+ * @in: buffer containing input to fold
+ * @outbits: number of bits in the output buffer
+ * @out: buffer to hold the result
+ *
* This is the n-fold function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
-
-static void krb5_nfold(u32 inbits, const u8 *in,
- u32 outbits, u8 *out)
+VISIBLE_IF_KUNIT
+void krb5_nfold(u32 inbits, const u8 *in, u32 outbits, u8 *out)
{
unsigned long ulcm;
int byte, i, msbit;
@@ -132,40 +141,36 @@ static void krb5_nfold(u32 inbits, const u8 *in,
}
}
}
+EXPORT_SYMBOL_IF_KUNIT(krb5_nfold);
/*
* This is the DK (derive_key) function as described in rfc3961, sec 5.1
* Taken from MIT Kerberos and modified.
*/
-
-u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
- const struct xdr_netobj *inkey,
- struct xdr_netobj *outkey,
- const struct xdr_netobj *in_constant,
- gfp_t gfp_mask)
+static int krb5_DK(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey, u8 *rawkey,
+ const struct xdr_netobj *in_constant, gfp_t gfp_mask)
{
size_t blocksize, keybytes, keylength, n;
- unsigned char *inblockdata, *outblockdata, *rawkey;
+ unsigned char *inblockdata, *outblockdata;
struct xdr_netobj inblock, outblock;
struct crypto_sync_skcipher *cipher;
- u32 ret = EINVAL;
+ int ret = -EINVAL;
- blocksize = gk5e->blocksize;
keybytes = gk5e->keybytes;
keylength = gk5e->keylength;
- if ((inkey->len != keylength) || (outkey->len != keylength))
+ if (inkey->len != keylength)
goto err_return;
cipher = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
if (IS_ERR(cipher))
goto err_return;
+ blocksize = crypto_sync_skcipher_blocksize(cipher);
if (crypto_sync_skcipher_setkey(cipher, inkey->data, inkey->len))
goto err_return;
- /* allocate and set up buffers */
-
- ret = ENOMEM;
+ ret = -ENOMEM;
inblockdata = kmalloc(blocksize, gfp_mask);
if (inblockdata == NULL)
goto err_free_cipher;
@@ -174,10 +179,6 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
if (outblockdata == NULL)
goto err_free_in;
- rawkey = kmalloc(keybytes, gfp_mask);
- if (rawkey == NULL)
- goto err_free_out;
-
inblock.data = (char *) inblockdata;
inblock.len = blocksize;
@@ -197,8 +198,8 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
n = 0;
while (n < keybytes) {
- (*(gk5e->encrypt))(cipher, NULL, inblock.data,
- outblock.data, inblock.len);
+ krb5_encrypt(cipher, NULL, inblock.data, outblock.data,
+ inblock.len);
if ((keybytes - n) <= outblock.len) {
memcpy(rawkey + n, outblock.data, (keybytes - n));
@@ -210,26 +211,8 @@ u32 krb5_derive_key(const struct gss_krb5_enctype *gk5e,
n += outblock.len;
}
- /* postprocess the key */
-
- inblock.data = (char *) rawkey;
- inblock.len = keybytes;
-
- BUG_ON(gk5e->mk_key == NULL);
- ret = (*(gk5e->mk_key))(gk5e, &inblock, outkey);
- if (ret) {
- dprintk("%s: got %d from mk_key function for '%s'\n",
- __func__, ret, gk5e->encrypt_name);
- goto err_free_raw;
- }
-
- /* clean memory, free resources and exit */
-
ret = 0;
-err_free_raw:
- kfree_sensitive(rawkey);
-err_free_out:
kfree_sensitive(outblockdata);
err_free_in:
kfree_sensitive(inblockdata);
@@ -252,15 +235,11 @@ static void mit_des_fixup_key_parity(u8 key[8])
}
}
-/*
- * This is the des3 key derivation postprocess function
- */
-u32 gss_krb5_des3_make_key(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
+static int krb5_random_to_key_v1(const struct gss_krb5_enctype *gk5e,
+ struct xdr_netobj *randombits,
+ struct xdr_netobj *key)
{
- int i;
- u32 ret = EINVAL;
+ int i, ret = -EINVAL;
if (key->len != 24) {
dprintk("%s: key->len is %d\n", __func__, key->len);
@@ -292,14 +271,49 @@ err_out:
return ret;
}
+/**
+ * krb5_derive_key_v1 - Derive a subkey for an RFC 3961 enctype
+ * @gk5e: Kerberos 5 enctype profile
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @label: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int krb5_derive_key_v1(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask)
+{
+ struct xdr_netobj inblock;
+ int ret;
+
+ inblock.len = gk5e->keybytes;
+ inblock.data = kmalloc(inblock.len, gfp_mask);
+ if (!inblock.data)
+ return -ENOMEM;
+
+ ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask);
+ if (!ret)
+ ret = krb5_random_to_key_v1(gk5e, &inblock, outkey);
+
+ kfree_sensitive(inblock.data);
+ return ret;
+}
+
/*
- * This is the aes key derivation postprocess function
+ * This is the identity function, with some sanity checking.
*/
-u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
- struct xdr_netobj *randombits,
- struct xdr_netobj *key)
+static int krb5_random_to_key_v2(const struct gss_krb5_enctype *gk5e,
+ struct xdr_netobj *randombits,
+ struct xdr_netobj *key)
{
- u32 ret = EINVAL;
+ int ret = -EINVAL;
if (key->len != 16 && key->len != 32) {
dprintk("%s: key->len is %d\n", __func__, key->len);
@@ -320,3 +334,297 @@ u32 gss_krb5_aes_make_key(const struct gss_krb5_enctype *gk5e,
err_out:
return ret;
}
+
+/**
+ * krb5_derive_key_v2 - Derive a subkey for an RFC 3962 enctype
+ * @gk5e: Kerberos 5 enctype profile
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @label: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int krb5_derive_key_v2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask)
+{
+ struct xdr_netobj inblock;
+ int ret;
+
+ inblock.len = gk5e->keybytes;
+ inblock.data = kmalloc(inblock.len, gfp_mask);
+ if (!inblock.data)
+ return -ENOMEM;
+
+ ret = krb5_DK(gk5e, inkey, inblock.data, label, gfp_mask);
+ if (!ret)
+ ret = krb5_random_to_key_v2(gk5e, &inblock, outkey);
+
+ kfree_sensitive(inblock.data);
+ return ret;
+}
+
+/*
+ * K(i) = CMAC(key, K(i-1) | i | constant | 0x00 | k)
+ *
+ * i: A block counter is used with a length of 4 bytes, represented
+ * in big-endian order.
+ *
+ * constant: The label input to the KDF is the usage constant supplied
+ * to the key derivation function
+ *
+ * k: The length of the output key in bits, represented as a 4-byte
+ * string in big-endian order.
+ *
+ * Caller fills in K(i-1) in @step, and receives the result K(i)
+ * in the same buffer.
+ */
+static int
+krb5_cmac_Ki(struct crypto_shash *tfm, const struct xdr_netobj *constant,
+ u32 outlen, u32 count, struct xdr_netobj *step)
+{
+ __be32 k = cpu_to_be32(outlen * 8);
+ SHASH_DESC_ON_STACK(desc, tfm);
+ __be32 i = cpu_to_be32(count);
+ u8 zero = 0;
+ int ret;
+
+ desc->tfm = tfm;
+ ret = crypto_shash_init(desc);
+ if (ret)
+ goto out_err;
+
+ ret = crypto_shash_update(desc, step->data, step->len);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&i, sizeof(i));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, constant->data, constant->len);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, &zero, sizeof(zero));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&k, sizeof(k));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_final(desc, step->data);
+ if (ret)
+ goto out_err;
+
+out_err:
+ shash_desc_zero(desc);
+ return ret;
+}
+
+/**
+ * krb5_kdf_feedback_cmac - Derive a subkey for a Camellia/CMAC-based enctype
+ * @gk5e: Kerberos 5 enctype parameters
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @constant: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * RFC 6803 Section 3:
+ *
+ * "We use a key derivation function from the family specified in
+ * [SP800-108], Section 5.2, 'KDF in Feedback Mode'."
+ *
+ * n = ceiling(k / 128)
+ * K(0) = zeros
+ * K(i) = CMAC(key, K(i-1) | i | constant | 0x00 | k)
+ * DR(key, constant) = k-truncate(K(1) | K(2) | ... | K(n))
+ * KDF-FEEDBACK-CMAC(key, constant) = random-to-key(DR(key, constant))
+ *
+ * Caller sets @outkey->len to the desired length of the derived key (k).
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int
+krb5_kdf_feedback_cmac(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *constant,
+ gfp_t gfp_mask)
+{
+ struct xdr_netobj step = { .data = NULL };
+ struct xdr_netobj DR = { .data = NULL };
+ unsigned int blocksize, offset;
+ struct crypto_shash *tfm;
+ int n, count, ret;
+
+ /*
+ * This implementation assumes the CMAC used for an enctype's
+ * key derivation is the same as the CMAC used for its
+ * checksumming. This happens to be true for enctypes that
+ * are currently supported by this implementation.
+ */
+ tfm = crypto_alloc_shash(gk5e->cksum_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
+ }
+ ret = crypto_shash_setkey(tfm, inkey->data, inkey->len);
+ if (ret)
+ goto out_free_tfm;
+
+ blocksize = crypto_shash_digestsize(tfm);
+ n = (outkey->len + blocksize - 1) / blocksize;
+
+ /* K(0) is all zeroes */
+ ret = -ENOMEM;
+ step.len = blocksize;
+ step.data = kzalloc(step.len, gfp_mask);
+ if (!step.data)
+ goto out_free_tfm;
+
+ DR.len = blocksize * n;
+ DR.data = kmalloc(DR.len, gfp_mask);
+ if (!DR.data)
+ goto out_free_tfm;
+
+ /* XXX: Does not handle partial-block key sizes */
+ for (offset = 0, count = 1; count <= n; count++) {
+ ret = krb5_cmac_Ki(tfm, constant, outkey->len, count, &step);
+ if (ret)
+ goto out_free_tfm;
+
+ memcpy(DR.data + offset, step.data, blocksize);
+ offset += blocksize;
+ }
+
+ /* k-truncate and random-to-key */
+ memcpy(outkey->data, DR.data, outkey->len);
+ ret = 0;
+
+out_free_tfm:
+ crypto_free_shash(tfm);
+out:
+ kfree_sensitive(step.data);
+ kfree_sensitive(DR.data);
+ return ret;
+}
+
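A worked example of the iteration count (assumed values): for camellia256-cts-cmac, k is 256 bits and the CMAC digest is 16 octets, so two feedback rounds are needed:

        u32 k_octets  = 256 / 8;                /* outkey->len == 32 */
        u32 blocksize = 16;                     /* cmac(camellia) digest */
        u32 n = (k_octets + blocksize - 1) / blocksize;     /* n == 2 */

        /* DR = K(1) | K(2); k-truncate keeps the first 32 octets, and
         * random-to-key is the identity here, so outkey = DR[0..31]. */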
+/*
+ * K1 = HMAC-SHA(key, 0x00000001 | label | 0x00 | k)
+ *
+ * key: The source of entropy from which subsequent keys are derived.
+ *
+ * label: An octet string describing the intended usage of the
+ * derived key.
+ *
+ * k: Length in bits of the key to be outputted, expressed in
+ * big-endian binary representation in 4 bytes.
+ */
+static int
+krb5_hmac_K1(struct crypto_shash *tfm, const struct xdr_netobj *label,
+ u32 outlen, struct xdr_netobj *K1)
+{
+ __be32 k = cpu_to_be32(outlen * 8);
+ SHASH_DESC_ON_STACK(desc, tfm);
+ __be32 one = cpu_to_be32(1);
+ u8 zero = 0;
+ int ret;
+
+ desc->tfm = tfm;
+ ret = crypto_shash_init(desc);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&one, sizeof(one));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, label->data, label->len);
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, &zero, sizeof(zero));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_update(desc, (u8 *)&k, sizeof(k));
+ if (ret)
+ goto out_err;
+ ret = crypto_shash_final(desc, K1->data);
+ if (ret)
+ goto out_err;
+
+out_err:
+ shash_desc_zero(desc);
+ return ret;
+}
+
+/**
+ * krb5_kdf_hmac_sha2 - Derive a subkey for an AES/SHA2-based enctype
+ * @gk5e: Kerberos 5 enctype policy parameters
+ * @inkey: base protocol key
+ * @outkey: OUT: derived key
+ * @label: subkey usage label
+ * @gfp_mask: memory allocation control flags
+ *
+ * RFC 8009 Section 3:
+ *
+ * "We use a key derivation function from Section 5.1 of [SP800-108],
+ * which uses the HMAC algorithm as the PRF."
+ *
+ * function KDF-HMAC-SHA2(key, label, [context,] k):
+ * k-truncate(K1)
+ *
+ * Caller sets @outkey->len to the desired length of the derived key.
+ *
+ * On success, returns 0 and fills in @outkey. A negative errno value
+ * is returned on failure.
+ */
+int
+krb5_kdf_hmac_sha2(const struct gss_krb5_enctype *gk5e,
+ const struct xdr_netobj *inkey,
+ struct xdr_netobj *outkey,
+ const struct xdr_netobj *label,
+ gfp_t gfp_mask)
+{
+ struct crypto_shash *tfm;
+ struct xdr_netobj K1 = {
+ .data = NULL,
+ };
+ int ret;
+
+ /*
+ * This implementation assumes the HMAC used for an enctype's
+ * key derivation is the same as the HMAC used for its
+ * checksumming. This happens to be true for enctypes that
+ * are currently supported by this implementation.
+ */
+ tfm = crypto_alloc_shash(gk5e->cksum_name, 0, 0);
+ if (IS_ERR(tfm)) {
+ ret = PTR_ERR(tfm);
+ goto out;
+ }
+ ret = crypto_shash_setkey(tfm, inkey->data, inkey->len);
+ if (ret)
+ goto out_free_tfm;
+
+ K1.len = crypto_shash_digestsize(tfm);
+ K1.data = kmalloc(K1.len, gfp_mask);
+ if (!K1.data) {
+ ret = -ENOMEM;
+ goto out_free_tfm;
+ }
+
+ ret = krb5_hmac_K1(tfm, label, outkey->len, &K1);
+ if (ret)
+ goto out_free_tfm;
+
+ /* k-truncate and random-to-key */
+ memcpy(outkey->data, K1.data, outkey->len);
+
+out_free_tfm:
+ kfree_sensitive(K1.data);
+ crypto_free_shash(tfm);
+out:
+ return ret;
+}
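Unlike the feedback KDF above, KDF-HMAC-SHA2 needs only a single PRF invocation, because one HMAC output always covers the requested key length. A sketch for aes256-cts-hmac-sha384-192 (values assumed from RFC 8009):

        /* Deriving Ke (256 bits) with HMAC-SHA-384 (48-octet digest):
         *   K1 = HMAC-SHA-384(key, 0x00000001 | label | 0x00 | 256)
         *   outkey = first 32 octets of K1 (k-truncate)
         */
        u32 digestsize = 48;            /* crypto_shash_digestsize() */
        u32 k_octets   = 256 / 8;       /* outkey->len == 32 <= digestsize */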
diff --git a/net/sunrpc/auth_gss/gss_krb5_mech.c b/net/sunrpc/auth_gss/gss_krb5_mech.c
index 1c092b05c2bb..20e21d08badb 100644
--- a/net/sunrpc/auth_gss/gss_krb5_mech.c
+++ b/net/sunrpc/auth_gss/gss_krb5_mech.c
@@ -19,18 +19,27 @@
#include <linux/sunrpc/auth.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/sunrpc/xdr.h>
-#include <linux/sunrpc/gss_krb5_enctypes.h>
+#include <kunit/visibility.h>
#include "auth_gss_internal.h"
+#include "gss_krb5_internal.h"
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
-static struct gss_api_mech gss_kerberos_mech; /* forward declaration */
+static struct gss_api_mech gss_kerberos_mech;
+
+#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
+static int gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask);
+static int gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask);
+#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
+static int gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask);
+#endif
static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
-#ifndef CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
/*
* DES (All DES enctypes are mapped to the same gss functionality)
*/
@@ -40,19 +49,18 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.name = "des-cbc-crc",
.encrypt_name = "cbc(des)",
.cksum_name = "md5",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = NULL,
+ .import_ctx = gss_krb5_import_ctx_des,
+ .get_mic = gss_krb5_get_mic_v1,
+ .verify_mic = gss_krb5_verify_mic_v1,
+ .wrap = gss_krb5_wrap_v1,
+ .unwrap = gss_krb5_unwrap_v1,
.signalg = SGN_ALG_DES_MAC_MD5,
.sealalg = SEAL_ALG_DES,
.keybytes = 7,
.keylength = 8,
- .blocksize = 8,
- .conflen = 8,
.cksumlength = 8,
.keyed_cksum = 0,
},
-#endif /* CONFIG_SUNRPC_DISABLE_INSECURE_ENCTYPES */
/*
* 3DES
*/
@@ -62,100 +70,291 @@ static const struct gss_krb5_enctype supported_gss_krb5_enctypes[] = {
.name = "des3-hmac-sha1",
.encrypt_name = "cbc(des3_ede)",
.cksum_name = "hmac(sha1)",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = gss_krb5_des3_make_key,
+ .import_ctx = gss_krb5_import_ctx_v1,
+ .derive_key = krb5_derive_key_v1,
+ .get_mic = gss_krb5_get_mic_v1,
+ .verify_mic = gss_krb5_verify_mic_v1,
+ .wrap = gss_krb5_wrap_v1,
+ .unwrap = gss_krb5_unwrap_v1,
.signalg = SGN_ALG_HMAC_SHA1_DES3_KD,
.sealalg = SEAL_ALG_DES3KD,
.keybytes = 21,
.keylength = 24,
- .blocksize = 8,
- .conflen = 8,
.cksumlength = 20,
.keyed_cksum = 1,
},
+#endif
+
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
/*
- * AES128
+ * AES-128 with SHA-1 (RFC 3962)
*/
{
.etype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES128,
.name = "aes128-cts",
.encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = gss_krb5_aes_make_key,
- .encrypt_v2 = gss_krb5_aes_encrypt,
- .decrypt_v2 = gss_krb5_aes_decrypt,
+ .import_ctx = gss_krb5_import_ctx_v2,
+ .derive_key = krb5_derive_key_v2,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+
.signalg = -1,
.sealalg = -1,
.keybytes = 16,
- .keylength = 16,
- .blocksize = 16,
- .conflen = 16,
- .cksumlength = 12,
+ .keylength = BITS2OCTETS(128),
+ .Kc_length = BITS2OCTETS(128),
+ .Ke_length = BITS2OCTETS(128),
+ .Ki_length = BITS2OCTETS(128),
+ .cksumlength = BITS2OCTETS(96),
.keyed_cksum = 1,
},
/*
- * AES256
+ * AES-256 with SHA-1 (RFC 3962)
*/
{
.etype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
.ctype = CKSUMTYPE_HMAC_SHA1_96_AES256,
.name = "aes256-cts",
.encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
.cksum_name = "hmac(sha1)",
- .encrypt = krb5_encrypt,
- .decrypt = krb5_decrypt,
- .mk_key = gss_krb5_aes_make_key,
- .encrypt_v2 = gss_krb5_aes_encrypt,
- .decrypt_v2 = gss_krb5_aes_decrypt,
+ .import_ctx = gss_krb5_import_ctx_v2,
+ .derive_key = krb5_derive_key_v2,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+
.signalg = -1,
.sealalg = -1,
.keybytes = 32,
- .keylength = 32,
- .blocksize = 16,
- .conflen = 16,
- .cksumlength = 12,
+ .keylength = BITS2OCTETS(256),
+ .Kc_length = BITS2OCTETS(256),
+ .Ke_length = BITS2OCTETS(256),
+ .Ki_length = BITS2OCTETS(256),
+ .cksumlength = BITS2OCTETS(96),
.keyed_cksum = 1,
},
+#endif
+
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA)
+ /*
+ * Camellia-128 with CMAC (RFC 6803)
+ */
+ {
+ .etype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .ctype = CKSUMTYPE_CMAC_CAMELLIA128,
+ .name = "camellia128-cts-cmac",
+ .encrypt_name = "cts(cbc(camellia))",
+ .aux_cipher = "cbc(camellia)",
+ .cksum_name = "cmac(camellia)",
+ .cksumlength = BITS2OCTETS(128),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(128),
+ .Kc_length = BITS2OCTETS(128),
+ .Ke_length = BITS2OCTETS(128),
+ .Ki_length = BITS2OCTETS(128),
+
+ .import_ctx = gss_krb5_import_ctx_v2,
+ .derive_key = krb5_kdf_feedback_cmac,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+ /*
+ * Camellia-256 with CMAC (RFC 6803)
+ */
+ {
+ .etype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .ctype = CKSUMTYPE_CMAC_CAMELLIA256,
+ .name = "camellia256-cts-cmac",
+ .encrypt_name = "cts(cbc(camellia))",
+ .aux_cipher = "cbc(camellia)",
+ .cksum_name = "cmac(camellia)",
+ .cksumlength = BITS2OCTETS(128),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(256),
+ .Kc_length = BITS2OCTETS(256),
+ .Ke_length = BITS2OCTETS(256),
+ .Ki_length = BITS2OCTETS(256),
+
+ .import_ctx = gss_krb5_import_ctx_v2,
+ .derive_key = krb5_kdf_feedback_cmac,
+ .encrypt = gss_krb5_aes_encrypt,
+ .decrypt = gss_krb5_aes_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+#endif
+
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2)
+ /*
+ * AES-128 with SHA-256 (RFC 8009)
+ */
+ {
+ .etype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .ctype = CKSUMTYPE_HMAC_SHA256_128_AES128,
+ .name = "aes128-cts-hmac-sha256-128",
+ .encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
+ .cksum_name = "hmac(sha256)",
+ .cksumlength = BITS2OCTETS(128),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(128),
+ .Kc_length = BITS2OCTETS(128),
+ .Ke_length = BITS2OCTETS(128),
+ .Ki_length = BITS2OCTETS(128),
+
+ .import_ctx = gss_krb5_import_ctx_v2,
+ .derive_key = krb5_kdf_hmac_sha2,
+ .encrypt = krb5_etm_encrypt,
+ .decrypt = krb5_etm_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+ /*
+ * AES-256 with SHA-384 (RFC 8009)
+ */
+ {
+ .etype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .ctype = CKSUMTYPE_HMAC_SHA384_192_AES256,
+ .name = "aes256-cts-hmac-sha384-192",
+ .encrypt_name = "cts(cbc(aes))",
+ .aux_cipher = "cbc(aes)",
+ .cksum_name = "hmac(sha384)",
+ .cksumlength = BITS2OCTETS(192),
+ .keyed_cksum = 1,
+ .keylength = BITS2OCTETS(256),
+ .Kc_length = BITS2OCTETS(192),
+ .Ke_length = BITS2OCTETS(256),
+ .Ki_length = BITS2OCTETS(192),
+
+ .import_ctx = gss_krb5_import_ctx_v2,
+ .derive_key = krb5_kdf_hmac_sha2,
+ .encrypt = krb5_etm_encrypt,
+ .decrypt = krb5_etm_decrypt,
+
+ .get_mic = gss_krb5_get_mic_v2,
+ .verify_mic = gss_krb5_verify_mic_v2,
+ .wrap = gss_krb5_wrap_v2,
+ .unwrap = gss_krb5_unwrap_v2,
+ },
+#endif
};
-static const int num_supported_enctypes =
- ARRAY_SIZE(supported_gss_krb5_enctypes);
+/*
+ * The list of advertised enctypes is specified in order of most
+ * preferred to least.
+ */
+static char gss_krb5_enctype_priority_list[64];
-static int
-supported_gss_krb5_enctype(int etype)
+static void gss_krb5_prepare_enctype_priority_list(void)
{
- int i;
- for (i = 0; i < num_supported_enctypes; i++)
- if (supported_gss_krb5_enctypes[i].etype == etype)
- return 1;
- return 0;
+ static const u32 gss_krb5_enctypes[] = {
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA2)
+ ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_CAMELLIA)
+ ENCTYPE_CAMELLIA256_CTS_CMAC,
+ ENCTYPE_CAMELLIA128_CTS_CMAC,
+#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_AES_SHA1)
+ ENCTYPE_AES256_CTS_HMAC_SHA1_96,
+ ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_ENCTYPES_DES)
+ ENCTYPE_DES3_CBC_SHA1,
+ ENCTYPE_DES_CBC_MD5,
+ ENCTYPE_DES_CBC_CRC,
+ ENCTYPE_DES_CBC_MD4,
+#endif
+ };
+ size_t total, i;
+ char buf[16];
+ char *sep;
+ int n;
+
+ sep = "";
+ gss_krb5_enctype_priority_list[0] = '\0';
+ for (total = 0, i = 0; i < ARRAY_SIZE(gss_krb5_enctypes); i++) {
+ n = sprintf(buf, "%s%u", sep, gss_krb5_enctypes[i]);
+ if (n < 0)
+ break;
+ if (total + n >= sizeof(gss_krb5_enctype_priority_list))
+ break;
+ strcat(gss_krb5_enctype_priority_list, buf);
+ sep = ",";
+ total += n;
+ }
}
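With every enctype option enabled, the loop above emits the registered Kerberos enctype numbers in preference order. The expected result (numbering assumed from the RFC 3961/3962/6803/8009 registries, not stated in the patch) would be:

        /* gss_krb5_enctype_priority_list ==
         *      "20,19,26,25,18,17,16,3,1,2"
         * i.e. AES/SHA-2 first, then Camellia, AES/SHA-1, and finally
         * the deprecated DES family. */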
-static const struct gss_krb5_enctype *
-get_gss_krb5_enctype(int etype)
+/**
+ * gss_krb5_lookup_enctype - Retrieve profile information for a given enctype
+ * @etype: ENCTYPE value
+ *
+ * Returns a pointer to a gss_krb5_enctype structure, or NULL if no
+ * matching etype is found.
+ */
+VISIBLE_IF_KUNIT
+const struct gss_krb5_enctype *gss_krb5_lookup_enctype(u32 etype)
{
- int i;
- for (i = 0; i < num_supported_enctypes; i++)
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(supported_gss_krb5_enctypes); i++)
if (supported_gss_krb5_enctypes[i].etype == etype)
return &supported_gss_krb5_enctypes[i];
return NULL;
}
+EXPORT_SYMBOL_IF_KUNIT(gss_krb5_lookup_enctype);
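
A typical caller checks for NULL and treats an unknown etype as invalid; a short sketch of the pattern used later in this patch:

	const struct gss_krb5_enctype *gk5e;

	gk5e = gss_krb5_lookup_enctype(ctx->enctype);
	if (gk5e == NULL)
		return -EINVAL;	/* enctype unknown or not compiled in */
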
+
+static struct crypto_sync_skcipher *
+gss_krb5_alloc_cipher_v1(struct krb5_ctx *ctx, struct xdr_netobj *key)
+{
+ struct crypto_sync_skcipher *tfm;
+
+ tfm = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
+ if (IS_ERR(tfm))
+ return NULL;
+ if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) {
+ crypto_free_sync_skcipher(tfm);
+ return NULL;
+ }
+ return tfm;
+}
static inline const void *
get_key(const void *p, const void *end,
struct krb5_ctx *ctx, struct crypto_sync_skcipher **res)
{
+ struct crypto_sync_skcipher *tfm;
struct xdr_netobj key;
int alg;
p = simple_get_bytes(p, end, &alg, sizeof(alg));
if (IS_ERR(p))
goto out_err;
-
switch (alg) {
case ENCTYPE_DES_CBC_CRC:
case ENCTYPE_DES_CBC_MD4:
@@ -164,37 +363,26 @@ get_key(const void *p, const void *end,
alg = ENCTYPE_DES_CBC_RAW;
break;
}
-
- if (!supported_gss_krb5_enctype(alg)) {
- printk(KERN_WARNING "gss_kerberos_mech: unsupported "
- "encryption key algorithm %d\n", alg);
- p = ERR_PTR(-EINVAL);
- goto out_err;
+ if (!gss_krb5_lookup_enctype(alg)) {
+ pr_warn("gss_krb5: unsupported enctype: %d\n", alg);
+ goto out_err_inval;
}
+
p = simple_get_netobj(p, end, &key);
if (IS_ERR(p))
goto out_err;
-
- *res = crypto_alloc_sync_skcipher(ctx->gk5e->encrypt_name, 0, 0);
- if (IS_ERR(*res)) {
- printk(KERN_WARNING "gss_kerberos_mech: unable to initialize "
- "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
- *res = NULL;
- goto out_err_free_key;
- }
- if (crypto_sync_skcipher_setkey(*res, key.data, key.len)) {
- printk(KERN_WARNING "gss_kerberos_mech: error setting key for "
- "crypto algorithm %s\n", ctx->gk5e->encrypt_name);
- goto out_err_free_tfm;
+ tfm = gss_krb5_alloc_cipher_v1(ctx, &key);
+ kfree(key.data);
+ if (!tfm) {
+ pr_warn("gss_krb5: failed to initialize cipher '%s'\n",
+ ctx->gk5e->encrypt_name);
+ goto out_err_inval;
}
+ *res = tfm;
- kfree(key.data);
return p;
-out_err_free_tfm:
- crypto_free_sync_skcipher(*res);
-out_err_free_key:
- kfree(key.data);
+out_err_inval:
p = ERR_PTR(-EINVAL);
out_err:
return p;
@@ -214,7 +402,7 @@ gss_import_v1_context(const void *p, const void *end, struct krb5_ctx *ctx)
/* Old format supports only DES! Any other enctype uses new format */
ctx->enctype = ENCTYPE_DES_CBC_RAW;
- ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
+ ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
p = ERR_PTR(-EINVAL);
goto out_err;
@@ -278,70 +466,34 @@ out_err:
return PTR_ERR(p);
}
-static struct crypto_sync_skcipher *
-context_v2_alloc_cipher(struct krb5_ctx *ctx, const char *cname, u8 *key)
-{
- struct crypto_sync_skcipher *cp;
-
- cp = crypto_alloc_sync_skcipher(cname, 0, 0);
- if (IS_ERR(cp)) {
- dprintk("gss_kerberos_mech: unable to initialize "
- "crypto algorithm %s\n", cname);
- return NULL;
- }
- if (crypto_sync_skcipher_setkey(cp, key, ctx->gk5e->keylength)) {
- dprintk("gss_kerberos_mech: error setting key for "
- "crypto algorithm %s\n", cname);
- crypto_free_sync_skcipher(cp);
- return NULL;
- }
- return cp;
-}
-
-static inline void
-set_cdata(u8 cdata[GSS_KRB5_K5CLENGTH], u32 usage, u8 seed)
+#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
+static int
+gss_krb5_import_ctx_des(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
- cdata[0] = (usage>>24)&0xff;
- cdata[1] = (usage>>16)&0xff;
- cdata[2] = (usage>>8)&0xff;
- cdata[3] = usage&0xff;
- cdata[4] = seed;
+ return -EINVAL;
}
static int
-context_derive_keys_des3(struct krb5_ctx *ctx, gfp_t gfp_mask)
+gss_krb5_import_ctx_v1(struct krb5_ctx *ctx, gfp_t gfp_mask)
{
- struct xdr_netobj c, keyin, keyout;
- u8 cdata[GSS_KRB5_K5CLENGTH];
- u32 err;
-
- c.len = GSS_KRB5_K5CLENGTH;
- c.data = cdata;
+ struct xdr_netobj keyin, keyout;
keyin.data = ctx->Ksess;
keyin.len = ctx->gk5e->keylength;
- keyout.len = ctx->gk5e->keylength;
- /* seq uses the raw key */
- ctx->seq = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
- ctx->Ksess);
+ ctx->seq = gss_krb5_alloc_cipher_v1(ctx, &keyin);
if (ctx->seq == NULL)
goto out_err;
-
- ctx->enc = context_v2_alloc_cipher(ctx, ctx->gk5e->encrypt_name,
- ctx->Ksess);
+ ctx->enc = gss_krb5_alloc_cipher_v1(ctx, &keyin);
if (ctx->enc == NULL)
goto out_free_seq;
/* derive cksum */
- set_cdata(cdata, KG_USAGE_SIGN, KEY_USAGE_SEED_CHECKSUM);
keyout.data = ctx->cksum;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving cksum key\n",
- __func__, err);
+ keyout.len = ctx->gk5e->keylength;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_SIGN,
+ KEY_USAGE_SEED_CHECKSUM, gfp_mask))
goto out_free_enc;
- }
return 0;
@@ -352,118 +504,140 @@ out_free_seq:
out_err:
return -EINVAL;
}
+#endif
-static int
-context_derive_keys_new(struct krb5_ctx *ctx, gfp_t gfp_mask)
+#if defined(CONFIG_RPCSEC_GSS_KRB5_CRYPTOSYSTEM)
+
+static struct crypto_sync_skcipher *
+gss_krb5_alloc_cipher_v2(const char *cname, const struct xdr_netobj *key)
{
- struct xdr_netobj c, keyin, keyout;
- u8 cdata[GSS_KRB5_K5CLENGTH];
- u32 err;
+ struct crypto_sync_skcipher *tfm;
- c.len = GSS_KRB5_K5CLENGTH;
- c.data = cdata;
+ tfm = crypto_alloc_sync_skcipher(cname, 0, 0);
+ if (IS_ERR(tfm))
+ return NULL;
+ if (crypto_sync_skcipher_setkey(tfm, key->data, key->len)) {
+ crypto_free_sync_skcipher(tfm);
+ return NULL;
+ }
+ return tfm;
+}
- keyin.data = ctx->Ksess;
- keyin.len = ctx->gk5e->keylength;
- keyout.len = ctx->gk5e->keylength;
+static struct crypto_ahash *
+gss_krb5_alloc_hash_v2(struct krb5_ctx *kctx, const struct xdr_netobj *key)
+{
+ struct crypto_ahash *tfm;
- /* initiator seal encryption */
- set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
- keyout.data = ctx->initiator_seal;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving initiator_seal key\n",
- __func__, err);
- goto out_err;
+ tfm = crypto_alloc_ahash(kctx->gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ if (IS_ERR(tfm))
+ return NULL;
+ if (crypto_ahash_setkey(tfm, key->data, key->len)) {
+ crypto_free_ahash(tfm);
+ return NULL;
}
- ctx->initiator_enc = context_v2_alloc_cipher(ctx,
- ctx->gk5e->encrypt_name,
- ctx->initiator_seal);
+ return tfm;
+}
+
+static int
+gss_krb5_import_ctx_v2(struct krb5_ctx *ctx, gfp_t gfp_mask)
+{
+ struct xdr_netobj keyin = {
+ .len = ctx->gk5e->keylength,
+ .data = ctx->Ksess,
+ };
+ struct xdr_netobj keyout;
+ int ret = -EINVAL;
+
+ keyout.data = kmalloc(GSS_KRB5_MAX_KEYLEN, gfp_mask);
+ if (!keyout.data)
+ return -ENOMEM;
+
+ /* initiator seal encryption */
+ keyout.len = ctx->gk5e->Ke_length;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_INITIATOR_SEAL,
+ KEY_USAGE_SEED_ENCRYPTION, gfp_mask))
+ goto out;
+ ctx->initiator_enc = gss_krb5_alloc_cipher_v2(ctx->gk5e->encrypt_name,
+ &keyout);
if (ctx->initiator_enc == NULL)
- goto out_err;
+ goto out;
+ if (ctx->gk5e->aux_cipher) {
+ ctx->initiator_enc_aux =
+ gss_krb5_alloc_cipher_v2(ctx->gk5e->aux_cipher,
+ &keyout);
+ if (ctx->initiator_enc_aux == NULL)
+ goto out_free;
+ }
/* acceptor seal encryption */
- set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_ENCRYPTION);
- keyout.data = ctx->acceptor_seal;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving acceptor_seal key\n",
- __func__, err);
- goto out_free_initiator_enc;
- }
- ctx->acceptor_enc = context_v2_alloc_cipher(ctx,
- ctx->gk5e->encrypt_name,
- ctx->acceptor_seal);
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_ACCEPTOR_SEAL,
+ KEY_USAGE_SEED_ENCRYPTION, gfp_mask))
+ goto out_free;
+ ctx->acceptor_enc = gss_krb5_alloc_cipher_v2(ctx->gk5e->encrypt_name,
+ &keyout);
if (ctx->acceptor_enc == NULL)
- goto out_free_initiator_enc;
+ goto out_free;
+ if (ctx->gk5e->aux_cipher) {
+ ctx->acceptor_enc_aux =
+ gss_krb5_alloc_cipher_v2(ctx->gk5e->aux_cipher,
+ &keyout);
+ if (ctx->acceptor_enc_aux == NULL)
+ goto out_free;
+ }
/* initiator sign checksum */
- set_cdata(cdata, KG_USAGE_INITIATOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
- keyout.data = ctx->initiator_sign;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving initiator_sign key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
+ keyout.len = ctx->gk5e->Kc_length;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_INITIATOR_SIGN,
+ KEY_USAGE_SEED_CHECKSUM, gfp_mask))
+ goto out_free;
+ ctx->initiator_sign = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->initiator_sign == NULL)
+ goto out_free;
/* acceptor sign checksum */
- set_cdata(cdata, KG_USAGE_ACCEPTOR_SIGN, KEY_USAGE_SEED_CHECKSUM);
- keyout.data = ctx->acceptor_sign;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving acceptor_sign key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_ACCEPTOR_SIGN,
+ KEY_USAGE_SEED_CHECKSUM, gfp_mask))
+ goto out_free;
+ ctx->acceptor_sign = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->acceptor_sign == NULL)
+ goto out_free;
/* initiator seal integrity */
- set_cdata(cdata, KG_USAGE_INITIATOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
- keyout.data = ctx->initiator_integ;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving initiator_integ key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
+ keyout.len = ctx->gk5e->Ki_length;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_INITIATOR_SEAL,
+ KEY_USAGE_SEED_INTEGRITY, gfp_mask))
+ goto out_free;
+ ctx->initiator_integ = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->initiator_integ == NULL)
+ goto out_free;
/* acceptor seal integrity */
- set_cdata(cdata, KG_USAGE_ACCEPTOR_SEAL, KEY_USAGE_SEED_INTEGRITY);
- keyout.data = ctx->acceptor_integ;
- err = krb5_derive_key(ctx->gk5e, &keyin, &keyout, &c, gfp_mask);
- if (err) {
- dprintk("%s: Error %d deriving acceptor_integ key\n",
- __func__, err);
- goto out_free_acceptor_enc;
- }
-
- switch (ctx->enctype) {
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- ctx->initiator_enc_aux =
- context_v2_alloc_cipher(ctx, "cbc(aes)",
- ctx->initiator_seal);
- if (ctx->initiator_enc_aux == NULL)
- goto out_free_acceptor_enc;
- ctx->acceptor_enc_aux =
- context_v2_alloc_cipher(ctx, "cbc(aes)",
- ctx->acceptor_seal);
- if (ctx->acceptor_enc_aux == NULL) {
- crypto_free_sync_skcipher(ctx->initiator_enc_aux);
- goto out_free_acceptor_enc;
- }
- }
-
- return 0;
+ if (krb5_derive_key(ctx, &keyin, &keyout, KG_USAGE_ACCEPTOR_SEAL,
+ KEY_USAGE_SEED_INTEGRITY, gfp_mask))
+ goto out_free;
+ ctx->acceptor_integ = gss_krb5_alloc_hash_v2(ctx, &keyout);
+ if (ctx->acceptor_integ == NULL)
+ goto out_free;
+
+ ret = 0;
+out:
+ kfree_sensitive(keyout.data);
+ return ret;
-out_free_acceptor_enc:
+out_free:
+ crypto_free_ahash(ctx->acceptor_integ);
+ crypto_free_ahash(ctx->initiator_integ);
+ crypto_free_ahash(ctx->acceptor_sign);
+ crypto_free_ahash(ctx->initiator_sign);
+ crypto_free_sync_skcipher(ctx->acceptor_enc_aux);
crypto_free_sync_skcipher(ctx->acceptor_enc);
-out_free_initiator_enc:
+ crypto_free_sync_skcipher(ctx->initiator_enc_aux);
crypto_free_sync_skcipher(ctx->initiator_enc);
-out_err:
- return -EINVAL;
+ goto out;
}
+#endif
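
To summarize the v2 import path above: one subkey is derived per RFC 4121 usage/seed pair, with lengths taken from the enctype table:

	/*
	 * Subkey           Usage                    Seed                       Length
	 * initiator_enc    KG_USAGE_INITIATOR_SEAL  KEY_USAGE_SEED_ENCRYPTION  Ke_length
	 * acceptor_enc     KG_USAGE_ACCEPTOR_SEAL   KEY_USAGE_SEED_ENCRYPTION  Ke_length
	 * initiator_sign   KG_USAGE_INITIATOR_SIGN  KEY_USAGE_SEED_CHECKSUM    Kc_length
	 * acceptor_sign    KG_USAGE_ACCEPTOR_SIGN   KEY_USAGE_SEED_CHECKSUM    Kc_length
	 * initiator_integ  KG_USAGE_INITIATOR_SEAL  KEY_USAGE_SEED_INTEGRITY   Ki_length
	 * acceptor_integ   KG_USAGE_ACCEPTOR_SEAL   KEY_USAGE_SEED_INTEGRITY   Ki_length
	 */
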
+
static int
gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
gfp_t gfp_mask)
@@ -500,7 +674,7 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
/* Map ENCTYPE_DES3_CBC_SHA1 to ENCTYPE_DES3_CBC_RAW */
if (ctx->enctype == ENCTYPE_DES3_CBC_SHA1)
ctx->enctype = ENCTYPE_DES3_CBC_RAW;
- ctx->gk5e = get_gss_krb5_enctype(ctx->enctype);
+ ctx->gk5e = gss_krb5_lookup_enctype(ctx->enctype);
if (ctx->gk5e == NULL) {
dprintk("gss_kerberos_mech: unsupported krb5 enctype %u\n",
ctx->enctype);
@@ -526,25 +700,15 @@ gss_import_v2_context(const void *p, const void *end, struct krb5_ctx *ctx,
}
ctx->mech_used.len = gss_kerberos_mech.gm_oid.len;
- switch (ctx->enctype) {
- case ENCTYPE_DES3_CBC_RAW:
- return context_derive_keys_des3(ctx, gfp_mask);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return context_derive_keys_new(ctx, gfp_mask);
- default:
- return -EINVAL;
- }
+ return ctx->gk5e->import_ctx(ctx, gfp_mask);
out_err:
return PTR_ERR(p);
}
static int
-gss_import_sec_context_kerberos(const void *p, size_t len,
- struct gss_ctx *ctx_id,
- time64_t *endtime,
- gfp_t gfp_mask)
+gss_krb5_import_sec_context(const void *p, size_t len, struct gss_ctx *ctx_id,
+ time64_t *endtime, gfp_t gfp_mask)
{
const void *end = (const void *)((const char *)p + len);
struct krb5_ctx *ctx;
@@ -558,20 +722,21 @@ gss_import_sec_context_kerberos(const void *p, size_t len,
ret = gss_import_v1_context(p, end, ctx);
else
ret = gss_import_v2_context(p, end, ctx, gfp_mask);
-
- if (ret == 0) {
- ctx_id->internal_ctx_id = ctx;
- if (endtime)
- *endtime = ctx->endtime;
- } else
+ memzero_explicit(&ctx->Ksess, sizeof(ctx->Ksess));
+ if (ret) {
kfree(ctx);
+ return ret;
+ }
- dprintk("RPC: %s: returning %d\n", __func__, ret);
- return ret;
+ ctx_id->internal_ctx_id = ctx;
+ if (endtime)
+ *endtime = ctx->endtime;
+ return 0;
}
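
Note the hardening folded into this rewrite: ctx->Ksess is scrubbed with memzero_explicit() on both the success and failure paths, since once import completes only the derived subkeys and allocated cipher handles are needed.
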
static void
-gss_delete_sec_context_kerberos(void *internal_ctx) {
+gss_krb5_delete_sec_context(void *internal_ctx)
+{
struct krb5_ctx *kctx = internal_ctx;
crypto_free_sync_skcipher(kctx->seq);
@@ -580,17 +745,105 @@ gss_delete_sec_context_kerberos(void *internal_ctx) {
crypto_free_sync_skcipher(kctx->initiator_enc);
crypto_free_sync_skcipher(kctx->acceptor_enc_aux);
crypto_free_sync_skcipher(kctx->initiator_enc_aux);
+ crypto_free_ahash(kctx->acceptor_sign);
+ crypto_free_ahash(kctx->initiator_sign);
+ crypto_free_ahash(kctx->acceptor_integ);
+ crypto_free_ahash(kctx->initiator_integ);
kfree(kctx->mech_used.data);
kfree(kctx);
}
+/**
+ * gss_krb5_get_mic - get_mic for the Kerberos GSS mechanism
+ * @gctx: GSS context
+ * @text: plaintext to checksum
+ * @token: buffer into which to write the computed checksum
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - success, and @token is filled in
+ * %GSS_S_FAILURE - checksum could not be generated
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_get_mic(struct gss_ctx *gctx, struct xdr_buf *text,
+ struct xdr_netobj *token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->get_mic(kctx, text, token);
+}
+
+/**
+ * gss_krb5_verify_mic - verify_mic for the Kerberos GSS mechanism
+ * @gctx: GSS context
+ * @message_buffer: plaintext to check
+ * @read_token: received checksum to check
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - computed and received checksums match
+ * %GSS_S_DEFECTIVE_TOKEN - received checksum is not valid
+ * %GSS_S_BAD_SIG - computed and received checksums do not match
+ * %GSS_S_FAILURE - received checksum could not be checked
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_verify_mic(struct gss_ctx *gctx,
+ struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->verify_mic(kctx, message_buffer, read_token);
+}
+
+/**
+ * gss_krb5_wrap - gss_wrap for the Kerberos GSS mechanism
+ * @gctx: initialized GSS context
+ * @offset: byte offset in @buf to start writing the cipher text
+ * @buf: OUT: send buffer
+ * @pages: plaintext to wrap
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - success, @buf has been updated
+ * %GSS_S_FAILURE - @buf could not be wrapped
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_wrap(struct gss_ctx *gctx, int offset,
+ struct xdr_buf *buf, struct page **pages)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->wrap(kctx, offset, buf, pages);
+}
+
+/**
+ * gss_krb5_unwrap - gss_unwrap for the Kerberos GSS mechanism
+ * @gctx: initialized GSS context
+ * @offset: starting byte offset into @buf
+ * @len: size of ciphertext to unwrap
+ * @buf: ciphertext to unwrap
+ *
+ * Return values:
+ * %GSS_S_COMPLETE - success, @buf has been updated
+ * %GSS_S_DEFECTIVE_TOKEN - received blob is not valid
+ * %GSS_S_BAD_SIG - computed and received checksums do not match
+ * %GSS_S_FAILURE - @buf could not be unwrapped
+ * %GSS_S_CONTEXT_EXPIRED - Kerberos context is no longer valid
+ */
+static u32 gss_krb5_unwrap(struct gss_ctx *gctx, int offset,
+ int len, struct xdr_buf *buf)
+{
+ struct krb5_ctx *kctx = gctx->internal_ctx_id;
+
+ return kctx->gk5e->unwrap(kctx, offset, len, buf,
+ &gctx->slack, &gctx->align);
+}
+
static const struct gss_api_ops gss_kerberos_ops = {
- .gss_import_sec_context = gss_import_sec_context_kerberos,
- .gss_get_mic = gss_get_mic_kerberos,
- .gss_verify_mic = gss_verify_mic_kerberos,
- .gss_wrap = gss_wrap_kerberos,
- .gss_unwrap = gss_unwrap_kerberos,
- .gss_delete_sec_context = gss_delete_sec_context_kerberos,
+ .gss_import_sec_context = gss_krb5_import_sec_context,
+ .gss_get_mic = gss_krb5_get_mic,
+ .gss_verify_mic = gss_krb5_verify_mic,
+ .gss_wrap = gss_krb5_wrap,
+ .gss_unwrap = gss_krb5_unwrap,
+ .gss_delete_sec_context = gss_krb5_delete_sec_context,
};
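
Each of these entry points is now a thin dispatcher that indirects through the per-enctype operations in kctx->gk5e, which is what lets the per-ENCTYPE switch statements (removed from gss_krb5_seal.c below) go away.
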
static struct pf_desc gss_kerberos_pfs[] = {
@@ -631,13 +884,14 @@ static struct gss_api_mech gss_kerberos_mech = {
.gm_ops = &gss_kerberos_ops,
.gm_pf_num = ARRAY_SIZE(gss_kerberos_pfs),
.gm_pfs = gss_kerberos_pfs,
- .gm_upcall_enctypes = KRB5_SUPPORTED_ENCTYPES,
+ .gm_upcall_enctypes = gss_krb5_enctype_priority_list,
};
static int __init init_kerberos_module(void)
{
int status;
+ gss_krb5_prepare_enctype_priority_list();
status = gss_mech_register(&gss_kerberos_mech);
if (status)
printk("Failed to register kerberos gss mechanism!\n");
diff --git a/net/sunrpc/auth_gss/gss_krb5_seal.c b/net/sunrpc/auth_gss/gss_krb5_seal.c
index 33061417ec97..146aa755f07d 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seal.c
@@ -65,10 +65,14 @@
#include <linux/crypto.h>
#include <linux/atomic.h>
+#include "gss_krb5_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
+
static void *
setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
{
@@ -95,37 +99,9 @@ setup_token(struct krb5_ctx *ctx, struct xdr_netobj *token)
return krb5_hdr;
}
-static void *
-setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
-{
- u16 *ptr;
- void *krb5_hdr;
- u8 *p, flags = 0x00;
-
- if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
- flags |= 0x01;
- if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
- flags |= 0x04;
-
- /* Per rfc 4121, sec 4.2.6.1, there is no header,
- * just start the token */
- krb5_hdr = ptr = (u16 *)token->data;
-
- *ptr++ = KG2_TOK_MIC;
- p = (u8 *)ptr;
- *p++ = flags;
- *p++ = 0xff;
- ptr = (u16 *)p;
- *ptr++ = 0xffff;
- *ptr = 0xffff;
-
- token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
- return krb5_hdr;
-}
-
-static u32
-gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
+u32
+gss_krb5_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
+ struct xdr_netobj *token)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
@@ -162,18 +138,50 @@ gss_get_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *text,
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
-static u32
-gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
+#endif
+
+static void *
+setup_token_v2(struct krb5_ctx *ctx, struct xdr_netobj *token)
+{
+ u16 *ptr;
+ void *krb5_hdr;
+ u8 *p, flags = 0x00;
+
+ if ((ctx->flags & KRB5_CTX_FLAG_INITIATOR) == 0)
+ flags |= 0x01;
+ if (ctx->flags & KRB5_CTX_FLAG_ACCEPTOR_SUBKEY)
+ flags |= 0x04;
+
+	/* Per RFC 4121, sec 4.2.6.1, there is no header,
+	 * just start the token.
+	 */
+ krb5_hdr = (u16 *)token->data;
+ ptr = krb5_hdr;
+
+ *ptr++ = KG2_TOK_MIC;
+ p = (u8 *)ptr;
+ *p++ = flags;
+ *p++ = 0xff;
+ ptr = (u16 *)p;
+ *ptr++ = 0xffff;
+ *ptr = 0xffff;
+
+ token->len = GSS_KRB5_TOK_HDR_LEN + ctx->gk5e->cksumlength;
+ return krb5_hdr;
+}
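
Laid out as octets, the 16-byte header this builds matches RFC 4121 section 4.2.6.1 (the sequence number at offset 8 is filled in by the caller below):

	/*
	 * Octets 0-1:   TOK_ID    (KG2_TOK_MIC)
	 * Octet  2:     Flags     (0x01 SentByAcceptor, 0x04 AcceptorSubkey)
	 * Octet  3:     Filler    (0xFF)
	 * Octets 4-7:   Filler    (0xFF 0xFF 0xFF 0xFF)
	 * Octets 8-15:  SND_SEQ   (64-bit sequence number, big-endian)
	 * Octets 16- :  SGN_CKSUM (gk5e->cksumlength octets)
	 */
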
+
+u32
+gss_krb5_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
+ struct xdr_netobj *token)
{
- char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj cksumobj = { .len = sizeof(cksumdata),
- .data = cksumdata};
+ struct crypto_ahash *tfm = ctx->initiate ?
+ ctx->initiator_sign : ctx->acceptor_sign;
+ struct xdr_netobj cksumobj = {
+ .len = ctx->gk5e->cksumlength,
+ };
+ __be64 seq_send_be64;
void *krb5_hdr;
time64_t now;
- u8 *cksumkey;
- unsigned int cksum_usage;
- __be64 seq_send_be64;
dprintk("RPC: %s\n", __func__);
@@ -184,39 +192,11 @@ gss_get_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *text,
seq_send_be64 = cpu_to_be64(atomic64_fetch_inc(&ctx->seq_send64));
memcpy(krb5_hdr + 8, (char *) &seq_send_be64, 8);
- if (ctx->initiate) {
- cksumkey = ctx->initiator_sign;
- cksum_usage = KG_USAGE_INITIATOR_SIGN;
- } else {
- cksumkey = ctx->acceptor_sign;
- cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
- }
-
- if (make_checksum_v2(ctx, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
- text, 0, cksumkey, cksum_usage, &cksumobj))
+ cksumobj.data = krb5_hdr + GSS_KRB5_TOK_HDR_LEN;
+ if (gss_krb5_checksum(tfm, krb5_hdr, GSS_KRB5_TOK_HDR_LEN,
+ text, 0, &cksumobj))
return GSS_S_FAILURE;
- memcpy(krb5_hdr + GSS_KRB5_TOK_HDR_LEN, cksumobj.data, cksumobj.len);
-
now = ktime_get_real_seconds();
-
return (ctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
-
-u32
-gss_get_mic_kerberos(struct gss_ctx *gss_ctx, struct xdr_buf *text,
- struct xdr_netobj *token)
-{
- struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
-
- switch (ctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_get_mic_v1(ctx, text, token);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_get_mic_v2(ctx, text, token);
- }
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
index 3200b971a814..1babc3474e10 100644
--- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c
+++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c
@@ -35,6 +35,8 @@
#include <linux/types.h>
#include <linux/sunrpc/gss_krb5.h>
+#include "gss_krb5_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
diff --git a/net/sunrpc/auth_gss/gss_krb5_test.c b/net/sunrpc/auth_gss/gss_krb5_test.c
new file mode 100644
index 000000000000..c287ce15c419
--- /dev/null
+++ b/net/sunrpc/auth_gss/gss_krb5_test.c
@@ -0,0 +1,2040 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2022 Oracle and/or its affiliates.
+ *
+ * KUnit test of SunRPC's GSS Kerberos mechanism. Subsystem
+ * name is "rpcsec_gss_krb5".
+ */
+
+#include <kunit/test.h>
+#include <kunit/visibility.h>
+
+#include <linux/kernel.h>
+#include <crypto/hash.h>
+
+#include <linux/sunrpc/xdr.h>
+#include <linux/sunrpc/gss_krb5.h>
+
+#include "gss_krb5_internal.h"
+
+MODULE_IMPORT_NS(EXPORTED_FOR_KUNIT_TESTING);
+
+struct gss_krb5_test_param {
+ const char *desc;
+ u32 enctype;
+ u32 nfold;
+ u32 constant;
+ const struct xdr_netobj *base_key;
+ const struct xdr_netobj *Ke;
+ const struct xdr_netobj *usage;
+ const struct xdr_netobj *plaintext;
+ const struct xdr_netobj *confounder;
+ const struct xdr_netobj *expected_result;
+ const struct xdr_netobj *expected_hmac;
+ const struct xdr_netobj *next_iv;
+};
+
+static inline void gss_krb5_get_desc(const struct gss_krb5_test_param *param,
+ char *desc)
+{
+ strscpy(desc, param->desc, KUNIT_PARAM_DESC_SIZE);
+}
+
+static void kdf_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj derivedkey;
+ int err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ KUNIT_ASSERT_NOT_NULL(test, gk5e);
+
+ derivedkey.data = kunit_kzalloc(test, param->expected_result->len,
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, derivedkey.data);
+ derivedkey.len = param->expected_result->len;
+
+ /* Act */
+ err = gk5e->derive_key(gk5e, param->base_key, &derivedkey,
+ param->usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ derivedkey.data, derivedkey.len), 0,
+ "key mismatch");
+}
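
kdf_case() is a single parameterized body: it looks up the enctype profile, then drives its derive_key method directly with a published base key and usage constant, so the same code exercises each KDF vector table below.
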
+
+static void checksum_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct xdr_buf buf = {
+ .head[0].iov_base = param->plaintext->data,
+ .head[0].iov_len = param->plaintext->len,
+ .len = param->plaintext->len,
+ };
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj Kc, checksum;
+ struct crypto_ahash *tfm;
+ int err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ KUNIT_ASSERT_NOT_NULL(test, gk5e);
+
+ Kc.len = gk5e->Kc_length;
+ Kc.data = kunit_kzalloc(test, Kc.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Kc.data);
+ err = gk5e->derive_key(gk5e, param->base_key, &Kc,
+ param->usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ tfm = crypto_alloc_ahash(gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, tfm);
+ err = crypto_ahash_setkey(tfm, Kc.data, Kc.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ checksum.len = gk5e->cksumlength;
+ checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
+
+ /* Act */
+ err = gss_krb5_checksum(tfm, NULL, 0, &buf, 0, &checksum);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ checksum.data, checksum.len), 0,
+ "checksum mismatch");
+
+ crypto_free_ahash(tfm);
+}
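
checksum_case() follows the runtime path end to end: derive Kc from the base key, key an ahash transform with it, then compare the gss_krb5_checksum() output against the published vector.
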
+
+#define DEFINE_HEX_XDR_NETOBJ(name, hex_array...) \
+ static const u8 name ## _data[] = { hex_array }; \
+ static const struct xdr_netobj name = { \
+ .data = (u8 *)name##_data, \
+ .len = sizeof(name##_data), \
+ }
+
+#define DEFINE_STR_XDR_NETOBJ(name, string) \
+ static const u8 name ## _str[] = string; \
+ static const struct xdr_netobj name = { \
+ .data = (u8 *)name##_str, \
+ .len = sizeof(name##_str) - 1, \
+ }
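
For example, a hypothetical DEFINE_HEX_XDR_NETOBJ(example_key, 0xde, 0xad) expands to:

	static const u8 example_key_data[] = { 0xde, 0xad };
	static const struct xdr_netobj example_key = {
		.data = (u8 *)example_key_data,
		.len = sizeof(example_key_data),	/* 2 */
	};

The string variant subtracts one from the length so that the NUL terminator is not counted as test data.
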
+
+/*
+ * RFC 3961 Appendix A.1. n-fold
+ *
+ * The n-fold function is defined in section 5.1 of RFC 3961.
+ *
+ * This test material is copyright (C) The Internet Society (2005).
+ */
+
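A paraphrase of the algorithm under test, from RFC 3961 section 5.1: to n-fold a k-bit input, concatenate lcm(n, k) / k copies of it, rotating each successive copy right by a further 13 bits, then split the result into n-bit chunks and combine them with ones'-complement addition.
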
+DEFINE_HEX_XDR_NETOBJ(nfold_test1_plaintext,
+ 0x30, 0x31, 0x32, 0x33, 0x34, 0x35
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test1_expected_result,
+ 0xbe, 0x07, 0x26, 0x31, 0x27, 0x6b, 0x19, 0x55
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test2_plaintext,
+ 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test2_expected_result,
+ 0x78, 0xa0, 0x7b, 0x6c, 0xaf, 0x85, 0xfa
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test3_plaintext,
+ 0x52, 0x6f, 0x75, 0x67, 0x68, 0x20, 0x43, 0x6f,
+ 0x6e, 0x73, 0x65, 0x6e, 0x73, 0x75, 0x73, 0x2c,
+ 0x20, 0x61, 0x6e, 0x64, 0x20, 0x52, 0x75, 0x6e,
+ 0x6e, 0x69, 0x6e, 0x67, 0x20, 0x43, 0x6f, 0x64,
+ 0x65
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test3_expected_result,
+ 0xbb, 0x6e, 0xd3, 0x08, 0x70, 0xb7, 0xf0, 0xe0
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test4_plaintext,
+ 0x70, 0x61, 0x73, 0x73, 0x77, 0x6f, 0x72, 0x64
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test4_expected_result,
+ 0x59, 0xe4, 0xa8, 0xca, 0x7c, 0x03, 0x85, 0xc3,
+ 0xc3, 0x7b, 0x3f, 0x6d, 0x20, 0x00, 0x24, 0x7c,
+ 0xb6, 0xe6, 0xbd, 0x5b, 0x3e
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test5_plaintext,
+ 0x4d, 0x41, 0x53, 0x53, 0x41, 0x43, 0x48, 0x56,
+ 0x53, 0x45, 0x54, 0x54, 0x53, 0x20, 0x49, 0x4e,
+ 0x53, 0x54, 0x49, 0x54, 0x56, 0x54, 0x45, 0x20,
+ 0x4f, 0x46, 0x20, 0x54, 0x45, 0x43, 0x48, 0x4e,
+ 0x4f, 0x4c, 0x4f, 0x47, 0x59
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test5_expected_result,
+ 0xdb, 0x3b, 0x0d, 0x8f, 0x0b, 0x06, 0x1e, 0x60,
+ 0x32, 0x82, 0xb3, 0x08, 0xa5, 0x08, 0x41, 0x22,
+ 0x9a, 0xd7, 0x98, 0xfa, 0xb9, 0x54, 0x0c, 0x1b
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test6_plaintext,
+ 0x51
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test6_expected_result,
+ 0x51, 0x8a, 0x54, 0xa2, 0x15, 0xa8, 0x45, 0x2a,
+ 0x51, 0x8a, 0x54, 0xa2, 0x15, 0xa8, 0x45, 0x2a,
+ 0x51, 0x8a, 0x54, 0xa2, 0x15
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test7_plaintext,
+ 0x62, 0x61
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test7_expected_result,
+ 0xfb, 0x25, 0xd5, 0x31, 0xae, 0x89, 0x74, 0x49,
+ 0x9f, 0x52, 0xfd, 0x92, 0xea, 0x98, 0x57, 0xc4,
+ 0xba, 0x24, 0xcf, 0x29, 0x7e
+);
+
+DEFINE_HEX_XDR_NETOBJ(nfold_test_kerberos,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test8_expected_result,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test9_expected_result,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73,
+ 0x7b, 0x9b, 0x5b, 0x2b, 0x93, 0x13, 0x2b, 0x93
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test10_expected_result,
+ 0x83, 0x72, 0xc2, 0x36, 0x34, 0x4e, 0x5f, 0x15,
+ 0x50, 0xcd, 0x07, 0x47, 0xe1, 0x5d, 0x62, 0xca,
+ 0x7a, 0x5a, 0x3b, 0xce, 0xa4
+);
+DEFINE_HEX_XDR_NETOBJ(nfold_test11_expected_result,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73,
+ 0x7b, 0x9b, 0x5b, 0x2b, 0x93, 0x13, 0x2b, 0x93,
+ 0x5c, 0x9b, 0xdc, 0xda, 0xd9, 0x5c, 0x98, 0x99,
+ 0xc4, 0xca, 0xe4, 0xde, 0xe6, 0xd6, 0xca, 0xe4
+);
+
+static const struct gss_krb5_test_param rfc3961_nfold_test_params[] = {
+ {
+ .desc = "64-fold(\"012345\")",
+ .nfold = 64,
+ .plaintext = &nfold_test1_plaintext,
+ .expected_result = &nfold_test1_expected_result,
+ },
+ {
+ .desc = "56-fold(\"password\")",
+ .nfold = 56,
+ .plaintext = &nfold_test2_plaintext,
+ .expected_result = &nfold_test2_expected_result,
+ },
+ {
+ .desc = "64-fold(\"Rough Consensus, and Running Code\")",
+ .nfold = 64,
+ .plaintext = &nfold_test3_plaintext,
+ .expected_result = &nfold_test3_expected_result,
+ },
+ {
+ .desc = "168-fold(\"password\")",
+ .nfold = 168,
+ .plaintext = &nfold_test4_plaintext,
+ .expected_result = &nfold_test4_expected_result,
+ },
+ {
+ .desc = "192-fold(\"MASSACHVSETTS INSTITVTE OF TECHNOLOGY\")",
+ .nfold = 192,
+ .plaintext = &nfold_test5_plaintext,
+ .expected_result = &nfold_test5_expected_result,
+ },
+ {
+ .desc = "168-fold(\"Q\")",
+ .nfold = 168,
+ .plaintext = &nfold_test6_plaintext,
+ .expected_result = &nfold_test6_expected_result,
+ },
+ {
+ .desc = "168-fold(\"ba\")",
+ .nfold = 168,
+ .plaintext = &nfold_test7_plaintext,
+ .expected_result = &nfold_test7_expected_result,
+ },
+ {
+ .desc = "64-fold(\"kerberos\")",
+ .nfold = 64,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test8_expected_result,
+ },
+ {
+ .desc = "128-fold(\"kerberos\")",
+ .nfold = 128,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test9_expected_result,
+ },
+ {
+ .desc = "168-fold(\"kerberos\")",
+ .nfold = 168,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test10_expected_result,
+ },
+ {
+ .desc = "256-fold(\"kerberos\")",
+ .nfold = 256,
+ .plaintext = &nfold_test_kerberos,
+ .expected_result = &nfold_test11_expected_result,
+ },
+};
+
+/* Creates the function rfc3961_nfold_gen_params */
+KUNIT_ARRAY_PARAM(rfc3961_nfold, rfc3961_nfold_test_params, gss_krb5_get_desc);
+
+static void rfc3961_nfold_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ u8 *result;
+
+ /* Arrange */
+ result = kunit_kzalloc(test, 4096, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, result);
+
+ /* Act */
+ krb5_nfold(param->plaintext->len * 8, param->plaintext->data,
+ param->expected_result->len * 8, result);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ result, param->expected_result->len), 0,
+ "result mismatch");
+}
+
+/*
+ * RFC 3961 Appendix A.3. DES3 DR and DK
+ *
+ * These tests show the derived-random and derived-key values for the
+ * des3-hmac-sha1-kd encryption scheme, using the DR and DK functions
+ * defined in section 6.3.1. The input keys were randomly generated;
+ * the usage values are from this specification.
+ *
+ * This test material is copyright (C) The Internet Society (2005).
+ */
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_155,
+ 0x00, 0x00, 0x00, 0x01, 0x55
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_1aa,
+ 0x00, 0x00, 0x00, 0x01, 0xaa
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_usage_kerberos,
+ 0x6b, 0x65, 0x72, 0x62, 0x65, 0x72, 0x6f, 0x73
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_base_key,
+ 0xdc, 0xe0, 0x6b, 0x1f, 0x64, 0xc8, 0x57, 0xa1,
+ 0x1c, 0x3d, 0xb5, 0x7c, 0x51, 0x89, 0x9b, 0x2c,
+ 0xc1, 0x79, 0x10, 0x08, 0xce, 0x97, 0x3b, 0x92
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test1_derived_key,
+ 0x92, 0x51, 0x79, 0xd0, 0x45, 0x91, 0xa7, 0x9b,
+ 0x5d, 0x31, 0x92, 0xc4, 0xa7, 0xe9, 0xc2, 0x89,
+ 0xb0, 0x49, 0xc7, 0x1f, 0x6e, 0xe6, 0x04, 0xcd
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_base_key,
+ 0x5e, 0x13, 0xd3, 0x1c, 0x70, 0xef, 0x76, 0x57,
+ 0x46, 0x57, 0x85, 0x31, 0xcb, 0x51, 0xc1, 0x5b,
+ 0xf1, 0x1c, 0xa8, 0x2c, 0x97, 0xce, 0xe9, 0xf2
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test2_derived_key,
+ 0x9e, 0x58, 0xe5, 0xa1, 0x46, 0xd9, 0x94, 0x2a,
+ 0x10, 0x1c, 0x46, 0x98, 0x45, 0xd6, 0x7a, 0x20,
+ 0xe3, 0xc4, 0x25, 0x9e, 0xd9, 0x13, 0xf2, 0x07
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_base_key,
+ 0x98, 0xe6, 0xfd, 0x8a, 0x04, 0xa4, 0xb6, 0x85,
+ 0x9b, 0x75, 0xa1, 0x76, 0x54, 0x0b, 0x97, 0x52,
+ 0xba, 0xd3, 0xec, 0xd6, 0x10, 0xa2, 0x52, 0xbc
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test3_derived_key,
+ 0x13, 0xfe, 0xf8, 0x0d, 0x76, 0x3e, 0x94, 0xec,
+ 0x6d, 0x13, 0xfd, 0x2c, 0xa1, 0xd0, 0x85, 0x07,
+ 0x02, 0x49, 0xda, 0xd3, 0x98, 0x08, 0xea, 0xbf
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_base_key,
+ 0x62, 0x2a, 0xec, 0x25, 0xa2, 0xfe, 0x2c, 0xad,
+ 0x70, 0x94, 0x68, 0x0b, 0x7c, 0x64, 0x94, 0x02,
+ 0x80, 0x08, 0x4c, 0x1a, 0x7c, 0xec, 0x92, 0xb5
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test4_derived_key,
+ 0xf8, 0xdf, 0xbf, 0x04, 0xb0, 0x97, 0xe6, 0xd9,
+ 0xdc, 0x07, 0x02, 0x68, 0x6b, 0xcb, 0x34, 0x89,
+ 0xd9, 0x1f, 0xd9, 0xa4, 0x51, 0x6b, 0x70, 0x3e
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_base_key,
+ 0xd3, 0xf8, 0x29, 0x8c, 0xcb, 0x16, 0x64, 0x38,
+ 0xdc, 0xb9, 0xb9, 0x3e, 0xe5, 0xa7, 0x62, 0x92,
+ 0x86, 0xa4, 0x91, 0xf8, 0x38, 0xf8, 0x02, 0xfb
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test5_derived_key,
+ 0x23, 0x70, 0xda, 0x57, 0x5d, 0x2a, 0x3d, 0xa8,
+ 0x64, 0xce, 0xbf, 0xdc, 0x52, 0x04, 0xd5, 0x6d,
+ 0xf7, 0x79, 0xa7, 0xdf, 0x43, 0xd9, 0xda, 0x43
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_base_key,
+ 0xc1, 0x08, 0x16, 0x49, 0xad, 0xa7, 0x43, 0x62,
+ 0xe6, 0xa1, 0x45, 0x9d, 0x01, 0xdf, 0xd3, 0x0d,
+ 0x67, 0xc2, 0x23, 0x4c, 0x94, 0x07, 0x04, 0xda
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test6_derived_key,
+ 0x34, 0x80, 0x57, 0xec, 0x98, 0xfd, 0xc4, 0x80,
+ 0x16, 0x16, 0x1c, 0x2a, 0x4c, 0x7a, 0x94, 0x3e,
+ 0x92, 0xae, 0x49, 0x2c, 0x98, 0x91, 0x75, 0xf7
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_base_key,
+ 0x5d, 0x15, 0x4a, 0xf2, 0x38, 0xf4, 0x67, 0x13,
+ 0x15, 0x57, 0x19, 0xd5, 0x5e, 0x2f, 0x1f, 0x79,
+ 0x0d, 0xd6, 0x61, 0xf2, 0x79, 0xa7, 0x91, 0x7c
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test7_derived_key,
+ 0xa8, 0x80, 0x8a, 0xc2, 0x67, 0xda, 0xda, 0x3d,
+ 0xcb, 0xe9, 0xa7, 0xc8, 0x46, 0x26, 0xfb, 0xc7,
+ 0x61, 0xc2, 0x94, 0xb0, 0x13, 0x15, 0xe5, 0xc1
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_base_key,
+ 0x79, 0x85, 0x62, 0xe0, 0x49, 0x85, 0x2f, 0x57,
+ 0xdc, 0x8c, 0x34, 0x3b, 0xa1, 0x7f, 0x2c, 0xa1,
+ 0xd9, 0x73, 0x94, 0xef, 0xc8, 0xad, 0xc4, 0x43
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test8_derived_key,
+ 0xc8, 0x13, 0xf8, 0x8a, 0x3b, 0xe3, 0xb3, 0x34,
+ 0xf7, 0x54, 0x25, 0xce, 0x91, 0x75, 0xfb, 0xe3,
+ 0xc8, 0x49, 0x3b, 0x89, 0xc8, 0x70, 0x3b, 0x49
+);
+
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_base_key,
+ 0x26, 0xdc, 0xe3, 0x34, 0xb5, 0x45, 0x29, 0x2f,
+ 0x2f, 0xea, 0xb9, 0xa8, 0x70, 0x1a, 0x89, 0xa4,
+ 0xb9, 0x9e, 0xb9, 0x94, 0x2c, 0xec, 0xd0, 0x16
+);
+DEFINE_HEX_XDR_NETOBJ(des3_dk_test9_derived_key,
+ 0xf4, 0x8f, 0xfd, 0x6e, 0x83, 0xf8, 0x3e, 0x73,
+ 0x54, 0xe6, 0x94, 0xfd, 0x25, 0x2c, 0xf8, 0x3b,
+ 0xfe, 0x58, 0xf7, 0xd5, 0xba, 0x37, 0xec, 0x5d
+);
+
+static const struct gss_krb5_test_param rfc3961_kdf_test_params[] = {
+ {
+ .desc = "des3-hmac-sha1 key derivation case 1",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test1_base_key,
+ .usage = &des3_dk_usage_155,
+ .expected_result = &des3_dk_test1_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 2",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test2_base_key,
+ .usage = &des3_dk_usage_1aa,
+ .expected_result = &des3_dk_test2_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 3",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test3_base_key,
+ .usage = &des3_dk_usage_155,
+ .expected_result = &des3_dk_test3_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 4",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test4_base_key,
+ .usage = &des3_dk_usage_1aa,
+ .expected_result = &des3_dk_test4_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 5",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test5_base_key,
+ .usage = &des3_dk_usage_kerberos,
+ .expected_result = &des3_dk_test5_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 6",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test6_base_key,
+ .usage = &des3_dk_usage_155,
+ .expected_result = &des3_dk_test6_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 7",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test7_base_key,
+ .usage = &des3_dk_usage_1aa,
+ .expected_result = &des3_dk_test7_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 8",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test8_base_key,
+ .usage = &des3_dk_usage_155,
+ .expected_result = &des3_dk_test8_derived_key,
+ },
+ {
+ .desc = "des3-hmac-sha1 key derivation case 9",
+ .enctype = ENCTYPE_DES3_CBC_RAW,
+ .base_key = &des3_dk_test9_base_key,
+ .usage = &des3_dk_usage_1aa,
+ .expected_result = &des3_dk_test9_derived_key,
+ },
+};
+
+/* Creates the function rfc3961_kdf_gen_params */
+KUNIT_ARRAY_PARAM(rfc3961_kdf, rfc3961_kdf_test_params, gss_krb5_get_desc);
+
+static struct kunit_case rfc3961_test_cases[] = {
+ {
+ .name = "RFC 3961 n-fold",
+ .run_case = rfc3961_nfold_case,
+ .generate_params = rfc3961_nfold_gen_params,
+ },
+ {
+ .name = "RFC 3961 key derivation",
+ .run_case = kdf_case,
+ .generate_params = rfc3961_kdf_gen_params,
+ },
+};
+
+static struct kunit_suite rfc3961_suite = {
+ .name = "RFC 3961 tests",
+ .test_cases = rfc3961_test_cases,
+};
+
+/*
+ * From RFC 3962 Appendix B: Sample Test Vectors
+ *
+ * Some test vectors for CBC with ciphertext stealing, using an
+ * initial vector of all-zero.
+ *
+ * This test material is copyright (C) The Internet Society (2005).
+ */
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_encryption_key,
+ 0x63, 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x20,
+ 0x74, 0x65, 0x72, 0x69, 0x79, 0x61, 0x6b, 0x69
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test1_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test1_expected_result,
+ 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4,
+ 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f,
+ 0x97
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test1_next_iv,
+ 0xc6, 0x35, 0x35, 0x68, 0xf2, 0xbf, 0x8c, 0xb4,
+ 0xd8, 0xa5, 0x80, 0x36, 0x2d, 0xa7, 0xff, 0x7f
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test2_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test2_expected_result,
+ 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1,
+ 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test2_next_iv,
+ 0xfc, 0x00, 0x78, 0x3e, 0x0e, 0xfd, 0xb2, 0xc1,
+ 0xd4, 0x45, 0xd4, 0xc8, 0xef, 0xf7, 0xed, 0x22
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test3_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test3_expected_result,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test3_next_iv,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test4_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43,
+ 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+ 0x70, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x2c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test4_expected_result,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
+ 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c,
+ 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test4_next_iv,
+ 0xb3, 0xff, 0xfd, 0x94, 0x0c, 0x16, 0xa1, 0x8c,
+ 0x1b, 0x55, 0x49, 0xd2, 0xf8, 0x38, 0x02, 0x9e
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test5_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43,
+ 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+ 0x70, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x2c, 0x20
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test5_expected_result,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
+ 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
+ 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test5_next_iv,
+ 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
+ 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test6_plaintext,
+ 0x49, 0x20, 0x77, 0x6f, 0x75, 0x6c, 0x64, 0x20,
+ 0x6c, 0x69, 0x6b, 0x65, 0x20, 0x74, 0x68, 0x65,
+ 0x20, 0x47, 0x65, 0x6e, 0x65, 0x72, 0x61, 0x6c,
+ 0x20, 0x47, 0x61, 0x75, 0x27, 0x73, 0x20, 0x43,
+ 0x68, 0x69, 0x63, 0x6b, 0x65, 0x6e, 0x2c, 0x20,
+ 0x70, 0x6c, 0x65, 0x61, 0x73, 0x65, 0x2c, 0x20,
+ 0x61, 0x6e, 0x64, 0x20, 0x77, 0x6f, 0x6e, 0x74,
+ 0x6f, 0x6e, 0x20, 0x73, 0x6f, 0x75, 0x70, 0x2e
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test6_expected_result,
+ 0x97, 0x68, 0x72, 0x68, 0xd6, 0xec, 0xcc, 0xc0,
+ 0xc0, 0x7b, 0x25, 0xe2, 0x5e, 0xcf, 0xe5, 0x84,
+ 0x39, 0x31, 0x25, 0x23, 0xa7, 0x86, 0x62, 0xd5,
+ 0xbe, 0x7f, 0xcb, 0xcc, 0x98, 0xeb, 0xf5, 0xa8,
+ 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5,
+ 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40,
+ 0x9d, 0xad, 0x8b, 0xbb, 0x96, 0xc4, 0xcd, 0xc0,
+ 0x3b, 0xc1, 0x03, 0xe1, 0xa1, 0x94, 0xbb, 0xd8
+);
+DEFINE_HEX_XDR_NETOBJ(rfc3962_enc_test6_next_iv,
+ 0x48, 0x07, 0xef, 0xe8, 0x36, 0xee, 0x89, 0xa5,
+ 0x26, 0x73, 0x0d, 0xbc, 0x2f, 0x7b, 0xc8, 0x40
+);
+
+static const struct gss_krb5_test_param rfc3962_encrypt_test_params[] = {
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 1",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test1_plaintext,
+ .expected_result = &rfc3962_enc_test1_expected_result,
+ .next_iv = &rfc3962_enc_test1_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 2",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test2_plaintext,
+ .expected_result = &rfc3962_enc_test2_expected_result,
+ .next_iv = &rfc3962_enc_test2_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 3",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test3_plaintext,
+ .expected_result = &rfc3962_enc_test3_expected_result,
+ .next_iv = &rfc3962_enc_test3_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 4",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test4_plaintext,
+ .expected_result = &rfc3962_enc_test4_expected_result,
+ .next_iv = &rfc3962_enc_test4_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 5",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test5_plaintext,
+ .expected_result = &rfc3962_enc_test5_expected_result,
+ .next_iv = &rfc3962_enc_test5_next_iv,
+ },
+ {
+ .desc = "Encrypt with aes128-cts-hmac-sha1-96 case 6",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &rfc3962_enc_test6_plaintext,
+ .expected_result = &rfc3962_enc_test6_expected_result,
+ .next_iv = &rfc3962_enc_test6_next_iv,
+ },
+};
+
+/* Creates the function rfc3962_encrypt_gen_params */
+KUNIT_ARRAY_PARAM(rfc3962_encrypt, rfc3962_encrypt_test_params,
+ gss_krb5_get_desc);
+
+/*
+ * This tests only the encryption part of the mechanism. It applies
+ * no confounder and does not compute or check the HMAC over the
+ * plaintext.
+ */
+static void rfc3962_encrypt_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_buf buf;
+ void *iv, *text;
+ u32 err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ KUNIT_ASSERT_NOT_NULL(test, gk5e);
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ iv = kunit_kzalloc(test, crypto_sync_skcipher_ivsize(cts_tfm), GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, iv);
+
+ text = kunit_kzalloc(test, param->plaintext->len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+
+ memcpy(text, param->plaintext->data, param->plaintext->len);
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ /* Act */
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL,
+ iv, crypto_sync_skcipher_ivsize(cts_tfm));
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ param->expected_result->len, buf.len,
+ "ciphertext length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ text, param->expected_result->len), 0,
+ "ciphertext mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->next_iv->data, iv,
+ param->next_iv->len), 0,
+ "IV mismatch");
+
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
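
For context on why these lengths work out: RFC 3962 uses CBC with ciphertext stealing, so the ciphertext is exactly as long as the plaintext even when the plaintext is not a multiple of the AES block size (case 1: 17 bytes in, 17 bytes out), and the cipher state carried into the next operation, checked here via next_iv, is the next-to-last block of ciphertext.
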
+
+static struct kunit_case rfc3962_test_cases[] = {
+ {
+ .name = "RFC 3962 encryption",
+ .run_case = rfc3962_encrypt_case,
+ .generate_params = rfc3962_encrypt_gen_params,
+ },
+};
+
+static struct kunit_suite rfc3962_suite = {
+ .name = "RFC 3962 suite",
+ .test_cases = rfc3962_test_cases,
+};
+
+/*
+ * From RFC 6803 Section 10. Test vectors
+ *
+ * Sample results for key derivation
+ *
+ * Copyright (c) 2012 IETF Trust and the persons identified as the
+ * document authors. All rights reserved.
+ */
+
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_basekey,
+ 0x57, 0xd0, 0x29, 0x72, 0x98, 0xff, 0xd9, 0xd3,
+ 0x5d, 0xe5, 0xa4, 0x7f, 0xb4, 0xbd, 0xe2, 0x4b
+);
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_Kc,
+ 0xd1, 0x55, 0x77, 0x5a, 0x20, 0x9d, 0x05, 0xf0,
+ 0x2b, 0x38, 0xd4, 0x2a, 0x38, 0x9e, 0x5a, 0x56
+);
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_Ke,
+ 0x64, 0xdf, 0x83, 0xf8, 0x5a, 0x53, 0x2f, 0x17,
+ 0x57, 0x7d, 0x8c, 0x37, 0x03, 0x57, 0x96, 0xab
+);
+DEFINE_HEX_XDR_NETOBJ(camellia128_cts_cmac_Ki,
+ 0x3e, 0x4f, 0xbd, 0xf3, 0x0f, 0xb8, 0x25, 0x9c,
+ 0x42, 0x5c, 0xb6, 0xc9, 0x6f, 0x1f, 0x46, 0x35
+);
+
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_basekey,
+ 0xb9, 0xd6, 0x82, 0x8b, 0x20, 0x56, 0xb7, 0xbe,
+ 0x65, 0x6d, 0x88, 0xa1, 0x23, 0xb1, 0xfa, 0xc6,
+ 0x82, 0x14, 0xac, 0x2b, 0x72, 0x7e, 0xcf, 0x5f,
+ 0x69, 0xaf, 0xe0, 0xc4, 0xdf, 0x2a, 0x6d, 0x2c
+);
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_Kc,
+ 0xe4, 0x67, 0xf9, 0xa9, 0x55, 0x2b, 0xc7, 0xd3,
+ 0x15, 0x5a, 0x62, 0x20, 0xaf, 0x9c, 0x19, 0x22,
+ 0x0e, 0xee, 0xd4, 0xff, 0x78, 0xb0, 0xd1, 0xe6,
+ 0xa1, 0x54, 0x49, 0x91, 0x46, 0x1a, 0x9e, 0x50
+);
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_Ke,
+ 0x41, 0x2a, 0xef, 0xc3, 0x62, 0xa7, 0x28, 0x5f,
+ 0xc3, 0x96, 0x6c, 0x6a, 0x51, 0x81, 0xe7, 0x60,
+ 0x5a, 0xe6, 0x75, 0x23, 0x5b, 0x6d, 0x54, 0x9f,
+ 0xbf, 0xc9, 0xab, 0x66, 0x30, 0xa4, 0xc6, 0x04
+);
+DEFINE_HEX_XDR_NETOBJ(camellia256_cts_cmac_Ki,
+ 0xfa, 0x62, 0x4f, 0xa0, 0xe5, 0x23, 0x99, 0x3f,
+ 0xa3, 0x88, 0xae, 0xfd, 0xc6, 0x7e, 0x67, 0xeb,
+ 0xcd, 0x8c, 0x08, 0xe8, 0xa0, 0x24, 0x6b, 0x1d,
+ 0x73, 0xb0, 0xd1, 0xdd, 0x9f, 0xc5, 0x82, 0xb0
+);
+
+DEFINE_HEX_XDR_NETOBJ(usage_checksum,
+ 0x00, 0x00, 0x00, 0x02, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(usage_encryption,
+ 0x00, 0x00, 0x00, 0x02, KEY_USAGE_SEED_ENCRYPTION
+);
+DEFINE_HEX_XDR_NETOBJ(usage_integrity,
+ 0x00, 0x00, 0x00, 0x02, KEY_USAGE_SEED_INTEGRITY
+);
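
These five-octet constants follow the RFC 3961 key-derivation convention: a 32-bit big-endian key usage number (2, for these RFC 6803 vectors) followed by a single seed octet, the usual RFC 3961 values being 0x99 for Kc, 0xAA for Ke, and 0x55 for Ki.
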
+
+static const struct gss_krb5_test_param rfc6803_kdf_test_params[] = {
+ {
+ .desc = "Derive Kc subkey for camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &camellia128_cts_cmac_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &camellia128_cts_cmac_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &camellia128_cts_cmac_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &camellia128_cts_cmac_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &camellia128_cts_cmac_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &camellia128_cts_cmac_Ki,
+ },
+ {
+ .desc = "Derive Kc subkey for camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &camellia256_cts_cmac_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &camellia256_cts_cmac_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &camellia256_cts_cmac_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &camellia256_cts_cmac_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &camellia256_cts_cmac_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &camellia256_cts_cmac_Ki,
+ },
+};
+
+/* Creates the function rfc6803_kdf_gen_params */
+KUNIT_ARRAY_PARAM(rfc6803_kdf, rfc6803_kdf_test_params, gss_krb5_get_desc);
+
+/*
+ * From RFC 6803 Section 10. Test vectors
+ *
+ * Sample checksums.
+ *
+ * Copyright (c) 2012 IETF Trust and the persons identified as the
+ * document authors. All rights reserved.
+ *
+ * XXX: These tests are likely to fail on EBCDIC or Unicode platforms.
+ */
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test1_plaintext,
+ "abcdefghijk");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test1_basekey,
+ 0x1d, 0xc4, 0x6a, 0x8d, 0x76, 0x3f, 0x4f, 0x93,
+ 0x74, 0x2b, 0xcb, 0xa3, 0x38, 0x75, 0x76, 0xc3
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test1_usage,
+ 0x00, 0x00, 0x00, 0x07, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test1_expected_result,
+ 0x11, 0x78, 0xe6, 0xc5, 0xc4, 0x7a, 0x8c, 0x1a,
+ 0xe0, 0xc4, 0xb9, 0xc7, 0xd4, 0xeb, 0x7b, 0x6b
+);
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test2_plaintext,
+ "ABCDEFGHIJKLMNOPQRSTUVWXYZ");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test2_basekey,
+ 0x50, 0x27, 0xbc, 0x23, 0x1d, 0x0f, 0x3a, 0x9d,
+ 0x23, 0x33, 0x3f, 0x1c, 0xa6, 0xfd, 0xbe, 0x7c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test2_usage,
+ 0x00, 0x00, 0x00, 0x08, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test2_expected_result,
+ 0xd1, 0xb3, 0x4f, 0x70, 0x04, 0xa7, 0x31, 0xf2,
+ 0x3a, 0x0c, 0x00, 0xbf, 0x6c, 0x3f, 0x75, 0x3a
+);
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test3_plaintext,
+ "123456789");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test3_basekey,
+ 0xb6, 0x1c, 0x86, 0xcc, 0x4e, 0x5d, 0x27, 0x57,
+ 0x54, 0x5a, 0xd4, 0x23, 0x39, 0x9f, 0xb7, 0x03,
+ 0x1e, 0xca, 0xb9, 0x13, 0xcb, 0xb9, 0x00, 0xbd,
+ 0x7a, 0x3c, 0x6d, 0xd8, 0xbf, 0x92, 0x01, 0x5b
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test3_usage,
+ 0x00, 0x00, 0x00, 0x09, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test3_expected_result,
+ 0x87, 0xa1, 0x2c, 0xfd, 0x2b, 0x96, 0x21, 0x48,
+ 0x10, 0xf0, 0x1c, 0x82, 0x6e, 0x77, 0x44, 0xb1
+);
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_checksum_test4_plaintext,
+ "!@#$%^&*()!@#$%^&*()!@#$%^&*()");
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test4_basekey,
+ 0x32, 0x16, 0x4c, 0x5b, 0x43, 0x4d, 0x1d, 0x15,
+ 0x38, 0xe4, 0xcf, 0xd9, 0xbe, 0x80, 0x40, 0xfe,
+ 0x8c, 0x4a, 0xc7, 0xac, 0xc4, 0xb9, 0x3d, 0x33,
+ 0x14, 0xd2, 0x13, 0x36, 0x68, 0x14, 0x7a, 0x05
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test4_usage,
+ 0x00, 0x00, 0x00, 0x0a, KEY_USAGE_SEED_CHECKSUM
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_checksum_test4_expected_result,
+ 0x3f, 0xa0, 0xb4, 0x23, 0x55, 0xe5, 0x2b, 0x18,
+ 0x91, 0x87, 0x29, 0x4a, 0xa2, 0x52, 0xab, 0x64
+);
+
+static const struct gss_krb5_test_param rfc6803_checksum_test_params[] = {
+ {
+ .desc = "camellia128-cts-cmac checksum test 1",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test1_basekey,
+ .usage = &rfc6803_checksum_test1_usage,
+ .plaintext = &rfc6803_checksum_test1_plaintext,
+ .expected_result = &rfc6803_checksum_test1_expected_result,
+ },
+ {
+ .desc = "camellia128-cts-cmac checksum test 2",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test2_basekey,
+ .usage = &rfc6803_checksum_test2_usage,
+ .plaintext = &rfc6803_checksum_test2_plaintext,
+ .expected_result = &rfc6803_checksum_test2_expected_result,
+ },
+ {
+ .desc = "camellia256-cts-cmac checksum test 3",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test3_basekey,
+ .usage = &rfc6803_checksum_test3_usage,
+ .plaintext = &rfc6803_checksum_test3_plaintext,
+ .expected_result = &rfc6803_checksum_test3_expected_result,
+ },
+ {
+ .desc = "camellia256-cts-cmac checksum test 4",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .base_key = &rfc6803_checksum_test4_basekey,
+ .usage = &rfc6803_checksum_test4_usage,
+ .plaintext = &rfc6803_checksum_test4_plaintext,
+ .expected_result = &rfc6803_checksum_test4_expected_result,
+ },
+};
+
+/* Creates the function rfc6803_checksum_gen_params */
+KUNIT_ARRAY_PARAM(rfc6803_checksum, rfc6803_checksum_test_params,
+ gss_krb5_get_desc);
+
+/*
+ * From RFC 6803 Section 10. Test vectors
+ *
+ * Sample encryptions (all using the default cipher state)
+ *
+ * Copyright (c) 2012 IETF Trust and the persons identified as the
+ * document authors. All rights reserved.
+ *
+ * Key usage values are from errata 4326 against RFC 6803.
+ */
+
+static const struct xdr_netobj rfc6803_enc_empty_plaintext = {
+ .len = 0,
+};
+
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_1byte_plaintext, "1");
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_9byte_plaintext, "9 bytesss");
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_13byte_plaintext, "13 bytes byte");
+DEFINE_STR_XDR_NETOBJ(rfc6803_enc_30byte_plaintext,
+ "30 bytes bytes bytes bytes byt"
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test1_confounder,
+ 0xb6, 0x98, 0x22, 0xa1, 0x9a, 0x6b, 0x09, 0xc0,
+ 0xeb, 0xc8, 0x55, 0x7d, 0x1f, 0x1b, 0x6c, 0x0a
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test1_basekey,
+ 0x1d, 0xc4, 0x6a, 0x8d, 0x76, 0x3f, 0x4f, 0x93,
+ 0x74, 0x2b, 0xcb, 0xa3, 0x38, 0x75, 0x76, 0xc3
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test1_expected_result,
+ 0xc4, 0x66, 0xf1, 0x87, 0x10, 0x69, 0x92, 0x1e,
+ 0xdb, 0x7c, 0x6f, 0xde, 0x24, 0x4a, 0x52, 0xdb,
+ 0x0b, 0xa1, 0x0e, 0xdc, 0x19, 0x7b, 0xdb, 0x80,
+ 0x06, 0x65, 0x8c, 0xa3, 0xcc, 0xce, 0x6e, 0xb8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test2_confounder,
+ 0x6f, 0x2f, 0xc3, 0xc2, 0xa1, 0x66, 0xfd, 0x88,
+ 0x98, 0x96, 0x7a, 0x83, 0xde, 0x95, 0x96, 0xd9
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test2_basekey,
+ 0x50, 0x27, 0xbc, 0x23, 0x1d, 0x0f, 0x3a, 0x9d,
+ 0x23, 0x33, 0x3f, 0x1c, 0xa6, 0xfd, 0xbe, 0x7c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test2_expected_result,
+ 0x84, 0x2d, 0x21, 0xfd, 0x95, 0x03, 0x11, 0xc0,
+ 0xdd, 0x46, 0x4a, 0x3f, 0x4b, 0xe8, 0xd6, 0xda,
+ 0x88, 0xa5, 0x6d, 0x55, 0x9c, 0x9b, 0x47, 0xd3,
+ 0xf9, 0xa8, 0x50, 0x67, 0xaf, 0x66, 0x15, 0x59,
+ 0xb8
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test3_confounder,
+ 0xa5, 0xb4, 0xa7, 0x1e, 0x07, 0x7a, 0xee, 0xf9,
+ 0x3c, 0x87, 0x63, 0xc1, 0x8f, 0xdb, 0x1f, 0x10
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test3_basekey,
+ 0xa1, 0xbb, 0x61, 0xe8, 0x05, 0xf9, 0xba, 0x6d,
+ 0xde, 0x8f, 0xdb, 0xdd, 0xc0, 0x5c, 0xde, 0xa0
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test3_expected_result,
+ 0x61, 0x9f, 0xf0, 0x72, 0xe3, 0x62, 0x86, 0xff,
+ 0x0a, 0x28, 0xde, 0xb3, 0xa3, 0x52, 0xec, 0x0d,
+ 0x0e, 0xdf, 0x5c, 0x51, 0x60, 0xd6, 0x63, 0xc9,
+ 0x01, 0x75, 0x8c, 0xcf, 0x9d, 0x1e, 0xd3, 0x3d,
+ 0x71, 0xdb, 0x8f, 0x23, 0xaa, 0xbf, 0x83, 0x48,
+ 0xa0
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test4_confounder,
+ 0x19, 0xfe, 0xe4, 0x0d, 0x81, 0x0c, 0x52, 0x4b,
+ 0x5b, 0x22, 0xf0, 0x18, 0x74, 0xc6, 0x93, 0xda
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test4_basekey,
+ 0x2c, 0xa2, 0x7a, 0x5f, 0xaf, 0x55, 0x32, 0x24,
+ 0x45, 0x06, 0x43, 0x4e, 0x1c, 0xef, 0x66, 0x76
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test4_expected_result,
+ 0xb8, 0xec, 0xa3, 0x16, 0x7a, 0xe6, 0x31, 0x55,
+ 0x12, 0xe5, 0x9f, 0x98, 0xa7, 0xc5, 0x00, 0x20,
+ 0x5e, 0x5f, 0x63, 0xff, 0x3b, 0xb3, 0x89, 0xaf,
+ 0x1c, 0x41, 0xa2, 0x1d, 0x64, 0x0d, 0x86, 0x15,
+ 0xc9, 0xed, 0x3f, 0xbe, 0xb0, 0x5a, 0xb6, 0xac,
+ 0xb6, 0x76, 0x89, 0xb5, 0xea
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test5_confounder,
+ 0xca, 0x7a, 0x7a, 0xb4, 0xbe, 0x19, 0x2d, 0xab,
+ 0xd6, 0x03, 0x50, 0x6d, 0xb1, 0x9c, 0x39, 0xe2
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test5_basekey,
+ 0x78, 0x24, 0xf8, 0xc1, 0x6f, 0x83, 0xff, 0x35,
+ 0x4c, 0x6b, 0xf7, 0x51, 0x5b, 0x97, 0x3f, 0x43
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test5_expected_result,
+ 0xa2, 0x6a, 0x39, 0x05, 0xa4, 0xff, 0xd5, 0x81,
+ 0x6b, 0x7b, 0x1e, 0x27, 0x38, 0x0d, 0x08, 0x09,
+ 0x0c, 0x8e, 0xc1, 0xf3, 0x04, 0x49, 0x6e, 0x1a,
+ 0xbd, 0xcd, 0x2b, 0xdc, 0xd1, 0xdf, 0xfc, 0x66,
+ 0x09, 0x89, 0xe1, 0x17, 0xa7, 0x13, 0xdd, 0xbb,
+ 0x57, 0xa4, 0x14, 0x6c, 0x15, 0x87, 0xcb, 0xa4,
+ 0x35, 0x66, 0x65, 0x59, 0x1d, 0x22, 0x40, 0x28,
+ 0x2f, 0x58, 0x42, 0xb1, 0x05, 0xa5
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test6_confounder,
+ 0x3c, 0xbb, 0xd2, 0xb4, 0x59, 0x17, 0x94, 0x10,
+ 0x67, 0xf9, 0x65, 0x99, 0xbb, 0x98, 0x92, 0x6c
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test6_basekey,
+ 0xb6, 0x1c, 0x86, 0xcc, 0x4e, 0x5d, 0x27, 0x57,
+ 0x54, 0x5a, 0xd4, 0x23, 0x39, 0x9f, 0xb7, 0x03,
+ 0x1e, 0xca, 0xb9, 0x13, 0xcb, 0xb9, 0x00, 0xbd,
+ 0x7a, 0x3c, 0x6d, 0xd8, 0xbf, 0x92, 0x01, 0x5b
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test6_expected_result,
+ 0x03, 0x88, 0x6d, 0x03, 0x31, 0x0b, 0x47, 0xa6,
+ 0xd8, 0xf0, 0x6d, 0x7b, 0x94, 0xd1, 0xdd, 0x83,
+ 0x7e, 0xcc, 0xe3, 0x15, 0xef, 0x65, 0x2a, 0xff,
+ 0x62, 0x08, 0x59, 0xd9, 0x4a, 0x25, 0x92, 0x66
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test7_confounder,
+ 0xde, 0xf4, 0x87, 0xfc, 0xeb, 0xe6, 0xde, 0x63,
+ 0x46, 0xd4, 0xda, 0x45, 0x21, 0xbb, 0xa2, 0xd2
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test7_basekey,
+ 0x1b, 0x97, 0xfe, 0x0a, 0x19, 0x0e, 0x20, 0x21,
+ 0xeb, 0x30, 0x75, 0x3e, 0x1b, 0x6e, 0x1e, 0x77,
+ 0xb0, 0x75, 0x4b, 0x1d, 0x68, 0x46, 0x10, 0x35,
+ 0x58, 0x64, 0x10, 0x49, 0x63, 0x46, 0x38, 0x33
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test7_expected_result,
+ 0x2c, 0x9c, 0x15, 0x70, 0x13, 0x3c, 0x99, 0xbf,
+ 0x6a, 0x34, 0xbc, 0x1b, 0x02, 0x12, 0x00, 0x2f,
+ 0xd1, 0x94, 0x33, 0x87, 0x49, 0xdb, 0x41, 0x35,
+ 0x49, 0x7a, 0x34, 0x7c, 0xfc, 0xd9, 0xd1, 0x8a,
+ 0x12
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test8_confounder,
+ 0xad, 0x4f, 0xf9, 0x04, 0xd3, 0x4e, 0x55, 0x53,
+ 0x84, 0xb1, 0x41, 0x00, 0xfc, 0x46, 0x5f, 0x88
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test8_basekey,
+ 0x32, 0x16, 0x4c, 0x5b, 0x43, 0x4d, 0x1d, 0x15,
+ 0x38, 0xe4, 0xcf, 0xd9, 0xbe, 0x80, 0x40, 0xfe,
+ 0x8c, 0x4a, 0xc7, 0xac, 0xc4, 0xb9, 0x3d, 0x33,
+ 0x14, 0xd2, 0x13, 0x36, 0x68, 0x14, 0x7a, 0x05
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test8_expected_result,
+ 0x9c, 0x6d, 0xe7, 0x5f, 0x81, 0x2d, 0xe7, 0xed,
+ 0x0d, 0x28, 0xb2, 0x96, 0x35, 0x57, 0xa1, 0x15,
+ 0x64, 0x09, 0x98, 0x27, 0x5b, 0x0a, 0xf5, 0x15,
+ 0x27, 0x09, 0x91, 0x3f, 0xf5, 0x2a, 0x2a, 0x9c,
+ 0x8e, 0x63, 0xb8, 0x72, 0xf9, 0x2e, 0x64, 0xc8,
+ 0x39
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test9_confounder,
+ 0xcf, 0x9b, 0xca, 0x6d, 0xf1, 0x14, 0x4e, 0x0c,
+ 0x0a, 0xf9, 0xb8, 0xf3, 0x4c, 0x90, 0xd5, 0x14
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test9_basekey,
+ 0xb0, 0x38, 0xb1, 0x32, 0xcd, 0x8e, 0x06, 0x61,
+ 0x22, 0x67, 0xfa, 0xb7, 0x17, 0x00, 0x66, 0xd8,
+ 0x8a, 0xec, 0xcb, 0xa0, 0xb7, 0x44, 0xbf, 0xc6,
+ 0x0d, 0xc8, 0x9b, 0xca, 0x18, 0x2d, 0x07, 0x15
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test9_expected_result,
+ 0xee, 0xec, 0x85, 0xa9, 0x81, 0x3c, 0xdc, 0x53,
+ 0x67, 0x72, 0xab, 0x9b, 0x42, 0xde, 0xfc, 0x57,
+ 0x06, 0xf7, 0x26, 0xe9, 0x75, 0xdd, 0xe0, 0x5a,
+ 0x87, 0xeb, 0x54, 0x06, 0xea, 0x32, 0x4c, 0xa1,
+ 0x85, 0xc9, 0x98, 0x6b, 0x42, 0xaa, 0xbe, 0x79,
+ 0x4b, 0x84, 0x82, 0x1b, 0xee
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test10_confounder,
+ 0x64, 0x4d, 0xef, 0x38, 0xda, 0x35, 0x00, 0x72,
+ 0x75, 0x87, 0x8d, 0x21, 0x68, 0x55, 0xe2, 0x28
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test10_basekey,
+ 0xcc, 0xfc, 0xd3, 0x49, 0xbf, 0x4c, 0x66, 0x77,
+ 0xe8, 0x6e, 0x4b, 0x02, 0xb8, 0xea, 0xb9, 0x24,
+ 0xa5, 0x46, 0xac, 0x73, 0x1c, 0xf9, 0xbf, 0x69,
+ 0x89, 0xb9, 0x96, 0xe7, 0xd6, 0xbf, 0xbb, 0xa7
+);
+DEFINE_HEX_XDR_NETOBJ(rfc6803_enc_test10_expected_result,
+ 0x0e, 0x44, 0x68, 0x09, 0x85, 0x85, 0x5f, 0x2d,
+ 0x1f, 0x18, 0x12, 0x52, 0x9c, 0xa8, 0x3b, 0xfd,
+ 0x8e, 0x34, 0x9d, 0xe6, 0xfd, 0x9a, 0xda, 0x0b,
+ 0xaa, 0xa0, 0x48, 0xd6, 0x8e, 0x26, 0x5f, 0xeb,
+ 0xf3, 0x4a, 0xd1, 0x25, 0x5a, 0x34, 0x49, 0x99,
+ 0xad, 0x37, 0x14, 0x68, 0x87, 0xa6, 0xc6, 0x84,
+ 0x57, 0x31, 0xac, 0x7f, 0x46, 0x37, 0x6a, 0x05,
+ 0x04, 0xcd, 0x06, 0x57, 0x14, 0x74
+);
+
+static const struct gss_krb5_test_param rfc6803_encrypt_test_params[] = {
+ {
+ .desc = "Encrypt empty plaintext with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 0,
+ .base_key = &rfc6803_enc_test1_basekey,
+ .plaintext = &rfc6803_enc_empty_plaintext,
+ .confounder = &rfc6803_enc_test1_confounder,
+ .expected_result = &rfc6803_enc_test1_expected_result,
+ },
+ {
+ .desc = "Encrypt 1 byte with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 1,
+ .base_key = &rfc6803_enc_test2_basekey,
+ .plaintext = &rfc6803_enc_1byte_plaintext,
+ .confounder = &rfc6803_enc_test2_confounder,
+ .expected_result = &rfc6803_enc_test2_expected_result,
+ },
+ {
+ .desc = "Encrypt 9 bytes with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 2,
+ .base_key = &rfc6803_enc_test3_basekey,
+ .plaintext = &rfc6803_enc_9byte_plaintext,
+ .confounder = &rfc6803_enc_test3_confounder,
+ .expected_result = &rfc6803_enc_test3_expected_result,
+ },
+ {
+ .desc = "Encrypt 13 bytes with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 3,
+ .base_key = &rfc6803_enc_test4_basekey,
+ .plaintext = &rfc6803_enc_13byte_plaintext,
+ .confounder = &rfc6803_enc_test4_confounder,
+ .expected_result = &rfc6803_enc_test4_expected_result,
+ },
+ {
+ .desc = "Encrypt 30 bytes with camellia128-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .constant = 4,
+ .base_key = &rfc6803_enc_test5_basekey,
+ .plaintext = &rfc6803_enc_30byte_plaintext,
+ .confounder = &rfc6803_enc_test5_confounder,
+ .expected_result = &rfc6803_enc_test5_expected_result,
+ },
+ {
+ .desc = "Encrypt empty plaintext with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 0,
+ .base_key = &rfc6803_enc_test6_basekey,
+ .plaintext = &rfc6803_enc_empty_plaintext,
+ .confounder = &rfc6803_enc_test6_confounder,
+ .expected_result = &rfc6803_enc_test6_expected_result,
+ },
+ {
+ .desc = "Encrypt 1 byte with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 1,
+ .base_key = &rfc6803_enc_test7_basekey,
+ .plaintext = &rfc6803_enc_1byte_plaintext,
+ .confounder = &rfc6803_enc_test7_confounder,
+ .expected_result = &rfc6803_enc_test7_expected_result,
+ },
+ {
+ .desc = "Encrypt 9 bytes with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 2,
+ .base_key = &rfc6803_enc_test8_basekey,
+ .plaintext = &rfc6803_enc_9byte_plaintext,
+ .confounder = &rfc6803_enc_test8_confounder,
+ .expected_result = &rfc6803_enc_test8_expected_result,
+ },
+ {
+ .desc = "Encrypt 13 bytes with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 3,
+ .base_key = &rfc6803_enc_test9_basekey,
+ .plaintext = &rfc6803_enc_13byte_plaintext,
+ .confounder = &rfc6803_enc_test9_confounder,
+ .expected_result = &rfc6803_enc_test9_expected_result,
+ },
+ {
+ .desc = "Encrypt 30 bytes with camellia256-cts-cmac",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .constant = 4,
+ .base_key = &rfc6803_enc_test10_basekey,
+ .plaintext = &rfc6803_enc_30byte_plaintext,
+ .confounder = &rfc6803_enc_test10_confounder,
+ .expected_result = &rfc6803_enc_test10_expected_result,
+ },
+};
+
+/* Creates the function rfc6803_encrypt_gen_params */
+KUNIT_ARRAY_PARAM(rfc6803_encrypt, rfc6803_encrypt_test_params,
+ gss_krb5_get_desc);
+
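+/*
+ * Walk each RFC 6803 sample encryption: derive Ke and Ki from the base
+ * key, checksum the confounder plus plaintext, encrypt in place, and
+ * compare both the ciphertext and the trailing MAC against the
+ * published vectors.
+ */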
+static void rfc6803_encrypt_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj Ke, Ki, checksum;
+ u8 usage_data[GSS_KRB5_K5CLENGTH];
+ struct xdr_netobj usage = {
+ .data = usage_data,
+ .len = sizeof(usage_data),
+ };
+ struct crypto_ahash *ahash_tfm;
+ unsigned int blocksize;
+ struct xdr_buf buf;
+ void *text;
+ size_t len;
+ u32 err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ KUNIT_ASSERT_NOT_NULL(test, gk5e);
+
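+ /*
+ * Build the RFC 3961 derivation constant: octets 0-3 carry the
+ * key usage number in network byte order (the small constants
+ * used here fit in octet 3), and octet 4 is the seed type --
+ * 0x99 for Kc, 0xAA for Ke, 0x55 for Ki.
+ */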
+ memset(usage_data, 0, sizeof(usage_data));
+ usage.data[3] = param->constant;
+
+ Ke.len = gk5e->Ke_length;
+ Ke.data = kunit_kzalloc(test, Ke.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ke.data);
+ usage.data[4] = KEY_USAGE_SEED_ENCRYPTION;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ke, &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ blocksize = crypto_sync_skcipher_blocksize(cts_tfm);
+
+ len = param->confounder->len + param->plaintext->len + blocksize;
+ text = kunit_kzalloc(test, len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+ memcpy(text, param->confounder->data, param->confounder->len);
+ memcpy(text + param->confounder->len, param->plaintext->data,
+ param->plaintext->len);
+
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->confounder->len + param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ checksum.len = gk5e->cksumlength;
+ checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
+
+ Ki.len = gk5e->Ki_length;
+ Ki.data = kunit_kzalloc(test, Ki.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ki.data);
+ usage.data[4] = KEY_USAGE_SEED_INTEGRITY;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ki,
+ &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ ahash_tfm = crypto_alloc_ahash(gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ahash_tfm);
+ err = crypto_ahash_setkey(ahash_tfm, Ki.data, Ki.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
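+ /*
+ * Note the ordering below: RFC 6803 computes the MAC over the
+ * confounder plus plaintext first and then encrypts, carrying the
+ * truncated MAC after the ciphertext. Contrast the RFC 8009 case
+ * later in this file, which encrypts first and MACs the ciphertext.
+ */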
+ /* Act */
+ err = gss_krb5_checksum(ahash_tfm, NULL, 0, &buf, 0, &checksum);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL, NULL, 0);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test, param->expected_result->len,
+ buf.len + checksum.len,
+ "ciphertext length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ buf.head[0].iov_base, buf.len), 0,
+ "encrypted result mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data +
+ (param->expected_result->len - checksum.len),
+ checksum.data, checksum.len), 0,
+ "HMAC mismatch");
+
+ crypto_free_ahash(ahash_tfm);
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case rfc6803_test_cases[] = {
+ {
+ .name = "RFC 6803 key derivation",
+ .run_case = kdf_case,
+ .generate_params = rfc6803_kdf_gen_params,
+ },
+ {
+ .name = "RFC 6803 checksum",
+ .run_case = checksum_case,
+ .generate_params = rfc6803_checksum_gen_params,
+ },
+ {
+ .name = "RFC 6803 encryption",
+ .run_case = rfc6803_encrypt_case,
+ .generate_params = rfc6803_encrypt_gen_params,
+ },
+};
+
+static struct kunit_suite rfc6803_suite = {
+ .name = "RFC 6803 suite",
+ .test_cases = rfc6803_test_cases,
+};
+
+/*
+ * From RFC 8009 Appendix A. Test Vectors
+ *
+ * Sample results for SHA-2 enctype key derivation
+ *
+ * This test material is copyright (c) 2016 IETF Trust and the
+ * persons identified as the document authors. All rights reserved.
+ */
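+
+/*
+ * For reference, RFC 8009 Section 3 derives each subkey roughly as
+ *
+ *	KDF-HMAC-SHA2(key, label, k) =
+ *		k-truncate(HMAC-SHA-256/384(key,
+ *			   0x00000001 | label | 0x00 | k))
+ *
+ * where k is the desired output length in bits as a 4-octet big-endian
+ * value and the label is the 4-octet key usage followed by a single
+ * seed octet (0x99 for Kc, 0xAA for Ke, 0x55 for Ki).
+ */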
+
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_basekey,
+ 0x37, 0x05, 0xd9, 0x60, 0x80, 0xc1, 0x77, 0x28,
+ 0xa0, 0xe8, 0x00, 0xea, 0xb6, 0xe0, 0xd2, 0x3c
+);
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_Kc,
+ 0xb3, 0x1a, 0x01, 0x8a, 0x48, 0xf5, 0x47, 0x76,
+ 0xf4, 0x03, 0xe9, 0xa3, 0x96, 0x32, 0x5d, 0xc3
+);
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_Ke,
+ 0x9b, 0x19, 0x7d, 0xd1, 0xe8, 0xc5, 0x60, 0x9d,
+ 0x6e, 0x67, 0xc3, 0xe3, 0x7c, 0x62, 0xc7, 0x2e
+);
+DEFINE_HEX_XDR_NETOBJ(aes128_cts_hmac_sha256_128_Ki,
+ 0x9f, 0xda, 0x0e, 0x56, 0xab, 0x2d, 0x85, 0xe1,
+ 0x56, 0x9a, 0x68, 0x86, 0x96, 0xc2, 0x6a, 0x6c
+);
+
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_basekey,
+ 0x6d, 0x40, 0x4d, 0x37, 0xfa, 0xf7, 0x9f, 0x9d,
+ 0xf0, 0xd3, 0x35, 0x68, 0xd3, 0x20, 0x66, 0x98,
+ 0x00, 0xeb, 0x48, 0x36, 0x47, 0x2e, 0xa8, 0xa0,
+ 0x26, 0xd1, 0x6b, 0x71, 0x82, 0x46, 0x0c, 0x52
+);
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_Kc,
+ 0xef, 0x57, 0x18, 0xbe, 0x86, 0xcc, 0x84, 0x96,
+ 0x3d, 0x8b, 0xbb, 0x50, 0x31, 0xe9, 0xf5, 0xc4,
+ 0xba, 0x41, 0xf2, 0x8f, 0xaf, 0x69, 0xe7, 0x3d
+);
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_Ke,
+ 0x56, 0xab, 0x22, 0xbe, 0xe6, 0x3d, 0x82, 0xd7,
+ 0xbc, 0x52, 0x27, 0xf6, 0x77, 0x3f, 0x8e, 0xa7,
+ 0xa5, 0xeb, 0x1c, 0x82, 0x51, 0x60, 0xc3, 0x83,
+ 0x12, 0x98, 0x0c, 0x44, 0x2e, 0x5c, 0x7e, 0x49
+);
+DEFINE_HEX_XDR_NETOBJ(aes256_cts_hmac_sha384_192_Ki,
+ 0x69, 0xb1, 0x65, 0x14, 0xe3, 0xcd, 0x8e, 0x56,
+ 0xb8, 0x20, 0x10, 0xd5, 0xc7, 0x30, 0x12, 0xb6,
+ 0x22, 0xc4, 0xd0, 0x0f, 0xfc, 0x23, 0xed, 0x1f
+);
+
+static const struct gss_krb5_test_param rfc8009_kdf_test_params[] = {
+ {
+ .desc = "Derive Kc subkey for aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &aes128_cts_hmac_sha256_128_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &aes128_cts_hmac_sha256_128_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &aes128_cts_hmac_sha256_128_Ki,
+ },
+ {
+ .desc = "Derive Kc subkey for aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_checksum,
+ .expected_result = &aes256_cts_hmac_sha384_192_Kc,
+ },
+ {
+ .desc = "Derive Ke subkey for aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_encryption,
+ .expected_result = &aes256_cts_hmac_sha384_192_Ke,
+ },
+ {
+ .desc = "Derive Ki subkey for aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_integrity,
+ .expected_result = &aes256_cts_hmac_sha384_192_Ki,
+ },
+};
+
+/* Creates the function rfc8009_kdf_gen_params */
+KUNIT_ARRAY_PARAM(rfc8009_kdf, rfc8009_kdf_test_params, gss_krb5_get_desc);
+
+/*
+ * From RFC 8009 Appendix A. Test Vectors
+ *
+ * These sample checksums use the above sample key derivation results,
+ * including use of the same base-key and key usage values.
+ *
+ * This test material is copyright (c) 2016 IETF Trust and the
+ * persons identified as the document authors. All rights reserved.
+ */
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_checksum_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_checksum_test1_expected_result,
+ 0xd7, 0x83, 0x67, 0x18, 0x66, 0x43, 0xd6, 0x7b,
+ 0x41, 0x1c, 0xba, 0x91, 0x39, 0xfc, 0x1d, 0xee
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_checksum_test2_expected_result,
+ 0x45, 0xee, 0x79, 0x15, 0x67, 0xee, 0xfc, 0xa3,
+ 0x7f, 0x4a, 0xc1, 0xe0, 0x22, 0x2d, 0xe8, 0x0d,
+ 0x43, 0xc3, 0xbf, 0xa0, 0x66, 0x99, 0x67, 0x2a
+);
+
+static const struct gss_krb5_test_param rfc8009_checksum_test_params[] = {
+ {
+ .desc = "Checksum with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .usage = &usage_checksum,
+ .plaintext = &rfc8009_checksum_plaintext,
+ .expected_result = &rfc8009_checksum_test1_expected_result,
+ },
+ {
+ .desc = "Checksum with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .usage = &usage_checksum,
+ .plaintext = &rfc8009_checksum_plaintext,
+ .expected_result = &rfc8009_checksum_test2_expected_result,
+ },
+};
+
+/* Creates the function rfc8009_checksum_gen_params */
+KUNIT_ARRAY_PARAM(rfc8009_checksum, rfc8009_checksum_test_params,
+ gss_krb5_get_desc);
+
+/*
+ * From RFC 8009 Appendix A. Test Vectors
+ *
+ * Sample encryptions (all using the default cipher state):
+ * --------------------------------------------------------
+ *
+ * These sample encryptions use the above sample key derivation results,
+ * including use of the same base-key and key usage values.
+ *
+ * This test material is copyright (c) 2016 IETF Trust and the
+ * persons identified as the document authors. All rights reserved.
+ */
+
+static const struct xdr_netobj rfc8009_enc_empty_plaintext = {
+ .len = 0,
+};
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_short_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_block_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_long_plaintext,
+ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+ 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+ 0x10, 0x11, 0x12, 0x13, 0x14
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test1_confounder,
+ 0x7e, 0x58, 0x95, 0xea, 0xf2, 0x67, 0x24, 0x35,
+ 0xba, 0xd8, 0x17, 0xf5, 0x45, 0xa3, 0x71, 0x48
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test1_expected_result,
+ 0xef, 0x85, 0xfb, 0x89, 0x0b, 0xb8, 0x47, 0x2f,
+ 0x4d, 0xab, 0x20, 0x39, 0x4d, 0xca, 0x78, 0x1d
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test1_expected_hmac,
+ 0xad, 0x87, 0x7e, 0xda, 0x39, 0xd5, 0x0c, 0x87,
+ 0x0c, 0x0d, 0x5a, 0x0a, 0x8e, 0x48, 0xc7, 0x18
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test2_confounder,
+ 0x7b, 0xca, 0x28, 0x5e, 0x2f, 0xd4, 0x13, 0x0f,
+ 0xb5, 0x5b, 0x1a, 0x5c, 0x83, 0xbc, 0x5b, 0x24
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test2_expected_result,
+ 0x84, 0xd7, 0xf3, 0x07, 0x54, 0xed, 0x98, 0x7b,
+ 0xab, 0x0b, 0xf3, 0x50, 0x6b, 0xeb, 0x09, 0xcf,
+ 0xb5, 0x54, 0x02, 0xce, 0xf7, 0xe6
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test2_expected_hmac,
+ 0x87, 0x7c, 0xe9, 0x9e, 0x24, 0x7e, 0x52, 0xd1,
+ 0x6e, 0xd4, 0x42, 0x1d, 0xfd, 0xf8, 0x97, 0x6c
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test3_confounder,
+ 0x56, 0xab, 0x21, 0x71, 0x3f, 0xf6, 0x2c, 0x0a,
+ 0x14, 0x57, 0x20, 0x0f, 0x6f, 0xa9, 0x94, 0x8f
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test3_expected_result,
+ 0x35, 0x17, 0xd6, 0x40, 0xf5, 0x0d, 0xdc, 0x8a,
+ 0xd3, 0x62, 0x87, 0x22, 0xb3, 0x56, 0x9d, 0x2a,
+ 0xe0, 0x74, 0x93, 0xfa, 0x82, 0x63, 0x25, 0x40,
+ 0x80, 0xea, 0x65, 0xc1, 0x00, 0x8e, 0x8f, 0xc2
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test3_expected_hmac,
+ 0x95, 0xfb, 0x48, 0x52, 0xe7, 0xd8, 0x3e, 0x1e,
+ 0x7c, 0x48, 0xc3, 0x7e, 0xeb, 0xe6, 0xb0, 0xd3
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test4_confounder,
+ 0xa7, 0xa4, 0xe2, 0x9a, 0x47, 0x28, 0xce, 0x10,
+ 0x66, 0x4f, 0xb6, 0x4e, 0x49, 0xad, 0x3f, 0xac
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test4_expected_result,
+ 0x72, 0x0f, 0x73, 0xb1, 0x8d, 0x98, 0x59, 0xcd,
+ 0x6c, 0xcb, 0x43, 0x46, 0x11, 0x5c, 0xd3, 0x36,
+ 0xc7, 0x0f, 0x58, 0xed, 0xc0, 0xc4, 0x43, 0x7c,
+ 0x55, 0x73, 0x54, 0x4c, 0x31, 0xc8, 0x13, 0xbc,
+ 0xe1, 0xe6, 0xd0, 0x72, 0xc1
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test4_expected_hmac,
+ 0x86, 0xb3, 0x9a, 0x41, 0x3c, 0x2f, 0x92, 0xca,
+ 0x9b, 0x83, 0x34, 0xa2, 0x87, 0xff, 0xcb, 0xfc
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test5_confounder,
+ 0xf7, 0x64, 0xe9, 0xfa, 0x15, 0xc2, 0x76, 0x47,
+ 0x8b, 0x2c, 0x7d, 0x0c, 0x4e, 0x5f, 0x58, 0xe4
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test5_expected_result,
+ 0x41, 0xf5, 0x3f, 0xa5, 0xbf, 0xe7, 0x02, 0x6d,
+ 0x91, 0xfa, 0xf9, 0xbe, 0x95, 0x91, 0x95, 0xa0
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test5_expected_hmac,
+ 0x58, 0x70, 0x72, 0x73, 0xa9, 0x6a, 0x40, 0xf0,
+ 0xa0, 0x19, 0x60, 0x62, 0x1a, 0xc6, 0x12, 0x74,
+ 0x8b, 0x9b, 0xbf, 0xbe, 0x7e, 0xb4, 0xce, 0x3c
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test6_confounder,
+ 0xb8, 0x0d, 0x32, 0x51, 0xc1, 0xf6, 0x47, 0x14,
+ 0x94, 0x25, 0x6f, 0xfe, 0x71, 0x2d, 0x0b, 0x9a
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test6_expected_result,
+ 0x4e, 0xd7, 0xb3, 0x7c, 0x2b, 0xca, 0xc8, 0xf7,
+ 0x4f, 0x23, 0xc1, 0xcf, 0x07, 0xe6, 0x2b, 0xc7,
+ 0xb7, 0x5f, 0xb3, 0xf6, 0x37, 0xb9
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test6_expected_hmac,
+ 0xf5, 0x59, 0xc7, 0xf6, 0x64, 0xf6, 0x9e, 0xab,
+ 0x7b, 0x60, 0x92, 0x23, 0x75, 0x26, 0xea, 0x0d,
+ 0x1f, 0x61, 0xcb, 0x20, 0xd6, 0x9d, 0x10, 0xf2
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test7_confounder,
+ 0x53, 0xbf, 0x8a, 0x0d, 0x10, 0x52, 0x65, 0xd4,
+ 0xe2, 0x76, 0x42, 0x86, 0x24, 0xce, 0x5e, 0x63
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test7_expected_result,
+ 0xbc, 0x47, 0xff, 0xec, 0x79, 0x98, 0xeb, 0x91,
+ 0xe8, 0x11, 0x5c, 0xf8, 0xd1, 0x9d, 0xac, 0x4b,
+ 0xbb, 0xe2, 0xe1, 0x63, 0xe8, 0x7d, 0xd3, 0x7f,
+ 0x49, 0xbe, 0xca, 0x92, 0x02, 0x77, 0x64, 0xf6
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test7_expected_hmac,
+ 0x8c, 0xf5, 0x1f, 0x14, 0xd7, 0x98, 0xc2, 0x27,
+ 0x3f, 0x35, 0xdf, 0x57, 0x4d, 0x1f, 0x93, 0x2e,
+ 0x40, 0xc4, 0xff, 0x25, 0x5b, 0x36, 0xa2, 0x66
+);
+
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test8_confounder,
+ 0x76, 0x3e, 0x65, 0x36, 0x7e, 0x86, 0x4f, 0x02,
+ 0xf5, 0x51, 0x53, 0xc7, 0xe3, 0xb5, 0x8a, 0xf1
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test8_expected_result,
+ 0x40, 0x01, 0x3e, 0x2d, 0xf5, 0x8e, 0x87, 0x51,
+ 0x95, 0x7d, 0x28, 0x78, 0xbc, 0xd2, 0xd6, 0xfe,
+ 0x10, 0x1c, 0xcf, 0xd5, 0x56, 0xcb, 0x1e, 0xae,
+ 0x79, 0xdb, 0x3c, 0x3e, 0xe8, 0x64, 0x29, 0xf2,
+ 0xb2, 0xa6, 0x02, 0xac, 0x86
+);
+DEFINE_HEX_XDR_NETOBJ(rfc8009_enc_test8_expected_hmac,
+ 0xfe, 0xf6, 0xec, 0xb6, 0x47, 0xd6, 0x29, 0x5f,
+ 0xae, 0x07, 0x7a, 0x1f, 0xeb, 0x51, 0x75, 0x08,
+ 0xd2, 0xc1, 0x6b, 0x41, 0x92, 0xe0, 0x1f, 0x62
+);
+
+static const struct gss_krb5_test_param rfc8009_encrypt_test_params[] = {
+ {
+ .desc = "Encrypt empty plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_empty_plaintext,
+ .confounder = &rfc8009_enc_test1_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test1_expected_result,
+ .expected_hmac = &rfc8009_enc_test1_expected_hmac,
+ },
+ {
+ .desc = "Encrypt short plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_short_plaintext,
+ .confounder = &rfc8009_enc_test2_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test2_expected_result,
+ .expected_hmac = &rfc8009_enc_test2_expected_hmac,
+ },
+ {
+ .desc = "Encrypt block plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_block_plaintext,
+ .confounder = &rfc8009_enc_test3_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test3_expected_result,
+ .expected_hmac = &rfc8009_enc_test3_expected_hmac,
+ },
+ {
+ .desc = "Encrypt long plaintext with aes128-cts-hmac-sha256-128",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .plaintext = &rfc8009_enc_long_plaintext,
+ .confounder = &rfc8009_enc_test4_confounder,
+ .base_key = &aes128_cts_hmac_sha256_128_basekey,
+ .expected_result = &rfc8009_enc_test4_expected_result,
+ .expected_hmac = &rfc8009_enc_test4_expected_hmac,
+ },
+ {
+ .desc = "Encrypt empty plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_empty_plaintext,
+ .confounder = &rfc8009_enc_test5_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test5_expected_result,
+ .expected_hmac = &rfc8009_enc_test5_expected_hmac,
+ },
+ {
+ .desc = "Encrypt short plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_short_plaintext,
+ .confounder = &rfc8009_enc_test6_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test6_expected_result,
+ .expected_hmac = &rfc8009_enc_test6_expected_hmac,
+ },
+ {
+ .desc = "Encrypt block plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_block_plaintext,
+ .confounder = &rfc8009_enc_test7_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test7_expected_result,
+ .expected_hmac = &rfc8009_enc_test7_expected_hmac,
+ },
+ {
+ .desc = "Encrypt long plaintext with aes256-cts-hmac-sha384-192",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .plaintext = &rfc8009_enc_long_plaintext,
+ .confounder = &rfc8009_enc_test8_confounder,
+ .base_key = &aes256_cts_hmac_sha384_192_basekey,
+ .expected_result = &rfc8009_enc_test8_expected_result,
+ .expected_hmac = &rfc8009_enc_test8_expected_hmac,
+ },
+};
+
+/* Creates the function rfc8009_encrypt_gen_params */
+KUNIT_ARRAY_PARAM(rfc8009_encrypt, rfc8009_encrypt_test_params,
+ gss_krb5_get_desc);
+
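+/*
+ * Walk each RFC 8009 sample encryption: derive Ke and Ki, encrypt the
+ * confounder plus plaintext in place, compute the HMAC over the
+ * resulting ciphertext, and compare both against the published
+ * vectors.
+ */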
+static void rfc8009_encrypt_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_netobj Ke, Ki, checksum;
+ u8 usage_data[GSS_KRB5_K5CLENGTH];
+ struct xdr_netobj usage = {
+ .data = usage_data,
+ .len = sizeof(usage_data),
+ };
+ struct crypto_ahash *ahash_tfm;
+ struct xdr_buf buf;
+ void *text;
+ size_t len;
+ u32 err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ KUNIT_ASSERT_NOT_NULL(test, gk5e);
+
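+ /*
+ * The RFC 8009 sample encryptions all use key usage 2; it occupies
+ * the first four octets of the derivation constant, and the seed
+ * octet is filled in below for each subkey.
+ */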
+ *(__be32 *)usage.data = cpu_to_be32(2);
+
+ Ke.len = gk5e->Ke_length;
+ Ke.data = kunit_kzalloc(test, Ke.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ke.data);
+ usage.data[4] = KEY_USAGE_SEED_ENCRYPTION;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ke,
+ &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, Ke.data, Ke.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ len = param->confounder->len + param->plaintext->len;
+ text = kunit_kzalloc(test, len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+ memcpy(text, param->confounder->data, param->confounder->len);
+ memcpy(text + param->confounder->len, param->plaintext->data,
+ param->plaintext->len);
+
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->confounder->len + param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ checksum.len = gk5e->cksumlength;
+ checksum.data = kunit_kzalloc(test, checksum.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, checksum.data);
+
+ Ki.len = gk5e->Ki_length;
+ Ki.data = kunit_kzalloc(test, Ki.len, GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, Ki.data);
+ usage.data[4] = KEY_USAGE_SEED_INTEGRITY;
+ err = gk5e->derive_key(gk5e, param->base_key, &Ki,
+ &usage, GFP_KERNEL);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ ahash_tfm = crypto_alloc_ahash(gk5e->cksum_name, 0, CRYPTO_ALG_ASYNC);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ahash_tfm);
+ err = crypto_ahash_setkey(ahash_tfm, Ki.data, Ki.len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
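+ /*
+ * Note the ordering below: unlike RFC 6803, RFC 8009 is
+ * encrypt-then-MAC, so the ciphertext is produced first and the
+ * HMAC is computed over it.
+ */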
+ /* Act */
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL, NULL, 0);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ err = krb5_etm_checksum(cts_tfm, ahash_tfm, &buf, 0, &checksum);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ param->expected_result->len, buf.len,
+ "ciphertext length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->expected_result->data,
+ buf.head[0].iov_base,
+ param->expected_result->len), 0,
+ "ciphertext mismatch");
+ KUNIT_EXPECT_EQ_MSG(test, memcmp(param->expected_hmac->data,
+ checksum.data,
+ checksum.len), 0,
+ "HMAC mismatch");
+
+ crypto_free_ahash(ahash_tfm);
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case rfc8009_test_cases[] = {
+ {
+ .name = "RFC 8009 key derivation",
+ .run_case = kdf_case,
+ .generate_params = rfc8009_kdf_gen_params,
+ },
+ {
+ .name = "RFC 8009 checksum",
+ .run_case = checksum_case,
+ .generate_params = rfc8009_checksum_gen_params,
+ },
+ {
+ .name = "RFC 8009 encryption",
+ .run_case = rfc8009_encrypt_case,
+ .generate_params = rfc8009_encrypt_gen_params,
+ },
+};
+
+static struct kunit_suite rfc8009_suite = {
+ .name = "RFC 8009 suite",
+ .test_cases = rfc8009_test_cases,
+};
+
+/*
+ * Encryption self-tests
+ */
+
+DEFINE_STR_XDR_NETOBJ(encrypt_selftest_plaintext,
+ "This is the plaintext for the encryption self-test.");
+
+static const struct gss_krb5_test_param encrypt_selftest_params[] = {
+ {
+ .desc = "aes128-cts-hmac-sha1-96 encryption self-test",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "aes256-cts-hmac-sha1-96 encryption self-test",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA1_96,
+ .Ke = &rfc3962_encryption_key,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "camellia128-cts-cmac encryption self-test",
+ .enctype = ENCTYPE_CAMELLIA128_CTS_CMAC,
+ .Ke = &camellia128_cts_cmac_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "camellia256-cts-cmac encryption self-test",
+ .enctype = ENCTYPE_CAMELLIA256_CTS_CMAC,
+ .Ke = &camellia256_cts_cmac_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "aes128-cts-hmac-sha256-128 encryption self-test",
+ .enctype = ENCTYPE_AES128_CTS_HMAC_SHA256_128,
+ .Ke = &aes128_cts_hmac_sha256_128_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+ {
+ .desc = "aes256-cts-hmac-sha384-192 encryption self-test",
+ .enctype = ENCTYPE_AES256_CTS_HMAC_SHA384_192,
+ .Ke = &aes256_cts_hmac_sha384_192_Ke,
+ .plaintext = &encrypt_selftest_plaintext,
+ },
+};
+
+/* Creates the function encrypt_selftest_gen_params */
+KUNIT_ARRAY_PARAM(encrypt_selftest, encrypt_selftest_params,
+ gss_krb5_get_desc);
+
+/*
+ * Encrypt and decrypt plaintext, and ensure the input plaintext
+ * matches the output plaintext. A confounder is not added in this
+ * case.
+ */
+static void encrypt_selftest_case(struct kunit *test)
+{
+ const struct gss_krb5_test_param *param = test->param_value;
+ struct crypto_sync_skcipher *cts_tfm, *cbc_tfm;
+ const struct gss_krb5_enctype *gk5e;
+ struct xdr_buf buf;
+ void *text;
+ int err;
+
+ /* Arrange */
+ gk5e = gss_krb5_lookup_enctype(param->enctype);
+ KUNIT_ASSERT_NOT_NULL(test, gk5e);
+
+ cbc_tfm = crypto_alloc_sync_skcipher(gk5e->aux_cipher, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cbc_tfm);
+ err = crypto_sync_skcipher_setkey(cbc_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ cts_tfm = crypto_alloc_sync_skcipher(gk5e->encrypt_name, 0, 0);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cts_tfm);
+ err = crypto_sync_skcipher_setkey(cts_tfm, param->Ke->data, param->Ke->len);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
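+ /*
+ * The backing buffer is rounded up to whole cipher blocks, which
+ * gives the in-place transform some slack; buf.len below still
+ * covers only the actual plaintext.
+ */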
+ text = kunit_kzalloc(test, roundup(param->plaintext->len,
+ crypto_sync_skcipher_blocksize(cbc_tfm)),
+ GFP_KERNEL);
+ KUNIT_ASSERT_NOT_ERR_OR_NULL(test, text);
+
+ memcpy(text, param->plaintext->data, param->plaintext->len);
+ memset(&buf, 0, sizeof(buf));
+ buf.head[0].iov_base = text;
+ buf.head[0].iov_len = param->plaintext->len;
+ buf.len = buf.head[0].iov_len;
+
+ /* Act */
+ err = krb5_cbc_cts_encrypt(cts_tfm, cbc_tfm, 0, &buf, NULL, NULL, 0);
+ KUNIT_ASSERT_EQ(test, err, 0);
+ err = krb5_cbc_cts_decrypt(cts_tfm, cbc_tfm, 0, &buf);
+ KUNIT_ASSERT_EQ(test, err, 0);
+
+ /* Assert */
+ KUNIT_EXPECT_EQ_MSG(test,
+ param->plaintext->len, buf.len,
+ "length mismatch");
+ KUNIT_EXPECT_EQ_MSG(test,
+ memcmp(param->plaintext->data,
+ buf.head[0].iov_base, buf.len), 0,
+ "plaintext mismatch");
+
+ crypto_free_sync_skcipher(cts_tfm);
+ crypto_free_sync_skcipher(cbc_tfm);
+}
+
+static struct kunit_case encryption_test_cases[] = {
+ {
+ .name = "Encryption self-tests",
+ .run_case = encrypt_selftest_case,
+ .generate_params = encrypt_selftest_gen_params,
+ },
+};
+
+static struct kunit_suite encryption_test_suite = {
+ .name = "Encryption test suite",
+ .test_cases = encryption_test_cases,
+};
+
+kunit_test_suites(&rfc3961_suite,
+ &rfc3962_suite,
+ &rfc6803_suite,
+ &rfc8009_suite,
+ &encryption_test_suite);
+
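+/*
+ * These suites run like any other KUnit tests, for example under the
+ * tools/testing/kunit/kunit.py wrapper, once the Kconfig option that
+ * builds this module (added elsewhere in this series) is enabled.
+ */
+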
+MODULE_DESCRIPTION("Test RPCSEC GSS Kerberos 5 functions");
+MODULE_LICENSE("GPL");
diff --git a/net/sunrpc/auth_gss/gss_krb5_unseal.c b/net/sunrpc/auth_gss/gss_krb5_unseal.c
index ba04e3ec970a..7d6d4ae4a3c9 100644
--- a/net/sunrpc/auth_gss/gss_krb5_unseal.c
+++ b/net/sunrpc/auth_gss/gss_krb5_unseal.c
@@ -57,22 +57,25 @@
* WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
+#include <crypto/algapi.h>
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
#include <linux/crypto.h>
+#include "gss_krb5_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
/* read_token is a mic token, and message_buffer is the data that the mic was
* supposedly taken over. */
-
-static u32
-gss_verify_mic_v1(struct krb5_ctx *ctx,
- struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+u32
+gss_krb5_verify_mic_v1(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token)
{
int signalg;
int sealalg;
@@ -141,21 +144,24 @@ gss_verify_mic_v1(struct krb5_ctx *ctx,
return GSS_S_COMPLETE;
}
+#endif
-static u32
-gss_verify_mic_v2(struct krb5_ctx *ctx,
- struct xdr_buf *message_buffer, struct xdr_netobj *read_token)
+u32
+gss_krb5_verify_mic_v2(struct krb5_ctx *ctx, struct xdr_buf *message_buffer,
+ struct xdr_netobj *read_token)
{
+ struct crypto_ahash *tfm = ctx->initiate ?
+ ctx->acceptor_sign : ctx->initiator_sign;
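+ /*
+ * An initiator verifies MIC tokens that the acceptor signed, and
+ * vice versa, so the verification key is chosen by the peer's
+ * role.
+ */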
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
- struct xdr_netobj cksumobj = {.len = sizeof(cksumdata),
- .data = cksumdata};
- time64_t now;
+ struct xdr_netobj cksumobj = {
+ .len = ctx->gk5e->cksumlength,
+ .data = cksumdata,
+ };
u8 *ptr = read_token->data;
- u8 *cksumkey;
+ __be16 be16_ptr;
+ time64_t now;
u8 flags;
int i;
- unsigned int cksum_usage;
- __be16 be16_ptr;
dprintk("RPC: %s\n", __func__);
@@ -177,16 +183,8 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
if (ptr[i] != 0xff)
return GSS_S_DEFECTIVE_TOKEN;
- if (ctx->initiate) {
- cksumkey = ctx->acceptor_sign;
- cksum_usage = KG_USAGE_ACCEPTOR_SIGN;
- } else {
- cksumkey = ctx->initiator_sign;
- cksum_usage = KG_USAGE_INITIATOR_SIGN;
- }
-
- if (make_checksum_v2(ctx, ptr, GSS_KRB5_TOK_HDR_LEN, message_buffer, 0,
- cksumkey, cksum_usage, &cksumobj))
+ if (gss_krb5_checksum(tfm, ptr, GSS_KRB5_TOK_HDR_LEN,
+ message_buffer, 0, &cksumobj))
return GSS_S_FAILURE;
if (memcmp(cksumobj.data, ptr + GSS_KRB5_TOK_HDR_LEN,
@@ -205,22 +203,3 @@ gss_verify_mic_v2(struct krb5_ctx *ctx,
return GSS_S_COMPLETE;
}
-
-u32
-gss_verify_mic_kerberos(struct gss_ctx *gss_ctx,
- struct xdr_buf *message_buffer,
- struct xdr_netobj *read_token)
-{
- struct krb5_ctx *ctx = gss_ctx->internal_ctx_id;
-
- switch (ctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_verify_mic_v1(ctx, message_buffer, read_token);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_verify_mic_v2(ctx, message_buffer, read_token);
- }
-}
diff --git a/net/sunrpc/auth_gss/gss_krb5_wrap.c b/net/sunrpc/auth_gss/gss_krb5_wrap.c
index 48337687848c..6d6b082380b2 100644
--- a/net/sunrpc/auth_gss/gss_krb5_wrap.c
+++ b/net/sunrpc/auth_gss/gss_krb5_wrap.c
@@ -32,13 +32,16 @@
#include <linux/types.h>
#include <linux/jiffies.h>
#include <linux/sunrpc/gss_krb5.h>
-#include <linux/random.h>
#include <linux/pagemap.h>
+#include "gss_krb5_internal.h"
+
#if IS_ENABLED(CONFIG_SUNRPC_DEBUG)
# define RPCDBG_FACILITY RPCDBG_AUTH
#endif
+#if defined(CONFIG_RPCSEC_GSS_KRB5_SIMPLIFIED)
+
static inline int
gss_krb5_padding(int blocksize, int length)
{
@@ -113,39 +116,6 @@ out:
return 0;
}
-void
-gss_krb5_make_confounder(char *p, u32 conflen)
-{
- static u64 i = 0;
- u64 *q = (u64 *)p;
-
- /* rfc1964 claims this should be "random". But all that's really
- * necessary is that it be unique. And not even that is necessary in
- * our case since our "gssapi" implementation exists only to support
- * rpcsec_gss, so we know that the only buffers we will ever encrypt
- * already begin with a unique sequence number. Just to hedge my bets
- * I'll make a half-hearted attempt at something unique, but ensuring
- * uniqueness would mean worrying about atomicity and rollover, and I
- * don't care enough. */
-
- /* initialize to random value */
- if (i == 0) {
- i = get_random_u32();
- i = (i << 32) | get_random_u32();
- }
-
- switch (conflen) {
- case 16:
- *q++ = i++;
- fallthrough;
- case 8:
- *q++ = i++;
- break;
- default:
- BUG();
- }
-}
-
/* Assumptions: the head and tail of inbuf are ours to play with.
* The pages, however, may be real pages in the page cache and we replace
* them with scratch pages from **pages before writing to them. */
@@ -154,9 +124,9 @@ gss_krb5_make_confounder(char *p, u32 conflen)
/* XXX factor out common code with seal/unseal. */
-static u32
-gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
- struct xdr_buf *buf, struct page **pages)
+u32
+gss_krb5_wrap_v1(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages)
{
char cksumdata[GSS_KRB5_MAX_CKSUM_LEN];
struct xdr_netobj md5cksum = {.len = sizeof(cksumdata),
@@ -168,7 +138,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
struct page **tmp_pages;
u32 seq_send;
u8 *cksumkey;
- u32 conflen = kctx->gk5e->conflen;
+ u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
dprintk("RPC: %s\n", __func__);
@@ -211,7 +181,7 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
ptr[6] = 0xff;
ptr[7] = 0xff;
- gss_krb5_make_confounder(msg_start, conflen);
+ krb5_make_confounder(msg_start, conflen);
if (kctx->gk5e->keyed_cksum)
cksumkey = kctx->cksum;
@@ -243,10 +213,10 @@ gss_wrap_kerberos_v1(struct krb5_ctx *kctx, int offset,
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
-static u32
-gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
+u32
+gss_krb5_unwrap_v1(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
int signalg;
int sealalg;
@@ -261,7 +231,7 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
void *data_start, *orig_start;
int data_len;
int blocksize;
- u32 conflen = kctx->gk5e->conflen;
+ u32 conflen = crypto_sync_skcipher_blocksize(kctx->enc);
int crypt_offset;
u8 *cksumkey;
unsigned int saved_len = buf->len;
@@ -355,6 +325,8 @@ gss_unwrap_kerberos_v1(struct krb5_ctx *kctx, int offset, int len,
return GSS_S_COMPLETE;
}
+#endif
+
/*
* We can shift data by up to LOCAL_BUF_LEN bytes in a pass. If we need
* to do more than that, we shift repeatedly. Kevin Coffman reports
@@ -405,9 +377,9 @@ static void rotate_left(u32 base, struct xdr_buf *buf, unsigned int shift)
_rotate_left(&subbuf, shift);
}
-static u32
-gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
- struct xdr_buf *buf, struct page **pages)
+u32
+gss_krb5_wrap_v2(struct krb5_ctx *kctx, int offset,
+ struct xdr_buf *buf, struct page **pages)
{
u8 *ptr;
time64_t now;
@@ -418,9 +390,6 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
dprintk("RPC: %s\n", __func__);
- if (kctx->gk5e->encrypt_v2 == NULL)
- return GSS_S_FAILURE;
-
/* make room for gss token header */
if (xdr_extend_head(buf, offset, GSS_KRB5_TOK_HDR_LEN))
return GSS_S_FAILURE;
@@ -448,7 +417,7 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
be64ptr = (__be64 *)be16ptr;
*be64ptr = cpu_to_be64(atomic64_fetch_inc(&kctx->seq_send64));
- err = (*kctx->gk5e->encrypt_v2)(kctx, offset, buf, pages);
+ err = (*kctx->gk5e->encrypt)(kctx, offset, buf, pages);
if (err)
return err;
@@ -456,10 +425,10 @@ gss_wrap_kerberos_v2(struct krb5_ctx *kctx, u32 offset,
return (kctx->endtime < now) ? GSS_S_CONTEXT_EXPIRED : GSS_S_COMPLETE;
}
-static u32
-gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
- struct xdr_buf *buf, unsigned int *slack,
- unsigned int *align)
+u32
+gss_krb5_unwrap_v2(struct krb5_ctx *kctx, int offset, int len,
+ struct xdr_buf *buf, unsigned int *slack,
+ unsigned int *align)
{
time64_t now;
u8 *ptr;
@@ -473,9 +442,6 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
dprintk("RPC: %s\n", __func__);
- if (kctx->gk5e->decrypt_v2 == NULL)
- return GSS_S_FAILURE;
-
ptr = buf->head[0].iov_base + offset;
if (be16_to_cpu(*((__be16 *)ptr)) != KG2_TOK_WRAP)
@@ -505,8 +471,8 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
if (rrc != 0)
rotate_left(offset + 16, buf, rrc);
- err = (*kctx->gk5e->decrypt_v2)(kctx, offset, len, buf,
- &headskip, &tailskip);
+ err = (*kctx->gk5e->decrypt)(kctx, offset, len, buf,
+ &headskip, &tailskip);
if (err)
return GSS_S_FAILURE;
@@ -556,41 +522,3 @@ gss_unwrap_kerberos_v2(struct krb5_ctx *kctx, int offset, int len,
*slack = *align + XDR_QUADLEN(ec + GSS_KRB5_TOK_HDR_LEN + tailskip);
return GSS_S_COMPLETE;
}
-
-u32
-gss_wrap_kerberos(struct gss_ctx *gctx, int offset,
- struct xdr_buf *buf, struct page **pages)
-{
- struct krb5_ctx *kctx = gctx->internal_ctx_id;
-
- switch (kctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_wrap_kerberos_v1(kctx, offset, buf, pages);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_wrap_kerberos_v2(kctx, offset, buf, pages);
- }
-}
-
-u32
-gss_unwrap_kerberos(struct gss_ctx *gctx, int offset,
- int len, struct xdr_buf *buf)
-{
- struct krb5_ctx *kctx = gctx->internal_ctx_id;
-
- switch (kctx->enctype) {
- default:
- BUG();
- case ENCTYPE_DES_CBC_RAW:
- case ENCTYPE_DES3_CBC_RAW:
- return gss_unwrap_kerberos_v1(kctx, offset, len, buf,
- &gctx->slack, &gctx->align);
- case ENCTYPE_AES128_CTS_HMAC_SHA1_96:
- case ENCTYPE_AES256_CTS_HMAC_SHA1_96:
- return gss_unwrap_kerberos_v2(kctx, offset, len, buf,
- &gctx->slack, &gctx->align);
- }
-}
diff --git a/net/sunrpc/auth_gss/svcauth_gss.c b/net/sunrpc/auth_gss/svcauth_gss.c
index 148bb0a7fa5b..9c843974bb48 100644
--- a/net/sunrpc/auth_gss/svcauth_gss.c
+++ b/net/sunrpc/auth_gss/svcauth_gss.c
@@ -71,12 +71,11 @@
struct gss_svc_data {
/* decoded gss client cred: */
struct rpc_gss_wire_cred clcred;
- /* save a pointer to the beginning of the encoded verifier,
- * for use in encryption/checksumming in svcauth_gss_release: */
- __be32 *verf_start;
+ u32 gsd_databody_offset;
struct rsc *rsci;
/* for temporary results */
+ __be32 gsd_seq_num;
u8 gsd_scratch[GSS_SCRATCH_SIZE];
};
@@ -692,78 +691,49 @@ alreadyseen:
goto out;
}
-static inline u32 round_up_to_quad(u32 i)
-{
- return (i + 3 ) & ~3;
-}
-
-static inline int
-svc_safe_getnetobj(struct kvec *argv, struct xdr_netobj *o)
-{
- int l;
-
- if (argv->iov_len < 4)
- return -1;
- o->len = svc_getnl(argv);
- l = round_up_to_quad(o->len);
- if (argv->iov_len < l)
- return -1;
- o->data = argv->iov_base;
- argv->iov_base += l;
- argv->iov_len -= l;
- return 0;
-}
-
-static inline int
-svc_safe_putnetobj(struct kvec *resv, struct xdr_netobj *o)
-{
- u8 *p;
-
- if (resv->iov_len + 4 > PAGE_SIZE)
- return -1;
- svc_putnl(resv, o->len);
- p = resv->iov_base + resv->iov_len;
- resv->iov_len += round_up_to_quad(o->len);
- if (resv->iov_len > PAGE_SIZE)
- return -1;
- memcpy(p, o->data, o->len);
- memset(p + o->len, 0, round_up_to_quad(o->len) - o->len);
- return 0;
-}
-
/*
- * Verify the checksum on the header and return SVC_OK on success.
- * Otherwise, return SVC_DROP (in the case of a bad sequence number)
- * or return SVC_DENIED and indicate error in rqstp->rq_auth_stat.
+ * Decode and verify a Call's verifier field. For RPC_AUTH_GSS Calls,
+ * the body of this field contains a variable length checksum.
+ *
+ * GSS-specific auth_stat values are mandated by RFC 2203 Section
+ * 5.3.3.3.
*/
static int
-gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
- __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
+svcauth_gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
+ __be32 *rpcstart, struct rpc_gss_wire_cred *gc)
{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct gss_ctx *ctx_id = rsci->mechctx;
+ u32 flavor, maj_stat;
struct xdr_buf rpchdr;
struct xdr_netobj checksum;
- u32 flavor = 0;
- struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec iov;
- /* data to compute the checksum over: */
+ /*
+ * Compute the checksum of the incoming Call from the
+ * XID field to the credential field:
+ */
iov.iov_base = rpcstart;
- iov.iov_len = (u8 *)argv->iov_base - (u8 *)rpcstart;
+ iov.iov_len = (u8 *)xdr->p - (u8 *)rpcstart;
xdr_buf_from_iov(&iov, &rpchdr);
- rqstp->rq_auth_stat = rpc_autherr_badverf;
- if (argv->iov_len < 4)
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor,
+ (void **)&checksum.data,
+ &checksum.len) < 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
- flavor = svc_getnl(argv);
- if (flavor != RPC_AUTH_GSS)
- return SVC_DENIED;
- if (svc_safe_getnetobj(argv, &checksum))
+ }
+ if (flavor != RPC_AUTH_GSS) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
+ }
- if (rqstp->rq_deferred) /* skip verification of revisited request */
+ if (rqstp->rq_deferred)
return SVC_OK;
- if (gss_verify_mic(ctx_id, &rpchdr, &checksum) != GSS_S_COMPLETE) {
+ maj_stat = gss_verify_mic(ctx_id, &rpchdr, &checksum);
+ if (maj_stat != GSS_S_COMPLETE) {
+ trace_rpcgss_svc_mic(rqstp, maj_stat);
rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
return SVC_DENIED;
}
@@ -778,54 +748,36 @@ gss_verify_header(struct svc_rqst *rqstp, struct rsc *rsci,
return SVC_OK;
}
-static int
-gss_write_null_verf(struct svc_rqst *rqstp)
-{
- __be32 *p;
-
- svc_putnl(rqstp->rq_res.head, RPC_AUTH_NULL);
- p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
- /* don't really need to check if head->iov_len > PAGE_SIZE ... */
- *p++ = 0;
- if (!xdr_ressize_check(rqstp, p))
- return -1;
- return 0;
-}
-
-static int
-gss_write_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
+/*
+ * Construct and encode a Reply's verifier field. The verifier's body
+ * field contains a variable-length checksum of the GSS sequence
+ * number.
+ */
+static bool
+svcauth_gss_encode_verf(struct svc_rqst *rqstp, struct gss_ctx *ctx_id, u32 seq)
{
- __be32 *xdr_seq;
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
u32 maj_stat;
struct xdr_buf verf_data;
- struct xdr_netobj mic;
- __be32 *p;
+ struct xdr_netobj checksum;
struct kvec iov;
- int err = -1;
-
- svc_putnl(rqstp->rq_res.head, RPC_AUTH_GSS);
- xdr_seq = kmalloc(4, GFP_KERNEL);
- if (!xdr_seq)
- return -ENOMEM;
- *xdr_seq = htonl(seq);
- iov.iov_base = xdr_seq;
- iov.iov_len = 4;
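+ /*
+ * The encoded sequence number lives in the per-request auth data,
+ * so computing the MIC no longer requires a temporary allocation.
+ */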
+ gsd->gsd_seq_num = cpu_to_be32(seq);
+ iov.iov_base = &gsd->gsd_seq_num;
+ iov.iov_len = XDR_UNIT;
xdr_buf_from_iov(&iov, &verf_data);
- p = rqstp->rq_res.head->iov_base + rqstp->rq_res.head->iov_len;
- mic.data = (u8 *)(p + 1);
- maj_stat = gss_get_mic(ctx_id, &verf_data, &mic);
+
+ checksum.data = gsd->gsd_scratch;
+ maj_stat = gss_get_mic(ctx_id, &verf_data, &checksum);
if (maj_stat != GSS_S_COMPLETE)
- goto out;
- *p++ = htonl(mic.len);
- memset((u8 *)p + mic.len, 0, round_up_to_quad(mic.len) - mic.len);
- p += XDR_QUADLEN(mic.len);
- if (!xdr_ressize_check(rqstp, p))
- goto out;
- err = 0;
-out:
- kfree(xdr_seq);
- return err;
+ goto bad_mic;
+
+ return xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream, RPC_AUTH_GSS,
+ checksum.data, checksum.len) > 0;
+
+bad_mic:
+ trace_rpcgss_svc_get_mic(rqstp, maj_stat);
+ return false;
}
struct gss_domain {
@@ -891,31 +843,28 @@ out:
}
EXPORT_SYMBOL_GPL(svcauth_gss_register_pseudoflavor);
-static inline int
-read_u32_from_xdr_buf(struct xdr_buf *buf, int base, u32 *obj)
-{
- __be32 raw;
- int status;
-
- status = read_bytes_from_xdr_buf(buf, base, &raw, sizeof(*obj));
- if (status)
- return status;
- *obj = ntohl(raw);
- return 0;
-}
-
-/* It would be nice if this bit of code could be shared with the client.
- * Obstacles:
- * The client shouldn't malloc(), would have to pass in own memory.
- * The server uses base of head iovec as read pointer, while the
- * client uses separate pointer. */
-static int
-unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+/*
+ * RFC 2203, Section 5.3.2.2
+ *
+ * struct rpc_gss_integ_data {
+ * opaque databody_integ<>;
+ * opaque checksum<>;
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ */
+static noinline_for_stack int
+svcauth_gss_unwrap_integ(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
{
struct gss_svc_data *gsd = rqstp->rq_auth_data;
- u32 integ_len, rseqno, maj_stat;
- struct xdr_netobj mic;
- struct xdr_buf integ_buf;
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ u32 len, offset, seq_num, maj_stat;
+ struct xdr_buf *buf = xdr->buf;
+ struct xdr_buf databody_integ;
+ struct xdr_netobj checksum;
/* NFS READ normally uses splice to send data in-place. However
 * the data in cache can change after the reply's MIC is computed.
@@ -923,110 +872,106 @@ unwrap_integ_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct g
 * To prevent the client from rejecting the server-computed MIC in this somewhat rare case,
* do not use splice with the GSS integrity service.
*/
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Did we already verify the signature on the original pass through? */
if (rqstp->rq_deferred)
return 0;
- integ_len = svc_getnl(&buf->head[0]);
- if (integ_len & 3)
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
goto unwrap_failed;
- if (integ_len > buf->len)
+ if (len & 3)
goto unwrap_failed;
- if (xdr_buf_subsegment(buf, &integ_buf, 0, integ_len))
+ offset = xdr_stream_pos(xdr);
+ if (xdr_buf_subsegment(buf, &databody_integ, offset, len))
goto unwrap_failed;
- /* copy out mic... */
- if (read_u32_from_xdr_buf(buf, integ_len, &mic.len))
+ /*
+ * The xdr_stream now points to the @seq_num field. The next
+ * XDR data item is the @arg field, which contains the clear
+ * text RPC program payload. The checksum, which follows the
+ * @arg field, is located and decoded without updating the
+ * xdr_stream.
+ */
+
+ offset += len;
+ if (xdr_decode_word(buf, offset, &checksum.len))
goto unwrap_failed;
- if (mic.len > sizeof(gsd->gsd_scratch))
+ if (checksum.len > sizeof(gsd->gsd_scratch))
goto unwrap_failed;
- mic.data = gsd->gsd_scratch;
- if (read_bytes_from_xdr_buf(buf, integ_len + 4, mic.data, mic.len))
+ checksum.data = gsd->gsd_scratch;
+ if (read_bytes_from_xdr_buf(buf, offset + XDR_UNIT, checksum.data,
+ checksum.len))
goto unwrap_failed;
- maj_stat = gss_verify_mic(ctx, &integ_buf, &mic);
+
+ maj_stat = gss_verify_mic(ctx, &databody_integ, &checksum);
if (maj_stat != GSS_S_COMPLETE)
goto bad_mic;
- rseqno = svc_getnl(&buf->head[0]);
- if (rseqno != seq)
+
+ /* The received seqno is protected by the checksum. */
+ if (xdr_stream_decode_u32(xdr, &seq_num) < 0)
+ goto unwrap_failed;
+ if (seq_num != seq)
goto bad_seqno;
- /* trim off the mic and padding at the end before returning */
- xdr_buf_trim(buf, round_up_to_quad(mic.len) + 4);
+
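+ /*
+ * Trim the checksum and its length word off the end of the decode
+ * stream; callers see only the clear-text RPC payload.
+ */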
+ xdr_truncate_decode(xdr, XDR_UNIT + checksum.len);
return 0;
unwrap_failed:
trace_rpcgss_svc_unwrap_failed(rqstp);
return -EINVAL;
bad_seqno:
- trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno);
+ trace_rpcgss_svc_seqno_bad(rqstp, seq, seq_num);
return -EINVAL;
bad_mic:
trace_rpcgss_svc_mic(rqstp, maj_stat);
return -EINVAL;
}
-static inline int
-total_buf_len(struct xdr_buf *buf)
-{
- return buf->head[0].iov_len + buf->page_len + buf->tail[0].iov_len;
-}
-
-static void
-fix_priv_head(struct xdr_buf *buf, int pad)
-{
- if (buf->page_len == 0) {
- /* We need to adjust head and buf->len in tandem in this
- * case to make svc_defer() work--it finds the original
- * buffer start using buf->len - buf->head[0].iov_len. */
- buf->head[0].iov_len -= pad;
- }
-}
-
-static int
-unwrap_priv_data(struct svc_rqst *rqstp, struct xdr_buf *buf, u32 seq, struct gss_ctx *ctx)
+/*
+ * RFC 2203, Section 5.3.2.3
+ *
+ * struct rpc_gss_priv_data {
+ * opaque databody_priv<>
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ */
+static noinline_for_stack int
+svcauth_gss_unwrap_priv(struct svc_rqst *rqstp, u32 seq, struct gss_ctx *ctx)
{
- u32 priv_len, maj_stat;
- int pad, remaining_len, offset;
- u32 rseqno;
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ u32 len, maj_stat, seq_num, offset;
+ struct xdr_buf *buf = xdr->buf;
+ unsigned int saved_len;
- __clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ clear_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
- priv_len = svc_getnl(&buf->head[0]);
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+ goto unwrap_failed;
if (rqstp->rq_deferred) {
/* Already decrypted last time through! The sequence number
* check at out_seq is unnecessary but harmless: */
goto out_seq;
}
- /* buf->len is the number of bytes from the original start of the
- * request to the end, where head[0].iov_len is just the bytes
- * not yet read from the head, so these two values are different: */
- remaining_len = total_buf_len(buf);
- if (priv_len > remaining_len)
+ if (len > xdr_stream_remaining(xdr))
goto unwrap_failed;
- pad = remaining_len - priv_len;
- buf->len -= pad;
- fix_priv_head(buf, pad);
-
- maj_stat = gss_unwrap(ctx, 0, priv_len, buf);
- pad = priv_len - buf->len;
- /* The upper layers assume the buffer is aligned on 4-byte boundaries.
- * In the krb5p case, at least, the data ends up offset, so we need to
- * move it around. */
- /* XXX: This is very inefficient. It would be better to either do
- * this while we encrypt, or maybe in the receive code, if we can peak
- * ahead and work out the service and mechanism there. */
- offset = xdr_pad_size(buf->head[0].iov_len);
- if (offset) {
- buf->buflen = RPCSVC_MAXPAYLOAD;
- xdr_shift_buf(buf, offset);
- fix_priv_head(buf, pad);
- }
+ offset = xdr_stream_pos(xdr);
+
+ saved_len = buf->len;
+ maj_stat = gss_unwrap(ctx, offset, offset + len, buf);
if (maj_stat != GSS_S_COMPLETE)
goto bad_unwrap;
+ xdr->nwords -= XDR_QUADLEN(saved_len - buf->len);
+
out_seq:
- rseqno = svc_getnl(&buf->head[0]);
- if (rseqno != seq)
+ /* gss_unwrap() decrypted the sequence number. */
+ if (xdr_stream_decode_u32(xdr, &seq_num) < 0)
+ goto unwrap_failed;
+ if (seq_num != seq)
goto bad_seqno;
return 0;
@@ -1034,7 +979,7 @@ unwrap_failed:
trace_rpcgss_svc_unwrap_failed(rqstp);
return -EINVAL;
bad_seqno:
- trace_rpcgss_svc_seqno_bad(rqstp, seq, rseqno);
+ trace_rpcgss_svc_seqno_bad(rqstp, seq, seq_num);
return -EINVAL;
bad_unwrap:
trace_rpcgss_svc_unwrap(rqstp, maj_stat);
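
gss_unwrap() decrypts in place and shrinks the buffer, so the decode cursor's remaining word count has to drop by the same amount, which is what the new xdr->nwords adjustment above does. A toy model of that bookkeeping (the struct and function names here are illustrative, not the kernel types):

/* Toy model of keeping a decode cursor in sync after an in-place
 * unwrap shrinks the buffer; names are illustrative only. */
#include <stdio.h>

#define XDR_QUADLEN(l)	(((l) + 3) >> 2)

struct toy_stream {
	unsigned int buf_len;	/* total bytes in the buffer */
	unsigned int nwords;	/* 4-byte words left to decode */
};

static void toy_unwrap(struct toy_stream *s, unsigned int overhead)
{
	unsigned int saved_len = s->buf_len;

	s->buf_len -= overhead;	/* ciphertext framing is gone */
	s->nwords -= XDR_QUADLEN(saved_len - s->buf_len);
}

int main(void)
{
	struct toy_stream s = {
		.buf_len = 400,
		.nwords = XDR_QUADLEN(400),
	};

	toy_unwrap(&s, 28);	/* e.g. krb5p confounder + trailer */
	printf("len=%u nwords=%u\n", s.buf_len, s.nwords);
	return 0;
}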
@@ -1071,72 +1016,29 @@ svcauth_gss_set_client(struct svc_rqst *rqstp)
return SVC_OK;
}
-static inline int
-gss_write_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
- struct xdr_netobj *out_handle, int *major_status)
+static bool
+svcauth_gss_proc_init_verf(struct cache_detail *cd, struct svc_rqst *rqstp,
+ struct xdr_netobj *out_handle, int *major_status,
+ u32 seq_num)
{
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct rsc *rsci;
- int rc;
+ bool rc;
if (*major_status != GSS_S_COMPLETE)
- return gss_write_null_verf(rqstp);
+ goto null_verifier;
rsci = gss_svc_searchbyctx(cd, out_handle);
if (rsci == NULL) {
*major_status = GSS_S_NO_CONTEXT;
- return gss_write_null_verf(rqstp);
+ goto null_verifier;
}
- rc = gss_write_verf(rqstp, rsci->mechctx, GSS_SEQ_WIN);
+
+ rc = svcauth_gss_encode_verf(rqstp, rsci->mechctx, seq_num);
cache_put(&rsci->h, cd);
return rc;
-}
-
-static inline int
-gss_read_common_verf(struct rpc_gss_wire_cred *gc,
- struct kvec *argv, __be32 *authp,
- struct xdr_netobj *in_handle)
-{
- /* Read the verifier; should be NULL: */
- *authp = rpc_autherr_badverf;
- if (argv->iov_len < 2 * 4)
- return SVC_DENIED;
- if (svc_getnl(argv) != RPC_AUTH_NULL)
- return SVC_DENIED;
- if (svc_getnl(argv) != 0)
- return SVC_DENIED;
- /* Martial context handle and token for upcall: */
- *authp = rpc_autherr_badcred;
- if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0)
- return SVC_DENIED;
- if (dup_netobj(in_handle, &gc->gc_ctx))
- return SVC_CLOSE;
- *authp = rpc_autherr_badverf;
- return 0;
-}
-
-static inline int
-gss_read_verf(struct rpc_gss_wire_cred *gc,
- struct kvec *argv, __be32 *authp,
- struct xdr_netobj *in_handle,
- struct xdr_netobj *in_token)
-{
- struct xdr_netobj tmpobj;
- int res;
-
- res = gss_read_common_verf(gc, argv, authp, in_handle);
- if (res)
- return res;
-
- if (svc_safe_getnetobj(argv, &tmpobj)) {
- kfree(in_handle->data);
- return SVC_DENIED;
- }
- if (dup_netobj(in_token, &tmpobj)) {
- kfree(in_handle->data);
- return SVC_CLOSE;
- }
-
- return 0;
+null_verifier:
+ return xdr_stream_encode_opaque_auth(xdr, RPC_AUTH_NULL, NULL, 0) > 0;
}
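
On any upcall failure the server still answers with a NULL verifier, which on the wire is just two zero XDR words: flavor RPC_AUTH_NULL (0) followed by a zero-length body. A standalone sketch of that encoding (flat byte buffer, not the xdr_stream API):

/* Sketch: an opaque_auth with AUTH_NULL and an empty body is two
 * zero XDR words on the wire (flavor, then body length). */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	unsigned char verf[8];
	uint32_t flavor = htonl(0);	/* RPC_AUTH_NULL */
	uint32_t body_len = htonl(0);	/* zero-length body */
	int i;

	memcpy(verf, &flavor, 4);
	memcpy(verf + 4, &body_len, 4);
	for (i = 0; i < 8; i++)
		printf("%02x", verf[i]);
	printf("\n");
	return 0;
}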
static void gss_free_in_token_pages(struct gssp_in_token *in_token)
@@ -1161,40 +1063,43 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
struct xdr_netobj *in_handle,
struct gssp_in_token *in_token)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
unsigned int length, pgto_offs, pgfrom_offs;
- int pages, i, res, pgto, pgfrom;
- size_t inlen, to_offs, from_offs;
+ int pages, i, pgto, pgfrom;
+ size_t to_offs, from_offs;
+ u32 inlen;
- res = gss_read_common_verf(gc, argv, &rqstp->rq_auth_stat, in_handle);
- if (res)
- return res;
+ if (dup_netobj(in_handle, &gc->gc_ctx))
+ return SVC_CLOSE;
- inlen = svc_getnl(argv);
- if (inlen > (argv->iov_len + rqstp->rq_arg.page_len)) {
- kfree(in_handle->data);
- return SVC_DENIED;
- }
+ /*
+ * RFC 2203 Section 5.2.2
+ *
+ * struct rpc_gss_init_arg {
+ * opaque gss_token<>;
+ * };
+ */
+ if (xdr_stream_decode_u32(xdr, &inlen) < 0)
+ goto out_denied_free;
+ if (inlen > xdr_stream_remaining(xdr))
+ goto out_denied_free;
pages = DIV_ROUND_UP(inlen, PAGE_SIZE);
in_token->pages = kcalloc(pages, sizeof(struct page *), GFP_KERNEL);
- if (!in_token->pages) {
- kfree(in_handle->data);
- return SVC_DENIED;
- }
+ if (!in_token->pages)
+ goto out_denied_free;
in_token->page_base = 0;
in_token->page_len = inlen;
for (i = 0; i < pages; i++) {
in_token->pages[i] = alloc_page(GFP_KERNEL);
if (!in_token->pages[i]) {
- kfree(in_handle->data);
gss_free_in_token_pages(in_token);
- return SVC_DENIED;
+ goto out_denied_free;
}
}
- length = min_t(unsigned int, inlen, argv->iov_len);
- memcpy(page_address(in_token->pages[0]), argv->iov_base, length);
+ length = min_t(unsigned int, inlen, (char *)xdr->end - (char *)xdr->p);
+ memcpy(page_address(in_token->pages[0]), xdr->p, length);
inlen -= length;
to_offs = length;
@@ -1217,26 +1122,41 @@ static int gss_read_proxy_verf(struct svc_rqst *rqstp,
inlen -= length;
}
return 0;
+
+out_denied_free:
+ kfree(in_handle->data);
+ return SVC_DENIED;
}
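
The proxy path sizes the inbound token with DIV_ROUND_UP(inlen, PAGE_SIZE) and copies it page by page. A userspace sketch of the same scatter loop, with malloc'd 4 KiB buffers standing in for alloc_page():

/* Userspace sketch of scattering an inbound token across fixed-size
 * pages; malloc'd buffers stand in for alloc_page(). */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define PAGE_SIZE 4096u

int main(void)
{
	unsigned int inlen = 10000, off = 0, i;
	unsigned int npages = (inlen + PAGE_SIZE - 1) / PAGE_SIZE;
	unsigned char *token = malloc(inlen);
	unsigned char **pages = calloc(npages, sizeof(*pages));

	if (!token || !pages)
		return 1;
	memset(token, 0xab, inlen);
	for (i = 0; i < npages; i++) {
		unsigned int chunk = inlen - off < PAGE_SIZE ?
				     inlen - off : PAGE_SIZE;

		pages[i] = malloc(PAGE_SIZE);
		if (!pages[i])
			return 1;
		memcpy(pages[i], token + off, chunk);
		off += chunk;
	}
	printf("copied %u bytes into %u pages\n", off, npages);
	return 0;
}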
-static inline int
-gss_write_resv(struct kvec *resv, size_t size_limit,
- struct xdr_netobj *out_handle, struct xdr_netobj *out_token,
- int major_status, int minor_status)
-{
- if (resv->iov_len + 4 > size_limit)
- return -1;
- svc_putnl(resv, RPC_SUCCESS);
- if (svc_safe_putnetobj(resv, out_handle))
- return -1;
- if (resv->iov_len + 3 * 4 > size_limit)
- return -1;
- svc_putnl(resv, major_status);
- svc_putnl(resv, minor_status);
- svc_putnl(resv, GSS_SEQ_WIN);
- if (svc_safe_putnetobj(resv, out_token))
- return -1;
- return 0;
+/*
+ * RFC 2203, Section 5.2.3.1.
+ *
+ * struct rpc_gss_init_res {
+ * opaque handle<>;
+ * unsigned int gss_major;
+ * unsigned int gss_minor;
+ * unsigned int seq_window;
+ * opaque gss_token<>;
+ * };
+ */
+static bool
+svcxdr_encode_gss_init_res(struct xdr_stream *xdr,
+ struct xdr_netobj *handle,
+ struct xdr_netobj *gss_token,
+ unsigned int major_status,
+ unsigned int minor_status, u32 seq_num)
+{
+ if (xdr_stream_encode_opaque(xdr, handle->data, handle->len) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, major_status) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, minor_status) < 0)
+ return false;
+ if (xdr_stream_encode_u32(xdr, seq_num) < 0)
+ return false;
+ if (xdr_stream_encode_opaque(xdr, gss_token->data, gss_token->len) < 0)
+ return false;
+ return true;
}
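
Each field of rpc_gss_init_res is either a fixed u32 or a length-prefixed, quad-padded opaque, which is why the encoder above is a straight sequence of xdr_stream_encode calls. A hypothetical flat encoder showing the same wire layout (a bare cursor stands in for the xdr_stream helpers):

/* Hypothetical flat encoder showing the rpc_gss_init_res layout;
 * a bare cursor stands in for the xdr_stream encoders. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char *put_u32(unsigned char *p, uint32_t v)
{
	v = htonl(v);
	memcpy(p, &v, 4);
	return p + 4;
}

static unsigned char *put_opaque(unsigned char *p, const void *data,
				 uint32_t len)
{
	p = put_u32(p, len);
	memcpy(p, data, len);
	memset(p + len, 0, (4 - len % 4) % 4);	/* XDR pad */
	return p + ((len + 3) & ~3u);
}

int main(void)
{
	unsigned char buf[64], *p = buf;

	p = put_opaque(p, "\x01\x02\x03", 3);	/* handle */
	p = put_u32(p, 0);			/* gss_major */
	p = put_u32(p, 0);			/* gss_minor */
	p = put_u32(p, 128);			/* seq_window */
	p = put_opaque(p, "tok", 3);		/* gss_token */
	printf("encoded %td bytes\n", p - buf);
	return 0;
}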
/*
@@ -1246,20 +1166,44 @@ gss_write_resv(struct kvec *resv, size_t size_limit,
* the upcall results are available, write the verifier and result.
* Otherwise, drop the request pending an answer to the upcall.
*/
-static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
- struct rpc_gss_wire_cred *gc)
+static int
+svcauth_gss_legacy_init(struct svc_rqst *rqstp,
+ struct rpc_gss_wire_cred *gc)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct rsi *rsip, rsikey;
+ __be32 *p;
+ u32 len;
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
memset(&rsikey, 0, sizeof(rsikey));
- ret = gss_read_verf(gc, argv, &rqstp->rq_auth_stat,
- &rsikey.in_handle, &rsikey.in_token);
- if (ret)
- return ret;
+ if (dup_netobj(&rsikey.in_handle, &gc->gc_ctx))
+ return SVC_CLOSE;
+
+ /*
+ * RFC 2203 Section 5.2.2
+ *
+ * struct rpc_gss_init_arg {
+ * opaque gss_token<>;
+ * };
+ */
+ if (xdr_stream_decode_u32(xdr, &len) < 0) {
+ kfree(rsikey.in_handle.data);
+ return SVC_DENIED;
+ }
+ p = xdr_inline_decode(xdr, len);
+ if (!p) {
+ kfree(rsikey.in_handle.data);
+ return SVC_DENIED;
+ }
+ rsikey.in_token.data = kmalloc(len, GFP_KERNEL);
+ if (ZERO_OR_NULL_PTR(rsikey.in_token.data)) {
+ kfree(rsikey.in_handle.data);
+ return SVC_CLOSE;
+ }
+ memcpy(rsikey.in_token.data, p, len);
+ rsikey.in_token.len = len;
/* Perform upcall, or find upcall result: */
rsip = rsi_lookup(sn->rsi_cache, &rsikey);
@@ -1271,13 +1215,14 @@ static int svcauth_gss_legacy_init(struct svc_rqst *rqstp,
return SVC_CLOSE;
ret = SVC_CLOSE;
- /* Got an answer to the upcall; use it: */
- if (gss_write_init_verf(sn->rsc_cache, rqstp,
- &rsip->out_handle, &rsip->major_status))
+ if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &rsip->out_handle,
+ &rsip->major_status, GSS_SEQ_WIN))
+ goto out;
+ if (!svcxdr_set_accept_stat(rqstp))
goto out;
- if (gss_write_resv(resv, PAGE_SIZE,
- &rsip->out_handle, &rsip->out_token,
- rsip->major_status, rsip->minor_status))
+ if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &rsip->out_handle,
+ &rsip->out_token, rsip->major_status,
+ rsip->minor_status, GSS_SEQ_WIN))
goto out;
ret = SVC_COMPLETE;
@@ -1361,7 +1306,6 @@ out:
static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
struct rpc_gss_wire_cred *gc)
{
- struct kvec *resv = &rqstp->rq_res.head[0];
struct xdr_netobj cli_handle;
struct gssp_upcall_data ud;
uint64_t handle;
@@ -1399,13 +1343,14 @@ static int svcauth_gss_proxy_init(struct svc_rqst *rqstp,
goto out;
}
- /* Got an answer to the upcall; use it: */
- if (gss_write_init_verf(sn->rsc_cache, rqstp,
- &cli_handle, &ud.major_status))
+ if (!svcauth_gss_proc_init_verf(sn->rsc_cache, rqstp, &cli_handle,
+ &ud.major_status, GSS_SEQ_WIN))
goto out;
- if (gss_write_resv(resv, PAGE_SIZE,
- &cli_handle, &ud.out_token,
- ud.major_status, ud.minor_status))
+ if (!svcxdr_set_accept_stat(rqstp))
+ goto out;
+ if (!svcxdr_encode_gss_init_res(&rqstp->rq_res_stream, &cli_handle,
+ &ud.out_token, ud.major_status,
+ ud.minor_status, GSS_SEQ_WIN))
goto out;
ret = SVC_COMPLETE;
@@ -1442,6 +1387,31 @@ static bool use_gss_proxy(struct net *net)
return sn->use_gss_proxy;
}
+static noinline_for_stack int
+svcauth_gss_proc_init(struct svc_rqst *rqstp, struct rpc_gss_wire_cred *gc)
+{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
+ u32 flavor, len;
+ void *body;
+
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badverf;
+ return SVC_DENIED;
+ }
+
+ if (gc->gc_proc == RPC_GSS_PROC_INIT && gc->gc_ctx.len != 0) {
+ rqstp->rq_auth_stat = rpc_autherr_badcred;
+ return SVC_DENIED;
+ }
+
+ if (!use_gss_proxy(SVC_NET(rqstp)))
+ return svcauth_gss_legacy_init(rqstp, gc);
+ return svcauth_gss_proxy_init(rqstp, gc);
+}
+
#ifdef CONFIG_PROC_FS
static ssize_t write_gssp(struct file *file, const char __user *buf,
@@ -1524,6 +1494,56 @@ static void destroy_use_gss_proxy_proc_entry(struct net *net)
clear_gssp_clnt(sn);
}
}
+
+static ssize_t read_gss_krb5_enctypes(struct file *file, char __user *buf,
+ size_t count, loff_t *ppos)
+{
+ struct rpcsec_gss_oid oid = {
+ .len = 9,
+ .data = "\x2a\x86\x48\x86\xf7\x12\x01\x02\x02",
+ };
+ struct gss_api_mech *mech;
+ ssize_t ret;
+
+ mech = gss_mech_get_by_OID(&oid);
+ if (!mech)
+ return 0;
+ if (!mech->gm_upcall_enctypes) {
+ gss_mech_put(mech);
+ return 0;
+ }
+
+ ret = simple_read_from_buffer(buf, count, ppos,
+ mech->gm_upcall_enctypes,
+ strlen(mech->gm_upcall_enctypes));
+ gss_mech_put(mech);
+ return ret;
+}
+
+static const struct proc_ops gss_krb5_enctypes_proc_ops = {
+ .proc_open = nonseekable_open,
+ .proc_read = read_gss_krb5_enctypes,
+};
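
simple_read_from_buffer() gives the new proc file correct partial-read and EOF semantics for free: it honors *ppos and count against the fixed enctypes string. A userspace model of that contract, read loop included (the enctypes list shown is illustrative):

/* Userspace model of the simple_read_from_buffer() contract. */
#include <stdio.h>
#include <string.h>
#include <sys/types.h>

static ssize_t read_from(char *dst, size_t count, long *ppos,
			 const char *src)
{
	size_t avail = strlen(src);

	if (*ppos < 0 || (size_t)*ppos >= avail)
		return 0;			/* EOF */
	if (count > avail - *ppos)
		count = avail - *ppos;
	memcpy(dst, src + *ppos, count);
	*ppos += count;
	return count;
}

int main(void)
{
	const char *enctypes = "18,17,20,19";	/* illustrative */
	char out[8];
	long pos = 0;
	ssize_t n;

	while ((n = read_from(out, sizeof(out), &pos, enctypes)) > 0)
		printf("%.*s", (int)n, out);
	printf("\n");
	return 0;
}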
+
+static int create_krb5_enctypes_proc_entry(struct net *net)
+{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ sn->gss_krb5_enctypes =
+ proc_create_data("gss_krb5_enctypes", S_IFREG | 0444,
+ sn->proc_net_rpc, &gss_krb5_enctypes_proc_ops,
+ net);
+ return sn->gss_krb5_enctypes ? 0 : -ENOMEM;
+}
+
+static void destroy_krb5_enctypes_proc_entry(struct net *net)
+{
+ struct sunrpc_net *sn = net_generic(net, sunrpc_net_id);
+
+ if (sn->gss_krb5_enctypes)
+ remove_proc_entry("gss_krb5_enctypes", sn->proc_net_rpc);
+}
+
#else /* CONFIG_PROC_FS */
static int create_use_gss_proxy_proc_entry(struct net *net)
@@ -1533,27 +1553,94 @@ static int create_use_gss_proxy_proc_entry(struct net *net)
static void destroy_use_gss_proxy_proc_entry(struct net *net) {}
+static int create_krb5_enctypes_proc_entry(struct net *net)
+{
+ return 0;
+}
+
+static void destroy_krb5_enctypes_proc_entry(struct net *net) {}
+
#endif /* CONFIG_PROC_FS */
/*
- * Accept an rpcsec packet.
- * If context establishment, punt to user space
- * If data exchange, verify/decrypt
- * If context destruction, handle here
- * In the context establishment and destruction case we encode
- * response here and return SVC_COMPLETE.
+ * The Call's credential body should contain a struct rpc_gss_cred_t.
+ *
+ * RFC 2203 Section 5
+ *
+ * struct rpc_gss_cred_t {
+ * union switch (unsigned int version) {
+ * case RPCSEC_GSS_VERS_1:
+ * struct {
+ * rpc_gss_proc_t gss_proc;
+ * unsigned int seq_num;
+ * rpc_gss_service_t service;
+ * opaque handle<>;
+ * } rpc_gss_cred_vers_1_t;
+ * }
+ * };
+ */
+static bool
+svcauth_gss_decode_credbody(struct xdr_stream *xdr,
+ struct rpc_gss_wire_cred *gc,
+ __be32 **rpcstart)
+{
+ ssize_t handle_len;
+ u32 body_len;
+ __be32 *p;
+
+ p = xdr_inline_decode(xdr, XDR_UNIT);
+ if (!p)
+ return false;
+ /*
+ * start of rpc packet is 7 u32's back from here:
+ * xid direction rpcversion prog vers proc flavour
+ */
+ *rpcstart = p - 7;
+ body_len = be32_to_cpup(p);
+ if (body_len > RPC_MAX_AUTH_SIZE)
+ return false;
+
+ /* struct rpc_gss_cred_t */
+ if (xdr_stream_decode_u32(xdr, &gc->gc_v) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &gc->gc_proc) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &gc->gc_seq) < 0)
+ return false;
+ if (xdr_stream_decode_u32(xdr, &gc->gc_svc) < 0)
+ return false;
+ handle_len = xdr_stream_decode_opaque_inline(xdr,
+ (void **)&gc->gc_ctx.data,
+ body_len);
+ if (handle_len < 0)
+ return false;
+ if (body_len != XDR_UNIT * 5 + xdr_align_size(handle_len))
+ return false;
+
+ gc->gc_ctx.len = handle_len;
+ return true;
+}
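
The body-length check above is strict: the credential body must be exactly five u32s (version, gss_proc, seq_num, service, and the handle's length word) plus the quad-aligned handle bytes. A standalone check of that arithmetic:

/* Standalone check of the strict credential body-length rule. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define XDR_UNIT 4u

static uint32_t xdr_align(uint32_t len)
{
	return (len + 3) & ~3u;
}

int main(void)
{
	uint32_t handle_len = 20;	/* a typical context handle */
	uint32_t body_len = XDR_UNIT * 5 + xdr_align(handle_len);

	assert(body_len == 40);
	printf("credential body = %u bytes\n", body_len);
	return 0;
}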
+
+/**
+ * svcauth_gss_accept - Decode and validate incoming RPC_AUTH_GSS credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Success
+ * %SVC_COMPLETE: GSS context lifetime event
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * The rqstp->rq_auth_stat field is also set (see RFCs 2203 and 5531).
*/
static int
svcauth_gss_accept(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
- u32 crlen;
struct gss_svc_data *svcdata = rqstp->rq_auth_data;
+ __be32 *rpcstart;
struct rpc_gss_wire_cred *gc;
struct rsc *rsci = NULL;
- __be32 *rpcstart;
- __be32 *reject_stat = resv->iov_base + resv->iov_len;
int ret;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
@@ -1563,53 +1650,31 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
if (!svcdata)
goto auth_err;
rqstp->rq_auth_data = svcdata;
- svcdata->verf_start = NULL;
+ svcdata->gsd_databody_offset = 0;
svcdata->rsci = NULL;
gc = &svcdata->clcred;
- /* start of rpc packet is 7 u32's back from here:
- * xid direction rpcversion prog vers proc flavour
- */
- rpcstart = argv->iov_base;
- rpcstart -= 7;
-
- /* credential is:
- * version(==1), proc(0,1,2,3), seq, service (1,2,3), handle
- * at least 5 u32s, and is preceded by length, so that makes 6.
- */
-
- if (argv->iov_len < 5 * 4)
+ if (!svcauth_gss_decode_credbody(&rqstp->rq_arg_stream, gc, &rpcstart))
goto auth_err;
- crlen = svc_getnl(argv);
- if (svc_getnl(argv) != RPC_GSS_VERSION)
- goto auth_err;
- gc->gc_proc = svc_getnl(argv);
- gc->gc_seq = svc_getnl(argv);
- gc->gc_svc = svc_getnl(argv);
- if (svc_safe_getnetobj(argv, &gc->gc_ctx))
- goto auth_err;
- if (crlen != round_up_to_quad(gc->gc_ctx.len) + 5 * 4)
- goto auth_err;
-
- if ((gc->gc_proc != RPC_GSS_PROC_DATA) && (rqstp->rq_proc != 0))
+ if (gc->gc_v != RPC_GSS_VERSION)
goto auth_err;
- rqstp->rq_auth_stat = rpc_autherr_badverf;
switch (gc->gc_proc) {
case RPC_GSS_PROC_INIT:
case RPC_GSS_PROC_CONTINUE_INIT:
- if (use_gss_proxy(SVC_NET(rqstp)))
- return svcauth_gss_proxy_init(rqstp, gc);
- else
- return svcauth_gss_legacy_init(rqstp, gc);
- case RPC_GSS_PROC_DATA:
+ if (rqstp->rq_proc != 0)
+ goto auth_err;
+ return svcauth_gss_proc_init(rqstp, gc);
case RPC_GSS_PROC_DESTROY:
- /* Look up the context, and check the verifier: */
+ if (rqstp->rq_proc != 0)
+ goto auth_err;
+ fallthrough;
+ case RPC_GSS_PROC_DATA:
rqstp->rq_auth_stat = rpcsec_gsserr_credproblem;
rsci = gss_svc_searchbyctx(sn->rsc_cache, &gc->gc_ctx);
if (!rsci)
goto auth_err;
- switch (gss_verify_header(rqstp, rsci, rpcstart, gc)) {
+ switch (svcauth_gss_verify_header(rqstp, rsci, rpcstart, gc)) {
case SVC_OK:
break;
case SVC_DENIED:
@@ -1619,6 +1684,8 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
}
break;
default:
+ if (rqstp->rq_proc != 0)
+ goto auth_err;
rqstp->rq_auth_stat = rpc_autherr_rejectedcred;
goto auth_err;
}
@@ -1626,19 +1693,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
/* now act upon the command: */
switch (gc->gc_proc) {
case RPC_GSS_PROC_DESTROY:
- if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ goto auth_err;
+ if (!svcxdr_set_accept_stat(rqstp))
goto auth_err;
/* Delete the entry from the cache_list and call cache_put */
sunrpc_cache_unhash(sn->rsc_cache, &rsci->h);
- if (resv->iov_len + 4 > PAGE_SIZE)
- goto drop;
- svc_putnl(resv, RPC_SUCCESS);
goto complete;
case RPC_GSS_PROC_DATA:
rqstp->rq_auth_stat = rpcsec_gsserr_ctxproblem;
- svcdata->verf_start = resv->iov_base + resv->iov_len;
- if (gss_write_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ if (!svcauth_gss_encode_verf(rqstp, rsci->mechctx, gc->gc_seq))
+ goto auth_err;
+ if (!svcxdr_set_accept_stat(rqstp))
goto auth_err;
+ svcdata->gsd_databody_offset = xdr_stream_pos(&rqstp->rq_res_stream);
rqstp->rq_cred = rsci->cred;
get_group_info(rsci->cred.cr_group_info);
rqstp->rq_auth_stat = rpc_autherr_badcred;
@@ -1646,22 +1714,20 @@ svcauth_gss_accept(struct svc_rqst *rqstp)
case RPC_GSS_SVC_NONE:
break;
case RPC_GSS_SVC_INTEGRITY:
- /* placeholders for length and seq. number: */
- svc_putnl(resv, 0);
- svc_putnl(resv, 0);
- if (unwrap_integ_data(rqstp, &rqstp->rq_arg,
- gc->gc_seq, rsci->mechctx))
+ /* placeholders for body length and seq. number: */
+ xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2);
+ if (svcauth_gss_unwrap_integ(rqstp, gc->gc_seq,
+ rsci->mechctx))
goto garbage_args;
- rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE;
+ svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE);
break;
case RPC_GSS_SVC_PRIVACY:
- /* placeholders for length and seq. number: */
- svc_putnl(resv, 0);
- svc_putnl(resv, 0);
- if (unwrap_priv_data(rqstp, &rqstp->rq_arg,
- gc->gc_seq, rsci->mechctx))
+ /* placeholders for body length and seq. number: */
+ xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2);
+ if (svcauth_gss_unwrap_priv(rqstp, gc->gc_seq,
+ rsci->mechctx))
goto garbage_args;
- rqstp->rq_auth_slack = RPC_MAX_AUTH_SIZE * 2;
+ svcxdr_set_auth_slack(rqstp, RPC_MAX_AUTH_SIZE * 2);
break;
default:
goto auth_err;
@@ -1680,8 +1746,7 @@ garbage_args:
ret = SVC_GARBAGE;
goto out;
auth_err:
- /* Restore write pointer to its original value: */
- xdr_ressize_check(rqstp, reject_stat);
+ xdr_truncate_encode(&rqstp->rq_res_stream, XDR_UNIT * 2);
ret = SVC_DENIED;
goto out;
complete:
@@ -1695,104 +1760,125 @@ out:
return ret;
}
-static __be32 *
-svcauth_gss_prepare_to_wrap(struct xdr_buf *resbuf, struct gss_svc_data *gsd)
+static u32
+svcauth_gss_prepare_to_wrap(struct svc_rqst *rqstp, struct gss_svc_data *gsd)
{
- __be32 *p;
- u32 verf_len;
+ u32 offset;
- p = gsd->verf_start;
- gsd->verf_start = NULL;
+ /* Release can be called twice, but we only wrap once. */
+ offset = gsd->gsd_databody_offset;
+ gsd->gsd_databody_offset = 0;
- /* If the reply stat is nonzero, don't wrap: */
- if (*(p-1) != rpc_success)
- return NULL;
- /* Skip the verifier: */
- p += 1;
- verf_len = ntohl(*p++);
- p += XDR_QUADLEN(verf_len);
- /* move accept_stat to right place: */
- memcpy(p, p + 2, 4);
- /* Also don't wrap if the accept stat is nonzero: */
- if (*p != rpc_success) {
- resbuf->head[0].iov_len -= 2 * 4;
- return NULL;
- }
- p++;
- return p;
+ /* AUTH_ERROR replies are not wrapped. */
+ if (rqstp->rq_auth_stat != rpc_auth_ok)
+ return 0;
+
+ /* Also don't wrap if the accept_stat is nonzero: */
+ if (*rqstp->rq_accept_statp != rpc_success)
+ return 0;
+
+ return offset;
}
-static inline int
-svcauth_gss_wrap_resp_integ(struct svc_rqst *rqstp)
+/*
+ * RFC 2203, Section 5.3.2.2
+ *
+ * struct rpc_gss_integ_data {
+ * opaque databody_integ<>;
+ * opaque checksum<>;
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ *
+ * The RPC Reply message has already been XDR-encoded. rq_res_stream
+ * is now positioned so that the checksum can be written just past
+ * the RPC Reply message.
+ */
+static int svcauth_gss_wrap_integ(struct svc_rqst *rqstp)
{
- struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct rpc_gss_wire_cred *gc = &gsd->clcred;
- struct xdr_buf *resbuf = &rqstp->rq_res;
- struct xdr_buf integ_buf;
- struct xdr_netobj mic;
- struct kvec *resv;
- __be32 *p;
- int integ_offset, integ_len;
- int stat = -EINVAL;
+ struct xdr_buf *buf = xdr->buf;
+ struct xdr_buf databody_integ;
+ struct xdr_netobj checksum;
+ u32 offset, maj_stat;
- p = svcauth_gss_prepare_to_wrap(resbuf, gsd);
- if (p == NULL)
- goto out;
- integ_offset = (u8 *)(p + 1) - (u8 *)resbuf->head[0].iov_base;
- integ_len = resbuf->len - integ_offset;
- if (integ_len & 3)
+ offset = svcauth_gss_prepare_to_wrap(rqstp, gsd);
+ if (!offset)
goto out;
- *p++ = htonl(integ_len);
- *p++ = htonl(gc->gc_seq);
- if (xdr_buf_subsegment(resbuf, &integ_buf, integ_offset, integ_len)) {
- WARN_ON_ONCE(1);
- goto out_err;
- }
- if (resbuf->tail[0].iov_base == NULL) {
- if (resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE > PAGE_SIZE)
- goto out_err;
- resbuf->tail[0].iov_base = resbuf->head[0].iov_base
- + resbuf->head[0].iov_len;
- resbuf->tail[0].iov_len = 0;
- }
- resv = &resbuf->tail[0];
- mic.data = (u8 *)resv->iov_base + resv->iov_len + 4;
- if (gss_get_mic(gsd->rsci->mechctx, &integ_buf, &mic))
- goto out_err;
- svc_putnl(resv, mic.len);
- memset(mic.data + mic.len, 0,
- round_up_to_quad(mic.len) - mic.len);
- resv->iov_len += XDR_QUADLEN(mic.len) << 2;
- /* not strictly required: */
- resbuf->len += XDR_QUADLEN(mic.len) << 2;
- if (resv->iov_len > PAGE_SIZE)
- goto out_err;
+
+ if (xdr_buf_subsegment(buf, &databody_integ, offset + XDR_UNIT,
+ buf->len - offset - XDR_UNIT))
+ goto wrap_failed;
+ /* Buffer space for these has already been reserved in
+ * svcauth_gss_accept(). */
+ if (xdr_encode_word(buf, offset, databody_integ.len))
+ goto wrap_failed;
+ if (xdr_encode_word(buf, offset + XDR_UNIT, gc->gc_seq))
+ goto wrap_failed;
+
+ checksum.data = gsd->gsd_scratch;
+ maj_stat = gss_get_mic(gsd->rsci->mechctx, &databody_integ, &checksum);
+ if (maj_stat != GSS_S_COMPLETE)
+ goto bad_mic;
+
+ if (xdr_stream_encode_opaque(xdr, checksum.data, checksum.len) < 0)
+ goto wrap_failed;
+ xdr_commit_encode(xdr);
+
out:
- stat = 0;
-out_err:
- return stat;
+ return 0;
+
+bad_mic:
+ trace_rpcgss_svc_get_mic(rqstp, maj_stat);
+ return -EINVAL;
+wrap_failed:
+ trace_rpcgss_svc_wrap_failed(rqstp);
+ return -EINVAL;
}
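
The accept path reserved two XDR words up front; wrap time back-fills them with the databody length and the sequence number via xdr_encode_word(). A flat-buffer sketch of that reserve-then-backfill pattern (encode_word() here is a hypothetical stand-in for the kernel helper):

/* Flat-buffer sketch of the reserve-then-backfill pattern. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static void encode_word(unsigned char *buf, unsigned int off, uint32_t v)
{
	v = htonl(v);
	memcpy(buf + off, &v, 4);
}

int main(void)
{
	unsigned char buf[64] = { 0 };
	unsigned int offset = 0, pos = 8;	/* two words reserved */
	const char payload[] = "results";

	/* The reply body is encoded first, past the reserved words. */
	memcpy(buf + pos, payload, sizeof(payload));
	pos += sizeof(payload);

	/* Back-fill: databody_integ length covers seq_num + payload. */
	encode_word(buf, offset, 4 + sizeof(payload));
	encode_word(buf, offset + 4, 42);	/* seq_num */
	printf("encoded %u bytes\n", pos);
	return 0;
}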
-static inline int
-svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
+/*
+ * RFC 2203, Section 5.3.2.3
+ *
+ * struct rpc_gss_priv_data {
+ * opaque databody_priv<>
+ * };
+ *
+ * struct rpc_gss_data_t {
+ * unsigned int seq_num;
+ * proc_req_arg_t arg;
+ * };
+ *
+ * gss_wrap() expands the size of the RPC message payload in the
+ * response buffer. The main purpose of svcauth_gss_wrap_priv()
+ * is to ensure there is adequate space in the response buffer to
+ * avoid overflow during the wrap.
+ */
+static int svcauth_gss_wrap_priv(struct svc_rqst *rqstp)
{
- struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
struct rpc_gss_wire_cred *gc = &gsd->clcred;
- struct xdr_buf *resbuf = &rqstp->rq_res;
- struct page **inpages = NULL;
- __be32 *p, *len;
- int offset;
- int pad;
-
- p = svcauth_gss_prepare_to_wrap(resbuf, gsd);
- if (p == NULL)
+ struct xdr_buf *buf = &rqstp->rq_res;
+ struct kvec *head = buf->head;
+ struct kvec *tail = buf->tail;
+ u32 offset, pad, maj_stat;
+ __be32 *p;
+
+ offset = svcauth_gss_prepare_to_wrap(rqstp, gsd);
+ if (!offset)
return 0;
- len = p++;
- offset = (u8 *)p - (u8 *)resbuf->head[0].iov_base;
- *p++ = htonl(gc->gc_seq);
- inpages = resbuf->pages;
- /* XXX: Would be better to write some xdr helper functions for
- * nfs{2,3,4}xdr.c that place the data right, instead of copying: */
+
+ /*
+ * Buffer space for this field has already been reserved
+ * in svcauth_gss_accept(). Note that the GSS sequence
+ * number is encrypted along with the RPC reply payload.
+ */
+ if (xdr_encode_word(buf, offset + XDR_UNIT, gc->gc_seq))
+ goto wrap_failed;
/*
* If there is currently tail data, make sure there is
@@ -1801,19 +1887,17 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
* there is RPC_MAX_AUTH_SIZE slack space available in
* both the head and tail.
*/
- if (resbuf->tail[0].iov_base) {
- if (resbuf->tail[0].iov_base >=
- resbuf->head[0].iov_base + PAGE_SIZE)
- return -EINVAL;
- if (resbuf->tail[0].iov_base < resbuf->head[0].iov_base)
- return -EINVAL;
- if (resbuf->tail[0].iov_len + resbuf->head[0].iov_len
+ if (tail->iov_base) {
+ if (tail->iov_base >= head->iov_base + PAGE_SIZE)
+ goto wrap_failed;
+ if (tail->iov_base < head->iov_base)
+ goto wrap_failed;
+ if (tail->iov_len + head->iov_len
+ 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE)
- return -ENOMEM;
- memmove(resbuf->tail[0].iov_base + RPC_MAX_AUTH_SIZE,
- resbuf->tail[0].iov_base,
- resbuf->tail[0].iov_len);
- resbuf->tail[0].iov_base += RPC_MAX_AUTH_SIZE;
+ goto wrap_failed;
+ memmove(tail->iov_base + RPC_MAX_AUTH_SIZE, tail->iov_base,
+ tail->iov_len);
+ tail->iov_base += RPC_MAX_AUTH_SIZE;
}
/*
* If there is no current tail data, make sure there is
@@ -1822,55 +1906,73 @@ svcauth_gss_wrap_resp_priv(struct svc_rqst *rqstp)
* is RPC_MAX_AUTH_SIZE slack space available in both the
* head and tail.
*/
- if (resbuf->tail[0].iov_base == NULL) {
- if (resbuf->head[0].iov_len + 2*RPC_MAX_AUTH_SIZE > PAGE_SIZE)
- return -ENOMEM;
- resbuf->tail[0].iov_base = resbuf->head[0].iov_base
- + resbuf->head[0].iov_len + RPC_MAX_AUTH_SIZE;
- resbuf->tail[0].iov_len = 0;
+ if (!tail->iov_base) {
+ if (head->iov_len + 2 * RPC_MAX_AUTH_SIZE > PAGE_SIZE)
+ goto wrap_failed;
+ tail->iov_base = head->iov_base
+ + head->iov_len + RPC_MAX_AUTH_SIZE;
+ tail->iov_len = 0;
}
- if (gss_wrap(gsd->rsci->mechctx, offset, resbuf, inpages))
- return -ENOMEM;
- *len = htonl(resbuf->len - offset);
- pad = 3 - ((resbuf->len - offset - 1)&3);
- p = (__be32 *)(resbuf->tail[0].iov_base + resbuf->tail[0].iov_len);
+
+ maj_stat = gss_wrap(gsd->rsci->mechctx, offset + XDR_UNIT, buf,
+ buf->pages);
+ if (maj_stat != GSS_S_COMPLETE)
+ goto bad_wrap;
+
+ /* Wrapping can change the size of databody_priv. */
+ if (xdr_encode_word(buf, offset, buf->len - offset - XDR_UNIT))
+ goto wrap_failed;
+ pad = xdr_pad_size(buf->len - offset - XDR_UNIT);
+ p = (__be32 *)(tail->iov_base + tail->iov_len);
memset(p, 0, pad);
- resbuf->tail[0].iov_len += pad;
- resbuf->len += pad;
+ tail->iov_len += pad;
+ buf->len += pad;
+
return 0;
+wrap_failed:
+ trace_rpcgss_svc_wrap_failed(rqstp);
+ return -EINVAL;
+bad_wrap:
+ trace_rpcgss_svc_wrap(rqstp, maj_stat);
+ return -ENOMEM;
}
+/**
+ * svcauth_gss_release - Wrap payload and release resources
+ * @rqstp: RPC transaction context
+ *
+ * Return values:
+ * %0: the Reply is ready to be sent
+ * %-ENOMEM: failed to allocate memory
+ * %-EINVAL: encoding error
+ *
+ * XXX: These return values do not match the return values documented
+ * for the auth_ops ->release method in linux/sunrpc/svcauth.h.
+ */
static int
svcauth_gss_release(struct svc_rqst *rqstp)
{
- struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
- struct rpc_gss_wire_cred *gc;
- struct xdr_buf *resbuf = &rqstp->rq_res;
- int stat = -EINVAL;
struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
+ struct gss_svc_data *gsd = rqstp->rq_auth_data;
+ struct rpc_gss_wire_cred *gc;
+ int stat;
if (!gsd)
goto out;
gc = &gsd->clcred;
if (gc->gc_proc != RPC_GSS_PROC_DATA)
goto out;
- /* Release can be called twice, but we only wrap once. */
- if (gsd->verf_start == NULL)
- goto out;
- /* normally not set till svc_send, but we need it here: */
- /* XXX: what for? Do we mess it up the moment we call svc_putu32
- * or whatever? */
- resbuf->len = total_buf_len(resbuf);
+
switch (gc->gc_svc) {
case RPC_GSS_SVC_NONE:
break;
case RPC_GSS_SVC_INTEGRITY:
- stat = svcauth_gss_wrap_resp_integ(rqstp);
+ stat = svcauth_gss_wrap_integ(rqstp);
if (stat)
goto out_err;
break;
case RPC_GSS_SVC_PRIVACY:
- stat = svcauth_gss_wrap_resp_priv(rqstp);
+ stat = svcauth_gss_wrap_priv(rqstp);
if (stat)
goto out_err;
break;
@@ -1997,7 +2099,15 @@ gss_svc_init_net(struct net *net)
rv = create_use_gss_proxy_proc_entry(net);
if (rv)
goto out2;
+
+ rv = create_krb5_enctypes_proc_entry(net);
+ if (rv)
+ goto out3;
+
return 0;
+
+out3:
+ destroy_use_gss_proxy_proc_entry(net);
out2:
rsi_cache_destroy_net(net);
out1:
@@ -2008,6 +2118,7 @@ out1:
void
gss_svc_shutdown_net(struct net *net)
{
+ destroy_krb5_enctypes_proc_entry(net);
destroy_use_gss_proxy_proc_entry(net);
rsi_cache_destroy_net(net);
rsc_cache_destroy_net(net);
diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c
index 0b0b9f1eed46..fd7e1c630493 100644
--- a/net/sunrpc/clnt.c
+++ b/net/sunrpc/clnt.c
@@ -3350,6 +3350,8 @@ rpc_clnt_swap_deactivate_callback(struct rpc_clnt *clnt,
void
rpc_clnt_swap_deactivate(struct rpc_clnt *clnt)
{
+ while (clnt != clnt->cl_parent)
+ clnt = clnt->cl_parent;
if (atomic_dec_if_positive(&clnt->cl_swapper) == 0)
rpc_clnt_iterate_for_each_xprt(clnt,
rpc_clnt_swap_deactivate_callback, NULL);
diff --git a/net/sunrpc/netns.h b/net/sunrpc/netns.h
index 7ec10b92bea1..4efb5f28d881 100644
--- a/net/sunrpc/netns.h
+++ b/net/sunrpc/netns.h
@@ -33,6 +33,7 @@ struct sunrpc_net {
int pipe_version;
atomic_t pipe_users;
struct proc_dir_entry *use_gssp_proc;
+ struct proc_dir_entry *gss_krb5_enctypes;
};
extern unsigned int sunrpc_net_id;
diff --git a/net/sunrpc/stats.c b/net/sunrpc/stats.c
index 52908f9e6eab..65fc1297c6df 100644
--- a/net/sunrpc/stats.c
+++ b/net/sunrpc/stats.c
@@ -83,7 +83,8 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
{
const struct svc_program *prog = statp->program;
const struct svc_version *vers;
- unsigned int i, j;
+ unsigned int i, j, k;
+ unsigned long count;
seq_printf(seq,
"net %u %u %u %u\n",
@@ -104,8 +105,12 @@ void svc_seq_show(struct seq_file *seq, const struct svc_stat *statp)
if (!vers)
continue;
seq_printf(seq, "proc%d %u", i, vers->vs_nproc);
- for (j = 0; j < vers->vs_nproc; j++)
- seq_printf(seq, " %u", vers->vs_count[j]);
+ for (j = 0; j < vers->vs_nproc; j++) {
+ count = 0;
+ for_each_possible_cpu(k)
+ count += per_cpu(vers->vs_count[j], k);
+ seq_printf(seq, " %lu", count);
+ }
seq_putc(seq, '\n');
}
}
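
Each procedure counter is now a per-CPU variable bumped locklessly on the hot path and summed only when the stats file is read. A toy model of that split, with array slots standing in for per-CPU storage:

/* Toy split between lockless per-CPU increments and a summing
 * reader; array slots stand in for per-CPU storage. */
#include <stdio.h>

#define NR_CPUS 4

static unsigned long vs_count[NR_CPUS];

static void toy_this_cpu_inc(int cpu)
{
	vs_count[cpu]++;	/* no shared cacheline, no lock */
}

int main(void)
{
	unsigned long count = 0;
	int cpu;

	toy_this_cpu_inc(0);
	toy_this_cpu_inc(2);
	toy_this_cpu_inc(2);
	for (cpu = 0; cpu < NR_CPUS; cpu++)	/* the stats reader */
		count += vs_count[cpu];
	printf("proc count = %lu\n", count);
	return 0;
}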
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index 85f0c3cfc877..1fd3f5e57285 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -512,6 +512,10 @@ __svc_create(struct svc_program *prog, unsigned int bufsize, int npools,
INIT_LIST_HEAD(&pool->sp_sockets);
INIT_LIST_HEAD(&pool->sp_all_threads);
spin_lock_init(&pool->sp_lock);
+
+ percpu_counter_init(&pool->sp_sockets_queued, 0, GFP_KERNEL);
+ percpu_counter_init(&pool->sp_threads_woken, 0, GFP_KERNEL);
+ percpu_counter_init(&pool->sp_threads_timedout, 0, GFP_KERNEL);
}
return serv;
@@ -565,6 +569,7 @@ void
svc_destroy(struct kref *ref)
{
struct svc_serv *serv = container_of(ref, struct svc_serv, sv_refcnt);
+ unsigned int i;
dprintk("svc: svc_destroy(%s)\n", serv->sv_program->pg_name);
timer_shutdown_sync(&serv->sv_temptimer);
@@ -580,6 +585,13 @@ svc_destroy(struct kref *ref)
svc_pool_map_put(serv->sv_nrpools);
+ for (i = 0; i < serv->sv_nrpools; i++) {
+ struct svc_pool *pool = &serv->sv_pools[i];
+
+ percpu_counter_destroy(&pool->sp_sockets_queued);
+ percpu_counter_destroy(&pool->sp_threads_woken);
+ percpu_counter_destroy(&pool->sp_threads_timedout);
+ }
kfree(serv->sv_pools);
kfree(serv);
}
@@ -1208,7 +1220,7 @@ svc_generic_init_request(struct svc_rqst *rqstp,
memset(rqstp->rq_resp, 0, procp->pc_ressize);
/* Bump per-procedure stats counter */
- versp->vs_count[rqstp->rq_proc]++;
+ this_cpu_inc(versp->vs_count[rqstp->rq_proc]);
ret->dispatch = versp->vs_dispatch;
return rpc_success;
@@ -1225,50 +1237,42 @@ EXPORT_SYMBOL_GPL(svc_generic_init_request);
* Common routine for processing the RPC request.
*/
static int
-svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
+svc_process_common(struct svc_rqst *rqstp)
{
+ struct xdr_stream *xdr = &rqstp->rq_res_stream;
struct svc_program *progp;
const struct svc_procedure *procp = NULL;
struct svc_serv *serv = rqstp->rq_server;
struct svc_process_info process;
- __be32 *statp;
- u32 prog, vers;
- __be32 rpc_stat;
int auth_res, rc;
- __be32 *reply_statp;
-
- rpc_stat = rpc_success;
-
- if (argv->iov_len < 6*4)
- goto err_short_len;
+ unsigned int aoffset;
+ __be32 *p;
/* Will be turned off by GSS integrity and privacy services */
- __set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
+ set_bit(RQ_SPLICE_OK, &rqstp->rq_flags);
/* Will be turned off only when NFSv4 Sessions are used */
- __set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
- __clear_bit(RQ_DROPME, &rqstp->rq_flags);
-
- svc_putu32(resv, rqstp->rq_xid);
-
- vers = svc_getnl(argv);
+ set_bit(RQ_USEDEFERRAL, &rqstp->rq_flags);
+ clear_bit(RQ_DROPME, &rqstp->rq_flags);
- /* First words of reply: */
- svc_putnl(resv, 1); /* REPLY */
+ /* Construct the first words of the reply: */
+ svcxdr_init_encode(rqstp);
+ xdr_stream_encode_be32(xdr, rqstp->rq_xid);
+ xdr_stream_encode_be32(xdr, rpc_reply);
- if (vers != 2) /* RPC version number */
+ p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 4);
+ if (unlikely(!p))
+ goto err_short_len;
+ if (*p++ != cpu_to_be32(RPC_VERSION))
goto err_bad_rpc;
- /* Save position in case we later decide to reject: */
- reply_statp = resv->iov_base + resv->iov_len;
-
- svc_putnl(resv, 0); /* ACCEPT */
+ xdr_stream_encode_be32(xdr, rpc_msg_accepted);
- rqstp->rq_prog = prog = svc_getnl(argv); /* program number */
- rqstp->rq_vers = svc_getnl(argv); /* version number */
- rqstp->rq_proc = svc_getnl(argv); /* procedure number */
+ rqstp->rq_prog = be32_to_cpup(p++);
+ rqstp->rq_vers = be32_to_cpup(p++);
+ rqstp->rq_proc = be32_to_cpup(p);
for (progp = serv->sv_program; progp; progp = progp->pg_next)
- if (prog == progp->pg_prog)
+ if (rqstp->rq_prog == progp->pg_prog)
break;
/*
@@ -1285,10 +1289,9 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
case SVC_OK:
break;
case SVC_GARBAGE:
- goto err_garbage;
+ goto err_garbage_args;
case SVC_SYSERR:
- rpc_stat = rpc_system_err;
- goto err_bad;
+ goto err_system_err;
case SVC_DENIED:
goto err_bad_auth;
case SVC_CLOSE:
@@ -1302,8 +1305,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
if (progp == NULL)
goto err_bad_prog;
- rpc_stat = progp->pg_init_request(rqstp, progp, &process);
- switch (rpc_stat) {
+ switch (progp->pg_init_request(rqstp, progp, &process)) {
case rpc_success:
break;
case rpc_prog_unavail:
@@ -1323,9 +1325,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
serv->sv_stats->rpccnt++;
trace_svc_process(rqstp, progp->pg_name);
- /* Build the reply header. */
- statp = resv->iov_base +resv->iov_len;
- svc_putnl(resv, RPC_SUCCESS);
+ aoffset = xdr_stream_pos(xdr);
/* un-reserve some of the out-queue now that we have a
* better idea of reply size
@@ -1334,7 +1334,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
svc_reserve_auth(rqstp, procp->pc_xdrressize<<2);
/* Call the function that processes the request. */
- rc = process.dispatch(rqstp, statp);
+ rc = process.dispatch(rqstp);
if (procp->pc_release)
procp->pc_release(rqstp);
if (!rc)
@@ -1342,9 +1342,8 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
if (rqstp->rq_auth_stat != rpc_auth_ok)
goto err_bad_auth;
- /* Check RPC status result */
- if (*statp != rpc_success)
- resv->iov_len = ((void*)statp) - resv->iov_base + 4;
+ if (*rqstp->rq_accept_statp != rpc_success)
+ xdr_truncate_encode(xdr, aoffset);
if (procp->pc_encode == NULL)
goto dropit;
@@ -1368,33 +1367,34 @@ close_xprt:
return 0;
err_short_len:
- svc_printk(rqstp, "short len %zd, dropping request\n",
- argv->iov_len);
+ svc_printk(rqstp, "short len %u, dropping request\n",
+ rqstp->rq_arg.len);
goto close_xprt;
err_bad_rpc:
serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, 1); /* REJECT */
- svc_putnl(resv, 0); /* RPC_MISMATCH */
- svc_putnl(resv, 2); /* Only RPCv2 supported */
- svc_putnl(resv, 2);
+ xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+ xdr_stream_encode_u32(xdr, RPC_MISMATCH);
+ /* Only RPCv2 supported */
+ xdr_stream_encode_u32(xdr, RPC_VERSION);
+ xdr_stream_encode_u32(xdr, RPC_VERSION);
goto sendit;
err_bad_auth:
dprintk("svc: authentication failed (%d)\n",
be32_to_cpu(rqstp->rq_auth_stat));
serv->sv_stats->rpcbadauth++;
- /* Restore write pointer to location of accept status: */
- xdr_ressize_check(rqstp, reply_statp);
- svc_putnl(resv, 1); /* REJECT */
- svc_putnl(resv, 1); /* AUTH_ERROR */
- svc_putu32(resv, rqstp->rq_auth_stat); /* status */
+ /* Restore write pointer to location of reply status: */
+ xdr_truncate_encode(xdr, XDR_UNIT * 2);
+ xdr_stream_encode_u32(xdr, RPC_MSG_DENIED);
+ xdr_stream_encode_u32(xdr, RPC_AUTH_ERROR);
+ xdr_stream_encode_be32(xdr, rqstp->rq_auth_stat);
goto sendit;
err_bad_prog:
- dprintk("svc: unknown program %d\n", prog);
+ dprintk("svc: unknown program %d\n", rqstp->rq_prog);
serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, RPC_PROG_UNAVAIL);
+ xdr_stream_encode_u32(xdr, RPC_PROG_UNAVAIL);
goto sendit;
err_bad_vers:
@@ -1402,25 +1402,28 @@ err_bad_vers:
rqstp->rq_vers, rqstp->rq_prog, progp->pg_name);
serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, RPC_PROG_MISMATCH);
- svc_putnl(resv, process.mismatch.lovers);
- svc_putnl(resv, process.mismatch.hivers);
+ xdr_stream_encode_u32(xdr, RPC_PROG_MISMATCH);
+ xdr_stream_encode_u32(xdr, process.mismatch.lovers);
+ xdr_stream_encode_u32(xdr, process.mismatch.hivers);
goto sendit;
err_bad_proc:
svc_printk(rqstp, "unknown procedure (%d)\n", rqstp->rq_proc);
serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, RPC_PROC_UNAVAIL);
+ xdr_stream_encode_u32(xdr, RPC_PROC_UNAVAIL);
goto sendit;
-err_garbage:
- svc_printk(rqstp, "failed to decode args\n");
+err_garbage_args:
+ svc_printk(rqstp, "failed to decode RPC header\n");
+
+ serv->sv_stats->rpcbadfmt++;
+ xdr_stream_encode_u32(xdr, RPC_GARBAGE_ARGS);
+ goto sendit;
- rpc_stat = rpc_garbage_args;
-err_bad:
+err_system_err:
serv->sv_stats->rpcbadfmt++;
- svc_putnl(resv, ntohl(rpc_stat));
+ xdr_stream_encode_u32(xdr, RPC_SYSTEM_ERR);
goto sendit;
}
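
The accepted-reply header that svc_process_common() now emits through the xdr_stream is six XDR words before the procedure results: xid, REPLY, MSG_ACCEPTED, a two-word verifier, and accept_stat. A standalone sketch of that layout (AUTH_NULL verifier, SUCCESS status):

/* Standalone sketch of the accepted-reply header layout. */
#include <arpa/inet.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static unsigned char *put_u32(unsigned char *p, uint32_t v)
{
	v = htonl(v);
	memcpy(p, &v, 4);
	return p + 4;
}

int main(void)
{
	unsigned char hdr[24], *p = hdr;

	p = put_u32(p, 0x12345678);	/* xid, echoed from the Call */
	p = put_u32(p, 1);		/* msg_type: REPLY */
	p = put_u32(p, 0);		/* reply_stat: MSG_ACCEPTED */
	p = put_u32(p, 0);		/* verf flavor: AUTH_NULL */
	p = put_u32(p, 0);		/* verf body length: 0 */
	p = put_u32(p, 0);		/* accept_stat: SUCCESS */
	printf("reply header = %td bytes\n", p - hdr);
	return 0;
}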
@@ -1430,9 +1433,8 @@ err_bad:
int
svc_process(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
struct kvec *resv = &rqstp->rq_res.head[0];
- __be32 dir;
+ __be32 *p;
#if IS_ENABLED(CONFIG_FAIL_SUNRPC)
if (!fail_sunrpc.ignore_server_disconnect &&
@@ -1455,16 +1457,21 @@ svc_process(struct svc_rqst *rqstp)
rqstp->rq_res.tail[0].iov_base = NULL;
rqstp->rq_res.tail[0].iov_len = 0;
- dir = svc_getu32(argv);
- if (dir != rpc_call)
+ svcxdr_init_decode(rqstp);
+ p = xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2);
+ if (unlikely(!p))
+ goto out_drop;
+ rqstp->rq_xid = *p++;
+ if (unlikely(*p != rpc_call))
goto out_baddir;
- if (!svc_process_common(rqstp, argv, resv))
+
+ if (!svc_process_common(rqstp))
goto out_drop;
return svc_send(rqstp);
out_baddir:
svc_printk(rqstp, "bad direction 0x%08x, dropping request\n",
- be32_to_cpu(dir));
+ be32_to_cpu(*p));
rqstp->rq_server->sv_stats->rpcbadfmt++;
out_drop:
svc_drop(rqstp);
@@ -1481,8 +1488,6 @@ int
bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
struct rpc_task *task;
int proc_error;
int error;
@@ -1513,18 +1518,21 @@ bc_svc_process(struct svc_serv *serv, struct rpc_rqst *req,
rqstp->rq_arg.len = rqstp->rq_arg.head[0].iov_len +
rqstp->rq_arg.page_len;
- /* reset result send buffer "put" position */
- resv->iov_len = 0;
+ /* Reset the response buffer */
+ rqstp->rq_res.head[0].iov_len = 0;
/*
- * Skip the next two words because they've already been
- * processed in the transport
+ * Skip the XID and calldir fields because they've already
+ * been processed by the caller.
*/
- svc_getu32(argv); /* XID */
- svc_getnl(argv); /* CALLDIR */
+ svcxdr_init_decode(rqstp);
+ if (!xdr_inline_decode(&rqstp->rq_arg_stream, XDR_UNIT * 2)) {
+ error = -EINVAL;
+ goto out;
+ }
/* Parse and execute the bc call */
- proc_error = svc_process_common(rqstp, argv, resv);
+ proc_error = svc_process_common(rqstp);
atomic_dec(&req->rq_xprt->bc_slot_count);
if (!proc_error) {
diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c
index 2106003645a7..ba629297da4e 100644
--- a/net/sunrpc/svc_xprt.c
+++ b/net/sunrpc/svc_xprt.c
@@ -462,11 +462,9 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
pool = svc_pool_for_cpu(xprt->xpt_server);
- atomic_long_inc(&pool->sp_stats.packets);
-
+ percpu_counter_inc(&pool->sp_sockets_queued);
spin_lock_bh(&pool->sp_lock);
list_add_tail(&xprt->xpt_ready, &pool->sp_sockets);
- pool->sp_stats.sockets_queued++;
spin_unlock_bh(&pool->sp_lock);
/* find a thread for this xprt */
@@ -474,7 +472,7 @@ void svc_xprt_enqueue(struct svc_xprt *xprt)
list_for_each_entry_rcu(rqstp, &pool->sp_all_threads, rq_all) {
if (test_and_set_bit(RQ_BUSY, &rqstp->rq_flags))
continue;
- atomic_long_inc(&pool->sp_stats.threads_woken);
+ percpu_counter_inc(&pool->sp_threads_woken);
rqstp->rq_qtime = ktime_get();
wake_up_process(rqstp->rq_task);
goto out_unlock;
@@ -769,7 +767,7 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout)
goto out_found;
if (!time_left)
- atomic_long_inc(&pool->sp_stats.threads_timedout);
+ percpu_counter_inc(&pool->sp_threads_timedout);
if (signalled() || kthread_should_stop())
return ERR_PTR(-EINTR);
@@ -888,9 +886,7 @@ int svc_recv(struct svc_rqst *rqstp, long timeout)
clear_bit(XPT_OLD, &xprt->xpt_flags);
- xprt->xpt_ops->xpo_secure_port(rqstp);
rqstp->rq_chandle.defer = svc_defer;
- rqstp->rq_xid = svc_getu32(&rqstp->rq_arg.head[0]);
if (serv->sv_stats)
serv->sv_stats->netcnt++;
@@ -1238,7 +1234,7 @@ static struct cache_deferred_req *svc_defer(struct cache_req *req)
trace_svc_defer(rqstp);
svc_xprt_get(rqstp->rq_xprt);
dr->xprt = rqstp->rq_xprt;
- __set_bit(RQ_DROPME, &rqstp->rq_flags);
+ set_bit(RQ_DROPME, &rqstp->rq_flags);
dr->handle.revisit = svc_revisit;
return &dr->handle;
@@ -1441,12 +1437,12 @@ static int svc_pool_stats_show(struct seq_file *m, void *p)
return 0;
}
- seq_printf(m, "%u %lu %lu %lu %lu\n",
+ seq_printf(m, "%u %llu %llu %llu %llu\n",
pool->sp_id,
- (unsigned long)atomic_long_read(&pool->sp_stats.packets),
- pool->sp_stats.sockets_queued,
- (unsigned long)atomic_long_read(&pool->sp_stats.threads_woken),
- (unsigned long)atomic_long_read(&pool->sp_stats.threads_timedout));
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_sockets_queued),
+ percpu_counter_sum_positive(&pool->sp_threads_woken),
+ percpu_counter_sum_positive(&pool->sp_threads_timedout));
return 0;
}
diff --git a/net/sunrpc/svcauth.c b/net/sunrpc/svcauth.c
index e72ba2f13f6c..67d8245a08af 100644
--- a/net/sunrpc/svcauth.c
+++ b/net/sunrpc/svcauth.c
@@ -63,14 +63,17 @@ svc_put_auth_ops(struct auth_ops *aops)
int
svc_authenticate(struct svc_rqst *rqstp)
{
- rpc_authflavor_t flavor;
- struct auth_ops *aops;
+ struct auth_ops *aops;
+ u32 flavor;
rqstp->rq_auth_stat = rpc_auth_ok;
- flavor = svc_getnl(&rqstp->rq_arg.head[0]);
-
- dprintk("svc: svc_authenticate (%d)\n", flavor);
+ /*
+ * Decode the Call credential's flavor field. The credential's
+ * body field is decoded in the chosen ->accept method below.
+ */
+ if (xdr_stream_decode_u32(&rqstp->rq_arg_stream, &flavor) < 0)
+ return SVC_GARBAGE;
aops = svc_get_auth_ops(flavor);
if (aops == NULL) {
diff --git a/net/sunrpc/svcauth_unix.c b/net/sunrpc/svcauth_unix.c
index b1efc34db6ed..983c5891cb56 100644
--- a/net/sunrpc/svcauth_unix.c
+++ b/net/sunrpc/svcauth_unix.c
@@ -729,23 +729,38 @@ out:
EXPORT_SYMBOL_GPL(svcauth_unix_set_client);
+/**
+ * svcauth_null_accept - Decode and validate incoming RPC_AUTH_NULL credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Both credential and verifier are valid
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * rqstp->rq_auth_stat is set as mandated by RFC 5531.
+ */
static int
svcauth_null_accept(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
+ u32 flavor, len;
+ void *body;
- if (argv->iov_len < 3*4)
+ /* Length of Call's credential body field: */
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
return SVC_GARBAGE;
-
- if (svc_getu32(argv) != 0) {
- dprintk("svc: bad null cred\n");
+ if (len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
- if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
- dprintk("svc: bad null verf\n");
+
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -757,9 +772,11 @@ svcauth_null_accept(struct svc_rqst *rqstp)
if (cred->cr_group_info == NULL)
return SVC_CLOSE; /* kmalloc failure - client must retry */
- /* Put NULL verifier */
- svc_putnl(resv, RPC_AUTH_NULL);
- svc_putnl(resv, 0);
+ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
+ RPC_AUTH_NULL, NULL, 0) < 0)
+ return SVC_CLOSE;
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;
rqstp->rq_cred.cr_flavor = RPC_AUTH_NULL;
return SVC_OK;
@@ -783,31 +800,45 @@ struct auth_ops svcauth_null = {
.name = "null",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_NULL,
- .accept = svcauth_null_accept,
+ .accept = svcauth_null_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
+/**
+ * svcauth_tls_accept - Decode and validate incoming RPC_AUTH_TLS credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Both credential and verifier are valid
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * rqstp->rq_auth_stat is set as mandated by RFC 5531.
+ */
static int
svcauth_tls_accept(struct svc_rqst *rqstp)
{
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
- struct kvec *argv = rqstp->rq_arg.head;
- struct kvec *resv = rqstp->rq_res.head;
+ u32 flavor, len;
+ void *body;
+ __be32 *p;
- if (argv->iov_len < XDR_UNIT * 3)
+ /* Length of Call's credential body field: */
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
return SVC_GARBAGE;
-
- /* Call's cred length */
- if (svc_getu32(argv) != xdr_zero) {
+ if (len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badcred;
return SVC_DENIED;
}
- /* Call's verifier flavor and its length */
- if (svc_getu32(argv) != rpc_auth_null ||
- svc_getu32(argv) != xdr_zero) {
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
@@ -818,21 +849,27 @@ svcauth_tls_accept(struct svc_rqst *rqstp)
return SVC_DENIED;
}
- /* Mapping to nobody uid/gid is required */
+ /* Signal that mapping to nobody uid/gid is required */
cred->cr_uid = INVALID_UID;
cred->cr_gid = INVALID_GID;
cred->cr_group_info = groups_alloc(0);
if (cred->cr_group_info == NULL)
- return SVC_CLOSE; /* kmalloc failure - client must retry */
+ return SVC_CLOSE;
- /* Reply's verifier */
- svc_putnl(resv, RPC_AUTH_NULL);
if (rqstp->rq_xprt->xpt_ops->xpo_start_tls) {
- svc_putnl(resv, 8);
- memcpy(resv->iov_base + resv->iov_len, "STARTTLS", 8);
- resv->iov_len += 8;
- } else
- svc_putnl(resv, 0);
+ p = xdr_reserve_space(&rqstp->rq_res_stream, XDR_UNIT * 2 + 8);
+ if (!p)
+ return SVC_CLOSE;
+ *p++ = rpc_auth_null;
+ *p++ = cpu_to_be32(8);
+ memcpy(p, "STARTTLS", 8);
+ } else {
+ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
+ RPC_AUTH_NULL, NULL, 0) < 0)
+ return SVC_CLOSE;
+ }
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;
rqstp->rq_cred.cr_flavor = RPC_AUTH_TLS;
return SVC_OK;
@@ -842,32 +879,48 @@ struct auth_ops svcauth_tls = {
.name = "tls",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_TLS,
- .accept = svcauth_tls_accept,
+ .accept = svcauth_tls_accept,
.release = svcauth_null_release,
.set_client = svcauth_unix_set_client,
};
+/**
+ * svcauth_unix_accept - Decode and validate incoming RPC_AUTH_SYS credential
+ * @rqstp: RPC transaction
+ *
+ * Return values:
+ * %SVC_OK: Both credential and verifier are valid
+ * %SVC_DENIED: Credential or verifier is not valid
+ * %SVC_GARBAGE: Failed to decode credential or verifier
+ * %SVC_CLOSE: Temporary failure
+ *
+ * rqstp->rq_auth_stat is set as mandated by RFC 5531.
+ */
static int
svcauth_unix_accept(struct svc_rqst *rqstp)
{
- struct kvec *argv = &rqstp->rq_arg.head[0];
- struct kvec *resv = &rqstp->rq_res.head[0];
+ struct xdr_stream *xdr = &rqstp->rq_arg_stream;
struct svc_cred *cred = &rqstp->rq_cred;
struct user_namespace *userns;
- u32 slen, i;
- int len = argv->iov_len;
+ u32 flavor, len, i;
+ void *body;
+ __be32 *p;
- if ((len -= 3*4) < 0)
+ /*
+ * This implementation ignores the length of the Call's
+ * credential body field and the timestamp and machinename
+ * fields.
+ */
+ p = xdr_inline_decode(xdr, XDR_UNIT * 3);
+ if (!p)
+ return SVC_GARBAGE;
+ len = be32_to_cpup(p + 2);
+ if (len > RPC_MAX_MACHINENAME)
+ return SVC_GARBAGE;
+ if (!xdr_inline_decode(xdr, len))
return SVC_GARBAGE;
- svc_getu32(argv); /* length */
- svc_getu32(argv); /* time stamp */
- slen = XDR_QUADLEN(svc_getnl(argv)); /* machname length */
- if (slen > 64 || (len -= (slen + 3)*4) < 0)
- goto badcred;
- argv->iov_base = (void*)((__be32*)argv->iov_base + slen); /* skip machname */
- argv->iov_len -= slen*4;
/*
* Note: we skip uid_valid()/gid_valid() checks here for
* backwards compatibility with clients that use -1 id's.
@@ -877,27 +930,42 @@ svcauth_unix_accept(struct svc_rqst *rqstp)
*/
userns = (rqstp->rq_xprt && rqstp->rq_xprt->xpt_cred) ?
rqstp->rq_xprt->xpt_cred->user_ns : &init_user_ns;
- cred->cr_uid = make_kuid(userns, svc_getnl(argv)); /* uid */
- cred->cr_gid = make_kgid(userns, svc_getnl(argv)); /* gid */
- slen = svc_getnl(argv); /* gids length */
- if (slen > UNX_NGROUPS || (len -= (slen + 2)*4) < 0)
+ if (xdr_stream_decode_u32(xdr, &i) < 0)
+ return SVC_GARBAGE;
+ cred->cr_uid = make_kuid(userns, i);
+ if (xdr_stream_decode_u32(xdr, &i) < 0)
+ return SVC_GARBAGE;
+ cred->cr_gid = make_kgid(userns, i);
+
+ if (xdr_stream_decode_u32(xdr, &len) < 0)
+ return SVC_GARBAGE;
+ if (len > UNX_NGROUPS)
goto badcred;
- cred->cr_group_info = groups_alloc(slen);
+ p = xdr_inline_decode(xdr, XDR_UNIT * len);
+ if (!p)
+ return SVC_GARBAGE;
+ cred->cr_group_info = groups_alloc(len);
if (cred->cr_group_info == NULL)
return SVC_CLOSE;
- for (i = 0; i < slen; i++) {
- kgid_t kgid = make_kgid(userns, svc_getnl(argv));
+ for (i = 0; i < len; i++) {
+ kgid_t kgid = make_kgid(userns, be32_to_cpup(p++));
cred->cr_group_info->gid[i] = kgid;
}
groups_sort(cred->cr_group_info);
- if (svc_getu32(argv) != htonl(RPC_AUTH_NULL) || svc_getu32(argv) != 0) {
+
+ /* Call's verf field: */
+ if (xdr_stream_decode_opaque_auth(xdr, &flavor, &body, &len) < 0)
+ return SVC_GARBAGE;
+ if (flavor != RPC_AUTH_NULL || len != 0) {
rqstp->rq_auth_stat = rpc_autherr_badverf;
return SVC_DENIED;
}
- /* Put NULL verifier */
- svc_putnl(resv, RPC_AUTH_NULL);
- svc_putnl(resv, 0);
+ if (xdr_stream_encode_opaque_auth(&rqstp->rq_res_stream,
+ RPC_AUTH_NULL, NULL, 0) < 0)
+ return SVC_CLOSE;
+ if (!svcxdr_set_accept_stat(rqstp))
+ return SVC_CLOSE;
rqstp->rq_cred.cr_flavor = RPC_AUTH_UNIX;
return SVC_OK;
@@ -927,7 +995,7 @@ struct auth_ops svcauth_unix = {
.name = "unix",
.owner = THIS_MODULE,
.flavour = RPC_AUTH_UNIX,
- .accept = svcauth_unix_accept,
+ .accept = svcauth_unix_accept,
.release = svcauth_unix_release,
.domain_release = svcauth_unix_domain_release,
.set_client = svcauth_unix_set_client,
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 015714398007..03a4f5615086 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -55,6 +55,7 @@
#include <linux/sunrpc/stats.h>
#include <linux/sunrpc/xprt.h>
+#include <trace/events/sock.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
@@ -252,11 +253,8 @@ static ssize_t svc_tcp_read_msg(struct svc_rqst *rqstp, size_t buflen,
clear_bit(XPT_DATA, &svsk->sk_xprt.xpt_flags);
- for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE) {
- bvec[i].bv_page = rqstp->rq_pages[i];
- bvec[i].bv_len = PAGE_SIZE;
- bvec[i].bv_offset = 0;
- }
+ for (i = 0, t = 0; t < buflen; i++, t += PAGE_SIZE)
+ bvec_set_page(&bvec[i], rqstp->rq_pages[i], PAGE_SIZE, 0);
rqstp->rq_respages = &rqstp->rq_pages[i];
rqstp->rq_next_page = rqstp->rq_respages + 1;
@@ -298,9 +296,9 @@ static void svc_sock_setbufsize(struct svc_sock *svsk, unsigned int nreqs)
static void svc_sock_secure_port(struct svc_rqst *rqstp)
{
if (svc_port_is_privileged(svc_addr(rqstp)))
- __set_bit(RQ_SECURE, &rqstp->rq_flags);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
else
- __clear_bit(RQ_SECURE, &rqstp->rq_flags);
+ clear_bit(RQ_SECURE, &rqstp->rq_flags);
}
/*
@@ -310,6 +308,8 @@ static void svc_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+ trace_sk_data_ready(sk);
+
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
@@ -508,6 +508,7 @@ static int svc_udp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->netudpcnt++;
+ svc_sock_secure_port(rqstp);
svc_xprt_received(rqstp->rq_xprt);
return len;
@@ -636,7 +637,6 @@ static const struct svc_xprt_ops svc_udp_ops = {
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_udp_has_wspace,
.xpo_accept = svc_udp_accept,
- .xpo_secure_port = svc_sock_secure_port,
.xpo_kill_temp_xprt = svc_udp_kill_temp_xprt,
};
@@ -687,6 +687,8 @@ static void svc_tcp_listen_data_ready(struct sock *sk)
{
struct svc_sock *svsk = (struct svc_sock *)sk->sk_user_data;
+ trace_sk_data_ready(sk);
+
if (svsk) {
/* Refer to svc_setup_socket() for details. */
rmb();
@@ -1008,9 +1010,9 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = NULL;
rqstp->rq_prot = IPPROTO_TCP;
if (test_bit(XPT_LOCAL, &svsk->sk_xprt.xpt_flags))
- __set_bit(RQ_LOCAL, &rqstp->rq_flags);
+ set_bit(RQ_LOCAL, &rqstp->rq_flags);
else
- __clear_bit(RQ_LOCAL, &rqstp->rq_flags);
+ clear_bit(RQ_LOCAL, &rqstp->rq_flags);
p = (__be32 *)rqstp->rq_arg.head[0].iov_base;
calldir = p[1];
@@ -1028,6 +1030,7 @@ static int svc_tcp_recvfrom(struct svc_rqst *rqstp)
if (serv->sv_stats)
serv->sv_stats->nettcpcnt++;
+ svc_sock_secure_port(rqstp);
svc_xprt_received(rqstp->rq_xprt);
return rqstp->rq_arg.len;
@@ -1209,7 +1212,6 @@ static const struct svc_xprt_ops svc_tcp_ops = {
.xpo_free = svc_sock_free,
.xpo_has_wspace = svc_tcp_has_wspace,
.xpo_accept = svc_tcp_accept,
- .xpo_secure_port = svc_sock_secure_port,
.xpo_kill_temp_xprt = svc_tcp_kill_temp_xprt,
};
diff --git a/net/sunrpc/sysfs.c b/net/sunrpc/sysfs.c
index 1e05a2d723f4..0d0db4e1064e 100644
--- a/net/sunrpc/sysfs.c
+++ b/net/sunrpc/sysfs.c
@@ -36,7 +36,7 @@ rpc_sysfs_object_child_ns_type(const struct kobject *kobj)
return &net_ns_type_operations;
}
-static struct kobj_type rpc_sysfs_object_type = {
+static const struct kobj_type rpc_sysfs_object_type = {
.release = rpc_sysfs_object_release,
.sysfs_ops = &kobj_sysfs_ops,
.child_ns_type = rpc_sysfs_object_child_ns_type,
@@ -427,20 +427,20 @@ static struct attribute *rpc_sysfs_xprt_switch_attrs[] = {
};
ATTRIBUTE_GROUPS(rpc_sysfs_xprt_switch);
-static struct kobj_type rpc_sysfs_client_type = {
+static const struct kobj_type rpc_sysfs_client_type = {
.release = rpc_sysfs_client_release,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_client_namespace,
};
-static struct kobj_type rpc_sysfs_xprt_switch_type = {
+static const struct kobj_type rpc_sysfs_xprt_switch_type = {
.release = rpc_sysfs_xprt_switch_release,
.default_groups = rpc_sysfs_xprt_switch_groups,
.sysfs_ops = &kobj_sysfs_ops,
.namespace = rpc_sysfs_xprt_switch_namespace,
};
-static struct kobj_type rpc_sysfs_xprt_type = {
+static const struct kobj_type rpc_sysfs_xprt_type = {
.release = rpc_sysfs_xprt_release,
.default_groups = rpc_sysfs_xprt_groups,
.sysfs_ops = &kobj_sysfs_ops,
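Constifying these kobj_type descriptors is possible because the kobject core now accepts const struct kobj_type pointers, letting the structures live in read-only data. A sketch with hypothetical names:

#include <linux/kobject.h>

static void example_release(struct kobject *kobj)
{
	/* free the object embedding kobj here */
}

static const struct kobj_type example_ktype = {
	.release   = example_release,
	.sysfs_ops = &kobj_sysfs_ops,
};

In kernels where kobject_init_and_add() takes the type as const, no cast is needed at the registration site.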
diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c
index f7767bf22406..36835b2f5446 100644
--- a/net/sunrpc/xdr.c
+++ b/net/sunrpc/xdr.c
@@ -150,9 +150,8 @@ xdr_alloc_bvec(struct xdr_buf *buf, gfp_t gfp)
if (!buf->bvec)
return -ENOMEM;
for (i = 0; i < n; i++) {
- buf->bvec[i].bv_page = buf->pages[i];
- buf->bvec[i].bv_len = PAGE_SIZE;
- buf->bvec[i].bv_offset = 0;
+ bvec_set_page(&buf->bvec[i], buf->pages[i], PAGE_SIZE,
+ 0);
}
}
return 0;
@@ -863,13 +862,6 @@ static unsigned int xdr_shrink_pagelen(struct xdr_buf *buf, unsigned int len)
return shift;
}
-void
-xdr_shift_buf(struct xdr_buf *buf, size_t len)
-{
- xdr_shrink_bufhead(buf, buf->head->iov_len - len);
-}
-EXPORT_SYMBOL_GPL(xdr_shift_buf);
-
/**
* xdr_stream_pos - Return the current offset from the start of the xdr_stream
* @xdr: pointer to struct xdr_stream
@@ -1193,6 +1185,21 @@ void xdr_truncate_encode(struct xdr_stream *xdr, size_t len)
EXPORT_SYMBOL(xdr_truncate_encode);
/**
+ * xdr_truncate_decode - Truncate a decoding stream
+ * @xdr: pointer to struct xdr_stream
+ * @len: Number of bytes to remove
+ *
+ */
+void xdr_truncate_decode(struct xdr_stream *xdr, size_t len)
+{
+ unsigned int nbytes = xdr_align_size(len);
+
+ xdr->buf->len -= nbytes;
+ xdr->nwords -= XDR_QUADLEN(nbytes);
+}
+EXPORT_SYMBOL_GPL(xdr_truncate_decode);
+
+/**
* xdr_restrict_buflen - decrease available buffer space
* @xdr: pointer to xdr_stream
* @newbuflen: new maximum number of bytes available
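Note that xdr_truncate_decode() rounds @len up to XDR alignment before adjusting the buffer, keeping byte and word counts consistent. A hypothetical illustration of the arithmetic:

#include <linux/sunrpc/xdr.h>

/* Dropping a 5-octet trailer actually trims 8 octets:
 * xdr_align_size(5) == 8, and xdr->nwords shrinks by
 * XDR_QUADLEN(8) == 2 four-byte words.
 */
static void trim_trailer(struct xdr_stream *xdr)
{
	xdr_truncate_decode(xdr, 5);
}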
@@ -2274,3 +2281,60 @@ ssize_t xdr_stream_decode_string_dup(struct xdr_stream *xdr, char **str,
return ret;
}
EXPORT_SYMBOL_GPL(xdr_stream_decode_string_dup);
+
+/**
+ * xdr_stream_decode_opaque_auth - Decode struct opaque_auth (RFC5531 S8.2)
+ * @xdr: pointer to xdr_stream
+ * @flavor: location to store decoded flavor
+ * @body: location to store decode body
+ * @body_len: location to store length of decoded body
+ *
+ * Return values:
+ * On success, returns the number of buffer bytes consumed
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE if the decoded size of the body field exceeds 400 octets
+ */
+ssize_t xdr_stream_decode_opaque_auth(struct xdr_stream *xdr, u32 *flavor,
+ void **body, unsigned int *body_len)
+{
+ ssize_t ret, len;
+
+ len = xdr_stream_decode_u32(xdr, flavor);
+ if (unlikely(len < 0))
+ return len;
+ ret = xdr_stream_decode_opaque_inline(xdr, body, RPC_MAX_AUTH_SIZE);
+ if (unlikely(ret < 0))
+ return ret;
+ *body_len = ret;
+ return len + ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_decode_opaque_auth);
+
+/**
+ * xdr_stream_encode_opaque_auth - Encode struct opaque_auth (RFC5531 S8.2)
+ * @xdr: pointer to xdr_stream
+ * @flavor: verifier flavor to encode
+ * @body: content of body to encode
+ * @body_len: length of body to encode
+ *
+ * Return values:
+ * On success, returns length in bytes of XDR buffer consumed
+ * %-EBADMSG on XDR buffer overflow
+ * %-EMSGSIZE if the size of @body exceeds 400 octets
+ */
+ssize_t xdr_stream_encode_opaque_auth(struct xdr_stream *xdr, u32 flavor,
+ void *body, unsigned int body_len)
+{
+ ssize_t ret, len;
+
+ if (unlikely(body_len > RPC_MAX_AUTH_SIZE))
+ return -EMSGSIZE;
+ len = xdr_stream_encode_u32(xdr, flavor);
+ if (unlikely(len < 0))
+ return len;
+ ret = xdr_stream_encode_opaque(xdr, body, body_len);
+ if (unlikely(ret < 0))
+ return ret;
+ return len + ret;
+}
+EXPORT_SYMBOL_GPL(xdr_stream_encode_opaque_auth);
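Both helpers enforce the RFC 5531 cap of 400 octets (RPC_MAX_AUTH_SIZE) on the body. A hypothetical round trip, assuming already-initialized encode and decode streams:

#include <linux/sunrpc/xdr.h>

static int auth_round_trip(struct xdr_stream *enc, struct xdr_stream *dec)
{
	u8 body_in[4] = { 1, 2, 3, 4 };
	unsigned int body_len;
	void *body;
	u32 flavor;
	ssize_t ret;

	/* flavor 0 is AUTH_NONE in RFC 5531 */
	ret = xdr_stream_encode_opaque_auth(enc, 0, body_in,
					    sizeof(body_in));
	if (ret < 0)
		return ret;

	ret = xdr_stream_decode_opaque_auth(dec, &flavor, &body, &body_len);
	return ret < 0 ? ret : 0;
}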
diff --git a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
index 5242ad121450..1c658fa43063 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
@@ -847,6 +847,7 @@ int svc_rdma_recvfrom(struct svc_rqst *rqstp)
rqstp->rq_xprt_ctxt = ctxt;
rqstp->rq_prot = IPPROTO_MAX;
svc_xprt_copy_addrs(rqstp, xprt);
+ set_bit(RQ_SECURE, &rqstp->rq_flags);
return rqstp->rq_arg.len;
out_err:
diff --git a/net/sunrpc/xprtrdma/svc_rdma_transport.c b/net/sunrpc/xprtrdma/svc_rdma_transport.c
index 199fa012f18a..416b298f74dd 100644
--- a/net/sunrpc/xprtrdma/svc_rdma_transport.c
+++ b/net/sunrpc/xprtrdma/svc_rdma_transport.c
@@ -73,7 +73,6 @@ static struct svc_xprt *svc_rdma_accept(struct svc_xprt *xprt);
static void svc_rdma_detach(struct svc_xprt *xprt);
static void svc_rdma_free(struct svc_xprt *xprt);
static int svc_rdma_has_wspace(struct svc_xprt *xprt);
-static void svc_rdma_secure_port(struct svc_rqst *);
static void svc_rdma_kill_temp_xprt(struct svc_xprt *);
static const struct svc_xprt_ops svc_rdma_ops = {
@@ -86,7 +85,6 @@ static const struct svc_xprt_ops svc_rdma_ops = {
.xpo_free = svc_rdma_free,
.xpo_has_wspace = svc_rdma_has_wspace,
.xpo_accept = svc_rdma_accept,
- .xpo_secure_port = svc_rdma_secure_port,
.xpo_kill_temp_xprt = svc_rdma_kill_temp_xprt,
};
@@ -600,11 +598,6 @@ static int svc_rdma_has_wspace(struct svc_xprt *xprt)
return 1;
}
-static void svc_rdma_secure_port(struct svc_rqst *rqstp)
-{
- __set_bit(RQ_SECURE, &rqstp->rq_flags);
-}
-
static void svc_rdma_kill_temp_xprt(struct svc_xprt *xprt)
{
}
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index aaa5b2741b79..adcbedc244d6 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -52,6 +52,7 @@
#include <linux/uio.h>
#include <linux/sched/mm.h>
+#include <trace/events/sock.h>
#include <trace/events/sunrpc.h>
#include "socklib.h"
@@ -1378,6 +1379,8 @@ static void xs_data_ready(struct sock *sk)
{
struct rpc_xprt *xprt;
+ trace_sk_data_ready(sk);
+
xprt = xprt_from_sock(sk);
if (xprt != NULL) {
struct sock_xprt *transport = container_of(xprt,
diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index d67440de011e..577fa5af33ec 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -267,10 +267,10 @@ static int tipc_aead_encrypt(struct tipc_aead *aead, struct sk_buff *skb,
struct tipc_bearer *b,
struct tipc_media_addr *dst,
struct tipc_node *__dnode);
-static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err);
+static void tipc_aead_encrypt_done(void *data, int err);
static int tipc_aead_decrypt(struct net *net, struct tipc_aead *aead,
struct sk_buff *skb, struct tipc_bearer *b);
-static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err);
+static void tipc_aead_decrypt_done(void *data, int err);
static inline int tipc_ehdr_size(struct tipc_ehdr *ehdr);
static int tipc_ehdr_build(struct net *net, struct tipc_aead *aead,
u8 tx_key, struct sk_buff *skb,
@@ -830,9 +830,9 @@ exit:
return rc;
}
-static void tipc_aead_encrypt_done(struct crypto_async_request *base, int err)
+static void tipc_aead_encrypt_done(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct tipc_crypto_tx_ctx *tx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
struct tipc_bearer *b = tx_ctx->bearer;
struct tipc_aead *aead = tx_ctx->aead;
@@ -954,9 +954,9 @@ exit:
return rc;
}
-static void tipc_aead_decrypt_done(struct crypto_async_request *base, int err)
+static void tipc_aead_decrypt_done(void *data, int err)
{
- struct sk_buff *skb = base->data;
+ struct sk_buff *skb = data;
struct tipc_crypto_rx_ctx *rx_ctx = TIPC_SKB_CB(skb)->crypto_ctx;
struct tipc_bearer *b = rx_ctx->bearer;
struct tipc_aead *aead = rx_ctx->aead;
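The TIPC and TLS callback changes in this series follow the crypto API's new completion signature: the handler now receives the registered context pointer directly instead of digging it out of a struct crypto_async_request. A sketch with hypothetical names:

#include <crypto/aead.h>
#include <linux/skbuff.h>

static void example_done(void *data, int err)
{
	struct sk_buff *skb = data;	/* exactly what was registered below */

	if (err)
		kfree_skb(skb);
	/* ... otherwise finish processing skb ... */
}

static void example_submit(struct aead_request *req, struct sk_buff *skb)
{
	aead_request_set_callback(req, CRYPTO_TFM_REQ_MAY_BACKLOG,
				  example_done, skb);
	/* crypto_aead_encrypt(req) or similar would follow */
}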
diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c
index dfea27a906f2..9b47c8409231 100644
--- a/net/tipc/netlink_compat.c
+++ b/net/tipc/netlink_compat.c
@@ -39,6 +39,7 @@
#include "node.h"
#include "net.h"
#include <net/genetlink.h>
+#include <linux/string_helpers.h>
#include <linux/tipc_config.h>
/* The legacy API had an artificial message length limit called
@@ -173,11 +174,6 @@ static struct sk_buff *tipc_get_err_tlv(char *str)
return buf;
}
-static inline bool string_is_valid(char *s, int len)
-{
- return memchr(s, '\0', len) ? true : false;
-}
-
static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd,
struct tipc_nl_compat_msg *msg,
struct sk_buff *arg)
@@ -445,7 +441,7 @@ static int tipc_nl_compat_bearer_enable(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_BEARER_NAME);
- if (!string_is_valid(b->name, len))
+ if (!string_is_terminated(b->name, len))
return -EINVAL;
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, b->name))
@@ -486,7 +482,7 @@ static int tipc_nl_compat_bearer_disable(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_BEARER_NAME);
- if (!string_is_valid(name, len))
+ if (!string_is_terminated(name, len))
return -EINVAL;
if (nla_put_string(skb, TIPC_NLA_BEARER_NAME, name))
@@ -584,7 +580,7 @@ static int tipc_nl_compat_link_stat_dump(struct tipc_nl_compat_msg *msg,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_LINK_NAME);
- if (!string_is_valid(name, len))
+ if (!string_is_terminated(name, len))
return -EINVAL;
if (strcmp(name, nla_data(link[TIPC_NLA_LINK_NAME])) != 0)
@@ -819,7 +815,7 @@ static int tipc_nl_compat_link_set(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_LINK_NAME);
- if (!string_is_valid(lc->name, len))
+ if (!string_is_terminated(lc->name, len))
return -EINVAL;
media = tipc_media_find(lc->name);
@@ -856,7 +852,7 @@ static int tipc_nl_compat_link_reset_stats(struct tipc_nl_compat_cmd_doit *cmd,
return -EINVAL;
len = min_t(int, len, TIPC_MAX_LINK_NAME);
- if (!string_is_valid(name, len))
+ if (!string_is_terminated(name, len))
return -EINVAL;
if (nla_put_string(skb, TIPC_NLA_LINK_NAME, name))
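string_is_terminated() from <linux/string_helpers.h> is a drop-in replacement for the local memchr()-based helper removed above. A minimal sketch with a hypothetical validation function:

#include <linux/string_helpers.h>

/* Reject a length-bounded, peer-supplied buffer that lacks a NUL
 * terminator before treating it as a C string.
 */
static int validate_name(const char *name, int len)
{
	return string_is_terminated(name, len) ? 0 : -EINVAL;
}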
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 49ddc484c4fe..5e000fde8067 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1179,8 +1179,9 @@ void tipc_node_check_dest(struct net *net, u32 addr,
bool addr_match = false;
bool sign_match = false;
bool link_up = false;
+ bool link_is_reset = false;
bool accept_addr = false;
- bool reset = true;
+ bool reset = false;
char *if_name;
unsigned long intv;
u16 session;
@@ -1200,14 +1201,14 @@ void tipc_node_check_dest(struct net *net, u32 addr,
/* Prepare to validate requesting node's signature and media address */
l = le->link;
link_up = l && tipc_link_is_up(l);
+ link_is_reset = l && tipc_link_is_reset(l);
addr_match = l && !memcmp(&le->maddr, maddr, sizeof(*maddr));
sign_match = (signature == n->signature);
/* These three flags give us eight permutations: */
if (sign_match && addr_match && link_up) {
- /* All is fine. Do nothing. */
- reset = false;
+ /* All is fine. Ignore requests. */
/* Peer node is not a container/local namespace */
if (!n->peer_hash_mix)
n->peer_hash_mix = hash_mixes;
@@ -1232,6 +1233,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
*/
accept_addr = true;
*respond = true;
+ reset = true;
} else if (!sign_match && addr_match && link_up) {
/* Peer node rebooted. Two possibilities:
* - Delayed re-discovery; this link endpoint has already
@@ -1263,6 +1265,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
n->signature = signature;
accept_addr = true;
*respond = true;
+ reset = true;
}
if (!accept_addr)
@@ -1291,6 +1294,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
tipc_link_fsm_evt(l, LINK_RESET_EVT);
if (n->state == NODE_FAILINGOVER)
tipc_link_fsm_evt(l, LINK_FAILOVER_BEGIN_EVT);
+ link_is_reset = tipc_link_is_reset(l);
le->link = l;
n->link_cnt++;
tipc_node_calculate_timer(n, l);
@@ -1303,7 +1307,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
memcpy(&le->maddr, maddr, sizeof(*maddr));
exit:
tipc_node_write_unlock(n);
- if (reset && l && !tipc_link_is_reset(l))
+ if (reset && !link_is_reset)
tipc_node_link_down(n, b->identity, false);
tipc_node_put(n);
}
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index b35c8701876a..37edfe10f8c6 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -37,6 +37,7 @@
#include <linux/rhashtable.h>
#include <linux/sched/signal.h>
+#include <trace/events/sock.h>
#include "core.h"
#include "name_table.h"
@@ -2130,6 +2131,8 @@ static void tipc_data_ready(struct sock *sk)
{
struct socket_wq *wq;
+ trace_sk_data_ready(sk);
+
rcu_read_lock();
wq = rcu_dereference(sk->sk_wq);
if (skwq_has_sleeper(wq))
@@ -2614,6 +2617,7 @@ static int tipc_connect(struct socket *sock, struct sockaddr *dest,
/* Send a 'SYN-' to destination */
m.msg_name = dest;
m.msg_namelen = destlen;
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
/* If connect is in non-blocking case, set MSG_DONTWAIT to
* indicate send_msg() is never blocked.
@@ -2776,6 +2780,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
__skb_queue_head(&new_sk->sk_receive_queue, buf);
skb_set_owner_r(buf, new_sk);
}
+ iov_iter_kvec(&m.msg_iter, ITER_SOURCE, NULL, 0, 0);
__tipc_sendstream(new_sock, &m, 0);
release_sock(new_sk);
exit:
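The two iov_iter_kvec() calls added to tipc_connect() and tipc_accept() give the msghdr's iterator a valid empty state before it reaches the send path. The pattern, sketched with a hypothetical helper:

#include <linux/socket.h>
#include <linux/uio.h>

/* A zero-segment, zero-length source iterator: the send path can
 * safely query msg_data_left() and friends without touching
 * uninitialized iterator state.
 */
static void init_empty_source(struct msghdr *m)
{
	iov_iter_kvec(&m->msg_iter, ITER_SOURCE, NULL, 0, 0);
}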
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 69c88cc03887..8ee0c07d00e9 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -43,6 +43,7 @@
#include "bearer.h"
#include <net/sock.h>
#include <linux/module.h>
+#include <trace/events/sock.h>
/* Number of messages to send before rescheduling */
#define MAX_SEND_MSG_COUNT 25
@@ -439,6 +440,8 @@ static void tipc_conn_data_ready(struct sock *sk)
{
struct tipc_conn *con;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
con = sk->sk_user_data;
if (connected(con)) {
@@ -496,6 +499,8 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
{
struct tipc_topsrv *srv;
+ trace_sk_data_ready(sk);
+
read_lock_bh(&sk->sk_callback_lock);
srv = sk->sk_user_data;
if (srv)
diff --git a/net/tls/tls.h b/net/tls/tls.h
index 0e840a0c3437..804c3880d028 100644
--- a/net/tls/tls.h
+++ b/net/tls/tls.h
@@ -70,6 +70,8 @@ struct tls_rec {
char content_type;
struct scatterlist sg_content_type;
+ struct sock *sk;
+
char aad_space[TLS_AAD_SPACE_SIZE];
u8 iv_data[MAX_IV_SIZE];
struct aead_request aead_req;
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 9ed978634125..782d3701b86f 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -38,11 +38,13 @@
#include <linux/bug.h>
#include <linux/sched/signal.h>
#include <linux/module.h>
+#include <linux/kernel.h>
#include <linux/splice.h>
#include <crypto/aead.h>
#include <net/strparser.h>
#include <net/tls.h>
+#include <trace/events/sock.h>
#include "tls.h"
@@ -57,6 +59,7 @@ struct tls_decrypt_arg {
};
struct tls_decrypt_ctx {
+ struct sock *sk;
u8 iv[MAX_IV_SIZE];
u8 aad[TLS_MAX_AAD_SIZE];
u8 tail;
@@ -177,18 +180,25 @@ static int tls_padding_length(struct tls_prot_info *prot, struct sk_buff *skb,
return sub;
}
-static void tls_decrypt_done(struct crypto_async_request *req, int err)
+static void tls_decrypt_done(void *data, int err)
{
- struct aead_request *aead_req = (struct aead_request *)req;
+ struct aead_request *aead_req = data;
+ struct crypto_aead *aead = crypto_aead_reqtfm(aead_req);
struct scatterlist *sgout = aead_req->dst;
struct scatterlist *sgin = aead_req->src;
struct tls_sw_context_rx *ctx;
+ struct tls_decrypt_ctx *dctx;
struct tls_context *tls_ctx;
struct scatterlist *sg;
unsigned int pages;
struct sock *sk;
+ int aead_size;
- sk = (struct sock *)req->data;
+ aead_size = sizeof(*aead_req) + crypto_aead_reqsize(aead);
+ aead_size = ALIGN(aead_size, __alignof__(*dctx));
+ dctx = (void *)((u8 *)aead_req + aead_size);
+
+ sk = dctx->sk;
tls_ctx = tls_get_ctx(sk);
ctx = tls_sw_ctx_rx(tls_ctx);
@@ -240,7 +250,7 @@ static int tls_do_decryption(struct sock *sk,
if (darg->async) {
aead_request_set_callback(aead_req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
- tls_decrypt_done, sk);
+ tls_decrypt_done, aead_req);
atomic_inc(&ctx->decrypt_pending);
} else {
aead_request_set_callback(aead_req,
@@ -336,6 +346,8 @@ static struct tls_rec *tls_get_rec(struct sock *sk)
sg_set_buf(&rec->sg_aead_out[0], rec->aad_space, prot->aad_size);
sg_unmark_end(&rec->sg_aead_out[1]);
+ rec->sk = sk;
+
return rec;
}
@@ -417,22 +429,25 @@ tx_err:
return rc;
}
-static void tls_encrypt_done(struct crypto_async_request *req, int err)
+static void tls_encrypt_done(void *data, int err)
{
- struct aead_request *aead_req = (struct aead_request *)req;
- struct sock *sk = req->data;
- struct tls_context *tls_ctx = tls_get_ctx(sk);
- struct tls_prot_info *prot = &tls_ctx->prot_info;
- struct tls_sw_context_tx *ctx = tls_sw_ctx_tx(tls_ctx);
+ struct tls_sw_context_tx *ctx;
+ struct tls_context *tls_ctx;
+ struct tls_prot_info *prot;
+ struct tls_rec *rec = data;
struct scatterlist *sge;
struct sk_msg *msg_en;
- struct tls_rec *rec;
bool ready = false;
+ struct sock *sk;
int pending;
- rec = container_of(aead_req, struct tls_rec, aead_req);
msg_en = &rec->msg_encrypted;
+ sk = rec->sk;
+ tls_ctx = tls_get_ctx(sk);
+ prot = &tls_ctx->prot_info;
+ ctx = tls_sw_ctx_tx(tls_ctx);
+
sge = sk_msg_elem(msg_en, msg_en->sg.curr);
sge->offset -= prot->prepend_size;
sge->length += prot->prepend_size;
@@ -520,7 +535,7 @@ static int tls_do_encryption(struct sock *sk,
data_len, rec->iv_data);
aead_request_set_callback(aead_req, CRYPTO_TFM_REQ_MAY_BACKLOG,
- tls_encrypt_done, sk);
+ tls_encrypt_done, rec);
/* Add the record in tx_list */
list_add_tail((struct list_head *)&rec->list, &ctx->tx_list);
@@ -1485,6 +1500,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
* Both structs are variable length.
*/
aead_size = sizeof(*aead_req) + crypto_aead_reqsize(ctx->aead_recv);
+ aead_size = ALIGN(aead_size, __alignof__(*dctx));
mem = kmalloc(aead_size + struct_size(dctx, sg, n_sgin + n_sgout),
sk->sk_allocation);
if (!mem) {
@@ -1495,6 +1511,7 @@ static int tls_decrypt_sg(struct sock *sk, struct iov_iter *out_iov,
/* Segment the allocated memory */
aead_req = (struct aead_request *)mem;
dctx = (struct tls_decrypt_ctx *)(mem + aead_size);
+ dctx->sk = sk;
sgin = &dctx->sg[0];
sgout = &dctx->sg[n_sgin];
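The added ALIGN() step matters because tls_decrypt_done() now recomputes the context address from the request pointer alone, so the allocation site and the completion handler must agree on the offset. The shared layout, sketched with a hypothetical context type:

#include <crypto/aead.h>
#include <linux/kernel.h>

struct example_ctx {
	struct sock *sk;
	/* ... other per-request state ... */
};

/* The context lives immediately after the variable-size
 * aead_request; ALIGN() keeps it correctly aligned so this
 * computation is valid on both the alloc and completion sides.
 */
static struct example_ctx *ctx_of(struct aead_request *req,
				  struct crypto_aead *tfm)
{
	int aead_size = sizeof(*req) + crypto_aead_reqsize(tfm);

	aead_size = ALIGN(aead_size, __alignof__(struct example_ctx));
	return (void *)((u8 *)req + aead_size);
}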
@@ -2284,6 +2301,8 @@ static void tls_data_ready(struct sock *sk)
struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx);
struct sk_psock *psock;
+ trace_sk_data_ready(sk);
+
tls_strp_data_ready(&ctx->strp);
psock = sk_psock_get(sk);
@@ -2427,7 +2446,7 @@ static bool tls_is_tx_ready(struct tls_sw_context_tx *ctx)
{
struct tls_rec *rec;
- rec = list_first_entry(&ctx->tx_list, struct tls_rec, list);
+ rec = list_first_entry_or_null(&ctx->tx_list, struct tls_rec, list);
if (!rec)
return false;
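list_first_entry() computes an entry pointer even for an empty list, so the NULL check above would never fire; the _or_null variant makes the empty case explicit. A generic sketch:

#include <linux/list.h>

struct item {
	struct list_head list;
};

/* Returns NULL for an empty list instead of a bogus pointer
 * derived from the list head itself.
 */
static struct item *first_item(struct list_head *head)
{
	return list_first_entry_or_null(head, struct item, list);
}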
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index f0c2293f1d3b..347122c3575e 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -112,6 +112,7 @@
#include <linux/mount.h>
#include <net/checksum.h>
#include <linux/security.h>
+#include <linux/splice.h>
#include <linux/freezer.h>
#include <linux/file.h>
#include <linux/btf_ids.h>
@@ -807,23 +808,23 @@ static int unix_count_nr_fds(struct sock *sk)
static void unix_show_fdinfo(struct seq_file *m, struct socket *sock)
{
struct sock *sk = sock->sk;
+ unsigned char s_state;
struct unix_sock *u;
- int nr_fds;
+ int nr_fds = 0;
if (sk) {
+ s_state = READ_ONCE(sk->sk_state);
u = unix_sk(sk);
- if (sock->type == SOCK_DGRAM) {
- nr_fds = atomic_read(&u->scm_stat.nr_fds);
- goto out_print;
- }
- unix_state_lock(sk);
- if (sk->sk_state != TCP_LISTEN)
+ /* SOCK_STREAM and SOCK_SEQPACKET sockets never change their
+ * sk_state after switching to TCP_ESTABLISHED or TCP_LISTEN.
+ * For SOCK_DGRAM the state is irrelevant. So, no lock is needed.
+ */
+ if (sock->type == SOCK_DGRAM || s_state == TCP_ESTABLISHED)
nr_fds = atomic_read(&u->scm_stat.nr_fds);
- else
+ else if (s_state == TCP_LISTEN)
nr_fds = unix_count_nr_fds(sk);
- unix_state_unlock(sk);
-out_print:
+
seq_printf(m, "scm_fds: %u\n", nr_fds);
}
}
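The rewrite above relies on sk_state being written once for stream/seqpacket AF_UNIX sockets, so a READ_ONCE() snapshot replaces the unix_state_lock() critical section. The lockless-read idiom, sketched:

#include <net/sock.h>
#include <net/tcp_states.h>

/* One tear-free snapshot of a state that no longer changes; the
 * value is then tested without holding any lock.
 */
static bool example_is_established(const struct sock *sk)
{
	return READ_ONCE(sk->sk_state) == TCP_ESTABLISHED;
}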
@@ -1190,7 +1191,7 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
unsigned int new_hash, old_hash = sk->sk_hash;
struct unix_sock *u = unix_sk(sk);
struct net *net = sock_net(sk);
- struct user_namespace *ns; // barf...
+ struct mnt_idmap *idmap;
struct unix_address *addr;
struct dentry *dentry;
struct path parent;
@@ -1217,10 +1218,10 @@ static int unix_bind_bsd(struct sock *sk, struct sockaddr_un *sunaddr,
/*
* All right, let's create it.
*/
- ns = mnt_user_ns(parent.mnt);
+ idmap = mnt_idmap(parent.mnt);
err = security_path_mknod(&parent, dentry, mode, 0);
if (!err)
- err = vfs_mknod(ns, d_inode(parent.dentry), dentry, mode, 0);
+ err = vfs_mknod(idmap, d_inode(parent.dentry), dentry, mode, 0);
if (err)
goto out_path;
err = mutex_lock_interruptible(&u->bindlock);
@@ -1245,7 +1246,7 @@ out_unlock:
err = -EINVAL;
out_unlink:
/* failed after successful mknod? unlink what we'd created... */
- vfs_unlink(ns, d_inode(parent.dentry), dentry, NULL);
+ vfs_unlink(idmap, d_inode(parent.dentry), dentry, NULL);
out_path:
done_path_create(&parent, dentry);
out:
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index d593d5b6d4b1..19aea7cba26e 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -1861,8 +1861,9 @@ static int vsock_connectible_sendmsg(struct socket *sock, struct msghdr *msg,
written = transport->stream_enqueue(vsk,
msg, len - total_written);
}
+
if (written < 0) {
- err = -ENOMEM;
+ err = written;
goto out_err;
}
diff --git a/net/vmw_vsock/hyperv_transport.c b/net/vmw_vsock/hyperv_transport.c
index 59c3e2697069..7cb1a9d2cdb4 100644
--- a/net/vmw_vsock/hyperv_transport.c
+++ b/net/vmw_vsock/hyperv_transport.c
@@ -879,13 +879,11 @@ static int hvs_probe(struct hv_device *hdev,
return 0;
}
-static int hvs_remove(struct hv_device *hdev)
+static void hvs_remove(struct hv_device *hdev)
{
struct vmbus_channel *chan = hdev->channel;
vmbus_close(chan);
-
- return 0;
}
/* hv_sock connections can not persist across hibernation, and all the hv_sock
diff --git a/net/vmw_vsock/virtio_transport.c b/net/vmw_vsock/virtio_transport.c
index ad64f403536a..28b5a8e8e094 100644
--- a/net/vmw_vsock/virtio_transport.c
+++ b/net/vmw_vsock/virtio_transport.c
@@ -42,8 +42,7 @@ struct virtio_vsock {
bool tx_run;
struct work_struct send_pkt_work;
- spinlock_t send_pkt_list_lock;
- struct list_head send_pkt_list;
+ struct sk_buff_head send_pkt_queue;
atomic_t queued_replies;
@@ -101,41 +100,31 @@ virtio_transport_send_pkt_work(struct work_struct *work)
vq = vsock->vqs[VSOCK_VQ_TX];
for (;;) {
- struct virtio_vsock_pkt *pkt;
struct scatterlist hdr, buf, *sgs[2];
int ret, in_sg = 0, out_sg = 0;
+ struct sk_buff *skb;
bool reply;
- spin_lock_bh(&vsock->send_pkt_list_lock);
- if (list_empty(&vsock->send_pkt_list)) {
- spin_unlock_bh(&vsock->send_pkt_list_lock);
+ skb = virtio_vsock_skb_dequeue(&vsock->send_pkt_queue);
+ if (!skb)
break;
- }
-
- pkt = list_first_entry(&vsock->send_pkt_list,
- struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
- spin_unlock_bh(&vsock->send_pkt_list_lock);
- virtio_transport_deliver_tap_pkt(pkt);
+ virtio_transport_deliver_tap_pkt(skb);
+ reply = virtio_vsock_skb_reply(skb);
- reply = pkt->reply;
-
- sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
+ sg_init_one(&hdr, virtio_vsock_hdr(skb), sizeof(*virtio_vsock_hdr(skb)));
sgs[out_sg++] = &hdr;
- if (pkt->buf) {
- sg_init_one(&buf, pkt->buf, pkt->len);
+ if (skb->len > 0) {
+ sg_init_one(&buf, skb->data, skb->len);
sgs[out_sg++] = &buf;
}
- ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, pkt, GFP_KERNEL);
+ ret = virtqueue_add_sgs(vq, sgs, out_sg, in_sg, skb, GFP_KERNEL);
/* Usually this means that there is no more space available in
* the vq
*/
if (ret < 0) {
- spin_lock_bh(&vsock->send_pkt_list_lock);
- list_add(&pkt->list, &vsock->send_pkt_list);
- spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtio_vsock_skb_queue_head(&vsock->send_pkt_queue, skb);
break;
}
@@ -164,32 +153,32 @@ out:
}
static int
-virtio_transport_send_pkt(struct virtio_vsock_pkt *pkt)
+virtio_transport_send_pkt(struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr;
struct virtio_vsock *vsock;
- int len = pkt->len;
+ int len = skb->len;
+
+ hdr = virtio_vsock_hdr(skb);
rcu_read_lock();
vsock = rcu_dereference(the_virtio_vsock);
if (!vsock) {
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
len = -ENODEV;
goto out_rcu;
}
- if (le64_to_cpu(pkt->hdr.dst_cid) == vsock->guest_cid) {
- virtio_transport_free_pkt(pkt);
+ if (le64_to_cpu(hdr->dst_cid) == vsock->guest_cid) {
+ kfree_skb(skb);
len = -ENODEV;
goto out_rcu;
}
- if (pkt->reply)
+ if (virtio_vsock_skb_reply(skb))
atomic_inc(&vsock->queued_replies);
- spin_lock_bh(&vsock->send_pkt_list_lock);
- list_add_tail(&pkt->list, &vsock->send_pkt_list);
- spin_unlock_bh(&vsock->send_pkt_list_lock);
-
+ virtio_vsock_skb_queue_tail(&vsock->send_pkt_queue, skb);
queue_work(virtio_vsock_workqueue, &vsock->send_pkt_work);
out_rcu:
@@ -201,9 +190,7 @@ static int
virtio_transport_cancel_pkt(struct vsock_sock *vsk)
{
struct virtio_vsock *vsock;
- struct virtio_vsock_pkt *pkt, *n;
int cnt = 0, ret;
- LIST_HEAD(freeme);
rcu_read_lock();
vsock = rcu_dereference(the_virtio_vsock);
@@ -212,20 +199,7 @@ virtio_transport_cancel_pkt(struct vsock_sock *vsk)
goto out_rcu;
}
- spin_lock_bh(&vsock->send_pkt_list_lock);
- list_for_each_entry_safe(pkt, n, &vsock->send_pkt_list, list) {
- if (pkt->vsk != vsk)
- continue;
- list_move(&pkt->list, &freeme);
- }
- spin_unlock_bh(&vsock->send_pkt_list_lock);
-
- list_for_each_entry_safe(pkt, n, &freeme, list) {
- if (pkt->reply)
- cnt++;
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
+ cnt = virtio_transport_purge_skbs(vsk, &vsock->send_pkt_queue);
if (cnt) {
struct virtqueue *rx_vq = vsock->vqs[VSOCK_VQ_RX];
@@ -246,38 +220,28 @@ out_rcu:
static void virtio_vsock_rx_fill(struct virtio_vsock *vsock)
{
- int buf_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE;
- struct virtio_vsock_pkt *pkt;
- struct scatterlist hdr, buf, *sgs[2];
+ int total_len = VIRTIO_VSOCK_DEFAULT_RX_BUF_SIZE + VIRTIO_VSOCK_SKB_HEADROOM;
+ struct scatterlist pkt, *p;
struct virtqueue *vq;
+ struct sk_buff *skb;
int ret;
vq = vsock->vqs[VSOCK_VQ_RX];
do {
- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt)
+ skb = virtio_vsock_alloc_skb(total_len, GFP_KERNEL);
+ if (!skb)
break;
- pkt->buf = kmalloc(buf_len, GFP_KERNEL);
- if (!pkt->buf) {
- virtio_transport_free_pkt(pkt);
+ memset(skb->head, 0, VIRTIO_VSOCK_SKB_HEADROOM);
+ sg_init_one(&pkt, virtio_vsock_hdr(skb), total_len);
+ p = &pkt;
+ ret = virtqueue_add_sgs(vq, &p, 0, 1, skb, GFP_KERNEL);
+ if (ret < 0) {
+ kfree_skb(skb);
break;
}
- pkt->buf_len = buf_len;
- pkt->len = buf_len;
-
- sg_init_one(&hdr, &pkt->hdr, sizeof(pkt->hdr));
- sgs[0] = &hdr;
-
- sg_init_one(&buf, pkt->buf, buf_len);
- sgs[1] = &buf;
- ret = virtqueue_add_sgs(vq, sgs, 0, 2, pkt, GFP_KERNEL);
- if (ret) {
- virtio_transport_free_pkt(pkt);
- break;
- }
vsock->rx_buf_nr++;
} while (vq->num_free);
if (vsock->rx_buf_nr > vsock->rx_buf_max_nr)
@@ -299,12 +263,12 @@ static void virtio_transport_tx_work(struct work_struct *work)
goto out;
do {
- struct virtio_vsock_pkt *pkt;
+ struct sk_buff *skb;
unsigned int len;
virtqueue_disable_cb(vq);
- while ((pkt = virtqueue_get_buf(vq, &len)) != NULL) {
- virtio_transport_free_pkt(pkt);
+ while ((skb = virtqueue_get_buf(vq, &len)) != NULL) {
+ consume_skb(skb);
added = true;
}
} while (!virtqueue_enable_cb(vq));
@@ -529,7 +493,7 @@ static void virtio_transport_rx_work(struct work_struct *work)
do {
virtqueue_disable_cb(vq);
for (;;) {
- struct virtio_vsock_pkt *pkt;
+ struct sk_buff *skb;
unsigned int len;
if (!virtio_transport_more_replies(vsock)) {
@@ -540,23 +504,22 @@ static void virtio_transport_rx_work(struct work_struct *work)
goto out;
}
- pkt = virtqueue_get_buf(vq, &len);
- if (!pkt) {
+ skb = virtqueue_get_buf(vq, &len);
+ if (!skb)
break;
- }
vsock->rx_buf_nr--;
/* Drop short/long packets */
- if (unlikely(len < sizeof(pkt->hdr) ||
- len > sizeof(pkt->hdr) + pkt->len)) {
- virtio_transport_free_pkt(pkt);
+ if (unlikely(len < sizeof(struct virtio_vsock_hdr) ||
+ len > virtio_vsock_skb_len(skb))) {
+ kfree_skb(skb);
continue;
}
- pkt->len = len - sizeof(pkt->hdr);
- virtio_transport_deliver_tap_pkt(pkt);
- virtio_transport_recv_pkt(&virtio_transport, pkt);
+ virtio_vsock_skb_rx_put(skb);
+ virtio_transport_deliver_tap_pkt(skb);
+ virtio_transport_recv_pkt(&virtio_transport, skb);
}
} while (!virtqueue_enable_cb(vq));
@@ -610,7 +573,7 @@ static int virtio_vsock_vqs_init(struct virtio_vsock *vsock)
static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
{
struct virtio_device *vdev = vsock->vdev;
- struct virtio_vsock_pkt *pkt;
+ struct sk_buff *skb;
/* Reset all connected sockets when the VQs disappear */
vsock_for_each_connected_socket(&virtio_transport.transport,
@@ -637,23 +600,16 @@ static void virtio_vsock_vqs_del(struct virtio_vsock *vsock)
virtio_reset_device(vdev);
mutex_lock(&vsock->rx_lock);
- while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
- virtio_transport_free_pkt(pkt);
+ while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_RX])))
+ kfree_skb(skb);
mutex_unlock(&vsock->rx_lock);
mutex_lock(&vsock->tx_lock);
- while ((pkt = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
- virtio_transport_free_pkt(pkt);
+ while ((skb = virtqueue_detach_unused_buf(vsock->vqs[VSOCK_VQ_TX])))
+ kfree_skb(skb);
mutex_unlock(&vsock->tx_lock);
- spin_lock_bh(&vsock->send_pkt_list_lock);
- while (!list_empty(&vsock->send_pkt_list)) {
- pkt = list_first_entry(&vsock->send_pkt_list,
- struct virtio_vsock_pkt, list);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
- spin_unlock_bh(&vsock->send_pkt_list_lock);
+ virtio_vsock_skb_queue_purge(&vsock->send_pkt_queue);
/* Delete virtqueues and flush outstanding callbacks if any */
vdev->config->del_vqs(vdev);
@@ -690,8 +646,7 @@ static int virtio_vsock_probe(struct virtio_device *vdev)
mutex_init(&vsock->tx_lock);
mutex_init(&vsock->rx_lock);
mutex_init(&vsock->event_lock);
- spin_lock_init(&vsock->send_pkt_list_lock);
- INIT_LIST_HEAD(&vsock->send_pkt_list);
+ skb_queue_head_init(&vsock->send_pkt_queue);
INIT_WORK(&vsock->rx_work, virtio_transport_rx_work);
INIT_WORK(&vsock->tx_work, virtio_transport_tx_work);
INIT_WORK(&vsock->event_work, virtio_transport_event_work);
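Throughout the virtio/vsock conversion, the separate list + spinlock pair is replaced by an sk_buff_head, which embeds its own lock. The basic queue operations, sketched with a hypothetical demo function:

#include <linux/skbuff.h>

static void queue_demo(struct sk_buff_head *q, struct sk_buff *skb)
{
	skb_queue_head_init(q);		/* init list and embedded lock */

	skb_queue_tail(q, skb);		/* takes q->lock internally */

	skb = skb_dequeue(q);		/* NULL when the queue is empty */
	if (skb)
		kfree_skb(skb);
}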
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index a9980e9b9304..a1581c77cf84 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -37,53 +37,56 @@ virtio_transport_get_ops(struct vsock_sock *vsk)
return container_of(t, struct virtio_transport, transport);
}
-static struct virtio_vsock_pkt *
-virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
+/* Returns a new packet on success, otherwise returns NULL.
+ *
+ * This signature carries no error pointer; callers learn of a
+ * failure only via the NULL return.
+ */
+static struct sk_buff *
+virtio_transport_alloc_skb(struct virtio_vsock_pkt_info *info,
size_t len,
u32 src_cid,
u32 src_port,
u32 dst_cid,
u32 dst_port)
{
- struct virtio_vsock_pkt *pkt;
+ const size_t skb_len = VIRTIO_VSOCK_SKB_HEADROOM + len;
+ struct virtio_vsock_hdr *hdr;
+ struct sk_buff *skb;
+ void *payload;
int err;
- pkt = kzalloc(sizeof(*pkt), GFP_KERNEL);
- if (!pkt)
+ skb = virtio_vsock_alloc_skb(skb_len, GFP_KERNEL);
+ if (!skb)
return NULL;
- pkt->hdr.type = cpu_to_le16(info->type);
- pkt->hdr.op = cpu_to_le16(info->op);
- pkt->hdr.src_cid = cpu_to_le64(src_cid);
- pkt->hdr.dst_cid = cpu_to_le64(dst_cid);
- pkt->hdr.src_port = cpu_to_le32(src_port);
- pkt->hdr.dst_port = cpu_to_le32(dst_port);
- pkt->hdr.flags = cpu_to_le32(info->flags);
- pkt->len = len;
- pkt->hdr.len = cpu_to_le32(len);
- pkt->reply = info->reply;
- pkt->vsk = info->vsk;
+ hdr = virtio_vsock_hdr(skb);
+ hdr->type = cpu_to_le16(info->type);
+ hdr->op = cpu_to_le16(info->op);
+ hdr->src_cid = cpu_to_le64(src_cid);
+ hdr->dst_cid = cpu_to_le64(dst_cid);
+ hdr->src_port = cpu_to_le32(src_port);
+ hdr->dst_port = cpu_to_le32(dst_port);
+ hdr->flags = cpu_to_le32(info->flags);
+ hdr->len = cpu_to_le32(len);
if (info->msg && len > 0) {
- pkt->buf = kmalloc(len, GFP_KERNEL);
- if (!pkt->buf)
- goto out_pkt;
-
- pkt->buf_len = len;
-
- err = memcpy_from_msg(pkt->buf, info->msg, len);
+ payload = skb_put(skb, len);
+ err = memcpy_from_msg(payload, info->msg, len);
if (err)
goto out;
if (msg_data_left(info->msg) == 0 &&
info->type == VIRTIO_VSOCK_TYPE_SEQPACKET) {
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOM);
if (info->msg->msg_flags & MSG_EOR)
- pkt->hdr.flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
+ hdr->flags |= cpu_to_le32(VIRTIO_VSOCK_SEQ_EOR);
}
}
+ if (info->reply)
+ virtio_vsock_skb_set_reply(skb);
+
trace_virtio_transport_alloc_pkt(src_cid, src_port,
dst_cid, dst_port,
len,
@@ -91,19 +94,18 @@ virtio_transport_alloc_pkt(struct virtio_vsock_pkt_info *info,
info->op,
info->flags);
- return pkt;
+ return skb;
out:
- kfree(pkt->buf);
-out_pkt:
- kfree(pkt);
+ kfree_skb(skb);
return NULL;
}
/* Packet capture */
static struct sk_buff *virtio_transport_build_skb(void *opaque)
{
- struct virtio_vsock_pkt *pkt = opaque;
+ struct virtio_vsock_hdr *pkt_hdr;
+ struct sk_buff *pkt = opaque;
struct af_vsockmon_hdr *hdr;
struct sk_buff *skb;
size_t payload_len;
@@ -113,10 +115,11 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
* the payload length from the header and the buffer pointer taking
* care of the offset in the original packet.
*/
- payload_len = le32_to_cpu(pkt->hdr.len);
- payload_buf = pkt->buf + pkt->off;
+ pkt_hdr = virtio_vsock_hdr(pkt);
+ payload_len = pkt->len;
+ payload_buf = pkt->data;
- skb = alloc_skb(sizeof(*hdr) + sizeof(pkt->hdr) + payload_len,
+ skb = alloc_skb(sizeof(*hdr) + sizeof(*pkt_hdr) + payload_len,
GFP_ATOMIC);
if (!skb)
return NULL;
@@ -124,16 +127,16 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
hdr = skb_put(skb, sizeof(*hdr));
/* pkt->hdr is little-endian so no need to byteswap here */
- hdr->src_cid = pkt->hdr.src_cid;
- hdr->src_port = pkt->hdr.src_port;
- hdr->dst_cid = pkt->hdr.dst_cid;
- hdr->dst_port = pkt->hdr.dst_port;
+ hdr->src_cid = pkt_hdr->src_cid;
+ hdr->src_port = pkt_hdr->src_port;
+ hdr->dst_cid = pkt_hdr->dst_cid;
+ hdr->dst_port = pkt_hdr->dst_port;
hdr->transport = cpu_to_le16(AF_VSOCK_TRANSPORT_VIRTIO);
- hdr->len = cpu_to_le16(sizeof(pkt->hdr));
+ hdr->len = cpu_to_le16(sizeof(*pkt_hdr));
memset(hdr->reserved, 0, sizeof(hdr->reserved));
- switch (le16_to_cpu(pkt->hdr.op)) {
+ switch (le16_to_cpu(pkt_hdr->op)) {
case VIRTIO_VSOCK_OP_REQUEST:
case VIRTIO_VSOCK_OP_RESPONSE:
hdr->op = cpu_to_le16(AF_VSOCK_OP_CONNECT);
@@ -154,7 +157,7 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
break;
}
- skb_put_data(skb, &pkt->hdr, sizeof(pkt->hdr));
+ skb_put_data(skb, pkt_hdr, sizeof(*pkt_hdr));
if (payload_len) {
skb_put_data(skb, payload_buf, payload_len);
@@ -163,13 +166,13 @@ static struct sk_buff *virtio_transport_build_skb(void *opaque)
return skb;
}
-void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt)
+void virtio_transport_deliver_tap_pkt(struct sk_buff *skb)
{
- if (pkt->tap_delivered)
+ if (virtio_vsock_skb_tap_delivered(skb))
return;
- vsock_deliver_tap(virtio_transport_build_skb, pkt);
- pkt->tap_delivered = true;
+ vsock_deliver_tap(virtio_transport_build_skb, skb);
+ virtio_vsock_skb_set_tap_delivered(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_deliver_tap_pkt);
@@ -192,8 +195,8 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
u32 src_cid, src_port, dst_cid, dst_port;
const struct virtio_transport *t_ops;
struct virtio_vsock_sock *vvs;
- struct virtio_vsock_pkt *pkt;
u32 pkt_len = info->pkt_len;
+ struct sk_buff *skb;
info->type = virtio_transport_get_type(sk_vsock(vsk));
@@ -224,42 +227,47 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (pkt_len == 0 && info->op == VIRTIO_VSOCK_OP_RW)
return pkt_len;
- pkt = virtio_transport_alloc_pkt(info, pkt_len,
+ skb = virtio_transport_alloc_skb(info, pkt_len,
src_cid, src_port,
dst_cid, dst_port);
- if (!pkt) {
+ if (!skb) {
virtio_transport_put_credit(vvs, pkt_len);
return -ENOMEM;
}
- virtio_transport_inc_tx_pkt(vvs, pkt);
+ virtio_transport_inc_tx_pkt(vvs, skb);
- return t_ops->send_pkt(pkt);
+ return t_ops->send_pkt(skb);
}
static bool virtio_transport_inc_rx_pkt(struct virtio_vsock_sock *vvs,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
- if (vvs->rx_bytes + pkt->len > vvs->buf_alloc)
+ if (vvs->rx_bytes + skb->len > vvs->buf_alloc)
return false;
- vvs->rx_bytes += pkt->len;
+ vvs->rx_bytes += skb->len;
return true;
}
static void virtio_transport_dec_rx_pkt(struct virtio_vsock_sock *vvs,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
- vvs->rx_bytes -= pkt->len;
- vvs->fwd_cnt += pkt->len;
+ int len;
+
+ len = skb_headroom(skb) - sizeof(struct virtio_vsock_hdr) - skb->len;
+ vvs->rx_bytes -= len;
+ vvs->fwd_cnt += len;
}
-void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt)
+void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
+
spin_lock_bh(&vvs->rx_lock);
vvs->last_fwd_cnt = vvs->fwd_cnt;
- pkt->hdr.fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
- pkt->hdr.buf_alloc = cpu_to_le32(vvs->buf_alloc);
+ hdr->fwd_cnt = cpu_to_le32(vvs->fwd_cnt);
+ hdr->buf_alloc = cpu_to_le32(vvs->buf_alloc);
spin_unlock_bh(&vvs->rx_lock);
}
EXPORT_SYMBOL_GPL(virtio_transport_inc_tx_pkt);
@@ -303,29 +311,29 @@ virtio_transport_stream_do_peek(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0, off;
+ struct sk_buff *skb, *tmp;
int err = -EFAULT;
spin_lock_bh(&vvs->rx_lock);
- list_for_each_entry(pkt, &vvs->rx_queue, list) {
- off = pkt->off;
+ skb_queue_walk_safe(&vvs->rx_queue, skb, tmp) {
+ off = 0;
if (total == len)
break;
- while (total < len && off < pkt->len) {
+ while (total < len && off < skb->len) {
bytes = len - total;
- if (bytes > pkt->len - off)
- bytes = pkt->len - off;
+ if (bytes > skb->len - off)
+ bytes = skb->len - off;
/* sk_lock is held by caller so no one else can dequeue.
* Unlock rx_lock since memcpy_to_msg() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, pkt->buf + off, bytes);
+ err = memcpy_to_msg(msg, skb->data + off, bytes);
if (err)
goto out;
@@ -352,37 +360,38 @@ virtio_transport_stream_do_dequeue(struct vsock_sock *vsk,
size_t len)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt;
size_t bytes, total = 0;
- u32 free_space;
+ struct sk_buff *skb;
int err = -EFAULT;
+ u32 free_space;
spin_lock_bh(&vvs->rx_lock);
- while (total < len && !list_empty(&vvs->rx_queue)) {
- pkt = list_first_entry(&vvs->rx_queue,
- struct virtio_vsock_pkt, list);
+ while (total < len && !skb_queue_empty(&vvs->rx_queue)) {
+ skb = __skb_dequeue(&vvs->rx_queue);
bytes = len - total;
- if (bytes > pkt->len - pkt->off)
- bytes = pkt->len - pkt->off;
+ if (bytes > skb->len)
+ bytes = skb->len;
/* sk_lock is held by caller so no one else can dequeue.
* Unlock rx_lock since memcpy_to_msg() may sleep.
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, pkt->buf + pkt->off, bytes);
+ err = memcpy_to_msg(msg, skb->data, bytes);
if (err)
goto out;
spin_lock_bh(&vvs->rx_lock);
total += bytes;
- pkt->off += bytes;
- if (pkt->off == pkt->len) {
- virtio_transport_dec_rx_pkt(vvs, pkt);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
+ skb_pull(skb, bytes);
+
+ if (skb->len == 0) {
+ virtio_transport_dec_rx_pkt(vvs, skb);
+ consume_skb(skb);
+ } else {
+ __skb_queue_head(&vvs->rx_queue, skb);
}
}
@@ -414,10 +423,10 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
int flags)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt;
int dequeued_len = 0;
size_t user_buf_len = msg_data_left(msg);
bool msg_ready = false;
+ struct sk_buff *skb;
spin_lock_bh(&vvs->rx_lock);
@@ -427,13 +436,18 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
}
while (!msg_ready) {
- pkt = list_first_entry(&vvs->rx_queue, struct virtio_vsock_pkt, list);
+ struct virtio_vsock_hdr *hdr;
+
+ skb = __skb_dequeue(&vvs->rx_queue);
+ if (!skb)
+ break;
+ hdr = virtio_vsock_hdr(skb);
if (dequeued_len >= 0) {
size_t pkt_len;
size_t bytes_to_copy;
- pkt_len = (size_t)le32_to_cpu(pkt->hdr.len);
+ pkt_len = (size_t)le32_to_cpu(hdr->len);
bytes_to_copy = min(user_buf_len, pkt_len);
if (bytes_to_copy) {
@@ -444,7 +458,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
*/
spin_unlock_bh(&vvs->rx_lock);
- err = memcpy_to_msg(msg, pkt->buf, bytes_to_copy);
+ err = memcpy_to_msg(msg, skb->data, bytes_to_copy);
if (err) {
/* Copy of message failed. Rest of
* fragments will be freed without copy.
@@ -452,6 +466,7 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
dequeued_len = err;
} else {
user_buf_len -= bytes_to_copy;
+ skb_pull(skb, bytes_to_copy);
}
spin_lock_bh(&vvs->rx_lock);
@@ -461,17 +476,16 @@ static int virtio_transport_seqpacket_do_dequeue(struct vsock_sock *vsk,
dequeued_len += pkt_len;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM) {
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM) {
msg_ready = true;
vvs->msg_count--;
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOR)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOR)
msg->msg_flags |= MSG_EOR;
}
- virtio_transport_dec_rx_pkt(vvs, pkt);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_dec_rx_pkt(vvs, skb);
+ kfree_skb(skb);
}
spin_unlock_bh(&vvs->rx_lock);
@@ -609,7 +623,7 @@ int virtio_transport_do_socket_init(struct vsock_sock *vsk,
spin_lock_init(&vvs->rx_lock);
spin_lock_init(&vvs->tx_lock);
- INIT_LIST_HEAD(&vvs->rx_queue);
+ skb_queue_head_init(&vvs->rx_queue);
return 0;
}
@@ -806,16 +820,16 @@ void virtio_transport_destruct(struct vsock_sock *vsk)
EXPORT_SYMBOL_GPL(virtio_transport_destruct);
static int virtio_transport_reset(struct vsock_sock *vsk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
- .reply = !!pkt,
+ .reply = !!skb,
.vsk = vsk,
};
/* Send RST only if the original pkt is not a RST pkt */
- if (pkt && le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
+ if (skb && le16_to_cpu(virtio_vsock_hdr(skb)->op) == VIRTIO_VSOCK_OP_RST)
return 0;
return virtio_transport_send_pkt_info(vsk, &info);
@@ -825,29 +839,30 @@ static int virtio_transport_reset(struct vsock_sock *vsk,
* attempt was made to connect to a socket that does not exist.
*/
static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
- struct virtio_vsock_pkt *reply;
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RST,
- .type = le16_to_cpu(pkt->hdr.type),
+ .type = le16_to_cpu(hdr->type),
.reply = true,
};
+ struct sk_buff *reply;
/* Send RST only if the original pkt is not a RST pkt */
- if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
+ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
return 0;
- reply = virtio_transport_alloc_pkt(&info, 0,
- le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port),
- le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
+ reply = virtio_transport_alloc_skb(&info, 0,
+ le64_to_cpu(hdr->dst_cid),
+ le32_to_cpu(hdr->dst_port),
+ le64_to_cpu(hdr->src_cid),
+ le32_to_cpu(hdr->src_port));
if (!reply)
return -ENOMEM;
if (!t) {
- virtio_transport_free_pkt(reply);
+ kfree_skb(reply);
return -ENOTCONN;
}
@@ -858,16 +873,11 @@ static int virtio_transport_reset_no_sock(const struct virtio_transport *t,
static void virtio_transport_remove_sock(struct vsock_sock *vsk)
{
struct virtio_vsock_sock *vvs = vsk->trans;
- struct virtio_vsock_pkt *pkt, *tmp;
/* We don't need to take rx_lock, as the socket is closing and we are
* removing it.
*/
- list_for_each_entry_safe(pkt, tmp, &vvs->rx_queue, list) {
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
-
+ __skb_queue_purge(&vvs->rx_queue);
vsock_remove_sock(vsk);
}
@@ -981,13 +991,14 @@ EXPORT_SYMBOL_GPL(virtio_transport_release);
static int
virtio_transport_recv_connecting(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
- int err;
int skerr;
+ int err;
- switch (le16_to_cpu(pkt->hdr.op)) {
+ switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RESPONSE:
sk->sk_state = TCP_ESTABLISHED;
sk->sk_socket->state = SS_CONNECTED;
@@ -1008,7 +1019,7 @@ virtio_transport_recv_connecting(struct sock *sk,
return 0;
destroy:
- virtio_transport_reset(vsk, pkt);
+ virtio_transport_reset(vsk, skb);
sk->sk_state = TCP_CLOSE;
sk->sk_err = skerr;
sk_error_report(sk);
@@ -1017,34 +1028,37 @@ destroy:
static void
virtio_transport_recv_enqueue(struct vsock_sock *vsk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
struct virtio_vsock_sock *vvs = vsk->trans;
bool can_enqueue, free_pkt = false;
+ struct virtio_vsock_hdr *hdr;
+ u32 len;
- pkt->len = le32_to_cpu(pkt->hdr.len);
- pkt->off = 0;
+ hdr = virtio_vsock_hdr(skb);
+ len = le32_to_cpu(hdr->len);
spin_lock_bh(&vvs->rx_lock);
- can_enqueue = virtio_transport_inc_rx_pkt(vvs, pkt);
+ can_enqueue = virtio_transport_inc_rx_pkt(vvs, skb);
if (!can_enqueue) {
free_pkt = true;
goto out;
}
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)
vvs->msg_count++;
/* Try to copy small packets into the buffer of last packet queued,
* to avoid wasting memory queueing the entire buffer with a small
* payload.
*/
- if (pkt->len <= GOOD_COPY_LEN && !list_empty(&vvs->rx_queue)) {
- struct virtio_vsock_pkt *last_pkt;
+ if (len <= GOOD_COPY_LEN && !skb_queue_empty(&vvs->rx_queue)) {
+ struct virtio_vsock_hdr *last_hdr;
+ struct sk_buff *last_skb;
- last_pkt = list_last_entry(&vvs->rx_queue,
- struct virtio_vsock_pkt, list);
+ last_skb = skb_peek_tail(&vvs->rx_queue);
+ last_hdr = virtio_vsock_hdr(last_skb);
/* If there is space in the last packet queued, we copy the
* new packet in its buffer. We avoid this if the last packet
@@ -1052,35 +1066,35 @@ virtio_transport_recv_enqueue(struct vsock_sock *vsk,
* delimiter of SEQPACKET message, so 'pkt' is the first packet
* of a new message.
*/
- if ((pkt->len <= last_pkt->buf_len - last_pkt->len) &&
- !(le32_to_cpu(last_pkt->hdr.flags) & VIRTIO_VSOCK_SEQ_EOM)) {
- memcpy(last_pkt->buf + last_pkt->len, pkt->buf,
- pkt->len);
- last_pkt->len += pkt->len;
+ if (skb->len < skb_tailroom(last_skb) &&
+ !(le32_to_cpu(last_hdr->flags) & VIRTIO_VSOCK_SEQ_EOM)) {
+ memcpy(skb_put(last_skb, skb->len), skb->data, skb->len);
free_pkt = true;
- last_pkt->hdr.flags |= pkt->hdr.flags;
+ last_hdr->flags |= hdr->flags;
+ last_hdr->len = cpu_to_le32(last_skb->len);
goto out;
}
}
- list_add_tail(&pkt->list, &vvs->rx_queue);
+ __skb_queue_tail(&vvs->rx_queue, skb);
out:
spin_unlock_bh(&vvs->rx_lock);
if (free_pkt)
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
}
static int
virtio_transport_recv_connected(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
int err = 0;
- switch (le16_to_cpu(pkt->hdr.op)) {
+ switch (le16_to_cpu(hdr->op)) {
case VIRTIO_VSOCK_OP_RW:
- virtio_transport_recv_enqueue(vsk, pkt);
+ virtio_transport_recv_enqueue(vsk, skb);
vsock_data_ready(sk);
return err;
case VIRTIO_VSOCK_OP_CREDIT_REQUEST:
@@ -1090,18 +1104,17 @@ virtio_transport_recv_connected(struct sock *sk,
sk->sk_write_space(sk);
break;
case VIRTIO_VSOCK_OP_SHUTDOWN:
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_RCV)
vsk->peer_shutdown |= RCV_SHUTDOWN;
- if (le32_to_cpu(pkt->hdr.flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
+ if (le32_to_cpu(hdr->flags) & VIRTIO_VSOCK_SHUTDOWN_SEND)
vsk->peer_shutdown |= SEND_SHUTDOWN;
if (vsk->peer_shutdown == SHUTDOWN_MASK &&
vsock_stream_has_data(vsk) <= 0 &&
!sock_flag(sk, SOCK_DONE)) {
(void)virtio_transport_reset(vsk, NULL);
-
virtio_transport_do_close(vsk, true);
}
- if (le32_to_cpu(pkt->hdr.flags))
+ if (le32_to_cpu(virtio_vsock_hdr(skb)->flags))
sk->sk_state_change(sk);
break;
case VIRTIO_VSOCK_OP_RST:
@@ -1112,28 +1125,30 @@ virtio_transport_recv_connected(struct sock *sk,
break;
}
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
return err;
}
static void
virtio_transport_recv_disconnecting(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
- if (le16_to_cpu(pkt->hdr.op) == VIRTIO_VSOCK_OP_RST)
+ if (le16_to_cpu(hdr->op) == VIRTIO_VSOCK_OP_RST)
virtio_transport_do_close(vsk, true);
}
static int
virtio_transport_send_response(struct vsock_sock *vsk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct virtio_vsock_pkt_info info = {
.op = VIRTIO_VSOCK_OP_RESPONSE,
- .remote_cid = le64_to_cpu(pkt->hdr.src_cid),
- .remote_port = le32_to_cpu(pkt->hdr.src_port),
+ .remote_cid = le64_to_cpu(hdr->src_cid),
+ .remote_port = le32_to_cpu(hdr->src_port),
.reply = true,
.vsk = vsk,
};
@@ -1142,8 +1157,9 @@ virtio_transport_send_response(struct vsock_sock *vsk,
}
static bool virtio_transport_space_update(struct sock *sk,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
struct virtio_vsock_sock *vvs = vsk->trans;
bool space_available;
@@ -1158,8 +1174,8 @@ static bool virtio_transport_space_update(struct sock *sk,
/* buf_alloc and fwd_cnt is always included in the hdr */
spin_lock_bh(&vvs->tx_lock);
- vvs->peer_buf_alloc = le32_to_cpu(pkt->hdr.buf_alloc);
- vvs->peer_fwd_cnt = le32_to_cpu(pkt->hdr.fwd_cnt);
+ vvs->peer_buf_alloc = le32_to_cpu(hdr->buf_alloc);
+ vvs->peer_fwd_cnt = le32_to_cpu(hdr->fwd_cnt);
space_available = virtio_transport_has_space(vsk);
spin_unlock_bh(&vvs->tx_lock);
return space_available;
@@ -1167,27 +1183,28 @@ static bool virtio_transport_space_update(struct sock *sk,
/* Handle server socket */
static int
-virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
+virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
struct virtio_transport *t)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct vsock_sock *vsk = vsock_sk(sk);
struct vsock_sock *vchild;
struct sock *child;
int ret;
- if (le16_to_cpu(pkt->hdr.op) != VIRTIO_VSOCK_OP_REQUEST) {
- virtio_transport_reset_no_sock(t, pkt);
+ if (le16_to_cpu(hdr->op) != VIRTIO_VSOCK_OP_REQUEST) {
+ virtio_transport_reset_no_sock(t, skb);
return -EINVAL;
}
if (sk_acceptq_is_full(sk)) {
- virtio_transport_reset_no_sock(t, pkt);
+ virtio_transport_reset_no_sock(t, skb);
return -ENOMEM;
}
child = vsock_create_connected(sk);
if (!child) {
- virtio_transport_reset_no_sock(t, pkt);
+ virtio_transport_reset_no_sock(t, skb);
return -ENOMEM;
}
@@ -1198,10 +1215,10 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
child->sk_state = TCP_ESTABLISHED;
vchild = vsock_sk(child);
- vsock_addr_init(&vchild->local_addr, le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port));
- vsock_addr_init(&vchild->remote_addr, le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
+ vsock_addr_init(&vchild->local_addr, le64_to_cpu(hdr->dst_cid),
+ le32_to_cpu(hdr->dst_port));
+ vsock_addr_init(&vchild->remote_addr, le64_to_cpu(hdr->src_cid),
+ le32_to_cpu(hdr->src_port));
ret = vsock_assign_transport(vchild, vsk);
/* Transport assigned (looking at remote_addr) must be the same
@@ -1209,17 +1226,17 @@ virtio_transport_recv_listen(struct sock *sk, struct virtio_vsock_pkt *pkt,
*/
if (ret || vchild->transport != &t->transport) {
release_sock(child);
- virtio_transport_reset_no_sock(t, pkt);
+ virtio_transport_reset_no_sock(t, skb);
sock_put(child);
return ret;
}
- if (virtio_transport_space_update(child, pkt))
+ if (virtio_transport_space_update(child, skb))
child->sk_write_space(child);
vsock_insert_connected(vchild);
vsock_enqueue_accept(sk, child);
- virtio_transport_send_response(vchild, pkt);
+ virtio_transport_send_response(vchild, skb);
release_sock(child);
@@ -1237,29 +1254,30 @@ static bool virtio_transport_valid_type(u16 type)
* lock.
*/
void virtio_transport_recv_pkt(struct virtio_transport *t,
- struct virtio_vsock_pkt *pkt)
+ struct sk_buff *skb)
{
+ struct virtio_vsock_hdr *hdr = virtio_vsock_hdr(skb);
struct sockaddr_vm src, dst;
struct vsock_sock *vsk;
struct sock *sk;
bool space_available;
- vsock_addr_init(&src, le64_to_cpu(pkt->hdr.src_cid),
- le32_to_cpu(pkt->hdr.src_port));
- vsock_addr_init(&dst, le64_to_cpu(pkt->hdr.dst_cid),
- le32_to_cpu(pkt->hdr.dst_port));
+ vsock_addr_init(&src, le64_to_cpu(hdr->src_cid),
+ le32_to_cpu(hdr->src_port));
+ vsock_addr_init(&dst, le64_to_cpu(hdr->dst_cid),
+ le32_to_cpu(hdr->dst_port));
trace_virtio_transport_recv_pkt(src.svm_cid, src.svm_port,
dst.svm_cid, dst.svm_port,
- le32_to_cpu(pkt->hdr.len),
- le16_to_cpu(pkt->hdr.type),
- le16_to_cpu(pkt->hdr.op),
- le32_to_cpu(pkt->hdr.flags),
- le32_to_cpu(pkt->hdr.buf_alloc),
- le32_to_cpu(pkt->hdr.fwd_cnt));
-
- if (!virtio_transport_valid_type(le16_to_cpu(pkt->hdr.type))) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ le32_to_cpu(hdr->len),
+ le16_to_cpu(hdr->type),
+ le16_to_cpu(hdr->op),
+ le32_to_cpu(hdr->flags),
+ le32_to_cpu(hdr->buf_alloc),
+ le32_to_cpu(hdr->fwd_cnt));
+
+ if (!virtio_transport_valid_type(le16_to_cpu(hdr->type))) {
+ (void)virtio_transport_reset_no_sock(t, skb);
goto free_pkt;
}
@@ -1270,13 +1288,13 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
if (!sk) {
sk = vsock_find_bound_socket(&dst);
if (!sk) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ (void)virtio_transport_reset_no_sock(t, skb);
goto free_pkt;
}
}
- if (virtio_transport_get_type(sk) != le16_to_cpu(pkt->hdr.type)) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ if (virtio_transport_get_type(sk) != le16_to_cpu(hdr->type)) {
+ (void)virtio_transport_reset_no_sock(t, skb);
sock_put(sk);
goto free_pkt;
}
@@ -1287,13 +1305,13 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
/* Check if sk has been closed before lock_sock */
if (sock_flag(sk, SOCK_DONE)) {
- (void)virtio_transport_reset_no_sock(t, pkt);
+ (void)virtio_transport_reset_no_sock(t, skb);
release_sock(sk);
sock_put(sk);
goto free_pkt;
}
- space_available = virtio_transport_space_update(sk, pkt);
+ space_available = virtio_transport_space_update(sk, skb);
/* Update CID in case it has changed after a transport reset event */
if (vsk->local_addr.svm_cid != VMADDR_CID_ANY)
@@ -1304,23 +1322,23 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
switch (sk->sk_state) {
case TCP_LISTEN:
- virtio_transport_recv_listen(sk, pkt, t);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_recv_listen(sk, skb, t);
+ kfree_skb(skb);
break;
case TCP_SYN_SENT:
- virtio_transport_recv_connecting(sk, pkt);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_recv_connecting(sk, skb);
+ kfree_skb(skb);
break;
case TCP_ESTABLISHED:
- virtio_transport_recv_connected(sk, pkt);
+ virtio_transport_recv_connected(sk, skb);
break;
case TCP_CLOSING:
- virtio_transport_recv_disconnecting(sk, pkt);
- virtio_transport_free_pkt(pkt);
+ virtio_transport_recv_disconnecting(sk, skb);
+ kfree_skb(skb);
break;
default:
- (void)virtio_transport_reset_no_sock(t, pkt);
- virtio_transport_free_pkt(pkt);
+ (void)virtio_transport_reset_no_sock(t, skb);
+ kfree_skb(skb);
break;
}
@@ -1333,16 +1351,42 @@ void virtio_transport_recv_pkt(struct virtio_transport *t,
return;
free_pkt:
- virtio_transport_free_pkt(pkt);
+ kfree_skb(skb);
}
EXPORT_SYMBOL_GPL(virtio_transport_recv_pkt);
-void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt)
+/* Remove and free every skb in @queue whose socket matches @vsk.
+ *
+ * Returns the number of removed skbs that were reply packets.
+ */
+int virtio_transport_purge_skbs(void *vsk, struct sk_buff_head *queue)
{
- kvfree(pkt->buf);
- kfree(pkt);
+ struct sk_buff_head freeme;
+ struct sk_buff *skb, *tmp;
+ int cnt = 0;
+
+ skb_queue_head_init(&freeme);
+
+ spin_lock_bh(&queue->lock);
+ skb_queue_walk_safe(queue, skb, tmp) {
+ if (vsock_sk(skb->sk) != vsk)
+ continue;
+
+ __skb_unlink(skb, queue);
+ __skb_queue_tail(&freeme, skb);
+
+ if (virtio_vsock_skb_reply(skb))
+ cnt++;
+ }
+ spin_unlock_bh(&queue->lock);
+
+ __skb_queue_purge(&freeme);
+
+ return cnt;
}
-EXPORT_SYMBOL_GPL(virtio_transport_free_pkt);
+EXPORT_SYMBOL_GPL(virtio_transport_purge_skbs);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Asias He");
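The two-phase structure of virtio_transport_purge_skbs() above -- unlink under the queue lock, free after dropping it -- keeps kfree_skb() out of the locked region. The same shape generalized to an arbitrary predicate, as a hypothetical helper:

#include <linux/skbuff.h>

static void purge_if(struct sk_buff_head *queue,
		     bool (*match)(const struct sk_buff *))
{
	struct sk_buff_head freeme;
	struct sk_buff *skb, *tmp;

	skb_queue_head_init(&freeme);

	spin_lock_bh(&queue->lock);
	skb_queue_walk_safe(queue, skb, tmp) {
		if (!match(skb))
			continue;
		__skb_unlink(skb, queue);
		__skb_queue_tail(&freeme, skb);
	}
	spin_unlock_bh(&queue->lock);

	__skb_queue_purge(&freeme);	/* frees outside the lock */
}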
diff --git a/net/vmw_vsock/vsock_loopback.c b/net/vmw_vsock/vsock_loopback.c
index 169a8cf65b39..671e03240fc5 100644
--- a/net/vmw_vsock/vsock_loopback.c
+++ b/net/vmw_vsock/vsock_loopback.c
@@ -16,7 +16,7 @@ struct vsock_loopback {
struct workqueue_struct *workqueue;
spinlock_t pkt_list_lock; /* protects pkt_list */
- struct list_head pkt_list;
+ struct sk_buff_head pkt_queue;
struct work_struct pkt_work;
};
@@ -27,13 +27,13 @@ static u32 vsock_loopback_get_local_cid(void)
return VMADDR_CID_LOCAL;
}
-static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
+static int vsock_loopback_send_pkt(struct sk_buff *skb)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
- int len = pkt->len;
+ int len = skb->len;
spin_lock_bh(&vsock->pkt_list_lock);
- list_add_tail(&pkt->list, &vsock->pkt_list);
+ skb_queue_tail(&vsock->pkt_queue, skb);
spin_unlock_bh(&vsock->pkt_list_lock);
queue_work(vsock->workqueue, &vsock->pkt_work);
@@ -44,21 +44,8 @@ static int vsock_loopback_send_pkt(struct virtio_vsock_pkt *pkt)
static int vsock_loopback_cancel_pkt(struct vsock_sock *vsk)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
- struct virtio_vsock_pkt *pkt, *n;
- LIST_HEAD(freeme);
- spin_lock_bh(&vsock->pkt_list_lock);
- list_for_each_entry_safe(pkt, n, &vsock->pkt_list, list) {
- if (pkt->vsk != vsk)
- continue;
- list_move(&pkt->list, &freeme);
- }
- spin_unlock_bh(&vsock->pkt_list_lock);
-
- list_for_each_entry_safe(pkt, n, &freeme, list) {
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
+ virtio_transport_purge_skbs(vsk, &vsock->pkt_queue);
return 0;
}
@@ -121,20 +108,18 @@ static void vsock_loopback_work(struct work_struct *work)
{
struct vsock_loopback *vsock =
container_of(work, struct vsock_loopback, pkt_work);
- LIST_HEAD(pkts);
+ struct sk_buff_head pkts;
+ struct sk_buff *skb;
+
+ skb_queue_head_init(&pkts);
spin_lock_bh(&vsock->pkt_list_lock);
- list_splice_init(&vsock->pkt_list, &pkts);
+ skb_queue_splice_init(&vsock->pkt_queue, &pkts);
spin_unlock_bh(&vsock->pkt_list_lock);
- while (!list_empty(&pkts)) {
- struct virtio_vsock_pkt *pkt;
-
- pkt = list_first_entry(&pkts, struct virtio_vsock_pkt, list);
- list_del_init(&pkt->list);
-
- virtio_transport_deliver_tap_pkt(pkt);
- virtio_transport_recv_pkt(&loopback_transport, pkt);
+ while ((skb = __skb_dequeue(&pkts))) {
+ virtio_transport_deliver_tap_pkt(skb);
+ virtio_transport_recv_pkt(&loopback_transport, skb);
}
}
@@ -148,7 +133,7 @@ static int __init vsock_loopback_init(void)
return -ENOMEM;
spin_lock_init(&vsock->pkt_list_lock);
- INIT_LIST_HEAD(&vsock->pkt_list);
+ skb_queue_head_init(&vsock->pkt_queue);
INIT_WORK(&vsock->pkt_work, vsock_loopback_work);
ret = vsock_core_register(&loopback_transport.transport,
@@ -166,19 +151,13 @@ out_wq:
static void __exit vsock_loopback_exit(void)
{
struct vsock_loopback *vsock = &the_vsock_loopback;
- struct virtio_vsock_pkt *pkt;
vsock_core_unregister(&loopback_transport.transport);
flush_work(&vsock->pkt_work);
spin_lock_bh(&vsock->pkt_list_lock);
- while (!list_empty(&vsock->pkt_list)) {
- pkt = list_first_entry(&vsock->pkt_list,
- struct virtio_vsock_pkt, list);
- list_del(&pkt->list);
- virtio_transport_free_pkt(pkt);
- }
+ virtio_vsock_skb_queue_purge(&vsock->pkt_queue);
spin_unlock_bh(&vsock->pkt_list_lock);
destroy_workqueue(vsock->workqueue);
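
The consumer side of the list_head-to-sk_buff_head conversion follows the usual splice-then-drain idiom, as the worker above shows; restated in outline:

	struct sk_buff_head pkts;
	struct sk_buff *skb;

	skb_queue_head_init(&pkts);

	/* Take ownership of the whole queue under the lock... */
	spin_lock_bh(&vsock->pkt_list_lock);
	skb_queue_splice_init(&vsock->pkt_queue, &pkts);
	spin_unlock_bh(&vsock->pkt_list_lock);

	/* ...then drain the private list without holding it. */
	while ((skb = __skb_dequeue(&pkts)))
		virtio_transport_recv_pkt(&loopback_transport, skb);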
diff --git a/net/wireless/ap.c b/net/wireless/ap.c
index e68923200018..0962770303b2 100644
--- a/net/wireless/ap.c
+++ b/net/wireless/ap.c
@@ -39,7 +39,7 @@ static int ___cfg80211_stop_ap(struct cfg80211_registered_device *rdev,
wdev->u.ap.ssid_len = 0;
rdev_set_qos_map(rdev, dev, NULL);
if (notify)
- nl80211_send_ap_stopped(wdev);
+ nl80211_send_ap_stopped(wdev, link_id);
/* Should we apply the grace period during beaconing interface
* shutdown also?
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index 0e5835cd8c61..0b7e81db383d 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -1460,3 +1460,72 @@ struct cfg80211_chan_def *wdev_chandef(struct wireless_dev *wdev,
}
}
EXPORT_SYMBOL(wdev_chandef);
+
+struct cfg80211_per_bw_puncturing_values {
+ u8 len;
+ const u16 *valid_values;
+};
+
+static const u16 puncturing_values_80mhz[] = {
+ 0x8, 0x4, 0x2, 0x1
+};
+
+static const u16 puncturing_values_160mhz[] = {
+ 0x80, 0x40, 0x20, 0x10, 0x8, 0x4, 0x2, 0x1, 0xc0, 0x30, 0xc, 0x3
+};
+
+static const u16 puncturing_values_320mhz[] = {
+ 0xc000, 0x3000, 0xc00, 0x300, 0xc0, 0x30, 0xc, 0x3, 0xf000, 0xf00,
+ 0xf0, 0xf, 0xfc00, 0xf300, 0xf0c0, 0xf030, 0xf00c, 0xf003, 0xc00f,
+ 0x300f, 0xc0f, 0x30f, 0xcf, 0x3f
+};
+
+#define CFG80211_PER_BW_VALID_PUNCTURING_VALUES(_bw) \
+ { \
+ .len = ARRAY_SIZE(puncturing_values_ ## _bw ## mhz), \
+ .valid_values = puncturing_values_ ## _bw ## mhz \
+ }
+
+static const struct cfg80211_per_bw_puncturing_values per_bw_puncturing[] = {
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(80),
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(160),
+ CFG80211_PER_BW_VALID_PUNCTURING_VALUES(320)
+};
+
+bool cfg80211_valid_disable_subchannel_bitmap(u16 *bitmap,
+ const struct cfg80211_chan_def *chandef)
+{
+ u32 idx, i, start_freq;
+
+ switch (chandef->width) {
+ case NL80211_CHAN_WIDTH_80:
+ idx = 0;
+ start_freq = chandef->center_freq1 - 40;
+ break;
+ case NL80211_CHAN_WIDTH_160:
+ idx = 1;
+ start_freq = chandef->center_freq1 - 80;
+ break;
+ case NL80211_CHAN_WIDTH_320:
+ idx = 2;
+ start_freq = chandef->center_freq1 - 160;
+ break;
+ default:
+ *bitmap = 0;
+ break;
+ }
+
+ if (!*bitmap)
+ return true;
+
+ /* check if primary channel is punctured */
+ if (*bitmap & (u16)BIT((chandef->chan->center_freq - start_freq) / 20))
+ return false;
+
+ for (i = 0; i < per_bw_puncturing[idx].len; i++)
+ if (per_bw_puncturing[idx].valid_values[i] == *bitmap)
+ return true;
+
+ return false;
+}
+EXPORT_SYMBOL(cfg80211_valid_disable_subchannel_bitmap);
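
A hedged caller sketch for the new validator: reject a puncturing pattern before applying it. The chandef is assumed to describe a 160 MHz channel; 0x0003 punctures the two lowest 20 MHz subchannels and appears in puncturing_values_160mhz above. Each set bit disables one 20 MHz subchannel, counted from the low edge of the channel.

	u16 punct = 0x0003;

	if (!cfg80211_valid_disable_subchannel_bitmap(&punct, chandef))
		return -EINVAL; /* bad pattern, or primary channel punctured */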
diff --git a/net/wireless/core.h b/net/wireless/core.h
index af85d8909935..7c61752f6d83 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -278,8 +278,8 @@ struct cfg80211_event {
};
struct cfg80211_cached_keys {
- struct key_params params[CFG80211_MAX_WEP_KEYS];
- u8 data[CFG80211_MAX_WEP_KEYS][WLAN_KEY_LEN_WEP104];
+ struct key_params params[4];
+ u8 data[4][WLAN_KEY_LEN_WEP104];
int def;
};
diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c
index edd062f104f4..e6fdb0b8187d 100644
--- a/net/wireless/ibss.c
+++ b/net/wireless/ibss.c
@@ -45,8 +45,7 @@ void __cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid,
cfg80211_hold_bss(bss_from_pub(bss));
wdev->u.ibss.current_bss = bss_from_pub(bss);
- if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
- cfg80211_upload_connect_keys(wdev);
+ cfg80211_upload_connect_keys(wdev);
nl80211_send_ibss_bssid(wiphy_to_rdev(wdev->wiphy), dev, bssid,
GFP_KERNEL);
@@ -294,7 +293,7 @@ int cfg80211_ibss_wext_join(struct cfg80211_registered_device *rdev,
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
+ for (i = 0; i < 4; i++)
ck->params[i].key = ck->data[i];
}
err = __cfg80211_join_ibss(rdev, wdev->netdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 58e1fb18f85a..81d3f40d6235 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -742,7 +742,10 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
case NL80211_IFTYPE_AP:
case NL80211_IFTYPE_P2P_GO:
case NL80211_IFTYPE_AP_VLAN:
- if (!ether_addr_equal(mgmt->bssid, wdev_address(wdev)))
+ if (!ether_addr_equal(mgmt->bssid, wdev_address(wdev)) &&
+ (params->link_id < 0 ||
+ !ether_addr_equal(mgmt->bssid,
+ wdev->links[params->link_id].addr)))
err = -EINVAL;
break;
case NL80211_IFTYPE_MESH_POINT:
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 33a82ecab9d5..112b4bb009c8 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -805,6 +805,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
[NL80211_ATTR_MLD_ADDR] = NLA_POLICY_EXACT_LEN(ETH_ALEN),
[NL80211_ATTR_MLO_SUPPORT] = { .type = NLA_FLAG },
[NL80211_ATTR_MAX_NUM_AKM_SUITES] = { .type = NLA_REJECT },
+ [NL80211_ATTR_PUNCT_BITMAP] = NLA_POLICY_RANGE(NLA_U8, 0, 0xffff),
};
/* policy for the key attributes */
@@ -883,7 +884,7 @@ nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
},
[NL80211_REKEY_DATA_KCK] = {
.type = NLA_BINARY,
- .len = NL80211_KCK_EXT_LEN
+ .len = NL80211_KCK_EXT_LEN_32
},
[NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN(NL80211_REPLAY_CTR_LEN),
[NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
@@ -1548,10 +1549,14 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
if (wdev->connected)
return 0;
return -ENOLINK;
+ case NL80211_IFTYPE_NAN:
+ if (wiphy_ext_feature_isset(wdev->wiphy,
+ NL80211_EXT_FEATURE_SECURE_NAN))
+ return 0;
+ return -EINVAL;
case NL80211_IFTYPE_UNSPECIFIED:
case NL80211_IFTYPE_OCB:
case NL80211_IFTYPE_MONITOR:
- case NL80211_IFTYPE_NAN:
case NL80211_IFTYPE_P2P_DEVICE:
case NL80211_IFTYPE_WDS:
case NUM_NL80211_IFTYPES:
@@ -3173,6 +3178,21 @@ static bool nl80211_can_set_dev_channel(struct wireless_dev *wdev)
wdev->iftype == NL80211_IFTYPE_P2P_GO;
}
+static int nl80211_parse_punct_bitmap(struct cfg80211_registered_device *rdev,
+ struct genl_info *info,
+ const struct cfg80211_chan_def *chandef,
+ u16 *punct_bitmap)
+{
+ if (!wiphy_ext_feature_isset(&rdev->wiphy, NL80211_EXT_FEATURE_PUNCT))
+ return -EINVAL;
+
+ *punct_bitmap = nla_get_u32(info->attrs[NL80211_ATTR_PUNCT_BITMAP]);
+ if (!cfg80211_valid_disable_subchannel_bitmap(punct_bitmap, chandef))
+ return -EINVAL;
+
+ return 0;
+}
+
int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
struct genl_info *info,
struct cfg80211_chan_def *chandef)
@@ -3181,8 +3201,11 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
struct nlattr **attrs = info->attrs;
u32 control_freq;
- if (!attrs[NL80211_ATTR_WIPHY_FREQ])
+ if (!attrs[NL80211_ATTR_WIPHY_FREQ]) {
+ NL_SET_ERR_MSG_ATTR(extack, attrs[NL80211_ATTR_WIPHY_FREQ],
+ "Frequency is missing");
return -EINVAL;
+ }
control_freq = MHZ_TO_KHZ(
nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
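
Both the start-AP and channel-switch paths below consume nl80211_parse_punct_bitmap() the same way; the common shape is (fragment sketch, local variable names illustrative):

	u16 punct_bitmap = 0;
	int err;

	if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
		err = nl80211_parse_punct_bitmap(rdev, info, &chandef,
						 &punct_bitmap);
		if (err)
			/* NL80211_EXT_FEATURE_PUNCT missing, or bitmap
			 * failed cfg80211_valid_disable_subchannel_bitmap()
			 */
			return err;
	}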
@@ -5770,6 +5793,42 @@ static bool nl80211_valid_auth_type(struct cfg80211_registered_device *rdev,
}
}
+static void nl80211_send_ap_started(struct wireless_dev *wdev,
+ unsigned int link_id)
+{
+ struct wiphy *wiphy = wdev->wiphy;
+ struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
+ struct sk_buff *msg;
+ void *hdr;
+
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+ if (!msg)
+ return;
+
+ hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_START_AP);
+ if (!hdr)
+ goto out;
+
+ if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
+ nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) ||
+ nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
+ NL80211_ATTR_PAD) ||
+ (wdev->u.ap.ssid_len &&
+ nla_put(msg, NL80211_ATTR_SSID, wdev->u.ap.ssid_len,
+ wdev->u.ap.ssid)) ||
+ (wdev->valid_links &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)))
+ goto out;
+
+ genlmsg_end(msg, hdr);
+
+ genlmsg_multicast_netns(&nl80211_fam, wiphy_net(wiphy), msg, 0,
+ NL80211_MCGRP_MLME, GFP_KERNEL);
+ return;
+out:
+ nlmsg_free(msg);
+}
+
static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
{
struct cfg80211_registered_device *rdev = info->user_ptr[0];
@@ -5918,6 +5977,14 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
goto out;
}
+ if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
+ err = nl80211_parse_punct_bitmap(rdev, info,
+ &params->chandef,
+ &params->punct_bitmap);
+ if (err)
+ goto out;
+ }
+
if (!cfg80211_reg_can_beacon_relax(&rdev->wiphy, &params->chandef,
wdev->iftype)) {
err = -EINVAL;
@@ -6050,6 +6117,8 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
if (info->attrs[NL80211_ATTR_SOCKET_OWNER])
wdev->conn_owner_nlportid = info->snd_portid;
+
+ nl80211_send_ap_started(wdev, link_id);
}
out_unlock:
wdev_unlock(wdev);
@@ -6527,6 +6596,22 @@ static int nl80211_send_station(struct sk_buff *msg, u32 cmd, u32 portid,
sinfo->assoc_req_ies))
goto nla_put_failure;
+ if (sinfo->assoc_resp_ies_len &&
+ nla_put(msg, NL80211_ATTR_RESP_IE, sinfo->assoc_resp_ies_len,
+ sinfo->assoc_resp_ies))
+ goto nla_put_failure;
+
+ if (sinfo->mlo_params_valid) {
+ if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID,
+ sinfo->assoc_link_id))
+ goto nla_put_failure;
+
+ if (!is_zero_ether_addr(sinfo->mld_addr) &&
+ nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN,
+ sinfo->mld_addr))
+ goto nla_put_failure;
+ }
+
cfg80211_sinfo_release_content(sinfo);
genlmsg_end(msg, hdr);
return 0;
@@ -10057,6 +10142,14 @@ skip_beacons:
if (info->attrs[NL80211_ATTR_CH_SWITCH_BLOCK_TX])
params.block_tx = true;
+ if (info->attrs[NL80211_ATTR_PUNCT_BITMAP]) {
+ err = nl80211_parse_punct_bitmap(rdev, info,
+ &params.chandef,
+ &params.punct_bitmap);
+ if (err)
+ goto free;
+ }
+
wdev_lock(wdev);
err = rdev_channel_switch(rdev, dev, &params);
wdev_unlock(wdev);
@@ -12253,6 +12346,10 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_P2P_DEVICE:
break;
case NL80211_IFTYPE_NAN:
+ if (!wiphy_ext_feature_isset(wdev->wiphy,
+ NL80211_EXT_FEATURE_SECURE_NAN))
+ return -EOPNOTSUPP;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -12310,6 +12407,10 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info)
case NL80211_IFTYPE_P2P_GO:
break;
case NL80211_IFTYPE_NAN:
+ if (!wiphy_ext_feature_isset(wdev->wiphy,
+ NL80211_EXT_FEATURE_SECURE_NAN))
+ return -EOPNOTSUPP;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -12447,6 +12548,10 @@ static int nl80211_tx_mgmt_cancel_wait(struct sk_buff *skb, struct genl_info *in
case NL80211_IFTYPE_P2P_DEVICE:
break;
case NL80211_IFTYPE_NAN:
+ if (!wiphy_ext_feature_isset(wdev->wiphy,
+ NL80211_EXT_FEATURE_SECURE_NAN))
+ return -EOPNOTSUPP;
+ break;
default:
return -EOPNOTSUPP;
}
@@ -13809,7 +13914,9 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
return -ERANGE;
if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN &&
!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
- nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN))
+ nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN) &&
+ !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KCK_32 &&
+ nla_len(tb[NL80211_REKEY_DATA_KCK]) == NL80211_KCK_EXT_LEN_32))
return -ERANGE;
rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
@@ -18954,7 +19061,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
struct cfg80211_chan_def *chandef,
gfp_t gfp,
enum nl80211_commands notif,
- u8 count, bool quiet)
+ u8 count, bool quiet, u16 punct_bitmap)
{
struct wireless_dev *wdev = netdev->ieee80211_ptr;
struct sk_buff *msg;
@@ -18988,6 +19095,9 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
goto nla_put_failure;
}
+ if (nla_put_u32(msg, NL80211_ATTR_PUNCT_BITMAP, punct_bitmap))
+ goto nla_put_failure;
+
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
@@ -19000,7 +19110,7 @@ static void nl80211_ch_switch_notify(struct cfg80211_registered_device *rdev,
void cfg80211_ch_switch_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id)
+ unsigned int link_id, u16 punct_bitmap)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19009,7 +19119,7 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
ASSERT_WDEV_LOCK(wdev);
WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_notify(dev, chandef, link_id);
+ trace_cfg80211_ch_switch_notify(dev, chandef, link_id, punct_bitmap);
switch (wdev->iftype) {
case NL80211_IFTYPE_STATION:
@@ -19037,14 +19147,15 @@ void cfg80211_ch_switch_notify(struct net_device *dev,
cfg80211_sched_dfs_chan_update(rdev);
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
- NL80211_CMD_CH_SWITCH_NOTIFY, 0, false);
+ NL80211_CMD_CH_SWITCH_NOTIFY, 0, false,
+ punct_bitmap);
}
EXPORT_SYMBOL(cfg80211_ch_switch_notify);
void cfg80211_ch_switch_started_notify(struct net_device *dev,
struct cfg80211_chan_def *chandef,
unsigned int link_id, u8 count,
- bool quiet)
+ bool quiet, u16 punct_bitmap)
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
struct wiphy *wiphy = wdev->wiphy;
@@ -19053,15 +19164,17 @@ void cfg80211_ch_switch_started_notify(struct net_device *dev,
ASSERT_WDEV_LOCK(wdev);
WARN_INVALID_LINK_ID(wdev, link_id);
- trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id);
+ trace_cfg80211_ch_switch_started_notify(dev, chandef, link_id,
+ punct_bitmap);
+
nl80211_ch_switch_notify(rdev, dev, link_id, chandef, GFP_KERNEL,
NL80211_CMD_CH_SWITCH_STARTED_NOTIFY,
- count, quiet);
+ count, quiet, punct_bitmap);
}
EXPORT_SYMBOL(cfg80211_ch_switch_started_notify);
-int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
+int cfg80211_bss_color_notify(struct net_device *dev,
enum nl80211_commands cmd, u8 count,
u64 color_bitmap)
{
@@ -19075,7 +19188,7 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
trace_cfg80211_bss_color_notify(dev, cmd, count, color_bitmap);
- msg = nlmsg_new(NLMSG_DEFAULT_SIZE, gfp);
+ msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
if (!msg)
return -ENOMEM;
@@ -19098,7 +19211,7 @@ int cfg80211_bss_color_notify(struct net_device *dev, gfp_t gfp,
genlmsg_end(msg, hdr);
return genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy),
- msg, 0, NL80211_MCGRP_MLME, gfp);
+ msg, 0, NL80211_MCGRP_MLME, GFP_KERNEL);
nla_put_failure:
nlmsg_free(msg);
@@ -19661,7 +19774,7 @@ void cfg80211_crit_proto_stopped(struct wireless_dev *wdev, gfp_t gfp)
}
EXPORT_SYMBOL(cfg80211_crit_proto_stopped);
-void nl80211_send_ap_stopped(struct wireless_dev *wdev)
+void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id)
{
struct wiphy *wiphy = wdev->wiphy;
struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -19679,7 +19792,9 @@ void nl80211_send_ap_stopped(struct wireless_dev *wdev)
if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
nla_put_u32(msg, NL80211_ATTR_IFINDEX, wdev->netdev->ifindex) ||
nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
- NL80211_ATTR_PAD))
+ NL80211_ATTR_PAD) ||
+ (wdev->valid_links &&
+ nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID, link_id)))
goto out;
genlmsg_end(msg, hdr);
@@ -19718,7 +19833,9 @@ int cfg80211_external_auth_request(struct net_device *dev,
params->action) ||
nla_put(msg, NL80211_ATTR_BSSID, ETH_ALEN, params->bssid) ||
nla_put(msg, NL80211_ATTR_SSID, params->ssid.ssid_len,
- params->ssid.ssid))
+ params->ssid.ssid) ||
+ (!is_zero_ether_addr(params->mld_addr) &&
+ nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN, params->mld_addr)))
goto nla_put_failure;
genlmsg_end(msg, hdr);
@@ -19760,6 +19877,17 @@ void cfg80211_update_owe_info_event(struct net_device *netdev,
nla_put(msg, NL80211_ATTR_IE, owe_info->ie_len, owe_info->ie))
goto nla_put_failure;
+ if (owe_info->assoc_link_id != -1) {
+ if (nla_put_u8(msg, NL80211_ATTR_MLO_LINK_ID,
+ owe_info->assoc_link_id))
+ goto nla_put_failure;
+
+ if (!is_zero_ether_addr(owe_info->peer_mld_addr) &&
+ nla_put(msg, NL80211_ATTR_MLD_ADDR, ETH_ALEN,
+ owe_info->peer_mld_addr))
+ goto nla_put_failure;
+ }
+
genlmsg_end(msg, hdr);
genlmsg_multicast_netns(&nl80211_fam, wiphy_net(&rdev->wiphy), msg, 0,
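
After the rekey-data hunk above, the accepted KCK length depends on two wiphy flags. Restated as a standalone predicate (a sketch only; this helper is not part of the patch):

static bool kck_len_ok(struct wiphy *wiphy, unsigned int len)
{
	if (len == NL80211_KCK_LEN)		/* 16 bytes, always valid */
		return true;
	if (len == NL80211_KCK_EXT_LEN &&	/* 24 bytes */
	    (wiphy->flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK))
		return true;
	if (len == NL80211_KCK_EXT_LEN_32 &&	/* 32 bytes */
	    (wiphy->flags & WIPHY_FLAG_SUPPORTS_EXT_KCK_32))
		return true;
	return false;
}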
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index ba9457e94c43..0278d817bb02 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -114,7 +114,7 @@ nl80211_radar_notify(struct cfg80211_registered_device *rdev,
enum nl80211_radar_event event,
struct net_device *netdev, gfp_t gfp);
-void nl80211_send_ap_stopped(struct wireless_dev *wdev);
+void nl80211_send_ap_stopped(struct wireless_dev *wdev, unsigned int link_id);
void cfg80211_rdev_free_coalesce(struct cfg80211_registered_device *rdev);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index 4f3f31244e8b..0d40d6af7e10 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -737,51 +737,9 @@ static bool valid_country(const u8 *data, unsigned int size,
}
#ifdef CONFIG_CFG80211_REQUIRE_SIGNED_REGDB
-static struct key *builtin_regdb_keys;
-
-static void __init load_keys_from_buffer(const u8 *p, unsigned int buflen)
-{
- const u8 *end = p + buflen;
- size_t plen;
- key_ref_t key;
+#include <keys/asymmetric-type.h>
- while (p < end) {
- /* Each cert begins with an ASN.1 SEQUENCE tag and must be more
- * than 256 bytes in size.
- */
- if (end - p < 4)
- goto dodgy_cert;
- if (p[0] != 0x30 &&
- p[1] != 0x82)
- goto dodgy_cert;
- plen = (p[2] << 8) | p[3];
- plen += 4;
- if (plen > end - p)
- goto dodgy_cert;
-
- key = key_create_or_update(make_key_ref(builtin_regdb_keys, 1),
- "asymmetric", NULL, p, plen,
- ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
- KEY_USR_VIEW | KEY_USR_READ),
- KEY_ALLOC_NOT_IN_QUOTA |
- KEY_ALLOC_BUILT_IN |
- KEY_ALLOC_BYPASS_RESTRICTION);
- if (IS_ERR(key)) {
- pr_err("Problem loading in-kernel X.509 certificate (%ld)\n",
- PTR_ERR(key));
- } else {
- pr_notice("Loaded X.509 cert '%s'\n",
- key_ref_to_ptr(key)->description);
- key_ref_put(key);
- }
- p += plen;
- }
-
- return;
-
-dodgy_cert:
- pr_err("Problem parsing in-kernel X.509 certificate list\n");
-}
+static struct key *builtin_regdb_keys;
static int __init load_builtin_regdb_keys(void)
{
@@ -797,11 +755,15 @@ static int __init load_builtin_regdb_keys(void)
pr_notice("Loading compiled-in X.509 certificates for regulatory database\n");
#ifdef CONFIG_CFG80211_USE_KERNEL_REGDB_KEYS
- load_keys_from_buffer(shipped_regdb_certs, shipped_regdb_certs_len);
+ x509_load_certificate_list(shipped_regdb_certs,
+ shipped_regdb_certs_len,
+ builtin_regdb_keys);
#endif
#ifdef CONFIG_CFG80211_EXTRA_REGDB_KEYDIR
if (CONFIG_CFG80211_EXTRA_REGDB_KEYDIR[0] != '\0')
- load_keys_from_buffer(extra_regdb_certs, extra_regdb_certs_len);
+ x509_load_certificate_list(extra_regdb_certs,
+ extra_regdb_certs_len,
+ builtin_regdb_keys);
#endif
return 0;
@@ -3198,6 +3160,9 @@ static void reg_process_self_managed_hint(struct wiphy *wiphy)
request.alpha2[1] = regd->alpha2[1];
request.initiator = NL80211_REGDOM_SET_BY_DRIVER;
+ if (wiphy->flags & WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER)
+ reg_call_notifier(wiphy, &request);
+
nl80211_send_wiphy_reg_change_event(&request);
}
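
The second reg.c hunk lets self-managed-regdomain drivers get their reg_notifier called back when their own hint is processed. A hedged driver-side sketch of opting in (the callback name is illustrative):

	wiphy->regulatory_flags |= REGULATORY_WIPHY_SELF_MANAGED;
	wiphy->flags |= WIPHY_FLAG_NOTIFY_REGDOM_BY_DRIVER;
	wiphy->reg_notifier = my_driver_reg_notifier;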
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 4b5b6ee0fe01..28ce13840a88 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -285,6 +285,15 @@ void cfg80211_conn_work(struct work_struct *work)
wiphy_unlock(&rdev->wiphy);
}
+static void cfg80211_step_auth_next(struct cfg80211_conn *conn,
+ struct cfg80211_bss *bss)
+{
+ memcpy(conn->bssid, bss->bssid, ETH_ALEN);
+ conn->params.bssid = conn->bssid;
+ conn->params.channel = bss->channel;
+ conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+}
+
/* Returned bss is reference counted and must be cleaned up appropriately. */
static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
{
@@ -302,10 +311,7 @@ static struct cfg80211_bss *cfg80211_get_conn_bss(struct wireless_dev *wdev)
if (!bss)
return NULL;
- memcpy(wdev->conn->bssid, bss->bssid, ETH_ALEN);
- wdev->conn->params.bssid = wdev->conn->bssid;
- wdev->conn->params.channel = bss->channel;
- wdev->conn->state = CFG80211_CONN_AUTHENTICATE_NEXT;
+ cfg80211_step_auth_next(wdev->conn, bss);
schedule_work(&rdev->conn_work);
return bss;
@@ -597,7 +603,12 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
wdev->conn->params.ssid_len = wdev->u.client.ssid_len;
/* see if we have the bss already */
- bss = cfg80211_get_conn_bss(wdev);
+ bss = cfg80211_get_bss(wdev->wiphy, wdev->conn->params.channel,
+ wdev->conn->params.bssid,
+ wdev->conn->params.ssid,
+ wdev->conn->params.ssid_len,
+ wdev->conn_bss_type,
+ IEEE80211_PRIVACY(wdev->conn->params.privacy));
if (prev_bssid) {
memcpy(wdev->conn->prev_bssid, prev_bssid, ETH_ALEN);
@@ -608,6 +619,7 @@ static int cfg80211_sme_connect(struct wireless_dev *wdev,
if (bss) {
enum nl80211_timeout_reason treason;
+ cfg80211_step_auth_next(wdev->conn, bss);
err = cfg80211_conn_do_work(wdev, &treason);
cfg80211_put_bss(wdev->wiphy, bss);
} else {
@@ -724,6 +736,7 @@ void __cfg80211_connect_result(struct net_device *dev,
{
struct wireless_dev *wdev = dev->ieee80211_ptr;
const struct element *country_elem = NULL;
+ const struct element *ssid;
const u8 *country_data;
u8 country_datalen;
#ifdef CONFIG_CFG80211_WEXT
@@ -855,8 +868,7 @@ void __cfg80211_connect_result(struct net_device *dev,
ETH_ALEN);
}
- if (!(wdev->wiphy->flags & WIPHY_FLAG_HAS_STATIC_WEP))
- cfg80211_upload_connect_keys(wdev);
+ cfg80211_upload_connect_keys(wdev);
rcu_read_lock();
for_each_valid_link(cr, link) {
@@ -883,6 +895,22 @@ void __cfg80211_connect_result(struct net_device *dev,
country_data, country_datalen);
kfree(country_data);
+ if (!wdev->u.client.ssid_len) {
+ rcu_read_lock();
+ for_each_valid_link(cr, link) {
+ ssid = ieee80211_bss_get_elem(cr->links[link].bss,
+ WLAN_EID_SSID);
+
+ if (!ssid || !ssid->datalen)
+ continue;
+
+ memcpy(wdev->u.client.ssid, ssid->data, ssid->datalen);
+ wdev->u.client.ssid_len = ssid->datalen;
+ break;
+ }
+ rcu_read_unlock();
+ }
+
return;
out:
for_each_valid_link(cr, link)
@@ -1462,12 +1490,18 @@ int cfg80211_connect(struct cfg80211_registered_device *rdev,
connect->crypto.ciphers_pairwise[0] = cipher;
}
}
-
- connect->crypto.wep_keys = connkeys->params;
- connect->crypto.wep_tx_key = connkeys->def;
} else {
if (WARN_ON(connkeys))
return -EINVAL;
+
+ /* connect can point to wdev->wext.connect which
+ * can hold key data from a previous connection
+ */
+ connect->key = NULL;
+ connect->key_len = 0;
+ connect->key_idx = 0;
+ connect->crypto.cipher_group = 0;
+ connect->crypto.n_ciphers_pairwise = 0;
}
wdev->connect_keys = connkeys;
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index a405c3edbc47..ca7474eec723 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -19,8 +19,6 @@
else \
eth_zero_addr(__entry->entry_mac); \
} while (0)
-#define MAC_PR_FMT "%pM"
-#define MAC_PR_ARG(entry_mac) (__entry->entry_mac)
#define MAXNAME 32
#define WIPHY_ENTRY __array(char, wiphy_name, 32)
@@ -454,10 +452,10 @@ DECLARE_EVENT_CLASS(key_handle,
__entry->pairwise = pairwise;
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
- "key_index: %u, pairwise: %s, mac addr: " MAC_PR_FMT,
+ "key_index: %u, pairwise: %s, mac addr: %pM",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
__entry->key_index, BOOL_TO_STR(__entry->pairwise),
- MAC_PR_ARG(mac_addr))
+ __entry->mac_addr)
);
DEFINE_EVENT(key_handle, rdev_get_key,
@@ -496,10 +494,10 @@ TRACE_EVENT(rdev_add_key,
),
TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, "
"key_index: %u, mode: %u, pairwise: %s, "
- "mac addr: " MAC_PR_FMT,
+ "mac addr: %pM",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
__entry->key_index, __entry->mode,
- BOOL_TO_STR(__entry->pairwise), MAC_PR_ARG(mac_addr))
+ BOOL_TO_STR(__entry->pairwise), __entry->mac_addr)
);
TRACE_EVENT(rdev_set_default_key,
@@ -813,11 +811,11 @@ DECLARE_EVENT_CLASS(station_add_change,
__entry->opmode_notif_used =
params->link_sta_params.opmode_notif_used;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
", station flags mask: %u, station flags set: %u, "
"station modify mask: %u, listen interval: %d, aid: %u, "
"plink action: %u, plink state: %u, uapsd queues: %u, vlan:%s",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
__entry->sta_flags_mask, __entry->sta_flags_set,
__entry->sta_modify_mask, __entry->listen_interval,
__entry->aid, __entry->plink_action, __entry->plink_state,
@@ -849,8 +847,8 @@ DECLARE_EVENT_CLASS(wiphy_netdev_mac_evt,
NETDEV_ASSIGN;
MAC_ASSIGN(sta_mac, mac);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", mac: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac)
);
DECLARE_EVENT_CLASS(station_del,
@@ -871,9 +869,9 @@ DECLARE_EVENT_CLASS(station_del,
__entry->subtype = params->subtype;
__entry->reason_code = params->reason_code;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
", subtype: %u, reason_code: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
__entry->subtype, __entry->reason_code)
);
@@ -909,8 +907,8 @@ TRACE_EVENT(rdev_dump_station,
MAC_ASSIGN(sta_mac, mac);
__entry->idx = _idx;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT ", idx: %d",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(sta_mac),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM, idx: %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->sta_mac,
__entry->idx)
);
@@ -947,9 +945,9 @@ DECLARE_EVENT_CLASS(mpath_evt,
MAC_ASSIGN(dst, dst);
MAC_ASSIGN(next_hop, next_hop);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: " MAC_PR_FMT ", next hop: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dst),
- MAC_PR_ARG(next_hop))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: %pM, next hop: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dst,
+ __entry->next_hop)
);
DEFINE_EVENT(mpath_evt, rdev_add_mpath,
@@ -988,10 +986,9 @@ TRACE_EVENT(rdev_dump_mpath,
MAC_ASSIGN(next_hop, next_hop);
__entry->idx = _idx;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: "
- MAC_PR_FMT ", next hop: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, MAC_PR_ARG(dst),
- MAC_PR_ARG(next_hop))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: %pM, next hop: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, __entry->dst,
+ __entry->next_hop)
);
TRACE_EVENT(rdev_get_mpp,
@@ -1010,9 +1007,9 @@ TRACE_EVENT(rdev_get_mpp,
MAC_ASSIGN(dst, dst);
MAC_ASSIGN(mpp, mpp);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: " MAC_PR_FMT
- ", mpp: " MAC_PR_FMT, WIPHY_PR_ARG, NETDEV_PR_ARG,
- MAC_PR_ARG(dst), MAC_PR_ARG(mpp))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", destination: %pM"
+ ", mpp: %pM", WIPHY_PR_ARG, NETDEV_PR_ARG,
+ __entry->dst, __entry->mpp)
);
TRACE_EVENT(rdev_dump_mpp,
@@ -1033,10 +1030,9 @@ TRACE_EVENT(rdev_dump_mpp,
MAC_ASSIGN(mpp, mpp);
__entry->idx = _idx;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: "
- MAC_PR_FMT ", mpp: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, MAC_PR_ARG(dst),
- MAC_PR_ARG(mpp))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", index: %d, destination: %pM, mpp: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->idx, __entry->dst,
+ __entry->mpp)
);
TRACE_EVENT(rdev_return_int_mpath_info,
@@ -1243,9 +1239,9 @@ TRACE_EVENT(rdev_auth,
eth_zero_addr(__entry->bssid);
__entry->auth_type = req->auth_type;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: " MAC_PR_FMT,
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", auth type: %d, bssid: %pM",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->auth_type,
- MAC_PR_ARG(bssid))
+ __entry->bssid)
);
TRACE_EVENT(rdev_assoc,
@@ -1294,10 +1290,10 @@ TRACE_EVENT(rdev_assoc,
memcpy(__get_dynamic_array(fils_nonces),
req->fils_nonces, 2 * FILS_NONCE_LEN);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
- ", previous bssid: " MAC_PR_FMT ", use mfp: %s, flags: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid),
- MAC_PR_ARG(prev_bssid), BOOL_TO_STR(__entry->use_mfp),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
+ ", previous bssid: %pM, use mfp: %s, flags: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
+ __entry->prev_bssid, BOOL_TO_STR(__entry->use_mfp),
__entry->flags)
);
@@ -1317,8 +1313,8 @@ TRACE_EVENT(rdev_deauth,
MAC_ASSIGN(bssid, req->bssid);
__entry->reason_code = req->reason_code;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", reason: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, reason: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
__entry->reason_code)
);
@@ -1340,9 +1336,9 @@ TRACE_EVENT(rdev_disassoc,
__entry->reason_code = req->reason_code;
__entry->local_state_change = req->local_state_change;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
", reason: %u, local state change: %s",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
__entry->reason_code,
BOOL_TO_STR(__entry->local_state_change))
);
@@ -1413,12 +1409,12 @@ TRACE_EVENT(rdev_connect,
__entry->flags = sme->flags;
MAC_ASSIGN(prev_bssid, sme->prev_bssid);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
", ssid: %s, auth type: %d, privacy: %s, wpa versions: %u, "
- "flags: %u, previous bssid: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->ssid,
+ "flags: %u, previous bssid: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid,
__entry->auth_type, BOOL_TO_STR(__entry->privacy),
- __entry->wpa_versions, __entry->flags, MAC_PR_ARG(prev_bssid))
+ __entry->wpa_versions, __entry->flags, __entry->prev_bssid)
);
TRACE_EVENT(rdev_update_connect_params,
@@ -1542,8 +1538,8 @@ TRACE_EVENT(rdev_join_ibss,
memset(__entry->ssid, 0, IEEE80211_MAX_SSID_LEN + 1);
memcpy(__entry->ssid, params->ssid, params->ssid_len);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", ssid: %s",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid), __entry->ssid)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM, ssid: %s",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid, __entry->ssid)
);
TRACE_EVENT(rdev_join_ocb,
@@ -1664,9 +1660,9 @@ TRACE_EVENT(rdev_set_bitrate_mask,
__entry->link_id = link_id;
MAC_ASSIGN(peer, peer);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: " MAC_PR_FMT,
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", link_id: %d, peer: %pM",
WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->link_id,
- MAC_PR_ARG(peer))
+ __entry->peer)
);
TRACE_EVENT(rdev_update_mgmt_frame_registrations,
@@ -1810,10 +1806,10 @@ TRACE_EVENT(rdev_tdls_mgmt,
__entry->initiator = initiator;
memcpy(__get_dynamic_array(buf), buf, len);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", action_code: %u, "
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, action_code: %u, "
"dialog_token: %u, status_code: %u, peer_capability: %u "
"initiator: %s buf: %#.2x ",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer,
__entry->action_code, __entry->dialog_token,
__entry->status_code, __entry->peer_capability,
BOOL_TO_STR(__entry->initiator),
@@ -1893,8 +1889,8 @@ TRACE_EVENT(rdev_tdls_oper,
MAC_ASSIGN(peer, peer);
__entry->oper = oper;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", oper: %d",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->oper)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, oper: %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->oper)
);
DECLARE_EVENT_CLASS(rdev_pmksa,
@@ -1911,8 +1907,8 @@ DECLARE_EVENT_CLASS(rdev_pmksa,
NETDEV_ASSIGN;
MAC_ASSIGN(bssid, pmksa->bssid);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(bssid))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid)
);
TRACE_EVENT(rdev_probe_client,
@@ -1929,8 +1925,8 @@ TRACE_EVENT(rdev_probe_client,
NETDEV_ASSIGN;
MAC_ASSIGN(peer, peer);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer)
);
DEFINE_EVENT(rdev_pmksa, rdev_set_pmksa,
@@ -2051,9 +2047,9 @@ TRACE_EVENT(rdev_tx_control_port,
__entry->unencrypted = unencrypted;
__entry->link_id = link_id;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ","
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM,"
" proto: 0x%x, unencrypted: %s, link: %d",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dest,
be16_to_cpu(__entry->proto),
BOOL_TO_STR(__entry->unencrypted),
__entry->link_id)
@@ -2392,8 +2388,8 @@ TRACE_EVENT(rdev_add_tx_ts,
__entry->user_prio = user_prio;
__entry->admitted_time = admitted_time;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, TSID %d, UP %d, time %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer,
__entry->tsid, __entry->user_prio, __entry->admitted_time)
);
@@ -2413,8 +2409,8 @@ TRACE_EVENT(rdev_del_tx_ts,
MAC_ASSIGN(peer, peer);
__entry->tsid = tsid;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM, TSID %d",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tsid)
);
TRACE_EVENT(rdev_tdls_channel_switch,
@@ -2435,9 +2431,9 @@ TRACE_EVENT(rdev_tdls_channel_switch,
MAC_ASSIGN(addr, addr);
CHAN_DEF_ASSIGN(chandef);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM"
" oper class %d, " CHAN_DEF_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->addr,
__entry->oper_class, CHAN_DEF_PR_ARG)
);
@@ -2455,8 +2451,8 @@ TRACE_EVENT(rdev_tdls_cancel_channel_switch,
NETDEV_ASSIGN;
MAC_ASSIGN(addr, addr);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(addr))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->addr)
);
TRACE_EVENT(rdev_set_pmk,
@@ -2488,9 +2484,9 @@ TRACE_EVENT(rdev_set_pmk,
pmk_conf->pmk_r0_name ? WLAN_PMK_NAME_LEN : 0);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM"
"pmk_len=%u, pmk: %s pmk_r0_name: %s", WIPHY_PR_ARG,
- NETDEV_PR_ARG, MAC_PR_ARG(aa), __entry->pmk_len,
+ NETDEV_PR_ARG, __entry->aa, __entry->pmk_len,
__print_array(__get_dynamic_array(pmk),
__get_dynamic_array_len(pmk), 1),
__entry->pmk_r0_name_len ?
@@ -2515,8 +2511,8 @@ TRACE_EVENT(rdev_del_pmk,
MAC_ASSIGN(aa, aa);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(aa))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->aa)
);
TRACE_EVENT(rdev_external_auth,
@@ -2528,6 +2524,7 @@ TRACE_EVENT(rdev_external_auth,
MAC_ENTRY(bssid)
__array(u8, ssid, IEEE80211_MAX_SSID_LEN + 1)
__field(u16, status)
+ MAC_ENTRY(mld_addr)
),
TP_fast_assign(WIPHY_ASSIGN;
NETDEV_ASSIGN;
@@ -2536,10 +2533,12 @@ TRACE_EVENT(rdev_external_auth,
memcpy(__entry->ssid, params->ssid.ssid,
params->ssid.ssid_len);
__entry->status = params->status;
+ MAC_ASSIGN(mld_addr, params->mld_addr);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: " MAC_PR_FMT
- ", ssid: %s, status: %u", WIPHY_PR_ARG, NETDEV_PR_ARG,
- __entry->bssid, __entry->ssid, __entry->status)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", bssid: %pM"
+ ", ssid: %s, status: %u, mld_addr: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->bssid,
+ __entry->ssid, __entry->status, __entry->mld_addr)
);
TRACE_EVENT(rdev_start_radar_detection,
@@ -2720,8 +2719,8 @@ TRACE_EVENT(rdev_update_owe_info,
__entry->status = owe_info->status;
memcpy(__get_dynamic_array(ie),
owe_info->ie, owe_info->ie_len);),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT
- " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM"
+ " status %d", WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer,
__entry->status)
);
@@ -2739,8 +2738,8 @@ TRACE_EVENT(rdev_probe_mesh_link,
NETDEV_ASSIGN;
MAC_ASSIGN(dest, dest);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(dest))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->dest)
);
TRACE_EVENT(rdev_set_tid_config,
@@ -2757,8 +2756,8 @@ TRACE_EVENT(rdev_set_tid_config,
NETDEV_ASSIGN;
MAC_ASSIGN(peer, tid_conf->peer);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer)
);
TRACE_EVENT(rdev_reset_tid_config,
@@ -2777,8 +2776,8 @@ TRACE_EVENT(rdev_reset_tid_config,
MAC_ASSIGN(peer, peer);
__entry->tids = tids;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", tids: 0x%x",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tids)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM, tids: 0x%x",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->tids)
);
TRACE_EVENT(rdev_set_sar_specs,
@@ -2881,8 +2880,8 @@ DECLARE_EVENT_CLASS(cfg80211_netdev_mac_evt,
NETDEV_ASSIGN;
MAC_ASSIGN(macaddr, macaddr);
),
- TP_printk(NETDEV_PR_FMT ", mac: " MAC_PR_FMT,
- NETDEV_PR_ARG, MAC_PR_ARG(macaddr))
+ TP_printk(NETDEV_PR_FMT ", mac: %pM",
+ NETDEV_PR_ARG, __entry->macaddr)
);
DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_notify_new_peer_candidate,
@@ -2920,8 +2919,8 @@ TRACE_EVENT(cfg80211_send_rx_assoc,
MAC_ASSIGN(ap_addr,
data->ap_mld_addr ?: data->links[0].bss->bssid);
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT,
- NETDEV_PR_ARG, MAC_PR_ARG(ap_addr))
+ TP_printk(NETDEV_PR_FMT ", %pM",
+ NETDEV_PR_ARG, __entry->ap_addr)
);
DECLARE_EVENT_CLASS(netdev_frame_event,
@@ -2981,8 +2980,8 @@ DECLARE_EVENT_CLASS(netdev_mac_evt,
NETDEV_ASSIGN;
MAC_ASSIGN(mac, mac)
),
- TP_printk(NETDEV_PR_FMT ", mac: " MAC_PR_FMT,
- NETDEV_PR_ARG, MAC_PR_ARG(mac))
+ TP_printk(NETDEV_PR_FMT ", mac: %pM",
+ NETDEV_PR_ARG, __entry->mac)
);
DEFINE_EVENT(netdev_mac_evt, cfg80211_send_auth_timeout,
@@ -3004,8 +3003,8 @@ TRACE_EVENT(cfg80211_send_assoc_failure,
MAC_ASSIGN(ap_addr, data->ap_mld_addr ?: data->bss[0]->bssid);
__entry->timeout = data->timeout;
),
- TP_printk(NETDEV_PR_FMT ", mac: " MAC_PR_FMT ", timeout: %d",
- NETDEV_PR_ARG, MAC_PR_ARG(ap_addr), __entry->timeout)
+ TP_printk(NETDEV_PR_FMT ", mac: %pM, timeout: %d",
+ NETDEV_PR_ARG, __entry->ap_addr, __entry->timeout)
);
TRACE_EVENT(cfg80211_michael_mic_failure,
@@ -3027,8 +3026,8 @@ TRACE_EVENT(cfg80211_michael_mic_failure,
if (tsc)
memcpy(__entry->tsc, tsc, 6);
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT ", key type: %d, key id: %d, tsc: %pm",
- NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->key_type,
+ TP_printk(NETDEV_PR_FMT ", %pM, key type: %d, key id: %d, tsc: %pm",
+ NETDEV_PR_ARG, __entry->addr, __entry->key_type,
__entry->key_id, __entry->tsc)
);
@@ -3104,8 +3103,8 @@ TRACE_EVENT(cfg80211_new_sta,
MAC_ASSIGN(mac_addr, mac_addr);
SINFO_ASSIGN;
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT,
- NETDEV_PR_ARG, MAC_PR_ARG(mac_addr))
+ TP_printk(NETDEV_PR_FMT ", %pM",
+ NETDEV_PR_ARG, __entry->mac_addr)
);
DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_del_sta,
@@ -3182,8 +3181,8 @@ TRACE_EVENT(cfg80211_rx_control_port,
__entry->proto = be16_to_cpu(skb->protocol);
__entry->unencrypted = unencrypted;
),
- TP_printk(NETDEV_PR_FMT ", len=%d, " MAC_PR_FMT ", proto: 0x%x, unencrypted: %s",
- NETDEV_PR_ARG, __entry->len, MAC_PR_ARG(from),
+ TP_printk(NETDEV_PR_FMT ", len=%d, %pM, proto: 0x%x, unencrypted: %s",
+ NETDEV_PR_ARG, __entry->len, __entry->from,
__entry->proto, BOOL_TO_STR(__entry->unencrypted))
);
@@ -3245,39 +3244,47 @@ TRACE_EVENT(cfg80211_chandef_dfs_required,
TRACE_EVENT(cfg80211_ch_switch_notify,
TP_PROTO(struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id),
- TP_ARGS(netdev, chandef, link_id),
+ unsigned int link_id,
+ u16 punct_bitmap),
+ TP_ARGS(netdev, chandef, link_id, punct_bitmap),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(unsigned int, link_id)
+ __field(u16, punct_bitmap)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->link_id = link_id;
+ __entry->punct_bitmap = punct_bitmap;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id,
+ __entry->punct_bitmap)
);
TRACE_EVENT(cfg80211_ch_switch_started_notify,
TP_PROTO(struct net_device *netdev,
struct cfg80211_chan_def *chandef,
- unsigned int link_id),
- TP_ARGS(netdev, chandef, link_id),
+ unsigned int link_id,
+ u16 punct_bitmap),
+ TP_ARGS(netdev, chandef, link_id, punct_bitmap),
TP_STRUCT__entry(
NETDEV_ENTRY
CHAN_DEF_ENTRY
__field(unsigned int, link_id)
+ __field(u16, punct_bitmap)
),
TP_fast_assign(
NETDEV_ASSIGN;
CHAN_DEF_ASSIGN(chandef);
__entry->link_id = link_id;
+ __entry->punct_bitmap = punct_bitmap;
),
- TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d",
- NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id)
+ TP_printk(NETDEV_PR_FMT ", " CHAN_DEF_PR_FMT ", link:%d, punct_bitmap:%u",
+ NETDEV_PR_ARG, CHAN_DEF_PR_ARG, __entry->link_id,
+ __entry->punct_bitmap)
);
TRACE_EVENT(cfg80211_radar_event,
@@ -3324,7 +3331,7 @@ DECLARE_EVENT_CLASS(cfg80211_rx_evt,
NETDEV_ASSIGN;
MAC_ASSIGN(addr, addr);
),
- TP_printk(NETDEV_PR_FMT ", " MAC_PR_FMT, NETDEV_PR_ARG, MAC_PR_ARG(addr))
+ TP_printk(NETDEV_PR_FMT ", %pM", NETDEV_PR_ARG, __entry->addr)
);
DEFINE_EVENT(cfg80211_rx_evt, cfg80211_rx_spurious_frame,
@@ -3351,8 +3358,8 @@ TRACE_EVENT(cfg80211_ibss_joined,
MAC_ASSIGN(bssid, bssid);
CHAN_ASSIGN(channel);
),
- TP_printk(NETDEV_PR_FMT ", bssid: " MAC_PR_FMT ", " CHAN_PR_FMT,
- NETDEV_PR_ARG, MAC_PR_ARG(bssid), CHAN_PR_ARG)
+ TP_printk(NETDEV_PR_FMT ", bssid: %pM, " CHAN_PR_FMT,
+ NETDEV_PR_ARG, __entry->bssid, CHAN_PR_ARG)
);
TRACE_EVENT(cfg80211_probe_status,
@@ -3371,8 +3378,8 @@ TRACE_EVENT(cfg80211_probe_status,
__entry->cookie = cookie;
__entry->acked = acked;
),
- TP_printk(NETDEV_PR_FMT " addr:" MAC_PR_FMT ", cookie: %llu, acked: %s",
- NETDEV_PR_ARG, MAC_PR_ARG(addr), __entry->cookie,
+ TP_printk(NETDEV_PR_FMT " addr:%pM, cookie: %llu, acked: %s",
+ NETDEV_PR_ARG, __entry->addr, __entry->cookie,
BOOL_TO_STR(__entry->acked))
);
@@ -3389,8 +3396,8 @@ TRACE_EVENT(cfg80211_cqm_pktloss_notify,
MAC_ASSIGN(peer, peer);
__entry->num_packets = num_packets;
),
- TP_printk(NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", num of lost packets: %u",
- NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->num_packets)
+ TP_printk(NETDEV_PR_FMT ", peer: %pM, num of lost packets: %u",
+ NETDEV_PR_ARG, __entry->peer, __entry->num_packets)
);
DEFINE_EVENT(cfg80211_netdev_mac_evt, cfg80211_gtk_rekey_notify,
@@ -3414,8 +3421,8 @@ TRACE_EVENT(cfg80211_pmksa_candidate_notify,
MAC_ASSIGN(bssid, bssid);
__entry->preauth = preauth;
),
- TP_printk(NETDEV_PR_FMT ", index:%d, bssid: " MAC_PR_FMT ", pre auth: %s",
- NETDEV_PR_ARG, __entry->index, MAC_PR_ARG(bssid),
+ TP_printk(NETDEV_PR_FMT ", index:%d, bssid: %pM, pre auth: %s",
+ NETDEV_PR_ARG, __entry->index, __entry->bssid,
BOOL_TO_STR(__entry->preauth))
);
@@ -3455,8 +3462,8 @@ TRACE_EVENT(cfg80211_tdls_oper_request,
__entry->oper = oper;
__entry->reason_code = reason_code;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT ", oper: %d, reason_code %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->oper,
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM, oper: %d, reason_code %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer, __entry->oper,
__entry->reason_code)
);
@@ -3494,10 +3501,10 @@ TRACE_EVENT(cfg80211_scan_done,
MAC_ASSIGN(tsf_bssid, info->tsf_bssid);
}
),
- TP_printk("aborted: %s, scan start (TSF): %llu, tsf_bssid: " MAC_PR_FMT,
+ TP_printk("aborted: %s, scan start (TSF): %llu, tsf_bssid: %pM",
BOOL_TO_STR(__entry->aborted),
(unsigned long long)__entry->scan_start_tsf,
- MAC_PR_ARG(tsf_bssid))
+ __entry->tsf_bssid)
);
DECLARE_EVENT_CLASS(wiphy_id_evt,
@@ -3546,9 +3553,9 @@ TRACE_EVENT(cfg80211_get_bss,
__entry->bss_type = bss_type;
__entry->privacy = privacy;
),
- TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT ", %pM"
", buf: %#.2x, bss_type: %d, privacy: %d",
- WIPHY_PR_ARG, CHAN_PR_ARG, MAC_PR_ARG(bssid),
+ WIPHY_PR_ARG, CHAN_PR_ARG, __entry->bssid,
((u8 *)__get_dynamic_array(ssid))[0], __entry->bss_type,
__entry->privacy)
);
@@ -3579,11 +3586,11 @@ TRACE_EVENT(cfg80211_inform_bss_frame,
MAC_ASSIGN(parent_bssid, data->parent_bssid);
),
TP_printk(WIPHY_PR_FMT ", " CHAN_PR_FMT
- "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: "
- MAC_PR_FMT, WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
+ "(scan_width: %d) signal: %d, tsb:%llu, detect_tsf:%llu, tsf_bssid: %pM",
+ WIPHY_PR_ARG, CHAN_PR_ARG, __entry->scan_width,
__entry->signal, (unsigned long long)__entry->ts_boottime,
(unsigned long long)__entry->parent_tsf,
- MAC_PR_ARG(parent_bssid))
+ __entry->parent_bssid)
);
DECLARE_EVENT_CLASS(cfg80211_bss_evt,
@@ -3597,7 +3604,7 @@ DECLARE_EVENT_CLASS(cfg80211_bss_evt,
MAC_ASSIGN(bssid, pub->bssid);
CHAN_ASSIGN(pub->channel);
),
- TP_printk(MAC_PR_FMT ", " CHAN_PR_FMT, MAC_PR_ARG(bssid), CHAN_PR_ARG)
+ TP_printk("%pM, " CHAN_PR_FMT, __entry->bssid, CHAN_PR_ARG)
);
DEFINE_EVENT(cfg80211_bss_evt, cfg80211_return_bss,
@@ -3689,8 +3696,8 @@ TRACE_EVENT(cfg80211_ft_event,
memcpy(__get_dynamic_array(ric_ies), ft_event->ric_ies,
ft_event->ric_ies_len);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(target_ap))
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", target_ap: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->target_ap)
);
TRACE_EVENT(cfg80211_stop_iface,
@@ -3724,10 +3731,10 @@ TRACE_EVENT(cfg80211_pmsr_report,
__entry->cookie = cookie;
MAC_ASSIGN(addr, addr);
),
- TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld, " MAC_PR_FMT,
+ TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", cookie:%lld, %pM",
WIPHY_PR_ARG, WDEV_PR_ARG,
(unsigned long long)__entry->cookie,
- MAC_PR_ARG(addr))
+ __entry->addr)
);
TRACE_EVENT(cfg80211_pmsr_complete,
@@ -3749,20 +3756,30 @@ TRACE_EVENT(cfg80211_pmsr_complete,
);
TRACE_EVENT(cfg80211_update_owe_info_event,
- TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
- struct cfg80211_update_owe_info *owe_info),
- TP_ARGS(wiphy, netdev, owe_info),
- TP_STRUCT__entry(WIPHY_ENTRY
- NETDEV_ENTRY
- MAC_ENTRY(peer)
- __dynamic_array(u8, ie, owe_info->ie_len)),
- TP_fast_assign(WIPHY_ASSIGN;
- NETDEV_ASSIGN;
- MAC_ASSIGN(peer, owe_info->peer);
- memcpy(__get_dynamic_array(ie), owe_info->ie,
- owe_info->ie_len);),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: " MAC_PR_FMT,
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
+ TP_PROTO(struct wiphy *wiphy, struct net_device *netdev,
+ struct cfg80211_update_owe_info *owe_info),
+ TP_ARGS(wiphy, netdev, owe_info),
+ TP_STRUCT__entry(
+ WIPHY_ENTRY
+ NETDEV_ENTRY
+ MAC_ENTRY(peer)
+ __dynamic_array(u8, ie, owe_info->ie_len)
+ __field(int, assoc_link_id)
+ MAC_ENTRY(peer_mld_addr)
+ ),
+ TP_fast_assign(
+ WIPHY_ASSIGN;
+ NETDEV_ASSIGN;
+ MAC_ASSIGN(peer, owe_info->peer);
+ memcpy(__get_dynamic_array(ie), owe_info->ie,
+ owe_info->ie_len);
+ __entry->assoc_link_id = owe_info->assoc_link_id;
+ MAC_ASSIGN(peer_mld_addr, owe_info->peer_mld_addr);
+ ),
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", peer: %pM,"
+ " assoc_link_id: %d, peer_mld_addr: %pM",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->peer,
+ __entry->assoc_link_id, __entry->peer_mld_addr)
);
TRACE_EVENT(cfg80211_bss_color_notify,
@@ -3800,8 +3817,8 @@ TRACE_EVENT(cfg80211_assoc_comeback,
MAC_ASSIGN(ap_addr, ap_addr);
__entry->timeout = timeout;
),
- TP_printk(WDEV_PR_FMT ", " MAC_PR_FMT ", timeout: %u TUs",
- WDEV_PR_ARG, MAC_PR_ARG(ap_addr), __entry->timeout)
+ TP_printk(WDEV_PR_FMT ", %pM, timeout: %u TUs",
+ WDEV_PR_ARG, __entry->ap_addr, __entry->timeout)
);
DECLARE_EVENT_CLASS(link_station_add_mod,
@@ -3859,10 +3876,10 @@ DECLARE_EVENT_CLASS(link_station_add_mod,
memcpy(__get_dynamic_array(eht_capa), params->eht_capa,
params->eht_capa_len);
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
- ", link mac: " MAC_PR_FMT ", link id: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(mld_mac),
- MAC_PR_ARG(link_mac), __entry->link_id)
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
+ ", link mac: %pM, link id: %u",
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac,
+ __entry->link_mac, __entry->link_id)
);
DEFINE_EVENT(link_station_add_mod, rdev_add_link_station,
@@ -3895,9 +3912,9 @@ TRACE_EVENT(rdev_del_link_station,
memcpy(__entry->mld_mac, params->mld_mac, 6);
__entry->link_id = params->link_id;
),
- TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: " MAC_PR_FMT
+ TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", station mac: %pM"
", link id: %u",
- WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(mld_mac),
+ WIPHY_PR_ARG, NETDEV_PR_ARG, __entry->mld_mac,
__entry->link_id)
);
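
The trace.h conversion is mechanical and applies the same way to any out-of-tree event still using the removed macros (sketch):

	/* Before: the removed helper macros. */
	TP_printk("peer: " MAC_PR_FMT, MAC_PR_ARG(peer))

	/* After: %pM takes a pointer to the 6-byte array directly. */
	TP_printk("peer: %pM", __entry->peer)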
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 8f403f9fe816..d1a89e82ead0 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -542,6 +542,64 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr)
}
EXPORT_SYMBOL(ieee80211_get_mesh_hdrlen);
+bool ieee80211_get_8023_tunnel_proto(const void *hdr, __be16 *proto)
+{
+ const __be16 *hdr_proto = hdr + ETH_ALEN;
+
+ if (!(ether_addr_equal(hdr, rfc1042_header) &&
+ *hdr_proto != htons(ETH_P_AARP) &&
+ *hdr_proto != htons(ETH_P_IPX)) &&
+ !ether_addr_equal(hdr, bridge_tunnel_header))
+ return false;
+
+ *proto = *hdr_proto;
+
+ return true;
+}
+EXPORT_SYMBOL(ieee80211_get_8023_tunnel_proto);
+
+int ieee80211_strip_8023_mesh_hdr(struct sk_buff *skb)
+{
+ const void *mesh_addr;
+ struct {
+ struct ethhdr eth;
+ u8 flags;
+ } payload;
+ int hdrlen;
+ int ret;
+
+ ret = skb_copy_bits(skb, 0, &payload, sizeof(payload));
+ if (ret)
+ return ret;
+
+ hdrlen = sizeof(payload.eth) + __ieee80211_get_mesh_hdrlen(payload.flags);
+
+ if (likely(pskb_may_pull(skb, hdrlen + 8) &&
+ ieee80211_get_8023_tunnel_proto(skb->data + hdrlen,
+ &payload.eth.h_proto)))
+ hdrlen += ETH_ALEN + 2;
+ else if (!pskb_may_pull(skb, hdrlen))
+ return -EINVAL;
+
+ mesh_addr = skb->data + sizeof(payload.eth) + ETH_ALEN;
+ switch (payload.flags & MESH_FLAGS_AE) {
+ case MESH_FLAGS_AE_A4:
+ memcpy(&payload.eth.h_source, mesh_addr, ETH_ALEN);
+ break;
+ case MESH_FLAGS_AE_A5_A6:
+ memcpy(&payload.eth, mesh_addr, 2 * ETH_ALEN);
+ break;
+ default:
+ break;
+ }
+
+ pskb_pull(skb, hdrlen - sizeof(payload.eth));
+ memcpy(skb->data, &payload.eth, sizeof(payload.eth));
+
+ return 0;
+}
+EXPORT_SYMBOL(ieee80211_strip_8023_mesh_hdr);
+
int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
const u8 *addr, enum nl80211_iftype iftype,
u8 data_offset, bool is_amsdu)
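
ieee80211_get_8023_tunnel_proto() centralizes the RFC 1042 / bridge-tunnel test that was previously open-coded at each call site; a hedged caller sketch (skb, hdrlen and ehdr are illustrative):

	__be16 proto;

	if (pskb_may_pull(skb, hdrlen + 8) &&
	    ieee80211_get_8023_tunnel_proto(skb->data + hdrlen, &proto)) {
		/* consume the 6-byte SNAP header plus 2-byte EtherType */
		hdrlen += ETH_ALEN + 2;
		ehdr->h_proto = proto;
	}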
@@ -553,7 +611,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
} payload;
struct ethhdr tmp;
u16 hdrlen;
- u8 mesh_flags = 0;
if (unlikely(!ieee80211_is_data_present(hdr->frame_control)))
return -1;
@@ -574,12 +631,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
memcpy(tmp.h_dest, ieee80211_get_DA(hdr), ETH_ALEN);
memcpy(tmp.h_source, ieee80211_get_SA(hdr), ETH_ALEN);
- if (iftype == NL80211_IFTYPE_MESH_POINT &&
- skb_copy_bits(skb, hdrlen, &mesh_flags, 1) < 0)
- return -1;
-
- mesh_flags &= MESH_FLAGS_AE;
-
switch (hdr->frame_control &
cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS)) {
case cpu_to_le16(IEEE80211_FCTL_TODS):
@@ -593,17 +644,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
iftype != NL80211_IFTYPE_AP_VLAN &&
iftype != NL80211_IFTYPE_STATION))
return -1;
- if (iftype == NL80211_IFTYPE_MESH_POINT) {
- if (mesh_flags == MESH_FLAGS_AE_A4)
- return -1;
- if (mesh_flags == MESH_FLAGS_AE_A5_A6 &&
- skb_copy_bits(skb, hdrlen +
- offsetof(struct ieee80211s_hdr, eaddr1),
- tmp.h_dest, 2 * ETH_ALEN) < 0)
- return -1;
-
- hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
- }
break;
case cpu_to_le16(IEEE80211_FCTL_FROMDS):
if ((iftype != NL80211_IFTYPE_STATION &&
@@ -612,16 +652,6 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
(is_multicast_ether_addr(tmp.h_dest) &&
ether_addr_equal(tmp.h_source, addr)))
return -1;
- if (iftype == NL80211_IFTYPE_MESH_POINT) {
- if (mesh_flags == MESH_FLAGS_AE_A5_A6)
- return -1;
- if (mesh_flags == MESH_FLAGS_AE_A4 &&
- skb_copy_bits(skb, hdrlen +
- offsetof(struct ieee80211s_hdr, eaddr1),
- tmp.h_source, ETH_ALEN) < 0)
- return -1;
- hdrlen += __ieee80211_get_mesh_hdrlen(mesh_flags);
- }
break;
case cpu_to_le16(0):
if (iftype != NL80211_IFTYPE_ADHOC &&
@@ -631,15 +661,11 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr,
break;
}
- if (likely(skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
- ((!is_amsdu && ether_addr_equal(payload.hdr, rfc1042_header) &&
- payload.proto != htons(ETH_P_AARP) &&
- payload.proto != htons(ETH_P_IPX)) ||
- ether_addr_equal(payload.hdr, bridge_tunnel_header)))) {
- /* remove RFC1042 or Bridge-Tunnel encapsulation and
- * replace EtherType */
+ if (likely(!is_amsdu && iftype != NL80211_IFTYPE_MESH_POINT &&
+ skb_copy_bits(skb, hdrlen, &payload, sizeof(payload)) == 0 &&
+ ieee80211_get_8023_tunnel_proto(&payload, &tmp.h_proto))) {
+ /* remove RFC1042 or Bridge-Tunnel encapsulation */
hdrlen += ETH_ALEN + 2;
- tmp.h_proto = payload.proto;
skb_postpull_rcsum(skb, &payload, ETH_ALEN + 2);
} else {
tmp.h_proto = htons(skb->len - hdrlen);
@@ -711,7 +737,8 @@ __ieee80211_amsdu_copy_frag(struct sk_buff *skb, struct sk_buff *frame,
static struct sk_buff *
__ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
- int offset, int len, bool reuse_frag)
+ int offset, int len, bool reuse_frag,
+ int min_len)
{
struct sk_buff *frame;
int cur_len = len;
@@ -725,7 +752,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
* in the stack later.
*/
if (reuse_frag)
- cur_len = min_t(int, len, 32);
+ cur_len = min_t(int, len, min_len);
/*
* Allocate and reserve two bytes more for payload
@@ -735,6 +762,7 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
if (!frame)
return NULL;
+ frame->priority = skb->priority;
skb_reserve(frame, hlen + sizeof(struct ethhdr) + 2);
skb_copy_bits(skb, offset, skb_put(frame, cur_len), cur_len);
@@ -748,28 +776,72 @@ __ieee80211_amsdu_copy(struct sk_buff *skb, unsigned int hlen,
return frame;
}
+bool ieee80211_is_valid_amsdu(struct sk_buff *skb, bool mesh_hdr)
+{
+ int offset = 0, remaining, subframe_len, padding;
+
+ for (offset = 0; offset < skb->len; offset += subframe_len + padding) {
+ struct {
+ __be16 len;
+ u8 mesh_flags;
+ } hdr;
+ u16 len;
+
+ if (skb_copy_bits(skb, offset + 2 * ETH_ALEN, &hdr, sizeof(hdr)) < 0)
+ return false;
+
+ if (mesh_hdr)
+ len = le16_to_cpu(*(__le16 *)&hdr.len) +
+ __ieee80211_get_mesh_hdrlen(hdr.mesh_flags);
+ else
+ len = ntohs(hdr.len);
+
+ subframe_len = sizeof(struct ethhdr) + len;
+ padding = (4 - subframe_len) & 0x3;
+ remaining = skb->len - offset;
+
+ if (subframe_len > remaining)
+ return false;
+ }
+
+ return true;
+}
+EXPORT_SYMBOL(ieee80211_is_valid_amsdu);
+
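The validator above walks subframes the same way the deaggregation loop below does: each subframe is an Ethernet-style header plus its payload, and all but the last are padded to a 4-byte boundary, hence padding = (4 - subframe_len) & 0x3. A self-contained sketch of the walk with made-up lengths:

	/* Standalone sketch of the A-MSDU subframe walk; lengths are
	 * illustrative. Unsigned arithmetic makes the padding formula
	 * well defined even when subframe_len exceeds 4. */
	#include <stdio.h>

	int main(void)
	{
		unsigned int amsdu_len = 200;
		unsigned int payload_len[] = { 46, 60, 39 };
		unsigned int offset = 0;

		for (int i = 0; i < 3; i++) {
			unsigned int subframe_len = 14 + payload_len[i];
			unsigned int padding = (4 - subframe_len) & 0x3;

			if (subframe_len > amsdu_len - offset) {
				printf("subframe %d overruns the A-MSDU\n", i);
				return 1;
			}
			printf("subframe %d at %u, len %u, pad %u\n",
			       i, offset, subframe_len, padding);
			offset += subframe_len + padding;
		}
		return 0;
	}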
void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
const u8 *addr, enum nl80211_iftype iftype,
const unsigned int extra_headroom,
- const u8 *check_da, const u8 *check_sa)
+ const u8 *check_da, const u8 *check_sa,
+ bool mesh_control)
{
unsigned int hlen = ALIGN(extra_headroom, 4);
struct sk_buff *frame = NULL;
- u16 ethertype;
- u8 *payload;
int offset = 0, remaining;
- struct ethhdr eth;
+ struct {
+ struct ethhdr eth;
+ uint8_t flags;
+ } hdr;
bool reuse_frag = skb->head_frag && !skb_has_frag_list(skb);
bool reuse_skb = false;
bool last = false;
+ int copy_len = sizeof(hdr.eth);
+
+ if (iftype == NL80211_IFTYPE_MESH_POINT)
+ copy_len = sizeof(hdr);
while (!last) {
unsigned int subframe_len;
- int len;
+ int len, mesh_len = 0;
u8 padding;
- skb_copy_bits(skb, offset, &eth, sizeof(eth));
- len = ntohs(eth.h_proto);
+ skb_copy_bits(skb, offset, &hdr, copy_len);
+ if (iftype == NL80211_IFTYPE_MESH_POINT)
+ mesh_len = __ieee80211_get_mesh_hdrlen(hdr.flags);
+ if (mesh_control)
+ len = le16_to_cpu(*(__le16 *)&hdr.eth.h_proto) + mesh_len;
+ else
+ len = ntohs(hdr.eth.h_proto);
+
subframe_len = sizeof(struct ethhdr) + len;
padding = (4 - subframe_len) & 0x3;
@@ -778,16 +850,16 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
if (subframe_len > remaining)
goto purge;
/* mitigate A-MSDU aggregation injection attacks */
- if (ether_addr_equal(eth.h_dest, rfc1042_header))
+ if (ether_addr_equal(hdr.eth.h_dest, rfc1042_header))
goto purge;
offset += sizeof(struct ethhdr);
last = remaining <= subframe_len + padding;
/* FIXME: should we really accept multicast DA? */
- if ((check_da && !is_multicast_ether_addr(eth.h_dest) &&
- !ether_addr_equal(check_da, eth.h_dest)) ||
- (check_sa && !ether_addr_equal(check_sa, eth.h_source))) {
+ if ((check_da && !is_multicast_ether_addr(hdr.eth.h_dest) &&
+ !ether_addr_equal(check_da, hdr.eth.h_dest)) ||
+ (check_sa && !ether_addr_equal(check_sa, hdr.eth.h_source))) {
offset += len + padding;
continue;
}
@@ -799,7 +871,7 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
reuse_skb = true;
} else {
frame = __ieee80211_amsdu_copy(skb, hlen, offset, len,
- reuse_frag);
+ reuse_frag, 32 + mesh_len);
if (!frame)
goto purge;
@@ -810,16 +882,11 @@ void ieee80211_amsdu_to_8023s(struct sk_buff *skb, struct sk_buff_head *list,
frame->dev = skb->dev;
frame->priority = skb->priority;
- payload = frame->data;
- ethertype = (payload[6] << 8) | payload[7];
- if (likely((ether_addr_equal(payload, rfc1042_header) &&
- ethertype != ETH_P_AARP && ethertype != ETH_P_IPX) ||
- ether_addr_equal(payload, bridge_tunnel_header))) {
- eth.h_proto = htons(ethertype);
+ if (likely(iftype != NL80211_IFTYPE_MESH_POINT &&
+ ieee80211_get_8023_tunnel_proto(frame->data, &hdr.eth.h_proto)))
skb_pull(frame, ETH_ALEN + 2);
- }
- memcpy(skb_push(frame, sizeof(eth)), &eth, sizeof(eth));
+ memcpy(skb_push(frame, sizeof(hdr.eth)), &hdr.eth, sizeof(hdr.eth));
__skb_queue_tail(list, frame);
}
@@ -934,7 +1001,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev)
if (!wdev->connect_keys)
return;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++) {
+ for (i = 0; i < 4; i++) {
if (!wdev->connect_keys->params[i].cipher)
continue;
if (rdev_add_key(rdev, dev, -1, i, false, NULL,
diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c
index 8a24dfca75af..e3acfac7430a 100644
--- a/net/wireless/wext-compat.c
+++ b/net/wireless/wext-compat.c
@@ -439,7 +439,7 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev,
GFP_KERNEL);
if (!wdev->wext.keys)
return -ENOMEM;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
+ for (i = 0; i < 4; i++)
wdev->wext.keys->params[i].key =
wdev->wext.keys->data[i];
}
diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c
index fe8765c4075d..a125fd1fa134 100644
--- a/net/wireless/wext-core.c
+++ b/net/wireless/wext-core.c
@@ -636,7 +636,15 @@ void wireless_send_event(struct net_device * dev,
}
EXPORT_SYMBOL(wireless_send_event);
+#ifdef CONFIG_CFG80211_WEXT
+static void wireless_warn_cfg80211_wext(void)
+{
+ char name[sizeof(current->comm)];
+ pr_warn_once("warning: `%s' uses wireless extensions which will stop working for Wi-Fi 7 hardware; use nl80211\n",
+ get_task_comm(name, current));
+}
+#endif
/* IW handlers */
@@ -652,8 +660,12 @@ struct iw_statistics *get_wireless_stats(struct net_device *dev)
if (dev->ieee80211_ptr &&
dev->ieee80211_ptr->wiphy &&
dev->ieee80211_ptr->wiphy->wext &&
- dev->ieee80211_ptr->wiphy->wext->get_wireless_stats)
+ dev->ieee80211_ptr->wiphy->wext->get_wireless_stats) {
+ wireless_warn_cfg80211_wext();
+ if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ return NULL;
return dev->ieee80211_ptr->wiphy->wext->get_wireless_stats(dev);
+ }
#endif
/* not found */
@@ -690,8 +702,12 @@ static iw_handler get_handler(struct net_device *dev, unsigned int cmd)
const struct iw_handler_def *handlers = NULL;
#ifdef CONFIG_CFG80211_WEXT
- if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy)
+ if (dev->ieee80211_ptr && dev->ieee80211_ptr->wiphy) {
+ wireless_warn_cfg80211_wext();
+ if (dev->ieee80211_ptr->wiphy->flags & WIPHY_FLAG_SUPPORTS_MLO)
+ return NULL;
handlers = dev->ieee80211_ptr->wiphy->wext;
+ }
#endif
#ifdef CONFIG_WIRELESS_EXT
if (dev->wireless_handlers)
diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c
index 191c6d98c700..f231207ca210 100644
--- a/net/wireless/wext-sme.c
+++ b/net/wireless/wext-sme.c
@@ -47,7 +47,7 @@ int cfg80211_mgd_wext_connect(struct cfg80211_registered_device *rdev,
ck = kmemdup(wdev->wext.keys, sizeof(*ck), GFP_KERNEL);
if (!ck)
return -ENOMEM;
- for (i = 0; i < CFG80211_MAX_WEP_KEYS; i++)
+ for (i = 0; i < 4; i++)
ck->params[i].key = ck->data[i];
}
diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c
index 3b55502b2965..5c7ad301d742 100644
--- a/net/x25/af_x25.c
+++ b/net/x25/af_x25.c
@@ -482,6 +482,12 @@ static int x25_listen(struct socket *sock, int backlog)
int rc = -EOPNOTSUPP;
lock_sock(sk);
+ if (sock->state != SS_UNCONNECTED) {
+ rc = -EINVAL;
+ release_sock(sk);
+ return rc;
+ }
+
if (sk->sk_state != TCP_LISTEN) {
memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN);
sk->sk_max_ack_backlog = backlog;
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f0561b67c12..2ac58b282b5e 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -22,6 +22,7 @@
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/rculist.h>
+#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include <net/busy_poll.h>
#include <net/xdp.h>
@@ -511,7 +512,7 @@ static struct sk_buff *xsk_build_skb(struct xdp_sock *xs,
return skb;
}
-static int xsk_generic_xmit(struct sock *sk)
+static int __xsk_generic_xmit(struct sock *sk)
{
struct xdp_sock *xs = xdp_sk(sk);
u32 max_batch = TX_BATCH_SIZE;
@@ -594,22 +595,13 @@ out:
return err;
}
-static int xsk_xmit(struct sock *sk)
+static int xsk_generic_xmit(struct sock *sk)
{
- struct xdp_sock *xs = xdp_sk(sk);
int ret;
- if (unlikely(!(xs->dev->flags & IFF_UP)))
- return -ENETDOWN;
- if (unlikely(!xs->tx))
- return -ENOBUFS;
-
- if (xs->zc)
- return xsk_wakeup(xs, XDP_WAKEUP_TX);
-
/* Drop the RCU lock since the SKB path might sleep. */
rcu_read_unlock();
- ret = xsk_generic_xmit(sk);
+ ret = __xsk_generic_xmit(sk);
 	/* Reacquire RCU lock before going into common code. */
rcu_read_lock();
@@ -627,17 +619,31 @@ static bool xsk_no_wakeup(struct sock *sk)
#endif
}
+static int xsk_check_common(struct xdp_sock *xs)
+{
+ if (unlikely(!xsk_is_bound(xs)))
+ return -ENXIO;
+ if (unlikely(!(xs->dev->flags & IFF_UP)))
+ return -ENETDOWN;
+
+ return 0;
+}
+
static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len)
{
bool need_wait = !(m->msg_flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
struct xsk_buff_pool *pool;
+ int err;
- if (unlikely(!xsk_is_bound(xs)))
- return -ENXIO;
+ err = xsk_check_common(xs);
+ if (err)
+ return err;
if (unlikely(need_wait))
return -EOPNOTSUPP;
+ if (unlikely(!xs->tx))
+ return -ENOBUFS;
if (sk_can_busy_loop(sk)) {
if (xs->zc)
@@ -649,8 +655,11 @@ static int __xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len
return 0;
pool = xs->pool;
- if (pool->cached_need_wakeup & XDP_WAKEUP_TX)
- return xsk_xmit(sk);
+ if (pool->cached_need_wakeup & XDP_WAKEUP_TX) {
+ if (xs->zc)
+ return xsk_wakeup(xs, XDP_WAKEUP_TX);
+ return xsk_generic_xmit(sk);
+ }
return 0;
}
@@ -670,11 +679,11 @@ static int __xsk_recvmsg(struct socket *sock, struct msghdr *m, size_t len, int
bool need_wait = !(flags & MSG_DONTWAIT);
struct sock *sk = sock->sk;
struct xdp_sock *xs = xdp_sk(sk);
+ int err;
- if (unlikely(!xsk_is_bound(xs)))
- return -ENXIO;
- if (unlikely(!(xs->dev->flags & IFF_UP)))
- return -ENETDOWN;
+ err = xsk_check_common(xs);
+ if (err)
+ return err;
if (unlikely(!xs->rx))
return -ENOBUFS;
if (unlikely(need_wait))
@@ -713,21 +722,20 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock,
sock_poll_wait(file, sock, wait);
rcu_read_lock();
- if (unlikely(!xsk_is_bound(xs))) {
- rcu_read_unlock();
- return mask;
- }
+ if (xsk_check_common(xs))
+ goto skip_tx;
pool = xs->pool;
if (pool->cached_need_wakeup) {
if (xs->zc)
xsk_wakeup(xs, pool->cached_need_wakeup);
- else
+ else if (xs->tx)
/* Poll needs to drive Tx also in copy mode */
- xsk_xmit(sk);
+ xsk_generic_xmit(sk);
}
+skip_tx:
if (xs->rx && !xskq_prod_is_empty(xs->rx))
mask |= EPOLLIN | EPOLLRDNORM;
if (xs->tx && xsk_tx_writeable(xs))
@@ -845,7 +853,6 @@ static int xsk_release(struct socket *sock)
sock_orphan(sk);
sock->sk = NULL;
- sk_refcnt_debug_release(sk);
sock_put(sk);
return 0;
@@ -1295,8 +1302,6 @@ static int xsk_mmap(struct file *file, struct socket *sock,
unsigned long size = vma->vm_end - vma->vm_start;
struct xdp_sock *xs = xdp_sk(sock->sk);
struct xsk_queue *q = NULL;
- unsigned long pfn;
- struct page *qpg;
if (READ_ONCE(xs->state) != XSK_READY)
return -EBUSY;
@@ -1319,13 +1324,10 @@ static int xsk_mmap(struct file *file, struct socket *sock,
/* Matches the smp_wmb() in xsk_init_queue */
smp_rmb();
- qpg = virt_to_head_page(q->ring);
- if (size > page_size(qpg))
+ if (size > q->ring_vmalloc_size)
return -EINVAL;
- pfn = virt_to_phys(q->ring) >> PAGE_SHIFT;
- return remap_pfn_range(vma, vma->vm_start, pfn,
- size, vma->vm_page_prot);
+ return remap_vmalloc_range(vma, q->ring, 0);
}
static int xsk_notifier(struct notifier_block *this,
@@ -1396,8 +1398,6 @@ static void xsk_destruct(struct sock *sk)
if (!xp_put_pool(xs->pool))
xdp_put_umem(xs->umem, !xs->pool);
-
- sk_refcnt_debug_dec(sk);
}
static int xsk_create(struct net *net, struct socket *sock, int protocol,
@@ -1427,7 +1427,6 @@ static int xsk_create(struct net *net, struct socket *sock, int protocol,
sk->sk_family = PF_XDP;
sk->sk_destruct = xsk_destruct;
- sk_refcnt_debug_inc(sk);
sock_set_flag(sk, SOCK_RCU_FREE);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index ed6c71826d31..b2df1e0f8153 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -140,6 +140,10 @@ static void xp_disable_drv_zc(struct xsk_buff_pool *pool)
}
}
+#define NETDEV_XDP_ACT_ZC (NETDEV_XDP_ACT_BASIC | \
+ NETDEV_XDP_ACT_REDIRECT | \
+ NETDEV_XDP_ACT_XSK_ZEROCOPY)
+
int xp_assign_dev(struct xsk_buff_pool *pool,
struct net_device *netdev, u16 queue_id, u16 flags)
{
@@ -178,8 +182,7 @@ int xp_assign_dev(struct xsk_buff_pool *pool,
/* For copy-mode, we are done. */
return 0;
- if (!netdev->netdev_ops->ndo_bpf ||
- !netdev->netdev_ops->ndo_xsk_wakeup) {
+ if ((netdev->xdp_features & NETDEV_XDP_ACT_ZC) != NETDEV_XDP_ACT_ZC) {
err = -EOPNOTSUPP;
goto err_unreg_pool;
}
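With xdp_features, zero-copy eligibility becomes a single mask test instead of probing ndo callbacks; note the all-bits form (features & mask) == mask, since a nonzero intersection is not enough. A trivial demo of the pattern (flag values are illustrative, not the real NETDEV_XDP_ACT_* bits):

	/* All-bits-set test: zero-copy needs every flag in the mask. */
	#include <stdio.h>

	#define ACT_BASIC	(1 << 0)
	#define ACT_REDIRECT	(1 << 1)
	#define ACT_XSK_ZC	(1 << 2)
	#define ACT_ZC_MASK	(ACT_BASIC | ACT_REDIRECT | ACT_XSK_ZC)

	int main(void)
	{
		unsigned int features = ACT_BASIC | ACT_REDIRECT;

		if ((features & ACT_ZC_MASK) != ACT_ZC_MASK)
			printf("zero-copy unsupported, use copy mode\n");
		return 0;
	}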
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 6cf9586e5027..f8905400ee07 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -6,6 +6,7 @@
#include <linux/log2.h>
#include <linux/slab.h>
#include <linux/overflow.h>
+#include <linux/vmalloc.h>
#include <net/xdp_sock_drv.h>
#include "xsk_queue.h"
@@ -23,7 +24,6 @@ static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
{
struct xsk_queue *q;
- gfp_t gfp_flags;
size_t size;
q = kzalloc(sizeof(*q), GFP_KERNEL);
@@ -33,17 +33,16 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue)
q->nentries = nentries;
q->ring_mask = nentries - 1;
- gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN |
- __GFP_COMP | __GFP_NORETRY;
size = xskq_get_ring_size(q, umem_queue);
+ size = PAGE_ALIGN(size);
- q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags,
- get_order(size));
+ q->ring = vmalloc_user(size);
if (!q->ring) {
kfree(q);
return NULL;
}
+ q->ring_vmalloc_size = size;
return q;
}
@@ -52,6 +51,6 @@ void xskq_destroy(struct xsk_queue *q)
if (!q)
return;
- page_frag_free(q->ring);
+ vfree(q->ring);
kfree(q);
}
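The queue is now backed by vmalloc_user() and mapped with remap_vmalloc_range(), avoiding high-order page allocations and the manual pfn math that remap_pfn_range() required. A hedged, untested module sketch of the same mmap pattern (device name and sizes are made up, error handling is trimmed):

	/* Sketch only: expose a vmalloc_user() buffer via mmap, mirroring
	 * the ring mapping above. vmalloc_user() memory is zeroed and
	 * safe to hand to userspace. */
	#include <linux/module.h>
	#include <linux/miscdevice.h>
	#include <linux/vmalloc.h>
	#include <linux/fs.h>
	#include <linux/mm.h>

	#define RING_SIZE PAGE_ALIGN(16384)

	static void *ring;

	static int ring_mmap(struct file *file, struct vm_area_struct *vma)
	{
		unsigned long size = vma->vm_end - vma->vm_start;

		if (size > RING_SIZE)
			return -EINVAL;
		return remap_vmalloc_range(vma, ring, 0);
	}

	static const struct file_operations ring_fops = {
		.owner = THIS_MODULE,
		.mmap  = ring_mmap,
	};

	static struct miscdevice ring_dev = {
		.minor = MISC_DYNAMIC_MINOR,
		.name  = "demo_ring",	/* hypothetical device */
		.fops  = &ring_fops,
	};

	static int __init ring_init(void)
	{
		ring = vmalloc_user(RING_SIZE);
		if (!ring)
			return -ENOMEM;
		return misc_register(&ring_dev);
	}

	static void __exit ring_exit(void)
	{
		misc_deregister(&ring_dev);
		vfree(ring);
	}

	module_init(ring_init);
	module_exit(ring_exit);
	MODULE_LICENSE("GPL");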
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index c6fb6b763658..bfb2a7e50c26 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -45,6 +45,7 @@ struct xsk_queue {
struct xdp_ring *ring;
u64 invalid_descs;
u64 queue_empty_descs;
+ size_t ring_vmalloc_size;
};
/* The structure of the shared state of the rings is a simple
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 74a54295c164..872b80188e83 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -6,6 +6,7 @@
#include <net/espintcp.h>
#include <linux/skmsg.h>
#include <net/inet_common.h>
+#include <trace/events/sock.h>
#if IS_ENABLED(CONFIG_IPV6)
#include <net/ipv6_stubs.h>
#endif
@@ -397,6 +398,8 @@ static void espintcp_data_ready(struct sock *sk)
{
struct espintcp_ctx *ctx = espintcp_getctx(sk);
+ trace_sk_data_ready(sk);
+
strp_data_ready(&ctx->strp);
}
diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c
index a0f62fa02e06..8cbf45a8bcdc 100644
--- a/net/xfrm/xfrm_compat.c
+++ b/net/xfrm/xfrm_compat.c
@@ -5,6 +5,7 @@
* Based on code and translator idea by: Florian Westphal <fw@strlen.de>
*/
#include <linux/compat.h>
+#include <linux/nospec.h>
#include <linux/xfrm.h>
#include <net/xfrm.h>
@@ -302,7 +303,7 @@ static int xfrm_xlate64(struct sk_buff *dst, const struct nlmsghdr *nlh_src)
nla_for_each_attr(nla, attrs, len, remaining) {
int err;
- switch (type) {
+ switch (nlh_src->nlmsg_type) {
case XFRM_MSG_NEWSPDINFO:
err = xfrm_nla_cpy(dst, nla, nla_len(nla));
break;
@@ -437,6 +438,7 @@ static int xfrm_xlate32_attr(void *dst, const struct nlattr *nla,
NL_SET_ERR_MSG(extack, "Bad attribute");
return -EOPNOTSUPP;
}
+ type = array_index_nospec(type, XFRMA_MAX + 1);
if (nla_len(nla) < compat_policy[type].len) {
NL_SET_ERR_MSG(extack, "Attribute bad length");
return -EOPNOTSUPP;
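array_index_nospec() here closes a Spectre-v1 window: type comes from a userspace netlink attribute, and the bounds check alone does not constrain it under speculative execution. The pattern, as a kernel-style sketch with an illustrative table and bound:

	/* Kernel-style sketch of the Spectre-v1 pattern applied above:
	 * range-check a user-controlled index, then clamp it under
	 * speculation before indexing. Table and bound are illustrative. */
	#include <linux/nospec.h>

	#define DEMO_MAX 16

	static const int demo_table[DEMO_MAX + 1];

	static int demo_lookup(unsigned int type)
	{
		if (type > DEMO_MAX)
			return -EINVAL;
		/* no-op architecturally; bounds the index under speculation */
		type = array_index_nospec(type, DEMO_MAX + 1);
		return demo_table[type];
	}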
diff --git a/net/xfrm/xfrm_device.c b/net/xfrm/xfrm_device.c
index 4aff76c6f12e..95f1436bf6a2 100644
--- a/net/xfrm/xfrm_device.c
+++ b/net/xfrm/xfrm_device.c
@@ -309,7 +309,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
else
xso->type = XFRM_DEV_OFFLOAD_CRYPTO;
- err = dev->xfrmdev_ops->xdo_dev_state_add(x);
+ err = dev->xfrmdev_ops->xdo_dev_state_add(x, extack);
if (err) {
xso->dev = NULL;
xso->dir = 0;
@@ -326,7 +326,7 @@ int xfrm_dev_state_add(struct net *net, struct xfrm_state *x,
*/
WARN_ON(err == -EOPNOTSUPP && is_packet_offload);
if (err != -EOPNOTSUPP || is_packet_offload) {
- NL_SET_ERR_MSG(extack, "Device failed to offload this state");
+ NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this state");
return err;
}
}
@@ -383,14 +383,14 @@ int xfrm_dev_policy_add(struct net *net, struct xfrm_policy *xp,
return -EINVAL;
}
- err = dev->xfrmdev_ops->xdo_dev_policy_add(xp);
+ err = dev->xfrmdev_ops->xdo_dev_policy_add(xp, extack);
if (err) {
xdo->dev = NULL;
xdo->real_dev = NULL;
xdo->type = XFRM_DEV_OFFLOAD_UNSPECIFIED;
xdo->dir = 0;
netdev_put(dev, &xdo->dev_tracker);
- NL_SET_ERR_MSG(extack, "Device failed to offload this policy");
+ NL_SET_ERR_MSG_WEAK(extack, "Device failed to offload this policy");
return err;
}
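The _WEAK variant keeps any extack message a driver set from its xdo_dev_*_add() callback (which now receives extack directly) and only falls back to the generic core text. Roughly, as a paraphrased sketch rather than the exact kernel macro:

	/* Paraphrased sketch of the _WEAK behaviour: keep a message the
	 * driver already attached, otherwise set the generic one. */
	#define NL_SET_ERR_MSG_WEAK_SKETCH(extack, msg)		\
		do {						\
			if ((extack) && !(extack)->_msg)	\
				NL_SET_ERR_MSG((extack), msg);	\
		} while (0)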
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index c06e54a10540..436d29640ac2 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -279,8 +279,7 @@ static int xfrm6_remove_tunnel_encap(struct xfrm_state *x, struct sk_buff *skb)
goto out;
if (x->props.flags & XFRM_STATE_DECAP_DSCP)
- ipv6_copy_dscp(ipv6_get_dsfield(ipv6_hdr(skb)),
- ipipv6_hdr(skb));
+ ipv6_copy_dscp(XFRM_MODE_SKB_CB(skb)->tos, ipipv6_hdr(skb));
if (!(x->props.flags & XFRM_STATE_NOECN))
ipip6_ecn_decapsulate(skb);
diff --git a/net/xfrm/xfrm_interface_bpf.c b/net/xfrm/xfrm_interface_bpf.c
index 1ef2162cebcf..d74f3fd20f2b 100644
--- a/net/xfrm/xfrm_interface_bpf.c
+++ b/net/xfrm/xfrm_interface_bpf.c
@@ -39,8 +39,7 @@ __diag_ignore_all("-Wmissing-prototypes",
* @to - Pointer to memory to which the metadata will be copied
* Cannot be NULL
*/
-__used noinline
-int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
+__bpf_kfunc int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
{
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
struct xfrm_md_info *info;
@@ -62,9 +61,7 @@ int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx, struct bpf_xfrm_info *to)
* @from - Pointer to memory from which the metadata will be copied
* Cannot be NULL
*/
-__used noinline
-int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
- const struct bpf_xfrm_info *from)
+__bpf_kfunc int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx, const struct bpf_xfrm_info *from)
{
struct sk_buff *skb = (struct sk_buff *)skb_ctx;
struct metadata_dst *md_dst;
diff --git a/net/xfrm/xfrm_interface_core.c b/net/xfrm/xfrm_interface_core.c
index 1f99dc469027..35279c220bd7 100644
--- a/net/xfrm/xfrm_interface_core.c
+++ b/net/xfrm/xfrm_interface_core.c
@@ -310,6 +310,52 @@ static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
skb->mark = 0;
}
+static int xfrmi_input(struct sk_buff *skb, int nexthdr, __be32 spi,
+ int encap_type, unsigned short family)
+{
+ struct sec_path *sp;
+
+ sp = skb_sec_path(skb);
+ if (sp && (sp->len || sp->olen) &&
+ !xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family))
+ goto discard;
+
+ XFRM_SPI_SKB_CB(skb)->family = family;
+ if (family == AF_INET) {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct iphdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip4 = NULL;
+ } else {
+ XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+ XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+ }
+
+ return xfrm_input(skb, nexthdr, spi, encap_type);
+discard:
+ kfree_skb(skb);
+ return 0;
+}
+
+static int xfrmi4_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, ip_hdr(skb)->protocol, 0, 0, AF_INET);
+}
+
+static int xfrmi6_rcv(struct sk_buff *skb)
+{
+ return xfrmi_input(skb, skb_network_header(skb)[IP6CB(skb)->nhoff],
+ 0, 0, AF_INET6);
+}
+
+static int xfrmi4_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET);
+}
+
+static int xfrmi6_input(struct sk_buff *skb, int nexthdr, __be32 spi, int encap_type)
+{
+ return xfrmi_input(skb, nexthdr, spi, encap_type, AF_INET6);
+}
+
static int xfrmi_rcv_cb(struct sk_buff *skb, int err)
{
const struct xfrm_mode *inner_mode;
@@ -945,8 +991,8 @@ static struct pernet_operations xfrmi_net_ops = {
};
static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
- .handler = xfrm6_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi6_rcv,
+ .input_handler = xfrmi6_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi6_err,
.priority = 10,
@@ -996,8 +1042,8 @@ static struct xfrm6_tunnel xfrmi_ip6ip_handler __read_mostly = {
#endif
static struct xfrm4_protocol xfrmi_esp4_protocol __read_mostly = {
- .handler = xfrm4_rcv,
- .input_handler = xfrm_input,
+ .handler = xfrmi4_rcv,
+ .input_handler = xfrmi4_input,
.cb_handler = xfrmi_rcv_cb,
.err_handler = xfrmi4_err,
.priority = 10,
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
index e9eb82c5457d..5c61ec04b839 100644
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -336,7 +336,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.hard_use_expires_seconds) {
time64_t tmo = xp->lft.hard_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -354,7 +354,7 @@ static void xfrm_policy_timer(struct timer_list *t)
}
if (xp->lft.soft_use_expires_seconds) {
time64_t tmo = xp->lft.soft_use_expires_seconds +
- (xp->curlft.use_time ? : xp->curlft.add_time) - now;
+ (READ_ONCE(xp->curlft.use_time) ? : xp->curlft.add_time) - now;
if (tmo <= 0) {
warn = 1;
tmo = XFRM_KM_TIMEOUT;
@@ -3661,7 +3661,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
return 1;
}
- pol->curlft.use_time = ktime_get_real_seconds();
+ /* This lockless write can happen from different cpus. */
+ WRITE_ONCE(pol->curlft.use_time, ktime_get_real_seconds());
pols[0] = pol;
npols++;
@@ -3676,7 +3677,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
xfrm_pol_put(pols[0]);
return 0;
}
- pols[1]->curlft.use_time = ktime_get_real_seconds();
+ /* This write can happen from different cpus. */
+ WRITE_ONCE(pols[1]->curlft.use_time,
+ ktime_get_real_seconds());
npols++;
}
}
@@ -3742,6 +3745,9 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
goto reject;
}
+ if (if_id)
+ secpath_reset(skb);
+
xfrm_pols_put(pols, npols);
return 1;
}
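curlft.use_time is stamped from the packet path on any CPU while the policy timer reads it, so the plain accesses become READ_ONCE()/WRITE_ONCE() pairs: single-copy atomicity plus documentation of the intentional data race. A userspace analogue with simplified stand-in macros (pthreads, GCC typeof; compile with -pthread):

	/* The macros below are simplified stand-ins for the kernel's
	 * READ_ONCE/WRITE_ONCE, enough to show the annotation pattern. */
	#include <pthread.h>
	#include <stdio.h>
	#include <time.h>

	#define WRITE_ONCE(x, val) (*(volatile typeof(x) *)&(x) = (val))
	#define READ_ONCE(x)       (*(volatile typeof(x) *)&(x))

	static long long use_time;

	static void *toucher(void *arg)
	{
		/* first use stamps the field, later uses keep the stamp */
		if (!READ_ONCE(use_time))
			WRITE_ONCE(use_time, (long long)time(NULL));
		return NULL;
	}

	int main(void)
	{
		pthread_t t[4];

		for (int i = 0; i < 4; i++)
			pthread_create(&t[i], NULL, toucher, NULL);
		for (int i = 0; i < 4; i++)
			pthread_join(t[i], NULL);
		printf("use_time = %lld\n", READ_ONCE(use_time));
		return 0;
	}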
diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c
index 89c731f4f0c7..2ab3e09e2227 100644
--- a/net/xfrm/xfrm_state.c
+++ b/net/xfrm/xfrm_state.c
@@ -577,7 +577,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.state == XFRM_STATE_EXPIRED)
goto expired;
if (x->lft.hard_add_expires_seconds) {
- long tmo = x->lft.hard_add_expires_seconds +
+ time64_t tmo = x->lft.hard_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
if (x->xflags & XFRM_SOFT_EXPIRE) {
@@ -594,8 +594,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
next = tmo;
}
if (x->lft.hard_use_expires_seconds) {
- long tmo = x->lft.hard_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.hard_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
goto expired;
if (tmo < next)
@@ -604,7 +604,7 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
if (x->km.dying)
goto resched;
if (x->lft.soft_add_expires_seconds) {
- long tmo = x->lft.soft_add_expires_seconds +
+ time64_t tmo = x->lft.soft_add_expires_seconds +
x->curlft.add_time - now;
if (tmo <= 0) {
warn = 1;
@@ -616,8 +616,8 @@ static enum hrtimer_restart xfrm_timer_handler(struct hrtimer *me)
}
}
if (x->lft.soft_use_expires_seconds) {
- long tmo = x->lft.soft_use_expires_seconds +
- (x->curlft.use_time ? : now) - now;
+ time64_t tmo = x->lft.soft_use_expires_seconds +
+ (READ_ONCE(x->curlft.use_time) ? : now) - now;
if (tmo <= 0)
warn = 1;
else if (tmo < next)
@@ -1274,7 +1274,7 @@ found:
xso->real_dev = xdo->real_dev;
netdev_tracker_alloc(xso->dev, &xso->dev_tracker,
GFP_ATOMIC);
- error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x);
+ error = xso->dev->xfrmdev_ops->xdo_dev_state_add(x, NULL);
if (error) {
xso->dir = 0;
netdev_put(xso->dev, &xso->dev_tracker);
@@ -1906,7 +1906,7 @@ out:
hrtimer_start(&x1->mtimer, ktime_set(1, 0),
HRTIMER_MODE_REL_SOFT);
- if (x1->curlft.use_time)
+ if (READ_ONCE(x1->curlft.use_time))
xfrm_state_check_expire(x1);
if (x->props.smark.m || x->props.smark.v || x->if_id) {
@@ -1940,8 +1940,8 @@ int xfrm_state_check_expire(struct xfrm_state *x)
{
xfrm_dev_state_update_curlft(x);
- if (!x->curlft.use_time)
- x->curlft.use_time = ktime_get_real_seconds();
+ if (!READ_ONCE(x->curlft.use_time))
+ WRITE_ONCE(x->curlft.use_time, ktime_get_real_seconds());
if (x->curlft.bytes >= x->lft.hard_byte_limit ||
x->curlft.packets >= x->lft.hard_packet_limit) {