2 * Copyright (c) 2007 Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
37 #include <sys/cdefs.h>
39 #include <linux/types.h>
40 #include <linux/compiler.h>
41 #include <linux/list.h>
42 #include <linux/mutex.h>
43 #include <linux/netdevice.h>
45 #include <linux/mlx4/device.h>
46 #include <linux/mlx4/qp.h>
47 #include <linux/mlx4/cq.h>
48 #include <linux/mlx4/srq.h>
49 #include <linux/mlx4/doorbell.h>
50 #include <linux/mlx4/cmd.h>
52 #include <net/if_media.h>
53 #include <netinet/tcp_lro.h>
57 #define DRV_NAME "mlx4_en"
58 #define DRV_VERSION "1.5.2"
59 #define DRV_RELDATE "July 2010"
62 #define NETIF_MSG_LINK 0x1
63 #define NETIF_MSG_IFDOWN 0x2
64 #define NETIF_MSG_HW 0x4
65 #define NETIF_MSG_DRV 0x8
66 #define NETIF_MSG_INTR 0x10
67 #define NETIF_MSG_RX_ERR 0x20
69 #define MLX4_EN_MSG_LEVEL (NETIF_MSG_LINK | NETIF_MSG_IFDOWN)
/*
 * Emit one log line for a port at printk severity `level`.
 *
 * If (priv)->registered is non-zero the line is tagged with the interface
 * name, (priv)->dev->if_xname; otherwise it is tagged with the name of the
 * underlying device and the port number.  `format`/`arg` are an ordinary
 * printk format string and its arguments; `format` must be a string literal
 * because it is pasted onto the prefix.
 *
 * The body is wrapped in do { } while (0) so the macro expands to exactly one
 * statement and cannot capture an `else` that follows the call site.
 * Note: `priv` is evaluated more than once.
 */
#define en_print(level, priv, format, arg...)				\
	do {								\
		if ((priv)->registered)					\
			printk(level "%s: %s: " format, DRV_NAME,	\
			    (priv)->dev->if_xname, ## arg);		\
		else							\
			printk(level "%s: %s: Port %d: " format,	\
			    DRV_NAME,					\
			    dev_name(&(priv)->mdev->pdev->dev),		\
			    (priv)->port, ## arg);			\
	} while (0)
/*
 * Debug-level log line, emitted only when the NETIF_MSG_<mlevel> bit is set
 * in (priv)->msg_enable.  `mlevel` is the suffix of a NETIF_MSG_* constant
 * (e.g. DRV, LINK); the remaining arguments are forwarded to en_print().
 *
 * Wrapped in do { } while (0) so that
 *	if (x) en_dbg(...); else ...
 * binds the `else` to the caller's `if`, not to the test inside the macro.
 */
#define en_dbg(mlevel, priv, format, arg...)				\
	do {								\
		if (NETIF_MSG_##mlevel & (priv)->msg_enable)		\
			en_print(KERN_DEBUG, priv, format, ## arg);	\
	} while (0)
/*
 * Severity-specific front ends for en_print(): each forwards its arguments
 * unchanged and only fixes the printk level.
 */
#define en_warn(priv, fmt, args...)	en_print(KERN_WARNING, priv, fmt, ##args)
#define en_err(priv, fmt, args...)	en_print(KERN_ERR, priv, fmt, ##args)
#define en_info(priv, fmt, args...)	en_print(KERN_INFO, priv, fmt, ##args)
/*
 * Device-level log helpers: print "<DRV_NAME> <device name>: <message>" at
 * error, info and warning severity respectively.  `mdev` is any expression
 * yielding a pointer whose ->pdev->dev member can be passed to dev_name();
 * it is parenthesized so that expressions such as `base + i` expand safely.
 * `format` must be a string literal (it is pasted onto the prefix).
 */
#define mlx4_err(mdev, format, arg...)					\
	printk(KERN_ERR "%s %s: " format, DRV_NAME,			\
	    dev_name(&(mdev)->pdev->dev), ## arg)
#define mlx4_info(mdev, format, arg...)					\
	printk(KERN_INFO "%s %s: " format, DRV_NAME,			\
	    dev_name(&(mdev)->pdev->dev), ## arg)
#define mlx4_warn(mdev, format, arg...)					\
	printk(KERN_WARNING "%s %s: " format, DRV_NAME,			\
	    dev_name(&(mdev)->pdev->dev), ## arg)
107 #define MLX4_EN_PAGE_SHIFT 12
108 #define MLX4_EN_PAGE_SIZE (1 << MLX4_EN_PAGE_SHIFT)
109 #define MAX_TX_RINGS (MLX4_EN_NUM_HASH_RINGS + 1 + MLX4_EN_NUM_PPP_RINGS)
110 #define MAX_RX_RINGS 16
112 #define HEADROOM (2048 / TXBB_SIZE + 1)
113 #define STAMP_STRIDE 64
114 #define STAMP_DWORDS (STAMP_STRIDE / 4)
115 #define STAMP_SHIFT 31
116 #define STAMP_VAL 0x7fffffff
117 #define STATS_DELAY (HZ / 4)
119 /* Typical TSO descriptor with 16 gather entries is 352 bytes... */
120 #define MAX_DESC_SIZE 512
121 #define MAX_DESC_TXBBS (MAX_DESC_SIZE / TXBB_SIZE)
124 * OS related constants and tunables
127 #define MLX4_EN_WATCHDOG_TIMEOUT (15 * HZ)
129 #define MLX4_EN_MAX_LRO_DESCRIPTORS 32
130 #define MLX4_EN_NUM_IPFRAG_SESSIONS 16
132 /* Receive fragment sizes; we use at most 3 fragments (for 9600 byte MTU
133 * and 4K allocations) */
134 #if MJUMPAGESIZE == 4096
137 FRAG_SZ1 = MJUMPAGESIZE,
138 FRAG_SZ2 = MJUMPAGESIZE,
140 #define MLX4_EN_MAX_RX_FRAGS 3
141 #elif MJUMPAGESIZE == 8192
144 FRAG_SZ1 = MJUMPAGESIZE,
146 #define MLX4_EN_MAX_RX_FRAGS 2
147 #elif MJUMPAGESIZE == 8192
149 #error "Unknown PAGE_SIZE"
152 /* Maximum ring sizes */
153 #define MLX4_EN_MAX_TX_SIZE 8192
154 #define MLX4_EN_MAX_RX_SIZE 8192
156 #define MLX4_EN_MIN_RX_SIZE (128)
157 #define MLX4_EN_MIN_TX_SIZE (4096 / TXBB_SIZE)
159 #define MLX4_EN_SMALL_PKT_SIZE 64
160 #define MLX4_EN_TX_HASH_SIZE 256
161 #define MLX4_EN_TX_HASH_MASK (MLX4_EN_TX_HASH_SIZE - 1)
162 #define MLX4_EN_NUM_HASH_RINGS 4
163 #define MLX4_EN_NUM_PPP_RINGS 8
164 #define MLX4_EN_DEF_TX_RING_SIZE 512
165 #define MLX4_EN_DEF_TX_QUEUE_SIZE 4096
166 #define MLX4_EN_DEF_RX_RING_SIZE 1024
167 #define MLX4_EN_MAX_RX_POLL 1024
169 /* Target number of bytes to coalesce with interrupt moderation */
170 #define MLX4_EN_RX_COAL_TARGET 0x20000
171 #define MLX4_EN_RX_COAL_TIME 0x10
173 #define MLX4_EN_TX_COAL_PKTS 5
174 #define MLX4_EN_TX_COAL_TIME 0x80
176 #define MLX4_EN_RX_RATE_LOW 400000
177 #define MLX4_EN_RX_COAL_TIME_LOW 0
178 #define MLX4_EN_RX_RATE_HIGH 450000
179 #define MLX4_EN_RX_COAL_TIME_HIGH 128
180 #define MLX4_EN_RX_SIZE_THRESH 1024
181 #define MLX4_EN_RX_RATE_THRESH (1000000 / MLX4_EN_RX_COAL_TIME_HIGH)
182 #define MLX4_EN_SAMPLE_INTERVAL 0
183 #define MLX4_EN_AVG_PKT_SMALL 256
185 #define MLX4_EN_AUTO_CONF 0xffff
187 #define MLX4_EN_DEF_RX_PAUSE 1
188 #define MLX4_EN_DEF_TX_PAUSE 1
/* Interval between successive polls in the Tx routine when polling is used
   instead of interrupts (in per-core Tx rings) - should be power of 2 */
192 #define MLX4_EN_TX_POLL_MODER 16
193 #define MLX4_EN_TX_POLL_TIMEOUT (HZ / 4)
195 #define ETH_LLC_SNAP_SIZE 8
197 #define SMALL_PACKET_SIZE (MHLEN)
198 #define HEADER_COPY_SIZE (128)
199 #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETHER_HDR_LEN)
201 #define MLX4_EN_MIN_MTU 46
202 #define ETH_BCAST 0xffffffffffffULL
204 #define MLX4_EN_LOOPBACK_RETRIES 5
205 #define MLX4_EN_LOOPBACK_TIMEOUT 100
/*
 * Optional performance counters.
 *
 * With MLX4_EN_PERF_STAT defined the macros below update and read plain
 * counters; AVG_PERF_COUNTER() keeps a running average scaled by AVG_FACTOR,
 * weighting the previous value by (AVG_SIZE - 1) / AVG_SIZE, and
 * GET_AVG_PERF_COUNTER() removes the scaling again.
 *
 * Without it every macro collapses to a no-op (or the constant 0) so call
 * sites need no #ifdef of their own.
 */
#ifdef MLX4_EN_PERF_STAT
/* Number of samples to 'average' */
#ifndef AVG_SIZE
/* NOTE(review): AVG_SIZE is used below but not defined in the visible text;
 * 128 is the value used by the upstream mlx4_en driver - confirm. */
#define AVG_SIZE			128
#endif
#define AVG_FACTOR			1024
#define NUM_PERF_STATS			NUM_PERF_COUNTERS

#define INC_PERF_COUNTER(cnt)		(++(cnt))
#define ADD_PERF_COUNTER(cnt, add)	((cnt) += (add))
#define AVG_PERF_COUNTER(cnt, sample) \
	((cnt) = ((cnt) * (AVG_SIZE - 1) + (sample) * AVG_FACTOR) / AVG_SIZE)
#define GET_PERF_COUNTER(cnt)		(cnt)
#define GET_AVG_PERF_COUNTER(cnt)	((cnt) / AVG_FACTOR)

#else /* !MLX4_EN_PERF_STAT */

#define NUM_PERF_STATS			0
#define INC_PERF_COUNTER(cnt)		do {} while (0)
#define ADD_PERF_COUNTER(cnt, add)	do {} while (0)
#define AVG_PERF_COUNTER(cnt, sample)	do {} while (0)
#define GET_PERF_COUNTER(cnt)		(0)
#define GET_AVG_PERF_COUNTER(cnt)	(0)
#endif /* MLX4_EN_PERF_STAT */
/* log2 of x after rounding x up to the next power of two. */
#define ROUNDUP_LOG2(x)		ilog2(roundup_pow_of_two(x))
/* Logical XNOR: true when x and y are both zero or both non-zero. */
#define XNOR(x, y)		(!(x) == !(y))
/*
 * True for the two MAC values rejected here: all-ones in the low 48 bits and
 * zero.  `addr` is parenthesized so that operator expressions (e.g. `m & mask`)
 * compare as a whole; note that it is evaluated twice.
 */
#define ILLEGAL_MAC(addr)	((addr) == 0xffffffffffffULL || (addr) == 0x0)
248 struct mlx4_en_tx_info {
257 #define MLX4_EN_BIT_DESC_OWN 0x80000000
258 #define CTRL_SIZE sizeof(struct mlx4_wqe_ctrl_seg)
259 #define MLX4_EN_MEMTYPE_PAD 0x100
260 #define DS_SIZE sizeof(struct mlx4_wqe_data_seg)
263 struct mlx4_en_tx_desc {
264 struct mlx4_wqe_ctrl_seg ctrl;
266 struct mlx4_wqe_data_seg data; /* at least one data segment */
267 struct mlx4_wqe_lso_seg lso;
268 struct mlx4_wqe_inline_seg inl;
272 #define MLX4_EN_USE_SRQ 0x01000000
274 struct mlx4_en_tx_ring {
276 struct mlx4_hwq_resources wqres;
277 u32 size ; /* number of TXBBs */
280 u16 cqn; /* index of port CQ associated with this ring */
289 struct mlx4_en_tx_info *tx_info;
293 struct mlx4_qp_context context;
295 enum mlx4_qp_state qp_state;
296 struct mlx4_srq dummy;
298 unsigned long packets;
299 unsigned long errors;
300 spinlock_t comp_lock;
306 struct mlx4_en_ipfrag {
307 struct mbuf *fragments;
317 struct mlx4_en_rx_desc {
318 /* actual number of entries depends on rx ring stride */
319 struct mlx4_wqe_data_seg data[0];
322 struct mlx4_en_rx_ring {
323 struct mlx4_hwq_resources wqres;
324 u32 size ; /* number of Rx descs*/
329 u16 cqn; /* index of port CQ associated with this ring */
336 unsigned long packets;
337 unsigned long errors;
339 struct mlx4_en_ipfrag ipfrag[MLX4_EN_NUM_IPFRAG_SESSIONS];
343 static inline int mlx4_en_can_lro(__be16 status)
345 return (status & cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
346 MLX4_CQE_STATUS_IPV4F |
347 MLX4_CQE_STATUS_IPV6 |
348 MLX4_CQE_STATUS_IPV4OPT |
349 MLX4_CQE_STATUS_TCP |
350 MLX4_CQE_STATUS_UDP |
351 MLX4_CQE_STATUS_IPOK)) ==
352 cpu_to_be16(MLX4_CQE_STATUS_IPV4 |
353 MLX4_CQE_STATUS_IPOK |
354 MLX4_CQE_STATUS_TCP);
359 struct mlx4_hwq_resources wqres;
362 struct net_device *dev;
363 /* Per-core Tx cq processing support */
364 struct timer_list timer;
371 struct mlx4_cqe *buf;
373 struct taskqueue *tq;
374 #define MLX4_EN_OPCODE_ERROR 0x1e
378 struct mlx4_en_port_profile {
390 struct mlx4_en_profile {
400 struct mlx4_en_port_profile prof[MLX4_MAX_PORTS + 1];
404 struct mlx4_dev *dev;
405 struct pci_dev *pdev;
406 struct mutex state_lock;
407 struct net_device *pndev[MLX4_MAX_PORTS + 1];
410 struct mlx4_en_profile profile;
412 struct workqueue_struct *workqueue;
413 struct device *dma_device;
414 void __iomem *uar_map;
415 struct mlx4_uar priv_uar;
419 u8 mac_removed[MLX4_MAX_PORTS + 1];
423 struct mlx4_en_rss_map {
425 struct mlx4_qp qps[MAX_RX_RINGS];
426 enum mlx4_qp_state state[MAX_RX_RINGS];
427 struct mlx4_qp indir_qp;
428 enum mlx4_qp_state indir_state;
431 struct mlx4_en_rss_context {
441 struct mlx4_en_port_state {
447 struct mlx4_en_pkt_stats {
448 unsigned long broadcast;
449 unsigned long rx_prio[8];
450 unsigned long tx_prio[8];
451 #define NUM_PKT_STATS 17
454 struct mlx4_en_port_stats {
455 unsigned long tso_packets;
456 unsigned long queue_stopped;
457 unsigned long wake_queue;
458 unsigned long tx_timeout;
459 unsigned long rx_alloc_failed;
460 unsigned long rx_chksum_good;
461 unsigned long rx_chksum_none;
462 unsigned long tx_chksum_offload;
465 struct mlx4_en_perf_stats {
473 struct mlx4_en_frag_info {
475 u16 frag_prefix_size;
478 struct mlx4_en_tx_hash_entry {
480 unsigned int small_pkts;
481 unsigned int big_pkts;
485 struct mlx4_en_priv {
486 struct mlx4_en_dev *mdev;
487 struct mlx4_en_port_profile *prof;
488 struct net_device *dev;
490 u32 vlan_register[VLAN_FLTR_SIZE];
491 u32 vlan_unregister[VLAN_FLTR_SIZE];
492 u32 vlans[VLAN_FLTR_SIZE];
493 spinlock_t vlan_lock;
494 struct mlx4_en_port_state port_state;
495 spinlock_t stats_lock;
497 unsigned long last_moder_packets;
498 unsigned long last_moder_tx_packets;
499 unsigned long last_moder_bytes;
500 unsigned long last_moder_jiffies;
511 u16 adaptive_rx_coal;
514 u32 validate_loopback;
516 struct mlx4_hwq_resources res;
529 struct mlx4_en_rss_map rss_map;
532 #define MLX4_EN_FLAG_PROMISC 0x1
536 struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS];
542 struct mlx4_en_tx_ring tx_ring[MAX_TX_RINGS];
543 struct mlx4_en_rx_ring rx_ring[MAX_RX_RINGS];
544 struct mlx4_en_cq tx_cq[MAX_TX_RINGS];
545 struct mlx4_en_cq rx_cq[MAX_RX_RINGS];
546 struct mlx4_en_tx_hash_entry tx_hash[MLX4_EN_TX_HASH_SIZE];
547 struct work_struct mcast_task;
548 struct work_struct watchdog_task;
549 struct work_struct linkstate_task;
550 struct delayed_work stats_task;
551 struct mlx4_en_perf_stats pstats;
552 struct mlx4_en_pkt_stats pkstats;
553 struct mlx4_en_port_stats port_stats;
554 struct mlx4_en_stat_out_mbox hw_stats;
555 struct ifmedia media;
556 eventhandler_tag vlan_attach;
557 eventhandler_tag vlan_detach;
558 struct callout watchdog_timer;
559 volatile int blocked;
560 struct sysctl_oid *sysctl;
561 struct sysctl_ctx_list conf_ctx;
562 struct sysctl_ctx_list stat_ctx;
566 MLX4_EN_WOL_MAGIC = (1ULL << 61),
567 MLX4_EN_WOL_ENABLED = (1ULL << 62),
568 MLX4_EN_WOL_DO_MODIFY = (1ULL << 63),
571 int mlx4_en_transmit(struct net_device *dev, struct mbuf *mb);
572 void mlx4_en_qflush(struct net_device *dev);
574 int mlx4_en_rx_frags(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring,
575 struct mbuf *mb, struct mlx4_cqe *cqe);
576 void mlx4_en_flush_frags(struct mlx4_en_priv *priv,
577 struct mlx4_en_rx_ring *ring);
578 void mlx4_en_destroy_netdev(struct net_device *dev);
579 int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port,
580 struct mlx4_en_port_profile *prof);
582 int mlx4_en_start_port(struct net_device *dev);
583 void mlx4_en_stop_port(struct net_device *dev);
585 void mlx4_en_free_resources(struct mlx4_en_priv *priv);
586 int mlx4_en_alloc_resources(struct mlx4_en_priv *priv);
588 int mlx4_en_create_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq,
589 int entries, int ring, enum cq_type mode);
590 void mlx4_en_destroy_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
591 int mlx4_en_activate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
592 void mlx4_en_deactivate_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
593 int mlx4_en_set_cq_moder(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
594 int mlx4_en_arm_cq(struct mlx4_en_priv *priv, struct mlx4_en_cq *cq);
596 void mlx4_en_poll_tx_cq(unsigned long data);
597 void mlx4_en_tx_irq(struct mlx4_cq *mcq);
598 u16 mlx4_en_select_queue(struct net_device *dev, struct mbuf *mb);
600 int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring,
601 u32 size, u16 stride);
602 void mlx4_en_destroy_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring *ring);
603 int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv,
604 struct mlx4_en_tx_ring *ring,
606 void mlx4_en_deactivate_tx_ring(struct mlx4_en_priv *priv,
607 struct mlx4_en_tx_ring *ring);
609 int mlx4_en_create_rx_ring(struct mlx4_en_priv *priv,
610 struct mlx4_en_rx_ring *ring, u32 size);
611 void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv,
612 struct mlx4_en_rx_ring *ring);
613 int mlx4_en_activate_rx_rings(struct mlx4_en_priv *priv);
614 void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv,
615 struct mlx4_en_rx_ring *ring);
616 int mlx4_en_process_rx_cq(struct net_device *dev,
617 struct mlx4_en_cq *cq,
619 int mlx4_en_process_rx_cq_mb(struct net_device *dev,
620 struct mlx4_en_cq *cq,
622 void mlx4_en_tx_que(void *context, int pending);
623 void mlx4_en_rx_que(void *context, int pending);
624 void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride,
625 int is_tx, int rss, int qpn, int cqn,
626 struct mlx4_qp_context *context);
627 void mlx4_en_sqp_event(struct mlx4_qp *qp, enum mlx4_event event);
628 int mlx4_en_map_buffer(struct mlx4_buf *buf);
629 void mlx4_en_unmap_buffer(struct mlx4_buf *buf);
631 void mlx4_en_calc_rx_buf(struct net_device *dev);
632 void mlx4_en_set_prio_map(struct mlx4_en_priv *priv, u16 *prio_map, u32 ring_num);
633 int mlx4_en_config_rss_steer(struct mlx4_en_priv *priv);
634 void mlx4_en_release_rss_steer(struct mlx4_en_priv *priv);
635 int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring);
636 void mlx4_en_rx_irq(struct mlx4_cq *mcq);
638 int mlx4_SET_MCAST_FLTR(struct mlx4_dev *dev, u8 port, u64 mac, u64 clear, u8 mode);
639 int mlx4_SET_VLAN_FLTR(struct mlx4_dev *dev, u8 port, u32 *vlans);
640 int mlx4_SET_PORT_general(struct mlx4_dev *dev, u8 port, int mtu,
641 u8 pptx, u8 pfctx, u8 pprx, u8 pfcrx);
642 int mlx4_SET_PORT_qpn_calc(struct mlx4_dev *dev, u8 port, u32 base_qpn,
645 int mlx4_en_DUMP_ETH_STATS(struct mlx4_en_dev *mdev, u8 port, u8 reset);
646 int mlx4_en_QUERY_PORT(struct mlx4_en_dev *mdev, u8 port);
648 #define MLX4_EN_NUM_SELF_TEST 5
649 void mlx4_en_ex_selftest(struct net_device *dev, u32 *flags, u64 *buf);
650 u64 mlx4_en_mac_to_u64(u8 *addr);
655 extern const struct ethtool_ops mlx4_en_ethtool_ops;