sys/compat/ndis/subr_hal.c

   1 /*-
   2  * SPDX-License-Identifier: BSD-4-Clause
   3  *
   4  * Copyright (c) 2003
   5  *      Bill Paul <wpaul@windriver.com>.  All rights reserved.
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions
   9  * are met:
  10  * 1. Redistributions of source code must retain the above copyright
  11  *    notice, this list of conditions and the following disclaimer.
  12  * 2. Redistributions in binary form must reproduce the above copyright
  13  *    notice, this list of conditions and the following disclaimer in the
  14  *    documentation and/or other materials provided with the distribution.
  15  * 3. All advertising materials mentioning features or use of this software
  16  *    must display the following acknowledgement:
  17  *      This product includes software developed by Bill Paul.
  18  * 4. Neither the name of the author nor the names of any co-contributors
  19  *    may be used to endorse or promote products derived from this software
  20  *    without specific prior written permission.
  21  *
  22  * THIS SOFTWARE IS PROVIDED BY Bill Paul AND CONTRIBUTORS ``AS IS'' AND
  23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  25  * ARE DISCLAIMED.  IN NO EVENT SHALL Bill Paul OR THE VOICES IN HIS HEAD
  26  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
  32  * THE POSSIBILITY OF SUCH DAMAGE.
  33  */
  34
  35 #include <sys/cdefs.h>
  36 __FBSDID("$FreeBSD$");
  37
  38 #include <sys/param.h>
  39 #include <sys/types.h>
  40 #include <sys/errno.h>
  41
  42 #include <sys/callout.h>
  43 #include <sys/kernel.h>
  44 #include <sys/lock.h>
  45 #include <sys/mutex.h>
  46 #include <sys/proc.h>
  47 #include <sys/sched.h>
  48 #include <sys/module.h>
  49
  50 #include <sys/systm.h>
  51 #include <machine/bus.h>
  52
  53 #include <sys/bus.h>
  54 #include <sys/rman.h>
  55
  56 #include <compat/ndis/pe_var.h>
  57 #include <compat/ndis/resource_var.h>
  58 #include <compat/ndis/cfg_var.h>
  59 #include <compat/ndis/ntoskrnl_var.h>
  60 #include <compat/ndis/hal_var.h>
  61
  62 static void KeStallExecutionProcessor(uint32_t);
  63 static void WRITE_PORT_BUFFER_ULONG(uint32_t *,
  64         uint32_t *, uint32_t);
  65 static void WRITE_PORT_BUFFER_USHORT(uint16_t *,
  66         uint16_t *, uint32_t);
  67 static void WRITE_PORT_BUFFER_UCHAR(uint8_t *,
  68         uint8_t *, uint32_t);
  69 static void WRITE_PORT_ULONG(uint32_t *, uint32_t);
  70 static void WRITE_PORT_USHORT(uint16_t *, uint16_t);
  71 static void WRITE_PORT_UCHAR(uint8_t *, uint8_t);
  72 static uint32_t READ_PORT_ULONG(uint32_t *);
  73 static uint16_t READ_PORT_USHORT(uint16_t *);
  74 static uint8_t READ_PORT_UCHAR(uint8_t *);
  75 static void READ_PORT_BUFFER_ULONG(uint32_t *,
  76         uint32_t *, uint32_t);
  77 static void READ_PORT_BUFFER_USHORT(uint16_t *,
  78         uint16_t *, uint32_t);
  79 static void READ_PORT_BUFFER_UCHAR(uint8_t *,
  80         uint8_t *, uint32_t);
  81 static uint64_t KeQueryPerformanceCounter(uint64_t *);
  82 static void _KeLowerIrql(uint8_t);
  83 static uint8_t KeRaiseIrqlToDpcLevel(void);
  84 static void dummy (void);
  85
  86 #define NDIS_MAXCPUS 64
  87 static struct mtx disp_lock[NDIS_MAXCPUS];
  88
  89 int
  90 hal_libinit()
  91 {
  92         image_patch_table       *patch;
  93         int                     i;
  94
  95         for (i = 0; i < NDIS_MAXCPUS; i++)
  96                 mtx_init(&disp_lock[i], "HAL preemption lock",
  97                     "HAL lock", MTX_RECURSE|MTX_DEF);
  98
  99         patch = hal_functbl;
 100         while (patch->ipt_func != NULL) {
 101                 windrv_wrap((funcptr)patch->ipt_func,
 102                     (funcptr *)&patch->ipt_wrap,
 103                     patch->ipt_argcnt, patch->ipt_ftype);
 104                 patch++;
 105         }
 106
 107         return (0);
 108 }
 109
 110 int
 111 hal_libfini()
 112 {
 113         image_patch_table       *patch;
 114         int                     i;
 115
 116         for (i = 0; i < NDIS_MAXCPUS; i++)
 117                 mtx_destroy(&disp_lock[i]);
 118
 119         patch = hal_functbl;
 120         while (patch->ipt_func != NULL) {
 121                 windrv_unwrap(patch->ipt_wrap);
 122                 patch++;
 123         }
 124
 125         return (0);
 126 }
 127
 128 static void
 129 KeStallExecutionProcessor(usecs)
 130         uint32_t                usecs;
 131 {
 132         DELAY(usecs);
 133 }
 134
 135 static void
 136 WRITE_PORT_ULONG(port, val)
 137         uint32_t                *port;
 138         uint32_t                val;
 139 {
 140         bus_space_write_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
 141 }
 142
 143 static void
 144 WRITE_PORT_USHORT(uint16_t *port, uint16_t val)
 145 {
 146         bus_space_write_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
 147 }
 148
 149 static void
 150 WRITE_PORT_UCHAR(uint8_t *port, uint8_t val)
 151 {
 152         bus_space_write_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port, val);
 153 }
 154
 155 static void
 156 WRITE_PORT_BUFFER_ULONG(port, val, cnt)
 157         uint32_t                *port;
 158         uint32_t                *val;
 159         uint32_t                cnt;
 160 {
 161         bus_space_write_multi_4(NDIS_BUS_SPACE_IO, 0x0,
 162             (bus_size_t)port, val, cnt);
 163 }
 164
 165 static void
 166 WRITE_PORT_BUFFER_USHORT(port, val, cnt)
 167         uint16_t                *port;
 168         uint16_t                *val;
 169         uint32_t                cnt;
 170 {
 171         bus_space_write_multi_2(NDIS_BUS_SPACE_IO, 0x0,
 172             (bus_size_t)port, val, cnt);
 173 }
 174
 175 static void
 176 WRITE_PORT_BUFFER_UCHAR(port, val, cnt)
 177         uint8_t                 *port;
 178         uint8_t                 *val;
 179         uint32_t                cnt;
 180 {
 181         bus_space_write_multi_1(NDIS_BUS_SPACE_IO, 0x0,
 182             (bus_size_t)port, val, cnt);
 183 }
 184
 185 static uint16_t
 186 READ_PORT_USHORT(port)
 187         uint16_t                *port;
 188 {
 189         return (bus_space_read_2(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
 190 }
 191
 192 static uint32_t
 193 READ_PORT_ULONG(port)
 194         uint32_t                *port;
 195 {
 196         return (bus_space_read_4(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
 197 }
 198
 199 static uint8_t
 200 READ_PORT_UCHAR(port)
 201         uint8_t                 *port;
 202 {
 203         return (bus_space_read_1(NDIS_BUS_SPACE_IO, 0x0, (bus_size_t)port));
 204 }
 205
 206 static void
 207 READ_PORT_BUFFER_ULONG(port, val, cnt)
 208         uint32_t                *port;
 209         uint32_t                *val;
 210         uint32_t                cnt;
 211 {
 212         bus_space_read_multi_4(NDIS_BUS_SPACE_IO, 0x0,
 213             (bus_size_t)port, val, cnt);
 214 }
 215
 216 static void
 217 READ_PORT_BUFFER_USHORT(port, val, cnt)
 218         uint16_t                *port;
 219         uint16_t                *val;
 220         uint32_t                cnt;
 221 {
 222         bus_space_read_multi_2(NDIS_BUS_SPACE_IO, 0x0,
 223             (bus_size_t)port, val, cnt);
 224 }
 225
 226 static void
 227 READ_PORT_BUFFER_UCHAR(port, val, cnt)
 228         uint8_t                 *port;
 229         uint8_t                 *val;
 230         uint32_t                cnt;
 231 {
 232         bus_space_read_multi_1(NDIS_BUS_SPACE_IO, 0x0,
 233             (bus_size_t)port, val, cnt);
 234 }
 235
 236 /*
 237  * The spinlock implementation in Windows differs from that of FreeBSD.
 238  * The basic operation of spinlocks involves two steps: 1) spin in a
 239  * tight loop while trying to acquire a lock, 2) after obtaining the
 240  * lock, disable preemption. (Note that on uniprocessor systems, you're
 241  * allowed to skip the first step and just lock out pre-emption, since
 242  * it's not possible for you to be in contention with another running
 243  * thread.) Later, you release the lock then re-enable preemption.
 244  * The difference between Windows and FreeBSD lies in how preemption
 245  * is disabled. In FreeBSD, it's done using critical_enter(), which on
 246  * the x86 arch translates to a cli instruction. This masks off all
 247  * interrupts, and effectively stops the scheduler from ever running
 248  * so _nothing_ can execute except the current thread. In Windows,
 249  * preemption is disabled by raising the processor IRQL to DISPATCH_LEVEL.
 250  * This stops other threads from running, but does _not_ block device
 251  * interrupts. This means ISRs can still run, and they can make other
  252  * threads runnable, but those other threads won't be able to execute
 253  * until the current thread lowers the IRQL to something less than
 254  * DISPATCH_LEVEL.
 255  *
 256  * There's another commonly used IRQL in Windows, which is APC_LEVEL.
 257  * An APC is an Asynchronous Procedure Call, which differs from a DPC
  258  * (Deferred Procedure Call) in that a DPC is queued up to run in
 259  * another thread, while an APC runs in the thread that scheduled
 260  * it (similar to a signal handler in a UNIX process). We don't
 261  * actually support the notion of APCs in FreeBSD, so for now, the
 262  * only IRQLs we're interested in are DISPATCH_LEVEL and PASSIVE_LEVEL.
 263  *
 264  * To simulate DISPATCH_LEVEL, we raise the current thread's priority
 265  * to PI_REALTIME, which is the highest we can give it. This should,
 266  * if I understand things correctly, prevent anything except for an
 267  * interrupt thread from preempting us. PASSIVE_LEVEL is basically
 268  * everything else.
 269  *
 270  * Be aware that, at least on the x86 arch, the Windows spinlock
 271  * functions are divided up in peculiar ways. The actual spinlock
 272  * functions are KfAcquireSpinLock() and KfReleaseSpinLock(), and
 273  * they live in HAL.dll. Meanwhile, KeInitializeSpinLock(),
 274  * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 275  * live in ntoskrnl.exe. Most Windows source code will call
 276  * KeAcquireSpinLock() and KeReleaseSpinLock(), but these are just
 277  * macros that call KfAcquireSpinLock() and KfReleaseSpinLock().
 278  * KefAcquireSpinLockAtDpcLevel() and KefReleaseSpinLockFromDpcLevel()
 279  * perform the lock acquisition/release functions without doing the
 280  * IRQL manipulation, and are used when one is already running at
 281  * DISPATCH_LEVEL. Make sense? Good.
 282  *
 283  * According to the Microsoft documentation, any thread that calls
 284  * KeAcquireSpinLock() must be running at IRQL <= DISPATCH_LEVEL. If
 285  * we detect someone trying to acquire a spinlock from DEVICE_LEVEL
 286  * or HIGH_LEVEL, we panic.
 287  *
 288  * Alternate sleep-lock-based spinlock implementation
 289  * --------------------------------------------------
 290  *
 291  * The earlier spinlock implementation was arguably a bit of a hack
 292  * and presented several problems. It was basically designed to provide
 293  * the functionality of spinlocks without incurring the wrath of
 294  * WITNESS. We could get away with using both our spinlock implementation
 295  * and FreeBSD sleep locks at the same time, but if WITNESS knew what
 296  * we were really up to, it would have spanked us rather severely.
 297  *
 298  * There's another method we can use based entirely on sleep locks.
 299  * First, it's important to realize that everything we're locking
 300  * resides inside Project Evil itself: any critical data being locked
 301  * by drivers belongs to the drivers, and should not be referenced
 302  * by any other OS code outside of the NDISulator. The priority-based
 303  * locking scheme has system-wide effects, just like real spinlocks
 304  * (blocking preemption affects the whole CPU), but since we keep all
 305  * our critical data private, we can use a simpler mechanism that
 306  * affects only code/threads directly related to Project Evil.
 307  *
 308  * The idea is to create a sleep lock mutex for each CPU in the system.
 309  * When a CPU running in the NDISulator wants to acquire a spinlock, it
 310  * does the following:
 311  * - Pin ourselves to the current CPU
 312  * - Acquire the mutex for the current CPU
 313  * - Spin on the spinlock variable using atomic test and set, just like
 314  *   a real spinlock.
 315  * - Once we have the lock, we execute our critical code
 316  *
 317  * To give up the lock, we do:
 318  * - Clear the spinlock variable with an atomic op
 319  * - Release the per-CPU mutex
 320  * - Unpin ourselves from the current CPU.
 321  *
 322  * On a uniprocessor system, this means all threads that access protected
 323  * data are serialized through the per-CPU mutex. After one thread
 324  * acquires the 'spinlock,' any other thread that uses a spinlock on the
 325  * current CPU will block on the per-CPU mutex, which has the same general
 326  * effect of blocking pre-emption, but _only_ for those threads that are
 327  * running NDISulator code.
 328  *
 329  * On a multiprocessor system, threads on different CPUs all block on
 330  * their respective per-CPU mutex, and the atomic test/set operation
 331  * on the spinlock variable provides inter-CPU synchronization, though
 332  * only for threads running NDISulator code.
 333  *
 334  * This method solves an important problem. In Windows, you're allowed
 335  * to do an ExAllocatePoolWithTag() with a spinlock held, provided you
 336  * allocate from NonPagedPool. This implies an atomic heap allocation
 337  * that will not cause the current thread to sleep. (You can't sleep
  338  * while holding a real spinlock: clowns will eat you.) But in FreeBSD,
 339  * malloc(9) _always_ triggers the acquisition of a sleep lock, even
 340  * when you use M_NOWAIT. This is not a problem for FreeBSD native
 341  * code: you're allowed to sleep in things like interrupt threads. But
 342  * it is a problem with the old priority-based spinlock implementation:
 343  * even though we get away with it most of the time, we really can't
 344  * do a malloc(9) after doing a KeAcquireSpinLock() or KeRaiseIrql().
 345  * With the new implementation, it's not a problem: you're allowed to
 346  * acquire more than one sleep lock (as long as you avoid lock order
 347  * reversals).
 348  *
 349  * The one drawback to this approach is that now we have a lot of
 350  * contention on one per-CPU mutex within the NDISulator code. Whether
 351  * or not this is preferable to the expected Windows spinlock behavior
 352  * of blocking pre-emption is debatable.
 353  */
 354
 355 uint8_t
 356 KfAcquireSpinLock(lock)
 357         kspin_lock              *lock;
 358 {
 359         uint8_t                 oldirql;
 360
 361         KeRaiseIrql(DISPATCH_LEVEL, &oldirql);
 362         KeAcquireSpinLockAtDpcLevel(lock);
 363
 364         return (oldirql);
 365 }
 366
 367 void
 368 KfReleaseSpinLock(kspin_lock *lock, uint8_t newirql)
 369 {
 370         KeReleaseSpinLockFromDpcLevel(lock);
 371         KeLowerIrql(newirql);
 372 }
 373
 374 uint8_t
 375 KeGetCurrentIrql()
 376 {
 377         if (mtx_owned(&disp_lock[curthread->td_oncpu]))
 378                 return (DISPATCH_LEVEL);
 379         return (PASSIVE_LEVEL);
 380 }
 381
 382 static uint64_t
 383 KeQueryPerformanceCounter(freq)
 384         uint64_t                *freq;
 385 {
 386         if (freq != NULL)
 387                 *freq = hz;
 388
 389         return ((uint64_t)ticks);
 390 }
 391
 392 uint8_t
 393 KfRaiseIrql(uint8_t irql)
 394 {
 395         uint8_t                 oldirql;
 396
 397         sched_pin();
 398         oldirql = KeGetCurrentIrql();
 399
 400         /* I am so going to hell for this. */
 401         if (oldirql > irql)
 402                 panic("IRQL_NOT_LESS_THAN_OR_EQUAL");
 403
 404         if (oldirql != DISPATCH_LEVEL)
 405                 mtx_lock(&disp_lock[curthread->td_oncpu]);
 406         else
 407                 sched_unpin();
 408
 409 /*printf("RAISE IRQL: %d %d\n", irql, oldirql);*/
 410
 411         return (oldirql);
 412 }
 413
 414 void
 415 KfLowerIrql(uint8_t oldirql)
 416 {
 417         if (oldirql == DISPATCH_LEVEL)
 418                 return;
 419
 420         if (KeGetCurrentIrql() != DISPATCH_LEVEL)
 421                 panic("IRQL_NOT_GREATER_THAN");
 422
 423         mtx_unlock(&disp_lock[curthread->td_oncpu]);
 424         sched_unpin();
 425 }
 426
 427 static uint8_t
 428 KeRaiseIrqlToDpcLevel(void)
 429 {
 430         uint8_t                 irql;
 431
 432         KeRaiseIrql(DISPATCH_LEVEL, &irql);
 433         return (irql);
 434 }
 435
 436 static void
 437 _KeLowerIrql(uint8_t oldirql)
 438 {
 439         KeLowerIrql(oldirql);
 440 }
 441
 442 static void dummy()
 443 {
 444         printf("hal dummy called...\n");
 445 }
 446
 447 image_patch_table hal_functbl[] = {
 448         IMPORT_SFUNC(KeStallExecutionProcessor, 1),
 449         IMPORT_SFUNC(WRITE_PORT_ULONG, 2),
 450         IMPORT_SFUNC(WRITE_PORT_USHORT, 2),
 451         IMPORT_SFUNC(WRITE_PORT_UCHAR, 2),
 452         IMPORT_SFUNC(WRITE_PORT_BUFFER_ULONG, 3),
 453         IMPORT_SFUNC(WRITE_PORT_BUFFER_USHORT, 3),
 454         IMPORT_SFUNC(WRITE_PORT_BUFFER_UCHAR, 3),
 455         IMPORT_SFUNC(READ_PORT_ULONG, 1),
 456         IMPORT_SFUNC(READ_PORT_USHORT, 1),
 457         IMPORT_SFUNC(READ_PORT_UCHAR, 1),
 458         IMPORT_SFUNC(READ_PORT_BUFFER_ULONG, 3),
 459         IMPORT_SFUNC(READ_PORT_BUFFER_USHORT, 3),
 460         IMPORT_SFUNC(READ_PORT_BUFFER_UCHAR, 3),
 461         IMPORT_FFUNC(KfAcquireSpinLock, 1),
 462         IMPORT_FFUNC(KfReleaseSpinLock, 1),
 463         IMPORT_SFUNC(KeGetCurrentIrql, 0),
 464         IMPORT_SFUNC(KeQueryPerformanceCounter, 1),
 465         IMPORT_FFUNC(KfLowerIrql, 1),
 466         IMPORT_FFUNC(KfRaiseIrql, 1),
 467         IMPORT_SFUNC(KeRaiseIrqlToDpcLevel, 0),
 468 #undef KeLowerIrql
 469         IMPORT_SFUNC_MAP(KeLowerIrql, _KeLowerIrql, 1),
 470
 471         /*
 472          * This last entry is a catch-all for any function we haven't
 473          * implemented yet. The PE import list patching routine will
 474          * use it for any function that doesn't have an explicit match
 475          * in this table.
 476          */
 477
 478         { NULL, (FUNC)dummy, NULL, 0, WINDRV_WRAP_STDCALL },
 479
 480         /* End of list. */
 481
 482         { NULL, NULL, NULL }
 483 };