]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/compat/linprocfs/linprocfs.c
Merge llvm-project release/15.x llvmorg-15.0.6-0-g088f33605d8a
[FreeBSD/FreeBSD.git] / sys / compat / linprocfs / linprocfs.c
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2000 Dag-Erling Coïdan Smørgrav
5  * Copyright (c) 1999 Pierre Beyssac
6  * Copyright (c) 1993 Jan-Simon Pendry
7  * Copyright (c) 1993
8  *      The Regents of the University of California.  All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *      This product includes software developed by the University of
24  *      California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *      @(#)procfs_status.c     8.4 (Berkeley) 6/15/94
42  */
43
44 #include "opt_inet.h"
45
46 #include <sys/cdefs.h>
47 __FBSDID("$FreeBSD$");
48
49 #include <sys/param.h>
50 #include <sys/queue.h>
51 #include <sys/blist.h>
52 #include <sys/conf.h>
53 #include <sys/exec.h>
54 #include <sys/fcntl.h>
55 #include <sys/filedesc.h>
56 #include <sys/jail.h>
57 #include <sys/kernel.h>
58 #include <sys/limits.h>
59 #include <sys/linker.h>
60 #include <sys/lock.h>
61 #include <sys/malloc.h>
62 #include <sys/msg.h>
63 #include <sys/mutex.h>
64 #include <sys/namei.h>
65 #include <sys/proc.h>
66 #include <sys/ptrace.h>
67 #include <sys/resourcevar.h>
68 #include <sys/resource.h>
69 #include <sys/sbuf.h>
70 #include <sys/sem.h>
71 #include <sys/shm.h>
72 #include <sys/smp.h>
73 #include <sys/socket.h>
74 #include <sys/syscallsubr.h>
75 #include <sys/sysctl.h>
76 #include <sys/sysent.h>
77 #include <sys/systm.h>
78 #include <sys/time.h>
79 #include <sys/tty.h>
80 #include <sys/user.h>
81 #include <sys/uuid.h>
82 #include <sys/vmmeter.h>
83 #include <sys/vnode.h>
84 #include <sys/bus.h>
85 #include <sys/uio.h>
86
87 #include <net/if.h>
88 #include <net/if_var.h>
89 #include <net/if_types.h>
90
91 #include <net/route.h>
92 #include <net/route/nhop.h>
93 #include <net/route/route_ctl.h>
94
95 #include <vm/vm.h>
96 #include <vm/vm_extern.h>
97 #include <vm/pmap.h>
98 #include <vm/vm_map.h>
99 #include <vm/vm_param.h>
100 #include <vm/vm_object.h>
101 #include <vm/swap_pager.h>
102
103 #include <machine/clock.h>
104
105 #include <geom/geom.h>
106 #include <geom/geom_int.h>
107
108 #if defined(__i386__) || defined(__amd64__)
109 #include <machine/cputypes.h>
110 #include <machine/md_var.h>
111 #endif /* __i386__ || __amd64__ */
112
113 #include <compat/linux/linux.h>
114 #include <compat/linux/linux_emul.h>
115 #include <compat/linux/linux_mib.h>
116 #include <compat/linux/linux_misc.h>
117 #include <compat/linux/linux_util.h>
118 #include <fs/pseudofs/pseudofs.h>
119 #include <fs/procfs/procfs.h>
120
/*
 * Various conversion macros.
 *
 * The tick-based conversions divide by stathz when it is non-zero and by
 * hz otherwise; that choice is spelled once in LINPROCFS_TICKHZ.
 */
#define LINPROCFS_TICKHZ (stathz ? stathz : hz)

/* ticks to jiffies */
#define T2J(x) ((long)(((x) * 100ULL) / LINPROCFS_TICKHZ))
/* ticks to centiseconds */
#define T2CS(x) ((unsigned long)(((x) * 100ULL) / LINPROCFS_TICKHZ))
/* ticks to seconds */
#define T2S(x) ((x) / LINPROCFS_TICKHZ)
/* bytes to kbytes */
#define B2K(x) ((x) >> 10)
/* bytes to pages */
#define B2P(x) ((x) >> PAGE_SHIFT)
/* pages to bytes */
#define P2B(x) ((x) << PAGE_SHIFT)
/* pages to kbytes */
#define P2K(x) ((x) << (PAGE_SHIFT - 10))
/* struct timeval pointer to hundredths of a second */
#define TV2J(x) ((x)->tv_sec * 100UL + (x)->tv_usec / 10000)
132
/**
 * @brief Mapping of ki_stat in struct kinfo_proc to the linux state
 *
 * The linux procfs state field displays one of the characters RSDZTW to
 * denote running, sleeping in an interruptible wait, waiting in an
 * uninterruptible disk sleep, a zombie process, process is being traced
 * or stopped, or process is paging respectively.
 *
 * Our struct kinfo_proc contains the variable ki_stat which contains a
 * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK.
 *
 * This character array is used with ki_stat - 1 as an index and tries to
 * map our states to suitable linux states.
 */
static char linux_state[] = "RRSTZDD";
148
149 /*
150  * Filler function for proc/meminfo
151  */
152 static int
153 linprocfs_domeminfo(PFS_FILL_ARGS)
154 {
155         unsigned long memtotal;         /* total memory in bytes */
156         unsigned long memfree;          /* free memory in bytes */
157         unsigned long cached;           /* page cache */
158         unsigned long buffers;          /* buffer cache */
159         unsigned long long swaptotal;   /* total swap space in bytes */
160         unsigned long long swapused;    /* used swap space in bytes */
161         unsigned long long swapfree;    /* free swap space in bytes */
162         size_t sz;
163         int error, i, j;
164
165         memtotal = physmem * PAGE_SIZE;
166         memfree = (unsigned long)vm_free_count() * PAGE_SIZE;
167         swap_pager_status(&i, &j);
168         swaptotal = (unsigned long long)i * PAGE_SIZE;
169         swapused = (unsigned long long)j * PAGE_SIZE;
170         swapfree = swaptotal - swapused;
171
172         /*
173          * This value may exclude wired pages, but we have no good way of
174          * accounting for that.
175          */
176         cached =
177             (vm_active_count() + vm_inactive_count() + vm_laundry_count()) *
178             PAGE_SIZE;
179
180         sz = sizeof(buffers);
181         error = kernel_sysctlbyname(curthread, "vfs.bufspace", &buffers, &sz,
182             NULL, 0, 0, 0);
183         if (error != 0)
184                 buffers = 0;
185
186         sbuf_printf(sb,
187             "MemTotal: %9lu kB\n"
188             "MemFree:  %9lu kB\n"
189             "Buffers:  %9lu kB\n"
190             "Cached:   %9lu kB\n"
191             "SwapTotal:%9llu kB\n"
192             "SwapFree: %9llu kB\n",
193             B2K(memtotal), B2K(memfree), B2K(buffers),
194             B2K(cached), B2K(swaptotal), B2K(swapfree));
195
196         return (0);
197 }
198
199 #if defined(__i386__) || defined(__amd64__)
200 /*
201  * Filler function for proc/cpuinfo (i386 & amd64 version)
202  */
203 static int
204 linprocfs_docpuinfo(PFS_FILL_ARGS)
205 {
206         int hw_model[2];
207         char model[128];
208         uint64_t freq;
209         size_t size;
210         u_int cache_size[4];
211         u_int regs[4] = { 0 };
212         int fqmhz, fqkhz;
213         int i, j;
214
215         /*
216          * We default the flags to include all non-conflicting flags,
217          * and the Intel versions of conflicting flags.
218          */
219         static char *cpu_feature_names[] = {
220                 /*  0 */ "fpu", "vme", "de", "pse",
221                 /*  4 */ "tsc", "msr", "pae", "mce",
222                 /*  8 */ "cx8", "apic", "", "sep",
223                 /* 12 */ "mtrr", "pge", "mca", "cmov",
224                 /* 16 */ "pat", "pse36", "pn", "clflush",
225                 /* 20 */ "", "dts", "acpi", "mmx",
226                 /* 24 */ "fxsr", "sse", "sse2", "ss",
227                 /* 28 */ "ht", "tm", "ia64", "pbe"
228         };
229
230         static char *amd_feature_names[] = {
231                 /*  0 */ "", "", "", "",
232                 /*  4 */ "", "", "", "",
233                 /*  8 */ "", "", "", "syscall",
234                 /* 12 */ "", "", "", "",
235                 /* 16 */ "", "", "", "mp",
236                 /* 20 */ "nx", "", "mmxext", "",
237                 /* 24 */ "", "fxsr_opt", "pdpe1gb", "rdtscp",
238                 /* 28 */ "", "lm", "3dnowext", "3dnow"
239         };
240
241         static char *cpu_feature2_names[] = {
242                 /*  0 */ "pni", "pclmulqdq", "dtes64", "monitor",
243                 /*  4 */ "ds_cpl", "vmx", "smx", "est",
244                 /*  8 */ "tm2", "ssse3", "cid", "sdbg",
245                 /* 12 */ "fma", "cx16", "xtpr", "pdcm",
246                 /* 16 */ "", "pcid", "dca", "sse4_1",
247                 /* 20 */ "sse4_2", "x2apic", "movbe", "popcnt",
248                 /* 24 */ "tsc_deadline_timer", "aes", "xsave", "",
249                 /* 28 */ "avx", "f16c", "rdrand", "hypervisor"
250         };
251
252         static char *amd_feature2_names[] = {
253                 /*  0 */ "lahf_lm", "cmp_legacy", "svm", "extapic",
254                 /*  4 */ "cr8_legacy", "abm", "sse4a", "misalignsse",
255                 /*  8 */ "3dnowprefetch", "osvw", "ibs", "xop",
256                 /* 12 */ "skinit", "wdt", "", "lwp",
257                 /* 16 */ "fma4", "tce", "", "nodeid_msr",
258                 /* 20 */ "", "tbm", "topoext", "perfctr_core",
259                 /* 24 */ "perfctr_nb", "", "bpext", "ptsc",
260                 /* 28 */ "perfctr_llc", "mwaitx", "", ""
261         };
262
263         static char *cpu_stdext_feature_names[] = {
264                 /*  0 */ "fsgsbase", "tsc_adjust", "sgx", "bmi1",
265                 /*  4 */ "hle", "avx2", "", "smep",
266                 /*  8 */ "bmi2", "erms", "invpcid", "rtm",
267                 /* 12 */ "cqm", "", "mpx", "rdt_a",
268                 /* 16 */ "avx512f", "avx512dq", "rdseed", "adx",
269                 /* 20 */ "smap", "avx512ifma", "", "clflushopt",
270                 /* 24 */ "clwb", "intel_pt", "avx512pf", "avx512er",
271                 /* 28 */ "avx512cd", "sha_ni", "avx512bw", "avx512vl"
272         };
273
274         static char *cpu_stdext_feature2_names[] = {
275                 /*  0 */ "prefetchwt1", "avx512vbmi", "umip", "pku",
276                 /*  4 */ "ospke", "waitpkg", "avx512_vbmi2", "",
277                 /*  8 */ "gfni", "vaes", "vpclmulqdq", "avx512_vnni",
278                 /* 12 */ "avx512_bitalg", "", "avx512_vpopcntdq", "",
279                 /* 16 */ "", "", "", "",
280                 /* 20 */ "", "", "rdpid", "",
281                 /* 24 */ "", "cldemote", "", "movdiri",
282                 /* 28 */ "movdir64b", "enqcmd", "sgx_lc", ""
283         };
284
285         static char *cpu_stdext_feature3_names[] = {
286                 /*  0 */ "", "", "avx512_4vnniw", "avx512_4fmaps",
287                 /*  4 */ "fsrm", "", "", "",
288                 /*  8 */ "avx512_vp2intersect", "", "md_clear", "",
289                 /* 12 */ "", "", "", "",
290                 /* 16 */ "", "", "pconfig", "",
291                 /* 20 */ "", "", "", "",
292                 /* 24 */ "", "", "ibrs", "stibp",
293                 /* 28 */ "flush_l1d", "arch_capabilities", "core_capabilities", "ssbd"
294         };
295
296         static char *cpu_stdext_feature_l1_names[] = {
297                 /*  0 */ "xsaveopt", "xsavec", "xgetbv1", "xsaves",
298                 /*  4 */ "xfd"
299         };
300
301         static char *power_flags[] = {
302                 "ts",           "fid",          "vid",
303                 "ttp",          "tm",           "stc",
304                 "100mhzsteps",  "hwpstate",     "",
305                 "cpb",          "eff_freq_ro",  "proc_feedback",
306                 "acc_power",
307         };
308
309         hw_model[0] = CTL_HW;
310         hw_model[1] = HW_MODEL;
311         model[0] = '\0';
312         size = sizeof(model);
313         if (kernel_sysctl(td, hw_model, 2, &model, &size, 0, 0, 0, 0) != 0)
314                 strcpy(model, "unknown");
315 #ifdef __i386__
316         switch (cpu_vendor_id) {
317         case CPU_VENDOR_AMD:
318                 if (cpu_class < CPUCLASS_686)
319                         cpu_feature_names[16] = "fcmov";
320                 break;
321         case CPU_VENDOR_CYRIX:
322                 cpu_feature_names[24] = "cxmmx";
323                 break;
324         }
325 #endif
326         if (cpu_exthigh >= 0x80000006)
327                 do_cpuid(0x80000006, cache_size);
328         else
329                 memset(cache_size, 0, sizeof(cache_size));
330         for (i = 0; i < mp_ncpus; ++i) {
331                 fqmhz = 0;
332                 fqkhz = 0;
333                 freq = atomic_load_acq_64(&tsc_freq);
334                 if (freq != 0) {
335                         fqmhz = (freq + 4999) / 1000000;
336                         fqkhz = ((freq + 4999) / 10000) % 100;
337                 }
338                 sbuf_printf(sb,
339                     "processor\t: %d\n"
340                     "vendor_id\t: %.20s\n"
341                     "cpu family\t: %u\n"
342                     "model\t\t: %u\n"
343                     "model name\t: %s\n"
344                     "stepping\t: %u\n"
345                     "cpu MHz\t\t: %d.%02d\n"
346                     "cache size\t: %d KB\n"
347                     "physical id\t: %d\n"
348                     "siblings\t: %d\n"
349                     "core id\t\t: %d\n"
350                     "cpu cores\t: %d\n"
351                     "apicid\t\t: %d\n"
352                     "initial apicid\t: %d\n"
353                     "fpu\t\t: %s\n"
354                     "fpu_exception\t: %s\n"
355                     "cpuid level\t: %d\n"
356                     "wp\t\t: %s\n",
357                     i, cpu_vendor, CPUID_TO_FAMILY(cpu_id),
358                     CPUID_TO_MODEL(cpu_id), model, cpu_id & CPUID_STEPPING,
359                     fqmhz, fqkhz,
360                     (cache_size[2] >> 16), 0, mp_ncpus, i, mp_ncpus,
361                     i, i, /*cpu_id & CPUID_LOCAL_APIC_ID ??*/
362                     (cpu_feature & CPUID_FPU) ? "yes" : "no", "yes",
363                     CPUID_TO_FAMILY(cpu_id), "yes");
364                 sbuf_cat(sb, "flags\t\t:");
365                 for (j = 0; j < nitems(cpu_feature_names); j++)
366                         if (cpu_feature & (1 << j) &&
367                             cpu_feature_names[j][0] != '\0')
368                                 sbuf_printf(sb, " %s", cpu_feature_names[j]);
369                 for (j = 0; j < nitems(amd_feature_names); j++)
370                         if (amd_feature & (1 << j) &&
371                             amd_feature_names[j][0] != '\0')
372                                 sbuf_printf(sb, " %s", amd_feature_names[j]);
373                 for (j = 0; j < nitems(cpu_feature2_names); j++)
374                         if (cpu_feature2 & (1 << j) &&
375                             cpu_feature2_names[j][0] != '\0')
376                                 sbuf_printf(sb, " %s", cpu_feature2_names[j]);
377                 for (j = 0; j < nitems(amd_feature2_names); j++)
378                         if (amd_feature2 & (1 << j) &&
379                             amd_feature2_names[j][0] != '\0')
380                                 sbuf_printf(sb, " %s", amd_feature2_names[j]);
381                 for (j = 0; j < nitems(cpu_stdext_feature_names); j++)
382                         if (cpu_stdext_feature & (1 << j) &&
383                             cpu_stdext_feature_names[j][0] != '\0')
384                                 sbuf_printf(sb, " %s",
385                                     cpu_stdext_feature_names[j]);
386                 if (tsc_is_invariant)
387                         sbuf_cat(sb, " constant_tsc");
388                 for (j = 0; j < nitems(cpu_stdext_feature2_names); j++)
389                         if (cpu_stdext_feature2 & (1 << j) &&
390                             cpu_stdext_feature2_names[j][0] != '\0')
391                                 sbuf_printf(sb, " %s",
392                                     cpu_stdext_feature2_names[j]);
393                 for (j = 0; j < nitems(cpu_stdext_feature3_names); j++)
394                         if (cpu_stdext_feature3 & (1 << j) &&
395                             cpu_stdext_feature3_names[j][0] != '\0')
396                                 sbuf_printf(sb, " %s",
397                                     cpu_stdext_feature3_names[j]);
398                 if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
399                         cpuid_count(0xd, 0x1, regs);
400                         for (j = 0; j < nitems(cpu_stdext_feature_l1_names); j++)
401                                 if (regs[0] & (1 << j) &&
402                                     cpu_stdext_feature_l1_names[j][0] != '\0')
403                                         sbuf_printf(sb, " %s",
404                                             cpu_stdext_feature_l1_names[j]);
405                 }
406                 sbuf_cat(sb, "\n");
407                 sbuf_printf(sb,
408                     "bugs\t\t: %s\n"
409                     "bogomips\t: %d.%02d\n"
410                     "clflush size\t: %d\n"
411                     "cache_alignment\t: %d\n"
412                     "address sizes\t: %d bits physical, %d bits virtual\n",
413 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
414                     (has_f00f_bug) ? "Intel F00F" : "",
415 #else
416                     "",
417 #endif
418                     fqmhz * 2, fqkhz,
419                     cpu_clflush_line_size, cpu_clflush_line_size,
420                     cpu_maxphyaddr,
421                     (cpu_maxphyaddr > 32) ? 48 : 0);
422                 sbuf_cat(sb, "power management: ");
423                 for (j = 0; j < nitems(power_flags); j++)
424                         if (amd_pminfo & (1 << j))
425                                 sbuf_printf(sb, " %s", power_flags[j]);
426                 sbuf_cat(sb, "\n\n");
427
428                 /* XXX per-cpu vendor / class / model / id? */
429         }
430         sbuf_cat(sb, "\n");
431
432         return (0);
433 }
434 #else
435 /* ARM64TODO: implement non-stubbed linprocfs_docpuinfo */
436 static int
437 linprocfs_docpuinfo(PFS_FILL_ARGS)
438 {
439         int i;
440
441         for (i = 0; i < mp_ncpus; ++i) {
442                 sbuf_printf(sb,
443                     "processor\t: %d\n"
444                     "BogoMIPS\t: %d.%02d\n",
445                     i, 0, 0);
446                 sbuf_cat(sb, "Features\t: ");
447                 sbuf_cat(sb, "\n");
448                 sbuf_printf(sb,
449                     "CPU implementer\t: \n"
450                     "CPU architecture: \n"
451                     "CPU variant\t: 0x%x\n"
452                     "CPU part\t: 0x%x\n"
453                     "CPU revision\t: %d\n",
454                     0, 0, 0);
455                 sbuf_cat(sb, "\n");
456         }
457
458         return (0);
459 }
460 #endif /* __i386__ || __amd64__ */
461
462 static const char *path_slash_sys = "/sys";
463 static const char *fstype_sysfs = "sysfs";
464
465 static int
466 _mtab_helper(const struct pfs_node *pn, const struct statfs *sp,
467     const char **mntfrom, const char **mntto, const char **fstype)
468 {
469         /* determine device name */
470         *mntfrom = sp->f_mntfromname;
471
472         /* determine mount point */
473         *mntto = sp->f_mntonname;
474
475         /* determine fs type */
476         *fstype = sp->f_fstypename;
477         if (strcmp(*fstype, pn->pn_info->pi_name) == 0)
478                 *mntfrom = *fstype = "proc";
479         else if (strcmp(*fstype, "procfs") == 0)
480                 return (ECANCELED);
481
482         if (strcmp(*fstype, "autofs") == 0) {
483                 /*
484                  * FreeBSD uses eg "map -hosts", whereas Linux
485                  * expects just "-hosts".
486                  */
487                 if (strncmp(*mntfrom, "map ", 4) == 0)
488                         *mntfrom += 4;
489         }
490
491         if (strcmp(*fstype, "linsysfs") == 0) {
492                 *mntfrom = path_slash_sys;
493                 *fstype = fstype_sysfs;
494         } else {
495                 /* For Linux msdosfs is called vfat */
496                 if (strcmp(*fstype, "msdosfs") == 0)
497                         *fstype = "vfat";
498         }
499         return (0);
500 }
501
502 static void
503 _sbuf_mntoptions_helper(struct sbuf *sb, uint64_t f_flags)
504 {
505         sbuf_cat(sb, (f_flags & MNT_RDONLY) ? "ro" : "rw");
506 #define ADD_OPTION(opt, name) \
507         if (f_flags & (opt)) sbuf_cat(sb, "," name);
508         ADD_OPTION(MNT_SYNCHRONOUS,     "sync");
509         ADD_OPTION(MNT_NOEXEC,          "noexec");
510         ADD_OPTION(MNT_NOSUID,          "nosuid");
511         ADD_OPTION(MNT_UNION,           "union");
512         ADD_OPTION(MNT_ASYNC,           "async");
513         ADD_OPTION(MNT_SUIDDIR,         "suiddir");
514         ADD_OPTION(MNT_NOSYMFOLLOW,     "nosymfollow");
515         ADD_OPTION(MNT_NOATIME,         "noatime");
516 #undef ADD_OPTION
517 }
518
519 /*
520  * Filler function for proc/mtab and proc/<pid>/mounts.
521  *
522  * /proc/mtab doesn't exist in Linux' procfs, but is included here so
523  * users can symlink /compat/linux/etc/mtab to /proc/mtab
524  */
525 static int
526 linprocfs_domtab(PFS_FILL_ARGS)
527 {
528         struct nameidata nd;
529         const char *lep, *mntto, *mntfrom, *fstype;
530         char *dlep, *flep;
531         size_t lep_len;
532         int error;
533         struct statfs *buf, *sp;
534         size_t count;
535
536         /* resolve symlinks etc. in the emulation tree prefix */
537         /*
538          * Ideally, this would use the current chroot rather than some
539          * hardcoded path.
540          */
541         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path);
542         flep = NULL;
543         error = namei(&nd);
544         lep = linux_emul_path;
545         if (error == 0) {
546                 if (vn_fullpath(nd.ni_vp, &dlep, &flep) == 0)
547                         lep = dlep;
548                 vrele(nd.ni_vp);
549         }
550         lep_len = strlen(lep);
551
552         buf = NULL;
553         error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
554             UIO_SYSSPACE, MNT_WAIT);
555         if (error != 0) {
556                 free(buf, M_TEMP);
557                 free(flep, M_TEMP);
558                 return (error);
559         }
560
561         for (sp = buf; count > 0; sp++, count--) {
562                 error = _mtab_helper(pn, sp, &mntfrom, &mntto, &fstype);
563                 if (error != 0) {
564                         MPASS(error == ECANCELED);
565                         continue;
566                 }
567
568                 /* determine mount point */
569                 if (strncmp(mntto, lep, lep_len) == 0 && mntto[lep_len] == '/')
570                         mntto += lep_len;
571
572                 sbuf_printf(sb, "%s %s %s ", mntfrom, mntto, fstype);
573                 _sbuf_mntoptions_helper(sb, sp->f_flags);
574                 /* a real Linux mtab will also show NFS options */
575                 sbuf_printf(sb, " 0 0\n");
576         }
577
578         free(buf, M_TEMP);
579         free(flep, M_TEMP);
580         return (error);
581 }
582
583 static int
584 linprocfs_doprocmountinfo(PFS_FILL_ARGS)
585 {
586         struct nameidata nd;
587         const char *mntfrom, *mntto, *fstype;
588         const char *lep;
589         char *dlep, *flep;
590         struct statfs *buf, *sp;
591         size_t count, lep_len;
592         int error;
593
594         /*
595          * Ideally, this would use the current chroot rather than some
596          * hardcoded path.
597          */
598         NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, linux_emul_path);
599         flep = NULL;
600         error = namei(&nd);
601         lep = linux_emul_path;
602         if (error == 0) {
603                 if (vn_fullpath(nd.ni_vp, &dlep, &flep) == 0)
604                         lep = dlep;
605                 vrele(nd.ni_vp);
606         }
607         lep_len = strlen(lep);
608
609         buf = NULL;
610         error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
611             UIO_SYSSPACE, MNT_WAIT);
612         if (error != 0)
613                 goto out;
614
615         for (sp = buf; count > 0; sp++, count--) {
616                 error = _mtab_helper(pn, sp, &mntfrom, &mntto, &fstype);
617                 if (error != 0) {
618                         MPASS(error == ECANCELED);
619                         continue;
620                 }
621
622                 if (strncmp(mntto, lep, lep_len) == 0 && mntto[lep_len] == '/')
623                         mntto += lep_len;
624 #if 0
625                 /*
626                  * If the prefix is a chroot, and this mountpoint is not under
627                  * the prefix, we should skip it.  Leave it for now for
628                  * consistency with procmtab above.
629                  */
630                 else
631                         continue;
632 #endif
633
634                 /*
635                  * (1) mount id
636                  *
637                  * (2) parent mount id -- we don't have this cheaply, so
638                  * provide a dummy value
639                  *
640                  * (3) major:minor -- ditto
641                  *
642                  * (4) root filesystem mount -- probably a namespaces thing
643                  *
644                  * (5) mountto path
645                  */
646                 sbuf_printf(sb, "%u 0 0:0 / %s ",
647                     sp->f_fsid.val[0] ^ sp->f_fsid.val[1], mntto);
648                 /* (6) mount options */
649                 _sbuf_mntoptions_helper(sb, sp->f_flags);
650                 /*
651                  * (7) zero or more optional fields -- again, namespace related
652                  *
653                  * (8) End of variable length fields separator ("-")
654                  *
655                  * (9) fstype
656                  *
657                  * (10) mount from
658                  *
659                  * (11) "superblock" options -- like (6), but different
660                  * semantics in Linux
661                  */
662                 sbuf_printf(sb, " - %s %s %s\n", fstype, mntfrom,
663                     (sp->f_flags & MNT_RDONLY) ? "ro" : "rw");
664         }
665
666         error = 0;
667 out:
668         free(buf, M_TEMP);
669         free(flep, M_TEMP);
670         return (error);
671 }
672
673 /*
674  * Filler function for proc/partitions
675  */
676 static int
677 linprocfs_dopartitions(PFS_FILL_ARGS)
678 {
679         struct g_class *cp;
680         struct g_geom *gp;
681         struct g_provider *pp;
682         int major, minor;
683
684         g_topology_lock();
685         sbuf_printf(sb, "major minor  #blocks  name rio rmerge rsect "
686             "ruse wio wmerge wsect wuse running use aveq\n");
687
688         LIST_FOREACH(cp, &g_classes, class) {
689                 if (strcmp(cp->name, "DISK") == 0 ||
690                     strcmp(cp->name, "PART") == 0)
691                         LIST_FOREACH(gp, &cp->geom, geom) {
692                                 LIST_FOREACH(pp, &gp->provider, provider) {
693                                         if (linux_driver_get_major_minor(
694                                             pp->name, &major, &minor) != 0) {
695                                                 major = 0;
696                                                 minor = 0;
697                                         }
698                                         sbuf_printf(sb, "%d %d %lld %s "
699                                             "%d %d %d %d %d "
700                                              "%d %d %d %d %d %d\n",
701                                              major, minor,
702                                              (long long)pp->mediasize, pp->name,
703                                              0, 0, 0, 0, 0,
704                                              0, 0, 0, 0, 0, 0);
705                                 }
706                         }
707         }
708         g_topology_unlock();
709
710         return (0);
711 }
712
713 /*
714  * Filler function for proc/stat
715  *
716  * Output depends on kernel version:
717  *
718  * v2.5.40 <=
719  *   user nice system idle
720  * v2.5.41
721  *   user nice system idle iowait
722  * v2.6.11
723  *   user nice system idle iowait irq softirq steal
724  * v2.6.24
725  *   user nice system idle iowait irq softirq steal guest
726  * v2.6.33 >=
727  *   user nice system idle iowait irq softirq steal guest guest_nice
728  */
729 static int
730 linprocfs_dostat(PFS_FILL_ARGS)
731 {
732         struct pcpu *pcpu;
733         long cp_time[CPUSTATES];
734         long *cp;
735         struct timeval boottime;
736         int i;
737         char *zero_pad;
738         bool has_intr = true;
739
740         if (linux_kernver(td) >= LINUX_KERNVER(2,6,33)) {
741                 zero_pad = " 0 0 0 0\n";
742         } else if (linux_kernver(td) >= LINUX_KERNVER(2,6,24)) {
743                 zero_pad = " 0 0 0\n";
744         } else if (linux_kernver(td) >= LINUX_KERNVER(2,6,11)) {
745                 zero_pad = " 0 0\n";
746         } else if (linux_kernver(td) >= LINUX_KERNVER(2,5,41)) {
747                 has_intr = false;
748                 zero_pad = " 0\n";
749         } else {
750                 has_intr = false;
751                 zero_pad = "\n";
752         }
753
754         read_cpu_time(cp_time);
755         getboottime(&boottime);
756         /* Parameters common to all versions */
757         sbuf_printf(sb, "cpu %lu %lu %lu %lu",
758             T2J(cp_time[CP_USER]),
759             T2J(cp_time[CP_NICE]),
760             T2J(cp_time[CP_SYS]),
761             T2J(cp_time[CP_IDLE]));
762
763         /* Print interrupt stats if available */
764         if (has_intr) {
765                 sbuf_printf(sb, " 0 %lu", T2J(cp_time[CP_INTR]));
766         }
767
768         /* Pad out remaining fields depending on version */
769         sbuf_printf(sb, "%s", zero_pad);
770
771         CPU_FOREACH(i) {
772                 pcpu = pcpu_find(i);
773                 cp = pcpu->pc_cp_time;
774                 sbuf_printf(sb, "cpu%d %lu %lu %lu %lu", i,
775                     T2J(cp[CP_USER]),
776                     T2J(cp[CP_NICE]),
777                     T2J(cp[CP_SYS]),
778                     T2J(cp[CP_IDLE]));
779
780                 if (has_intr) {
781                         sbuf_printf(sb, " 0 %lu", T2J(cp[CP_INTR]));
782                 }
783
784                 sbuf_printf(sb, "%s", zero_pad);
785         }
786         sbuf_printf(sb,
787             "disk 0 0 0 0\n"
788             "page %ju %ju\n"
789             "swap %ju %ju\n"
790             "intr %ju\n"
791             "ctxt %ju\n"
792             "btime %lld\n",
793             (uintmax_t)VM_CNT_FETCH(v_vnodepgsin),
794             (uintmax_t)VM_CNT_FETCH(v_vnodepgsout),
795             (uintmax_t)VM_CNT_FETCH(v_swappgsin),
796             (uintmax_t)VM_CNT_FETCH(v_swappgsout),
797             (uintmax_t)VM_CNT_FETCH(v_intr),
798             (uintmax_t)VM_CNT_FETCH(v_swtch),
799             (long long)boottime.tv_sec);
800         return (0);
801 }
802
803 static int
804 linprocfs_doswaps(PFS_FILL_ARGS)
805 {
806         struct xswdev xsw;
807         uintmax_t total, used;
808         int n;
809         char devname[SPECNAMELEN + 1];
810
811         sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
812         for (n = 0; ; n++) {
813                 if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0)
814                         break;
815                 total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024;
816                 used  = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024;
817
818                 /*
819                  * The space and not tab after the device name is on
820                  * purpose.  Linux does so.
821                  */
822                 sbuf_printf(sb, "/dev/%-34s unknown\t\t%jd\t%jd\t-1\n",
823                     devname, total, used);
824         }
825         return (0);
826 }
827
828 /*
829  * Filler function for proc/uptime
830  */
831 static int
832 linprocfs_douptime(PFS_FILL_ARGS)
833 {
834         long cp_time[CPUSTATES];
835         struct timeval tv;
836
837         getmicrouptime(&tv);
838         read_cpu_time(cp_time);
839         sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n",
840             (long long)tv.tv_sec, tv.tv_usec / 10000,
841             T2S(cp_time[CP_IDLE] / mp_ncpus),
842             T2CS(cp_time[CP_IDLE] / mp_ncpus) % 100);
843         return (0);
844 }
845
/*
 * Get OS build date
 *
 * Appends a fixed, hard-coded build string to sb.  The thread argument
 * is not used.  Code that used to derive the string from the kernel
 * version string was compiled out with #if 0 and has been removed.
 */
static void
linprocfs_osbuild(struct thread *td, struct sbuf *sb)
{

        sbuf_cat(sb, "#4 Sun Dec 18 04:30:00 CET 1977");
}
871
/*
 * Get OS builder
 *
 * Appends a fixed, hard-coded builder identity to sb.  The thread
 * argument is not used.  Code that used to extract the builder from the
 * kernel version string was compiled out with #if 0 and has been removed.
 */
static void
linprocfs_osbuilder(struct thread *td, struct sbuf *sb)
{

        sbuf_cat(sb, "des@freebsd.org");
}
896
897 /*
898  * Filler function for proc/version
899  */
900 static int
901 linprocfs_doversion(PFS_FILL_ARGS)
902 {
903         char osname[LINUX_MAX_UTSNAME];
904         char osrelease[LINUX_MAX_UTSNAME];
905
906         linux_get_osname(td, osname);
907         linux_get_osrelease(td, osrelease);
908         sbuf_printf(sb, "%s version %s (", osname, osrelease);
909         linprocfs_osbuilder(td, sb);
910         sbuf_cat(sb, ") (gcc version " __VERSION__ ") ");
911         linprocfs_osbuild(td, sb);
912         sbuf_cat(sb, "\n");
913
914         return (0);
915 }
916
917 /*
918  * Filler function for proc/loadavg
919  */
920 static int
921 linprocfs_doloadavg(PFS_FILL_ARGS)
922 {
923
924         sbuf_printf(sb,
925             "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
926             (int)(averunnable.ldavg[0] / averunnable.fscale),
927             (int)(averunnable.ldavg[0] * 100 / averunnable.fscale % 100),
928             (int)(averunnable.ldavg[1] / averunnable.fscale),
929             (int)(averunnable.ldavg[1] * 100 / averunnable.fscale % 100),
930             (int)(averunnable.ldavg[2] / averunnable.fscale),
931             (int)(averunnable.ldavg[2] * 100 / averunnable.fscale % 100),
932             1,                          /* number of running tasks */
933             nprocs,                     /* number of tasks */
934             lastpid                     /* the last pid */
935         );
936         return (0);
937 }
938
939 static int
940 linprocfs_get_tty_nr(struct proc *p)
941 {
942         struct session *sp;
943         const char *ttyname;
944         int error, major, minor, nr;
945
946         PROC_LOCK_ASSERT(p, MA_OWNED);
947         sx_assert(&proctree_lock, SX_LOCKED);
948
949         if ((p->p_flag & P_CONTROLT) == 0)
950                 return (-1);
951
952         sp = p->p_pgrp->pg_session;
953         if (sp == NULL)
954                 return (-1);
955
956         ttyname = devtoname(sp->s_ttyp->t_dev);
957         error = linux_driver_get_major_minor(ttyname, &major, &minor);
958         if (error != 0)
959                 return (-1);
960
961         nr = makedev(major, minor);
962         return (nr);
963 }
964
965 /*
966  * Filler function for proc/pid/stat
967  */
968 static int
969 linprocfs_doprocstat(PFS_FILL_ARGS)
970 {
971         struct kinfo_proc kp;
972         struct timeval boottime;
973         char state;
974         static int ratelimit = 0;
975         int tty_nr;
976         vm_offset_t startcode, startdata;
977
978         getboottime(&boottime);
979         sx_slock(&proctree_lock);
980         PROC_LOCK(p);
981         fill_kinfo_proc(p, &kp);
982         tty_nr = linprocfs_get_tty_nr(p);
983         sx_sunlock(&proctree_lock);
984         if (p->p_vmspace) {
985            startcode = (vm_offset_t)p->p_vmspace->vm_taddr;
986            startdata = (vm_offset_t)p->p_vmspace->vm_daddr;
987         } else {
988            startcode = 0;
989            startdata = 0;
990         }
991         sbuf_printf(sb, "%d", p->p_pid);
992 #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg)
993         PS_ADD("comm",          "(%s)", p->p_comm);
994         if (kp.ki_stat > sizeof(linux_state)) {
995                 state = 'R';
996
997                 if (ratelimit == 0) {
998                         printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n",
999                             kp.ki_stat, sizeof(linux_state));
1000                         ++ratelimit;
1001                 }
1002         } else
1003                 state = linux_state[kp.ki_stat - 1];
1004         PS_ADD("state",         "%c",   state);
1005         PS_ADD("ppid",          "%d",   p->p_pptr ? p->p_pptr->p_pid : 0);
1006         PS_ADD("pgrp",          "%d",   p->p_pgid);
1007         PS_ADD("session",       "%d",   p->p_session->s_sid);
1008         PROC_UNLOCK(p);
1009         PS_ADD("tty",           "%d",   tty_nr);
1010         PS_ADD("tpgid",         "%d",   kp.ki_tpgid);
1011         PS_ADD("flags",         "%u",   0); /* XXX */
1012         PS_ADD("minflt",        "%lu",  kp.ki_rusage.ru_minflt);
1013         PS_ADD("cminflt",       "%lu",  kp.ki_rusage_ch.ru_minflt);
1014         PS_ADD("majflt",        "%lu",  kp.ki_rusage.ru_majflt);
1015         PS_ADD("cmajflt",       "%lu",  kp.ki_rusage_ch.ru_majflt);
1016         PS_ADD("utime",         "%ld",  TV2J(&kp.ki_rusage.ru_utime));
1017         PS_ADD("stime",         "%ld",  TV2J(&kp.ki_rusage.ru_stime));
1018         PS_ADD("cutime",        "%ld",  TV2J(&kp.ki_rusage_ch.ru_utime));
1019         PS_ADD("cstime",        "%ld",  TV2J(&kp.ki_rusage_ch.ru_stime));
1020         PS_ADD("priority",      "%d",   kp.ki_pri.pri_user);
1021         PS_ADD("nice",          "%d",   kp.ki_nice); /* 19 (nicest) to -19 */
1022         PS_ADD("0",             "%d",   0); /* removed field */
1023         PS_ADD("itrealvalue",   "%d",   0); /* XXX */
1024         PS_ADD("starttime",     "%lu",  TV2J(&kp.ki_start) - TV2J(&boottime));
1025         PS_ADD("vsize",         "%ju",  P2K((uintmax_t)kp.ki_size));
1026         PS_ADD("rss",           "%ju",  (uintmax_t)kp.ki_rssize);
1027         PS_ADD("rlim",          "%lu",  kp.ki_rusage.ru_maxrss);
1028         PS_ADD("startcode",     "%ju",  (uintmax_t)startcode);
1029         PS_ADD("endcode",       "%ju",  (uintmax_t)startdata);
1030         PS_ADD("startstack",    "%u",   0); /* XXX */
1031         PS_ADD("kstkesp",       "%u",   0); /* XXX */
1032         PS_ADD("kstkeip",       "%u",   0); /* XXX */
1033         PS_ADD("signal",        "%u",   0); /* XXX */
1034         PS_ADD("blocked",       "%u",   0); /* XXX */
1035         PS_ADD("sigignore",     "%u",   0); /* XXX */
1036         PS_ADD("sigcatch",      "%u",   0); /* XXX */
1037         PS_ADD("wchan",         "%u",   0); /* XXX */
1038         PS_ADD("nswap",         "%lu",  kp.ki_rusage.ru_nswap);
1039         PS_ADD("cnswap",        "%lu",  kp.ki_rusage_ch.ru_nswap);
1040         PS_ADD("exitsignal",    "%d",   0); /* XXX */
1041         PS_ADD("processor",     "%u",   kp.ki_lastcpu);
1042         PS_ADD("rt_priority",   "%u",   0); /* XXX */ /* >= 2.5.19 */
1043         PS_ADD("policy",        "%u",   kp.ki_pri.pri_class); /* >= 2.5.19 */
1044 #undef PS_ADD
1045         sbuf_putc(sb, '\n');
1046
1047         return (0);
1048 }
1049
1050 /*
1051  * Filler function for proc/pid/statm
1052  */
1053 static int
1054 linprocfs_doprocstatm(PFS_FILL_ARGS)
1055 {
1056         struct kinfo_proc kp;
1057         segsz_t lsize;
1058
1059         sx_slock(&proctree_lock);
1060         PROC_LOCK(p);
1061         fill_kinfo_proc(p, &kp);
1062         PROC_UNLOCK(p);
1063         sx_sunlock(&proctree_lock);
1064
1065         /*
1066          * See comments in linprocfs_doprocstatus() regarding the
1067          * computation of lsize.
1068          */
1069         /* size resident share trs drs lrs dt */
1070         sbuf_printf(sb, "%ju ", B2P((uintmax_t)kp.ki_size));
1071         sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_rssize);
1072         sbuf_printf(sb, "%ju ", (uintmax_t)0); /* XXX */
1073         sbuf_printf(sb, "%ju ", (uintmax_t)kp.ki_tsize);
1074         sbuf_printf(sb, "%ju ", (uintmax_t)(kp.ki_dsize + kp.ki_ssize));
1075         lsize = B2P(kp.ki_size) - kp.ki_dsize -
1076             kp.ki_ssize - kp.ki_tsize - 1;
1077         sbuf_printf(sb, "%ju ", (uintmax_t)lsize);
1078         sbuf_printf(sb, "%ju\n", (uintmax_t)0); /* XXX */
1079
1080         return (0);
1081 }
1082
1083 /*
1084  * Filler function for proc/pid/status
1085  */
1086 static int
1087 linprocfs_doprocstatus(PFS_FILL_ARGS)
1088 {
1089         struct kinfo_proc kp;
1090         char *state;
1091         segsz_t lsize;
1092         struct thread *td2;
1093         struct sigacts *ps;
1094         l_sigset_t siglist, sigignore, sigcatch;
1095         int i;
1096
1097         sx_slock(&proctree_lock);
1098         PROC_LOCK(p);
1099         td2 = FIRST_THREAD_IN_PROC(p);
1100
1101         if (P_SHOULDSTOP(p)) {
1102                 state = "T (stopped)";
1103         } else {
1104                 switch(p->p_state) {
1105                 case PRS_NEW:
1106                         state = "I (idle)";
1107                         break;
1108                 case PRS_NORMAL:
1109                         if (p->p_flag & P_WEXIT) {
1110                                 state = "X (exiting)";
1111                                 break;
1112                         }
1113                         switch(TD_GET_STATE(td2)) {
1114                         case TDS_INHIBITED:
1115                                 state = "S (sleeping)";
1116                                 break;
1117                         case TDS_RUNQ:
1118                         case TDS_RUNNING:
1119                                 state = "R (running)";
1120                                 break;
1121                         default:
1122                                 state = "? (unknown)";
1123                                 break;
1124                         }
1125                         break;
1126                 case PRS_ZOMBIE:
1127                         state = "Z (zombie)";
1128                         break;
1129                 default:
1130                         state = "? (unknown)";
1131                         break;
1132                 }
1133         }
1134
1135         fill_kinfo_proc(p, &kp);
1136         sx_sunlock(&proctree_lock);
1137
1138         sbuf_printf(sb, "Name:\t%s\n",          p->p_comm); /* XXX escape */
1139         sbuf_printf(sb, "State:\t%s\n",         state);
1140
1141         /*
1142          * Credentials
1143          */
1144         sbuf_printf(sb, "Tgid:\t%d\n",          p->p_pid);
1145         sbuf_printf(sb, "Pid:\t%d\n",           p->p_pid);
1146         sbuf_printf(sb, "PPid:\t%d\n",          kp.ki_ppid );
1147         sbuf_printf(sb, "TracerPid:\t%d\n",     kp.ki_tracer );
1148         sbuf_printf(sb, "Uid:\t%d\t%d\t%d\t%d\n", p->p_ucred->cr_ruid,
1149                                                 p->p_ucred->cr_uid,
1150                                                 p->p_ucred->cr_svuid,
1151                                                 /* FreeBSD doesn't have fsuid */
1152                                                 p->p_ucred->cr_uid);
1153         sbuf_printf(sb, "Gid:\t%d\t%d\t%d\t%d\n", p->p_ucred->cr_rgid,
1154                                                 p->p_ucred->cr_gid,
1155                                                 p->p_ucred->cr_svgid,
1156                                                 /* FreeBSD doesn't have fsgid */
1157                                                 p->p_ucred->cr_gid);
1158         sbuf_cat(sb, "Groups:\t");
1159         for (i = 0; i < p->p_ucred->cr_ngroups; i++)
1160                 sbuf_printf(sb, "%d ",          p->p_ucred->cr_groups[i]);
1161         PROC_UNLOCK(p);
1162         sbuf_putc(sb, '\n');
1163
1164         /*
1165          * Memory
1166          *
1167          * While our approximation of VmLib may not be accurate (I
1168          * don't know of a simple way to verify it, and I'm not sure
1169          * it has much meaning anyway), I believe it's good enough.
1170          *
1171          * The same code that could (I think) accurately compute VmLib
1172          * could also compute VmLck, but I don't really care enough to
1173          * implement it. Submissions are welcome.
1174          */
1175         sbuf_printf(sb, "VmSize:\t%8ju kB\n",   B2K((uintmax_t)kp.ki_size));
1176         sbuf_printf(sb, "VmLck:\t%8u kB\n",     P2K(0)); /* XXX */
1177         sbuf_printf(sb, "VmRSS:\t%8ju kB\n",    P2K((uintmax_t)kp.ki_rssize));
1178         sbuf_printf(sb, "VmData:\t%8ju kB\n",   P2K((uintmax_t)kp.ki_dsize));
1179         sbuf_printf(sb, "VmStk:\t%8ju kB\n",    P2K((uintmax_t)kp.ki_ssize));
1180         sbuf_printf(sb, "VmExe:\t%8ju kB\n",    P2K((uintmax_t)kp.ki_tsize));
1181         lsize = B2P(kp.ki_size) - kp.ki_dsize -
1182             kp.ki_ssize - kp.ki_tsize - 1;
1183         sbuf_printf(sb, "VmLib:\t%8ju kB\n",    P2K((uintmax_t)lsize));
1184
1185         /*
1186          * Signal masks
1187          */
1188         PROC_LOCK(p);
1189         bsd_to_linux_sigset(&p->p_siglist, &siglist);
1190         ps = p->p_sigacts;
1191         mtx_lock(&ps->ps_mtx);
1192         bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore);
1193         bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch);
1194         mtx_unlock(&ps->ps_mtx);
1195         PROC_UNLOCK(p);
1196
1197         sbuf_printf(sb, "SigPnd:\t%016jx\n",    siglist.__mask);
1198         /*
1199          * XXX. SigBlk - target thread's signal mask, td_sigmask.
1200          * To implement SigBlk pseudofs should support proc/tid dir entries.
1201          */
1202         sbuf_printf(sb, "SigBlk:\t%016x\n",     0);
1203         sbuf_printf(sb, "SigIgn:\t%016jx\n",    sigignore.__mask);
1204         sbuf_printf(sb, "SigCgt:\t%016jx\n",    sigcatch.__mask);
1205
1206         /*
1207          * Linux also prints the capability masks, but we don't have
1208          * capabilities yet, and when we do get them they're likely to
1209          * be meaningless to Linux programs, so we lie. XXX
1210          */
1211         sbuf_printf(sb, "CapInh:\t%016x\n",     0);
1212         sbuf_printf(sb, "CapPrm:\t%016x\n",     0);
1213         sbuf_printf(sb, "CapEff:\t%016x\n",     0);
1214
1215         return (0);
1216 }
1217
1218 /*
1219  * Filler function for proc/pid/cwd
1220  */
1221 static int
1222 linprocfs_doproccwd(PFS_FILL_ARGS)
1223 {
1224         struct pwd *pwd;
1225         char *fullpath = "unknown";
1226         char *freepath = NULL;
1227
1228         pwd = pwd_hold_proc(p);
1229         vn_fullpath(pwd->pwd_cdir, &fullpath, &freepath);
1230         sbuf_printf(sb, "%s", fullpath);
1231         if (freepath)
1232                 free(freepath, M_TEMP);
1233         pwd_drop(pwd);
1234         return (0);
1235 }
1236
1237 /*
1238  * Filler function for proc/pid/root
1239  */
1240 static int
1241 linprocfs_doprocroot(PFS_FILL_ARGS)
1242 {
1243         struct pwd *pwd;
1244         struct vnode *vp;
1245         char *fullpath = "unknown";
1246         char *freepath = NULL;
1247
1248         pwd = pwd_hold_proc(p);
1249         vp = jailed(p->p_ucred) ? pwd->pwd_jdir : pwd->pwd_rdir;
1250         vn_fullpath(vp, &fullpath, &freepath);
1251         sbuf_printf(sb, "%s", fullpath);
1252         if (freepath)
1253                 free(freepath, M_TEMP);
1254         pwd_drop(pwd);
1255         return (0);
1256 }
1257
1258 /*
1259  * Filler function for proc/pid/cmdline
1260  */
1261 static int
1262 linprocfs_doproccmdline(PFS_FILL_ARGS)
1263 {
1264         int ret;
1265
1266         PROC_LOCK(p);
1267         if ((ret = p_cansee(td, p)) != 0) {
1268                 PROC_UNLOCK(p);
1269                 return (ret);
1270         }
1271
1272         /*
1273          * Mimic linux behavior and pass only processes with usermode
1274          * address space as valid.  Return zero silently otherwize.
1275          */
1276         if (p->p_vmspace == &vmspace0) {
1277                 PROC_UNLOCK(p);
1278                 return (0);
1279         }
1280         if (p->p_args != NULL) {
1281                 sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length);
1282                 PROC_UNLOCK(p);
1283                 return (0);
1284         }
1285
1286         if ((p->p_flag & P_SYSTEM) != 0) {
1287                 PROC_UNLOCK(p);
1288                 return (0);
1289         }
1290
1291         PROC_UNLOCK(p);
1292
1293         ret = proc_getargv(td, p, sb);
1294         return (ret);
1295 }
1296
1297 /*
1298  * Filler function for proc/pid/environ
1299  */
1300 static int
1301 linprocfs_doprocenviron(PFS_FILL_ARGS)
1302 {
1303
1304         /*
1305          * Mimic linux behavior and pass only processes with usermode
1306          * address space as valid.  Return zero silently otherwize.
1307          */
1308         if (p->p_vmspace == &vmspace0)
1309                 return (0);
1310
1311         return (proc_getenvv(td, p, sb));
1312 }
1313
1314 static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
1315 static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n";
1316 static char vdso_str[] = "      [vdso]";
1317 static char stack_str[] = "      [stack]";
1318
1319 /*
1320  * Filler function for proc/pid/maps
1321  */
1322 static int
1323 linprocfs_doprocmaps(PFS_FILL_ARGS)
1324 {
1325         struct vmspace *vm;
1326         vm_map_t map;
1327         vm_map_entry_t entry, tmp_entry;
1328         vm_object_t obj, tobj, lobj;
1329         vm_offset_t e_start, e_end;
1330         vm_ooffset_t off;
1331         vm_prot_t e_prot;
1332         unsigned int last_timestamp;
1333         char *name = "", *freename = NULL;
1334         const char *l_map_str;
1335         ino_t ino;
1336         int error;
1337         struct vnode *vp;
1338         struct vattr vat;
1339         bool private;
1340
1341         PROC_LOCK(p);
1342         error = p_candebug(td, p);
1343         PROC_UNLOCK(p);
1344         if (error)
1345                 return (error);
1346
1347         if (uio->uio_rw != UIO_READ)
1348                 return (EOPNOTSUPP);
1349
1350         error = 0;
1351         vm = vmspace_acquire_ref(p);
1352         if (vm == NULL)
1353                 return (ESRCH);
1354
1355         if (SV_CURPROC_FLAG(SV_LP64))
1356                 l_map_str = l64_map_str;
1357         else
1358                 l_map_str = l32_map_str;
1359         map = &vm->vm_map;
1360         vm_map_lock_read(map);
1361         VM_MAP_ENTRY_FOREACH(entry, map) {
1362                 name = "";
1363                 freename = NULL;
1364                 /*
1365                  * Skip printing of the guard page of the stack region, as
1366                  * it confuses glibc pthread_getattr_np() method, where both
1367                  * the base address and size of the stack of the initial thread
1368                  * are calculated.
1369                  */
1370                 if ((entry->eflags & (MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_GUARD)) != 0)
1371                         continue;
1372                 e_prot = entry->protection;
1373                 e_start = entry->start;
1374                 e_end = entry->end;
1375                 obj = entry->object.vm_object;
1376                 off = entry->offset;
1377                 for (lobj = tobj = obj; tobj != NULL;
1378                     lobj = tobj, tobj = tobj->backing_object) {
1379                         VM_OBJECT_RLOCK(tobj);
1380                         off += lobj->backing_object_offset;
1381                         if (lobj != obj)
1382                                 VM_OBJECT_RUNLOCK(lobj);
1383                 }
1384                 private = (entry->eflags & MAP_ENTRY_COW) != 0 || obj == NULL ||
1385                     (obj->flags & OBJ_ANON) != 0;
1386                 last_timestamp = map->timestamp;
1387                 vm_map_unlock_read(map);
1388                 ino = 0;
1389                 if (lobj) {
1390                         vp = vm_object_vnode(lobj);
1391                         if (vp != NULL)
1392                                 vref(vp);
1393                         if (lobj != obj)
1394                                 VM_OBJECT_RUNLOCK(lobj);
1395                         VM_OBJECT_RUNLOCK(obj);
1396                         if (vp != NULL) {
1397                                 vn_fullpath(vp, &name, &freename);
1398                                 vn_lock(vp, LK_SHARED | LK_RETRY);
1399                                 VOP_GETATTR(vp, &vat, td->td_ucred);
1400                                 ino = vat.va_fileid;
1401                                 vput(vp);
1402                         } else if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
1403                                 /*
1404                                  * sv_shared_page_base pointed out to the
1405                                  * FreeBSD sharedpage, PAGE_SIZE is a size
1406                                  * of it. The vDSO page is above.
1407                                  */
1408                                 if (e_start == p->p_sysent->sv_shared_page_base +
1409                                     PAGE_SIZE)
1410                                         name = vdso_str;
1411                                 if (e_end == p->p_sysent->sv_usrstack)
1412                                         name = stack_str;
1413                         }
1414                 }
1415
1416                 /*
1417                  * format:
1418                  *  start, end, access, offset, major, minor, inode, name.
1419                  */
1420                 error = sbuf_printf(sb, l_map_str,
1421                     (u_long)e_start, (u_long)e_end,
1422                     (e_prot & VM_PROT_READ)?"r":"-",
1423                     (e_prot & VM_PROT_WRITE)?"w":"-",
1424                     (e_prot & VM_PROT_EXECUTE)?"x":"-",
1425                     private ? "p" : "s",
1426                     (u_long)off,
1427                     0,
1428                     0,
1429                     (u_long)ino,
1430                     *name ? "     " : " ",
1431                     name
1432                     );
1433                 if (freename)
1434                         free(freename, M_TEMP);
1435                 vm_map_lock_read(map);
1436                 if (error == -1) {
1437                         error = 0;
1438                         break;
1439                 }
1440                 if (last_timestamp != map->timestamp) {
1441                         /*
1442                          * Look again for the entry because the map was
1443                          * modified while it was unlocked.  Specifically,
1444                          * the entry may have been clipped, merged, or deleted.
1445                          */
1446                         vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
1447                         entry = tmp_entry;
1448                 }
1449         }
1450         vm_map_unlock_read(map);
1451         vmspace_free(vm);
1452
1453         return (error);
1454 }
1455
1456 /*
1457  * Filler function for proc/pid/mem
1458  */
1459 static int
1460 linprocfs_doprocmem(PFS_FILL_ARGS)
1461 {
1462         ssize_t resid;
1463         int error;
1464
1465         resid = uio->uio_resid;
1466         error = procfs_doprocmem(PFS_FILL_ARGNAMES);
1467
1468         if (uio->uio_rw == UIO_READ && resid != uio->uio_resid)
1469                 return (0);
1470
1471         if (error == EFAULT)
1472                 error = EIO;
1473
1474         return (error);
1475 }
1476
1477 struct linux_ifname_cb_s {
1478         struct ifnet *ifp;
1479         int ethno;
1480         char *buffer;
1481         size_t buflen;
1482 };
1483
1484 static int
1485 linux_ifname_cb(if_t ifp, void *arg)
1486 {
1487         struct linux_ifname_cb_s *cbs = arg;
1488
1489         if (ifp == cbs->ifp)
1490                 return (snprintf(cbs->buffer, cbs->buflen, "eth%d", cbs->ethno));
1491         if (!linux_use_real_ifname(ifp))
1492                 cbs->ethno++;
1493         return (0);
1494 }
1495
1496 static int
1497 linux_ifname(struct ifnet *ifp, char *buffer, size_t buflen)
1498 {
1499         struct linux_ifname_cb_s arg;
1500
1501         IFNET_RLOCK_ASSERT();
1502
1503         arg.ifp = ifp;
1504         arg.buffer = buffer;
1505         arg.buflen = buflen;
1506         arg.ethno = 0;
1507
1508         /* Short-circuit non ethernet interfaces */
1509         if (linux_use_real_ifname(ifp))
1510                 return (strlcpy(buffer, if_name(ifp), buflen));
1511
1512         /* Determine the (relative) unit number for ethernet interfaces */
1513         return (if_foreach(linux_ifname_cb, &arg));
1514 }
1515
1516 static int
1517 linprocfs_donetdev_cb(if_t ifp, void *arg)
1518 {
1519         char ifname[16]; /* XXX LINUX_IFNAMSIZ */
1520         struct sbuf *sb = arg;
1521
1522         linux_ifname(ifp, ifname, sizeof ifname);
1523         sbuf_printf(sb, "%6.6s: ", ifname);
1524         sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ",
1525             (uintmax_t )if_getcounter(ifp, IFCOUNTER_IBYTES),
1526             (uintmax_t )if_getcounter(ifp, IFCOUNTER_IPACKETS),
1527             (uintmax_t )if_getcounter(ifp, IFCOUNTER_IERRORS),
1528             (uintmax_t )if_getcounter(ifp, IFCOUNTER_IQDROPS),
1529                                                 /* rx_missed_errors */
1530             0UL,                                /* rx_fifo_errors */
1531             0UL,                                /* rx_length_errors +
1532                                                  * rx_over_errors +
1533                                                  * rx_crc_errors +
1534                                                  * rx_frame_errors */
1535             0UL,                                /* rx_compressed */
1536             (uintmax_t )if_getcounter(ifp, IFCOUNTER_IMCASTS));
1537                                                 /* XXX-BZ rx only? */
1538         sbuf_printf(sb, "%8ju %7ju %4ju %4ju %4lu %5ju %7lu %10lu\n",
1539             (uintmax_t )if_getcounter(ifp, IFCOUNTER_OBYTES),
1540             (uintmax_t )if_getcounter(ifp, IFCOUNTER_OPACKETS),
1541             (uintmax_t )if_getcounter(ifp, IFCOUNTER_OERRORS),
1542             (uintmax_t )if_getcounter(ifp, IFCOUNTER_OQDROPS),
1543             0UL,                                /* tx_fifo_errors */
1544             (uintmax_t )if_getcounter(ifp, IFCOUNTER_COLLISIONS),
1545             0UL,                                /* tx_carrier_errors +
1546                                                  * tx_aborted_errors +
1547                                                  * tx_window_errors +
1548                                                  * tx_heartbeat_errors*/
1549             0UL);                               /* tx_compressed */
1550
1551         return (0);
1552 }
1553
1554 /*
1555  * Filler function for proc/net/dev
1556  */
1557 static int
1558 linprocfs_donetdev(PFS_FILL_ARGS)
1559 {
1560         sbuf_printf(sb, "%6s|%58s|%s\n"
1561             "%6s|%58s|%58s\n",
1562             "Inter-", "   Receive", "  Transmit",
1563             " face",
1564             "bytes    packets errs drop fifo frame compressed multicast",
1565             "bytes    packets errs drop fifo colls carrier compressed");
1566
1567         CURVNET_SET(TD_TO_VNET(curthread));
1568         IFNET_RLOCK();
1569         if_foreach(linprocfs_donetdev_cb, sb);
1570         IFNET_RUNLOCK();
1571         CURVNET_RESTORE();
1572
1573         return (0);
1574 }
1575
/*
 * Argument handed to linux_route_print() by rib_walk().
 */
struct walkarg {
        struct sbuf *sb;        /* output buffer */
};

/*
 * rib_walk() callback: prints one proc/net/route line for rt.
 *
 * Without INET the function prints nothing.  Always returns 0.
 */
static int
linux_route_print(struct rtentry *rt, void *vw)
{
#ifdef INET
        struct walkarg *w = vw;
        struct route_nhop_data rnd;
        struct in_addr dst, mask;
        struct nhop_object *nh;
        struct sbuf *out;
        char ifname[16];
        uint32_t scopeid = 0;
        uint32_t gw = 0;
        uint32_t linux_flags = 0;

        out = w->sb;

        rt_get_inet_prefix_pmask(rt, &dst, &mask, &scopeid);
        rt_get_rnd(rt, &rnd);

        /* select only first route in case of multipath */
        nh = nhop_select_func(rnd.rnd_nhop, 0);

        linux_ifname(nh->nh_ifp, ifname, sizeof(ifname));

        /* The gateway column stays 0 unless the nexthop has a gateway. */
        if ((nh->nh_flags & NHF_GATEWAY) != 0)
                gw = nh->gw4_sa.sin_addr.s_addr;

        /* Always report RTF_UP; pass through the gateway/host bits. */
        linux_flags = RTF_UP |
            (nhop_get_rtflags(nh) & (RTF_GATEWAY | RTF_HOST));

        sbuf_printf(out,
            "%s\t"
            "%08X\t%08X\t%04X\t"
            "%d\t%u\t%d\t"
            "%08X\t%d\t%u\t%u",
            ifname,
            dst.s_addr, gw, linux_flags,
            0, 0, rnd.rnd_weight,
            mask.s_addr, nh->nh_mtu, 0, 0);

        sbuf_printf(out, "\n\n");
#endif
        return (0);
}
1622
1623 /*
1624  * Filler function for proc/net/route
1625  */
1626 static int
1627 linprocfs_donetroute(PFS_FILL_ARGS)
1628 {
1629         struct walkarg w = {
1630                 .sb = sb
1631         };
1632         uint32_t fibnum = curthread->td_proc->p_fibnum;
1633
1634         sbuf_printf(w.sb, "%-127s\n", "Iface\tDestination\tGateway "
1635                "\tFlags\tRefCnt\tUse\tMetric\tMask\t\tMTU"
1636                "\tWindow\tIRTT");
1637
1638         CURVNET_SET(TD_TO_VNET(curthread));
1639         IFNET_RLOCK();
1640         rib_walk(fibnum, AF_INET, false, linux_route_print, &w);
1641         IFNET_RUNLOCK();
1642         CURVNET_RESTORE();
1643
1644         return (0);
1645 }
1646
1647 /*
1648  * Filler function for proc/sys/kernel/osrelease
1649  */
1650 static int
1651 linprocfs_doosrelease(PFS_FILL_ARGS)
1652 {
1653         char osrelease[LINUX_MAX_UTSNAME];
1654
1655         linux_get_osrelease(td, osrelease);
1656         sbuf_printf(sb, "%s\n", osrelease);
1657
1658         return (0);
1659 }
1660
1661 /*
1662  * Filler function for proc/sys/kernel/ostype
1663  */
1664 static int
1665 linprocfs_doostype(PFS_FILL_ARGS)
1666 {
1667         char osname[LINUX_MAX_UTSNAME];
1668
1669         linux_get_osname(td, osname);
1670         sbuf_printf(sb, "%s\n", osname);
1671
1672         return (0);
1673 }
1674
1675 /*
1676  * Filler function for proc/sys/kernel/version
1677  */
1678 static int
1679 linprocfs_doosbuild(PFS_FILL_ARGS)
1680 {
1681
1682         linprocfs_osbuild(td, sb);
1683         sbuf_cat(sb, "\n");
1684         return (0);
1685 }
1686
1687 /*
1688  * Filler function for proc/sys/kernel/msgmax
1689  */
1690 static int
1691 linprocfs_domsgmax(PFS_FILL_ARGS)
1692 {
1693
1694         sbuf_printf(sb, "%d\n", msginfo.msgmax);
1695         return (0);
1696 }
1697
1698 /*
1699  * Filler function for proc/sys/kernel/msgmni
1700  */
1701 static int
1702 linprocfs_domsgmni(PFS_FILL_ARGS)
1703 {
1704
1705         sbuf_printf(sb, "%d\n", msginfo.msgmni);
1706         return (0);
1707 }
1708
1709 /*
1710  * Filler function for proc/sys/kernel/msgmnb
1711  */
1712 static int
1713 linprocfs_domsgmnb(PFS_FILL_ARGS)
1714 {
1715
1716         sbuf_printf(sb, "%d\n", msginfo.msgmnb);
1717         return (0);
1718 }
1719
1720 /*
1721  * Filler function for proc/sys/kernel/ngroups_max
1722  *
1723  * Note that in Linux it defaults to 65536, not 1023.
1724  */
1725 static int
1726 linprocfs_dongroups_max(PFS_FILL_ARGS)
1727 {
1728
1729         sbuf_printf(sb, "%d\n", ngroups_max);
1730         return (0);
1731 }
1732
1733 /*
1734  * Filler function for proc/sys/kernel/pid_max
1735  */
1736 static int
1737 linprocfs_dopid_max(PFS_FILL_ARGS)
1738 {
1739
1740         sbuf_printf(sb, "%i\n", PID_MAX);
1741         return (0);
1742 }
1743
1744 /*
1745  * Filler function for proc/sys/kernel/sem
1746  */
1747 static int
1748 linprocfs_dosem(PFS_FILL_ARGS)
1749 {
1750
1751         sbuf_printf(sb, "%d %d %d %d\n", seminfo.semmsl, seminfo.semmns,
1752             seminfo.semopm, seminfo.semmni);
1753         return (0);
1754 }
1755
1756 /*
1757  * Filler function for proc/sys/kernel/shmall
1758  */
1759 static int
1760 linprocfs_doshmall(PFS_FILL_ARGS)
1761 {
1762
1763         sbuf_printf(sb, "%lu\n", shminfo.shmall);
1764         return (0);
1765 }
1766
1767 /*
1768  * Filler function for proc/sys/kernel/shmmax
1769  */
1770 static int
1771 linprocfs_doshmmax(PFS_FILL_ARGS)
1772 {
1773
1774         sbuf_printf(sb, "%lu\n", shminfo.shmmax);
1775         return (0);
1776 }
1777
1778 /*
1779  * Filler function for proc/sys/kernel/shmmni
1780  */
1781 static int
1782 linprocfs_doshmmni(PFS_FILL_ARGS)
1783 {
1784
1785         sbuf_printf(sb, "%lu\n", shminfo.shmmni);
1786         return (0);
1787 }
1788
1789 /*
1790  * Filler function for proc/sys/kernel/tainted
1791  */
1792 static int
1793 linprocfs_dotainted(PFS_FILL_ARGS)
1794 {
1795
1796         sbuf_printf(sb, "0\n");
1797         return (0);
1798 }
1799
1800 /*
1801  * Filler function for proc/sys/vm/min_free_kbytes
1802  *
1803  * This mirrors the approach in illumos to return zero for reads. Effectively,
1804  * it says, no memory is kept in reserve for "atomic allocations". This class
1805  * of allocation can be used at times when a thread cannot be suspended.
1806  */
1807 static int
1808 linprocfs_dominfree(PFS_FILL_ARGS)
1809 {
1810
1811         sbuf_printf(sb, "%d\n", 0);
1812         return (0);
1813 }
1814
/*
 * Filler function for proc/scsi/device_info
 *
 * Produces no output; the file reads as empty.
 */
static int
linprocfs_doscsidevinfo(PFS_FILL_ARGS)
{

	/* Nothing is written to sb. */
	return (0);
}
1824
/*
 * Filler function for proc/scsi/scsi
 *
 * Produces no output; the file reads as empty.
 */
static int
linprocfs_doscsiscsi(PFS_FILL_ARGS)
{

	/* Nothing is written to sb. */
	return (0);
}
1834
1835 /*
1836  * Filler function for proc/devices
1837  */
1838 static int
1839 linprocfs_dodevices(PFS_FILL_ARGS)
1840 {
1841         char *char_devices;
1842         sbuf_printf(sb, "Character devices:\n");
1843
1844         char_devices = linux_get_char_devices();
1845         sbuf_printf(sb, "%s", char_devices);
1846         linux_free_get_char_devices(char_devices);
1847
1848         sbuf_printf(sb, "\nBlock devices:\n");
1849
1850         return (0);
1851 }
1852
1853 /*
1854  * Filler function for proc/cmdline
1855  */
1856 static int
1857 linprocfs_docmdline(PFS_FILL_ARGS)
1858 {
1859
1860         sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname);
1861         sbuf_printf(sb, " ro root=302\n");
1862         return (0);
1863 }
1864
1865 /*
1866  * Filler function for proc/filesystems
1867  */
1868 static int
1869 linprocfs_dofilesystems(PFS_FILL_ARGS)
1870 {
1871         struct vfsconf *vfsp;
1872
1873         vfsconf_slock();
1874         TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
1875                 if (vfsp->vfc_flags & VFCF_SYNTHETIC)
1876                         sbuf_printf(sb, "nodev");
1877                 sbuf_printf(sb, "\t%s\n", vfsp->vfc_name);
1878         }
1879         vfsconf_sunlock();
1880         return(0);
1881 }
1882
/*
 * Filler function for proc/modules
 *
 * The listing code below is compiled out, so the file currently reads
 * as empty.
 */
static int
linprocfs_domodules(PFS_FILL_ARGS)
{
#if 0
	struct linker_file *file;

	TAILQ_FOREACH(file, &linker_files, link) {
		sbuf_printf(sb, "%-20s%8lu%4d\n", file->filename,
		    (unsigned long)file->size, file->refs);
	}
#endif

	return (0);
}
1899
1900 /*
1901  * Filler function for proc/pid/fd
1902  */
1903 static int
1904 linprocfs_dofdescfs(PFS_FILL_ARGS)
1905 {
1906
1907         if (p == curproc)
1908                 sbuf_printf(sb, "/dev/fd");
1909         else
1910                 sbuf_printf(sb, "unknown");
1911         return (0);
1912 }
1913
1914 /*
1915  * Filler function for proc/pid/limits
1916  */
1917 static const struct linux_rlimit_ident {
1918         const char      *desc;
1919         const char      *unit;
1920         unsigned int    rlim_id;
1921 } linux_rlimits_ident[] = {
1922         { "Max cpu time",       "seconds",      RLIMIT_CPU },
1923         { "Max file size",      "bytes",        RLIMIT_FSIZE },
1924         { "Max data size",      "bytes",        RLIMIT_DATA },
1925         { "Max stack size",     "bytes",        RLIMIT_STACK },
1926         { "Max core file size",  "bytes",       RLIMIT_CORE },
1927         { "Max resident set",   "bytes",        RLIMIT_RSS },
1928         { "Max processes",      "processes",    RLIMIT_NPROC },
1929         { "Max open files",     "files",        RLIMIT_NOFILE },
1930         { "Max locked memory",  "bytes",        RLIMIT_MEMLOCK },
1931         { "Max address space",  "bytes",        RLIMIT_AS },
1932         { "Max file locks",     "locks",        LINUX_RLIMIT_LOCKS },
1933         { "Max pending signals", "signals",     LINUX_RLIMIT_SIGPENDING },
1934         { "Max msgqueue size",  "bytes",        LINUX_RLIMIT_MSGQUEUE },
1935         { "Max nice priority",          "",     LINUX_RLIMIT_NICE },
1936         { "Max realtime priority",      "",     LINUX_RLIMIT_RTPRIO },
1937         { "Max realtime timeout",       "us",   LINUX_RLIMIT_RTTIME },
1938         { 0, 0, 0 }
1939 };
1940
1941 static int
1942 linprocfs_doproclimits(PFS_FILL_ARGS)
1943 {
1944         const struct linux_rlimit_ident *li;
1945         struct plimit *limp;
1946         struct rlimit rl;
1947         ssize_t size;
1948         int res, error;
1949
1950         error = 0;
1951
1952         PROC_LOCK(p);
1953         limp = lim_hold(p->p_limit);
1954         PROC_UNLOCK(p);
1955         size = sizeof(res);
1956         sbuf_printf(sb, "%-26s%-21s%-21s%-21s\n", "Limit", "Soft Limit",
1957                         "Hard Limit", "Units");
1958         for (li = linux_rlimits_ident; li->desc != NULL; ++li) {
1959                 switch (li->rlim_id)
1960                 {
1961                 case LINUX_RLIMIT_LOCKS:
1962                         /* FALLTHROUGH */
1963                 case LINUX_RLIMIT_RTTIME:
1964                         rl.rlim_cur = RLIM_INFINITY;
1965                         break;
1966                 case LINUX_RLIMIT_SIGPENDING:
1967                         error = kernel_sysctlbyname(td,
1968                             "kern.sigqueue.max_pending_per_proc",
1969                             &res, &size, 0, 0, 0, 0);
1970                         if (error != 0)
1971                                 goto out;
1972                         rl.rlim_cur = res;
1973                         rl.rlim_max = res;
1974                         break;
1975                 case LINUX_RLIMIT_MSGQUEUE:
1976                         error = kernel_sysctlbyname(td,
1977                             "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
1978                         if (error != 0)
1979                                 goto out;
1980                         rl.rlim_cur = res;
1981                         rl.rlim_max = res;
1982                         break;
1983                 case LINUX_RLIMIT_NICE:
1984                         /* FALLTHROUGH */
1985                 case LINUX_RLIMIT_RTPRIO:
1986                         rl.rlim_cur = 0;
1987                         rl.rlim_max = 0;
1988                         break;
1989                 default:
1990                         rl = limp->pl_rlimit[li->rlim_id];
1991                         break;
1992                 }
1993                 if (rl.rlim_cur == RLIM_INFINITY)
1994                         sbuf_printf(sb, "%-26s%-21s%-21s%-10s\n",
1995                             li->desc, "unlimited", "unlimited", li->unit);
1996                 else
1997                         sbuf_printf(sb, "%-26s%-21llu%-21llu%-10s\n",
1998                             li->desc, (unsigned long long)rl.rlim_cur,
1999                             (unsigned long long)rl.rlim_max, li->unit);
2000         }
2001 out:
2002         lim_free(limp);
2003         return (error);
2004 }
2005
2006 /*
2007  * The point of the following two functions is to work around
2008  * an assertion in Chromium; see kern/240991 for details.
2009  */
2010 static int
2011 linprocfs_dotaskattr(PFS_ATTR_ARGS)
2012 {
2013
2014         vap->va_nlink = 3;
2015         return (0);
2016 }
2017
/*
 * Filler function for proc/<pid>/task/.dummy
 *
 * Produces no output; the file reads as empty.
 */
static int
linprocfs_dotaskdummy(PFS_FILL_ARGS)
{

	/* Nothing is written to sb. */
	return (0);
}
2027
2028 /*
2029  * Filler function for proc/sys/kernel/random/uuid
2030  */
2031 static int
2032 linprocfs_douuid(PFS_FILL_ARGS)
2033 {
2034         struct uuid uuid;
2035
2036         kern_uuidgen(&uuid, 1);
2037         sbuf_printf_uuid(sb, &uuid);
2038         sbuf_printf(sb, "\n");
2039         return(0);
2040 }
2041
2042 /*
2043  * Filler function for proc/sys/kernel/random/boot_id
2044  */
2045 static int
2046 linprocfs_doboot_id(PFS_FILL_ARGS)
2047 {
2048        static bool firstboot = 1;
2049        static struct uuid uuid;
2050
2051        if (firstboot) {
2052                kern_uuidgen(&uuid, 1);
2053                firstboot = 0;
2054        }
2055        sbuf_printf_uuid(sb, &uuid);
2056        sbuf_printf(sb, "\n");
2057        return(0);
2058 }
2059
2060 /*
2061  * Filler function for proc/pid/auxv
2062  */
2063 static int
2064 linprocfs_doauxv(PFS_FILL_ARGS)
2065 {
2066         struct sbuf *asb;
2067         off_t buflen, resid;
2068         int error;
2069
2070         /*
2071          * Mimic linux behavior and pass only processes with usermode
2072          * address space as valid. Return zero silently otherwise.
2073          */
2074         if (p->p_vmspace == &vmspace0)
2075                 return (0);
2076
2077         if (uio->uio_resid == 0)
2078                 return (0);
2079         if (uio->uio_offset < 0 || uio->uio_resid < 0)
2080                 return (EINVAL);
2081
2082         asb = sbuf_new_auto();
2083         if (asb == NULL)
2084                 return (ENOMEM);
2085         error = proc_getauxv(td, p, asb);
2086         if (error == 0)
2087                 error = sbuf_finish(asb);
2088
2089         resid = sbuf_len(asb) - uio->uio_offset;
2090         if (resid > uio->uio_resid)
2091                 buflen = uio->uio_resid;
2092         else
2093                 buflen = resid;
2094         if (buflen > IOSIZE_MAX)
2095                 return (EINVAL);
2096         if (buflen > maxphys)
2097                 buflen = maxphys;
2098         if (resid <= 0)
2099                 return (0);
2100
2101         if (error == 0)
2102                 error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio);
2103         sbuf_delete(asb);
2104         return (error);
2105 }
2106
2107 /*
2108  * Filler function for proc/self/oom_score_adj
2109  */
2110 static int
2111 linprocfs_do_oom_score_adj(PFS_FILL_ARGS)
2112 {
2113         struct linux_pemuldata *pem;
2114         long oom;
2115
2116         pem = pem_find(p);
2117         if (pem == NULL || uio == NULL)
2118                 return (EOPNOTSUPP);
2119         if (uio->uio_rw == UIO_READ) {
2120                 sbuf_printf(sb, "%d\n", pem->oom_score_adj);
2121         } else {
2122                 sbuf_trim(sb);
2123                 sbuf_finish(sb);
2124                 oom = strtol(sbuf_data(sb), NULL, 10);
2125                 if (oom < LINUX_OOM_SCORE_ADJ_MIN ||
2126                     oom > LINUX_OOM_SCORE_ADJ_MAX)
2127                         return (EINVAL);
2128                 pem->oom_score_adj = oom;
2129         }
2130         return (0);
2131 }
2132
2133 /*
2134  * Filler function for proc/sys/vm/max_map_count
2135  *
2136  * Maximum number of active map areas, on Linux this limits the number
2137  * of vmaps per mm struct. We don't limit mappings, return a suitable
2138  * large value.
2139  */
2140 static int
2141 linprocfs_domax_map_cnt(PFS_FILL_ARGS)
2142 {
2143
2144         sbuf_printf(sb, "%d\n", INT32_MAX);
2145         return (0);
2146 }
2147
2148 /*
2149  * Constructor
2150  */
2151 static int
2152 linprocfs_init(PFS_INIT_ARGS)
2153 {
2154         struct pfs_node *root;
2155         struct pfs_node *dir;
2156         struct pfs_node *sys;
2157
2158         root = pi->pi_root;
2159
2160         /* /proc/... */
2161         pfs_create_file(root, "cmdline", &linprocfs_docmdline,
2162             NULL, NULL, NULL, PFS_RD);
2163         pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
2164             NULL, NULL, NULL, PFS_RD);
2165         pfs_create_file(root, "devices", &linprocfs_dodevices,
2166             NULL, NULL, NULL, PFS_RD);
2167         pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
2168             NULL, NULL, NULL, PFS_RD);
2169         pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
2170             NULL, NULL, NULL, PFS_RD);
2171         pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
2172             NULL, NULL, NULL, PFS_RD);
2173         pfs_create_file(root, "modules", &linprocfs_domodules,
2174             NULL, NULL, NULL, PFS_RD);
2175         pfs_create_file(root, "mounts", &linprocfs_domtab,
2176             NULL, NULL, NULL, PFS_RD);
2177         pfs_create_file(root, "mtab", &linprocfs_domtab,
2178             NULL, NULL, NULL, PFS_RD);
2179         pfs_create_file(root, "partitions", &linprocfs_dopartitions,
2180             NULL, NULL, NULL, PFS_RD);
2181         pfs_create_link(root, "self", &procfs_docurproc,
2182             NULL, NULL, NULL, 0);
2183         pfs_create_file(root, "stat", &linprocfs_dostat,
2184             NULL, NULL, NULL, PFS_RD);
2185         pfs_create_file(root, "swaps", &linprocfs_doswaps,
2186             NULL, NULL, NULL, PFS_RD);
2187         pfs_create_file(root, "uptime", &linprocfs_douptime,
2188             NULL, NULL, NULL, PFS_RD);
2189         pfs_create_file(root, "version", &linprocfs_doversion,
2190             NULL, NULL, NULL, PFS_RD);
2191
2192         /* /proc/bus/... */
2193         dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
2194         dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0);
2195         dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0);
2196
2197         /* /proc/net/... */
2198         dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
2199         pfs_create_file(dir, "dev", &linprocfs_donetdev,
2200             NULL, NULL, NULL, PFS_RD);
2201         pfs_create_file(dir, "route", &linprocfs_donetroute,
2202             NULL, NULL, NULL, PFS_RD);
2203
2204         /* /proc/<pid>/... */
2205         dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
2206         pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
2207             NULL, NULL, NULL, PFS_RD);
2208         pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
2209             NULL, NULL, NULL, 0);
2210         pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
2211             NULL, &procfs_candebug, NULL, PFS_RD);
2212         pfs_create_link(dir, "exe", &procfs_doprocfile,
2213             NULL, &procfs_notsystem, NULL, 0);
2214         pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
2215             NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN);
2216         pfs_create_file(dir, "mem", &linprocfs_doprocmem,
2217             procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
2218         pfs_create_file(dir, "mountinfo", &linprocfs_doprocmountinfo,
2219             NULL, NULL, NULL, PFS_RD);
2220         pfs_create_file(dir, "mounts", &linprocfs_domtab,
2221             NULL, NULL, NULL, PFS_RD);
2222         pfs_create_link(dir, "root", &linprocfs_doprocroot,
2223             NULL, NULL, NULL, 0);
2224         pfs_create_file(dir, "stat", &linprocfs_doprocstat,
2225             NULL, NULL, NULL, PFS_RD);
2226         pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
2227             NULL, NULL, NULL, PFS_RD);
2228         pfs_create_file(dir, "status", &linprocfs_doprocstatus,
2229             NULL, NULL, NULL, PFS_RD);
2230         pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
2231             NULL, NULL, NULL, 0);
2232         pfs_create_file(dir, "auxv", &linprocfs_doauxv,
2233             NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
2234         pfs_create_file(dir, "limits", &linprocfs_doproclimits,
2235             NULL, NULL, NULL, PFS_RD);
2236         pfs_create_file(dir, "oom_score_adj", &linprocfs_do_oom_score_adj,
2237             procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR);
2238
2239         /* /proc/<pid>/task/... */
2240         dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
2241         pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy,
2242             NULL, NULL, NULL, PFS_RD);
2243
2244         /* /proc/scsi/... */
2245         dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
2246         pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
2247             NULL, NULL, NULL, PFS_RD);
2248         pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
2249             NULL, NULL, NULL, PFS_RD);
2250
2251         /* /proc/sys/... */
2252         sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
2253
2254         /* /proc/sys/kernel/... */
2255         dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
2256         pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
2257             NULL, NULL, NULL, PFS_RD);
2258         pfs_create_file(dir, "ostype", &linprocfs_doostype,
2259             NULL, NULL, NULL, PFS_RD);
2260         pfs_create_file(dir, "version", &linprocfs_doosbuild,
2261             NULL, NULL, NULL, PFS_RD);
2262         pfs_create_file(dir, "msgmax", &linprocfs_domsgmax,
2263             NULL, NULL, NULL, PFS_RD);
2264         pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
2265             NULL, NULL, NULL, PFS_RD);
2266         pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb,
2267             NULL, NULL, NULL, PFS_RD);
2268         pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max,
2269             NULL, NULL, NULL, PFS_RD);
2270         pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
2271             NULL, NULL, NULL, PFS_RD);
2272         pfs_create_file(dir, "sem", &linprocfs_dosem,
2273             NULL, NULL, NULL, PFS_RD);
2274         pfs_create_file(dir, "shmall", &linprocfs_doshmall,
2275             NULL, NULL, NULL, PFS_RD);
2276         pfs_create_file(dir, "shmmax", &linprocfs_doshmmax,
2277             NULL, NULL, NULL, PFS_RD);
2278         pfs_create_file(dir, "shmmni", &linprocfs_doshmmni,
2279             NULL, NULL, NULL, PFS_RD);
2280         pfs_create_file(dir, "tainted", &linprocfs_dotainted,
2281             NULL, NULL, NULL, PFS_RD);
2282
2283         /* /proc/sys/kernel/random/... */
2284         dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
2285         pfs_create_file(dir, "uuid", &linprocfs_douuid,
2286             NULL, NULL, NULL, PFS_RD);
2287         pfs_create_file(dir, "boot_id", &linprocfs_doboot_id,
2288             NULL, NULL, NULL, PFS_RD);
2289
2290         /* /proc/sys/vm/.... */
2291         dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
2292         pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
2293             NULL, NULL, NULL, PFS_RD);
2294         pfs_create_file(dir, "max_map_count", &linprocfs_domax_map_cnt,
2295             NULL, NULL, NULL, PFS_RD);
2296
2297         return (0);
2298 }
2299
/*
 * Destructor
 *
 * There is nothing to tear down here: pseudofs garbage-collects the
 * nodes.
 */
static int
linprocfs_uninit(PFS_INIT_ARGS)
{

	return (0);
}
2310
2311 PSEUDOFS(linprocfs, 1, VFCF_JAIL);
2312 #if defined(__aarch64__) || defined(__amd64__)
2313 MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1);
2314 #else
2315 MODULE_DEPEND(linprocfs, linux, 1, 1, 1);
2316 #endif
2317 MODULE_DEPEND(linprocfs, procfs, 1, 1, 1);
2318 MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1);
2319 MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1);
2320 MODULE_DEPEND(linprocfs, sysvshm, 1, 1, 1);