]> CyberLeo.Net >> Repos - FreeBSD/FreeBSD.git/blob - sys/compat/linprocfs/linprocfs.c
zfs: merge openzfs/zfs@4647353c8
[FreeBSD/FreeBSD.git] / sys / compat / linprocfs / linprocfs.c
1 /*-
2  * SPDX-License-Identifier: BSD-4-Clause
3  *
4  * Copyright (c) 2000 Dag-Erling Smørgrav
5  * Copyright (c) 1999 Pierre Beyssac
6  * Copyright (c) 1993 Jan-Simon Pendry
7  * Copyright (c) 1993
8  *      The Regents of the University of California.  All rights reserved.
9  *
10  * This code is derived from software contributed to Berkeley by
11  * Jan-Simon Pendry.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1. Redistributions of source code must retain the above copyright
17  *    notice, this list of conditions and the following disclaimer.
18  * 2. Redistributions in binary form must reproduce the above copyright
19  *    notice, this list of conditions and the following disclaimer in the
20  *    documentation and/or other materials provided with the distribution.
21  * 3. All advertising materials mentioning features or use of this software
22  *    must display the following acknowledgement:
23  *      This product includes software developed by the University of
24  *      California, Berkeley and its contributors.
25  * 4. Neither the name of the University nor the names of its contributors
26  *    may be used to endorse or promote products derived from this software
27  *    without specific prior written permission.
28  *
29  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39  * SUCH DAMAGE.
40  *
41  *      @(#)procfs_status.c     8.4 (Berkeley) 6/15/94
42  */
43
44 #include "opt_inet.h"
45
46 #include <sys/param.h>
47 #include <sys/systm.h>
48 #include <sys/blist.h>
49 #include <sys/conf.h>
50 #include <sys/exec.h>
51 #include <sys/fcntl.h>
52 #include <sys/filedesc.h>
53 #include <sys/jail.h>
54 #include <sys/kernel.h>
55 #include <sys/limits.h>
56 #include <sys/linker.h>
57 #include <sys/lock.h>
58 #include <sys/malloc.h>
59 #include <sys/msg.h>
60 #include <sys/mutex.h>
61 #include <sys/namei.h>
62 #include <sys/proc.h>
63 #include <sys/ptrace.h>
64 #include <sys/queue.h>
65 #include <sys/resourcevar.h>
66 #include <sys/resource.h>
67 #include <sys/sbuf.h>
68 #include <sys/sem.h>
69 #include <sys/shm.h>
70 #include <sys/smp.h>
71 #include <sys/socket.h>
72 #include <sys/syscallsubr.h>
73 #include <sys/sysctl.h>
74 #include <sys/sysent.h>
75 #include <sys/time.h>
76 #include <sys/tty.h>
77 #include <sys/user.h>
78 #include <sys/uuid.h>
79 #include <sys/vmmeter.h>
80 #include <sys/vnode.h>
81 #include <sys/bus.h>
82 #include <sys/uio.h>
83
84 #include <net/if.h>
85 #include <net/if_var.h>
86 #include <net/if_types.h>
87
88 #include <net/route.h>
89 #include <net/route/nhop.h>
90 #include <net/route/route_ctl.h>
91
92 #include <vm/vm.h>
93 #include <vm/vm_extern.h>
94 #include <vm/pmap.h>
95 #include <vm/vm_map.h>
96 #include <vm/vm_param.h>
97 #include <vm/vm_object.h>
98 #include <vm/swap_pager.h>
99
100 #include <machine/clock.h>
101
102 #include <geom/geom.h>
103 #include <geom/geom_int.h>
104
105 #if defined(__i386__) || defined(__amd64__)
106 #include <machine/cputypes.h>
107 #include <machine/md_var.h>
108 #endif /* __i386__ || __amd64__ */
109
110 #include <compat/linux/linux.h>
111 #include <compat/linux/linux_common.h>
112 #include <compat/linux/linux_emul.h>
113 #include <compat/linux/linux_mib.h>
114 #include <compat/linux/linux_misc.h>
115 #include <compat/linux/linux_util.h>
116 #include <fs/pseudofs/pseudofs.h>
117 #include <fs/procfs/procfs.h>
118
/*
 * Various conversion macros
 *
 * The tick-based macros (T2J, T2CS, T2S) divide by stathz when it is
 * non-zero and fall back to hz otherwise.  The byte/page macros are plain
 * shifts, so they round down.
 */
#define T2J(x) ((long)(((x) * 100ULL) / (stathz ? stathz : hz)))        /* ticks to jiffies */
#define T2CS(x) ((unsigned long)(((x) * 100ULL) / (stathz ? stathz : hz)))      /* ticks to centiseconds */
#define T2S(x) ((x) / (stathz ? stathz : hz))           /* ticks to seconds */
#define B2K(x) ((x) >> 10)                              /* bytes to kbytes */
#define B2P(x) ((x) >> PAGE_SHIFT)                      /* bytes to pages */
#define P2B(x) ((x) << PAGE_SHIFT)                      /* pages to bytes */
#define P2K(x) ((x) << (PAGE_SHIFT - 10))               /* pages to kbytes */
/* struct timeval pointer to hundredths of a second (100 per tv_sec) */
#define TV2J(x) ((x)->tv_sec * 100UL + (x)->tv_usec / 10000)
130
/**
 * @brief Mapping of ki_stat in struct kinfo_proc to the linux state
 *
 * The linux procfs state field displays one of the characters RSDZTW to
 * denote running, sleeping in an interruptible wait, waiting in an
 * uninterruptible disk sleep, a zombie process, process is being traced
 * or stopped, or process is paging respectively.
 *
 * Our struct kinfo_proc contains the variable ki_stat which contains a
 * value out of SIDL, SRUN, SSLEEP, SSTOP, SZOMB, SWAIT and SLOCK.
 *
 * This character array is indexed with ki_stat - 1 and tries to map our
 * states to suitable linux states.
 */
static char linux_state[] = "RRSTZDD";
146
147 /*
148  * Filler function for proc/meminfo
149  */
150 static int
151 linprocfs_domeminfo(PFS_FILL_ARGS)
152 {
153         unsigned long memtotal;         /* total memory in bytes */
154         unsigned long memfree;          /* free memory in bytes */
155         unsigned long cached;           /* page cache */
156         unsigned long buffers;          /* buffer cache */
157         unsigned long long swaptotal;   /* total swap space in bytes */
158         unsigned long long swapused;    /* used swap space in bytes */
159         unsigned long long swapfree;    /* free swap space in bytes */
160         size_t sz;
161         int error, i, j;
162
163         memtotal = physmem * PAGE_SIZE;
164         memfree = (unsigned long)vm_free_count() * PAGE_SIZE;
165         swap_pager_status(&i, &j);
166         swaptotal = (unsigned long long)i * PAGE_SIZE;
167         swapused = (unsigned long long)j * PAGE_SIZE;
168         swapfree = swaptotal - swapused;
169
170         /*
171          * This value may exclude wired pages, but we have no good way of
172          * accounting for that.
173          */
174         cached =
175             (vm_active_count() + vm_inactive_count() + vm_laundry_count()) *
176             PAGE_SIZE;
177
178         sz = sizeof(buffers);
179         error = kernel_sysctlbyname(curthread, "vfs.bufspace", &buffers, &sz,
180             NULL, 0, 0, 0);
181         if (error != 0)
182                 buffers = 0;
183
184         sbuf_printf(sb,
185             "MemTotal: %9lu kB\n"
186             "MemFree:  %9lu kB\n"
187             "Buffers:  %9lu kB\n"
188             "Cached:   %9lu kB\n"
189             "SwapTotal:%9llu kB\n"
190             "SwapFree: %9llu kB\n",
191             B2K(memtotal), B2K(memfree), B2K(buffers),
192             B2K(cached), B2K(swaptotal), B2K(swapfree));
193
194         return (0);
195 }
196
197 #if defined(__i386__) || defined(__amd64__)
198 /*
199  * Filler function for proc/cpuinfo (i386 & amd64 version)
200  */
201 static int
202 linprocfs_docpuinfo(PFS_FILL_ARGS)
203 {
204         int hw_model[2];
205         char model[128];
206         uint64_t freq;
207         size_t size;
208         u_int cache_size[4];
209         u_int regs[4] = { 0 };
210         int fqmhz, fqkhz;
211         int i, j;
212
213         /*
214          * We default the flags to include all non-conflicting flags,
215          * and the Intel versions of conflicting flags.
216          */
217         static char *cpu_feature_names[] = {
218                 /*  0 */ "fpu", "vme", "de", "pse",
219                 /*  4 */ "tsc", "msr", "pae", "mce",
220                 /*  8 */ "cx8", "apic", "", "sep",
221                 /* 12 */ "mtrr", "pge", "mca", "cmov",
222                 /* 16 */ "pat", "pse36", "pn", "clflush",
223                 /* 20 */ "", "dts", "acpi", "mmx",
224                 /* 24 */ "fxsr", "sse", "sse2", "ss",
225                 /* 28 */ "ht", "tm", "ia64", "pbe"
226         };
227
228         static char *amd_feature_names[] = {
229                 /*  0 */ "", "", "", "",
230                 /*  4 */ "", "", "", "",
231                 /*  8 */ "", "", "", "syscall",
232                 /* 12 */ "", "", "", "",
233                 /* 16 */ "", "", "", "mp",
234                 /* 20 */ "nx", "", "mmxext", "",
235                 /* 24 */ "", "fxsr_opt", "pdpe1gb", "rdtscp",
236                 /* 28 */ "", "lm", "3dnowext", "3dnow"
237         };
238
239         static char *cpu_feature2_names[] = {
240                 /*  0 */ "pni", "pclmulqdq", "dtes64", "monitor",
241                 /*  4 */ "ds_cpl", "vmx", "smx", "est",
242                 /*  8 */ "tm2", "ssse3", "cid", "sdbg",
243                 /* 12 */ "fma", "cx16", "xtpr", "pdcm",
244                 /* 16 */ "", "pcid", "dca", "sse4_1",
245                 /* 20 */ "sse4_2", "x2apic", "movbe", "popcnt",
246                 /* 24 */ "tsc_deadline_timer", "aes", "xsave", "",
247                 /* 28 */ "avx", "f16c", "rdrand", "hypervisor"
248         };
249
250         static char *amd_feature2_names[] = {
251                 /*  0 */ "lahf_lm", "cmp_legacy", "svm", "extapic",
252                 /*  4 */ "cr8_legacy", "abm", "sse4a", "misalignsse",
253                 /*  8 */ "3dnowprefetch", "osvw", "ibs", "xop",
254                 /* 12 */ "skinit", "wdt", "", "lwp",
255                 /* 16 */ "fma4", "tce", "", "nodeid_msr",
256                 /* 20 */ "", "tbm", "topoext", "perfctr_core",
257                 /* 24 */ "perfctr_nb", "", "bpext", "ptsc",
258                 /* 28 */ "perfctr_llc", "mwaitx", "", ""
259         };
260
261         static char *cpu_stdext_feature_names[] = {
262                 /*  0 */ "fsgsbase", "tsc_adjust", "sgx", "bmi1",
263                 /*  4 */ "hle", "avx2", "", "smep",
264                 /*  8 */ "bmi2", "erms", "invpcid", "rtm",
265                 /* 12 */ "cqm", "", "mpx", "rdt_a",
266                 /* 16 */ "avx512f", "avx512dq", "rdseed", "adx",
267                 /* 20 */ "smap", "avx512ifma", "", "clflushopt",
268                 /* 24 */ "clwb", "intel_pt", "avx512pf", "avx512er",
269                 /* 28 */ "avx512cd", "sha_ni", "avx512bw", "avx512vl"
270         };
271
272         static char *cpu_stdext_feature2_names[] = {
273                 /*  0 */ "prefetchwt1", "avx512vbmi", "umip", "pku",
274                 /*  4 */ "ospke", "waitpkg", "avx512_vbmi2", "",
275                 /*  8 */ "gfni", "vaes", "vpclmulqdq", "avx512_vnni",
276                 /* 12 */ "avx512_bitalg", "", "avx512_vpopcntdq", "",
277                 /* 16 */ "", "", "", "",
278                 /* 20 */ "", "", "rdpid", "",
279                 /* 24 */ "", "cldemote", "", "movdiri",
280                 /* 28 */ "movdir64b", "enqcmd", "sgx_lc", ""
281         };
282
283         static char *cpu_stdext_feature3_names[] = {
284                 /*  0 */ "", "", "avx512_4vnniw", "avx512_4fmaps",
285                 /*  4 */ "fsrm", "", "", "",
286                 /*  8 */ "avx512_vp2intersect", "", "md_clear", "",
287                 /* 12 */ "", "", "", "",
288                 /* 16 */ "", "", "pconfig", "",
289                 /* 20 */ "", "", "", "",
290                 /* 24 */ "", "", "ibrs", "stibp",
291                 /* 28 */ "flush_l1d", "arch_capabilities", "core_capabilities", "ssbd"
292         };
293
294         static char *cpu_stdext_feature_l1_names[] = {
295                 /*  0 */ "xsaveopt", "xsavec", "xgetbv1", "xsaves",
296                 /*  4 */ "xfd"
297         };
298
299         static char *power_flags[] = {
300                 "ts",           "fid",          "vid",
301                 "ttp",          "tm",           "stc",
302                 "100mhzsteps",  "hwpstate",     "",
303                 "cpb",          "eff_freq_ro",  "proc_feedback",
304                 "acc_power",
305         };
306
307         hw_model[0] = CTL_HW;
308         hw_model[1] = HW_MODEL;
309         model[0] = '\0';
310         size = sizeof(model);
311         if (kernel_sysctl(td, hw_model, 2, &model, &size, 0, 0, 0, 0) != 0)
312                 strcpy(model, "unknown");
313 #ifdef __i386__
314         switch (cpu_vendor_id) {
315         case CPU_VENDOR_AMD:
316                 if (cpu_class < CPUCLASS_686)
317                         cpu_feature_names[16] = "fcmov";
318                 break;
319         case CPU_VENDOR_CYRIX:
320                 cpu_feature_names[24] = "cxmmx";
321                 break;
322         }
323 #endif
324         if (cpu_exthigh >= 0x80000006)
325                 do_cpuid(0x80000006, cache_size);
326         else
327                 memset(cache_size, 0, sizeof(cache_size));
328         for (i = 0; i < mp_ncpus; ++i) {
329                 fqmhz = 0;
330                 fqkhz = 0;
331                 freq = atomic_load_acq_64(&tsc_freq);
332                 if (freq != 0) {
333                         fqmhz = (freq + 4999) / 1000000;
334                         fqkhz = ((freq + 4999) / 10000) % 100;
335                 }
336                 sbuf_printf(sb,
337                     "processor\t: %d\n"
338                     "vendor_id\t: %.20s\n"
339                     "cpu family\t: %u\n"
340                     "model\t\t: %u\n"
341                     "model name\t: %s\n"
342                     "stepping\t: %u\n"
343                     "cpu MHz\t\t: %d.%02d\n"
344                     "cache size\t: %d KB\n"
345                     "physical id\t: %d\n"
346                     "siblings\t: %d\n"
347                     "core id\t\t: %d\n"
348                     "cpu cores\t: %d\n"
349                     "apicid\t\t: %d\n"
350                     "initial apicid\t: %d\n"
351                     "fpu\t\t: %s\n"
352                     "fpu_exception\t: %s\n"
353                     "cpuid level\t: %d\n"
354                     "wp\t\t: %s\n",
355                     i, cpu_vendor, CPUID_TO_FAMILY(cpu_id),
356                     CPUID_TO_MODEL(cpu_id), model, cpu_id & CPUID_STEPPING,
357                     fqmhz, fqkhz,
358                     (cache_size[2] >> 16), 0, mp_ncpus, i, mp_ncpus,
359                     i, i, /*cpu_id & CPUID_LOCAL_APIC_ID ??*/
360                     (cpu_feature & CPUID_FPU) ? "yes" : "no", "yes",
361                     CPUID_TO_FAMILY(cpu_id), "yes");
362                 sbuf_cat(sb, "flags\t\t:");
363                 for (j = 0; j < nitems(cpu_feature_names); j++)
364                         if (cpu_feature & (1 << j) &&
365                             cpu_feature_names[j][0] != '\0')
366                                 sbuf_printf(sb, " %s", cpu_feature_names[j]);
367                 for (j = 0; j < nitems(amd_feature_names); j++)
368                         if (amd_feature & (1 << j) &&
369                             amd_feature_names[j][0] != '\0')
370                                 sbuf_printf(sb, " %s", amd_feature_names[j]);
371                 for (j = 0; j < nitems(cpu_feature2_names); j++)
372                         if (cpu_feature2 & (1 << j) &&
373                             cpu_feature2_names[j][0] != '\0')
374                                 sbuf_printf(sb, " %s", cpu_feature2_names[j]);
375                 for (j = 0; j < nitems(amd_feature2_names); j++)
376                         if (amd_feature2 & (1 << j) &&
377                             amd_feature2_names[j][0] != '\0')
378                                 sbuf_printf(sb, " %s", amd_feature2_names[j]);
379                 for (j = 0; j < nitems(cpu_stdext_feature_names); j++)
380                         if (cpu_stdext_feature & (1 << j) &&
381                             cpu_stdext_feature_names[j][0] != '\0')
382                                 sbuf_printf(sb, " %s",
383                                     cpu_stdext_feature_names[j]);
384                 if (tsc_is_invariant)
385                         sbuf_cat(sb, " constant_tsc");
386                 for (j = 0; j < nitems(cpu_stdext_feature2_names); j++)
387                         if (cpu_stdext_feature2 & (1 << j) &&
388                             cpu_stdext_feature2_names[j][0] != '\0')
389                                 sbuf_printf(sb, " %s",
390                                     cpu_stdext_feature2_names[j]);
391                 for (j = 0; j < nitems(cpu_stdext_feature3_names); j++)
392                         if (cpu_stdext_feature3 & (1 << j) &&
393                             cpu_stdext_feature3_names[j][0] != '\0')
394                                 sbuf_printf(sb, " %s",
395                                     cpu_stdext_feature3_names[j]);
396                 if ((cpu_feature2 & CPUID2_XSAVE) != 0) {
397                         cpuid_count(0xd, 0x1, regs);
398                         for (j = 0; j < nitems(cpu_stdext_feature_l1_names); j++)
399                                 if (regs[0] & (1 << j) &&
400                                     cpu_stdext_feature_l1_names[j][0] != '\0')
401                                         sbuf_printf(sb, " %s",
402                                             cpu_stdext_feature_l1_names[j]);
403                 }
404                 sbuf_cat(sb, "\n");
405                 sbuf_printf(sb,
406                     "bugs\t\t: %s\n"
407                     "bogomips\t: %d.%02d\n"
408                     "clflush size\t: %d\n"
409                     "cache_alignment\t: %d\n"
410                     "address sizes\t: %d bits physical, %d bits virtual\n",
411 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
412                     (has_f00f_bug) ? "Intel F00F" : "",
413 #else
414                     "",
415 #endif
416                     fqmhz * 2, fqkhz,
417                     cpu_clflush_line_size, cpu_clflush_line_size,
418                     cpu_maxphyaddr,
419                     (cpu_maxphyaddr > 32) ? 48 : 0);
420                 sbuf_cat(sb, "power management: ");
421                 for (j = 0; j < nitems(power_flags); j++)
422                         if (amd_pminfo & (1 << j))
423                                 sbuf_printf(sb, " %s", power_flags[j]);
424                 sbuf_cat(sb, "\n\n");
425
426                 /* XXX per-cpu vendor / class / model / id? */
427         }
428         sbuf_cat(sb, "\n");
429
430         return (0);
431 }
432 #else
433 /* ARM64TODO: implement non-stubbed linprocfs_docpuinfo */
434 static int
435 linprocfs_docpuinfo(PFS_FILL_ARGS)
436 {
437         int i;
438
439         for (i = 0; i < mp_ncpus; ++i) {
440                 sbuf_printf(sb,
441                     "processor\t: %d\n"
442                     "BogoMIPS\t: %d.%02d\n",
443                     i, 0, 0);
444                 sbuf_cat(sb, "Features\t: ");
445                 sbuf_cat(sb, "\n");
446                 sbuf_printf(sb,
447                     "CPU implementer\t: \n"
448                     "CPU architecture: \n"
449                     "CPU variant\t: 0x%x\n"
450                     "CPU part\t: 0x%x\n"
451                     "CPU revision\t: %d\n",
452                     0, 0, 0);
453                 sbuf_cat(sb, "\n");
454         }
455
456         return (0);
457 }
458 #endif /* __i386__ || __amd64__ */
459
460 static const char *path_slash_sys = "/sys";
461 static const char *fstype_sysfs = "sysfs";
462
463 static int
464 _mtab_helper(const struct pfs_node *pn, const struct statfs *sp,
465     const char **mntfrom, const char **mntto, const char **fstype)
466 {
467         /* determine device name */
468         *mntfrom = sp->f_mntfromname;
469
470         /* determine mount point */
471         *mntto = sp->f_mntonname;
472
473         /* determine fs type */
474         *fstype = sp->f_fstypename;
475         if (strcmp(*fstype, pn->pn_info->pi_name) == 0)
476                 *mntfrom = *fstype = "proc";
477         else if (strcmp(*fstype, "procfs") == 0)
478                 return (ECANCELED);
479
480         if (strcmp(*fstype, "autofs") == 0) {
481                 /*
482                  * FreeBSD uses eg "map -hosts", whereas Linux
483                  * expects just "-hosts".
484                  */
485                 if (strncmp(*mntfrom, "map ", 4) == 0)
486                         *mntfrom += 4;
487         }
488
489         if (strcmp(*fstype, "linsysfs") == 0) {
490                 *mntfrom = path_slash_sys;
491                 *fstype = fstype_sysfs;
492         } else {
493                 /* For Linux msdosfs is called vfat */
494                 if (strcmp(*fstype, "msdosfs") == 0)
495                         *fstype = "vfat";
496         }
497         return (0);
498 }
499
500 static void
501 _sbuf_mntoptions_helper(struct sbuf *sb, uint64_t f_flags)
502 {
503         sbuf_cat(sb, (f_flags & MNT_RDONLY) ? "ro" : "rw");
504 #define ADD_OPTION(opt, name) \
505         if (f_flags & (opt)) sbuf_cat(sb, "," name);
506         ADD_OPTION(MNT_SYNCHRONOUS,     "sync");
507         ADD_OPTION(MNT_NOEXEC,          "noexec");
508         ADD_OPTION(MNT_NOSUID,          "nosuid");
509         ADD_OPTION(MNT_UNION,           "union");
510         ADD_OPTION(MNT_ASYNC,           "async");
511         ADD_OPTION(MNT_SUIDDIR,         "suiddir");
512         ADD_OPTION(MNT_NOSYMFOLLOW,     "nosymfollow");
513         ADD_OPTION(MNT_NOATIME,         "noatime");
514 #undef ADD_OPTION
515 }
516
517 /*
518  * Filler function for proc/mtab and proc/<pid>/mounts.
519  *
520  * /proc/mtab doesn't exist in Linux' procfs, but is included here so
521  * users can symlink /compat/linux/etc/mtab to /proc/mtab
522  */
523 static int
524 linprocfs_domtab(PFS_FILL_ARGS)
525 {
526         const char *mntto, *mntfrom, *fstype;
527         char *dlep, *flep;
528         struct vnode *vp;
529         struct pwd *pwd;
530         size_t lep_len;
531         int error;
532         struct statfs *buf, *sp;
533         size_t count;
534
535         /*
536          * Resolve emulation tree prefix
537          */
538         flep = NULL;
539         pwd = pwd_hold(td);
540         vp = pwd->pwd_adir;
541         error = vn_fullpath_global(vp, &dlep, &flep);
542         pwd_drop(pwd);
543         if (error != 0)
544                 return (error);
545         lep_len = strlen(dlep);
546
547         buf = NULL;
548         error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
549             UIO_SYSSPACE, MNT_WAIT);
550         if (error != 0) {
551                 free(buf, M_TEMP);
552                 free(flep, M_TEMP);
553                 return (error);
554         }
555
556         for (sp = buf; count > 0; sp++, count--) {
557                 error = _mtab_helper(pn, sp, &mntfrom, &mntto, &fstype);
558                 if (error != 0) {
559                         MPASS(error == ECANCELED);
560                         continue;
561                 }
562
563                 /* determine mount point */
564                 if (strncmp(mntto, dlep, lep_len) == 0 && mntto[lep_len] == '/')
565                         mntto += lep_len;
566
567                 sbuf_printf(sb, "%s %s %s ", mntfrom, mntto, fstype);
568                 _sbuf_mntoptions_helper(sb, sp->f_flags);
569                 /* a real Linux mtab will also show NFS options */
570                 sbuf_printf(sb, " 0 0\n");
571         }
572
573         free(buf, M_TEMP);
574         free(flep, M_TEMP);
575         return (error);
576 }
577
578 static int
579 linprocfs_doprocmountinfo(PFS_FILL_ARGS)
580 {
581         const char *mntfrom, *mntto, *fstype;
582         char *dlep, *flep;
583         struct statfs *buf, *sp;
584         size_t count, lep_len;
585         struct vnode *vp;
586         struct pwd *pwd;
587         int error;
588
589         /*
590          * Resolve emulation tree prefix
591          */
592         flep = NULL;
593         pwd = pwd_hold(td);
594         vp = pwd->pwd_adir;
595         error = vn_fullpath_global(vp, &dlep, &flep);
596         pwd_drop(pwd);
597         if (error != 0)
598                 return (error);
599         lep_len = strlen(dlep);
600
601         buf = NULL;
602         error = kern_getfsstat(td, &buf, SIZE_T_MAX, &count,
603             UIO_SYSSPACE, MNT_WAIT);
604         if (error != 0)
605                 goto out;
606
607         for (sp = buf; count > 0; sp++, count--) {
608                 error = _mtab_helper(pn, sp, &mntfrom, &mntto, &fstype);
609                 if (error != 0) {
610                         MPASS(error == ECANCELED);
611                         continue;
612                 }
613
614                 if (strncmp(mntto, dlep, lep_len) == 0 && mntto[lep_len] == '/')
615                         mntto += lep_len;
616 #if 0
617                 /*
618                  * If the prefix is a chroot, and this mountpoint is not under
619                  * the prefix, we should skip it.  Leave it for now for
620                  * consistency with procmtab above.
621                  */
622                 else
623                         continue;
624 #endif
625
626                 /*
627                  * (1) mount id
628                  *
629                  * (2) parent mount id -- we don't have this cheaply, so
630                  * provide a dummy value
631                  *
632                  * (3) major:minor -- ditto
633                  *
634                  * (4) root filesystem mount -- probably a namespaces thing
635                  *
636                  * (5) mountto path
637                  */
638                 sbuf_printf(sb, "%u 0 0:0 / %s ",
639                     sp->f_fsid.val[0] ^ sp->f_fsid.val[1], mntto);
640                 /* (6) mount options */
641                 _sbuf_mntoptions_helper(sb, sp->f_flags);
642                 /*
643                  * (7) zero or more optional fields -- again, namespace related
644                  *
645                  * (8) End of variable length fields separator ("-")
646                  *
647                  * (9) fstype
648                  *
649                  * (10) mount from
650                  *
651                  * (11) "superblock" options -- like (6), but different
652                  * semantics in Linux
653                  */
654                 sbuf_printf(sb, " - %s %s %s\n", fstype, mntfrom,
655                     (sp->f_flags & MNT_RDONLY) ? "ro" : "rw");
656         }
657
658         error = 0;
659 out:
660         free(buf, M_TEMP);
661         free(flep, M_TEMP);
662         return (error);
663 }
664
665 /*
666  * Filler function for proc/partitions
667  */
668 static int
669 linprocfs_dopartitions(PFS_FILL_ARGS)
670 {
671         struct g_class *cp;
672         struct g_geom *gp;
673         struct g_provider *pp;
674         int major, minor;
675
676         g_topology_lock();
677         sbuf_printf(sb, "major minor  #blocks  name rio rmerge rsect "
678             "ruse wio wmerge wsect wuse running use aveq\n");
679
680         LIST_FOREACH(cp, &g_classes, class) {
681                 if (strcmp(cp->name, "DISK") == 0 ||
682                     strcmp(cp->name, "PART") == 0)
683                         LIST_FOREACH(gp, &cp->geom, geom) {
684                                 LIST_FOREACH(pp, &gp->provider, provider) {
685                                         if (linux_driver_get_major_minor(
686                                             pp->name, &major, &minor) != 0) {
687                                                 major = 0;
688                                                 minor = 0;
689                                         }
690                                         sbuf_printf(sb, "%d %d %lld %s "
691                                             "%d %d %d %d %d "
692                                              "%d %d %d %d %d %d\n",
693                                              major, minor,
694                                              (long long)pp->mediasize, pp->name,
695                                              0, 0, 0, 0, 0,
696                                              0, 0, 0, 0, 0, 0);
697                                 }
698                         }
699         }
700         g_topology_unlock();
701
702         return (0);
703 }
704
705 /*
706  * Filler function for proc/stat
707  *
708  * Output depends on kernel version:
709  *
710  * v2.5.40 <=
711  *   user nice system idle
712  * v2.5.41
713  *   user nice system idle iowait
714  * v2.6.11
715  *   user nice system idle iowait irq softirq steal
716  * v2.6.24
717  *   user nice system idle iowait irq softirq steal guest
718  * v2.6.33 >=
719  *   user nice system idle iowait irq softirq steal guest guest_nice
720  */
721 static int
722 linprocfs_dostat(PFS_FILL_ARGS)
723 {
724         struct pcpu *pcpu;
725         long cp_time[CPUSTATES];
726         long *cp;
727         struct timeval boottime;
728         int i;
729         char *zero_pad;
730         bool has_intr = true;
731
732         if (linux_kernver(td) >= LINUX_KERNVER(2,6,33)) {
733                 zero_pad = " 0 0 0 0\n";
734         } else if (linux_kernver(td) >= LINUX_KERNVER(2,6,24)) {
735                 zero_pad = " 0 0 0\n";
736         } else if (linux_kernver(td) >= LINUX_KERNVER(2,6,11)) {
737                 zero_pad = " 0 0\n";
738         } else if (linux_kernver(td) >= LINUX_KERNVER(2,5,41)) {
739                 has_intr = false;
740                 zero_pad = " 0\n";
741         } else {
742                 has_intr = false;
743                 zero_pad = "\n";
744         }
745
746         read_cpu_time(cp_time);
747         getboottime(&boottime);
748         /* Parameters common to all versions */
749         sbuf_printf(sb, "cpu %lu %lu %lu %lu",
750             T2J(cp_time[CP_USER]),
751             T2J(cp_time[CP_NICE]),
752             T2J(cp_time[CP_SYS]),
753             T2J(cp_time[CP_IDLE]));
754
755         /* Print interrupt stats if available */
756         if (has_intr) {
757                 sbuf_printf(sb, " 0 %lu", T2J(cp_time[CP_INTR]));
758         }
759
760         /* Pad out remaining fields depending on version */
761         sbuf_printf(sb, "%s", zero_pad);
762
763         CPU_FOREACH(i) {
764                 pcpu = pcpu_find(i);
765                 cp = pcpu->pc_cp_time;
766                 sbuf_printf(sb, "cpu%d %lu %lu %lu %lu", i,
767                     T2J(cp[CP_USER]),
768                     T2J(cp[CP_NICE]),
769                     T2J(cp[CP_SYS]),
770                     T2J(cp[CP_IDLE]));
771
772                 if (has_intr) {
773                         sbuf_printf(sb, " 0 %lu", T2J(cp[CP_INTR]));
774                 }
775
776                 sbuf_printf(sb, "%s", zero_pad);
777         }
778         sbuf_printf(sb,
779             "disk 0 0 0 0\n"
780             "page %ju %ju\n"
781             "swap %ju %ju\n"
782             "intr %ju\n"
783             "ctxt %ju\n"
784             "btime %lld\n",
785             (uintmax_t)VM_CNT_FETCH(v_vnodepgsin),
786             (uintmax_t)VM_CNT_FETCH(v_vnodepgsout),
787             (uintmax_t)VM_CNT_FETCH(v_swappgsin),
788             (uintmax_t)VM_CNT_FETCH(v_swappgsout),
789             (uintmax_t)VM_CNT_FETCH(v_intr),
790             (uintmax_t)VM_CNT_FETCH(v_swtch),
791             (long long)boottime.tv_sec);
792         return (0);
793 }
794
795 /*
796  * Filler function for proc/swaps: header row, then a row per swap device.
797  */
798 static int
799 linprocfs_doswaps(PFS_FILL_ARGS)
800 {
801         char devname[SPECNAMELEN + 1];
802         struct xswdev xsw;
803         uintmax_t total, used;
804         int n;
805
806         sbuf_printf(sb, "Filename\t\t\t\tType\t\tSize\tUsed\tPriority\n");
807         for (n = 0; ; n++) {
808                 if (swap_dev_info(n, &xsw, devname, sizeof(devname)) != 0)
809                         break;
810                 total = (uintmax_t)xsw.xsw_nblks * PAGE_SIZE / 1024;
811                 used = (uintmax_t)xsw.xsw_used * PAGE_SIZE / 1024;
812                 /* Linux puts a space, not a tab, after the name; so do we. */
813                 /* total and used are uintmax_t, hence %ju and not %jd. */
814                 sbuf_printf(sb, "/dev/%-34s unknown\t\t%ju\t%ju\t-1\n",
815                     devname, total, used);
816         }
817         return (0);
818 }
819
820 /*
821  * Filler function for proc/uptime: "<uptime> <idle>", each with a two-digit
822  * fraction; idle is the CP_IDLE counter divided by mp_ncpus.
823  */
824 static int
825 linprocfs_douptime(PFS_FILL_ARGS)
826 {
827         struct timeval uptime;
828         long ticks[CPUSTATES], idle;
829
830         getmicrouptime(&uptime);
831         read_cpu_time(ticks);
832         idle = ticks[CP_IDLE] / mp_ncpus;
833         sbuf_printf(sb, "%lld.%02ld %ld.%02lu\n", (long long)uptime.tv_sec,
834             uptime.tv_usec / 10000, T2S(idle), T2CS(idle) % 100);
835         return (0);
836 }
837
838 /*
839  * Get OS build date: appends the build string to sb.  The parsing code under "#if 0" is compiled out, so the fixed string at the end is always what gets appended; td is not used.
840  */
841 static void
842 linprocfs_osbuild(struct thread *td, struct sbuf *sb)
843 {
844 #if 0 /* compiled out */
845         char osbuild[256];
846         char *cp1, *cp2;
847
848         strncpy(osbuild, version, 256);
849         osbuild[255] = '\0';
850         cp1 = strstr(osbuild, "\n");
851         cp2 = strstr(osbuild, ":");
852         if (cp1 && cp2) {
853                 *cp1 = *cp2 = '\0';
854                 cp1 = strstr(osbuild, "#");
855         } else
856                 cp1 = NULL;
857         if (cp1)
858                 sbuf_printf(sb, "%s%s", cp1, cp2 + 1);
859         else /* if re-enabled, this else governs the sbuf_cat() after #endif */
860 #endif
861                 sbuf_cat(sb, "#4 Sun Dec 18 04:30:00 CET 1977"); /* unconditional while the block above is disabled */
862 }
863
864 /*
865  * Get OS builder: appends the builder identity to sb.  The parsing code under "#if 0" is compiled out, so the fixed address at the end is always what gets appended; td is not used.
866  */
867 static void
868 linprocfs_osbuilder(struct thread *td, struct sbuf *sb)
869 {
870 #if 0 /* compiled out */
871         char builder[256];
872         char *cp;
873
874         cp = strstr(version, "\n    ");
875         if (cp) {
876                 strncpy(builder, cp + 5, 256); /* cp + 5 skips the newline and four spaces matched above */
877                 builder[255] = '\0';
878                 cp = strstr(builder, ":");
879                 if (cp)
880                         *cp = '\0';
881         }
882         if (cp)
883                 sbuf_cat(sb, builder);
884         else /* if re-enabled, this else governs the sbuf_cat() after #endif */
885 #endif
886                 sbuf_cat(sb, "des@freebsd.org"); /* unconditional while the block above is disabled */
887 }
888
889 /*
890  * Filler function for proc/version.  Output has the form
891  * "<osname> version <osrelease> (<builder>) (gcc version <ver>) <build>\n".
892  */
893 static int
894 linprocfs_doversion(PFS_FILL_ARGS)
895 {
896         char release[LINUX_MAX_UTSNAME], sysname[LINUX_MAX_UTSNAME];
897
898         linux_get_osname(td, sysname);
899         linux_get_osrelease(td, release);
900
901         sbuf_printf(sb, "%s version %s (", sysname, release);
902         linprocfs_osbuilder(td, sb);
903         sbuf_cat(sb, ") (gcc version " __VERSION__ ") ");
904         linprocfs_osbuild(td, sb);
905         sbuf_putc(sb, '\n');
906         return (0);
907 }
908
909 /*
910  * Filler function for proc/loadavg: the three load averages with two
911  * decimals each, then "running/total" task counts and the last pid.
912  */
913 static int
914 linprocfs_doloadavg(PFS_FILL_ARGS)
915 {
916         int frac[3], whole[3], i;
917
918         /* Split each fixed-point average into integer part and hundredths. */
919         for (i = 0; i < 3; i++) {
920                 whole[i] = (int)(averunnable.ldavg[i] / averunnable.fscale);
921                 frac[i] = (int)(averunnable.ldavg[i] * 100 /
922                     averunnable.fscale % 100);
923         }
924         /* The running-task count is hard-coded to 1. */
925         sbuf_printf(sb, "%d.%02d %d.%02d %d.%02d %d/%d %d\n",
926             whole[0], frac[0], whole[1], frac[1], whole[2], frac[2],
927             1, nprocs, lastpid);
928         return (0);
929 }
930
931 /*
932  * Return the Linux device number of p's controlling terminal, or -1 if p
933  * has none or its name cannot be mapped.  Needs the proc and proctree locks.
934  */
935 static int
936 linprocfs_get_tty_nr(struct proc *p)
937 {
938         struct session *sp;
939         struct tty *tp;
940         int major, minor;
941
942         PROC_LOCK_ASSERT(p, MA_OWNED);
943         sx_assert(&proctree_lock, SX_LOCKED);
944
945         if ((p->p_flag & P_CONTROLT) == 0)
946                 return (-1);
947         sp = p->p_pgrp->pg_session;
948         /* The session may have lost its tty even though P_CONTROLT is set. */
949         if (sp == NULL || (tp = sp->s_ttyp) == NULL || tp->t_dev == NULL)
950                 return (-1);
951         if (linux_driver_get_major_minor(devtoname(tp->t_dev), &major,
952             &minor) != 0)
953                 return (-1);
954         return (makedev(major, minor));
955 }
956
957 /*
958  * Filler function for proc/pid/stat: one line of space-separated fields, emitted in the order of the PS_ADD() calls below and ended by a newline
959  */
960 static int
961 linprocfs_doprocstat(PFS_FILL_ARGS)
962 {
963         struct kinfo_proc kp;
964         struct timeval boottime;
965         char state;
966         static int ratelimit = 0; /* set once the unknown-state warning has been printed */
967         int tty_nr;
968         vm_offset_t startcode, startdata;
969
970         getboottime(&boottime);
971         sx_slock(&proctree_lock); /* linprocfs_get_tty_nr() asserts this lock and the proc lock */
972         PROC_LOCK(p);
973         fill_kinfo_proc(p, &kp);
974         tty_nr = linprocfs_get_tty_nr(p);
975         sx_sunlock(&proctree_lock);
976         if (p->p_vmspace) {
977            startcode = (vm_offset_t)p->p_vmspace->vm_taddr;
978            startdata = (vm_offset_t)p->p_vmspace->vm_daddr;
979         } else {
980            startcode = 0;
981            startdata = 0;
982         }
983         sbuf_printf(sb, "%d", p->p_pid);
984 #define PS_ADD(name, fmt, arg) sbuf_printf(sb, " " fmt, arg) /* name only labels the call site; it is not printed */
985         PS_ADD("comm",          "(%s)", p->p_comm);
986         if (kp.ki_stat > sizeof(linux_state)) { /* NOTE(review): no lower-bound check; ki_stat < 1 would index before linux_state - confirm it cannot occur */
987                 state = 'R';
988
989                 if (ratelimit == 0) {
990                         printf("linprocfs: don't know how to handle unknown FreeBSD state %d/%zd, mapping to R\n",
991                             kp.ki_stat, sizeof(linux_state));
992                         ++ratelimit;
993                 }
994         } else
995                 state = linux_state[kp.ki_stat - 1];
996         PS_ADD("state",         "%c",   state);
997         PS_ADD("ppid",          "%d",   p->p_pptr ? p->p_pptr->p_pid : 0);
998         PS_ADD("pgrp",          "%d",   p->p_pgid);
999         PS_ADD("session",       "%d",   p->p_session->s_sid);
1000         PROC_UNLOCK(p); /* the remaining fields come from kp and the locals saved above */
1001         PS_ADD("tty",           "%d",   tty_nr);
1002         PS_ADD("tpgid",         "%d",   kp.ki_tpgid);
1003         PS_ADD("flags",         "%u",   0); /* XXX */
1004         PS_ADD("minflt",        "%lu",  kp.ki_rusage.ru_minflt);
1005         PS_ADD("cminflt",       "%lu",  kp.ki_rusage_ch.ru_minflt);
1006         PS_ADD("majflt",        "%lu",  kp.ki_rusage.ru_majflt);
1007         PS_ADD("cmajflt",       "%lu",  kp.ki_rusage_ch.ru_majflt);
1008         PS_ADD("utime",         "%ld",  TV2J(&kp.ki_rusage.ru_utime));
1009         PS_ADD("stime",         "%ld",  TV2J(&kp.ki_rusage.ru_stime));
1010         PS_ADD("cutime",        "%ld",  TV2J(&kp.ki_rusage_ch.ru_utime));
1011         PS_ADD("cstime",        "%ld",  TV2J(&kp.ki_rusage_ch.ru_stime));
1012         PS_ADD("priority",      "%d",   kp.ki_pri.pri_user);
1013         PS_ADD("nice",          "%d",   kp.ki_nice); /* 19 (nicest) to -19 */
1014         PS_ADD("0",             "%d",   0); /* removed field */
1015         PS_ADD("itrealvalue",   "%d",   0); /* XXX */
1016         PS_ADD("starttime",     "%lu",  TV2J(&kp.ki_start) - TV2J(&boottime)); /* start time relative to boot */
1017         PS_ADD("vsize",         "%ju",  (uintmax_t)kp.ki_size);
1018         PS_ADD("rss",           "%ju",  (uintmax_t)kp.ki_rssize);
1019         PS_ADD("rlim",          "%lu",  kp.ki_rusage.ru_maxrss);
1020         PS_ADD("startcode",     "%ju",  (uintmax_t)startcode);
1021         PS_ADD("endcode",       "%ju",  (uintmax_t)startdata); /* the data start address is reported as the end of code */
1022         PS_ADD("startstack",    "%u",   0); /* XXX */
1023         PS_ADD("kstkesp",       "%u",   0); /* XXX */
1024         PS_ADD("kstkeip",       "%u",   0); /* XXX */
1025         PS_ADD("signal",        "%u",   0); /* XXX */
1026         PS_ADD("blocked",       "%u",   0); /* XXX */
1027         PS_ADD("sigignore",     "%u",   0); /* XXX */
1028         PS_ADD("sigcatch",      "%u",   0); /* XXX */
1029         PS_ADD("wchan",         "%u",   0); /* XXX */
1030         PS_ADD("nswap",         "%lu",  kp.ki_rusage.ru_nswap);
1031         PS_ADD("cnswap",        "%lu",  kp.ki_rusage_ch.ru_nswap);
1032         PS_ADD("exitsignal",    "%d",   0); /* XXX */
1033         PS_ADD("processor",     "%u",   kp.ki_lastcpu);
1034         PS_ADD("rt_priority",   "%u",   0); /* XXX */ /* >= 2.5.19 */
1035         PS_ADD("policy",        "%u",   kp.ki_pri.pri_class); /* >= 2.5.19 */
1036 #undef PS_ADD
1037         sbuf_putc(sb, '\n');
1038
1039         return (0);
1040 }
1041
1042 /*
1043  * Filler function for proc/pid/statm.  Emits seven space-separated values:
1044  * size resident share trs drs lrs dt ("share" and "dt" are always 0).
1045  */
1046 static int
1047 linprocfs_doprocstatm(PFS_FILL_ARGS)
1048 {
1049         struct kinfo_proc ki;
1050         segsz_t lrs;
1051
1052         /* Snapshot the statistics under proctree_lock and the proc lock. */
1053         sx_slock(&proctree_lock);
1054         PROC_LOCK(p);
1055         fill_kinfo_proc(p, &ki);
1056         PROC_UNLOCK(p);
1057         sx_sunlock(&proctree_lock);
1058
1059         /*
1060          * lrs is what is left of the total after subtracting data, stack,
1061          * text and one more; linprocfs_doprocstatus() computes the same.
1062          */
1063         lrs = B2P(ki.ki_size) - ki.ki_dsize - ki.ki_ssize - ki.ki_tsize - 1;
1064         sbuf_printf(sb, "%ju %ju %ju %ju %ju %ju %ju\n",
1065             B2P((uintmax_t)ki.ki_size),
1066             (uintmax_t)ki.ki_rssize,
1067             (uintmax_t)0,                       /* XXX */
1068             (uintmax_t)ki.ki_tsize,
1069             (uintmax_t)(ki.ki_dsize + ki.ki_ssize),
1070             (uintmax_t)lrs,
1071             (uintmax_t)0);                      /* XXX */
1072         return (0);
1073 }
1074
1075 /*
1076  * Filler function for proc/pid/status: Name and State, ids and credentials, memory sizes, then the signal masks and (all-zero) capability masks
1077  */
1078 static int
1079 linprocfs_doprocstatus(PFS_FILL_ARGS)
1080 {
1081         struct kinfo_proc kp;
1082         char *state;
1083         segsz_t lsize;
1084         struct thread *td2;
1085         struct sigacts *ps;
1086         l_sigset_t siglist, sigignore, sigcatch;
1087         int i;
1088
1089         sx_slock(&proctree_lock);
1090         PROC_LOCK(p);
1091         td2 = FIRST_THREAD_IN_PROC(p); /* State is derived from the first thread only */
1092
1093         if (P_SHOULDSTOP(p)) {
1094                 state = "T (stopped)";
1095         } else {
1096                 switch(p->p_state) {
1097                 case PRS_NEW:
1098                         state = "I (idle)";
1099                         break;
1100                 case PRS_NORMAL:
1101                         if (p->p_flag & P_WEXIT) {
1102                                 state = "X (exiting)";
1103                                 break;
1104                         }
1105                         switch(TD_GET_STATE(td2)) {
1106                         case TDS_INHIBITED:
1107                                 state = "S (sleeping)";
1108                                 break;
1109                         case TDS_RUNQ:
1110                         case TDS_RUNNING:
1111                                 state = "R (running)";
1112                                 break;
1113                         default:
1114                                 state = "? (unknown)";
1115                                 break;
1116                         }
1117                         break;
1118                 case PRS_ZOMBIE:
1119                         state = "Z (zombie)";
1120                         break;
1121                 default:
1122                         state = "? (unknown)";
1123                         break;
1124                 }
1125         }
1126
1127         fill_kinfo_proc(p, &kp);
1128         sx_sunlock(&proctree_lock); /* the proc lock stays held until the credentials have been printed */
1129
1130         sbuf_printf(sb, "Name:\t%s\n",          p->p_comm); /* XXX escape */
1131         sbuf_printf(sb, "State:\t%s\n",         state);
1132
1133         /*
1134          * Credentials
1135          */
1136         sbuf_printf(sb, "Tgid:\t%d\n",          p->p_pid);
1137         sbuf_printf(sb, "Pid:\t%d\n",           p->p_pid);
1138         sbuf_printf(sb, "PPid:\t%d\n",          kp.ki_ppid );
1139         sbuf_printf(sb, "TracerPid:\t%d\n",     kp.ki_tracer );
1140         sbuf_printf(sb, "Uid:\t%d\t%d\t%d\t%d\n", p->p_ucred->cr_ruid,
1141                                                 p->p_ucred->cr_uid,
1142                                                 p->p_ucred->cr_svuid,
1143                                                 /* FreeBSD doesn't have fsuid */
1144                                                 p->p_ucred->cr_uid);
1145         sbuf_printf(sb, "Gid:\t%d\t%d\t%d\t%d\n", p->p_ucred->cr_rgid,
1146                                                 p->p_ucred->cr_gid,
1147                                                 p->p_ucred->cr_svgid,
1148                                                 /* FreeBSD doesn't have fsgid */
1149                                                 p->p_ucred->cr_gid);
1150         sbuf_cat(sb, "Groups:\t");
1151         for (i = 0; i < p->p_ucred->cr_ngroups; i++)
1152                 sbuf_printf(sb, "%d ",          p->p_ucred->cr_groups[i]);
1153         PROC_UNLOCK(p); /* the memory figures below come from the kp snapshot */
1154         sbuf_putc(sb, '\n');
1155
1156         /*
1157          * Memory
1158          *
1159          * While our approximation of VmLib may not be accurate (I
1160          * don't know of a simple way to verify it, and I'm not sure
1161          * it has much meaning anyway), I believe it's good enough.
1162          *
1163          * The same code that could (I think) accurately compute VmLib
1164          * could also compute VmLck, but I don't really care enough to
1165          * implement it. Submissions are welcome.
1166          */
1167         sbuf_printf(sb, "VmSize:\t%8ju kB\n",   B2K((uintmax_t)kp.ki_size));
1168         sbuf_printf(sb, "VmLck:\t%8u kB\n",     P2K(0)); /* XXX */
1169         sbuf_printf(sb, "VmRSS:\t%8ju kB\n",    P2K((uintmax_t)kp.ki_rssize));
1170         sbuf_printf(sb, "VmData:\t%8ju kB\n",   P2K((uintmax_t)kp.ki_dsize));
1171         sbuf_printf(sb, "VmStk:\t%8ju kB\n",    P2K((uintmax_t)kp.ki_ssize));
1172         sbuf_printf(sb, "VmExe:\t%8ju kB\n",    P2K((uintmax_t)kp.ki_tsize));
1173         lsize = B2P(kp.ki_size) - kp.ki_dsize -
1174             kp.ki_ssize - kp.ki_tsize - 1;
1175         sbuf_printf(sb, "VmLib:\t%8ju kB\n",    P2K((uintmax_t)lsize));
1176
1177         /*
1178          * Signal masks
1179          */
1180         PROC_LOCK(p); /* retaken to read the signal sets */
1181         bsd_to_linux_sigset(&p->p_siglist, &siglist);
1182         ps = p->p_sigacts;
1183         mtx_lock(&ps->ps_mtx); /* held while the ignore and catch sets are converted */
1184         bsd_to_linux_sigset(&ps->ps_sigignore, &sigignore);
1185         bsd_to_linux_sigset(&ps->ps_sigcatch, &sigcatch);
1186         mtx_unlock(&ps->ps_mtx);
1187         PROC_UNLOCK(p);
1188
1189         sbuf_printf(sb, "SigPnd:\t%016jx\n",    siglist.__mask);
1190         /*
1191          * XXX. SigBlk - target thread's signal mask, td_sigmask.
1192          * To implement SigBlk pseudofs should support proc/tid dir entries.
1193          */
1194         sbuf_printf(sb, "SigBlk:\t%016x\n",     0);
1195         sbuf_printf(sb, "SigIgn:\t%016jx\n",    sigignore.__mask);
1196         sbuf_printf(sb, "SigCgt:\t%016jx\n",    sigcatch.__mask);
1197
1198         /*
1199          * Linux also prints the capability masks, but we don't have
1200          * capabilities yet, and when we do get them they're likely to
1201          * be meaningless to Linux programs, so we lie. XXX
1202          */
1203         sbuf_printf(sb, "CapInh:\t%016x\n",     0);
1204         sbuf_printf(sb, "CapPrm:\t%016x\n",     0);
1205         sbuf_printf(sb, "CapEff:\t%016x\n",     0);
1206
1207         return (0);
1208 }
1209
1210 /*
1211  * Filler function for proc/pid/cwd: the path vn_fullpath() yields for the
1212  * process's pwd_cdir, or "unknown" when it leaves the default untouched.
1213  */
1214 static int
1215 linprocfs_doproccwd(PFS_FILL_ARGS)
1216 {
1217         char *path = "unknown", *tofree = NULL;
1218         struct pwd *pd;
1219
1220         pd = pwd_hold_proc(p);
1221         vn_fullpath(pd->pwd_cdir, &path, &tofree);
1222         sbuf_printf(sb, "%s", path);
1223         if (tofree != NULL)
1224                 free(tofree, M_TEMP);
1225         pwd_drop(pd);
1226         return (0);
1227 }
1228
1229 /*
1230  * Filler function for proc/pid/root: the path of pwd_jdir for a jailed
1231  * process and of pwd_rdir otherwise, or "unknown" if no path is produced.
1232  */
1233 static int
1234 linprocfs_doprocroot(PFS_FILL_ARGS)
1235 {
1236         char *path = "unknown", *tofree = NULL;
1237         struct vnode *rootvp;
1238         struct pwd *pd;
1239
1240         pd = pwd_hold_proc(p);
1241         rootvp = jailed(p->p_ucred) ? pd->pwd_jdir : pd->pwd_rdir;
1242         vn_fullpath(rootvp, &path, &tofree);
1243         sbuf_printf(sb, "%s", path);
1244         if (tofree != NULL)
1245                 free(tofree, M_TEMP);
1246         pwd_drop(pd);
1247         return (0);
1248 }
1249
1250 /*
1251  * Filler function for proc/pid/cmdline
1252  *
1253  * Returns the p_cansee() error if td may not see p.  Otherwise copies the
1254  * cached p_args when present and falls back to proc_getargv(); vmspace0
1255  * processes, and P_SYSTEM ones without cached args, yield nothing.
1256  */
1257 static int
1258 linprocfs_doproccmdline(PFS_FILL_ARGS)
1259 {
1260         int error;
1261
1262         PROC_LOCK(p);
1263         error = p_cansee(td, p);
1264         if (error != 0)
1265                 goto unlock;
1266
1267         /*
1268          * Mimic Linux: only a process with a usermode address space
1269          * yields a command line; anything else silently yields nothing.
1270          */
1271         if (p->p_vmspace == &vmspace0)
1272                 goto unlock;
1273         if (p->p_args != NULL) {
1274                 /* Cached argument vector: copy it out as-is. */
1275                 sbuf_bcpy(sb, p->p_args->ar_args, p->p_args->ar_length);
1276                 goto unlock;
1277         }
1278         if ((p->p_flag & P_SYSTEM) != 0)
1279                 goto unlock;
1280         PROC_UNLOCK(p);
1281
1282         /* No cached copy: call proc_getargv() with the lock released. */
1283         return (proc_getargv(td, p, sb));
1284 unlock:
1285         PROC_UNLOCK(p);
1286         return (error);
1287 }
1288
1289 /*
1290  * Filler function for proc/pid/environ
1291  *
1292  * As on Linux, only a process with a usermode address space is passed on
1293  * to proc_getenvv(); for vmspace0 return 0 with no output.
1294  */
1295 static int
1296 linprocfs_doprocenviron(PFS_FILL_ARGS)
1297 {
1298         int error;
1299
1300         error = 0;
1301         if (p->p_vmspace != &vmspace0)
1302                 error = proc_getenvv(td, p, sb);
1303         return (error);
1304 }
1305
1306 static char l32_map_str[] = "%08lx-%08lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n"; /* proc/pid/maps row with 8-digit addresses */
1307 static char l64_map_str[] = "%016lx-%016lx %s%s%s%s %08lx %02x:%02x %lu%s%s\n"; /* same row with 16-digit addresses */
1308 static char vdso_str[] = "      [vdso]"; /* name printed for the vDSO mapping */
1309 static char stack_str[] = "      [stack]"; /* name printed for the mapping that ends at sv_usrstack */
1310
1311 /*
1312  * Filler function for proc/pid/maps: one row per map entry (submap and guard entries are skipped); read-only, and refused unless p_candebug() passes
1313  */
1314 static int
1315 linprocfs_doprocmaps(PFS_FILL_ARGS)
1316 {
1317         struct vmspace *vm;
1318         vm_map_t map;
1319         vm_map_entry_t entry, tmp_entry;
1320         vm_object_t obj, tobj, lobj;
1321         vm_offset_t e_start, e_end;
1322         vm_ooffset_t off;
1323         vm_prot_t e_prot;
1324         unsigned int last_timestamp;
1325         char *name = "", *freename = NULL;
1326         const char *l_map_str;
1327         ino_t ino;
1328         int error;
1329         struct vnode *vp;
1330         struct vattr vat;
1331         bool private;
1332
1333         PROC_LOCK(p);
1334         error = p_candebug(td, p); /* a non-zero result is returned to the caller as-is */
1335         PROC_UNLOCK(p);
1336         if (error)
1337                 return (error);
1338
1339         if (uio->uio_rw != UIO_READ) /* writes are not supported */
1340                 return (EOPNOTSUPP);
1341
1342         error = 0;
1343         vm = vmspace_acquire_ref(p);
1344         if (vm == NULL)
1345                 return (ESRCH);
1346
1347         if (SV_CURPROC_FLAG(SV_LP64)) /* NOTE(review): the name suggests this tests the calling process rather than p - confirm that is intended */
1348                 l_map_str = l64_map_str;
1349         else
1350                 l_map_str = l32_map_str;
1351         map = &vm->vm_map;
1352         vm_map_lock_read(map);
1353         VM_MAP_ENTRY_FOREACH(entry, map) {
1354                 name = "";
1355                 freename = NULL;
1356                 /*
1357                  * Skip printing of the guard page of the stack region, as
1358                  * it confuses glibc pthread_getattr_np() method, where both
1359                  * the base address and size of the stack of the initial thread
1360                  * are calculated.
1361                  */
1362                 if ((entry->eflags & (MAP_ENTRY_IS_SUB_MAP | MAP_ENTRY_GUARD)) != 0)
1363                         continue;
1364                 e_prot = entry->protection; /* copy what is needed; the map lock is dropped below */
1365                 e_start = entry->start;
1366                 e_end = entry->end;
1367                 obj = entry->object.vm_object;
1368                 off = entry->offset;
1369                 for (lobj = tobj = obj; tobj != NULL; /* descend the backing-object chain; lobj ends up as its last object */
1370                     lobj = tobj, tobj = tobj->backing_object) {
1371                         VM_OBJECT_RLOCK(tobj);
1372                         off += lobj->backing_object_offset;
1373                         if (lobj != obj) /* obj itself stays read-locked until the unlock further down */
1374                                 VM_OBJECT_RUNLOCK(lobj);
1375                 }
1376                 private = (entry->eflags & MAP_ENTRY_COW) != 0 || obj == NULL ||
1377                     (obj->flags & OBJ_ANON) != 0;
1378                 last_timestamp = map->timestamp; /* compared after relocking to detect map changes */
1379                 vm_map_unlock_read(map);
1380                 ino = 0;
1381                 if (lobj) {
1382                         vp = vm_object_vnode(lobj);
1383                         if (vp != NULL)
1384                                 vref(vp); /* reference taken before the object locks are released */
1385                         if (lobj != obj)
1386                                 VM_OBJECT_RUNLOCK(lobj);
1387                         VM_OBJECT_RUNLOCK(obj);
1388                         if (vp != NULL) {
1389                                 vn_fullpath(vp, &name, &freename);
1390                                 vn_lock(vp, LK_SHARED | LK_RETRY);
1391                                 VOP_GETATTR(vp, &vat, td->td_ucred); /* NOTE(review): result ignored; vat may be unset if this fails - confirm */
1392                                 ino = vat.va_fileid;
1393                                 vput(vp);
1394                         } else if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
1395                                 /*
1396                                  * sv_shared_page_base pointed out to the
1397                                  * FreeBSD sharedpage, PAGE_SIZE is a size
1398                                  * of it. The vDSO page is above.
1399                                  */
1400                                 if (e_start == p->p_sysent->sv_shared_page_base +
1401                                     PAGE_SIZE)
1402                                         name = vdso_str;
1403                                 if (e_end == p->p_sysent->sv_usrstack)
1404                                         name = stack_str;
1405                         }
1406                 }
1407
1408                 /*
1409                  * format:
1410                  *  start, end, access, offset, major, minor, inode, name.
1411                  */
1412                 error = sbuf_printf(sb, l_map_str,
1413                     (u_long)e_start, (u_long)e_end,
1414                     (e_prot & VM_PROT_READ)?"r":"-",
1415                     (e_prot & VM_PROT_WRITE)?"w":"-",
1416                     (e_prot & VM_PROT_EXECUTE)?"x":"-",
1417                     private ? "p" : "s",
1418                     (u_long)off,
1419                     0, /* major: always 0 */
1420                     0, /* minor: always 0 */
1421                     (u_long)ino,
1422                     *name ? "     " : " ",
1423                     name
1424                     );
1425                 if (freename)
1426                         free(freename, M_TEMP);
1427                 vm_map_lock_read(map);
1428                 if (error == -1) { /* an sbuf_printf() failure ends the listing but is not reported */
1429                         error = 0;
1430                         break;
1431                 }
1432                 if (last_timestamp != map->timestamp) {
1433                         /*
1434                          * Look again for the entry because the map was
1435                          * modified while it was unlocked.  Specifically,
1436                          * the entry may have been clipped, merged, or deleted.
1437                          */
1438                         vm_map_lookup_entry(map, e_end - 1, &tmp_entry);
1439                         entry = tmp_entry;
1440                 }
1441         }
1442         vm_map_unlock_read(map);
1443         vmspace_free(vm);
1444
1445         return (error);
1446 }
1447
1448 /*
1449  * Filler function for proc/pid/mem: a wrapper around procfs_doprocmem().
1450  * A read that moved any data succeeds; EFAULT is reported as EIO.
1451  */
1452 static int
1453 linprocfs_doprocmem(PFS_FILL_ARGS)
1454 {
1455         ssize_t before;
1456         int error;
1457
1458         before = uio->uio_resid;
1459         error = procfs_doprocmem(PFS_FILL_ARGNAMES);
1460
1461         /* A partial read still counts: drop whatever error came with it. */
1462         if (uio->uio_rw == UIO_READ && uio->uio_resid != before)
1463                 error = 0;
1464         else if (error == EFAULT)
1465                 error = EIO;
1466         return (error);
1467 }
1468
1469 /*
1470  * Filler function for proc/net/dev
1471  */
1472 static int
1473 linprocfs_donetdev_cb(if_t ifp, void *arg)
1474 {
1475         char ifname[LINUX_IFNAMSIZ];
1476         struct sbuf *sb = arg;
1477
1478         if (ifname_bsd_to_linux_ifp(ifp, ifname, sizeof(ifname)) <= 0)
1479                 return (ENODEV);
1480
1481         sbuf_printf(sb, "%6.6s: ", ifname);
1482         sbuf_printf(sb, "%7ju %7ju %4ju %4ju %4lu %5lu %10lu %9ju ",
1483             (uintmax_t)if_getcounter(ifp, IFCOUNTER_IBYTES),
1484             (uintmax_t)if_getcounter(ifp, IFCOUNTER_IPACKETS),
1485             (uintmax_t)if_getcounter(ifp, IFCOUNTER_IERRORS),
1486             (uintmax_t)if_getcounter(ifp, IFCOUNTER_IQDROPS),
1487                                                 /* rx_missed_errors */
1488             0UL,                                /* rx_fifo_errors */
1489             0UL,                                /* rx_length_errors +
1490                                                  * rx_over_errors +
1491                                                  * rx_crc_errors +
1492                                                  * rx_frame_errors */
1493             0UL,                                /* rx_compressed */
1494             (uintmax_t)if_getcounter(ifp, IFCOUNTER_IMCASTS));
1495                                                 /* XXX-BZ rx only? */
1496         sbuf_printf(sb, "%8ju %7ju %4ju %4ju %4lu %5ju %7lu %10lu\n",
1497             (uintmax_t)if_getcounter(ifp, IFCOUNTER_OBYTES),
1498             (uintmax_t)if_getcounter(ifp, IFCOUNTER_OPACKETS),
1499             (uintmax_t)if_getcounter(ifp, IFCOUNTER_OERRORS),
1500             (uintmax_t)if_getcounter(ifp, IFCOUNTER_OQDROPS),
1501             0UL,                                /* tx_fifo_errors */
1502             (uintmax_t)if_getcounter(ifp, IFCOUNTER_COLLISIONS),
1503             0UL,                                /* tx_carrier_errors +
1504                                                  * tx_aborted_errors +
1505                                                  * tx_window_errors +
1506                                                  * tx_heartbeat_errors*/
1507             0UL);                               /* tx_compressed */
1508         return (0);
1509 }
1510
1511 /* Filler function for proc/net/dev: two header rows, then the device rows. */
1512 static int
1513 linprocfs_donetdev(PFS_FILL_ARGS)
1514 {
1515         struct epoch_tracker tracker;
1516
1517         sbuf_printf(sb, "%6s|%58s|%s\n", "Inter-", "   Receive", "  Transmit");
1518         sbuf_printf(sb, "%6s|%58s|%58s\n", " face",
1519             "bytes    packets errs drop fifo frame compressed multicast",
1520             "bytes    packets errs drop fifo colls carrier compressed");
1521
1522         /* Run the callback over curthread's vnet inside the net epoch. */
1523         CURVNET_SET(TD_TO_VNET(curthread));
1524         NET_EPOCH_ENTER(tracker);
1525         if_foreach(linprocfs_donetdev_cb, sb);
1526         NET_EPOCH_EXIT(tracker);
1527         CURVNET_RESTORE();
1528
1529         return (0);
1530 }
1531
1532 struct walkarg { /* argument handed through rib_walk() to linux_route_print() */
1533         struct sbuf *sb; /* buffer the route rows are appended to */
1534 };
1535
1536 /*
1537  * rib_walk() callback for proc/net/route: appends one tab-separated row
1538  * describing rt to the sbuf in the struct walkarg passed as vw.  Returns
1539  * ENODEV when the interface name cannot be translated.  Without INET
1540  * this does nothing.
1541  */
1542 static int
1543 linux_route_print(struct rtentry *rt, void *vw)
1544 {
1545 #ifdef INET
1546         struct route_nhop_data rnd;
1547         struct nhop_object *nhop;
1548         struct in_addr dest, netmask;
1549         struct sbuf *out;
1550         uint32_t flags, gateway, scope;
1551         char lname[16];
1552
1553         out = ((struct walkarg *)vw)->sb;
1554         scope = 0;
1555         rt_get_inet_prefix_pmask(rt, &dest, &netmask, &scope);
1556         rt_get_rnd(rt, &rnd);
1557
1558         /* For a multipath route, report only the first nexthop. */
1559         nhop = nhop_select_func(rnd.rnd_nhop, 0);
1560         if (ifname_bsd_to_linux_ifp(nhop->nh_ifp, lname, sizeof(lname)) <= 0)
1561                 return (ENODEV);
1562
1563         if ((nhop->nh_flags & NHF_GATEWAY) != 0)
1564                 gateway = nhop->gw4_sa.sin_addr.s_addr;
1565         else
1566                 gateway = 0;
1567         flags = RTF_UP | (nhop_get_rtflags(nhop) & (RTF_GATEWAY | RTF_HOST));
1568
1569         /* RefCnt, Use, Window and IRTT are always 0; Metric is the weight. */
1570         sbuf_printf(out,
1571             "%s\t%08X\t%08X\t%04X\t%d\t%u\t%d\t%08X\t%d\t%u\t%u",
1572             lname, dest.s_addr, gateway, flags,
1573             0, 0, rnd.rnd_weight,
1574             netmask.s_addr, nhop->nh_mtu, 0, 0);
1575         sbuf_printf(out, "\n\n");
1576 #endif
1577         return (0);
1578 }
1579
1580 /*
1581  * Filler function for proc/net/route: a header padded to 127 columns, then
1582  * the linux_route_print() output for the AF_INET routes of curthread's FIB.
1583  */
1584 static int
1585 linprocfs_donetroute(PFS_FILL_ARGS)
1586 {
1587         struct epoch_tracker tracker;
1588         struct walkarg warg;
1589         uint32_t fib;
1590
1591         warg.sb = sb;
1592         fib = curthread->td_proc->p_fibnum;
1593         sbuf_printf(sb, "%-127s\n",
1594             "Iface\tDestination\tGateway \tFlags\tRefCnt\tUse\tMetric\tMask"
1595             "\t\tMTU\tWindow\tIRTT");
1596
1597         CURVNET_SET(TD_TO_VNET(curthread));
1598         NET_EPOCH_ENTER(tracker);
1599         rib_walk(fib, AF_INET, false, linux_route_print, &warg);
1600         NET_EPOCH_EXIT(tracker);
1601         CURVNET_RESTORE();
1602         return (0);
1603 }
1604
1605 /*
1606  * Filler function for proc/sys/kernel/osrelease: linux_get_osrelease() + "\n"
1607  */
1608 static int
1609 linprocfs_doosrelease(PFS_FILL_ARGS)
1610 {
1611         char release[LINUX_MAX_UTSNAME];
1612
1613         linux_get_osrelease(td, release);
1614         sbuf_cat(sb, release);
1615         sbuf_putc(sb, '\n');
1616         return (0);
1617 }
1618
1619 /*
1620  * Filler function for proc/sys/kernel/ostype: linux_get_osname() + "\n"
1621  */
1622 static int
1623 linprocfs_doostype(PFS_FILL_ARGS)
1624 {
1625         char sysname[LINUX_MAX_UTSNAME];
1626
1627         linux_get_osname(td, sysname);
1628         sbuf_cat(sb, sysname);
1629         sbuf_putc(sb, '\n');
1630         return (0);
1631 }
1632
1633 /*
1634  * Filler function for proc/sys/kernel/version: the linprocfs_osbuild()
1635  * string terminated by a newline.
1636  */
1637 static int
1638 linprocfs_doosbuild(PFS_FILL_ARGS)
1639 {
1640         linprocfs_osbuild(td, sb);
1641         sbuf_putc(sb, '\n');
1642         return (0);
1643 }
1644
1645 /*
1646  * Filler function for proc/sys/kernel/msgmax
1647  * Reports msginfo.msgmax as a decimal number on a line of its own.
1648  */
1649 static int
1650 linprocfs_domsgmax(PFS_FILL_ARGS)
1651 {
1652         sbuf_printf(sb, "%d\n", msginfo.msgmax);
1653         return (0);
1654 }
1655
1656 /*
1657  * Filler function for proc/sys/kernel/msgmni
1658  * Reports msginfo.msgmni as a decimal number on a line of its own.
1659  */
1660 static int
1661 linprocfs_domsgmni(PFS_FILL_ARGS)
1662 {
1663         sbuf_printf(sb, "%d\n", msginfo.msgmni);
1664         return (0);
1665 }
1666
1667 /*
1668  * Filler function for proc/sys/kernel/msgmnb
1669  */
1670 static int
1671 linprocfs_domsgmnb(PFS_FILL_ARGS)
1672 {
1673
1674         sbuf_printf(sb, "%d\n", msginfo.msgmnb);
1675         return (0);
1676 }
1677
1678 /*
1679  * Filler function for proc/sys/kernel/ngroups_max
1680  *
1681  * Note that in Linux it defaults to 65536, not 1023.
1682  */
1683 static int
1684 linprocfs_dongroups_max(PFS_FILL_ARGS)
1685 {
1686
1687         sbuf_printf(sb, "%d\n", ngroups_max);
1688         return (0);
1689 }
1690
1691 /*
1692  * Filler function for proc/sys/kernel/pid_max
1693  */
1694 static int
1695 linprocfs_dopid_max(PFS_FILL_ARGS)
1696 {
1697
1698         sbuf_printf(sb, "%i\n", PID_MAX);
1699         return (0);
1700 }
1701
1702 /*
1703  * Filler function for proc/sys/kernel/sem
1704  */
1705 static int
1706 linprocfs_dosem(PFS_FILL_ARGS)
1707 {
1708
1709         sbuf_printf(sb, "%d %d %d %d\n", seminfo.semmsl, seminfo.semmns,
1710             seminfo.semopm, seminfo.semmni);
1711         return (0);
1712 }
1713
1714 /*
1715  * Filler function for proc/sys/kernel/shmall
1716  */
1717 static int
1718 linprocfs_doshmall(PFS_FILL_ARGS)
1719 {
1720
1721         sbuf_printf(sb, "%lu\n", shminfo.shmall);
1722         return (0);
1723 }
1724
1725 /*
1726  * Filler function for proc/sys/kernel/shmmax
1727  */
1728 static int
1729 linprocfs_doshmmax(PFS_FILL_ARGS)
1730 {
1731
1732         sbuf_printf(sb, "%lu\n", shminfo.shmmax);
1733         return (0);
1734 }
1735
1736 /*
1737  * Filler function for proc/sys/kernel/shmmni
1738  */
1739 static int
1740 linprocfs_doshmmni(PFS_FILL_ARGS)
1741 {
1742
1743         sbuf_printf(sb, "%lu\n", shminfo.shmmni);
1744         return (0);
1745 }
1746
1747 /*
1748  * Filler function for proc/sys/kernel/tainted
1749  */
1750 static int
1751 linprocfs_dotainted(PFS_FILL_ARGS)
1752 {
1753
1754         sbuf_printf(sb, "0\n");
1755         return (0);
1756 }
1757
1758 /*
1759  * Filler function for proc/sys/vm/min_free_kbytes
1760  *
1761  * This mirrors the approach in illumos to return zero for reads. Effectively,
1762  * it says, no memory is kept in reserve for "atomic allocations". This class
1763  * of allocation can be used at times when a thread cannot be suspended.
1764  */
1765 static int
1766 linprocfs_dominfree(PFS_FILL_ARGS)
1767 {
1768
1769         sbuf_printf(sb, "%d\n", 0);
1770         return (0);
1771 }
1772
/*
 * Filler function for proc/scsi/device_info
 *
 * Produces no output; the file exists but reads back empty.
 */
static int
linprocfs_doscsidevinfo(PFS_FILL_ARGS)
{

        /* Nothing to report. */
        return (0);
}
1782
/*
 * Filler function for proc/scsi/scsi
 *
 * Produces no output; the file exists but reads back empty.
 */
static int
linprocfs_doscsiscsi(PFS_FILL_ARGS)
{

        /* Nothing to report. */
        return (0);
}
1792
1793 /*
1794  * Filler function for proc/devices
1795  */
1796 static int
1797 linprocfs_dodevices(PFS_FILL_ARGS)
1798 {
1799         char *char_devices;
1800         sbuf_printf(sb, "Character devices:\n");
1801
1802         char_devices = linux_get_char_devices();
1803         sbuf_printf(sb, "%s", char_devices);
1804         linux_free_get_char_devices(char_devices);
1805
1806         sbuf_printf(sb, "\nBlock devices:\n");
1807
1808         return (0);
1809 }
1810
1811 /*
1812  * Filler function for proc/cmdline
1813  */
1814 static int
1815 linprocfs_docmdline(PFS_FILL_ARGS)
1816 {
1817
1818         sbuf_printf(sb, "BOOT_IMAGE=%s", kernelname);
1819         sbuf_printf(sb, " ro root=302\n");
1820         return (0);
1821 }
1822
1823 /*
1824  * Filler function for proc/filesystems
1825  */
1826 static int
1827 linprocfs_dofilesystems(PFS_FILL_ARGS)
1828 {
1829         struct vfsconf *vfsp;
1830
1831         vfsconf_slock();
1832         TAILQ_FOREACH(vfsp, &vfsconf, vfc_list) {
1833                 if (vfsp->vfc_flags & VFCF_SYNTHETIC)
1834                         sbuf_printf(sb, "nodev");
1835                 sbuf_printf(sb, "\t%s\n", vfsp->vfc_name);
1836         }
1837         vfsconf_sunlock();
1838         return(0);
1839 }
1840
/*
 * Filler function for proc/modules
 *
 * Currently produces no output.  The disabled section below is kept as a
 * record of the intended per-linker-file line format.
 */
static int
linprocfs_domodules(PFS_FILL_ARGS)
{
#if 0
        struct linker_file *lf;

        TAILQ_FOREACH(lf, &linker_files, link) {
                sbuf_printf(sb, "%-20s%8lu%4d\n", lf->filename,
                    (unsigned long)lf->size, lf->refs);
        }
#endif

        return (0);
}
1857
1858 /*
1859  * Filler function for proc/pid/fd
1860  */
1861 static int
1862 linprocfs_dofdescfs(PFS_FILL_ARGS)
1863 {
1864
1865         if (p == curproc)
1866                 sbuf_printf(sb, "/dev/fd");
1867         else
1868                 sbuf_printf(sb, "unknown");
1869         return (0);
1870 }
1871
1872 /*
1873  * Filler function for proc/pid/limits
1874  */
1875 static const struct linux_rlimit_ident {
1876         const char      *desc;
1877         const char      *unit;
1878         unsigned int    rlim_id;
1879 } linux_rlimits_ident[] = {
1880         { "Max cpu time",       "seconds",      RLIMIT_CPU },
1881         { "Max file size",      "bytes",        RLIMIT_FSIZE },
1882         { "Max data size",      "bytes",        RLIMIT_DATA },
1883         { "Max stack size",     "bytes",        RLIMIT_STACK },
1884         { "Max core file size",  "bytes",       RLIMIT_CORE },
1885         { "Max resident set",   "bytes",        RLIMIT_RSS },
1886         { "Max processes",      "processes",    RLIMIT_NPROC },
1887         { "Max open files",     "files",        RLIMIT_NOFILE },
1888         { "Max locked memory",  "bytes",        RLIMIT_MEMLOCK },
1889         { "Max address space",  "bytes",        RLIMIT_AS },
1890         { "Max file locks",     "locks",        LINUX_RLIMIT_LOCKS },
1891         { "Max pending signals", "signals",     LINUX_RLIMIT_SIGPENDING },
1892         { "Max msgqueue size",  "bytes",        LINUX_RLIMIT_MSGQUEUE },
1893         { "Max nice priority",          "",     LINUX_RLIMIT_NICE },
1894         { "Max realtime priority",      "",     LINUX_RLIMIT_RTPRIO },
1895         { "Max realtime timeout",       "us",   LINUX_RLIMIT_RTTIME },
1896         { 0, 0, 0 }
1897 };
1898
1899 static int
1900 linprocfs_doproclimits(PFS_FILL_ARGS)
1901 {
1902         const struct linux_rlimit_ident *li;
1903         struct plimit *limp;
1904         struct rlimit rl;
1905         ssize_t size;
1906         int res, error;
1907
1908         error = 0;
1909
1910         PROC_LOCK(p);
1911         limp = lim_hold(p->p_limit);
1912         PROC_UNLOCK(p);
1913         size = sizeof(res);
1914         sbuf_printf(sb, "%-26s%-21s%-21s%-21s\n", "Limit", "Soft Limit",
1915                         "Hard Limit", "Units");
1916         for (li = linux_rlimits_ident; li->desc != NULL; ++li) {
1917                 switch (li->rlim_id)
1918                 {
1919                 case LINUX_RLIMIT_LOCKS:
1920                         /* FALLTHROUGH */
1921                 case LINUX_RLIMIT_RTTIME:
1922                         rl.rlim_cur = RLIM_INFINITY;
1923                         break;
1924                 case LINUX_RLIMIT_SIGPENDING:
1925                         error = kernel_sysctlbyname(td,
1926                             "kern.sigqueue.max_pending_per_proc",
1927                             &res, &size, 0, 0, 0, 0);
1928                         if (error != 0)
1929                                 goto out;
1930                         rl.rlim_cur = res;
1931                         rl.rlim_max = res;
1932                         break;
1933                 case LINUX_RLIMIT_MSGQUEUE:
1934                         error = kernel_sysctlbyname(td,
1935                             "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0);
1936                         if (error != 0)
1937                                 goto out;
1938                         rl.rlim_cur = res;
1939                         rl.rlim_max = res;
1940                         break;
1941                 case LINUX_RLIMIT_NICE:
1942                         /* FALLTHROUGH */
1943                 case LINUX_RLIMIT_RTPRIO:
1944                         rl.rlim_cur = 0;
1945                         rl.rlim_max = 0;
1946                         break;
1947                 default:
1948                         rl = limp->pl_rlimit[li->rlim_id];
1949                         break;
1950                 }
1951                 if (rl.rlim_cur == RLIM_INFINITY)
1952                         sbuf_printf(sb, "%-26s%-21s%-21s%-10s\n",
1953                             li->desc, "unlimited", "unlimited", li->unit);
1954                 else
1955                         sbuf_printf(sb, "%-26s%-21llu%-21llu%-10s\n",
1956                             li->desc, (unsigned long long)rl.rlim_cur,
1957                             (unsigned long long)rl.rlim_max, li->unit);
1958         }
1959 out:
1960         lim_free(limp);
1961         return (error);
1962 }
1963
1964 /*
1965  * The point of the following two functions is to work around
1966  * an assertion in Chromium; see kern/240991 for details.
1967  */
1968 static int
1969 linprocfs_dotaskattr(PFS_ATTR_ARGS)
1970 {
1971
1972         vap->va_nlink = 3;
1973         return (0);
1974 }
1975
/*
 * Filler function for proc/<pid>/task/.dummy
 *
 * Produces no output; the file reads back empty.
 */
static int
linprocfs_dotaskdummy(PFS_FILL_ARGS)
{

        /* Nothing to report. */
        return (0);
}
1985
1986 /*
1987  * Filler function for proc/sys/kernel/random/uuid
1988  */
1989 static int
1990 linprocfs_douuid(PFS_FILL_ARGS)
1991 {
1992         struct uuid uuid;
1993
1994         kern_uuidgen(&uuid, 1);
1995         sbuf_printf_uuid(sb, &uuid);
1996         sbuf_printf(sb, "\n");
1997         return(0);
1998 }
1999
2000 /*
2001  * Filler function for proc/sys/kernel/random/boot_id
2002  */
2003 static int
2004 linprocfs_doboot_id(PFS_FILL_ARGS)
2005 {
2006        static bool firstboot = 1;
2007        static struct uuid uuid;
2008
2009        if (firstboot) {
2010                kern_uuidgen(&uuid, 1);
2011                firstboot = 0;
2012        }
2013        sbuf_printf_uuid(sb, &uuid);
2014        sbuf_printf(sb, "\n");
2015        return(0);
2016 }
2017
2018 /*
2019  * Filler function for proc/pid/auxv
2020  */
2021 static int
2022 linprocfs_doauxv(PFS_FILL_ARGS)
2023 {
2024         struct sbuf *asb;
2025         off_t buflen, resid;
2026         int error;
2027
2028         /*
2029          * Mimic linux behavior and pass only processes with usermode
2030          * address space as valid. Return zero silently otherwise.
2031          */
2032         if (p->p_vmspace == &vmspace0)
2033                 return (0);
2034
2035         if (uio->uio_resid == 0)
2036                 return (0);
2037         if (uio->uio_offset < 0 || uio->uio_resid < 0)
2038                 return (EINVAL);
2039
2040         asb = sbuf_new_auto();
2041         if (asb == NULL)
2042                 return (ENOMEM);
2043         error = proc_getauxv(td, p, asb);
2044         if (error == 0)
2045                 error = sbuf_finish(asb);
2046
2047         resid = sbuf_len(asb) - uio->uio_offset;
2048         if (resid > uio->uio_resid)
2049                 buflen = uio->uio_resid;
2050         else
2051                 buflen = resid;
2052         if (buflen > IOSIZE_MAX)
2053                 return (EINVAL);
2054         if (buflen > maxphys)
2055                 buflen = maxphys;
2056         if (resid <= 0)
2057                 return (0);
2058
2059         if (error == 0)
2060                 error = uiomove(sbuf_data(asb) + uio->uio_offset, buflen, uio);
2061         sbuf_delete(asb);
2062         return (error);
2063 }
2064
2065 /*
2066  * Filler function for proc/self/oom_score_adj
2067  */
2068 static int
2069 linprocfs_do_oom_score_adj(PFS_FILL_ARGS)
2070 {
2071         struct linux_pemuldata *pem;
2072         long oom;
2073
2074         pem = pem_find(p);
2075         if (pem == NULL || uio == NULL)
2076                 return (EOPNOTSUPP);
2077         if (uio->uio_rw == UIO_READ) {
2078                 sbuf_printf(sb, "%d\n", pem->oom_score_adj);
2079         } else {
2080                 sbuf_trim(sb);
2081                 sbuf_finish(sb);
2082                 oom = strtol(sbuf_data(sb), NULL, 10);
2083                 if (oom < LINUX_OOM_SCORE_ADJ_MIN ||
2084                     oom > LINUX_OOM_SCORE_ADJ_MAX)
2085                         return (EINVAL);
2086                 pem->oom_score_adj = oom;
2087         }
2088         return (0);
2089 }
2090
2091 /*
2092  * Filler function for proc/sys/vm/max_map_count
2093  *
2094  * Maximum number of active map areas, on Linux this limits the number
2095  * of vmaps per mm struct. We don't limit mappings, return a suitable
2096  * large value.
2097  */
2098 static int
2099 linprocfs_domax_map_cnt(PFS_FILL_ARGS)
2100 {
2101
2102         sbuf_printf(sb, "%d\n", INT32_MAX);
2103         return (0);
2104 }
2105
2106 /*
2107  * Constructor
2108  */
2109 static int
2110 linprocfs_init(PFS_INIT_ARGS)
2111 {
2112         struct pfs_node *root;
2113         struct pfs_node *dir;
2114         struct pfs_node *sys;
2115
2116         root = pi->pi_root;
2117
2118         /* /proc/... */
2119         pfs_create_file(root, "cmdline", &linprocfs_docmdline,
2120             NULL, NULL, NULL, PFS_RD);
2121         pfs_create_file(root, "cpuinfo", &linprocfs_docpuinfo,
2122             NULL, NULL, NULL, PFS_RD);
2123         pfs_create_file(root, "devices", &linprocfs_dodevices,
2124             NULL, NULL, NULL, PFS_RD);
2125         pfs_create_file(root, "filesystems", &linprocfs_dofilesystems,
2126             NULL, NULL, NULL, PFS_RD);
2127         pfs_create_file(root, "loadavg", &linprocfs_doloadavg,
2128             NULL, NULL, NULL, PFS_RD);
2129         pfs_create_file(root, "meminfo", &linprocfs_domeminfo,
2130             NULL, NULL, NULL, PFS_RD);
2131         pfs_create_file(root, "modules", &linprocfs_domodules,
2132             NULL, NULL, NULL, PFS_RD);
2133         pfs_create_file(root, "mounts", &linprocfs_domtab,
2134             NULL, NULL, NULL, PFS_RD);
2135         pfs_create_file(root, "mtab", &linprocfs_domtab,
2136             NULL, NULL, NULL, PFS_RD);
2137         pfs_create_file(root, "partitions", &linprocfs_dopartitions,
2138             NULL, NULL, NULL, PFS_RD);
2139         pfs_create_link(root, "self", &procfs_docurproc,
2140             NULL, NULL, NULL, 0);
2141         pfs_create_file(root, "stat", &linprocfs_dostat,
2142             NULL, NULL, NULL, PFS_RD);
2143         pfs_create_file(root, "swaps", &linprocfs_doswaps,
2144             NULL, NULL, NULL, PFS_RD);
2145         pfs_create_file(root, "uptime", &linprocfs_douptime,
2146             NULL, NULL, NULL, PFS_RD);
2147         pfs_create_file(root, "version", &linprocfs_doversion,
2148             NULL, NULL, NULL, PFS_RD);
2149
2150         /* /proc/bus/... */
2151         dir = pfs_create_dir(root, "bus", NULL, NULL, NULL, 0);
2152         dir = pfs_create_dir(dir, "pci", NULL, NULL, NULL, 0);
2153         dir = pfs_create_dir(dir, "devices", NULL, NULL, NULL, 0);
2154
2155         /* /proc/net/... */
2156         dir = pfs_create_dir(root, "net", NULL, NULL, NULL, 0);
2157         pfs_create_file(dir, "dev", &linprocfs_donetdev,
2158             NULL, NULL, NULL, PFS_RD);
2159         pfs_create_file(dir, "route", &linprocfs_donetroute,
2160             NULL, NULL, NULL, PFS_RD);
2161
2162         /* /proc/<pid>/... */
2163         dir = pfs_create_dir(root, "pid", NULL, NULL, NULL, PFS_PROCDEP);
2164         pfs_create_file(dir, "cmdline", &linprocfs_doproccmdline,
2165             NULL, NULL, NULL, PFS_RD);
2166         pfs_create_link(dir, "cwd", &linprocfs_doproccwd,
2167             NULL, NULL, NULL, 0);
2168         pfs_create_file(dir, "environ", &linprocfs_doprocenviron,
2169             NULL, &procfs_candebug, NULL, PFS_RD);
2170         pfs_create_link(dir, "exe", &procfs_doprocfile,
2171             NULL, &procfs_notsystem, NULL, 0);
2172         pfs_create_file(dir, "maps", &linprocfs_doprocmaps,
2173             NULL, NULL, NULL, PFS_RD | PFS_AUTODRAIN);
2174         pfs_create_file(dir, "mem", &linprocfs_doprocmem,
2175             procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR | PFS_RAW);
2176         pfs_create_file(dir, "mountinfo", &linprocfs_doprocmountinfo,
2177             NULL, NULL, NULL, PFS_RD);
2178         pfs_create_file(dir, "mounts", &linprocfs_domtab,
2179             NULL, NULL, NULL, PFS_RD);
2180         pfs_create_link(dir, "root", &linprocfs_doprocroot,
2181             NULL, NULL, NULL, 0);
2182         pfs_create_file(dir, "stat", &linprocfs_doprocstat,
2183             NULL, NULL, NULL, PFS_RD);
2184         pfs_create_file(dir, "statm", &linprocfs_doprocstatm,
2185             NULL, NULL, NULL, PFS_RD);
2186         pfs_create_file(dir, "status", &linprocfs_doprocstatus,
2187             NULL, NULL, NULL, PFS_RD);
2188         pfs_create_link(dir, "fd", &linprocfs_dofdescfs,
2189             NULL, NULL, NULL, 0);
2190         pfs_create_file(dir, "auxv", &linprocfs_doauxv,
2191             NULL, &procfs_candebug, NULL, PFS_RD|PFS_RAWRD);
2192         pfs_create_file(dir, "limits", &linprocfs_doproclimits,
2193             NULL, NULL, NULL, PFS_RD);
2194         pfs_create_file(dir, "oom_score_adj", &linprocfs_do_oom_score_adj,
2195             procfs_attr_rw, &procfs_candebug, NULL, PFS_RDWR);
2196
2197         /* /proc/<pid>/task/... */
2198         dir = pfs_create_dir(dir, "task", linprocfs_dotaskattr, NULL, NULL, 0);
2199         pfs_create_file(dir, ".dummy", &linprocfs_dotaskdummy,
2200             NULL, NULL, NULL, PFS_RD);
2201
2202         /* /proc/scsi/... */
2203         dir = pfs_create_dir(root, "scsi", NULL, NULL, NULL, 0);
2204         pfs_create_file(dir, "device_info", &linprocfs_doscsidevinfo,
2205             NULL, NULL, NULL, PFS_RD);
2206         pfs_create_file(dir, "scsi", &linprocfs_doscsiscsi,
2207             NULL, NULL, NULL, PFS_RD);
2208
2209         /* /proc/sys/... */
2210         sys = pfs_create_dir(root, "sys", NULL, NULL, NULL, 0);
2211
2212         /* /proc/sys/kernel/... */
2213         dir = pfs_create_dir(sys, "kernel", NULL, NULL, NULL, 0);
2214         pfs_create_file(dir, "osrelease", &linprocfs_doosrelease,
2215             NULL, NULL, NULL, PFS_RD);
2216         pfs_create_file(dir, "ostype", &linprocfs_doostype,
2217             NULL, NULL, NULL, PFS_RD);
2218         pfs_create_file(dir, "version", &linprocfs_doosbuild,
2219             NULL, NULL, NULL, PFS_RD);
2220         pfs_create_file(dir, "msgmax", &linprocfs_domsgmax,
2221             NULL, NULL, NULL, PFS_RD);
2222         pfs_create_file(dir, "msgmni", &linprocfs_domsgmni,
2223             NULL, NULL, NULL, PFS_RD);
2224         pfs_create_file(dir, "msgmnb", &linprocfs_domsgmnb,
2225             NULL, NULL, NULL, PFS_RD);
2226         pfs_create_file(dir, "ngroups_max", &linprocfs_dongroups_max,
2227             NULL, NULL, NULL, PFS_RD);
2228         pfs_create_file(dir, "pid_max", &linprocfs_dopid_max,
2229             NULL, NULL, NULL, PFS_RD);
2230         pfs_create_file(dir, "sem", &linprocfs_dosem,
2231             NULL, NULL, NULL, PFS_RD);
2232         pfs_create_file(dir, "shmall", &linprocfs_doshmall,
2233             NULL, NULL, NULL, PFS_RD);
2234         pfs_create_file(dir, "shmmax", &linprocfs_doshmmax,
2235             NULL, NULL, NULL, PFS_RD);
2236         pfs_create_file(dir, "shmmni", &linprocfs_doshmmni,
2237             NULL, NULL, NULL, PFS_RD);
2238         pfs_create_file(dir, "tainted", &linprocfs_dotainted,
2239             NULL, NULL, NULL, PFS_RD);
2240
2241         /* /proc/sys/kernel/random/... */
2242         dir = pfs_create_dir(dir, "random", NULL, NULL, NULL, 0);
2243         pfs_create_file(dir, "uuid", &linprocfs_douuid,
2244             NULL, NULL, NULL, PFS_RD);
2245         pfs_create_file(dir, "boot_id", &linprocfs_doboot_id,
2246             NULL, NULL, NULL, PFS_RD);
2247
2248         /* /proc/sys/vm/.... */
2249         dir = pfs_create_dir(sys, "vm", NULL, NULL, NULL, 0);
2250         pfs_create_file(dir, "min_free_kbytes", &linprocfs_dominfree,
2251             NULL, NULL, NULL, PFS_RD);
2252         pfs_create_file(dir, "max_map_count", &linprocfs_domax_map_cnt,
2253             NULL, NULL, NULL, PFS_RD);
2254
2255         return (0);
2256 }
2257
/*
 * Destructor
 *
 * No explicit teardown is performed here; pseudofs garbage-collects the
 * nodes created by the constructor.
 */
static int
linprocfs_uninit(PFS_INIT_ARGS)
{

        /* Nothing to release: pseudofs will GC the tree. */
        return (0);
}
2268
2269 PSEUDOFS(linprocfs, 1, VFCF_JAIL);
2270 #if defined(__aarch64__) || defined(__amd64__)
2271 MODULE_DEPEND(linprocfs, linux_common, 1, 1, 1);
2272 #else
2273 MODULE_DEPEND(linprocfs, linux, 1, 1, 1);
2274 #endif
2275 MODULE_DEPEND(linprocfs, procfs, 1, 1, 1);
2276 MODULE_DEPEND(linprocfs, sysvmsg, 1, 1, 1);
2277 MODULE_DEPEND(linprocfs, sysvsem, 1, 1, 1);
2278 MODULE_DEPEND(linprocfs, sysvshm, 1, 1, 1);