3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
41 $Id: meta2deps.py,v 1.34 2020/10/02 03:11:17 sjg Exp $
43 Copyright (c) 2011-2020, Simon J. Gerraty
44 Copyright (c) 2011-2017, Juniper Networks, Inc.
47 Redistribution and use in source and binary forms, with or without
48 modification, are permitted provided that the following conditions
50 1. Redistributions of source code must retain the above copyright
51 notice, this list of conditions and the following disclaimer.
52 2. Redistributions in binary form must reproduce the above copyright
53 notice, this list of conditions and the following disclaimer in the
54 documentation and/or other materials provided with the distribution.
56 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
57 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
58 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
59 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
60 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
61 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
62 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
63 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
64 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
65 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
66 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
72 def getv(dict, key, d=None):
73 """Lookup key in dict and return value or the supplied default."""
78 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
80 Return an absolute path, resolving via cwd or last_dir if needed.
82 if path.endswith('/.'):
84 if len(path) > 0 and path[0] == '/':
85 if os.path.exists(path):
88 print("skipping non-existent:", path, file=debug_out)
92 if path.startswith('./'):
96 for d in [last_dir, cwd]:
101 p = '/'.join(dw[:-1])
105 p = '/'.join([d,path])
107 print("looking for:", p, end=' ', file=debug_out)
108 if not os.path.exists(p):
110 print("nope", file=debug_out)
114 print("found:", p, file=debug_out)
119 """cleanup path without using realpath(3)"""
120 if path.startswith('/'):
127 if not d or d == '.':
137 return r + '/'.join(p)
139 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
141 Return an absolute path, resolving via cwd or last_dir if needed.
142 this gets called a lot, so we try to avoid calling realpath.
144 rpath = resolve(path, cwd, last_dir, debug, debug_out)
147 elif len(path) > 0 and path[0] == '/':
149 if (path.find('/') < 0 or
150 path.find('./') > 0 or
151 path.endswith('/..')):
152 path = cleanpath(path)
155 def sort_unique(list, cmp=None, key=None, reverse=False):
156 list.sort(cmp, key, reverse)
167 return ['/' + x + '/',
173 """class to parse meta files generated by bmake."""
186 def __init__(self, name, conf={}):
187 """if name is set we will parse it now.
188 conf can have the following keys:
190 SRCTOPS list of tops of the src tree(s).
192 CURDIR the src directory 'bmake' was run from.
194 RELDIR the relative path from SRCTOP to CURDIR
196 MACHINE the machine we built for.
197 set to 'none' if we are not cross-building.
198 More specifically if machine cannot be deduced from objdirs.
201 Sometimes MACHINE isn't enough.
204 when we build for the pseudo machine 'host'
205 the object tree uses HOST_TARGET rather than MACHINE.
207 OBJROOTS a list of the common prefix for all obj dirs it might
210 DPDEPS names an optional file to which per file dependencies
212 For example if 'some/path/foo.h' is read from SRCTOP
213 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
214 This can allow 'bmake' to learn all the dirs within
215 the tree that depend on 'foo.h'
218 A list of paths to ignore.
219 ccache(1) can otherwise be trouble.
221 debug desired debug level
223 debug_out open file to send debug output to (sys.stderr)
228 self.debug = getv(conf, 'debug', 0)
229 self.debug_out = getv(conf, 'debug_out', sys.stderr)
231 self.machine = getv(conf, 'MACHINE', '')
232 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
233 self.target_spec = getv(conf, 'TARGET_SPEC', '')
234 self.curdir = getv(conf, 'CURDIR')
235 self.reldir = getv(conf, 'RELDIR')
236 self.dpdeps = getv(conf, 'DPDEPS')
240 # some of the steps below we want to do only once
242 self.host_target = getv(conf, 'HOST_TARGET')
243 for srctop in getv(conf, 'SRCTOPS', []):
244 if srctop[-1] != '/':
246 if not srctop in self.srctops:
247 self.srctops.append(srctop)
248 _srctop = os.path.realpath(srctop)
249 if _srctop[-1] != '/':
251 if not _srctop in self.srctops:
252 self.srctops.append(_srctop)
254 trim_list = add_trims(self.machine)
255 if self.machine == 'host':
256 trim_list += add_trims(self.host_target)
258 trim_list += add_trims(self.target_spec)
260 for objroot in getv(conf, 'OBJROOTS', []):
262 if objroot.endswith(e):
263 # this is not what we want - fix it
264 objroot = objroot[0:-len(e)]
266 if objroot[-1] != '/':
268 if not objroot in self.objroots:
269 self.objroots.append(objroot)
270 _objroot = os.path.realpath(objroot)
271 if objroot[-1] == '/':
273 if not _objroot in self.objroots:
274 self.objroots.append(_objroot)
276 # we want the longest match
277 self.srctops.sort(reverse=True)
278 self.objroots.sort(reverse=True)
280 self.excludes = getv(conf, 'EXCLUDES', [])
283 print("host_target=", self.host_target, file=self.debug_out)
284 print("srctops=", self.srctops, file=self.debug_out)
285 print("objroots=", self.objroots, file=self.debug_out)
286 print("excludes=", self.excludes, file=self.debug_out)
288 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
290 if self.dpdeps and not self.reldir:
292 print("need reldir:", end=' ', file=self.debug_out)
294 srctop = self.find_top(self.curdir, self.srctops)
296 self.reldir = self.curdir.replace(srctop,'')
298 print(self.reldir, file=self.debug_out)
300 self.dpdeps = None # we cannot do it?
302 self.cwd = os.getcwd() # make sure this is initialized
303 self.last_dir = self.cwd
309 """reset state if we are being passed meta files from multiple directories."""
315 def dirdeps(self, sep='\n'):
317 return sep.strip() + sep.join(self.obj_deps)
319 def src_dirdeps(self, sep='\n'):
320 """return SRC_DIRDEPS"""
321 return sep.strip() + sep.join(self.src_deps)
323 def file_depends(self, out=None):
324 """Append DPDEPS_${file} += ${RELDIR}
325 for each file we saw, to the output file."""
328 for f in sort_unique(self.file_deps):
329 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
330 # these entries provide for reverse DIRDEPS lookup
331 for f in self.obj_deps:
332 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
334 def seenit(self, dir):
335 """remember that we have seen dir."""
338 def add(self, list, data, clue=''):
339 """add data to list if it isn't already there."""
343 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
345 def find_top(self, path, list):
346 """the logical tree may be split across multiple trees"""
348 if path.startswith(top):
350 print("found in", top, file=self.debug_out)
354 def find_obj(self, objroot, dir, path, input):
355 """return path within objroot, taking care of .dirdep files"""
357 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
358 if not ddep and os.path.exists(ddepf):
359 ddep = open(ddepf, 'r').readline().strip('# \n')
361 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
362 if ddep.endswith(self.machine):
363 ddep = ddep[0:-(1+len(self.machine))]
364 elif self.target_spec and ddep.endswith(self.target_spec):
365 ddep = ddep[0:-(1+len(self.target_spec))]
368 # no .dirdeps, so remember that we've seen the raw input
371 if self.machine == 'none':
372 if dir.startswith(objroot):
373 return dir.replace(objroot,'')
375 m = self.dirdep_re.match(dir.replace(objroot,''))
378 dmachine = m.group(1)
379 if dmachine != self.machine:
380 if not (self.machine == 'host' and
381 dmachine == self.host_target):
383 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
384 ddep += '.' + dmachine
388 def try_parse(self, name=None, file=None):
389 """give file and line number causing exception"""
391 self.parse(name, file)
394 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
397 def parse(self, name=None, file=None):
398 """A meta file looks like:
400 # Meta data file "path"
405 -- filemon acquired metadata --
415 L "pid" "src" "target"
420 We go to some effort to avoid processing a dependency more than once.
421 Of the above record types only C,E,F,L,R,V and W are of interest.
424 version = 0 # unknown
429 cwd = self.last_dir = self.cwd
431 f = open(self.name, 'r')
439 self.seenit(self.curdir) # we ignore this
441 interesting = 'CEFLRV'
444 # ignore anything we don't care about
445 if not line[0] in interesting:
448 print("input:", line, end=' ', file=self.debug_out)
457 # we cannot ignore 'W' records
458 # as they may be 'rw'
462 self.cwd = cwd = self.last_dir = w[1]
463 self.seenit(cwd) # ignore this
465 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
471 pid_last_dir[last_pid] = self.last_dir
472 cwd = getv(pid_cwd, pid, self.cwd)
473 self.last_dir = getv(pid_last_dir, pid, self.cwd)
480 pid_last_dir[npid] = cwd
484 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
488 print("missing cwd=", cwd, file=self.debug_out)
489 if cwd.endswith('/.'):
491 self.last_dir = pid_last_dir[pid] = cwd
494 print("cwd=", cwd, file=self.debug_out)
497 if w[2] in self.seen:
499 print("seen:", w[2], file=self.debug_out)
503 # these are special, treat src as read and
505 self.parse_path(w[2].strip("'"), cwd, 'R', w)
506 self.parse_path(w[3].strip("'"), cwd, 'W', w)
512 self.parse_path(path, cwd, w[0], w)
518 def is_src(self, base, dir, rdir):
519 """is base in srctop"""
520 for dir in [dir,rdir]:
523 path = '/'.join([dir,base])
524 srctop = self.find_top(path, self.srctops)
527 self.add(self.file_deps, path.replace(srctop,''), 'file')
528 self.add(self.src_deps, dir.replace(srctop,''), 'src')
533 def parse_path(self, path, cwd, op=None, w=[]):
534 """look at a path for the op specified"""
539 # we are never interested in .dirdep files as dependencies
540 if path.endswith('.dirdep'):
542 for p in self.excludes:
543 if p and path.startswith(p):
545 print("exclude:", p, path, file=self.debug_out)
547 # we don't want to resolve the last component if it is
549 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
552 dir,base = os.path.split(path)
555 print("seen:", dir, file=self.debug_out)
557 # we can have a path in an objdir which is a link
558 # to the src dir, we may need to add dependencies for each
560 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
561 rdir = os.path.realpath(dir)
564 # now put path back together
565 path = '/'.join([dir,base])
567 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
569 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
571 print("skipping:", path, file=self.debug_out)
573 if os.path.isdir(path):
575 self.last_dir = path;
577 print("ldir=", self.last_dir, file=self.debug_out)
581 # finally, we get down to it
582 if dir == self.cwd or dir == self.curdir:
584 if self.is_src(base, dir, rdir):
590 for dir in [dir,rdir]:
593 objroot = self.find_top(dir, self.objroots)
597 ddep = self.find_obj(objroot, dir, path, w[2])
599 self.add(self.obj_deps, ddep, 'obj')
600 if self.dpdeps and objroot.endswith('/stage/'):
601 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
602 self.add(self.file_deps, sp, 'file')
604 # don't waste time looking again
609 def main(argv, klass=MetaFile, xopts='', xoptf=None):
610 """Simple driver for class MetaFile.
613 script [options] [key=value ...] "meta" ...
615 Options and key=value pairs contribute to the
616 dictionary passed to MetaFile.
619 add "SRCTOP" to the "SRCTOPS" list.
624 add "OBJROOT" to the "OBJROOTS" list.
639 # import Psyco if we can
640 # it can speed things up quite a bit
656 machine = os.environ['MACHINE']
658 conf['MACHINE'] = machine
659 machine_arch = os.environ['MACHINE_ARCH']
661 conf['MACHINE_ARCH'] = machine_arch
662 srctop = os.environ['SB_SRC']
664 conf['SRCTOPS'].append(srctop)
665 objroot = os.environ['SB_OBJROOT']
667 conf['OBJROOTS'].append(objroot)
674 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
677 conf['MACHINE_ARCH'] = a
683 conf['HOST_TARGET'] = a
685 if a not in conf['SRCTOPS']:
686 conf['SRCTOPS'].append(a)
690 if a not in conf['OBJROOTS']:
691 conf['OBJROOTS'].append(a)
699 conf['TARGET_SPEC'] = a
701 if a not in conf['EXCLUDES']:
702 conf['EXCLUDES'].append(a)
706 conf['debug'] = debug
708 # get any var=val assignments
713 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
729 debug_out = getv(conf, 'debug_out', sys.stderr)
732 print("config:", file=debug_out)
733 print("psyco=", have_psyco, file=debug_out)
734 for k,v in list(conf.items()):
735 print("%s=%s" % (k,v), file=debug_out)
739 if a.endswith('.meta'):
740 if not os.path.exists(a):
743 elif a.startswith('@'):
744 # there can actually be multiple files per line
745 for line in open(a[1:]):
746 for f in line.strip().split():
747 if not os.path.exists(f):
754 print(m.src_dirdeps('\nsrc:'))
756 dpdeps = getv(conf, 'DPDEPS')
758 m.file_depends(open(dpdeps, 'wb'))
762 if __name__ == '__main__':
766 # yes, this goes to stdout
767 print("ERROR: ", sys.exc_info()[1])