3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
41 $Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $
43 Copyright (c) 2011-2013, Juniper Networks, Inc.
46 Redistribution and use in source and binary forms, with or without
47 modification, are permitted provided that the following conditions
49 1. Redistributions of source code must retain the above copyright
50 notice, this list of conditions and the following disclaimer.
51 2. Redistributions in binary form must reproduce the above copyright
52 notice, this list of conditions and the following disclaimer in the
53 documentation and/or other materials provided with the distribution.
55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 def getv(dict, key, d=None):
72 """Lookup key in dict and return value or the supplied default."""
77 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
79 Return an absolute path, resolving via cwd or last_dir if needed.
81 if path.endswith('/.'):
83 if len(path) > 0 and path[0] == '/':
87 if path.startswith('./'):
91 for d in [last_dir, cwd]:
94 p = '/'.join([d,path])
96 print("looking for:", p, end=' ', file=debug_out)
97 if not os.path.exists(p):
99 print("nope", file=debug_out)
103 print("found:", p, file=debug_out)
108 """cleanup path without using realpath(3)"""
109 if path.startswith('/'):
116 if not d or d == '.':
123 return r + '/'.join(p)
125 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
127 Return an absolute path, resolving via cwd or last_dir if needed.
128 this gets called a lot, so we try to avoid calling realpath.
130 rpath = resolve(path, cwd, last_dir, debug, debug_out)
133 if (path.find('/') < 0 or
134 path.find('./') > 0 or
135 path.endswith('/..')):
136 path = cleanpath(path)
139 def sort_unique(list, cmp=None, key=None, reverse=False):
140 list.sort(cmp, key, reverse)
151 return ['/' + x + '/',
157 """class to parse meta files generated by bmake."""
170 def __init__(self, name, conf={}):
171 """if name is set we will parse it now.
172 conf can have the following keys:
174 SRCTOPS list of tops of the src tree(s).
176 CURDIR the src directory 'bmake' was run from.
178 RELDIR the relative path from SRCTOP to CURDIR
180 MACHINE the machine we built for.
181 set to 'none' if we are not cross-building.
182 More specifically if machine cannot be deduced from objdirs.
185 Sometimes MACHINE isn't enough.
188 when we build for the pseudo machine 'host'
189 the object tree uses HOST_TARGET rather than MACHINE.
191 OBJROOTS a list of the common prefix for all obj dirs it might
194 DPDEPS names an optional file to which per file dependencies
196 For example if 'some/path/foo.h' is read from SRCTOP
197 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
198 This can allow 'bmake' to learn all the dirs within
199 the tree that depend on 'foo.h'
202 A list of paths to ignore.
203 ccache(1) can otherwise be trouble.
205 debug desired debug level
207 debug_out open file to send debug output to (sys.stderr)
212 self.debug = getv(conf, 'debug', 0)
213 self.debug_out = getv(conf, 'debug_out', sys.stderr)
215 self.machine = getv(conf, 'MACHINE', '')
216 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
217 self.target_spec = getv(conf, 'TARGET_SPEC', '')
218 self.curdir = getv(conf, 'CURDIR')
219 self.reldir = getv(conf, 'RELDIR')
220 self.dpdeps = getv(conf, 'DPDEPS')
224 # some of the steps below we want to do only once
226 self.host_target = getv(conf, 'HOST_TARGET')
227 for srctop in getv(conf, 'SRCTOPS', []):
228 if srctop[-1] != '/':
230 if not srctop in self.srctops:
231 self.srctops.append(srctop)
232 _srctop = os.path.realpath(srctop)
233 if _srctop[-1] != '/':
235 if not _srctop in self.srctops:
236 self.srctops.append(_srctop)
238 trim_list = add_trims(self.machine)
239 if self.machine == 'host':
240 trim_list += add_trims(self.host_target)
242 trim_list += add_trims(self.target_spec)
244 for objroot in getv(conf, 'OBJROOTS', []):
246 if objroot.endswith(e):
247 # this is not what we want - fix it
248 objroot = objroot[0:-len(e)]
250 if objroot[-1] != '/':
252 if not objroot in self.objroots:
253 self.objroots.append(objroot)
254 _objroot = os.path.realpath(objroot)
255 if objroot[-1] == '/':
257 if not _objroot in self.objroots:
258 self.objroots.append(_objroot)
260 # we want the longest match
261 self.srctops.sort(reverse=True)
262 self.objroots.sort(reverse=True)
264 self.excludes = getv(conf, 'EXCLUDES', [])
267 print("host_target=", self.host_target, file=self.debug_out)
268 print("srctops=", self.srctops, file=self.debug_out)
269 print("objroots=", self.objroots, file=self.debug_out)
270 print("excludes=", self.excludes, file=self.debug_out)
272 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
274 if self.dpdeps and not self.reldir:
276 print("need reldir:", end=' ', file=self.debug_out)
278 srctop = self.find_top(self.curdir, self.srctops)
280 self.reldir = self.curdir.replace(srctop,'')
282 print(self.reldir, file=self.debug_out)
284 self.dpdeps = None # we cannot do it?
286 self.cwd = os.getcwd() # make sure this is initialized
287 self.last_dir = self.cwd
293 """reset state if we are being passed meta files from multiple directories."""
299 def dirdeps(self, sep='\n'):
301 return sep.strip() + sep.join(self.obj_deps)
303 def src_dirdeps(self, sep='\n'):
304 """return SRC_DIRDEPS"""
305 return sep.strip() + sep.join(self.src_deps)
307 def file_depends(self, out=None):
308 """Append DPDEPS_${file} += ${RELDIR}
309 for each file we saw, to the output file."""
312 for f in sort_unique(self.file_deps):
313 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
314 # these entries provide for reverse DIRDEPS lookup
315 for f in self.obj_deps:
316 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
318 def seenit(self, dir):
319 """remember that we have seen dir."""
322 def add(self, list, data, clue=''):
323 """add data to list if it isn't already there."""
327 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
329 def find_top(self, path, list):
330 """the logical tree may be split across multiple trees"""
332 if path.startswith(top):
334 print("found in", top, file=self.debug_out)
338 def find_obj(self, objroot, dir, path, input):
339 """return path within objroot, taking care of .dirdep files"""
341 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
342 if not ddep and os.path.exists(ddepf):
343 ddep = open(ddepf, 'r').readline().strip('# \n')
345 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
346 if ddep.endswith(self.machine):
347 ddep = ddep[0:-(1+len(self.machine))]
348 elif self.target_spec and ddep.endswith(self.target_spec):
349 ddep = ddep[0:-(1+len(self.target_spec))]
352 # no .dirdeps, so remember that we've seen the raw input
355 if self.machine == 'none':
356 if dir.startswith(objroot):
357 return dir.replace(objroot,'')
359 m = self.dirdep_re.match(dir.replace(objroot,''))
362 dmachine = m.group(1)
363 if dmachine != self.machine:
364 if not (self.machine == 'host' and
365 dmachine == self.host_target):
367 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
368 ddep += '.' + dmachine
372 def try_parse(self, name=None, file=None):
373 """give file and line number causing exception"""
375 self.parse(name, file)
378 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
381 def parse(self, name=None, file=None):
382 """A meta file looks like:
384 # Meta data file "path"
389 -- filemon acquired metadata --
399 L "pid" "src" "target"
404 We go to some effort to avoid processing a dependency more than once.
405 Of the above record types only C,E,F,L,R,V and W are of interest.
408 version = 0 # unknown
413 cwd = self.last_dir = self.cwd
415 f = open(self.name, 'r')
423 self.seenit(self.curdir) # we ignore this
425 interesting = 'CEFLRV'
428 # ignore anything we don't care about
429 if not line[0] in interesting:
432 print("input:", line, end=' ', file=self.debug_out)
441 # we cannot ignore 'W' records
442 # as they may be 'rw'
446 self.cwd = cwd = self.last_dir = w[1]
447 self.seenit(cwd) # ignore this
449 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
455 pid_last_dir[last_pid] = self.last_dir
456 cwd = getv(pid_cwd, pid, self.cwd)
457 self.last_dir = getv(pid_last_dir, pid, self.cwd)
464 pid_last_dir[npid] = cwd
468 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
469 if cwd.endswith('/.'):
471 self.last_dir = pid_last_dir[pid] = cwd
474 print("cwd=", cwd, file=self.debug_out)
477 if w[2] in self.seen:
479 print("seen:", w[2], file=self.debug_out)
483 # these are special, treat src as read and
485 self.parse_path(w[1].strip("'"), cwd, 'R', w)
486 self.parse_path(w[2].strip("'"), cwd, 'W', w)
490 self.parse_path(path, cwd, w[0], w)
495 def is_src(self, base, dir, rdir):
496 """is base in srctop"""
497 for dir in [dir,rdir]:
500 path = '/'.join([dir,base])
501 srctop = self.find_top(path, self.srctops)
504 self.add(self.file_deps, path.replace(srctop,''), 'file')
505 self.add(self.src_deps, dir.replace(srctop,''), 'src')
510 def parse_path(self, path, cwd, op=None, w=[]):
511 """look at a path for the op specified"""
516 # we are never interested in .dirdep files as dependencies
517 if path.endswith('.dirdep'):
519 for p in self.excludes:
520 if p and path.startswith(p):
522 print("exclude:", p, path, file=self.debug_out)
524 # we don't want to resolve the last component if it is
526 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
529 dir,base = os.path.split(path)
532 print("seen:", dir, file=self.debug_out)
534 # we can have a path in an objdir which is a link
535 # to the src dir, we may need to add dependencies for each
537 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
538 rdir = os.path.realpath(dir)
541 # now put path back together
542 path = '/'.join([dir,base])
544 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
546 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
548 print("skipping:", path, file=self.debug_out)
550 if os.path.isdir(path):
552 self.last_dir = path;
554 print("ldir=", self.last_dir, file=self.debug_out)
558 # finally, we get down to it
559 if dir == self.cwd or dir == self.curdir:
561 if self.is_src(base, dir, rdir):
567 for dir in [dir,rdir]:
570 objroot = self.find_top(dir, self.objroots)
574 ddep = self.find_obj(objroot, dir, path, w[2])
576 self.add(self.obj_deps, ddep, 'obj')
577 if self.dpdeps and objroot.endswith('/stage/'):
578 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
579 self.add(self.file_deps, sp, 'file')
581 # don't waste time looking again
586 def main(argv, klass=MetaFile, xopts='', xoptf=None):
587 """Simple driver for class MetaFile.
590 script [options] [key=value ...] "meta" ...
592 Options and key=value pairs contribute to the
593 dictionary passed to MetaFile.
596 add "SRCTOP" to the "SRCTOPS" list.
601 add "OBJROOT" to the "OBJROOTS" list.
616 # import Psyco if we can
617 # it can speed things up quite a bit
633 machine = os.environ['MACHINE']
635 conf['MACHINE'] = machine
636 machine_arch = os.environ['MACHINE_ARCH']
638 conf['MACHINE_ARCH'] = machine_arch
639 srctop = os.environ['SB_SRC']
641 conf['SRCTOPS'].append(srctop)
642 objroot = os.environ['SB_OBJROOT']
644 conf['OBJROOTS'].append(objroot)
651 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
654 conf['MACHINE_ARCH'] = a
660 conf['HOST_TARGET'] = a
662 if a not in conf['SRCTOPS']:
663 conf['SRCTOPS'].append(a)
667 if a not in conf['OBJROOTS']:
668 conf['OBJROOTS'].append(a)
676 conf['TARGET_SPEC'] = a
678 if a not in conf['EXCLUDES']:
679 conf['EXCLUDES'].append(a)
683 conf['debug'] = debug
685 # get any var=val assignments
690 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
706 debug_out = getv(conf, 'debug_out', sys.stderr)
709 print("config:", file=debug_out)
710 print("psyco=", have_psyco, file=debug_out)
711 for k,v in list(conf.items()):
712 print("%s=%s" % (k,v), file=debug_out)
716 if a.endswith('.meta'):
717 if not os.path.exists(a):
720 elif a.startswith('@'):
721 # there can actually be multiple files per line
722 for line in open(a[1:]):
723 for f in line.strip().split():
724 if not os.path.exists(f):
731 print(m.src_dirdeps('\nsrc:'))
733 dpdeps = getv(conf, 'DPDEPS')
735 m.file_depends(open(dpdeps, 'wb'))
739 if __name__ == '__main__':
743 # yes, this goes to stdout
744 print("ERROR: ", sys.exc_info()[1])