3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
41 $Id: meta2deps.py,v 1.27 2017/05/24 00:04:04 sjg Exp $
43 Copyright (c) 2011-2013, Juniper Networks, Inc.
46 Redistribution and use in source and binary forms, with or without
47 modification, are permitted provided that the following conditions
49 1. Redistributions of source code must retain the above copyright
50 notice, this list of conditions and the following disclaimer.
51 2. Redistributions in binary form must reproduce the above copyright
52 notice, this list of conditions and the following disclaimer in the
53 documentation and/or other materials provided with the distribution.
55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 def getv(dict, key, d=None):
72 """Lookup key in dict and return value or the supplied default."""
77 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
79 Return an absolute path, resolving via cwd or last_dir if needed.
81 if path.endswith('/.'):
83 if len(path) > 0 and path[0] == '/':
87 if path.startswith('./'):
91 for d in [last_dir, cwd]:
100 p = '/'.join([d,path])
102 print("looking for:", p, end=' ', file=debug_out)
103 if not os.path.exists(p):
105 print("nope", file=debug_out)
109 print("found:", p, file=debug_out)
114 """cleanup path without using realpath(3)"""
115 if path.startswith('/'):
122 if not d or d == '.':
132 return r + '/'.join(p)
134 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
136 Return an absolute path, resolving via cwd or last_dir if needed.
137 this gets called a lot, so we try to avoid calling realpath.
139 rpath = resolve(path, cwd, last_dir, debug, debug_out)
142 if (path.find('/') < 0 or
143 path.find('./') > 0 or
144 path.endswith('/..')):
145 path = cleanpath(path)
148 def sort_unique(list, cmp=None, key=None, reverse=False):
149 list.sort(cmp, key, reverse)
160 return ['/' + x + '/',
166 """class to parse meta files generated by bmake."""
179 def __init__(self, name, conf={}):
180 """if name is set we will parse it now.
181 conf can have the following keys:
183 SRCTOPS list of tops of the src tree(s).
185 CURDIR the src directory 'bmake' was run from.
187 RELDIR the relative path from SRCTOP to CURDIR
189 MACHINE the machine we built for.
190 set to 'none' if we are not cross-building.
191 More specifically if machine cannot be deduced from objdirs.
194 Sometimes MACHINE isn't enough.
197 when we build for the pseudo machine 'host'
198 the object tree uses HOST_TARGET rather than MACHINE.
200 OBJROOTS a list of the common prefix for all obj dirs it might
203 DPDEPS names an optional file to which per file dependencies
205 For example if 'some/path/foo.h' is read from SRCTOP
206 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
207 This can allow 'bmake' to learn all the dirs within
208 the tree that depend on 'foo.h'
211 A list of paths to ignore.
212 ccache(1) can otherwise be trouble.
214 debug desired debug level
216 debug_out open file to send debug output to (sys.stderr)
221 self.debug = getv(conf, 'debug', 0)
222 self.debug_out = getv(conf, 'debug_out', sys.stderr)
224 self.machine = getv(conf, 'MACHINE', '')
225 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
226 self.target_spec = getv(conf, 'TARGET_SPEC', '')
227 self.curdir = getv(conf, 'CURDIR')
228 self.reldir = getv(conf, 'RELDIR')
229 self.dpdeps = getv(conf, 'DPDEPS')
233 # some of the steps below we want to do only once
235 self.host_target = getv(conf, 'HOST_TARGET')
236 for srctop in getv(conf, 'SRCTOPS', []):
237 if srctop[-1] != '/':
239 if not srctop in self.srctops:
240 self.srctops.append(srctop)
241 _srctop = os.path.realpath(srctop)
242 if _srctop[-1] != '/':
244 if not _srctop in self.srctops:
245 self.srctops.append(_srctop)
247 trim_list = add_trims(self.machine)
248 if self.machine == 'host':
249 trim_list += add_trims(self.host_target)
251 trim_list += add_trims(self.target_spec)
253 for objroot in getv(conf, 'OBJROOTS', []):
255 if objroot.endswith(e):
256 # this is not what we want - fix it
257 objroot = objroot[0:-len(e)]
259 if objroot[-1] != '/':
261 if not objroot in self.objroots:
262 self.objroots.append(objroot)
263 _objroot = os.path.realpath(objroot)
264 if objroot[-1] == '/':
266 if not _objroot in self.objroots:
267 self.objroots.append(_objroot)
269 # we want the longest match
270 self.srctops.sort(reverse=True)
271 self.objroots.sort(reverse=True)
273 self.excludes = getv(conf, 'EXCLUDES', [])
276 print("host_target=", self.host_target, file=self.debug_out)
277 print("srctops=", self.srctops, file=self.debug_out)
278 print("objroots=", self.objroots, file=self.debug_out)
279 print("excludes=", self.excludes, file=self.debug_out)
281 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
283 if self.dpdeps and not self.reldir:
285 print("need reldir:", end=' ', file=self.debug_out)
287 srctop = self.find_top(self.curdir, self.srctops)
289 self.reldir = self.curdir.replace(srctop,'')
291 print(self.reldir, file=self.debug_out)
293 self.dpdeps = None # we cannot do it?
295 self.cwd = os.getcwd() # make sure this is initialized
296 self.last_dir = self.cwd
302 """reset state if we are being passed meta files from multiple directories."""
308 def dirdeps(self, sep='\n'):
310 return sep.strip() + sep.join(self.obj_deps)
312 def src_dirdeps(self, sep='\n'):
313 """return SRC_DIRDEPS: the entries of self.src_deps joined with sep, prefixed by sep.strip()"""
314 return sep.strip() + sep.join(self.src_deps)
316 def file_depends(self, out=None):
317 """Append DPDEPS_${file} += ${RELDIR}
318 for each file we saw, to the output file."""
321 for f in sort_unique(self.file_deps):
322 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
323 # these entries provide for reverse DIRDEPS lookup
324 for f in self.obj_deps:
325 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
327 def seenit(self, dir):
328 """remember that we have seen dir."""
331 def add(self, list, data, clue=''):
332 """add data to list if it isn't already there."""
336 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
338 def find_top(self, path, list):
339 """the logical tree may be split across multiple trees"""
341 if path.startswith(top):
343 print("found in", top, file=self.debug_out)
347 def find_obj(self, objroot, dir, path, input):
348 """return path within objroot, taking care of .dirdep files"""
350 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
351 if not ddep and os.path.exists(ddepf):
352 ddep = open(ddepf, 'r').readline().strip('# \n')
354 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
355 if ddep.endswith(self.machine):
356 ddep = ddep[0:-(1+len(self.machine))]
357 elif self.target_spec and ddep.endswith(self.target_spec):
358 ddep = ddep[0:-(1+len(self.target_spec))]
361 # no .dirdeps, so remember that we've seen the raw input
364 if self.machine == 'none':
365 if dir.startswith(objroot):
366 return dir.replace(objroot,'')
368 m = self.dirdep_re.match(dir.replace(objroot,''))
371 dmachine = m.group(1)
372 if dmachine != self.machine:
373 if not (self.machine == 'host' and
374 dmachine == self.host_target):
376 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
377 ddep += '.' + dmachine
381 def try_parse(self, name=None, file=None):
382 """give file and line number causing exception"""
384 self.parse(name, file)
387 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
390 def parse(self, name=None, file=None):
391 """A meta file looks like:
393 # Meta data file "path"
398 -- filemon acquired metadata --
408 L "pid" "src" "target"
413 We go to some effort to avoid processing a dependency more than once.
414 Of the above record types only C,E,F,L,R,V and W are of interest.
417 version = 0 # unknown
422 cwd = self.last_dir = self.cwd
424 f = open(self.name, 'r')
432 self.seenit(self.curdir) # we ignore this
434 interesting = 'CEFLRV'
437 # ignore anything we don't care about
438 if not line[0] in interesting:
441 print("input:", line, end=' ', file=self.debug_out)
450 # we cannot ignore 'W' records
451 # as they may be 'rw'
455 self.cwd = cwd = self.last_dir = w[1]
456 self.seenit(cwd) # ignore this
458 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
464 pid_last_dir[last_pid] = self.last_dir
465 cwd = getv(pid_cwd, pid, self.cwd)
466 self.last_dir = getv(pid_last_dir, pid, self.cwd)
473 pid_last_dir[npid] = cwd
477 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
478 if cwd.endswith('/.'):
480 self.last_dir = pid_last_dir[pid] = cwd
483 print("cwd=", cwd, file=self.debug_out)
486 if w[2] in self.seen:
488 print("seen:", w[2], file=self.debug_out)
492 # these are special, treat src as read and
494 self.parse_path(w[1].strip("'"), cwd, 'R', w)
495 self.parse_path(w[2].strip("'"), cwd, 'W', w)
499 self.parse_path(path, cwd, w[0], w)
504 def is_src(self, base, dir, rdir):
505 """is base in srctop"""
506 for dir in [dir,rdir]:
509 path = '/'.join([dir,base])
510 srctop = self.find_top(path, self.srctops)
513 self.add(self.file_deps, path.replace(srctop,''), 'file')
514 self.add(self.src_deps, dir.replace(srctop,''), 'src')
519 def parse_path(self, path, cwd, op=None, w=[]):
520 """look at a path for the op specified"""
525 # we are never interested in .dirdep files as dependencies
526 if path.endswith('.dirdep'):
528 for p in self.excludes:
529 if p and path.startswith(p):
531 print("exclude:", p, path, file=self.debug_out)
533 # we don't want to resolve the last component if it is
535 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
538 dir,base = os.path.split(path)
541 print("seen:", dir, file=self.debug_out)
543 # we can have a path in an objdir which is a link
544 # to the src dir, we may need to add dependencies for each
546 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
547 rdir = os.path.realpath(dir)
550 # now put path back together
551 path = '/'.join([dir,base])
553 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
555 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
557 print("skipping:", path, file=self.debug_out)
559 if os.path.isdir(path):
561 self.last_dir = path;
563 print("ldir=", self.last_dir, file=self.debug_out)
567 # finally, we get down to it
568 if dir == self.cwd or dir == self.curdir:
570 if self.is_src(base, dir, rdir):
576 for dir in [dir,rdir]:
579 objroot = self.find_top(dir, self.objroots)
583 ddep = self.find_obj(objroot, dir, path, w[2])
585 self.add(self.obj_deps, ddep, 'obj')
586 if self.dpdeps and objroot.endswith('/stage/'):
587 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
588 self.add(self.file_deps, sp, 'file')
590 # don't waste time looking again
595 def main(argv, klass=MetaFile, xopts='', xoptf=None):
596 """Simple driver for class MetaFile.
599 script [options] [key=value ...] "meta" ...
601 Options and key=value pairs contribute to the
602 dictionary passed to MetaFile.
605 add "SRCTOP" to the "SRCTOPS" list.
610 add "OBJROOT" to the "OBJROOTS" list.
625 # import Psyco if we can
626 # it can speed things up quite a bit
642 machine = os.environ['MACHINE']
644 conf['MACHINE'] = machine
645 machine_arch = os.environ['MACHINE_ARCH']
647 conf['MACHINE_ARCH'] = machine_arch
648 srctop = os.environ['SB_SRC']
650 conf['SRCTOPS'].append(srctop)
651 objroot = os.environ['SB_OBJROOT']
653 conf['OBJROOTS'].append(objroot)
660 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
663 conf['MACHINE_ARCH'] = a
669 conf['HOST_TARGET'] = a
671 if a not in conf['SRCTOPS']:
672 conf['SRCTOPS'].append(a)
676 if a not in conf['OBJROOTS']:
677 conf['OBJROOTS'].append(a)
685 conf['TARGET_SPEC'] = a
687 if a not in conf['EXCLUDES']:
688 conf['EXCLUDES'].append(a)
692 conf['debug'] = debug
694 # get any var=val assignments
699 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
715 debug_out = getv(conf, 'debug_out', sys.stderr)
718 print("config:", file=debug_out)
719 print("psyco=", have_psyco, file=debug_out)
720 for k,v in list(conf.items()):
721 print("%s=%s" % (k,v), file=debug_out)
725 if a.endswith('.meta'):
726 if not os.path.exists(a):
729 elif a.startswith('@'):
730 # there can actually be multiple files per line
731 for line in open(a[1:]):
732 for f in line.strip().split():
733 if not os.path.exists(f):
740 print(m.src_dirdeps('\nsrc:'))
742 dpdeps = getv(conf, 'DPDEPS')
744 m.file_depends(open(dpdeps, 'wb'))
748 if __name__ == '__main__':
752 # yes, this goes to stdout
753 print("ERROR: ", sys.exc_info()[1])