3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion of the current working directory.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read provide a clue to resolving
22 subsequent relative paths. That is, if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
40 $Id: meta2deps.py,v 1.24 2017/02/08 22:17:10 sjg Exp $
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 def getv(dict, key, d=None):
71 """Lookup key in dict and return value or the supplied default."""
76 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
78 Return an absolute path, resolving via cwd or last_dir if needed.
80 if path.endswith('/.'):
82 if len(path) > 0 and path[0] == '/':
86 if path.startswith('./'):
90 for d in [last_dir, cwd]:
93 p = '/'.join([d,path])
95 print("looking for:", p, end=' ', file=debug_out)
96 if not os.path.exists(p):
98 print("nope", file=debug_out)
102 print("found:", p, file=debug_out)
107 """cleanup path without using realpath(3)"""
108 if path.startswith('/'):
115 if not d or d == '.':
122 return r + '/'.join(p)
124 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
126 Return an absolute path, resolving via cwd or last_dir if needed.
127 this gets called a lot, so we try to avoid calling realpath.
129 rpath = resolve(path, cwd, last_dir, debug, debug_out)
132 if (path.find('/') < 0 or
133 path.find('./') > 0 or
134 path.endswith('/..')):
135 path = cleanpath(path)
138 def sort_unique(list, cmp=None, key=None, reverse=False):
139 list.sort(cmp, key, reverse)
150 return ['/' + x + '/',
156 """class to parse meta files generated by bmake."""
169 def __init__(self, name, conf={}):
170 """if name is set we will parse it now.
171 conf can have the following keys:
173 SRCTOPS list of tops of the src tree(s).
175 CURDIR the src directory 'bmake' was run from.
177 RELDIR the relative path from SRCTOP to CURDIR
179 MACHINE the machine we built for.
180 set to 'none' if we are not cross-building.
181 More specifically if machine cannot be deduced from objdirs.
184 Sometimes MACHINE isn't enough.
187 when we build for the pseudo machine 'host'
188 the object tree uses HOST_TARGET rather than MACHINE.
190 OBJROOTS a list of the common prefix for all obj dirs it might
193 DPDEPS names an optional file to which per file dependencies
195 For example if 'some/path/foo.h' is read from SRCTOP
196 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
197 This can allow 'bmake' to learn all the dirs within
198 the tree that depend on 'foo.h'
201 A list of paths to ignore.
202 ccache(1) can otherwise be trouble.
204 debug desired debug level
206 debug_out open file to send debug output to (sys.stderr)
211 self.debug = getv(conf, 'debug', 0)
212 self.debug_out = getv(conf, 'debug_out', sys.stderr)
214 self.machine = getv(conf, 'MACHINE', '')
215 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
216 self.target_spec = getv(conf, 'TARGET_SPEC', '')
217 self.curdir = getv(conf, 'CURDIR')
218 self.reldir = getv(conf, 'RELDIR')
219 self.dpdeps = getv(conf, 'DPDEPS')
223 # some of the steps below we want to do only once
225 self.host_target = getv(conf, 'HOST_TARGET')
226 for srctop in getv(conf, 'SRCTOPS', []):
227 if srctop[-1] != '/':
229 if not srctop in self.srctops:
230 self.srctops.append(srctop)
231 _srctop = os.path.realpath(srctop)
232 if _srctop[-1] != '/':
234 if not _srctop in self.srctops:
235 self.srctops.append(_srctop)
237 trim_list = add_trims(self.machine)
238 if self.machine == 'host':
239 trim_list += add_trims(self.host_target)
241 trim_list += add_trims(self.target_spec)
243 for objroot in getv(conf, 'OBJROOTS', []):
245 if objroot.endswith(e):
246 # this is not what we want - fix it
247 objroot = objroot[0:-len(e)]
249 if objroot[-1] != '/':
251 if not objroot in self.objroots:
252 self.objroots.append(objroot)
253 _objroot = os.path.realpath(objroot)
254 if objroot[-1] == '/':
256 if not _objroot in self.objroots:
257 self.objroots.append(_objroot)
259 # we want the longest match
260 self.srctops.sort(reverse=True)
261 self.objroots.sort(reverse=True)
263 self.excludes = getv(conf, 'EXCLUDES', [])
266 print("host_target=", self.host_target, file=self.debug_out)
267 print("srctops=", self.srctops, file=self.debug_out)
268 print("objroots=", self.objroots, file=self.debug_out)
269 print("excludes=", self.excludes, file=self.debug_out)
271 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
273 if self.dpdeps and not self.reldir:
275 print("need reldir:", end=' ', file=self.debug_out)
277 srctop = self.find_top(self.curdir, self.srctops)
279 self.reldir = self.curdir.replace(srctop,'')
281 print(self.reldir, file=self.debug_out)
283 self.dpdeps = None # we cannot do it?
285 self.cwd = os.getcwd() # make sure this is initialized
286 self.last_dir = self.cwd
292 """reset state if we are being passed meta files from multiple directories."""
298 def dirdeps(self, sep='\n'):
300 return sep.strip() + sep.join(self.obj_deps)
302 def src_dirdeps(self, sep='\n'):
303 """return SRC_DIRDEPS"""
304 return sep.strip() + sep.join(self.src_deps)
306 def file_depends(self, out=None):
307 """Append DPDEPS_${file} += ${RELDIR}
308 for each file we saw, to the output file."""
311 for f in sort_unique(self.file_deps):
312 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
313 # these entries provide for reverse DIRDEPS lookup
314 for f in self.obj_deps:
315 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
317 def seenit(self, dir):
318 """remember that we have seen dir."""
321 def add(self, list, data, clue=''):
322 """add data to list if it isn't already there."""
326 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
328 def find_top(self, path, list):
329 """the logical tree may be split across multiple trees"""
331 if path.startswith(top):
333 print("found in", top, file=self.debug_out)
337 def find_obj(self, objroot, dir, path, input):
338 """return path within objroot, taking care of .dirdep files"""
340 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
341 if not ddep and os.path.exists(ddepf):
342 ddep = open(ddepf, 'r').readline().strip('# \n')
344 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
345 if ddep.endswith(self.machine):
346 ddep = ddep[0:-(1+len(self.machine))]
347 elif self.target_spec and ddep.endswith(self.target_spec):
348 ddep = ddep[0:-(1+len(self.target_spec))]
351 # no .dirdeps, so remember that we've seen the raw input
354 if self.machine == 'none':
355 if dir.startswith(objroot):
356 return dir.replace(objroot,'')
358 m = self.dirdep_re.match(dir.replace(objroot,''))
361 dmachine = m.group(1)
362 if dmachine != self.machine:
363 if not (self.machine == 'host' and
364 dmachine == self.host_target):
366 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
367 ddep += '.' + dmachine
371 def try_parse(self, name=None, file=None):
372 """give file and line number causing exception"""
374 self.parse(name, file)
377 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
380 def parse(self, name=None, file=None):
381 """A meta file looks like:
383 # Meta data file "path"
388 -- filemon acquired metadata --
398 L "pid" "src" "target"
403 We go to some effort to avoid processing a dependency more than once.
404 Of the above record types only C,E,F,L,R,V and W are of interest.
407 version = 0 # unknown
412 cwd = self.last_dir = self.cwd
414 f = open(self.name, 'r')
422 self.seenit(self.curdir) # we ignore this
424 interesting = 'CEFLRV'
427 # ignore anything we don't care about
428 if not line[0] in interesting:
431 print("input:", line, end=' ', file=self.debug_out)
440 # we cannot ignore 'W' records
441 # as they may be 'rw'
445 self.cwd = cwd = self.last_dir = w[1]
446 self.seenit(cwd) # ignore this
448 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
454 pid_last_dir[last_pid] = self.last_dir
455 cwd = getv(pid_cwd, pid, self.cwd)
456 self.last_dir = getv(pid_last_dir, pid, self.cwd)
463 pid_last_dir[npid] = cwd
467 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
468 if cwd.endswith('/.'):
470 self.last_dir = pid_last_dir[pid] = cwd
473 print("cwd=", cwd, file=self.debug_out)
476 if w[2] in self.seen:
478 print("seen:", w[2], file=self.debug_out)
482 # these are special, treat src as read and
484 self.parse_path(w[1].strip("'"), cwd, 'R', w)
485 self.parse_path(w[2].strip("'"), cwd, 'W', w)
489 self.parse_path(path, cwd, w[0], w)
494 def parse_path(self, path, cwd, op=None, w=[]):
495 """look at a path for the op specified"""
500 # we are never interested in .dirdep files as dependencies
501 if path.endswith('.dirdep'):
503 for p in self.excludes:
504 if p and path.startswith(p):
506 print("exclude:", p, path, file=self.debug_out)
508 # we don't want to resolve the last component if it is
510 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
513 dir,base = os.path.split(path)
516 print("seen:", dir, file=self.debug_out)
518 # we can have a path in an objdir which is a link
519 # to the src dir, we may need to add dependencies for each
521 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
522 if rdir == dir or rdir.find('./') > 0:
524 if os.path.islink(dir):
525 rdir = os.path.realpath(dir)
526 # now put path back together
527 path = '/'.join([dir,base])
529 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
531 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
533 print("skipping:", path, file=self.debug_out)
535 if os.path.isdir(path):
537 self.last_dir = path;
539 print("ldir=", self.last_dir, file=self.debug_out)
543 # finally, we get down to it
544 if dir == self.cwd or dir == self.curdir:
546 srctop = self.find_top(path, self.srctops)
549 self.add(self.file_deps, path.replace(srctop,''), 'file')
550 self.add(self.src_deps, dir.replace(srctop,''), 'src')
553 if rdir and not rdir.startswith(srctop):
554 dir = rdir # for below
560 for dir in [dir,rdir]:
563 objroot = self.find_top(dir, self.objroots)
567 ddep = self.find_obj(objroot, dir, path, w[2])
569 self.add(self.obj_deps, ddep, 'obj')
570 if self.dpdeps and objroot.endswith('/stage/'):
571 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
572 self.add(self.file_deps, sp, 'file')
574 # don't waste time looking again
579 def main(argv, klass=MetaFile, xopts='', xoptf=None):
580 """Simple driver for class MetaFile.
583 script [options] [key=value ...] "meta" ...
585 Options and key=value pairs contribute to the
586 dictionary passed to MetaFile.
589 add "SRCTOP" to the "SRCTOPS" list.
594 add "OBJROOT" to the "OBJROOTS" list.
609 # import Psyco if we can
610 # it can speed things up quite a bit
626 machine = os.environ['MACHINE']
628 conf['MACHINE'] = machine
629 machine_arch = os.environ['MACHINE_ARCH']
631 conf['MACHINE_ARCH'] = machine_arch
632 srctop = os.environ['SB_SRC']
634 conf['SRCTOPS'].append(srctop)
635 objroot = os.environ['SB_OBJROOT']
637 conf['OBJROOTS'].append(objroot)
644 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
647 conf['MACHINE_ARCH'] = a
653 conf['HOST_TARGET'] = a
655 if a not in conf['SRCTOPS']:
656 conf['SRCTOPS'].append(a)
660 if a not in conf['OBJROOTS']:
661 conf['OBJROOTS'].append(a)
669 conf['TARGET_SPEC'] = a
671 if a not in conf['EXCLUDES']:
672 conf['EXCLUDES'].append(a)
676 conf['debug'] = debug
678 # get any var=val assignments
683 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
699 debug_out = getv(conf, 'debug_out', sys.stderr)
702 print("config:", file=debug_out)
703 print("psyco=", have_psyco, file=debug_out)
704 for k,v in list(conf.items()):
705 print("%s=%s" % (k,v), file=debug_out)
709 if a.endswith('.meta'):
710 if not os.path.exists(a):
713 elif a.startswith('@'):
714 # there can actually be multiple files per line
715 for line in open(a[1:]):
716 for f in line.strip().split():
717 if not os.path.exists(f):
724 print(m.src_dirdeps('\nsrc:'))
726 dpdeps = getv(conf, 'DPDEPS')
728 m.file_depends(open(dpdeps, 'wb'))
732 if __name__ == '__main__':
736 # yes, this goes to stdout
737 print("ERROR: ", sys.exc_info()[1])