3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
40 $Id: meta2deps.py,v 1.22 2016/12/12 19:07:42 sjg Exp $
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 def getv(dict, key, d=None):
71 """Lookup key in dict and return value or the supplied default."""
76 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
78 Return an absolute path, resolving via cwd or last_dir if needed.
80 if path.endswith('/.'):
82 if len(path) > 0 and path[0] == '/':
86 if path.startswith('./'):
90 for d in [last_dir, cwd]:
93 p = '/'.join([d,path])
95 print("looking for:", p, end=' ', file=debug_out)
96 if not os.path.exists(p):
98 print("nope", file=debug_out)
102 print("found:", p, file=debug_out)
106 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
108 Return an absolute path, resolving via cwd or last_dir if needed.
109 this gets called a lot, so we try to avoid calling realpath
110 until we know we have something.
112 rpath = resolve(path, cwd, last_dir, debug, debug_out)
115 if (path.find('/') < 0 or
116 path.find('./') > 0 or
117 path.endswith('/..') or
118 os.path.islink(path)):
119 return os.path.realpath(path)
122 def sort_unique(list, cmp=None, key=None, reverse=False):
123 list.sort(cmp, key, reverse)
133 return ['/' + x + '/',
139 """class to parse meta files generated by bmake."""
152 def __init__(self, name, conf={}):
153 """if name is set we will parse it now.
154 conf can have the following keys:
156 SRCTOPS list of tops of the src tree(s).
158 CURDIR the src directory 'bmake' was run from.
160 RELDIR the relative path from SRCTOP to CURDIR
162 MACHINE the machine we built for.
163 set to 'none' if we are not cross-building.
164 More specifically if machine cannot be deduced from objdirs.
167 Sometimes MACHINE isn't enough.
170 when we build for the pseudo machine 'host'
171 the object tree uses HOST_TARGET rather than MACHINE.
173 OBJROOTS a list of the common prefix for all obj dirs it might
176 DPDEPS names an optional file to which per file dependencies
178 For example if 'some/path/foo.h' is read from SRCTOP
179 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
180 This can allow 'bmake' to learn all the dirs within
181 the tree that depend on 'foo.h'
184 A list of paths to ignore.
185 ccache(1) can otherwise be trouble.
187 debug desired debug level
189 debug_out open file to send debug output to (sys.stderr)
194 self.debug = getv(conf, 'debug', 0)
195 self.debug_out = getv(conf, 'debug_out', sys.stderr)
197 self.machine = getv(conf, 'MACHINE', '')
198 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
199 self.target_spec = getv(conf, 'TARGET_SPEC', '')
200 self.curdir = getv(conf, 'CURDIR')
201 self.reldir = getv(conf, 'RELDIR')
202 self.dpdeps = getv(conf, 'DPDEPS')
206 # some of the steps below we want to do only once
208 self.host_target = getv(conf, 'HOST_TARGET')
209 for srctop in getv(conf, 'SRCTOPS', []):
210 if srctop[-1] != '/':
212 if not srctop in self.srctops:
213 self.srctops.append(srctop)
214 _srctop = os.path.realpath(srctop)
215 if _srctop[-1] != '/':
217 if not _srctop in self.srctops:
218 self.srctops.append(_srctop)
220 trim_list = add_trims(self.machine)
221 if self.machine == 'host':
222 trim_list += add_trims(self.host_target)
224 trim_list += add_trims(self.target_spec)
226 for objroot in getv(conf, 'OBJROOTS', []):
228 if objroot.endswith(e):
229 # this is not what we want - fix it
230 objroot = objroot[0:-len(e)]
232 if objroot[-1] != '/':
234 if not objroot in self.objroots:
235 self.objroots.append(objroot)
236 _objroot = os.path.realpath(objroot)
237 if objroot[-1] == '/':
239 if not _objroot in self.objroots:
240 self.objroots.append(_objroot)
242 # we want the longest match
243 self.srctops.sort(reverse=True)
244 self.objroots.sort(reverse=True)
246 self.excludes = getv(conf, 'EXCLUDES', [])
249 print("host_target=", self.host_target, file=self.debug_out)
250 print("srctops=", self.srctops, file=self.debug_out)
251 print("objroots=", self.objroots, file=self.debug_out)
252 print("excludes=", self.excludes, file=self.debug_out)
254 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
256 if self.dpdeps and not self.reldir:
258 print("need reldir:", end=' ', file=self.debug_out)
260 srctop = self.find_top(self.curdir, self.srctops)
262 self.reldir = self.curdir.replace(srctop,'')
264 print(self.reldir, file=self.debug_out)
266 self.dpdeps = None # we cannot do it?
268 self.cwd = os.getcwd() # make sure this is initialized
269 self.last_dir = self.cwd
275 """reset state if we are being passed meta files from multiple directories."""
281 def dirdeps(self, sep='\n'):
283 return sep.strip() + sep.join(self.obj_deps)
285 def src_dirdeps(self, sep='\n'):
286 """return SRC_DIRDEPS"""
287 return sep.strip() + sep.join(self.src_deps)
289 def file_depends(self, out=None):
290 """Append DPDEPS_${file} += ${RELDIR}
291 for each file we saw, to the output file."""
294 for f in sort_unique(self.file_deps):
295 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
296 # these entries provide for reverse DIRDEPS lookup
297 for f in self.obj_deps:
298 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
300 def seenit(self, dir):
301 """remember that we have seen dir."""
304 def add(self, list, data, clue=''):
305 """add data to list if it isn't already there."""
309 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
311 def find_top(self, path, list):
312 """the logical tree may be split across multiple trees"""
314 if path.startswith(top):
316 print("found in", top, file=self.debug_out)
320 def find_obj(self, objroot, dir, path, input):
321 """return path within objroot, taking care of .dirdep files"""
323 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
324 if not ddep and os.path.exists(ddepf):
325 ddep = open(ddepf, 'r').readline().strip('# \n')
327 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
328 if ddep.endswith(self.machine):
329 ddep = ddep[0:-(1+len(self.machine))]
330 elif self.target_spec and ddep.endswith(self.target_spec):
331 ddep = ddep[0:-(1+len(self.target_spec))]
334 # no .dirdeps, so remember that we've seen the raw input
337 if self.machine == 'none':
338 if dir.startswith(objroot):
339 return dir.replace(objroot,'')
341 m = self.dirdep_re.match(dir.replace(objroot,''))
344 dmachine = m.group(1)
345 if dmachine != self.machine:
346 if not (self.machine == 'host' and
347 dmachine == self.host_target):
349 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
350 ddep += '.' + dmachine
354 def try_parse(self, name=None, file=None):
355 """give file and line number causing exception"""
357 self.parse(name, file)
360 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
363 def parse(self, name=None, file=None):
364 """A meta file looks like:
366 # Meta data file "path"
371 -- filemon acquired metadata --
381 L "pid" "src" "target"
386 We go to some effort to avoid processing a dependency more than once.
387 Of the above record types only C,E,F,L,R,V and W are of interest.
390 version = 0 # unknown
395 cwd = self.last_dir = self.cwd
397 f = open(self.name, 'r')
405 self.seenit(self.curdir) # we ignore this
407 interesting = 'CEFLRV'
410 # ignore anything we don't care about
411 if not line[0] in interesting:
414 print("input:", line, end=' ', file=self.debug_out)
423 # we cannot ignore 'W' records
424 # as they may be 'rw'
428 self.cwd = cwd = self.last_dir = w[1]
429 self.seenit(cwd) # ignore this
431 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
437 pid_last_dir[last_pid] = self.last_dir
438 cwd = getv(pid_cwd, pid, self.cwd)
439 self.last_dir = getv(pid_last_dir, pid, self.cwd)
446 pid_last_dir[npid] = cwd
450 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
451 if cwd.endswith('/.'):
453 self.last_dir = pid_last_dir[pid] = cwd
456 print("cwd=", cwd, file=self.debug_out)
459 if w[2] in self.seen:
461 print("seen:", w[2], file=self.debug_out)
465 # these are special, treat src as read and
467 self.parse_path(w[1].strip("'"), cwd, 'R', w)
468 self.parse_path(w[2].strip("'"), cwd, 'W', w)
472 self.parse_path(path, cwd, w[0], w)
477 def parse_path(self, path, cwd, op=None, w=[]):
478 """look at a path for the op specified"""
483 # we are never interested in .dirdep files as dependencies
484 if path.endswith('.dirdep'):
486 for p in self.excludes:
487 if p and path.startswith(p):
489 print("exclude:", p, path, file=self.debug_out)
491 # we don't want to resolve the last component if it is
493 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
496 dir,base = os.path.split(path)
499 print("seen:", dir, file=self.debug_out)
501 # we can have a path in an objdir which is a link
502 # to the src dir, we may need to add dependencies for each
504 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
505 if rdir == dir or rdir.find('./') > 0:
507 # now put path back together
508 path = '/'.join([dir,base])
510 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
512 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
514 print("skipping:", path, file=self.debug_out)
516 if os.path.isdir(path):
518 self.last_dir = path;
520 print("ldir=", self.last_dir, file=self.debug_out)
524 # finally, we get down to it
525 if dir == self.cwd or dir == self.curdir:
527 srctop = self.find_top(path, self.srctops)
530 self.add(self.file_deps, path.replace(srctop,''), 'file')
531 self.add(self.src_deps, dir.replace(srctop,''), 'src')
534 if rdir and not rdir.startswith(srctop):
535 dir = rdir # for below
541 for dir in [dir,rdir]:
544 objroot = self.find_top(dir, self.objroots)
548 ddep = self.find_obj(objroot, dir, path, w[2])
550 self.add(self.obj_deps, ddep, 'obj')
551 if self.dpdeps and objroot.endswith('/stage/'):
552 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
553 self.add(self.file_deps, sp, 'file')
555 # don't waste time looking again
560 def main(argv, klass=MetaFile, xopts='', xoptf=None):
561 """Simple driver for class MetaFile.
564 script [options] [key=value ...] "meta" ...
566 Options and key=value pairs contribute to the
567 dictionary passed to MetaFile.
570 add "SRCTOP" to the "SRCTOPS" list.
575 add "OBJROOT" to the "OBJROOTS" list.
590 # import Psyco if we can
591 # it can speed things up quite a bit
607 machine = os.environ['MACHINE']
609 conf['MACHINE'] = machine
610 machine_arch = os.environ['MACHINE_ARCH']
612 conf['MACHINE_ARCH'] = machine_arch
613 srctop = os.environ['SB_SRC']
615 conf['SRCTOPS'].append(srctop)
616 objroot = os.environ['SB_OBJROOT']
618 conf['OBJROOTS'].append(objroot)
625 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
628 conf['MACHINE_ARCH'] = a
634 conf['HOST_TARGET'] = a
636 if a not in conf['SRCTOPS']:
637 conf['SRCTOPS'].append(a)
641 if a not in conf['OBJROOTS']:
642 conf['OBJROOTS'].append(a)
650 conf['TARGET_SPEC'] = a
652 if a not in conf['EXCLUDES']:
653 conf['EXCLUDES'].append(a)
657 conf['debug'] = debug
659 # get any var=val assignments
664 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
680 debug_out = getv(conf, 'debug_out', sys.stderr)
683 print("config:", file=debug_out)
684 print("psyco=", have_psyco, file=debug_out)
685 for k,v in list(conf.items()):
686 print("%s=%s" % (k,v), file=debug_out)
690 if a.endswith('.meta'):
691 if not os.path.exists(a):
694 elif a.startswith('@'):
695 # there can actually be multiple files per line
696 for line in open(a[1:]):
697 for f in line.strip().split():
698 if not os.path.exists(f):
705 print(m.src_dirdeps('\nsrc:'))
707 dpdeps = getv(conf, 'DPDEPS')
709 m.file_depends(open(dpdeps, 'wb'))
713 if __name__ == '__main__':
717 # yes, this goes to stdout
718 print("ERROR: ", sys.exc_info()[1])