3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
40 $Id: meta2deps.py,v 1.27 2017/05/24 00:04:04 sjg Exp $
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 def getv(dict, key, d=None):
71 """Lookup key in dict and return value or the supplied default."""
76 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
78 Return an absolute path, resolving via cwd or last_dir if needed.
80 if path.endswith('/.'):
82 if len(path) > 0 and path[0] == '/':
86 if path.startswith('./'):
90 for d in [last_dir, cwd]:
99 p = '/'.join([d,path])
101 print("looking for:", p, end=' ', file=debug_out)
102 if not os.path.exists(p):
104 print("nope", file=debug_out)
108 print("found:", p, file=debug_out)
113 """cleanup path without using realpath(3)"""
114 if path.startswith('/'):
121 if not d or d == '.':
131 return r + '/'.join(p)
133 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
135 Return an absolute path, resolving via cwd or last_dir if needed.
136 this gets called a lot, so we try to avoid calling realpath.
138 rpath = resolve(path, cwd, last_dir, debug, debug_out)
141 if (path.find('/') < 0 or
142 path.find('./') > 0 or
143 path.endswith('/..')):
144 path = cleanpath(path)
147 def sort_unique(list, cmp=None, key=None, reverse=False):
148 list.sort(cmp, key, reverse)
159 return ['/' + x + '/',
165 """class to parse meta files generated by bmake."""
178 def __init__(self, name, conf={}):
179 """if name is set we will parse it now.
180 conf can have the following keys:
182 SRCTOPS list of tops of the src tree(s).
184 CURDIR the src directory 'bmake' was run from.
186 RELDIR the relative path from SRCTOP to CURDIR
188 MACHINE the machine we built for.
189 set to 'none' if we are not cross-building.
190 More specifically if machine cannot be deduced from objdirs.
193 Sometimes MACHINE isn't enough.
196 when we build for the pseudo machine 'host'
197 the object tree uses HOST_TARGET rather than MACHINE.
199 OBJROOTS a list of the common prefix for all obj dirs it might
202 DPDEPS names an optional file to which per file dependencies
204 For example if 'some/path/foo.h' is read from SRCTOP
205 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
206 This can allow 'bmake' to learn all the dirs within
207 the tree that depend on 'foo.h'
210 A list of paths to ignore.
211 ccache(1) can otherwise be trouble.
213 debug desired debug level
215 debug_out open file to send debug output to (sys.stderr)
220 self.debug = getv(conf, 'debug', 0)
221 self.debug_out = getv(conf, 'debug_out', sys.stderr)
223 self.machine = getv(conf, 'MACHINE', '')
224 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
225 self.target_spec = getv(conf, 'TARGET_SPEC', '')
226 self.curdir = getv(conf, 'CURDIR')
227 self.reldir = getv(conf, 'RELDIR')
228 self.dpdeps = getv(conf, 'DPDEPS')
232 # some of the steps below we want to do only once
234 self.host_target = getv(conf, 'HOST_TARGET')
235 for srctop in getv(conf, 'SRCTOPS', []):
236 if srctop[-1] != '/':
238 if not srctop in self.srctops:
239 self.srctops.append(srctop)
240 _srctop = os.path.realpath(srctop)
241 if _srctop[-1] != '/':
243 if not _srctop in self.srctops:
244 self.srctops.append(_srctop)
246 trim_list = add_trims(self.machine)
247 if self.machine == 'host':
248 trim_list += add_trims(self.host_target)
250 trim_list += add_trims(self.target_spec)
252 for objroot in getv(conf, 'OBJROOTS', []):
254 if objroot.endswith(e):
255 # this is not what we want - fix it
256 objroot = objroot[0:-len(e)]
258 if objroot[-1] != '/':
260 if not objroot in self.objroots:
261 self.objroots.append(objroot)
262 _objroot = os.path.realpath(objroot)
263 if objroot[-1] == '/':
265 if not _objroot in self.objroots:
266 self.objroots.append(_objroot)
268 # we want the longest match
269 self.srctops.sort(reverse=True)
270 self.objroots.sort(reverse=True)
272 self.excludes = getv(conf, 'EXCLUDES', [])
275 print("host_target=", self.host_target, file=self.debug_out)
276 print("srctops=", self.srctops, file=self.debug_out)
277 print("objroots=", self.objroots, file=self.debug_out)
278 print("excludes=", self.excludes, file=self.debug_out)
280 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
282 if self.dpdeps and not self.reldir:
284 print("need reldir:", end=' ', file=self.debug_out)
286 srctop = self.find_top(self.curdir, self.srctops)
288 self.reldir = self.curdir.replace(srctop,'')
290 print(self.reldir, file=self.debug_out)
292 self.dpdeps = None # we cannot do it?
294 self.cwd = os.getcwd() # make sure this is initialized
295 self.last_dir = self.cwd
301 """reset state if we are being passed meta files from multiple directories."""
307 def dirdeps(self, sep='\n'):
309 return sep.strip() + sep.join(self.obj_deps)
311 def src_dirdeps(self, sep='\n'):
312 """return SRC_DIRDEPS"""
313 return sep.strip() + sep.join(self.src_deps)
315 def file_depends(self, out=None):
316 """Append DPDEPS_${file} += ${RELDIR}
317 for each file we saw, to the output file."""
320 for f in sort_unique(self.file_deps):
321 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
322 # these entries provide for reverse DIRDEPS lookup
323 for f in self.obj_deps:
324 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
326 def seenit(self, dir):
327 """remember that we have seen dir."""
330 def add(self, list, data, clue=''):
331 """add data to list if it isn't already there."""
335 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
337 def find_top(self, path, list):
338 """the logical tree may be split across multiple trees"""
340 if path.startswith(top):
342 print("found in", top, file=self.debug_out)
346 def find_obj(self, objroot, dir, path, input):
347 """return path within objroot, taking care of .dirdep files"""
349 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
350 if not ddep and os.path.exists(ddepf):
351 ddep = open(ddepf, 'r').readline().strip('# \n')
353 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
354 if ddep.endswith(self.machine):
355 ddep = ddep[0:-(1+len(self.machine))]
356 elif self.target_spec and ddep.endswith(self.target_spec):
357 ddep = ddep[0:-(1+len(self.target_spec))]
360 # no .dirdeps, so remember that we've seen the raw input
363 if self.machine == 'none':
364 if dir.startswith(objroot):
365 return dir.replace(objroot,'')
367 m = self.dirdep_re.match(dir.replace(objroot,''))
370 dmachine = m.group(1)
371 if dmachine != self.machine:
372 if not (self.machine == 'host' and
373 dmachine == self.host_target):
375 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
376 ddep += '.' + dmachine
380 def try_parse(self, name=None, file=None):
381 """give file and line number causing exception"""
383 self.parse(name, file)
386 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
389 def parse(self, name=None, file=None):
390 """A meta file looks like:
392 # Meta data file "path"
397 -- filemon acquired metadata --
407 L "pid" "src" "target"
412 We go to some effort to avoid processing a dependency more than once.
413 Of the above record types only C,E,F,L,R,V and W are of interest.
416 version = 0 # unknown
421 cwd = self.last_dir = self.cwd
423 f = open(self.name, 'r')
431 self.seenit(self.curdir) # we ignore this
433 interesting = 'CEFLRV'
436 # ignore anything we don't care about
437 if not line[0] in interesting:
440 print("input:", line, end=' ', file=self.debug_out)
449 # we cannot ignore 'W' records
450 # as they may be 'rw'
454 self.cwd = cwd = self.last_dir = w[1]
455 self.seenit(cwd) # ignore this
457 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
463 pid_last_dir[last_pid] = self.last_dir
464 cwd = getv(pid_cwd, pid, self.cwd)
465 self.last_dir = getv(pid_last_dir, pid, self.cwd)
472 pid_last_dir[npid] = cwd
476 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
477 if cwd.endswith('/.'):
479 self.last_dir = pid_last_dir[pid] = cwd
482 print("cwd=", cwd, file=self.debug_out)
485 if w[2] in self.seen:
487 print("seen:", w[2], file=self.debug_out)
491 # these are special, treat src as read and
493 self.parse_path(w[1].strip("'"), cwd, 'R', w)
494 self.parse_path(w[2].strip("'"), cwd, 'W', w)
498 self.parse_path(path, cwd, w[0], w)
503 def is_src(self, base, dir, rdir):
504 """is base in srctop"""
505 for dir in [dir,rdir]:
508 path = '/'.join([dir,base])
509 srctop = self.find_top(path, self.srctops)
512 self.add(self.file_deps, path.replace(srctop,''), 'file')
513 self.add(self.src_deps, dir.replace(srctop,''), 'src')
518 def parse_path(self, path, cwd, op=None, w=[]):
519 """look at a path for the op specified"""
524 # we are never interested in .dirdep files as dependencies
525 if path.endswith('.dirdep'):
527 for p in self.excludes:
528 if p and path.startswith(p):
530 print("exclude:", p, path, file=self.debug_out)
532 # we don't want to resolve the last component if it is
534 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
537 dir,base = os.path.split(path)
540 print("seen:", dir, file=self.debug_out)
542 # we can have a path in an objdir which is a link
543 # to the src dir, we may need to add dependencies for each
545 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
546 rdir = os.path.realpath(dir)
549 # now put path back together
550 path = '/'.join([dir,base])
552 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
554 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
556 print("skipping:", path, file=self.debug_out)
558 if os.path.isdir(path):
560 self.last_dir = path;
562 print("ldir=", self.last_dir, file=self.debug_out)
566 # finally, we get down to it
567 if dir == self.cwd or dir == self.curdir:
569 if self.is_src(base, dir, rdir):
575 for dir in [dir,rdir]:
578 objroot = self.find_top(dir, self.objroots)
582 ddep = self.find_obj(objroot, dir, path, w[2])
584 self.add(self.obj_deps, ddep, 'obj')
585 if self.dpdeps and objroot.endswith('/stage/'):
586 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
587 self.add(self.file_deps, sp, 'file')
589 # don't waste time looking again
594 def main(argv, klass=MetaFile, xopts='', xoptf=None):
595 """Simple driver for class MetaFile.
598 script [options] [key=value ...] "meta" ...
600 Options and key=value pairs contribute to the
601 dictionary passed to MetaFile.
604 add "SRCTOP" to the "SRCTOPS" list.
609 add "OBJROOT" to the "OBJROOTS" list.
624 # import Psyco if we can
625 # it can speed things up quite a bit
641 machine = os.environ['MACHINE']
643 conf['MACHINE'] = machine
644 machine_arch = os.environ['MACHINE_ARCH']
646 conf['MACHINE_ARCH'] = machine_arch
647 srctop = os.environ['SB_SRC']
649 conf['SRCTOPS'].append(srctop)
650 objroot = os.environ['SB_OBJROOT']
652 conf['OBJROOTS'].append(objroot)
659 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
662 conf['MACHINE_ARCH'] = a
668 conf['HOST_TARGET'] = a
670 if a not in conf['SRCTOPS']:
671 conf['SRCTOPS'].append(a)
675 if a not in conf['OBJROOTS']:
676 conf['OBJROOTS'].append(a)
684 conf['TARGET_SPEC'] = a
686 if a not in conf['EXCLUDES']:
687 conf['EXCLUDES'].append(a)
691 conf['debug'] = debug
693 # get any var=val assignments
698 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
714 debug_out = getv(conf, 'debug_out', sys.stderr)
717 print("config:", file=debug_out)
718 print("psyco=", have_psyco, file=debug_out)
719 for k,v in list(conf.items()):
720 print("%s=%s" % (k,v), file=debug_out)
724 if a.endswith('.meta'):
725 if not os.path.exists(a):
728 elif a.startswith('@'):
729 # there can actually be multiple files per line
730 for line in open(a[1:]):
731 for f in line.strip().split():
732 if not os.path.exists(f):
739 print(m.src_dirdeps('\nsrc:'))
741 dpdeps = getv(conf, 'DPDEPS')
743 m.file_depends(open(dpdeps, 'wb'))
747 if __name__ == '__main__':
751 # yes, this goes to stdout
752 print("ERROR: ", sys.exc_info()[1])