3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
40 $Id: meta2deps.py,v 1.26 2017/05/09 04:04:16 sjg Exp $
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 def getv(dict, key, d=None):
71 """Lookup key in dict and return value or the supplied default."""
76 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
78 Return an absolute path, resolving via cwd or last_dir if needed.
80 if path.endswith('/.'):
82 if len(path) > 0 and path[0] == '/':
86 if path.startswith('./'):
90 for d in [last_dir, cwd]:
93 p = '/'.join([d,path])
95 print("looking for:", p, end=' ', file=debug_out)
96 if not os.path.exists(p):
98 print("nope", file=debug_out)
102 print("found:", p, file=debug_out)
107 """cleanup path without using realpath(3)"""
108 if path.startswith('/'):
115 if not d or d == '.':
122 return r + '/'.join(p)
124 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
126 Return an absolute path, resolving via cwd or last_dir if needed.
127 this gets called a lot, so we try to avoid calling realpath.
129 rpath = resolve(path, cwd, last_dir, debug, debug_out)
132 if (path.find('/') < 0 or
133 path.find('./') > 0 or
134 path.endswith('/..')):
135 path = cleanpath(path)
138 def sort_unique(list, cmp=None, key=None, reverse=False):
139 list.sort(cmp, key, reverse)
150 return ['/' + x + '/',
156 """class to parse meta files generated by bmake."""
169 def __init__(self, name, conf={}):
170 """if name is set we will parse it now.
171 conf can have the following keys:
173 SRCTOPS list of tops of the src tree(s).
175 CURDIR the src directory 'bmake' was run from.
177 RELDIR the relative path from SRCTOP to CURDIR
179 MACHINE the machine we built for.
180 set to 'none' if we are not cross-building.
181 More specifically if machine cannot be deduced from objdirs.
184 Sometimes MACHINE isn't enough.
187 when we build for the pseudo machine 'host'
188 the object tree uses HOST_TARGET rather than MACHINE.
190 OBJROOTS a list of the common prefix for all obj dirs it might
193 DPDEPS names an optional file to which per file dependencies
195 For example if 'some/path/foo.h' is read from SRCTOP
196 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
197 This can allow 'bmake' to learn all the dirs within
198 the tree that depend on 'foo.h'
201 A list of paths to ignore.
202 ccache(1) can otherwise be trouble.
204 debug desired debug level
206 debug_out open file to send debug output to (sys.stderr)
211 self.debug = getv(conf, 'debug', 0)
212 self.debug_out = getv(conf, 'debug_out', sys.stderr)
214 self.machine = getv(conf, 'MACHINE', '')
215 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
216 self.target_spec = getv(conf, 'TARGET_SPEC', '')
217 self.curdir = getv(conf, 'CURDIR')
218 self.reldir = getv(conf, 'RELDIR')
219 self.dpdeps = getv(conf, 'DPDEPS')
223 # some of the steps below we want to do only once
225 self.host_target = getv(conf, 'HOST_TARGET')
226 for srctop in getv(conf, 'SRCTOPS', []):
227 if srctop[-1] != '/':
229 if not srctop in self.srctops:
230 self.srctops.append(srctop)
231 _srctop = os.path.realpath(srctop)
232 if _srctop[-1] != '/':
234 if not _srctop in self.srctops:
235 self.srctops.append(_srctop)
237 trim_list = add_trims(self.machine)
238 if self.machine == 'host':
239 trim_list += add_trims(self.host_target)
241 trim_list += add_trims(self.target_spec)
243 for objroot in getv(conf, 'OBJROOTS', []):
245 if objroot.endswith(e):
246 # this is not what we want - fix it
247 objroot = objroot[0:-len(e)]
249 if objroot[-1] != '/':
251 if not objroot in self.objroots:
252 self.objroots.append(objroot)
253 _objroot = os.path.realpath(objroot)
254 if objroot[-1] == '/':
256 if not _objroot in self.objroots:
257 self.objroots.append(_objroot)
259 # we want the longest match
260 self.srctops.sort(reverse=True)
261 self.objroots.sort(reverse=True)
263 self.excludes = getv(conf, 'EXCLUDES', [])
266 print("host_target=", self.host_target, file=self.debug_out)
267 print("srctops=", self.srctops, file=self.debug_out)
268 print("objroots=", self.objroots, file=self.debug_out)
269 print("excludes=", self.excludes, file=self.debug_out)
271 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
273 if self.dpdeps and not self.reldir:
275 print("need reldir:", end=' ', file=self.debug_out)
277 srctop = self.find_top(self.curdir, self.srctops)
279 self.reldir = self.curdir.replace(srctop,'')
281 print(self.reldir, file=self.debug_out)
283 self.dpdeps = None # we cannot do it?
285 self.cwd = os.getcwd() # make sure this is initialized
286 self.last_dir = self.cwd
292 """reset state if we are being passed meta files from multiple directories."""
298 def dirdeps(self, sep='\n'):
300 return sep.strip() + sep.join(self.obj_deps)
302 def src_dirdeps(self, sep='\n'):
303 """return SRC_DIRDEPS"""
304 return sep.strip() + sep.join(self.src_deps)
306 def file_depends(self, out=None):
307 """Append DPDEPS_${file} += ${RELDIR}
308 for each file we saw, to the output file."""
311 for f in sort_unique(self.file_deps):
312 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
313 # these entries provide for reverse DIRDEPS lookup
314 for f in self.obj_deps:
315 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
317 def seenit(self, dir):
318 """remember that we have seen dir."""
321 def add(self, list, data, clue=''):
322 """add data to list if it isn't already there."""
326 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
328 def find_top(self, path, list):
329 """the logical tree may be split across multiple trees"""
331 if path.startswith(top):
333 print("found in", top, file=self.debug_out)
337 def find_obj(self, objroot, dir, path, input):
338 """return path within objroot, taking care of .dirdep files"""
340 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
341 if not ddep and os.path.exists(ddepf):
342 ddep = open(ddepf, 'r').readline().strip('# \n')
344 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
345 if ddep.endswith(self.machine):
346 ddep = ddep[0:-(1+len(self.machine))]
347 elif self.target_spec and ddep.endswith(self.target_spec):
348 ddep = ddep[0:-(1+len(self.target_spec))]
351 # no .dirdeps, so remember that we've seen the raw input
354 if self.machine == 'none':
355 if dir.startswith(objroot):
356 return dir.replace(objroot,'')
358 m = self.dirdep_re.match(dir.replace(objroot,''))
361 dmachine = m.group(1)
362 if dmachine != self.machine:
363 if not (self.machine == 'host' and
364 dmachine == self.host_target):
366 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
367 ddep += '.' + dmachine
371 def try_parse(self, name=None, file=None):
372 """give file and line number causing exception"""
374 self.parse(name, file)
377 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
380 def parse(self, name=None, file=None):
381 """A meta file looks like:
383 # Meta data file "path"
388 -- filemon acquired metadata --
398 L "pid" "src" "target"
403 We go to some effort to avoid processing a dependency more than once.
404 Of the above record types only C,E,F,L,R,V and W are of interest.
407 version = 0 # unknown
412 cwd = self.last_dir = self.cwd
414 f = open(self.name, 'r')
422 self.seenit(self.curdir) # we ignore this
424 interesting = 'CEFLRV'
427 # ignore anything we don't care about
428 if not line[0] in interesting:
431 print("input:", line, end=' ', file=self.debug_out)
440 # we cannot ignore 'W' records
441 # as they may be 'rw'
445 self.cwd = cwd = self.last_dir = w[1]
446 self.seenit(cwd) # ignore this
448 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
454 pid_last_dir[last_pid] = self.last_dir
455 cwd = getv(pid_cwd, pid, self.cwd)
456 self.last_dir = getv(pid_last_dir, pid, self.cwd)
463 pid_last_dir[npid] = cwd
467 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
468 if cwd.endswith('/.'):
470 self.last_dir = pid_last_dir[pid] = cwd
473 print("cwd=", cwd, file=self.debug_out)
476 if w[2] in self.seen:
478 print("seen:", w[2], file=self.debug_out)
482 # these are special, treat src as read and
484 self.parse_path(w[1].strip("'"), cwd, 'R', w)
485 self.parse_path(w[2].strip("'"), cwd, 'W', w)
489 self.parse_path(path, cwd, w[0], w)
494 def is_src(self, base, dir, rdir):
495 """is base in srctop"""
496 for dir in [dir,rdir]:
499 path = '/'.join([dir,base])
500 srctop = self.find_top(path, self.srctops)
503 self.add(self.file_deps, path.replace(srctop,''), 'file')
504 self.add(self.src_deps, dir.replace(srctop,''), 'src')
509 def parse_path(self, path, cwd, op=None, w=[]):
510 """look at a path for the op specified"""
515 # we are never interested in .dirdep files as dependencies
516 if path.endswith('.dirdep'):
518 for p in self.excludes:
519 if p and path.startswith(p):
521 print("exclude:", p, path, file=self.debug_out)
523 # we don't want to resolve the last component if it is
525 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
528 dir,base = os.path.split(path)
531 print("seen:", dir, file=self.debug_out)
533 # we can have a path in an objdir which is a link
534 # to the src dir, we may need to add dependencies for each
536 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
537 rdir = os.path.realpath(dir)
540 # now put path back together
541 path = '/'.join([dir,base])
543 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
545 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
547 print("skipping:", path, file=self.debug_out)
549 if os.path.isdir(path):
551 self.last_dir = path;
553 print("ldir=", self.last_dir, file=self.debug_out)
557 # finally, we get down to it
558 if dir == self.cwd or dir == self.curdir:
560 if self.is_src(base, dir, rdir):
566 for dir in [dir,rdir]:
569 objroot = self.find_top(dir, self.objroots)
573 ddep = self.find_obj(objroot, dir, path, w[2])
575 self.add(self.obj_deps, ddep, 'obj')
576 if self.dpdeps and objroot.endswith('/stage/'):
577 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
578 self.add(self.file_deps, sp, 'file')
580 # don't waste time looking again
585 def main(argv, klass=MetaFile, xopts='', xoptf=None):
586 """Simple driver for class MetaFile.
589 script [options] [key=value ...] "meta" ...
591 Options and key=value pairs contribute to the
592 dictionary passed to MetaFile.
595 add "SRCTOP" to the "SRCTOPS" list.
600 add "OBJROOT" to the "OBJROOTS" list.
615 # import Psyco if we can
616 # it can speed things up quite a bit
632 machine = os.environ['MACHINE']
634 conf['MACHINE'] = machine
635 machine_arch = os.environ['MACHINE_ARCH']
637 conf['MACHINE_ARCH'] = machine_arch
638 srctop = os.environ['SB_SRC']
640 conf['SRCTOPS'].append(srctop)
641 objroot = os.environ['SB_OBJROOT']
643 conf['OBJROOTS'].append(objroot)
650 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
653 conf['MACHINE_ARCH'] = a
659 conf['HOST_TARGET'] = a
661 if a not in conf['SRCTOPS']:
662 conf['SRCTOPS'].append(a)
666 if a not in conf['OBJROOTS']:
667 conf['OBJROOTS'].append(a)
675 conf['TARGET_SPEC'] = a
677 if a not in conf['EXCLUDES']:
678 conf['EXCLUDES'].append(a)
682 conf['debug'] = debug
684 # get any var=val assignments
689 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
705 debug_out = getv(conf, 'debug_out', sys.stderr)
708 print("config:", file=debug_out)
709 print("psyco=", have_psyco, file=debug_out)
710 for k,v in list(conf.items()):
711 print("%s=%s" % (k,v), file=debug_out)
715 if a.endswith('.meta'):
716 if not os.path.exists(a):
719 elif a.startswith('@'):
720 # there can actually be multiple files per line
721 for line in open(a[1:]):
722 for f in line.strip().split():
723 if not os.path.exists(f):
730 print(m.src_dirdeps('\nsrc:'))
732 dpdeps = getv(conf, 'DPDEPS')
734 m.file_depends(open(dpdeps, 'wb'))
738 if __name__ == '__main__':
742 # yes, this goes to stdout
743 print("ERROR: ", sys.exc_info()[1])