4 from __future__ import print_function
7 This script parses each "meta" file and extracts the
8 information needed to deduce build and src dependencies.
10 It works much the same as the original shell script, but is
11 *much* more efficient.
13 The parsing work is handled by the class MetaFile.
14 We only pay attention to a subset of the information in the
15 "meta" files. Specifically:
17 'CWD' to initialize our notion.
19 'C' to track chdir(2) on a per process basis
21 'R' files read are what we really care about.
22 directories read, provide a clue to resolving
23 subsequent relative paths. That is if we cannot find
24 them relative to 'cwd', we check relative to the last
27 'W' files opened for write or read-write,
28 for filemon V3 and earlier.
34 'V' the filemon version, this record is used as a clue
35 that we have reached the interesting bit.
41 $Id: meta2deps.py,v 1.18 2015/04/03 18:23:25 sjg Exp $
43 Copyright (c) 2011-2013, Juniper Networks, Inc.
46 Redistribution and use in source and binary forms, with or without
47 modification, are permitted provided that the following conditions
49 1. Redistributions of source code must retain the above copyright
50 notice, this list of conditions and the following disclaimer.
51 2. Redistributions in binary form must reproduce the above copyright
52 notice, this list of conditions and the following disclaimer in the
53 documentation and/or other materials provided with the distribution.
55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 def getv(dict, key, d=None):
72 """Lookup key in dict and return value or the supplied default."""
77 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
79 Return an absolute path, resolving via cwd or last_dir if needed.
81 if path.endswith('/.'):
83 if len(path) > 0 and path[0] == '/':
87 if path.startswith('./'):
91 for d in [last_dir, cwd]:
94 p = '/'.join([d,path])
96 print("looking for:", p, end=' ', file=debug_out)
97 if not os.path.exists(p):
99 print("nope", file=debug_out)
103 print("found:", p, file=debug_out)
107 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
109 Return an absolute path, resolving via cwd or last_dir if needed.
110 this gets called a lot, so we try to avoid calling realpath
111 until we know we have something.
113 rpath = resolve(path, cwd, last_dir, debug, debug_out)
116 if (path.find('/') < 0 or
117 path.find('./') > 0 or
118 path.endswith('/..') or
119 os.path.islink(path)):
120 return os.path.realpath(path)
123 def sort_unique(list, cmp=None, key=None, reverse=False):
124 list.sort(cmp, key, reverse)
134 return ['/' + x + '/',
140 """class to parse meta files generated by bmake."""
153 def __init__(self, name, conf={}):
154 """if name is set we will parse it now.
155 conf can have the following keys:
157 SRCTOPS list of tops of the src tree(s).
159 CURDIR the src directory 'bmake' was run from.
161 RELDIR the relative path from SRCTOP to CURDIR
163 MACHINE the machine we built for.
164 set to 'none' if we are not cross-building.
165 More specifically if machine cannot be deduced from objdirs.
168 Sometimes MACHINE isn't enough.
171 when we build for the pseudo machine 'host'
172 the object tree uses HOST_TARGET rather than MACHINE.
174 OBJROOTS a list of the common prefix for all obj dirs it might
177 DPDEPS names an optional file to which per file dependencies
179 For example if 'some/path/foo.h' is read from SRCTOP
180 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
181 This can allow 'bmake' to learn all the dirs within
182 the tree that depend on 'foo.h'
185 A list of paths to ignore.
186 ccache(1) can otherwise be trouble.
188 debug desired debug level
190 debug_out open file to send debug output to (sys.stderr)
195 self.debug = getv(conf, 'debug', 0)
196 self.debug_out = getv(conf, 'debug_out', sys.stderr)
198 self.machine = getv(conf, 'MACHINE', '')
199 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
200 self.target_spec = getv(conf, 'TARGET_SPEC', '')
201 self.curdir = getv(conf, 'CURDIR')
202 self.reldir = getv(conf, 'RELDIR')
203 self.dpdeps = getv(conf, 'DPDEPS')
207 # some of the steps below we want to do only once
209 self.host_target = getv(conf, 'HOST_TARGET')
210 for srctop in getv(conf, 'SRCTOPS', []):
211 if srctop[-1] != '/':
213 if not srctop in self.srctops:
214 self.srctops.append(srctop)
215 _srctop = os.path.realpath(srctop)
216 if _srctop[-1] != '/':
218 if not _srctop in self.srctops:
219 self.srctops.append(_srctop)
221 trim_list = add_trims(self.machine)
222 if self.machine == 'host':
223 trim_list += add_trims(self.host_target)
225 trim_list += add_trims(self.target_spec)
227 for objroot in getv(conf, 'OBJROOTS', []):
229 if objroot.endswith(e):
230 # this is not what we want - fix it
231 objroot = objroot[0:-len(e)]
234 if not objroot in self.objroots:
235 self.objroots.append(objroot)
236 _objroot = os.path.realpath(objroot)
237 if objroot[-1] == '/':
239 if not _objroot in self.objroots:
240 self.objroots.append(_objroot)
242 # we want the longest match
243 self.srctops.sort(reverse=True)
244 self.objroots.sort(reverse=True)
246 self.excludes = getv(conf, 'EXCLUDES', [])
249 print("host_target=", self.host_target, file=self.debug_out)
250 print("srctops=", self.srctops, file=self.debug_out)
251 print("objroots=", self.objroots, file=self.debug_out)
252 print("excludes=", self.excludes, file=self.debug_out)
254 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
256 if self.dpdeps and not self.reldir:
258 print("need reldir:", end=' ', file=self.debug_out)
260 srctop = self.find_top(self.curdir, self.srctops)
262 self.reldir = self.curdir.replace(srctop,'')
264 print(self.reldir, file=self.debug_out)
266 self.dpdeps = None # we cannot do it?
268 self.cwd = os.getcwd() # make sure this is initialized
269 self.last_dir = self.cwd
275 """reset state if we are being passed meta files from multiple directories."""
281 def dirdeps(self, sep='\n'):
283 return sep.strip() + sep.join(self.obj_deps)
285 def src_dirdeps(self, sep='\n'):
286 """return SRC_DIRDEPS"""
287 return sep.strip() + sep.join(self.src_deps)
289 def file_depends(self, out=None):
290 """Append DPDEPS_${file} += ${RELDIR}
291 for each file we saw, to the output file."""
294 for f in sort_unique(self.file_deps):
295 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
297 def seenit(self, dir):
298 """remember that we have seen dir."""
301 def add(self, list, data, clue=''):
302 """add data to list if it isn't already there."""
306 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
308 def find_top(self, path, list):
309 """the logical tree may be split across multiple trees"""
311 if path.startswith(top):
313 print("found in", top, file=self.debug_out)
317 def find_obj(self, objroot, dir, path, input):
318 """return path within objroot, taking care of .dirdep files"""
320 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
321 if not ddep and os.path.exists(ddepf):
322 ddep = open(ddepf, 'r').readline().strip('# \n')
324 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
325 if ddep.endswith(self.machine):
326 ddep = ddep[0:-(1+len(self.machine))]
327 elif self.target_spec and ddep.endswith(self.target_spec):
328 ddep = ddep[0:-(1+len(self.target_spec))]
331 # no .dirdeps, so remember that we've seen the raw input
334 if self.machine == 'none':
335 if dir.startswith(objroot):
336 return dir.replace(objroot,'')
338 m = self.dirdep_re.match(dir.replace(objroot,''))
341 dmachine = m.group(1)
342 if dmachine != self.machine:
343 if not (self.machine == 'host' and
344 dmachine == self.host_target):
346 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
347 ddep += '.' + dmachine
351 def try_parse(self, name=None, file=None):
352 """give file and line number causing exception"""
354 self.parse(name, file)
357 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
360 def parse(self, name=None, file=None):
361 """A meta file looks like:
363 # Meta data file "path"
368 -- filemon acquired metadata --
378 L "pid" "src" "target"
383 We go to some effort to avoid processing a dependency more than once.
384 Of the above record types only C,E,F,L,R,V and W are of interest.
387 version = 0 # unknown
392 cwd = self.last_dir = self.cwd
394 f = open(self.name, 'r')
402 self.seenit(self.curdir) # we ignore this
404 interesting = 'CEFLRV'
407 # ignore anything we don't care about
408 if not line[0] in interesting:
411 print("input:", line, end=' ', file=self.debug_out)
420 # we cannot ignore 'W' records
421 # as they may be 'rw'
425 self.cwd = cwd = self.last_dir = w[1]
426 self.seenit(cwd) # ignore this
428 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
434 pid_cwd[last_pid] = cwd
435 pid_last_dir[last_pid] = self.last_dir
436 cwd = getv(pid_cwd, pid, self.cwd)
437 self.last_dir = getv(pid_last_dir, pid, self.cwd)
444 pid_last_dir[npid] = cwd
448 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
449 if cwd.endswith('/.'):
453 print("cwd=", cwd, file=self.debug_out)
456 if w[2] in self.seen:
458 print("seen:", w[2], file=self.debug_out)
462 # these are special, treat src as read and
464 self.parse_path(w[1].strip("'"), cwd, 'R', w)
465 self.parse_path(w[2].strip("'"), cwd, 'W', w)
469 self.parse_path(path, cwd, w[0], w)
474 def parse_path(self, path, cwd, op=None, w=[]):
475 """look at a path for the op specified"""
480 # we are never interested in .dirdep files as dependencies
481 if path.endswith('.dirdep'):
483 for p in self.excludes:
484 if p and path.startswith(p):
486 print >> self.debug_out, "exclude:", p, path
488 # we don't want to resolve the last component if it is
490 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
493 dir,base = os.path.split(path)
496 print("seen:", dir, file=self.debug_out)
498 # we can have a path in an objdir which is a link
499 # to the src dir, we may need to add dependencies for each
501 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
502 if rdir == dir or rdir.find('./') > 0:
504 # now put path back together
505 path = '/'.join([dir,base])
507 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
509 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
511 print("skipping:", path, file=self.debug_out)
513 if os.path.isdir(path):
515 self.last_dir = path;
517 print("ldir=", self.last_dir, file=self.debug_out)
521 # finally, we get down to it
522 if dir == self.cwd or dir == self.curdir:
524 srctop = self.find_top(path, self.srctops)
527 self.add(self.file_deps, path.replace(srctop,''), 'file')
528 self.add(self.src_deps, dir.replace(srctop,''), 'src')
531 if rdir and not rdir.startswith(srctop):
532 dir = rdir # for below
538 for dir in [dir,rdir]:
541 objroot = self.find_top(dir, self.objroots)
545 ddep = self.find_obj(objroot, dir, path, w[2])
547 self.add(self.obj_deps, ddep, 'obj')
549 # don't waste time looking again
554 def main(argv, klass=MetaFile, xopts='', xoptf=None):
555 """Simple driver for class MetaFile.
558 script [options] [key=value ...] "meta" ...
560 Options and key=value pairs contribute to the
561 dictionary passed to MetaFile.
564 add "SRCTOP" to the "SRCTOPS" list.
569 add "OBJROOT" to the "OBJROOTS" list.
584 # import Psyco if we can
585 # it can speed things up quite a bit
601 machine = os.environ['MACHINE']
603 conf['MACHINE'] = machine
604 machine_arch = os.environ['MACHINE_ARCH']
606 conf['MACHINE_ARCH'] = machine_arch
607 srctop = os.environ['SB_SRC']
609 conf['SRCTOPS'].append(srctop)
610 objroot = os.environ['SB_OBJROOT']
612 conf['OBJROOTS'].append(objroot)
619 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
622 conf['MACHINE_ARCH'] = a
628 conf['HOST_TARGET'] = a
630 if a not in conf['SRCTOPS']:
631 conf['SRCTOPS'].append(a)
635 if a not in conf['OBJROOTS']:
636 conf['OBJROOTS'].append(a)
644 conf['TARGET_SPEC'] = a
646 if a not in conf['EXCLUDES']:
647 conf['EXCLUDES'].append(a)
651 conf['debug'] = debug
653 # get any var=val assignments
658 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
674 debug_out = getv(conf, 'debug_out', sys.stderr)
677 print("config:", file=debug_out)
678 print("psyco=", have_psyco, file=debug_out)
679 for k,v in list(conf.items()):
680 print("%s=%s" % (k,v), file=debug_out)
684 if a.endswith('.meta'):
685 if not os.path.exists(a):
688 elif a.startswith('@'):
689 # there can actually be multiple files per line
690 for line in open(a[1:]):
691 for f in line.strip().split():
692 if not os.path.exists(f):
699 print(m.src_dirdeps('\nsrc:'))
701 dpdeps = getv(conf, 'DPDEPS')
703 m.file_depends(open(dpdeps, 'wb'))
707 if __name__ == '__main__':
711 # yes, this goes to stdout
712 print("ERROR: ", sys.exc_info()[1])