3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
40 $Id: meta2deps.py,v 1.18 2015/04/03 18:23:25 sjg Exp $
42 Copyright (c) 2011-2013, Juniper Networks, Inc.
45 Redistribution and use in source and binary forms, with or without
46 modification, are permitted provided that the following conditions
48 1. Redistributions of source code must retain the above copyright
49 notice, this list of conditions and the following disclaimer.
50 2. Redistributions in binary form must reproduce the above copyright
51 notice, this list of conditions and the following disclaimer in the
52 documentation and/or other materials provided with the distribution.
54 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
55 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
56 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
57 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
58 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
60 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
61 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
62 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
63 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
64 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
70 def getv(dict, key, d=None):
71 """Lookup key in dict and return value or the supplied default."""
76 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
78 Return an absolute path, resolving via cwd or last_dir if needed.
80 if path.endswith('/.'):
82 if len(path) > 0 and path[0] == '/':
86 if path.startswith('./'):
90 for d in [last_dir, cwd]:
93 p = '/'.join([d,path])
95 print("looking for:", p, end=' ', file=debug_out)
96 if not os.path.exists(p):
98 print("nope", file=debug_out)
102 print("found:", p, file=debug_out)
106 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
108 Return an absolute path, resolving via cwd or last_dir if needed.
109 this gets called a lot, so we try to avoid calling realpath
110 until we know we have something.
112 rpath = resolve(path, cwd, last_dir, debug, debug_out)
115 if (path.find('/') < 0 or
116 path.find('./') > 0 or
117 path.endswith('/..') or
118 os.path.islink(path)):
119 return os.path.realpath(path)
122 def sort_unique(list, cmp=None, key=None, reverse=False):
123 list.sort(cmp, key, reverse)
133 return ['/' + x + '/',
139 """class to parse meta files generated by bmake."""
152 def __init__(self, name, conf={}):
153 """if name is set we will parse it now.
154 conf can have the following keys:
156 SRCTOPS list of tops of the src tree(s).
158 CURDIR the src directory 'bmake' was run from.
160 RELDIR the relative path from SRCTOP to CURDIR
162 MACHINE the machine we built for.
163 set to 'none' if we are not cross-building.
164 More specifically if machine cannot be deduced from objdirs.
167 Sometimes MACHINE isn't enough.
170 when we build for the pseudo machine 'host'
171 the object tree uses HOST_TARGET rather than MACHINE.
173 OBJROOTS a list of the common prefix for all obj dirs it might
176 DPDEPS names an optional file to which per file dependencies
178 For example if 'some/path/foo.h' is read from SRCTOP
179 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
180 This can allow 'bmake' to learn all the dirs within
181 the tree that depend on 'foo.h'
184 A list of paths to ignore.
185 ccache(1) can otherwise be trouble.
187 debug desired debug level
189 debug_out open file to send debug output to (sys.stderr)
194 self.debug = getv(conf, 'debug', 0)
195 self.debug_out = getv(conf, 'debug_out', sys.stderr)
197 self.machine = getv(conf, 'MACHINE', '')
198 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
199 self.target_spec = getv(conf, 'TARGET_SPEC', '')
200 self.curdir = getv(conf, 'CURDIR')
201 self.reldir = getv(conf, 'RELDIR')
202 self.dpdeps = getv(conf, 'DPDEPS')
206 # some of the steps below we want to do only once
208 self.host_target = getv(conf, 'HOST_TARGET')
209 for srctop in getv(conf, 'SRCTOPS', []):
210 if srctop[-1] != '/':
212 if not srctop in self.srctops:
213 self.srctops.append(srctop)
214 _srctop = os.path.realpath(srctop)
215 if _srctop[-1] != '/':
217 if not _srctop in self.srctops:
218 self.srctops.append(_srctop)
220 trim_list = add_trims(self.machine)
221 if self.machine == 'host':
222 trim_list += add_trims(self.host_target)
224 trim_list += add_trims(self.target_spec)
226 for objroot in getv(conf, 'OBJROOTS', []):
228 if objroot.endswith(e):
229 # this is not what we want - fix it
230 objroot = objroot[0:-len(e)]
233 if not objroot in self.objroots:
234 self.objroots.append(objroot)
235 _objroot = os.path.realpath(objroot)
236 if objroot[-1] == '/':
238 if not _objroot in self.objroots:
239 self.objroots.append(_objroot)
241 # we want the longest match
242 self.srctops.sort(reverse=True)
243 self.objroots.sort(reverse=True)
245 self.excludes = getv(conf, 'EXCLUDES', [])
248 print("host_target=", self.host_target, file=self.debug_out)
249 print("srctops=", self.srctops, file=self.debug_out)
250 print("objroots=", self.objroots, file=self.debug_out)
251 print("excludes=", self.excludes, file=self.debug_out)
253 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
255 if self.dpdeps and not self.reldir:
257 print("need reldir:", end=' ', file=self.debug_out)
259 srctop = self.find_top(self.curdir, self.srctops)
261 self.reldir = self.curdir.replace(srctop,'')
263 print(self.reldir, file=self.debug_out)
265 self.dpdeps = None # we cannot do it?
267 self.cwd = os.getcwd() # make sure this is initialized
268 self.last_dir = self.cwd
274 """reset state if we are being passed meta files from multiple directories."""
280 def dirdeps(self, sep='\n'):
282 return sep.strip() + sep.join(self.obj_deps)
def src_dirdeps(self, sep='\n'):
    """Return the SRC_DIRDEPS entries as one string.

    The entries of self.src_deps are joined with `sep`, and the
    result is prefixed with `sep` stripped of surrounding whitespace
    (so sep='\\nsrc:' labels every entry, including the first).
    """
    prefix = sep.strip()
    joined = sep.join(self.src_deps)
    return prefix + joined
288 def file_depends(self, out=None):
289 """Append DPDEPS_${file} += ${RELDIR}
290 for each file we saw, to the output file."""
293 for f in sort_unique(self.file_deps):
294 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
296 def seenit(self, dir):
297 """remember that we have seen dir."""
300 def add(self, list, data, clue=''):
301 """add data to list if it isn't already there."""
305 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
307 def find_top(self, path, list):
308 """the logical tree may be split across multiple trees"""
310 if path.startswith(top):
312 print("found in", top, file=self.debug_out)
316 def find_obj(self, objroot, dir, path, input):
317 """return path within objroot, taking care of .dirdep files"""
319 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
320 if not ddep and os.path.exists(ddepf):
321 ddep = open(ddepf, 'r').readline().strip('# \n')
323 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
324 if ddep.endswith(self.machine):
325 ddep = ddep[0:-(1+len(self.machine))]
326 elif self.target_spec and ddep.endswith(self.target_spec):
327 ddep = ddep[0:-(1+len(self.target_spec))]
330 # no .dirdeps, so remember that we've seen the raw input
333 if self.machine == 'none':
334 if dir.startswith(objroot):
335 return dir.replace(objroot,'')
337 m = self.dirdep_re.match(dir.replace(objroot,''))
340 dmachine = m.group(1)
341 if dmachine != self.machine:
342 if not (self.machine == 'host' and
343 dmachine == self.host_target):
345 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
346 ddep += '.' + dmachine
350 def try_parse(self, name=None, file=None):
351 """give file and line number causing exception"""
353 self.parse(name, file)
356 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
359 def parse(self, name=None, file=None):
360 """A meta file looks like:
362 # Meta data file "path"
367 -- filemon acquired metadata --
377 L "pid" "src" "target"
382 We go to some effort to avoid processing a dependency more than once.
383 Of the above record types only C,E,F,L,R,V and W are of interest.
386 version = 0 # unknown
391 cwd = self.last_dir = self.cwd
393 f = open(self.name, 'r')
401 self.seenit(self.curdir) # we ignore this
403 interesting = 'CEFLRV'
406 # ignore anything we don't care about
407 if not line[0] in interesting:
410 print("input:", line, end=' ', file=self.debug_out)
419 # we cannot ignore 'W' records
420 # as they may be 'rw'
424 self.cwd = cwd = self.last_dir = w[1]
425 self.seenit(cwd) # ignore this
427 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
433 pid_cwd[last_pid] = cwd
434 pid_last_dir[last_pid] = self.last_dir
435 cwd = getv(pid_cwd, pid, self.cwd)
436 self.last_dir = getv(pid_last_dir, pid, self.cwd)
443 pid_last_dir[npid] = cwd
447 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
448 if cwd.endswith('/.'):
452 print("cwd=", cwd, file=self.debug_out)
455 if w[2] in self.seen:
457 print("seen:", w[2], file=self.debug_out)
461 # these are special, treat src as read and
463 self.parse_path(w[1].strip("'"), cwd, 'R', w)
464 self.parse_path(w[2].strip("'"), cwd, 'W', w)
468 self.parse_path(path, cwd, w[0], w)
473 def parse_path(self, path, cwd, op=None, w=[]):
474 """look at a path for the op specified"""
479 # we are never interested in .dirdep files as dependencies
480 if path.endswith('.dirdep'):
482 for p in self.excludes:
483 if p and path.startswith(p):
485 print >> self.debug_out, "exclude:", p, path
487 # we don't want to resolve the last component if it is
489 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
492 dir,base = os.path.split(path)
495 print("seen:", dir, file=self.debug_out)
497 # we can have a path in an objdir which is a link
498 # to the src dir, we may need to add dependencies for each
500 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
501 if rdir == dir or rdir.find('./') > 0:
503 # now put path back together
504 path = '/'.join([dir,base])
506 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
508 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
510 print("skipping:", path, file=self.debug_out)
512 if os.path.isdir(path):
514 self.last_dir = path;
516 print("ldir=", self.last_dir, file=self.debug_out)
520 # finally, we get down to it
521 if dir == self.cwd or dir == self.curdir:
523 srctop = self.find_top(path, self.srctops)
526 self.add(self.file_deps, path.replace(srctop,''), 'file')
527 self.add(self.src_deps, dir.replace(srctop,''), 'src')
530 if rdir and not rdir.startswith(srctop):
531 dir = rdir # for below
537 for dir in [dir,rdir]:
540 objroot = self.find_top(dir, self.objroots)
544 ddep = self.find_obj(objroot, dir, path, w[2])
546 self.add(self.obj_deps, ddep, 'obj')
548 # don't waste time looking again
553 def main(argv, klass=MetaFile, xopts='', xoptf=None):
554 """Simple driver for class MetaFile.
557 script [options] [key=value ...] "meta" ...
559 Options and key=value pairs contribute to the
560 dictionary passed to MetaFile.
563 add "SRCTOP" to the "SRCTOPS" list.
568 add "OBJROOT" to the "OBJROOTS" list.
583 # import Psyco if we can
584 # it can speed things up quite a bit
600 machine = os.environ['MACHINE']
602 conf['MACHINE'] = machine
603 machine_arch = os.environ['MACHINE_ARCH']
605 conf['MACHINE_ARCH'] = machine_arch
606 srctop = os.environ['SB_SRC']
608 conf['SRCTOPS'].append(srctop)
609 objroot = os.environ['SB_OBJROOT']
611 conf['OBJROOTS'].append(objroot)
618 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
621 conf['MACHINE_ARCH'] = a
627 conf['HOST_TARGET'] = a
629 if a not in conf['SRCTOPS']:
630 conf['SRCTOPS'].append(a)
634 if a not in conf['OBJROOTS']:
635 conf['OBJROOTS'].append(a)
643 conf['TARGET_SPEC'] = a
645 if a not in conf['EXCLUDES']:
646 conf['EXCLUDES'].append(a)
650 conf['debug'] = debug
652 # get any var=val assignments
657 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
673 debug_out = getv(conf, 'debug_out', sys.stderr)
676 print("config:", file=debug_out)
677 print("psyco=", have_psyco, file=debug_out)
678 for k,v in list(conf.items()):
679 print("%s=%s" % (k,v), file=debug_out)
683 if a.endswith('.meta'):
684 if not os.path.exists(a):
687 elif a.startswith('@'):
688 # there can actually be multiple files per line
689 for line in open(a[1:]):
690 for f in line.strip().split():
691 if not os.path.exists(f):
698 print(m.src_dirdeps('\nsrc:'))
700 dpdeps = getv(conf, 'DPDEPS')
702 m.file_depends(open(dpdeps, 'wb'))
706 if __name__ == '__main__':
710 # yes, this goes to stdout
711 print("ERROR: ", sys.exc_info()[1])