3 from __future__ import print_function
6 This script parses each "meta" file and extracts the
7 information needed to deduce build and src dependencies.
9 It works much the same as the original shell script, but is
10 *much* more efficient.
12 The parsing work is handled by the class MetaFile.
13 We only pay attention to a subset of the information in the
14 "meta" files. Specifically:
16 'CWD' to initialize our notion.
18 'C' to track chdir(2) on a per process basis
20 'R' files read are what we really care about.
21 directories read, provide a clue to resolving
22 subsequent relative paths. That is if we cannot find
23 them relative to 'cwd', we check relative to the last
26 'W' files opened for write or read-write,
27 for filemon V3 and earlier.
33 'V' the filemon version, this record is used as a clue
34 that we have reached the interesting bit.
41 $Id: meta2deps.py,v 1.22 2016/12/12 19:07:42 sjg Exp $
43 Copyright (c) 2011-2013, Juniper Networks, Inc.
46 Redistribution and use in source and binary forms, with or without
47 modification, are permitted provided that the following conditions
49 1. Redistributions of source code must retain the above copyright
50 notice, this list of conditions and the following disclaimer.
51 2. Redistributions in binary form must reproduce the above copyright
52 notice, this list of conditions and the following disclaimer in the
53 documentation and/or other materials provided with the distribution.
55 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
56 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
57 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
58 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
59 OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
60 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
61 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
62 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
63 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
64 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
65 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
71 def getv(dict, key, d=None):
72 """Lookup key in dict and return value or the supplied default."""
77 def resolve(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
79 Return an absolute path, resolving via cwd or last_dir if needed.
81 if path.endswith('/.'):
83 if len(path) > 0 and path[0] == '/':
87 if path.startswith('./'):
91 for d in [last_dir, cwd]:
94 p = '/'.join([d,path])
96 print("looking for:", p, end=' ', file=debug_out)
97 if not os.path.exists(p):
99 print("nope", file=debug_out)
103 print("found:", p, file=debug_out)
107 def abspath(path, cwd, last_dir=None, debug=0, debug_out=sys.stderr):
109 Return an absolute path, resolving via cwd or last_dir if needed.
110 this gets called a lot, so we try to avoid calling realpath
111 until we know we have something.
113 rpath = resolve(path, cwd, last_dir, debug, debug_out)
116 if (path.find('/') < 0 or
117 path.find('./') > 0 or
118 path.endswith('/..') or
119 os.path.islink(path)):
120 return os.path.realpath(path)
123 def sort_unique(list, cmp=None, key=None, reverse=False):
124 list.sort(cmp, key, reverse)
134 return ['/' + x + '/',
140 """class to parse meta files generated by bmake."""
153 def __init__(self, name, conf={}):
154 """if name is set we will parse it now.
155 conf can have the following keys:
157 SRCTOPS list of tops of the src tree(s).
159 CURDIR the src directory 'bmake' was run from.
161 RELDIR the relative path from SRCTOP to CURDIR
163 MACHINE the machine we built for.
164 set to 'none' if we are not cross-building.
165 More specifically if machine cannot be deduced from objdirs.
168 Sometimes MACHINE isn't enough.
171 when we build for the pseudo machine 'host'
172 the object tree uses HOST_TARGET rather than MACHINE.
174 OBJROOTS a list of the common prefix for all obj dirs it might
177 DPDEPS names an optional file to which per file dependencies
179 For example if 'some/path/foo.h' is read from SRCTOP
180 then 'DPDEPS_some/path/foo.h +=' "RELDIR" is output.
181 This can allow 'bmake' to learn all the dirs within
182 the tree that depend on 'foo.h'
185 A list of paths to ignore.
186 ccache(1) can otherwise be trouble.
188 debug desired debug level
190 debug_out open file to send debug output to (sys.stderr)
195 self.debug = getv(conf, 'debug', 0)
196 self.debug_out = getv(conf, 'debug_out', sys.stderr)
198 self.machine = getv(conf, 'MACHINE', '')
199 self.machine_arch = getv(conf, 'MACHINE_ARCH', '')
200 self.target_spec = getv(conf, 'TARGET_SPEC', '')
201 self.curdir = getv(conf, 'CURDIR')
202 self.reldir = getv(conf, 'RELDIR')
203 self.dpdeps = getv(conf, 'DPDEPS')
207 # some of the steps below we want to do only once
209 self.host_target = getv(conf, 'HOST_TARGET')
210 for srctop in getv(conf, 'SRCTOPS', []):
211 if srctop[-1] != '/':
213 if not srctop in self.srctops:
214 self.srctops.append(srctop)
215 _srctop = os.path.realpath(srctop)
216 if _srctop[-1] != '/':
218 if not _srctop in self.srctops:
219 self.srctops.append(_srctop)
221 trim_list = add_trims(self.machine)
222 if self.machine == 'host':
223 trim_list += add_trims(self.host_target)
225 trim_list += add_trims(self.target_spec)
227 for objroot in getv(conf, 'OBJROOTS', []):
229 if objroot.endswith(e):
230 # this is not what we want - fix it
231 objroot = objroot[0:-len(e)]
233 if objroot[-1] != '/':
235 if not objroot in self.objroots:
236 self.objroots.append(objroot)
237 _objroot = os.path.realpath(objroot)
238 if objroot[-1] == '/':
240 if not _objroot in self.objroots:
241 self.objroots.append(_objroot)
243 # we want the longest match
244 self.srctops.sort(reverse=True)
245 self.objroots.sort(reverse=True)
247 self.excludes = getv(conf, 'EXCLUDES', [])
250 print("host_target=", self.host_target, file=self.debug_out)
251 print("srctops=", self.srctops, file=self.debug_out)
252 print("objroots=", self.objroots, file=self.debug_out)
253 print("excludes=", self.excludes, file=self.debug_out)
255 self.dirdep_re = re.compile(r'([^/]+)/(.+)')
257 if self.dpdeps and not self.reldir:
259 print("need reldir:", end=' ', file=self.debug_out)
261 srctop = self.find_top(self.curdir, self.srctops)
263 self.reldir = self.curdir.replace(srctop,'')
265 print(self.reldir, file=self.debug_out)
267 self.dpdeps = None # we cannot do it?
269 self.cwd = os.getcwd() # make sure this is initialized
270 self.last_dir = self.cwd
276 """reset state if we are being passed meta files from multiple directories."""
282 def dirdeps(self, sep='\n'):
284 return sep.strip() + sep.join(self.obj_deps)
286 def src_dirdeps(self, sep='\n'):
287 """return SRC_DIRDEPS"""
288 return sep.strip() + sep.join(self.src_deps)
290 def file_depends(self, out=None):
291 """Append DPDEPS_${file} += ${RELDIR}
292 for each file we saw, to the output file."""
295 for f in sort_unique(self.file_deps):
296 print('DPDEPS_%s += %s' % (f, self.reldir), file=out)
297 # these entries provide for reverse DIRDEPS lookup
298 for f in self.obj_deps:
299 print('DEPDIRS_%s += %s' % (f, self.reldir), file=out)
301 def seenit(self, dir):
302 """remember that we have seen dir."""
305 def add(self, list, data, clue=''):
306 """add data to list if it isn't already there."""
310 print("%s: %sAdd: %s" % (self.name, clue, data), file=self.debug_out)
312 def find_top(self, path, list):
313 """the logical tree may be split across multiple trees"""
315 if path.startswith(top):
317 print("found in", top, file=self.debug_out)
321 def find_obj(self, objroot, dir, path, input):
322 """return path within objroot, taking care of .dirdep files"""
324 for ddepf in [path + '.dirdep', dir + '/.dirdep']:
325 if not ddep and os.path.exists(ddepf):
326 ddep = open(ddepf, 'r').readline().strip('# \n')
328 print("found %s: %s\n" % (ddepf, ddep), file=self.debug_out)
329 if ddep.endswith(self.machine):
330 ddep = ddep[0:-(1+len(self.machine))]
331 elif self.target_spec and ddep.endswith(self.target_spec):
332 ddep = ddep[0:-(1+len(self.target_spec))]
335 # no .dirdeps, so remember that we've seen the raw input
338 if self.machine == 'none':
339 if dir.startswith(objroot):
340 return dir.replace(objroot,'')
342 m = self.dirdep_re.match(dir.replace(objroot,''))
345 dmachine = m.group(1)
346 if dmachine != self.machine:
347 if not (self.machine == 'host' and
348 dmachine == self.host_target):
350 print("adding .%s to %s" % (dmachine, ddep), file=self.debug_out)
351 ddep += '.' + dmachine
355 def try_parse(self, name=None, file=None):
356 """give file and line number causing exception"""
358 self.parse(name, file)
361 print('{}:{}: '.format(self.name, self.line), end=' ', file=sys.stderr)
364 def parse(self, name=None, file=None):
365 """A meta file looks like:
367 # Meta data file "path"
372 -- filemon acquired metadata --
382 L "pid" "src" "target"
387 We go to some effort to avoid processing a dependency more than once.
388 Of the above record types only C,E,F,L,R,V and W are of interest.
391 version = 0 # unknown
396 cwd = self.last_dir = self.cwd
398 f = open(self.name, 'r')
406 self.seenit(self.curdir) # we ignore this
408 interesting = 'CEFLRV'
411 # ignore anything we don't care about
412 if not line[0] in interesting:
415 print("input:", line, end=' ', file=self.debug_out)
424 # we cannot ignore 'W' records
425 # as they may be 'rw'
429 self.cwd = cwd = self.last_dir = w[1]
430 self.seenit(cwd) # ignore this
432 print("%s: CWD=%s" % (self.name, cwd), file=self.debug_out)
438 pid_last_dir[last_pid] = self.last_dir
439 cwd = getv(pid_cwd, pid, self.cwd)
440 self.last_dir = getv(pid_last_dir, pid, self.cwd)
447 pid_last_dir[npid] = cwd
451 cwd = abspath(w[2], cwd, None, self.debug, self.debug_out)
452 if cwd.endswith('/.'):
454 self.last_dir = pid_last_dir[pid] = cwd
457 print("cwd=", cwd, file=self.debug_out)
460 if w[2] in self.seen:
462 print("seen:", w[2], file=self.debug_out)
466 # these are special, treat src as read and
468 self.parse_path(w[1].strip("'"), cwd, 'R', w)
469 self.parse_path(w[2].strip("'"), cwd, 'W', w)
473 self.parse_path(path, cwd, w[0], w)
478 def parse_path(self, path, cwd, op=None, w=[]):
479 """look at a path for the op specified"""
484 # we are never interested in .dirdep files as dependencies
485 if path.endswith('.dirdep'):
487 for p in self.excludes:
488 if p and path.startswith(p):
490 print("exclude:", p, path, file=self.debug_out)
492 # we don't want to resolve the last component if it is
494 path = resolve(path, cwd, self.last_dir, self.debug, self.debug_out)
497 dir,base = os.path.split(path)
500 print("seen:", dir, file=self.debug_out)
502 # we can have a path in an objdir which is a link
503 # to the src dir, we may need to add dependencies for each
505 dir = abspath(dir, cwd, self.last_dir, self.debug, self.debug_out)
506 if rdir == dir or rdir.find('./') > 0:
508 # now put path back together
509 path = '/'.join([dir,base])
511 print("raw=%s rdir=%s dir=%s path=%s" % (w[2], rdir, dir, path), file=self.debug_out)
513 if path in [self.last_dir, cwd, self.cwd, self.curdir]:
515 print("skipping:", path, file=self.debug_out)
517 if os.path.isdir(path):
519 self.last_dir = path;
521 print("ldir=", self.last_dir, file=self.debug_out)
525 # finally, we get down to it
526 if dir == self.cwd or dir == self.curdir:
528 srctop = self.find_top(path, self.srctops)
531 self.add(self.file_deps, path.replace(srctop,''), 'file')
532 self.add(self.src_deps, dir.replace(srctop,''), 'src')
535 if rdir and not rdir.startswith(srctop):
536 dir = rdir # for below
542 for dir in [dir,rdir]:
545 objroot = self.find_top(dir, self.objroots)
549 ddep = self.find_obj(objroot, dir, path, w[2])
551 self.add(self.obj_deps, ddep, 'obj')
552 if self.dpdeps and objroot.endswith('/stage/'):
553 sp = '/'.join(path.replace(objroot,'').split('/')[1:])
554 self.add(self.file_deps, sp, 'file')
556 # don't waste time looking again
561 def main(argv, klass=MetaFile, xopts='', xoptf=None):
562 """Simple driver for class MetaFile.
565 script [options] [key=value ...] "meta" ...
567 Options and key=value pairs contribute to the
568 dictionary passed to MetaFile.
571 add "SRCTOP" to the "SRCTOPS" list.
576 add "OBJROOT" to the "OBJROOTS" list.
591 # import Psyco if we can
592 # it can speed things up quite a bit
608 machine = os.environ['MACHINE']
610 conf['MACHINE'] = machine
611 machine_arch = os.environ['MACHINE_ARCH']
613 conf['MACHINE_ARCH'] = machine_arch
614 srctop = os.environ['SB_SRC']
616 conf['SRCTOPS'].append(srctop)
617 objroot = os.environ['SB_OBJROOT']
619 conf['OBJROOTS'].append(objroot)
626 opts, args = getopt.getopt(argv[1:], 'a:dS:C:O:R:m:D:H:qT:X:' + xopts)
629 conf['MACHINE_ARCH'] = a
635 conf['HOST_TARGET'] = a
637 if a not in conf['SRCTOPS']:
638 conf['SRCTOPS'].append(a)
642 if a not in conf['OBJROOTS']:
643 conf['OBJROOTS'].append(a)
651 conf['TARGET_SPEC'] = a
653 if a not in conf['EXCLUDES']:
654 conf['EXCLUDES'].append(a)
658 conf['debug'] = debug
660 # get any var=val assignments
665 if k in ['SRCTOP','OBJROOT','SRCTOPS','OBJROOTS']:
681 debug_out = getv(conf, 'debug_out', sys.stderr)
684 print("config:", file=debug_out)
685 print("psyco=", have_psyco, file=debug_out)
686 for k,v in list(conf.items()):
687 print("%s=%s" % (k,v), file=debug_out)
691 if a.endswith('.meta'):
692 if not os.path.exists(a):
695 elif a.startswith('@'):
696 # there can actually be multiple files per line
697 for line in open(a[1:]):
698 for f in line.strip().split():
699 if not os.path.exists(f):
706 print(m.src_dirdeps('\nsrc:'))
708 dpdeps = getv(conf, 'DPDEPS')
710 m.file_depends(open(dpdeps, 'wb'))
714 if __name__ == '__main__':
718 # yes, this goes to stdout
719 print("ERROR: ", sys.exc_info()[1])