#!/usr/bin/env python2.7

"""
build.py
    Copyright 2010 Guillaume Lathoud, MIT License
    glathoud@yahoo.fr

Usage:
    build.py [-h] [-v] [-o <basedirname>] [-d <google_d8_call>] [-w] [-r <copyrightfile>] file1.js some/subdir/file2.js file3.js...

    Only the target files have to be given, because implicitly
    required files will be automatically detected (using
    req_extract.js) and integrated.

Defaults:
    <basedirname>:        'build'
    <google_d8_call>:     'd8 req_extract.js'
    Break on JSCheck warnings (-w to ignore warnings)

Examples:
    build.py from.js jscheck_web.js 
    build.py from.js jscheck_web.js tomrec_web.js 
    build.py from.js jscheck_web.js tomrec_web.js dom.js

Actions:

 1. Two output dirs <basedirname>_u and <basedirname>_c will be
    created, if not yet existing. They correspond to the uncompressed
    and compressed outputs, respectively.

    - examples: 'build_u' and 'build_c'

 2. For each javascript (JS) file a corresponding JS file will be
    created in <basedirname>_u, packing its `from.req()` resources
    intelligently.

   - Loops are forbidden.
   
   - The original subdir structure (e.g. 'some/subdir/') is reproduced
     under <basedirname>_u.

 3. Each file in <basedirname>_u is compressed, the compressed output
    is written in <basedirname>_c
"""

import getopt, hashlib, os, re, shutil, StringIO, subprocess, sys, tempfile

def my_exec( cmd ):
    """Run `cmd` through the shell and return `(out, err, code)`.

    `cmd` is either a ready-made command line (a `str`) or a sequence of
    pieces, which are joined with single spaces.

    Returns a 3-tuple: everything the child wrote to stdout, everything it
    wrote to stderr, and its exit code.
    """
    if not isinstance( cmd, str ):
        cmd = ' '.join( cmd )

    p = subprocess.Popen( cmd,
                          shell=True,
                          stdin=subprocess.PIPE,
                          stdout=subprocess.PIPE,
                          stderr=subprocess.PIPE )

    # communicate() closes stdin and drains stdout and stderr together.
    # Reading stdout to EOF first and stderr afterwards deadlocks as soon
    # as the child fills the stderr pipe while we still wait on stdout.
    out, err = p.communicate()

    return (out, err, p.returncode)

def grab( filenamelist, d8_cmd ):
    """Parse the target files and build the initial requirement tables.

    Usage: req,o,all_rsrc = grab( filenamelist, d8_cmd )

    Returns `(req, o, all_rsrc)`: the per-file table filled by
    `update_req`, the raw text returned by `parse`, and the set of all
    resource names collected so far.
    """
    o = parse( filenamelist, d8_cmd )

    req, all_rsrc = {}, set()
    update_req( req, all_rsrc, o )

    # sanity check: exactly one entry per requested file
    assert set( req ) == set( filenamelist )

    return req, o, all_rsrc

def preprocess( filenamelist, req, o, all_rsrc, d8_cmd_prep_tailopt ):
    """Apply the preprocessors requested by `/*prep ... */` comments.

    Usage: filenamelist,req,o,all_rsrc = preprocess( filenamelist, req, o, all_rsrc, d8_cmd_prep_tailopt )

    Every file named in `all_rsrc` is searched for a `/*prep name1 name2*/`
    comment. A file that has one is copied into a fresh temporary directory
    and the listed preprocessors are run on the copy, in order. The only
    known preprocessor is 'tailopt', which replaces the copy's content with
    the stdout of `d8_cmd_prep_tailopt -- "<copy>"`.

    `filenamelist`, `all_rsrc` and `req` are then updated to point to the
    copy instead of the original (`all_rsrc` and `req` are modified in
    place; a changed `filenamelist` is a new list with the copy appended
    at the end).

    The temporary directory is not removed here: the returned names point
    into it.

    Returns `(filenamelist, req, o, all_rsrc)`, with `o` passed through
    untouched.

    Exits the process with the preprocessor's exit code when it fails, or
    with status 1 on an unknown preprocessor name.
    """

    # List the files that need preprocessing. Files are read in binary
    # mode, hence the byte-string pattern (same thing as a `str` pattern
    # under Python 2).

    rx = re.compile( br"\/\*prep\s+(?P<prep_list>[^\*]+?)\s*\*\/" )

    need_prep = []
    for fname in all_rsrc:
        with open( fname, 'rb' ) as f:
            mo = rx.search( f.read() )
        if mo:
            need_prep.append( ( fname, mo, ) )

    # Apply preprocessing, if needed

    if need_prep:
        outdir = tempfile.mkdtemp('_build_py_preprocess')
        print('')
        print('Preprocessing needed, outdir: %s' % outdir)

        for fname, mo in need_prep:

            # split() without argument already strips and drops empty pieces
            prep_list = mo.group('prep_list').split()
            if not prep_list:
                continue

            # NOTE: only the base name is kept, so two resources sharing a
            # base name would land on the same copy.
            outfname = os.path.join( outdir, os.path.split( fname )[1] )
            shutil.copyfile(fname, outfname)

            for prep in prep_list:

                if prep != 'tailopt':
                    sys.stderr.write( 'In file "' + outfname + '", found an unknown preprocessor "' + prep + '"' + '\n' )
                    sys.exit(1)

                # Use dedicated names for the command results: reusing `o`
                # here would clobber the `o` argument that is returned.
                # The file name is quoted, as `parse` does.
                prep_out, prep_err, prep_code = my_exec( d8_cmd_prep_tailopt + ' -- "' + outfname + '"' )
                if prep_code != 0:
                    sys.stderr.write( '\n' )
                    sys.stderr.write( 'Got an error from "' + d8_cmd_prep_tailopt + '": ' + ' ' + str( prep_err ) + '\n' )
                    sys.exit( prep_code )

                with open( outfname, 'wb' ) as f:
                    f.write( prep_out )

            print('%s preprocessed with: %s  --> result: %s' % (fname, prep_list, outfname))

            # Update to point to the new, preprocessed file

            if fname in filenamelist:
                filenamelist = list( filenamelist )
                filenamelist.remove( fname )
                filenamelist.append( outfname )

            all_rsrc.remove( fname )
            all_rsrc.add( outfname )

            if fname in req:
                req[ outfname ] = req.pop( fname )

    # Done

    return (filenamelist,req,o,all_rsrc)
        

def parse(filenamelist, d8_cmd):
    """Run `d8_cmd` on the given files and return what it printed.

    Each file name is wrapped in double quotes and appended after ' -- '.
    When the command exits with a non-zero code, its stderr is reported
    and the process exits with that same code.
    """
    quoted = [ '"' + name + '"' for name in filenamelist ]
    out, err, code = my_exec( d8_cmd + ' -- ' + ' '.join( quoted ) )

    if code == 0:
        return out

    sys.stderr.write( '\n' )
    sys.stderr.write( 'Got an error from "' + d8_cmd + '": ' + ' ' + str( err ) + '\n' )
    sys.exit( code )

def update_req(req, all_rsrc, o):
    """Fill `req` and `all_rsrc` from the text `o` produced by `parse`.

    `o` holds one comma-separated record per line:

        filename,error,warning[,start,end,value]...

    For each file not yet in `req`, an entry is created with the file size
    ('size'), the 'error' and 'warning' fields, the list of
    `{'start','end','value'}` triples ('rsrc_where') and the list of their
    values ('rsrc'). The file name and every triple value are added to
    `all_rsrc`. A file already present in `req` is left untouched.

    Blank lines are ignored and both '\\n' and '\\r\\n' line endings are
    accepted.

    Raises ValueError on a record that has fewer than three fields or
    whose trailing fields do not form complete triples.
    """
    for raw in o.split('\n'):

        # Strip the line ending whatever the platform, instead of relying
        # on os.linesep matching what the child process wrote.
        line = raw.rstrip('\r\n')
        if not line:
            continue    # e.g. a trailing empty line: nothing to stat

        arr = line.split(',')
        if len(arr) < 3:
            raise ValueError('Malformed record (need at least filename,error,warning): ' + repr(line))

        filename = arr[0]

        all_rsrc.add( filename )

        if filename in req:
            continue

        rest = arr[3:]
        if len(rest) % 3 != 0:
            raise ValueError('Malformed record (incomplete start,end,value triple): ' + repr(line))

        rsrc_where = []
        for i in range(0, len(rest), 3):
            rsrc_where.append( {
                'start' : int(rest[i]),
                'end' : int(rest[i + 1]),
                'value' : rest[i + 2],
                })
            all_rsrc.add( rest[i + 2] )

        req[ filename ] = {
            'size' : os.stat( filename ).st_size,
            'error' : arr[1],
            'warning' : arr[2],
            'rsrc' : [ w['value'] for w in rsrc_where ],
            'rsrc_where' : rsrc_where,
            }


def crash_on_error(req):
    """Exit with status 1 at the first entry of `req` whose 'error' is set."""
    for filename, info in req.items():
        if not info['error']:
            continue
        sys.stderr.write( '\n' )
        sys.stderr.write( '[ERROR] Parse error on file "{0}"'.format(filename) + '\n' )
        sys.exit(1)

def crash_on_warning(req):
    """Exit with status 1 at the first entry of `req` whose 'warning' is set."""
    for filename, info in req.items():
        if not info['warning']:
            continue
        sys.stderr.write( '\n' )
        sys.stderr.write( '[ERROR] Parse warning on file "{0}"'.format(filename) + '\n' )
        sys.exit(1)

def crash_on_loop(req, filename, acc=None):
    """Walk the 'rsrc' dependencies of `filename`; exit on a cycle.

    `acc` is the set of files already on the current dependency path
    (callers normally leave it out). Reaching a file that is already on
    the path reports the loop and exits with status 1.
    """
    if not acc:
        acc = set()

    if filename in acc:
        message = ('[ERROR] Dependency loop detected on file "{filename}" involving {acc}'
                   .format(filename=filename, acc=acc))
        sys.stderr.write( '\n' )
        sys.stderr.write( message + '\n' )
        sys.exit(1)

    # Path extended with the current file, shared by all children
    # (nobody mutates it further down).
    path = acc.union([filename])

    # Just in case: note that we follow the order of the array
    for other in req[filename]['rsrc']:
        crash_on_loop( req, other, path )

def expand(req, target_arr):
    """Compute, for each target, the ordered list of files to pack into it.

    The result is stored in `req[target]['rsrc_expanded']`: the target's
    requirements (expanded repeatedly until the list no longer changes),
    followed by the target itself. Files that are themselves targets are
    left out of other targets' lists.

    Special case 'from.js': when present it is moved to the front of
    `target_arr` (in place), it comes *first* in its own list, and
    everything packed into it is excluded from the remaining targets.

    Entries added to the list are base names (`os.path.split(p)[1]`).

    `req[...]['rsrc']` is only read, never modified.
    """

    # special case: from.js (base_pieces)

    F = 'from.js'
    if F in target_arr:
        while F in target_arr:
            target_arr.remove(F)
        target_arr[0:0] = [F]

    excluded = list(target_arr)

    S = 'rsrc_expanded'

    for filename in target_arr:

        o = req[filename]

        # Work on a copy: with an alias, the `append( filename )` below
        # could end up adding the file to its own 'rsrc' list.
        o[S] = list(o['rsrc'])

        # Expand until a fixed point is reached
        while True:
            tmp = []
            for n in o[S]:

                if (n in tmp) or (n in excluded):
                    continue

                # Requirements of `n` first, then `n` itself
                for p in list(req[n]['rsrc']) + [n]:
                    if p in excluded:
                        continue
                    if p not in tmp:
                        tmp.append(os.path.split(p)[1])

            if o[S] == tmp:
                break

            o[S] = tmp

        if filename == F:
            # special case: from.js comes *just before* its `base_pieces`,
            # which must not be packed again into the other targets
            o[S] = [filename] + o[S]
            excluded.extend( o[S] )
        else:
            o[S].append( filename )

        print('expanded: %s -> %s' % (filename, o[S]))

def cleanupdir( d ):
    """Make `d` an existing, empty directory (any previous tree is deleted)."""
    was_there = os.path.exists( d )
    if was_there:
        shutil.rmtree( d )

    if os.path.exists( d ):
        return

    os.makedirs( d )

def update_paths( req, filenamelist, transform = None, filename = None ):
    """Rewrite the resource names embedded in the generated file(s).

    Called without `filename`, the function processes every file of
    `filenamelist` in turn. With a `filename`, it reads
    `req[filename]['to']` and visits the references recorded in
    `req[filename]['rsrc_where_to']` (dicts with 'start', 'end', 'value'):

    - a value found in `transform` (a dict old -> new) is replaced by
      `transform[value]`;
    - otherwise a value found in `filenamelist` is replaced by
      `req[value]['to']`;
    - any other value is left alone.

    The recorded 'value', 'start' and 'end' are updated to match the new
    content, and the file is written back only when its content changed.

    Raises AssertionError when the text found at a recorded position is
    not the recorded value.
    """

    if filename is None:
        for one_filename in filenamelist:
            update_paths( req, filenamelist, transform, one_filename )
        return

    outputfilename = req[filename]['to']

    with open(outputfilename, 'rb') as f:
        data = f.read()
    new_data = data

    where = req[filename]['rsrc_where_to']

    # We start with the last reference and walk backwards
    for o in sorted( where, key = lambda w: w['start'], reverse = True ):

        old_v = o['value']

        if transform and (old_v in transform):
            # Custom transformation
            new_v = transform[old_v]   # dict

        elif old_v in filenamelist:
            # Default transformation
            new_v = req[old_v]['to']

        else:
            continue

        print('############## old, new: %s %s' % (old_v, new_v))

        assert new_data[ o['start']:o['end'] ] == old_v, \
            'Unexpected content at recorded position in ' + outputfilename
        new_data = new_data[:o['start']] + new_v + new_data[o['end']:]
        o['value'] = new_v
        o['end'] = o['start'] + len(new_v)

        # All following values were shifted

        delta = len(new_v) - len(old_v)
        if delta != 0:
            for o2 in where:
                if o2['start'] <= o['start']:
                    continue
                o2['start'] += delta
                o2['end'] += delta

    if data != new_data:
        with open(outputfilename, 'wb') as f:
            f.write(new_data)

def sanity_check(req, filenamelist):
    """Check that every recorded reference matches the generated file.

    For each file of `filenamelist`, the entry `req[filename]` is
    pretty-printed, then each reference of
    `req[filename]['rsrc_where_to']` is printed next to the text found at
    its ['start':'end'] position in `req[filename]['to']`.

    Raises AssertionError at the first mismatch.
    """
    import pprint

    for filename in filenamelist:

        pprint.pprint(req[filename])

        target = req[filename]['to']
        with open(target, 'rb') as f:
            data = f.read()

        for o in req[filename]['rsrc_where_to']:

            found = data[ o['start'] : o['end'] ]

            print(o['value'])
            print(found)
            assert o['value'] == found, \
                'Recorded reference does not match the content of ' + target

    
def main(filenamelist, output, d8_cmd="d8 req_extract.js", d8_cmd_prep_tailopt="d8 prep_tailopt.js", compress=False, verbose=False, warning_ignore=False, copyrightfile="build.copyright.txt"):
    """Build the uncompressed (`<output>_u`) and compressed (`<output>_c`) files.

    Parameters:
      filenamelist         target JS files.
      output               base name of the two output directories.
      d8_cmd               command used (through `grab`/`parse`) to extract
                           the requirements of each file.
      d8_cmd_prep_tailopt  command handed to `preprocess` for 'tailopt'.
      compress, verbose    accepted but not referenced in this function.
      warning_ignore       when true, `crash_on_warning` is not called.
      copyrightfile        file whose content, if it exists, is written at
                           the top of each compressed file.

    Steps, in order: collect and check the requirements; preprocess; collect
    and check again; expand; write the packed files to `<output>_u`; rewrite
    the embedded paths; insert the hash table into 'from.js' (if it is a
    target) and rename it; copy everything to `<output>_c`; rewrite the paths
    again; compress each copy in place.

    Exits the process (via `sys.exit`) on missing resources or on a missing
    `hash_table` definition, and raises IOError when an output file already
    exists, a built file is missing, or the compression command fails.
    """

    output_u = output + '_u'
    output_c = output + '_c'

    # for each filename, grab its required resources

    req,o,all_rsrc = grab( filenamelist, d8_cmd )

    # recursively parse all required files
    # (loop until every known resource has an entry in `req`)

    while True:
        to_parse = set(all_rsrc).difference(set(req.keys()))
        if not to_parse:
            break

        update_req( req, all_rsrc, parse( to_parse, d8_cmd ) )

    # make sure all required resources are available

    missing_rsrc = filter( lambda s: not os.path.exists(s), all_rsrc )
    
    if missing_rsrc:
        print >> sys.stderr
        print >> sys.stderr, '[ERROR] Not all resources found! missing:', missing_rsrc
        sys.exit(1)
    
    # checks

    crash_on_error( req )

    if not warning_ignore:
        crash_on_warning( req )

    for filename in req:
        crash_on_loop( req, filename )

    # apply preprocessors, if any.
    # - declared with a Javascript comment /*prep -> */  /*prep tailopt */ /*prep -> tailopt*/
    # - applied in the order (e.g. -> first, then tailopt, in /*prep -> tailopt*/).
    #
    # Note: this will probably copy some of the files to a temporary directory...

    filenamelist,req,o,all_rsrc = preprocess( filenamelist,req,o,all_rsrc, d8_cmd_prep_tailopt )
    
    # ...so we have to do it again: for each filename, grab its required resources.
    # (this discards the `req`, `o` and `all_rsrc` returned by `preprocess`;
    # only the updated `filenamelist` is kept)

    req,o,all_rsrc = grab( filenamelist, d8_cmd )

    # recursively parse all required files

    while True:
        to_parse = set(all_rsrc).difference(set(req.keys()))
        if not to_parse:
            break

        update_req( req, all_rsrc, parse( to_parse, d8_cmd ) )
        
    # make sure all required resources are available

    missing_rsrc = filter( lambda s: not os.path.exists(s), all_rsrc )
    
    if missing_rsrc:
        print >> sys.stderr
        print >> sys.stderr, '[ERROR] Not all resources found! missing:', missing_rsrc
        sys.exit(1)
    
    # checks
    # NOTE(review): unlike the first pass, `crash_on_warning` is not called
    # here -- confirm this is intended.

    crash_on_error( req )

    for filename in req:
        crash_on_loop( req, filename )

    # for each target, expand all resources that are not themselves targets
    
    expand(req, filenamelist)
    
    # produce the expanded files

    print '.'

    cleanupdir( output_u )


    # Decide the output path of each target: <output_u>/<base name>.
    # NOTE(review): nothing has been written yet at this point, so this
    # check presumably cannot catch two targets sharing a base name --
    # TODO confirm.

    for filename in filenamelist:

        outputfilename = os.path.join( output_u, os.path.split( filename )[1] )
        if os.path.exists( outputfilename ):
            raise IOError("Output file already exists !!" + outputfilename)

        req[filename]['to'] = outputfilename


    separator = os.linesep + '// ' + ('-' * 70) + os.linesep

    # Write each target: the files of its 'rsrc_expanded' list, one after
    # the other, each followed by `separator`.

    for filename in filenamelist:

        outputfilename = req[filename]['to']

        rsrc_where_to = []
        offset = 0   # position, in the output, of the file being appended

        print '=' * 30, filename

        out = open(outputfilename, 'wb')
        for f in req[filename]['rsrc_expanded']:

            data = open(f,'rb').read()
            out.write(data)
            # trailing comma: no newline added after the separator, which
            # keeps the `offset` computation below exact
            print >> out, separator,
            
            # Mark where the package names are in the source, because
            # we'll change them a bit (e.g. insert 'build_u/' path and
            # hash).

            for o in req[f]['rsrc_where']:
                
                # shallow copy, so that the original record stays untouched
                o2 = {}
                for k in o:
                    o2[k] = o[k]

                o2['start'] += offset
                o2['end'] += offset

                o2['value'] = os.path.split( o2['value'] )[1]

                rsrc_where_to.append(o2)

            # For the next included files
            offset += len(data) + len(separator)

        req[filename]['rsrc_where_to'] = rsrc_where_to

        out.close()

        print 'Wrote:', outputfilename

        req[filename]['to'] = outputfilename

    # Update the paths to the target files
    
    update_paths( req, filenamelist )

    # Hashes for browser cachebust, similar to that of:
    # http://glat.info/pub/dojo_cachebust_hash/dojo_cachebust_hash.xhtml

    def hh( data ):
        # "<md5 hex digest>.<length of data>"
        return '.'.join( [ hashlib.md5( data ).hexdigest() , str( len( data ) ) ] )

    # output path -> hash, for every target except 'from.js'
    hash_table = {}
    for filename in filenamelist:

        if filename == 'from.js':
            continue

        current = req[filename]['to']
        data = open(current, 'rb').read()

        hash_table[current] = hh( data )

    
    import pprint
    pprint.pprint( hash_table)

    s = 'from.js'
    if s in filenamelist:
        
        old = req[s]['to']

        # insert the hash table in "from.js" (package cachebust)
        
        old_data = open( old, 'rb' ).read()

        # looks for an empty definition: hash_table = {}
        rx = re.compile( r'(hash_table)\s*=\s*\{\s*\}' )
        mo = rx.search( old_data )
        if mo == None:
            print >> sys.stderr, ''
            print >> sys.stderr, '[ERROR] Could not find the `hash_table` definition in "from.js"'
            sys.exit(1)

        # the Python `str()` of the dict is inserted as is
        new_str = mo.group(1) + '=' + str(hash_table)

        new_data = old_data[:mo.start()] + new_str + old_data[mo.end():]
        offset = len(new_data) - len(old_data)
        
        req[s]['to'] += '.' + hh( new_data ) + '.js'   # Append also a .js to ensure MIME type (on most servers)

        # update existing reference (most likely `base_pieces`)
        # (only the references located after the replaced definition move)

        for o in req[s]['rsrc_where_to']:
            if o['start'] < mo.end():
                continue
            o['start'] += offset
            o['end'] += offset

        # add the new references present in the `hash_table`
        # (`new_str` starts at `mo.start()` in `new_data`, hence the shift)

        rx2 = re.compile(r"(?P<quote>[\"'])(?P<name>[^'\"]+)(?P=quote)\s*:")
        for mo2 in rx2.finditer(new_str):
            req[s]['rsrc_where_to'].append(
                {
                'value' : mo2.group('name'),
                'start' : mo.start() + mo2.start('name'),
                'end'   : mo.start() + mo2.end('name'),
                }
                )
            import pprint
            pprint.pprint('----------')
            o = req[s]['rsrc_where_to'][-1]
            pprint.pprint(o)
            # NOTE(review): `data` is whatever was last read in an earlier
            # loop, not `new_data`, so this debug output looks at another
            # buffer than the one the positions refer to -- TODO confirm.
            pprint.pprint(data[o['start']:o['end']])
        
        # change the filename of "from.js" (entry point cachebust)
        
        open( req[s]['to'], 'wb' ).write( new_data )

        os.remove( old )
    
    # --- Sanity check

    sanity_check( req, filenamelist )

    # --- Produce the compressed files

    print '.'

    cleanupdir( output_c )

    # old path (under output_u) -> new path (under output_c)
    transform = {}

    for filename in filenamelist:

        # copy the file
        
        old = req[filename]['to']
        # same path, with the `output_u` directory component replaced by `output_c`
        new = os.path.join( *(map(lambda s: output_c if (s == output_u) else s, os.path.split( old ))) )
        if os.path.exists( new ):
            raise IOError("Output file already exists !!" + new)
        shutil.copyfile(old, new)
        
        req[filename]['to'] = new

        transform[old] = new

    # update the contents accordingly

    update_paths( req, filenamelist, transform )

    sanity_check( req, filenamelist )

    # code compression

    # the copyright text stays empty when `copyrightfile` does not exist
    copyrighttext = ''
    if os.path.exists( copyrightfile ):
        copyrighttext = open( copyrightfile, 'rb' ).read()

    for filename in filenamelist:

        new = req[filename]['to']

        if not os.path.exists( new ):
            raise IOError("Built file does not exist !! " + new)

        # NOTE(review): the `d8` executable is hard-coded here, the `d8_cmd`
        # parameter is not used for this step.
        cmd = ("d8 -e \"load('from.js');from.req('jscheck.js','narcissus.code.compress.js')(function () {" + "print(narcissus.code.compress(read('" + new + "')).compressed_code);" + "});\"")

        o,e,c = my_exec(cmd)
        if c != 0:
            raise IOError("Failed to parse and regenerate file " + new)

        # overwrite the copy: copyright text (plus a newline), then the
        # compressed code
        f = open(new, 'wb')
        print >> f, copyrighttext
        f.write(o)
        f.close()
        
        print 'build.py: compressed "' + new + '"'

    if not copyrighttext:
        print '(no copyrighttext)'
    else:
        print 'Copyright text:'
        print copyrighttext
    
if __name__ == '__main__':
    # Command-line entry point: parse the options, then call main().
    try:
        # A ':' (short form) or '=' (long form) marks an option that takes
        # an argument. -r / --copyrightfile needs one: without the marker,
        # getopt hands over an empty value and the file name ends up among
        # the positional arguments.
        opts, args = getopt.gnu_getopt(
            sys.argv[1:],
            "ho:d:cvwr:",
            ["help", "output=", "d8_cmd=", "d8_cmd_prep_tailopt=", "compress", "verbose", "warning_ignore", "copyrightfile="])
    except getopt.GetoptError as err:
        # print help information and exit:
        print(str(err)) # will print something like "option -a not recognized"
        print(__doc__)
        sys.exit(2)

    # Defaults
    d8_cmd = "d8 req_extract.js"
    d8_cmd_prep_tailopt = "d8 prep_tailopt.js"
    verbose = False
    compress = False
    output  = None
    warning_ignore = False
    copyrightfile = "build.copyright.txt"

    for o, a in opts:
        if o in ("-v", '--verbose'):
            verbose = True
        elif o in ("-h", "--help"):
            print(__doc__)
            sys.exit()
        elif o in ('-c', '--compress'):
            compress = True
        elif o in ('-d', '--d8_cmd'):
            d8_cmd = a
        elif o == '--d8_cmd_prep_tailopt':
            # Long form only: getopt short options are single letters, so
            # "-dpt" is read as "-d" with the argument "pt".
            d8_cmd_prep_tailopt = a
        elif o in ('-o', '--output'):
            output = a
        elif o in ('-w', '--warning_ignore'):
            warning_ignore = True
        elif o in ('-r', '--copyrightfile'):
            copyrightfile = a
        else:
            assert False, "unhandled option"

    if output is None:
        output = "build"

    # At least one target file is required
    if len(args) < 1:
        print(__doc__)
        sys.exit()

    main(args, output, d8_cmd, d8_cmd_prep_tailopt, compress, verbose, warning_ignore, copyrightfile)