#!/usr/bin/env python
# Copyright (c) 2010-2012 OpenStack, LLC.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
# implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from ConfigParser import ConfigParser
from optparse import OptionParser
from sys import exit, stdout, stderr
from time import time
try:
    import simplejson as json
except ImportError:
    import json

from eventlet import GreenPool, hubs, patcher, Timeout
from eventlet.pools import Pool

from swift.common import direct_client
from swift.common.client import ClientException, Connection, get_auth
from swift.common.ring import Ring
from swift.common.utils import compute_eta, get_time_units, TRUE_VALUES


unmounted = []
json_output = False


def get_error_log(prefix):

    def error_log(msg_or_exc):
        global unmounted
        if hasattr(msg_or_exc, 'http_status') and \
                msg_or_exc.http_status == 507:
            identifier = '%s:%s/%s'
            if identifier not in unmounted:
                unmounted.append(identifier)
                print >>stderr, 'ERROR: %s:%s/%s is unmounted -- This will ' \
                    'cause replicas designated for that device to be ' \
                    'considered missing until resolved or the ring is ' \
                    'updated.' % (msg_or_exc.http_host, msg_or_exc.http_port,
                    msg_or_exc.http_device)
                stderr.flush()
        if not hasattr(msg_or_exc, 'http_status') or \
                msg_or_exc.http_status not in (404, 507):
            print >>stderr, 'ERROR: %s: %s' % (prefix, msg_or_exc)
            stderr.flush()
    return error_log


def container_dispersion_report(coropool, connpool, account, container_ring,
                                retries):
    with connpool.item() as conn:
        containers = [c['name'] for c in conn.get_account(prefix='dispersion_',
                                                         full_listing=True)[1]]
    containers_listed = len(containers)
    if not containers_listed:
        print >>stderr, 'No containers to query. Has ' \
                        'swift-dispersion-populate been run?'
        stderr.flush()
        return
    retries_done = [0]
    containers_queried = [0]
    container_copies_found = [0, 0, 0, 0]
    begun = time()
    next_report = [time() + 2]

    def direct(container, part, nodes):
        found_count = 0
        for node in nodes:
            error_log = get_error_log('%(ip)s:%(port)s/%(device)s' % node)
            try:
                attempts, _junk = direct_client.retry(
                                direct_client.direct_head_container, node,
                                part, account, container, error_log=error_log,
                                retries=retries)
                retries_done[0] += attempts - 1
                found_count += 1
            except ClientException, err:
                if err.http_status not in (404, 507):
                    error_log('Giving up on /%s/%s/%s: %s' % (part, account,
                              container, err))
            except (Exception, Timeout), err:
                error_log('Giving up on /%s/%s/%s: %s' % (part, account,
                          container, err))
        container_copies_found[found_count] += 1
        containers_queried[0] += 1
        if time() >= next_report[0]:
            next_report[0] = time() + 5
            eta, eta_unit = compute_eta(begun, containers_queried[0],
                                        containers_listed)
            if not json_output:
                print '\r\x1B[KQuerying containers: %d of %d, %d%s left, %d ' \
                      'retries' % (containers_queried[0], containers_listed,
                      round(eta), eta_unit, retries_done[0]),
                stdout.flush()
    container_parts = {}
    for container in containers:
        part, nodes = container_ring.get_nodes(account, container)
        if part not in container_parts:
            container_parts[part] = part
            coropool.spawn(direct, container, part, nodes)
    coropool.waitall()
    distinct_partitions = len(container_parts)
    copies_expected = distinct_partitions * container_ring.replica_count
    copies_found = sum(a * b for a, b in enumerate(container_copies_found))
    value = 100.0 * copies_found / copies_expected
    elapsed, elapsed_unit = get_time_units(time() - begun)
    if not json_output:
        print '\r\x1B[KQueried %d containers for dispersion reporting, ' \
              '%d%s, %d retries' % (containers_listed, round(elapsed),
              elapsed_unit, retries_done[0])
        if containers_listed - distinct_partitions:
            print 'There were %d overlapping partitions' % (
                  containers_listed - distinct_partitions)
        if container_copies_found[2]:
            print 'There were %d partitions missing one copy.' % \
                  container_copies_found[2]
        if container_copies_found[1]:
            print '! There were %d partitions missing two copies.' % \
                  container_copies_found[1]
        if container_copies_found[0]:
            print '!!! There were %d partitions missing all copies.' % \
                  container_copies_found[0]
        print '%.02f%% of container copies found (%d of %d)' % (
              value, copies_found, copies_expected)
        print 'Sample represents %.02f%% of the container partition space' % (
              100.0 * distinct_partitions / container_ring.partition_count)
        stdout.flush()
        return None
    else:
        return {'retries': retries_done[0],
                'overlapping': containers_listed - distinct_partitions,
                'missing_one': container_copies_found[2],
                'missing_two': container_copies_found[1],
                'missing_all': container_copies_found[0],
                'pct_found': value,
                'copies_found': copies_found,
                'copies_expected': copies_expected}


def object_dispersion_report(coropool, connpool, account, object_ring,
                             retries):
    container = 'dispersion_objects'
    with connpool.item() as conn:
        try:
            objects = [o['name'] for o in conn.get_container(container,
                                   prefix='dispersion_', full_listing=True)[1]]
        except ClientException, err:
            if err.http_status != 404:
                raise
            print >>stderr, 'No objects to query. Has ' \
                            'swift-dispersion-populate been run?'
            stderr.flush()
            return
    objects_listed = len(objects)
    if not objects_listed:
        print >>stderr, 'No objects to query. Has swift-dispersion-populate ' \
                        'been run?'
        stderr.flush()
        return
    retries_done = [0]
    objects_queried = [0]
    object_copies_found = [0, 0, 0, 0]
    begun = time()
    next_report = [time() + 2]

    def direct(obj, part, nodes):
        found_count = 0
        for node in nodes:
            error_log = get_error_log('%(ip)s:%(port)s/%(device)s' % node)
            try:
                attempts, _junk = direct_client.retry(
                                direct_client.direct_head_object, node, part,
                                account, container, obj, error_log=error_log,
                                retries=retries)
                retries_done[0] += attempts - 1
                found_count += 1
            except ClientException, err:
                if err.http_status not in (404, 507):
                    error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
                              container, obj, err))
            except (Exception, Timeout), err:
                error_log('Giving up on /%s/%s/%s/%s: %s' % (part, account,
                          container, obj, err))
        object_copies_found[found_count] += 1
        objects_queried[0] += 1
        if time() >= next_report[0]:
            next_report[0] = time() + 5
            eta, eta_unit = compute_eta(begun, objects_queried[0],
                                        objects_listed)
            if not json_output:
                print '\r\x1B[KQuerying objects: %d of %d, %d%s left, %d ' \
                      'retries' % (objects_queried[0], objects_listed,
                                   round(eta), eta_unit, retries_done[0]),
            stdout.flush()
    object_parts = {}
    for obj in objects:
        part, nodes = object_ring.get_nodes(account, container, obj)
        if part not in object_parts:
            object_parts[part] = part
            coropool.spawn(direct, obj, part, nodes)
    coropool.waitall()
    distinct_partitions = len(object_parts)
    copies_expected = distinct_partitions * object_ring.replica_count
    copies_found = sum(a * b for a, b in enumerate(object_copies_found))
    value = 100.0 * copies_found / copies_expected
    elapsed, elapsed_unit = get_time_units(time() - begun)
    if not json_output:
        print '\r\x1B[KQueried %d objects for dispersion reporting, ' \
              '%d%s, %d retries' % (objects_listed, round(elapsed),
              elapsed_unit, retries_done[0])
        if objects_listed - distinct_partitions:
            print 'There were %d overlapping partitions' % (
                  objects_listed - distinct_partitions)
        if object_copies_found[2]:
            print 'There were %d partitions missing one copy.' % \
                  object_copies_found[2]
        if object_copies_found[1]:
            print '! There were %d partitions missing two copies.' % \
                  object_copies_found[1]
        if object_copies_found[0]:
            print '!!! There were %d partitions missing all copies.' % \
                  object_copies_found[0]
        print '%.02f%% of object copies found (%d of %d)' % \
                    (value, copies_found, copies_expected)
        print 'Sample represents %.02f%% of the object partition space' % (
              100.0 * distinct_partitions / object_ring.partition_count)
        stdout.flush()
        return None
    else:
        return {'retries': retries_done[0],
                'overlapping': objects_listed - distinct_partitions,
                'missing_one': object_copies_found[2],
                'missing_two': object_copies_found[1],
                'missing_all': object_copies_found[0],
                'pct_found': value,
                'copies_found': copies_found,
                'copies_expected': copies_expected}


if __name__ == '__main__':
    patcher.monkey_patch()
    hubs.get_hub().debug_exceptions = False

    parser = OptionParser(usage='''
Usage: %prog [options] [conf_file]

[conf_file] defaults to /etc/swift/stats.conf'''.strip())
    parser.add_option('-j', '--dump-json', action='store_true', default=False,
                      help='dump dispersion report in json format')

    options, args = parser.parse_args()

    conffile = '/etc/swift/dispersion.conf'
    if args:
        conffile = args.pop(0)

    c = ConfigParser()
    if not c.read(conffile):
        exit('Unable to read config file: %s' % conffile)
    conf = dict(c.items('dispersion'))
    swift_dir = conf.get('swift_dir', '/etc/swift')
    dispersion_coverage = int(conf.get('dispersion_coverage', 1))
    retries = int(conf.get('retries', 5))
    concurrency = int(conf.get('concurrency', 25))
    if options.dump_json or conf.get('dump_json', 'no').lower() in TRUE_VALUES:
        json_output = True

    coropool = GreenPool(size=concurrency)

    url, token = get_auth(conf['auth_url'], conf['auth_user'],
                          conf['auth_key'])
    account = url.rsplit('/', 1)[1]
    connpool = Pool(max_size=concurrency)
    connpool.create = lambda: Connection(conf['auth_url'],
                                conf['auth_user'], conf['auth_key'],
                                retries=retries,
                                preauthurl=url, preauthtoken=token)

    container_ring = Ring(os.path.join(swift_dir, 'container.ring.gz'))
    object_ring = Ring(os.path.join(swift_dir, 'object.ring.gz'))

    container_result = container_dispersion_report(coropool, connpool,
                                                   account, container_ring,
                                                   retries)
    object_result = object_dispersion_report(coropool, connpool, account,
                                             object_ring, retries)
    if json_output:
        print json.dumps({"container": container_result,
                          "object": object_result})
