"""
Dotaplus win impact analysis
"""

import pickle
from collections import Counter, defaultdict
# Player id that marks an anonymous player; equal to 2**32 - 1.
ANON = 0xFFFFFFFF
# Maps player id -> dplus_time as loaded by main(); unknown ids read as 0.
PLAYER_DPLUS = defaultdict(int)

def num_humans(list_players):
    """ Count the players whose id is not ANON

    list_players -- iterable of player ids
    Returns the number of entries that differ from ANON (0 when empty)
    """
    identified = [player for player in list_players if player != ANON]
    return len(identified)


def null_hyp(match):
    """ Null hypothesis baseline

    match -- match record; it is not inspected
    Returns the constant 0.5 for every match
    """
    baseline = 0.5
    return baseline


def hyp1(match, u):
    """ Hypothesis that shared public data is correlated with player quality

    match -- record with 'dire' and 'radiant' lists of player ids
    u -- value returned when radiant has more non-anon players than dire

    Returns 0.5 when both teams have the same number of non-anon players,
    u when radiant has more of them and 1-u when dire has more
    """
    dire_known = num_humans(match['dire'])
    radiant_known = num_humans(match['radiant'])
    if dire_known < radiant_known:
        return u
    if dire_known > radiant_known:
        return 1-u
    return 0.5


def hyp1_helper(match):
    """ Hypothesis 1 helper: score a single match

    Returns 0 when both teams have the same number of non-anon players,
    1 when the team with more non-anon players won and -1 when it lost
    """
    dire_known = num_humans(match['dire'])
    radiant_known = num_humans(match['radiant'])
    if dire_known == radiant_known:
        return 0

    # The favoured side won exactly when "dire has more" disagrees with
    # "radiant won".
    dire_has_more = dire_known > radiant_known
    if dire_has_more != match['radiant_win']:
        return 1
    return -1


def sum_dplus(list_players, time):
    """ Count players whose recorded dotaplus time precedes the given time

    list_players -- iterable of player ids
    time -- reference time, compared against the values in PLAYER_DPLUS

    A player counts when the PLAYER_DPLUS entry is non-zero and strictly
    smaller than time. Ids missing from PLAYER_DPLUS get inserted with 0
    by the defaultdict lookup and never count.
    """
    total = 0
    for player in list_players:
        recorded = PLAYER_DPLUS[player]
        if recorded < time and recorded != 0:
            total += 1
    return total


def hyp2(match, u):
    """ Hypothesis that dotaplus helps to win

    match -- record with 'dire', 'radiant' and 'start_time'
    u -- value returned when radiant has more dotaplus players than dire

    Returns 0.5 when both teams have the same dotaplus count, u when
    radiant has more and 1-u when dire has more
    """
    dplus_dire = sum_dplus(match['dire'], match['start_time'])
    dplus_radiant = sum_dplus(match['radiant'], match['start_time'])
    # Both counts are whole numbers, so a gap below 0.5 is simply a tie.
    if dplus_dire == dplus_radiant:
        return 0.5

    return u if dplus_dire < dplus_radiant else 1-u



def hyp2_helper(match):
    """ Hypothesis 2 helper: score a single match

    Returns 0 when both teams have the same dotaplus count, 1 when the
    team with more dotaplus players won and -1 when it lost
    """
    dplus_dire = sum_dplus(match['dire'], match['start_time'])
    dplus_radiant = sum_dplus(match['radiant'], match['start_time'])
    # Both counts are whole numbers, so a gap below 0.01 is simply a tie.
    if dplus_dire == dplus_radiant:
        return 0

    # The favoured side won exactly when "dire has more" disagrees with
    # "radiant won".
    dire_has_more = dplus_dire > dplus_radiant
    if dire_has_more != match['radiant_win']:
        return 1
    return -1

def hyp3_helper(match):
    """ Strict hypothesis 2 helper: score a single match

    Only matches where exactly one team has dotaplus players are scored.
    Returns 0 when neither or both teams have dotaplus players, 1 when the
    only team with dotaplus players won and -1 when it lost
    """
    dplus_dire = sum_dplus(match['dire'], match['start_time'])
    dplus_radiant = sum_dplus(match['radiant'], match['start_time'])
    # Counts are non-negative whole numbers, so "equal, or both above zero"
    # is the same as "both sides agree on having any dotaplus player".
    dire_has_dplus = dplus_dire > 0
    radiant_has_dplus = dplus_radiant > 0
    if dire_has_dplus == radiant_has_dplus:
        return 0

    # Here exactly one side has dotaplus, so "dire has it" is equivalent to
    # "dire has more".
    if dire_has_dplus != match['radiant_win']:
        return 1
    return -1


def _load_pickle(path):
    """ Unpickle the object stored at path and close the file afterwards

    NOTE: unpickling can execute arbitrary code; only use this on files
    produced by a trusted source.
    """
    with open(path, 'rb') as handle:
        return pickle.load(handle)


def _print_win_stats(outcomes):
    """ Print the win/loss summary for a Counter of helper results

    outcomes -- Counter keyed by helper result (1 = win, -1 = loss);
                any other key is ignored
    The mean is printed as nan when there is no win or loss at all, instead
    of dividing by zero.
    """
    wins, losses = outcomes[1], outcomes[-1]
    n = wins + losses
    sample_mean = wins/n if n else float('nan')
    print('n = %d, mean = %.4f, win = %d, loss = %d' % (n, sample_mean, wins, losses))


def _player_win_rate(matches):
    """ Fraction of the given non-empty match list that counts as a win

    A match counts as a win when "player_slot >> 7 == 0" agrees with
    'radiant_win'.
    """
    won = sum(1 for match in matches
              if ((match['player_slot'] >> 7) == 0) == match['radiant_win'])
    return won/len(matches)


def main():
    """ Main script

    Reads 'matches', './matches.player_specific' and './dota_plus.status.13'
    from the working directory, fills PLAYER_DPLUS in place and prints the
    win statistics for each hypothesis.
    """
    all_matches = _load_pickle('matches')
    player_matches = _load_pickle('./matches.player_specific')['player_matches']
    # The before/after comparison averages over both lists, so a player with
    # either list empty cannot be scored (it would divide by zero).
    player_matches = {k: v for k, v in player_matches.items()
                      if len(v['before']) > 0 and len(v['after']) > 0}
    print('Num of matches =', len(all_matches))

    # Due to bad parallelization, the status file may hold several entries
    # for the same player. Keep the largest dplus_time seen for each one.
    # NOTE(review): an earlier comment said to prefer the entry with
    # dplus = 0 on conflict, which is not what max() does -- confirm intent.
    with open('./dota_plus.status.13') as f:
        for line in f:
            fields = line.split()
            if not fields:
                continue  # tolerate blank lines
            player, _dplus, dplus_time, _rank = fields
            player = int(player)
            dplus_time = int(dplus_time)
            PLAYER_DPLUS[player] = max(PLAYER_DPLUS[player], dplus_time)

    print("Analyzing shared data bias")
    _print_win_stats(Counter(hyp1_helper(x) for x in all_matches))

    # From here on only keep matches where both teams expose the same,
    # non-zero number of non-anon players.
    all_matches = [match for match in all_matches
                   if num_humans(match['dire']) == num_humans(match['radiant'])
                   and num_humans(match['dire']) > 0]
    print('Num of matches with equal information =', len(all_matches))

    print("Analyzing dota plus bias")
    _print_win_stats(Counter(hyp2_helper(x) for x in all_matches))

    print("Analyzing strict dota plus bias")
    _print_win_stats(Counter(hyp3_helper(x) for x in all_matches))

    print("Analyzing strict dota plus bias => player-level")
    n_players = len(player_matches)
    if n_players:
        win1 = sum(_player_win_rate(v['before']) for v in player_matches.values())/n_players
        win2 = sum(_player_win_rate(v['after']) for v in player_matches.values())/n_players
    else:
        # No player qualifies: report nan rather than dividing by zero.
        win1 = win2 = float('nan')
    print('n1 = %d, mean1 = %.4f, mean2 = %.4f' % (n_players, win1, win2))

# Run the analysis only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
