#!/usr/bin/python
# -*- coding: utf-8 -*-
import sqlite3
import math
import random
import os
import sys
import traceback

class StdevFunc:
    def __init__(self):
        self.M = 0.0    #Mean
        self.V = 0.0    #Used to Calculate Variance
        self.S = 0.0    #Standard Deviation
        self.k = 1      #Population or Small 

    def step(self, value):
        try:
            if value is None:
                return None

            tM = self.M
            self.M += (value - tM) / self.k
            self.V += (value - tM) * (value - self.M)
            self.k += 1
        except Exception as EXStep:
            pass
            return None    

    def finalize(self):
        try:
            if ((self.k - 1) < 3):
                return None
            
            #Now with our range Calculated, and Multiplied finish the Variance Calculation
            if (self.k > 40):
                self.V = (self.V / (self.k-2))
            else:
                #For Small Populations
                self.V = (self.V / (self.k-1))

            #Standard Deviation is the Square Root of Variance
            self.S = math.sqrt(self.V)

            return self.S
        except Exception as EXFinal:
            pass
            return None 

def Histogram(Population):
    try:
        BinCount = 6 
        More = 0

        #a = 1          #For testing Trapping
        #b = 0          #and Trace Back
        #c = (a / b)    #with Detailed Info
        
        #If you want to store the Database
        #uncDatabase = os.path.join(os.getcwd(),"BellCurve.db3")
        #con = sqlite3.connect(uncDatabase)
        
        #If you want the database in Memory
        con = sqlite3.connect(':memory:')    

        #row_factory that allows accessing fields by Row and Col Name
        con.row_factory = sqlite3.Row

        #Add our Non Persistent, Runtime Standard Deviation Function to the Database
        con.create_aggregate("Stdev", 1, StdevFunc)

        #Lets Grab a Cursor
        cur = con.cursor()

        #Lets Initialize some tables, so each run with be clear of previous run
        cur.executescript('drop table if exists MyData;') #executescript requires ; at the end of the string
        cur.execute("create table IF NOT EXISTS MyData('Val' FLOAT)")
        cur.executescript('drop table if exists Bins;')   #executescript requires ; at the end of the string
        cur.execute("create table IF NOT EXISTS Bins('Bin' UNSIGNED BIG INT, 'Val' FLOAT, 'Frequency' UNSIGNED BIG INT)")

        #Lets generate some random data, and insert in to the Database
        for n in range(0,(Population)):
            sql = "insert into MyData(Val) values ({0})".format(random.uniform(-1,1))
            cur.execute(sql)

        #Now let’s calculate some built in Aggregates, that SQLite comes with
        cur.execute("select Avg(Val) from MyData")
        Average = cur.fetchone()[0]
        cur.execute("select Max(Val) from MyData")
        Max = cur.fetchone()[0]
        cur.execute("select Min(Val) from MyData")
        Min = cur.fetchone()[0]
        cur.execute("select Count(Val) from MyData")
        Records = cur.fetchone()[0]

        #Now let’s get Standard Deviation using our function that we added
        cur.execute("select Stdev(Val) from MyData")
        Stdev = cur.fetchone()[0]

        if (Stdev == None):
            print("================================   Data Error --=================================")
            print("                 Insufficient Population Size, Or Bad Data.")   
            print("**********************************************************************************")
        elif (abs(Max-Min) == 0):
            print("================================   Data Error --=================================")
            print(" The entire Population Contains Identical values, Distribution Incalculable..")
            print("**********************************************************************************")
        else:  
            Bin = []        #Holds the Bin Values
            Frequency = []  #Holds the Bin Frequency for each Bin

            #Establish the 1st Bin, which is based on (Standard Deviation * 3) being subtracted from the Mean
            LowBin = (Average - ((3 * Stdev)))
            Bin.append(LowBin)

            #We start the Index with 1 because we already added the LowBin and as such increased the Bin Count is 1
            for b in range(0,(BinCount)):
                #Establish the remaining Bins, which is basically adding 1 Standard Deviation for each interation, -3, -2, -1, 1, 2, 3             
                Bin.append((Bin[(b)] + Stdev))
                Frequency.append(0)

            for b in range(0,(BinCount)):              
                #With our Bins Calculated, lets exploit the Database and have it do the hard work calculating distribution
                #of all the Bins, with SQL's between operator.
                sqlBinFreq = "select count(*) as Frequency from MyData where val between {0} and {1}".format(Bin[b], Bin[(b + 1)])


                #If the Database Reports Values that fall between the Current Bin, Store the Frequency to a Bins Table. 
                for rowBinFreq in cur.execute(sqlBinFreq):
                    Frequency[b] = rowBinFreq['Frequency']
                    sqlBinFreqInsert = "insert into Bins (Bin, Val, Frequency) values ({0}, {1}, {2})".format(b, Bin[b], Frequency[b])
                    cur.execute(sqlBinFreqInsert)
 
                #Allthough the Demo is not likley produce values that fall outside of Standard Distribution
                #if this demo was to Calculate with real data, we want to know how many non-Standard data points we have. 
                More = (More + Frequency[b])
                
            #Now Report the Analysis
            print("================================ The Population ==================================")
            print("             {0} {1} {2} {3} {4} {5}".format("Size".rjust(10, ' '), "Max".rjust(10, ' '), "Min".rjust(10, ' '), "Mean".rjust(10, ' '), "Range".rjust(10, ' '), "Stdev".rjust(10, ' ')))
            print("Aggregates:  {0:10d} {1:10.4f} {2:10.4f} {3:10.4f} {4:10.4f} {5:10.4f}".format(Population, Max, Min, Average, abs(Max-Min), Stdev))
            print("================================ The Distribution ================================") 
            print("Bins:        {0:10.0f} {1:10.0f} {2:10.0f} {3:10.0f} {4:10.0f} {5:10.0f}".format(1, 2, 3, 4, 5, 6))
            print("----------------------------------------------------------------------------9-----") 
            print("Ranges:      {0:10.4f} {1:10.4f} {2:10.4f} {3:10.4f} {4:10.4f} {5:10.4f}".format(Bin[0], Bin[1], Bin[2], Bin[3], Bin[4], Bin[5]))
            print("Frequency:   {0:10.0f} {1:10.0f} {2:10.0f} {3:10.0f} {4:10.0f} {5:10.0f}".format(Frequency[0], Frequency[1], Frequency[2], Frequency[3], Frequency[4], Frequency[5]))
            print("----------------------------------------------------------------------------------") 
            print("More:        {0:10.0f}".format(abs((More) - Records)))
            print("Bined:       {0:10.0f}".format(abs((More))))
            print("================================= The Histogram ==================================")  

            LabelString = "{0} {1} {2} {3}".format("Bin".rjust(4, ' '), "Ranges".rjust(10, ' '), "Frequency".rjust(10, ' '), "Histogram".rjust(10, ' '))
            sqlChart = "select * from Bins order by Bin asc"

            print(LabelString)
            print("----------------------------------------------------------------------------------")
            
            #Let's Paint a Histogram
            for rowChart in cur.execute(sqlChart):
                fPercent = ((float(rowChart['Frequency']) / float(More) * 100))
                iPrecent = int(math.ceil(fPercent))
                
                ChartString = "{0:4.0f} {1:10.4f} {2:10.0f}  {3}".format((rowChart['Bin'] + 1), rowChart['Val'], rowChart['Frequency'], "".rjust(iPrecent, '#'))
                print(ChartString)
                
            print("**********************************************************************************")

            #Clean Up
            con.commit()
            cur.close()
            con.close()

    except Exception as EXBellCurve:
        pass
        TraceInfo = traceback.format_exc()       
        raise Exception(TraceInfo)  

            
print("*********************************************************************************")
print("Using SQLite, leverage the create_aggregate(), and SQL's Between Operator to")
print("calculate distribution, then Demonstrate and Store the results in a Database.\n")
print("This demo creates a Population of values between -1 and 1 generated with a")
print("Pseudo Random Number Generator. The Mean value should be very close to zero,")
print("with a Range value of about 2, contained within a Standard Distribution.")

PythonVersion = sys.version_info[0] 
  
#Population = 2             #To Test Insufficient Population Size 
#Population = 3             #To Test Minimum Population Size
#Population = 8             #Population of 8
#Population = 39            #Test Small Population
#Population = 40            #Test Large Population
Population = (16 ** 2)      #Population of 256
#Population = (64 ** 2)     #Population of 4096
#Population = (128 ** 2)    #Population of 16384
#Population = (256 ** 2)    #Population of 65536
#Population = (1024 ** 2)   #Population of 1048576

Histogram(Population)       #Do a Distribution Analysis

#Want to see the affect of Population size on distribution? Try this loop
#and see how the distribution becomes unreliable with small populations
#for i in range(15, ((16 ** 2)), 64):
#   Histogram(i)

if (PythonVersion == 3):
    kb = input("Press Enter to continue...")
else:
    kb = raw_input("Press Enter to continue...")