# (c) cavaliba.com - data - pipeline.py


import hashlib
import operator
import re
import uuid
from datetime import datetime

import yaml
from app_data.data import Instance, get_instances
from app_data.fieldtypes.field_ipv4 import check_valid_ipv4, check_valid_ipv4_range
from app_data.models import DataInstance
from app_home.configuration import get_configuration
from app_ipam.common import align_to_subnet
from app_user.models import SireneGroup, SirenePermission, SireneUser
from dateutil.relativedelta import relativedelta
from django.conf import settings
from django.utils import timezone

# ---------------------------------------------------------------------
# Pipelines tasks
# ---------------------------------------------------------------------
# IN:
# OUT:
# RETURN:
#    None => run to next task
#    status => stop processing with this status


def task_discard(datadict, taskopt):
    """Stop the pipeline and drop the current record (status 'discard')."""
    return "discard"


def task_exit(datadict, taskopt):
    """Stop the pipeline, returning taskopt[0] as the status ('exit' by default)."""
    try:
        status = taskopt[0]
    except Exception:
        status = 'exit'
    return status

#

def task_field_noop(datadict, taskopt):
    """Do nothing; the pipeline continues with the next task."""
    return None


def task_field_toint(datadict, taskopt):
    """Cast each listed field to int; a field that cannot be cast becomes None.

    Note: a missing field is also created with value None (int lookup fails).
    """
    for name in taskopt:
        try:
            value = int(datadict[name])
        except Exception:
            value = None
        datadict[name] = value


def task_field_tofloat(datadict, taskopt):
    """Cast each listed field to float; a field that cannot be cast becomes None.

    Note: a missing field is also created with value None (the lookup fails).
    """
    for name in taskopt:
        try:
            value = float(datadict[name])
        except Exception:
            value = None
        datadict[name] = value

def task_field_tostring(datadict, taskopt):
    """Cast each listed field to str; on failure the field becomes None.

    Note: a missing field is also created with value None (the lookup fails).
    """
    for name in taskopt:
        try:
            value = str(datadict[name])
        except Exception:
            value = None
        datadict[name] = value


def task_field_nospace(datadict, taskopt):
    """Remove ALL whitespace (spaces, tabs, newlines) from each listed field."""
    for name in taskopt:
        try:
            chunks = datadict[name].split()
            datadict[name] = ''.join(chunks)
        except Exception:
            pass


def task_field_set(datadict, taskopt):
    """field_set: [fieldname, value] — add or overwrite fieldname with value."""
    try:
        name, value = taskopt
        datadict[name] = value
    except Exception:
        pass


def task_field_copy(datadict, taskopt):
    """field_copy: [src, dst] — copy the value of src into dst."""
    try:
        src, dst = taskopt
        datadict[dst] = datadict[src]
    except Exception:
        pass


def task_field_rename(datadict, taskopt):
    """field_rename: [old, new] — move the value from key old to key new."""
    try:
        old, new = taskopt
        datadict[new] = datadict[old]
        del datadict[old]
    except Exception:
        pass

def task_field_delete(datadict, taskopt):
    """field_delete: [f1, f2, ...] — remove each listed field if present."""
    for name in taskopt:
        try:
            datadict.pop(name, None)
        except Exception:
            pass


def task_field_keep(datadict, taskopt):
    """Keep only the fields listed in taskopt.

    'classname' and 'keyname' are always preserved; everything else not
    listed is removed from datadict.
    """
    protected = ("classname", "keyname")
    try:
        doomed = [k for k in datadict if k not in protected and k not in taskopt]
        for k in doomed:
            datadict.pop(k)
    except Exception:
        pass


def task_field_lower(datadict, taskopt):
    """Lowercase each listed field; non-string values are left untouched."""
    for name in taskopt:
        try:
            datadict[name] = datadict[name].lower()
        except Exception:
            pass


def task_field_upper(datadict, taskopt):
    """Uppercase each listed field; non-string values are left untouched."""
    for name in taskopt:
        try:
            datadict[name] = datadict[name].upper()
        except Exception:
            pass


def task_field_date_now(datadict, taskopt):
    """Set each listed field to today's date as 'YYYY-MM-DD'.

    The clock is read once, before the loop, so every field receives the
    same value (the previous version re-read it per field, which could
    yield different dates across a midnight boundary).
    """
    stamp = datetime.today().strftime('%Y-%m-%d')
    for name in taskopt:
        try:
            datadict[name] = stamp
        except Exception:
            pass


def task_field_time_now(datadict, taskopt):
    """Set each listed field to the current time as 'HH:MM:SS'.

    The clock is read once, before the loop, so every field receives the
    same value (the previous version re-read it per field).
    """
    stamp = datetime.today().strftime('%H:%M:%S')
    for name in taskopt:
        try:
            datadict[name] = stamp
        except Exception:
            pass


def task_field_datetime_now(datadict, taskopt):
    """Set each listed field to the current timestamp 'YYYY-MM-DD HH:MM:SS'.

    The clock is read once, before the loop, so every field receives the
    same value (the previous version re-read it per field).
    """
    stamp = datetime.today().strftime('%Y-%m-%d %H:%M:%S')
    for name in taskopt:
        try:
            datadict[name] = stamp
        except Exception:
            pass

def task_field_regexp_sub(datadict, taskopt):
    """field_regexp_sub: [field, pattern, replacement] — regex substitution in place."""
    try:
        name, pattern, repl = taskopt
        datadict[name] = re.sub(pattern, repl, datadict[name])
    except Exception:
        pass

def task_field_uuid(datadict, taskopt):
    """Set each listed field to a freshly generated random UUID4 string."""
    for name in taskopt:
        try:
            datadict[name] = str(uuid.uuid4())
        except Exception:
            pass

def task_field_merge(datadict, taskopt):
    """field_merge: [src1, src2, dst] — dst = src1 + src2 (concatenation/addition)."""
    try:
        src_a, src_b, dst = taskopt
        datadict[dst] = datadict[src_a] + datadict[src_b]
    except Exception:
        pass

def task_field_append(datadict, taskopt):
    """field_append: [field, value] — append value to the field's string form."""
    try:
        name, suffix = taskopt
        datadict[name] = f"{datadict[name]}{suffix}"
    except Exception:
        pass

def task_field_prepend(datadict, taskopt):
    """field_prepend: [field, value] — prepend value to the field's string form."""
    try:
        name, prefix = taskopt
        datadict[name] = f"{prefix}{datadict[name]}"
    except Exception:
        pass

# 3.23.1
def task_field_md5(datadict, taskopt):
    """field_md5: [dst, src1, src2, ...] — dst = md5 hex digest of src values.

    Source field values are stringified and concatenated before hashing.
    md5 is used here as a non-cryptographic fingerprint, not for security.
    """
    try:
        dst = taskopt[0]
        joined = ''.join(str(datadict[f]) for f in taskopt[1:])
        datadict[dst] = hashlib.md5(joined.encode('utf-8')).hexdigest()
    except Exception:
        pass

# 3.24
def task_align_subnet4(datadict, taskopt):
    """align_subnet4: [fieldname] — align an IP/mask to its subnet boundary.

    Example: '10.1.1.1/24' becomes '10.1.1.0/24'.
    """
    try:
        name = taskopt[0]
        datadict[name] = align_to_subnet(datadict[name])
    except Exception:
        pass


def task_field_join(datadict, taskopt):
    """field_join: [separator, dst_field, src1, src2, ...]

    Join the stringified values of the source fields with separator and
    store the result into dst_field.
    Example: ['-', 'result', 'f1', 'f2'] -> datadict['result'] = 'v1-v2'
    """
    try:
        sep = taskopt[0]
        target = taskopt[1]
        parts = [str(datadict[name]) for name in taskopt[2:]]
        datadict[target] = sep.join(parts)
    except Exception:
        pass


def task_field_truncate(datadict, taskopt):
    """field_truncate: [field, length] — keep only the first length characters."""
    try:
        name, limit = taskopt
        datadict[name] = str(datadict[name])[:int(limit)]
    except Exception:
        pass


def task_field_lookup(datadict, taskopt):
    """field_lookup: [dstfield, schemaname, srcfield]

    Look up the value of srcfield in schemaname and store the matched
    entry's keyname into dstfield ("" when nothing matches).
    schemaname: _user, _group, _role, _permission, or any DataInstance
    classname.
    """
    try:
        dstfield = taskopt[0]
        schemaname = taskopt[1]
        srcfield = taskopt[2]
        needle = str(datadict[srcfield])
    except Exception:
        return

    result = ""
    try:
        if schemaname == "_user":
            hit = SireneUser.objects.filter(login=needle).first()
            if hit:
                result = hit.login
        elif schemaname in ("_group", "_role"):
            # groups and roles share SireneGroup, distinguished by is_role
            hit = SireneGroup.objects.filter(keyname=needle, is_role=(schemaname == "_role")).first()
            if hit:
                result = hit.keyname
        elif schemaname == "_permission":
            hit = SirenePermission.objects.filter(keyname=needle).first()
            if hit:
                result = hit.keyname
        else:
            hit = DataInstance.objects.filter(classname=schemaname, keyname=needle).first()
            if hit:
                result = hit.keyname
    except Exception:
        pass

    datadict[dstfield] = result




# Map the singular delta-unit names used in pipeline specs to the plural
# keyword arguments expected by dateutil.relativedelta (e.g. 'day' -> 'days').
# Consumed by task_field_datetime below.
_DELTA_UNITS = {
    'second': 'seconds',
    'minute': 'minutes',
    'hour':   'hours',
    'day':    'days',
    'week':   'weeks',
    'month':  'months',
    'year':   'years',
}

def task_field_datetime(datadict, taskopt):
    """field_datetime: [target_field, source, delta]

    Writes a 'YYYY-MM-DD HH:MM:SS' string into target_field.
    source : now() | field_name | "YYYY-MM-DD HH:MM:SS" (ISO format)
    delta  : optional [+-]<int><unit> ; unit = second/minute/hour/day/week/month/year
             or the name of a field containing such a string.
    Silently returns without touching datadict on malformed input.
    """

    try:
        target_field = taskopt[0]
        source       = taskopt[1]
    except Exception:
        return

    # optional third element: the delta spec (literal string or field name)
    delta_str = taskopt[2] if len(taskopt) > 2 else None

    # resolve source to a naive datetime
    try:
        if source == 'now()':
            # local time, with tzinfo stripped so all branches yield naive datetimes
            dt = timezone.localtime(timezone.now()).replace(tzinfo=None)
        elif source in datadict:
            # field reference takes precedence over a literal timestamp
            dt = datetime.fromisoformat(str(datadict[source]))
        else:
            dt = datetime.fromisoformat(str(source))
    except Exception:
        return

    # apply optional delta
    if delta_str:
        # resolve field reference
        if delta_str in datadict:
            delta_str = str(datadict[delta_str])

        m = re.match(r'^([+-])(\d+)(second|minute|hour|day|week|month|year)s?$', delta_str.strip())
        if m:
            sign   = 1 if m.group(1) == '+' else -1
            amount = int(m.group(2)) * sign
            unit   = _DELTA_UNITS[m.group(3)]
            # relativedelta handles calendar-aware month/year arithmetic
            dt     = dt + relativedelta(**{unit: amount})

    datadict[target_field] = dt.strftime('%Y-%m-%d %H:%M:%S')


# Conditions

def condition_field_match(datadict, taskopt):
    """field_match: [field, pattern] — True when the regex matches the field value."""
    try:
        name, pattern = taskopt
        return re.search(pattern, datadict[name]) is not None
    except Exception:
        return False


def _coerce(v1, v2):
    ''' Try to coerce both values to int, then float, then keep as str. '''
    for cast in (int, float, str):
        try:
            return cast(v1), cast(v2)
        except (ValueError, TypeError):
            pass
    return str(v1), str(v2)


def condition_field_compare(op, datadict, taskopt):
    """Compare two fields with op after best-effort numeric coercion.

    taskopt: [field1, field2]. Returns False on any error (missing field,
    wrong arity, incomparable values).
    """
    try:
        left, right = taskopt
        a, b = _coerce(datadict[left], datadict[right])
        return op(a, b)
    except Exception:
        return False


def condition_ge(datadict, taskopt):
    """ge: [field1, field2] — True when field1 >= field2 (coerced)."""
    return condition_field_compare(operator.ge, datadict, taskopt)

def condition_gt(datadict, taskopt):
    """gt: [field1, field2] — True when field1 > field2 (coerced)."""
    return condition_field_compare(operator.gt, datadict, taskopt)

def condition_lt(datadict, taskopt):
    """lt: [field1, field2] — True when field1 < field2 (coerced)."""
    return condition_field_compare(operator.lt, datadict, taskopt)

def condition_le(datadict, taskopt):
    """le: [field1, field2] — True when field1 <= field2 (coerced)."""
    return condition_field_compare(operator.le, datadict, taskopt)

def condition_equal(datadict, taskopt):
    """eq: [field1, field2] — True when field1 == field2 (coerced)."""
    return condition_field_compare(operator.eq, datadict, taskopt)

def condition_not_equal(datadict, taskopt):
    """ne: [field1, field2] — True when field1 != field2 (coerced)."""
    return condition_field_compare(operator.ne, datadict, taskopt)

def condition_exists(datadict, taskopt):
    """exists: [field] — True when the field key is present in datadict."""
    if taskopt:
        return taskopt[0] in datadict
    return False

def condition_empty(datadict, taskopt):
    """empty: [field] — True when the field exists and is blank after strip()."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name in datadict:
        return str(datadict[name]).strip() == ""
    return False

def condition_not_empty(datadict, taskopt):
    """not_empty: [field] — True when the field exists and is non-blank after strip()."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name in datadict:
        return str(datadict[name]).strip() != ""
    return False

def condition_is_int(datadict, taskopt):
    """is_int: [field] — True when the field exists and its value casts to int."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name not in datadict:
        return False
    try:
        int(datadict[name])
    except (ValueError, TypeError):
        return False
    return True

def condition_is_float(datadict, taskopt):
    """is_float: [field] — True when the field exists and its value casts to float."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name not in datadict:
        return False
    try:
        float(datadict[name])
    except (ValueError, TypeError):
        return False
    return True

def condition_is_boolean(datadict, taskopt):
    """is_boolean: [field] — True when the field holds a recognized boolean literal."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name not in datadict:
        return False
    # membership uses ==, so 1/True and 0/False overlap intentionally
    accepted = ('on', 'On', 'ON', True, False, 'yes', 'Yes', 'YES', 'no', 'No', 'NO',
                'True', 'true', 'TRUE', 'False', 'false', 'FALSE',
                '1', '0', 1, 0)
    return datadict[name] in accepted

def condition_is_date(datadict, taskopt):
    """is_date: [field] — True when the field value parses as 'YYYY-MM-DD'."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name not in datadict:
        return False
    try:
        datetime.strptime(str(datadict[name]), "%Y-%m-%d")
    except (ValueError, TypeError):
        return False
    return True

def condition_is_time(datadict, taskopt):
    """is_time: [field] — True when the field value parses as 'HH:MM:SS'."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name not in datadict:
        return False
    try:
        datetime.strptime(str(datadict[name]), "%H:%M:%S")
    except (ValueError, TypeError):
        return False
    return True

def condition_is_datetime(datadict, taskopt):
    """is_datetime: [field] — True when the field parses as 'YYYY-MM-DD HH:MM:SS'."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name not in datadict:
        return False
    try:
        datetime.strptime(str(datadict[name]), "%Y-%m-%d %H:%M:%S")
    except (ValueError, TypeError):
        return False
    return True

def condition_is_ip4(datadict, taskopt):
    """is_ip4: [field] — True when the field value is a valid IPv4 address."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name in datadict:
        return check_valid_ipv4(str(datadict[name]))
    return False

def condition_is_subnet(datadict, taskopt):
    """is_subnet: [field] — True when the field value is a valid IPv4 range/subnet."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name in datadict:
        return check_valid_ipv4_range(str(datadict[name]))
    return False

def condition_is_true(datadict, taskopt):
    """is_true: [field] — True when the field value is in settings.TRUE_LIST."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name in datadict:
        return datadict[name] in settings.TRUE_LIST
    return False

def condition_is_false(datadict, taskopt):
    """is_false: [field] — True when the field value is NOT in settings.TRUE_LIST."""
    if not taskopt:
        return False
    name = taskopt[0]
    if name in datadict:
        return datadict[name] not in settings.TRUE_LIST
    return False

# ---------------------------------------------------------------------
# Pipeline MAP
# ---------------------------------------------------------------------


# Dispatch table mapping task/condition names (as written in pipeline YAML
# specs) to their implementation. Pipeline.apply() and check_condition()
# resolve names through this dict; unknown names are silently skipped.
PIPELINE_TASK = {

    # condition checkers
    "field_match":  condition_field_match,
    "ge":           condition_ge,
    "gt":           condition_gt,
    "lt":           condition_lt,
    "le":           condition_le,
    "eq":           condition_equal,
    "ne":           condition_not_equal,
    "exists":       condition_exists,
    "empty":        condition_empty,
    "not_empty":    condition_not_empty,
    "is_ip4":       condition_is_ip4,
    "is_subnet":    condition_is_subnet,
    "is_int":       condition_is_int,
    "is_float":     condition_is_float,
    "is_boolean":   condition_is_boolean,
    "is_date":      condition_is_date,
    "is_time":      condition_is_time,
    "is_datetime":  condition_is_datetime,
    "is_true":      condition_is_true,
    "is_false":     condition_is_false,

    # exit (tasks returning a truthy status stop the pipeline)
    "discard": task_discard,
    "skip": task_discard,               # alias
    "exit": task_exit,

    # transformer
    "field_noop": task_field_noop,
    "noop": task_field_noop,             # alias
    "field_toint": task_field_toint,
    "field_tofloat": task_field_tofloat,
    "field_nospace": task_field_nospace,
    "field_set": task_field_set,
    "field_copy": task_field_copy,
    "field_rename": task_field_rename,
    "field_delete": task_field_delete,
    "field_keep": task_field_keep,
    "field_lower": task_field_lower,
    "field_upper": task_field_upper,
    "field_date_now": task_field_date_now,
    "field_datetime_now": task_field_datetime_now,
    "field_time_now": task_field_time_now,
    "field_regexp_sub": task_field_regexp_sub,
    "field_uuid": task_field_uuid,
    "field_merge": task_field_merge,
    "field_tostring": task_field_tostring,
    "field_append": task_field_append,
    "field_prepend": task_field_prepend,
    "field_md5": task_field_md5,
    "align_subnet4": task_align_subnet4,
    "field_join": task_field_join,
    "field_datetime": task_field_datetime,
    "field_truncate": task_field_truncate,
    "field_lookup": task_field_lookup,

}




def check_condition(datadict, opts):
    """Evaluate one condition spec against datadict.

    opts = [condition_name, opt1, opt2, ...]
    Returns the condition function's result (True/False), or False when
    opts is malformed or the condition name is unknown.
    """
    try:
        # renamed from 'operator': the old local shadowed the stdlib
        # 'operator' module imported at the top of this file
        cond_name = opts[0]
        cond_opts = opts[1:]
    except Exception:
        return False

    cfunc = PIPELINE_TASK.get(cond_name, None)
    if not cfunc:
        return False

    return cfunc(datadict, cond_opts)




# -------------------------------------------
# new 3.33 - Pipeline class
# -------------------------------------------

class Pipeline:
    """A data-transformation pipeline loaded from a '_pipeline' Instance.

    Holds CSV/encoding options plus an ordered list of tasks that apply()
    runs against one datadict at a time (task names resolved via
    PIPELINE_TASK).
    """

    def __init__(self):
        # Build an empty pipeline with site-wide defaults and no tasks.

        self.id = None             # Instance() object DB ID
        self.keyname = None        # Instance() keyname

        # site-wide default; may be overridden by the pipeline's content
        self.csv_delimiter = get_configuration(appname="home", keyname="CSV_DELIMITER")
        self.encoding = 'utf-8'
        self.classname = None      # target classname   ; deprecated 3.33
        self.keyfield = None       # field containing keyname for target classname ; deprecated 3.33
        self.run_permission = None # permission keyname required to run this pipeline
        self.tasks = []            # list of tasks
        # tasks:
        # - taskname: [condition, data1, data2, ...]
        # - taskname: [condition, data1, data2, ...]
        # ...
        # with each line being one of :
        # - set_condition: [CONDITION_NAME, check, opt2, ...]
        # - operator: [ '[!]CONDITION_NAME', opt1, opt2, ...]


    @classmethod
    def list(cls, is_enabled=None):
        """Return all '_pipeline' instances, optionally filtered by is_enabled."""
        return get_instances(classname="_pipeline", is_enabled=is_enabled)


    def print(self):
        """Debug dump of the pipeline definition to stdout."""
        print(f"Pipeline: {self.keyname} (id={self.id})")
        print(f"  classname   : {self.classname}")
        print(f"  keyfield    : {self.keyfield}")
        print(f"  csv_delimiter: {self.csv_delimiter}")
        print(f"  encoding    : {self.encoding}")
        print(f"  tasks ({len(self.tasks)}):")
        for task in self.tasks:
            print(f"    {task}")

    @classmethod
    def from_name(cls, keyname):
        """Load a Pipeline by its keyname; returns None when not found."""
        instance = Instance.from_keyname(classname="_pipeline", keyname=keyname)
        if not instance:
            return None

        return cls.from_instance(instance)


    @classmethod
    def from_id(cls, id):
        """Load a Pipeline by Instance DB id; returns None when not found."""
        instance = Instance.from_id(id=id)
        if not instance:
            return None

        return cls.from_instance(instance)


    @classmethod
    def from_instance(cls, instance):
        """Build a Pipeline from an Instance; returns None for a falsy instance.

        The 'content' field is expected to hold a YAML document providing
        classname, keyfield, tasks, csv_delimiter and encoding; any parse
        error leaves the defaults in place.
        """
        if not instance:
            return None

        obj = cls()
        obj.id = instance.id
        obj.keyname = instance.keyname

        content = {}
        try:
            # first value of the 'content' field is a YAML document
            raw = instance.fields["content"].value[0]
            content = yaml.safe_load(raw) or {}
        except Exception:
            pass

        obj.classname = content.get("classname", None)
        obj.keyfield = content.get("keyfield", None)
        obj.tasks = content.get("tasks", [])

        try:
            obj.run_permission = instance.fields["run_permission"].value[0]
        except Exception:
            pass

        csv_delimiter = content.get("csv_delimiter", None)
        if csv_delimiter:
            obj.csv_delimiter = csv_delimiter

        encoding = content.get("encoding", None)
        if encoding:
            obj.encoding = encoding

        return obj


    def apply(self, datadict):
        '''
        Apply pipeline tasks to a single datadict.
        Returns None (continue) or "discard"/"exit_message".
        '''

        result = None
        condition_truelist = []   # names of conditions currently evaluated True

        for task in self.tasks:

            # each task is a one-entry dict: {taskname: [condition, opt1, ...]}
            try:
                taskname, attributes = next(iter(task.items()))
            except Exception:
                continue

            try:
                condition = attributes[0]
                taskopt = attributes[1:]
            except Exception:
                continue

            # special task = set_condition ?
            # evaluates a condition and records/clears its name in the truelist
            if taskname == "set_condition":
                r = check_condition(datadict, taskopt)
                if r:
                    if condition not in condition_truelist:
                        condition_truelist.append(condition)
                else:
                    if condition in condition_truelist:
                        condition_truelist.remove(condition)
                continue

            # regular operator : check condition first
            # '' always runs ; 'NAME' requires NAME true ; '!NAME' requires NAME false
            if len(condition) > 0:
                if condition[0] != '!':
                    if condition not in condition_truelist:
                        continue
                else:
                    if len(condition) > 1:
                        if condition[1:] in condition_truelist:
                            continue

            # apply operator
            tfunc = PIPELINE_TASK.get(taskname, None)
            if not tfunc:
                continue
            result = tfunc(datadict, taskopt)
            if result:
                # a truthy result ("discard" / exit status) stops the pipeline
                break

        return result




    # def apply_multi(self, datalist):
    #     '''
    #     Apply pipeline to a list of dicts.
    #     Returns a new list with modified dicts (discarded entries excluded).
    #     '''

    #     if not datalist:
    #         return []

    #     result = []
    #     for datadict in datalist:
    #         status = self.apply(datadict)
    #         if status != "discard":
    #             result.append(datadict)

    #     return result
