import { Schema, Type as ArrowType } from 'apache-arrow';
import { unsupported } from 'graffe-shared/src/lib/devflow';
import { isObject } from 'graffe-shared/src/lib/js';
import { DataFieldData, DataFieldOverrideData, LogicalType } from 'graffe-shared/src/models/dataField';
import { clone, getStorageModel } from 'graffe-shared/src/universe/utils';
import { keyBy } from 'lodash-es';
import { signal } from 'ufti';
import { Signal } from "ufti";
import { reportFailed, reportWarning } from '../types/notifications';

// True when `t` is one of the floating point logical types: Float, Float32 or Float64.
export function isLogicalFloat(t: LogicalType) {
  switch (t) {
    case LogicalType.Float:
    case LogicalType.Float32:
    case LogicalType.Float64:
      return true;
    default:
      return false;
  }
}

// True when `t` is one of the integer logical types: Int, Int8, Int16, Int32 or Int64.
export function isLogicalInt(t: LogicalType) {
  switch (t) {
    case LogicalType.Int:
    case LogicalType.Int8:
    case LogicalType.Int16:
    case LogicalType.Int32:
    case LogicalType.Int64:
      return true;
    default:
      return false;
  }
}

// True when `t` is either a floating point or an integer logical type.
export function isLogicalNumeric(t: LogicalType) {
  if (isLogicalFloat(t)) {
    return true;
  }
  return isLogicalInt(t);
}

// See apache-arrow/src/enum.ts for the source of this excerpt:
// -----------------
// /**
//  * Main data type enumeration.
//  *
//  * Data types in this library are all *logical*. They can be expressed as
//  * either a primitive physical type (bytes or bits of some fixed size), a
//  * nested type consisting of other data types, or another data type (e.g. a
//  * timestamp encoded as an int64).
//  *
//  * **Note**: Only enum values 0-17 (NONE through Map) are written to an Arrow
//  * IPC payload.
//  *
//  * The rest of the values are specified here so TypeScript can narrow the type
//  * signatures further beyond the base Arrow Types. The Arrow DataTypes include
//  * metadata like `bitWidth` that impact the type signatures of the values we
//  * accept and return.
//  *
//  * For example, the `Int8Vector` reads 1-byte numbers from an `Int8Array`, an
//  * `Int32Vector` reads a 4-byte number from an `Int32Array`, and an `Int64Vector`
//  * reads a pair of 4-byte lo, hi 32-bit integers as a zero-copy slice from the
//  * underlying `Int32Array`.
//  *
//  * Library consumers benefit by knowing the narrowest type, since we can ensure
//  * the types across all public methods are propagated, and never bail to `any`.
//  * These values are _never_ used at runtime, and they will _never_ be written
//  * to the flatbuffers metadata of serialized Arrow IPC payloads.
//  */
// export enum Type {
//   NONE = 0, /** The default placeholder type */
//   Null = 1, /** A NULL type having no physical storage */
//   Int = 2, /** Signed or unsigned 8, 16, 32, or 64-bit little-endian integer */
//   Float = 3, /** 2, 4, or 8-byte floating point value */
//   Binary = 4, /** Variable-length bytes (no guarantee of UTF8-ness) */
//   Utf8 = 5, /** UTF8 variable-length string as List<Char> */
//   Bool = 6, /** Boolean as 1 bit, LSB bit-packed ordering */
//   Decimal = 7, /** Precision-and-scale-based decimal type. Storage type depends on the parameters. */
//   Date = 8, /** int32_t days or int64_t milliseconds since the UNIX epoch */
//   Time = 9, /** Time as signed 32 or 64-bit integer, representing either seconds, milliseconds, microseconds, or nanoseconds since midnight since midnight */
//   Timestamp = 10, /** Exact timestamp encoded with int64 since UNIX epoch (Default unit millisecond) */
//   Interval = 11, /** YEAR_MONTH or DAY_TIME interval in SQL style */
//   List = 12, /** A list of some logical data type */
//   Struct = 13, /** Struct of logical types */
//   Union = 14, /** Union of logical types */
//   FixedSizeBinary = 15, /** Fixed-size binary. Each value occupies the same number of bytes */
//   FixedSizeList = 16, /** Fixed-size list. Each value occupies the same number of bytes */
//   Map = 17, /** Map of named logical types */

//   Dictionary = -1, /** Dictionary aka Category type */
//   Int8 = -2,
//   Int16 = -3,
//   Int32 = -4,
//   Int64 = -5,
//   Uint8 = -6,
//   Uint16 = -7,
//   Uint32 = -8,
//   Uint64 = -9,
//   Float16 = -10,
//   Float32 = -11,
//   Float64 = -12,
//   DateDay = -13,
//   DateMillisecond = -14,
//   TimestampSecond = -15,
//   TimestampMillisecond = -16,
//   TimestampMicrosecond = -17,
//   TimestampNanosecond = -18,
//   TimeSecond = -19,
//   TimeMillisecond = -20,
//   TimeMicrosecond = -21,
//   TimeNanosecond = -22,
//   DenseUnion = -23,
//   SparseUnion = -24,
//   IntervalDayTime = -25,
//   IntervalYearMonth = -26,
// }
// ------------------------------

// How logical types map to arrow types.
// This is the lookup table behind DataField.arrowTypeFor().
// NOTE(review): although typed as a total Record<LogicalType, ArrowType>, there is no entry
// for LogicalType.Float, Float32, Int or None, which are referenced elsewhere in this file.
// Unless those members alias a listed one, looking them up here yields undefined.
export const logicalToArrowTypes: Record<LogicalType, ArrowType> = {
  // Primitive types
  [LogicalType.Int8]: ArrowType.Int8,
  [LogicalType.Int16]: ArrowType.Int16,
  [LogicalType.Int32]: ArrowType.Int32,
  [LogicalType.Int64]: ArrowType.Int64,

  [LogicalType.Decimal]: ArrowType.Decimal,

  [LogicalType.Float64]: ArrowType.Float64,
  [LogicalType.Utf8]: ArrowType.Utf8,
  [LogicalType.Bool]: ArrowType.Bool,
  
  // Logical types
  // Both JSON types map to Utf8 (presumably stored as serialized JSON text - confirm)
  [LogicalType.JsonArray]: ArrowType.Utf8,
  [LogicalType.JsonObject]: ArrowType.Utf8,
  
  // Millisecond timestamp
  [LogicalType.Timestamp]: ArrowType.Timestamp,

  // Time since midnight
  [LogicalType.Time]: ArrowType.Time,

  [LogicalType.Date]: ArrowType.Date,
}

// TODO: blocker - requires a full implementation; both tables should have mappings that make sense.
// Reverse lookup from an arrow type id to a logical type.
// inferDataFieldsForArrowSchema() indexes this table with each schema field's typeId.
// NOTE(review): the apache-arrow excerpt above says the narrowed (negative) enum values are
// never used at runtime, so the Int8..Int64 / Float32 / Float64 keys may never be hit by a
// typeId lookup - confirm.
export const arrowToLogicalType: Record<ArrowType, LogicalType> = {
  [ArrowType.Int8]: LogicalType.Int8,
  [ArrowType.Int16]: LogicalType.Int16,
  [ArrowType.Int32]: LogicalType.Int32,
  [ArrowType.Int64]: LogicalType.Int64,
  [ArrowType.Int]: LogicalType.Int,

  [ArrowType.Decimal]: LogicalType.Decimal,
  
  // NOTE(review): Float32 is mapped to Float64 even though LogicalType.Float32 exists;
  // presumably deliberate because logicalToArrowTypes has no Float32 entry - confirm.
  [ArrowType.Float32]: LogicalType.Float64,
  [ArrowType.Float64]: LogicalType.Float64,
  [ArrowType.Float]: LogicalType.Float,

  [ArrowType.Utf8]: LogicalType.Utf8,
  [ArrowType.Bool]: LogicalType.Bool,

  [ArrowType.Timestamp]: LogicalType.Timestamp,

  [ArrowType.Time]: LogicalType.Time,

  [ArrowType.Date]: LogicalType.Date,
}

// The system follows this behavior:
// -  When data is fetched and field definitions are missing, the fields are inferred from the results of the first query.
// -  When new fields show up, they are inferred, unless an overridden value is provided for them.

// Field typing for one column of a data set; built from (and cloned through) its
// JSON-serializable DataFieldData form.
export class DataField {
  // Name of the field as it appears in the data set
  name: Signal<string>;

  // Logical data type assigned to the field
  type: Signal<LogicalType>;

  // Additional information about the field
  info: Signal<string>;

  // Arrow storage type that corresponds to the current logical type
  get arrowType() : ArrowType {
    const logical = this.type.v;
    return DataField.arrowTypeFor(logical);
  }

  // Builds a DataField from plain data, wrapping name, type and info in a signal each
  static fromData(data: DataFieldData) : DataField {
    const nameIsString = typeof data.name === 'string';
    if(!nameIsString) {
      debugger; // Debugger, shouldn't happen
    }

    const { name, type, info } = data;
    const field = new DataField();
    field.name = signal(name);
    field.type = signal(type);
    field.info = signal(info);
    return field;
  }

  // Copies a field by cloning its storage model and rebuilding a DataField from the clone
  static fromField(f: DataField) : DataField {
    const snapshot = clone(getStorageModel(f));
    return DataField.fromData(snapshot);
  }

  // Looks up the arrow type registered for a logical type in logicalToArrowTypes
  static arrowTypeFor(type: LogicalType) : ArrowType {
    const arrowType = logicalToArrowTypes[type];
    return arrowType;
  }
}

// Per-field override applied on top of a DataField (see mergeFields): it can retype,
// rename (alias), annotate (info) or drop the field it names.
// Built from (and cloned through) its JSON-serializable DataFieldOverrideData form.
export class DataFieldOverride {
  // Field name in the data set; identifies the field this override applies to
  name: Signal<string>;

  // Logical data type; when null/undefined the original field's type is kept
  type: Signal<LogicalType>;

  // Renamed field; when null/undefined the original name is kept
  alias: Signal<string>;

  // Drop if we hide the field in the output
  drop: Signal<boolean>;

  // More info; when null/undefined the original field's info is kept
  info: Signal<string>;

  // Get the arrow storage type of the overriding logical type.
  // Returns undefined when this override does not set a type.
  get arrowType() : ArrowType | undefined {
    const logical = this.type.v;
    if(logical == null) {
      // Explicit instead of falling off the end of the getter
      return undefined;
    }
    return DataField.arrowTypeFor(logical);
  }

  // Builds an override from plain data, wrapping every property in its own signal
  static fromData(data: DataFieldOverrideData) : DataFieldOverride {
    if(typeof data.name !== 'string') {
      debugger; // Debugger, shouldn't happen
    }
    
    const df = new DataFieldOverride();
    df.name = signal(data.name);
    df.type = signal(data.type);
    df.alias = signal(data.alias);
    df.drop = signal(data.drop);
    df.info = signal(data.info);
    
    return df;
  }

  // Copies an override by cloning its storage model and rebuilding from the clone
  static fromFieldOverride(f: DataFieldOverride) : DataFieldOverride {
    return DataFieldOverride.fromData(clone(getStorageModel(f)));
  }
}

// Maps the result of `typeof` on a sampled value to the logical type used by inferDataFields().
// Every JS number becomes Float64; there is no integer detection here.
// `typeof` results that are not listed (e.g. 'object') are handled separately by inferDataFields().
const typeOfLogicalTypes: Record<string, LogicalType> = {
  'string': LogicalType.Utf8,
  'boolean': LogicalType.Bool,
  'number': LogicalType.Float64,
}

// TODO: this function is O(N) with size of dataset - requires sampling work to move it closer to O(1).
// TODO: instead of a sample count use something smarter, e.g. a confidence score: if changes don't move a lot, stop sampling a field.
//
// Infers field definitions (name + logical type) from the top-level keys of the sampled rows.
//
// - data:       array of row objects. Rows that are null/undefined are skipped.
// - maxSamples: maximum number of leading rows to inspect (default 500).
//               Pass null to inspect every row.
//
// Returns one entry per distinct own key, in order of first appearance. A field's type is
// guessed from its first non-null sampled value; a field whose sampled values are all
// null/undefined keeps LogicalType.None.
//
// Throws an Error when that first value is not covered by typeOfLogicalTypes and is neither
// an array nor an object.
export function inferDataFields(data: JSONArray, maxSamples: number | null = 500) : DataFieldData[] {
  // A Map (not a plain object) so that column names such as "constructor" or "toString"
  // are not mistaken for already-known fields; it also keeps first-seen order.
  const fields = new Map<string, DataFieldData>();

  const sampleSize = (maxSamples == null || maxSamples > data.length ? data.length : maxSamples);

  for(let i = 0; i < sampleSize; i++) {
    const row = data[i];

    // A missing row carries neither columns nor values
    if(row == null) continue;

    // Only own keys are visited, so inherited properties are never read as column values
    for(const name of Object.keys(row)) {
      let field = fields.get(name);
      if(field === undefined) {
        field = {
          name,
          type: LogicalType.None,
        };
        fields.set(name, field);
      }

      // TODO: add support for multiple type tracking (mixed types in data set)

      // The type is guessed from the first value only
      if(field.type !== LogicalType.None) continue;

      const val = row[name];

      // When no data, continue
      if(val == null) continue;

      const primitiveType = typeOfLogicalTypes[typeof val];
      if(primitiveType != null) {
        field.type = primitiveType;
      } else if (Array.isArray(val)) {
        field.type = LogicalType.JsonArray;
      } else if (isObject(val)) {
        field.type = LogicalType.JsonObject;
      } else {
        throw new Error(`unsupported data name:${name} val:${val}`);
      }
    }
  }

  // Return a list in predictable order
  return [...fields.values()];
}

// Derives field definitions from an arrow schema: one entry per schema field, in schema
// order, carrying the field name and the logical type registered in arrowToLogicalType
// for the field's typeId.
// Throws the error produced by unsupported() when a typeId has no (truthy) mapping.
export function inferDataFieldsForArrowSchema(schema: Schema) : DataFieldData[] {
  const inferred: DataFieldData[] = [];

  for(const arrowField of schema.fields) {
    const logicalType = arrowToLogicalType[arrowField.typeId];
    if(!logicalType) {
      throw unsupported('Unknown data type: '+arrowField.type.toString());
    }

    // TODO: check metadata

    inferred.push({
      name: arrowField.name,
      type: logicalType,
    });
  }

  return inferred;
}

// Applies field overrides to the input fields and returns the resulting output fields.
//
// - iFields: the input (inferred) fields; null/undefined is treated as an empty list.
// - oFields: the overrides; when null/undefined a shallow copy of iFields is returned.
//
// The result follows the order of oFields. Overrides with drop === true are left out.
// For every other override a new DataField is built in which alias, type and info
// replace the input field's name, type and info whenever the override value is not
// null/undefined.
//
// Throws Error('InvalidOutputFields: ...') when any override (dropped or not) names a
// field that is not present in iFields.
export function mergeFields(iFields: DataField[], oFields: DataFieldOverride[]) : DataField[] {
  // If nothing changed, return original set
  if(oFields == null) {
    return [...(iFields ?? [])];
  }

  // Index the input fields by name. A Map is used instead of a plain object so that
  // names such as "constructor" or "toString" cannot match inherited Object members.
  // As with keyBy, the last field wins when names are duplicated.
  const fieldsIdx = new Map<string, DataField>();
  for(const iField of iFields ?? []) {
    fieldsIdx.set(iField.name.v, iField);
  }

  // Check if we have invalid output fields, which cannot be found in the input set.
  // This means something breaking changed in the transform script.
  // We throw an error here, such that the wrapping code can handle it.
  // Ideally we list all fields in the output field view and allow selection of the source field in the select input name.
  const resolved = oFields.map(oField => {
    const iField = fieldsIdx.get(oField.name.v);
    if(iField === undefined) {
      throw new Error('InvalidOutputFields: fields changed. Reset the fields to fix.');
    }
    return { oField, iField };
  });

  // Construct output fields
  return resolved
    .filter(({ oField }) => oField.drop.v !== true)
    .map(({ oField, iField }) => DataField.fromData({
      name: oField.alias.v ?? iField.name.v,
      type: oField.type.v ?? iField.type.v,
      info: oField.info.v ?? iField.info.v,
    }));
}
