/**
 * Samples one action index per row of `modelOutput` by walking the row's
 * cumulative sum until it exceeds a uniform random draw.
 *
 * NOTE(review): assumes each row is a probability distribution (non-negative
 * entries summing to ~1); nothing here validates or normalizes it.
 *
 * @param {number[][]} modelOutput - One row of action weights per sample.
 * @returns {number[]} The selected action index for each row. A row whose
 *   cumulative sum never exceeds the draw yields its last index; an empty
 *   row yields 0.
 */
const probabilisticSampling = (modelOutput) => {
  return modelOutput.map((currentActionProbabilities) => {
    const randomNumber = Math.random()
    const lastAction = currentActionProbabilities.length - 1
    let cumulativeProbability = 0
    // The last action is never tested: it is the fallback when no earlier
    // bucket contains the draw (e.g. the row sums to slightly less than 1).
    for (let action = 0; action < lastAction; action++) {
      cumulativeProbability += currentActionProbabilities[action]
      // Strict `<` because Math.random() is in [0, 1): a draw of exactly 0
      // must not select an action whose probability is 0, and a draw that
      // lands exactly on a boundary belongs to the next bucket.
      if (randomNumber < cumulativeProbability) {
        return action
      }
    }
    // Math.max keeps the result at 0 for an empty row (lastAction === -1).
    return Math.max(lastAction, 0)
  })
}

/**
 * Returns the index of the largest element of `array`.
 *
 * Ties resolve to the earliest index. Elements are compared with `>`, so an
 * element that compares false against the current best (e.g. NaN) never
 * replaces it. Works on any indexable with a `length`, including typed
 * arrays.
 *
 * @param {ArrayLike<number>} array - Non-empty sequence of values.
 * @returns {number} Index of the first maximum.
 * @throws {TypeError} If `array` is empty.
 */
const argmax = (array) => {
  if (array.length === 0) {
    throw new TypeError('argmax of an empty array is undefined')
  }
  let bestIndex = 0
  for (let i = 1; i < array.length; i++) {
    if (array[i] > array[bestIndex]) {
      bestIndex = i
    }
  }
  return bestIndex
}

/**
 * Greedy policy: for each row of `modelOutput`, picks the index that
 * `argmax` returns for that row.
 *
 * @param {number[][]} modelOutput - One row of action scores per sample.
 * @returns {number[]} The chosen action index for each row.
 */
const argmaxPolicy = (modelOutput) => modelOutput.map((row) => argmax(row))

/**
 * Epsilon-greedy policy: for each row of `modelOutput`, with probability
 * `epsilon` returns a uniformly random action index, otherwise the index
 * returned by `argmax` for that row.
 *
 * @param {number[][]} modelOutput - One row of action scores per sample.
 * @param {number} [epsilon=0.1] - Exploration probability, compared against
 *   a Math.random() draw; 0 never explores, values >= 1 always explore.
 * @returns {number[]} The chosen action index for each row.
 */
const epsilonGreedy = (modelOutput, epsilon = 0.1) => {
  return modelOutput.map((currentModelOutputs) => {
    if (Math.random() < epsilon) {
      const actionCount = currentModelOutputs.length
      // Math.min is a defensive clamp so the index can never reach
      // actionCount.
      return Math.min(Math.floor(Math.random() * actionCount), actionCount - 1)
    }
    return argmax(currentModelOutputs)
  })
}

/**
 * Lookup table from policy name to the action-selection function defined
 * in this module. Exported as the module's named export `policyMapping`.
 */
export const policyMapping = {
  // Samples an index per row from the row's cumulative weights.
  probabilisticSampling,
  // Mostly greedy, with occasional uniformly random picks.
  epsilonGreedy,
  // Always the per-row argmax.
  argmaxPolicy,
}