#!/usr/bin/env ruby

$LOAD_PATH.unshift File.expand_path('../lib', __dir__)
$stdout.sync = true

require 'chronic_duration'
require 'down/http'
require 'http'
require 'gpt_logger'
require 'gpt_test_data'
require 'import_project'
require 'optimist'
require 'pathname'
require 'rainbow'
require 'time'
require 'uri'

# Location of the k6 directory (a sibling of this script's directory), expressed
# relative to the current working directory.
k6_absolute_dir = File.expand_path('../k6', __dir__)
k6_dir = Pathname.new(k6_absolute_dir).relative_path_from(Dir.pwd)

# Version of the generator, shown in the start-up banner printed below.
gpt_data_version = '1.4.0'
startup_banner = "GPT Data Generator v#{gpt_data_version} - opinionated test data for the GitLab Performance Tool"
puts Rainbow(startup_banner).color(230, 83, 40)

# Command line interface definition. Options declared without a default are nil
# unless given on the command line.
opts = Optimist.options do
  banner "\nUsage: generate-gpt-data [options]"
  banner "\nGenerates opinionated test data for the GitLab Performance Tool. Data generated can be 'horizontal' (many groups and projects) and or 'vertical' (large project imports)."
  banner "\nOptions:"
  opt :environment, "Name of Environment Config file in environments directory that will be used for test data generation. Alternative filepath can also be given.", type: :string
  opt :environment_url, "Full URL for the environment to import to.", short: :none, type: :string, default: ENV.fetch('ENVIRONMENT_URL', nil)
  opt :root_group, "Root group for GPT data.", short: :none, type: :string
  opt :horizontal, "Generate horizontal GPT data with multiple subgroups and projects.", short: :none, type: :boolean, default: true
  opt :group, "Group name that the subgroups and projects will be generated under.", short: :none, type: :string
  opt :subgroup_prefix, "Prefix that the subgroups will be generated with.", short: :none, type: :string
  opt :subgroups, "Number of subgroups to create", type: :integer
  opt :project_prefix, "Prefix that the projects will be generated with.", short: :none, type: :string
  opt :projects, "Number of projects to create in each subgroup", type: :integer
  opt :vertical, "Generate vertical GPT data with large projects", short: :none, type: :boolean, default: true
  opt :vert_group, "Group name that the vertical data will be generated to.", short: :none, type: :string
  opt :large_project_name, "Name for large project to import.", short: :none, type: :string
  opt :large_project_tarball, "Location of custom large project tarball to import. Can be local or remote.", short: :none, type: :string
  opt :storage_nodes, "Repository storages that will be used to import vertical data.", short: :none, type: :strings
  opt :vulnerability_data, "Creates a separate group and projects for creating vulnerabilities data", short: :none, type: :boolean, default: false
  opt :unattended, "Skip all user prompts and run through tests automatically.", type: :flag, default: false
  opt :force, "Alternative flag for unattended. Skip all user prompts and run through generation automatically.", type: :flag, default: false
  opt :clean_up, "Clean up GPT data. Defaults to all data but can be customised with the --clean-up-mode param.", type: :flag, default: false
  opt :clean_up_mode, "Specify 'vertical', 'horizontal' or 'vulnerabilities' to clean up only Vertical, Horizontal or Vulnerabilities GPT data. Requires the --clean-up param to also be set.", type: :string, default: 'none'
  opt :skip_project_validation, "Skip large project metadata validation", type: :flag, default: false
  opt :max_wait_for_delete, "Maximum wait time(seconds) for groups and projects to be deleted", type: :integer, default: 300
  opt :help, 'Show help message'
  banner "\nEnvironment Variables:"
  banner "  ACCESS_TOKEN             A valid GitLab Personal Access Token for the specified environment. The token should have admin access and all permissions set.  (Default: nil)"
  banner "\nExamples:"

  # Command prefix used in the examples: the Docker invocation when GPT_DOCKER is set,
  # otherwise the name this script was started with.
  example_command = ENV['GPT_DOCKER'] ? 'docker run -it gitlab/gpt-data-generator' : $PROGRAM_NAME

  # Each entry is [description, arguments]; printed as a description line followed by the command line.
  usage_examples = [
    ["Generate horizontal and vertical data using 10k.json environment file:", "--environment 10k.json"],
    ["Generate only horizontal using 10k.json environment file:", "--environment 10k.json --horizontal --no-vertical"],
    ["Generate only vertical data using 10k.json environment file:", "--environment 10k.json --no-horizontal --vertical"],
    ["Generate only horizontal data with 10 subgroups and 100 projects in each:", "--environment_url 10k.testbed.gitlab.net --subgroups 10 --projects 100 --no-vertical"],
    ["Generate only vertical data using custom project tarball path:", "--environment 10k.json --no-horizontal --vertical --large-project-tarball=/home/user/test-project.tar.gz"]
  ]
  usage_examples.each do |description, arguments|
    banner "  #{description}"
    banner "    #{example_command} #{arguments}"
  end
end

# The access token is mandatory; stop before doing anything else when it is missing.
unless ENV['ACCESS_TOKEN']
  raise Rainbow("Environment Variable ACCESS_TOKEN has not been set. Unable to create required test data. Exiting...").red
end

# Variables
# When an environment file is named, every option that is still unset is filled in
# from that file (and from the large project config file it points to). Values
# already present in opts are kept because all assignments use ||=.
if opts[:environment]
  docker_config_dir = ENV['GPT_DOCKER_CONFIG_DIR'] || ''

  # Candidate locations for the environment file; the first glob match is used.
  env_file_candidates = [
    opts[:environment],
    "#{docker_config_dir}/environments/#{opts[:environment]}",
    "#{k6_dir}/#{opts[:environment]}",
    "#{k6_dir}/config/environments/#{opts[:environment]}"
  ]
  env_file = Dir.glob(env_file_candidates).first
  unless env_file && File.exist?(env_file)
    raise Rainbow("Environment file is not found: #{opts[:environment]}.").red
  end

  env_file_vars = JSON.parse(File.read(env_file))

  # General: option => [section, key] in the environment file
  {
    environment_url: %w[environment url],
    root_group: %w[gpt_data root_group],
    storage_nodes: %w[environment storage_nodes]
  }.each do |option, (section, key)|
    opts[option] ||= env_file_vars[section][key]
  end

  # Horizontal: option names match the keys of the 'many_groups_and_projects' section
  %i[group subgroup_prefix subgroups project_prefix projects].each do |option|
    opts[option] ||= env_file_vars['gpt_data']['many_groups_and_projects'][option.to_s]
  end

  # Vulnerabilities data: only read when the file has a 'vulnerabilities_projects' section
  opts[:vulnerabilities_perf] ||= env_file_vars['gpt_data'].key?('vulnerabilities_projects')
  if opts[:vulnerabilities_perf]
    {
      vulnerabilities_group: 'group',
      vulnerabilities_projects_prefix: 'project_prefix',
      vulnerabilities_projects_count: 'projects',
      vulnerabilities_count: 'vulnerabilities_count'
    }.each do |option, key|
      opts[option] ||= env_file_vars['gpt_data']['vulnerabilities_projects'][key]
    end
  end

  # Vertical
  opts[:vert_group] ||= env_file_vars['gpt_data']['large_projects']['group']
  large_project_file_name = env_file_vars['gpt_data']['large_projects']['project']

  # Candidate locations for the large project config file; the first glob match is used.
  project_config_candidates = [
    "#{ENV['GPT_DOCKER_CONFIG_DIR'] || ''}/projects/#{large_project_file_name}.json",
    "#{k6_dir}/config/projects/#{large_project_file_name}.json",
    large_project_file_name
  ]
  large_projects_data_file = Dir.glob(project_config_candidates).first
  unless large_projects_data_file && File.exist?(large_projects_data_file)
    raise Rainbow("Project Config file '#{large_project_file_name}' not found as given or in default folder. Exiting...").red
  end

  large_projects_data = JSON.parse(File.read(large_projects_data_file))
  opts[:large_project_name] ||= large_projects_data['name']
  opts[:large_project_metadata] ||= large_projects_data['metadata']
end

# Every option except the optional tarball location must have a value at this point;
# report all missing ones in a single error.
empty_options = opts.except(:large_project_tarball).select { |_option, value| value.nil? }.keys
unless empty_options.empty?
  raise "These GPT Data Generator option(s) are not defined: '#{empty_options.join(', ')}'. Exiting..."
end

# GPT logger setup
# The log file lives in GPT_DOCKER_RESULTS_DIR when that variable is set, otherwise in
# the results directory next to this script's directory. Its name combines the
# environment host and the start time, and its path is published via GPT_LOGGER_PATH.
start_time = Time.now
results_home = ENV.fetch('GPT_DOCKER_RESULTS_DIR') do
  Pathname.new(File.expand_path('../results', __dir__)).relative_path_from(Dir.pwd)
end
FileUtils.mkdir_p(results_home)
run_timestamp = start_time.strftime('%Y-%m-%d_%H%M%S')
results_file_prefix = "#{URI(opts[:environment_url]).host}_#{run_timestamp}"
ENV['GPT_LOGGER_PATH'] = "#{results_home}/generate-gpt-data_#{results_file_prefix}.log"

# --force is an alias for --unattended.
unattended = opts[:unattended] || opts[:force]

# Ask for confirmation only for interactive generation runs (clean up has its own prompts).
confirmation_needed = !unattended && !opts[:clean_up]
if confirmation_needed
  GPTCommon.show_warning_prompt("The GPT Data Generator will inject the data into the specified group `#{opts[:root_group]}` with Public visibility on #{opts[:environment_url]}. Note that this may take some time.")
end

# Generate opinionated test data
#
# Runs either the clean up flow (--clean-up) or the generation flows, in the order
# vulnerabilities, horizontal, vertical. `exit` and `abort` raise SystemExit, so the
# ensure clause below also runs on those paths.
begin
  gpt_test_data = GPTTestData.new(gpt_data_version:, unattended:, env_url: opts[:environment_url], storage_nodes: opts[:storage_nodes], max_wait_for_delete: opts[:max_wait_for_delete], skip_project_validation: opts[:skip_project_validation])

  if opts[:clean_up]
    root_group = gpt_test_data.check_group_exists(grp_path: opts[:root_group])
    abort('No Root group found') unless root_group

    # Pick the group to delete from --clean-up-mode; any other value (including the
    # default 'none') targets the root group itself. top_level is only set in that
    # case and is nil otherwise.
    group_to_delete = case opts[:clean_up_mode].downcase
                      when 'horizontal'
                        gpt_test_data.check_group_exists(grp_path: "#{root_group['full_path']}/#{opts[:group]}")
                      when 'vertical'
                        gpt_test_data.check_group_exists(grp_path: "#{root_group['full_path']}/#{opts[:vert_group]}")
                      when 'vulnerabilities'
                        gpt_test_data.check_group_exists(grp_path: "#{root_group['full_path']}/#{opts[:vulnerabilities_group]}")
                      else
                        top_level = true
                        root_group
                      end

    unless group_to_delete
      GPTLogger.logger.warn Rainbow('No group found. Exiting...').yellow
      exit
    end

    GPTCommon.show_warning_prompt("\nThe GPT Data Generator will clean up the data it previously generated in '#{group_to_delete['full_path']}' group on the environment.") unless unattended
    GPTCommon.show_warning_prompt("\n'#{group_to_delete['full_path']}' group will be marked for deletion as part of the clean up. To remove group immediately, follow https://docs.gitlab.com/ee/user/group/#remove-a-group-immediately after clean up command.") if gpt_test_data.permanent_delete_available? && !unattended

    gpt_test_data.delete_group(group: group_to_delete, top_level:)
    GPTLogger.logger.info Rainbow("\n'#{group_to_delete['full_path']}' group data clean up completed successfully!").green
    exit
  end

  gpt_test_data.check_users_with_same_name_as_group(grp_path: opts[:root_group])

  if opts[:vulnerability_data]
    # opts[:vulnerabilities_perf] is only truthy when the environment file has a
    # 'vulnerabilities_projects' section.
    raise Rainbow("Secure projects and group not defined in ENV file in k6/config/environments..exiting").red unless opts[:vulnerabilities_perf]

    root_group = gpt_test_data.create_group(group_name: opts[:root_group])
    vulnerabilities_group = [gpt_test_data.create_group(group_name: opts[:vulnerabilities_group], parent_group: root_group)]
    projects = gpt_test_data.create_projects(project_prefix: opts[:vulnerabilities_projects_prefix], subgroups: vulnerabilities_group, projects_count: opts[:vulnerabilities_projects_count])
    # The environment URL comes from opts; there is no @env_url at script level.
    abort(Rainbow("Ultimate license not found in #{opts[:environment_url]} gitlab instance, exiting").yellow) unless gpt_test_data.check_gitlab_ultimate?
    gpt_test_data.check_vuln_api_supported
    gpt_test_data.create_vulnerability_test_data(vulnerabilities_count: opts[:vulnerabilities_count], projects:)
  end

  if opts[:horizontal]
    horizontal_start_time = Time.now
    GPTCommon.show_warning_prompt("\nFor the creation of `horizontal` data with large projects, the GPT Data Generator will need to change the Repository Storages setting on the target GitLab environment. This is to facilitate the creation of numerous Groups and Projects evenly across each Storage node. As such, it will change this setting to point to all nodes as set in the `storage_nodes` option and then create the data.\n\nWhile the Generator is doing this any other projects created or imported during this time will be stored on one of these nodes randomly.\nThe original setting will be restored after the tool is finished.") unless unattended
    root_group = gpt_test_data.create_group(group_name: opts[:root_group])
    many_projects_group = gpt_test_data.create_group(group_name: opts[:group], parent_group: root_group)
    gpt_test_data.create_horizontal_test_data(root_group:, parent_group: many_projects_group, subgroups_count: opts[:subgroups], subgroup_prefix: opts[:subgroup_prefix], projects_count: opts[:projects], project_prefix: opts[:project_prefix])
    horizontal_run_time = ChronicDuration.output((Time.now - horizontal_start_time).to_i, format: :long, keep_zero: true)
    GPTLogger.logger.info Rainbow("\n| Horizontal data: successfully generated after #{horizontal_run_time}!").green
  end

  if opts[:vertical]
    vertical_start_time = Time.now
    opts[:large_project_tarball] ||= gpt_test_data.select_default_large_project_tarball
    GPTCommon.show_warning_prompt("\nFor the creation of `vertical` data with large projects, the GPT Data Generator will need to change the Repository Storages setting on the target GitLab environment. This is to facilitate the creation of a large project in each Storage node specifically. As such, it will change this setting to point to each node as set in the `storage_nodes` option and then create the project sequentially.\n\nWhile the Generator is doing this any other projects created or imported during this time will also be confined to the currently active Storage node.\nThe original setting will be restored after the script is finished.") unless unattended
    GPTLogger.logger.info Rainbow("\n| Vertical data: importing large projects for GPT...").color(230, 83, 35)
    root_group = gpt_test_data.create_group(group_name: opts[:root_group])
    large_projects_group = gpt_test_data.create_group(group_name: opts[:vert_group], parent_group: root_group)
    gpt_test_data.create_vertical_test_data(project_tarball: opts[:large_project_tarball], large_projects_group:, project_name: opts[:large_project_name], project_metadata: opts[:large_project_metadata])

    vertical_run_time = ChronicDuration.output((Time.now - vertical_start_time).to_i, format: :long, keep_zero: true)
    if gpt_test_data.large_projects_validation_errors.values.all?(&:empty?)
      GPTLogger.logger.info Rainbow("\n| Vertical data: successfully generated after #{vertical_run_time}!").green
    else
      GPTLogger.logger.warn(Rainbow("| Vertical data: generated with warnings after #{vertical_run_time}!").red)
      gpt_test_data.large_projects_validation_errors.each do |project, errors|
        next if errors.empty?

        GPTLogger.logger.warn(Rainbow("\n• Project '#{project}' validation errors:").underline.red)
        errors.each { |error| GPTLogger.logger.warn(error) }
      end

      # Exit only after the errors of every project have been logged.
      exit 1
    end
  end

  run_time = ChronicDuration.output((Time.now - start_time).to_i, format: :long, keep_zero: true)
  GPTLogger.logger.info Rainbow("█ GPT data generation finished after #{run_time}.").green
rescue Interrupt
  GPTLogger.logger.warn Rainbow("Caught the interrupt. Stopping.").yellow
  # 130 = 128 + SIGINT(2), the conventional exit status for an interrupted process.
  exit 130
rescue StandardError, HTTP::Error => e
  GPTLogger.logger.error Rainbow("\nGPT data generation failed:\n#{e.class} - #{e.message}\n Traceback:#{e.backtrace}").red
  exit 1
ensure
  # gpt_test_data is nil when GPTTestData.new itself failed. The restore calls are
  # skipped entirely when SKIP_CHANGING_ENV_SETTINGS is set.
  unless gpt_test_data.nil? || ENV['SKIP_CHANGING_ENV_SETTINGS']
    gpt_test_data.restore_soft_delete_settings
    gpt_test_data.restore_import_sources_setting
    gpt_test_data.restore_max_import_size_setting
    gpt_test_data.restore_repo_storage_settings
  end

  # Append the generator version to the log and tell the user where the log is.
  logger_path = ENV['GPT_LOGGER_PATH']
  unless logger_path.nil?
    File.open(logger_path, 'a') { |f| f.write("\nGPT Data Generator: v#{gpt_data_version}") }
    puts "\n█ Logs: #{logger_path}"
  end
end
