https://github.com/CNugteren/CLTune
Raw File
Tip revision: 9edbebc9b094c4bf1dbd84e02a90754234778757 authored by Cedric Nugteren on 26 May 2015, 14:33:16 UTC
Merge pull request #23 from CNugteren/cmake_update
Tip revision: 9edbebc
cltune.cc

// =================================================================================================
// This file is part of the CLTune project, which loosely follows the Google C++ styleguide and uses
// a tab-size of two spaces and a max-width of 100 characters per line.
//
// Author: cedric.nugteren@surfsara.nl (Cedric Nugteren)
//
// This file implements the Tuner class (see the header for information about the class).
//
// -------------------------------------------------------------------------------------------------
//
// Copyright 2014 SURFsara
// 
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
// 
//  http://www.apache.org/licenses/LICENSE-2.0
// 
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// =================================================================================================

// The corresponding header file
#include "cltune.h"

// And the implementation (Pimpl idiom)
#include "internal/tuner_impl.h"

#include <iostream> // FILE
#include <limits> // std::numeric_limits

namespace cltune {
// =================================================================================================

// Construction and destruction are delegated to the TunerImpl class (Pimpl idiom). The default
// constructor creates a default-constructed TunerImpl and hands it to the 'pimpl' member.
Tuner::Tuner()
    : pimpl(new TunerImpl()) {
}
// Constructs a tuner for an explicitly chosen platform and device: both IDs are forwarded
// unchanged to the TunerImpl constructor.
Tuner::Tuner(size_t platform_id, size_t device_id)
    : pimpl(new TunerImpl(platform_id, device_id)) {
}
// No explicit clean-up is required. The destructor is defined in this translation unit, which
// includes the TunerImpl definition.
Tuner::~Tuner() = default;

// =================================================================================================

// Reads every file in 'filenames' (in the given order) through TunerImpl::LoadFile, concatenates
// the contents into a single source string and forwards it to AddKernelFromString. Returns the
// kernel ID produced by that call.
size_t Tuner::AddKernel(const std::vector<std::string> &filenames, const std::string &kernel_name,
                        const IntRange &global, const IntRange &local) {
  std::string combined_source;
  for (const auto &path: filenames) {
    combined_source += pimpl->LoadFile(path);
  }
  return AddKernelFromString(combined_source, kernel_name, global, local);
}

// Registers a kernel given directly as a source string: appends a new KernelInfo (built from the
// kernel name, the source and the tuner's device) to the list of kernels and stores the base
// global/local thread-sizes in it. The returned ID is the index of the new entry in that list.
size_t Tuner::AddKernelFromString(const std::string &source, const std::string &kernel_name,
                                  const IntRange &global, const IntRange &local) {
  auto &kernels = pimpl->kernels_;
  kernels.push_back(KernelInfo(kernel_name, source, pimpl->device()));

  // The kernel just added is the last one in the list
  auto &kernel = kernels.back();
  kernel.set_global_base(global);
  kernel.set_local_base(local);
  return kernels.size() - 1;
}

// =================================================================================================

// Sets the reference kernel from one or more source files. The file contents are concatenated in
// the given order and passed on to SetReferenceFromString together with the kernel name and the
// global/local thread-sizes. Calling this function again replaces the previous reference.
void Tuner::SetReference(const std::vector<std::string> &filenames, const std::string &kernel_name,
                         const IntRange &global, const IntRange &local) {
  std::string combined_source;
  for (const auto &path: filenames) {
    combined_source += pimpl->LoadFile(path);
  }
  SetReferenceFromString(combined_source, kernel_name, global, local);
}
// Sets the reference kernel from a source string: replaces any previously stored reference by a
// new KernelInfo (kernel name, source, device) and stores the global/local thread-sizes in it.
// The 'has_reference_' flag is raised only after the reference kernel has been fully set up, so
// that an exception thrown while constructing or configuring it cannot leave the flag set
// without a matching kernel.
void Tuner::SetReferenceFromString(const std::string &source, const std::string &kernel_name,
                                   const IntRange &global, const IntRange &local) {
  pimpl->reference_kernel_.reset(new KernelInfo(kernel_name, source, pimpl->device()));
  pimpl->reference_kernel_->set_global_base(global);
  pimpl->reference_kernel_->set_local_base(local);
  pimpl->has_reference_ = true;
}

// =================================================================================================

// Adds a tuning parameter (a name plus its list of candidate values) to the kernel with the given
// ID. Throws std::runtime_error if the ID does not refer to a registered kernel or if that kernel
// already has a parameter with this name.
void Tuner::AddParameter(const size_t id, const std::string &parameter_name,
                         const std::initializer_list<size_t> &values) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  auto &kernel = kernels[id];
  if (kernel.ParameterExists(parameter_name)) {
    throw std::runtime_error("Parameter already exists");
  }
  kernel.AddParameter(parameter_name, values);
}

// Adds a single-valued parameter to the reference kernel by prepending a pre-processor define
// ("#define <name> <value>") to its source. Throws std::runtime_error if no reference kernel has
// been set yet, rather than dereferencing an empty pointer.
void Tuner::AddParameterReference(const std::string &parameter_name, const size_t value) {
  if (!pimpl->reference_kernel_) {
    throw std::runtime_error("No reference kernel set");
  }

  // Converts through an unsigned type: a signed conversion would print very large values as
  // negative numbers.
  const auto value_string = std::to_string(static_cast<unsigned long long>(value));
  pimpl->reference_kernel_->PrependSource("#define "+parameter_name+" "+value_string);
}

// =================================================================================================

// The four functions MulGlobalSize, DivGlobalSize, MulLocalSize and DivLocalSize forward their
// work (adding a modifier to the global/local thread-sizes) to an object of the KernelInfo class.
// This one registers a 'multiply the global size' modifier and throws std::runtime_error if the
// kernel ID is invalid.
void Tuner::MulGlobalSize(const size_t id, const StringRange range) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  kernels[id].AddModifier(range, KernelInfo::ThreadSizeModifierType::kGlobalMul);
}
// Registers a 'divide the global size' modifier for the kernel with the given ID. Throws
// std::runtime_error if the kernel ID is invalid.
void Tuner::DivGlobalSize(const size_t id, const StringRange range) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  kernels[id].AddModifier(range, KernelInfo::ThreadSizeModifierType::kGlobalDiv);
}
// Registers a 'multiply the local size' modifier for the kernel with the given ID. Throws
// std::runtime_error if the kernel ID is invalid.
void Tuner::MulLocalSize(const size_t id, const StringRange range) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  kernels[id].AddModifier(range, KernelInfo::ThreadSizeModifierType::kLocalMul);
}
// Registers a 'divide the local size' modifier for the kernel with the given ID. Throws
// std::runtime_error if the kernel ID is invalid.
void Tuner::DivLocalSize(const size_t id, const StringRange range) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  kernels[id].AddModifier(range, KernelInfo::ThreadSizeModifierType::kLocalDiv);
}

// Adds a constraint to the list of constraints of a particular kernel. The kernel ID is validated
// first, followed by every parameter name the constraint refers to; std::runtime_error is thrown
// on an unknown kernel ID or an unknown parameter.
void Tuner::AddConstraint(const size_t id, ConstraintFunction valid_if,
                          const std::vector<std::string> &parameters) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  auto &kernel = kernels[id];
  for (const auto &name: parameters) {
    if (!kernel.ParameterExists(name)) {
      throw std::runtime_error("Invalid parameter");
    }
  }
  kernel.AddConstraint(valid_if, parameters);
}

// Registers the function that gives the local memory usage of a particular kernel, together with
// the names of the parameters it depends on. Throws std::runtime_error on an unknown kernel ID or
// an unknown parameter.
void Tuner::SetLocalMemoryUsage(const size_t id, LocalMemoryFunction amount,
                                const std::vector<std::string> &parameters) {
  auto &kernels = pimpl->kernels_;
  if (id >= kernels.size()) {
    throw std::runtime_error("Invalid kernel ID");
  }
  auto &kernel = kernels[id];
  for (const auto &name: parameters) {
    if (!kernel.ParameterExists(name)) {
      throw std::runtime_error("Invalid parameter");
    }
  }
  kernel.SetLocalMemoryUsage(amount, parameters);
}


// =================================================================================================

// Adds an input buffer as the next kernel argument. A read-only device buffer of the same size as
// 'source' is created and filled with the host data; a write failure results in a
// std::runtime_error carrying the status code. The buffer is then stored in the list of input
// arguments together with its argument index, number of elements and data-type.
template <typename T>
void Tuner::AddArgumentInput(const std::vector<T> &source) {
  const auto num_elements = source.size();
  const auto num_bytes = num_elements*sizeof(T);

  // Creates the device buffer and uploads the host data
  auto device_buffer = Buffer(pimpl->context(), CL_MEM_READ_ONLY, num_bytes);
  const auto write_status = device_buffer.WriteBuffer(pimpl->queue(), num_bytes, source);
  if (write_status != CL_SUCCESS) {
    throw std::runtime_error("Write buffer error: " + std::to_string(write_status));
  }

  // Stores the argument; the argument counter is incremented for the next argument
  auto argument = TunerImpl::MemArgument{pimpl->argument_counter_++, num_elements,
                                         pimpl->GetType<T>(), device_buffer};
  pimpl->arguments_input_.push_back(argument);
}

// Explicitly instantiates AddArgumentInput for each supported element type. The template is
// defined in this source file, so these are the instantiations it provides.
template void Tuner::AddArgumentInput<int>(const std::vector<int>&);
template void Tuner::AddArgumentInput<size_t>(const std::vector<size_t>&);
template void Tuner::AddArgumentInput<float>(const std::vector<float>&);
template void Tuner::AddArgumentInput<double>(const std::vector<double>&);
template void Tuner::AddArgumentInput<float2>(const std::vector<float2>&);
template void Tuner::AddArgumentInput<double2>(const std::vector<double2>&);

// Adds an output buffer as the next kernel argument. Only the size of 'source' is used: a
// read-write device buffer of that size is created, but no host data is uploaded. The buffer is
// stored in the list of output arguments, which are the ones checked in the verification process.
template <typename T>
void Tuner::AddArgumentOutput(const std::vector<T> &source) {
  const auto num_elements = source.size();
  auto device_buffer = Buffer(pimpl->context(), CL_MEM_READ_WRITE, num_elements*sizeof(T));

  // Stores the argument; the argument counter is incremented for the next argument
  auto argument = TunerImpl::MemArgument{pimpl->argument_counter_++, num_elements,
                                         pimpl->GetType<T>(), device_buffer};
  pimpl->arguments_output_.push_back(argument);
}

// Explicitly instantiates AddArgumentOutput for each supported element type. The template is
// defined in this source file, so these are the instantiations it provides.
template void Tuner::AddArgumentOutput<int>(const std::vector<int>&);
template void Tuner::AddArgumentOutput<size_t>(const std::vector<size_t>&);
template void Tuner::AddArgumentOutput<float>(const std::vector<float>&);
template void Tuner::AddArgumentOutput<double>(const std::vector<double>&);
template void Tuner::AddArgumentOutput<float2>(const std::vector<float2>&);
template void Tuner::AddArgumentOutput<double2>(const std::vector<double2>&);

// Sets a scalar value as an argument to the kernel. Since a vector of scalars of any type doesn't
// exist, there is no general implementation. Instead, each data-type has its own specialised
// version which stores the (argument index, value) pair in a type-specific list. This is the
// version for 'int'.
template <> void Tuner::AddArgumentScalar<int>(const int argument) {
  const auto index = pimpl->argument_counter_++;
  pimpl->arguments_int_.push_back({index, argument});
}
// Scalar argument of type 'size_t': stored with its argument index in the size_t-specific list.
template <> void Tuner::AddArgumentScalar<size_t>(const size_t argument) {
  const auto index = pimpl->argument_counter_++;
  pimpl->arguments_size_t_.push_back({index, argument});
}
// Scalar argument of type 'float': stored with its argument index in the float-specific list.
template <> void Tuner::AddArgumentScalar<float>(const float argument) {
  const auto index = pimpl->argument_counter_++;
  pimpl->arguments_float_.push_back({index, argument});
}
// Scalar argument of type 'double': stored with its argument index in the double-specific list.
template <> void Tuner::AddArgumentScalar<double>(const double argument) {
  const auto index = pimpl->argument_counter_++;
  pimpl->arguments_double_.push_back({index, argument});
}
// Scalar argument of type 'float2': stored with its argument index in the float2-specific list.
template <> void Tuner::AddArgumentScalar<float2>(const float2 argument) {
  const auto index = pimpl->argument_counter_++;
  pimpl->arguments_float2_.push_back({index, argument});
}
// Scalar argument of type 'double2': stored with its argument index in the double2-specific list.
template <> void Tuner::AddArgumentScalar<double2>(const double2 argument) {
  const auto index = pimpl->argument_counter_++;
  pimpl->arguments_double2_.push_back({index, argument});
}

// =================================================================================================

// Selects full search as the search strategy by setting the search method of the implementation
// object. This is the default method. No search arguments are stored for this strategy.
void Tuner::UseFullSearch() {
  pimpl->search_method_ = SearchMethod::FullSearch;
}

// Selects random search as the search strategy. The list of search arguments is reset first and
// then holds a single entry: the fraction. Resetting matters because the arguments are stored
// positionally: without it, a previous call to any of the Use* functions would leave its values
// in front of the new one.
void Tuner::UseRandomSearch(const double fraction) {
  pimpl->search_method_ = SearchMethod::RandomSearch;
  pimpl->search_args_.clear();
  pimpl->search_args_.push_back(fraction);
}

// Selects simulated annealing as the search strategy. The list of search arguments is reset
// first and then holds, in this order: the fraction and the maximum temperature. Resetting
// matters because the arguments are stored positionally: without it, a previous call to any of
// the Use* functions would leave its values in front of the new ones.
void Tuner::UseAnnealing(const double fraction, const double max_temperature) {
  pimpl->search_method_ = SearchMethod::Annealing;
  auto &args = pimpl->search_args_;
  args.clear();
  args.push_back(fraction);
  args.push_back(max_temperature);
}

// Selects PSO as the search strategy. The list of search arguments is reset first and then
// holds, in this order: the fraction, the swarm size (converted to double) and the global, local
// and random influences. Resetting matters because the arguments are stored positionally:
// without it, a previous call to any of the Use* functions would leave its values in front of
// the new ones.
void Tuner::UsePSO(const double fraction, const size_t swarm_size, const double influence_global,
                   const double influence_local, const double influence_random) {
  pimpl->search_method_ = SearchMethod::PSO;
  auto &args = pimpl->search_args_;
  args.clear();
  args.push_back(fraction);
  args.push_back(static_cast<double>(swarm_size));
  args.push_back(influence_global);
  args.push_back(influence_local);
  args.push_back(influence_random);
}


// Output the search process to a file. This is disabled per default.
void Tuner::OutputSearchLog(const std::string &filename) {
  pimpl->output_search_process_ = true;
  pimpl->search_log_filename_ = filename;
}

// =================================================================================================

// Starts the tuning process. All work is forwarded to TunerImpl::Tune; see that implementation
// for details.
void Tuner::Tune() {
  pimpl->Tune();
}

// =================================================================================================

// Iterates over all tuning results and prints each valid parameter configuration and the
// corresponding timing-result, followed by the best result. Printing is to stdout. Returns the
// best (lowest) execution time, or 0.0 if there are no results or none of them is valid. An empty
// list of results is handled explicitly rather than by reading its (non-existent) first element.
double Tuner::PrintToScreen() const {
  const auto no_time = std::numeric_limits<double>::max();
  auto &results = pimpl->tuning_results_;

  // Finds the best result. Results are tracked by iterator to avoid copying them; 'end' means
  // that no valid result was found (which includes the case of an empty list).
  auto best_result = results.end();
  auto best_time = no_time;
  for (auto it = results.begin(); it != results.end(); ++it) {
    if (it->status && best_time >= it->time) {
      best_result = it;
      best_time = it->time;
    }
  }

  // Aborts if there was no best time found
  if (best_result == results.end() || best_time == no_time) {
    pimpl->PrintHeader("No tuner results found");
    return 0.0;
  }

  // Prints all valid results and the one with the lowest execution time
  pimpl->PrintHeader("Printing results to stdout");
  for (auto &tuning_result: results) {
    if (tuning_result.status && tuning_result.time != no_time) {
      pimpl->PrintResult(stdout, tuning_result, pimpl->kMessageResult);
    }
  }
  pimpl->PrintHeader("Printing best result to stdout");
  pimpl->PrintResult(stdout, *best_result, pimpl->kMessageBest);

  // Return the best time
  return best_time;
}

// Prints the best result in a neatly formatted C++ database format to screen:
//   { "<device name>", { <setting>, <setting>, ... } }
// If there are no results, or none of them is valid, only a "No tuner results found" header is
// printed: an empty list is not indexed and a failed result is never reported as the best one.
void Tuner::PrintFormatted() const {
  const auto no_time = std::numeric_limits<double>::max();
  auto &results = pimpl->tuning_results_;

  // Finds the best result. Results are tracked by iterator to avoid copying them; 'end' means
  // that no valid result was found (which includes the case of an empty list).
  auto best_result = results.end();
  auto best_time = no_time;
  for (auto it = results.begin(); it != results.end(); ++it) {
    if (it->status && best_time >= it->time) {
      best_result = it;
      best_time = it->time;
    }
  }

  // Aborts if there was no best result found
  if (best_result == results.end() || best_time == no_time) {
    pimpl->PrintHeader("No tuner results found");
    return;
  }

  // Prints the best result in C++ database format. The separator is empty for the first setting
  // and a comma for all following ones.
  pimpl->PrintHeader("Printing best result in database format to stdout");
  fprintf(stdout, "{ \"%s\", { ", pimpl->device().Name().c_str());
  auto separator = "";
  for (auto &setting: best_result->configuration) {
    fprintf(stdout, "%s%s", separator, setting.GetDatabase().c_str());
    separator = ", ";
  }
  fprintf(stdout, " } }\n");
}

// Writes all valid tuning results to a file in a semicolon-separated format, without marking the
// best-case. Each kernel name gets a header line (name;time;threads;<parameter names>) in front
// of its first entry. Throws std::runtime_error if the file cannot be opened for writing.
void Tuner::PrintToFile(const std::string &filename) const {
  pimpl->PrintHeader("Printing results to file: "+filename);
  auto file = fopen(filename.c_str(), "w");
  if (file == nullptr) {
    throw std::runtime_error("Could not open file for writing: "+filename);
  }

  std::vector<std::string> processed_kernels;
  for (auto &tuning_result: pimpl->tuning_results_) {
    if (!tuning_result.status) { continue; }

    // Checks whether this is a kernel which hasn't been encountered yet
    auto new_kernel = true;
    for (auto &kernel_name: processed_kernels) {
      if (kernel_name == tuning_result.kernel_name) { new_kernel = false; break; }
    }

    // Prints the header in case of a new kernel name, and remembers the name (only once)
    if (new_kernel) {
      processed_kernels.push_back(tuning_result.kernel_name);
      fprintf(file, "name;time;threads;");
      for (auto &setting: tuning_result.configuration) {
        fprintf(file, "%s;", setting.name.c_str());
      }
      fprintf(file, "\n");
    }

    // Prints an entry to file. The integer values are converted explicitly so that the argument
    // types match the format specifiers on every platform.
    fprintf(file, "%s;", tuning_result.kernel_name.c_str());
    fprintf(file, "%.2lf;", tuning_result.time);
    fprintf(file, "%llu;", static_cast<unsigned long long>(tuning_result.threads));
    for (auto &setting: tuning_result.configuration) {
      fprintf(file, "%llu;", static_cast<unsigned long long>(setting.value));
    }
    fprintf(file, "\n");
  }
  fclose(file);
}

// Sets the flag to suppress output to true. Note that this cannot be undone: none of the
// functions in this file sets the flag back to false.
void Tuner::SuppressOutput() {
  pimpl->suppress_output_ = true;
}

// =================================================================================================
} // namespace cltune
back to top