https://github.com/wilkeraziz/mosesdecoder
Raw File
Tip revision: 2ec5207db1493a3580a4c2dc3aab4fc65ed528d5 authored by Hieu Hoang on 16 September 2015, 12:31:36 UTC
int warnings
Tip revision: 2ec5207
TypeDef.h
// -*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*-
/***********************************************************************
Moses - factored phrase-based language decoder
Copyright (C) 2006 University of Edinburgh

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
***********************************************************************/

#pragma once

#include <list>
#include <limits>
#include <vector>
#include <string>
#include <stdint.h>

#include <boost/shared_ptr.hpp>
#include <boost/weak_ptr.hpp>

//! all the typedefs and enums goes here


namespace Moses
{

#define PROJECT_NAME		"moses"

#ifndef BOS_
#define BOS_ "<s>" //Beginning of sentence symbol
#endif
#ifndef EOS_
#define EOS_ "</s>" //End of sentence symbol
#endif

#define UNKNOWN_FACTOR	"UNK"
#define EPSILON         "*EPS*"

#define NOT_FOUND 			std::numeric_limits<size_t>::max()
#define MAX_NGRAM_SIZE  20

const size_t DEFAULT_CUBE_PRUNING_POP_LIMIT = 1000;
const size_t DEFAULT_CUBE_PRUNING_DIVERSITY = 0;
const size_t DEFAULT_MAX_HYPOSTACK_SIZE = 200;
const size_t DEFAULT_MAX_TRANS_OPT_CACHE_SIZE = 10000;
const size_t DEFAULT_MAX_TRANS_OPT_SIZE	= 5000;
const size_t DEFAULT_MAX_PART_TRANS_OPT_SIZE = 10000;
//#ifdef PT_UG
// setting to std::numeric_limits<size_t>::max() makes the regression test for (deprecated) PhraseDictionaryDynamicSuffixArray fail.
// const size_t DEFAULT_MAX_PHRASE_LENGTH = 100000;
//#else
const size_t DEFAULT_MAX_PHRASE_LENGTH = 20;
//#endif
const size_t DEFAULT_MAX_CHART_SPAN			= 20;
const size_t ARRAY_SIZE_INCR					= 10; //amount by which a phrase gets resized when necessary
const float LOWEST_SCORE							= -100.0f;
const float DEFAULT_BEAM_WIDTH				= 0.00001f;
const float DEFAULT_EARLY_DISCARDING_THRESHOLD		= 0.0f;
const float DEFAULT_TRANSLATION_OPTION_THRESHOLD	= 0.0f;
const size_t DEFAULT_VERBOSE_LEVEL = 1;

// output floats with five significant digits
static const size_t PRECISION = 3;

// tolerance for equality in floating point comparisons
const float FLOAT_EPSILON = 0.0001;

// enums.
// must be 0, 1, 2, ..., unless otherwise stated

// can only be 2 at the moment
const int NUM_LANGUAGES = 2;

// Looking for MAX_NUM_FACTORS?  It's defined by the build system: bjam --max-factors=4

enum FactorDirection {
  Input,			//! Source factors
  Output			//! Target factors
};

enum DecodeType {
  Translate,
  Generate
};

namespace LexReorderType
{
enum LexReorderType { // explain values
  Backward,
  Forward,
  Bidirectional,
  Fe,
  F
};
}

namespace DistortionOrientationType
{
enum DistortionOrientationOptions {
  Monotone, //distinguish only between monotone and non-monotone as possible orientations
  Msd //further separate non-monotone into swapped and discontinuous
};
}

enum InputTypeEnum {
  SentenceInput         = 0,
  ConfusionNetworkInput	= 1,
  WordLatticeInput      = 2,
  TreeInputType         = 3,
  //,WordLatticeInput2 = 4,
  TabbedSentenceInput    = 5,
  ForestInputType        = 6
};

enum XmlInputType {
  XmlPassThrough = 0,
  XmlIgnore      = 1,
  XmlExclusive   = 2,
  XmlInclusive   = 3,
  XmlConstraint	 = 4
};

enum DictionaryFind {
  Best		= 0,
  All		= 1
};

// Note: StaticData uses SearchAlgorithm to determine whether the translation
// model is phrase-based or syntax-based.  If you add a syntax-based search
// algorithm here then you should also update StaticData::IsSyntax().
enum SearchAlgorithm {
  Normal = 0,
  CubePruning	= 1,
  //,CubeGrowing = 2
  CYKPlus = 3,
  //NormalBatch  = 4,
  ChartIncremental = 5,
  SyntaxS2T = 6,
  SyntaxT2S = 7,
  SyntaxT2S_SCFG = 8,
  SyntaxF2S = 9,
  DefaultSearchAlgorithm = 777 // means: use StaticData.m_searchAlgorithm
};

enum SourceLabelOverlap {
  SourceLabelOverlapAdd = 0,
  SourceLabelOverlapReplace = 1,
  SourceLabelOverlapDiscard = 2
};

enum WordAlignmentSort {
  NoSort = 0,
  TargetOrder = 1
};

enum FormatType {
  MosesFormat,
  HieroFormat
};

enum S2TParsingAlgorithm {
  RecursiveCYKPlus,
  Scope3
};

enum AddStatus {
  New,
  Pruned,
  RecombinedWin,
  RecombinedLose
};

// typedef
typedef size_t FactorType;

typedef std::vector<float> Scores;
typedef std::vector<std::string> WordAlignments;

typedef std::vector<FactorType> FactorList;

typedef std::pair<std::vector<std::string const*>,WordAlignments > StringWordAlignmentCand;

class TranslationTask;
typedef boost::shared_ptr<TranslationTask> ttasksptr;
typedef boost::weak_ptr<TranslationTask> ttaskwptr;
}

back to top