// CHMSVMModel constructor (excerpt): default number of PLiF nodes and PLiF flag
m_num_plif_nodes = 20;
m_use_plifs = use_plifs;
// ...
SG_ERROR("The EStateModelType given is not valid\n")

// CHMSVMModel::get_dim() (excerpt): free_states^2 transmission weights plus one
// emission weight per (free state, feature, PLiF node or observation value)
if ( m_use_plifs )
	return free_states*(free_states + D*m_num_plif_nodes);
else
	return free_states*(free_states + D*m_num_obs);
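// Worked example (values assumed for illustration only): with 2 free states,
// D = 10 features and the default 20 PLiF nodes there are 2*2 = 4 transmission
// weights and 2*10*20 = 400 emission weights, so get_dim() = 2*(2 + 10*20) = 404.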
// CHMSVMModel::get_joint_feature_vector (excerpt): count the state transitions
// of the label sequence into the transmission block of Psi(x,y)
m_transmission_weights.zero();
for ( int32_t i = 0 ; i < state_seq.vlen-1 ; ++i )
	m_transmission_weights(state_seq[i], state_seq[i+1]) += 1;
// ...
	"obs.num_rows (%d) != D (%d) OR obs.num_cols (%d) != state_seq.vlen (%d)\n",
// ...
// Emission block without PLiFs: a histogram of the discrete observation values
// seen in each (state, feature) pair
m_emission_weights.zero();

for ( int32_t f = 0 ; f < D ; ++f )
{
	aux_idx = f*m_num_obs;
	for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
	{
		weight_idx = aux_idx + state_seq[j]*D*m_num_obs + obs(f,j);
		m_emission_weights[weight_idx] += 1;
	}
}

m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
		D, m_num_obs);
// Emission block with PLiFs: distribute each observed value between the two
// PLiF supporting points (limits) that bracket it
for ( int32_t f = 0 ; f < D ; ++f )
{
	aux_idx = f*m_num_plif_nodes;
	for ( int32_t j = 0 ; j < state_seq.vlen ; ++j )
	{
		// ... count = number of supporting points with limits[i] <= value
		for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
		{
			if ( limits[i] <= value )
				++count;
			else
				break;
		}
		weight_idx = aux_idx + state_seq[j]*D*m_num_plif_nodes;

		if ( count == 0 )			// below the first node
			m_emission_weights[weight_idx] += 1;
		else if ( count == m_num_plif_nodes )	// at or above the last node
			m_emission_weights[weight_idx + m_num_plif_nodes-1] += 1;
		else					// interpolate between neighbours
		{
			m_emission_weights[weight_idx + count] +=
				(value-limits[count-1]) / (limits[count]-limits[count-1]);
			m_emission_weights[weight_idx + count-1] +=
				(limits[count]-value) / (limits[count]-limits[count-1]);
		}
	}
}

m_state_model->weights_to_vector(psi, m_transmission_weights, m_emission_weights,
		D, m_num_plif_nodes);
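// Minimal standalone sketch (illustrative only, not Shogun code; the function
// and variable names below are made up) of the node interpolation performed
// above: an observation is split between the two PLiF supporting points that
// bracket it, with fractions that sum to one, while values outside the range
// are assigned entirely to the first or last node. limits is assumed sorted
// in ascending order.
#include <vector>

static void accumulate_plif_counts(std::vector<double>& node_counts,
		const std::vector<double>& limits, double value)
{
	const int n = (int) limits.size();
	int count = 0;				// number of supporting points <= value
	while ( count < n && limits[count] <= value )
		++count;

	if ( count == 0 )			// below the first node
		node_counts[0] += 1.0;
	else if ( count == n )			// at or above the last node
		node_counts[n-1] += 1.0;
	else					// linear interpolation between neighbours
	{
		double frac = (value - limits[count-1]) / (limits[count] - limits[count-1]);
		node_counts[count]   += frac;
		node_counts[count-1] += 1.0 - frac;
	}
}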
// CHMSVMModel::argmax (excerpt): sanity checks before decoding with PLiFs
REQUIRE(m_plif_matrix, "PLiF matrix not allocated, has the SO machine been trained with "
		"the use_plifs option?\n");
// ...
		"feature dimension and/or number of states changed from training to prediction?\n");
// Precompute the emission score matrix E (S x T). Without PLiFs the discrete
// observation indexes the emission weights directly:
for ( int32_t i = 0 ; i < T ; ++i )
	for ( int32_t j = 0 ; j < D ; ++j )
		// ...
		for ( int32_t s = 0 ; s < S ; ++s )
			E(s,i) += m_emission_weights[s*D*m_num_obs + em_idx];

// With PLiFs, each emission score is a piecewise linear lookup of the value:
for ( int32_t i = 0 ; i < T ; ++i )
	for ( int32_t f = 0 ; f < D ; ++f )
		for ( int32_t s = 0 ; s < S ; ++s )
			// ...
			E(s,i) += plif->lookup( x(f,i) );
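// Minimal sketch of a piecewise linear lookup (illustrative only; plif_lookup
// and its parameters are made-up names, not CPlif's actual implementation):
// the score of a value is interpolated between the learned scores of the two
// supporting points that bracket it and clamped outside the supported range.
// limits is assumed sorted with at least two nodes; scores has one entry per node.
#include <vector>

static double plif_lookup(const std::vector<double>& limits,
		const std::vector<double>& scores, double value)
{
	const int n = (int) limits.size();
	if ( value <= limits[0] )
		return scores[0];
	if ( value >= limits[n-1] )
		return scores[n-1];

	int k = 1;
	while ( limits[k] < value )
		++k;				// now limits[k-1] < value <= limits[k]

	double frac = (value - limits[k-1]) / (limits[k] - limits[k-1]);
	return (1.0 - frac)*scores[k-1] + frac*scores[k];
}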
237 "x^i (%d) and the length of its corresponding label y^i "
238 "(%d) must be the same.\n", T, ytrue->
get_data().
size());
243 loss_matrix.num_cols == E.
num_cols);
// Viterbi decoding over the dp (best scores) and trb (traceback) tables
for ( int32_t s = 0 ; s < S ; ++s )
	// ... initialization with the scores of the first position
for ( int32_t i = 1 ; i < T ; ++i )	// forward pass
{
	for ( int32_t cur = 0 ; cur < S ; ++cur )
	{
		// ...
		for ( int32_t prev = 0 ; prev < S ; ++prev )
		{
			// ...
			a = m_transmission_weights[cur*S + prev];
			// ...
			tmp_score = e + a + dp[prev*T + i-1];
			if ( tmp_score > dp[idx] )
			// ... keep the best score in dp and the best previous state in trb
		}
	}
}
// Pick the best final state and report its score
for ( int32_t s = 0 ; s < S ; ++s )
	// ...
ret->score = dp[idx];

REQUIRE(opt_path[T-1]!=-1, "Viterbi decoding found no possible sequence states.\n"
	"Maybe the state model used cannot produce such sequence.\n"
	"If using the TwoStateModel, please use sequences of length greater than two.\n");

// Backtracking recovers the optimal state sequence
for ( int32_t i = T-1 ; i > 0 ; --i )
	opt_path[i-1] = trb[opt_path[i]*T + i];
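// Minimal standalone Viterbi sketch (illustrative only; not the code above and
// not Shogun API). Given emission scores E (S x T) and transition scores
// A (S x S, A[prev][cur] for a move prev -> cur), it returns the
// highest-scoring state sequence via the same dp/trb scheme. Start/stop state
// scores and the loss-augmented term used during training are omitted.
#include <vector>

static std::vector<int> viterbi(const std::vector< std::vector<double> >& E,
		const std::vector< std::vector<double> >& A)
{
	const int S = (int) E.size();
	const int T = (int) E[0].size();
	std::vector< std::vector<double> > dp (S, std::vector<double>(T, 0.0));
	std::vector< std::vector<int> >    trb(S, std::vector<int>(T, -1));

	for ( int s = 0 ; s < S ; ++s )		// initialization with the first position
		dp[s][0] = E[s][0];

	for ( int i = 1 ; i < T ; ++i )		// forward pass
		for ( int cur = 0 ; cur < S ; ++cur )
		{
			dp[cur][i] = -1e300;
			for ( int prev = 0 ; prev < S ; ++prev )
			{
				double score = dp[prev][i-1] + A[prev][cur] + E[cur][i];
				if ( score > dp[cur][i] )
				{
					dp[cur][i]  = score;
					trb[cur][i] = prev;
				}
			}
		}

	int best = 0;				// best final state
	for ( int s = 1 ; s < S ; ++s )
		if ( dp[s][T-1] > dp[best][T-1] )
			best = s;

	std::vector<int> path(T);		// backtracking
	path[T-1] = best;
	for ( int i = T-1 ; i > 0 ; --i )
		path[i-1] = trb[path[i]][i];
	return path;
}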
// CHMSVMModel::delta_loss (excerpt): the loss between two label sequences is
// delegated to the state model
return m_state_model->loss(seq1, seq2);
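// One common choice of Delta loss for HM-SVMs, shown only as an illustration
// (the value returned above is whatever the configured CStateModel implements):
// a Hamming-style, position-wise mismatch count between two label sequences.
#include <cstddef>
#include <vector>

static double hamming_loss(const std::vector<int>& y_true,
		const std::vector<int>& y_pred)
{
	double loss = 0.0;
	for ( std::size_t i = 0 ; i < y_true.size() && i < y_pred.size() ; ++i )
		if ( y_true[i] != y_pred[i] )
			loss += 1.0;
	return loss;
}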
// CHMSVMModel::init_primal_opt (excerpt): weight of the auxiliary smoothness
// variables in the objective and layout of the primal variables
float64_t C_smooth = 0.02*regularization;
// ...
for ( int32_t i = 0 ; i < get_dim() ; ++i )
	// ...

// Starting index of every (state, feature) score block and of its auxiliary
// smoothness variables; delta is the number of score weights per block
int32_t delta = m_use_plifs ? m_num_plif_nodes : m_num_obs;

for ( int32_t idx = S*S, k = 0 ; k < S*D ; idx += delta, ++k )
	score_starts[k] = idx;

for ( int32_t idx = get_dim(), k = 0 ; k < S*D ; idx += delta-1, ++k )
	aux_starts_smooth[k] = idx;

// Smoothness constraints: two rows of A per pair of adjacent score weights
int32_t con_idx = 0, scr_idx, aux_idx;

for ( int32_t i = 0 ; i < score_starts.vlen ; ++i )
{
	scr_idx = score_starts[i];
	aux_idx = aux_starts_smooth[i];

	for ( int32_t j = 0 ; j < delta-1 ; ++j )
	{
		A(con_idx, scr_idx)   =  1;
		A(con_idx, scr_idx+1) = -1;

		if ( monotonicity[i] != 1 )
			A(con_idx, aux_idx) = -1;
		++con_idx;

		A(con_idx, scr_idx)   = -1;
		A(con_idx, scr_idx+1) =  1;

		if ( monotonicity[i] != -1 )
			A(con_idx, aux_idx) = -1;
		++con_idx;

		++scr_idx, ++aux_idx;
	}
}
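// What each pair of rows above encodes, written out (illustration, assuming the
// right-hand side b of these rows is 0): for adjacent score weights w_k, w_k+1
// of one (state, feature) block and its auxiliary variable u_k,
//
//     w_k - w_k+1 - u_k <= 0      and      w_k+1 - w_k - u_k <= 0,
//
// i.e. |w_k+1 - w_k| <= u_k, and every u_k is penalized with C_smooth in the
// objective, which favours smooth PLiFs. When monotonicity[i] forces one
// direction, the auxiliary coefficient is left at 0 in the corresponding row,
// turning it into a hard w_k <= w_k+1 (or w_k >= w_k+1) constraint.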
// CHMSVMModel::check_training_setup (excerpt): every state index in the labels
// must lie in the valid range and every state must occur at least once
for ( int32_t j = 0 ; j < seq_data.size() ; ++j )
	// ...
		SG_ERROR("Found state out of {0, 1, ..., "
// ...
if ( state_freq[i] <= 0 )
	SG_ERROR("What? State %d has never appeared\n", i)
void CHMSVMModel::init()
{
	// Register the members as parameters (excerpt; remaining arguments elided)
	SG_ADD(&m_transmission_weights, "m_transmission_weights", /* ... */);
	SG_ADD(&m_emission_weights, "m_emission_weights", /* ... */);
	SG_ADD(&m_num_plif_nodes, "m_num_plif_nodes", "The number of points per PLiF", /* ... */);
	// ...
	m_state_model = NULL;
	m_plif_matrix = NULL;
	m_num_plif_nodes = 0;
	// ...
// CHMSVMModel::set_use_plifs (excerpt)
m_use_plifs = use_plifs;

// CHMSVMModel::init_training (excerpt): one auxiliary variable per pair of
// adjacent emission weights in every (free state, feature) block
if ( m_use_plifs )
	m_num_aux = free_states*D*(m_num_plif_nodes-1);
else
	m_num_aux = free_states*D*(m_num_obs-1);
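// Worked example (values assumed for illustration only): with 2 free states,
// D = 10 features and the default 20 PLiF nodes there is one auxiliary variable
// per pair of adjacent emission weights in each block, so
// m_num_aux = 2*10*(20-1) = 380, and init_primal_opt above builds two
// smoothness constraint rows for each of them.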
// CHMSVMModel::init_training (excerpt): estimate the PLiF supporting points from
// the training data by pooling the values of each feature over all training
// sequences and placing the limits at evenly spaced positions of the pooled signal
for ( int32_t i = 0 ; i < signal_idxs.vlen ; ++i )
	// ... round the evenly spaced positions to integer indices

for ( int32_t f = 0 ; f < D ; ++f )
{
	// ... gather the values of feature f from every training example
	for ( int32_t j = 0 ; j < feat_vec.num_cols ; ++j )
		signal[idx++] = feat_vec(f,j);
	// ...
	for ( int32_t i = 0 ; i < m_num_plif_nodes ; ++i )
		limits[i] = signal[ signal_idxs[i] ];

	// One PLiF per (state, feature) pair built with these limits
	for ( int32_t s = 0 ; s < S ; ++s )
		// ...
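// Minimal standalone sketch (illustrative only; the names are made up and the
// exact placement used by Shogun may differ): one way to place the PLiF
// supporting points is to sort the pooled signal of a feature and take
// num_nodes approximately evenly spaced order statistics, so the nodes follow
// the empirical distribution of the data. Assumes num_nodes >= 2 and a
// non-empty signal.
#include <algorithm>
#include <cstddef>
#include <vector>

static std::vector<double> plif_limits_from_signal(std::vector<double> signal,
		int num_nodes)
{
	std::sort(signal.begin(), signal.end());

	std::vector<double> limits(num_nodes);
	for ( int i = 0 ; i < num_nodes ; ++i )
	{
		// evenly spaced positions in [0, signal.size()-1], rounded to indices
		double pos = (double) i * (signal.size() - 1) / (num_nodes - 1);
		limits[i] = signal[(std::size_t)(pos + 0.5)];
	}
	return limits;
}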
// Simple accessors
SGMatrix< float64_t > CHMSVMModel::get_transmission_weights() const { return m_transmission_weights; }
SGVector< float64_t > CHMSVMModel::get_emission_weights() const { return m_emission_weights; }
CStateModel* CHMSVMModel::get_state_model() const { return m_state_model; }