#include "EST_Token.h"
#include "EST_FMatrix.h"
#include "EST_multistats.h"
#include "EST_Wagon.h"

int wgn_min_cluster_size = 50;
int wgn_max_questions = 2000000;
float wgn_dropout_feats = 0.0;
float wgn_dropout_samples = 0.0;
int wgn_quiet = FALSE;
int wgn_verbose = FALSE;
int wgn_count_field = -1;
int wgn_predictee = 0;
float wgn_float_range_split = 10;
float wgn_balance = 0;
static float do_summary(WNode &tree, WDataSet &ds, ostream *output);
static float test_tree_float(WNode &tree, WDataSet &ds, ostream *output);
static float test_tree_class(WNode &tree, WDataSet &ds, ostream *output);
static float test_tree_cluster(WNode &tree, WDataSet &dataset, ostream *output);
static float test_tree_vector(WNode &tree, WDataSet &dataset, ostream *output);
static float test_tree_trajectory(WNode &tree, WDataSet &dataset, ostream *output);
static float test_tree_ols(WNode &tree, WDataSet &dataset, ostream *output);
static int wagon_split(int margin, WNode &node);
static void construct_binary_ques(int feat, WQuestion &test_ques);
static WNode *wagon_stepwise_find_next_best(float &bscore, int &best_feat);

Declare_TList_T(WVector *, WVectorP)

Declare_TVector_Base_T(WVector *, NULL, NULL, WVectorP)
#if defined(INSTANTIATE_TEMPLATES)
#include "../base_class/EST_TList.cc"
#include "../base_class/EST_TVector.cc"

Instantiate_TList_T(WVector *, WVectorP)
void wgn_load_datadescription(EST_String fname, LISP ignores)
{
    wgn_dataset.load_description(fname, ignores);
    wgn_test_dataset.load_description(fname, ignores);
}
void wgn_load_dataset(WDataSet &dataset, EST_String fname)
{
    // Read data vectors from fname, parsing each field according to the
    // dataset's field description.  Elided lines are marked /* ... */.
    EST_TokenStream ts;

    if (ts.open(fname) == -1)
        wagon_error(EST_String("unable to open data file \"") + /* ... */);
    /* ... for each line of the file ... */
        v = new WVector(dataset.width());
        i = 0;
        do
        {
            int type = dataset.ftype(i);
            if ((type == wndt_float) ||
                (type == wndt_ols) ||
                (wgn_count_field == i))
            {
                float f = atof(ts.get().string());
                /* ... if f is not a usable float, report it and store 0 ... */
                    cout << fname << ": bad float " << f
                         << " in field " <<
                        dataset.feat_name(i) << " vector " <<
                        dataset.samples() << endl;
                    v->set_flt_val(i, 0.0);
                /* ... */
            }
            else if (type == wndt_binary)
                v->set_int_val(i, atoi(ts.get().string()));
            else if (type == wndt_cluster)
                v->set_int_val(i, atoi(ts.get().string()));
            else if (type == wndt_vector)
                v->set_int_val(i, atoi(ts.get().string()));
            else if (type == wndt_trajectory)
                v->set_int_val(i, atoi(ts.get().string()));
            else if (type == wndt_ignore)
                /* ... skip the token ... */
            else
            {
                int n = wgn_discretes.discrete(type).name(s);
                /* ... if the value is not in the discrete's vocabulary, report it ... */
                    cout << fname << ": bad value " << s << " in field " <<
                        dataset.feat_name(i) << " vector " <<
                        dataset.samples() << endl;
                /* ... */
            }
            /* ... */
        }
        while (!ts.eoln() && i < dataset.width());

        if (i != dataset.width())
            wagon_error(fname + ": data vector " + itoString(nvec) + " contains "
                        + itoString(i) + " parameters instead of " +
                        itoString(dataset.width()));
        /* ... if there are fields left over on the line: */
            cerr << fname << ": data vector " << nvec <<
                " contains too many parameters instead of "
                 << dataset.width() << endl;
            wagon_error(EST_String("extra parameter(s) from ") + /* ... */);
    /* ... */
    cout << "Dataset of " << dataset.samples() << " vectors of " <<
        dataset.width() << " parameters from: " << fname << endl;
}
float summary_results(WNode &tree, ostream *output)
{
    if (wgn_test_dataset.samples() != 0)
        return do_summary(tree, wgn_test_dataset, output);
    else
        return do_summary(tree, wgn_dataset, output);
}
static float do_summary(WNode &tree, WDataSet &ds, ostream *output)
{
    if (wgn_dataset.ftype(wgn_predictee) == wndt_cluster)
        return test_tree_cluster(tree, ds, output);
    else if (wgn_dataset.ftype(wgn_predictee) == wndt_vector)
        return test_tree_vector(tree, ds, output);
    else if (wgn_dataset.ftype(wgn_predictee) == wndt_trajectory)
        return test_tree_trajectory(tree, ds, output);
    else if (wgn_dataset.ftype(wgn_predictee) == wndt_ols)
        return test_tree_ols(tree, ds, output);
    else if (wgn_dataset.ftype(wgn_predictee) >= wndt_class)
        return test_tree_class(tree, ds, output);
    else
        return test_tree_float(tree, ds, output);
}
WNode *wgn_build_tree(float &score)
{
    // Build a tree from the training data, optionally prune it against the
    // held-out data, and return it together with its summary score.
    /* ... */
    wgn_set_up_data(top->get_data(), wgn_dataset, wgn_held_out, TRUE);
    /* ... */
    wagon_split(margin, *top);
    if (wgn_held_out > 0)
    {
        wgn_set_up_data(top->get_data(), wgn_dataset, wgn_held_out, FALSE);
        top->held_out_prune();
    }
    /* ... */
    score = summary_results(*top, 0);
    return top;
}
    /* from wgn_set_up_data(): split the vectors between training and held-out data */
    for (j = i = 0, d = ds.head(); d != 0; d = d->next(), j++)
    {
        if ((in) && ((j % 100) >= held_out))
            /* ... */
static float test_tree_class(WNode &tree, WDataSet &dataset, ostream *output)
{
    // Test the tree against class data: report a confusion matrix, entropy
    // and (optionally) B/NB F1, and return the requested optimisation score.
    float correct = 0, total = 0, count = 0;
    float bcorrect = 0, bpredicted = 0, bactual = 0;
    float precision = 0, recall = 0;
    /* ... */
    for (p = dataset.head(); p != 0; p = p->next())
    {
        pnode = tree.predict_node((*dataset(p)));
        predict = (EST_String)pnode->get_impurity().value();
        if (wgn_count_field == -1)
            count = 1.0;
        else
            count = dataset(p)->get_flt_val(wgn_count_field);
        prob = pnode->get_impurity().pd().probability(predict);
        H += (log(prob)) * count;
        type = dataset.ftype(wgn_predictee);
        real = wgn_discretes[type].name(dataset(p)->get_int_val(wgn_predictee));

        if (wgn_opt_param == "B_NB_F1")
        {
            /* ... accumulate bpredicted/bactual/bcorrect for F1 ... */
        }
        /* ... update correct/total and the confusion counts ... */
    }

    for (i = 0; i < wgn_discretes[dataset.ftype(wgn_predictee)].length(); i++)
        lex.append(wgn_discretes[dataset.ftype(wgn_predictee)].name(i));
    /* ... */
    print_confusion(m, pairs, lex);
    *output << ";; entropy " << (-1 * (H / total)) << " perplexity " <<
        pow(2.0, (-1 * (H / total))) << endl;
    /* ... */
    if (wgn_opt_param == "entropy")
        return -pow(2.0, (-1 * (H / total)));
    else if (wgn_opt_param == "B_NB_F1")
    {
        /* ... */
        precision = bcorrect / bpredicted;
        /* ... */
        recall = bcorrect / bactual;
        if ((precision + recall) != 0)
            fmeasure = 2 * (precision * recall) / (precision + recall);
        cout << "F1 :" << fmeasure << " Prec:" << precision <<
            " Rec:" << recall << " B-Pred:" << bpredicted <<
            " B-Actual:" << bactual << " B-Correct:" << bcorrect << endl;
        /* ... */
    }
    else
        return (float)correct / (float)total;
}
static float test_tree_vector(WNode &tree, WDataSet &dataset, ostream *output)
{
    // Test the tree against vector data: for each channel the leaf's
    // prediction is the mean over its member vertices.
    float predict, actual;
    /* ... */
    for (p = dataset.head(); p != 0; p = p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        pos = dataset(p)->get_int_val(wgn_predictee);
        for (j = 0; j < wgn_VertexTrack.num_channels(); j++)
        {
            if (wgn_VertexFeats.a(0, j) > 0.0)
            {
                b.reset();
                for (pp = leaf->get_impurity().members.head(); pp != 0; pp = pp->next())
                {
                    i = leaf->get_impurity().members.item(pp);
                    b += wgn_VertexTrack.a(i, j);
                }
                predict = b.mean();
                actual = wgn_VertexTrack.a(pos, j);
                if (wgn_count_field == -1)
                    count = 1.0;
                else
                    count = dataset(p)->get_flt_val(wgn_count_field);
                x.cumulate(predict, count);
                y.cumulate(actual, count);

                error = predict - actual;
                /* ... */
                error = (predict - actual) / b.stddev();
                error = predict - actual;   // the unnormalised error is what is kept
                se.cumulate((error * error), count);
                e.cumulate(fabs(error), count);
                xx.cumulate(predict * predict, count);
                yy.cumulate(actual * actual, count);
                xy.cumulate(predict * actual, count);
            }
        }
    }
    /* ... compute the correlation cor from x, y, xx, yy and xy ... */
    *output
        << ";; RMSE " << ftoString(sqrt(se.mean()), 4, 1)
        << " Correlation is " << ftoString(cor, 4, 1)
        << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
        << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    cout << "RMSE " << ftoString(sqrt(se.mean()), 4, 1)
         << " Correlation is " << ftoString(cor, 4, 1)
         << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
         << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());
    else
        return cor;
}
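/*
 * The lines that compute the correlation reported above are elided in this
 * extract.  As a sketch (assuming the suffstats-style accumulators x, y,
 * xx, yy and xy used above; not the verbatim source), the Pearson
 * correlation falls out of the cumulated sums as:
 *
 *   float cor = (xy.mean() - x.mean() * y.mean()) /
 *               (sqrt(xx.mean() - x.mean() * x.mean()) *
 *                sqrt(yy.mean() - y.mean() * y.mean()));
 *
 * and the reported RMSE is sqrt(se.mean()), since se cumulates the squared
 * errors.  The same pattern applies to test_tree_trajectory, test_tree_float
 * and test_tree_ols below.
 */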
static float test_tree_trajectory(WNode &tree, WDataSet &dataset, ostream *output)
{
    // Test the tree against trajectory data; the visible accumulation and
    // reporting parallel test_tree_vector above.
    float predict, actual;
    /* ... */
    for (p = dataset.head(); p != 0; p = p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        pos = dataset(p)->get_int_val(wgn_predictee);
        for (j = 0; j < wgn_VertexTrack.num_channels(); j++)
        {
            if (wgn_VertexFeats.a(0, j) > 0.0)
            {
                /* ... */
                for (pp = leaf->get_impurity().members.head(); pp != 0; pp = pp->next())
                {
                    i = leaf->get_impurity().members.item(pp);
                    b += wgn_VertexTrack.a(i, j);
                }
                /* ... */
                actual = wgn_VertexTrack.a(pos, j);
                if (wgn_count_field == -1)
                    count = 1.0;
                else
                    count = dataset(p)->get_flt_val(wgn_count_field);
                x.cumulate(predict, count);
                y.cumulate(actual, count);

                error = predict - actual;
                /* ... */
                error = (predict - actual) / b.stddev();
                error = predict - actual;   // the unnormalised error is what is kept
                se.cumulate((error * error), count);
                e.cumulate(fabs(error), count);
                xx.cumulate(predict * predict, count);
                yy.cumulate(actual * actual, count);
                xy.cumulate(predict * actual, count);
            }
        }
    }
    /* ... compute the correlation cor from x, y, xx, yy and xy ... */
    *output
        << ";; RMSE " << ftoString(sqrt(se.mean()), 4, 1)
        << " Correlation is " << ftoString(cor, 4, 1)
        << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
        << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    cout << "RMSE " << ftoString(sqrt(se.mean()), 4, 1)
         << " Correlation is " << ftoString(cor, 4, 1)
         << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
         << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());
    else
        return cor;
}
static float test_tree_cluster(WNode &tree, WDataSet &dataset, ostream *output)
{
    // Test the tree against cluster data: how often the right cluster is
    // predicted, plus its mean ranking and mean distance.
    /* ... */
    for (p = dataset.head(); p != 0; p = p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        real = dataset(p)->get_int_val(wgn_predictee);
        meandist += leaf->get_impurity().cluster_distance(real);
        right_cluster += leaf->get_impurity().in_cluster(real);
        ranking += leaf->get_impurity().cluster_ranking(real);
    }
    /* ... */
    *output << ";; Right cluster " << right_cluster << " (" <<
        (int)(100.0 * (float)right_cluster / (float)dataset.length()) <<
        "%) mean ranking " << ranking.mean() << " mean distance "
        << meandist.mean() << endl;
    cout << "Right cluster " << right_cluster << " (" <<
        (int)(100.0 * (float)right_cluster / (float)dataset.length()) <<
        "%) mean ranking " << ranking.mean() << " mean distance "
        << meandist.mean() << endl;

    return 10000 - meandist.mean();
}
static float test_tree_float(WNode &tree, WDataSet &dataset, ostream *output)
{
    // Test the tree against float data: report RMSE, correlation and mean
    // absolute error.
    /* ... */
    for (p = dataset.head(); p != 0; p = p->next())
    {
        predict = tree.predict((*dataset(p)));
        real = dataset(p)->get_flt_val(wgn_predictee);
        if (wgn_count_field == -1)
            count = 1.0;
        else
            count = dataset(p)->get_flt_val(wgn_count_field);
        x.cumulate(predict, count);
        y.cumulate(real, count);
        error = predict - real;
        se.cumulate((error * error), count);
        e.cumulate(fabs(error), count);
        xx.cumulate(predict * predict, count);
        yy.cumulate(real * real, count);
        xy.cumulate(predict * real, count);
    }
    /* ... compute the correlation cor ... */
    *output
        << ";; RMSE " << ftoString(sqrt(se.mean()), 4, 1)
        << " Correlation is " << ftoString(cor, 4, 1)
        << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
        << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    cout << "RMSE " << ftoString(sqrt(se.mean()), 4, 1)
         << " Correlation is " << ftoString(cor, 4, 1)
         << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
         << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());
    else
        return cor;
}
static float test_tree_ols(WNode &tree, WDataSet &dataset, ostream *output)
{
    // Test the tree against data when each leaf holds a linear (OLS) model.
    /* ... */
    for (p = dataset.head(); p != 0; p = p->next())
    {
        leaf = tree.predict_node((*dataset(p)));
        /* ... apply the leaf's OLS model to get predict ... */
        real = dataset(p)->get_flt_val(wgn_predictee);
        if (wgn_count_field == -1)
            count = 1.0;
        else
            count = dataset(p)->get_flt_val(wgn_count_field);
        x.cumulate(predict, count);
        y.cumulate(real, count);
        error = predict - real;
        se.cumulate((error * error), count);
        e.cumulate(fabs(error), count);
        xx.cumulate(predict * predict, count);
        yy.cumulate(real * real, count);
        xy.cumulate(predict * real, count);
    }
    /* ... compute the correlation cor ... */
    *output
        << ";; RMSE " << ftoString(sqrt(se.mean()), 4, 1)
        << " Correlation is " << ftoString(cor, 4, 1)
        << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
        << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    cout << "RMSE " << ftoString(sqrt(se.mean()), 4, 1)
         << " Correlation is " << ftoString(cor, 4, 1)
         << " Mean (abs) Error " << ftoString(e.mean(), 4, 1)
         << " (" << ftoString(e.stddev(), 4, 1) << ")" << endl;
    /* ... */
    if (wgn_opt_param == "rmse")
        return -sqrt(se.mean());
    else
        return cor;
}
static int wagon_split(int margin, WNode &node)
{
    // Find the best question for this node's data; if it improves on the
    // node's impurity, split and recurse, otherwise leave the node a leaf.
    WQuestion q;

    node.set_impurity(WImpurity(node.get_data()));
    if (wgn_max_questions < 1)
        return FALSE;

    q = find_best_question(node.get_data());
    /* ... */
    double impurity_measure = node.get_impurity().measure();
    double question_score = q.get_score();

    if ((question_score < WGN_HUGE_VAL) &&
        (question_score < impurity_measure))
    {
        // A worthwhile question: split the data and recurse on both halves
        WNode *l = new WNode();
        WNode *r = new WNode();
        wgn_find_split(q, node.get_data(), l->get_data(), r->get_data());
        node.set_subnodes(l, r);
        node.set_question(q);
        /* ... */
        for (i = 0; i < margin; i++)
            cout << " ";
        /* ... */
        wagon_split(margin, *l);
        /* ... */
        wagon_split(margin, *r);
        /* ... */
    }
    else
    {   // No question improves the impurity: report it and leave a leaf
        for (i = 0; i < margin; i++)
            cout << " ";
        cout << "stopped samples: " << node.samples() << " impurity: "
             << node.get_impurity() << endl;
        /* ... */
    }
}
    /* from wgn_find_split(): partition the data set ds into the yes and no
       subsets induced by question q */
    if (wgn_dropout_samples > 0.0)
    {
        // With sample dropout the yes/no counts stored on the question are
        // unreliable, so count the two partitions directly.
        for (iy = in = i = 0; i < ds.n(); i++)
            if (q.ask(*ds(i)) == TRUE)
                /* ... */
    }
    /* ... */
    for (iy = in = i = 0; i < ds.n(); i++)
        if (q.ask(*ds(i)) == TRUE)
            /* ... fill the yes set, else the no set ... */
static float wgn_random_number(float x)
{
    // Return a random number in the range [0, x)
    return (((float)random()) / RAND_MAX) * x;
}
static WQuestion find_best_question(WVectorVector &dset)
{
    // One of two alternative definitions of find_best_question (selected at
    // build time): this one scores every feature into scores[] so the
    // per-feature work can be done independently, then picks the best.
    /* ... */
    float *scores = new float[wgn_dataset.width()];
    bscore = tscore = WGN_HUGE_VAL;
    best_ques.set_score(bscore);
    for (i = 0; i < wgn_dataset.width(); i++)
    {
        questions[i] = new WQuestion;
        questions[i]->set_score(bscore);
    }
    for (i = 0; i < wgn_dataset.width(); i++)
    {
        if ((wgn_dataset.ignore(i) == TRUE) ||
            (i == wgn_predictee))
            scores[i] = WGN_HUGE_VAL;     // ignore this feature
        else if (wgn_random_number(1.0) < wgn_dropout_feats)
            scores[i] = WGN_HUGE_VAL;     // randomly dropped feature
        else if (wgn_dataset.ftype(i) == wndt_binary)
        {
            construct_binary_ques(i, *questions[i]);
            scores[i] = wgn_score_question(*questions[i], dset);
        }
        else if (wgn_dataset.ftype(i) == wndt_float)
        {
            scores[i] = construct_float_ques(i, *questions[i], dset);
        }
        else if (wgn_dataset.ftype(i) == wndt_ignore)
            scores[i] = WGN_HUGE_VAL;     // always ignore this feature
        else if (wgn_csubset && (wgn_dataset.ftype(i) >= wndt_class))
        {
            wagon_error("subset selection temporarily deleted");
            tscore = construct_class_ques_subset(i, test_ques, dset);
        }
        else if (wgn_dataset.ftype(i) >= wndt_class)
            scores[i] = construct_class_ques(i, *questions[i], dset);
    }
    for (i = 0; i < wgn_dataset.width(); i++)
    {
        if (scores[i] < bscore)
        {
            memcpy(&best_ques, questions[i], sizeof(*questions[i]));
            best_ques.set_score(scores[i]);
            bscore = scores[i];
        }
        /* ... */
    }
    /* ... */
    return best_ques;
}
static WQuestion find_best_question(WVectorVector &dset)
{
    // The other, serial definition of find_best_question: try each feature
    // in turn and keep the best-scoring question.
    /* ... */
    bscore = tscore = WGN_HUGE_VAL;
    best_ques.set_score(bscore);
    for (i = 0; i < wgn_dataset.width(); i++)
    {
        if ((wgn_dataset.ignore(i) == TRUE) ||
            (i == wgn_predictee))
            tscore = WGN_HUGE_VAL;     // ignore this feature
        else if (wgn_random_number(1.0) < wgn_dropout_feats)
            tscore = WGN_HUGE_VAL;     // randomly dropped feature
        else if (wgn_dataset.ftype(i) == wndt_binary)
        {
            construct_binary_ques(i, test_ques);
            tscore = wgn_score_question(test_ques, dset);
        }
        else if (wgn_dataset.ftype(i) == wndt_float)
        {
            tscore = construct_float_ques(i, test_ques, dset);
        }
        else if (wgn_dataset.ftype(i) == wndt_ignore)
            tscore = WGN_HUGE_VAL;     // always ignore this feature
        else if (wgn_csubset && (wgn_dataset.ftype(i) >= wndt_class))
        {
            wagon_error("subset selection temporarily deleted");
            tscore = construct_class_ques_subset(i, test_ques, dset);
        }
        else if (wgn_dataset.ftype(i) >= wndt_class)
            tscore = construct_class_ques(i, test_ques, dset);

        if (tscore < bscore)
        {
            best_ques = test_ques;
            best_ques.set_score(tscore);
            bscore = tscore;
        }
    }
    /* ... */
    return best_ques;
}
static float construct_class_ques(int feat, WQuestion &ques, WVectorVector &ds)
{
    float tscore, bscore = WGN_HUGE_VAL;
    /* ... */
    test_q.set_oper(wnop_is);
    for (cl = 0; cl < wgn_discretes[wgn_dataset.ftype(feat)].length(); cl++)
    {
        test_q.set_operand1(EST_Val(cl));
        tscore = wgn_score_question(test_q, ds);
        /* ... keep test_q in ques when tscore improves on bscore ... */
    }
    return bscore;
}
static float construct_class_ques_subset(int feat, WQuestion &ques,
                                         WVectorVector &ds)
{
    // Find the subset of the class's values that gives the best split, by
    // scoring each single value and then greedily growing a subset.
    float tscore, bscore = WGN_HUGE_VAL;

    ques.set_oper(wnop_is);
    float *scores = new float[wgn_discretes[wgn_dataset.ftype(feat)].length()];

    // Score each single value as an "is" question
    for (cl = 0; cl < wgn_discretes[wgn_dataset.ftype(feat)].length(); cl++)
    {
        ques.set_operand(flocons(cl));
        scores[cl] = wgn_score_question(ques, ds);
    }

    LISP order = sort_class_scores(feat, scores);
    /* ... */
    if (siod_llength(order) == 1)
    {   // only one usable value, so there is no subset to build
        ques.set_oper(wnop_is);
        ques.set_operand(car(order));
        return scores[get_c_int(car(order))];
    }

    ques.set_oper(wnop_in);
    /* ... grow the subset while the score improves ... */
    for (l = cdr(order); CDR(l) != NIL; l = cdr(l))
    {
        /* ... */
        tscore = wgn_score_question(ques, ds);
        /* ... */
    }

    if (siod_llength(best_l) == 1)
    {
        ques.set_oper(wnop_is);
        ques.set_operand(car(best_l));
    }
    else if (equal(cdr(order), best_l) != NIL)
    {
        ques.set_oper(wnop_is);
        ques.set_operand(car(order));
    }
    else
    {
        cout << "Found a good subset" << endl;
        ques.set_operand(best_l);
    }
    /* ... */
}
static LISP sort_class_scores(int feat, float *scores)
{
    // Order the class values by score (lowest first), skipping WGN_HUGE_VAL.
    LISP items = NIL;
    for (i = 0; i < wgn_discretes[wgn_dataset.ftype(feat)].length(); i++)
    {
        if (scores[i] != WGN_HUGE_VAL)
        {
            if (items == NIL)
                items = cons(flocons(i), NIL);
            else
            {
                for (l = items; l != NIL; l = cdr(l))
                    if (scores[i] < scores[get_c_int(car(l))])
                    {   // insert here, pushing the rest down
                        CDR(l) = cons(car(l), cdr(l));
                        CAR(l) = flocons(i);
                        break;
                    }
                if (l == NIL)   // worse than everything so far: append
                    items = l_append(items, cons(flocons(i), NIL));
            }
        }
    }
    return items;
}
static float construct_float_ques(int feat, WQuestion &ques, WVectorVector &ds)
{
    // Find the best "less than" threshold for a float feature by trying
    // wgn_float_range_split evenly spaced points across its range.
    float tscore, bscore = WGN_HUGE_VAL;
    WQuestion test_q;
    float max, min, val, incr;

    test_q.set_fp(feat);
    test_q.set_oper(wnop_lessthan);

    min = max = ds(0)->get_flt_val(feat);
    for (d = 0; d < ds.n(); d++)
    {
        val = ds(d)->get_flt_val(feat);
        if (val < min) min = val;
        else if (val > max) max = val;
    }
    if (max == min)   // constant feature: no useful split
        return WGN_HUGE_VAL;
    incr = (max - min) / wgn_float_range_split;
    for (i = 0, p = min + incr; i < wgn_float_range_split; i++, p += incr)
    {
        test_q.set_operand1(EST_Val(p));
        tscore = wgn_score_question(test_q, ds);
        if (tscore < bscore)
            /* ... keep test_q in ques and tscore as bscore ... */
    }
    return bscore;
}
static void construct_binary_ques(int feat, WQuestion &test_ques)
{
    // A binary feature yields only one possible question
    test_ques.set_fp(feat);
    test_ques.set_oper(wnop_binary);
    test_ques.set_operand1(EST_Val(""));
}
float score_question_set(WQuestion &q, WVectorVector &ds, int ignorenth)
{
    // Score a question by accumulating the impurity of the yes and no
    // partitions it induces over the data set.
    WImpurity y, n;
    int d, num_yes, num_no;
    float count;
    WVector *wv;

    num_yes = num_no = 0;
    for (d = 0; d < ds.n(); d++)
    {
        if (wgn_random_number(1.0) < wgn_dropout_samples)
        {   /* sample dropout: skip this vector */ }
        else if ((ignorenth < 2) ||
                 (d % ignorenth != ignorenth - 1))
        {
            wv = ds(d);
            if (wgn_count_field == -1)
                count = 1.0;
            else
                count = (*wv)[wgn_count_field];

            if (q.ask(*wv) == TRUE)
            {
                num_yes++;
                if (wgn_dataset.ftype(wgn_predictee) == wndt_ols)
                    y.cumulate(d, count);    // for OLS cumulate the sample index
                else
                    y.cumulate((*wv)[wgn_predictee], count);
            }
            else
            {
                num_no++;
                if (wgn_dataset.ftype(wgn_predictee) == wndt_ols)
                    n.cumulate(d, count);
                else
                    n.cumulate((*wv)[wgn_predictee], count);
            }
        }
    }
    /* ... */
    if ((wgn_balance == 0.0) ||
        (ds.n() / wgn_balance < wgn_min_cluster_size))
        min_cluster = wgn_min_cluster_size;
    else
        min_cluster = (int)(ds.n() / wgn_balance);

    if ((y.samples() < min_cluster) ||
        (n.samples() < min_cluster))
        return WGN_HUGE_VAL;   // too few samples on one side of the split
    /* ... combine the yes/no impurities into the question's score ... */
}

float wgn_score_question(WQuestion &q, WVectorVector &ds)
{
    return score_question_set(q, ds, 1);
}
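/*
 * The final combination of the yes and no impurities into the question's
 * score is elided above.  The idea, as a hedged sketch rather than the
 * verbatim source: a question is good when both of the partitions it
 * induces are pure, so the two impurity measures are added and a lower
 * total is better, with WGN_HUGE_VAL marking an unusable question.
 *
 *   float ym = y.measure();
 *   float nm = n.measure();
 *   return ym + nm;        // smaller is better
 */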
WNode *wagon_stepwise(float limit)
{
    // Greedily add the feature that most improves the result, stopping when
    // the improvement falls below limit (a percentage).
    WNode *best = 0, *new_best = 0;
    float bscore, best_score = -WGN_HUGE_VAL;

    // Start with every feature ignored
    for (i = 0; i < wgn_dataset.width(); i++)
        wgn_dataset.set_ignore(i, TRUE);

    for (i = 0; i < wgn_dataset.width(); i++)
    {
        if ((wgn_dataset.ftype(i) == wndt_ignore) || (i == wgn_predictee))
            continue;
        new_best = wagon_stepwise_find_next_best(bscore, best_feat);
        if ((bscore - fabs(bscore * (limit / 100))) <= best_score)
            break;    // no sufficient improvement: stop
        /* ... */
        best_score = bscore;
        /* ... new_best becomes best ... */
        wgn_dataset.set_ignore(best_feat, FALSE);
        /* ... */
        fprintf(stdout, "FEATURE %d %s: %2.4f\n",
                /* ... */
                (const char *)wgn_dataset.feat_name(best_feat),
                /* ... */);
    }

    return best;
}
static WNode *wagon_stepwise_find_next_best(float &bscore, int &best_feat)
{
    // Try each currently ignored feature; return the tree built with the
    // one that most improves the score.
    float best_score = -WGN_HUGE_VAL;
    int best_new_feat = -1;

    for (i = 0; i < wgn_dataset.width(); i++)
    {
        if (wgn_dataset.ftype(i) == wndt_ignore)
            continue;
        else if (i == wgn_predictee)
            continue;
        else if (wgn_dataset.ignore(i) == TRUE)
        {
            wgn_dataset.set_ignore(i, FALSE);     // let this feature in
            current = wgn_build_tree(score);
            if (score > best_score)
            {
                best_score = score;
                best_new_feat = i;
                /* ... */
            }
            wgn_dataset.set_ignore(i, TRUE);      // and switch it off again
        }
    }

    bscore = best_score;
    best_feat = best_new_feat;
    /* ... */
}