Generate \( (F, \varepsilon) \)-Approximate Decision Reducts
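
For reference, the notion used below is commonly defined as follows in the rough-set literature (e.g. Ślęzak's work on approximate reducts; treat the exact phrasing as an assumption). For a quality measure \( F \) and a threshold \( \varepsilon \in [0,1) \), a subset \( B \) of the conditional attributes \( A \) is an \( (F, \varepsilon) \)-approximate decision reduct if

\[ F(B) \geq (1 - \varepsilon) \cdot F(A) \]

and no proper subset of \( B \) satisfies the same inequality. The sample below instantiates \( F \) with the majority measure and sets \( \varepsilon = 0.05 \).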

//load data
var data = Data.Benchmark.Factory.Golf();

//set parameters for reduct factory
var parm = new Args();
parm.SetParameter(ReductFactoryOptions.DecisionTable, data);
parm.SetParameter(ReductFactoryOptions.ReductType, 
	ReductTypes.ApproximateDecisionReduct);
parm.SetParameter(ReductFactoryOptions.FMeasure, 
	(FMeasure) FMeasures.Majority);
parm.SetParameter(ReductFactoryOptions.Epsilon, 0.05);

//compute reducts
var reducts = ReductFactory.GetReductGenerator(parm).GetReducts();

//output reducts and attributes
foreach (IReduct reduct in reducts)
	Console.WriteLine(reduct.Attributes.ToArray().ToStr());

Generate \( (\omega,\varepsilon) \)-Reducts Over a Universe of Weighted Objects
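
With object weights \( \omega(u) \), the counting measure is replaced by its weighted analogue. A sketch of the standard formulation (notation assumed):

\[ M_{\omega}(B) = \sum_{E \in U/B} \max_{k} \ \omega(E \cap X_k), \qquad \omega(X) = \sum_{u \in X} \omega(u), \]

where \( X_k \) are the decision classes. \( B \) is then an \( (\omega,\varepsilon) \)-reduct when \( M_{\omega}(B) \geq (1-\varepsilon) \cdot M_{\omega}(A) \). This corresponds to the FMeasures.MajorityWeighted option used in the sample.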

//load benchmark data
var data = Data.Benchmark.Factory.Zoo();

//set object weights using r(u) weighting scheme
data.SetWeights(new WeightGeneratorRelative(data).Weights);

//split data into training and testing sets
DecisionTable train, test;
var splitter = new DataSplitterRatio(data, 0.8);
splitter.Split(out train, out test);

//set parameters for reduct factory
var parm = new Args();
parm.SetParameter(ReductFactoryOptions.DecisionTable, train);
parm.SetParameter(ReductFactoryOptions.ReductType, 
	ReductTypes.ApproximateDecisionReduct);
parm.SetParameter(ReductFactoryOptions.FMeasure, 
	(FMeasure)FMeasures.MajorityWeighted);
parm.SetParameter(ReductFactoryOptions.Epsilon, 0.05);

//compute reducts
var reductGenerator = ReductFactory.GetReductGenerator(parm);
var reducts = reductGenerator.GetReducts();

//select the 10 reducts with the fewest attributes
var bestReducts = reducts.OrderBy(r => r.Attributes.Count).Take(10);

//create decision rules based on the selected reducts
var decisionRules = new ReductDecisionRules(bestReducts);

//when test instance is not recognized
//set output as unclassified
decisionRules.DefaultOutput = null;

//classify test data
var result = Classifier.Default.Classify(
	decisionRules, test);

//output accuracy
Console.WriteLine("Accuracy: {0}", result.Accuracy);

Compute Generalized Majority \( (m^{\varepsilon},\cap) \)-Reducts
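
The construction is driven by the generalized decision function, which maps each object to the set of decision values occurring in its \( B \)-indiscernibility class:

\[ \partial_B(u) = \{\, d(x) : x \in U,\ (x,u) \in IND(B) \,\}. \]

Roughly speaking, an \( (m^{\varepsilon},\cap) \)-reduct removes attributes as long as an \( \varepsilon \)-relaxed majority part of the generalized decision is preserved; the precise criterion follows Widz and Ślęzak's formulation, so treat this summary as a pointer rather than a definition.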

//load training data set
var train = Data.Benchmark.Factory.Dna();

//setup reduct factory parameters
Args parms = new Args();
parms.SetParameter(ReductFactoryOptions.DecisionTable, train);
parms.SetParameter(ReductFactoryOptions.ReductType,
	ReductTypes.GeneralizedMajorityDecision);
parms.SetParameter(ReductFactoryOptions.WeightGenerator,
	new WeightGeneratorMajority(train));
parms.SetParameter(ReductFactoryOptions.Epsilon, 0.05);
parms.SetParameter(ReductFactoryOptions.PermutationCollection,
	new PermutationCollection(10,
	train.SelectAttributeIds(a => a.IsStandard)
		.ToArray()));

//generate reducts
var reductGenerator = ReductFactory.GetReductGenerator(parms);
var reducts = reductGenerator.GetReducts();

Compute Generalized Majority \( (m^{\varepsilon},\cap) \)-Reducts with Exceptions

//load training and test data sets
var train = Data.Benchmark.Factory.Dna();
var test = Data.Benchmark.Factory.DnaTest();

//setup reduct factory parameters
Args parms = new Args();
parms.SetParameter(ReductFactoryOptions.DecisionTable, train);
parms.SetParameter(ReductFactoryOptions.ReductType,
	ReductTypes.GeneralizedMajorityDecision);
parms.SetParameter(ReductFactoryOptions.WeightGenerator,
	new WeightGeneratorMajority(train));
parms.SetParameter(ReductFactoryOptions.Epsilon, 0.05);
parms.SetParameter(ReductFactoryOptions.PermutationCollection,
	new PermutationCollection(10,
		train.SelectAttributeIds(a => a.IsStandard)
			.ToArray()));
parms.SetParameter(ReductFactoryOptions.UseExceptionRules, true);

//generate reducts with exceptions
var reductGenerator = ReductFactory.GetReductGenerator(parms);
var reducts = reductGenerator.GetReducts();

//inspect the exception rules attached to each reduct
foreach (var reduct in reducts) {
	var r = reduct as ReductWithExceptions;
	foreach (var exception in r.Exceptions) {
		Console.WriteLine(exception.Attributes
			.ToArray().ToStr());
		Console.WriteLine(exception.SupportedObjects
			.ToArray().ToStr());
	}
}

//build decision rules from the generated reducts
var rules = new ReductDecisionRules(reducts);
rules.DecisionIdentificationMethod
	= RuleQualityMethods.Confidence;
rules.RuleVotingMethod = RuleQualityMethods.SingleVote;
rules.Learn(train, null);

//classify test data set
var result = Classifier.Default.Classify(rules, test);

//show results
Console.WriteLine(result);

Decision Table Discretization

var data = Data.Benchmark.Factory.Vehicle();

DecisionTable train, test;
var splitter = new DataSplitterRatio(data, 0.8);
splitter.Split(out train, out test);

var tableDiscretizer = new TableDiscretizer(
	new IDiscretizer[]
	{
		//try to discretize using Fayyad MDL Criterion
		new DiscretizeFayyad(),

		//in case the Fayyad MDL criterion is too strict,
		//fall back to standard entropy with 5 buckets
		new DiscretizeEntropy(5)
});

tableDiscretizer.FieldsToDiscretize = train
	.SelectAttributeIds(a => a.IsStandard && a.CanDiscretize());

var filter = new DiscretizeFilter();
filter.TableDiscretizer = tableDiscretizer;
filter.Compute(train);

foreach(int attributeId in tableDiscretizer.FieldsToDiscretize)
{
	var fieldDiscretizer = filter
		.GetAttributeDiscretizer(attributeId);

	Console.WriteLine("Attribute {0} was discretized with {1}",
		attributeId, fieldDiscretizer.GetType().Name);
	Console.WriteLine("Computed Cuts: {0}",
		fieldDiscretizer.Cuts.ToStr());
}

var trainDisc = filter.Apply(train);
var testDisc = filter.Apply(test);
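
The discretized train and test tables can then be passed to any learner shown on this page. A minimal continuation sketch, assuming DecisionTreeC45 exposes the same Learn(table, attributes) signature used by the ensemble samples below:

//train a C4.5 tree on the discretized split and validate it
var tree = new DecisionTreeC45();
tree.Learn(trainDisc, trainDisc
	.SelectAttributeIds(a => a.IsStandard).ToArray());
var result = Classifier.Default.Classify(tree, testDisc);
Console.WriteLine(result);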

Boosting \( (\omega,\varepsilon) \)-Decision Reduct Based Weak Classifiers
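
The booster treats the rule set induced from a single \( (\omega,\varepsilon) \)-reduct as a weak classifier and reweights training objects between rounds. As a point of reference, the classic AdaBoost update with round error \( \epsilon_t \) is

\[ \alpha_t = \frac{1}{2} \ln \frac{1-\epsilon_t}{\epsilon_t}, \qquad w_{t+1}(u) \propto w_t(u) \, e^{\pm \alpha_t}, \]

with the sign negative for correctly classified objects; whether NRough implements exactly this variant (rather than, e.g., AdaBoost.M1) is an assumption.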

//load training and testing DNA data sets
var train = Data.Benchmark.Factory.Dna();
var test = Data.Benchmark.Factory.DnaTest();

//set weights
var weightGen = new WeightGeneratorConstant(train, 
	1.0 / (double)train.NumberOfRecords);
train.SetWeights(weightGen.Weights);

//create parameters for reduct factory
var parm = new Args();
parm.SetParameter(ReductFactoryOptions.ReductType, 
	ReductTypes.ApproximateDecisionReduct);
parm.SetParameter(ReductFactoryOptions.FMeasure, 
	(FMeasure)FMeasures.MajorityWeighted);
parm.SetParameter(ReductFactoryOptions.Epsilon, 0.05);
parm.SetParameter(ReductFactoryOptions.NumberOfReducts, 100);
parm.SetParameter(ReductFactoryOptions.ReductComparer,
	ReductRuleNumberComparer.Default);
parm.SetParameter(ReductFactoryOptions.SelectTopReducts, 1);

//create weak classifier prototype
var prototype = new ReductDecisionRules();
prototype.ReductGeneratorArgs = parm;

//create ada boost ensemble
var adaBoost = new AdaBoost<ReductDecisionRules>(prototype);
adaBoost.Learn(train, 
	train.SelectAttributeIds(a => a.IsStandard).ToArray());

//classify test data set
var result = Classifier.Default.Classify(adaBoost, test);

//print result header & result
Console.WriteLine(ClassificationResult.TableHeader());
Console.WriteLine(result);

\( (F, \varepsilon) \)-Decision Reduct Ensemble Using Hierarchical Clustering Diversification

//load training and testing DNA data sets
var train = Data.Benchmark.Factory.Dna();
var test = Data.Benchmark.Factory.DnaTest();

//set weights
var weightGen = new WeightGeneratorConstant(train, 
	1.0 / (double)train.NumberOfRecords);
train.SetWeights(weightGen.Weights);

//create parameters for reduct factory
var parm = new Args();
parm.SetParameter(ReductFactoryOptions.ReductType, 
	ReductTypes.ApproximateDecisionReduct);
parm.SetParameter(ReductFactoryOptions.FMeasure, 
	(FMeasure)FMeasures.MajorityWeighted);
parm.SetParameter(ReductFactoryOptions.Epsilon, 0.05);
parm.SetParameter(ReductFactoryOptions.NumberOfReducts, 100);
parm.SetParameter(ReductFactoryOptions.ReductComparer, 
	ReductRuleNumberComparer.Default);
parm.SetParameter(ReductFactoryOptions.SelectTopReducts, 1);

//create weak classifier prototype
var prototype = new ReductDecisionRules();
prototype.ReductGeneratorArgs = parm;

//create ada boost ensemble
var adaBoost = new AdaBoost<ReductDecisionRules>(prototype);
adaBoost.Learn(train, 
	train.SelectAttributeIds(a => a.IsStandard).ToArray());

//classify test data set
var result = Classifier.Default.Classify(adaBoost, test);

//print result header & result
Console.WriteLine(ClassificationResult.TableHeader());
Console.WriteLine(result);

Random Forest Based on C4.5 Decision Trees

//load data from a CSV file
var data = DecisionTable.Load("german.data", FileFormat.CSV);

DecisionTable train, test;
var splitter = new DataSplitterRatio(data, 0.8);
splitter.Split(out train, out test);

//Initialize and Learn Random Forest
var forest = new DecisionForestRandom<DecisionTreeC45>();
forest.Size = 500;
forest.Learn(train, train.SelectAttributeIds(a => a.IsStandard).ToArray());

//Validate on test data set
var result = Classifier.Default.Classify(forest, test);

//Output the results
Console.WriteLine(result);

10-Fold Cross-Validation of a C4.5 Decision Tree

//load data
var data = DecisionTable.Load("data.txt", FileFormat.CSV);

//create 10-fold cross-validation repeated 25 times
var cv = new CrossValidation(data, 10, 25);

//create C4.5 decision tree and run cv evaluation
var c45 = new DecisionTreeC45();
var result = cv.Run<DecisionTreeC45>(c45);

//output mean cross-validation error
Console.WriteLine("Error: {0}", result.Error);
