build_decision_tree :-
generate_node_id(_),
clause(attributes(Attributes),true),
findbag(Ex,clause(example(Ex,_,_),true),Exs),
id3(Exs,Attributes,Node),
assert(decision_tree(Node)), !.
12generate_node_id(Y) :-
clause(current_node(X),true), !,
retract(current_node(X)),
Y is X + 1,
assert(current_node(Y)).
generate_node_id(0) :-
assert(current_node(0)).
/******************************************************************/
/* */
/* call : id3(+Examples,+Attributes,-Class) */
/* */
/* arguments : Examples = List of Examples */
/* Attributes = List of Attributes */
/* Class = Node ID of Class or leaf(Class) */
/* */
/******************************************************************/
/* ID3 determines an attribute-value pair which best splits the */
/* examples according to the information-theoretical 'gain-ration'*/
/* measure. The attribute-value pair is deleted from the set of */
/* all attribute-value pairs and the process of generating a sub- */
/* decision tree is called recursively with the according to the */
/* attribute-value pair splitted examples. The recursion */
/* terminates either if there is no more example to process or if */
/* all examples belong to the same class. In the last case */
/* leaf(Class) is returned insteed of the SubtreeIDs. */
/* In the end for every generated subtree an ID is generated and */
/* the tree structure is asserted in the database. */
/******************************************************************/
123id3([],_,[]).
id3(Exs,_,[leaf(Class)]) :-
termination_criterion(Exs,Class).
id3(Exs,Attributes,ID) :-
get_best_attribute(Attributes,Exs,BestAttribute),
split_values(BestAttribute,Exs,DividedValues),
delete(BestAttribute,Attributes,NewAttributes),
generate_subtrees(DividedValues,NewAttributes,SubtreeIDs),
generate_node_id(ID),
assert(node(ID,BestAttribute,SubtreeIDs)).
1termination_criterion([Ex|Exs],Class) :-
clause(example(Ex,Class,_),true),
!,
all_in_same_class(Exs,Class).
12all_in_same_class([],_).
all_in_same_class([Ex|Exs],C) :-
clause(example(Ex,C,_),true),
!,
all_in_same_class(Exs,C).
1get_best_attribute(Attributes,Exs,BestAttribute) :-
construct_contingency_table(Attributes,Exs),
common_calculations(MC,N),
calculate_parameter_classification(Attributes,MC,N,Values),
get_best(Attributes,Values,BestAttribute).
1construct_contingency_table(Attributes,Exs) :-
clause(classes(Lc),true),
length(Lc,NroColTab),
abolish(table,3),
initialize_contingency_tables(Attributes,NroColTab),
construct_contingency_tables(Attributes,Exs).
12initialize_contingency_tables([],_).
initialize_contingency_tables([A|As],NoCol) :-
create_list_of_zeros(NoCol,List),
assert(table(A,[],List)),
initialize_contingency_tables(As,NoCol).
123create_list_of_zeros(0,[]).
create_list_of_zeros(N,[0|R]) :-
N1 is N-1,
create_list_of_zeros(N1,R).
12construct_contingency_tables([],_).
construct_contingency_tables([Attribute|Attributes],ExampleList) :-
contingency_table(Attribute,ExampleList),
!,
construct_contingency_tables(Attributes,ExampleList).
12contingency_table(_,[]).
contingency_table(Attribute,[Ex|Exs]) :-
value(Attribute,Ex,V),
position_of_class(Ex,Pc),
update_table(Attribute,V,Pc),
!,
contingency_table(Attribute,Exs).
123value(A,[A = V|_],V) :- !.
value(A,[_|Sels],V) :- value(A,Sels,V).
value(A,No,V) :-
example(No,_,Ex),
value(A,Ex,V).
1position_of_class(Ex,Pc) :-
clause(example(Ex,C,_),true),
clause(classes(Classes),true),
position(C,Classes,Pc).
1update_table(Attribute,V,Pc) :-
retract(table(Attribute,TabLines,TotClass)),
modify_table(TabLines,V,Pc,NewLines),
increment_position_list(1,Pc,TotClass,NewTotal),
assert(table(Attribute,NewLines,NewTotal)).
12modify_table([],V,Pc,[(V,Values,1)]) :-
clause(classes(Classes),true),
length(Classes,NoOfColums),
create_list_of_zeros(NoOfColums,L),
increment_position_list(1,Pc,L,Values).
modify_table([(V,Nums,Tot)|Rest],V,Pc,[(V,NewNums,NewTot)|Rest]) :-
NewTot is Tot+1,
increment_position_list(1,Pc,Nums,NewNums).
modify_table([X|Rest1],V,Pc,[X|Rest2]) :-
modify_table(Rest1,V,Pc,Rest2).
1234increment_position_list(N,N,[X|R],[Y|R]) :-
Y is X+1.
increment_position_list(N1,N,[X|R1],[X|R2]) :-
N2 is N1+1,
increment_position_list(N2,N,R1,R2).
123common_calculations(MC,N) :-
clause(table(_,_,Xjs),true),
common_calculations(Xjs,0,0,MC,N).
common_calculations([],TotalSum,N,MC,N) :-
log(N, LogN),
MC is (-1 / N) * ( TotalSum - N * LogN ).
common_calculations([Xj|Xjs],Ac1,Ac2,MC,N) :-
log(Xj, LogXj),
NAc1 is Ac1 + Xj * LogXj,
NAc2 is Ac2 + Xj,
common_calculations(Xjs,NAc1,NAc2,MC,N).
12calculate_parameter_classification([],_,_,[]).
calculate_parameter_classification([A|As],MC,N,[V|Vs]) :-
gain_ratio(A,MC,N,V),
calculate_parameter_classification(As,MC,N,Vs).
1gain_ratio(A,MC,N,GR) :-
clause(table(A,Lines,_),true),
calculate_factors_B_and_IV(Lines,N,0,0,B,IV),
IM is MC - B,
GR is IM / IV.
12calculate_factors_B_and_IV([],N,Sum1,Sum2,B,IV) :-
B is ( -1 / N ) * ( Sum1 - Sum2 ),
log(N, LogN),
IV is ( -1 / N ) * ( Sum2 - N * LogN ).
calculate_factors_B_and_IV([(_,L,TotL)|Rest],N,Ac1,Ac2,B,IV) :-
sum_of_lines(L,0,SL),
NAc1 is Ac1 + SL,
log(TotL, LogTotL),
NAc2 is Ac2 + TotL * LogTotL,
calculate_factors_B_and_IV(Rest,N,NAc1,NAc2,B,IV).
123sum_of_lines([],X,X).
sum_of_lines([0|Ns],Ac,Tot) :-
sum_of_lines(Ns,Ac,Tot).
sum_of_lines([N|Ns],Ac,Tot) :-
log(N, LogN),
Nac is Ac + N * LogN,
sum_of_lines(Ns,Nac,Tot).
1get_best([A|As],[V|Vs],Result) :-
best_value(As,Vs,(A,V),Result).
123best_value([],[],(A,V),A).
best_value([A|As],[V|Vs],(TA,TV),Result) :-
V > TV,
best_value(As,Vs,(A,V),Result).
best_value([A|As],[V|Vs],(TA,TV),Result) :-
best_value(As,Vs,(TA,TV),Result).
1split_values(Attribute,Exs,Result) :-
get_values(Attribute,Exs,Values),
split_examples(Attribute,Values,Exs,Result).
1get_values(Attribute,Exs,Vals) :-
findbag(V,(member(Ex,Exs),value(Attribute,Ex,V)),Vs),
remove_duplicates(Vs,Vals).
12split_examples(_,[V],Exs,[(V,Exs)]).
split_examples(A,[V|Vs],Exs,[(V,VExs)|Rest]) :-
findbag(Ex,(member(Ex,Exs),value(A,Ex,V)),VExs),
difference(VExs,Exs,RestEx),
split_examples(A,Vs,RestEx,Rest).
12generate_subtrees([],_,[]).
generate_subtrees([(Value,Exs)|Rest1],Attributes,[(Value,Id)|Rest2]) :-
id3(Exs,Attributes,Id),
!,
generate_subtrees(Rest1,Attributes,Rest2).
/******************************************************************/
/* */
/* call : show_decision_tree */
/* */
/******************************************************************/
/* A simple pretty-print procedure for displaying decision trees. */
/* In steed of this procedure, we can also generate rules from the*/
/* decision tree by traversing every path in the tree until a */
/* leaf node was reached and collecting the attribute-value pairs */
/* of that path. Then the leaf node forms the head of a Horn- */
/* formula and the set of attribute-value pairs of the path forms */
/* the body of the clause. */
/******************************************************************/
show_decision_tree :-
nl,
clause(decision_tree(Node),true),
show_subtree(Node,0), !.
12show_subtree(NodeNo,Indent) :-
clause(node(NodeNo,Attribute,SubtreeList),true),
show_subtrees(SubtreeList,Attribute,Indent).
123show_subtrees([],_,_) :- nl.
show_subtrees([(Value,[leaf(X)])|Brothers],Attribute,Indent) :-
write(Attribute=Value), write(' '),
write(' ==> '), write(class = X), nl,
space(Indent),
show_subtrees(Brothers,Attribute,Indent).
show_subtrees([(Value,NodeNo)|Brothers],Attribute,Indent) :-
name(Attribute,List1), length(List1,N1),
name(Value,List2), length(List2,N2),
write(Attribute=Value),
write(' and '),
Offset is Indent + N1 + 1 + N2 + 5,
show_subtree(NodeNo,Offset),
space(Indent),
show_subtrees(Brothers,Attribute,Indent).
/******************************************************************/
/* Utilit predicates */
/******************************************************************/
123space(0).
space(N) :-
N > 0, write(' '), N1 is N - 1, space(N1).
123remove_duplicates([],[]).
remove_duplicates([X|Xs],Ys) :-
member(X,Xs),
remove_duplicates(Xs,Ys).
remove_duplicates([X|Xs],[X|Ys]) :-
remove_duplicates(Xs,Ys).
%length([],0).
%length([L|Ls],N) :-
% length(Ls,N1),
% N is N1+1.
12position(X,L,P) :- position(X,1,L,P).
position(X,P,[X|_],P).
position(X,N,[_|R],P) :- N1 is N+1,position(X,N1,R,P).
12delete(X,[X|Xs],Xs).
delete(X,[Y|Ys],[Y|Zs]) :-
delete(X,Ys,Zs).
1difference(L1,L2,L3) :-
findbag(N,(member(N,L2),not(member(N,L1))),L3).
:- dynamic found/1.
1234findbag(X,G,_) :-
asserta(found(mark)), call(G),
asserta(found(X)), fail .
findbag(_,_,L) :-
collect_found([],L) .
12collect_found(L,L1) :-
getnext(X), collect_found([X|L],L1) .
collect_found(L,L) .
1getnext(X) :-
retract(found(X)), !, not (X == mark) .