build_decision_tree :-
    generate_node_id(_),
    clause(attributes(Attributes),true),
    findbag(Ex,clause(example(Ex,_,_),true),Exs),
    id3(Exs,Attributes,Node),
    assert(decision_tree(Node)), !.

1 2 
generate_node_id(Y) :-
    clause(current_node(X),true), !,
    retract(current_node(X)),
    Y is X + 1,
    assert(current_node(Y)).
generate_node_id(0) :-
    assert(current_node(0)).

/******************************************************************/
/*                                                                */
/*  call        : id3(+Examples,+Attributes,-Class)               */
/*                                                                */
/*  arguments   : Examples   = List of Examples                   */
/*                Attributes = List of Attributes                 */
/*                Class      = Node ID of Class or leaf(Class)    */
/*                                                                */
/******************************************************************/
/* ID3 determines an attribute-value pair which best splits the   */
/* examples according to the information-theoretical 'gain-ration'*/
/* measure. The attribute-value pair is deleted from the set of   */
/* all attribute-value pairs and the process of generating a sub- */
/* decision tree is called recursively with the according to the  */
/* attribute-value pair splitted examples. The recursion          */
/* terminates either if there is no more example to process or if */
/* all examples belong to the same class. In the last case        */
/* leaf(Class) is returned insteed of the SubtreeIDs.             */
/* In the end for every generated subtree an ID is generated and  */
/* the tree structure is asserted in the database.                */
/******************************************************************/
1 2 3 
id3([],_,[]).
id3(Exs,_,[leaf(Class)]) :-
    termination_criterion(Exs,Class).
id3(Exs,Attributes,ID) :-
    get_best_attribute(Attributes,Exs,BestAttribute),
    split_values(BestAttribute,Exs,DividedValues),
    delete(BestAttribute,Attributes,NewAttributes),
    generate_subtrees(DividedValues,NewAttributes,SubtreeIDs),
    generate_node_id(ID),
    assert(node(ID,BestAttribute,SubtreeIDs)).

1 
termination_criterion([Ex|Exs],Class) :-
    clause(example(Ex,Class,_),true),
    !,
    all_in_same_class(Exs,Class).

1 2 
all_in_same_class([],_).
all_in_same_class([Ex|Exs],C) :-
    clause(example(Ex,C,_),true),
    !,
    all_in_same_class(Exs,C).

1 
get_best_attribute(Attributes,Exs,BestAttribute) :-
    construct_contingency_table(Attributes,Exs),
    common_calculations(MC,N),
    calculate_parameter_classification(Attributes,MC,N,Values),
    get_best(Attributes,Values,BestAttribute).

1 
construct_contingency_table(Attributes,Exs) :-
    clause(classes(Lc),true),
    length(Lc,NroColTab),
    abolish(table,3),
    initialize_contingency_tables(Attributes,NroColTab),
    construct_contingency_tables(Attributes,Exs).

1 2 
initialize_contingency_tables([],_).
initialize_contingency_tables([A|As],NoCol) :-
    create_list_of_zeros(NoCol,List),
    assert(table(A,[],List)),
    initialize_contingency_tables(As,NoCol).

1 2 3 
create_list_of_zeros(0,[]).
create_list_of_zeros(N,[0|R]) :-
     N1 is N-1,
     create_list_of_zeros(N1,R).

1 2 
construct_contingency_tables([],_).
construct_contingency_tables([Attribute|Attributes],ExampleList) :-
    contingency_table(Attribute,ExampleList),
    !,
    construct_contingency_tables(Attributes,ExampleList).

1 2 
contingency_table(_,[]).
contingency_table(Attribute,[Ex|Exs]) :-
     value(Attribute,Ex,V),
     position_of_class(Ex,Pc),
     update_table(Attribute,V,Pc),
     !,
     contingency_table(Attribute,Exs).

1 2 3 
value(A,[A = V|_],V) :- !.
value(A,[_|Sels],V) :- value(A,Sels,V).
value(A,No,V) :-
      example(No,_,Ex),
      value(A,Ex,V).

1 
position_of_class(Ex,Pc) :-
     clause(example(Ex,C,_),true),
     clause(classes(Classes),true),
     position(C,Classes,Pc).

1 
update_table(Attribute,V,Pc) :-
     retract(table(Attribute,TabLines,TotClass)),
     modify_table(TabLines,V,Pc,NewLines),
     increment_position_list(1,Pc,TotClass,NewTotal),
     assert(table(Attribute,NewLines,NewTotal)).

1 2 
modify_table([],V,Pc,[(V,Values,1)]) :-
     clause(classes(Classes),true),
     length(Classes,NoOfColums),
     create_list_of_zeros(NoOfColums,L),
     increment_position_list(1,Pc,L,Values).
modify_table([(V,Nums,Tot)|Rest],V,Pc,[(V,NewNums,NewTot)|Rest]) :-
     NewTot is Tot+1,
     increment_position_list(1,Pc,Nums,NewNums).
modify_table([X|Rest1],V,Pc,[X|Rest2]) :-
     modify_table(Rest1,V,Pc,Rest2).

1 2 3 4 
increment_position_list(N,N,[X|R],[Y|R]) :-
     Y is X+1.
increment_position_list(N1,N,[X|R1],[X|R2]) :-
     N2 is N1+1,
     increment_position_list(N2,N,R1,R2).

1 2 3 
common_calculations(MC,N) :-
    clause(table(_,_,Xjs),true),
    common_calculations(Xjs,0,0,MC,N).

common_calculations([],TotalSum,N,MC,N) :-
    log(N, LogN),
    MC is (-1 / N) * ( TotalSum - N * LogN ).
common_calculations([Xj|Xjs],Ac1,Ac2,MC,N) :-
    log(Xj, LogXj),
    NAc1 is Ac1 + Xj * LogXj,
    NAc2 is Ac2 + Xj,
    common_calculations(Xjs,NAc1,NAc2,MC,N).

1 2 
calculate_parameter_classification([],_,_,[]).
calculate_parameter_classification([A|As],MC,N,[V|Vs]) :-
    gain_ratio(A,MC,N,V),
    calculate_parameter_classification(As,MC,N,Vs).

1 
gain_ratio(A,MC,N,GR) :-
    clause(table(A,Lines,_),true),
    calculate_factors_B_and_IV(Lines,N,0,0,B,IV),
    IM is MC - B,
    GR is IM / IV.

1 2 
calculate_factors_B_and_IV([],N,Sum1,Sum2,B,IV) :-
    B  is ( -1 / N ) * ( Sum1 - Sum2 ),
    log(N, LogN),
    IV is ( -1 / N ) * ( Sum2 - N * LogN ).
calculate_factors_B_and_IV([(_,L,TotL)|Rest],N,Ac1,Ac2,B,IV) :-
    sum_of_lines(L,0,SL),
    NAc1 is Ac1 + SL,
    log(TotL, LogTotL),
    NAc2 is Ac2 + TotL * LogTotL,
    calculate_factors_B_and_IV(Rest,N,NAc1,NAc2,B,IV).

1 2 3 
sum_of_lines([],X,X).
sum_of_lines([0|Ns],Ac,Tot) :-
    sum_of_lines(Ns,Ac,Tot).
sum_of_lines([N|Ns],Ac,Tot) :-
    log(N, LogN),
    Nac is Ac + N * LogN,
    sum_of_lines(Ns,Nac,Tot).

1 
get_best([A|As],[V|Vs],Result) :-
    best_value(As,Vs,(A,V),Result).

1 2 3 
best_value([],[],(A,V),A).
best_value([A|As],[V|Vs],(TA,TV),Result) :-
    V > TV,
    best_value(As,Vs,(A,V),Result).
best_value([A|As],[V|Vs],(TA,TV),Result) :-
    best_value(As,Vs,(TA,TV),Result).

1 
split_values(Attribute,Exs,Result) :-
    get_values(Attribute,Exs,Values),
    split_examples(Attribute,Values,Exs,Result).

1 
get_values(Attribute,Exs,Vals) :-
    findbag(V,(member(Ex,Exs),value(Attribute,Ex,V)),Vs),
    remove_duplicates(Vs,Vals).

1 2 
split_examples(_,[V],Exs,[(V,Exs)]).
split_examples(A,[V|Vs],Exs,[(V,VExs)|Rest]) :-
    findbag(Ex,(member(Ex,Exs),value(A,Ex,V)),VExs),
    difference(VExs,Exs,RestEx),
    split_examples(A,Vs,RestEx,Rest).

1 2 
generate_subtrees([],_,[]).
generate_subtrees([(Value,Exs)|Rest1],Attributes,[(Value,Id)|Rest2]) :-
    id3(Exs,Attributes,Id),
    !,
    generate_subtrees(Rest1,Attributes,Rest2).

/******************************************************************/
/*                                                                */
/*  call        : show_decision_tree                              */
/*                                                                */
/******************************************************************/
/* A simple pretty-print procedure for displaying decision trees. */
/* In steed of this procedure, we can also generate rules from the*/
/* decision tree by traversing every path in the tree until a     */
/* leaf node was reached and collecting the attribute-value pairs */
/* of that path. Then the leaf node forms the head of a Horn-     */
/* formula and the set of attribute-value pairs of the path forms */
/* the body of the clause.                                        */
/******************************************************************/
show_decision_tree :-
        nl,
        clause(decision_tree(Node),true),
        show_subtree(Node,0), !.

1 2 
show_subtree(NodeNo,Indent) :-
        clause(node(NodeNo,Attribute,SubtreeList),true),
        show_subtrees(SubtreeList,Attribute,Indent).

1 2 3 
show_subtrees([],_,_) :- nl.
show_subtrees([(Value,[leaf(X)])|Brothers],Attribute,Indent) :-
        write(Attribute=Value), write(' '),
        write(' ==> '), write(class = X), nl,
        space(Indent),
        show_subtrees(Brothers,Attribute,Indent).
show_subtrees([(Value,NodeNo)|Brothers],Attribute,Indent) :-
        name(Attribute,List1), length(List1,N1),
        name(Value,List2), length(List2,N2),
        write(Attribute=Value),
        write(' and '),
        Offset is Indent + N1 + 1 + N2 + 5,
        show_subtree(NodeNo,Offset),
        space(Indent),
        show_subtrees(Brothers,Attribute,Indent).

/******************************************************************/
/* Utilit predicates                                              */
/******************************************************************/
1 2 3 
space(0).
space(N) :-
        N > 0, write(' '), N1 is N - 1, space(N1).

1 2 3 
remove_duplicates([],[]).
remove_duplicates([X|Xs],Ys) :-
      member(X,Xs),
      remove_duplicates(Xs,Ys).
remove_duplicates([X|Xs],[X|Ys]) :-
      remove_duplicates(Xs,Ys).

%length([],0).
%length([L|Ls],N) :-
%      length(Ls,N1),
%      N is N1+1.

1 2 
position(X,L,P) :- position(X,1,L,P).
position(X,P,[X|_],P).
position(X,N,[_|R],P) :- N1 is N+1,position(X,N1,R,P).

1 2 
delete(X,[X|Xs],Xs).
delete(X,[Y|Ys],[Y|Zs]) :-
    delete(X,Ys,Zs).

1 
difference(L1,L2,L3) :-
    findbag(N,(member(N,L2),not(member(N,L1))),L3).

:- dynamic found/1.

1 2 3 4 
findbag(X,G,_) :-
     asserta(found(mark)), call(G),
       asserta(found(X)), fail .
findbag(_,_,L) :-
     collect_found([],L) .

1 2 
collect_found(L,L1) :-
     getnext(X), collect_found([X|L],L1) .
collect_found(L,L) .

1 
getnext(X) :-
     retract(found(X)), !, not (X == mark) .