Source code for rfgb.boosting

# -*- coding: utf-8 -*-

# Copyright © 2017-2019 rfgb Contributors
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program (at the base of this repository). If not,
# see <http://www.gnu.org/licenses/>

"""
Core methods for learning and inference, such as computing gradients,
updating gradients, and performing inference over learned trees.

Documentation
-------------
"""

from __future__ import division

from .utils import Utils
from .logic import Prover

from math import log
from math import exp
from copy import deepcopy

# Log odds of the assumed prior probability that an example is positive
# (sigmoid(-1.8) is roughly 0.14).
_log_prior = -1.8


def computeAdviceGradient(example):
    """
    Proves each clause (:meth:`.Prover.prove`) and computes the advice
    gradient as ``NumberTrue - NumberFalse``.

    :param example: Example to prove the advice clauses against.
    :type example: str.
    """
    nt, nf = 0, 0
    target = Utils.data.target.split("(")[0]

    for clause in Utils.data.adviceClauses:
        if Prover.prove(Utils.data, example, clause):
            if target in Utils.data.adviceClauses[clause]["preferred"]:
                nt += 1
            if target in Utils.data.adviceClauses[clause]["nonPreferred"]:
                nf += 1

    return nt - nf
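
# A minimal sketch (not part of the original module) of the advice-clause
# structure walked above. The clause strings and labels are hypothetical;
# only the "preferred"/"nonPreferred" keys are assumed from the code.
def _exampleAdviceGradient():
    adviceClauses = {
        "cancer(X):-smokes(X)": {"preferred": ["cancer"],
                                 "nonPreferred": []},
        "cancer(X):-friends(X,Y)": {"preferred": [],
                                    "nonPreferred": ["cancer"]},
    }
    target = "cancer"
    # Suppose both clauses prove for the example: NumberTrue = 1 and
    # NumberFalse = 1, so the advice gradient is 1 - 1 = 0.
    nt = sum(target in v["preferred"] for v in adviceClauses.values())
    nf = sum(target in v["nonPreferred"] for v in adviceClauses.values())
    return nt - nf
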

def computeSumOfGradients(example, trees, data):
    """
    Computes the sum of gradients (leaf values) an example receives from
    each learned tree.

    :param example: Example to evaluate against each tree.
    :type example: str.
    :param trees: List of strings representing learned decision trees.
    :type trees: list.
    :param data: Data the trees were learned from.
    :type data: :py:class:`.utils.Data` object.
    """
    sumOfGradients = 0

    # Add leaf values satisfied by the example in each tree.
    for tree in trees:
        gradient = inferTreeValue(tree, example, data)
        sumOfGradients += gradient

    return sumOfGradients
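
# A minimal sketch of the accumulation above, with hypothetical per-tree
# leaf values standing in for calls to inferTreeValue:
def _exampleSumOfGradients():
    leafValues = [0.85, -0.20, 0.05]   # one leaf value per learned tree
    return sum(leafValues)             # 0.70
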

def inferTreeValue(clauses, query, data):
    """
    Returns the value of the leaf node reached when proving `query`
    against the clauses learned, or ``None`` if no clause applies.

    :param clauses: List of strings representing the clauses of one tree.
    :type clauses: list.
    :param query: Example to prove.
    :type query: str.
    :param data: Data the tree was learned from.
    :type data: :py:class:`.utils.Data` object.
    """
    for clause in clauses:
        clauseCopy = deepcopy(clause)
        clauseValue = float(clauseCopy.split(" ")[1])
        clauseRule = clauseCopy.split(" ")[0].replace(";", ",")
        if not clauseRule.split(":-")[1]:
            # An empty body means the clause always applies.
            return clauseValue
        if Prover.prove(data, query, clauseRule):
            # Check if the query satisfies the clause.
            return clauseValue
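
# A minimal sketch of the clause format parsed above: each learned clause
# is a "head:-body value" string, with ";" separating body literals and a
# space before the leaf value. The clause shown here is hypothetical.
def _exampleClauseParsing():
    clause = "cancer(X):-smokes(X);friends(X,Y) 0.85"
    clauseValue = float(clause.split(" ")[1])            # 0.85
    clauseRule = clause.split(" ")[0].replace(";", ",")
    return clauseRule, clauseValue                       # body ";" -> ","
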

def performInference(testData, trees):
    """
    Computes the probabilities for test examples.

    :param testData: Data for testing.
    :type testData: :py:class:`.utils.Data` object.
    :param trees: List of strings representing learned decision trees.
    :type trees: list.

    Example:

    .. code-block:: python

        from rfgb.boosting import performInference
    """
    # Log odds of the assumed prior probability for an example.
    logPrior = _log_prior

    if not testData.regression:
        for example in testData.pos:
            # Compute the sum of gradients, then calculate the
            # probability as sigmoid(log odds).
            sumOfGradients = computeSumOfGradients(example, trees, testData)
            testData.pos[example] = Utils.sigmoid(logPrior + sumOfGradients)

        for example in testData.neg:
            sumOfGradients = computeSumOfGradients(example, trees, testData)
            testData.neg[example] = Utils.sigmoid(logPrior + sumOfGradients)

    elif testData.regression:
        # For regression, the prediction is the raw sum of leaf values.
        for example in testData.examples:
            sumOfGradients = computeSumOfGradients(example, trees, testData)
            testData.examples[example] = sumOfGradients
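
# A minimal sketch of the classification inference step above: the
# predicted probability is sigmoid(prior log odds + sum of leaf values).
# The standard logistic function stands in for Utils.sigmoid (assumed
# equivalent), and the gradient sum is hypothetical.
def _exampleInference():
    sumOfGradients = 0.70                                 # from the trees
    return 1 / (1 + exp(-(_log_prior + sumOfGradients)))  # ~= 0.25
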

def updateGradients(data, trees, loss="LS", delta=None):
    """
    Update gradients of the data.

    :param data: Training or testing data (with parameters).
    :type data: :py:class:`.utils.Data` object.
    :param trees: List of strings representing trees.
    :type trees: list.
    :param loss: Loss function for regression (currently implemented:
                 'LS', 'LAD', 'Huber').
    :type loss: str.
    :param delta: Delta value for Huber loss.
    :type delta: float.

    Example:

    .. code-block:: python

        from rfgb.boosting import updateGradients
    """
    if data.regression:
        # For regression data, compute the gradient as y - y_hat.
        for example in data.examples:
            sumOfGradients = computeSumOfGradients(example, trees, data)
            trueValue = data.getExampleTrueValue(example)

            if loss == "LS":
                # Least Squares: the residual itself.
                data.examples[example] = trueValue - sumOfGradients

            elif loss == "LAD":
                # Least Absolute Deviation: the sign of the residual.
                updatedGradient = 0
                gradient = trueValue - sumOfGradients
                if gradient:
                    updatedGradient = gradient / float(abs(gradient))
                data.examples[example] = updatedGradient

            elif loss == "Huber":
                # Huber loss: the sign of the residual when its magnitude
                # exceeds delta, the raw residual otherwise.
                gradient = trueValue - sumOfGradients
                updatedGradient = 0
                if gradient:
                    if abs(gradient) > float(delta):
                        updatedGradient = gradient / float(abs(gradient))
                    else:
                        updatedGradient = gradient
                data.examples[example] = updatedGradient

    else:
        # For classification data, compute (label - P) for each example,
        # where P = sigmoid(prior log odds + sum of gradients from the
        # trees learned so far).
        logPrior = _log_prior

        if data.softm:
            # Soft-margin gradient updates, scaled by the alpha and beta
            # parameters.
            for example in data.pos:
                sumOfGradients = computeSumOfGradients(example, trees, data)
                prob = Utils.sigmoid(logPrior + sumOfGradients)
                updatedGradient = 1 - prob / (prob + (1 - prob) * exp(data.alpha))
                data.pos[example] = updatedGradient

            for example in data.neg:
                sumOfGradients = computeSumOfGradients(example, trees, data)
                prob = Utils.sigmoid(logPrior + sumOfGradients)
                updatedGradient = 1 - prob / (prob + (1 - prob) * exp(-data.beta))
                data.neg[example] = updatedGradient

        else:
            for example in data.pos:
                # For each positive example, compute 1 - P.
                sumOfGradients = computeSumOfGradients(example, trees, data)
                prob = Utils.sigmoid(logPrior + sumOfGradients)
                updatedGradient = 1 - prob
                if data.advice:
                    updatedGradient += computeAdviceGradient(example)
                data.pos[example] = updatedGradient

            for example in data.neg:
                # For each negative example, compute 0 - P.
                sumOfGradients = computeSumOfGradients(example, trees, data)
                prob = Utils.sigmoid(logPrior + sumOfGradients)
                updatedGradient = 0 - prob
                if data.advice:
                    updatedGradient += computeAdviceGradient(example)
                data.neg[example] = updatedGradient
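
# A minimal sketch of the three regression pseudo-residuals computed above
# (hypothetical numbers; `residual` is y - y_hat):
def _exampleRegressionGradients(residual=2.5, delta=1.0):
    ls = residual                                          # "LS"
    lad = residual / abs(residual) if residual else 0      # "LAD"
    huber = (residual / abs(residual)
             if abs(residual) > delta else residual)       # "Huber"
    return ls, lad, huber                                  # (2.5, 1.0, 1.0)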