|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Objectstartup.ClassificationAction
public class ClassificationAction
This class represents the dataflow that is necessary to either build a classification model for a certain functional site, or evaluate a sequence for said functional site. This class is thus part of the backbone of the FunSiP-program, combining all the available resources and classes to produce valid data. Failure of execution in this class will return the workflow-pointer to the SprWorkflow-object, thus ending the objectives set for this classification action. In other words, the execution of ClassificationAction is as atomic as possible.
Nested Class Summary | |
---|---|
static class |
ClassificationAction.CLASSIFIERS
Choice of different available classifiers. |
static class |
ClassificationAction.COMMAND
The different dataflow/workflow commands. |
static class |
ClassificationAction.FILE_TYPE
Possible filetypes which can be used when evaluating sequence files. |
static class |
ClassificationAction.STRAND
Possible strands which should be evaluated by the classification models. |
Constructor Summary | |
---|---|
ClassificationAction(org.apache.log4j.Logger logger,
FunSiPWorkflow parent)
Constructor for this classification action. |
Method Summary | |
---|---|
void |
addClassificationFeature(ClassificationFeature cfi)
|
void |
addConversion(Conversion c)
|
Classifier |
build_model()
Method which creates a classification model for the current type of supplied training data. |
Classifier |
chooseClassifier(ClassificationAction.CLASSIFIERS cl)
This method creates the right classifier implementation that implements the Classifier interface, according to the chosen enumeration. |
void |
computeOptimalisationStrings()
This method is a more or less necessary evil. |
void |
deleteTrainingFile()
This method attempts to delete the file that contains all the features used for training the classifier. |
void |
evaluate_model(Classifier classifier)
Evaluate the content of sequence files with the supplied classifier (and accompanying classification model). |
java.io.File |
extractAndClassify(java.lang.String sequence,
Classifier classifier,
java.util.List<java.lang.Integer> sites,
java.util.List<java.lang.Integer> output_sites,
java.lang.String clas_name,
java.lang.String fileName,
int increase,
int increase_output)
This method extracts features around functional sites in a sequence, and then uses the provided classifier to classify that particular functional site. |
void |
extractAndEvaluateData(java.io.File file,
Classifier classifier,
int index)
Evaluate the content of a specific file with the supplied classification model. |
java.util.List<java.util.List<java.lang.Double>> |
extractFeaturesFromSequence(java.lang.String sequence,
java.lang.String secon,
java.lang.Double energy,
java.lang.String[] conversion_strings,
int splicesite)
This method is part of the main-loop that extracts the different features. |
void |
extractFeaturesFromTrainingData(Classifier classifier)
This method extracts the necessary features in order to train a classifier with these features. |
java.lang.String |
getClassification_name()
|
java.util.List<ClassificationFeature> |
getClassificationFeatures()
|
java.lang.String[] |
getClassifier_options()
|
ClassificationAction.CLASSIFIERS |
getClassifier_type()
|
ClassificationAction.COMMAND |
getCommand()
|
java.util.List<Conversion> |
getConversionList()
|
java.lang.String[] |
getConversionStrings(java.lang.String sequence)
This method takes one string as input and produces all the conversions of this string. |
int |
getCrossvalidation_fold()
|
java.io.File |
getCrossValidationTabFile()
|
java.lang.String |
getFeatures_output_file()
|
FeatureSelectionStub |
getFeatureSelection()
|
java.io.File |
getFeaturesTrainingData(java.lang.String fileName,
java.util.List<java.lang.String> sequences,
SecondaryStructureData ssData,
Classifier classifier,
Classifier.DATA_TYPE data_type)
This method extracts the features from a training file (positive/negative), and stores the extracted features into the feature_output_file. |
int |
getMaximum_crossvalidation()
|
int |
getMaximumSecondaryDownRange()
This method returns the maximum downstream range of all ClassificationFeatures that implement secondary structure features. |
int |
getMaximumSecondaryUpRange()
This method returns the maximum upstream range of all ClassificationFeatures that implement secondary structure features. |
java.lang.String |
getModel_file()
|
java.lang.String |
getName()
|
int |
getNegative_training_amount()
|
java.lang.String |
getNegative_training_file_secstruct()
|
java.lang.String |
getNegative_training_file()
|
java.lang.Double |
getOutput_fixed_alpha()
|
java.lang.Double |
getOutput_fixed_beta()
|
java.lang.String |
getOutputdirectory()
|
int |
getPattern_classification_location()
|
int |
getPattern_classification_output_f()
|
int |
getPattern_classification_output_r()
|
java.lang.String |
getPattern()
|
int |
getPositive_training_amount()
|
java.lang.String |
getPositive_training_file_secstruct()
|
java.lang.String |
getPositive_training_file()
|
java.lang.String[] |
getPredictionOutputFiles()
|
ClassificationAction.STRAND |
getStrand()
|
ClassificationAction.FILE_TYPE |
getTestfiles_filetype()
|
java.lang.String[] |
getTestfiles()
|
int |
getTraining_functional_site_position()
|
boolean |
isComplexityCrossvalidation()
|
boolean |
isPattern_regexp()
|
Classifier |
load_model()
This method loads a precomputed classification model into memory. |
ClassificationAction |
make_copy()
This method makes a copy of the current classification action. |
boolean |
mustExtractSecondaryStructures()
This method determines whether or not the class should extract the secondary structures, according to the types of classification features. |
void |
perform_crossvalidation(Classifier classifier,
java.io.File featureFile,
java.util.List<RocCurveData> rocCurveDataNames,
CrossValidationOutput output)
This method performs the actual crossvalidation. |
void |
performOptimalComplexityCrossvalidation(Classifier classifier,
java.io.File featureFile)
This method performs the complexity crossvalidation operation. |
void |
run()
Starts the dataflow in an asynchronous, threaded way. |
void |
setClassification_name(java.lang.String classification_name)
|
void |
setClassificationFeatures(java.util.List<ClassificationFeature> newList)
|
void |
setClassifier_options(java.lang.String[] classifier_options)
|
void |
setClassifier_type(ClassificationAction.CLASSIFIERS classifier_type)
|
boolean |
setClassifier_type(java.lang.String s)
|
void |
setCommand(ClassificationAction.COMMAND command)
|
boolean |
setCommand(java.lang.String s)
|
void |
setComplexityCrossvalidation(boolean complexityCrossvalidation)
|
void |
setConversionList(java.util.List<Conversion> conversionList)
|
void |
setCrossvalidation_fold(int crossvalidation_fold)
|
void |
setCrossValidationTabFile(java.io.File crossValidationTabFile)
|
void |
setFeatures_output_file(java.lang.String features_output_file)
|
void |
setFeatureSelection(FeatureSelectionStub featureSelection)
|
void |
setMaximum_crossvalidation(int maximum_crossvalidation)
|
void |
setModel_file(java.lang.String file)
|
void |
setName(java.lang.String name)
|
void |
setNegative_training_amount(int negative_training_amount)
|
void |
setNegative_training_file_secstruct(java.lang.String negative_training_file_secstruct)
|
void |
setNegative_training_file(java.lang.String negative_training_file)
|
void |
setOutput_fixed_alpha(java.lang.Double output_fixed_alpha)
|
void |
setOutput_fixed_beta(java.lang.Double output_fixed_beta)
|
void |
setOutputdirectory(java.lang.String outputdirectory)
|
void |
setPattern_classification_location(int pattern_classification_location)
|
void |
setPattern_classification_output_f(int pattern_classification_output_f)
|
void |
setPattern_classification_output_r(int pattern_classification_output_r)
|
void |
setPattern_regexp(boolean pattern_regexp)
|
void |
setPattern(java.lang.String pattern)
|
void |
setPositive_training_amount(int positive_training_amount)
|
void |
setPositive_training_file_secstruct(java.lang.String positive_training_file_secstruct)
|
void |
setPositive_training_file(java.lang.String positive_training_file)
|
void |
setPredictionOutputFiles(java.lang.String[] predictionOutputFiles)
|
void |
setStrand(ClassificationAction.STRAND strand)
|
boolean |
setStrand(java.lang.String s)
|
void |
setTestfiles_filetype(ClassificationAction.FILE_TYPE testfiles_filetype)
|
boolean |
setTestfiles_filetype(java.lang.String s)
|
void |
setTestfiles(java.lang.String[] testfiles)
|
void |
setTraining_functional_site_position(int splicesite)
|
void |
start_classification()
This method starts the actual dataflow within the classification action. |
Methods inherited from class java.lang.Object |
---|
equals, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Constructor Detail |
---|
public ClassificationAction(org.apache.log4j.Logger logger, FunSiPWorkflow parent)
logger
- The logger-object is kept the same throughout the entire workflow,
so all logging-activity is done in the same way.
parent
- The workflow object that contains this classification action. Each
classification action is part of a workflow, and each workflow contains at least
one classification action.
Method Detail |
---|
public ClassificationAction make_copy()
public void run()
run
in interface java.lang.Runnable
public void start_classification()
public Classifier build_model()
public void computeOptimalisationStrings()
public void perform_crossvalidation(Classifier classifier, java.io.File featureFile, java.util.List<RocCurveData> rocCurveDataNames, CrossValidationOutput output)
classifier
- The classifier-object used for performing the calculations.
featureFile
- The file containing the extracted features.
rocCurveDataNames
- An empty list to be filled with names of roc-curves, derived
from the crossvalidation results.
output
- Object to which the output of the crossvalidation will be written.
public void performOptimalComplexityCrossvalidation(Classifier classifier, java.io.File featureFile)
classifier
- The classifier object.
featureFile
- The file containing the extracted features.
public Classifier load_model()
public void evaluate_model(Classifier classifier)
classifier
- The classifier-object containing the classification model.
public void extractAndEvaluateData(java.io.File file, Classifier classifier, int index) throws java.lang.Exception
file
- File containing the sequence to be evaluated.
classifier
- The classifier-object containing the classification model.
index
- The index of the file.
java.lang.Exception
- Thrown if the evaluation of a certain file goes wrong.
public java.io.File extractAndClassify(java.lang.String sequence, Classifier classifier, java.util.List<java.lang.Integer> sites, java.util.List<java.lang.Integer> output_sites, java.lang.String clas_name, java.lang.String fileName, int increase, int increase_output)
sequence
- The sequence in which the functional sites are located.
classifier
- The classifier with associated classification model that is used for evaluation of the functional sites.
sites
- The location of the functional sites in the sequence.
output_sites
- The location of the functional sites when remapped to the forward strand (necessary for output).
clas_name
- The name used for output for the functional site.
fileName
- The name of the file that will contain the results of the evaluation.
increase
- An integer that is used to increase/decrease the position of the functional site relative to the
location of its identifying pattern (e.g. acceptor AG sites: AG is the pattern, but the acceptor splice site is
located after the G).
increase_output
- An integer that identifies an additional increase/decrease to the location of the functional site in the
output. This is done in order to accommodate the difference in counting (informaticians start from 0, biologists from 1),
so the output can be fine-tuned enough that no post-processing is needed.
public void extractFeaturesFromTrainingData(Classifier classifier) throws java.lang.Exception
classifier
- The classifier to be used.
java.lang.Exception
public java.io.File getFeaturesTrainingData(java.lang.String fileName, java.util.List<java.lang.String> sequences, SecondaryStructureData ssData, Classifier classifier, Classifier.DATA_TYPE data_type)
fileName
- The name of the file that contains the sequences.
sequences
- A list with sequences.
ssData
- The secondary structure data.
classifier
- The selected classifier.
data_type
- The datatype (positive,negative,unknown).
public java.lang.String[] getConversionStrings(java.lang.String sequence)
sequence
- The original DNA/RNA sequence
public java.util.List<java.util.List<java.lang.Double>> extractFeaturesFromSequence(java.lang.String sequence, java.lang.String secon, java.lang.Double energy, java.lang.String[] conversion_strings, int splicesite) throws FeatureExtractionException
sequence
- The sequence from which the features should be extracted.
secon
- The secondary structure of the sequence.
energy
- The free energy that is associated with the secondary structure.
splicesite
- The location of the splicesite, relative to which the upstream/downstream parameters are located.
FeatureExtractionException
- Thrown when the set parameters are in contradiction with the arguments of the methods.
public void deleteTrainingFile()
public Classifier chooseClassifier(ClassificationAction.CLASSIFIERS cl)
cl
- Indication of classifier to be used
public boolean mustExtractSecondaryStructures()
public int getMaximumSecondaryUpRange()
public int getMaximumSecondaryDownRange()
public java.lang.String getPattern()
public void setPattern(java.lang.String pattern)
public int getPattern_classification_location()
public void setPattern_classification_location(int pattern_classification_location)
public boolean isPattern_regexp()
public void setPattern_regexp(boolean pattern_regexp)
public int getNegative_training_amount()
public void setNegative_training_amount(int negative_training_amount)
public int getPositive_training_amount()
public void setPositive_training_amount(int positive_training_amount)
public ClassificationAction.STRAND getStrand()
public void setStrand(ClassificationAction.STRAND strand)
public boolean setStrand(java.lang.String s)
public ClassificationAction.FILE_TYPE getTestfiles_filetype()
public void setTestfiles_filetype(ClassificationAction.FILE_TYPE testfiles_filetype)
public boolean setTestfiles_filetype(java.lang.String s)
public ClassificationAction.COMMAND getCommand()
public void setCommand(ClassificationAction.COMMAND command)
public boolean setCommand(java.lang.String s)
public int getCrossvalidation_fold()
public void setCrossvalidation_fold(int crossvalidation_fold)
public int getMaximum_crossvalidation()
public void setMaximum_crossvalidation(int maximum_crossvalidation)
public java.lang.String getModel_file()
public void setModel_file(java.lang.String file)
public java.lang.String getFeatures_output_file()
public void setFeatures_output_file(java.lang.String features_output_file)
public java.lang.String[] getTestfiles()
public void setTestfiles(java.lang.String[] testfiles)
public java.lang.String getNegative_training_file()
public void setNegative_training_file(java.lang.String negative_training_file)
public java.lang.String getOutputdirectory()
public void setOutputdirectory(java.lang.String outputdirectory)
public java.lang.String getPositive_training_file()
public void setPositive_training_file(java.lang.String positive_training_file)
public int getTraining_functional_site_position()
public void setTraining_functional_site_position(int splicesite)
public void setName(java.lang.String name)
public java.lang.String getName()
public void setClassificationFeatures(java.util.List<ClassificationFeature> newList)
public void addClassificationFeature(ClassificationFeature cfi)
public java.util.List<ClassificationFeature> getClassificationFeatures()
public java.lang.String getClassification_name()
public void setClassification_name(java.lang.String classification_name)
public ClassificationAction.CLASSIFIERS getClassifier_type()
public void setClassifier_type(ClassificationAction.CLASSIFIERS classifier_type)
public boolean setClassifier_type(java.lang.String s)
public java.lang.String getNegative_training_file_secstruct()
public void setNegative_training_file_secstruct(java.lang.String negative_training_file_secstruct)
public java.lang.String getPositive_training_file_secstruct()
public void setPositive_training_file_secstruct(java.lang.String positive_training_file_secstruct)
public FeatureSelectionStub getFeatureSelection()
public void setFeatureSelection(FeatureSelectionStub featureSelection)
public java.lang.String[] getClassifier_options()
public void setClassifier_options(java.lang.String[] classifier_options)
public java.lang.String[] getPredictionOutputFiles()
public void setPredictionOutputFiles(java.lang.String[] predictionOutputFiles)
public int getPattern_classification_output_f()
public void setPattern_classification_output_f(int pattern_classification_output_f)
public int getPattern_classification_output_r()
public void setPattern_classification_output_r(int pattern_classification_output_r)
public java.lang.Double getOutput_fixed_alpha()
public void setOutput_fixed_alpha(java.lang.Double output_fixed_alpha)
public java.lang.Double getOutput_fixed_beta()
public void setOutput_fixed_beta(java.lang.Double output_fixed_beta)
public boolean isComplexityCrossvalidation()
public void setComplexityCrossvalidation(boolean complexityCrossvalidation)
public java.io.File getCrossValidationTabFile()
public void setCrossValidationTabFile(java.io.File crossValidationTabFile)
public java.util.List<Conversion> getConversionList()
public void addConversion(Conversion c)
public void setConversionList(java.util.List<Conversion> conversionList)
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |