use v6;

use DSL::Shared::Roles::English::PipelineCommand;
use DSL::Shared::Utilities::FuzzyMatching;

# Latent Semantic Analysis (LSA) phrases
# Grammar role with the English word tokens and multi-word phrase rules used to
# recognize Latent Semantic Analysis vocabulary (document-term matrices, LSI
# weight functions, topics, thesauri, matrix factorization method names).
#
# Sub-rules referenced here but not declared in this role (e.g. <document-noun>,
# <word-noun>, <matrix-noun>, <number-of>, <outliers>, <statistical>) are not
# visible in this file; presumably they come from the composed
# DSL::Shared::Roles::English::PipelineCommand role -- verify there.
#
# Reminder on declarators: `token` ignores whitespace inside the pattern, while
# `rule` (implicit :sigspace) turns whitespace that follows an atom into <.ws>.
# Spacing inside the `rule` bodies below is therefore significant.
role DSL::English::LatentSemanticAnalysisWorkflows::Grammar::LatentSemanticAnalysisPhrases
        does DSL::Shared::Roles::English::PipelineCommand {

    # NOTE: the original author reported that using <item-noun> "below" fails with
    # "Too many positionals passed; expected 1 argument but got 2" (cause not
    # identified). Presumably this refers to <document-term-phrase>, which uses
    # the literal 'item' instead of <item-noun> -- TODO confirm and investigate.

    # LSA specific
    # Single-word tokens; each matches exactly the quoted literal(s).
    token analysis-noun { 'analysis' }
    token component-noun { 'component' }
    token decomposition-noun { 'decomposition' }
    token entries-noun { 'entries' }
    token factorization-noun { 'factorization' }
    token independent-adjective { 'independent' }
    token indexing-noun { 'indexing' }
    token item-noun { 'item' }
    token items-noun { 'items' }
    token latent-adjective { 'latent' }
    token negative-adjective { 'negative' }
    # Hyphenated and closed spellings; the spaced form is handled in <NNMF-phrase>.
    token nonnegative-adjective { 'non-negative' | 'nonnegative' }
    token partition-noun { 'partition' }
    token principal-adjective { 'principal' }
    token query-noun { 'query' }
    token rules-noun { 'rules' }
    token semantic-adjective { 'semantic' }
    token singular-adjective { 'singular' }
    token stemming-noun { 'stemming' }
    # <stop-adjective> and <stop-noun> match the same literal; they differ only
    # in the grammatical role their names convey to callers.
    token stop-adjective { 'stop' }
    token stop-noun { 'stop' }
    token synonym-noun { 'synonym' }
    token synonyms-noun { 'synonyms' }
    token term-noun { 'term' }
    token terms-noun { 'terms' }
    token thesaurus-noun { 'thesaurus' }
    token topic-noun { 'topic' }
    token topics-noun { 'topics' }

    # Document word, then a separator ('-', '-vs-' or horizontal whitespace), then
    # a term word; e.g. "doc-term", "item-vs-term", "item term".
    token document-term-phrase {[ <document-noun> | 'doc' | 'item' ] [ '-' | '-vs-' | \h+ ] [ <term-noun> | <word-noun> ] }
    # <term-noun> and <weight-noun> joined by an optional hyphen, with optional
    # horizontal whitespace on either side of it.
    token term-weight-phrase { <term-noun> \h* '-'? \h* <weight-noun> }
    # Whitespace-separated "<document|item> <term|word> <matrix>" form.
    rule doc-term-mat { [ <document-noun> | <item-noun> ] [ <term-noun> | <word-noun> ] <matrix-noun> }
    # Same idea, but built on <document-term-phrase>, so hyphenated forms are accepted too.
    rule doc-term-mat-phrase { <document-term-phrase> <matrix-noun> }
    # Any one of the verbs that introduce data ingestion.
    rule ingest-directive { <ingest-verb> | <load-verb> | <use-verb> | <get-verb> }
    # <object-noun>, optionally preceded by <lsa-phrase>.
    rule lsa-object { <lsa-phrase>? <object-noun> }
    # "latent semantic analysis" spelled out, or the acronym in lower or upper case.
    rule lsa-phrase { <latent-adjective> <semantic-adjective> <analysis-noun> | 'lsa' | 'LSA' }
    # "latent semantic indexing" spelled out, or the acronym in lower or upper case.
    rule lsi-phrase { <latent-adjective> <semantic-adjective> <indexing-noun> | 'lsi' | 'LSI' }
    # "entries", optionally qualified by a document-term matrix phrase or <matrix-noun>.
    rule matrix-entries { [ <doc-term-mat-phrase> | <matrix-noun> ]? <entries-noun> }
    # "terms", optionally preceded by <number-of>.
    rule number-of-terms-phrase { <number-of>? <terms-noun> }
    # "synonyms", optionally preceded by <number-of> and optionally followed by
    # "<per-preposition> <word|term>".
    rule number-of-synonyms-phrase { <number-of>? <synonyms-noun> [ <per-preposition> [ <word-noun> | <term-noun> ] ]? }
    rule the-outliers { <the-determiner> <outliers> }

    # Document term matrix creation related
    # Singular and plural names of the text units listed in the patterns.
    rule data-element { 'sentence' | 'paragraph' | 'section' | 'chapter' | 'word' }
    rule data-elements { 'sentences' | 'paragraphs' | 'sections' | 'chapters' | 'words' }

    # Any singular or plural word for the rows of the matrix (documents / items).
    rule docs { <document-noun> | <documents-noun> | 'docs' | <item-noun> | <items-noun> }
    # Any singular or plural word for the columns of the matrix (words / terms).
    rule terms-phrase { <word-noun> | <words-noun> | <term-noun> | <terms-noun> }

    # "stemming", optionally followed by "rules".
    rule stemming-rules-phrase { <stemming-noun> [<rules-noun>]? }
    rule stop-words-phrase { <stop-adjective> <words-noun> }

    # "texts", or "text" optionally followed by "corpus" or "collection".
    rule text-corpus-phrase { 'texts' | 'text' [ 'corpus' | 'collection' ]? }

    # Topics and thesaurus
    rule statistical-thesaurus-phrase { <statistical>? <thesaurus-noun> }
    # "<topics> <table>" or "<table> <of> <topics>". Both alternatives end with
    # whitespace so that each one consumes trailing <.ws>, like every other rule
    # in this role.
    rule topics-table-phrase { <topics-noun> <table-noun> | <table-noun> <of-preposition> <topics-noun> }
    # "<per> [<one>] <topic>" or "<for> [<a>] [<one>] <topic>".
    rule per-topic-phrase { [ <per-preposition> | <for-preposition> <a-determiner>? ] <one-pronoun>? <topic-noun> }

    # LSI specific
    token frequency-noun { 'frequency' }
    token global-adjective { 'global' }
    token inverse-adjective { 'inverse' }
    token local-adjective { 'local' }
    token normalization-noun { 'normalization' }
    token normalizer-noun { 'normalizer' }
    token normalizing-noun { 'normalizing' }

    # Names of the three LSI weight-function kinds. In the global and local
    # variants <function-noun> is required; in the normalizer variant it is optional.
    rule global-function-phrase { <global-adjective> <term-noun>? <weight-adjective>? <function-noun> }
    rule join-type-phrase { <join-verb>? <type-noun> }
    rule local-function-phrase { <local-adjective> <term-noun>? <weight-adjective>? <function-noun> }
    rule normalizer-function-phrase { [ <normalizer-noun> | <normalizing-noun> | <normalization-noun> ] <term-noun>? <weight-adjective>? <function-noun>? }

    # Matrix factorization specific
    # Spelled-out names of the factorization / dimension reduction methods.
    rule ICA-phrase { <independent-adjective> <component-noun> <analysis-noun> }
    # Accepts "<non-prefix> <negative>" as well as the single-token spellings in
    # <nonnegative-adjective>.
    rule NNMF-phrase { [ <non-prefix> <negative-adjective> | <nonnegative-adjective> ] <matrix-noun> <factorization-noun> }
    rule PCA-phrase { <principal-adjective> <component-noun> <analysis-noun> }
    rule SVD-phrase { <singular-adjective> <value-noun> <decomposition-noun> }
}
