Class VcfEntry

    • Field Detail

      • EMPTY_STRING_ARRAY

        public static final java.lang.String[] EMPTY_STRING_ARRAY
      • ALLELE_FEQUENCY_COMMON

        public static final double ALLELE_FEQUENCY_COMMON
        See Also:
        Constant Field Values
      • INFO_KEY_PATTERN

        public static final java.util.regex.Pattern INFO_KEY_PATTERN
      • VCF_ALT_NON_REF_gVCF

        public static final java.lang.String VCF_ALT_NON_REF_gVCF
        See Also:
        Constant Field Values
      • VCF_ALT_MISSING_REF

        public static final java.lang.String VCF_ALT_MISSING_REF
        See Also:
        Constant Field Values
      • VCF_ALT_NON_REF_gVCF_ARRAY

        public static final java.lang.String[] VCF_ALT_NON_REF_gVCF_ARRAY
      • VCF_ALT_NON_REF_ARRAY

        public static final java.lang.String[] VCF_ALT_NON_REF_ARRAY
      • VCF_ALT_MISSING_REF_ARRAY

        public static final java.lang.String[] VCF_ALT_MISSING_REF_ARRAY
      • VCF_INFO_PRIVATE

        public static final java.lang.String VCF_INFO_PRIVATE
        See Also:
        Constant Field Values
      • alts

        protected java.lang.String[] alts
      • altStr

        protected java.lang.String altStr
      • chromosomeName

        protected java.lang.String chromosomeName
      • filter

        protected java.lang.String filter
      • format

        protected java.lang.String format
      • formatFields

        protected java.lang.String[] formatFields
      • genotypeFields

        protected java.lang.String[] genotypeFields
      • genotypeFieldsStr

        protected java.lang.String genotypeFieldsStr
      • genotypeScores

        protected byte[] genotypeScores
      • info

        protected java.util.HashMap<java.lang.String,​java.lang.String> info
      • infoStr

        protected java.lang.String infoStr
      • line

        protected java.lang.String line
      • lineNum

        protected int lineNum
      • quality

        protected java.lang.Double quality
      • ref

        protected java.lang.String ref
      • variants

        protected java.util.LinkedList<Variant> variants
      • vcfEffects

        protected java.util.List<VcfEffect> vcfEffects
      • vcfGenotypes

        protected java.util.ArrayList<VcfGenotype> vcfGenotypes
    • Constructor Detail

      • VcfEntry

        public VcfEntry​(VcfFileIterator vcfFileIterator,
                        Marker parent,
                        java.lang.String chromosomeName,
                        int start,
                        java.lang.String id,
                        java.lang.String ref,
                        java.lang.String altsStr,
                        double quality,
                        java.lang.String filterPass,
                        java.lang.String infoStr,
                        java.lang.String format)
      • VcfEntry

        public VcfEntry​(VcfFileIterator vcfFileIterator,
                        java.lang.String line,
                        int lineNum,
                        boolean parseNow)
        Create a line from a file iterator
    • Method Detail

      • isEmpty

        public static boolean isEmpty​(java.lang.String value)
        Does 'value' represent an EMPTY / MISSING value in a VCF field? (or multiple MISSING comma-separated values)
      • isValidInfoKey

        public static boolean isValidInfoKey​(java.lang.String key)
        Make sure the INFO key matches the regular expression (as specified in VCF spec 4.3)
      • isValidInfoValue

        public static boolean isValidInfoValue​(java.lang.String value)
        Check that this value can be added to an INFO field
        Returns:
        true if OK, false if invalid value
      • vcfInfoDecode

        public static java.lang.String vcfInfoDecode​(java.lang.String str)
        Decode INFO value
      • vcfInfoEncode

        public static java.lang.String vcfInfoEncode​(java.lang.String str)
        Encode a string to be used in an 'INFO' field value. From the VCF 4.3 specification: Characters with special meaning (such as field delimiters ';' in INFO or ':' FORMAT fields) must be represented using the capitalized percent encoding: %3A : (colon), %3B ; (semicolon), %3D = (equal sign), %25 % (percent sign), %2C , (comma), %0D CR, %0A LF, %09 TAB
      • vcfInfoKeySafe

        public static java.lang.String vcfInfoKeySafe​(java.lang.String str)
        Return a string safe to be used in an 'INFO' field key
      • vcfInfoValueSafe

        public static java.lang.String vcfInfoValueSafe​(java.lang.String str)
        Return a string safe to be used in an 'INFO' field value
      • addFilter

        public void addFilter​(java.lang.String filterStr)
        Add string to FILTER field
      • addFormat

        public void addFormat​(java.lang.String formatName)
        Add a 'FORMAT' field
      • addGenotype

        public void addGenotype​(java.lang.String vcfGenotypeStr)
        Add a genotype as a string
      • addInfo

        public void addInfo​(java.lang.String key,
                            java.lang.String value)
        Add a "key=value" tuple to the info field
        Parameters:
        key - : INFO key name
        value - : Can be null if it is a boolean field.
      • calcHetero

        public java.lang.Boolean calcHetero()
        Is this entry heterozygous? Infer Hom/Het if there is only one sample in the file. Otherwise the field is null.
      • check

        public java.lang.String check()
        Perform several simple checks and report problems (if any).
      • cloneShallow

        public Cds cloneShallow()
        Description copied from class: Marker
        Perform a shallow clone
        Overrides:
        cloneShallow in class Marker
      • compressGenotypes

        public boolean compressGenotypes()
        Compress genotypes into "HO/HE/NA" INFO fields
      • delFilter

        public boolean delFilter​(java.lang.String filterStr)
        Remove a string from FILTER field
      • getAltIndex

        public int getAltIndex​(java.lang.String alt)
        Get index of matching ALT entry
        Returns:
        -1 if not found
      • getAlts

        public java.lang.String[] getAlts()
      • getAltsStr

        public java.lang.String getAltsStr()
        Create a comma separated ALTS string
      • getChromosomeNameOri

        public java.lang.String getChromosomeNameOri()
        Original chromosome name (as it appeared in the VCF file)
        Overrides:
        getChromosomeNameOri in class Interval
      • getFilter

        public java.lang.String getFilter()
      • getFormat

        public java.lang.String getFormat()
      • getFormatFields

        public java.lang.String[] getFormatFields()
      • getGenotypesScores

        public byte[] getGenotypesScores()
        Return genotypes parsed as an array of codes
      • getInfo

        public java.lang.String getInfo​(java.lang.String key)
        Get info string
      • getInfo

        public java.lang.String getInfo​(java.lang.String key,
                                        java.lang.String allele)
        Get info string for a specific allele
      • getInfo

        public java.lang.String getInfo​(java.lang.String key,
                                        Variant var)
        Get an INFO field matching a variant
      • getInfoFlag

        public boolean getInfoFlag​(java.lang.String key)
        Does the entry exist?
      • getInfoFloat

        public double getInfoFloat​(java.lang.String key)
        Get info field as a 'double' number. The norm specifies data type as 'FLOAT', that is why the name of this method might not be intuitive
      • getInfoInt

        public long getInfoInt​(java.lang.String key)
        Get info field as a long number. The norm specifies data type as 'INT', that is why the name of this method might not be intuitive
      • getInfoKeys

        public java.util.Set<java.lang.String> getInfoKeys()
        Get all keys available in the info field
      • getInfoStr

        public java.lang.String getInfoStr()
        Get the full (unparsed) INFO field
      • getLine

        public java.lang.String getLine()
        Original VCF line (from file)
      • getLineNum

        public int getLineNum()
      • getNumberOfSamples

        public int getNumberOfSamples()
        number of samples in this VCF file
      • getQuality

        public double getQuality()
      • getRef

        public java.lang.String getRef()
      • getStr

        public java.lang.String getStr()
      • getVcfEffects

        public java.util.List<VcfEffect> getVcfEffects()
      • getVcfEffects

        public java.util.List<VcfEffect> getVcfEffects​(EffFormatVersion formatVersion)
        Parse 'EFF' info field and get a list of effects
      • getVcfGenotype

        public VcfGenotype getVcfGenotype​(int index)
      • getVcfGenotypes

        public java.util.List<VcfGenotype> getVcfGenotypes()
      • getVcfInfo

        public VcfHeaderInfo getVcfInfo​(java.lang.String id)
        Get VcfInfo type for a given ID
      • getVcfInfoNumber

        public VcfInfoType getVcfInfoNumber​(java.lang.String id)
        Get Info number for a given ID
      • hasField

        public boolean hasField​(java.lang.String filedName)
      • hasGenotypes

        public boolean hasGenotypes()
      • hasInfo

        public boolean hasInfo​(java.lang.String infoFieldName)
      • hasQuality

        public boolean hasQuality()
      • isBiAllelic

        public boolean isBiAllelic()
        Is this bi-allelic (based ONLY on the number of ALTs). WARNING: You should use 'calcHetero()' method for a more precise calculation.
      • isCompressedGenotypes

        public boolean isCompressedGenotypes()
        Do we have compressed genotypes in "HO,HE,NA" INFO fields?
      • isFilterPass

        public boolean isFilterPass()
      • isMultiallelic

        public boolean isMultiallelic()
        Is this multi-allelic (based ONLY on the number of ALTs). WARNING: You should use 'calcHetero()' method for a more precise calculation.
      • isShowWarningIfParentDoesNotInclude

        protected boolean isShowWarningIfParentDoesNotInclude()
        Description copied from class: Marker
        Show an error if parent does not include child?
        Overrides:
        isShowWarningIfParentDoesNotInclude in class Marker
      • isSingleSnp

        public boolean isSingleSnp()
        Is this a VCF entry with a single SNP?
      • isSingleton

        public boolean isSingleton()
        Is this variant a singleton (appears only in one genotype)
      • isVariant

        public boolean isVariant()
        Is this a change or are the ALTs actually the same as the reference
      • isVariant

        public boolean isVariant​(java.lang.String alt)
        Is this ALT string a variant?
      • iterator

        public java.util.Iterator<VcfGenotype> iterator()
        Specified by:
        iterator in interface java.lang.Iterable<VcfGenotype>
      • mac

        public int mac()
        Calculate Minor allele count
      • maf

        public double maf()
        Calculate Minor allele frequency
      • parse

        public void parse()
        Parse a 'line' from a 'vcfFileIterator'
      • parseLof

        public java.util.List<VcfLof> parseLof()
        Parse LOF from VcfEntry
      • parseNmd

        public java.util.List<VcfNmd> parseNmd()
        Parse NMD from VcfEntry
      • removeInfo

        public void removeInfo​(java.lang.String key)
        Remove INFO field
      • rmInfo

        public boolean rmInfo​(java.lang.String info)
        Parse INFO fields
      • setFilter

        public void setFilter​(java.lang.String filter)
      • setFormat

        public void setFormat​(java.lang.String format)
      • setGenotypeStr

        public void setGenotypeStr​(java.lang.String genotypeFieldsStr)
      • setLineNum

        public void setLineNum​(int lineNum)
      • toStr

        public java.lang.String toStr()
        To string as a simple "CHR:START_REF/ALTs" format
        Overrides:
        toStr in class Interval
      • toString

        public java.lang.String toString()
        Overrides:
        toString in class Marker
      • toStringNoGt

        public java.lang.String toStringNoGt()
        Show only first eight fields (no genotype entries)
      • uncompressGenotypes

        public VcfEntry uncompressGenotypes()
        Uncompress VCF entry having genotypes in "HO,HE,NA" fields
      • variants

        public java.util.List<Variant> variants()
        Create a list of variants from this VcfEntry